summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorClement Sibille <clements@lisible.xyz>2025-05-05 08:32:33 +0200
committerClement Sibille <clements@lisible.xyz>2025-05-05 12:24:27 +0200
commitb71eac2069a30349435c192d682e865718c86a15 (patch)
tree33754245a23533e31e6a83390bf190c11dfe2bb9
parent6017db0069977ae85e698a1234f4a2b7632ee495 (diff)
Add a vulkan renderer that renders an OBJ
-rw-r--r--assets/model.obj98
-rw-r--r--assets/texture.pngbin0 -> 5056 bytes
-rw-r--r--meson.build48
-rw-r--r--shaders/triangle.frag5
-rw-r--r--shaders/triangle.vert26
-rw-r--r--src/alloc.c171
-rw-r--r--src/alloc.h40
-rw-r--r--src/engine.c17
-rw-r--r--src/engine.h14
-rw-r--r--src/hash.c15
-rw-r--r--src/hash.h9
-rw-r--r--src/image.c20
-rw-r--r--src/image.h21
-rw-r--r--src/log.c13
-rw-r--r--src/log.h21
-rw-r--r--src/main.c28
-rw-r--r--src/maths.c103
-rw-r--r--src/maths.h49
-rw-r--r--src/platform.c5
-rw-r--r--src/platform.h98
-rw-r--r--src/platform_sdl.c124
-rw-r--r--src/renderer.c1470
-rw-r--r--src/renderer.h45
-rw-r--r--src/renderer/renderer.c2559
-rw-r--r--src/renderer/renderer.h126
-rw-r--r--src/renderer/vma_usage.cpp4
-rw-r--r--src/renderer/vma_usage.h6
-rw-r--r--src/str.c181
-rw-r--r--src/str.h62
-rw-r--r--thirdpartylicenses.md46
-rw-r--r--vendor/stb_image.h7988
-rw-r--r--vendor/tiny_obj_loader_c.h1793
-rw-r--r--vendor/vk_mem_alloc.h19111
33 files changed, 32614 insertions, 1702 deletions
diff --git a/assets/model.obj b/assets/model.obj
new file mode 100644
index 0000000..ab344bb
--- /dev/null
+++ b/assets/model.obj
@@ -0,0 +1,98 @@
+# Blender 4.4.1
+# www.blender.org
+mtllib model.mtl
+o Cube
+v 1.000000 1.000000 -1.000000
+v 1.000000 -1.000000 -1.000000
+v 1.000000 1.000000 1.000000
+v 1.000000 -1.000000 1.000000
+v -1.000000 1.000000 -1.000000
+v -1.000000 -1.000000 -1.000000
+v -1.000000 1.000000 1.000000
+v -1.000000 -1.000000 1.000000
+v -1.637802 -1.000000 0.000000
+v 1.389749 1.000000 0.000000
+v -1.637802 1.000000 0.000000
+v 1.389749 -1.000000 0.000000
+v 0.000000 -1.000000 -1.000000
+v 0.000000 1.000000 1.000000
+v 0.000000 -1.000000 1.000000
+v 0.000000 1.000000 -1.000000
+v 0.000000 -1.000000 0.000000
+v 0.000000 1.905488 0.000000
+vn -0.3792 0.6859 0.6211
+vn -0.0000 -0.0000 1.0000
+vn -0.8431 -0.0000 -0.5377
+vn -0.0000 -1.0000 -0.0000
+vn 0.9317 -0.0000 0.3631
+vn -0.0000 -0.0000 -1.0000
+vn 0.9317 -0.0000 -0.3631
+vn -0.8431 -0.0000 0.5377
+vn -0.3792 0.6859 -0.6211
+vn -0.0000 1.0000 -0.0000
+vn 0.4349 0.6675 -0.6044
+vn 0.4349 0.6675 0.6044
+vt 0.875000 0.625000
+vt 0.750000 0.750000
+vt 0.750000 0.625000
+vt 0.625000 0.875000
+vt 0.375000 1.000000
+vt 0.375000 0.875000
+vt 0.625000 0.125000
+vt 0.375000 0.250000
+vt 0.375000 0.125000
+vt 0.375000 0.625000
+vt 0.250000 0.750000
+vt 0.250000 0.625000
+vt 0.625000 0.625000
+vt 0.375000 0.750000
+vt 0.625000 0.375000
+vt 0.375000 0.500000
+vt 0.375000 0.375000
+vt 0.625000 0.500000
+vt 0.250000 0.500000
+vt 0.625000 0.000000
+vt 0.375000 0.000000
+vt 0.750000 0.500000
+vt 0.125000 0.625000
+vt 0.125000 0.500000
+vt 0.625000 0.250000
+vt 0.125000 0.750000
+vt 0.625000 0.750000
+vt 0.875000 0.750000
+vt 0.625000 1.000000
+vt 0.875000 0.500000
+s 0
+usemtl Material
+f 11/1/1 14/2/1 18/3/1
+f 14/4/2 8/5/2 15/6/2
+f 11/7/3 6/8/3 9/9/3
+f 12/10/4 15/11/4 17/12/4
+f 10/13/5 4/14/5 12/10/5
+f 16/15/6 2/16/6 13/17/6
+f 1/18/7 12/10/7 2/16/7
+f 2/16/4 17/12/4 13/19/4
+f 7/20/8 9/9/8 8/21/8
+f 16/22/9 11/1/9 18/3/9
+f 16/22/10 10/13/10 1/18/10
+f 13/19/4 9/23/4 6/24/4
+f 5/25/6 13/17/6 6/8/6
+f 17/12/4 8/26/4 9/23/4
+f 3/27/2 15/6/2 4/14/2
+f 10/13/10 14/2/10 3/27/10
+f 11/1/10 7/28/10 14/2/10
+f 14/4/2 7/29/2 8/5/2
+f 11/7/3 5/25/3 6/8/3
+f 12/10/4 4/14/4 15/11/4
+f 10/13/5 3/27/5 4/14/5
+f 16/15/6 1/18/6 2/16/6
+f 1/18/7 10/13/7 12/10/7
+f 2/16/4 12/10/4 17/12/4
+f 7/20/8 11/7/8 9/9/8
+f 16/22/10 5/30/10 11/1/10
+f 16/22/11 18/3/11 10/13/11
+f 13/19/4 17/12/4 9/23/4
+f 5/25/6 16/15/6 13/17/6
+f 17/12/4 15/11/4 8/26/4
+f 3/27/2 14/4/2 15/6/2
+f 10/13/12 18/3/12 14/2/12
diff --git a/assets/texture.png b/assets/texture.png
new file mode 100644
index 0000000..4065f75
--- /dev/null
+++ b/assets/texture.png
Binary files differ
diff --git a/meson.build b/meson.build
index 6aa0ceb..8ea3009 100644
--- a/meson.build
+++ b/meson.build
@@ -1,8 +1,11 @@
-project('visible-gltf', 'c', default_options: ['warning_level=3', 'c_std=c23'])
+project('visiblegltf', [ 'c', 'cpp' ], default_options: ['c_std=c2x', 'cpp_std=c++20', 'warning_level=3'])
build_type = get_option('buildtype')
sdl3_dep = dependency('sdl3')
+
+vendor_incdir = include_directories('vendor', is_system: true)
+
if host_machine.system() == 'darwin'
moltenvk_library_path = '/Users/clements/dev/VulkanSDK/1.4.309.0/macOS/lib'
moltenvk_include_path = '/Users/clements/dev/VulkanSDK/1.4.309.0/macOS/include'
@@ -10,7 +13,7 @@ vulkan_dep = declare_dependency(
link_args: ['-L' + moltenvk_library_path, '-lvulkan'],
include_directories: include_directories(moltenvk_include_path)
)
-else
+else
vulkan_dep = dependency('vulkan')
endif
@@ -19,14 +22,39 @@ if build_type == 'debug'
vgltf_c_args += '-DVGLTF_DEBUG'
endif
-executable(
+if host_machine.system() == 'darwin'
+ vgltf_c_args += '-DVGLTF_PLATFORM_MACOS'
+elif host_machine.system() == 'linux'
+ vgltf_c_args += '-DVGLTF_PLATFORM_LINUX'
+elif host_machine.system() == 'windows'
+ vgltf_c_args += '-DVGLTF_PLATFORM_WINDOWS'
+endif
+
+vgltf_deps = [
+ sdl3_dep,
+ vulkan_dep,
+]
+
+vgltf_srcs = [
+ 'src/main.c',
+ 'src/log.c',
+ 'src/maths.c',
+ 'src/alloc.c',
+ 'src/hash.c',
+ 'src/str.c',
+ 'src/platform.c',
+ 'src/platform_sdl.c',
+ 'src/image.c',
+ 'src/renderer/renderer.c',
+ 'src/renderer/vma_usage.cpp',
+ 'src/engine.c',
+]
+
+vgltf_exe = executable(
'vgltf',
- [
- 'src/main.c',
- 'src/log.c',
- 'src/platform_sdl.c',
- 'src/renderer.c',
- ],
+ vgltf_srcs,
c_args: vgltf_c_args,
- dependencies: [sdl3_dep, vulkan_dep],
+ dependencies: vgltf_deps,
+ link_language: 'cpp',
+ include_directories: [vendor_incdir]
)
diff --git a/shaders/triangle.frag b/shaders/triangle.frag
index 7c5b0e7..c7d99f3 100644
--- a/shaders/triangle.frag
+++ b/shaders/triangle.frag
@@ -1,9 +1,12 @@
#version 450
layout(location = 0) in vec3 fragColor;
+layout(location = 1) in vec2 fragTextureCoordinates;
layout(location = 0) out vec4 outColor;
+layout(binding = 1) uniform sampler2D textureSampler;
+
void main() {
- outColor = vec4(fragColor, 1.0);
+ outColor = vec4(fragColor * texture(textureSampler, fragTextureCoordinates).rgb, 1.0);
}
diff --git a/shaders/triangle.vert b/shaders/triangle.vert
index f5b2f8d..bf93f44 100644
--- a/shaders/triangle.vert
+++ b/shaders/triangle.vert
@@ -1,20 +1,20 @@
#version 450
-layout(location = 0) out vec3 fragColor;
+layout(location = 0) in vec3 inPosition;
+layout(location = 1) in vec3 inColor;
+layout(location = 2) in vec2 inTextureCoordinates;
-vec2 positions[3] = vec2[](
- vec2(0.0, -0.5),
- vec2(0.5, 0.5),
- vec2(-0.5, 0.5)
-);
+layout(location = 0) out vec3 fragColor;
+layout(location = 1) out vec2 fragTextureCoordinates;
-vec3 colors[3] = vec3[](
- vec3(1.0, 0.0, 0.0),
- vec3(0.0, 1.0, 0.0),
- vec3(0.0, 0.0, 1.0)
-);
+layout(set = 0, binding = 0) uniform UniformBufferObject {
+ mat4 model;
+ mat4 view;
+ mat4 projection;
+} ubo;
void main() {
- gl_Position = vec4(positions[gl_VertexIndex], 0.0, 1.0);
- fragColor = colors[gl_VertexIndex];
+ gl_Position = ubo.projection * ubo.view * ubo.model * vec4(inPosition, 1.0);
+ fragColor = inColor;
+ fragTextureCoordinates = inTextureCoordinates;
}
diff --git a/src/alloc.c b/src/alloc.c
new file mode 100644
index 0000000..2fb7a78
--- /dev/null
+++ b/src/alloc.c
@@ -0,0 +1,171 @@
+#include "alloc.h"
+#include "maths.h"
+#include "platform.h"
+#include <assert.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Allocates `size` bytes by forwarding to the `allocate` callback of
+// `allocator`, passing along the allocator's context pointer.
+void *vgltf_allocator_allocate(struct vgltf_allocator *allocator, size_t size) {
+  assert(allocator);
+  void *const memory = allocator->allocate(size, allocator->ctx);
+  return memory;
+}
+// Allocates `size` bytes with the requested `alignment` by forwarding to the
+// `allocate_aligned` callback of `allocator` together with its context pointer.
+void *vgltf_allocator_allocate_aligned(struct vgltf_allocator *allocator,
+                                       size_t alignment, size_t size) {
+  assert(allocator);
+  void *const memory =
+      allocator->allocate_aligned(alignment, size, allocator->ctx);
+  return memory;
+}
+// Allocates room for `count` items of `item_size` bytes each by forwarding to
+// the `allocate_array` callback of `allocator` with its context pointer.
+void *vgltf_allocator_allocate_array(struct vgltf_allocator *allocator,
+                                     size_t count, size_t item_size) {
+  assert(allocator);
+  void *const items =
+      allocator->allocate_array(count, item_size, allocator->ctx);
+  return items;
+}
+// Resizes the block at `ptr` from `old_size` to `new_size` bytes by forwarding
+// to the `reallocate` callback of `allocator`; returns whatever pointer the
+// callback returns.
+void *vgltf_allocator_reallocate(struct vgltf_allocator *allocator, void *ptr,
+                                 size_t old_size, size_t new_size) {
+  assert(allocator);
+  void *const resized =
+      allocator->reallocate(ptr, old_size, new_size, allocator->ctx);
+  return resized;
+}
+// Releases `ptr` through the `free` callback of `allocator`.
+void vgltf_allocator_free(struct vgltf_allocator *allocator, void *ptr) {
+  assert(allocator);
+  void *const callback_ctx = allocator->ctx;
+  allocator->free(ptr, callback_ctx);
+}
+
+// System-heap backend for `allocate`: malloc() that panics instead of
+// returning a null pointer. `ctx` is unused.
+static void *memory_allocate(size_t size, void *ctx) {
+  (void)ctx;
+  void *const memory = malloc(size);
+  if (memory == NULL) {
+    VGLTF_PANIC("Couldn't allocate memory (out of mem?)");
+  }
+  return memory;
+}
+
+// System-heap backend for `allocate_aligned`. `ctx` is unused.
+//
+// The requested alignment is raised to at least sizeof(void *). On the
+// non-Windows path the size handed to aligned_alloc() is rounded up to a
+// multiple of that alignment: C11 requires it, and a libc that enforces the
+// rule returns NULL otherwise, which would be reported here as a bogus
+// out-of-memory panic.
+//
+// Panics if the allocation fails or if the rounded size overflows size_t.
+//
+// NOTE(review): on Windows the block comes from _aligned_malloc(), which is
+// documented to be released with _aligned_free(); memory_free() in this file
+// calls free(). Confirm how aligned blocks are meant to be released there.
+static void *memory_allocate_aligned(size_t alignment, size_t size, void *ctx) {
+  (void)ctx;
+  const size_t effective_alignment = VGLTF_MAX(alignment, sizeof(void *));
+#ifdef VGLTF_PLATFORM_WINDOWS
+  void *ptr = _aligned_malloc(size, effective_alignment);
+#else
+  size_t padded_size = size;
+  const size_t remainder = size % effective_alignment;
+  if (remainder != 0) {
+    const size_t padding = effective_alignment - remainder;
+    if (size > SIZE_MAX - padding) {
+      VGLTF_PANIC("Aligned allocation size overflows size_t");
+    }
+    padded_size = size + padding;
+  }
+  void *ptr = aligned_alloc(effective_alignment, padded_size);
+#endif
+  if (!ptr) {
+    VGLTF_PANIC("Couldn't allocate aligned memory (out of mem?)");
+  }
+  return ptr;
+}
+
+// System-heap backend for `allocate_array`: calloc() (so the items come back
+// zero-filled) that panics instead of returning a null pointer. `ctx` is
+// unused.
+static void *memory_allocate_array(size_t count, size_t item_size, void *ctx) {
+  (void)ctx;
+  void *const items = calloc(count, item_size);
+  if (items == NULL) {
+    VGLTF_PANIC("Couldn't allocate memory (out of mem?)");
+  }
+  return items;
+}
+
+// System-heap backend for `reallocate`: realloc() that panics instead of
+// returning a null pointer. `old_size` and `ctx` are not needed by realloc()
+// and are ignored.
+static void *memory_reallocate(void *ptr, size_t old_size, size_t new_size,
+                               void *ctx) {
+  (void)ctx;
+  (void)old_size;
+  void *const resized = realloc(ptr, new_size);
+  if (resized == NULL) {
+    VGLTF_PANIC("Couldn't allocate memory (out of mem?)");
+  }
+  return resized;
+}
+
+// System-heap backend for `free`: releases `ptr` with free(). `ctx` is unused.
+// NOTE(review): memory_allocate_aligned() uses _aligned_malloc() when
+// VGLTF_PLATFORM_WINDOWS is defined; confirm free() is valid for those blocks.
+static void memory_free(void *ptr, void *ctx) {
+ (void)ctx;
+ free(ptr);
+}
+
+// Allocator backed by the C runtime heap (malloc/aligned_alloc/calloc/
+// realloc/free via the memory_* functions above). Declared thread_local, so
+// each thread gets its own copy of this table. `ctx` is not listed and is
+// therefore zero-initialized; the memory_* callbacks ignore it.
+thread_local struct vgltf_allocator system_allocator = {
+ .allocate = memory_allocate,
+ .allocate_aligned = memory_allocate_aligned,
+ .allocate_array = memory_allocate_array,
+ .reallocate = memory_reallocate,
+ .free = memory_free};
+
+// Prepares `arena` as an empty arena of `size` bytes whose backing storage is
+// obtained from `allocator`.
+void vgltf_arena_init(struct vgltf_allocator *allocator, struct vgltf_arena *arena,
+                      size_t size) {
+  assert(allocator);
+  assert(arena);
+
+  arena->size = 0;
+  arena->capacity = size;
+
+  char *const backing_storage = vgltf_allocator_allocate(allocator, size);
+  arena->data = backing_storage;
+}
+// Returns the arena's backing storage to `allocator`, which must be the
+// allocator that was given to vgltf_arena_init().
+//
+// The arena is left empty (null data, zero capacity and size), so a stale
+// arena cannot hand out pointers into freed storage.
+void vgltf_arena_deinit(struct vgltf_allocator *allocator,
+                        struct vgltf_arena *arena) {
+  assert(allocator);
+  assert(arena);
+  vgltf_allocator_free(allocator, arena->data);
+  arena->data = NULL;
+  arena->capacity = 0;
+  arena->size = 0;
+}
+// Bump-allocates `size` bytes from `arena` and returns a pointer to them.
+//
+// The returned address is simply `data + size-so-far`; no alignment padding
+// is inserted between consecutive allocations.
+//
+// Panics when the request does not fit in the remaining capacity. The check
+// is a runtime check (not only an assert) so release builds cannot write past
+// the backing storage, and it is phrased as a subtraction so that a huge
+// `size` cannot wrap the addition around.
+void *vgltf_arena_allocate(struct vgltf_arena *arena, size_t size) {
+  assert(arena);
+  assert(arena->size <= arena->capacity);
+  const size_t remaining =
+      arena->size <= arena->capacity ? arena->capacity - arena->size : 0;
+  if (size > remaining) {
+    VGLTF_PANIC("Arena exhausted (requested %zu bytes, %zu bytes left)", size,
+                remaining);
+  }
+  void *ptr = arena->data + arena->size;
+  arena->size += size;
+  return ptr;
+}
+
+// Bump-allocates a zero-filled array of `count` items of `item_size` bytes
+// each from `arena`.
+//
+// Panics if `count * item_size` does not fit in size_t; without this check
+// the product could wrap to a small value and the caller would receive a
+// block far smaller than it asked for.
+void *vgltf_arena_allocate_array(struct vgltf_arena *arena, size_t count,
+                                 size_t item_size) {
+  assert(arena);
+  if (item_size != 0 && count > SIZE_MAX / item_size) {
+    VGLTF_PANIC("Arena array size overflows size_t");
+  }
+  const size_t total_size = count * item_size;
+  void *ptr = vgltf_arena_allocate(arena, total_size);
+  memset(ptr, 0, total_size);
+  return ptr;
+}
+
+// Marks the whole arena as unused again by rewinding its size to zero. The
+// backing storage is neither freed nor cleared; later allocations start over
+// from the beginning of `data`.
+void vgltf_arena_reset(struct vgltf_arena *arena) {
+ assert(arena);
+ arena->size = 0;
+}
+
+// Arena backend for `allocate`; `ctx` is the struct vgltf_arena to draw from.
+static void *arena_allocator_allocate(size_t size, void *ctx) {
+  assert(ctx);
+  struct vgltf_arena *const arena = ctx;
+  return vgltf_arena_allocate(arena, size);
+}
+// Arena backend for `allocate_aligned`; `ctx` is the struct vgltf_arena to
+// draw from.
+//
+// Like memory_allocate_aligned(), alignments smaller than sizeof(void *) are
+// raised to sizeof(void *) rather than rejected, so both backends accept the
+// same requests. Returns NULL if the resulting alignment is not a power of
+// two or if the padded size would overflow size_t.
+//
+// The arena hands out byte-granular addresses, so `alignment - 1` spare bytes
+// are reserved and the returned pointer is rounded up inside them. No header
+// is stored in front of the block (arena_allocator_free() is a no-op), so no
+// further padding is needed.
+static void *arena_allocator_allocate_aligned(size_t alignment, size_t size,
+                                              void *ctx) {
+  assert(ctx);
+  const size_t effective_alignment = VGLTF_MAX(alignment, sizeof(void *));
+  if ((effective_alignment & (effective_alignment - 1)) != 0) {
+    return NULL;
+  }
+
+  const size_t slack = effective_alignment - 1;
+  if (size > SIZE_MAX - slack) {
+    return NULL;
+  }
+
+  void *ptr = vgltf_arena_allocate(ctx, size + slack);
+  return (void *)(((uintptr_t)ptr + slack) & ~(uintptr_t)slack);
+}
+
+// Arena backend for `allocate_array`; `ctx` is the struct vgltf_arena to draw
+// from.
+static void *arena_allocator_allocate_array(size_t count, size_t item_size,
+                                            void *ctx) {
+  assert(ctx);
+  struct vgltf_arena *const arena = ctx;
+  return vgltf_arena_allocate_array(arena, count, item_size);
+}
+
+// Arena backend for `reallocate`; `ctx` is the struct vgltf_arena to draw
+// from.
+//
+// An arena cannot grow a block in place, so a fresh block of `new_size` bytes
+// is carved out and the old contents are copied over; the old block is not
+// reclaimed. Only min(old_size, new_size) bytes are copied: copying
+// `old_size` bytes into a smaller block would write past its end.
+static void *arena_allocator_reallocate(void *ptr, size_t old_size,
+                                        size_t new_size, void *ctx) {
+  assert(ptr);
+  assert(ctx);
+
+  void *new_ptr = vgltf_arena_allocate(ctx, new_size);
+  const size_t bytes_to_copy = old_size < new_size ? old_size : new_size;
+  memcpy(new_ptr, ptr, bytes_to_copy);
+  return new_ptr;
+}
+
+// Arena backend for `free`: intentionally does nothing with `ptr`. Individual
+// blocks are never returned to the arena; space is reused only through
+// vgltf_arena_reset().
+static void arena_allocator_free(void *ptr, void *ctx) {
+ assert(ctx);
+ (void)ptr;
+}
+
+// Builds an allocator whose callbacks draw from `arena`. The arena pointer is
+// stored as the allocator context, so `arena` must outlive the returned value.
+struct vgltf_allocator vgltf_arena_allocator(struct vgltf_arena *arena) {
+  struct vgltf_allocator allocator = {0};
+  allocator.allocate = arena_allocator_allocate;
+  allocator.allocate_aligned = arena_allocator_allocate_aligned;
+  allocator.allocate_array = arena_allocator_allocate_array;
+  allocator.reallocate = arena_allocator_reallocate;
+  allocator.free = arena_allocator_free;
+  allocator.ctx = arena;
+  return allocator;
+}
diff --git a/src/alloc.h b/src/alloc.h
new file mode 100644
index 0000000..bde1d55
--- /dev/null
+++ b/src/alloc.h
@@ -0,0 +1,40 @@
+#ifndef VGLTF_ALLOC_H
+#define VGLTF_ALLOC_H
+
+#include <stddef.h>
+
+// Table of allocation callbacks plus an opaque context pointer. The
+// vgltf_allocator_* functions call these callbacks and pass `ctx` as the last
+// argument of every call.
+struct vgltf_allocator {
+ void *(*allocate)(size_t size, void *ctx);
+ void *(*allocate_aligned)(size_t alignment, size_t size, void *ctx);
+ void *(*allocate_array)(size_t count, size_t item_size, void *ctx);
+ void *(*reallocate)(void *ptr, size_t old_size, size_t new_size, void *ctx);
+ void (*free)(void *ptr, void *ctx);
+ // Handed unchanged to every callback above.
+ void *ctx;
+};
+
+void *vgltf_allocator_allocate(struct vgltf_allocator *allocator, size_t size);
+void *vgltf_allocator_allocate_aligned(struct vgltf_allocator *allocator,
+ size_t alignment, size_t size);
+void *vgltf_allocator_allocate_array(struct vgltf_allocator *allocator,
+ size_t count, size_t item_size);
+void *vgltf_allocator_reallocate(struct vgltf_allocator *allocator, void *ptr,
+ size_t old_size, size_t new_size);
+void vgltf_allocator_free(struct vgltf_allocator *allocator, void *ptr);
+
+extern thread_local struct vgltf_allocator system_allocator;
+
+// Bump arena: one contiguous buffer from which allocations are carved front
+// to back.
+struct vgltf_arena {
+ // Total number of bytes available in `data`.
+ size_t capacity;
+ // Number of bytes of `data` already handed out.
+ size_t size;
+ // Backing storage, obtained in vgltf_arena_init().
+ char *data;
+};
+void vgltf_arena_init(struct vgltf_allocator *allocator, struct vgltf_arena *arena,
+ size_t size);
+void vgltf_arena_deinit(struct vgltf_allocator *allocator, struct vgltf_arena *arena);
+void *vgltf_arena_allocate(struct vgltf_arena *arena, size_t size);
+void *vgltf_arena_allocate_array(struct vgltf_arena *arena, size_t count,
+ size_t item_size);
+void vgltf_arena_reset(struct vgltf_arena *arena);
+struct vgltf_allocator vgltf_arena_allocator(struct vgltf_arena *arena);
+
+#endif // VGLTF_ALLOC_H
diff --git a/src/engine.c b/src/engine.c
new file mode 100644
index 0000000..8904474
--- /dev/null
+++ b/src/engine.c
@@ -0,0 +1,17 @@
+#include "engine.h"
+
+// Initializes the engine, which currently means initializing its renderer on
+// top of `platform`. Returns true on success and false if the renderer could
+// not be initialized.
+bool vgltf_engine_init(struct vgltf_engine *engine, struct vgltf_platform *platform) {
+  const bool renderer_ready = vgltf_renderer_init(&engine->renderer, platform);
+  return renderer_ready ? true : false;
+}
+// Tears down the engine by deinitializing its renderer.
+void vgltf_engine_deinit(struct vgltf_engine *engine) {
+  struct vgltf_renderer *const renderer = &engine->renderer;
+  vgltf_renderer_deinit(renderer);
+}
+// Runs one frame of the engine, which currently only renders a frame.
+void vgltf_engine_run_frame(struct vgltf_engine *engine) {
+  struct vgltf_renderer *const renderer = &engine->renderer;
+  vgltf_renderer_render_frame(renderer);
+}
diff --git a/src/engine.h b/src/engine.h
new file mode 100644
index 0000000..5a7bc2d
--- /dev/null
+++ b/src/engine.h
@@ -0,0 +1,14 @@
+#ifndef VGLTF_ENGINE_H
+#define VGLTF_ENGINE_H
+
+#include "renderer/renderer.h"
+
+struct vgltf_engine {
+ struct vgltf_renderer renderer;
+};
+
+bool vgltf_engine_init(struct vgltf_engine *engine, struct vgltf_platform *platform);
+void vgltf_engine_deinit(struct vgltf_engine *engine);
+void vgltf_engine_run_frame(struct vgltf_engine *engine);
+
+#endif // VGLTF_ENGINE_H
diff --git a/src/hash.c b/src/hash.c
new file mode 100644
index 0000000..cfdafc3
--- /dev/null
+++ b/src/hash.c
@@ -0,0 +1,15 @@
+#include "hash.h"
+#include <assert.h>
+
+// Computes the 64-bit FNV-1a hash of the `nbytes` bytes starting at `bytes`.
+//
+// `bytes` must not be NULL (asserted), even when `nbytes` is 0. With
+// `nbytes == 0` the result is the FNV offset basis.
+uint64_t vgltf_hash_fnv_1a(const char *bytes, size_t nbytes) {
+  assert(bytes);
+  static const uint64_t FNV_OFFSET_BASIS = UINT64_C(14695981039346656037);
+  static const uint64_t FNV_PRIME = UINT64_C(1099511628211);
+  uint64_t hash = FNV_OFFSET_BASIS;
+  for (size_t i = 0; i < nbytes; i++) {
+    // Go through unsigned char: plain char may be signed, in which case a
+    // byte >= 0x80 would be sign-extended to 64 bits and flip the upper 56
+    // bits of the hash instead of only the low octet.
+    hash ^= (uint64_t)(unsigned char)bytes[i];
+    hash *= FNV_PRIME;
+  }
+
+  return hash;
+}
diff --git a/src/hash.h b/src/hash.h
new file mode 100644
index 0000000..f4f8e76
--- /dev/null
+++ b/src/hash.h
@@ -0,0 +1,9 @@
+#ifndef VGLTF_HASH_H
+#define VGLTF_HASH_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+uint64_t vgltf_hash_fnv_1a(const char *bytes, size_t nbytes);
+
+#endif // VGLTF_HASH_H
diff --git a/src/image.c b/src/image.c
new file mode 100644
index 0000000..a2d29c7
--- /dev/null
+++ b/src/image.c
@@ -0,0 +1,20 @@
+#include "image.h"
+
+#define STB_IMAGE_IMPLEMENTATION
+#include <stb_image.h>
+
+// Loads the image file at `path` into `image` as 8-bit RGBA (stb_image is
+// asked for STBI_rgb_alpha regardless of the channel count in the file).
+//
+// Returns true on success. On failure returns false and leaves `image` with
+// null data and zero dimensions, so the fields never hold indeterminate
+// values and vgltf_image_deinit() remains safe to call.
+//
+// NOTE(review): `path.data` is passed to stbi_load() as a C string; this
+// assumes the string view is NUL-terminated - confirm against str.h.
+bool vgltf_image_load_from_file(struct vgltf_image *image,
+                                struct vgltf_string_view path) {
+  int width = 0;
+  int height = 0;
+  int tex_channels = 0;
+  unsigned char *pixels =
+      stbi_load(path.data, &width, &height, &tex_channels, STBI_rgb_alpha);
+
+  image->data = pixels;
+  image->format = VGLTF_IMAGE_FORMAT_R8G8B8A8;
+  if (pixels == nullptr) {
+    image->width = 0;
+    image->height = 0;
+    return false;
+  }
+
+  image->width = (uint32_t)width;
+  image->height = (uint32_t)height;
+  return true;
+}
+
+// Releases the pixel data owned by `image`. The pointer is cleared afterwards
+// so that calling this twice on the same image does not free the same block
+// twice.
+void vgltf_image_deinit(struct vgltf_image *image) {
+  stbi_image_free(image->data);
+  image->data = nullptr;
+}
diff --git a/src/image.h b/src/image.h
new file mode 100644
index 0000000..426d605
--- /dev/null
+++ b/src/image.h
@@ -0,0 +1,21 @@
+#ifndef VGLTF_IMAGE_H
+#define VGLTF_IMAGE_H
+
+#include <stdint.h>
+#include "str.h"
+
+enum vgltf_image_format {
+ VGLTF_IMAGE_FORMAT_R8G8B8A8,
+};
+
+// A decoded image held in memory.
+struct vgltf_image {
+ // Pixel buffer; filled by vgltf_image_load_from_file() and released by
+ // vgltf_image_deinit().
+ unsigned char* data;
+ // Dimensions as reported by the loader.
+ uint32_t width;
+ uint32_t height;
+ // Layout of `data`.
+ enum vgltf_image_format format;
+};
+
+bool vgltf_image_load_from_file(struct vgltf_image* image, struct vgltf_string_view path);
+void vgltf_image_deinit(struct vgltf_image* image);
+
+#endif // VGLTF_IMAGE_H
diff --git a/src/log.c b/src/log.c
index 0c1b8a4..99c10dc 100644
--- a/src/log.c
+++ b/src/log.c
@@ -1,12 +1,5 @@
#include "log.h"
-const char *vgltf_log_level_to_str(enum vgltf_log_level level) {
- switch (level) {
- case VGLTF_LOG_ERROR:
- return "error";
- case VGLTF_LOG_INFO:
- return "info";
- case VGLTF_LOG_DEBUG:
- return "debug";
- }
-}
+// Human-readable name for each enum vgltf_log_level value; indexed by the
+// level in the VGLTF_LOG macro.
+const char *vgltf_log_level_str[] = {[VGLTF_LOG_LEVEL_DBG] = "debug",
+ [VGLTF_LOG_LEVEL_INFO] = "info",
+ [VGLTF_LOG_LEVEL_ERR] = "error"};
diff --git a/src/log.h b/src/log.h
index 5e9dcc0..faf9edd 100644
--- a/src/log.h
+++ b/src/log.h
@@ -1,25 +1,26 @@
#ifndef VGLTF_LOG_H
#define VGLTF_LOG_H
-#include <stdio.h>
+#include <stdio.h> // IWYU pragma: keep
enum vgltf_log_level {
- VGLTF_LOG_DEBUG,
- VGLTF_LOG_INFO,
- VGLTF_LOG_ERROR,
+ VGLTF_LOG_LEVEL_DBG,
+ VGLTF_LOG_LEVEL_INFO,
+ VGLTF_LOG_LEVEL_ERR,
};
-const char *vgltf_log_level_to_str(enum vgltf_log_level level);
-#define VGLTF_LOG(level, ...) \
+extern const char *vgltf_log_level_str[];
+
+#define VGLTF_LOG(level, ...) \
do { \
- fprintf(stderr, "[%s %s:%d] ", vgltf_log_level_to_str(level), __FILE__, \
+ fprintf(stderr, "[%s %s:%d] ", vgltf_log_level_str[level], __FILE__, \
__LINE__); \
fprintf(stderr, __VA_ARGS__); \
fprintf(stderr, "\n"); \
} while (0)
-#define VGLTF_LOG_DBG(...) VGLTF_LOG(VGLTF_LOG_DEBUG, __VA_ARGS__)
-#define VGLTF_LOG_INFO(...) VGLTF_LOG(VGLTF_LOG_INFO, __VA_ARGS__)
-#define VGLTF_LOG_ERR(...) VGLTF_LOG(VGLTF_LOG_ERROR, __VA_ARGS__)
+#define VGLTF_LOG_DBG(...) VGLTF_LOG(VGLTF_LOG_LEVEL_DBG, __VA_ARGS__)
+#define VGLTF_LOG_INFO(...) VGLTF_LOG(VGLTF_LOG_LEVEL_INFO, __VA_ARGS__)
+#define VGLTF_LOG_ERR(...) VGLTF_LOG(VGLTF_LOG_LEVEL_ERR, __VA_ARGS__)
#endif // VGLTF_LOG_H
diff --git a/src/main.c b/src/main.c
index d2ea49d..d518632 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,43 +1,39 @@
+#include "engine.h"
#include "log.h"
#include "platform.h"
-#include "renderer.h"
int main(void) {
struct vgltf_platform platform = {};
if (!vgltf_platform_init(&platform)) {
- VGLTF_LOG_ERR("Couldn't initialize the platform layer");
+ VGLTF_LOG_ERR("Platform initialization failed");
goto err;
}
- struct vgltf_renderer renderer = {};
- if (!vgltf_renderer_init(&renderer, &platform)) {
- VGLTF_LOG_ERR("Couldn't initialize the renderer");
+ struct vgltf_engine engine = {};
+ if (!vgltf_engine_init(&engine, &platform)) {
+ VGLTF_LOG_ERR("Couldn't initialize the engine");
goto deinit_platform;
}
+ VGLTF_LOG_INFO("Starting main loop");
while (true) {
struct vgltf_event event;
while (vgltf_platform_poll_event(&platform, &event)) {
- if (event.type == VGLTF_EVENT_QUIT ||
- (event.type == VGLTF_EVENT_KEY_DOWN &&
- event.key.key == VGLTF_KEY_ESCAPE)) {
+ if (event.type == VGLTF_EVENT_QUIT || (event.type == VGLTF_EVENT_KEY_DOWN &&
+ event.key.key == VGLTF_KEY_ESCAPE)) {
goto out_main_loop;
- } else if (event.type == VGLTF_EVENT_WINDOW_RESIZED) {
- vgltf_renderer_on_window_resized(
- &renderer,
- (struct vgltf_window_size){.width = event.window_resized.width,
- .height = event.window_resized.height});
}
}
- vgltf_renderer_triangle_pass(&renderer);
+ vgltf_engine_run_frame(&engine);
}
out_main_loop:
- vgltf_renderer_deinit(&renderer);
+ VGLTF_LOG_INFO("Exiting main loop");
+ vgltf_engine_deinit(&engine);
vgltf_platform_deinit(&platform);
return 0;
deinit_platform:
vgltf_platform_deinit(&platform);
err:
- return 1;
+ return -1;
}
diff --git a/src/maths.c b/src/maths.c
new file mode 100644
index 0000000..a79c68f
--- /dev/null
+++ b/src/maths.c
@@ -0,0 +1,103 @@
+#include "maths.h"
+#include <math.h>
+#include <string.h>
+
+// Component-wise difference `lhs - rhs`.
+vgltf_vec3 vgltf_vec3_sub(vgltf_vec3 lhs, vgltf_vec3 rhs) {
+  vgltf_vec3 difference;
+  difference.x = lhs.x - rhs.x;
+  difference.y = lhs.y - rhs.y;
+  difference.z = lhs.z - rhs.z;
+  return difference;
+}
+// Cross product `lhs x rhs`.
+vgltf_vec3 vgltf_vec3_cross(vgltf_vec3 lhs, vgltf_vec3 rhs) {
+  vgltf_vec3 product;
+  product.x = lhs.y * rhs.z - lhs.z * rhs.y;
+  product.y = lhs.z * rhs.x - lhs.x * rhs.z;
+  product.z = lhs.x * rhs.y - lhs.y * rhs.x;
+  return product;
+}
+// Dot product of `lhs` and `rhs`.
+vgltf_vec_value_type vgltf_vec3_dot(vgltf_vec3 lhs, vgltf_vec3 rhs) {
+  const vgltf_vec_value_type sum_of_products =
+      lhs.x * rhs.x + lhs.y * rhs.y + lhs.z * rhs.z;
+  return sum_of_products;
+}
+// Euclidean length of `vec`.
+vgltf_vec_value_type vgltf_vec3_length(vgltf_vec3 vec) {
+  const vgltf_vec_value_type squared_length =
+      vec.x * vec.x + vec.y * vec.y + vec.z * vec.z;
+  return sqrtf(squared_length);
+}
+// Returns `vec` scaled to unit length.
+//
+// A vector whose computed length is zero has no direction; it is returned as
+// the zero vector instead of dividing by zero, which would fill the result
+// with NaNs that then spread through every matrix built from it.
+vgltf_vec3 vgltf_vec3_normalized(vgltf_vec3 vec) {
+  vgltf_vec_value_type length = vgltf_vec3_length(vec);
+  if (length == 0) {
+    return (vgltf_vec3){.x = 0, .y = 0, .z = 0};
+  }
+  return (vgltf_vec3){
+      .x = vec.x / length, .y = vec.y / length, .z = vec.z / length};
+}
+// Writes the 4x4 product of `lhs` and `rhs` to `out`, where element (i, j) of
+// a matrix is stored at index `i * 4 + j`.
+//
+// The product is accumulated in a local matrix and copied out at the end, so
+// `out` may be the same array as `lhs` and/or `rhs`. Writing straight into
+// `out` would overwrite inputs that are still needed for later elements.
+void vgltf_mat4_multiply(vgltf_mat4 out, vgltf_mat4 lhs, vgltf_mat4 rhs) {
+  vgltf_mat4 product;
+  for (int i = 0; i < 4; ++i) {
+    for (int j = 0; j < 4; ++j) {
+      product[i * 4 + j] =
+          lhs[i * 4 + 0] * rhs[0 * 4 + j] + lhs[i * 4 + 1] * rhs[1 * 4 + j] +
+          lhs[i * 4 + 2] * rhs[2 * 4 + j] + lhs[i * 4 + 3] * rhs[3 * 4 + j];
+    }
+  }
+  memcpy(out, product, sizeof(product));
+}
+// Writes `matrix` multiplied by an axis-angle rotation matrix to `out`.
+// `axis` is normalized before use; `angle_radians` is the rotation angle.
+void vgltf_mat4_rotate(vgltf_mat4 out, vgltf_mat4 matrix,
+                       vgltf_mat_value_type angle_radians, vgltf_vec3 axis) {
+  const vgltf_vec3 n = vgltf_vec3_normalized(axis);
+  const vgltf_vec_value_type cos_a = cosf(angle_radians);
+  const vgltf_vec_value_type sin_a = sinf(angle_radians);
+  const vgltf_vec_value_type k = 1.f - cos_a;
+
+  // clang-format off
+  vgltf_mat4 rotation_matrix = {
+      k * n.x * n.x + cos_a,       k * n.x * n.y - sin_a * n.z, k * n.x * n.z + sin_a * n.y, 0.f,
+      k * n.x * n.y + sin_a * n.z, k * n.y * n.y + cos_a,       k * n.y * n.z - sin_a * n.x, 0.f,
+      k * n.x * n.z - sin_a * n.y, k * n.y * n.z + sin_a * n.x, k * n.z * n.z + cos_a,       0.f,
+      0.f,                         0.f,                         0.f,                         1.f,
+  };
+  // clang-format on
+
+  vgltf_mat4_multiply(out, matrix, rotation_matrix);
+}
+// Builds a view matrix in `out` for a camera at `eye_position` looking toward
+// `target_position`, with `up_axis` as the approximate up direction.
+//
+// The camera basis vectors (right, up, -forward) are written down the first
+// three columns of the 4x4 array and the eye-dependent dot products go into
+// indices 12..14; all remaining entries are those of the identity matrix.
+void vgltf_mat4_look_at(vgltf_mat4 out, vgltf_vec3 eye_position,
+                        vgltf_vec3 target_position, vgltf_vec3 up_axis) {
+  const vgltf_vec3 to_target = vgltf_vec3_sub(target_position, eye_position);
+  const vgltf_vec3 forward = vgltf_vec3_normalized(to_target);
+  const vgltf_vec3 right =
+      vgltf_vec3_normalized(vgltf_vec3_cross(forward, up_axis));
+  const vgltf_vec3 camera_up = vgltf_vec3_cross(right, forward);
+
+  // clang-format off
+  const vgltf_mat_value_type view[16] = {
+      right.x, camera_up.x, -forward.x, 0,
+      right.y, camera_up.y, -forward.y, 0,
+      right.z, camera_up.z, -forward.z, 0,
+      -vgltf_vec3_dot(right, eye_position),
+      -vgltf_vec3_dot(camera_up, eye_position),
+      vgltf_vec3_dot(forward, eye_position),
+      1,
+  };
+  // clang-format on
+
+  memcpy(out, view, sizeof(vgltf_mat4));
+}
+// Fills `out` with a perspective projection matrix for a vertical field of
+// view of `fov_radians`, the given `aspect_ratio`, and the `near`/`far` plane
+// distances. Only five of the sixteen entries are non-zero.
+void vgltf_mat4_perspective(vgltf_mat4 out, vgltf_mat_value_type fov_radians,
+                            vgltf_mat_value_type aspect_ratio,
+                            vgltf_mat_value_type near, vgltf_mat_value_type far) {
+  const float tan_half_fovy = tanf(fov_radians / 2.0f);
+
+  for (int i = 0; i < 16; ++i) {
+    out[i] = 0.0f;
+  }
+
+  out[0] = 1.f / (aspect_ratio * tan_half_fovy);
+  out[5] = 1.f / tan_half_fovy;
+  out[10] = -(far + near) / (far - near);
+  out[11] = -1.0f;
+  out[14] = -(2.0f * far * near) / (far - near);
+}
diff --git a/src/maths.h b/src/maths.h
new file mode 100644
index 0000000..d50f285
--- /dev/null
+++ b/src/maths.h
@@ -0,0 +1,49 @@
+#ifndef VGLTF_MATHS_H
+#define VGLTF_MATHS_H
+
+typedef float vgltf_vec_value_type;
+
+constexpr double VGLTF_MATHS_PI = 3.14159265358979323846;
+// Converts degrees to radians. The argument is parenthesized so that an
+// expression such as `a + b` is converted as a whole rather than only its
+// last term.
+#define VGLTF_MATHS_DEG_TO_RAD(deg) ((deg) * VGLTF_MATHS_PI / 180.0)
+#define VGLTF_MAX(x, y) ((x) > (y) ? (x) : (y))
+
+typedef struct {
+ vgltf_vec_value_type x;
+ vgltf_vec_value_type y;
+} vgltf_vec2;
+
+typedef struct {
+ vgltf_vec_value_type x;
+ vgltf_vec_value_type y;
+ vgltf_vec_value_type z;
+} vgltf_vec3;
+vgltf_vec3 vgltf_vec3_sub(vgltf_vec3 lhs, vgltf_vec3 rhs);
+vgltf_vec3 vgltf_vec3_cross(vgltf_vec3 lhs, vgltf_vec3 rhs);
+vgltf_vec_value_type vgltf_vec3_dot(vgltf_vec3 lhs, vgltf_vec3 rhs);
+
+vgltf_vec_value_type vgltf_vec3_length(vgltf_vec3 vec);
+vgltf_vec3 vgltf_vec3_normalized(vgltf_vec3 vec);
+
+typedef vgltf_vec_value_type vgltf_mat_value_type;
+
+// row major
+typedef vgltf_mat_value_type vgltf_mat4[16];
+void vgltf_mat4_multiply(vgltf_mat4 out, vgltf_mat4 lhs, vgltf_mat4 rhs);
+void vgltf_mat4_rotate(vgltf_mat4 out, vgltf_mat4 matrix,
+ vgltf_mat_value_type angle_radians, vgltf_vec3 axis);
+void vgltf_mat4_look_at(vgltf_mat4 out, vgltf_vec3 eye_position,
+ vgltf_vec3 target_position, vgltf_vec3 up_axis);
+void vgltf_mat4_perspective(vgltf_mat4 out, vgltf_mat_value_type fov,
+ vgltf_mat_value_type aspect_ratio,
+ vgltf_mat_value_type near, vgltf_mat_value_type far);
+
+// clang-format off
+#define VGLTF_MAT4_IDENTITY { \
+ 1, 0, 0, 0, \
+ 0, 1, 0, 0, \
+ 0, 0, 1, 0, \
+ 0, 0, 0, 1, \
+}
+// clang-format on
+
+#endif // VGLTF_MATHS_H
diff --git a/src/platform.c b/src/platform.c
new file mode 100644
index 0000000..da4d7d4
--- /dev/null
+++ b/src/platform.c
@@ -0,0 +1,5 @@
+#include "platform.h"
+
+// Name of each key, generated by stringifying every entry of
+// VGLTF_FOREACH_KEY in order, so the array lines up with the enum vgltf_key
+// values produced from the same list. The array has no entries for
+// VGLTF_KEY_COUNT or VGLTF_KEY_UNKNOWN, which follow the list in the enum.
+#define VGLTF_GENERATE_KEY_STRING(KEY) #KEY,
+const char *vgltf_key_str[] = {VGLTF_FOREACH_KEY(VGLTF_GENERATE_KEY_STRING)};
+#undef VGLTF_GENERATE_KEY_STRING
diff --git a/src/platform.h b/src/platform.h
index fe719d3..aff673f 100644
--- a/src/platform.h
+++ b/src/platform.h
@@ -3,66 +3,61 @@
#include "log.h"
#include <stdint.h>
-#include <stdlib.h>
+#include <stdlib.h> // IWYU pragma: keep
-#define VGLTF_PANIC(...) \
+#define VGLTF_PANIC(...) \
do { \
- VGLTF_LOG_ERR("panic: " __VA_ARGS__); \
+ VGLTF_LOG_ERR("PANIC " __VA_ARGS__); \
exit(1); \
} while (0)
-enum vgltf_event_type {
- VGLTF_EVENT_QUIT,
- VGLTF_EVENT_KEY_DOWN,
- VGLTF_EVENT_WINDOW_RESIZED,
- VGLTF_EVENT_UNKNOWN,
-};
+// X-macro listing every supported key. `_M` is expanded once per key name, in
+// this order; the enum vgltf_key constants, the vgltf_key_str table and the
+// SDL keycode mapping are all generated from this single list.
+#define VGLTF_FOREACH_KEY(_M) \
+ _M(A) \
+ _M(B) \
+ _M(C) \
+ _M(D) \
+ _M(E) \
+ _M(F) \
+ _M(G) \
+ _M(H) \
+ _M(I) \
+ _M(J) \
+ _M(K) \
+ _M(L) \
+ _M(M) \
+ _M(N) \
+ _M(O) \
+ _M(P) \
+ _M(Q) \
+ _M(R) \
+ _M(S) \
+ _M(T) \
+ _M(U) \
+ _M(V) \
+ _M(W) \
+ _M(X) \
+ _M(Y) \
+ _M(Z) \
+ _M(ESCAPE)
+#define VGLTF_GENERATE_KEY_ENUM(KEY) VGLTF_KEY_##KEY,
enum vgltf_key {
- VGLTF_KEY_A,
- VGLTF_KEY_B,
- VGLTF_KEY_C,
- VGLTF_KEY_D,
- VGLTF_KEY_E,
- VGLTF_KEY_F,
- VGLTF_KEY_G,
- VGLTF_KEY_H,
- VGLTF_KEY_I,
- VGLTF_KEY_J,
- VGLTF_KEY_K,
- VGLTF_KEY_L,
- VGLTF_KEY_M,
- VGLTF_KEY_N,
- VGLTF_KEY_O,
- VGLTF_KEY_P,
- VGLTF_KEY_Q,
- VGLTF_KEY_R,
- VGLTF_KEY_S,
- VGLTF_KEY_T,
- VGLTF_KEY_U,
- VGLTF_KEY_V,
- VGLTF_KEY_W,
- VGLTF_KEY_X,
- VGLTF_KEY_Y,
- VGLTF_KEY_Z,
- VGLTF_KEY_ESCAPE,
+ VGLTF_FOREACH_KEY(VGLTF_GENERATE_KEY_ENUM) VGLTF_KEY_COUNT,
VGLTF_KEY_UNKNOWN
};
+#undef VGLTF_GENERATE_KEY_ENUM
+extern const char *vgltf_key_str[];
+
+enum vgltf_event_type { VGLTF_EVENT_QUIT, VGLTF_EVENT_KEY_DOWN, VGLTF_EVENT_UNKNOWN };
struct vgltf_key_event {
enum vgltf_key key;
};
-struct vgltf_window_resized_event {
- int32_t width;
- int32_t height;
-};
-
struct vgltf_event {
enum vgltf_event_type type;
union {
struct vgltf_key_event key;
- struct vgltf_window_resized_event window_resized;
};
};
@@ -75,18 +70,19 @@ struct vgltf_platform;
bool vgltf_platform_init(struct vgltf_platform *platform);
void vgltf_platform_deinit(struct vgltf_platform *platform);
bool vgltf_platform_poll_event(struct vgltf_platform *platform,
- struct vgltf_event *event);
+ struct vgltf_event *event);
bool vgltf_platform_get_window_size(struct vgltf_platform *platform,
- struct vgltf_window_size *window_size);
-
-// Vulkan specifics
-#include "vulkan/vulkan_core.h"
-char const *const *
+ struct vgltf_window_size *window_size);
+bool vgltf_platform_get_current_time_nanoseconds(long *time);
+char *vgltf_platform_read_file_to_string(const char *filepath, size_t *out_size);
+const char *const *
vgltf_platform_get_vulkan_instance_extensions(struct vgltf_platform *platform,
- uint32_t *count);
+ uint32_t *count);
+
+#include <vulkan/vulkan.h>
bool vgltf_platform_create_vulkan_surface(struct vgltf_platform *platform,
- VkInstance instance,
- VkSurfaceKHR *surface);
+ VkInstance instance,
+ VkSurfaceKHR *surface);
#include "platform_sdl.h"
diff --git a/src/platform_sdl.c b/src/platform_sdl.c
index 5cc6032..6593b9e 100644
--- a/src/platform_sdl.c
+++ b/src/platform_sdl.c
@@ -1,29 +1,25 @@
+#include "platform_sdl.h"
#include "log.h"
#include "platform.h"
-#include "platform_sdl.h"
-#include <SDL3/SDL_vulkan.h>
bool vgltf_platform_init(struct vgltf_platform *platform) {
+ VGLTF_LOG_INFO("Initializing SDL platform...");
+
if (!SDL_Init(SDL_INIT_VIDEO)) {
VGLTF_LOG_ERR("SDL initialization failed: %s", SDL_GetError());
goto err;
}
- constexpr char WINDOW_TITLE[] = "VisibleGLTF";
- constexpr int WINDOW_WIDTH = 800;
- constexpr int WINDOW_HEIGHT = 600;
- SDL_Window *window =
- SDL_CreateWindow(WINDOW_TITLE, WINDOW_WIDTH, WINDOW_HEIGHT,
- SDL_WINDOW_VULKAN | SDL_WINDOW_RESIZABLE);
- if (!window) {
- VGLTF_LOG_ERR("SDL window creation failed: %s", SDL_GetError());
- goto quit_sdl;
+ platform->window = SDL_CreateWindow("vgltf", 800, 600, SDL_WINDOW_VULKAN);
+ if (!platform->window) {
+ VGLTF_LOG_ERR("Window creation failed: %s", SDL_GetError());
+ goto deinit_sdl;
}
- platform->window = window;
-
+ VGLTF_LOG_INFO("SDL platform initialized");
return true;
-quit_sdl:
+
+deinit_sdl:
SDL_Quit();
err:
return false;
@@ -31,67 +27,23 @@ err:
void vgltf_platform_deinit(struct vgltf_platform *platform) {
SDL_DestroyWindow(platform->window);
SDL_Quit();
+ VGLTF_LOG_INFO("SDL platform deinitialized");
}
-static enum vgltf_key vgltf_key_from_sdl_keycode(SDL_Keycode keycode) {
- switch (keycode) {
- case SDLK_A:
- return VGLTF_KEY_A;
- case SDLK_B:
- return VGLTF_KEY_B;
- case SDLK_C:
- return VGLTF_KEY_C;
- case SDLK_D:
- return VGLTF_KEY_D;
- case SDLK_E:
- return VGLTF_KEY_E;
- case SDLK_F:
- return VGLTF_KEY_F;
- case SDLK_G:
- return VGLTF_KEY_G;
- case SDLK_H:
- return VGLTF_KEY_H;
- case SDLK_I:
- return VGLTF_KEY_I;
- case SDLK_J:
- return VGLTF_KEY_J;
- case SDLK_K:
- return VGLTF_KEY_K;
- case SDLK_L:
- return VGLTF_KEY_L;
- case SDLK_M:
- return VGLTF_KEY_M;
- case SDLK_N:
- return VGLTF_KEY_N;
- case SDLK_O:
- return VGLTF_KEY_O;
- case SDLK_P:
- return VGLTF_KEY_P;
- case SDLK_Q:
- return VGLTF_KEY_Q;
- case SDLK_R:
- return VGLTF_KEY_R;
- case SDLK_S:
- return VGLTF_KEY_S;
- case SDLK_T:
- return VGLTF_KEY_T;
- case SDLK_U:
- return VGLTF_KEY_U;
- case SDLK_V:
- return VGLTF_KEY_V;
- case SDLK_W:
- return VGLTF_KEY_W;
- case SDLK_X:
- return VGLTF_KEY_X;
- case SDLK_Y:
- return VGLTF_KEY_Y;
- case SDLK_Z:
- return VGLTF_KEY_Z;
- case SDLK_ESCAPE:
- return VGLTF_KEY_ESCAPE;
+
+#define VGLTF_GENERATE_SDL_KEYCODE_MAPPING(KEY) \
+ case SDLK_##KEY: \
+ return VGLTF_KEY_##KEY;
+
+static enum vgltf_key vgltf_key_from_sdl_keycode(SDL_Keycode key_code) {
+ switch (key_code) {
+ VGLTF_FOREACH_KEY(VGLTF_GENERATE_SDL_KEYCODE_MAPPING)
default:
return VGLTF_KEY_UNKNOWN;
}
}
+
+#undef VGLTF_GENERATE_SDL_KEYCODE_MAPPING
+
bool vgltf_platform_poll_event(struct vgltf_platform *platform,
struct vgltf_event *event) {
(void)platform;
@@ -106,16 +58,12 @@ bool vgltf_platform_poll_event(struct vgltf_platform *platform,
event->type = VGLTF_EVENT_KEY_DOWN;
event->key.key = vgltf_key_from_sdl_keycode(sdl_event.key.key);
break;
- case SDL_EVENT_WINDOW_RESIZED:
- event->type = VGLTF_EVENT_WINDOW_RESIZED;
- event->window_resized.width = sdl_event.display.data1;
- event->window_resized.height = sdl_event.display.data2;
- break;
default:
event->type = VGLTF_EVENT_UNKNOWN;
break;
}
}
+
return pending_events;
}
bool vgltf_platform_get_window_size(struct vgltf_platform *platform,
@@ -123,7 +71,31 @@ bool vgltf_platform_get_window_size(struct vgltf_platform *platform,
return SDL_GetWindowSize(platform->window, &window_size->width,
&window_size->height);
}
-char const *const *
+// Stores the current time reported by SDL_GetCurrentTime() in *time.
+//
+// SDL reports the value as an SDL_Time (a signed 64-bit nanosecond count), so
+// it is read into a correctly typed local first instead of handing the
+// caller's `long *` straight to SDL: `long` is not guaranteed to be 64 bits
+// wide, in which case the pointer types are incompatible.
+//
+// Returns true on success. On failure, logs SDL's error, leaves *time
+// untouched and returns false.
+bool vgltf_platform_get_current_time_nanoseconds(long *time) {
+  SDL_Time now = 0;
+  if (!SDL_GetCurrentTime(&now)) {
+    VGLTF_LOG_ERR("SDL_GetCurrentTime failed: %s", SDL_GetError());
+    return false;
+  }
+
+  *time = (long)now;
+  return true;
+}
+
+// Loads the file at filepath with SDL_LoadFile(), forwarding out_size to it,
+// and returns the buffer SDL handed back. When loading fails, SDL's error is
+// logged and NULL is returned.
+char *vgltf_platform_read_file_to_string(const char *filepath,
+                                         size_t *out_size) {
+  char *contents = SDL_LoadFile(filepath, out_size);
+  if (contents) {
+    return contents;
+  }
+
+  VGLTF_LOG_ERR("Couldn't load file: %s", SDL_GetError());
+  return NULL;
+}
+
+#include <SDL3/SDL_vulkan.h>
+
+const char *const *
vgltf_platform_get_vulkan_instance_extensions(struct vgltf_platform *platform,
uint32_t *count) {
(void)platform;
diff --git a/src/renderer.c b/src/renderer.c
deleted file mode 100644
index 7022af6..0000000
--- a/src/renderer.c
+++ /dev/null
@@ -1,1470 +0,0 @@
-#include "log.h"
-#include "renderer.h"
-#include "src/platform.h"
-#include "vulkan/vulkan_core.h"
-#include <assert.h>
-
-// Instance layers requested when validation is enabled.
-static const char *VALIDATION_LAYERS[] = {
-    "VK_LAYER_KHRONOS_validation",
-};
-static constexpr int VALIDATION_LAYER_COUNT =
-    sizeof VALIDATION_LAYERS / sizeof *VALIDATION_LAYERS;
-
-// Validation layers are switched on only when VGLTF_DEBUG is defined.
-static constexpr bool enable_validation_layers =
-#ifdef VGLTF_DEBUG
-    true;
-#else
-    false;
-#endif
-
-// Debug messenger callback: writes the message carried by callback_data to
-// the debug log. Severity, type and user data are ignored. Always returns
-// VK_FALSE.
-static VKAPI_ATTR VkBool32 VKAPI_CALL
-debug_callback(VkDebugUtilsMessageSeverityFlagBitsEXT message_severity,
-               VkDebugUtilsMessageTypeFlagBitsEXT message_type,
-               const VkDebugUtilsMessengerCallbackDataEXT *callback_data,
-               void *user_data) {
-  VGLTF_LOG_DBG("validation layer: %s", callback_data->pMessage);
-
-  // Parameters required by the callback signature but not used here.
-  (void)user_data;
-  (void)message_type;
-  (void)message_severity;
-
-  return VK_FALSE;
-}
-
-// Fixed-capacity list of instance extension names. Only the pointers are
-// stored; the strings themselves are not copied.
-static constexpr int REQUIRED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY = 10;
-struct required_instance_extensions {
-  const char *extensions[REQUIRED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY];
-  uint32_t count;
-};
-
-// Appends required_instance_extension to the list. Invokes VGLTF_PANIC when
-// the list already holds REQUIRED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY entries.
-void required_instance_extensions_push(
-    struct required_instance_extensions *required_instance_extensions,
-    const char *required_instance_extension) {
-  const uint32_t slot = required_instance_extensions->count;
-  if (slot == REQUIRED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY) {
-    VGLTF_PANIC("required instance extensions array is full");
-  }
-
-  required_instance_extensions->extensions[slot] = required_instance_extension;
-  required_instance_extensions->count = slot + 1;
-}
-
-// Fixed-capacity snapshot of the instance extensions reported by Vulkan.
-static constexpr int SUPPORTED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY = 128;
-struct supported_instance_extensions {
-  VkExtensionProperties
-      properties[SUPPORTED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY];
-  uint32_t count;
-};
-
-// Fills supported_instance_extensions with two calls to
-// vkEnumerateInstanceExtensionProperties: one for the count, one for the
-// properties. Returns false when either call does not return VK_SUCCESS or
-// when the reported count exceeds the array capacity.
-bool supported_instance_extensions_init(
-    struct supported_instance_extensions *supported_instance_extensions) {
-  uint32_t *count = &supported_instance_extensions->count;
-
-  VkResult result =
-      vkEnumerateInstanceExtensionProperties(nullptr, count, nullptr);
-  if (result != VK_SUCCESS) {
-    return false;
-  }
-
-  if (*count > SUPPORTED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY) {
-    VGLTF_LOG_ERR("supported instance extensions array cannot fit all the "
-                  "VkExtensionProperties");
-    return false;
-  }
-
-  result = vkEnumerateInstanceExtensionProperties(
-      nullptr, count, supported_instance_extensions->properties);
-  return result == VK_SUCCESS;
-}
-// Writes a header line followed by one debug-log line per extension name
-// stored in supported_instance_extensions.
-void supported_instance_extensions_debug_print(
-    const struct supported_instance_extensions *supported_instance_extensions) {
-  VGLTF_LOG_DBG("Supported instance extensions:");
-
-  const uint32_t total = supported_instance_extensions->count;
-  const VkExtensionProperties *entries =
-      supported_instance_extensions->properties;
-  for (uint32_t n = 0; n != total; ++n) {
-    VGLTF_LOG_DBG("\t- %s", entries[n].extensionName);
-  }
-}
-// Returns true when extension_name exactly matches (strcmp) the name of one
-// of the stored extension properties, false otherwise.
-bool supported_instance_extensions_includes(
-    const struct supported_instance_extensions *supported_instance_extensions,
-    const char *extension_name) {
-  const VkExtensionProperties *known =
-      supported_instance_extensions->properties;
-
-  for (uint32_t n = 0; n < supported_instance_extensions->count; n++) {
-    if (strcmp(known[n].extensionName, extension_name) != 0) {
-      continue;
-    }
-    return true;
-  }
-
-  return false;
-}
-
-// Fixed-capacity snapshot of the instance layers reported by Vulkan.
-static constexpr uint32_t SUPPORTED_VALIDATION_LAYERS_ARRAY_CAPACITY = 64;
-struct supported_validation_layers {
-  VkLayerProperties properties[SUPPORTED_VALIDATION_LAYERS_ARRAY_CAPACITY];
-  uint32_t count;
-};
-
-// Fills supported_validation_layers with two calls to
-// vkEnumerateInstanceLayerProperties: one for the count, one for the
-// properties. Returns false when either call does not return VK_SUCCESS or
-// when the reported count exceeds the array capacity.
-bool supported_validation_layers_init(
-    struct supported_validation_layers *supported_validation_layers) {
-  uint32_t *count = &supported_validation_layers->count;
-
-  VkResult result = vkEnumerateInstanceLayerProperties(count, nullptr);
-  if (result != VK_SUCCESS) {
-    return false;
-  }
-
-  if (*count > SUPPORTED_VALIDATION_LAYERS_ARRAY_CAPACITY) {
-    VGLTF_LOG_ERR("supported validation layers array cannot fit all the "
-                  "VkLayerProperties");
-    return false;
-  }
-
-  result = vkEnumerateInstanceLayerProperties(
-      count, supported_validation_layers->properties);
-  return result == VK_SUCCESS;
-}
-
-// Returns true only when every name in VALIDATION_LAYERS appears among the
-// layers enumerated by supported_validation_layers_init(). Returns false if
-// the enumeration fails or any requested layer is absent.
-static bool are_validation_layer_supported() {
-  struct supported_validation_layers available = {};
-  if (!supported_validation_layers_init(&available)) {
-    return false;
-  }
-
-  for (int wanted = 0; wanted < VALIDATION_LAYER_COUNT; wanted++) {
-    const char *wanted_name = VALIDATION_LAYERS[wanted];
-
-    // Advance until the requested name is found or the list is exhausted.
-    uint32_t candidate = 0;
-    while (candidate < available.count &&
-           strcmp(wanted_name, available.properties[candidate].layerName) !=
-               0) {
-      candidate++;
-    }
-
-    if (candidate == available.count) {
-      return false;
-    }
-  }
-
-  return true;
-}
-
-// Builds the list of instance extensions to enable and verifies that each one
-// is supported.
-//
-// The list receives, in order: the extensions returned by
-// vgltf_platform_get_vulkan_instance_extensions(), the portability
-// enumeration extension, and the debug utils extension when validation layers
-// are enabled. Every unsupported entry is logged. Returns false when the
-// supported extensions cannot be fetched or at least one required extension
-// is unsupported.
-static bool fetch_required_instance_extensions(
-    struct required_instance_extensions *required_extensions,
-    struct vgltf_platform *platform) {
-  struct supported_instance_extensions available = {};
-  if (!supported_instance_extensions_init(&available)) {
-    VGLTF_LOG_ERR(
-        "Couldn't fetch supported instance extensions details (OOM?)");
-    return false;
-  }
-  supported_instance_extensions_debug_print(&available);
-
-  // Extensions reported by the platform layer.
-  uint32_t platform_count = 0;
-  const char *const *platform_names =
-      vgltf_platform_get_vulkan_instance_extensions(platform, &platform_count);
-  for (uint32_t n = 0; n < platform_count; n++) {
-    required_instance_extensions_push(required_extensions, platform_names[n]);
-  }
-
-  required_instance_extensions_push(
-      required_extensions, VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME);
-  if (enable_validation_layers) {
-    required_instance_extensions_push(required_extensions,
-                                      VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
-  }
-
-  // Check every entry rather than stopping at the first miss so that all
-  // unsupported extensions end up in the log.
-  uint32_t missing = 0;
-  for (uint32_t n = 0; n < required_extensions->count; n++) {
-    const char *name = required_extensions->extensions[n];
-    if (supported_instance_extensions_includes(&available, name)) {
-      continue;
-    }
-    VGLTF_LOG_ERR("Unsupported instance extension: %s", name);
-    missing++;
-  }
-
-  if (missing != 0) {
-    VGLTF_LOG_ERR("Some required extensions are unsupported.");
-    return false;
-  }
-
-  return true;
-}
-
-// Overwrites *create_info with a debug messenger description that selects the
-// verbose, warning and error severities, the general, validation and
-// performance message types, and debug_callback as the callback. All other
-// fields are zeroed.
-static void populate_debug_messenger_create_info(
-    VkDebugUtilsMessengerCreateInfoEXT *create_info) {
-  *create_info = (VkDebugUtilsMessengerCreateInfoEXT){
-      .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
-      .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
-                         VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
-                         VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
-      .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
-                     VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
-                     VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
-      .pfnUserCallback = debug_callback,
-  };
-}
-
-// Creates the VkInstance and stores it in renderer->instance.
-//
-// Fails (returns false) when validation layers are enabled but unsupported,
-// when the required instance extensions cannot be gathered or are not all
-// supported, or when vkCreateInstance itself fails. With validation enabled,
-// a debug messenger description is chained through pNext.
-static bool vgltf_renderer_create_instance(struct vgltf_renderer *renderer,
-                                           struct vgltf_platform *platform) {
-  VGLTF_LOG_INFO("Creating vulkan instance...");
-  if (enable_validation_layers && !are_validation_layer_supported()) {
-    VGLTF_LOG_ERR("Requested validation layers aren't supported");
-    goto err;
-  }
-
-  VkApplicationInfo application_info = {
-      .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
-      .pApplicationName = "Visible GLTF",
-      .applicationVersion = VK_MAKE_VERSION(0, 1, 0),
-      .pEngineName = "No Engine",
-      .engineVersion = VK_MAKE_VERSION(1, 0, 0),
-      .apiVersion = VK_API_VERSION_1_2};
-
-  // Do not attempt to create the instance with extensions that were reported
-  // as unsupported: bail out as soon as the requirement check fails.
-  struct required_instance_extensions required_extensions = {};
-  if (!fetch_required_instance_extensions(&required_extensions, platform)) {
-    VGLTF_LOG_ERR("Couldn't gather the required instance extensions");
-    goto err;
-  }
-
-  VkInstanceCreateInfo create_info = {
-      .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
-      .pApplicationInfo = &application_info,
-      .enabledExtensionCount = required_extensions.count,
-      .ppEnabledExtensionNames = required_extensions.extensions,
-      .flags = VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR};
-
-  // Must outlive the vkCreateInstance call because create_info.pNext may
-  // point at it.
-  VkDebugUtilsMessengerCreateInfoEXT debug_create_info = {};
-  if (enable_validation_layers) {
-    create_info.enabledLayerCount = VALIDATION_LAYER_COUNT;
-    create_info.ppEnabledLayerNames = VALIDATION_LAYERS;
-    populate_debug_messenger_create_info(&debug_create_info);
-    create_info.pNext = &debug_create_info;
-  }
-
-  if (vkCreateInstance(&create_info, nullptr, &renderer->instance) !=
-      VK_SUCCESS) {
-    VGLTF_LOG_ERR("Failed to create VkInstance");
-    goto err;
-  }
-
-  return true;
-err:
-  return false;
-}
-
-// Looks up vkCreateDebugUtilsMessengerEXT through vkGetInstanceProcAddr and
-// forwards the arguments to it. Returns VK_ERROR_EXTENSION_NOT_PRESENT when
-// the entry point cannot be resolved.
-static VkResult create_debug_utils_messenger_ext(
-    VkInstance instance, const VkDebugUtilsMessengerCreateInfoEXT *create_info,
-    const VkAllocationCallbacks *allocator,
-    VkDebugUtilsMessengerEXT *debug_messenger) {
-  PFN_vkCreateDebugUtilsMessengerEXT create_fn =
-      (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr(
-          instance, "vkCreateDebugUtilsMessengerEXT");
-  if (create_fn == nullptr) {
-    return VK_ERROR_EXTENSION_NOT_PRESENT;
-  }
-
-  return create_fn(instance, create_info, allocator, debug_messenger);
-}
-
-// Looks up vkDestroyDebugUtilsMessengerEXT through vkGetInstanceProcAddr and
-// calls it with the given arguments. Does nothing when the entry point cannot
-// be resolved.
-static void
-destroy_debug_utils_messenger_ext(VkInstance instance,
-                                  VkDebugUtilsMessengerEXT debug_messenger,
-                                  const VkAllocationCallbacks *allocator) {
-  PFN_vkDestroyDebugUtilsMessengerEXT destroy_fn =
-      (PFN_vkDestroyDebugUtilsMessengerEXT)vkGetInstanceProcAddr(
-          instance, "vkDestroyDebugUtilsMessengerEXT");
-  if (destroy_fn == nullptr) {
-    return;
-  }
-
-  destroy_fn(instance, debug_messenger, allocator);
-}
-
-// Creates the debug messenger for renderer->instance and stores it in
-// renderer->debug_messenger. Does nothing when validation layers are
-// disabled.
-//
-// The function cannot report failure to its caller, so a failed creation is
-// logged and the handle is reset to VK_NULL_HANDLE, which keeps a later
-// destroy call from operating on an indeterminate handle.
-static void
-vgltf_renderer_setup_debug_messenger(struct vgltf_renderer *renderer) {
-  if (!enable_validation_layers) {
-    return;
-  }
-
-  VkDebugUtilsMessengerCreateInfoEXT create_info;
-  populate_debug_messenger_create_info(&create_info);
-  if (create_debug_utils_messenger_ext(renderer->instance, &create_info,
-                                       nullptr, &renderer->debug_messenger) !=
-      VK_SUCCESS) {
-    VGLTF_LOG_ERR("Failed to set up the debug messenger");
-    renderer->debug_messenger = VK_NULL_HANDLE;
-  }
-}
-
-// Fixed-capacity list of the physical devices exposed by an instance.
-static constexpr int AVAILABLE_PHYSICAL_DEVICE_ARRAY_CAPACITY = 128;
-struct available_physical_devices {
-  VkPhysicalDevice devices[AVAILABLE_PHYSICAL_DEVICE_ARRAY_CAPACITY];
-  uint32_t count;
-};
-
-// Fills *devices with the physical devices of instance, using one
-// vkEnumeratePhysicalDevices call for the count and one for the handles.
-// Logs and returns false when a call fails, when no device is reported, or
-// when the count exceeds the array capacity.
-static bool
-available_physical_devices_init(VkInstance instance,
-                                struct available_physical_devices *devices) {
-  VkResult result =
-      vkEnumeratePhysicalDevices(instance, &devices->count, nullptr);
-  if (result != VK_SUCCESS) {
-    VGLTF_LOG_ERR("Couldn't enumerate physical devices");
-    return false;
-  }
-
-  if (devices->count == 0) {
-    VGLTF_LOG_ERR("Failed to find any GPU with Vulkan support");
-    return false;
-  }
-
-  if (devices->count > AVAILABLE_PHYSICAL_DEVICE_ARRAY_CAPACITY) {
-    VGLTF_LOG_ERR("available physical devices array cannot fit all available "
-                  "physical devices");
-    return false;
-  }
-
-  result =
-      vkEnumeratePhysicalDevices(instance, &devices->count, devices->devices);
-  if (result != VK_SUCCESS) {
-    VGLTF_LOG_ERR("Couldn't enumerate physical devices");
-    return false;
-  }
-
-  return true;
-}
-
-// Queue family indices selected for a device. Each index is only meaningful
-// when its matching has_* flag is set.
-struct queue_family_indices {
-  uint32_t graphics_family;
-  uint32_t present_family;
-  bool has_graphics_family;
-  bool has_present_family;
-};
-
-// Returns true when both the graphics and the present family were found.
-bool queue_family_indices_is_complete(
-    const struct queue_family_indices *indices) {
-  if (!indices->has_graphics_family) {
-    return false;
-  }
-  return indices->has_present_family;
-}
-// Scans the queue families of device and records in *indices a family with
-// VK_QUEUE_GRAPHICS_BIT and a family able to present to surface. Scanning
-// stops as soon as both have been found.
-//
-// *indices is only updated, never reset, so callers pass a zero-initialized
-// struct. Returns false when the device reports more queue families than the
-// local array can hold or when the surface support query fails; returning
-// true does not imply that both families were found, so check with
-// queue_family_indices_is_complete().
-bool queue_family_indices_for_device(struct queue_family_indices *indices,
-                                     VkPhysicalDevice device,
-                                     VkSurfaceKHR surface) {
-  static constexpr uint32_t QUEUE_FAMILY_PROPERTIES_ARRAY_CAPACITY = 64;
-  uint32_t queue_family_count = 0;
-  vkGetPhysicalDeviceQueueFamilyProperties(device, &queue_family_count,
-                                           nullptr);
-
-  if (queue_family_count > QUEUE_FAMILY_PROPERTIES_ARRAY_CAPACITY) {
-    VGLTF_LOG_ERR(
-        "Queue family properties array cannot fit all queue family properties");
-    goto err;
-  }
-
-  VkQueueFamilyProperties
-      queue_family_properties[QUEUE_FAMILY_PROPERTIES_ARRAY_CAPACITY] = {};
-  vkGetPhysicalDeviceQueueFamilyProperties(device, &queue_family_count,
-                                           queue_family_properties);
-
-  for (uint32_t queue_family_index = 0; queue_family_index < queue_family_count;
-       queue_family_index++) {
-    VkQueueFamilyProperties *queue_family =
-        &queue_family_properties[queue_family_index];
-
-    // Initialized and checked: a failed query must not leave an indeterminate
-    // value that is then read as "present supported".
-    VkBool32 present_support = VK_FALSE;
-    if (vkGetPhysicalDeviceSurfaceSupportKHR(device, queue_family_index,
-                                             surface, &present_support) !=
-        VK_SUCCESS) {
-      VGLTF_LOG_ERR("Couldn't query surface support for queue family %u",
-                    (unsigned)queue_family_index);
-      goto err;
-    }
-
-    if (queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT) {
-      indices->graphics_family = queue_family_index;
-      indices->has_graphics_family = true;
-    }
-
-    if (present_support) {
-      indices->present_family = queue_family_index;
-      indices->has_present_family = true;
-    }
-
-    if (queue_family_indices_is_complete(indices)) {
-      break;
-    }
-  }
-
-  return true;
-err:
-  return false;
-}
-
-// Returns true when value equals one of the first length elements of array.
-// A length of zero or less yields false.
-static bool is_in_array(uint32_t *array, int length, uint32_t value) {
-  int position = 0;
-  while (position < length && array[position] != value) {
-    position++;
-  }
-
-  // Stopping before the end means a match was found.
-  return position < length;
-}
-
-// Fixed-capacity snapshot of the device extensions reported by Vulkan.
-static constexpr uint32_t SUPPORTED_EXTENSIONS_ARRAY_CAPACITY = 128;
-struct supported_extensions {
-  VkExtensionProperties properties[SUPPORTED_EXTENSIONS_ARRAY_CAPACITY];
-  uint32_t count;
-};
-
-// Fills supported_extensions with the extensions of device, using one
-// vkEnumerateDeviceExtensionProperties call for the count and one for the
-// properties. Returns false when either call does not return VK_SUCCESS or
-// when the reported count exceeds the array capacity.
-bool supported_extensions_init(
-    struct supported_extensions *supported_extensions,
-    VkPhysicalDevice device) {
-  if (vkEnumerateDeviceExtensionProperties(device, nullptr,
-                                           &supported_extensions->count,
-                                           nullptr) != VK_SUCCESS) {
-    goto err;
-  }
-
-  if (supported_extensions->count > SUPPORTED_EXTENSIONS_ARRAY_CAPACITY) {
-    VGLTF_LOG_ERR(
-        "supported extensions array cannot fit all the VkExtensionProperties");
-    goto err;
-  }
-
-  if (vkEnumerateDeviceExtensionProperties(
-          device, nullptr, &supported_extensions->count,
-          supported_extensions->properties) != VK_SUCCESS) {
-    goto err;
-  }
-
-  return true;
-err:
-  return false;
-}
-
-// Returns true when extension_name exactly matches (strcmp) the name of one
-// of the stored device extension properties, false otherwise.
-static bool supported_extensions_includes_extension(
-    struct supported_extensions *supported_extensions,
-    const char *extension_name) {
-  const VkExtensionProperties *known = supported_extensions->properties;
-
-  uint32_t n = 0;
-  while (n < supported_extensions->count) {
-    if (strcmp(known[n].extensionName, extension_name) == 0) {
-      return true;
-    }
-    n++;
-  }
-
-  return false;
-}
-
-// Device extensions every selected physical device must support.
-static const char *DEVICE_EXTENSIONS[] = {
-    VK_KHR_SWAPCHAIN_EXTENSION_NAME,
-    "VK_KHR_portability_subset",
-};
-static constexpr int DEVICE_EXTENSION_COUNT =
-    sizeof DEVICE_EXTENSIONS / sizeof *DEVICE_EXTENSIONS;
-
-// Returns true when device supports every entry of DEVICE_EXTENSIONS. The
-// first unsupported entry is written to the debug log and ends the check.
-// Also returns false when the device's extensions cannot be enumerated.
-static bool are_device_extensions_supported(VkPhysicalDevice device) {
-  struct supported_extensions available = {};
-  if (!supported_extensions_init(&available, device)) {
-    return false;
-  }
-
-  for (uint32_t n = 0; n < DEVICE_EXTENSION_COUNT; n++) {
-    const char *wanted = DEVICE_EXTENSIONS[n];
-    if (supported_extensions_includes_extension(&available, wanted)) {
-      continue;
-    }
-
-    VGLTF_LOG_DBG("Unsupported: %s", wanted);
-    return false;
-  }
-
-  return true;
-}
-
-// Surface capabilities, formats and present modes of a device/surface pair,
-// stored in fixed-capacity arrays. format_count and present_mode_count give
-// the number of valid entries in formats and present_modes.
-static constexpr int SWAPCHAIN_SUPPORT_DETAILS_MAX_SURFACE_FORMAT_COUNT = 256;
-static constexpr int SWAPCHAIN_SUPPORT_DETAILS_MAX_PRESENT_MODE_COUNT = 256;
-struct swapchain_support_details {
-  VkSurfaceCapabilitiesKHR capabilities;
-  VkSurfaceFormatKHR
-      formats[SWAPCHAIN_SUPPORT_DETAILS_MAX_SURFACE_FORMAT_COUNT];
-  VkPresentModeKHR
-      present_modes[SWAPCHAIN_SUPPORT_DETAILS_MAX_PRESENT_MODE_COUNT];
-  uint32_t format_count;
-  uint32_t present_mode_count;
-};
-
-// Queries the surface capabilities, surface formats and present modes of
-// device for surface into *swapchain_support_details.
-//
-// When the device reports more entries than an array can hold, the request is
-// capped at the array capacity, so the stored count never exceeds the number
-// of entries actually written. Vulkan signals such a truncated read with
-// VK_INCOMPLETE, which is therefore accepted for the fill calls. Returns
-// false when any query fails.
-bool swapchain_support_details_query_from_device(
-    struct swapchain_support_details *swapchain_support_details,
-    VkPhysicalDevice device, VkSurfaceKHR surface) {
-  uint32_t *format_count = &swapchain_support_details->format_count;
-  uint32_t *present_mode_count =
-      &swapchain_support_details->present_mode_count;
-  VkResult result = VK_SUCCESS;
-
-  if (vkGetPhysicalDeviceSurfaceCapabilitiesKHR(
-          device, surface, &swapchain_support_details->capabilities) !=
-      VK_SUCCESS) {
-    goto err;
-  }
-
-  if (vkGetPhysicalDeviceSurfaceFormatsKHR(device, surface, format_count,
-                                           nullptr) != VK_SUCCESS) {
-    goto err;
-  }
-
-  if (*format_count != 0) {
-    if (*format_count > SWAPCHAIN_SUPPORT_DETAILS_MAX_SURFACE_FORMAT_COUNT) {
-      *format_count = SWAPCHAIN_SUPPORT_DETAILS_MAX_SURFACE_FORMAT_COUNT;
-    }
-    result = vkGetPhysicalDeviceSurfaceFormatsKHR(
-        device, surface, format_count, swapchain_support_details->formats);
-    if (result != VK_SUCCESS && result != VK_INCOMPLETE) {
-      goto err;
-    }
-  }
-
-  if (vkGetPhysicalDeviceSurfacePresentModesKHR(
-          device, surface, present_mode_count, nullptr) != VK_SUCCESS) {
-    goto err;
-  }
-
-  if (*present_mode_count != 0) {
-    if (*present_mode_count >
-        SWAPCHAIN_SUPPORT_DETAILS_MAX_PRESENT_MODE_COUNT) {
-      *present_mode_count = SWAPCHAIN_SUPPORT_DETAILS_MAX_PRESENT_MODE_COUNT;
-    }
-    result = vkGetPhysicalDeviceSurfacePresentModesKHR(
-        device, surface, present_mode_count,
-        swapchain_support_details->present_modes);
-    if (result != VK_SUCCESS && result != VK_INCOMPLETE) {
-      goto err;
-    }
-  }
-
-  return true;
-err:
-  return false;
-}
-
-// Decides whether device can be used to render to surface. A device is
-// suitable when it supports all DEVICE_EXTENSIONS, reports at least one
-// surface format and one present mode, and has both a graphics and a present
-// queue family. Returns false when the swapchain support query fails.
-static bool is_physical_device_suitable(VkPhysicalDevice device,
-                                        VkSurfaceKHR surface) {
-  struct queue_family_indices families = {};
-  queue_family_indices_for_device(&families, device, surface);
-
-  VGLTF_LOG_DBG("Checking for physical device extension support");
-  const bool has_extensions = are_device_extensions_supported(device);
-  VGLTF_LOG_DBG("Supported: %d", has_extensions);
-  if (!has_extensions) {
-    return false;
-  }
-
-  VGLTF_LOG_DBG("Checking for swapchain support details");
-  struct swapchain_support_details support = {};
-  if (!swapchain_support_details_query_from_device(&support, device,
-                                                   surface)) {
-    VGLTF_LOG_ERR("Couldn't query swapchain support details from device");
-    return false;
-  }
-
-  if (support.format_count == 0 || support.present_mode_count == 0) {
-    return false;
-  }
-
-  return queue_family_indices_is_complete(&families);
-}
-
-// Stores in renderer->physical_device the first enumerated device that
-// is_physical_device_suitable() accepts for renderer->surface. Logs and
-// returns false when enumeration fails or no device qualifies, in which case
-// renderer->physical_device is left untouched.
-static bool
-vgltf_renderer_pick_physical_device(struct vgltf_renderer *renderer) {
-  struct available_physical_devices candidates = {};
-  if (!available_physical_devices_init(renderer->instance, &candidates)) {
-    VGLTF_LOG_ERR("Couldn't fetch available physical devices");
-    return false;
-  }
-
-  for (uint32_t n = 0; n < candidates.count; n++) {
-    VkPhysicalDevice candidate = candidates.devices[n];
-    if (!is_physical_device_suitable(candidate, renderer->surface)) {
-      continue;
-    }
-
-    renderer->physical_device = candidate;
-    return true;
-  }
-
-  VGLTF_LOG_ERR("Failed to find a suitable GPU");
-  return false;
-}
-
-// Creates the logical device for renderer->physical_device with one queue per
-// distinct queue family (graphics and present may share a family), enables
-// DEVICE_EXTENSIONS, and fetches the graphics and present queues into the
-// renderer.
-//
-// Returns false when the queue families cannot be determined or when
-// vkCreateDevice fails.
-static bool
-vgltf_renderer_create_logical_device(struct vgltf_renderer *renderer) {
-  // The indices are used unconditionally below, so refuse to continue with a
-  // failed or incomplete lookup instead of silently requesting family 0.
-  struct queue_family_indices queue_family_indices = {};
-  if (!queue_family_indices_for_device(&queue_family_indices,
-                                       renderer->physical_device,
-                                       renderer->surface) ||
-      !queue_family_indices_is_complete(&queue_family_indices)) {
-    VGLTF_LOG_ERR("Couldn't find the graphics and present queue families");
-    goto err;
-  }
-  static constexpr int MAX_QUEUE_FAMILY_COUNT = 2;
-
-  // Collect each family once: Vulkan expects one queue create info per
-  // distinct family.
-  uint32_t unique_queue_families[MAX_QUEUE_FAMILY_COUNT] = {};
-  int unique_queue_family_count = 0;
-
-  if (!is_in_array(unique_queue_families, unique_queue_family_count,
-                   queue_family_indices.graphics_family)) {
-    assert(unique_queue_family_count < MAX_QUEUE_FAMILY_COUNT);
-    unique_queue_families[unique_queue_family_count++] =
-        queue_family_indices.graphics_family;
-  }
-  if (!is_in_array(unique_queue_families, unique_queue_family_count,
-                   queue_family_indices.present_family)) {
-    assert(unique_queue_family_count < MAX_QUEUE_FAMILY_COUNT);
-    unique_queue_families[unique_queue_family_count++] =
-        queue_family_indices.present_family;
-  }
-
-  float queue_priority = 1.f;
-  VkDeviceQueueCreateInfo queue_create_infos[MAX_QUEUE_FAMILY_COUNT] = {};
-  int queue_create_info_count = 0;
-  for (int unique_queue_family_index = 0;
-       unique_queue_family_index < unique_queue_family_count;
-       unique_queue_family_index++) {
-    queue_create_infos[queue_create_info_count++] = (VkDeviceQueueCreateInfo){
-        .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
-        .queueFamilyIndex = unique_queue_families[unique_queue_family_index],
-        .queueCount = 1,
-        .pQueuePriorities = &queue_priority};
-  }
-
-  VkPhysicalDeviceFeatures device_features = {};
-  VkDeviceCreateInfo create_info = {
-      .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
-      .pQueueCreateInfos = queue_create_infos,
-      .queueCreateInfoCount = queue_create_info_count,
-      .pEnabledFeatures = &device_features,
-      .ppEnabledExtensionNames = DEVICE_EXTENSIONS,
-      .enabledExtensionCount = DEVICE_EXTENSION_COUNT};
-  if (vkCreateDevice(renderer->physical_device, &create_info, nullptr,
-                     &renderer->device) != VK_SUCCESS) {
-    VGLTF_LOG_ERR("Failed to create logical device");
-    goto err;
-  }
-
-  vkGetDeviceQueue(renderer->device, queue_family_indices.graphics_family, 0,
-                   &renderer->graphics_queue);
-  vkGetDeviceQueue(renderer->device, queue_family_indices.present_family, 0,
-                   &renderer->present_queue);
-
-  return true;
-err:
-  return false;
-}
-
-// Asks the platform layer to create a Vulkan surface for renderer->instance
-// and stores it in renderer->surface. Logs and returns false on failure.
-static bool vgltf_renderer_create_surface(struct vgltf_renderer *renderer,
-                                          struct vgltf_platform *platform) {
-  const bool created = vgltf_platform_create_vulkan_surface(
-      platform, renderer->instance, &renderer->surface);
-  if (created) {
-    return true;
-  }
-
-  VGLTF_LOG_ERR("Couldn't create surface");
-  return false;
-}
-
-// Returns the first available format that is VK_FORMAT_B8G8R8A8_SRGB with the
-// VK_COLOR_SPACE_SRGB_NONLINEAR_KHR color space. When none matches, the first
-// element of available_formats is returned, whatever the count is.
-static VkSurfaceFormatKHR
-choose_swapchain_surface_format(VkSurfaceFormatKHR *available_formats,
-                                uint32_t available_format_count) {
-  for (uint32_t n = 0; n < available_format_count; n++) {
-    const VkSurfaceFormatKHR candidate = available_formats[n];
-    if (candidate.format != VK_FORMAT_B8G8R8A8_SRGB) {
-      continue;
-    }
-    if (candidate.colorSpace != VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) {
-      continue;
-    }
-    return candidate;
-  }
-
-  return available_formats[0];
-}
-
-// Returns VK_PRESENT_MODE_MAILBOX_KHR when it is among the available modes,
-// otherwise VK_PRESENT_MODE_FIFO_KHR.
-static VkPresentModeKHR
-choose_swapchain_present_mode(VkPresentModeKHR *available_modes,
-                              uint32_t available_mode_count) {
-  uint32_t n = 0;
-  while (n < available_mode_count &&
-         available_modes[n] != VK_PRESENT_MODE_MAILBOX_KHR) {
-    n++;
-  }
-
-  if (n < available_mode_count) {
-    return available_modes[n];
-  }
-  return VK_PRESENT_MODE_FIFO_KHR;
-}
-
-// Limits value to the range [min, max]. The lower bound is tested first, so
-// any value below min yields min even when min is greater than max.
-static uint32_t clamp_uint32(uint32_t min, uint32_t max, uint32_t value) {
-  if (value < min) {
-    return min;
-  }
-  if (value > max) {
-    return max;
-  }
-  return value;
-}
-
-// Picks the swapchain extent. When the surface reports a current extent whose
-// width is not UINT32_MAX, that extent is returned as is. Otherwise the given
-// width and height are clamped to the surface's min/max image extents.
-static VkExtent2D
-choose_swapchain_extent(const VkSurfaceCapabilitiesKHR *capabilities, int width,
-                        int height) {
-  if (capabilities->currentExtent.width != UINT32_MAX) {
-    return capabilities->currentExtent;
-  }
-
-  // Same int -> uint32_t conversion as storing the values in a VkExtent2D.
-  const VkExtent2D requested = {width, height};
-  const VkExtent2D *lowest = &capabilities->minImageExtent;
-  const VkExtent2D *highest = &capabilities->maxImageExtent;
-
-  return (VkExtent2D){
-      .width = clamp_uint32(lowest->width, highest->width, requested.width),
-      .height =
-          clamp_uint32(lowest->height, highest->height, requested.height),
-  };
-}
-
-// Creates the swapchain for renderer->surface and retrieves its images.
-//
-// On success the renderer holds the swapchain, its images and image count,
-// the chosen image format and the extent. The image count requested is the
-// surface minimum plus one, capped at the surface maximum when there is one.
-// Images are shared concurrently when the graphics and present families
-// differ, exclusively otherwise.
-//
-// Returns false when the swapchain support or queue family queries fail, when
-// swapchain creation fails, or when the images cannot be retrieved or do not
-// fit in renderer->swapchain_images. A swapchain created before such a
-// failure is destroyed and the handle reset.
-static bool vgltf_renderer_create_swapchain(struct vgltf_renderer *renderer) {
-  struct swapchain_support_details swapchain_support_details = {};
-  if (!swapchain_support_details_query_from_device(
-          &swapchain_support_details, renderer->physical_device,
-          renderer->surface)) {
-    VGLTF_LOG_ERR("Couldn't query swapchain support details from device");
-    goto err;
-  }
-
-  VkSurfaceFormatKHR surface_format =
-      choose_swapchain_surface_format(swapchain_support_details.formats,
-                                      swapchain_support_details.format_count);
-  VkPresentModeKHR present_mode = choose_swapchain_present_mode(
-      swapchain_support_details.present_modes,
-      swapchain_support_details.present_mode_count);
-
-  VkExtent2D extent = choose_swapchain_extent(
-      &swapchain_support_details.capabilities, renderer->window_size.width,
-      renderer->window_size.height);
-  uint32_t image_count =
-      swapchain_support_details.capabilities.minImageCount + 1;
-  // A maxImageCount of 0 is treated as "no upper limit".
-  if (swapchain_support_details.capabilities.maxImageCount > 0 &&
-      image_count > swapchain_support_details.capabilities.maxImageCount) {
-    image_count = swapchain_support_details.capabilities.maxImageCount;
-  }
-
-  VkSwapchainCreateInfoKHR create_info = {
-      .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
-      .surface = renderer->surface,
-      .minImageCount = image_count,
-      .imageFormat = surface_format.format,
-      .imageColorSpace = surface_format.colorSpace,
-      .imageExtent = extent,
-      .imageArrayLayers = 1,
-      .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT};
-  struct queue_family_indices indices = {};
-  if (!queue_family_indices_for_device(&indices, renderer->physical_device,
-                                       renderer->surface)) {
-    VGLTF_LOG_ERR("Couldn't query queue family indices");
-    goto err;
-  }
-  uint32_t queue_family_indices[] = {indices.graphics_family,
-                                     indices.present_family};
-  if (indices.graphics_family != indices.present_family) {
-    create_info.imageSharingMode = VK_SHARING_MODE_CONCURRENT;
-    create_info.queueFamilyIndexCount = 2;
-    create_info.pQueueFamilyIndices = queue_family_indices;
-  } else {
-    create_info.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
-  }
-
-  create_info.preTransform =
-      swapchain_support_details.capabilities.currentTransform;
-  create_info.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
-  create_info.presentMode = present_mode;
-  create_info.clipped = VK_TRUE;
-  create_info.oldSwapchain = VK_NULL_HANDLE;
-
-  if (vkCreateSwapchainKHR(renderer->device, &create_info, nullptr,
-                           &renderer->swapchain) != VK_SUCCESS) {
-    VGLTF_LOG_ERR("Swapchain creation failed!");
-    goto err;
-  }
-
-  if (vkGetSwapchainImagesKHR(renderer->device, renderer->swapchain,
-                              &renderer->swapchain_image_count,
-                              nullptr) != VK_SUCCESS) {
-    VGLTF_LOG_ERR("Couldn't get swapchain image count");
-    goto destroy_swapchain;
-  }
-
-  if (renderer->swapchain_image_count >
-      VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT) {
-    // The count is unsigned: print it with %u rather than %d.
-    VGLTF_LOG_ERR("Swapchain image array cannot fit all %u swapchain images",
-                  (unsigned)renderer->swapchain_image_count);
-    goto destroy_swapchain;
-  }
-
-  if (vkGetSwapchainImagesKHR(renderer->device, renderer->swapchain,
-                              &renderer->swapchain_image_count,
-                              renderer->swapchain_images) != VK_SUCCESS) {
-    VGLTF_LOG_ERR("Couldn't get swapchain images");
-    goto destroy_swapchain;
-  }
-
-  renderer->swapchain_image_format = surface_format.format;
-  renderer->swapchain_extent = extent;
-
-  return true;
-destroy_swapchain:
-  vkDestroySwapchainKHR(renderer->device, renderer->swapchain, nullptr);
-  // Do not leave a destroyed handle behind in the renderer.
-  renderer->swapchain = VK_NULL_HANDLE;
-err:
-  return false;
-}
-
-// Creates one 2D color image view per swapchain image, stored at the matching
-// index of renderer->swapchain_image_views. If a creation fails, the views
-// created so far are destroyed in creation order and false is returned.
-static bool vgltf_renderer_create_image_views(struct vgltf_renderer *renderer) {
-  uint32_t created = 0;
-  while (created < renderer->swapchain_image_count) {
-    VkImageViewCreateInfo view_info = {
-        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
-        .image = renderer->swapchain_images[created],
-        .viewType = VK_IMAGE_VIEW_TYPE_2D,
-        .format = renderer->swapchain_image_format,
-        .components = {VK_COMPONENT_SWIZZLE_IDENTITY,
-                       VK_COMPONENT_SWIZZLE_IDENTITY,
-                       VK_COMPONENT_SWIZZLE_IDENTITY,
-                       VK_COMPONENT_SWIZZLE_IDENTITY},
-        .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
-                             .levelCount = 1,
-                             .layerCount = 1}};
-
-    VkImageView *slot = &renderer->swapchain_image_views[created];
-    if (vkCreateImageView(renderer->device, &view_info, nullptr, slot) !=
-        VK_SUCCESS) {
-      break;
-    }
-    created++;
-  }
-
-  if (created == renderer->swapchain_image_count) {
-    return true;
-  }
-
-  // Roll back the views that were created before the failure.
-  for (uint32_t n = 0; n < created; n++) {
-    vkDestroyImageView(renderer->device, renderer->swapchain_image_views[n],
-                       nullptr);
-  }
-  return false;
-}
-
-// Creates a shader module on device from the size bytes at code and stores it
-// in *out. Logs and returns false when vkCreateShaderModule fails.
-// NOTE(review): code is reinterpreted as const uint32_t *; callers presumably
-// need to pass suitably aligned data - confirm at the call sites.
-static bool create_shader_module(VkDevice device, const unsigned char *code,
-                                 int size, VkShaderModule *out) {
-  VkShaderModuleCreateInfo module_info = {
-      .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
-      .pCode = (const uint32_t *)code,
-      .codeSize = size,
-  };
-
-  if (vkCreateShaderModule(device, &module_info, nullptr, out) == VK_SUCCESS) {
-    return true;
-  }
-
-  VGLTF_LOG_ERR("Couldn't create shader module");
-  return false;
-}
-
-// Creates renderer->render_pass: a single graphics subpass with one color
-// attachment in the swapchain image format. The attachment is cleared on
-// load, stored on completion, and moves from an undefined layout to the
-// present-source layout. One external -> subpass 0 dependency on the color
-// attachment output stage is declared. Logs and returns false on failure.
-static bool vgltf_renderer_create_render_pass(struct vgltf_renderer *renderer) {
-  VkAttachmentDescription attachment = {
-      .format = renderer->swapchain_image_format,
-      .samples = VK_SAMPLE_COUNT_1_BIT,
-      .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
-      .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
-      .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
-      .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
-      .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
-      .finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
-  };
-
-  VkAttachmentReference attachment_ref = {
-      .attachment = 0,
-      .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
-  };
-
-  VkSubpassDescription color_subpass = {
-      .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
-      .colorAttachmentCount = 1,
-      .pColorAttachments = &attachment_ref,
-  };
-
-  VkSubpassDependency external_dependency = {
-      .srcSubpass = VK_SUBPASS_EXTERNAL,
-      .dstSubpass = 0,
-      .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
-      .dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
-      .srcAccessMask = 0,
-      .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
-  };
-
-  VkRenderPassCreateInfo pass_info = {
-      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
-      .attachmentCount = 1,
-      .pAttachments = &attachment,
-      .subpassCount = 1,
-      .pSubpasses = &color_subpass,
-      .dependencyCount = 1,
-      .pDependencies = &external_dependency,
-  };
-
-  if (vkCreateRenderPass(renderer->device, &pass_info, nullptr,
-                         &renderer->render_pass) == VK_SUCCESS) {
-    return true;
-  }
-
-  VGLTF_LOG_ERR("Failed to create render pass");
-  return false;
-}
-
-// Creates the pipeline layout and the graphics pipeline that draws the
-// embedded triangle shaders.
-// The SPIR-V for both stages is embedded at compile time. The two shader
-// modules only live for the duration of this call: they are destroyed on the
-// success path as well as on every failure path. The pipeline layout is only
-// destroyed here when pipeline creation itself fails.
-// Returns true on success, false (after logging) on any failure.
-static bool
-vgltf_renderer_create_graphics_pipeline(struct vgltf_renderer *renderer) {
-  static constexpr unsigned char vert_spirv[] = {
-#embed "../compiled_shaders/triangle.vert.spv"
-  };
-  static constexpr unsigned char frag_spirv[] = {
-#embed "../compiled_shaders/triangle.frag.spv"
-  };
-
-  bool pipeline_created = false;
-
-  VkShaderModule vert_module;
-  if (!create_shader_module(renderer->device, vert_spirv, sizeof(vert_spirv),
-                            &vert_module)) {
-    VGLTF_LOG_ERR("Couldn't create triangle vert shader module");
-    return false;
-  }
-
-  VkShaderModule frag_module;
-  if (!create_shader_module(renderer->device, frag_spirv, sizeof(frag_spirv),
-                            &frag_module)) {
-    VGLTF_LOG_ERR("Couldn't create triangle frag shader module");
-    goto destroy_vert_module;
-  }
-
-  // Vertex stage first, fragment stage second.
-  VkPipelineShaderStageCreateInfo stages[] = {
-      {.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
-       .stage = VK_SHADER_STAGE_VERTEX_BIT,
-       .module = vert_module,
-       .pName = "main"},
-      {.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
-       .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
-       .module = frag_module,
-       .pName = "main"},
-  };
-
-  // Viewport and scissor are supplied at record time, not baked in.
-  VkDynamicState dynamic_state_list[] = {
-      VK_DYNAMIC_STATE_VIEWPORT,
-      VK_DYNAMIC_STATE_SCISSOR,
-  };
-  VkPipelineDynamicStateCreateInfo dynamic_info = {
-      .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
-      .dynamicStateCount =
-          sizeof(dynamic_state_list) / sizeof(dynamic_state_list[0]),
-      .pDynamicStates = dynamic_state_list,
-  };
-
-  // No vertex buffers are bound by this pipeline.
-  VkPipelineVertexInputStateCreateInfo vertex_input = {
-      .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
-      .vertexBindingDescriptionCount = 0,
-      .vertexAttributeDescriptionCount = 0,
-  };
-
-  VkPipelineInputAssemblyStateCreateInfo assembly = {
-      .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
-      .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
-      .primitiveRestartEnable = VK_FALSE,
-  };
-
-  VkPipelineViewportStateCreateInfo viewport_info = {
-      .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
-      .viewportCount = 1,
-      .scissorCount = 1,
-  };
-
-  VkPipelineRasterizationStateCreateInfo raster_info = {
-      .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
-      .depthClampEnable = VK_FALSE,
-      .rasterizerDiscardEnable = VK_FALSE,
-      .polygonMode = VK_POLYGON_MODE_FILL,
-      .cullMode = VK_CULL_MODE_BACK_BIT,
-      .frontFace = VK_FRONT_FACE_CLOCKWISE,
-      .depthBiasEnable = VK_FALSE,
-      .lineWidth = 1.f,
-  };
-
-  VkPipelineMultisampleStateCreateInfo multisample_info = {
-      .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
-      .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
-      .sampleShadingEnable = VK_FALSE,
-  };
-
-  VkPipelineColorBlendAttachmentState blend_attachment = {
-      .blendEnable = VK_FALSE,
-      .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
-                        VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
-  };
-  VkPipelineColorBlendStateCreateInfo blend_info = {
-      .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
-      .logicOpEnable = VK_FALSE,
-      .attachmentCount = 1,
-      .pAttachments = &blend_attachment,
-  };
-
-  VkPipelineLayoutCreateInfo layout_info = {
-      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
-  };
-  if (vkCreatePipelineLayout(renderer->device, &layout_info, nullptr,
-                             &renderer->pipeline_layout) != VK_SUCCESS) {
-    VGLTF_LOG_ERR("Couldn't create pipeline layout");
-    goto destroy_frag_module;
-  }
-
-  VkGraphicsPipelineCreateInfo pipeline_create_info = {
-      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
-      .stageCount = 2,
-      .pStages = stages,
-      .pVertexInputState = &vertex_input,
-      .pInputAssemblyState = &assembly,
-      .pViewportState = &viewport_info,
-      .pRasterizationState = &raster_info,
-      .pMultisampleState = &multisample_info,
-      .pColorBlendState = &blend_info,
-      .pDynamicState = &dynamic_info,
-      .layout = renderer->pipeline_layout,
-      .renderPass = renderer->render_pass,
-      .subpass = 0,
-  };
-
-  if (vkCreateGraphicsPipelines(renderer->device, VK_NULL_HANDLE, 1,
-                                &pipeline_create_info, nullptr,
-                                &renderer->graphics_pipeline) == VK_SUCCESS) {
-    pipeline_created = true;
-  } else {
-    VGLTF_LOG_ERR("Couldn't create pipeline");
-    vkDestroyPipelineLayout(renderer->device, renderer->pipeline_layout,
-                            nullptr);
-  }
-
-destroy_frag_module:
-  vkDestroyShaderModule(renderer->device, frag_module, nullptr);
-destroy_vert_module:
-  vkDestroyShaderModule(renderer->device, vert_module, nullptr);
-  return pipeline_created;
-}
-
-// Creates one framebuffer per swapchain image view, bound to the renderer's
-// render pass and sized to the swapchain extent.
-// On failure, every framebuffer created by this call is destroyed and all
-// slots up to swapchain_image_count are reset to VK_NULL_HANDLE, so nothing
-// leaks and a later vkDestroyFramebuffer on those slots is a harmless no-op.
-// Returns true on success, false (after logging) on failure.
-static bool
-vgltf_renderer_create_framebuffers(struct vgltf_renderer *renderer) {
-  uint32_t created_count = 0;
-  for (; created_count < renderer->swapchain_image_count; created_count++) {
-    VkImageView attachments[] = {
-        renderer->swapchain_image_views[created_count]};
-
-    VkFramebufferCreateInfo framebuffer_info = {
-        .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
-        .renderPass = renderer->render_pass,
-        .attachmentCount = 1,
-        .pAttachments = attachments,
-        .width = renderer->swapchain_extent.width,
-        .height = renderer->swapchain_extent.height,
-        .layers = 1};
-
-    if (vkCreateFramebuffer(renderer->device, &framebuffer_info, nullptr,
-                            &renderer->swapchain_framebuffers[created_count]) !=
-        VK_SUCCESS) {
-      VGLTF_LOG_ERR("Failed to create framebuffer");
-      goto destroy_created;
-    }
-  }
-
-  return true;
-destroy_created:
-  for (uint32_t i = 0; i < renderer->swapchain_image_count; i++) {
-    // Only the first created_count slots hold live handles from this call.
-    if (i < created_count) {
-      vkDestroyFramebuffer(renderer->device,
-                           renderer->swapchain_framebuffers[i], nullptr);
-    }
-    renderer->swapchain_framebuffers[i] = VK_NULL_HANDLE;
-  }
-  return false;
-}
-
-// Creates the command pool on the graphics queue family of the selected
-// physical device. Command buffers from this pool can be reset individually.
-// Returns true on success, false (after logging) on failure.
-static bool
-vgltf_renderer_create_command_pool(struct vgltf_renderer *renderer) {
-  struct queue_family_indices families = {};
-  const bool families_found = queue_family_indices_for_device(
-      &families, renderer->physical_device, renderer->surface);
-  if (!families_found) {
-    VGLTF_LOG_ERR("Couldn't fetch queue family indices");
-    return false;
-  }
-
-  const VkCommandPoolCreateInfo create_info = {
-      .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
-      .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
-      .queueFamilyIndex = families.graphics_family,
-  };
-
-  const VkResult result = vkCreateCommandPool(
-      renderer->device, &create_info, nullptr, &renderer->command_pool);
-  if (result != VK_SUCCESS) {
-    VGLTF_LOG_ERR("Couldn't create command pool");
-    return false;
-  }
-
-  return true;
-}
-
-// Allocates one primary command buffer per frame in flight from the
-// renderer's command pool.
-// Returns true on success, false (after logging) on failure.
-static bool
-vgltf_renderer_create_command_buffer(struct vgltf_renderer *renderer) {
-  const VkCommandBufferAllocateInfo alloc_info = {
-      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
-      .commandPool = renderer->command_pool,
-      .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
-      .commandBufferCount = VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT,
-  };
-
-  const VkResult result = vkAllocateCommandBuffers(
-      renderer->device, &alloc_info, renderer->command_buffer);
-  if (result != VK_SUCCESS) {
-    VGLTF_LOG_ERR("Couldn't allocate command buffers");
-    return false;
-  }
-
-  return true;
-}
-
-// Creates, for every frame in flight, the "image available" semaphore, the
-// "render finished" semaphore and the in-flight fence. Fences start signaled
-// so the first wait on them returns immediately.
-// On failure, every object created so far is destroyed, including whatever
-// was created for the frame that failed.
-// Returns true on success, false (after logging) on failure.
-static bool
-vgltf_renderer_create_sync_objects(struct vgltf_renderer *renderer) {
-  VkSemaphoreCreateInfo semaphore_info = {
-      .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
-  };
-
-  VkFenceCreateInfo fence_info = {.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
-                                  .flags = VK_FENCE_CREATE_SIGNALED_BIT};
-
-  int frame_in_flight_index = 0;
-  for (; frame_in_flight_index < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT;
-       frame_in_flight_index++) {
-    // Reset the slot first so a partially created frame can be destroyed
-    // safely: destroying VK_NULL_HANDLE is a no-op.
-    renderer->image_available_semaphores[frame_in_flight_index] =
-        VK_NULL_HANDLE;
-    renderer->render_finished_semaphores[frame_in_flight_index] =
-        VK_NULL_HANDLE;
-    renderer->in_flight_fences[frame_in_flight_index] = VK_NULL_HANDLE;
-
-    if (vkCreateSemaphore(
-            renderer->device, &semaphore_info, nullptr,
-            &renderer->image_available_semaphores[frame_in_flight_index]) !=
-            VK_SUCCESS ||
-        vkCreateSemaphore(
-            renderer->device, &semaphore_info, nullptr,
-            &renderer->render_finished_semaphores[frame_in_flight_index]) !=
-            VK_SUCCESS ||
-        vkCreateFence(renderer->device, &fence_info, nullptr,
-                      &renderer->in_flight_fences[frame_in_flight_index]) !=
-            VK_SUCCESS) {
-      VGLTF_LOG_ERR("Couldn't create sync objects");
-      goto err;
-    }
-  }
-
-  return true;
-err:
-  // frame_in_flight_index is the frame that failed; include it so its
-  // partially created objects are released too.
-  for (int frame_in_flight_to_delete_index = 0;
-       frame_in_flight_to_delete_index <= frame_in_flight_index;
-       frame_in_flight_to_delete_index++) {
-    vkDestroyFence(renderer->device,
-                   renderer->in_flight_fences[frame_in_flight_to_delete_index],
-                   nullptr);
-    vkDestroySemaphore(
-        renderer->device,
-        renderer->render_finished_semaphores[frame_in_flight_to_delete_index],
-        nullptr);
-    vkDestroySemaphore(
-        renderer->device,
-        renderer->image_available_semaphores[frame_in_flight_to_delete_index],
-        nullptr);
-  }
-  return false;
-}
-
-// Destroys everything tied to the current swapchain: first all framebuffers,
-// then all swapchain image views, then the swapchain itself.
-static void vgltf_renderer_cleanup_swapchain(struct vgltf_renderer *renderer) {
-  VkDevice device = renderer->device;
-
-  uint32_t index = 0;
-  while (index < renderer->swapchain_image_count) {
-    vkDestroyFramebuffer(device, renderer->swapchain_framebuffers[index],
-                         nullptr);
-    index++;
-  }
-
-  index = 0;
-  while (index < renderer->swapchain_image_count) {
-    vkDestroyImageView(device, renderer->swapchain_image_views[index],
-                       nullptr);
-    index++;
-  }
-
-  vkDestroySwapchainKHR(device, renderer->swapchain, nullptr);
-}
-
-// Rebuilds the swapchain and the objects derived from it (image views and
-// framebuffers) after waiting for the device to go idle.
-// Returns true when all three steps succeed. Returns false (after logging) as
-// soon as one fails; later steps are then skipped.
-// NOTE(review): on failure the renderer's swapchain state is only partially
-// rebuilt; callers should treat this as fatal - confirm against callers.
-static bool vgltf_renderer_recreate_swapchain(struct vgltf_renderer *renderer) {
-  vkDeviceWaitIdle(renderer->device);
-  vgltf_renderer_cleanup_swapchain(renderer);
-
-  if (!vgltf_renderer_create_swapchain(renderer)) {
-    VGLTF_LOG_ERR("Couldn't recreate swapchain");
-    return false;
-  }
-  if (!vgltf_renderer_create_image_views(renderer)) {
-    VGLTF_LOG_ERR("Couldn't recreate swapchain image views");
-    return false;
-  }
-  if (!vgltf_renderer_create_framebuffers(renderer)) {
-    VGLTF_LOG_ERR("Couldn't recreate swapchain framebuffers");
-    return false;
-  }
-  return true;
-}
-
-// Renders and presents one frame: waits for the current frame's fence,
-// acquires a swapchain image, records a render pass that draws three
-// vertices, submits it and presents the image.
-//
-// Swapchain recreation:
-// - VK_ERROR_OUT_OF_DATE_KHR on acquire: no image was acquired, so the
-//   swapchain is rebuilt immediately and the frame is skipped (returns true).
-// - VK_SUBOPTIMAL_KHR on acquire is a success code: the image was acquired
-//   and the semaphore will be signaled, so the frame is rendered normally and
-//   the swapchain is rebuilt after presentation instead.
-// - After presenting, the swapchain is rebuilt when presentation reports
-//   out-of-date/suboptimal or when a resize was flagged.
-//
-// Returns true when the frame was rendered or deliberately skipped, false
-// (after logging) on failure.
-bool vgltf_renderer_triangle_pass(struct vgltf_renderer *renderer) {
-  const uint32_t frame = renderer->current_frame;
-  VkCommandBuffer command_buffer = renderer->command_buffer[frame];
-
-  vkWaitForFences(renderer->device, 1, &renderer->in_flight_fences[frame],
-                  VK_TRUE, UINT64_MAX);
-
-  uint32_t image_index;
-  VkResult acquire_swapchain_image_result = vkAcquireNextImageKHR(
-      renderer->device, renderer->swapchain, UINT64_MAX,
-      renderer->image_available_semaphores[frame], VK_NULL_HANDLE,
-      &image_index);
-  if (acquire_swapchain_image_result == VK_ERROR_OUT_OF_DATE_KHR) {
-    renderer->framebuffer_resized = false;
-    if (!vgltf_renderer_recreate_swapchain(renderer)) {
-      goto err;
-    }
-    return true;
-  } else if (acquire_swapchain_image_result != VK_SUCCESS &&
-             acquire_swapchain_image_result != VK_SUBOPTIMAL_KHR) {
-    VGLTF_LOG_ERR("Failed to acquire a swapchain image");
-    goto err;
-  }
-
-  // Only reset the fence once we know work will be submitted, otherwise the
-  // next wait on it would never return.
-  vkResetFences(renderer->device, 1, &renderer->in_flight_fences[frame]);
-
-  vkResetCommandBuffer(command_buffer, 0);
-  VkCommandBufferBeginInfo begin_info = {
-      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
-  };
-
-  if (vkBeginCommandBuffer(command_buffer, &begin_info) != VK_SUCCESS) {
-    VGLTF_LOG_ERR("Failed to begin recording command buffer");
-    goto err;
-  }
-
-  VkRenderPassBeginInfo render_pass_info = {
-      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
-      .renderPass = renderer->render_pass,
-      .framebuffer = renderer->swapchain_framebuffers[image_index],
-      .renderArea = {.offset = {}, .extent = renderer->swapchain_extent},
-      .clearValueCount = 1,
-      .pClearValues =
-          &(const VkClearValue){.color = {.float32 = {0.f, 0.f, 0.f, 1.f}}},
-  };
-
-  vkCmdBeginRenderPass(command_buffer, &render_pass_info,
-                       VK_SUBPASS_CONTENTS_INLINE);
-  vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
-                    renderer->graphics_pipeline);
-
-  // Viewport and scissor are dynamic pipeline state; both cover the whole
-  // swapchain extent.
-  VkViewport viewport = {.x = 0.f,
-                         .y = 0.f,
-                         .width = (float)renderer->swapchain_extent.width,
-                         .height = (float)renderer->swapchain_extent.height,
-                         .minDepth = 0.f,
-                         .maxDepth = 1.f};
-  vkCmdSetViewport(command_buffer, 0, 1, &viewport);
-  VkRect2D scissor = {.offset = {}, .extent = renderer->swapchain_extent};
-  vkCmdSetScissor(command_buffer, 0, 1, &scissor);
-
-  vkCmdDraw(command_buffer, 3, 1, 0, 0);
-
-  vkCmdEndRenderPass(command_buffer);
-
-  if (vkEndCommandBuffer(command_buffer) != VK_SUCCESS) {
-    VGLTF_LOG_ERR("Failed to record command buffer");
-    goto err;
-  }
-
-  VkSemaphore wait_semaphores[] = {
-      renderer->image_available_semaphores[frame]};
-  VkPipelineStageFlags wait_stages[] = {
-      VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT};
-  VkSemaphore signal_semaphores[] = {
-      renderer->render_finished_semaphores[frame]};
-
-  VkSubmitInfo submit_info = {
-      .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
-      .waitSemaphoreCount = 1,
-      .pWaitSemaphores = wait_semaphores,
-      .pWaitDstStageMask = wait_stages,
-      .commandBufferCount = 1,
-      .pCommandBuffers = &renderer->command_buffer[frame],
-      .signalSemaphoreCount = 1,
-      .pSignalSemaphores = signal_semaphores,
-  };
-  if (vkQueueSubmit(renderer->graphics_queue, 1, &submit_info,
-                    renderer->in_flight_fences[frame]) != VK_SUCCESS) {
-    VGLTF_LOG_ERR("Failed to submit draw command buffer");
-    goto err;
-  }
-
-  VkSwapchainKHR swapchains[] = {renderer->swapchain};
-  VkPresentInfoKHR present_info = {.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
-                                   .waitSemaphoreCount = 1,
-                                   .pWaitSemaphores = signal_semaphores,
-                                   .swapchainCount = 1,
-                                   .pSwapchains = swapchains,
-                                   .pImageIndices = &image_index};
-
-  VkResult present_result =
-      vkQueuePresentKHR(renderer->present_queue, &present_info);
-  if (present_result == VK_ERROR_OUT_OF_DATE_KHR ||
-      present_result == VK_SUBOPTIMAL_KHR || renderer->framebuffer_resized) {
-    renderer->framebuffer_resized = false;
-    if (!vgltf_renderer_recreate_swapchain(renderer)) {
-      goto err;
-    }
-  } else if (present_result != VK_SUCCESS) {
-    VGLTF_LOG_ERR("Failed to present a swapchain image");
-    goto err;
-  }
-
-  renderer->current_frame =
-      (renderer->current_frame + 1) % VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT;
-  return true;
-err:
-  return false;
-}
-
-// Initializes the whole renderer: instance, debug messenger, surface,
-// physical and logical device, swapchain, image views, render pass, graphics
-// pipeline, framebuffers, command pool, command buffers and sync objects.
-// On failure, everything created so far is destroyed in reverse order through
-// the label chain at the bottom, and false is returned.
-// Returns true when every step succeeded.
-bool vgltf_renderer_init(struct vgltf_renderer *renderer,
-                         struct vgltf_platform *platform) {
-  if (!vgltf_renderer_create_instance(renderer, platform)) {
-    VGLTF_LOG_ERR("instance creation failed");
-    goto err;
-  }
-  vgltf_renderer_setup_debug_messenger(renderer);
-  if (!vgltf_renderer_create_surface(renderer, platform)) {
-    goto destroy_instance;
-  }
-
-  if (!vgltf_renderer_pick_physical_device(renderer)) {
-    VGLTF_LOG_ERR("Couldn't pick physical device");
-    goto destroy_surface;
-  }
-  if (!vgltf_renderer_create_logical_device(renderer)) {
-    VGLTF_LOG_ERR("Couldn't create logical device");
-    // The device was never created, so it must not be destroyed.
-    goto destroy_surface;
-  }
-
-  struct vgltf_window_size window_size = {800, 600};
-  if (!vgltf_platform_get_window_size(platform, &window_size)) {
-    VGLTF_LOG_ERR("Couldn't get window size");
-    goto destroy_device;
-  }
-  renderer->window_size = window_size;
-
-  if (!vgltf_renderer_create_swapchain(renderer)) {
-    VGLTF_LOG_ERR("Couldn't create swapchain");
-    goto destroy_device;
-  }
-
-  if (!vgltf_renderer_create_image_views(renderer)) {
-    VGLTF_LOG_ERR("Couldn't create image views");
-    goto destroy_swapchain;
-  }
-
-  if (!vgltf_renderer_create_render_pass(renderer)) {
-    VGLTF_LOG_ERR("Couldn't create render pass");
-    goto destroy_image_views;
-  }
-
-  if (!vgltf_renderer_create_graphics_pipeline(renderer)) {
-    VGLTF_LOG_ERR("Couldn't create graphics pipeline");
-    goto destroy_render_pass;
-  }
-
-  if (!vgltf_renderer_create_framebuffers(renderer)) {
-    VGLTF_LOG_ERR("Couldn't create framebuffers");
-    goto destroy_graphics_pipeline;
-  }
-
-  if (!vgltf_renderer_create_command_pool(renderer)) {
-    VGLTF_LOG_ERR("Couldn't create command pool");
-    goto destroy_frame_buffers;
-  }
-
-  if (!vgltf_renderer_create_command_buffer(renderer)) {
-    VGLTF_LOG_ERR("Couldn't create command buffer");
-    goto destroy_command_pool;
-  }
-
-  if (!vgltf_renderer_create_sync_objects(renderer)) {
-    VGLTF_LOG_ERR("Couldn't create sync objects");
-    // Command buffers are released together with their pool.
-    goto destroy_command_pool;
-  }
-
-  return true;
-
-destroy_command_pool:
-  vkDestroyCommandPool(renderer->device, renderer->command_pool, nullptr);
-destroy_frame_buffers:
-  for (uint32_t swapchain_framebuffer_index = 0;
-       swapchain_framebuffer_index < renderer->swapchain_image_count;
-       swapchain_framebuffer_index++) {
-    vkDestroyFramebuffer(
-        renderer->device,
-        renderer->swapchain_framebuffers[swapchain_framebuffer_index], nullptr);
-  }
-destroy_graphics_pipeline:
-  vkDestroyPipeline(renderer->device, renderer->graphics_pipeline, nullptr);
-  vkDestroyPipelineLayout(renderer->device, renderer->pipeline_layout, nullptr);
-destroy_render_pass:
-  vkDestroyRenderPass(renderer->device, renderer->render_pass, nullptr);
-destroy_image_views:
-  for (uint32_t swapchain_image_view_index = 0;
-       swapchain_image_view_index < renderer->swapchain_image_count;
-       swapchain_image_view_index++) {
-    vkDestroyImageView(
-        renderer->device,
-        renderer->swapchain_image_views[swapchain_image_view_index], nullptr);
-  }
-destroy_swapchain:
-  vkDestroySwapchainKHR(renderer->device, renderer->swapchain, nullptr);
-destroy_device:
-  vkDestroyDevice(renderer->device, nullptr);
-destroy_surface:
-  vkDestroySurfaceKHR(renderer->instance, renderer->surface, nullptr);
-destroy_instance:
-  if (enable_validation_layers) {
-    destroy_debug_utils_messenger_ext(renderer->instance,
-                                      renderer->debug_messenger, nullptr);
-  }
-  vkDestroyInstance(renderer->instance, nullptr);
-err:
-  return false;
-}
-// Releases every Vulkan object owned by the renderer. Waits for the device to
-// become idle first, then destroys swapchain resources, pipeline objects,
-// per-frame sync objects, the command pool, the device, the debug messenger
-// (when validation is enabled), the surface and finally the instance.
-void vgltf_renderer_deinit(struct vgltf_renderer *renderer) {
-  VkDevice device = renderer->device;
-  VkInstance instance = renderer->instance;
-
-  vkDeviceWaitIdle(device);
-  vgltf_renderer_cleanup_swapchain(renderer);
-
-  vkDestroyPipeline(device, renderer->graphics_pipeline, nullptr);
-  vkDestroyPipelineLayout(device, renderer->pipeline_layout, nullptr);
-  vkDestroyRenderPass(device, renderer->render_pass, nullptr);
-
-  for (int frame = 0; frame < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT;
-       frame++) {
-    vkDestroySemaphore(device, renderer->image_available_semaphores[frame],
-                       nullptr);
-    vkDestroySemaphore(device, renderer->render_finished_semaphores[frame],
-                       nullptr);
-    vkDestroyFence(device, renderer->in_flight_fences[frame], nullptr);
-  }
-
-  vkDestroyCommandPool(device, renderer->command_pool, nullptr);
-  vkDestroyDevice(device, nullptr);
-
-  if (enable_validation_layers) {
-    destroy_debug_utils_messenger_ext(instance, renderer->debug_messenger,
-                                      nullptr);
-  }
-  vkDestroySurfaceKHR(instance, renderer->surface, nullptr);
-  vkDestroyInstance(instance, nullptr);
-}
-// Records a new window size and flags the swapchain for recreation.
-// Sizes with a non-positive width or height are ignored, as are notifications
-// that do not change the stored size. A change in either dimension alone is
-// enough to trigger recreation.
-void vgltf_renderer_on_window_resized(struct vgltf_renderer *renderer,
-                                      struct vgltf_window_size size) {
-  if (size.width > 0 && size.height > 0 &&
-      (size.width != renderer->window_size.width ||
-       size.height != renderer->window_size.height)) {
-    renderer->window_size = size;
-    renderer->framebuffer_resized = true;
-  }
-}
diff --git a/src/renderer.h b/src/renderer.h
deleted file mode 100644
index a0417aa..0000000
--- a/src/renderer.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef VGLTF_RENDERER_H
-#define VGLTF_RENDERER_H
-
-#include "platform.h"
-#include <vulkan/vulkan.h>
-
-constexpr int VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT = 2;
-constexpr int VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT = 32;
-struct vgltf_renderer { // All Vulkan state owned by the renderer.
- VkInstance instance;
- VkPhysicalDevice physical_device;
- VkDevice device;
- VkQueue graphics_queue; // Queue the draw command buffers are submitted to.
- VkQueue present_queue; // Queue used for vkQueuePresentKHR.
- VkDebugUtilsMessengerEXT debug_messenger; // Destroyed only when validation layers are enabled.
- VkSurfaceKHR surface;
- VkSwapchainKHR swapchain;
- VkImage swapchain_images[VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT];
- VkImageView swapchain_image_views[VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT];
- VkFormat swapchain_image_format; // Format of the render pass color attachment.
- VkExtent2D swapchain_extent; // Used for framebuffer size, viewport and scissor.
- uint32_t swapchain_image_count; // Number of valid image view / framebuffer entries.
- VkRenderPass render_pass;
- VkPipelineLayout pipeline_layout;
- VkPipeline graphics_pipeline;
- VkFramebuffer
- swapchain_framebuffers[VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT]; // One per swapchain image view.
- VkCommandPool command_pool;
- VkCommandBuffer command_buffer[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT]; // One per frame in flight.
- VkSemaphore
- image_available_semaphores[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT]; // Signaled by image acquisition.
- VkSemaphore
- render_finished_semaphores[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT]; // Signaled by submit, waited on by present.
- VkFence in_flight_fences[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT]; // Waited on at the start of each frame.
- struct vgltf_window_size window_size; // Last size stored by init or the resize callback.
- uint32_t current_frame; // Index into the per-frame arrays; advanced modulo the frame-in-flight count.
- bool framebuffer_resized; // Set by the resize callback; cleared when the swapchain is recreated.
-};
-bool vgltf_renderer_init(struct vgltf_renderer *renderer,
- struct vgltf_platform *platform);
-void vgltf_renderer_deinit(struct vgltf_renderer *renderer);
-bool vgltf_renderer_triangle_pass(struct vgltf_renderer *renderer);
-void vgltf_renderer_on_window_resized(struct vgltf_renderer *renderer,
- struct vgltf_window_size size);
-#endif // VGLTF_RENDERER_H
diff --git a/src/renderer/renderer.c b/src/renderer/renderer.c
new file mode 100644
index 0000000..d34ef73
--- /dev/null
+++ b/src/renderer/renderer.c
@@ -0,0 +1,2559 @@
+#include "renderer.h"
+#include "../image.h"
+#include "../log.h"
+#include "../maths.h"
+#include "../platform.h"
+#include "vma_usage.h"
+#include <math.h>
+
+#define TINYOBJ_LOADER_C_IMPLEMENTATION
+#include "vendor/tiny_obj_loader_c.h"
+
+#include <assert.h>
+#include <vulkan/vulkan_core.h>
+
+static const char MODEL_PATH[] = "assets/model.obj";
+static const char TEXTURE_PATH[] = "assets/texture.png";
+
+// Describes the single vertex buffer binding (binding 0): one
+// struct vgltf_vertex per vertex, advanced per vertex rather than per
+// instance.
+VkVertexInputBindingDescription vgltf_vertex_binding_description() {
+  VkVertexInputBindingDescription binding = {};
+  binding.binding = 0;
+  binding.stride = sizeof(struct vgltf_vertex);
+  binding.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
+  return binding;
+}
+// Describes the three vertex attributes read from binding 0:
+//   location 0 - position, three 32-bit floats
+//   location 1 - color, three 32-bit floats
+//   location 2 - texture coordinates, two 32-bit floats
+// Offsets are taken from the layout of struct vgltf_vertex.
+struct vgltf_vertex_input_attribute_descriptions
+vgltf_vertex_attribute_descriptions(void) {
+  struct vgltf_vertex_input_attribute_descriptions attributes = {.count = 3};
+
+  attributes.descriptions[0] = (VkVertexInputAttributeDescription){
+      .location = 0,
+      .binding = 0,
+      .format = VK_FORMAT_R32G32B32_SFLOAT,
+      .offset = offsetof(struct vgltf_vertex, position)};
+  attributes.descriptions[1] = (VkVertexInputAttributeDescription){
+      .location = 1,
+      .binding = 0,
+      .format = VK_FORMAT_R32G32B32_SFLOAT,
+      .offset = offsetof(struct vgltf_vertex, color)};
+  attributes.descriptions[2] = (VkVertexInputAttributeDescription){
+      .location = 2,
+      .binding = 0,
+      .format = VK_FORMAT_R32G32_SFLOAT,
+      .offset = offsetof(struct vgltf_vertex, texture_coordinates)};
+
+  return attributes;
+}
+
+static const char *VALIDATION_LAYERS[] = {"VK_LAYER_KHRONOS_validation"};
+static constexpr int VALIDATION_LAYER_COUNT =
+ sizeof(VALIDATION_LAYERS) / sizeof(VALIDATION_LAYERS[0]);
+
+#ifdef VGLTF_DEBUG
+static constexpr bool enable_validation_layers = true;
+#else
+static constexpr bool enable_validation_layers = false;
+#endif
+
+// Debug-utils messenger callback: forwards every message to the debug log.
+// Severity, type and user data are ignored. Always returns VK_FALSE.
+static VKAPI_ATTR VkBool32 VKAPI_CALL
+debug_callback(VkDebugUtilsMessageSeverityFlagBitsEXT message_severity,
+               VkDebugUtilsMessageTypeFlagBitsEXT message_type,
+               const VkDebugUtilsMessengerCallbackDataEXT *callback_data,
+               void *user_data) {
+  // Unused parameters.
+  (void)user_data;
+  (void)message_type;
+  (void)message_severity;
+
+  const char *message = callback_data->pMessage;
+  VGLTF_LOG_DBG("validation layer: %s", message);
+  return VK_FALSE;
+}
+
+// Fixed-capacity list of the instance extension names the renderer needs.
+static constexpr int REQUIRED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY = 10;
+struct required_instance_extensions {
+  const char *extensions[REQUIRED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY];
+  uint32_t count;
+};
+
+// Appends an extension name to the list. The pointer is stored as-is, not
+// copied. Panics when the list is already full.
+void required_instance_extensions_push(
+    struct required_instance_extensions *required_instance_extensions,
+    const char *required_instance_extension) {
+  const uint32_t slot = required_instance_extensions->count;
+  if (slot == REQUIRED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY) {
+    VGLTF_PANIC("required instance extensions array is full");
+  }
+
+  required_instance_extensions->extensions[slot] = required_instance_extension;
+  required_instance_extensions->count = slot + 1;
+}
+
+// Fixed-capacity list of the instance extensions reported by Vulkan.
+static constexpr int SUPPORTED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY = 128;
+struct supported_instance_extensions {
+  VkExtensionProperties
+      properties[SUPPORTED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY];
+  uint32_t count;
+};
+
+// Fills the list by querying Vulkan twice: once for the count, once for the
+// properties. Returns false when either query fails or when more extensions
+// are reported than the array can hold (only the latter is logged).
+bool supported_instance_extensions_init(
+    struct supported_instance_extensions *supported_instance_extensions) {
+  uint32_t *count = &supported_instance_extensions->count;
+
+  if (vkEnumerateInstanceExtensionProperties(nullptr, count, nullptr) !=
+      VK_SUCCESS) {
+    return false;
+  }
+
+  if (*count > SUPPORTED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY) {
+    VGLTF_LOG_ERR("supported instance extensions array cannot fit all the "
+                  "VkExtensionProperties");
+    return false;
+  }
+
+  const VkResult result = vkEnumerateInstanceExtensionProperties(
+      nullptr, count, supported_instance_extensions->properties);
+  return result == VK_SUCCESS;
+}
+// Writes the name of every supported instance extension to the debug log,
+// one per line, under a heading.
+void supported_instance_extensions_debug_print(
+    const struct supported_instance_extensions *supported_instance_extensions) {
+  VGLTF_LOG_DBG("Supported instance extensions:");
+
+  const VkExtensionProperties *entry =
+      supported_instance_extensions->properties;
+  const VkExtensionProperties *const end =
+      entry + supported_instance_extensions->count;
+  for (; entry != end; entry++) {
+    VGLTF_LOG_DBG("\t- %s", entry->extensionName);
+  }
+}
+// Returns true when an extension with exactly this name is in the list.
+bool supported_instance_extensions_includes(
+    const struct supported_instance_extensions *supported_instance_extensions,
+    const char *extension_name) {
+  const uint32_t total = supported_instance_extensions->count;
+  uint32_t index = 0;
+
+  while (index < total &&
+         strcmp(supported_instance_extensions->properties[index].extensionName,
+                extension_name) != 0) {
+    index++;
+  }
+
+  // The scan stops early only on a match.
+  return index < total;
+}
+
+// Fixed-capacity list of the instance layers reported by Vulkan.
+static constexpr uint32_t SUPPORTED_VALIDATION_LAYERS_ARRAY_CAPACITY = 64;
+struct supported_validation_layers {
+  VkLayerProperties properties[SUPPORTED_VALIDATION_LAYERS_ARRAY_CAPACITY];
+  uint32_t count;
+};
+
+// Fills the list by querying Vulkan twice: once for the count, once for the
+// properties. Returns false when either query fails or when more layers are
+// reported than the array can hold (only the latter is logged).
+bool supported_validation_layers_init(
+    struct supported_validation_layers *supported_validation_layers) {
+  uint32_t *count = &supported_validation_layers->count;
+
+  if (vkEnumerateInstanceLayerProperties(count, nullptr) != VK_SUCCESS) {
+    return false;
+  }
+
+  if (*count > SUPPORTED_VALIDATION_LAYERS_ARRAY_CAPACITY) {
+    VGLTF_LOG_ERR("supported validation layers array cannot fit all the "
+                  "VkLayerProperties");
+    return false;
+  }
+
+  const VkResult result = vkEnumerateInstanceLayerProperties(
+      count, supported_validation_layers->properties);
+  return result == VK_SUCCESS;
+}
+
+// Returns true when every layer named in VALIDATION_LAYERS is reported by
+// Vulkan. Returns false when one is missing or the layer list cannot be
+// fetched.
+static bool are_validation_layer_supported() {
+  struct supported_validation_layers available = {};
+  if (!supported_validation_layers_init(&available)) {
+    return false;
+  }
+
+  for (int wanted = 0; wanted < VALIDATION_LAYER_COUNT; wanted++) {
+    const char *wanted_name = VALIDATION_LAYERS[wanted];
+
+    // Walk the available layers until the wanted name is found.
+    uint32_t candidate = 0;
+    while (candidate < available.count &&
+           strcmp(wanted_name, available.properties[candidate].layerName) !=
+               0) {
+      candidate++;
+    }
+
+    if (candidate == available.count) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Collects the instance extensions the renderer needs: those requested by the
+// platform layer, the portability enumeration extension on macOS builds, and
+// the debug-utils extension when validation layers are enabled.
+// Every collected name is then checked against the extensions Vulkan reports;
+// each unsupported one is logged.
+// Returns true only when the supported list could be fetched and all required
+// extensions are supported.
+static bool fetch_required_instance_extensions(
+    struct required_instance_extensions *required_extensions,
+    struct vgltf_platform *platform) {
+  struct supported_instance_extensions supported = {};
+  if (!supported_instance_extensions_init(&supported)) {
+    VGLTF_LOG_ERR(
+        "Couldn't fetch supported instance extensions details (OOM?)");
+    return false;
+  }
+  supported_instance_extensions_debug_print(&supported);
+
+  uint32_t platform_count = 0;
+  const char *const *platform_names =
+      vgltf_platform_get_vulkan_instance_extensions(platform, &platform_count);
+  for (uint32_t i = 0; i < platform_count; i++) {
+    required_instance_extensions_push(required_extensions, platform_names[i]);
+  }
+#ifdef VGLTF_PLATFORM_MACOS
+  required_instance_extensions_push(
+      required_extensions, VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME);
+#endif // VGLTF_PLATFORM_MACOS
+
+  if (enable_validation_layers) {
+    required_instance_extensions_push(required_extensions,
+                                      VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
+  }
+
+  // Check every requirement so that all missing extensions get reported.
+  uint32_t unsupported_count = 0;
+  for (uint32_t i = 0; i < required_extensions->count; i++) {
+    const char *name = required_extensions->extensions[i];
+    if (supported_instance_extensions_includes(&supported, name)) {
+      continue;
+    }
+    VGLTF_LOG_ERR("Unsupported instance extension: %s", name);
+    unsupported_count++;
+  }
+
+  if (unsupported_count != 0) {
+    VGLTF_LOG_ERR("Some required extensions are unsupported.");
+    return false;
+  }
+
+  return true;
+}
+
+// Overwrites *create_info with the debug messenger configuration: verbose,
+// warning and error severities; general, validation and performance message
+// types; debug_callback as the handler. All other fields are zeroed.
+static void populate_debug_messenger_create_info(
+    VkDebugUtilsMessengerCreateInfoEXT *create_info) {
+  const VkDebugUtilsMessageSeverityFlagsEXT severities =
+      VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
+      VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
+      VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT;
+  const VkDebugUtilsMessageTypeFlagsEXT types =
+      VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
+      VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
+      VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT;
+
+  *create_info = (VkDebugUtilsMessengerCreateInfoEXT){
+      .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
+      .messageSeverity = severities,
+      .messageType = types,
+      .pfnUserCallback = debug_callback,
+  };
+}
+
+// Creates the Vulkan instance.
+// Fails early when validation layers are enabled but unavailable, or when the
+// required instance extensions cannot be collected or are not all supported.
+// With validation enabled, the layers are turned on and a debug messenger
+// create-info is chained through pNext.
+// Returns true on success, false (after logging) on failure.
+static bool vgltf_vk_instance_init(struct vgltf_vk_instance *instance,
+                                   struct vgltf_platform *platform) {
+  VGLTF_LOG_INFO("Creating vulkan instance...");
+  if (enable_validation_layers && !are_validation_layer_supported()) {
+    VGLTF_LOG_ERR("Requested validation layers aren't supported");
+    goto err;
+  }
+
+  VkApplicationInfo application_info = {
+      .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
+      .pApplicationName = "Visible GLTF",
+      .applicationVersion = VK_MAKE_VERSION(0, 1, 0),
+      .pEngineName = "No Engine",
+      .engineVersion = VK_MAKE_VERSION(1, 0, 0),
+      .apiVersion = VK_API_VERSION_1_2};
+
+  struct required_instance_extensions required_extensions = {};
+  // Do not create an instance from an incomplete or unsupported extension
+  // list; later steps would fail in less obvious ways.
+  if (!fetch_required_instance_extensions(&required_extensions, platform)) {
+    VGLTF_LOG_ERR("Couldn't determine required instance extensions");
+    goto err;
+  }
+
+  VkInstanceCreateFlags flags = 0;
+#ifdef VGLTF_PLATFORM_MACOS
+  flags |= VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR;
+#endif // VGLTF_PLATFORM_MACOS
+
+  VkInstanceCreateInfo create_info = {
+      .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
+      .pApplicationInfo = &application_info,
+      .enabledExtensionCount = required_extensions.count,
+      .ppEnabledExtensionNames = required_extensions.extensions,
+      .flags = flags};
+
+  // Must outlive vkCreateInstance because create_info.pNext may point to it.
+  VkDebugUtilsMessengerCreateInfoEXT debug_create_info;
+  if (enable_validation_layers) {
+    create_info.enabledLayerCount = VALIDATION_LAYER_COUNT;
+    create_info.ppEnabledLayerNames = VALIDATION_LAYERS;
+    populate_debug_messenger_create_info(&debug_create_info);
+    create_info.pNext = &debug_create_info;
+  }
+
+  if (vkCreateInstance(&create_info, nullptr, &instance->instance) !=
+      VK_SUCCESS) {
+    VGLTF_LOG_ERR("Failed to create VkInstance");
+    goto err;
+  }
+
+  return true;
+err:
+  return false;
+}
+// Destroys the wrapped VkInstance using the default allocator.
+static void vgltf_vk_instance_deinit(struct vgltf_vk_instance *instance) {
+  VkInstance handle = instance->instance;
+  vkDestroyInstance(handle, nullptr);
+}
+
+// Looks up vkCreateDebugUtilsMessengerEXT at runtime and calls it.
+// Returns the result of that call, or VK_ERROR_EXTENSION_NOT_PRESENT when the
+// entry point cannot be resolved.
+static VkResult create_debug_utils_messenger_ext(
+    VkInstance instance, const VkDebugUtilsMessengerCreateInfoEXT *create_info,
+    const VkAllocationCallbacks *allocator,
+    VkDebugUtilsMessengerEXT *debug_messenger) {
+  auto create_fn = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr(
+      instance, "vkCreateDebugUtilsMessengerEXT");
+  if (create_fn == nullptr) {
+    return VK_ERROR_EXTENSION_NOT_PRESENT;
+  }
+
+  return create_fn(instance, create_info, allocator, debug_messenger);
+}
+
+// Looks up vkDestroyDebugUtilsMessengerEXT at runtime and calls it.
+// Does nothing when the entry point cannot be resolved.
+static void
+destroy_debug_utils_messenger_ext(VkInstance instance,
+                                  VkDebugUtilsMessengerEXT debug_messenger,
+                                  const VkAllocationCallbacks *allocator) {
+  auto destroy_fn = (PFN_vkDestroyDebugUtilsMessengerEXT)vkGetInstanceProcAddr(
+      instance, "vkDestroyDebugUtilsMessengerEXT");
+  if (destroy_fn == nullptr) {
+    return;
+  }
+
+  destroy_fn(instance, debug_messenger, allocator);
+}
+
+// Creates the debug messenger stored in renderer->debug_messenger. Does
+// nothing when validation layers are disabled.
+//
+// The function returns void, so a failed creation cannot be reported to the
+// caller; it is logged instead of being silently dropped.
+static void
+vgltf_renderer_setup_debug_messenger(struct vgltf_renderer *renderer) {
+  if (!enable_validation_layers) {
+    return;
+  }
+
+  // Zero-initialized so that any field the populate helper leaves untouched
+  // holds a defined value.
+  VkDebugUtilsMessengerCreateInfoEXT create_info = {};
+  populate_debug_messenger_create_info(&create_info);
+  if (create_debug_utils_messenger_ext(renderer->instance.instance,
+                                       &create_info, nullptr,
+                                       &renderer->debug_messenger) !=
+      VK_SUCCESS) {
+    VGLTF_LOG_ERR("Failed to set up debug messenger");
+  }
+}
+
+// Capacity of the fixed-size handle array in struct
+// available_physical_devices.
+static constexpr int AVAILABLE_PHYSICAL_DEVICE_ARRAY_CAPACITY = 128;
+// Fixed-capacity list of physical device handles, filled by
+// available_physical_devices_init().
+struct available_physical_devices {
+ VkPhysicalDevice devices[AVAILABLE_PHYSICAL_DEVICE_ARRAY_CAPACITY];
+ // Device count as written by vkEnumeratePhysicalDevices.
+ uint32_t count;
+};
+// Fills `devices` with the physical devices reported for `instance`.
+//
+// Returns false (after logging) when an enumeration call does not return
+// VK_SUCCESS, when no device is reported, or when more devices are reported
+// than AVAILABLE_PHYSICAL_DEVICE_ARRAY_CAPACITY.
+static bool
+available_physical_devices_init(VkInstance instance,
+                                struct available_physical_devices *devices) {
+  // First call: only ask how many devices there are.
+  VkResult count_result =
+      vkEnumeratePhysicalDevices(instance, &devices->count, nullptr);
+  if (count_result != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Couldn't enumerate physical devices");
+    return false;
+  }
+
+  if (devices->count == 0) {
+    VGLTF_LOG_ERR("Failed to find any GPU with Vulkan support");
+    return false;
+  }
+
+  if (devices->count > AVAILABLE_PHYSICAL_DEVICE_ARRAY_CAPACITY) {
+    VGLTF_LOG_ERR("available physical devices array cannot fit all available "
+                  "physical devices");
+    return false;
+  }
+
+  // Second call: fetch the handles themselves.
+  VkResult fetch_result =
+      vkEnumeratePhysicalDevices(instance, &devices->count, devices->devices);
+  if (fetch_result != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Couldn't enumerate physical devices");
+    return false;
+  }
+
+  return true;
+}
+
+// Queue family indices selected for a physical device. Each index is only
+// meaningful when the matching has_* flag is true.
+struct queue_family_indices {
+ // Index of a family whose queueFlags include VK_QUEUE_GRAPHICS_BIT.
+ uint32_t graphics_family;
+ // Index of a family for which surface presentation support was reported.
+ uint32_t present_family;
+ bool has_graphics_family;
+ bool has_present_family;
+};
+// Returns true when both a graphics family and a present family have been
+// recorded in `indices`.
+bool queue_family_indices_is_complete(
+    const struct queue_family_indices *indices) {
+  if (!indices->has_graphics_family) {
+    return false;
+  }
+
+  return indices->has_present_family;
+}
+// Scans the queue families of `device` and records in `indices` the index of
+// a family with VK_QUEUE_GRAPHICS_BIT and the index of a family that can
+// present to `surface`. Scanning stops as soon as both have been found.
+//
+// `indices` is only written to, never reset: fields for families that are not
+// found keep the values the caller passed in.
+//
+// Returns true when the scan ran to completion, even if a family is missing
+// (check with queue_family_indices_is_complete()). Returns false, after
+// logging, when the device reports more families than the local array can
+// hold or when the surface support query fails.
+bool queue_family_indices_for_device(struct queue_family_indices *indices,
+                                     VkPhysicalDevice device,
+                                     VkSurfaceKHR surface) {
+  static constexpr uint32_t QUEUE_FAMILY_PROPERTIES_ARRAY_CAPACITY = 64;
+  uint32_t queue_family_count = 0;
+  vkGetPhysicalDeviceQueueFamilyProperties(device, &queue_family_count,
+                                           nullptr);
+
+  if (queue_family_count > QUEUE_FAMILY_PROPERTIES_ARRAY_CAPACITY) {
+    VGLTF_LOG_ERR(
+        "Queue family properties array cannot fit all queue family properties");
+    goto err;
+  }
+
+  VkQueueFamilyProperties
+      queue_family_properties[QUEUE_FAMILY_PROPERTIES_ARRAY_CAPACITY] = {};
+  vkGetPhysicalDeviceQueueFamilyProperties(device, &queue_family_count,
+                                           queue_family_properties);
+
+  for (uint32_t queue_family_index = 0; queue_family_index < queue_family_count;
+       queue_family_index++) {
+    const VkQueueFamilyProperties *queue_family =
+        &queue_family_properties[queue_family_index];
+
+    // Initialized and result-checked: if the query fails the output would
+    // otherwise be read while still indeterminate.
+    VkBool32 present_support = VK_FALSE;
+    if (vkGetPhysicalDeviceSurfaceSupportKHR(device, queue_family_index,
+                                             surface, &present_support) !=
+        VK_SUCCESS) {
+      VGLTF_LOG_ERR("Couldn't query surface support for queue family %u",
+                    queue_family_index);
+      goto err;
+    }
+
+    if (queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT) {
+      indices->graphics_family = queue_family_index;
+      indices->has_graphics_family = true;
+    }
+
+    if (present_support) {
+      indices->present_family = queue_family_index;
+      indices->has_present_family = true;
+    }
+
+    if (queue_family_indices_is_complete(indices)) {
+      break;
+    }
+  }
+
+  return true;
+err:
+  return false;
+}
+
+// Returns true when `value` equals one of the first `length` elements of
+// `array`. A `length` of zero or less always yields false.
+static bool is_in_array(uint32_t *array, int length, uint32_t value) {
+  bool found = false;
+  for (int index = 0; index < length && !found; index++) {
+    found = array[index] == value;
+  }
+
+  return found;
+}
+
+// Capacity of the fixed-size array in struct supported_extensions.
+static constexpr uint32_t SUPPORTED_EXTENSIONS_ARRAY_CAPACITY = 1024;
+// Fixed-capacity list of device extension properties, filled by
+// supported_extensions_init().
+struct supported_extensions {
+ VkExtensionProperties properties[SUPPORTED_EXTENSIONS_ARRAY_CAPACITY];
+ // Extension count as written by vkEnumerateDeviceExtensionProperties.
+ uint32_t count;
+};
+// Queries the device extensions of `device` and stores them in
+// `supported_extensions`.
+//
+// Returns false when either vkEnumerateDeviceExtensionProperties call does
+// not return VK_SUCCESS, or when the reported count exceeds
+// SUPPORTED_EXTENSIONS_ARRAY_CAPACITY (only the latter is logged).
+bool supported_extensions_init(
+    struct supported_extensions *supported_extensions,
+    VkPhysicalDevice device) {
+  uint32_t *count = &supported_extensions->count;
+
+  // First call: only ask for the number of extensions.
+  VkResult result =
+      vkEnumerateDeviceExtensionProperties(device, nullptr, count, nullptr);
+  if (result != VK_SUCCESS) {
+    return false;
+  }
+
+  if (*count > SUPPORTED_EXTENSIONS_ARRAY_CAPACITY) {
+    VGLTF_LOG_ERR("supported extensions array cannot fit all the supported "
+                  "VkExtensionProperties (%u)",
+                  *count);
+    return false;
+  }
+
+  // Second call: fetch the properties themselves.
+  result = vkEnumerateDeviceExtensionProperties(
+      device, nullptr, count, supported_extensions->properties);
+  return result == VK_SUCCESS;
+}
+
+// Returns true when one of the first `count` entries of
+// `supported_extensions` has an extensionName equal to `extension_name`.
+static bool supported_extensions_includes_extension(
+    struct supported_extensions *supported_extensions,
+    const char *extension_name) {
+  const VkExtensionProperties *properties = supported_extensions->properties;
+  const uint32_t count = supported_extensions->count;
+
+  for (uint32_t i = 0; i < count; i++) {
+    if (strcmp(properties[i].extensionName, extension_name) == 0) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// Device extensions that are checked by are_device_extensions_supported() and
+// enabled in create_logical_device().
+static const char *DEVICE_EXTENSIONS[] = {
+ VK_KHR_SWAPCHAIN_EXTENSION_NAME,
+#ifdef VGLTF_PLATFORM_MACOS
+ // Only listed when building with VGLTF_PLATFORM_MACOS defined.
+ "VK_KHR_portability_subset",
+#endif
+};
+// Number of entries in DEVICE_EXTENSIONS.
+static constexpr int DEVICE_EXTENSION_COUNT =
+ sizeof(DEVICE_EXTENSIONS) / sizeof(DEVICE_EXTENSIONS[0]);
+// Returns true when every entry of DEVICE_EXTENSIONS is reported as supported
+// by `device`. The first missing extension is logged at debug level. Also
+// returns false when the supported extension list cannot be fetched.
+static bool are_device_extensions_supported(VkPhysicalDevice device) {
+  struct supported_extensions supported = {};
+  if (!supported_extensions_init(&supported, device)) {
+    return false;
+  }
+
+  for (uint32_t i = 0; i < DEVICE_EXTENSION_COUNT; i++) {
+    const char *required_name = DEVICE_EXTENSIONS[i];
+    if (supported_extensions_includes_extension(&supported, required_name)) {
+      continue;
+    }
+
+    VGLTF_LOG_DBG("Unsupported: %s", required_name);
+    return false;
+  }
+
+  return true;
+}
+
+// Capacities of the fixed-size arrays in struct swapchain_support_details.
+static constexpr int SWAPCHAIN_SUPPORT_DETAILS_MAX_SURFACE_FORMAT_COUNT = 256;
+static constexpr int SWAPCHAIN_SUPPORT_DETAILS_MAX_PRESENT_MODE_COUNT = 256;
+// Surface capabilities, surface formats and present modes queried for a
+// (physical device, surface) pair by
+// swapchain_support_details_query_from_device().
+struct swapchain_support_details {
+ VkSurfaceCapabilitiesKHR capabilities;
+ VkSurfaceFormatKHR
+ formats[SWAPCHAIN_SUPPORT_DETAILS_MAX_SURFACE_FORMAT_COUNT];
+ VkPresentModeKHR
+ present_modes[SWAPCHAIN_SUPPORT_DETAILS_MAX_PRESENT_MODE_COUNT];
+ // Counts as written by the corresponding vkGetPhysicalDeviceSurface*KHR
+ // queries.
+ uint32_t format_count;
+ uint32_t present_mode_count;
+};
+// Queries the surface capabilities, surface formats and present modes that
+// `device` supports for `surface` and stores them in
+// `swapchain_support_details`.
+//
+// When the device reports more formats or present modes than the arrays can
+// hold, the count is clamped to the array capacity before fetching, so that
+// format_count / present_mode_count never exceed the number of valid array
+// entries. A clamped fetch is expected to report VK_INCOMPLETE, which is
+// therefore accepted alongside VK_SUCCESS.
+//
+// Returns false when any of the queries fails.
+bool swapchain_support_details_query_from_device(
+    struct swapchain_support_details *swapchain_support_details,
+    VkPhysicalDevice device, VkSurfaceKHR surface) {
+  if (vkGetPhysicalDeviceSurfaceCapabilitiesKHR(
+          device, surface, &swapchain_support_details->capabilities) !=
+      VK_SUCCESS) {
+    goto err;
+  }
+
+  if (vkGetPhysicalDeviceSurfaceFormatsKHR(
+          device, surface, &swapchain_support_details->format_count, nullptr) !=
+      VK_SUCCESS) {
+    goto err;
+  }
+
+  if (swapchain_support_details->format_count >
+      SWAPCHAIN_SUPPORT_DETAILS_MAX_SURFACE_FORMAT_COUNT) {
+    swapchain_support_details->format_count =
+        SWAPCHAIN_SUPPORT_DETAILS_MAX_SURFACE_FORMAT_COUNT;
+  }
+
+  if (swapchain_support_details->format_count != 0) {
+    VkResult formats_result = vkGetPhysicalDeviceSurfaceFormatsKHR(
+        device, surface, &swapchain_support_details->format_count,
+        swapchain_support_details->formats);
+    if (formats_result != VK_SUCCESS && formats_result != VK_INCOMPLETE) {
+      goto err;
+    }
+  }
+
+  if (vkGetPhysicalDeviceSurfacePresentModesKHR(
+          device, surface, &swapchain_support_details->present_mode_count,
+          nullptr) != VK_SUCCESS) {
+    goto err;
+  }
+
+  if (swapchain_support_details->present_mode_count >
+      SWAPCHAIN_SUPPORT_DETAILS_MAX_PRESENT_MODE_COUNT) {
+    swapchain_support_details->present_mode_count =
+        SWAPCHAIN_SUPPORT_DETAILS_MAX_PRESENT_MODE_COUNT;
+  }
+
+  if (swapchain_support_details->present_mode_count != 0) {
+    VkResult present_modes_result = vkGetPhysicalDeviceSurfacePresentModesKHR(
+        device, surface, &swapchain_support_details->present_mode_count,
+        swapchain_support_details->present_modes);
+    if (present_modes_result != VK_SUCCESS &&
+        present_modes_result != VK_INCOMPLETE) {
+      goto err;
+    }
+  }
+
+  return true;
+err:
+  return false;
+}
+
+// Decides whether `device` can be used to render to `surface`.
+//
+// A device is accepted when it has both a graphics and a present queue
+// family, supports every entry of DEVICE_EXTENSIONS, reports at least one
+// surface format and one present mode, and supports samplerAnisotropy.
+// Swapchain support is only queried when the extensions are supported; a
+// failure of that query is logged and rejects the device.
+static bool is_physical_device_suitable(VkPhysicalDevice device,
+                                        VkSurfaceKHR surface) {
+  struct queue_family_indices families = {};
+  queue_family_indices_for_device(&families, device, surface);
+
+  VGLTF_LOG_DBG("Checking for physical device extension support");
+  const bool extensions_supported = are_device_extensions_supported(device);
+  VGLTF_LOG_DBG("Supported: %d", extensions_supported);
+
+  bool swapchain_adequate = false;
+  if (extensions_supported) {
+    VGLTF_LOG_DBG("Checking for swapchain support details");
+    struct swapchain_support_details details = {};
+    if (!swapchain_support_details_query_from_device(&details, device,
+                                                     surface)) {
+      VGLTF_LOG_ERR("Couldn't query swapchain support details from device");
+      return false;
+    }
+
+    const bool has_format = details.format_count > 0;
+    const bool has_present_mode = details.present_mode_count > 0;
+    swapchain_adequate = has_format && has_present_mode;
+  }
+
+  VkPhysicalDeviceFeatures supported_features;
+  vkGetPhysicalDeviceFeatures(device, &supported_features);
+
+  if (!queue_family_indices_is_complete(&families)) {
+    return false;
+  }
+  if (!extensions_supported || !swapchain_adequate) {
+    return false;
+  }
+  return supported_features.samplerAnisotropy;
+}
+
+// Stores in `*physical_device` the first enumerated device for which
+// is_physical_device_suitable() returns true.
+//
+// Returns false (after logging) when the devices cannot be enumerated or when
+// none is suitable; `*physical_device` is left untouched in that case.
+static bool pick_physical_device(VkPhysicalDevice *physical_device,
+                                 struct vgltf_vk_instance *instance,
+                                 VkSurfaceKHR surface) {
+  struct available_physical_devices candidates = {};
+  if (!available_physical_devices_init(instance->instance, &candidates)) {
+    VGLTF_LOG_ERR("Couldn't fetch available physical devices");
+    return false;
+  }
+
+  VkPhysicalDevice chosen = VK_NULL_HANDLE;
+  for (uint32_t i = 0; i < candidates.count && chosen == VK_NULL_HANDLE; i++) {
+    if (is_physical_device_suitable(candidates.devices[i], surface)) {
+      chosen = candidates.devices[i];
+      break;
+    }
+  }
+
+  if (chosen == VK_NULL_HANDLE) {
+    VGLTF_LOG_ERR("Failed to find a suitable GPU");
+    return false;
+  }
+
+  *physical_device = chosen;
+  return true;
+}
+
+// Creates the logical device for `physical_device` and fetches its graphics
+// and present queues.
+//
+// One queue is requested from each distinct family among the graphics and
+// present families, the samplerAnisotropy feature is enabled, and every entry
+// of DEVICE_EXTENSIONS is enabled.
+//
+// Returns false (after logging) when the queue families cannot be determined,
+// when the device lacks a graphics or present family, or when vkCreateDevice
+// fails.
+static bool create_logical_device(VkDevice *device, VkQueue *graphics_queue,
+                                  VkQueue *present_queue,
+                                  VkPhysicalDevice physical_device,
+                                  VkSurfaceKHR surface) {
+  static constexpr int MAX_QUEUE_FAMILY_COUNT = 2;
+
+  struct queue_family_indices queue_family_indices = {};
+  if (!queue_family_indices_for_device(&queue_family_indices, physical_device,
+                                       surface)) {
+    VGLTF_LOG_ERR("Couldn't fetch queue family indices");
+    goto err;
+  }
+
+  // Without this check the zero-initialized default index 0 would be used for
+  // a family that was never found.
+  if (!queue_family_indices_is_complete(&queue_family_indices)) {
+    VGLTF_LOG_ERR(
+        "Physical device is missing a graphics or present queue family");
+    goto err;
+  }
+
+  // The graphics and present families may be the same one; each family must
+  // only get a single queue create info.
+  uint32_t unique_queue_families[MAX_QUEUE_FAMILY_COUNT] = {};
+  int unique_queue_family_count = 0;
+
+  if (!is_in_array(unique_queue_families, unique_queue_family_count,
+                   queue_family_indices.graphics_family)) {
+    assert(unique_queue_family_count < MAX_QUEUE_FAMILY_COUNT);
+    unique_queue_families[unique_queue_family_count++] =
+        queue_family_indices.graphics_family;
+  }
+  if (!is_in_array(unique_queue_families, unique_queue_family_count,
+                   queue_family_indices.present_family)) {
+    assert(unique_queue_family_count < MAX_QUEUE_FAMILY_COUNT);
+    unique_queue_families[unique_queue_family_count++] =
+        queue_family_indices.present_family;
+  }
+
+  float queue_priority = 1.f;
+  VkDeviceQueueCreateInfo queue_create_infos[MAX_QUEUE_FAMILY_COUNT] = {};
+  int queue_create_info_count = 0;
+  for (int unique_queue_family_index = 0;
+       unique_queue_family_index < unique_queue_family_count;
+       unique_queue_family_index++) {
+    queue_create_infos[queue_create_info_count++] = (VkDeviceQueueCreateInfo){
+        .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+        .queueFamilyIndex = unique_queue_families[unique_queue_family_index],
+        .queueCount = 1,
+        .pQueuePriorities = &queue_priority};
+  }
+
+  VkPhysicalDeviceFeatures device_features = {
+      .samplerAnisotropy = VK_TRUE,
+  };
+  VkDeviceCreateInfo create_info = {
+      .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
+      .pQueueCreateInfos = queue_create_infos,
+      .queueCreateInfoCount = queue_create_info_count,
+      .pEnabledFeatures = &device_features,
+      .ppEnabledExtensionNames = DEVICE_EXTENSIONS,
+      .enabledExtensionCount = DEVICE_EXTENSION_COUNT};
+  if (vkCreateDevice(physical_device, &create_info, nullptr, device) !=
+      VK_SUCCESS) {
+    VGLTF_LOG_ERR("Failed to create logical device");
+    goto err;
+  }
+
+  vkGetDeviceQueue(*device, queue_family_indices.graphics_family, 0,
+                   graphics_queue);
+  vkGetDeviceQueue(*device, queue_family_indices.present_family, 0,
+                   present_queue);
+
+  return true;
+err:
+  return false;
+}
+
+// Creates a VMA allocator bound to the given instance, physical device and
+// logical device, and stores it in `*allocator`.
+//
+// Returns false (after logging) when vmaCreateAllocator fails.
+static bool create_allocator(VmaAllocator *allocator,
+                             struct vgltf_vk_device *device,
+                             struct vgltf_vk_instance *instance) {
+  VmaAllocatorCreateInfo allocator_info = {};
+  allocator_info.instance = instance->instance;
+  allocator_info.physicalDevice = device->physical_device;
+  allocator_info.device = device->device;
+
+  if (vmaCreateAllocator(&allocator_info, allocator) == VK_SUCCESS) {
+    return true;
+  }
+
+  VGLTF_LOG_ERR("Couldn't create VMA allocator");
+  return false;
+}
+
+// Asks the platform layer to create a Vulkan surface for `instance` and
+// stores it in `surface`.
+//
+// Returns false (after logging) when the platform cannot create the surface.
+static bool vgltf_vk_surface_init(struct vgltf_vk_surface *surface,
+                                  struct vgltf_vk_instance *instance,
+                                  struct vgltf_platform *platform) {
+  const bool created = vgltf_platform_create_vulkan_surface(
+      platform, instance->instance, &surface->surface);
+  if (!created) {
+    VGLTF_LOG_ERR("Couldn't create surface");
+  }
+
+  return created;
+}
+
+// Destroys the surface held by `surface`, which belongs to `instance`. No
+// custom allocation callbacks are passed.
+static void vgltf_vk_surface_deinit(struct vgltf_vk_surface *surface,
+                                    struct vgltf_vk_instance *instance) {
+  VkInstance owner = instance->instance;
+  VkSurfaceKHR handle = surface->surface;
+  vkDestroySurfaceKHR(owner, handle, nullptr);
+}
+
+// Picks the surface format to use for the swapchain.
+//
+// Returns the first entry that is VK_FORMAT_B8G8R8A8_SRGB with the
+// VK_COLOR_SPACE_SRGB_NONLINEAR_KHR color space; when there is none, returns
+// the first entry of `available_formats`.
+static VkSurfaceFormatKHR
+choose_swapchain_surface_format(VkSurfaceFormatKHR *available_formats,
+                                uint32_t available_format_count) {
+  for (uint32_t i = 0; i < available_format_count; i++) {
+    const VkSurfaceFormatKHR candidate = available_formats[i];
+    const bool is_bgra_srgb = candidate.format == VK_FORMAT_B8G8R8A8_SRGB;
+    const bool is_srgb_nonlinear =
+        candidate.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;
+    if (is_bgra_srgb && is_srgb_nonlinear) {
+      return candidate;
+    }
+  }
+
+  return available_formats[0];
+}
+
+// Picks the present mode to use for the swapchain: VK_PRESENT_MODE_MAILBOX_KHR
+// when it is listed in `available_modes`, VK_PRESENT_MODE_FIFO_KHR otherwise.
+static VkPresentModeKHR
+choose_swapchain_present_mode(VkPresentModeKHR *available_modes,
+                              uint32_t available_mode_count) {
+  bool mailbox_available = false;
+  for (uint32_t i = 0; i < available_mode_count && !mailbox_available; i++) {
+    mailbox_available = available_modes[i] == VK_PRESENT_MODE_MAILBOX_KHR;
+  }
+
+  if (mailbox_available) {
+    return VK_PRESENT_MODE_MAILBOX_KHR;
+  }
+  return VK_PRESENT_MODE_FIFO_KHR;
+}
+
+// Clamps `value` to the range [min, max]. The lower bound is tested first, so
+// `min` is returned whenever `value` is below it, even if `min` exceeds `max`.
+static uint32_t clamp_uint32(uint32_t min, uint32_t max, uint32_t value) {
+  if (value < min) {
+    return min;
+  }
+  if (value > max) {
+    return max;
+  }
+  return value;
+}
+
+// Picks the swapchain image extent.
+//
+// When the surface reports a current extent whose width is not UINT32_MAX,
+// that extent is returned as is. Otherwise `width` and `height` are clamped
+// to the surface's min/max image extent.
+static VkExtent2D
+choose_swapchain_extent(const VkSurfaceCapabilitiesKHR *capabilities, int width,
+                        int height) {
+  if (capabilities->currentExtent.width != UINT32_MAX) {
+    return capabilities->currentExtent;
+  }
+
+  const VkExtent2D lower = capabilities->minImageExtent;
+  const VkExtent2D upper = capabilities->maxImageExtent;
+  const uint32_t requested_width = width;
+  const uint32_t requested_height = height;
+
+  VkExtent2D clamped_extent;
+  clamped_extent.width =
+      clamp_uint32(lower.width, upper.width, requested_width);
+  clamped_extent.height =
+      clamp_uint32(lower.height, upper.height, requested_height);
+  return clamped_extent;
+}
+
+// Creates the swapchain for `surface` on `device` and fetches its images.
+//
+// On success `swapchain` holds the swapchain handle, its images and image
+// count, the chosen image format and the chosen extent. `window_size` is only
+// used to derive the extent when the surface does not dictate one.
+//
+// Returns false (after logging) when the swapchain support details or the
+// queue family indices cannot be queried, when the surface reports no format,
+// when swapchain creation fails, or when the images cannot be fetched into
+// the fixed-size image array. A swapchain that was created before a later
+// failure is destroyed again.
+static bool create_swapchain(struct vgltf_vk_swapchain *swapchain,
+                             struct vgltf_vk_device *device,
+                             struct vgltf_vk_surface *surface,
+                             struct vgltf_window_size *window_size) {
+  struct swapchain_support_details swapchain_support_details = {};
+  if (!swapchain_support_details_query_from_device(
+          &swapchain_support_details, device->physical_device,
+          surface->surface)) {
+    VGLTF_LOG_ERR("Couldn't query swapchain support details from device");
+    goto err;
+  }
+
+  // choose_swapchain_surface_format() falls back to the first array entry,
+  // which is only meaningful when at least one format was reported.
+  if (swapchain_support_details.format_count == 0) {
+    VGLTF_LOG_ERR("No surface format available for the swapchain");
+    goto err;
+  }
+
+  struct queue_family_indices indices = {};
+  if (!queue_family_indices_for_device(&indices, device->physical_device,
+                                       surface->surface)) {
+    VGLTF_LOG_ERR("Couldn't fetch queue family indices");
+    goto err;
+  }
+
+  VkSurfaceFormatKHR surface_format =
+      choose_swapchain_surface_format(swapchain_support_details.formats,
+                                      swapchain_support_details.format_count);
+  VkPresentModeKHR present_mode = choose_swapchain_present_mode(
+      swapchain_support_details.present_modes,
+      swapchain_support_details.present_mode_count);
+
+  VkExtent2D extent =
+      choose_swapchain_extent(&swapchain_support_details.capabilities,
+                              window_size->width, window_size->height);
+
+  // Ask for one image more than the minimum, capped at the maximum when the
+  // surface reports one (a maxImageCount of 0 is treated as "no cap").
+  uint32_t image_count =
+      swapchain_support_details.capabilities.minImageCount + 1;
+  if (swapchain_support_details.capabilities.maxImageCount > 0 &&
+      image_count > swapchain_support_details.capabilities.maxImageCount) {
+    image_count = swapchain_support_details.capabilities.maxImageCount;
+  }
+
+  VkSwapchainCreateInfoKHR create_info = {
+      .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
+      .surface = surface->surface,
+      .minImageCount = image_count,
+      .imageFormat = surface_format.format,
+      .imageColorSpace = surface_format.colorSpace,
+      .imageExtent = extent,
+      .imageArrayLayers = 1,
+      .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT};
+
+  // Images are shared between the two families when graphics and present use
+  // different ones; otherwise exclusive ownership is enough.
+  uint32_t queue_family_indices[] = {indices.graphics_family,
+                                     indices.present_family};
+  if (indices.graphics_family != indices.present_family) {
+    create_info.imageSharingMode = VK_SHARING_MODE_CONCURRENT;
+    create_info.queueFamilyIndexCount = 2;
+    create_info.pQueueFamilyIndices = queue_family_indices;
+  } else {
+    create_info.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
+  }
+
+  create_info.preTransform =
+      swapchain_support_details.capabilities.currentTransform;
+  create_info.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
+  create_info.presentMode = present_mode;
+  create_info.clipped = VK_TRUE;
+  create_info.oldSwapchain = VK_NULL_HANDLE;
+
+  if (vkCreateSwapchainKHR(device->device, &create_info, nullptr,
+                           &swapchain->swapchain) != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Swapchain creation failed!");
+    goto err;
+  }
+
+  if (vkGetSwapchainImagesKHR(device->device, swapchain->swapchain,
+                              &swapchain->swapchain_image_count,
+                              nullptr) != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Couldn't get swapchain image count");
+    goto destroy_swapchain;
+  }
+
+  if (swapchain->swapchain_image_count >
+      VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT) {
+    // %u: the image count is passed to vkGetSwapchainImagesKHR as a uint32_t.
+    VGLTF_LOG_ERR("Swapchain image array cannot fit all %u swapchain images",
+                  swapchain->swapchain_image_count);
+    goto destroy_swapchain;
+  }
+
+  if (vkGetSwapchainImagesKHR(device->device, swapchain->swapchain,
+                              &swapchain->swapchain_image_count,
+                              swapchain->swapchain_images) != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Couldn't get swapchain images");
+    goto destroy_swapchain;
+  }
+
+  swapchain->swapchain_image_format = surface_format.format;
+  swapchain->swapchain_extent = extent;
+
+  return true;
+destroy_swapchain:
+  vkDestroySwapchainKHR(device->device, swapchain->swapchain, nullptr);
+err:
+  return false;
+}
+
+// Creates a 2D image view of `image` with identity swizzles, covering
+// `mip_level_count` mip levels (starting at level 0) of a single array layer
+// for the given aspect.
+//
+// Returns true when vkCreateImageView succeeds; the view is then stored in
+// `*image_view`.
+static bool create_image_view(struct vgltf_vk_device *device, VkImage image,
+                              VkFormat format, VkImageView *image_view,
+                              VkImageAspectFlags aspect_flags,
+                              uint32_t mip_level_count) {
+  VkImageViewCreateInfo view_info = {};
+  view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
+  view_info.image = image;
+  view_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
+  view_info.format = format;
+
+  view_info.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
+  view_info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
+  view_info.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
+  view_info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
+
+  view_info.subresourceRange.aspectMask = aspect_flags;
+  view_info.subresourceRange.levelCount = mip_level_count;
+  view_info.subresourceRange.layerCount = 1;
+
+  VkResult result =
+      vkCreateImageView(device->device, &view_info, nullptr, image_view);
+  return result == VK_SUCCESS;
+}
+
+// Creates one color image view per swapchain image and stores them in
+// swapchain->swapchain_image_views.
+//
+// When a view cannot be created, the views created so far are destroyed and
+// false is returned.
+static bool create_swapchain_image_views(struct vgltf_vk_swapchain *swapchain,
+                                         struct vgltf_vk_device *device) {
+  const uint32_t image_count = swapchain->swapchain_image_count;
+
+  uint32_t created_count = 0;
+  while (created_count < image_count) {
+    if (!create_image_view(device, swapchain->swapchain_images[created_count],
+                           swapchain->swapchain_image_format,
+                           &swapchain->swapchain_image_views[created_count],
+                           VK_IMAGE_ASPECT_COLOR_BIT, 1)) {
+      break;
+    }
+    created_count++;
+  }
+
+  if (created_count == image_count) {
+    return true;
+  }
+
+  // Roll back the views that were created before the failure.
+  for (uint32_t i = 0; i < created_count; i++) {
+    vkDestroyImageView(device->device, swapchain->swapchain_image_views[i],
+                       nullptr);
+  }
+  return false;
+}
+
+// Creates a shader module from `size` bytes of SPIR-V at `code` and stores it
+// in `*out`.
+//
+// `size` must be a positive multiple of 4, because the code is handed to
+// Vulkan as an array of 32-bit words; any other size is rejected here rather
+// than passed on as codeSize.
+// NOTE(review): `code` is reinterpreted as `const uint32_t *`, so callers are
+// assumed to pass suitably aligned data - confirm for the embedded arrays.
+//
+// Returns false (after logging) on an invalid size or when
+// vkCreateShaderModule fails.
+static bool create_shader_module(VkDevice device, const unsigned char *code,
+                                 int size, VkShaderModule *out) {
+  if (size <= 0 || size % 4 != 0) {
+    VGLTF_LOG_ERR("Shader code size must be a positive multiple of 4 (got %d)",
+                  size);
+    goto err;
+  }
+
+  VkShaderModuleCreateInfo create_info = {
+      .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
+      .codeSize = size,
+      .pCode = (const uint32_t *)code,
+  };
+  if (vkCreateShaderModule(device, &create_info, nullptr, out) != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Couldn't create shader module");
+    goto err;
+  }
+  return true;
+err:
+  return false;
+}
+
+// Returns the first of `candidates` whose format properties, for the given
+// `tiling`, include every bit of `features`. Returns VK_FORMAT_UNDEFINED when
+// no candidate qualifies (which is always the case for a tiling other than
+// linear or optimal).
+static VkFormat find_supported_format(struct vgltf_renderer *renderer,
+                                      const VkFormat *candidates,
+                                      int candidate_count, VkImageTiling tiling,
+                                      VkFormatFeatureFlags features) {
+  for (int i = 0; i < candidate_count; i++) {
+    VkFormatProperties properties;
+    vkGetPhysicalDeviceFormatProperties(renderer->device.physical_device,
+                                        candidates[i], &properties);
+
+    VkFormatFeatureFlags available;
+    switch (tiling) {
+    case VK_IMAGE_TILING_LINEAR:
+      available = properties.linearTilingFeatures;
+      break;
+    case VK_IMAGE_TILING_OPTIMAL:
+      available = properties.optimalTilingFeatures;
+      break;
+    default:
+      continue;
+    }
+
+    if ((available & features) == features) {
+      return candidates[i];
+    }
+  }
+
+  return VK_FORMAT_UNDEFINED;
+}
+
+// Returns the first of D32_SFLOAT, D32_SFLOAT_S8_UINT and D24_UNORM_S8_UINT
+// that supports use as a depth/stencil attachment with optimal tiling, or
+// whatever find_supported_format() returns when none does.
+static VkFormat find_depth_format(struct vgltf_renderer *renderer) {
+  const VkFormat depth_candidates[] = {
+      VK_FORMAT_D32_SFLOAT,
+      VK_FORMAT_D32_SFLOAT_S8_UINT,
+      VK_FORMAT_D24_UNORM_S8_UINT,
+  };
+  const int depth_candidate_count =
+      sizeof(depth_candidates) / sizeof(depth_candidates[0]);
+
+  return find_supported_format(renderer, depth_candidates,
+                               depth_candidate_count, VK_IMAGE_TILING_OPTIMAL,
+                               VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT);
+}
+
+// Creates renderer->render_pass: a single-subpass render pass with one color
+// attachment (the swapchain image format, cleared on load, stored, ending in
+// the present layout) and one depth attachment (cleared on load, contents not
+// stored).
+//
+// Returns false (after logging) when no depth format is supported or when
+// vkCreateRenderPass fails.
+static bool vgltf_renderer_create_render_pass(struct vgltf_renderer *renderer) {
+  // find_depth_format() reports "nothing supported" as VK_FORMAT_UNDEFINED,
+  // which must not be used as an attachment format.
+  VkFormat depth_format = find_depth_format(renderer);
+  if (depth_format == VK_FORMAT_UNDEFINED) {
+    VGLTF_LOG_ERR("Failed to find a supported depth format");
+    goto err;
+  }
+
+  VkAttachmentDescription color_attachment = {
+      .format = renderer->swapchain.swapchain_image_format,
+      .samples = VK_SAMPLE_COUNT_1_BIT,
+      .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
+      .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+      .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
+      .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
+      .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+      .finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR};
+  VkAttachmentReference color_attachment_ref = {
+      .attachment = 0,
+      .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+  };
+  VkAttachmentDescription depth_attachment = {
+      .format = depth_format,
+      .samples = VK_SAMPLE_COUNT_1_BIT,
+      .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
+      .storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
+      .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
+      .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
+      .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+      .finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL};
+  VkAttachmentReference depth_attachment_ref = {
+      .attachment = 1,
+      .layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
+  };
+
+  VkSubpassDescription subpass = {
+      .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+      .pColorAttachments = &color_attachment_ref,
+      .colorAttachmentCount = 1,
+      .pDepthStencilAttachment = &depth_attachment_ref};
+  VkSubpassDependency dependency = {
+      .srcSubpass = VK_SUBPASS_EXTERNAL,
+      .dstSubpass = 0,
+      .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+                      VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT,
+      .srcAccessMask = 0,
+      .dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+                      VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT,
+      .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+                       VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT};
+
+  // Array order must match the attachment indices used in the references
+  // above: 0 = color, 1 = depth.
+  VkAttachmentDescription attachments[] = {color_attachment, depth_attachment};
+  int attachment_count = sizeof(attachments) / sizeof(attachments[0]);
+  VkRenderPassCreateInfo render_pass_info = {
+      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+      .attachmentCount = attachment_count,
+      .pAttachments = attachments,
+      .subpassCount = 1,
+      .pSubpasses = &subpass,
+      .dependencyCount = 1,
+      .pDependencies = &dependency};
+
+  if (vkCreateRenderPass(renderer->device.device, &render_pass_info, nullptr,
+                         &renderer->render_pass) != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Failed to create render pass");
+    goto err;
+  }
+
+  return true;
+err:
+  return false;
+}
+
+// Creates renderer->descriptor_set_layout with two bindings: a uniform buffer
+// at binding 0 for the vertex stage and a combined image sampler at binding 1
+// for the fragment stage.
+//
+// Returns false (after logging) when vkCreateDescriptorSetLayout fails.
+static bool
+vgltf_renderer_create_descriptor_set_layout(struct vgltf_renderer *renderer) {
+  VkDescriptorSetLayoutBinding layout_bindings[] = {
+      {
+          .binding = 0,
+          .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+          .descriptorCount = 1,
+          .stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
+      },
+      {
+          .binding = 1,
+          .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+          .descriptorCount = 1,
+          .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+      },
+  };
+  int layout_binding_count =
+      sizeof(layout_bindings) / sizeof(layout_bindings[0]);
+
+  VkDescriptorSetLayoutCreateInfo create_info = {};
+  create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
+  create_info.bindingCount = layout_binding_count;
+  create_info.pBindings = layout_bindings;
+
+  VkResult result = vkCreateDescriptorSetLayout(
+      renderer->device.device, &create_info, nullptr,
+      &renderer->descriptor_set_layout);
+  if (result != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Failed to create descriptor set layout");
+    return false;
+  }
+
+  return true;
+}
+
+// Creates renderer->pipeline_layout and renderer->graphics_pipeline from the
+// embedded triangle vertex and fragment shaders.
+//
+// Uses renderer->descriptor_set_layout and renderer->render_pass, which are
+// read here and therefore must already hold valid handles.
+//
+// Returns false (after logging) when a shader module, the pipeline layout or
+// the pipeline cannot be created; everything created by this function up to
+// that point is destroyed again.
+static bool
+vgltf_renderer_create_graphics_pipeline(struct vgltf_renderer *renderer) {
+ // SPIR-V binaries embedded at compile time.
+ static constexpr unsigned char triangle_shader_vert_code[] = {
+#embed "../compiled_shaders/triangle.vert.spv"
+ };
+ static constexpr unsigned char triangle_shader_frag_code[] = {
+#embed "../compiled_shaders/triangle.frag.spv"
+ };
+
+ VkShaderModule triangle_shader_vert_module;
+ if (!create_shader_module(renderer->device.device, triangle_shader_vert_code,
+ sizeof(triangle_shader_vert_code),
+ &triangle_shader_vert_module)) {
+ VGLTF_LOG_ERR("Couldn't create triangle vert shader module");
+ goto err;
+ }
+
+ VkShaderModule triangle_shader_frag_module;
+ if (!create_shader_module(renderer->device.device, triangle_shader_frag_code,
+ sizeof(triangle_shader_frag_code),
+ &triangle_shader_frag_module)) {
+ VGLTF_LOG_ERR("Couldn't create triangle frag shader module");
+ goto destroy_vert_shader_module;
+ }
+
+ VkPipelineShaderStageCreateInfo triangle_shader_vert_stage_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = triangle_shader_vert_module,
+ .pName = "main"};
+ VkPipelineShaderStageCreateInfo triangle_shader_frag_stage_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = triangle_shader_frag_module,
+ .pName = "main"};
+ VkPipelineShaderStageCreateInfo shader_stages[] = {
+ triangle_shader_vert_stage_create_info,
+ triangle_shader_frag_stage_create_info};
+
+ // Viewport and scissor are declared dynamic, so the viewport state below
+ // only gives their counts.
+ VkDynamicState dynamic_states[] = {
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ };
+
+ VkPipelineDynamicStateCreateInfo dynamic_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = sizeof(dynamic_states) / sizeof(dynamic_states[0]),
+ .pDynamicStates = dynamic_states};
+
+ VkVertexInputBindingDescription vertex_binding_description =
+ vgltf_vertex_binding_description();
+ struct vgltf_vertex_input_attribute_descriptions
+ vertex_attribute_descriptions = vgltf_vertex_attribute_descriptions();
+
+ VkPipelineVertexInputStateCreateInfo vertex_input_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 1,
+ .vertexAttributeDescriptionCount = vertex_attribute_descriptions.count,
+ .pVertexBindingDescriptions = &vertex_binding_description,
+ .pVertexAttributeDescriptions =
+ vertex_attribute_descriptions.descriptions};
+
+ VkPipelineInputAssemblyStateCreateInfo input_assembly = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
+ .primitiveRestartEnable = VK_FALSE,
+ };
+
+ VkPipelineViewportStateCreateInfo viewport_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1};
+
+ // Filled polygons, back faces culled, counter-clockwise winding is front.
+ VkPipelineRasterizationStateCreateInfo rasterizer = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .depthClampEnable = VK_FALSE,
+ .rasterizerDiscardEnable = VK_FALSE,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .lineWidth = 1.f,
+ .cullMode = VK_CULL_MODE_BACK_BIT,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
+ .depthBiasEnable = VK_FALSE};
+
+ VkPipelineMultisampleStateCreateInfo multisampling = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .sampleShadingEnable = VK_FALSE,
+ .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
+ };
+
+ // Blending is disabled; all four color components are written.
+ VkPipelineColorBlendAttachmentState color_blend_attachment = {
+ .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
+ .blendEnable = VK_FALSE,
+ };
+
+ // Depth test and depth write enabled with a "less" comparison; no stencil
+ // or depth-bounds test.
+ VkPipelineDepthStencilStateCreateInfo depth_stencil = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .depthTestEnable = VK_TRUE,
+ .depthWriteEnable = VK_TRUE,
+ .depthCompareOp = VK_COMPARE_OP_LESS,
+ .depthBoundsTestEnable = VK_FALSE,
+ .stencilTestEnable = VK_FALSE,
+ };
+
+ VkPipelineColorBlendStateCreateInfo color_blending = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .logicOpEnable = VK_FALSE,
+ .attachmentCount = 1,
+ .pAttachments = &color_blend_attachment};
+
+ VkPipelineLayoutCreateInfo pipeline_layout_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &renderer->descriptor_set_layout};
+
+ if (vkCreatePipelineLayout(renderer->device.device, &pipeline_layout_info,
+ nullptr,
+ &renderer->pipeline_layout) != VK_SUCCESS) {
+ VGLTF_LOG_ERR("Couldn't create pipeline layout");
+ goto destroy_frag_shader_module;
+ }
+
+ VkGraphicsPipelineCreateInfo pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = 2,
+ .pStages = shader_stages,
+ .pVertexInputState = &vertex_input_info,
+ .pInputAssemblyState = &input_assembly,
+ .pViewportState = &viewport_state,
+ .pRasterizationState = &rasterizer,
+ .pMultisampleState = &multisampling,
+ .pColorBlendState = &color_blending,
+ .pDepthStencilState = &depth_stencil,
+ .pDynamicState = &dynamic_state,
+ .layout = renderer->pipeline_layout,
+ .renderPass = renderer->render_pass,
+ .subpass = 0,
+ };
+
+ if (vkCreateGraphicsPipelines(renderer->device.device, VK_NULL_HANDLE, 1,
+ &pipeline_info, nullptr,
+ &renderer->graphics_pipeline) != VK_SUCCESS) {
+ VGLTF_LOG_ERR("Couldn't create pipeline");
+ goto destroy_pipeline_layout;
+ }
+
+ // Success path: the shader modules are destroyed here; only the pipeline
+ // and its layout are kept.
+ vkDestroyShaderModule(renderer->device.device, triangle_shader_frag_module,
+ nullptr);
+ vkDestroyShaderModule(renderer->device.device, triangle_shader_vert_module,
+ nullptr);
+ return true;
+ // Failure path: labels fall through, releasing resources in reverse order
+ // of creation.
+destroy_pipeline_layout:
+ vkDestroyPipelineLayout(renderer->device.device, renderer->pipeline_layout,
+ nullptr);
+destroy_frag_shader_module:
+ vkDestroyShaderModule(renderer->device.device, triangle_shader_frag_module,
+ nullptr);
+destroy_vert_shader_module:
+ vkDestroyShaderModule(renderer->device.device, triangle_shader_vert_module,
+ nullptr);
+err:
+ return false;
+}
+
+// Creates one framebuffer per swapchain image in
+// renderer->swapchain_framebuffers. Each framebuffer attaches the matching
+// swapchain image view and the shared depth image view, and uses the
+// swapchain extent.
+//
+// When a framebuffer cannot be created, the ones created so far are destroyed
+// (mirroring create_swapchain_image_views()) and false is returned after
+// logging.
+static bool
+vgltf_renderer_create_framebuffers(struct vgltf_renderer *renderer) {
+  uint32_t framebuffer_index;
+  for (framebuffer_index = 0;
+       framebuffer_index < renderer->swapchain.swapchain_image_count;
+       framebuffer_index++) {
+    // Order must match the render pass attachments: color first, then depth.
+    VkImageView attachments[] = {
+        renderer->swapchain.swapchain_image_views[framebuffer_index],
+        renderer->depth_image_view};
+    int attachment_count = sizeof(attachments) / sizeof(attachments[0]);
+
+    VkFramebufferCreateInfo framebuffer_info = {
+        .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+        .renderPass = renderer->render_pass,
+        .attachmentCount = attachment_count,
+        .pAttachments = attachments,
+        .width = renderer->swapchain.swapchain_extent.width,
+        .height = renderer->swapchain.swapchain_extent.height,
+        .layers = 1};
+
+    if (vkCreateFramebuffer(
+            renderer->device.device, &framebuffer_info, nullptr,
+            &renderer->swapchain_framebuffers[framebuffer_index]) !=
+        VK_SUCCESS) {
+      VGLTF_LOG_ERR("Failed to create framebuffer");
+      goto destroy_framebuffers;
+    }
+  }
+
+  return true;
+destroy_framebuffers:
+  // Only indices below framebuffer_index hold framebuffers created here.
+  for (uint32_t to_remove_index = 0; to_remove_index < framebuffer_index;
+       to_remove_index++) {
+    vkDestroyFramebuffer(renderer->device.device,
+                         renderer->swapchain_framebuffers[to_remove_index],
+                         nullptr);
+  }
+  return false;
+}
+
+// Creates renderer->command_pool on the graphics queue family, with command
+// buffers that can be reset individually.
+//
+// Returns false (after logging) when the queue family indices cannot be
+// fetched or when vkCreateCommandPool fails.
+static bool
+vgltf_renderer_create_command_pool(struct vgltf_renderer *renderer) {
+  struct queue_family_indices families = {};
+  const bool families_found = queue_family_indices_for_device(
+      &families, renderer->device.physical_device, renderer->surface.surface);
+  if (!families_found) {
+    VGLTF_LOG_ERR("Couldn't fetch queue family indices");
+    return false;
+  }
+
+  VkCommandPoolCreateInfo command_pool_info = {};
+  command_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
+  command_pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
+  command_pool_info.queueFamilyIndex = families.graphics_family;
+
+  VkResult result =
+      vkCreateCommandPool(renderer->device.device, &command_pool_info, nullptr,
+                          &renderer->command_pool);
+  if (result != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Couldn't create command pool");
+    return false;
+  }
+
+  return true;
+}
+
+// Allocates a primary command buffer from the renderer's command pool and
+// begins recording it with VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT.
+//
+// Returns the recording command buffer, or VK_NULL_HANDLE (after logging)
+// when allocation or vkBeginCommandBuffer fails. A buffer that was allocated
+// but could not be begun is freed before returning, so nothing leaks.
+static VkCommandBuffer
+begin_single_time_commands(struct vgltf_renderer *renderer) {
+  VkCommandBufferAllocateInfo allocate_info = {
+      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+      .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+      .commandPool = renderer->command_pool,
+      .commandBufferCount = 1};
+
+  // Initialized so that a failed allocation never hands back garbage.
+  VkCommandBuffer command_buffer = VK_NULL_HANDLE;
+  if (vkAllocateCommandBuffers(renderer->device.device, &allocate_info,
+                               &command_buffer) != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Couldn't allocate single-use command buffer");
+    return VK_NULL_HANDLE;
+  }
+
+  VkCommandBufferBeginInfo begin_info = {
+      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+      .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};
+
+  if (vkBeginCommandBuffer(command_buffer, &begin_info) != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Couldn't begin single-use command buffer");
+    vkFreeCommandBuffers(renderer->device.device, renderer->command_pool, 1,
+                         &command_buffer);
+    return VK_NULL_HANDLE;
+  }
+
+  return command_buffer;
+}
+
+// Finishes recording `command_buffer`, submits it to the graphics queue,
+// waits for the queue to become idle and frees the command buffer back to the
+// renderer's command pool.
+//
+// Failures are logged; the function has no return value. A command buffer
+// that failed to finish recording is not submitted, but is still freed.
+// Passing VK_NULL_HANDLE is logged and otherwise ignored.
+static void end_single_time_commands(struct vgltf_renderer *renderer,
+                                     VkCommandBuffer command_buffer) {
+  if (command_buffer == VK_NULL_HANDLE) {
+    VGLTF_LOG_ERR("No single-use command buffer to submit");
+    return;
+  }
+
+  if (vkEndCommandBuffer(command_buffer) != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Failed to record single-use command buffer");
+  } else {
+    VkSubmitInfo submit_info = {.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+                                .commandBufferCount = 1,
+                                .pCommandBuffers = &command_buffer};
+
+    if (vkQueueSubmit(renderer->device.graphics_queue, 1, &submit_info,
+                      VK_NULL_HANDLE) != VK_SUCCESS) {
+      VGLTF_LOG_ERR("Failed to submit single-use command buffer");
+    } else if (vkQueueWaitIdle(renderer->device.graphics_queue) !=
+               VK_SUCCESS) {
+      VGLTF_LOG_ERR("Failed to wait for the graphics queue");
+    }
+  }
+
+  vkFreeCommandBuffers(renderer->device.device, renderer->command_pool, 1,
+                       &command_buffer);
+}
+
+// Records a copy of `size` bytes from the start of `src_buffer` to the start
+// of `dst_buffer` into a single-use command buffer and hands it to
+// end_single_time_commands. Always returns true.
+static bool vgltf_renderer_copy_buffer(struct vgltf_renderer *renderer,
+                                       VkBuffer src_buffer, VkBuffer dst_buffer,
+                                       VkDeviceSize size) {
+  VkBufferCopy region = {};
+  region.size = size;
+
+  VkCommandBuffer cmd = begin_single_time_commands(renderer);
+  vkCmdCopyBuffer(cmd, src_buffer, dst_buffer, 1, &region);
+  end_single_time_commands(renderer, cmd);
+
+  return true;
+}
+
+// Creates a single-layer, single-sample 2D image through the VMA allocator
+// and stores the image handle, its allocation and the allocation info in
+// `image`.
+//
+// width/height       image extent in texels (depth is fixed to 1)
+// mip_level_count    number of mip levels to allocate
+// format/tiling      forwarded to VkImageCreateInfo
+// usage              Vulkan image usage flags
+// properties         memory property flags the allocation must have
+//
+// The function has no return value; a failed creation is logged.
+static void vgltf_renderer_create_image(
+    struct vgltf_renderer *renderer, uint32_t width, uint32_t height,
+    uint32_t mip_level_count, VkFormat format, VkImageTiling tiling,
+    VkImageUsageFlags usage, VkMemoryPropertyFlags properties,
+    struct vgltf_renderer_allocated_image *image) {
+  const VkImageCreateInfo image_info = {
+      .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+      .imageType = VK_IMAGE_TYPE_2D,
+      .extent = {width, height, 1},
+      .mipLevels = mip_level_count,
+      .arrayLayers = 1,
+      .format = format,
+      .tiling = tiling,
+      .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+      .usage = usage,
+      .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+      .samples = VK_SAMPLE_COUNT_1_BIT,
+  };
+  const VmaAllocationCreateInfo allocation_info = {
+      .usage = VMA_MEMORY_USAGE_GPU_ONLY, .requiredFlags = properties};
+
+  if (vmaCreateImage(renderer->device.allocator, &image_info, &allocation_info,
+                     &image->image, &image->allocation,
+                     &image->info) != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Failed to create image");
+  }
+}
+
+// Returns true when `format` is one of the two combined depth/stencil formats
+// this renderer knows about, false for every other format.
+static bool has_stencil_component(VkFormat format) {
+  switch (format) {
+  case VK_FORMAT_D32_SFLOAT_S8_UINT:
+  case VK_FORMAT_D24_UNORM_S8_UINT:
+    return true;
+  default:
+    return false;
+  }
+}
+
+// Transitions the first `mip_level_count` mip levels of `image` from
+// `old_layout` to `new_layout` with a pipeline barrier recorded into a
+// single-use command buffer.
+//
+// Supported transitions:
+//   UNDEFINED            -> TRANSFER_DST_OPTIMAL
+//   TRANSFER_DST_OPTIMAL -> SHADER_READ_ONLY_OPTIMAL
+//   UNDEFINED            -> DEPTH_STENCIL_ATTACHMENT_OPTIMAL
+//
+// `format` is only consulted for depth/stencil transitions, to decide
+// whether the stencil aspect has to be part of the barrier.
+//
+// Returns true when the barrier was recorded and handed to
+// end_single_time_commands, false (after logging) for any other layout pair.
+// The layout pair is validated before a command buffer is allocated, so an
+// unsupported request does not leak one.
+static bool transition_image_layout(struct vgltf_renderer *renderer,
+                                    VkImage image, VkFormat format,
+                                    VkImageLayout old_layout,
+                                    VkImageLayout new_layout,
+                                    uint32_t mip_level_count) {
+  VkAccessFlags src_access_mask;
+  VkAccessFlags dst_access_mask;
+  VkPipelineStageFlags source_stage;
+  VkPipelineStageFlags destination_stage;
+  if (old_layout == VK_IMAGE_LAYOUT_UNDEFINED &&
+      new_layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
+    src_access_mask = 0;
+    dst_access_mask = VK_ACCESS_TRANSFER_WRITE_BIT;
+    source_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+    destination_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
+  } else if (old_layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
+             new_layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) {
+    src_access_mask = VK_ACCESS_TRANSFER_WRITE_BIT;
+    dst_access_mask = VK_ACCESS_SHADER_READ_BIT;
+    source_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
+    destination_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+  } else if (old_layout == VK_IMAGE_LAYOUT_UNDEFINED &&
+             new_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) {
+    src_access_mask = 0;
+    dst_access_mask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
+                      VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+    source_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+    destination_stage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
+  } else {
+    VGLTF_LOG_ERR("Unsupported image layout transition");
+    return false;
+  }
+
+  VkImageAspectFlags aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT;
+  if (new_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) {
+    aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
+    // Combined depth/stencil formats must transition both aspects together.
+    if (has_stencil_component(format)) {
+      aspect_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
+    }
+  }
+
+  VkImageMemoryBarrier barrier = {
+      .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+      .oldLayout = old_layout,
+      .newLayout = new_layout,
+      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+      .image = image,
+      .subresourceRange = {.aspectMask = aspect_mask,
+                           .baseMipLevel = 0,
+                           .levelCount = mip_level_count,
+                           .baseArrayLayer = 0,
+                           .layerCount = 1},
+      .srcAccessMask = src_access_mask,
+      .dstAccessMask = dst_access_mask};
+
+  VkCommandBuffer command_buffer = begin_single_time_commands(renderer);
+  vkCmdPipelineBarrier(command_buffer, source_stage, destination_stage, 0, 0,
+                       nullptr, 0, nullptr, 1, &barrier);
+  end_single_time_commands(renderer, command_buffer);
+  return true;
+}
+
+// Copies tightly packed texel data from the start of `buffer` into mip level
+// 0, array layer 0 of `image` (color aspect), covering a width x height
+// region at offset (0, 0, 0). The copy is recorded with the image in
+// VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, using a single-use command buffer.
+void copy_buffer_to_image(struct vgltf_renderer *renderer, VkBuffer buffer,
+                          VkImage image, uint32_t width, uint32_t height) {
+  const VkImageSubresourceLayers color_level_zero = {
+      .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+      .mipLevel = 0,
+      .baseArrayLayer = 0,
+      .layerCount = 1};
+
+  // Zero row length / image height mean the buffer data is tightly packed.
+  VkBufferImageCopy copy_region = {};
+  copy_region.bufferOffset = 0;
+  copy_region.bufferRowLength = 0;
+  copy_region.bufferImageHeight = 0;
+  copy_region.imageSubresource = color_level_zero;
+  copy_region.imageOffset = (VkOffset3D){0, 0, 0};
+  copy_region.imageExtent = (VkExtent3D){width, height, 1};
+
+  VkCommandBuffer cmd = begin_single_time_commands(renderer);
+  vkCmdCopyBufferToImage(cmd, buffer, image,
+                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy_region);
+  end_single_time_commands(renderer, cmd);
+}
+
+// Creates the depth attachment for the current swapchain extent: a
+// device-local depth image in the format chosen by find_depth_format, a
+// depth-aspect image view on it, and a transition of the image to
+// VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL.
+//
+// Returns true on success. On failure everything created by this call is
+// destroyed again, the corresponding handles are reset to VK_NULL_HANDLE and
+// false is returned.
+static bool
+vgltf_renderer_create_depth_resources(struct vgltf_renderer *renderer) {
+  VkFormat depth_format = find_depth_format(renderer);
+
+  // vgltf_renderer_create_image has no return value; clearing the handle
+  // first lets a failed creation be detected afterwards.
+  renderer->depth_image.image = VK_NULL_HANDLE;
+  vgltf_renderer_create_image(
+      renderer, renderer->swapchain.swapchain_extent.width,
+      renderer->swapchain.swapchain_extent.height, 1, depth_format,
+      VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &renderer->depth_image);
+  if (renderer->depth_image.image == VK_NULL_HANDLE) {
+    VGLTF_LOG_ERR("Couldn't create depth image");
+    goto err;
+  }
+
+  if (!create_image_view(&renderer->device, renderer->depth_image.image,
+                         depth_format, &renderer->depth_image_view,
+                         VK_IMAGE_ASPECT_DEPTH_BIT, 1)) {
+    VGLTF_LOG_ERR("Couldn't create depth image view");
+    goto destroy_depth_image;
+  }
+
+  if (!transition_image_layout(
+          renderer, renderer->depth_image.image, depth_format,
+          VK_IMAGE_LAYOUT_UNDEFINED,
+          VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, 1)) {
+    VGLTF_LOG_ERR("Couldn't transition depth image layout");
+    goto destroy_depth_image_view;
+  }
+
+  return true;
+destroy_depth_image_view:
+  vkDestroyImageView(renderer->device.device, renderer->depth_image_view,
+                     nullptr);
+  renderer->depth_image_view = VK_NULL_HANDLE;
+destroy_depth_image:
+  vmaDestroyImage(renderer->device.allocator, renderer->depth_image.image,
+                  renderer->depth_image.allocation);
+  renderer->depth_image.image = VK_NULL_HANDLE;
+  renderer->depth_image.allocation = VK_NULL_HANDLE;
+err:
+  return false;
+}
+
+// Creates an exclusive-sharing buffer of `size` bytes with the given usage
+// through the VMA allocator and stores the buffer handle, its allocation and
+// the allocation info in `buffer`.
+//
+// `properties` is passed to VMA as *preferred* memory property flags; the
+// allocation additionally requests sequential-write host access with the
+// "allow transfer instead" option.
+//
+// Returns true on success, false (after logging) otherwise.
+static bool
+vgltf_renderer_create_buffer(struct vgltf_renderer *renderer, VkDeviceSize size,
+                             VkBufferUsageFlags usage,
+                             VkMemoryPropertyFlags properties,
+                             struct vgltf_renderer_allocated_buffer *buffer) {
+  VkBufferCreateInfo create_info = {};
+  create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+  create_info.size = size;
+  create_info.usage = usage;
+  create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+
+  VmaAllocationCreateInfo allocation_info = {};
+  allocation_info.flags =
+      VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT |
+      VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
+  allocation_info.usage = VMA_MEMORY_USAGE_AUTO;
+  allocation_info.preferredFlags = properties;
+
+  const VkResult result = vmaCreateBuffer(
+      renderer->device.allocator, &create_info, &allocation_info,
+      &buffer->buffer, &buffer->allocation, &buffer->info);
+  if (result == VK_SUCCESS) {
+    return true;
+  }
+
+  VGLTF_LOG_ERR("Failed to create buffer");
+  return false;
+}
+
+// Fills mip levels 1 .. mip_levels-1 of `image` by repeatedly blitting each
+// level into the next one with linear filtering, and leaves every level in
+// VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL.
+//
+// texture_width/texture_height are the dimensions of mip level 0. The
+// barriers below use TRANSFER_DST_OPTIMAL as the old layout of every level,
+// so the image is expected to be in that layout on entry.
+//
+// Panics (VGLTF_PANIC) when `image_format` does not report linear filtering
+// support for optimally tiled images. All commands are recorded into one
+// single-use command buffer.
+static void generate_mipmaps(struct vgltf_renderer *renderer, VkImage image,
+ VkFormat image_format, int32_t texture_width,
+ int32_t texture_height, uint32_t mip_levels) {
+ VkFormatProperties format_properties;
+ vkGetPhysicalDeviceFormatProperties(renderer->device.physical_device,
+ image_format, &format_properties);
+ if (!(format_properties.optimalTilingFeatures &
+ VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT)) {
+ VGLTF_PANIC("Texture image format does not support linear blitting!");
+ }
+
+ VkCommandBuffer command_buffer = begin_single_time_commands(renderer);
+ // One barrier object is reused for every transition; only the mip level,
+ // layouts and access masks are changed between uses.
+ VkImageMemoryBarrier barrier = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .image = image,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ .levelCount = 1}};
+
+ // Dimensions of the level currently used as the blit source (level i - 1).
+ int32_t mip_width = texture_width;
+ int32_t mip_height = texture_height;
+
+ for (uint32_t i = 1; i < mip_levels; i++) {
+ // Make level i - 1 readable as a blit source once writes to it are done.
+ barrier.subresourceRange.baseMipLevel = i - 1;
+ barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
+ barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
+ barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+ barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
+
+ vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0,
+ nullptr, 1, &barrier);
+ // Blit the whole of level i - 1 into level i, which is half the size in
+ // each dimension but never smaller than one texel.
+ VkImageBlit blit = {
+ .srcOffsets = {{0, 0, 0}, {mip_width, mip_height, 1}},
+ .srcSubresource = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .mipLevel = i - 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1},
+ .dstOffsets = {{0, 0, 0},
+ {mip_width > 1 ? mip_width / 2 : 1,
+ mip_height > 1 ? mip_height / 2 : 1, 1}},
+ .dstSubresource = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .mipLevel = i,
+ .baseArrayLayer = 0,
+ .layerCount = 1},
+ };
+ vkCmdBlitImage(command_buffer, image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+ image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &blit,
+ VK_FILTER_LINEAR);
+ // Level i - 1 is finished: hand it over to fragment shader reads.
+ barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
+ barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+ barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
+ barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+
+ vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr,
+ 0, nullptr, 1, &barrier);
+ // Advance to the dimensions of level i, clamped at one texel.
+ if (mip_width > 1)
+ mip_width /= 2;
+ if (mip_height > 1)
+ mip_height /= 2;
+ }
+ // The last level is never used as a blit source, so it goes straight from
+ // TRANSFER_DST_OPTIMAL to SHADER_READ_ONLY_OPTIMAL.
+ barrier.subresourceRange.baseMipLevel = mip_levels - 1;
+ barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
+ barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+ barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+ barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+ vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, 0,
+ nullptr, 1, &barrier);
+
+ end_single_time_commands(renderer, command_buffer);
+}
+
+// Loads the image at TEXTURE_PATH and uploads it into a new device-local,
+// sampled VK_FORMAT_R8G8B8A8_SRGB texture with a full mip chain.
+//
+// The pixel data is staged through a temporary transfer buffer (4 bytes per
+// pixel), copied into mip level 0, and the remaining levels are produced by
+// generate_mipmaps. renderer->mip_level_count is set to
+// floor(log2(max(width, height))) + 1.
+//
+// Returns true on success. On failure the staging buffer, the texture image
+// (if already created) and the decoded image are released and false is
+// returned.
+static bool
+vgltf_renderer_create_texture_image(struct vgltf_renderer *renderer) {
+  struct vgltf_image image;
+  if (!vgltf_image_load_from_file(&image, SV(TEXTURE_PATH))) {
+    VGLTF_LOG_ERR("Couldn't load image from file");
+    goto err;
+  }
+  renderer->mip_level_count =
+      floor(log2(VGLTF_MAX(image.width, image.height))) + 1;
+
+  // Widen before multiplying so large images cannot overflow a narrow type.
+  VkDeviceSize image_size = (VkDeviceSize)image.width * image.height * 4;
+  struct vgltf_renderer_allocated_buffer staging_buffer = {};
+  if (!vgltf_renderer_create_buffer(renderer, image_size,
+                                    VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+                                    VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                                        VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+                                    &staging_buffer)) {
+    VGLTF_LOG_ERR("Couldn't create staging buffer");
+    goto deinit_image;
+  }
+
+  void *data = nullptr;
+  if (vmaMapMemory(renderer->device.allocator, staging_buffer.allocation,
+                   &data) != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Couldn't map staging buffer");
+    goto destroy_staging_buffer;
+  }
+  memcpy(data, image.data, image_size);
+  vmaUnmapMemory(renderer->device.allocator, staging_buffer.allocation);
+
+  // vgltf_renderer_create_image has no return value; clearing the handle
+  // first lets a failed creation be detected afterwards.
+  renderer->texture_image.image = VK_NULL_HANDLE;
+  vgltf_renderer_create_image(
+      renderer, image.width, image.height, renderer->mip_level_count,
+      VK_FORMAT_R8G8B8A8_SRGB, VK_IMAGE_TILING_OPTIMAL,
+      VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
+          VK_IMAGE_USAGE_SAMPLED_BIT,
+      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &renderer->texture_image);
+  if (renderer->texture_image.image == VK_NULL_HANDLE) {
+    VGLTF_LOG_ERR("Couldn't create texture image");
+    goto destroy_staging_buffer;
+  }
+
+  if (!transition_image_layout(renderer, renderer->texture_image.image,
+                               VK_FORMAT_R8G8B8A8_SRGB,
+                               VK_IMAGE_LAYOUT_UNDEFINED,
+                               VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+                               renderer->mip_level_count)) {
+    VGLTF_LOG_ERR("Couldn't transition texture image layout");
+    goto destroy_texture_image;
+  }
+  copy_buffer_to_image(renderer, staging_buffer.buffer,
+                       renderer->texture_image.image, image.width,
+                       image.height);
+
+  // generate_mipmaps also moves every level to SHADER_READ_ONLY_OPTIMAL.
+  generate_mipmaps(renderer, renderer->texture_image.image,
+                   VK_FORMAT_R8G8B8A8_SRGB, image.width, image.height,
+                   renderer->mip_level_count);
+
+  vmaDestroyBuffer(renderer->device.allocator, staging_buffer.buffer,
+                   staging_buffer.allocation);
+  vgltf_image_deinit(&image);
+  return true;
+destroy_texture_image:
+  vmaDestroyImage(renderer->device.allocator, renderer->texture_image.image,
+                  renderer->texture_image.allocation);
+  renderer->texture_image.image = VK_NULL_HANDLE;
+  renderer->texture_image.allocation = VK_NULL_HANDLE;
+destroy_staging_buffer:
+  vmaDestroyBuffer(renderer->device.allocator, staging_buffer.buffer,
+                   staging_buffer.allocation);
+deinit_image:
+  vgltf_image_deinit(&image);
+err:
+  return false;
+}
+
+// Creates the color-aspect image view over all mip levels of the texture
+// image and stores it in renderer->texture_image_view. Returns whatever
+// create_image_view returns.
+static bool
+vgltf_renderer_create_texture_image_view(struct vgltf_renderer *renderer) {
+  const bool view_created = create_image_view(
+      &renderer->device, renderer->texture_image.image, VK_FORMAT_R8G8B8A8_SRGB,
+      &renderer->texture_image_view, VK_IMAGE_ASPECT_COLOR_BIT,
+      renderer->mip_level_count);
+  return view_created;
+}
+
+// Creates the sampler used for the texture image: linear min/mag filtering,
+// linear mipmapping over LODs 0 .. mip_level_count, repeat addressing on all
+// axes and anisotropic filtering at the maximum level reported by the
+// physical device.
+//
+// Returns true on success, false (after logging) when vkCreateSampler fails.
+static bool
+vgltf_renderer_create_texture_sampler(struct vgltf_renderer *renderer) {
+  VkPhysicalDeviceProperties properties = {};
+  vkGetPhysicalDeviceProperties(renderer->device.physical_device, &properties);
+
+  VkSamplerCreateInfo sampler_info = {
+      .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+      .magFilter = VK_FILTER_LINEAR,
+      .minFilter = VK_FILTER_LINEAR,
+      .addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT,
+      .addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT,
+      .addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT,
+      .anisotropyEnable = VK_TRUE,
+      .maxAnisotropy = properties.limits.maxSamplerAnisotropy,
+      .borderColor = VK_BORDER_COLOR_INT_OPAQUE_BLACK,
+      .unnormalizedCoordinates = VK_FALSE,
+      .compareEnable = VK_FALSE,
+      .compareOp = VK_COMPARE_OP_ALWAYS,
+      .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
+      .mipLodBias = 0.f,
+      .minLod = 0.f,
+      .maxLod = (float)renderer->mip_level_count};
+
+  if (vkCreateSampler(renderer->device.device, &sampler_info, nullptr,
+                      &renderer->texture_sampler) != VK_SUCCESS) {
+    // Logged like every other create function in this file.
+    VGLTF_LOG_ERR("Couldn't create texture sampler");
+    goto err;
+  }
+
+  return true;
+err:
+  return false;
+}
+
+// File reader callback handed to tinyobj_parse_obj.
+//
+// ctx           unused
+// filename      path of the file the parser wants to read (the OBJ itself,
+//               or a material library referenced by it)
+// is_mtl        unused
+// obj_filename  path of the OBJ being parsed; unused
+// data, len     receive the file contents and their length
+//
+// On a null filename or a failed read, *data is set to NULL and *len to 0.
+//
+// NOTE(review): material library names are passed through exactly as the
+// parser supplies them; they are not resolved against the OBJ's directory.
+// Confirm whether that is needed for the assets in use.
+static void get_file_data(void *ctx, const char *filename, const int is_mtl,
+                          const char *obj_filename, char **data, size_t *len) {
+  (void)ctx;
+  (void)is_mtl;
+  (void)obj_filename;
+
+  *data = NULL;
+  *len = 0;
+
+  if (!filename) {
+    VGLTF_LOG_ERR("Null filename");
+    return;
+  }
+
+  // Read the file that was actually requested, not the OBJ a second time.
+  *data = vgltf_platform_read_file_to_string(filename, len);
+  if (!*data) {
+    *len = 0;
+  }
+}
+
+// Parses the OBJ file at MODEL_PATH (triangulated) and appends one vertex and
+// one index per face corner to renderer->vertices / renderer->indices.
+//
+// Every vertex gets the corner's position, its texture coordinates with the
+// V axis flipped (1 - v) and a white color. Indices are sequential: the n-th
+// index written holds the value n.
+//
+// Returns true on success. Returns false (after logging) when the file cannot
+// be parsed or when the model needs more indices than a 16-bit index can
+// address. The parser's data is released exactly once on every path after a
+// successful parse.
+//
+// NOTE(review): the capacity of renderer->vertices / renderer->indices is not
+// visible here and is not checked — confirm it is large enough for the model.
+static bool load_model(struct vgltf_renderer *renderer) {
+  tinyobj_attrib_t attrib;
+  tinyobj_shape_t *shapes = nullptr;
+  size_t shape_count;
+  tinyobj_material_t *materials = nullptr;
+  size_t material_count;
+
+  if ((tinyobj_parse_obj(&attrib, &shapes, &shape_count, &materials,
+                         &material_count, MODEL_PATH, get_file_data, nullptr,
+                         TINYOBJ_FLAG_TRIANGULATE)) != TINYOBJ_SUCCESS) {
+    VGLTF_LOG_ERR("Couldn't load obj");
+    return false;
+  }
+
+  bool loaded = false;
+  for (size_t shape_index = 0; shape_index < shape_count; shape_index++) {
+    const tinyobj_shape_t *shape = &shapes[shape_index];
+    const size_t first_face = shape->face_offset;
+    const size_t end_face = first_face + shape->length;
+
+    for (size_t face_index = first_face; face_index < end_face; face_index++) {
+      // Faces are triangulated, so every face owns three consecutive entries.
+      for (size_t corner = 0; corner < 3; corner++) {
+        const tinyobj_vertex_index_t idx =
+            attrib.faces[face_index * 3 + corner];
+        const float *position = &attrib.vertices[3 * (size_t)idx.v_idx];
+        const float *texcoord = &attrib.texcoords[2 * (size_t)idx.vt_idx];
+
+        // The index buffer is uploaded and bound as 16-bit indices.
+        if (renderer->index_count > UINT16_MAX) {
+          VGLTF_LOG_ERR("Model has too many vertices for 16-bit indices");
+          goto release_obj_data;
+        }
+
+        renderer->vertices[renderer->vertex_count++] = (struct vgltf_vertex){
+            .position = {position[0], position[1], position[2]},
+            .texture_coordinates = {texcoord[0], 1.f - texcoord[1]},
+            .color = {1.f, 1.f, 1.f}};
+        // Write first, then increment: doing both in one expression reads
+        // and modifies index_count without sequencing.
+        renderer->indices[renderer->index_count] = renderer->index_count;
+        renderer->index_count++;
+      }
+    }
+  }
+  loaded = true;
+
+release_obj_data:
+  // Released once, after all shapes were read (attrib is shared by them).
+  tinyobj_attrib_free(&attrib);
+  tinyobj_shapes_free(shapes, shape_count);
+  tinyobj_materials_free(materials, material_count);
+  return loaded;
+}
+
+// Uploads renderer->vertices (vertex_count entries) into a new device-local
+// vertex buffer stored in renderer->vertex_buffer. The data is first written
+// to a temporary host-visible staging buffer and then copied over with
+// vgltf_renderer_copy_buffer.
+//
+// Returns true on success. On failure every buffer created by this call is
+// destroyed again and false is returned.
+static bool
+vgltf_renderer_create_vertex_buffer(struct vgltf_renderer *renderer) {
+  VkDeviceSize buffer_size =
+      renderer->vertex_count * sizeof(struct vgltf_vertex);
+
+  struct vgltf_renderer_allocated_buffer staging_buffer = {};
+  if (!vgltf_renderer_create_buffer(renderer, buffer_size,
+                                    VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+                                    VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                                        VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+                                    &staging_buffer)) {
+    VGLTF_LOG_ERR("Failed to create transfer buffer");
+    goto err;
+  }
+
+  // Without this check a failed mapping would memcpy through a garbage
+  // pointer.
+  void *data = nullptr;
+  if (vmaMapMemory(renderer->device.allocator, staging_buffer.allocation,
+                   &data) != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Failed to map transfer buffer");
+    goto destroy_staging_buffer;
+  }
+  memcpy(data, renderer->vertices,
+         renderer->vertex_count * sizeof(struct vgltf_vertex));
+  vmaUnmapMemory(renderer->device.allocator, staging_buffer.allocation);
+
+  if (!vgltf_renderer_create_buffer(
+          renderer, buffer_size,
+          VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
+          VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &renderer->vertex_buffer)) {
+    VGLTF_LOG_ERR("Failed to create vertex buffer");
+    goto destroy_staging_buffer;
+  }
+
+  if (!vgltf_renderer_copy_buffer(renderer, staging_buffer.buffer,
+                                  renderer->vertex_buffer.buffer,
+                                  buffer_size)) {
+    VGLTF_LOG_ERR("Failed to copy vertex data");
+    goto destroy_vertex_buffer;
+  }
+
+  vmaDestroyBuffer(renderer->device.allocator, staging_buffer.buffer,
+                   staging_buffer.allocation);
+  return true;
+destroy_vertex_buffer:
+  vmaDestroyBuffer(renderer->device.allocator, renderer->vertex_buffer.buffer,
+                   renderer->vertex_buffer.allocation);
+destroy_staging_buffer:
+  vmaDestroyBuffer(renderer->device.allocator, staging_buffer.buffer,
+                   staging_buffer.allocation);
+err:
+  return false;
+}
+
+// Uploads renderer->indices (index_count 16-bit entries) into a new
+// device-local index buffer stored in renderer->index_buffer. The data is
+// first written to a temporary host-visible staging buffer and then copied
+// over with vgltf_renderer_copy_buffer.
+//
+// Returns true on success. On failure every buffer created by this call is
+// destroyed again and false is returned.
+static bool
+vgltf_renderer_create_index_buffer(struct vgltf_renderer *renderer) {
+  VkDeviceSize buffer_size = renderer->index_count * sizeof(uint16_t);
+  struct vgltf_renderer_allocated_buffer staging_buffer = {};
+  if (!vgltf_renderer_create_buffer(renderer, buffer_size,
+                                    VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+                                    VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                                        VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+                                    &staging_buffer)) {
+    VGLTF_LOG_ERR("Failed to create transfer buffer");
+    goto err;
+  }
+
+  // Without this check a failed mapping would memcpy through a garbage
+  // pointer.
+  void *data = nullptr;
+  if (vmaMapMemory(renderer->device.allocator, staging_buffer.allocation,
+                   &data) != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Failed to map transfer buffer");
+    goto destroy_staging_buffer;
+  }
+  memcpy(data, renderer->indices, renderer->index_count * sizeof(uint16_t));
+  vmaUnmapMemory(renderer->device.allocator, staging_buffer.allocation);
+
+  if (!vgltf_renderer_create_buffer(
+          renderer, buffer_size,
+          VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
+          VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &renderer->index_buffer)) {
+    VGLTF_LOG_ERR("Failed to create index buffer");
+    goto destroy_staging_buffer;
+  }
+
+  if (!vgltf_renderer_copy_buffer(renderer, staging_buffer.buffer,
+                                  renderer->index_buffer.buffer,
+                                  buffer_size)) {
+    VGLTF_LOG_ERR("Failed to copy index data");
+    goto destroy_index_buffer;
+  }
+
+  vmaDestroyBuffer(renderer->device.allocator, staging_buffer.buffer,
+                   staging_buffer.allocation);
+  return true;
+
+destroy_index_buffer:
+  vmaDestroyBuffer(renderer->device.allocator, renderer->index_buffer.buffer,
+                   renderer->index_buffer.allocation);
+destroy_staging_buffer:
+  vmaDestroyBuffer(renderer->device.allocator, staging_buffer.buffer,
+                   staging_buffer.allocation);
+err:
+  return false;
+}
+
+// Allocates VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT primary command buffers
+// from the renderer's command pool into renderer->command_buffer.
+//
+// Returns true on success, false (after logging) otherwise.
+static bool
+vgltf_renderer_create_command_buffer(struct vgltf_renderer *renderer) {
+  VkCommandBufferAllocateInfo info = {};
+  info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
+  info.commandPool = renderer->command_pool;
+  info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+  info.commandBufferCount = VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT;
+
+  const VkResult result = vkAllocateCommandBuffers(
+      renderer->device.device, &info, renderer->command_buffer);
+  if (result == VK_SUCCESS) {
+    return true;
+  }
+
+  VGLTF_LOG_ERR("Couldn't allocate command buffers");
+  return false;
+}
+
+// Creates, for every frame in flight, the "image available" semaphore, the
+// "render finished" semaphore and the in-flight fence. Fences are created in
+// the signaled state.
+//
+// Returns true when every object was created. On failure every object
+// created by this call — including those of the partially created frame —
+// is destroyed, its slot is reset to VK_NULL_HANDLE, and false is returned.
+static bool
+vgltf_renderer_create_sync_objects(struct vgltf_renderer *renderer) {
+  VkSemaphoreCreateInfo semaphore_info = {
+      .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+  };
+
+  VkFenceCreateInfo fence_info = {.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
+                                  .flags = VK_FENCE_CREATE_SIGNALED_BIT};
+
+  int frame_index = 0;
+  for (; frame_index < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT;
+       frame_index++) {
+    VkSemaphore *image_available =
+        &renderer->image_available_semaphores[frame_index];
+    VkSemaphore *render_finished =
+        &renderer->render_finished_semaphores[frame_index];
+    VkFence *in_flight = &renderer->in_flight_fences[frame_index];
+
+    // Start from null handles so the cleanup below can destroy all three
+    // slots of a partially created frame unconditionally.
+    *image_available = VK_NULL_HANDLE;
+    *render_finished = VK_NULL_HANDLE;
+    *in_flight = VK_NULL_HANDLE;
+
+    if (vkCreateSemaphore(renderer->device.device, &semaphore_info, nullptr,
+                          image_available) != VK_SUCCESS) {
+      *image_available = VK_NULL_HANDLE;
+      break;
+    }
+    if (vkCreateSemaphore(renderer->device.device, &semaphore_info, nullptr,
+                          render_finished) != VK_SUCCESS) {
+      *render_finished = VK_NULL_HANDLE;
+      break;
+    }
+    if (vkCreateFence(renderer->device.device, &fence_info, nullptr,
+                      in_flight) != VK_SUCCESS) {
+      *in_flight = VK_NULL_HANDLE;
+      break;
+    }
+  }
+
+  if (frame_index == VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT) {
+    return true;
+  }
+
+  VGLTF_LOG_ERR("Couldn't create sync objects");
+  // frame_index is the frame whose creation failed; it is included because
+  // some of its objects may already exist. Destroying a null handle is a
+  // no-op.
+  for (int i = 0; i <= frame_index; i++) {
+    vkDestroyFence(renderer->device.device, renderer->in_flight_fences[i],
+                   nullptr);
+    vkDestroySemaphore(renderer->device.device,
+                       renderer->render_finished_semaphores[i], nullptr);
+    vkDestroySemaphore(renderer->device.device,
+                       renderer->image_available_semaphores[i], nullptr);
+    renderer->in_flight_fences[i] = VK_NULL_HANDLE;
+    renderer->render_finished_semaphores[i] = VK_NULL_HANDLE;
+    renderer->image_available_semaphores[i] = VK_NULL_HANDLE;
+  }
+  return false;
+}
+
+// Initializes `swapchain`: creates the Vulkan swapchain for `surface` and
+// `window_size`, then the image views for its images.
+//
+// Returns true on success. If the image views cannot be created, the
+// swapchain created by this call is destroyed before returning false.
+static bool vgltf_vk_swapchain_init(struct vgltf_vk_swapchain *swapchain,
+                                    struct vgltf_vk_device *device,
+                                    struct vgltf_vk_surface *surface,
+                                    struct vgltf_window_size *window_size) {
+  const bool swapchain_created =
+      create_swapchain(swapchain, device, surface, window_size);
+  if (!swapchain_created) {
+    VGLTF_LOG_ERR("Couldn't create swapchain");
+    return false;
+  }
+
+  const bool views_created = create_swapchain_image_views(swapchain, device);
+  if (!views_created) {
+    VGLTF_LOG_ERR("Couldn't create image views");
+    vkDestroySwapchainKHR(device->device, swapchain->swapchain, nullptr);
+    return false;
+  }
+
+  return true;
+}
+
+// Destroys every swapchain image view of `swapchain`, then the swapchain
+// itself.
+static void vgltf_vk_swapchain_deinit(struct vgltf_vk_swapchain *swapchain,
+                                      struct vgltf_vk_device *device) {
+  const uint32_t view_count = swapchain->swapchain_image_count;
+  uint32_t view_index = 0;
+  while (view_index < view_count) {
+    VkImageView view = swapchain->swapchain_image_views[view_index];
+    vkDestroyImageView(device->device, view, nullptr);
+    view_index++;
+  }
+
+  vkDestroySwapchainKHR(device->device, swapchain->swapchain, nullptr);
+}
+
+// Releases everything tied to the current swapchain, in this order: the
+// depth image view, the depth image, one framebuffer per swapchain image,
+// and finally the swapchain with its image views.
+static void vgltf_renderer_cleanup_swapchain(struct vgltf_renderer *renderer) {
+  VkDevice device = renderer->device.device;
+
+  vkDestroyImageView(device, renderer->depth_image_view, nullptr);
+  vmaDestroyImage(renderer->device.allocator, renderer->depth_image.image,
+                  renderer->depth_image.allocation);
+
+  const uint32_t framebuffer_count = renderer->swapchain.swapchain_image_count;
+  for (uint32_t i = 0; i < framebuffer_count; i++) {
+    VkFramebuffer framebuffer = renderer->swapchain_framebuffers[i];
+    vkDestroyFramebuffer(device, framebuffer, nullptr);
+  }
+
+  vgltf_vk_swapchain_deinit(&renderer->swapchain, &renderer->device);
+}
+
+// Rebuilds everything that depends on the swapchain: waits for the device to
+// become idle, destroys the current swapchain resources, then recreates the
+// swapchain (for renderer->window_size), its image views, the depth
+// resources and the framebuffers.
+//
+// Returns true on success. Returns false (after logging) as soon as one step
+// fails; the steps that already succeeded are NOT rolled back, so the
+// renderer must not render again after a failure.
+static bool vgltf_renderer_recreate_swapchain(struct vgltf_renderer *renderer) {
+  vkDeviceWaitIdle(renderer->device.device);
+  vgltf_renderer_cleanup_swapchain(renderer);
+
+  if (!create_swapchain(&renderer->swapchain, &renderer->device,
+                        &renderer->surface, &renderer->window_size)) {
+    VGLTF_LOG_ERR("Couldn't recreate swapchain");
+    return false;
+  }
+  if (!create_swapchain_image_views(&renderer->swapchain, &renderer->device)) {
+    VGLTF_LOG_ERR("Couldn't recreate swapchain image views");
+    return false;
+  }
+  if (!vgltf_renderer_create_depth_resources(renderer)) {
+    VGLTF_LOG_ERR("Couldn't recreate depth resources");
+    return false;
+  }
+  if (!vgltf_renderer_create_framebuffers(renderer)) {
+    VGLTF_LOG_ERR("Couldn't recreate framebuffers");
+    return false;
+  }
+  return true;
+}
+
+// Records the render pass for one frame into the current frame's command
+// buffer: begins the pass on the framebuffer of `swapchain_image_index`
+// (clearing color to opaque black and depth to 1.0), binds the graphics
+// pipeline, sets a viewport and scissor covering the whole swapchain extent,
+// binds the vertex/index buffers and the current frame's descriptor set,
+// issues one indexed draw of renderer->index_count indices and ends the pass.
+static void vgltf_renderer_triangle_pass(struct vgltf_renderer *renderer,
+                                         uint32_t swapchain_image_index) {
+  VkCommandBuffer cmd = renderer->command_buffer[renderer->current_frame];
+  const VkExtent2D extent = renderer->swapchain.swapchain_extent;
+
+  const VkClearValue clear_values[] = {
+      {.color = {.float32 = {0.f, 0.f, 0.f, 1.f}}},
+      {.depthStencil = {1.0f, 0}}};
+
+  VkRenderPassBeginInfo begin_info = {};
+  begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
+  begin_info.renderPass = renderer->render_pass;
+  begin_info.framebuffer =
+      renderer->swapchain_framebuffers[swapchain_image_index];
+  begin_info.renderArea.extent = extent;
+  begin_info.clearValueCount = 2;
+  begin_info.pClearValues = clear_values;
+
+  vkCmdBeginRenderPass(cmd, &begin_info, VK_SUBPASS_CONTENTS_INLINE);
+  vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
+                    renderer->graphics_pipeline);
+
+  // Viewport and scissor both span the full swapchain image.
+  VkViewport viewport = {};
+  viewport.x = 0.f;
+  viewport.y = 0.f;
+  viewport.width = (float)extent.width;
+  viewport.height = (float)extent.height;
+  viewport.minDepth = 0.f;
+  viewport.maxDepth = 1.f;
+  vkCmdSetViewport(cmd, 0, 1, &viewport);
+
+  VkRect2D scissor = {};
+  scissor.extent = extent;
+  vkCmdSetScissor(cmd, 0, 1, &scissor);
+
+  VkBuffer vertex_buffer = renderer->vertex_buffer.buffer;
+  VkDeviceSize vertex_offset = 0;
+  vkCmdBindVertexBuffers(cmd, 0, 1, &vertex_buffer, &vertex_offset);
+  vkCmdBindIndexBuffer(cmd, renderer->index_buffer.buffer, 0,
+                       VK_INDEX_TYPE_UINT16);
+
+  vkCmdBindDescriptorSets(
+      cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, renderer->pipeline_layout, 0, 1,
+      &renderer->descriptor_sets[renderer->current_frame], 0, nullptr);
+  vkCmdDrawIndexed(cmd, renderer->index_count, 1, 0, 0, 0);
+
+  vkCmdEndRenderPass(cmd);
+}
+
+// Writes the model/view/projection matrices for this frame into the mapped
+// uniform buffer of `current_frame`.
+//
+// The model matrix rotates around the Z axis by 90 degrees per second of
+// elapsed time (measured from the first call), the view looks from (2, 2, 2)
+// at the origin with Z up, and the projection is a 45 degree perspective
+// with the swapchain's aspect ratio and a 0.1 .. 10 depth range. The
+// projection's Y scale is negated afterwards.
+//
+// Clock failures are logged and otherwise ignored.
+static void update_uniform_buffer(struct vgltf_renderer *renderer,
+                                  uint32_t current_frame) {
+  // 0 doubles as the "not captured yet" marker for the start time.
+  static long long start_time_nanoseconds = 0;
+  if (start_time_nanoseconds == 0) {
+    if (!vgltf_platform_get_current_time_nanoseconds(&start_time_nanoseconds)) {
+      VGLTF_LOG_ERR("Couldn't get current time");
+    }
+  }
+
+  long long current_time_nanoseconds = 0;
+  if (!vgltf_platform_get_current_time_nanoseconds(&current_time_nanoseconds)) {
+    VGLTF_LOG_ERR("Couldn't get current time");
+  }
+
+  // Kept as long long: plain long is 32 bits on some targets and would
+  // truncate the difference after about two seconds.
+  long long elapsed_time_nanoseconds =
+      current_time_nanoseconds - start_time_nanoseconds;
+  float elapsed_time_seconds = elapsed_time_nanoseconds / 1e9f;
+  VGLTF_LOG_INFO("Elapsed time: %f", elapsed_time_seconds);
+
+  vgltf_mat4 model_matrix;
+  vgltf_mat4_rotate(model_matrix, (vgltf_mat4)VGLTF_MAT4_IDENTITY,
+                    elapsed_time_seconds * VGLTF_MATHS_DEG_TO_RAD(90.0f),
+                    (vgltf_vec3){0.f, 0.f, 1.f});
+
+  vgltf_mat4 view_matrix;
+  vgltf_mat4_look_at(view_matrix, (vgltf_vec3){2.f, 2.f, 2.f},
+                     (vgltf_vec3){0.f, 0.f, 0.f}, (vgltf_vec3){0.f, 0.f, 1.f});
+
+  vgltf_mat4 projection_matrix;
+  vgltf_mat4_perspective(projection_matrix, VGLTF_MATHS_DEG_TO_RAD(45.f),
+                         (float)renderer->swapchain.swapchain_extent.width /
+                             (float)renderer->swapchain.swapchain_extent.height,
+                         0.1f, 10.f);
+  // Negate the Y scale of the projection (element [1][1]).
+  projection_matrix[1 * 4 + 1] *= -1;
+
+  struct vgltf_renderer_uniform_buffer_object ubo = {};
+  memcpy(ubo.model, model_matrix, sizeof(vgltf_mat4));
+  memcpy(ubo.view, view_matrix, sizeof(vgltf_mat4));
+  memcpy(ubo.projection, projection_matrix, sizeof(vgltf_mat4));
+  memcpy(renderer->mapped_uniform_buffers[current_frame], &ubo, sizeof(ubo));
+}
+
+// Renders and presents one frame using the resources of the current
+// frame-in-flight slot, then advances to the next slot.
+//
+// Steps: wait for the slot's fence, acquire a swapchain image, re-record the
+// slot's command buffer, update the slot's uniform buffer, submit to the
+// graphics queue and present on the present queue.
+//
+// The swapchain is recreated when acquisition reports it out of date (the
+// frame is then skipped), or after presenting when presentation reports it
+// out of date or suboptimal or when renderer->framebuffer_resized is set. A
+// suboptimal acquisition still renders the frame: the image was acquired and
+// its semaphore will be signaled, so it has to be consumed by a submission.
+//
+// Returns true when the frame was rendered or deliberately skipped, false
+// (after logging) on any Vulkan failure.
+bool vgltf_renderer_render_frame(struct vgltf_renderer *renderer) {
+  vkWaitForFences(renderer->device.device, 1,
+                  &renderer->in_flight_fences[renderer->current_frame], VK_TRUE,
+                  UINT64_MAX);
+
+  uint32_t image_index;
+  VkResult acquire_swapchain_image_result = vkAcquireNextImageKHR(
+      renderer->device.device, renderer->swapchain.swapchain, UINT64_MAX,
+      renderer->image_available_semaphores[renderer->current_frame],
+      VK_NULL_HANDLE, &image_index);
+  if (acquire_swapchain_image_result == VK_ERROR_OUT_OF_DATE_KHR) {
+    // Nothing was acquired, so the swapchain can be rebuilt right away. The
+    // fence has not been reset yet and stays signaled for the next call.
+    renderer->framebuffer_resized = false;
+    if (!vgltf_renderer_recreate_swapchain(renderer)) {
+      VGLTF_LOG_ERR("Failed to recreate the swapchain");
+      goto err;
+    }
+    return true;
+  } else if (acquire_swapchain_image_result != VK_SUCCESS &&
+             acquire_swapchain_image_result != VK_SUBOPTIMAL_KHR) {
+    VGLTF_LOG_ERR("Failed to acquire a swapchain image");
+    goto err;
+  }
+
+  vkResetFences(renderer->device.device, 1,
+                &renderer->in_flight_fences[renderer->current_frame]);
+
+  vkResetCommandBuffer(renderer->command_buffer[renderer->current_frame], 0);
+  VkCommandBufferBeginInfo begin_info = {
+      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+  };
+
+  if (vkBeginCommandBuffer(renderer->command_buffer[renderer->current_frame],
+                           &begin_info) != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Failed to begin recording command buffer");
+    goto err;
+  }
+
+  vgltf_renderer_triangle_pass(renderer, image_index);
+
+  if (vkEndCommandBuffer(renderer->command_buffer[renderer->current_frame]) !=
+      VK_SUCCESS) {
+    VGLTF_LOG_ERR("Failed to record command buffer");
+    goto err;
+  }
+
+  update_uniform_buffer(renderer, renderer->current_frame);
+
+  VkSubmitInfo submit_info = {
+      .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+  };
+
+  // Color output waits until the acquired image is actually available.
+  VkSemaphore wait_semaphores[] = {
+      renderer->image_available_semaphores[renderer->current_frame]};
+  VkPipelineStageFlags wait_stages[] = {
+      VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT};
+  submit_info.waitSemaphoreCount = 1;
+  submit_info.pWaitSemaphores = wait_semaphores;
+  submit_info.pWaitDstStageMask = wait_stages;
+  submit_info.commandBufferCount = 1;
+  submit_info.pCommandBuffers =
+      &renderer->command_buffer[renderer->current_frame];
+
+  VkSemaphore signal_semaphores[] = {
+      renderer->render_finished_semaphores[renderer->current_frame]};
+  submit_info.signalSemaphoreCount = 1;
+  submit_info.pSignalSemaphores = signal_semaphores;
+  if (vkQueueSubmit(renderer->device.graphics_queue, 1, &submit_info,
+                    renderer->in_flight_fences[renderer->current_frame]) !=
+      VK_SUCCESS) {
+    VGLTF_LOG_ERR("Failed to submit draw command buffer");
+    goto err;
+  }
+
+  // Presentation waits for rendering to finish.
+  VkPresentInfoKHR present_info = {.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
+                                   .waitSemaphoreCount = 1,
+                                   .pWaitSemaphores = signal_semaphores};
+
+  VkSwapchainKHR swapchains[] = {renderer->swapchain.swapchain};
+  present_info.swapchainCount = 1;
+  present_info.pSwapchains = swapchains;
+  present_info.pImageIndices = &image_index;
+  VkResult present_result =
+      vkQueuePresentKHR(renderer->device.present_queue, &present_info);
+  if (present_result == VK_ERROR_OUT_OF_DATE_KHR ||
+      present_result == VK_SUBOPTIMAL_KHR || renderer->framebuffer_resized) {
+    renderer->framebuffer_resized = false;
+    if (!vgltf_renderer_recreate_swapchain(renderer)) {
+      VGLTF_LOG_ERR("Failed to recreate the swapchain");
+      goto err;
+    }
+  } else if (present_result != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Failed to present a swapchain image");
+    goto err;
+  }
+  renderer->current_frame =
+      (renderer->current_frame + 1) % VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT;
+  return true;
+err:
+  return false;
+}
+// Creates one uniform buffer per frame in flight, each sized for one
+// struct vgltf_renderer_uniform_buffer_object, and maps it; the mapped
+// pointers are stored in renderer->mapped_uniform_buffers and are not
+// unmapped by this function.
+//
+// Returns true on success. On failure the buffers created by this call are
+// unmapped and destroyed again and false is returned.
+static bool
+vgltf_renderer_create_uniform_buffers(struct vgltf_renderer *renderer) {
+  VkDeviceSize buffer_size =
+      sizeof(struct vgltf_renderer_uniform_buffer_object);
+
+  int buffer_index = 0;
+  for (; buffer_index < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT;
+       buffer_index++) {
+    if (!vgltf_renderer_create_buffer(
+            renderer, buffer_size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+            &renderer->uniform_buffers[buffer_index])) {
+      VGLTF_LOG_ERR("Couldn't create uniform buffer");
+      goto destroy_uniform_buffers;
+    }
+
+    // The mapped pointer is written to every frame, so a failed mapping must
+    // not go unnoticed.
+    if (vmaMapMemory(renderer->device.allocator,
+                     renderer->uniform_buffers[buffer_index].allocation,
+                     &renderer->mapped_uniform_buffers[buffer_index]) !=
+        VK_SUCCESS) {
+      VGLTF_LOG_ERR("Couldn't map uniform buffer");
+      vmaDestroyBuffer(renderer->device.allocator,
+                       renderer->uniform_buffers[buffer_index].buffer,
+                       renderer->uniform_buffers[buffer_index].allocation);
+      renderer->mapped_uniform_buffers[buffer_index] = nullptr;
+      goto destroy_uniform_buffers;
+    }
+  }
+
+  return true;
+destroy_uniform_buffers:
+  // Buffers below buffer_index were fully created and mapped.
+  for (int i = 0; i < buffer_index; i++) {
+    vmaUnmapMemory(renderer->device.allocator,
+                   renderer->uniform_buffers[i].allocation);
+    vmaDestroyBuffer(renderer->device.allocator,
+                     renderer->uniform_buffers[i].buffer,
+                     renderer->uniform_buffers[i].allocation);
+    renderer->mapped_uniform_buffers[i] = nullptr;
+  }
+  return false;
+}
+
+// Creates the descriptor pool the per-frame descriptor sets are allocated
+// from. The pool holds at most VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT sets
+// and provides that many uniform-buffer descriptors and that many
+// combined-image-sampler descriptors.
+//
+// Returns true on success, false (after logging) otherwise.
+static bool
+vgltf_renderer_create_descriptor_pool(struct vgltf_renderer *renderer) {
+  VkDescriptorPoolSize sizes[2] = {};
+  sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+  sizes[0].descriptorCount = VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT;
+  sizes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+  sizes[1].descriptorCount = VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT;
+  const int size_count = sizeof(sizes) / sizeof(sizes[0]);
+
+  VkDescriptorPoolCreateInfo create_info = {};
+  create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
+  create_info.maxSets = VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT;
+  create_info.poolSizeCount = size_count;
+  create_info.pPoolSizes = sizes;
+
+  const VkResult result =
+      vkCreateDescriptorPool(renderer->device.device, &create_info, nullptr,
+                             &renderer->descriptor_pool);
+  if (result != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Couldn't create uniform descriptor pool");
+    return false;
+  }
+
+  return true;
+}
+// Allocates one descriptor set per frame in flight from
+// renderer->descriptor_pool (all sharing renderer->descriptor_set_layout) and
+// writes two bindings into each set: binding 0 is that frame's uniform
+// buffer, binding 1 is the texture image view with its sampler.
+// Returns false (after logging) if the allocation fails.
+static bool
+vgltf_renderer_create_descriptor_sets(struct vgltf_renderer *renderer) {
+  // vkAllocateDescriptorSets wants one layout handle per requested set
+  VkDescriptorSetLayout set_layouts[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT] =
+      {};
+  for (int i = 0; i < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; i++) {
+    set_layouts[i] = renderer->descriptor_set_layout;
+  }
+
+  VkDescriptorSetAllocateInfo allocate_info = {
+      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+      .descriptorPool = renderer->descriptor_pool,
+      .descriptorSetCount = VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT,
+      .pSetLayouts = set_layouts,
+  };
+
+  VkResult allocate_result = vkAllocateDescriptorSets(
+      renderer->device.device, &allocate_info, renderer->descriptor_sets);
+  if (allocate_result != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Couldn't create descriptor sets");
+    return false;
+  }
+
+  for (int frame = 0; frame < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT;
+       frame++) {
+    VkDescriptorSet target_set = renderer->descriptor_sets[frame];
+
+    VkDescriptorBufferInfo uniform_info = {
+        .buffer = renderer->uniform_buffers[frame].buffer,
+        .offset = 0,
+        .range = sizeof(struct vgltf_renderer_uniform_buffer_object),
+    };
+
+    VkDescriptorImageInfo texture_info = {
+        .sampler = renderer->texture_sampler,
+        .imageView = renderer->texture_image_view,
+        .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+    };
+
+    VkWriteDescriptorSet writes[] = {
+        {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+         .dstSet = target_set,
+         .dstBinding = 0,
+         .dstArrayElement = 0,
+         .descriptorCount = 1,
+         .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+         .pBufferInfo = &uniform_info},
+        {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+         .dstSet = target_set,
+         .dstBinding = 1,
+         .dstArrayElement = 0,
+         .descriptorCount = 1,
+         .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+         .pImageInfo = &texture_info},
+    };
+    int write_count = sizeof(writes) / sizeof(writes[0]);
+
+    vkUpdateDescriptorSets(renderer->device.device, write_count, writes, 0,
+                           nullptr);
+  }
+
+  return true;
+}
+
+// Fills `device`: picks a physical device for `surface`, creates the logical
+// device with its graphics and present queues, then creates the allocator.
+//
+// Returns true on success. On failure the logical device is destroyed again
+// if it had already been created, and false is returned.
+static bool vgltf_vk_device_init(struct vgltf_vk_device *device,
+                                 struct vgltf_vk_instance *instance,
+                                 struct vgltf_vk_surface *surface) {
+  if (!pick_physical_device(&device->physical_device, instance,
+                            surface->surface)) {
+    VGLTF_LOG_ERR("Couldn't pick physical device");
+    goto err;
+  }
+
+  if (!create_logical_device(&device->device, &device->graphics_queue,
+                             &device->present_queue, device->physical_device,
+                             surface->surface)) {
+    // The message used to say "pick", copy-pasted from the check above
+    VGLTF_LOG_ERR("Couldn't create logical device");
+    goto err;
+  }
+
+  if (!create_allocator(&device->allocator, device, instance)) {
+    VGLTF_LOG_ERR("Couldn't create allocator");
+    goto destroy_logical_device;
+  }
+
+  return true;
+destroy_logical_device:
+  vkDestroyDevice(device->device, nullptr);
+err:
+  return false;
+}
+
+// Releases what vgltf_vk_device_init created, in reverse order: the
+// allocator first, then the logical device.
+static void vgltf_vk_device_deinit(struct vgltf_vk_device *device) {
+ vmaDestroyAllocator(device->allocator);
+ vkDestroyDevice(device->device, nullptr);
+}
+
+// Initializes every object owned by `renderer`, in dependency order:
+// instance, debug messenger, surface, device, swapchain, render pass,
+// descriptor set layout, graphics pipeline, command pool, depth resources,
+// framebuffers, texture (image, view, sampler), model, vertex/index/uniform
+// buffers, descriptor pool and sets, command buffers and sync objects.
+//
+// Returns true on success. On failure the objects created so far are
+// destroyed and false is returned. The cleanup labels below are listed in
+// the exact reverse of the creation order, so that each `goto` releases
+// everything created before the failing step and nothing else.
+bool vgltf_renderer_init(struct vgltf_renderer *renderer,
+                         struct vgltf_platform *platform) {
+  if (!vgltf_vk_instance_init(&renderer->instance, platform)) {
+    VGLTF_LOG_ERR("instance creation failed");
+    goto err;
+  }
+  vgltf_renderer_setup_debug_messenger(renderer);
+  if (!vgltf_vk_surface_init(&renderer->surface, &renderer->instance,
+                             platform)) {
+    goto destroy_instance;
+  }
+
+  if (!vgltf_vk_device_init(&renderer->device, &renderer->instance,
+                            &renderer->surface)) {
+    VGLTF_LOG_ERR("Device creation failed");
+    goto destroy_surface;
+  }
+
+  struct vgltf_window_size window_size = {800, 600};
+  if (!vgltf_platform_get_window_size(platform, &window_size)) {
+    VGLTF_LOG_ERR("Couldn't get window size");
+    goto destroy_device;
+  }
+  renderer->window_size = window_size;
+
+  if (!vgltf_vk_swapchain_init(&renderer->swapchain, &renderer->device,
+                               &renderer->surface, &renderer->window_size)) {
+    VGLTF_LOG_ERR("Couldn't create swapchain");
+    goto destroy_device;
+  }
+
+  if (!vgltf_renderer_create_render_pass(renderer)) {
+    VGLTF_LOG_ERR("Couldn't create render pass");
+    goto destroy_swapchain;
+  }
+
+  if (!vgltf_renderer_create_descriptor_set_layout(renderer)) {
+    VGLTF_LOG_ERR("Couldn't create descriptor set layout");
+    goto destroy_render_pass;
+  }
+
+  if (!vgltf_renderer_create_graphics_pipeline(renderer)) {
+    VGLTF_LOG_ERR("Couldn't create graphics pipeline");
+    goto destroy_descriptor_set_layout;
+  }
+
+  if (!vgltf_renderer_create_command_pool(renderer)) {
+    VGLTF_LOG_ERR("Couldn't create command pool");
+    goto destroy_graphics_pipeline;
+  }
+
+  if (!vgltf_renderer_create_depth_resources(renderer)) {
+    VGLTF_LOG_ERR("Couldn't create depth resources");
+    goto destroy_command_pool;
+  }
+
+  if (!vgltf_renderer_create_framebuffers(renderer)) {
+    VGLTF_LOG_ERR("Couldn't create framebuffers");
+    goto destroy_depth_resources;
+  }
+
+  if (!vgltf_renderer_create_texture_image(renderer)) {
+    VGLTF_LOG_ERR("Couldn't create texture image");
+    goto destroy_frame_buffers;
+  }
+
+  if (!vgltf_renderer_create_texture_image_view(renderer)) {
+    VGLTF_LOG_ERR("Couldn't create texture image view");
+    goto destroy_texture_image;
+  }
+
+  if (!vgltf_renderer_create_texture_sampler(renderer)) {
+    VGLTF_LOG_ERR("Couldn't create texture sampler");
+    goto destroy_texture_image_view;
+  }
+
+  if (!load_model(renderer)) {
+    VGLTF_LOG_ERR("Couldn't load model");
+    goto destroy_texture_sampler;
+  }
+
+  if (!vgltf_renderer_create_vertex_buffer(renderer)) {
+    VGLTF_LOG_ERR("Couldn't create vertex buffer");
+    goto destroy_model;
+  }
+
+  if (!vgltf_renderer_create_index_buffer(renderer)) {
+    VGLTF_LOG_ERR("Couldn't create index buffer");
+    goto destroy_vertex_buffer;
+  }
+
+  if (!vgltf_renderer_create_uniform_buffers(renderer)) {
+    VGLTF_LOG_ERR("Couldn't create uniform buffers");
+    goto destroy_index_buffer;
+  }
+
+  if (!vgltf_renderer_create_descriptor_pool(renderer)) {
+    VGLTF_LOG_ERR("Couldn't create descriptor pool");
+    goto destroy_uniform_buffers;
+  }
+
+  if (!vgltf_renderer_create_descriptor_sets(renderer)) {
+    VGLTF_LOG_ERR("Couldn't create descriptor sets");
+    goto destroy_descriptor_pool;
+  }
+
+  if (!vgltf_renderer_create_command_buffer(renderer)) {
+    VGLTF_LOG_ERR("Couldn't create command buffer");
+    goto destroy_descriptor_pool;
+  }
+
+  if (!vgltf_renderer_create_sync_objects(renderer)) {
+    VGLTF_LOG_ERR("Couldn't create sync objects");
+    goto destroy_descriptor_pool;
+  }
+
+  return true;
+
+destroy_descriptor_pool:
+  vkDestroyDescriptorPool(renderer->device.device, renderer->descriptor_pool,
+                          nullptr);
+destroy_uniform_buffers:
+  for (int i = 0; i < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; i++) {
+    // The uniform buffers are mapped when they are created: unmap them
+    // before destroying them, exactly like vgltf_renderer_deinit does.
+    vmaUnmapMemory(renderer->device.allocator,
+                   renderer->uniform_buffers[i].allocation);
+    vmaDestroyBuffer(renderer->device.allocator,
+                     renderer->uniform_buffers[i].buffer,
+                     renderer->uniform_buffers[i].allocation);
+  }
+destroy_index_buffer:
+  vmaDestroyBuffer(renderer->device.allocator, renderer->index_buffer.buffer,
+                   renderer->index_buffer.allocation);
+destroy_vertex_buffer:
+  vmaDestroyBuffer(renderer->device.allocator, renderer->vertex_buffer.buffer,
+                   renderer->vertex_buffer.allocation);
+destroy_model:
+  // TODO
+destroy_texture_sampler:
+  vkDestroySampler(renderer->device.device, renderer->texture_sampler, nullptr);
+destroy_texture_image_view:
+  vkDestroyImageView(renderer->device.device, renderer->texture_image_view,
+                     nullptr);
+destroy_texture_image:
+  vmaDestroyImage(renderer->device.allocator, renderer->texture_image.image,
+                  renderer->texture_image.allocation);
+  // The three labels below used to be ordered depth resources -> command
+  // pool -> framebuffers. That leaked the depth resources and the command
+  // pool when the texture image failed, and destroyed framebuffers that
+  // were never created when framebuffer creation failed.
+destroy_frame_buffers:
+  for (uint32_t swapchain_framebuffer_index = 0;
+       swapchain_framebuffer_index < renderer->swapchain.swapchain_image_count;
+       swapchain_framebuffer_index++) {
+    vkDestroyFramebuffer(
+        renderer->device.device,
+        renderer->swapchain_framebuffers[swapchain_framebuffer_index], nullptr);
+  }
+destroy_depth_resources:
+  vkDestroyImageView(renderer->device.device, renderer->depth_image_view,
+                     nullptr);
+  vmaDestroyImage(renderer->device.allocator, renderer->depth_image.image,
+                  renderer->depth_image.allocation);
+destroy_command_pool:
+  vkDestroyCommandPool(renderer->device.device, renderer->command_pool,
+                       nullptr);
+destroy_graphics_pipeline:
+  vkDestroyPipeline(renderer->device.device, renderer->graphics_pipeline,
+                    nullptr);
+  vkDestroyPipelineLayout(renderer->device.device, renderer->pipeline_layout,
+                          nullptr);
+destroy_descriptor_set_layout:
+  vkDestroyDescriptorSetLayout(renderer->device.device,
+                               renderer->descriptor_set_layout, nullptr);
+destroy_render_pass:
+  vkDestroyRenderPass(renderer->device.device, renderer->render_pass, nullptr);
+destroy_swapchain:
+  vgltf_vk_swapchain_deinit(&renderer->swapchain, &renderer->device);
+destroy_device:
+  vgltf_vk_device_deinit(&renderer->device);
+destroy_surface:
+  vgltf_vk_surface_deinit(&renderer->surface, &renderer->instance);
+destroy_instance:
+  if (enable_validation_layers) {
+    destroy_debug_utils_messenger_ext(renderer->instance.instance,
+                                      renderer->debug_messenger, nullptr);
+  }
+  vgltf_vk_instance_deinit(&renderer->instance);
+err:
+  return false;
+}
+// Destroys everything owned by `renderer`. Expects a renderer that was
+// successfully initialized by vgltf_renderer_init.
+void vgltf_renderer_deinit(struct vgltf_renderer *renderer) {
+ // Nothing may be destroyed while the GPU could still be using it
+ vkDeviceWaitIdle(renderer->device.device);
+ // NOTE(review): presumably releases the swapchain and the objects that
+ // depend on it (framebuffers, depth resources), none of which are destroyed
+ // explicitly below - confirm against vgltf_renderer_cleanup_swapchain.
+ vgltf_renderer_cleanup_swapchain(renderer);
+ for (int i = 0; i < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; i++) {
+ // The uniform buffers stay mapped for their whole lifetime: unmap them
+ // before destroying them
+ vmaUnmapMemory(renderer->device.allocator,
+ renderer->uniform_buffers[i].allocation);
+ vmaDestroyBuffer(renderer->device.allocator,
+ renderer->uniform_buffers[i].buffer,
+ renderer->uniform_buffers[i].allocation);
+ }
+ vmaDestroyBuffer(renderer->device.allocator, renderer->index_buffer.buffer,
+ renderer->index_buffer.allocation);
+ vmaDestroyBuffer(renderer->device.allocator, renderer->vertex_buffer.buffer,
+ renderer->vertex_buffer.allocation);
+ vkDestroySampler(renderer->device.device, renderer->texture_sampler, nullptr);
+ vkDestroyImageView(renderer->device.device, renderer->texture_image_view,
+ nullptr);
+ vmaDestroyImage(renderer->device.allocator, renderer->texture_image.image,
+ renderer->texture_image.allocation);
+ vkDestroyPipeline(renderer->device.device, renderer->graphics_pipeline,
+ nullptr);
+ vkDestroyPipelineLayout(renderer->device.device, renderer->pipeline_layout,
+ nullptr);
+ // Destroying the pool also frees the descriptor sets allocated from it
+ vkDestroyDescriptorPool(renderer->device.device, renderer->descriptor_pool,
+ nullptr);
+ vkDestroyDescriptorSetLayout(renderer->device.device,
+ renderer->descriptor_set_layout, nullptr);
+ vkDestroyRenderPass(renderer->device.device, renderer->render_pass, nullptr);
+ for (int i = 0; i < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; i++) {
+ vkDestroySemaphore(renderer->device.device,
+ renderer->image_available_semaphores[i], nullptr);
+ vkDestroySemaphore(renderer->device.device,
+ renderer->render_finished_semaphores[i], nullptr);
+ vkDestroyFence(renderer->device.device, renderer->in_flight_fences[i],
+ nullptr);
+ }
+ // Destroying the pool also frees the command buffers allocated from it
+ vkDestroyCommandPool(renderer->device.device, renderer->command_pool,
+ nullptr);
+ // Device, surface, debug messenger and instance go last, in that order
+ vgltf_vk_device_deinit(&renderer->device);
+ vgltf_vk_surface_deinit(&renderer->surface, &renderer->instance);
+ if (enable_validation_layers) {
+ destroy_debug_utils_messenger_ext(renderer->instance.instance,
+ renderer->debug_messenger, nullptr);
+ }
+ vgltf_vk_instance_deinit(&renderer->instance);
+}
+// Records a new window size and flags the framebuffer as resized.
+//
+// Sizes with a zero (or negative) dimension are ignored, as are sizes equal
+// to the one already stored. A change of either dimension counts as a
+// resize: the previous `&&` between the two comparisons dropped every resize
+// that only changed the width or only changed the height.
+void vgltf_renderer_on_window_resized(struct vgltf_renderer *renderer,
+                                      struct vgltf_window_size size) {
+  if (size.width <= 0 || size.height <= 0) {
+    return;
+  }
+
+  if (size.width != renderer->window_size.width ||
+      size.height != renderer->window_size.height) {
+    renderer->window_size = size;
+    renderer->framebuffer_resized = true;
+  }
+}
diff --git a/src/renderer/renderer.h b/src/renderer/renderer.h
new file mode 100644
index 0000000..79e1f3d
--- /dev/null
+++ b/src/renderer/renderer.h
@@ -0,0 +1,126 @@
+#ifndef VGLTF_RENDERER_H
+#define VGLTF_RENDERER_H
+
+#include "../maths.h"
+#include "../platform.h"
+#include "vma_usage.h"
+#include <vulkan/vulkan.h>
+
+// A single vertex: position, color and texture coordinates.
+// NOTE(review): the binding and attribute descriptions declared below
+// presumably describe this exact layout - keep them in sync when changing it.
+struct vgltf_vertex {
+ vgltf_vec3 position;
+ vgltf_vec3 color;
+ vgltf_vec2 texture_coordinates;
+};
+VkVertexInputBindingDescription vgltf_vertex_binding_description(void);
+
+// Fixed-capacity list of vertex attribute descriptions, returned by value
+// from vgltf_vertex_attribute_descriptions.
+struct vgltf_vertex_input_attribute_descriptions {
+ VkVertexInputAttributeDescription descriptions[3];
+ // presumably the number of valid entries in `descriptions` - TODO confirm
+ uint32_t count;
+};
+struct vgltf_vertex_input_attribute_descriptions
+vgltf_vertex_attribute_descriptions(void);
+
+// Contents of one uniform buffer: model, view and projection matrices, each
+// aligned on 16 bytes.
+// NOTE(review): the layout presumably has to match the uniform block
+// declared in the vertex shader - confirm against shaders/triangle.vert.
+struct vgltf_renderer_uniform_buffer_object {
+ alignas(16) vgltf_mat4 model;
+ alignas(16) vgltf_mat4 view;
+ alignas(16) vgltf_mat4 projection;
+};
+
+// A Vulkan buffer together with the VMA allocation backing it. Both handles
+// are needed to destroy it with vmaDestroyBuffer.
+struct vgltf_renderer_allocated_buffer {
+ VkBuffer buffer;
+ VmaAllocation allocation;
+ // NOTE(review): not visible from this header whether `info` gets filled in
+ VmaAllocationInfo info;
+};
+
+// A Vulkan image together with the VMA allocation backing it. Both handles
+// are needed to destroy it with vmaDestroyImage.
+struct vgltf_renderer_allocated_image {
+ VkImage image;
+ VmaAllocation allocation;
+ // NOTE(review): not visible from this header whether `info` gets filled in
+ VmaAllocationInfo info;
+};
+
+// Wrapper around the Vulkan instance handle.
+struct vgltf_vk_instance {
+ VkInstance instance;
+};
+
+// Everything tied to the selected GPU: the physical device, the logical
+// device with its graphics and present queues, and the VMA allocator used
+// for the renderer's buffers and images.
+struct vgltf_vk_device {
+ VkPhysicalDevice physical_device;
+ VkDevice device;
+ VkQueue graphics_queue;
+ VkQueue present_queue;
+ VmaAllocator allocator;
+};
+
+// Wrapper around the Vulkan surface handle.
+struct vgltf_vk_surface {
+ VkSurfaceKHR surface;
+};
+
+// Capacity of the fixed-size per-swapchain-image arrays
+constexpr int VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT = 32;
+// The swapchain, its images and one image view per image.
+// swapchain_image_count is used as the number of valid entries when
+// iterating per-image arrays.
+struct vgltf_vk_swapchain {
+ VkSwapchainKHR swapchain;
+ VkFormat swapchain_image_format;
+ VkImage swapchain_images[VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT];
+ VkImageView swapchain_image_views[VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT];
+ VkExtent2D swapchain_extent;
+ uint32_t swapchain_image_count;
+};
+
+// A pipeline together with its layout.
+// NOTE(review): struct vgltf_renderer stores its pipeline and layout as two
+// separate fields rather than through this struct - check whether this type
+// is used anywhere.
+struct vgltf_vk_pipeline {
+ VkPipelineLayout layout;
+ VkPipeline pipeline;
+};
+
+// Number of frames that can be in flight at once; sizes every per-frame
+// array below
+constexpr int VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT = 2;
+// The whole renderer state. Set up by vgltf_renderer_init and torn down by
+// vgltf_renderer_deinit.
+// The struct embeds the vertex and index arrays, which makes it several
+// megabytes large.
+struct vgltf_renderer {
+ struct vgltf_vk_instance instance;
+ struct vgltf_vk_device device;
+ VkDebugUtilsMessengerEXT debug_messenger;
+ struct vgltf_vk_surface surface;
+ struct vgltf_vk_swapchain swapchain;
+ struct vgltf_renderer_allocated_image depth_image;
+ VkImageView depth_image_view;
+
+ VkRenderPass render_pass;
+ VkDescriptorSetLayout descriptor_set_layout;
+
+ VkDescriptorPool descriptor_pool;
+ // One descriptor set per frame in flight
+ VkDescriptorSet descriptor_sets[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT];
+ VkPipelineLayout pipeline_layout;
+ VkPipeline graphics_pipeline;
+
+ // One framebuffer per swapchain image
+ VkFramebuffer swapchain_framebuffers[VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT];
+
+ VkCommandPool command_pool;
+ // Per-frame-in-flight command buffers and synchronization objects
+ VkCommandBuffer command_buffer[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT];
+ VkSemaphore
+ image_available_semaphores[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT];
+ VkSemaphore
+ render_finished_semaphores[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT];
+ VkFence in_flight_fences[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT];
+
+ // Per-frame-in-flight uniform buffers, and the address each one is mapped at
+ struct vgltf_renderer_allocated_buffer
+ uniform_buffers[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT];
+ void *mapped_uniform_buffers[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT];
+
+ uint32_t mip_level_count;
+ struct vgltf_renderer_allocated_image texture_image;
+ VkImageView texture_image_view;
+ VkSampler texture_sampler;
+ // CPU-side copy of the model geometry.
+ // NOTE(review): indices are 16-bit, so they can only address vertices
+ // 0..65535 although `vertices` has room for 100000 - confirm that loaded
+ // models never exceed 65536 vertices, or widen the index type.
+ struct vgltf_vertex vertices[100000];
+ int vertex_count;
+ uint16_t indices[100000];
+ int index_count;
+ struct vgltf_renderer_allocated_buffer vertex_buffer;
+ struct vgltf_renderer_allocated_buffer index_buffer;
+
+ struct vgltf_window_size window_size;
+ // Index of the frame in flight being recorded, wraps around at
+ // VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT
+ uint32_t current_frame;
+ // Set by vgltf_renderer_on_window_resized when the window size changes
+ bool framebuffer_resized;
+};
+bool vgltf_renderer_init(struct vgltf_renderer *renderer,
+ struct vgltf_platform *platform);
+void vgltf_renderer_deinit(struct vgltf_renderer *renderer);
+bool vgltf_renderer_render_frame(struct vgltf_renderer *renderer);
+void vgltf_renderer_on_window_resized(struct vgltf_renderer *renderer,
+ struct vgltf_window_size size);
+
+#endif // VGLTF_RENDERER_H
diff --git a/src/renderer/vma_usage.cpp b/src/renderer/vma_usage.cpp
new file mode 100644
index 0000000..83006a1
--- /dev/null
+++ b/src/renderer/vma_usage.cpp
@@ -0,0 +1,4 @@
+#include "vma_usage.h"
+
+#define VMA_IMPLEMENTATION
+#include <vk_mem_alloc.h>
diff --git a/src/renderer/vma_usage.h b/src/renderer/vma_usage.h
new file mode 100644
index 0000000..e9b5aa4
--- /dev/null
+++ b/src/renderer/vma_usage.h
@@ -0,0 +1,6 @@
+#ifndef VGLTF_VMA_USAGE_H
+#define VGLTF_VMA_USAGE_H
+
+#include <vk_mem_alloc.h>
+
+#endif // VGLTF_VMA_USAGE_H
diff --git a/src/str.c b/src/str.c
new file mode 100644
index 0000000..9c68d43
--- /dev/null
+++ b/src/str.c
@@ -0,0 +1,181 @@
+#include "str.h"
+#include "alloc.h"
+#include "hash.h"
+#include "platform.h"
+#include <assert.h>
+#include <stdarg.h>
+#include <string.h>
+
+// Builds a view over the null-terminated string `str` (must not be null).
+// The view borrows `str`: no copy is made, and the terminator is not counted
+// in the view's length.
+struct vgltf_string_view vgltf_string_view_from_literal(const char *str) {
+  assert(str);
+  struct vgltf_string_view view = {.data = str, .length = strlen(str)};
+  return view;
+}
+// Builds a view over the bytes of `string`. The view borrows string.data: no
+// copy is made.
+struct vgltf_string_view vgltf_string_view_from_string(struct vgltf_string string) {
+ return (struct vgltf_string_view){.length = string.length, .data = string.data};
+}
+// Returns the byte at `index` in the view.
+// `string_view` must not be null and `index` must be lower than the view's
+// length; both are checked with assert only.
+char vgltf_string_view_at(const struct vgltf_string_view *string_view,
+                          size_t index) {
+  assert(string_view);
+  assert(index < string_view->length);
+
+  const char *bytes = string_view->data;
+  return bytes[index];
+}
+// Returns true when both views have the same length and the same bytes.
+//
+// The bytes are compared with memcmp: views are length-delimited, so a
+// comparison that stops at the first '\0' (strncmp) reports views that only
+// differ after an embedded '\0' as equal. Empty views are equal without
+// their data pointers being read.
+bool vgltf_string_view_eq(struct vgltf_string_view view,
+                          struct vgltf_string_view other) {
+  if (view.length != other.length) {
+    return false;
+  }
+  if (view.length == 0) {
+    return true;
+  }
+  return memcmp(view.data, other.data, view.length) == 0;
+}
+// Returns the length of the view, in bytes. `string_view` must not be null.
+size_t vgltf_string_view_length(const struct vgltf_string_view *string_view) {
+ assert(string_view);
+ return string_view->length;
+}
+
+// Hashes the bytes of the view with vgltf_hash_fnv_1a.
+uint64_t vgltf_string_view_hash(const struct vgltf_string_view view) {
+ return vgltf_hash_fnv_1a(view.data, view.length);
+}
+
+// Decodes the UTF-8 sequence starting at byte `offset` of `view` and stores
+// the result in `*codepoint`.
+//
+// Returns the size of the sequence in bytes (1 to 4), or 0 after logging an
+// error when the lead byte is invalid, when the sequence would run past the
+// end of the view, or when a continuation byte is malformed. `*codepoint`
+// may hold a partial value when 0 is returned.
+//
+// `codepoint` must not be null and `offset` must be lower than view.length
+// (both checked with assert only).
+// NOTE(review): overlong encodings and surrogate code points are not
+// rejected.
+int vgltf_string_view_utf8_codepoint_at_offset(struct vgltf_string_view view,
+                                               size_t offset,
+                                               uint32_t *codepoint) {
+  assert(codepoint);
+  assert(offset < view.length);
+
+  const unsigned char *s = (const unsigned char *)&view.data[offset];
+
+  int size;
+  if ((*s & 0x80) == 0) {
+    *codepoint = *s;
+    size = 1;
+  } else if ((*s & 0xE0) == 0xC0) {
+    *codepoint = *s & 0x1f;
+    size = 2;
+  } else if ((*s & 0xF0) == 0xE0) {
+    *codepoint = *s & 0x0f;
+    size = 3;
+  } else if ((*s & 0xF8) == 0xF0) {
+    *codepoint = *s & 0x07;
+    size = 4;
+  } else {
+    VGLTF_LOG_ERR("Invalid UTF-8 sequence");
+    return 0;
+  }
+
+  // The lead byte announces `size` bytes: make sure they all belong to the
+  // view before reading the continuation bytes.
+  if ((size_t)size > view.length - offset) {
+    VGLTF_LOG_ERR("Truncated UTF-8 sequence");
+    return 0;
+  }
+
+  for (int i = 1; i < size; i++) {
+    if ((s[i] & 0xC0) != 0x80) {
+      VGLTF_LOG_ERR("Invalid UTF-8 continuation byte");
+      return 0;
+    }
+
+    *codepoint = (*codepoint << 6) | (s[i] & 0x3F);
+  }
+
+  return size;
+}
+// Encodes `codepoint` as UTF-8 into `encoded_codepoint`, which must point to
+// at least 4 chars. No terminator is written.
+//
+// Returns the number of bytes written (1 to 4), or -1 when `codepoint` is
+// not a Unicode scalar value: above U+10FFFF, or in the surrogate range
+// U+D800..U+DFFF, which UTF-8 is not allowed to encode. Nothing is written
+// when -1 is returned.
+int vgltf_string_utf8_encode_codepoint(uint32_t codepoint,
+                                       char encoded_codepoint[4]) {
+  assert(encoded_codepoint);
+  if (codepoint > 0x10FFFF) {
+    return -1;
+  }
+  if (codepoint >= 0xD800 && codepoint <= 0xDFFF) {
+    return -1;
+  }
+
+  if (codepoint <= 0x7F) {
+    encoded_codepoint[0] = (char)codepoint;
+    return 1;
+  } else if (codepoint <= 0x7FF) {
+    encoded_codepoint[0] = (char)(0xC0 | ((codepoint >> 6) & 0x1F));
+    encoded_codepoint[1] = (char)(0x80 | (codepoint & 0x3F));
+    return 2;
+  } else if (codepoint <= 0xFFFF) {
+    encoded_codepoint[0] = (char)(0xE0 | ((codepoint >> 12) & 0x0F));
+    encoded_codepoint[1] = (char)(0x80 | ((codepoint >> 6) & 0x3F));
+    encoded_codepoint[2] = (char)(0x80 | (codepoint & 0x3F));
+    return 3;
+  } else {
+    encoded_codepoint[0] = (char)(0xF0 | ((codepoint >> 18) & 0x07));
+    encoded_codepoint[1] = (char)(0x80 | ((codepoint >> 12) & 0x3F));
+    encoded_codepoint[2] = (char)(0x80 | ((codepoint >> 6) & 0x3F));
+    encoded_codepoint[3] = (char)(0x80 | (codepoint & 0x3F));
+    return 4;
+  }
+}
+
+// Creates a string holding a copy of the null-terminated string `str`.
+//
+// The copy is allocated with `allocator` and is itself null-terminated; the
+// terminator is not counted in the returned length. Panics if the
+// allocation fails. Release the result with vgltf_string_deinit.
+struct vgltf_string
+vgltf_string_from_null_terminated(struct vgltf_allocator *allocator,
+                                  const char *str) {
+  assert(allocator);
+  assert(str);
+  size_t length = strlen(str);
+  char *data = vgltf_allocator_allocate(allocator, length + 1);
+  if (!data) {
+    VGLTF_PANIC("Couldn't allocate string");
+  }
+  // strncpy(data, str, length) copied the characters but never wrote the
+  // terminator, leaving data[length] uninitialized.
+  memcpy(data, str, length);
+  data[length] = '\0';
+  return (struct vgltf_string){.data = data, .length = length};
+}
+// Creates a copy of `string`, allocated with `allocator`.
+//
+// The copy is null-terminated; the terminator is not counted in the
+// returned length. Panics if the allocation fails, like
+// vgltf_string_from_null_terminated. Release the result with
+// vgltf_string_deinit.
+struct vgltf_string vgltf_string_clone(struct vgltf_allocator *allocator,
+                                       const struct vgltf_string string) {
+  assert(allocator);
+
+  size_t length = string.length;
+  char *data = vgltf_allocator_allocate(allocator, length + 1);
+  if (!data) {
+    VGLTF_PANIC("Couldn't allocate string");
+  }
+  if (length > 0) {
+    memcpy(data, string.data, length);
+  }
+  data[length] = '\0';
+
+  return (struct vgltf_string){.data = data, .length = length};
+}
+// Creates a string made of the bytes of `head` followed by the bytes of
+// `tail`, allocated with `allocator`.
+//
+// The result is null-terminated; the terminator is not counted in the
+// returned length. Panics if the allocation fails, like
+// vgltf_string_from_null_terminated. Release the result with
+// vgltf_string_deinit.
+struct vgltf_string vgltf_string_concatenate(struct vgltf_allocator *allocator,
+                                             struct vgltf_string_view head,
+                                             struct vgltf_string_view tail) {
+  assert(allocator);
+  size_t length = head.length + tail.length;
+  char *data = vgltf_allocator_allocate(allocator, length + 1);
+  if (!data) {
+    VGLTF_PANIC("Couldn't allocate string");
+  }
+  if (head.length > 0) {
+    memcpy(data, head.data, head.length);
+  }
+  if (tail.length > 0) {
+    memcpy(data + head.length, tail.data, tail.length);
+  }
+  data[length] = '\0';
+  return (struct vgltf_string){.data = data, .length = length};
+}
+// printf-style string construction: formats the variadic arguments according
+// to `fmt` and returns the result as a string allocated with `allocator`.
+// Variadic front-end of vgltf_string_vformatted.
+struct vgltf_string vgltf_string_formatted(struct vgltf_allocator *allocator,
+                                           struct vgltf_string_view fmt, ...) {
+  struct vgltf_string result;
+
+  va_list format_args;
+  va_start(format_args, fmt);
+  result = vgltf_string_vformatted(allocator, fmt, format_args);
+  va_end(format_args);
+
+  return result;
+}
+// va_list flavour of vgltf_string_formatted: formats `args` according to
+// `fmt` and returns the result as a null-terminated string allocated with
+// `allocator`.
+//
+// The output is measured first and then formatted straight into the
+// allocation, so it is not limited in size. The previous implementation
+// formatted into a 1024-byte stack buffer and then copied as many bytes as
+// vsnprintf reported, which reads past that buffer for longer outputs.
+//
+// Panics if formatting or the allocation fails.
+// NOTE(review): fmt.data is handed to vsnprintf as is, so it has to be
+// null-terminated; fmt.length is not taken into account.
+struct vgltf_string vgltf_string_vformatted(struct vgltf_allocator *allocator,
+                                            struct vgltf_string_view fmt,
+                                            va_list args) {
+  assert(allocator);
+
+  // `args` can only be walked once: measure with a copy
+  va_list measure_args;
+  va_copy(measure_args, args);
+  int required = vsnprintf(NULL, 0, fmt.data, measure_args);
+  va_end(measure_args);
+  if (required < 0) {
+    VGLTF_PANIC("Couldn't format string");
+  }
+
+  size_t length = (size_t)required;
+  char *data = vgltf_allocator_allocate(allocator, length + 1);
+  if (!data) {
+    VGLTF_PANIC("Couldn't allocate string");
+  }
+  // Writes `length` characters plus the terminator
+  vsnprintf(data, length + 1, fmt.data, args);
+  return (struct vgltf_string){.data = data, .length = length};
+}
+// Frees the data of `string` with `allocator`. Neither argument may be null.
+// string->data and string->length are left untouched, so `string` must not
+// be used (or deinitialized again) afterwards.
+void vgltf_string_deinit(struct vgltf_allocator *allocator,
+ struct vgltf_string *string) {
+ assert(allocator);
+ assert(string);
+ vgltf_allocator_free(allocator, string->data);
+}
+// Returns the length of the string, in bytes. `string` must not be null
+// (checked with assert, like vgltf_string_view_length does).
+size_t vgltf_string_length(const struct vgltf_string *string) {
+  assert(string);
+  return string->length;
+}
+// Returns true when `string` and `view` have the same length and the same
+// bytes.
+//
+// The bytes are compared with memcmp: both sides are length-delimited, so a
+// comparison that stops at the first '\0' (strncmp) reports contents that
+// only differ after an embedded '\0' as equal. Empty contents are equal
+// without the data pointers being read.
+bool vgltf_string_eq_view(const struct vgltf_string string,
+                          const struct vgltf_string_view view) {
+  if (string.length != view.length) {
+    return false;
+  }
+  if (string.length == 0) {
+    return true;
+  }
+  return memcmp(string.data, view.data, string.length) == 0;
+}
+// Hashes the bytes of the string with vgltf_hash_fnv_1a. The call has the
+// same shape as the one in vgltf_string_view_hash.
+uint64_t vgltf_string_hash(const struct vgltf_string string) {
+ return vgltf_hash_fnv_1a(string.data, string.length);
+}
+// Returns true when both strings have the same length and the same bytes.
+//
+// The bytes are compared with memcmp: strings are length-delimited, so a
+// comparison that stops at the first '\0' (strncmp) reports strings that
+// only differ after an embedded '\0' as equal. Empty strings are equal
+// without their data pointers being read.
+bool vgltf_string_eq(struct vgltf_string string, struct vgltf_string other) {
+  if (string.length != other.length) {
+    return false;
+  }
+  if (string.length == 0) {
+    return true;
+  }
+  return memcmp(string.data, other.data, string.length) == 0;
+}
diff --git a/src/str.h b/src/str.h
new file mode 100644
index 0000000..c0e4e5c
--- /dev/null
+++ b/src/str.h
@@ -0,0 +1,62 @@
+#ifndef VGLTF_STR_H
+#define VGLTF_STR_H
+
+#include "alloc.h"
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h> // IWYU pragma: keep
+
+// Builds a struct vgltf_string_view over the null-terminated string `str`,
+// measuring it with strlen.
+// `str` is expanded twice: don't pass an expression with side effects.
+#define SV(str) \
+ (struct vgltf_string_view) { .data = str, .length = strlen(str) }
+
+struct vgltf_string;
+// Read-only view over `length` bytes starting at `data`.
+// NOTE(review): the view presumably doesn't own `data` (nothing in this
+// header frees a view) - the viewed bytes have to outlive it.
+struct vgltf_string_view {
+ const char *data;
+ size_t length;
+};
+
+struct vgltf_string_view vgltf_string_view_from_literal(const char *str);
+struct vgltf_string_view vgltf_string_view_from_string(struct vgltf_string string);
+size_t vgltf_string_view_length(const struct vgltf_string_view *string_view);
+char vgltf_string_view_at(const struct vgltf_string_view *string_view,
+ size_t index);
+bool vgltf_string_view_eq(struct vgltf_string_view view,
+ struct vgltf_string_view other);
+uint64_t vgltf_string_view_hash(const struct vgltf_string_view view);
+// Fetches the next utf8 codepoint in the string at the given offset
+// Returns the size of the codepoint in bytes, 0 in case of error
+int vgltf_string_view_utf8_codepoint_at_offset(struct vgltf_string_view view,
+ size_t offset,
+ uint32_t *codepoint);
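+// Encodes codepoint as UTF-8 into encoded_codepoint, which has to be a char[4]
+// Returns the number of bytes written, -1 if the codepoint can't be encoded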
+int vgltf_string_utf8_encode_codepoint(uint32_t codepoint,
+ char encoded_codepoint[4]);
+
+// String whose data is allocated through a vgltf_allocator and released
+// with vgltf_string_deinit.
+// `length` is the number of bytes in `data`, not counting the '\0' that the
+// constructors in str.c append after them.
+struct vgltf_string {
+ char *data;
+ size_t length;
+};
+struct vgltf_string
+vgltf_string_from_null_terminated(struct vgltf_allocator *allocator,
+ const char *str);
+struct vgltf_string vgltf_string_clone(struct vgltf_allocator *allocator,
+ const struct vgltf_string string);
+struct vgltf_string vgltf_string_concatenate(struct vgltf_allocator *allocator,
+ struct vgltf_string_view head,
+ struct vgltf_string_view tail);
+struct vgltf_string vgltf_string_formatted(struct vgltf_allocator *allocator,
+ struct vgltf_string_view fmt, ...);
+struct vgltf_string vgltf_string_vformatted(struct vgltf_allocator *allocator,
+ struct vgltf_string_view fmt,
+ va_list args);
+void vgltf_string_deinit(struct vgltf_allocator *allocator,
+ struct vgltf_string *string);
+size_t vgltf_string_length(const struct vgltf_string *string);
+bool vgltf_string_eq_view(const struct vgltf_string string,
+ const struct vgltf_string_view view);
+uint64_t vgltf_string_hash(const struct vgltf_string string);
+bool vgltf_string_eq(struct vgltf_string string, struct vgltf_string other);
+
+#endif // VGLTF_STR_H
diff --git a/thirdpartylicenses.md b/thirdpartylicenses.md
new file mode 100644
index 0000000..3d898b6
--- /dev/null
+++ b/thirdpartylicenses.md
@@ -0,0 +1,46 @@
+# stb_image:
+Public Domain
+
+# vk_mem_alloc:
+Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+# tinyobjloader_c:
+The MIT License (MIT)
+
+Copyright (c) 2016 - 2019 Syoyo Fujita and many contributors.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/vendor/stb_image.h b/vendor/stb_image.h
new file mode 100644
index 0000000..9eedabe
--- /dev/null
+++ b/vendor/stb_image.h
@@ -0,0 +1,7988 @@
+/* stb_image - v2.30 - public domain image loader - http://nothings.org/stb
+ no warranty implied; use at your own risk
+
+ Do this:
+ #define STB_IMAGE_IMPLEMENTATION
+ before you include this file in *one* C or C++ file to create the implementation.
+
+ // i.e. it should look like this:
+ #include ...
+ #include ...
+ #include ...
+ #define STB_IMAGE_IMPLEMENTATION
+ #include "stb_image.h"
+
+ You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
+ And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free
+
+
+ QUICK NOTES:
+ Primarily of interest to game developers and other people who can
+ avoid problematic images and only need the trivial interface
+
+ JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
+ PNG 1/2/4/8/16-bit-per-channel
+
+ TGA (not sure what subset, if a subset)
+ BMP non-1bpp, non-RLE
+ PSD (composited view only, no extra channels, 8/16 bit-per-channel)
+
+ GIF (*comp always reports as 4-channel)
+ HDR (radiance rgbE format)
+ PIC (Softimage PIC)
+ PNM (PPM and PGM binary only)
+
+ Animated GIF still needs a proper API, but here's one way to do it:
+ http://gist.github.com/urraka/685d9a6340b26b830d49
+
+ - decode from memory or through FILE (define STBI_NO_STDIO to remove code)
+ - decode from arbitrary I/O callbacks
+ - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)
+
+ Full documentation under "DOCUMENTATION" below.
+
+
+LICENSE
+
+ See end of file for license information.
+
+RECENT REVISION HISTORY:
+
+ 2.30 (2024-05-31) avoid erroneous gcc warning
+ 2.29 (2023-05-xx) optimizations
+ 2.28 (2023-01-29) many error fixes, security errors, just tons of stuff
+ 2.27 (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes
+ 2.26 (2020-07-13) many minor fixes
+ 2.25 (2020-02-02) fix warnings
+ 2.24 (2020-02-02) fix warnings; thread-local failure_reason and flip_vertically
+ 2.23 (2019-08-11) fix clang static analysis warning
+ 2.22 (2019-03-04) gif fixes, fix warnings
+ 2.21 (2019-02-25) fix typo in comment
+ 2.20 (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs
+ 2.19 (2018-02-11) fix warning
+ 2.18 (2018-01-30) fix warnings
+ 2.17 (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings
+ 2.16 (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes
+ 2.15 (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC
+ 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
+ 2.13 (2016-12-04) experimental 16-bit API, only for PNG so far; fixes
+ 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
+ 2.11 (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
+ RGB-format JPEG; remove white matting in PSD;
+ allocate large structures on the stack;
+ correct channel count for PNG & BMP
+ 2.10 (2016-01-22) avoid warning introduced in 2.09
+ 2.09 (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED
+
+ See end of file for full revision history.
+
+
+ ============================ Contributors =========================
+
+ Image formats Extensions, features
+ Sean Barrett (jpeg, png, bmp) Jetro Lauha (stbi_info)
+ Nicolas Schulz (hdr, psd) Martin "SpartanJ" Golini (stbi_info)
+ Jonathan Dummer (tga) James "moose2000" Brown (iPhone PNG)
+ Jean-Marc Lienher (gif) Ben "Disch" Wenger (io callbacks)
+ Tom Seddon (pic) Omar Cornut (1/2/4-bit PNG)
+ Thatcher Ulrich (psd) Nicolas Guillemot (vertical flip)
+ Ken Miller (pgm, ppm) Richard Mitton (16-bit PSD)
+ github:urraka (animated gif) Junggon Kim (PNM comments)
+ Christopher Forseth (animated gif) Daniel Gibson (16-bit TGA)
+ socks-the-fox (16-bit PNG)
+ Jeremy Sawicki (handle all ImageNet JPGs)
+ Optimizations & bugfixes Mikhail Morozov (1-bit BMP)
+ Fabian "ryg" Giesen Anael Seghezzi (is-16-bit query)
+ Arseny Kapoulkine Simon Breuss (16-bit PNM)
+ John-Mark Allen
+ Carmelo J Fdez-Aguera
+
+ Bug & warning fixes
+ Marc LeBlanc David Woo Guillaume George Martins Mozeiko
+ Christpher Lloyd Jerry Jansson Joseph Thomson Blazej Dariusz Roszkowski
+ Phil Jordan Dave Moore Roy Eltham
+ Hayaki Saito Nathan Reed Won Chun
+ Luke Graham Johan Duparc Nick Verigakis the Horde3D community
+ Thomas Ruf Ronny Chevalier github:rlyeh
+ Janez Zemva John Bartholomew Michal Cichon github:romigrou
+ Jonathan Blow Ken Hamada Tero Hanninen github:svdijk
+ Eugene Golushkov Laurent Gomila Cort Stratton github:snagar
+ Aruelien Pocheville Sergio Gonzalez Thibault Reuille github:Zelex
+ Cass Everitt Ryamond Barbiero github:grim210
+ Paul Du Bois Engin Manap Aldo Culquicondor github:sammyhw
+ Philipp Wiesemann Dale Weiler Oriol Ferrer Mesia github:phprus
+ Josh Tobin Neil Bickford Matthew Gregan github:poppolopoppo
+ Julian Raschke Gregory Mullen Christian Floisand github:darealshinji
+ Baldur Karlsson Kevin Schmidt JR Smith github:Michaelangel007
+ Brad Weinberger Matvey Cherevko github:mosra
+ Luca Sas Alexander Veselov Zack Middleton [reserved]
+ Ryan C. Gordon [reserved] [reserved]
+ DO NOT ADD YOUR NAME HERE
+
+ Jacko Dirks
+
+ To add your name to the credits, pick a random blank space in the middle and fill it.
+ 80% of merge conflicts on stb PRs are due to people adding their name at the end
+ of the credits.
+*/
+
+#ifndef STBI_INCLUDE_STB_IMAGE_H
+#define STBI_INCLUDE_STB_IMAGE_H
+
+// DOCUMENTATION
+//
+// Limitations:
+// - no 12-bit-per-channel JPEG
+// - no JPEGs with arithmetic coding
+// - GIF always returns *comp=4
+//
+// Basic usage (see HDR discussion below for HDR usage):
+// int x,y,n;
+// unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
+// // ... process data if not NULL ...
+// // ... x = width, y = height, n = # 8-bit components per pixel ...
+// // ... replace '0' with '1'..'4' to force that many components per pixel
+// // ... but 'n' will always be the number that it would have been if you said 0
+// stbi_image_free(data);
+//
+// Standard parameters:
+// int *x -- outputs image width in pixels
+// int *y -- outputs image height in pixels
+// int *channels_in_file -- outputs # of image components in image file
+// int desired_channels -- if non-zero, # of image components requested in result
+//
+// The return value from an image loader is an 'unsigned char *' which points
+// to the pixel data, or NULL on an allocation failure or if the image is
+// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
+// with each pixel consisting of N interleaved 8-bit components; the first
+// pixel pointed to is top-left-most in the image. There is no padding between
+// image scanlines or between pixels, regardless of format. The number of
+// components N is 'desired_channels' if desired_channels is non-zero, or
+// *channels_in_file otherwise. If desired_channels is non-zero,
+// *channels_in_file has the number of components that _would_ have been
+// output otherwise. E.g. if you set desired_channels to 4, you will always
+// get RGBA output, but you can check *channels_in_file to see if it's trivially
+// opaque because e.g. there were only 3 channels in the source image.
+//
+// An output image with N components has the following components interleaved
+// in this order in each pixel:
+//
+// N=#comp components
+// 1 grey
+// 2 grey, alpha
+// 3 red, green, blue
+// 4 red, green, blue, alpha
+//
+// If image loading fails for any reason, the return value will be NULL,
+// and *x, *y, *channels_in_file will be unchanged. The function
+// stbi_failure_reason() can be queried for an extremely brief, end-user
+// unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS
+// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
+// more user-friendly ones.
+//
+// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
+//
+// To query the width, height and component count of an image without having to
+// decode the full file, you can use the stbi_info family of functions:
+//
+// int x,y,n,ok;
+// ok = stbi_info(filename, &x, &y, &n);
+// // returns ok=1 and sets x, y, n if image is a supported format,
+// // 0 otherwise.
+//
+// Note that stb_image pervasively uses ints in its public API for sizes,
+// including sizes of memory buffers. This is now part of the API and thus
+// hard to change without causing breakage. As a result, the various image
+// loaders all have certain limits on image size; these differ somewhat
+// by format but generally boil down to either just under 2GB or just under
+// 1GB. When the decoded image would be larger than this, stb_image decoding
+// will fail.
+//
+// Additionally, stb_image will reject image files that have any of their
+// dimensions set to a larger value than the configurable STBI_MAX_DIMENSIONS,
+// which defaults to 2**24 = 16777216 pixels. Due to the above memory limit,
+// the only way to have an image with such dimensions load correctly
+// is for it to have a rather extreme aspect ratio. Either way, the
+// assumption here is that such larger images are likely to be malformed
+// or malicious. If you do need to load an image with individual dimensions
+// larger than that, and it still fits in the overall size limit, you can
+// #define STBI_MAX_DIMENSIONS on your own to be something larger.
+//
+// ===========================================================================
+//
+// UNICODE:
+//
+// If compiling for Windows and you wish to use Unicode filenames, compile
+// with
+// #define STBI_WINDOWS_UTF8
+// and pass utf8-encoded filenames. Call stbi_convert_wchar_to_utf8 to convert
+// Windows wchar_t filenames to utf8.
+//
+// ===========================================================================
+//
+// Philosophy
+//
+// stb libraries are designed with the following priorities:
+//
+// 1. easy to use
+// 2. easy to maintain
+// 3. good performance
+//
+// Sometimes I let "good performance" creep up in priority over "easy to maintain",
+// and for best performance I may provide less-easy-to-use APIs that give higher
+// performance, in addition to the easy-to-use ones. Nevertheless, it's important
+// to keep in mind that from the standpoint of you, a client of this library,
+// all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all.
+//
+// Some secondary priorities arise directly from the first two, some of which
+// provide more explicit reasons why performance can't be emphasized.
+//
+// - Portable ("ease of use")
+// - Small source code footprint ("easy to maintain")
+// - No dependencies ("ease of use")
+//
+// ===========================================================================
+//
+// I/O callbacks
+//
+// I/O callbacks allow you to read from arbitrary sources, like packaged
+// files or some other source. Data read from callbacks are processed
+// through a small internal buffer (currently 128 bytes) to try to reduce
+// overhead.
+//
+// The three functions you must define are "read" (reads some bytes of data),
+// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
+//
+// ===========================================================================
+//
+// SIMD support
+//
+// The JPEG decoder will try to automatically use SIMD kernels on x86 when
+// supported by the compiler. For ARM Neon support, you must explicitly
+// request it.
+//
+// (The old do-it-yourself SIMD API is no longer supported in the current
+// code.)
+//
+// On x86, SSE2 will automatically be used when available (checked with a run-time test on MSVC; on GCC/Clang it is used only when the compiler has it enabled, e.g. -msse2);
+// if not, the generic C versions are used as a fall-back. On ARM targets,
+// the typical path is to have separate builds for NEON and non-NEON devices
+// (at least this is true for iOS and Android). Therefore, the NEON support is
+// toggled by a build flag: define STBI_NEON to get NEON loops.
+//
+// If for some reason you do not want to use any of SIMD code, or if
+// you have issues compiling it, you can disable it entirely by
+// defining STBI_NO_SIMD.
+//
+// ===========================================================================
+//
+// HDR image support (disable by defining STBI_NO_HDR)
+//
+// stb_image supports loading HDR images in general, and currently the Radiance
+// .HDR file format specifically. You can still load any file through the existing
+// interface; if you attempt to load an HDR file, it will be automatically remapped
+// to LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
+// both of these constants can be reconfigured through this interface:
+//
+// stbi_hdr_to_ldr_gamma(2.2f);
+// stbi_hdr_to_ldr_scale(1.0f);
+//
+// (note, do not use _inverse_ constants; stb_image will invert them
+// appropriately).
+//
+// Additionally, there is a new, parallel interface for loading files as
+// (linear) floats to preserve the full dynamic range:
+//
+// float *data = stbi_loadf(filename, &x, &y, &n, 0);
+//
+// If you load LDR images through this interface, those images will
+// be promoted to floating point values, run through the inverse of
+// constants corresponding to the above:
+//
+// stbi_ldr_to_hdr_scale(1.0f);
+// stbi_ldr_to_hdr_gamma(2.2f);
+//
+// Finally, given a filename (or an open file or memory block--see header
+// file for details) containing image data, you can query for the "most
+// appropriate" interface to use (that is, whether the image is HDR or
+// not), using:
+//
+// stbi_is_hdr(char *filename);
+//
+// ===========================================================================
+//
+// iPhone PNG support:
+//
+// We optionally support converting iPhone-formatted PNGs (which store
+// premultiplied BGRA) back to RGB, even though they're internally encoded
+// differently. To enable this conversion, call
+// stbi_convert_iphone_png_to_rgb(1).
+//
+// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
+// pixel to remove any premultiplied alpha *only* if the image file explicitly
+// says there's premultiplied data (currently only happens in iPhone images,
+// and only if iPhone convert-to-rgb processing is on).
+//
+// ===========================================================================
+//
+// ADDITIONAL CONFIGURATION
+//
+// - You can suppress implementation of any of the decoders to reduce
+// your code footprint by #defining one or more of the following
+// symbols before creating the implementation.
+//
+// STBI_NO_JPEG
+// STBI_NO_PNG
+// STBI_NO_BMP
+// STBI_NO_PSD
+// STBI_NO_TGA
+// STBI_NO_GIF
+// STBI_NO_HDR
+// STBI_NO_PIC
+// STBI_NO_PNM (.ppm and .pgm)
+//
+// - You can request *only* certain decoders and suppress all other ones
+// (this will be more forward-compatible, as addition of new decoders
+// doesn't require you to disable them explicitly):
+//
+// STBI_ONLY_JPEG
+// STBI_ONLY_PNG
+// STBI_ONLY_BMP
+// STBI_ONLY_PSD
+// STBI_ONLY_TGA
+// STBI_ONLY_GIF
+// STBI_ONLY_HDR
+// STBI_ONLY_PIC
+// STBI_ONLY_PNM (.ppm and .pgm)
+//
+// - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
+// want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
+//
+// - If you define STBI_MAX_DIMENSIONS, stb_image will reject images greater
+// than that size (in either width or height) without further processing.
+// This is to let programs in the wild set an upper bound to prevent
+// denial-of-service attacks on untrusted data, as one could generate a
+// valid image of gigantic dimensions and force stb_image to allocate a
+// huge block of memory and spend disproportionate time decoding it. By
+// default this is set to (1 << 24), which is 16777216, but that's still
+// very big.
+
+#ifndef STBI_NO_STDIO
+#include <stdio.h>
+#endif // STBI_NO_STDIO
+
+#define STBI_VERSION 1
+
+enum
+{
+ STBI_default = 0, // only used for desired_channels: 0 keeps the channel count found in the file
+
+ STBI_grey = 1, // 1 component per pixel: grey
+ STBI_grey_alpha = 2, // 2 components per pixel: grey, alpha
+ STBI_rgb = 3, // 3 components per pixel: red, green, blue
+ STBI_rgb_alpha = 4 // 4 components per pixel: red, green, blue, alpha
+};
+
+#include <stdlib.h>
+typedef unsigned char stbi_uc;
+typedef unsigned short stbi_us;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef STBIDEF
+#ifdef STB_IMAGE_STATIC
+#define STBIDEF static
+#else
+#define STBIDEF extern
+#endif
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// PRIMARY API - works on images of any type
+//
+
+//
+// load image by filename, open file, or memory buffer
+//
+
+typedef struct
+{
+ int (*read) (void *user,char *data,int size); // fill 'data' with 'size' bytes. return number of bytes actually read
+ void (*skip) (void *user,int n); // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
+ int (*eof) (void *user); // returns nonzero if we are at end of file/data
+} stbi_io_callbacks; // callback table taken by the *_from_callbacks entry points; 'user' is the pointer the caller supplies alongside the table
+
+////////////////////////////////////
+//
+// 8-bits-per-channel interface
+//
+
+STBIDEF stbi_uc *stbi_load_from_memory (stbi_uc const *buffer, int len , int *x, int *y, int *channels_in_file, int desired_channels);
+STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk , void *user, int *x, int *y, int *channels_in_file, int desired_channels);
+
+#ifndef STBI_NO_STDIO
+STBIDEF stbi_uc *stbi_load (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
+STBIDEF stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
+// for stbi_load_from_file, file pointer is left pointing immediately after image
+#endif
+
+#ifndef STBI_NO_GIF
+STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
+#endif
+
+#ifdef STBI_WINDOWS_UTF8
+STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input);
+#endif
+
+////////////////////////////////////
+//
+// 16-bits-per-channel interface
+//
+
+STBIDEF stbi_us *stbi_load_16_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
+STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels);
+
+#ifndef STBI_NO_STDIO
+STBIDEF stbi_us *stbi_load_16 (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
+STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
+#endif
+
+////////////////////////////////////
+//
+// float-per-channel interface
+//
+#ifndef STBI_NO_LINEAR
+ STBIDEF float *stbi_loadf_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
+ STBIDEF float *stbi_loadf_from_callbacks (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels);
+
+ #ifndef STBI_NO_STDIO
+ STBIDEF float *stbi_loadf (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
+ STBIDEF float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
+ #endif
+#endif
+
+#ifndef STBI_NO_HDR
+ STBIDEF void stbi_hdr_to_ldr_gamma(float gamma);
+ STBIDEF void stbi_hdr_to_ldr_scale(float scale);
+#endif // STBI_NO_HDR
+
+#ifndef STBI_NO_LINEAR
+ STBIDEF void stbi_ldr_to_hdr_gamma(float gamma);
+ STBIDEF void stbi_ldr_to_hdr_scale(float scale);
+#endif // STBI_NO_LINEAR
+
+// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR
+STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user);
+STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);
+#ifndef STBI_NO_STDIO
+STBIDEF int stbi_is_hdr (char const *filename);
+STBIDEF int stbi_is_hdr_from_file(FILE *f);
+#endif // STBI_NO_STDIO
+
+
+// get a VERY brief reason for failure
+// on most compilers (and ALL modern mainstream compilers) this is threadsafe
+STBIDEF const char *stbi_failure_reason (void);
+
+// free the loaded image -- this is just free()
+STBIDEF void stbi_image_free (void *retval_from_stbi_load);
+
+// get image dimensions & components without fully decoding
+STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
+STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp);
+STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len);
+STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user);
+
+#ifndef STBI_NO_STDIO
+STBIDEF int stbi_info (char const *filename, int *x, int *y, int *comp);
+STBIDEF int stbi_info_from_file (FILE *f, int *x, int *y, int *comp);
+STBIDEF int stbi_is_16_bit (char const *filename);
+STBIDEF int stbi_is_16_bit_from_file(FILE *f);
+#endif
+
+
+
+// for image formats that explicitly notate that they have premultiplied alpha,
+// we just return the colors as stored in the file. set this flag to force
+// unpremultiplication. results are undefined if the unpremultiply overflows.
+STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
+
+// indicate whether we should process iphone images back to canonical format,
+// or just pass them through "as-is"
+STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
+
+// flip the image vertically, so the first pixel in the output array is the bottom left
+STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
+
+// as above, but only applies to images loaded on the thread that calls the function
+// this function is only available if your compiler supports thread-local variables;
+// calling it will fail to link if your compiler doesn't
+STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply);
+STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert);
+STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip);
+
+// ZLIB client - used by PNG, available for other purposes
+
+STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
+STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header);
+STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
+STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
+
+STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen);
+STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+//
+//
+//// end header file /////////////////////////////////////////////////////
+#endif // STBI_INCLUDE_STB_IMAGE_H
+
+#ifdef STB_IMAGE_IMPLEMENTATION
+
+#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
+ || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
+ || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
+ || defined(STBI_ONLY_ZLIB)
+ #ifndef STBI_ONLY_JPEG
+ #define STBI_NO_JPEG
+ #endif
+ #ifndef STBI_ONLY_PNG
+ #define STBI_NO_PNG
+ #endif
+ #ifndef STBI_ONLY_BMP
+ #define STBI_NO_BMP
+ #endif
+ #ifndef STBI_ONLY_PSD
+ #define STBI_NO_PSD
+ #endif
+ #ifndef STBI_ONLY_TGA
+ #define STBI_NO_TGA
+ #endif
+ #ifndef STBI_ONLY_GIF
+ #define STBI_NO_GIF
+ #endif
+ #ifndef STBI_ONLY_HDR
+ #define STBI_NO_HDR
+ #endif
+ #ifndef STBI_ONLY_PIC
+ #define STBI_NO_PIC
+ #endif
+ #ifndef STBI_ONLY_PNM
+ #define STBI_NO_PNM
+ #endif
+#endif
+
+#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
+#define STBI_NO_ZLIB
+#endif
+
+
+#include <stdarg.h>
+#include <stddef.h> // ptrdiff_t on osx
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+
+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
+#include <math.h> // ldexp, pow
+#endif
+
+#ifndef STBI_NO_STDIO
+#include <stdio.h>
+#endif
+
+#ifndef STBI_ASSERT
+#include <assert.h>
+#define STBI_ASSERT(x) assert(x)
+#endif
+
+#ifdef __cplusplus
+#define STBI_EXTERN extern "C"
+#else
+#define STBI_EXTERN extern
+#endif
+
+
+#ifndef _MSC_VER
+ #ifdef __cplusplus
+ #define stbi_inline inline
+ #else
+ #define stbi_inline
+ #endif
+#else
+ #define stbi_inline __forceinline
+#endif
+
+#ifndef STBI_NO_THREAD_LOCALS
+ #if defined(__cplusplus) && __cplusplus >= 201103L
+ #define STBI_THREAD_LOCAL thread_local
+ #elif defined(__GNUC__) && __GNUC__ < 5
+ #define STBI_THREAD_LOCAL __thread
+ #elif defined(_MSC_VER)
+ #define STBI_THREAD_LOCAL __declspec(thread)
+ #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_THREADS__)
+ #define STBI_THREAD_LOCAL _Thread_local
+ #endif
+
+ #ifndef STBI_THREAD_LOCAL
+ #if defined(__GNUC__)
+ #define STBI_THREAD_LOCAL __thread
+ #endif
+ #endif
+#endif
+
+#if defined(_MSC_VER) || defined(__SYMBIAN32__)
+typedef unsigned short stbi__uint16;
+typedef signed short stbi__int16;
+typedef unsigned int stbi__uint32;
+typedef signed int stbi__int32;
+#else
+#include <stdint.h>
+typedef uint16_t stbi__uint16;
+typedef int16_t stbi__int16;
+typedef uint32_t stbi__uint32;
+typedef int32_t stbi__int32;
+#endif
+
+// should produce compiler error if size is wrong
+typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
+
+#ifdef _MSC_VER
+#define STBI_NOTUSED(v) (void)(v)
+#else
+#define STBI_NOTUSED(v) (void)sizeof(v)
+#endif
+
+#ifdef _MSC_VER
+#define STBI_HAS_LROTL
+#endif
+
+#ifdef STBI_HAS_LROTL
+ #define stbi_lrot(x,y) _lrotl(x,y)
+#else
+ #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (-(y) & 31)))
+#endif
+
+#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
+// ok
+#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
+// ok
+#else
+#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
+#endif
+
+#ifndef STBI_MALLOC
+#define STBI_MALLOC(sz) malloc(sz)
+#define STBI_REALLOC(p,newsz) realloc(p,newsz)
+#define STBI_FREE(p) free(p)
+#endif
+
+#ifndef STBI_REALLOC_SIZED
+#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz)
+#endif
+
+// x86/x64 detection
+#if defined(__x86_64__) || defined(_M_X64)
+#define STBI__X64_TARGET
+#elif defined(__i386) || defined(_M_IX86)
+#define STBI__X86_TARGET
+#endif
+
+#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
+// gcc doesn't support sse2 intrinsics unless you compile with -msse2,
+// which in turn means it gets to use SSE2 everywhere. This is unfortunate,
+// but previous attempts to provide the SSE2 functions with runtime
+// detection caused numerous issues. The way architecture extensions are
+// exposed in GCC/Clang is, sadly, not really suited for one-file libs.
+// New behavior: if compiled with -msse2, we use SSE2 without any
+// detection; if not, we don't use it at all.
+#define STBI_NO_SIMD
+#endif
+
+#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
+// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
+//
+// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
+// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
+// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
+// simultaneously enabling "-mstackrealign".
+//
+// See https://github.com/nothings/stb/issues/81 for more information.
+//
+// So default to no SSE2 on 32-bit MinGW. If you've read this far and added
+// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
+#define STBI_NO_SIMD
+#endif
+
+#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
+#define STBI_SSE2
+#include <emmintrin.h>
+
+#ifdef _MSC_VER
+
+#if _MSC_VER >= 1400 // not VC6
+#include <intrin.h> // __cpuid
+static int stbi__cpuid3(void) // returns the fourth register (EDX) reported by CPUID function 1
+{
+ int info[4]; // __cpuid writes EAX, EBX, ECX, EDX into info[0..3]
+ __cpuid(info,1); // query CPUID function 1
+ return info[3]; // EDX, matching the inline-asm variant used for older compilers
+}
+#else
+static int stbi__cpuid3(void) // variant for _MSC_VER < 1400: same query written as inline assembly instead of the __cpuid intrinsic
+{
+ int res;
+ __asm {
+ mov eax,1
+ cpuid
+ mov res,edx
+ }
+ return res; // EDX as left by CPUID with EAX=1
+}
+#endif
+
+#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
+
+#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
+static int stbi__sse2_available(void) // MSVC path: run-time check, returns 1 if the CPU reports SSE2 and 0 otherwise
+{
+ int info3 = stbi__cpuid3(); // EDX from CPUID function 1
+ return ((info3 >> 26) & 1) != 0; // bit 26 of that register is the SSE2 feature flag
+}
+#endif
+
+#else // assume GCC-style if not VC++
+#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
+
+#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
+static int stbi__sse2_available(void) // non-MSVC path: no run-time probe is performed
+{
+ // If we're even attempting to compile this on GCC/Clang, that means
+ // -msse2 is on, which means the compiler is allowed to use SSE2
+ // instructions at will, and so are we.
+ return 1; // unconditionally "available" on this path
+}
+#endif
+
+#endif
+#endif
+
+// ARM NEON
+#if defined(STBI_NO_SIMD) && defined(STBI_NEON)
+#undef STBI_NEON
+#endif
+
+#ifdef STBI_NEON
+#include <arm_neon.h>
+#ifdef _MSC_VER
+#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
+#else
+#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
+#endif
+#endif
+
+#ifndef STBI_SIMD_ALIGN
+#define STBI_SIMD_ALIGN(type, name) type name
+#endif
+
+#ifndef STBI_MAX_DIMENSIONS
+#define STBI_MAX_DIMENSIONS (1 << 24)
+#endif
+
+///////////////////////////////////////////////
+//
+// stbi__context struct and start_xxx functions
+
+// stbi__context structure is our basic context used by all images, so it
+// contains all the IO context, plus some basic image information
+typedef struct
+{
+ stbi__uint32 img_x, img_y; // NOTE(review): presumably image width and height -- written by the decoders, which are not visible here
+ int img_n, img_out_n; // NOTE(review): presumably component counts (in file / in output) -- confirm against the decoders
+
+ stbi_io_callbacks io; // copy of the caller's callback table; io.read is set to NULL for memory contexts
+ void *io_user_data; // 'user' pointer supplied together with the callbacks
+
+ int read_from_callbacks; // 1 for callback-based contexts, 0 for memory contexts
+ int buflen; // set to sizeof(buffer_start) when a callback-based context is started
+ stbi_uc buffer_start[128]; // small internal buffer that callback data is read through
+ int callback_already_read; // zeroed by both start functions; NOTE(review): maintained elsewhere, meaning not visible here
+
+ stbi_uc *img_buffer, *img_buffer_end; // current read window: start and one-past-end
+ stbi_uc *img_buffer_original, *img_buffer_original_end; // initial read window, restored by stbi__rewind
+} stbi__context;
+
+
+static void stbi__refill_buffer(stbi__context *s);
+
+// initialize a memory-decode context: 's' reads directly from the 'len' bytes at 'buffer' (the data is not copied)
+static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
+{
+ s->io.read = NULL; // memory contexts carry no read callback
+ s->read_from_callbacks = 0;
+ s->callback_already_read = 0;
+ s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer; // current and original start both point at the caller's data (const is cast away)
+ s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len; // one past the last byte of the caller's data
+}
+
+// initialize a callback-based context: 's' reads through the callbacks in 'c', each called with 'user'
+static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
+{
+ s->io = *c; // the table is copied by value, so 'c' need not outlive this call
+ s->io_user_data = user;
+ s->buflen = sizeof(s->buffer_start); // capacity of the internal buffer
+ s->read_from_callbacks = 1;
+ s->callback_already_read = 0;
+ s->img_buffer = s->img_buffer_original = s->buffer_start; // reads are served from the internal buffer
+ stbi__refill_buffer(s); // NOTE(review): defined later in the file; presumably loads the first chunk and sets img_buffer_end -- confirm
+ s->img_buffer_original_end = s->img_buffer_end; // remember the end of the initial chunk so stbi__rewind can restore it
+}
+
+#ifndef STBI_NO_STDIO
+
+static int stbi__stdio_read(void *user, char *data, int size) // 'read' callback for FILE* streams; 'user' is the FILE*
+{
+ return (int) fread(data,1,size,(FILE*) user); // element size is 1, so fread's return value is the number of bytes read
+}
+
+static void stbi__stdio_skip(void *user, int n) // 'skip' callback for FILE* streams; 'user' is the FILE*
+{
+ int ch;
+ fseek((FILE*) user, n, SEEK_CUR); // move n bytes relative to the current position; the result is not checked
+ ch = fgetc((FILE*) user); /* fseek clears the end-of-file indicator, so read one byte to make feof() reflect the new position */
+ if (ch != EOF) {
+ ungetc(ch, (FILE *) user); /* push byte back onto stream if valid. */
+ }
+}
+
+static int stbi__stdio_eof(void *user) // 'eof' callback for FILE* streams; 'user' is the FILE*
+{
+ return feof((FILE*) user) || ferror((FILE *) user); // a stream error is reported as end of data too
+}
+
+static stbi_io_callbacks stbi__stdio_callbacks = // callback table used for FILE*-based loading
+{
+ stbi__stdio_read, // .read
+ stbi__stdio_skip, // .skip
+ stbi__stdio_eof, // .eof
+};
+
+static void stbi__start_file(stbi__context *s, FILE *f) // initialize a context that reads from an open FILE*
+{
+ stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f); // a file context is a callback context using the stdio callbacks, with 'f' as the user pointer
+}
+
+//static void stop_file(stbi__context *s) { }
+
+#endif // !STBI_NO_STDIO
+
+static void stbi__rewind(stbi__context *s) // reset the read window to the one recorded when the context was started
+{
+ // conceptually rewind SHOULD rewind to the beginning of the stream,
+ // but we just rewind to the beginning of the initial buffer, because
+ // we only use it after doing 'test', which never looks at more than 92 bytes
+ s->img_buffer = s->img_buffer_original; // back to the start of the initial buffer
+ s->img_buffer_end = s->img_buffer_original_end; // and its recorded end
+}
+
+enum // NOTE(review): presumably the values stored in stbi__result_info.channel_order -- confirm against the loaders
+{
+ STBI_ORDER_RGB, // = 0
+ STBI_ORDER_BGR // = 1
+};
+
+typedef struct
+{
+ int bits_per_channel; // NOTE(review): presumably 8 or 16, matching the 8- and 16-bit load interfaces -- confirm against the loaders
+ int num_channels;
+ int channel_order; // NOTE(review): presumably STBI_ORDER_RGB or STBI_ORDER_BGR -- confirm against the loaders
+} stbi__result_info; // passed as the 'ri' parameter of the per-format stbi__*_load functions
+
+// Forward declarations of the per-format decoders. Every format provides a
+// _test, a _load and an _info function; PNG, PSD and PNM also provide _is16.
+// Each group is compiled out when the matching STBI_NO_xxx macro is defined.
+#ifndef STBI_NO_JPEG
+static int stbi__jpeg_test(stbi__context *s);
+static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
+static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
+
+#ifndef STBI_NO_PNG
+static int stbi__png_test(stbi__context *s);
+static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
+static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
+static int stbi__png_is16(stbi__context *s);
+#endif
+
+#ifndef STBI_NO_BMP
+static int stbi__bmp_test(stbi__context *s);
+static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
+static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
+
+#ifndef STBI_NO_TGA
+static int stbi__tga_test(stbi__context *s);
+static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
+static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
+
+// the PSD loader additionally takes the requested bits per channel (bpc)
+#ifndef STBI_NO_PSD
+static int stbi__psd_test(stbi__context *s);
+static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc);
+static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
+static int stbi__psd_is16(stbi__context *s);
+#endif
+
+// the HDR loader returns float data rather than bytes
+#ifndef STBI_NO_HDR
+static int stbi__hdr_test(stbi__context *s);
+static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
+static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
+
+#ifndef STBI_NO_PIC
+static int stbi__pic_test(stbi__context *s);
+static void *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
+static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
+
+// GIF has an extra entry point, stbi__load_gif_main, that also reports delays and z
+#ifndef STBI_NO_GIF
+static int stbi__gif_test(stbi__context *s);
+static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
+static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
+static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
+
+#ifndef STBI_NO_PNM
+static int stbi__pnm_test(stbi__context *s);
+static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
+static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
+static int stbi__pnm_is16(stbi__context *s);
+#endif
+
+// most recent failure message, written by stbi__err and returned by
+// stbi_failure_reason(); declared thread-local when STBI_THREAD_LOCAL is defined
+static
+#ifdef STBI_THREAD_LOCAL
+STBI_THREAD_LOCAL
+#endif
+const char *stbi__g_failure_reason;
+
+// return the message recorded by the most recent stbi__err call; the returned
+// pointer is whatever was stored and is not copied
+STBIDEF const char *stbi_failure_reason(void)
+{
+ return stbi__g_failure_reason;
+}
+
+#ifndef STBI_NO_FAILURE_STRINGS
+// record 'str' as the current failure reason and return 0, so a failure path
+// can be written as "return stbi__err(...)"; the pointer is stored, not copied
+static int stbi__err(const char *str)
+{
+ stbi__g_failure_reason = str;
+ return 0;
+}
+#endif
+
+// single allocation entry point for the library: forwards to the
+// STBI_MALLOC macro and returns its result unchanged
+static void *stbi__malloc(size_t size)
+{
+ return STBI_MALLOC(size);
+}
+
+// stb_image uses ints pervasively, including for offset calculations.
+// therefore the largest decoded image size we can support with the
+// current code, even on 64-bit targets, is INT_MAX. this is not a
+// significant limitation for the intended use case.
+//
+// we do, however, need to make sure our size calculations don't
+// overflow. hence a few helper functions for size calculations that
+// multiply integers together, making sure that they're non-negative
+// and no overflow occurs.
+
+// return 1 if the sum is valid, 0 on overflow.
+// a negative 'b' is rejected; the sign of 'a' is not checked here (the
+// callers in this file pass an already-validated product as 'a').
+static int stbi__addsizes_valid(int a, int b)
+{
+   int headroom;
+
+   if (b < 0)
+      return 0;
+
+   // b is in [0, INT_MAX], so INT_MAX - b is in [0, INT_MAX] too and cannot
+   // overflow; "a + b <= INT_MAX" is therefore tested as "a <= INT_MAX - b"
+   headroom = INT_MAX - b;
+   return (a <= headroom) ? 1 : 0;
+}
+
+// returns 1 if the product is valid, 0 on overflow.
+// negative factors are considered invalid.
+static int stbi__mul2sizes_valid(int a, int b)
+{
+   if (a < 0) return 0;
+   if (b < 0) return 0;
+
+   // anything times zero fits, and this also keeps the division below safe
+   if (b == 0) return 1;
+
+   // a*b <= INT_MAX  <=>  a <= INT_MAX/b  for positive b, without overflowing
+   return (a <= INT_MAX / b) ? 1 : 0;
+}
+
+#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR)
+// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow
+static int stbi__mad2sizes_valid(int a, int b, int add)
+{
+   // the product must be proven safe before a*b is actually evaluated
+   if (!stbi__mul2sizes_valid(a, b))
+      return 0;
+   return stbi__addsizes_valid(a*b, add);
+}
+#endif
+
+// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow
+static int stbi__mad3sizes_valid(int a, int b, int c, int add)
+{
+   // each partial product is validated before it is evaluated for the next step
+   if (!stbi__mul2sizes_valid(a, b))
+      return 0;
+   if (!stbi__mul2sizes_valid(a*b, c))
+      return 0;
+   return stbi__addsizes_valid(a*b*c, add);
+}
+
+// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
+static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
+{
+   // validate left to right so no partial product is formed before it is known to fit
+   if (!stbi__mul2sizes_valid(a, b))
+      return 0;
+   if (!stbi__mul2sizes_valid(a*b, c))
+      return 0;
+   if (!stbi__mul2sizes_valid(a*b*c, d))
+      return 0;
+   return stbi__addsizes_valid(a*b*c*d, add);
+}
+#endif
+
+#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR)
+// mallocs with size overflow checking: allocates a*b + add bytes, or returns
+// NULL without allocating when that size has a negative term or overflows int
+static void *stbi__malloc_mad2(int a, int b, int add)
+{
+   return stbi__mad2sizes_valid(a, b, add) ? stbi__malloc(a*b + add) : NULL;
+}
+#endif
+
+// allocates a*b*c + add bytes, or returns NULL without allocating when that
+// size has a negative term/factor or overflows int
+static void *stbi__malloc_mad3(int a, int b, int c, int add)
+{
+   return stbi__mad3sizes_valid(a, b, c, add) ? stbi__malloc(a*b*c + add) : NULL;
+}
+
+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
+// allocates a*b*c*d + add bytes, or returns NULL without allocating when that
+// size has a negative term/factor or overflows int
+static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
+{
+   return stbi__mad4sizes_valid(a, b, c, d, add) ? stbi__malloc(a*b*c*d + add) : NULL;
+}
+#endif
+
+// returns 1 if a + b is representable as an int, 0 if it would overflow.
+static int stbi__addints_valid(int a, int b)
+{
+   int a_neg = (a < 0);
+   int b_neg = (b < 0);
+
+   // operands of opposite sign pull toward zero: the sum always fits
+   if (a_neg != b_neg)
+      return 1;
+
+   // both negative: a + b >= INT_MIN  <=>  a >= INT_MIN - b, and
+   // INT_MIN - b cannot overflow because b < 0
+   if (a_neg)
+      return a >= INT_MIN - b;
+
+   // both non-negative: a + b <= INT_MAX  <=>  a <= INT_MAX - b
+   return a <= INT_MAX - b;
+}
+
+// returns 1 if the product of two ints fits in a signed short
+// (SHRT_MIN <= a*b <= SHRT_MAX), 0 otherwise. The product itself is never
+// formed; every test is a division of a short limit by b, which cannot
+// overflow an int.
+static int stbi__mul2shorts_valid(int a, int b)
+{
+   if (a == 0 || b == 0) return 1; // the product is 0, which always fits
+
+   if ((a > 0) == (b > 0)) {
+      // product is positive: need a*b <= SHRT_MAX
+      if (b > 0) return a <= SHRT_MAX / b;
+      // both negative: dividing by b < 0 flips the inequality
+      return a >= SHRT_MAX / b;
+   }
+
+   // product is negative: need a*b >= SHRT_MIN
+   if (b < 0) return a <= SHRT_MIN / b; // a > 0, b < 0: division flips the inequality
+   return a >= SHRT_MIN / b;            // a < 0, b > 0
+}
+
+// stbi__err - error
+// stbi__errpf - error returning pointer to float
+// stbi__errpuc - error returning pointer to unsigned char
+//
+// stbi__err(x,y) takes two messages and the build configuration picks one:
+// - STBI_NO_FAILURE_STRINGS: no message is recorded, the macro is just 0
+// - STBI_FAILURE_USERMSG: the second message (y) is recorded
+// - otherwise: the first message (x) is recorded
+
+#ifdef STBI_NO_FAILURE_STRINGS
+ #define stbi__err(x,y) 0
+#elif defined(STBI_FAILURE_USERMSG)
+ #define stbi__err(x,y) stbi__err(y)
+#else
+ #define stbi__err(x,y) stbi__err(x)
+#endif
+
+// both pointer variants evaluate stbi__err for its side effect and then yield
+// NULL whichever way the ?: goes
+#define stbi__errpf(x,y) ((float *)(size_t) (stbi__err(x,y)?NULL:NULL))
+#define stbi__errpuc(x,y) ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL))
+
+// release a buffer returned by one of the stbi_load* functions by handing it
+// to the STBI_FREE macro
+STBIDEF void stbi_image_free(void *retval_from_stbi_load)
+{
+ STBI_FREE(retval_from_stbi_load);
+}
+
+// forward declarations of the byte<->float converters, which are defined
+// later in the file and compiled under the same conditions
+#ifndef STBI_NO_LINEAR
+static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
+#endif
+
+#ifndef STBI_NO_HDR
+static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp);
+#endif
+
+// process-wide "flip loaded images vertically" flag; off by default
+static int stbi__vertically_flip_on_load_global = 0;
+
+// set the process-wide flip flag (nonzero = flip); a plain store with no locking
+STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
+{
+ stbi__vertically_flip_on_load_global = flag_true_if_should_flip;
+}
+
+// stbi__vertically_flip_on_load is what the loaders actually test.
+// Without thread-local support it is simply the global flag.
+#ifndef STBI_THREAD_LOCAL
+#define stbi__vertically_flip_on_load stbi__vertically_flip_on_load_global
+#else
+// per-thread override value, and whether this thread has set an override
+static STBI_THREAD_LOCAL int stbi__vertically_flip_on_load_local, stbi__vertically_flip_on_load_set;
+
+// set the flip flag for the calling thread only; from then on this thread
+// uses its own value instead of the global one
+STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip)
+{
+ stbi__vertically_flip_on_load_local = flag_true_if_should_flip;
+ stbi__vertically_flip_on_load_set = 1;
+}
+
+// use the thread's value if it has set one, otherwise fall back to the global
+#define stbi__vertically_flip_on_load (stbi__vertically_flip_on_load_set \
+ ? stbi__vertically_flip_on_load_local \
+ : stbi__vertically_flip_on_load_global)
+#endif // STBI_THREAD_LOCAL
+
+// Identify the image format held by 's' and run the matching decoder.
+//
+// *ri is reset to defaults (8 bits per channel, RGB order, 0 channels) before
+// any decoder runs. Each compiled-in format's _test function is then tried in
+// a fixed order and the first match's _load result is returned. x, y, comp
+// and req_comp are forwarded to that loader unchanged; bpc is only forwarded
+// to the PSD loader. HDR files are decoded to float and converted to bytes.
+//
+// Returns the loader's buffer, or NULL with the failure reason set to
+// "unknown image type" when no format test matches.
+static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
+{
+ memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields
+ ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed
+ ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
+ ri->num_channels = 0;
+
+ // test the formats with a very explicit header first (at least a FOURCC
+ // or distinctive magic number first)
+ #ifndef STBI_NO_PNG
+ if (stbi__png_test(s)) return stbi__png_load(s,x,y,comp,req_comp, ri);
+ #endif
+ #ifndef STBI_NO_BMP
+ if (stbi__bmp_test(s)) return stbi__bmp_load(s,x,y,comp,req_comp, ri);
+ #endif
+ #ifndef STBI_NO_GIF
+ if (stbi__gif_test(s)) return stbi__gif_load(s,x,y,comp,req_comp, ri);
+ #endif
+ #ifndef STBI_NO_PSD
+ if (stbi__psd_test(s)) return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc);
+ #else
+ STBI_NOTUSED(bpc);
+ #endif
+ #ifndef STBI_NO_PIC
+ if (stbi__pic_test(s)) return stbi__pic_load(s,x,y,comp,req_comp, ri);
+ #endif
+
+ // then the formats that can end up attempting to load with just 1 or 2
+ // bytes matching expectations; these are prone to false positives, so
+ // try them later
+ #ifndef STBI_NO_JPEG
+ if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
+ #endif
+ #ifndef STBI_NO_PNM
+ if (stbi__pnm_test(s)) return stbi__pnm_load(s,x,y,comp,req_comp, ri);
+ #endif
+
+ #ifndef STBI_NO_HDR
+ if (stbi__hdr_test(s)) {
+ // a failed HDR load yields NULL here; stbi__hdr_to_ldr passes NULL through
+ float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri);
+ return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
+ }
+ #endif
+
+ #ifndef STBI_NO_TGA
+ // test tga last because it's a crappy test!
+ if (stbi__tga_test(s))
+ return stbi__tga_load(s,x,y,comp,req_comp, ri);
+ #endif
+
+ return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
+}
+
+// Narrow a 16-bit-per-channel image to 8 bits per channel by keeping the high
+// byte of every sample.
+//
+// Takes ownership of 'orig': it is freed on success AND on allocation
+// failure, so the caller must not use or free it afterwards.
+// Returns a newly allocated buffer of w*h*channels bytes, or NULL (failure
+// reason "outofmem") if the allocation fails.
+static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels)
+{
+   int i;
+   int img_len = w * h * channels;
+   stbi_uc *reduced;
+
+   reduced = (stbi_uc *) stbi__malloc(img_len);
+   if (reduced == NULL) {
+      // callers overwrite their only pointer to 'orig' with our return value,
+      // so it must be released here or it leaks
+      STBI_FREE(orig);
+      return stbi__errpuc("outofmem", "Out of memory");
+   }
+
+   for (i = 0; i < img_len; ++i)
+      reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each 16-bit sample is a sufficient approx of 16->8 bit scaling
+
+   STBI_FREE(orig);
+   return reduced;
+}
+
+// Widen an 8-bit-per-channel image to 16 bits per channel by replicating each
+// byte into both halves of the 16-bit sample.
+//
+// Takes ownership of 'orig': it is freed on success AND on allocation
+// failure, so the caller must not use or free it afterwards.
+// Returns a newly allocated buffer of w*h*channels 16-bit samples, or NULL
+// (failure reason "outofmem") if the allocation fails.
+static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels)
+{
+   int i;
+   int img_len = w * h * channels;
+   stbi__uint16 *enlarged;
+
+   // compute the byte count in size_t: img_len*2 evaluated as int overflows
+   // once the 8-bit image is larger than INT_MAX/2 bytes
+   enlarged = (stbi__uint16 *) stbi__malloc((size_t) img_len * 2);
+   if (enlarged == NULL) {
+      // callers overwrite their only pointer to 'orig' with our return value,
+      // so it must be released here or it leaks
+      STBI_FREE(orig);
+      return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
+   }
+
+   for (i = 0; i < img_len; ++i)
+      enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
+
+   STBI_FREE(orig);
+   return enlarged;
+}
+
+// Flip an image upside down in place by swapping row k with row h-1-k for
+// every k in the top half. Rows are w*bytes_per_pixel bytes long and are
+// exchanged through a fixed 2048-byte stack buffer, one chunk at a time.
+static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel)
+{
+   stbi_uc scratch[2048];
+   stbi_uc *base = (stbi_uc *) image;
+   size_t stride = (size_t) w * bytes_per_pixel;
+   int half = h >> 1; // with an odd h the middle row stays where it is
+   int top;
+
+   for (top = 0; top < half; ++top) {
+      stbi_uc *upper = base + top * stride;
+      stbi_uc *lower = base + (h - top - 1) * stride;
+      size_t remaining;
+
+      // swap the two rows, at most sizeof(scratch) bytes per pass
+      for (remaining = stride; remaining != 0; ) {
+         size_t chunk = (remaining < sizeof(scratch)) ? remaining : sizeof(scratch);
+         memcpy(scratch, upper, chunk);
+         memcpy(upper, lower, chunk);
+         memcpy(lower, scratch, chunk);
+         upper += chunk;
+         lower += chunk;
+         remaining -= chunk;
+      }
+   }
+}
+
+#ifndef STBI_NO_GIF
+// Flip each of z consecutive w*h slices of 'image' vertically, in place.
+// Slices are packed back to back, w*h*bytes_per_pixel bytes apart.
+static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel)
+{
+   stbi_uc *cursor = (stbi_uc *) image;
+   int bytes_per_slice = w * h * bytes_per_pixel;
+   int remaining;
+
+   for (remaining = z; remaining > 0; --remaining, cursor += bytes_per_slice)
+      stbi__vertical_flip(cursor, w, h, bytes_per_pixel);
+}
+#endif
+
+// Decode an image from 's' and deliver it as 8 bits per channel.
+//
+// Runs stbi__load_main, narrows 16-bit results to 8 bits, and applies the
+// vertical flip when it is enabled. x, y, comp and req_comp are forwarded to
+// the loader. The output has req_comp channels, or *comp when req_comp is 0.
+// Returns the pixel buffer, or NULL if decoding or the narrowing step failed.
+static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+{
+   stbi__result_info ri;
+   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);
+
+   if (result == NULL)
+      return NULL;
+
+   // it is the responsibility of the loaders to make sure we get either 8 or 16 bit.
+   STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16);
+
+   if (ri.bits_per_channel != 8) {
+      result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
+      // the conversion allocates and can fail; without this check a NULL
+      // image would be handed to the flip below
+      if (result == NULL)
+         return NULL;
+      ri.bits_per_channel = 8;
+   }
+
+   // @TODO: move stbi__convert_format to here
+
+   if (stbi__vertically_flip_on_load) {
+      int channels = req_comp ? req_comp : *comp;
+      stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
+   }
+
+   return (unsigned char *) result;
+}
+
+// Decode an image from 's' and deliver it as 16 bits per channel.
+//
+// Runs stbi__load_main, widens 8-bit results to 16 bits, and applies the
+// vertical flip when it is enabled. x, y, comp and req_comp are forwarded to
+// the loader. The output has req_comp channels, or *comp when req_comp is 0.
+// Returns the pixel buffer, or NULL if decoding or the widening step failed.
+static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+{
+   stbi__result_info ri;
+   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
+
+   if (result == NULL)
+      return NULL;
+
+   // it is the responsibility of the loaders to make sure we get either 8 or 16 bit.
+   STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16);
+
+   if (ri.bits_per_channel != 16) {
+      result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
+      // the conversion allocates and can fail; without this check a NULL
+      // image would be handed to the flip below
+      if (result == NULL)
+         return NULL;
+      ri.bits_per_channel = 16;
+   }
+
+   // @TODO: move stbi__convert_format16 to here
+   // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision
+
+   if (stbi__vertically_flip_on_load) {
+      int channels = req_comp ? req_comp : *comp;
+      stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));
+   }
+
+   return (stbi__uint16 *) result;
+}
+
+#if !defined(STBI_NO_HDR) && !defined(STBI_NO_LINEAR)
+// post-processing for float images: applies the vertical flip, when enabled,
+// to a non-NULL result. The pixel size is (req_comp, or *comp when req_comp
+// is 0) floats.
+static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
+{
+   int channels;
+
+   if (!stbi__vertically_flip_on_load)
+      return;
+   if (result == NULL)
+      return;
+
+   channels = req_comp ? req_comp : *comp;
+   stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
+}
+#endif
+
+#ifndef STBI_NO_STDIO
+
+// the two Win32 conversion functions used below are declared by hand here
+#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
+STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide);
+STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default);
+#endif
+
+#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
+// convert the NUL-terminated wide string 'input' to UTF-8 in 'buffer', which
+// holds 'bufferlen' bytes; returns the WideCharToMultiByte result unchanged
+STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input)
+{
+ return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL);
+}
+#endif
+
+// fopen wrapper returning a FILE*, or 0 on failure.
+// - Windows with STBI_WINDOWS_UTF8: filename and mode are converted from
+//   UTF-8 to wide strings and opened with _wfopen_s / _wfopen
+// - other MSVC builds (_MSC_VER >= 1400): fopen_s
+// - everything else: plain fopen
+static FILE *stbi__fopen(char const *filename, char const *mode)
+{
+ FILE *f;
+#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
+ // fixed-size conversion buffers; a failed conversion is reported as
+ // "could not open" by returning 0
+ wchar_t wMode[64];
+ wchar_t wFilename[1024];
+ if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename)))
+ return 0;
+
+ if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode)))
+ return 0;
+
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+ if (0 != _wfopen_s(&f, wFilename, wMode))
+ f = 0;
+#else
+ f = _wfopen(wFilename, wMode);
+#endif
+
+#elif defined(_MSC_VER) && _MSC_VER >= 1400
+ if (0 != fopen_s(&f, filename, mode))
+ f=0;
+#else
+ f = fopen(filename, mode);
+#endif
+ return f;
+}
+
+
+// Open 'filename' in binary mode, decode it as an 8-bit image and close the
+// file again. Returns the pixel buffer, or NULL if the file could not be
+// opened (failure reason "can't fopen") or decoding failed.
+STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
+{
+   unsigned char *pixels;
+   FILE *fp = stbi__fopen(filename, "rb");
+
+   if (fp == NULL)
+      return stbi__errpuc("can't fopen", "Unable to open file");
+
+   pixels = stbi_load_from_file(fp, x, y, comp, req_comp);
+   fclose(fp);
+   return pixels;
+}
+
+// Decode an 8-bit image from an already-open FILE*. The file is not closed.
+// On success the stream is seeked back over the bytes that were read ahead
+// into the context's buffer but not consumed by the decoder.
+STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
+{
+   stbi__context ctx;
+   unsigned char *pixels;
+
+   stbi__start_file(&ctx, f);
+   pixels = stbi__load_and_postprocess_8bit(&ctx, x, y, comp, req_comp);
+
+   if (pixels != NULL) {
+      // need to 'unget' all the characters still sitting in the IO buffer
+      int unread = (int) (ctx.img_buffer_end - ctx.img_buffer);
+      fseek(f, -unread, SEEK_CUR);
+   }
+   return pixels;
+}
+
+// Decode a 16-bit-per-channel image from an already-open FILE*. The file is
+// not closed. On success the stream is seeked back over the bytes that were
+// read ahead into the context's buffer but not consumed by the decoder.
+STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp)
+{
+   stbi__context ctx;
+   stbi__uint16 *pixels;
+
+   stbi__start_file(&ctx, f);
+   pixels = stbi__load_and_postprocess_16bit(&ctx, x, y, comp, req_comp);
+
+   if (pixels != NULL) {
+      // need to 'unget' all the characters still sitting in the IO buffer
+      int unread = (int) (ctx.img_buffer_end - ctx.img_buffer);
+      fseek(f, -unread, SEEK_CUR);
+   }
+   return pixels;
+}
+
+// Open 'filename' in binary mode, decode it as a 16-bit-per-channel image and
+// close the file again. Returns the pixel buffer, or NULL if the file could
+// not be opened (failure reason "can't fopen") or decoding failed.
+STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp)
+{
+   stbi__uint16 *pixels;
+   FILE *fp = stbi__fopen(filename, "rb");
+
+   if (fp == NULL)
+      return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file");
+
+   pixels = stbi_load_from_file_16(fp, x, y, comp, req_comp);
+   fclose(fp);
+   return pixels;
+}
+
+
+#endif //!STBI_NO_STDIO
+
+// decode a 16-bit-per-channel image from the 'len' bytes at 'buffer';
+// returns the pixel buffer, or NULL on failure
+STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels)
+{
+ stbi__context s;
+ stbi__start_mem(&s,buffer,len);
+ return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
+}
+
+// decode a 16-bit-per-channel image using caller-supplied I/O callbacks;
+// 'user' is passed back to every callback. The callback table is copied into
+// the context. Returns the pixel buffer, or NULL on failure.
+STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels)
+{
+ stbi__context s;
+ stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
+ return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
+}
+
+// decode an 8-bit-per-channel image from the 'len' bytes at 'buffer';
+// returns the pixel buffer, or NULL on failure
+STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
+{
+ stbi__context s;
+ stbi__start_mem(&s,buffer,len);
+ return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
+}
+
+// decode an 8-bit-per-channel image using caller-supplied I/O callbacks;
+// 'user' is passed back to every callback. The callback table is copied into
+// the context. Returns the pixel buffer, or NULL on failure.
+STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
+{
+ stbi__context s;
+ stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
+ return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
+}
+
+#ifndef STBI_NO_GIF
+// Decode a GIF held in memory through stbi__load_gif_main, which fills in
+// *delays, *x, *y, *z and *comp. When vertical flipping is enabled, each of
+// the *z slices of the result is flipped in place.
+// Returns whatever stbi__load_gif_main returned (NULL on failure).
+// NOTE(review): the flip uses *comp as the pixel size, not req_comp; confirm
+// against stbi__load_gif_main that this matches the returned buffer's layout.
+STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
+{
+   unsigned char *result;
+   stbi__context s;
+   stbi__start_mem(&s,buffer,len);
+
+   result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp);
+
+   // only flip when decoding produced pixels: on failure result is NULL and
+   // the output dimensions may not have been written
+   if (result != NULL && stbi__vertically_flip_on_load) {
+      stbi__vertical_flip_slices( result, *x, *y, *z, *comp );
+   }
+
+   return result;
+}
+#endif
+
+#ifndef STBI_NO_LINEAR
+// Load an image as floats. HDR input is decoded directly (and flipped if
+// enabled); anything else is decoded as 8-bit and converted with
+// stbi__ldr_to_hdr. Returns NULL on failure.
+static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+{
+ unsigned char *data;
+ #ifndef STBI_NO_HDR
+ if (stbi__hdr_test(s)) {
+ stbi__result_info ri;
+ float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri);
+ if (hdr_data)
+ stbi__float_postprocess(hdr_data,x,y,comp,req_comp);
+ return hdr_data;
+ }
+ #endif
+ data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
+ if (data)
+ return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
+ // NOTE(review): this replaces whatever failure reason the 8-bit path recorded
+ // with "unknown image type"
+ return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
+}
+
+// load an image as floats from the 'len' bytes at 'buffer';
+// returns the float buffer, or NULL on failure
+STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
+{
+ stbi__context s;
+ stbi__start_mem(&s,buffer,len);
+ return stbi__loadf_main(&s,x,y,comp,req_comp);
+}
+
+// load an image as floats using caller-supplied I/O callbacks; 'user' is
+// passed back to every callback. Returns the float buffer, or NULL on failure.
+STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
+{
+ stbi__context s;
+ stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
+ return stbi__loadf_main(&s,x,y,comp,req_comp);
+}
+
+#ifndef STBI_NO_STDIO
+// Open 'filename' in binary mode, load it as floats and close the file again.
+// Returns the float buffer, or NULL if the file could not be opened (failure
+// reason "can't fopen") or loading failed.
+STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
+{
+   float *pixels;
+   FILE *fp = stbi__fopen(filename, "rb");
+
+   if (fp == NULL)
+      return stbi__errpf("can't fopen", "Unable to open file");
+
+   pixels = stbi_loadf_from_file(fp, x, y, comp, req_comp);
+   fclose(fp);
+   return pixels;
+}
+
+// load an image as floats from an already-open FILE*; the file is not closed
+// and, unlike stbi_load_from_file, the stream is not seeked back afterwards
+STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
+{
+ stbi__context s;
+ stbi__start_file(&s,f);
+ return stbi__loadf_main(&s,x,y,comp,req_comp);
+}
+#endif // !STBI_NO_STDIO
+
+#endif // !STBI_NO_LINEAR
+
+// these is-hdr-or-not queries are always defined, regardless of which
+// STBI_NO_* options are set, for API simplicity; if STBI_NO_HDR is defined,
+// they always report false!
+
+// return the result of the HDR format test on the 'len' bytes at 'buffer';
+// always 0 when HDR support is compiled out
+STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
+{
+ #ifndef STBI_NO_HDR
+ stbi__context s;
+ stbi__start_mem(&s,buffer,len);
+ return stbi__hdr_test(&s);
+ #else
+ STBI_NOTUSED(buffer);
+ STBI_NOTUSED(len);
+ return 0;
+ #endif
+}
+
+#ifndef STBI_NO_STDIO
+// open 'filename', run the HDR test on it and close it again;
+// a file that cannot be opened is reported as 0
+STBIDEF int stbi_is_hdr (char const *filename)
+{
+ FILE *f = stbi__fopen(filename, "rb");
+ int result=0;
+ if (f) {
+ result = stbi_is_hdr_from_file(f);
+ fclose(f);
+ }
+ return result;
+}
+
+// run the HDR test on an already-open FILE* and then seek back to the
+// position the stream had on entry; always 0 when HDR support is compiled out
+STBIDEF int stbi_is_hdr_from_file(FILE *f)
+{
+ #ifndef STBI_NO_HDR
+ long pos = ftell(f); // remembered so the stream can be put back afterwards
+ int res;
+ stbi__context s;
+ stbi__start_file(&s,f);
+ res = stbi__hdr_test(&s);
+ fseek(f, pos, SEEK_SET);
+ return res;
+ #else
+ STBI_NOTUSED(f);
+ return 0;
+ #endif
+}
+#endif // !STBI_NO_STDIO
+
+// run the HDR test on data supplied through I/O callbacks; the bytes consumed
+// by the test are not handed back to the callbacks. Always 0 when HDR support
+// is compiled out.
+STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
+{
+ #ifndef STBI_NO_HDR
+ stbi__context s;
+ stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
+ return stbi__hdr_test(&s);
+ #else
+ STBI_NOTUSED(clbk);
+ STBI_NOTUSED(user);
+ return 0;
+ #endif
+}
+
+#ifndef STBI_NO_LINEAR
+// gamma exponent and scale applied by stbi__ldr_to_hdr to non-alpha channels:
+// output = pow(value/255, gamma) * scale. Defaults are gamma 2.2, scale 1.0.
+static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f;
+
+// setters for the two values above; plain stores to file-scope statics
+STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
+STBIDEF void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
+#endif
+
+// inverse gamma and inverse scale used by stbi__hdr_to_ldr; the reciprocals
+// are stored (hence the _i suffix). Defaults are 1/2.2 and 1.0.
+static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f;
+
+// setters take the plain gamma / scale and store 1/value; the argument is not
+// checked for zero
+STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; }
+STBIDEF void stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; }
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Common code used by all image loaders
+//
+
+// scan modes shared by the image loaders
+// NOTE(review): presumably these select how much of a file a parser processes
+// (full load, type check only, header only); the users are outside this part
+// of the file, so confirm there
+enum
+{
+ STBI__SCAN_load=0,
+ STBI__SCAN_type,
+ STBI__SCAN_header
+};
+
+// Pull the next chunk from the read callback into the context's internal
+// buffer and reset the read window to cover it. The bytes consumed from the
+// previous window are added to callback_already_read first.
+static void stbi__refill_buffer(stbi__context *s)
+{
+   int got = (s->io.read)(s->io_user_data, (char *) s->buffer_start, s->buflen);
+
+   s->callback_already_read += (int) (s->img_buffer - s->img_buffer_original);
+   s->img_buffer = s->buffer_start;
+
+   if (got != 0) {
+      s->img_buffer_end = s->buffer_start + got;
+      return;
+   }
+
+   // at end of file, treat same as if from memory, but need to handle case
+   // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file:
+   // expose a window holding a single zero byte
+   s->read_from_callbacks = 0;
+   s->img_buffer_end = s->buffer_start + 1;
+   *s->img_buffer = 0;
+}
+
+// Return the next byte of input and advance. When the window is exhausted, a
+// callback-based context is refilled first; otherwise 0 is returned and
+// nothing advances.
+stbi_inline static stbi_uc stbi__get8(stbi__context *s)
+{
+   if (s->img_buffer >= s->img_buffer_end) {
+      if (!s->read_from_callbacks)
+         return 0;
+      stbi__refill_buffer(s);
+   }
+   return *s->img_buffer++;
+}
+
+#if defined(STBI_NO_JPEG) && defined(STBI_NO_HDR) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
+// nothing
+#else
+// Return nonzero when no more input is available. For callback contexts the
+// eof callback is consulted first; for memory contexts only the read window
+// is checked.
+stbi_inline static int stbi__at_eof(stbi__context *s)
+{
+   if (s->io.read) {
+      int source_done = (s->io.eof)(s->io_user_data);
+      if (!source_done)
+         return 0;
+      // the source is exhausted; if refill already switched callbacks off,
+      // all that is left is the special 0 byte it planted
+      if (!s->read_from_callbacks)
+         return 1;
+   }
+
+   return (s->img_buffer >= s->img_buffer_end) ? 1 : 0;
+}
+#endif
+
+#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC)
+// nothing
+#else
+// Skip n bytes of input. A negative n jumps to the end of the current
+// window. For callback contexts, whatever does not fit in the buffered data
+// is delegated to the skip callback.
+static void stbi__skip(stbi__context *s, int n)
+{
+   if (n == 0)
+      return; // already there!
+
+   if (n < 0) {
+      s->img_buffer = s->img_buffer_end;
+      return;
+   }
+
+   if (s->io.read) {
+      int buffered = (int) (s->img_buffer_end - s->img_buffer);
+      if (n > buffered) {
+         // drop everything buffered and let the source skip the remainder
+         s->img_buffer = s->img_buffer_end;
+         (s->io.skip)(s->io_user_data, n - buffered);
+         return;
+      }
+   }
+
+   s->img_buffer += n;
+}
+#endif
+
+#if defined(STBI_NO_PNG) && defined(STBI_NO_TGA) && defined(STBI_NO_HDR) && defined(STBI_NO_PNM)
+// nothing
+#else
+// Read exactly n bytes into 'buffer'. Returns 1 if all n bytes were
+// delivered, 0 otherwise.
+//
+// For callback contexts with fewer than n bytes buffered, the buffered bytes
+// are copied out and the rest is read straight into 'buffer' through the
+// read callback, bypassing the internal buffer.
+static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
+{
+   if (s->io.read) {
+      int blen = (int) (s->img_buffer_end - s->img_buffer);
+      if (blen < n) {
+         int res, count;
+
+         memcpy(buffer, s->img_buffer, blen);
+
+         count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen);
+         res = (count == (n-blen));
+         s->img_buffer = s->img_buffer_end;
+         return res;
+      }
+   }
+
+   // compare lengths rather than pointers: for a large n taken from the file,
+   // img_buffer + n lies far past the end of the buffer, and merely forming
+   // such a pointer is undefined behaviour
+   if (n <= s->img_buffer_end - s->img_buffer) {
+      memcpy(buffer, s->img_buffer, n);
+      s->img_buffer += n;
+      return 1;
+   } else
+      return 0;
+}
+#endif
+
+#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC)
+// nothing
+#else
+// read a big-endian 16-bit value: first byte is the high byte
+static int stbi__get16be(stbi__context *s)
+{
+   int hi = stbi__get8(s);
+   int lo = stbi__get8(s);
+   return (hi << 8) + lo;
+}
+#endif
+
+#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC)
+// nothing
+#else
+// read a big-endian 32-bit value: first 16-bit half is the high half
+static stbi__uint32 stbi__get32be(stbi__context *s)
+{
+   stbi__uint32 hi = stbi__get16be(s);
+   stbi__uint32 lo = stbi__get16be(s);
+   return (hi << 16) + lo;
+}
+#endif
+
+#if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
+// nothing
+#else
+// read a little-endian 16-bit value: first byte is the low byte
+static int stbi__get16le(stbi__context *s)
+{
+   int lo = stbi__get8(s);
+   int hi = stbi__get8(s);
+   return lo + (hi << 8);
+}
+#endif
+
+#ifndef STBI_NO_BMP
+// read a little-endian 32-bit value: first 16-bit half is the low half
+static stbi__uint32 stbi__get32le(stbi__context *s)
+{
+   stbi__uint32 lo = stbi__get16le(s);
+   stbi__uint32 hi = (stbi__uint32) stbi__get16le(s);
+   return lo + (hi << 16);
+}
+#endif
+
+// keep only the low 8 bits of x and cast the result to stbi_uc
+#define STBI__BYTECAST(x) ((stbi_uc) ((x) & 255)) // truncate int to byte without warnings
+
+#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
+// nothing
+#else
+//////////////////////////////////////////////////////////////////////////////
+//
+// generic converter from built-in img_n to req_comp
+// individual types do this automatically as much as possible (e.g. jpeg
+// does all cases internally since it needs to colorspace convert anyway,
+// and it never has alpha, so very few cases ). png can automatically
+// interleave an alpha=255 channel, but falls back to this for other cases
+//
+// assume data buffer is malloced, so malloc a new one and free that one
+// only failure mode is malloc failing
+
+// convert an RGB triple to a single luminance byte using fixed-point weights
+static stbi_uc stbi__compute_y(int r, int g, int b)
+{
+   // the weights 77 + 150 + 29 sum to 256, so >> 8 scales the result back down
+   int weighted = r*77 + g*150 + b*29;
+   return (stbi_uc) (weighted >> 8);
+}
+#endif
+
+#if defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
+// nothing
+#else
+// Convert an 8-bit image of x by y pixels from img_n channels to req_comp
+// channels.
+//
+// If the counts already match, 'data' is returned untouched. Otherwise a new
+// buffer is allocated (with overflow checking), filled row by row, and 'data'
+// is freed; on every failure path 'data' is freed too. Added alpha is 255,
+// RGB to grey goes through stbi__compute_y, and grey to RGB replicates the
+// grey value.
+// Returns the converted buffer, or NULL on out-of-memory or an unsupported
+// channel combination.
+static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y)
+{
+ int i,j;
+ unsigned char *good;
+
+ if (req_comp == img_n) return data;
+ STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
+
+ good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0);
+ if (good == NULL) {
+ STBI_FREE(data);
+ return stbi__errpuc("outofmem", "Out of memory");
+ }
+
+ for (j=0; j < (int) y; ++j) {
+ unsigned char *src = data + j * x * img_n ;
+ unsigned char *dest = good + j * x * req_comp;
+
+ // STBI__COMBO packs (source channels, target channels) into one switch value;
+ // STBI__CASE expands to the case label plus a loop over the x pixels of the row
+ #define STBI__COMBO(a,b) ((a)*8+(b))
+ #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
+ // convert source image with img_n components to one with req_comp components;
+ // avoid switch per pixel, so use switch per scanline and massive macros
+ switch (STBI__COMBO(img_n, req_comp)) {
+ STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=255; } break;
+ STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break;
+ STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=255; } break;
+ STBI__CASE(2,1) { dest[0]=src[0]; } break;
+ STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break;
+ STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1]; } break;
+ STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=255; } break;
+ STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break;
+ STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = 255; } break;
+ STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break;
+ STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = src[3]; } break;
+ STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2]; } break;
+ default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return stbi__errpuc("unsupported", "Unsupported format conversion");
+ }
+ #undef STBI__CASE
+ }
+
+ STBI_FREE(data);
+ return good;
+}
+#endif
+
+#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD)
+// nothing
+#else
+// convert an RGB triple of 16-bit samples to a single 16-bit luminance value
+// using fixed-point weights
+static stbi__uint16 stbi__compute_y_16(int r, int g, int b)
+{
+   // the weights 77 + 150 + 29 sum to 256, so >> 8 scales the result back down
+   int weighted = r*77 + g*150 + b*29;
+   return (stbi__uint16) (weighted >> 8);
+}
+#endif
+
+#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD)
+// nothing
+#else
+// Convert a 16-bit-per-channel image of x by y pixels from img_n channels to
+// req_comp channels.
+//
+// If the counts already match, 'data' is returned untouched. Otherwise a new
+// buffer is allocated, filled row by row, and 'data' is freed; on every
+// failure path 'data' is freed too. Added alpha is 0xffff, RGB to grey goes
+// through stbi__compute_y_16, and grey to RGB replicates the grey value.
+// Returns the converted buffer, or NULL on out-of-memory (including a size
+// that would overflow int) or an unsupported channel combination.
+static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y)
+{
+   int i,j;
+   stbi__uint16 *good;
+
+   if (req_comp == img_n) return data;
+   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
+
+   // overflow-checked allocation, matching the 8-bit converter: each pixel
+   // needs req_comp samples of 2 bytes, so the per-pixel factor is req_comp*2
+   good = (stbi__uint16 *) stbi__malloc_mad3(req_comp * 2, x, y, 0);
+   if (good == NULL) {
+      STBI_FREE(data);
+      return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
+   }
+
+   for (j=0; j < (int) y; ++j) {
+      stbi__uint16 *src  = data + j * x * img_n   ;
+      stbi__uint16 *dest = good + j * x * req_comp;
+
+      // STBI__COMBO packs (source channels, target channels) into one switch value;
+      // STBI__CASE expands to the case label plus a loop over the x pixels of the row
+      #define STBI__COMBO(a,b)  ((a)*8+(b))
+      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
+      // convert source image with img_n components to one with req_comp components;
+      // avoid switch per pixel, so use switch per scanline and massive macros
+      switch (STBI__COMBO(img_n, req_comp)) {
+         STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=0xffff; } break;
+         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break;
+         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=0xffff; } break;
+         STBI__CASE(2,1) { dest[0]=src[0]; } break;
+         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break;
+         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1]; } break;
+         STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=0xffff; } break;
+         STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break;
+         STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = 0xffff; } break;
+         STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break;
+         STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = src[3]; } break;
+         STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2]; } break;
+         default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return (stbi__uint16*) stbi__errpuc("unsupported", "Unsupported format conversion");
+      }
+      #undef STBI__CASE
+   }
+
+   STBI_FREE(data);
+   return good;
+}
+#endif
+
+#ifndef STBI_NO_LINEAR
+// Convert an 8-bit image to floats. Non-alpha channels are mapped through
+// pow(v/255, gamma) * scale; the alpha channel, when present, is mapped
+// linearly to v/255. Takes ownership of 'data', which is freed on success and
+// on allocation failure. A NULL 'data' is passed through as NULL.
+static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
+{
+   int px, ch, color_channels;
+   float *output;
+
+   if (data == NULL) return NULL;
+
+   output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
+   if (output == NULL) {
+      STBI_FREE(data);
+      return stbi__errpf("outofmem", "Out of memory");
+   }
+
+   // compute number of non-alpha components: an even channel count means the
+   // last channel is alpha
+   color_channels = (comp & 1) ? comp : comp - 1;
+
+   for (px = 0; px < x*y; ++px)
+      for (ch = 0; ch < color_channels; ++ch)
+         output[px*comp + ch] = (float) (pow(data[px*comp+ch]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
+
+   // alpha, if any, is carried over without gamma or scale
+   if (color_channels < comp)
+      for (px = 0; px < x*y; ++px)
+         output[px*comp + color_channels] = data[px*comp + color_channels]/255.0f;
+
+   STBI_FREE(data);
+   return output;
+}
+#endif
+
+#ifndef STBI_NO_HDR
+#define stbi__float2int(x) ((int) (x))
+// Converts a float image of x*y pixels with `comp` channels into a freshly
+// allocated 8-bit image. Colour channels go through the inverse curve
+// (stbi__h2l_scale_i, stbi__h2l_gamma_i); when comp is even the last channel
+// is treated as alpha and is only rescaled. Every result is rounded by adding
+// 0.5 and clamped to 0..255.
+// `data` is always released with STBI_FREE before returning, whether the
+// conversion succeeds or the allocation fails. A NULL `data` yields NULL.
+static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp)
+{
+   stbi_uc *ldr;
+   int pixel_count, color_channels, px, ch;
+
+   if (data == NULL)
+      return NULL;
+
+   ldr = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0);
+   if (ldr == NULL) {
+      STBI_FREE(data);
+      return stbi__errpuc("outofmem", "Out of memory");
+   }
+
+   // an odd channel count means "no alpha"; otherwise the last channel is alpha
+   color_channels = (comp & 1) ? comp : comp - 1;
+   pixel_count = x * y;
+
+   for (px = 0; px < pixel_count; ++px) {
+      const float *src = data + px * comp;
+      stbi_uc *dst = ldr + px * comp;
+
+      for (ch = 0; ch < color_channels; ++ch) {
+         float z = (float) pow(src[ch]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
+         if (z < 0) z = 0;
+         if (z > 255) z = 255;
+         dst[ch] = (stbi_uc) stbi__float2int(z);
+      }
+
+      // ch now indexes the alpha channel, if there is one
+      if (ch < comp) {
+         float z = src[ch] * 255 + 0.5f;
+         if (z < 0) z = 0;
+         if (z > 255) z = 255;
+         dst[ch] = (stbi_uc) stbi__float2int(z);
+      }
+   }
+
+   STBI_FREE(data);
+   return ldr;
+}
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// "baseline" JPEG/JFIF decoder
+//
+// simple implementation
+// - doesn't support delayed output of y-dimension
+// - simple interface (only one output format: 8-bit interleaved RGB)
+// - doesn't try to recover corrupt jpegs
+// - doesn't allow partial loading, loading multiple at once
+// - still fast on x86 (copying globals into locals doesn't help x86)
+// - allocates lots of intermediate memory (full size of all components)
+// - non-interleaved case requires this anyway
+// - allows good upsampling (see next)
+// high-quality
+// - upsampled channels are bilinearly interpolated, even across blocks
+// - quality integer IDCT derived from IJG's 'slow'
+// performance
+// - fast huffman; reasonable integer IDCT
+// - some SIMD kernels for common paths on targets with SSE2/NEON
+// - uses a lot of intermediate memory, could cache poorly
+
+#ifndef STBI_NO_JPEG
+
+// huffman decoding acceleration
+#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache
+
+// One JPEG Huffman table in decode-ready form.
+// size/code/delta/maxcode/fast are filled by stbi__build_huffman; values[]
+// is read by the decoders but is populated outside that function.
+typedef struct
+{
+ // indexed by the top FAST_BITS bits of the bit buffer: symbol index for
+ // codes of at most FAST_BITS bits, or 255 meaning "not accelerated"
+ stbi_uc fast[1 << FAST_BITS];
+ // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
+ // code[k] is the Huffman code assigned to symbol index k
+ stbi__uint16 code[256];
+ // values[k] is what the decoder returns for symbol index k
+ stbi_uc values[256];
+ // size[k] is the code length in bits of symbol index k; the list is
+ // terminated by a 0 entry (hence 257 slots)
+ stbi_uc size[257];
+ // maxcode[j] is (largest j-bit code + 1) shifted left by (16-j);
+ // maxcode[17] is a 0xffffffff sentinel that always stops the search
+ unsigned int maxcode[18];
+ // delta[j] is added to a j-bit code to obtain its symbol index
+ int delta[17]; // old 'firstsymbol' - old 'firstcode'
+} stbi__huffman;
+
+// Decoder state for one JPEG stream: Huffman and dequantization tables,
+// per-component buffers, the entropy-decoder bit buffer, the parameters of
+// the current scan, and the function pointers for the pixel kernels.
+typedef struct
+{
+ stbi__context *s;
+ stbi__huffman huff_dc[4];
+ stbi__huffman huff_ac[4];
+ stbi__uint16 dequant[4][64];
+ stbi__int16 fast_ac[4][1 << FAST_BITS];
+
+// sizes for components, interleaved MCUs
+ int img_h_max, img_v_max;
+ int img_mcu_x, img_mcu_y;
+ int img_mcu_w, img_mcu_h;
+
+// definition of jpeg image component
+ struct
+ {
+ int id;
+ int h,v;
+ int tq;
+ int hd,ha;
+ // running DC predictor: updated by each decoded block and cleared by
+ // stbi__jpeg_reset
+ int dc_pred;
+
+ int x,y,w2,h2;
+ stbi_uc *data;
+ void *raw_data, *raw_coeff;
+ stbi_uc *linebuf;
+ short *coeff; // progressive only
+ int coeff_w, coeff_h; // number of 8x8 coefficient blocks
+ } img_comp[4];
+
+ stbi__uint32 code_buffer; // jpeg entropy-coded buffer
+ int code_bits; // number of valid bits
+ unsigned char marker; // marker seen while filling entropy buffer
+ int nomore; // flag if we saw a marker so must stop
+
+ // progressive-scan parameters read by the prog_dc / prog_ac block decoders
+ int progressive;
+ int spec_start;
+ int spec_end;
+ int succ_high;
+ int succ_low;
+ // pending end-of-band run: while non-zero the progressive AC decoder
+ // decrements it per block instead of reading new Huffman codes
+ int eob_run;
+ int jfif;
+ int app14_color_transform; // Adobe APP14 tag
+ int rgb;
+
+ int scan_n, order[4];
+ // todo is reloaded from restart_interval (or 0x7fffffff when that is 0)
+ // by stbi__jpeg_reset
+ int restart_interval, todo;
+
+// kernels
+ void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
+ void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
+ stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
+} stbi__jpeg;
+
+// Builds the decode tables of `h` from `count`, where count[i] is the number
+// of codes that are i+1 bits long (i = 0..15).
+// Fills size[], code[], delta[], maxcode[] and the fast[] lookup; values[] is
+// not written here.
+// Returns 1 on success. Returns the result of stbi__err() when the length
+// list reaches 257 entries or when a code does not fit in its bit length.
+static int stbi__build_huffman(stbi__huffman *h, int *count)
+{
+ int i,j,k=0;
+ unsigned int code;
+ // build size list for each symbol (from JPEG spec)
+ for (i=0; i < 16; ++i) {
+ for (j=0; j < count[i]; ++j) {
+ h->size[k++] = (stbi_uc) (i+1);
+ // size[] has 257 slots and the last one must hold the terminator
+ if(k >= 257) return stbi__err("bad size list","Corrupt JPEG");
+ }
+ }
+ h->size[k] = 0;
+
+ // compute actual symbols (from jpeg spec)
+ code = 0;
+ k = 0;
+ for(j=1; j <= 16; ++j) {
+ // compute delta to add to code to compute symbol id
+ h->delta[j] = k - code;
+ if (h->size[k] == j) {
+ while (h->size[k] == j)
+ h->code[k++] = (stbi__uint16) (code++);
+ // the last code handed out must still be representable in j bits
+ if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG");
+ }
+ // compute largest code + 1 for this size, preshifted as needed later
+ h->maxcode[j] = code << (16-j);
+ code <<= 1;
+ }
+ // j is 17 here: sentinel so the length search in the decoder always stops
+ h->maxcode[j] = 0xffffffff;
+
+ // build non-spec acceleration table; 255 is flag for not-accelerated
+ memset(h->fast, 255, 1 << FAST_BITS);
+ for (i=0; i < k; ++i) {
+ int s = h->size[i];
+ if (s <= FAST_BITS) {
+ // every FAST_BITS-bit pattern that starts with this code maps to symbol i
+ int c = h->code[i] << (FAST_BITS-s);
+ int m = 1 << (FAST_BITS-s);
+ for (j=0; j < m; ++j) {
+ h->fast[c+j] = (stbi_uc) i;
+ }
+ }
+ }
+ return 1;
+}
+
+// Fills fast_ac[] (1 << FAST_BITS entries) so that short AC codes can be
+// decoded, magnitude bits included, with a single lookup.
+// An entry is 0 when the lookup cannot be used; otherwise it packs
+//   value * 256 + run * 16 + (code length + magnitude bits)
+// where value is limited to -128..127.
+static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
+{
+   int idx;
+   for (idx = 0; idx < (1 << FAST_BITS); ++idx) {
+      stbi_uc sym = h->fast[idx];
+      int rs, run, magbits, len, val, half;
+
+      fast_ac[idx] = 0;
+      if (sym == 255)
+         continue; // code longer than FAST_BITS: no acceleration
+
+      rs = h->values[sym];
+      run = (rs >> 4) & 15;
+      magbits = rs & 15;
+      len = h->size[sym];
+
+      // need a magnitude, and code + magnitude must both fit in the window
+      if (magbits == 0 || len + magbits > FAST_BITS)
+         continue;
+
+      // magnitude code followed by receive_extend code
+      val = ((idx << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
+      half = 1 << (magbits - 1);
+      if (val < half) val += (~0U << magbits) + 1;
+
+      // only values that fit in the upper byte of the entry are stored
+      if (val < -128 || val > 127)
+         continue;
+      fast_ac[idx] = (stbi__int16) ((val * 256) + (run * 16) + (len + magbits));
+   }
+}
+
+// Tops up the entropy bit buffer one byte at a time until it holds more than
+// 24 valid bits. Once a marker has been seen (j->nomore) zero bytes are fed
+// instead of reading the stream. A 0xff followed by a non-zero byte is a
+// marker: it is stored in j->marker, j->nomore is set, and filling stops.
+static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
+{
+   for (;;) {
+      unsigned int byte = 0;
+      if (!j->nomore)
+         byte = stbi__get8(j->s);
+
+      if (byte == 0xff) {
+         int next;
+         do {
+            next = stbi__get8(j->s); // consume fill bytes
+         } while (next == 0xff);
+
+         if (next != 0) {
+            j->marker = (unsigned char) next;
+            j->nomore = 1;
+            return;
+         }
+         // 0xff 0x00 is a stuffed 0xff data byte: fall through and use it
+      }
+
+      j->code_buffer |= byte << (24 - j->code_bits);
+      j->code_bits += 8;
+      if (j->code_bits > 24)
+         break;
+   }
+}
+
+// stbi__bmask[n] = (1 << n) - 1, i.e. a mask of the n low bits, for n = 0..16
+static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
+
+// decode a jpeg huffman value from the bitstream
+// Returns the h->values[] entry of the matched code, or -1 when no code
+// matches, when the code is longer than the bits available, or when the
+// computed symbol index is out of range.
+stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
+{
+ unsigned int temp;
+ int c,k;
+
+ if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+
+ // look at the top FAST_BITS and determine what symbol ID it is,
+ // if the code is <= FAST_BITS
+ c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
+ k = h->fast[c];
+ if (k < 255) {
+ int s = h->size[k];
+ // the lookup may have matched bits past the end of the valid data
+ if (s > j->code_bits)
+ return -1;
+ j->code_buffer <<= s;
+ j->code_bits -= s;
+ return h->values[k];
+ }
+
+ // naive test is to shift the code_buffer down so k bits are
+ // valid, then test against maxcode. To speed this up, we've
+ // preshifted maxcode left so that it has (16-k) 0s at the
+ // end; in other words, regardless of the number of bits, it
+ // wants to be compared against something shifted to have 16;
+ // that way we don't need to shift inside the loop.
+ temp = j->code_buffer >> 16;
+ // terminates at k == 17 at the latest because maxcode[17] is 0xffffffff
+ for (k=FAST_BITS+1 ; ; ++k)
+ if (temp < h->maxcode[k])
+ break;
+ if (k == 17) {
+ // error! code not found
+ j->code_bits -= 16;
+ return -1;
+ }
+
+ if (k > j->code_bits)
+ return -1;
+
+ // convert the huffman code to the symbol id
+ c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
+ if(c < 0 || c >= 256) // symbol id out of bounds!
+ return -1;
+ STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
+
+ // convert the id to a symbol
+ j->code_bits -= k;
+ j->code_buffer <<= k;
+ return h->values[c];
+}
+
+// bias[n] = (-1<<n) + 1
+// stbi__extend_receive adds bias[n] to an n-bit value whose leading bit is
+// clear, which maps it onto the negative half of the n-bit range.
+static const int stbi__jbias[16] = {0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047,-4095,-8191,-16383,-32767};
+
+// Combined JPEG 'receive' and 'extend': baseline decoding always extends
+// what it receives, so both steps are done together.
+// Reads n bits from the bit buffer and returns them as a signed value: if the
+// first bit read is 1 the bits are returned as-is, otherwise stbi__jbias[n]
+// is added to produce the negative value. Returns 0 when the stream cannot
+// supply n bits.
+stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
+{
+   unsigned int rotated, bits;
+   int leading_bit;
+
+   if (j->code_bits < n) {
+      stbi__grow_buffer_unsafe(j);
+      if (j->code_bits < n)
+         return 0; // ran out of bits from stream, return 0s instead of continuing
+   }
+
+   // the first bit of the field sits in the MSB of the buffer
+   leading_bit = j->code_buffer >> 31;
+
+   // rotate the n wanted bits into the low end, then split them off
+   rotated = stbi_lrot(j->code_buffer, n);
+   bits = rotated & stbi__bmask[n];
+   j->code_buffer = rotated & ~stbi__bmask[n];
+   j->code_bits -= n;
+
+   // (leading_bit - 1) is all ones when the bit was 0 and zero when it was 1,
+   // so the bias is applied only in the former case
+   return bits + (stbi__jbias[n] & (leading_bit - 1));
+}
+
+// Reads n bits from the bit buffer and returns them as an unsigned value.
+// Returns 0 when the stream cannot supply n bits.
+stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
+{
+   unsigned int rotated;
+
+   if (j->code_bits < n) {
+      stbi__grow_buffer_unsafe(j);
+      if (j->code_bits < n)
+         return 0; // ran out of bits from stream, return 0s instead of continuing
+   }
+
+   // rotate the n wanted bits into the low end, then split them off
+   rotated = stbi_lrot(j->code_buffer, n);
+   j->code_buffer = rotated & ~stbi__bmask[n];
+   j->code_bits -= n;
+   return rotated & stbi__bmask[n];
+}
+
+// Consumes one bit from the bit buffer. The result is non-zero when the bit
+// was set (it is the bit left in its MSB position, not normalised to 1) and
+// 0 when it was clear or when the stream has no more bits.
+stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
+{
+   unsigned int before;
+
+   if (j->code_bits < 1) {
+      stbi__grow_buffer_unsafe(j);
+      if (j->code_bits < 1)
+         return 0; // ran out of bits from stream, return 0s instead of continuing
+   }
+
+   before = j->code_buffer;
+   j->code_bits -= 1;
+   j->code_buffer <<= 1;
+   return before & 0x80000000;
+}
+
+// given a value that's at position X in the zigzag stream,
+// where does it appear in the 8x8 matrix coded as row-major?
+// The table has 15 extra entries (all 63) because the block decoders add a
+// run of up to 15 to an index that can already be 63 before looking it up.
+static const stbi_uc stbi__jpeg_dezigzag[64+15] =
+{
+ 0, 1, 8, 16, 9, 2, 3, 10,
+ 17, 24, 32, 25, 18, 11, 4, 5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13, 6, 7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63,
+ // let corrupt input sample past end
+ 63, 63, 63, 63, 63, 63, 63, 63,
+ 63, 63, 63, 63, 63, 63, 63
+};
+
+// decode one 64-entry block--
+// Decodes the DC difference with `hdc`, then the AC coefficients with `hac`
+// (using the `fac` lookup where it has an entry), multiplies each by the
+// matching `dequant` entry and stores it at its de-zigzagged position in
+// `data`. `b` selects the component whose DC predictor is updated.
+// Returns 1 on success or the result of stbi__err() on corrupt input.
+static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant)
+{
+ int diff,dc,k;
+ int t;
+
+ if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+ t = stbi__jpeg_huff_decode(j, hdc);
+ if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG");
+
+ // 0 all the ac values now so we can do it 32-bits at a time
+ memset(data,0,64*sizeof(data[0]));
+
+ // t is the bit length of the DC difference; 0 means "no change"
+ diff = t ? stbi__extend_receive(j, t) : 0;
+ if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta","Corrupt JPEG");
+ dc = j->img_comp[b].dc_pred + diff;
+ j->img_comp[b].dc_pred = dc;
+ if (!stbi__mul2shorts_valid(dc, dequant[0])) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
+ data[0] = (short) (dc * dequant[0]);
+
+ // decode AC components, see JPEG spec
+ k = 1;
+ do {
+ unsigned int zig;
+ int c,r,s;
+ if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+ c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
+ r = fac[c];
+ if (r) { // fast-AC path
+ // entry layout: value in bits 8 and up, run in bits 4..7, length in bits 0..3
+ k += (r >> 4) & 15; // run
+ s = r & 15; // combined length
+ if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available");
+ j->code_buffer <<= s;
+ j->code_bits -= s;
+ // decode into unzigzag'd location
+ zig = stbi__jpeg_dezigzag[k++];
+ data[zig] = (short) ((r >> 8) * dequant[zig]);
+ } else {
+ int rs = stbi__jpeg_huff_decode(j, hac);
+ if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
+ // high nibble is the zero run, low nibble the magnitude bit count
+ s = rs & 15;
+ r = rs >> 4;
+ if (s == 0) {
+ if (rs != 0xf0) break; // end block
+ // 0xf0: skip 16 zero coefficients
+ k += 16;
+ } else {
+ k += r;
+ // decode into unzigzag'd location
+ zig = stbi__jpeg_dezigzag[k++];
+ data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]);
+ }
+ }
+ } while (k < 64);
+ return 1;
+}
+
+// Decodes the DC coefficient of one block in a progressive scan.
+// In a first scan (succ_high == 0) the block is cleared, the DC difference is
+// decoded with `hdc`, the predictor of component `b` is updated and the value
+// is stored scaled by 1 << succ_low. In a refinement scan one bit is read
+// and, if set, 1 << succ_low is added to data[0].
+// Returns 1 on success or the result of stbi__err() on corrupt input.
+static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
+{
+   // a DC scan must not also cover AC coefficients
+   if (j->spec_end != 0)
+      return stbi__err("can't merge dc and ac", "Corrupt JPEG");
+
+   if (j->code_bits < 16)
+      stbi__grow_buffer_unsafe(j);
+
+   if (j->succ_high != 0) {
+      // refinement scan for DC coefficient
+      if (stbi__jpeg_get_bit(j))
+         data[0] += (short) (1 << j->succ_low);
+      return 1;
+   }
+
+   // first scan for DC coefficient, must be first
+   {
+      int nbits, delta, value;
+
+      memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
+
+      nbits = stbi__jpeg_huff_decode(j, hdc);
+      if (nbits < 0 || nbits > 15)
+         return stbi__err("can't merge dc and ac", "Corrupt JPEG");
+      delta = nbits ? stbi__extend_receive(j, nbits) : 0;
+
+      if (!stbi__addints_valid(j->img_comp[b].dc_pred, delta))
+         return stbi__err("bad delta", "Corrupt JPEG");
+      value = j->img_comp[b].dc_pred + delta;
+      j->img_comp[b].dc_pred = value;
+
+      if (!stbi__mul2shorts_valid(value, 1 << j->succ_low))
+         return stbi__err("can't merge dc and ac", "Corrupt JPEG");
+      data[0] = (short) (value * (1 << j->succ_low));
+   }
+   return 1;
+}
+
+// @OPTIMIZE: store non-zigzagged during the decode passes,
+// and only de-zigzag when dequantizing
+// Decodes the AC coefficients spec_start..spec_end of one block in a
+// progressive scan, using `hac` and the `fac` fast lookup.
+// With succ_high == 0 this is a first pass: coefficients are written scaled
+// by 1 << succ_low. Otherwise it is a refinement pass that adds one bit of
+// precision (1 << succ_low) to existing coefficients and may insert new ones.
+// j->eob_run carries end-of-band runs across blocks.
+// Returns 1 on success or the result of stbi__err() on corrupt input.
+static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
+{
+ int k;
+ if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
+
+ if (j->succ_high == 0) {
+ int shift = j->succ_low;
+
+ // still inside an end-of-band run: this block adds nothing
+ if (j->eob_run) {
+ --j->eob_run;
+ return 1;
+ }
+
+ k = j->spec_start;
+ do {
+ unsigned int zig;
+ int c,r,s;
+ if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+ c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
+ r = fac[c];
+ if (r) { // fast-AC path
+ k += (r >> 4) & 15; // run
+ s = r & 15; // combined length
+ if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available");
+ j->code_buffer <<= s;
+ j->code_bits -= s;
+ zig = stbi__jpeg_dezigzag[k++];
+ data[zig] = (short) ((r >> 8) * (1 << shift));
+ } else {
+ int rs = stbi__jpeg_huff_decode(j, hac);
+ if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
+ s = rs & 15;
+ r = rs >> 4;
+ if (s == 0) {
+ if (r < 15) {
+ // end-of-band run of (1 << r) plus r extra bits; the current
+ // block counts as the first one, hence the decrement
+ j->eob_run = (1 << r);
+ if (r)
+ j->eob_run += stbi__jpeg_get_bits(j, r);
+ --j->eob_run;
+ break;
+ }
+ k += 16;
+ } else {
+ k += r;
+ zig = stbi__jpeg_dezigzag[k++];
+ data[zig] = (short) (stbi__extend_receive(j,s) * (1 << shift));
+ }
+ }
+ } while (k <= j->spec_end);
+ } else {
+ // refinement scan for these AC coefficients
+
+ short bit = (short) (1 << j->succ_low);
+
+ if (j->eob_run) {
+ --j->eob_run;
+ // inside an end-of-band run only already-nonzero coefficients are
+ // refined: each one consumes a bit that says whether to grow it
+ for (k = j->spec_start; k <= j->spec_end; ++k) {
+ short *p = &data[stbi__jpeg_dezigzag[k]];
+ if (*p != 0)
+ if (stbi__jpeg_get_bit(j))
+ if ((*p & bit)==0) {
+ if (*p > 0)
+ *p += bit;
+ else
+ *p -= bit;
+ }
+ }
+ } else {
+ k = j->spec_start;
+ do {
+ int r,s;
+ int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
+ if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
+ s = rs & 15;
+ r = rs >> 4;
+ if (s == 0) {
+ if (r < 15) {
+ j->eob_run = (1 << r) - 1;
+ if (r)
+ j->eob_run += stbi__jpeg_get_bits(j, r);
+ r = 64; // force end of block
+ } else {
+ // r=15 s=0 should write 16 0s, so we just do
+ // a run of 15 0s and then write s (which is 0),
+ // so we don't have to do anything special here
+ }
+ } else {
+ if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
+ // sign bit
+ if (stbi__jpeg_get_bit(j))
+ s = bit;
+ else
+ s = -bit;
+ }
+
+ // advance by r
+ // zero coefficients count down r; nonzero ones are refined in passing
+ while (k <= j->spec_end) {
+ short *p = &data[stbi__jpeg_dezigzag[k++]];
+ if (*p != 0) {
+ if (stbi__jpeg_get_bit(j))
+ if ((*p & bit)==0) {
+ if (*p > 0)
+ *p += bit;
+ else
+ *p -= bit;
+ }
+ } else {
+ if (r == 0) {
+ *p = (short) s;
+ break;
+ }
+ --r;
+ }
+ }
+ } while (k <= j->spec_end);
+ }
+ }
+ return 1;
+}
+
+// Clamps an int to 0..255 and returns it as a byte: negative inputs give 0,
+// inputs above 255 give 255.
+stbi_inline static stbi_uc stbi__clamp(int x)
+{
+   // one unsigned comparison accepts exactly the in-range values, because a
+   // negative x becomes a large unsigned number
+   if ((unsigned int) x <= 255)
+      return (stbi_uc) x;
+   return (stbi_uc) (x < 0 ? 0 : 255);
+}
+
+// stbi__f2f: floating-point constant -> fixed point with 12 fractional bits
+// (multiply by 4096 and add 0.5 before truncating to int)
+#define stbi__f2f(x) ((int) (((x) * 4096 + 0.5)))
+// stbi__fsh: scale an integer expression by 4096 to match stbi__f2f products
+#define stbi__fsh(x) ((x) * 4096)
+
+// derived from jidctint -- DCT_ISLOW
+// One 1-D 8-point IDCT step on inputs s0..s7. The macro DECLARES the locals
+// t0..t3, p1..p5 and x0..x3, so it can be expanded only once per scope.
+// On exit x0..x3 hold the even-part terms and t0..t3 the odd-part terms; the
+// caller forms the outputs as x0+t3, x1+t2, x2+t1, x3+t0 and the mirrored
+// x3-t0, x2-t1, x1-t2, x0-t3, after adding its own rounding bias.
+#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
+ int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
+ p2 = s2; \
+ p3 = s6; \
+ p1 = (p2+p3) * stbi__f2f(0.5411961f); \
+ t2 = p1 + p3*stbi__f2f(-1.847759065f); \
+ t3 = p1 + p2*stbi__f2f( 0.765366865f); \
+ p2 = s0; \
+ p3 = s4; \
+ t0 = stbi__fsh(p2+p3); \
+ t1 = stbi__fsh(p2-p3); \
+ x0 = t0+t3; \
+ x3 = t0-t3; \
+ x1 = t1+t2; \
+ x2 = t1-t2; \
+ t0 = s7; \
+ t1 = s5; \
+ t2 = s3; \
+ t3 = s1; \
+ p3 = t0+t2; \
+ p4 = t1+t3; \
+ p1 = t0+t3; \
+ p2 = t1+t2; \
+ p5 = (p3+p4)*stbi__f2f( 1.175875602f); \
+ t0 = t0*stbi__f2f( 0.298631336f); \
+ t1 = t1*stbi__f2f( 2.053119869f); \
+ t2 = t2*stbi__f2f( 3.072711026f); \
+ t3 = t3*stbi__f2f( 1.501321110f); \
+ p1 = p5 + p1*stbi__f2f(-0.899976223f); \
+ p2 = p5 + p2*stbi__f2f(-2.562915447f); \
+ p3 = p3*stbi__f2f(-1.961570560f); \
+ p4 = p4*stbi__f2f(-0.390180644f); \
+ t3 += p1+p4; \
+ t2 += p2+p3; \
+ t1 += p2+p4; \
+ t0 += p1+p3;
+
+// Generic C 8x8 inverse DCT.
+// Reads the 64 coefficients in `data` (row-major), runs a column pass into a
+// temporary int array and then a row pass, and writes 8 rows of 8 clamped
+// bytes to `out`, advancing by `out_stride` bytes between rows.
+static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
+{
+ int i,val[64],*v=val;
+ stbi_uc *o;
+ short *d = data;
+
+ // columns
+ for (i=0; i < 8; ++i,++d, ++v) {
+ // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
+ if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
+ && d[40]==0 && d[48]==0 && d[56]==0) {
+ // no shortcut 0 seconds
+ // (1|2|3|4|5|6|7)==0 0 seconds
+ // all separate -0.047 seconds
+ // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds
+ // *4 matches the 2 extra bits of precision kept by the general path
+ int dcterm = d[0]*4;
+ v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
+ } else {
+ STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
+ // constants scaled things up by 1<<12; let's bring them back
+ // down, but keep 2 extra bits of precision
+ x0 += 512; x1 += 512; x2 += 512; x3 += 512;
+ v[ 0] = (x0+t3) >> 10;
+ v[56] = (x0-t3) >> 10;
+ v[ 8] = (x1+t2) >> 10;
+ v[48] = (x1-t2) >> 10;
+ v[16] = (x2+t1) >> 10;
+ v[40] = (x2-t1) >> 10;
+ v[24] = (x3+t0) >> 10;
+ v[32] = (x3-t0) >> 10;
+ }
+ }
+
+ // rows: one output scanline per iteration
+ for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
+ // no fast case since the first 1D IDCT spread components out
+ STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
+ // constants scaled things up by 1<<12, plus we had 1<<2 from first
+ // loop, plus horizontal and vertical each scale by sqrt(8) so together
+ // we've got an extra 1<<3, so 1<<17 total we need to remove.
+ // so we want to round that, which means adding 0.5 * 1<<17,
+ // aka 65536. Also, we'll end up with -128 to 127 that we want
+ // to encode as 0..255 by adding 128, so we'll add that before the shift
+ x0 += 65536 + (128<<17);
+ x1 += 65536 + (128<<17);
+ x2 += 65536 + (128<<17);
+ x3 += 65536 + (128<<17);
+ // tried computing the shifts into temps, or'ing the temps to see
+ // if any were out of range, but that was slower
+ o[0] = stbi__clamp((x0+t3) >> 17);
+ o[7] = stbi__clamp((x0-t3) >> 17);
+ o[1] = stbi__clamp((x1+t2) >> 17);
+ o[6] = stbi__clamp((x1-t2) >> 17);
+ o[2] = stbi__clamp((x2+t1) >> 17);
+ o[5] = stbi__clamp((x2-t1) >> 17);
+ o[3] = stbi__clamp((x3+t0) >> 17);
+ o[4] = stbi__clamp((x3-t0) >> 17);
+ }
+}
+
+#ifdef STBI_SSE2
+// sse2 integer IDCT. not the fastest possible implementation but it
+// produces bit-identical results to the generic C version so it's
+// fully "transparent".
+// Same interface as stbi__idct_block: reads 64 coefficients from `data` and
+// writes 8 rows of 8 bytes to `out`, `out_stride` bytes apart.
+// The coefficients are fetched with _mm_load_si128 (the aligned load), so
+// `data` has to be 16-byte aligned. Each output row is written with an
+// 8-byte _mm_storel_epi64.
+static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
+{
+ // This is constructed to match our regular (generic) integer IDCT exactly.
+ __m128i row0, row1, row2, row3, row4, row5, row6, row7;
+ __m128i tmp;
+
+ // dot product constant: even elems=x, odd elems=y
+ #define dct_const(x,y) _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))
+
+ // out(0) = c0[even]*x + c0[odd]*y (c0, x, y 16-bit, out 32-bit)
+ // out(1) = c1[even]*x + c1[odd]*y
+ #define dct_rot(out0,out1, x,y,c0,c1) \
+ __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
+ __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
+ __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
+ __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
+ __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
+ __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)
+
+ // out = in << 12 (in 16-bit, out 32-bit)
+ #define dct_widen(out, in) \
+ __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
+ __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)
+
+ // wide add
+ #define dct_wadd(out, a, b) \
+ __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
+ __m128i out##_h = _mm_add_epi32(a##_h, b##_h)
+
+ // wide sub
+ #define dct_wsub(out, a, b) \
+ __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
+ __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)
+
+ // butterfly a/b, add bias, then shift by "s" and pack
+ #define dct_bfly32o(out0, out1, a,b,bias,s) \
+ { \
+ __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
+ __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
+ dct_wadd(sum, abiased, b); \
+ dct_wsub(dif, abiased, b); \
+ out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
+ out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
+ }
+
+ // 8-bit interleave step (for transposes)
+ #define dct_interleave8(a, b) \
+ tmp = a; \
+ a = _mm_unpacklo_epi8(a, b); \
+ b = _mm_unpackhi_epi8(tmp, b)
+
+ // 16-bit interleave step (for transposes)
+ #define dct_interleave16(a, b) \
+ tmp = a; \
+ a = _mm_unpacklo_epi16(a, b); \
+ b = _mm_unpackhi_epi16(tmp, b)
+
+ // one full 1-D pass over row0..row7 in place; the helper macros declare
+ // their temporaries, which is why the body is wrapped in its own scope
+ #define dct_pass(bias,shift) \
+ { \
+ /* even part */ \
+ dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
+ __m128i sum04 = _mm_add_epi16(row0, row4); \
+ __m128i dif04 = _mm_sub_epi16(row0, row4); \
+ dct_widen(t0e, sum04); \
+ dct_widen(t1e, dif04); \
+ dct_wadd(x0, t0e, t3e); \
+ dct_wsub(x3, t0e, t3e); \
+ dct_wadd(x1, t1e, t2e); \
+ dct_wsub(x2, t1e, t2e); \
+ /* odd part */ \
+ dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
+ dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
+ __m128i sum17 = _mm_add_epi16(row1, row7); \
+ __m128i sum35 = _mm_add_epi16(row3, row5); \
+ dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
+ dct_wadd(x4, y0o, y4o); \
+ dct_wadd(x5, y1o, y5o); \
+ dct_wadd(x6, y2o, y5o); \
+ dct_wadd(x7, y3o, y4o); \
+ dct_bfly32o(row0,row7, x0,x7,bias,shift); \
+ dct_bfly32o(row1,row6, x1,x6,bias,shift); \
+ dct_bfly32o(row2,row5, x2,x5,bias,shift); \
+ dct_bfly32o(row3,row4, x3,x4,bias,shift); \
+ }
+
+ // rotation constants: the same fixed-point factors as STBI__IDCT_1D,
+ // pre-combined into (even, odd) pairs for dct_rot
+ __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
+ __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f));
+ __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
+ __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
+ __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f));
+ __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f));
+ __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f));
+ __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f));
+
+ // rounding biases in column/row passes, see stbi__idct_block for explanation.
+ __m128i bias_0 = _mm_set1_epi32(512);
+ __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17));
+
+ // load
+ row0 = _mm_load_si128((const __m128i *) (data + 0*8));
+ row1 = _mm_load_si128((const __m128i *) (data + 1*8));
+ row2 = _mm_load_si128((const __m128i *) (data + 2*8));
+ row3 = _mm_load_si128((const __m128i *) (data + 3*8));
+ row4 = _mm_load_si128((const __m128i *) (data + 4*8));
+ row5 = _mm_load_si128((const __m128i *) (data + 5*8));
+ row6 = _mm_load_si128((const __m128i *) (data + 6*8));
+ row7 = _mm_load_si128((const __m128i *) (data + 7*8));
+
+ // column pass
+ dct_pass(bias_0, 10);
+
+ {
+ // 16bit 8x8 transpose pass 1
+ dct_interleave16(row0, row4);
+ dct_interleave16(row1, row5);
+ dct_interleave16(row2, row6);
+ dct_interleave16(row3, row7);
+
+ // transpose pass 2
+ dct_interleave16(row0, row2);
+ dct_interleave16(row1, row3);
+ dct_interleave16(row4, row6);
+ dct_interleave16(row5, row7);
+
+ // transpose pass 3
+ dct_interleave16(row0, row1);
+ dct_interleave16(row2, row3);
+ dct_interleave16(row4, row5);
+ dct_interleave16(row6, row7);
+ }
+
+ // row pass
+ dct_pass(bias_1, 17);
+
+ {
+ // pack
+ __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
+ __m128i p1 = _mm_packus_epi16(row2, row3);
+ __m128i p2 = _mm_packus_epi16(row4, row5);
+ __m128i p3 = _mm_packus_epi16(row6, row7);
+
+ // 8bit 8x8 transpose pass 1
+ dct_interleave8(p0, p2); // a0e0a1e1...
+ dct_interleave8(p1, p3); // c0g0c1g1...
+
+ // transpose pass 2
+ dct_interleave8(p0, p1); // a0c0e0g0...
+ dct_interleave8(p2, p3); // b0d0f0h0...
+
+ // transpose pass 3
+ dct_interleave8(p0, p2); // a0b0c0d0...
+ dct_interleave8(p1, p3); // a4b4c4d4...
+
+ // store
+ // each register holds two output rows; the 0x4e shuffle swaps the 64-bit
+ // halves so the second row can be written with a low-half store
+ _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
+ _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
+ _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
+ _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
+ _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
+ _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
+ _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
+ _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
+ }
+
+#undef dct_const
+#undef dct_rot
+#undef dct_widen
+#undef dct_wadd
+#undef dct_wsub
+#undef dct_bfly32o
+#undef dct_interleave8
+#undef dct_interleave16
+#undef dct_pass
+}
+
+#endif // STBI_SSE2
+
+#ifdef STBI_NEON
+
+// NEON integer IDCT. should produce bit-identical
+// results to the generic C version.
+// Same interface as stbi__idct_block: reads 64 coefficients from `data` and
+// writes 8 rows of 8 bytes to `out`, `out_stride` bytes apart.
+static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
+{
+ int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;
+
+ // the same fixed-point factors as STBI__IDCT_1D, one per vector
+ int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
+ int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
+ int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f));
+ int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f));
+ int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
+ int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
+ int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
+ int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
+ int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f));
+ int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f));
+ int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f));
+ int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f));
+
+// out = inq * coeff, widened to 32 bits (low and high halves)
+#define dct_long_mul(out, inq, coeff) \
+ int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
+ int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)
+
+// out = acc + inq * coeff, widened to 32 bits (low and high halves)
+#define dct_long_mac(out, acc, inq, coeff) \
+ int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
+ int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)
+
+// out = inq << 12, widened to 32 bits
+#define dct_widen(out, inq) \
+ int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
+ int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)
+
+// wide add
+#define dct_wadd(out, a, b) \
+ int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
+ int32x4_t out##_h = vaddq_s32(a##_h, b##_h)
+
+// wide sub
+#define dct_wsub(out, a, b) \
+ int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
+ int32x4_t out##_h = vsubq_s32(a##_h, b##_h)
+
+// butterfly a/b, then shift using "shiftop" by "s" and pack
+#define dct_bfly32o(out0,out1, a,b,shiftop,s) \
+ { \
+ dct_wadd(sum, a, b); \
+ dct_wsub(dif, a, b); \
+ out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
+ out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
+ }
+
+// one full 1-D pass over row0..row7 in place; the helper macros declare
+// their temporaries, which is why the body is wrapped in its own scope
+#define dct_pass(shiftop, shift) \
+ { \
+ /* even part */ \
+ int16x8_t sum26 = vaddq_s16(row2, row6); \
+ dct_long_mul(p1e, sum26, rot0_0); \
+ dct_long_mac(t2e, p1e, row6, rot0_1); \
+ dct_long_mac(t3e, p1e, row2, rot0_2); \
+ int16x8_t sum04 = vaddq_s16(row0, row4); \
+ int16x8_t dif04 = vsubq_s16(row0, row4); \
+ dct_widen(t0e, sum04); \
+ dct_widen(t1e, dif04); \
+ dct_wadd(x0, t0e, t3e); \
+ dct_wsub(x3, t0e, t3e); \
+ dct_wadd(x1, t1e, t2e); \
+ dct_wsub(x2, t1e, t2e); \
+ /* odd part */ \
+ int16x8_t sum15 = vaddq_s16(row1, row5); \
+ int16x8_t sum17 = vaddq_s16(row1, row7); \
+ int16x8_t sum35 = vaddq_s16(row3, row5); \
+ int16x8_t sum37 = vaddq_s16(row3, row7); \
+ int16x8_t sumodd = vaddq_s16(sum17, sum35); \
+ dct_long_mul(p5o, sumodd, rot1_0); \
+ dct_long_mac(p1o, p5o, sum17, rot1_1); \
+ dct_long_mac(p2o, p5o, sum35, rot1_2); \
+ dct_long_mul(p3o, sum37, rot2_0); \
+ dct_long_mul(p4o, sum15, rot2_1); \
+ dct_wadd(sump13o, p1o, p3o); \
+ dct_wadd(sump24o, p2o, p4o); \
+ dct_wadd(sump23o, p2o, p3o); \
+ dct_wadd(sump14o, p1o, p4o); \
+ dct_long_mac(x4, sump13o, row7, rot3_0); \
+ dct_long_mac(x5, sump24o, row5, rot3_1); \
+ dct_long_mac(x6, sump23o, row3, rot3_2); \
+ dct_long_mac(x7, sump14o, row1, rot3_3); \
+ dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
+ dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
+ dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
+ dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
+ }
+
+ // load
+ row0 = vld1q_s16(data + 0*8);
+ row1 = vld1q_s16(data + 1*8);
+ row2 = vld1q_s16(data + 2*8);
+ row3 = vld1q_s16(data + 3*8);
+ row4 = vld1q_s16(data + 4*8);
+ row5 = vld1q_s16(data + 5*8);
+ row6 = vld1q_s16(data + 6*8);
+ row7 = vld1q_s16(data + 7*8);
+
+ // add DC bias
+ // adds 1024 to lane 0 of row0 only, i.e. to the DC coefficient
+ // NOTE(review): presumably this stands in for the +128 output bias of the
+ // generic version -- confirm before relying on it
+ row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));
+
+ // column pass
+ dct_pass(vrshrn_n_s32, 10);
+
+ // 16bit 8x8 transpose
+ {
+// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
+// whether compilers actually get this is another story, sadly.
+#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
+#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
+#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }
+
+ // pass 1
+ dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
+ dct_trn16(row2, row3);
+ dct_trn16(row4, row5);
+ dct_trn16(row6, row7);
+
+ // pass 2
+ dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
+ dct_trn32(row1, row3);
+ dct_trn32(row4, row6);
+ dct_trn32(row5, row7);
+
+ // pass 3
+ dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
+ dct_trn64(row1, row5);
+ dct_trn64(row2, row6);
+ dct_trn64(row3, row7);
+
+#undef dct_trn16
+#undef dct_trn32
+#undef dct_trn64
+ }
+
+ // row pass
+ // vrshrn_n_s32 only supports shifts up to 16, we need
+ // 17. so do a non-rounding shift of 16 first then follow
+ // up with a rounding shift by 1.
+ dct_pass(vshrn_n_s32, 16);
+
+ {
+ // pack and round
+ uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
+ uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
+ uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
+ uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
+ uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
+ uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
+ uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
+ uint8x8_t p7 = vqrshrun_n_s16(row7, 1);
+
+ // again, these can translate into one instruction, but often don't.
+#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
+#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
+#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }
+
+ // sadly can't use interleaved stores here since we only write
+ // 8 bytes to each scan line!
+
+ // 8x8 8-bit transpose pass 1
+ dct_trn8_8(p0, p1);
+ dct_trn8_8(p2, p3);
+ dct_trn8_8(p4, p5);
+ dct_trn8_8(p6, p7);
+
+ // pass 2
+ dct_trn8_16(p0, p2);
+ dct_trn8_16(p1, p3);
+ dct_trn8_16(p4, p6);
+ dct_trn8_16(p5, p7);
+
+ // pass 3
+ dct_trn8_32(p0, p4);
+ dct_trn8_32(p1, p5);
+ dct_trn8_32(p2, p6);
+ dct_trn8_32(p3, p7);
+
+ // store
+ vst1_u8(out, p0); out += out_stride;
+ vst1_u8(out, p1); out += out_stride;
+ vst1_u8(out, p2); out += out_stride;
+ vst1_u8(out, p3); out += out_stride;
+ vst1_u8(out, p4); out += out_stride;
+ vst1_u8(out, p5); out += out_stride;
+ vst1_u8(out, p6); out += out_stride;
+ vst1_u8(out, p7);
+
+#undef dct_trn8_8
+#undef dct_trn8_16
+#undef dct_trn8_32
+ }
+
+#undef dct_long_mul
+#undef dct_long_mac
+#undef dct_widen
+#undef dct_wadd
+#undef dct_wsub
+#undef dct_bfly32o
+#undef dct_pass
+}
+
+#endif // STBI_NEON
+
+#define STBI__MARKER_none 0xff
+// Fetch the next JPEG marker code.
+// A marker cached in j->marker is handed out first (and the cache cleared).
+// Otherwise one is read from the stream: it must start with 0xff, any run of
+// further 0xff fill bytes is skipped, and the first other byte is returned.
+// If the stream does not start with 0xff the result is STBI__MARKER_none
+// (0xff), which is never a valid marker value.
+static stbi_uc stbi__get_marker(stbi__jpeg *j)
+{
+   stbi_uc byte;
+
+   // a cached marker takes priority; it is consumed by this call
+   if (j->marker != STBI__MARKER_none) {
+      stbi_uc pending = j->marker;
+      j->marker = STBI__MARKER_none;
+      return pending;
+   }
+
+   // anything that does not open with 0xff is not a marker
+   byte = stbi__get8(j->s);
+   if (byte != 0xff)
+      return STBI__MARKER_none;
+
+   // consume repeated 0xff fill bytes; the byte that ends the run is the code
+   do {
+      byte = stbi__get8(j->s);
+   } while (byte == 0xff);
+   return byte;
+}
+
+// in each scan, we'll have scan_n components, and the order
+// of the components is specified by order[]
+#define STBI__RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7)
+
+// Put the entropy decoder back into its start-of-interval state: empty bit
+// buffer, no cached marker, no pending end-of-band run, DC prediction zeroed
+// for all four component slots, and the restart countdown reloaded.
+static void stbi__jpeg_reset(stbi__jpeg *j)
+{
+   int c;
+
+   // bit buffer and decoder flags
+   j->code_buffer = 0;
+   j->code_bits = 0;
+   j->nomore = 0;
+   j->eob_run = 0;
+   j->marker = STBI__MARKER_none;
+
+   // DC prediction starts over from zero
+   for (c = 0; c < 4; ++c)
+      j->img_comp[c].dc_pred = 0;
+
+   // without a restart interval, count down from 0x7fffffff instead: no more
+   // than 1<<31 MCUs is plenty safe, since we don't even allow 1<<30 pixels
+   if (j->restart_interval)
+      j->todo = j->restart_interval;
+   else
+      j->todo = 0x7fffffff;
+}
+
+static int stbi__parse_entropy_coded_data(stbi__jpeg *z) // decode every block of the current scan; returns 0 when a block decoder fails, 1 otherwise (including an early stop at a non-restart marker)
+{
+ stbi__jpeg_reset(z); // each scan starts with an empty bit buffer, zeroed DC predictors and a fresh restart countdown
+ if (!z->progressive) { // baseline: each block is decoded into a scratch buffer and IDCT'd straight into the component plane
+ if (z->scan_n == 1) {
+ int i,j;
+ STBI_SIMD_ALIGN(short, data[64]); // scratch storage for one block of 64 coefficients, reused for every block
+ int n = z->order[0];
+ // non-interleaved data, we just need to process one block at a time,
+ // in trivial scanline order
+ // number of blocks to do just depends on how many actual "pixels" this
+ // component has, independent of interleaved MCU blocking and such
+ int w = (z->img_comp[n].x+7) >> 3;
+ int h = (z->img_comp[n].y+7) >> 3;
+ for (j=0; j < h; ++j) {
+ for (i=0; i < w; ++i) {
+ int ha = z->img_comp[n].ha;
+ if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
+ z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); // output lands at block column i, block row j of the plane; w2 is the row stride
+ // every data block is an MCU, so countdown the restart interval
+ if (--z->todo <= 0) {
+ if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); // NOTE(review): presumably tops up the bit buffer so a pending marker ends up in z->marker — confirm in stbi__grow_buffer_unsafe
+ // if it's NOT a restart, then just bail, so we get corrupt data
+ // rather than no data
+ if (!STBI__RESTART(z->marker)) return 1;
+ stbi__jpeg_reset(z);
+ }
+ }
+ }
+ return 1;
+ } else { // interleaved
+ int i,j,k,x,y;
+ STBI_SIMD_ALIGN(short, data[64]); // scratch storage for one block, as in the non-interleaved path
+ for (j=0; j < z->img_mcu_y; ++j) {
+ for (i=0; i < z->img_mcu_x; ++i) {
+ // scan an interleaved mcu... process scan_n components in order
+ for (k=0; k < z->scan_n; ++k) {
+ int n = z->order[k];
+ // scan out an mcu's worth of this component; that's just determined
+ // by the basic H and V specified for the component
+ for (y=0; y < z->img_comp[n].v; ++y) {
+ for (x=0; x < z->img_comp[n].h; ++x) {
+ int x2 = (i*z->img_comp[n].h + x)*8; // pixel column of this block inside the component plane
+ int y2 = (j*z->img_comp[n].v + y)*8; // pixel row of this block inside the component plane
+ int ha = z->img_comp[n].ha;
+ if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
+ z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data);
+ }
+ }
+ }
+ // after all interleaved components, that's an interleaved MCU,
+ // so now count down the restart interval
+ if (--z->todo <= 0) {
+ if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
+ if (!STBI__RESTART(z->marker)) return 1; // same policy as above: stop early but still report success
+ stbi__jpeg_reset(z);
+ }
+ }
+ }
+ return 1;
+ }
+ } else { // progressive: coefficients are accumulated in the per-component coeff arrays; no IDCT is run in this function
+ if (z->scan_n == 1) {
+ int i,j;
+ int n = z->order[0];
+ // non-interleaved data, we just need to process one block at a time,
+ // in trivial scanline order
+ // number of blocks to do just depends on how many actual "pixels" this
+ // component has, independent of interleaved MCU blocking and such
+ int w = (z->img_comp[n].x+7) >> 3;
+ int h = (z->img_comp[n].y+7) >> 3;
+ for (j=0; j < h; ++j) {
+ for (i=0; i < w; ++i) {
+ short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); // this block's 64 coefficients; coeff_w is the number of blocks per row
+ if (z->spec_start == 0) { // spec_start == 0 selects the DC decoder, anything else the AC decoder
+ if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
+ return 0;
+ } else {
+ int ha = z->img_comp[n].ha;
+ if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
+ return 0;
+ }
+ // every data block is an MCU, so countdown the restart interval
+ if (--z->todo <= 0) {
+ if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
+ if (!STBI__RESTART(z->marker)) return 1;
+ stbi__jpeg_reset(z);
+ }
+ }
+ }
+ return 1;
+ } else { // interleaved
+ int i,j,k,x,y;
+ for (j=0; j < z->img_mcu_y; ++j) {
+ for (i=0; i < z->img_mcu_x; ++i) {
+ // scan an interleaved mcu... process scan_n components in order
+ for (k=0; k < z->scan_n; ++k) {
+ int n = z->order[k];
+ // scan out an mcu's worth of this component; that's just determined
+ // by the basic H and V specified for the component
+ for (y=0; y < z->img_comp[n].v; ++y) {
+ for (x=0; x < z->img_comp[n].h; ++x) {
+ int x2 = (i*z->img_comp[n].h + x); // block column (in blocks, not pixels, unlike the baseline path)
+ int y2 = (j*z->img_comp[n].v + y); // block row (in blocks)
+ short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
+ if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) // this interleaved path only ever calls the DC decoder
+ return 0;
+ }
+ }
+ }
+ // after all interleaved components, that's an interleaved MCU,
+ // so now count down the restart interval
+ if (--z->todo <= 0) {
+ if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
+ if (!STBI__RESTART(z->marker)) return 1;
+ stbi__jpeg_reset(z);
+ }
+ }
+ }
+ return 1;
+ }
+ }
+}
+
+// Scale one block in place: each of the 64 coefficients in data is multiplied
+// by the dequant entry at the same index and stored back as a short.
+static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant)
+{
+   short *coef = data;
+   short *stop = data + 64;
+   stbi__uint16 *scale = dequant;
+
+   while (coef != stop) {
+      *coef = (short) (*coef * *scale);
+      ++coef;
+      ++scale;
+   }
+}
+
+// Final pass for progressive images: walk every 8x8 block of every component,
+// dequantize its stored coefficients in place and run the IDCT kernel to write
+// the block into the component's data plane. Does nothing for non-progressive
+// images.
+static void stbi__jpeg_finish(stbi__jpeg *z)
+{
+   int comp;
+
+   if (!z->progressive)
+      return;
+
+   for (comp = 0; comp < z->s->img_n; ++comp) {
+      // block counts come from the component's own pixel size, rounded up to whole blocks
+      int blocks_x = (z->img_comp[comp].x + 7) >> 3;
+      int blocks_y = (z->img_comp[comp].y + 7) >> 3;
+      int bx, by;
+
+      for (by = 0; by < blocks_y; ++by) {
+         for (bx = 0; bx < blocks_x; ++bx) {
+            // 64 coefficients per block, coeff_w blocks per row
+            short *block = z->img_comp[comp].coeff + 64 * (bx + by * z->img_comp[comp].coeff_w);
+            stbi__jpeg_dequantize(block, z->dequant[z->img_comp[comp].tq]);
+            z->idct_block_kernel(z->img_comp[comp].data+z->img_comp[comp].w2*by*8+bx*8, z->img_comp[comp].w2, block);
+         }
+      }
+   }
+}
+
+static int stbi__process_marker(stbi__jpeg *z, int m) // handle one table/misc marker segment (DRI, DQT, DHT, APPn, COM) read from z->s; returns 1 on success, 0 on failure
+{
+ int L; // bytes of the segment still unaccounted for
+ switch (m) {
+ case STBI__MARKER_none: // no marker found
+ return stbi__err("expected marker","Corrupt JPEG");
+
+ case 0xDD: // DRI - specify restart interval
+ if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG");
+ z->restart_interval = stbi__get16be(z->s);
+ return 1;
+
+ case 0xDB: // DQT - define quantization table
+ L = stbi__get16be(z->s)-2; // the length field counts its own two bytes
+ while (L > 0) {
+ int q = stbi__get8(z->s);
+ int p = q >> 4, sixteen = (p != 0); // high nibble: 0 = 8-bit entries, 1 = 16-bit entries
+ int t = q & 15,i; // low nibble: which of the 4 tables to fill
+ if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG");
+ if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");
+
+ for (i=0; i < 64; ++i)
+ z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s)); // entries arrive in stream order and are stored through the dezigzag index table
+ L -= (sixteen ? 129 : 65); // 1 header byte + 64 entries of 2 or 1 bytes each
+ }
+ return L==0; // a length that doesn't match the tables read fails here; note this path does not call stbi__err
+
+ case 0xC4: // DHT - define huffman table
+ L = stbi__get16be(z->s)-2;
+ while (L > 0) {
+ stbi_uc *v;
+ int sizes[16],i,n=0; // sizes[]: the 16 count bytes read below; n: their sum, i.e. how many value bytes follow
+ int q = stbi__get8(z->s);
+ int tc = q >> 4; // 0 selects huff_dc, otherwise huff_ac
+ int th = q & 15; // table slot, 0..3
+ if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG");
+ for (i=0; i < 16; ++i) {
+ sizes[i] = stbi__get8(z->s);
+ n += sizes[i];
+ }
+ if(n > 256) return stbi__err("bad DHT header","Corrupt JPEG"); // Loop over i < n would write past end of values!
+ L -= 17; // 1 header byte + 16 count bytes
+ if (tc == 0) {
+ if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
+ v = z->huff_dc[th].values;
+ } else {
+ if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0;
+ v = z->huff_ac[th].values;
+ }
+ for (i=0; i < n; ++i)
+ v[i] = stbi__get8(z->s);
+ if (tc != 0)
+ stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th); // only AC tables get the fast_ac lookup built
+ L -= n;
+ }
+ return L==0; // as with DQT: a mismatched length fails without an stbi__err reason
+ }
+
+ // check for comment block or APP blocks
+ if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
+ L = stbi__get16be(z->s);
+ if (L < 2) {
+ if (m == 0xFE)
+ return stbi__err("bad COM len","Corrupt JPEG");
+ else
+ return stbi__err("bad APP len","Corrupt JPEG");
+ }
+ L -= 2;
+
+ if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
+ static const unsigned char tag[5] = {'J','F','I','F','\0'};
+ int ok = 1;
+ int i;
+ for (i=0; i < 5; ++i) // all 5 bytes are always consumed, even after a mismatch, so the L accounting stays right
+ if (stbi__get8(z->s) != tag[i])
+ ok = 0;
+ L -= 5;
+ if (ok)
+ z->jfif = 1;
+ } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
+ static const unsigned char tag[6] = {'A','d','o','b','e','\0'};
+ int ok = 1;
+ int i;
+ for (i=0; i < 6; ++i)
+ if (stbi__get8(z->s) != tag[i])
+ ok = 0;
+ L -= 6;
+ if (ok) {
+ stbi__get8(z->s); // version
+ stbi__get16be(z->s); // flags0
+ stbi__get16be(z->s); // flags1
+ z->app14_color_transform = stbi__get8(z->s); // color transform
+ L -= 6; // the 1+2+2+1 bytes just read
+ }
+ }
+
+ stbi__skip(z->s, L); // whatever remains of the segment is ignored
+ return 1;
+ }
+
+ return stbi__err("unknown marker","Corrupt JPEG");
+}
+
+// after we see SOS
+// Parse the scan header that follows an SOS marker.
+// Fills in z->scan_n, z->order[] (frame-component index for each scan
+// component, in scan order), each listed component's DC/AC huffman table
+// selectors (hd/ha), and the spectral selection / successive approximation
+// fields (spec_start, spec_end, succ_high, succ_low).
+// Returns 1 on success. Every failure path returns 0 through stbi__err, so a
+// failure reason is always recorded.
+static int stbi__process_scan_header(stbi__jpeg *z)
+{
+   int i;
+   int Ls = stbi__get16be(z->s);
+   z->scan_n = stbi__get8(z->s);
+   if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG");
+   // 2 length bytes + 1 count byte + 2 bytes per component + 3 trailing bytes
+   if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG");
+   for (i=0; i < z->scan_n; ++i) {
+      int id = stbi__get8(z->s), which;
+      int q = stbi__get8(z->s);
+      // look the scan's component id up among the frame's components
+      for (which = 0; which < z->s->img_n; ++which)
+         if (z->img_comp[which].id == id)
+            break;
+      // no match: this used to be a bare "return 0", the only failure in this
+      // function that left the failure reason untouched
+      if (which == z->s->img_n) return stbi__err("bad SOS component","Corrupt JPEG");
+      // high nibble selects the DC table, low nibble the AC table; 4 slots each
+      z->img_comp[which].hd = q >> 4;   if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG");
+      z->img_comp[which].ha = q & 15;   if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG");
+      z->order[i] = which;
+   }
+
+   {
+      int aa;
+      z->spec_start = stbi__get8(z->s);
+      z->spec_end   = stbi__get8(z->s); // should be 63, but might be 0
+      aa = stbi__get8(z->s);
+      z->succ_high = (aa >> 4);
+      z->succ_low  = (aa & 15);
+      if (z->progressive) {
+         if (z->spec_start > 63 || z->spec_end > 63  || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
+            return stbi__err("bad SOS", "Corrupt JPEG");
+      } else {
+         // non-progressive scans must cover the whole block with no
+         // successive approximation; spec_end is forced to 63 whatever the file said
+         if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
+         if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
+         z->spec_end = 63;
+      }
+   }
+
+   return 1;
+}
+
+// Release the buffers owned by the first ncomp components (raw_data, raw_coeff
+// and linebuf) and clear the pointers, including the data/coeff pointers that
+// point into the raw allocations. Returns why unchanged, so callers can write
+// "return stbi__free_jpeg_components(z, n, result);".
+static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
+{
+   int c = 0;
+
+   while (c < ncomp) {
+      if (z->img_comp[c].raw_data != NULL) {
+         STBI_FREE(z->img_comp[c].raw_data);
+         z->img_comp[c].data = NULL;
+         z->img_comp[c].raw_data = NULL;
+      }
+      if (z->img_comp[c].raw_coeff != NULL) {
+         STBI_FREE(z->img_comp[c].raw_coeff);
+         z->img_comp[c].coeff = 0;
+         z->img_comp[c].raw_coeff = 0;
+      }
+      if (z->img_comp[c].linebuf != NULL) {
+         STBI_FREE(z->img_comp[c].linebuf);
+         z->img_comp[c].linebuf = NULL;
+      }
+      ++c;
+   }
+
+   return why;
+}
+
+static int stbi__process_frame_header(stbi__jpeg *z, int scan) // parse the SOF segment: validate it, record image and per-component geometry and, only when scan == STBI__SCAN_load, allocate the component buffers; returns 1 on success, 0 on failure
+{
+ stbi__context *s = z->s;
+ int Lf,p,i,q, h_max=1,v_max=1,c;
+ Lf = stbi__get16be(s); if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG
+ p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
+ s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
+ s->img_x = stbi__get16be(s); if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires
+ if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
+ if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
+ c = stbi__get8(s);
+ if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG");
+ s->img_n = c;
+ for (i=0; i < c; ++i) {
+ z->img_comp[i].data = NULL;
+ z->img_comp[i].linebuf = NULL;
+ }
+
+ if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG"); // 8 fixed bytes (length 2, precision 1, height 2, width 2, count 1) + 3 per component
+
+ z->rgb = 0;
+ for (i=0; i < s->img_n; ++i) {
+ static const unsigned char rgb[3] = { 'R', 'G', 'B' };
+ z->img_comp[i].id = stbi__get8(s);
+ if (s->img_n == 3 && z->img_comp[i].id == rgb[i]) // counts how many component ids spell 'R','G','B' in order
+ ++z->rgb;
+ q = stbi__get8(s);
+ z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG");
+ z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG");
+ z->img_comp[i].tq = stbi__get8(s); if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG");
+ }
+
+ if (scan != STBI__SCAN_load) return 1; // any other scan mode stops here: header validated, nothing allocated
+
+ if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode");
+
+ for (i=0; i < s->img_n; ++i) {
+ if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
+ if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
+ }
+
+ // check that plane subsampling factors are integer ratios; our resamplers can't deal with fractional ratios
+ // and I've never seen a non-corrupted JPEG file actually use them
+ for (i=0; i < s->img_n; ++i) {
+ if (h_max % z->img_comp[i].h != 0) return stbi__err("bad H","Corrupt JPEG");
+ if (v_max % z->img_comp[i].v != 0) return stbi__err("bad V","Corrupt JPEG");
+ }
+
+ // compute interleaved mcu info
+ z->img_h_max = h_max;
+ z->img_v_max = v_max;
+ z->img_mcu_w = h_max * 8;
+ z->img_mcu_h = v_max * 8;
+ // these sizes can't be more than 17 bits
+ z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w; // MCU columns, rounded up
+ z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h; // MCU rows, rounded up
+
+ for (i=0; i < s->img_n; ++i) {
+ // number of effective pixels (e.g. for non-interleaved MCU)
+ z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
+ z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
+ // to simplify generation, we'll allocate enough memory to decode
+ // the bogus oversized data from using interleaved MCUs and their
+ // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
+ // discard the extra data until colorspace conversion
+ //
+ // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
+ // so these muls can't overflow with 32-bit ints (which we require)
+ z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
+ z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
+ z->img_comp[i].coeff = 0;
+ z->img_comp[i].raw_coeff = 0;
+ z->img_comp[i].linebuf = NULL;
+ z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15); // 15 spare bytes leave room to round the data pointer up to a 16-byte boundary below
+ if (z->img_comp[i].raw_data == NULL)
+ return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); // frees what earlier components already own, then returns the 0 from stbi__err
+ // align blocks for idct using mmx/sse
+ z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
+ if (z->progressive) {
+ // w2, h2 are multiples of 8 (see above)
+ z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
+ z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
+ z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15); // one short per sample of the padded plane, plus alignment slack
+ if (z->img_comp[i].raw_coeff == NULL)
+ return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); // i+1 so this component's raw_data, just allocated, is released too
+ z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
+ }
+ }
+
+ return 1;
+}
+
+// use comparisons since in some cases we handle more than one case (e.g. SOF)
+#define stbi__DNL(x) ((x) == 0xdc)
+#define stbi__SOI(x) ((x) == 0xd8)
+#define stbi__EOI(x) ((x) == 0xd9)
+#define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)
+#define stbi__SOS(x) ((x) == 0xda)
+
+#define stbi__SOF_progressive(x) ((x) == 0xc2)
+
+// Read a JPEG stream from its start up to and including the frame header.
+// Resets the per-file flags (jfif, app14_color_transform, cached marker),
+// requires an SOI marker first, and returns right after it when scan is
+// STBI__SCAN_type. Otherwise every marker before the SOF goes through
+// stbi__process_marker, then the SOF itself is parsed by
+// stbi__process_frame_header. Returns 1 on success, 0 on failure.
+static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
+{
+   int marker;
+
+   z->marker = STBI__MARKER_none; // initialize cached marker to empty
+   z->app14_color_transform = -1; // valid values are 0,1,2
+   z->jfif = 0;
+
+   marker = stbi__get_marker(z);
+   if (!stbi__SOI(marker))
+      return stbi__err("no SOI","Corrupt JPEG");
+   if (scan == STBI__SCAN_type)
+      return 1;
+
+   for (marker = stbi__get_marker(z); !stbi__SOF(marker); ) {
+      if (!stbi__process_marker(z, marker))
+         return 0;
+      // some files have extra padding after their blocks, so keep scanning
+      // until a marker shows up or the stream runs out
+      for (;;) {
+         marker = stbi__get_marker(z);
+         if (marker != STBI__MARKER_none)
+            break;
+         if (stbi__at_eof(z->s))
+            return stbi__err("no SOF", "Corrupt JPEG");
+      }
+   }
+
+   z->progressive = stbi__SOF_progressive(marker);
+   return stbi__process_frame_header(z, scan) ? 1 : 0;
+}
+
+// Some JPEGs have junk after the entropy-coded data. Skip over it, but if
+// something that looks like a real marker turns up (0xff followed by a byte
+// that is neither 0x00 nor 0xff), return that marker code so parsing can
+// resume there. Returns STBI__MARKER_none when the stream ends first.
+static stbi_uc stbi__skip_jpeg_junk_at_end(stbi__jpeg *j)
+{
+   while (!stbi__at_eof(j->s)) {
+      stbi_uc b = stbi__get8(j->s);
+      if (b != 0xff)
+         continue; // ordinary junk byte, keep scanning
+
+      // saw 0xff: might be a marker
+      for (;;) {
+         if (stbi__at_eof(j->s))
+            return STBI__MARKER_none;
+         b = stbi__get8(j->s);
+         if (b == 0xff)
+            continue; // repeated 0xff: keep trying to read the marker's second byte
+         if (b == 0x00)
+            break;    // stuffed zero: not a marker, back to the regular scan
+         return b;    // looks like an actual marker
+      }
+   }
+   return STBI__MARKER_none;
+}
+
+// decode image to YCbCr format: parses the header in STBI__SCAN_load mode, then handles markers one by one until EOI; returns 1 on success, 0 on failure
+static int stbi__decode_jpeg_image(stbi__jpeg *j)
+{
+ int m;
+ for (m = 0; m < 4; m++) { // clear the raw buffer pointers of all four component slots before anything can fail
+ j->img_comp[m].raw_data = NULL;
+ j->img_comp[m].raw_coeff = NULL;
+ }
+ j->restart_interval = 0;
+ if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
+ m = stbi__get_marker(j);
+ while (!stbi__EOI(m)) {
+ if (stbi__SOS(m)) {
+ if (!stbi__process_scan_header(j)) return 0;
+ if (!stbi__parse_entropy_coded_data(j)) return 0;
+ if (j->marker == STBI__MARKER_none ) {
+ j->marker = stbi__skip_jpeg_junk_at_end(j);
+ // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
+ }
+ m = stbi__get_marker(j);
+ if (STBI__RESTART(m)) // a restart marker right after the scan is skipped, not processed
+ m = stbi__get_marker(j);
+ } else if (stbi__DNL(m)) {
+ int Ld = stbi__get16be(j->s);
+ stbi__uint32 NL = stbi__get16be(j->s);
+ if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG");
+ if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG"); // DNL is accepted only when it repeats the height already read from the frame header
+ m = stbi__get_marker(j);
+ } else {
+ if (!stbi__process_marker(j, m)) return 1; // NOTE(review): a failed marker here reports success (1), not 0 — looks like deliberate best-effort that keeps whatever was decoded so far; confirm before changing
+ m = stbi__get_marker(j);
+ }
+ }
+ if (j->progressive)
+ stbi__jpeg_finish(j); // progressive coefficients are dequantized and IDCT'd only once all scans are in
+ return 1;
+}
+
+// static jfif-centered resampling (across block boundaries)
+
+typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1,
+ int w, int hs);
+
+#define stbi__div4(x) ((stbi_uc) ((x) >> 2))
+
+// Pass-through resampler: no resampling is needed, so the near input row is
+// returned as is and nothing is written to out. The remaining parameters exist
+// only to match resample_row_func.
+static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
+{
+   STBI_NOTUSED(hs);
+   STBI_NOTUSED(w);
+   STBI_NOTUSED(in_far);
+   STBI_NOTUSED(out);
+   return in_near;
+}
+
+// Vertical 2x resampler: produces one output row of w samples, each a 3:1
+// blend of the near and far input rows, (3*near + far + 2) >> 2.
+// hs is unused. Returns out.
+static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
+{
+   int col = 0;
+   STBI_NOTUSED(hs);
+
+   while (col < w) {
+      int blended = 3*in_near[col] + in_far[col] + 2;
+      out[col] = stbi__div4(blended);
+      ++col;
+   }
+   return out;
+}
+
+// Horizontal 2x resampler: writes 2*w samples to out from the w samples of
+// in_near. Interior outputs are 3:1 blends of a sample with its left or right
+// neighbour; the first and last outputs copy the end samples unfiltered.
+// in_far and hs are unused. Returns out.
+static stbi_uc* stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
+{
+   int i;
+   stbi_uc *src = in_near;
+
+   STBI_NOTUSED(in_far);
+   STBI_NOTUSED(hs);
+
+   if (w == 1) {
+      // a single sample leaves nothing to interpolate with: duplicate it
+      out[1] = src[0];
+      out[0] = out[1];
+      return out;
+   }
+
+   // left edge
+   out[0] = src[0];
+   out[1] = stbi__div4(src[0]*3 + src[1] + 2);
+
+   // interior samples: each contributes one output leaning left, one leaning right
+   i = 1;
+   while (i < w-1) {
+      int weighted = 3*src[i]+2;
+      out[i*2+0] = stbi__div4(weighted+src[i-1]);
+      out[i*2+1] = stbi__div4(weighted+src[i+1]);
+      ++i;
+   }
+
+   // right edge; i is left over from the loop above
+   out[i*2+0] = stbi__div4(src[w-2]*3 + src[w-1] + 2);
+   out[i*2+1] = src[w-1];
+
+   return out;
+}
+
+#define stbi__div16(x) ((stbi_uc) ((x) >> 4))
+
+// 2x2 resampler: writes 2*w samples to out. Each input column is first blended
+// vertically as 3*near + far; neighbouring column values are then blended 3:1
+// horizontally and scaled back with a rounding shift by 4. The two end outputs
+// use the vertical blend alone. hs is unused. Returns out.
+static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
+{
+   int col, prev, cur;
+
+   STBI_NOTUSED(hs);
+
+   if (w == 1) {
+      // only one column: no horizontal neighbour, both outputs get the vertical blend
+      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
+      return out;
+   }
+
+   cur = 3*in_near[0] + in_far[0];
+   out[0] = stbi__div4(cur+2);
+   col = 1;
+   while (col < w) {
+      prev = cur;
+      cur = 3*in_near[col]+in_far[col];
+      out[col*2-1] = stbi__div16(3*prev + cur + 8);
+      out[col*2  ] = stbi__div16(3*cur + prev + 8);
+      ++col;
+   }
+   out[w*2-1] = stbi__div4(cur+2);
+
+   return out;
+}
+
+#if defined(STBI_SSE2) || defined(STBI_NEON)
+static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) // SSE2/NEON variant of the 2x2 resampler: groups of 8 input pixels go through SIMD, the remainder through scalar code; writes 2*w samples and returns out
+{
+ // need to generate 2x2 samples for every one in input
+ int i=0,t0,t1;
+
+ if (w == 1) {
+ out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
+ return out;
+ }
+
+ t1 = 3*in_near[0] + in_far[0];
+ // process groups of 8 pixels for as long as we can.
+ // note we can't handle the last pixel in a row in this loop
+ // because we need to handle the filter boundary conditions.
+ for (; i < ((w-1) & ~7); i += 8) { // the bound keeps i+8 <= w-1, so the in_near[i+8]/in_far[i+8] reads below stay inside the row
+#if defined(STBI_SSE2)
+ // load and perform the vertical filtering pass
+ // this uses 3*x + y = 4*x + (y - x)
+ __m128i zero = _mm_setzero_si128();
+ __m128i farb = _mm_loadl_epi64((__m128i *) (in_far + i));
+ __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i));
+ __m128i farw = _mm_unpacklo_epi8(farb, zero);
+ __m128i nearw = _mm_unpacklo_epi8(nearb, zero);
+ __m128i diff = _mm_sub_epi16(farw, nearw);
+ __m128i nears = _mm_slli_epi16(nearw, 2);
+ __m128i curr = _mm_add_epi16(nears, diff); // current row
+
+ // horizontal filter works the same based on shifted vers of current
+ // row. "prev" is current row shifted right by 1 pixel; we need to
+ // insert the previous pixel value (from t1).
+ // "next" is current row shifted left by 1 pixel, with first pixel
+ // of next block of 8 pixels added in.
+ __m128i prv0 = _mm_slli_si128(curr, 2);
+ __m128i nxt0 = _mm_srli_si128(curr, 2);
+ __m128i prev = _mm_insert_epi16(prv0, t1, 0);
+ __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7);
+
+ // horizontal filter, polyphase implementation since it's convenient:
+ // even pixels = 3*cur + prev = cur*4 + (prev - cur)
+ // odd pixels = 3*cur + next = cur*4 + (next - cur)
+ // note the shared term.
+ __m128i bias = _mm_set1_epi16(8);
+ __m128i curs = _mm_slli_epi16(curr, 2);
+ __m128i prvd = _mm_sub_epi16(prev, curr);
+ __m128i nxtd = _mm_sub_epi16(next, curr);
+ __m128i curb = _mm_add_epi16(curs, bias);
+ __m128i even = _mm_add_epi16(prvd, curb);
+ __m128i odd = _mm_add_epi16(nxtd, curb);
+
+ // interleave even and odd pixels, then undo scaling.
+ __m128i int0 = _mm_unpacklo_epi16(even, odd);
+ __m128i int1 = _mm_unpackhi_epi16(even, odd);
+ __m128i de0 = _mm_srli_epi16(int0, 4);
+ __m128i de1 = _mm_srli_epi16(int1, 4);
+
+ // pack and write output
+ __m128i outv = _mm_packus_epi16(de0, de1);
+ _mm_storeu_si128((__m128i *) (out + i*2), outv); // 16 output bytes for 8 input pixels
+#elif defined(STBI_NEON)
+ // load and perform the vertical filtering pass
+ // this uses 3*x + y = 4*x + (y - x)
+ uint8x8_t farb = vld1_u8(in_far + i);
+ uint8x8_t nearb = vld1_u8(in_near + i);
+ int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
+ int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
+ int16x8_t curr = vaddq_s16(nears, diff); // current row
+
+ // horizontal filter works the same based on shifted vers of current
+ // row. "prev" is current row shifted right by 1 pixel; we need to
+ // insert the previous pixel value (from t1).
+ // "next" is current row shifted left by 1 pixel, with first pixel
+ // of next block of 8 pixels added in.
+ int16x8_t prv0 = vextq_s16(curr, curr, 7);
+ int16x8_t nxt0 = vextq_s16(curr, curr, 1);
+ int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
+ int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7);
+
+ // horizontal filter, polyphase implementation since it's convenient:
+ // even pixels = 3*cur + prev = cur*4 + (prev - cur)
+ // odd pixels = 3*cur + next = cur*4 + (next - cur)
+ // note the shared term.
+ int16x8_t curs = vshlq_n_s16(curr, 2);
+ int16x8_t prvd = vsubq_s16(prev, curr);
+ int16x8_t nxtd = vsubq_s16(next, curr);
+ int16x8_t even = vaddq_s16(curs, prvd);
+ int16x8_t odd = vaddq_s16(curs, nxtd);
+
+ // undo scaling and round, then store with even/odd phases interleaved
+ uint8x8x2_t o;
+ o.val[0] = vqrshrun_n_s16(even, 4);
+ o.val[1] = vqrshrun_n_s16(odd, 4);
+ vst2_u8(out + i*2, o);
+#endif
+
+ // "previous" value for next iter
+ t1 = 3*in_near[i+7] + in_far[i+7];
+ }
+
+ t0 = t1; // scalar tail: picks up at pixel i, with t1 holding the vertically filtered value of the pixel before it
+ t1 = 3*in_near[i] + in_far[i];
+ out[i*2] = stbi__div16(3*t1 + t0 + 8); // only the even output of pixel i is written here
+
+ for (++i; i < w; ++i) {
+ t0 = t1;
+ t1 = 3*in_near[i]+in_far[i];
+ out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
+ out[i*2 ] = stbi__div16(3*t1 + t0 + 8);
+ }
+ out[w*2-1] = stbi__div4(t1+2); // right edge: vertical filter only
+
+ STBI_NOTUSED(hs);
+
+ return out;
+}
+#endif
+
+// Nearest-neighbour horizontal resampler for arbitrary factors: every one of
+// the w input samples in in_near is repeated hs times in out.
+// in_far is unused. Returns out.
+static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
+{
+   int src, rep;
+   stbi_uc *dst = out;
+
+   STBI_NOTUSED(in_far);
+
+   for (src = 0; src < w; ++src) {
+      // dst walks the output in order, so it always sits at out[src*hs + rep]
+      for (rep = 0; rep < hs; ++rep)
+         *dst++ = in_near[src];
+   }
+   return out;
+}
+
+// this is a reduced-precision calculation of YCbCr-to-RGB introduced
+// to make sure the code produces the same results in both SIMD and scalar
+#define stbi__float2fixed(x) (((int) ((x) * 4096.0f + 0.5f)) << 8)
+// Scalar YCbCr -> RGB conversion for one row of count pixels, in fixed point
+// (values carry 20 fractional bits). Each pixel writes four bytes at out --
+// R, G, B and a constant 255 -- after which out advances by step bytes.
+static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
+{
+   stbi_uc *dst = out;
+   int px = 0;
+
+   while (px < count) {
+      int luma = (y[px] << 20) + (1<<19); // rounding
+      int cr = pcr[px] - 128;
+      int cb = pcb[px] - 128;
+      int r,g,b;
+
+      r = luma + cr* stbi__float2fixed(1.40200f);
+      g = luma + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
+      b = luma + cb* stbi__float2fixed(1.77200f);
+
+      // drop the fractional bits
+      r = r >> 20;
+      g = g >> 20;
+      b = b >> 20;
+
+      // clamp each channel to 0..255
+      if (r < 0) r = 0; else if (r > 255) r = 255;
+      if (g < 0) g = 0; else if (g > 255) g = 255;
+      if (b < 0) b = 0; else if (b > 255) b = 255;
+
+      dst[0] = (stbi_uc)r;
+      dst[1] = (stbi_uc)g;
+      dst[2] = (stbi_uc)b;
+      dst[3] = 255;
+      dst += step;
+      ++px;
+   }
+}
+
+#if defined(STBI_SSE2) || defined(STBI_NEON)
+static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step) // SIMD YCbCr -> RGB for one row: 8 pixels at a time when step == 4, then a scalar loop for whatever is left
+{
+ int i = 0; // shared by the SIMD loops and the scalar tail, so the tail resumes where SIMD stopped
+
+#ifdef STBI_SSE2
+ // step == 3 is pretty ugly on the final interleave, and i'm not convinced
+ // it's useful in practice (you wouldn't use it for textures, for example).
+ // so just accelerate step == 4 case.
+ if (step == 4) {
+ // this is a fairly straightforward implementation and not super-optimized.
+ __m128i signflip = _mm_set1_epi8(-0x80);
+ __m128i cr_const0 = _mm_set1_epi16( (short) ( 1.40200f*4096.0f+0.5f));
+ __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f));
+ __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f));
+ __m128i cb_const1 = _mm_set1_epi16( (short) ( 1.77200f*4096.0f+0.5f));
+ __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128);
+ __m128i xw = _mm_set1_epi16(255); // alpha channel
+
+ for (; i+7 < count; i += 8) {
+ // load
+ __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i));
+ __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i));
+ __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i));
+ __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
+ __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128
+
+ // unpack to short (and left-shift cr, cb by 8)
+ __m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes);
+ __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
+ __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);
+
+ // color transform
+ __m128i yws = _mm_srli_epi16(yw, 4);
+ __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
+ __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
+ __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
+ __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
+ __m128i rws = _mm_add_epi16(cr0, yws);
+ __m128i gwt = _mm_add_epi16(cb0, yws);
+ __m128i bws = _mm_add_epi16(yws, cb1);
+ __m128i gws = _mm_add_epi16(gwt, cr1);
+
+ // descale
+ __m128i rw = _mm_srai_epi16(rws, 4);
+ __m128i bw = _mm_srai_epi16(bws, 4);
+ __m128i gw = _mm_srai_epi16(gws, 4);
+
+ // back to byte, set up for transpose
+ __m128i brb = _mm_packus_epi16(rw, bw);
+ __m128i gxb = _mm_packus_epi16(gw, xw);
+
+ // transpose to interleave channels
+ __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
+ __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
+ __m128i o0 = _mm_unpacklo_epi16(t0, t1);
+ __m128i o1 = _mm_unpackhi_epi16(t0, t1);
+
+ // store
+ _mm_storeu_si128((__m128i *) (out + 0), o0);
+ _mm_storeu_si128((__m128i *) (out + 16), o1);
+ out += 32; // 8 pixels * 4 bytes
+ }
+ }
+#endif
+
+#ifdef STBI_NEON
+ // in this version, step=3 support would be easy to add. but is there demand?
+ if (step == 4) {
+ // this is a fairly straightforward implementation and not super-optimized.
+ uint8x8_t signflip = vdup_n_u8(0x80);
+ int16x8_t cr_const0 = vdupq_n_s16( (short) ( 1.40200f*4096.0f+0.5f));
+ int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
+ int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
+ int16x8_t cb_const1 = vdupq_n_s16( (short) ( 1.77200f*4096.0f+0.5f));
+
+ for (; i+7 < count; i += 8) {
+ // load
+ uint8x8_t y_bytes = vld1_u8(y + i);
+ uint8x8_t cr_bytes = vld1_u8(pcr + i);
+ uint8x8_t cb_bytes = vld1_u8(pcb + i);
+ int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
+ int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));
+
+ // expand to s16
+ int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
+ int16x8_t crw = vshll_n_s8(cr_biased, 7);
+ int16x8_t cbw = vshll_n_s8(cb_biased, 7);
+
+ // color transform
+ int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
+ int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
+ int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
+ int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
+ int16x8_t rws = vaddq_s16(yws, cr0);
+ int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
+ int16x8_t bws = vaddq_s16(yws, cb1);
+
+ // undo scaling, round, convert to byte
+ uint8x8x4_t o;
+ o.val[0] = vqrshrun_n_s16(rws, 4);
+ o.val[1] = vqrshrun_n_s16(gws, 4);
+ o.val[2] = vqrshrun_n_s16(bws, 4);
+ o.val[3] = vdup_n_u8(255);
+
+ // store, interleaving r/g/b/a
+ vst4_u8(out, o);
+ out += 8*4;
+ }
+ }
+#endif
+
+ for (; i < count; ++i) { // scalar tail: leftover pixels, or the whole row when step != 4
+ int y_fixed = (y[i] << 20) + (1<<19); // rounding
+ int r,g,b;
+ int cr = pcr[i] - 128;
+ int cb = pcb[i] - 128;
+ r = y_fixed + cr* stbi__float2fixed(1.40200f);
+ g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
+ b = y_fixed + cb* stbi__float2fixed(1.77200f);
+ r >>= 20;
+ g >>= 20;
+ b >>= 20;
+ if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } // the unsigned compare catches both negative and >255 in one test
+ if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
+ if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
+ out[0] = (stbi_uc)r;
+ out[1] = (stbi_uc)g;
+ out[2] = (stbi_uc)b;
+ out[3] = 255;
+ out += step;
+ }
+}
+#endif
+
+// Pick the IDCT, colour-conversion and 2x2 resampling routines for this
+// decoder. The scalar versions are installed first; they are replaced by the
+// SIMD versions when built with STBI_SSE2 and stbi__sse2_available() says yes,
+// or unconditionally when built with STBI_NEON.
+static void stbi__setup_jpeg(stbi__jpeg *j)
+{
+   // portable defaults
+   j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
+   j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
+   j->idct_block_kernel = stbi__idct_block;
+
+#if defined(STBI_SSE2)
+   // SSE2 needs a run-time availability check
+   if (stbi__sse2_available()) {
+      j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
+      j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
+      j->idct_block_kernel = stbi__idct_simd;
+   }
+#endif
+
+#if defined(STBI_NEON)
+   // NEON builds switch over with no run-time check
+   j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
+   j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
+   j->idct_block_kernel = stbi__idct_simd;
+#endif
+}
+
+// free the temporary buffers of every component of the image
+static void stbi__cleanup_jpeg(stbi__jpeg *j)
+{
+   int ncomp = j->s->img_n;
+   stbi__free_jpeg_components(j, ncomp, 0);
+}
+
+// per-component upsampling state used while producing output rows
+typedef struct
+{
+   resample_row_func resample; // row upsampler selected for this component
+   stbi_uc *line0;             // the two source rows handed to the upsampler
+   stbi_uc *line1;
+   int hs;                     // horizontal expansion factor
+   int vs;                     // vertical expansion factor
+   int w_lores;                // horizontal pixels pre-expansion
+   int ystep;                  // how far through vertical expansion we are
+   int ypos;                   // which pre-expansion row we're on
+} stbi__resample;
+
+// rounded (x*y)/255 for two 8-bit values, computed without a division
+static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y)
+{
+   unsigned int biased = x*y + 128;
+   unsigned int folded = biased + (biased >> 8);
+   return (stbi_uc) (folded >> 8);
+}
+
+// Decode the JPEG attached to z and return the pixel buffer, n bytes per pixel.
+// req_comp: requested output components (0..4); 0 means 3 when the file has
+// three or more components, otherwise 1.
+// out_x/out_y receive the image dimensions; comp (optional) receives 3 or 1
+// depending on the component count of the file, not on the output.
+// Returns NULL on failure. After the req_comp check, every exit path calls
+// stbi__cleanup_jpeg to release the decoder's temporary component buffers.
+static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
+{
+ int n, decode_n, is_rgb;
+ z->s->img_n = 0; // make stbi__cleanup_jpeg safe
+
+ // validate req_comp
+ if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
+
+ // load a jpeg image from whichever source, but leave in YCbCr format
+ if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }
+
+ // determine actual number of components to generate
+ n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;
+
+ // three-component data is taken as already-RGB when z->rgb == 3, or when there is
+ // no JFIF marker and the APP14 transform flag is 0
+ is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));
+
+ // gray output from YCbCr only needs the first (Y) plane
+ if (z->s->img_n == 3 && n < 3 && !is_rgb)
+ decode_n = 1;
+ else
+ decode_n = z->s->img_n;
+
+ // nothing to do if no components requested; check this now to avoid
+ // accessing uninitialized coutput[0] later
+ if (decode_n <= 0) { stbi__cleanup_jpeg(z); return NULL; }
+
+ // resample and color-convert
+ {
+ int k;
+ unsigned int i,j;
+ stbi_uc *output;
+ stbi_uc *coutput[4] = { NULL, NULL, NULL, NULL };
+
+ stbi__resample res_comp[4];
+
+ // set up one resampler per decoded component
+ for (k=0; k < decode_n; ++k) {
+ stbi__resample *r = &res_comp[k];
+
+ // allocate line buffer big enough for upsampling off the edges
+ // with upsample factor of 4
+ z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3);
+ if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
+
+ r->hs = z->img_h_max / z->img_comp[k].h;
+ r->vs = z->img_v_max / z->img_comp[k].v;
+ r->ystep = r->vs >> 1;
+ r->w_lores = (z->s->img_x + r->hs-1) / r->hs;
+ r->ypos = 0;
+ r->line0 = r->line1 = z->img_comp[k].data;
+
+ // choose the specialized upsampler for the common factors, generic otherwise
+ if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
+ else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
+ else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
+ else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
+ else r->resample = stbi__resample_row_generic;
+ }
+
+ // can't error after this so, this is safe
+ // (one extra byte is requested: the 3-channel paths below store out[3] past the last pixel)
+ output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
+ if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
+
+ // now go ahead and resample
+ for (j=0; j < z->s->img_y; ++j) {
+ stbi_uc *out = output + n * z->s->img_x * j;
+ for (k=0; k < decode_n; ++k) {
+ stbi__resample *r = &res_comp[k];
+ // y_bot selects which of line0/line1 is passed first to the upsampler
+ int y_bot = r->ystep >= (r->vs >> 1);
+ coutput[k] = r->resample(z->img_comp[k].linebuf,
+ y_bot ? r->line1 : r->line0,
+ y_bot ? r->line0 : r->line1,
+ r->w_lores, r->hs);
+ // advance to the next source row once vs output rows have been produced
+ if (++r->ystep >= r->vs) {
+ r->ystep = 0;
+ r->line0 = r->line1;
+ if (++r->ypos < z->img_comp[k].y)
+ r->line1 += z->img_comp[k].w2;
+ }
+ }
+ // n >= 3: color output (RGB, optionally followed by alpha=255)
+ if (n >= 3) {
+ stbi_uc *y = coutput[0];
+ if (z->s->img_n == 3) {
+ if (is_rgb) {
+ for (i=0; i < z->s->img_x; ++i) {
+ out[0] = y[i];
+ out[1] = coutput[1][i];
+ out[2] = coutput[2][i];
+ out[3] = 255;
+ out += n;
+ }
+ } else {
+ z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
+ }
+ } else if (z->s->img_n == 4) {
+ if (z->app14_color_transform == 0) { // CMYK
+ for (i=0; i < z->s->img_x; ++i) {
+ stbi_uc m = coutput[3][i];
+ out[0] = stbi__blinn_8x8(coutput[0][i], m);
+ out[1] = stbi__blinn_8x8(coutput[1][i], m);
+ out[2] = stbi__blinn_8x8(coutput[2][i], m);
+ out[3] = 255;
+ out += n;
+ }
+ } else if (z->app14_color_transform == 2) { // YCCK
+ z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
+ for (i=0; i < z->s->img_x; ++i) {
+ stbi_uc m = coutput[3][i];
+ out[0] = stbi__blinn_8x8(255 - out[0], m);
+ out[1] = stbi__blinn_8x8(255 - out[1], m);
+ out[2] = stbi__blinn_8x8(255 - out[2], m);
+ out += n;
+ }
+ } else { // YCbCr + alpha? Ignore the fourth channel for now
+ z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
+ }
+ } else
+ for (i=0; i < z->s->img_x; ++i) {
+ out[0] = out[1] = out[2] = y[i];
+ out[3] = 255; // not used if n==3
+ out += n;
+ }
+ } else {
+ // n is 1 or 2: gray output, optionally followed by alpha=255
+ if (is_rgb) {
+ if (n == 1)
+ for (i=0; i < z->s->img_x; ++i)
+ *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
+ else {
+ for (i=0; i < z->s->img_x; ++i, out += 2) {
+ out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
+ out[1] = 255;
+ }
+ }
+ } else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
+ for (i=0; i < z->s->img_x; ++i) {
+ stbi_uc m = coutput[3][i];
+ stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
+ stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
+ stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
+ out[0] = stbi__compute_y(r, g, b);
+ out[1] = 255;
+ out += n;
+ }
+ } else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
+ for (i=0; i < z->s->img_x; ++i) {
+ out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
+ out[1] = 255;
+ out += n;
+ }
+ } else {
+ stbi_uc *y = coutput[0];
+ if (n == 1)
+ for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
+ else
+ for (i=0; i < z->s->img_x; ++i) { *out++ = y[i]; *out++ = 255; }
+ }
+ }
+ }
+ stbi__cleanup_jpeg(z);
+ *out_x = z->s->img_x;
+ *out_y = z->s->img_y;
+ if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output
+ return output;
+ }
+}
+
+// allocate a zeroed decoder for s, decode the image with it, then free the decoder;
+// returns whatever load_jpeg_image returns (NULL on failure)
+static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
+{
+   stbi__jpeg *dec;
+   unsigned char *pixels;
+   STBI_NOTUSED(ri);
+   dec = (stbi__jpeg*) stbi__malloc(sizeof(*dec));
+   if (dec == NULL) return stbi__errpuc("outofmem", "Out of memory");
+   memset(dec, 0, sizeof(*dec));
+   dec->s = s;
+   stbi__setup_jpeg(dec);
+   pixels = load_jpeg_image(dec, x,y,comp,req_comp);
+   STBI_FREE(dec);
+   return pixels;
+}
+
+// run the header decoder in STBI__SCAN_type mode on s and return its result;
+// the stream is rewound afterwards
+static int stbi__jpeg_test(stbi__context *s)
+{
+   int verdict;
+   stbi__jpeg *probe = (stbi__jpeg*)stbi__malloc(sizeof(*probe));
+   if (probe == NULL) return stbi__err("outofmem", "Out of memory");
+   memset(probe, 0, sizeof(*probe));
+   probe->s = s;
+   stbi__setup_jpeg(probe);
+   verdict = stbi__decode_jpeg_header(probe, STBI__SCAN_type);
+   stbi__rewind(s);
+   STBI_FREE(probe);
+   return verdict;
+}
+
+// decode just the header and report size / component count through the
+// non-NULL out pointers; on failure rewinds the stream and returns 0
+static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
+{
+   if (stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
+      if (x != NULL) *x = j->s->img_x;
+      if (y != NULL) *y = j->s->img_y;
+      if (comp != NULL) {
+         // reported as 3 for three or more components, else 1
+         if (j->s->img_n >= 3) *comp = 3;
+         else                  *comp = 1;
+      }
+      return 1;
+   }
+   stbi__rewind( j->s );
+   return 0;
+}
+
+// query image size / component count using a temporary zeroed decoder
+static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   int ok;
+   stbi__jpeg *tmp = (stbi__jpeg*) (stbi__malloc(sizeof(*tmp)));
+   if (tmp == NULL) return stbi__err("outofmem", "Out of memory");
+   memset(tmp, 0, sizeof(*tmp));
+   tmp->s = s;
+   ok = stbi__jpeg_info_raw(tmp, x, y, comp);
+   STBI_FREE(tmp);
+   return ok;
+}
+#endif
+
+// public domain zlib decode v0.2 Sean Barrett 2006-11-18
+// simple implementation
+// - all input must be provided in an upfront buffer
+// - all output is written to a single output buffer (can malloc/realloc)
+// performance
+// - fast huffman
+
+#ifndef STBI_NO_ZLIB
+
+// fast-way is faster to check than jpeg huffman, but slow way is slower
+#define STBI__ZFAST_BITS 9 // accelerate all cases in default tables
+#define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1) // selects the low STBI__ZFAST_BITS bits of the bit buffer
+#define STBI__ZNSYMS 288 // number of symbols in literal/length alphabet
+
+// zlib-style huffman encoding
+// (jpegs packs from left, zlib from right, so can't share code)
+// Filled in by stbi__zbuild_huffman; arrays indexed by code length use entries 1..15.
+typedef struct
+{
+ stbi__uint16 fast[1 << STBI__ZFAST_BITS]; // indexed by the low STBI__ZFAST_BITS buffer bits: (length << 9) | symbol, 0 = use slow path
+ stbi__uint16 firstcode[16]; // first code value of each length
+ int maxcode[17]; // per length: one past the last code, preshifted to 16 bits; [16] is a sentinel
+ stbi__uint16 firstsymbol[16]; // index into size[]/value[] of the first symbol of each length
+ stbi_uc size[STBI__ZNSYMS]; // code length of each entry, in code order
+ stbi__uint16 value[STBI__ZNSYMS]; // symbol of each entry, in code order
+} stbi__zhuffman;
+
+// reverse the order of the low 16 bits of n
+stbi_inline static int stbi__bitreverse16(int n)
+{
+   int r = n;
+   r = ((r & 0x5555) << 1) | ((r & 0xAAAA) >> 1); // swap neighbouring bits
+   r = ((r & 0x3333) << 2) | ((r & 0xCCCC) >> 2); // swap bit pairs
+   r = ((r & 0x0F0F) << 4) | ((r & 0xF0F0) >> 4); // swap nibbles
+   r = ((r & 0x00FF) << 8) | ((r & 0xFF00) >> 8); // swap bytes
+   return r;
+}
+
+// reverse the low `bits` bits of v (bits <= 16)
+stbi_inline static int stbi__bit_reverse(int v, int bits)
+{
+   int discard;
+   STBI_ASSERT(bits <= 16);
+   // reverse all 16 bits, then shift out the 16-bits positions that are not wanted
+   // (for 11 bits, 5 positions are shifted away)
+   discard = 16 - bits;
+   return stbi__bitreverse16(v) >> discard;
+}
+
+// Build the decoding tables in z from num code lengths in sizelist
+// (sizelist[i] is the code length of symbol i, 0 = symbol unused).
+// Returns 1 on success, 0 (via stbi__err) when the lengths do not form a valid code.
+static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num)
+{
+ int i,k=0;
+ int code, next_code[16], sizes[17];
+
+ // DEFLATE spec for generating codes
+ memset(sizes, 0, sizeof(sizes));
+ memset(z->fast, 0, sizeof(z->fast));
+ // histogram of code lengths
+ for (i=0; i < num; ++i)
+ ++sizes[sizelist[i]];
+ sizes[0] = 0;
+ // a length of i bits cannot have more than 2^i codes
+ for (i=1; i < 16; ++i)
+ if (sizes[i] > (1 << i))
+ return stbi__err("bad sizes", "Corrupt PNG");
+ code = 0;
+ // assign the first code and first symbol index for every length
+ for (i=1; i < 16; ++i) {
+ next_code[i] = code;
+ z->firstcode[i] = (stbi__uint16) code;
+ z->firstsymbol[i] = (stbi__uint16) k;
+ code = (code + sizes[i]);
+ if (sizes[i])
+ if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG");
+ z->maxcode[i] = code << (16-i); // preshift for inner loop
+ code <<= 1;
+ k += sizes[i];
+ }
+ z->maxcode[16] = 0x10000; // sentinel
+ // fill the per-code tables and, for short codes, the fast lookup table
+ for (i=0; i < num; ++i) {
+ int s = sizelist[i];
+ if (s) {
+ int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
+ stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i);
+ z->size [c] = (stbi_uc ) s;
+ z->value[c] = (stbi__uint16) i;
+ if (s <= STBI__ZFAST_BITS) {
+ // every table index whose low s bits equal the bit-reversed code maps to this symbol
+ int j = stbi__bit_reverse(next_code[s],s);
+ while (j < (1 << STBI__ZFAST_BITS)) {
+ z->fast[j] = fastv;
+ j += (1 << s);
+ }
+ }
+ ++next_code[s];
+ }
+ }
+ return 1;
+}
+
+// zlib-from-memory implementation for PNG reading
+// because PNG allows splitting the zlib stream arbitrarily,
+// and it's annoying structurally to have PNG call ZLIB call PNG,
+// we require PNG read all the IDATs and combine them into a single
+// memory buffer
+
+// state of one zlib/DEFLATE decode: input cursor, bit buffer, output buffer, current huffman tables
+typedef struct
+{
+ stbi_uc *zbuffer, *zbuffer_end; // next input byte to read / one past the last input byte
+ int num_bits; // number of valid bits currently held in code_buffer
+ int hit_zeof_once; // set once 16 implicit zero bits have been added at end of input
+ stbi__uint32 code_buffer; // bit buffer; bits are consumed from the low end
+
+ char *zout; // next output byte to write
+ char *zout_start; // start of the output buffer
+ char *zout_end; // one past the end of the output buffer
+ int z_expandable; // nonzero if stbi__zexpand may grow the output buffer
+
+ stbi__zhuffman z_length, z_distance; // tables for literal/length and distance codes
+} stbi__zbuf;
+
+// nonzero once the input cursor has reached the end of the input buffer
+stbi_inline static int stbi__zeof(stbi__zbuf *z)
+{
+   return (z->zbuffer_end <= z->zbuffer);
+}
+
+// read the next input byte and advance; yields 0 when the input is exhausted
+stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
+{
+   if (stbi__zeof(z)) return 0;
+   return *z->zbuffer++;
+}
+
+// Append whole input bytes above the bits already in code_buffer until more
+// than 24 bits are buffered. Past end of input, stbi__zget8 supplies zero bytes.
+static void stbi__fill_bits(stbi__zbuf *z)
+{
+ do {
+ // any bit set at or above position num_bits means the buffer state is inconsistent
+ if (z->code_buffer >= (1U << z->num_bits)) {
+ z->zbuffer = z->zbuffer_end; /* treat this as EOF so we fail. */
+ return;
+ }
+ z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits;
+ z->num_bits += 8;
+ } while (z->num_bits <= 24);
+}
+
+// take the next n bits from the bit buffer (refilling first if fewer than n are held)
+stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
+{
+   unsigned int bits;
+   if (z->num_bits < n) stbi__fill_bits(z);
+   bits = z->code_buffer & ((1 << n) - 1);
+   z->num_bits -= n;
+   z->code_buffer >>= n;
+   return bits;
+}
+
+// Decode one symbol whose code is longer than STBI__ZFAST_BITS.
+// Returns the symbol and consumes its bits, or returns -1 if no valid code matches.
+static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
+{
+ int b,s,k;
+ // not resolved by fast table, so compute it the slow way
+ // use jpeg approach, which requires MSbits at top
+ k = stbi__bit_reverse(a->code_buffer, 16);
+ // find the shortest length whose preshifted upper bound exceeds k;
+ // the sentinel in maxcode[16] guarantees the loop stops
+ for (s=STBI__ZFAST_BITS+1; ; ++s)
+ if (k < z->maxcode[s])
+ break;
+ if (s >= 16) return -1; // invalid code!
+ // code size is s, so:
+ b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
+ if (b >= STBI__ZNSYMS) return -1; // some data was corrupt somewhere!
+ if (z->size[b] != s) return -1; // was originally an assert, but report failure instead.
+ a->code_buffer >>= s;
+ a->num_bits -= s;
+ return z->value[b];
+}
+
+// Decode one huffman symbol from the bit buffer using table z.
+// Returns the symbol, or -1 on an invalid code or a prematurely ended stream.
+stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
+{
+   int entry, len;
+   if (a->num_bits < 16) {
+      if (!stbi__zeof(a)) {
+         stbi__fill_bits(a);
+      } else if (a->hit_zeof_once) {
+         // The 16 padding bits were already handed out and we ran dry again:
+         // the stream is prematurely terminated.
+         return -1;
+      } else {
+         // First time at end of input: pretend 16 extra zero bits follow so
+         // decoding can keep going. Actually consuming any of those bits means
+         // the data is invalid; that is caught later.
+         a->hit_zeof_once = 1;
+         a->num_bits += 16; // add 16 implicit zero bits
+      }
+   }
+   entry = z->fast[a->code_buffer & STBI__ZFAST_MASK];
+   if (!entry) return stbi__zhuffman_decode_slowpath(a, z);
+   // fast-table entry: code length in the bits above 9, symbol in the low 9 bits
+   len = entry >> 9;
+   a->code_buffer >>= len;
+   a->num_bits -= len;
+   return entry & 511;
+}
+
+// Grow the output buffer so that n more bytes fit after zout.
+// The capacity is doubled until it is large enough; zout, zout_start and
+// zout_end are updated to point into the reallocated buffer.
+// Returns 1 on success; 0 (via stbi__err) if the buffer is not expandable,
+// the size would overflow an unsigned int, or reallocation fails.
+static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes
+{
+ char *q;
+ unsigned int cur, limit, old_limit;
+ z->zout = zout;
+ if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
+ cur = (unsigned int) (z->zout - z->zout_start);
+ limit = old_limit = (unsigned) (z->zout_end - z->zout_start);
+ // reject requests where cur + n would not fit in an unsigned int
+ if (UINT_MAX - cur < (unsigned) n) return stbi__err("outofmem", "Out of memory");
+ while (cur + n > limit) {
+ if(limit > UINT_MAX / 2) return stbi__err("outofmem", "Out of memory");
+ limit *= 2;
+ }
+ q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
+ STBI_NOTUSED(old_limit);
+ if (q == NULL) return stbi__err("outofmem", "Out of memory");
+ z->zout_start = q;
+ z->zout = q + cur;
+ z->zout_end = q + limit;
+ return 1;
+}
+
+// base match length for each length symbol, indexed by (symbol - 257)
+static const int stbi__zlength_base[31] = {
+ 3,4,5,6,7,8,9,10,11,13,
+ 15,17,19,23,27,31,35,43,51,59,
+ 67,83,99,115,131,163,195,227,258,0,0 };
+
+// number of extra bits read and added to the base length, same indexing
+static const int stbi__zlength_extra[31]=
+{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };
+
+// base match distance for each distance symbol
+static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
+257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};
+
+// number of extra bits read and added to the base distance
+// (only 30 initializers are listed; the remaining two entries are zero-initialized)
+static const int stbi__zdist_extra[32] =
+{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
+
+// Decode one huffman-compressed block into the output buffer using the
+// tables already stored in a->z_length / a->z_distance.
+// Returns 1 when the end-of-block symbol (256) is reached, 0 on error.
+static int stbi__parse_huffman_block(stbi__zbuf *a)
+{
+ // local copy of the output cursor; written back to a->zout on block end or before expanding
+ char *zout = a->zout;
+ for(;;) {
+ int z = stbi__zhuffman_decode(a, &a->z_length);
+ if (z < 256) {
+ // literal byte (or a negative decode error)
+ if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes
+ if (zout >= a->zout_end) {
+ if (!stbi__zexpand(a, zout, 1)) return 0;
+ zout = a->zout;
+ }
+ *zout++ = (char) z;
+ } else {
+ stbi_uc *p;
+ int len,dist;
+ if (z == 256) {
+ a->zout = zout;
+ if (a->hit_zeof_once && a->num_bits < 16) {
+ // The first time we hit zeof, we inserted 16 extra zero bits into our bit
+ // buffer so the decoder can just do its speculative decoding. But if we
+ // actually consumed any of those bits (which is the case when num_bits < 16),
+ // the stream actually read past the end so it is malformed.
+ return stbi__err("unexpected end","Corrupt PNG");
+ }
+ return 1;
+ }
+ if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, length codes 286 and 287 must not appear in compressed data
+ // length symbol: base length plus optional extra bits
+ z -= 257;
+ len = stbi__zlength_base[z];
+ if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
+ // distance symbol: base distance plus optional extra bits
+ z = stbi__zhuffman_decode(a, &a->z_distance);
+ if (z < 0 || z >= 30) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, distance codes 30 and 31 must not appear in compressed data
+ dist = stbi__zdist_base[z];
+ if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
+ // the match may not reach back before the start of the output
+ if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
+ if (len > a->zout_end - zout) {
+ if (!stbi__zexpand(a, zout, len)) return 0;
+ zout = a->zout;
+ }
+ // copy byte by byte: source and destination may overlap when dist < len
+ p = (stbi_uc *) (zout - dist);
+ if (dist == 1) { // run of one byte; common in images.
+ stbi_uc v = *p;
+ if (len) { do *zout++ = v; while (--len); }
+ } else {
+ if (len) { do *zout++ = *p++; while (--len); }
+ }
+ }
+ }
+}
+
+// Read the header of a dynamic-huffman block and build a->z_length and
+// a->z_distance from the code lengths it describes.
+// Returns 1 on success, 0 (via stbi__err) on malformed data.
+static int stbi__compute_huffman_codes(stbi__zbuf *a)
+{
+ // order in which the code-length-code lengths are stored in the stream
+ static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
+ stbi__zhuffman z_codelength;
+ stbi_uc lencodes[286+32+137];//padding for maximum single op
+ stbi_uc codelength_sizes[19];
+ int i,n;
+
+ int hlit = stbi__zreceive(a,5) + 257; // number of literal/length code lengths
+ int hdist = stbi__zreceive(a,5) + 1; // number of distance code lengths
+ int hclen = stbi__zreceive(a,4) + 4; // number of code-length-code lengths that follow
+ int ntot = hlit + hdist;
+
+ memset(codelength_sizes, 0, sizeof(codelength_sizes));
+ for (i=0; i < hclen; ++i) {
+ int s = stbi__zreceive(a,3);
+ codelength_sizes[length_dezigzag[i]] = (stbi_uc) s;
+ }
+ if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;
+
+ // decode the ntot code lengths; symbols 16..18 are run-length repeats
+ n = 0;
+ while (n < ntot) {
+ int c = stbi__zhuffman_decode(a, &z_codelength);
+ if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
+ if (c < 16)
+ lencodes[n++] = (stbi_uc) c;
+ else {
+ stbi_uc fill = 0;
+ if (c == 16) {
+ // repeat the previous length 3..6 times
+ c = stbi__zreceive(a,2)+3;
+ if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG");
+ fill = lencodes[n-1];
+ } else if (c == 17) {
+ // 3..10 zero lengths
+ c = stbi__zreceive(a,3)+3;
+ } else if (c == 18) {
+ // 11..138 zero lengths
+ c = stbi__zreceive(a,7)+11;
+ } else {
+ return stbi__err("bad codelengths", "Corrupt PNG");
+ }
+ // a run may not extend past the declared number of lengths
+ if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG");
+ memset(lencodes+n, fill, c);
+ n += c;
+ }
+ }
+ if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG");
+ if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
+ if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
+ return 1;
+}
+
+// Copy one stored (uncompressed) block from the input to the output.
+// The 4-byte LEN/NLEN header is assembled from whatever whole bytes are still
+// in the bit buffer plus further input bytes.
+// Returns 1 on success, 0 (via stbi__err or a failed stbi__zexpand) on error.
+static int stbi__parse_uncompressed_block(stbi__zbuf *a)
+{
+   stbi_uc header[4];
+   int len,nlen,k;
+   if (a->num_bits & 7)
+      stbi__zreceive(a, a->num_bits & 7); // discard
+   // drain the bit-packed data into header
+   k = 0;
+   while (a->num_bits > 0) {
+      header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check
+      a->code_buffer >>= 8;
+      a->num_bits -= 8;
+   }
+   if (a->num_bits < 0) return stbi__err("zlib corrupt","Corrupt PNG");
+   // now fill header the normal way
+   while (k < 4)
+      header[k++] = stbi__zget8(a);
+   len  = header[1] * 256 + header[0];
+   nlen = header[3] * 256 + header[2];
+   // NLEN must be the one's complement of LEN
+   if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG");
+   // Compare len against the bytes remaining rather than forming ptr + len:
+   // a pointer more than one past the end of its buffer is undefined behavior
+   // and the addition can wrap.
+   if (len > a->zbuffer_end - a->zbuffer) return stbi__err("read past buffer","Corrupt PNG");
+   if (len > a->zout_end - a->zout)
+      if (!stbi__zexpand(a, a->zout, len)) return 0;
+   memcpy(a->zout, a->zbuffer, len);
+   a->zbuffer += len;
+   a->zout += len;
+   return 1;
+}
+
+// read and validate the two-byte zlib stream header; returns 1 if acceptable, 0 otherwise
+static int stbi__parse_zlib_header(stbi__zbuf *a)
+{
+   int cmf, flg, method;
+   cmf = stbi__zget8(a);
+   flg = stbi__zget8(a);
+   method = cmf & 15;
+   /* cinfo = cmf >> 4 would give the window size (1 << (8 + cinfo)),
+      but it is not needed because output is fully buffered */
+   if (stbi__zeof(a)) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
+   if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
+   if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
+   if (method != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png
+   return 1;
+}
+
+// code lengths of the fixed literal/length code, used for block type 1
+static const stbi_uc stbi__zdefault_length[STBI__ZNSYMS] =
+{
+ 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+ 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+ 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+ 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+ 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+ 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+ 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+ 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8
+};
+// code lengths of the fixed distance code: all 32 symbols use 5 bits
+static const stbi_uc stbi__zdefault_distance[32] =
+{
+ 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
+};
+/*
+Init algorithm:
+{
+ int i; // use <= to match clearly with spec
+ for (i=0; i <= 143; ++i) stbi__zdefault_length[i] = 8;
+ for ( ; i <= 255; ++i) stbi__zdefault_length[i] = 9;
+ for ( ; i <= 279; ++i) stbi__zdefault_length[i] = 7;
+ for ( ; i <= 287; ++i) stbi__zdefault_length[i] = 8;
+
+ for (i=0; i <= 31; ++i) stbi__zdefault_distance[i] = 5;
+}
+*/
+
+// Decode a whole DEFLATE stream (optionally preceded by a zlib header) from
+// a's input into a's output buffer, block by block until the final block.
+// Returns 1 on success, 0 on error.
+static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
+{
+   int final, type;
+   if (parse_header)
+      if (!stbi__parse_zlib_header(a)) return 0;
+   a->num_bits = 0;
+   a->code_buffer = 0;
+   a->hit_zeof_once = 0;
+   do {
+      final = stbi__zreceive(a,1); // 1 on the last block of the stream
+      type = stbi__zreceive(a,2);  // 0 = stored, 1 = fixed huffman, 2 = dynamic huffman
+      if (type == 0) {
+         if (!stbi__parse_uncompressed_block(a)) return 0;
+      } else if (type == 3) {
+         // reserved block type: record a failure reason like every other
+         // error path instead of failing silently
+         return stbi__err("bad block type","Corrupt PNG");
+      } else {
+         if (type == 1) {
+            // use fixed code lengths
+            if (!stbi__zbuild_huffman(&a->z_length  , stbi__zdefault_length  , STBI__ZNSYMS)) return 0;
+            if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance,  32)) return 0;
+         } else {
+            if (!stbi__compute_huffman_codes(a)) return 0;
+         }
+         if (!stbi__parse_huffman_block(a)) return 0;
+      }
+   } while (!final);
+   return 1;
+}
+
+// attach the output buffer obuf (olen bytes, growable when exp is nonzero) to a
+// and run the decoder; returns the result of stbi__parse_zlib
+static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header)
+{
+   a->z_expandable = exp;
+   a->zout_start   = obuf;
+   a->zout_end     = obuf + olen;
+   a->zout         = a->zout_start;
+
+   return stbi__parse_zlib(a, parse_header);
+}
+
+// Decode a zlib stream (with header) into a growable buffer that starts at
+// initial_size bytes. Returns the output buffer and stores its length in
+// *outlen (if non-NULL); returns NULL on failure.
+STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
+{
+   stbi__zbuf a;
+   char *p = (char *) stbi__malloc(initial_size);
+   if (p == NULL) return NULL;
+   a.zbuffer = (stbi_uc *) buffer;
+   a.zbuffer_end = a.zbuffer + len;
+   if (!stbi__do_zlib(&a, p, initial_size, 1, 1)) {
+      // the buffer may have been reallocated, so free the current start pointer
+      STBI_FREE(a.zout_start);
+      return NULL;
+   }
+   if (outlen) *outlen = (int) (a.zout - a.zout_start);
+   return a.zout_start;
+}
+
+// same as stbi_zlib_decode_malloc_guesssize with an initial output size of 16384 bytes
+STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
+{
+   char *decoded = stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
+   return decoded;
+}
+
+// Like stbi_zlib_decode_malloc_guesssize, but parse_header selects whether a
+// zlib header is expected before the DEFLATE data.
+STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
+{
+   stbi__zbuf a;
+   char *p = (char *) stbi__malloc(initial_size);
+   if (p == NULL) return NULL;
+   a.zbuffer = (stbi_uc *) buffer;
+   a.zbuffer_end = a.zbuffer + len;
+   if (!stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
+      // the buffer may have been reallocated, so free the current start pointer
+      STBI_FREE(a.zout_start);
+      return NULL;
+   }
+   if (outlen) *outlen = (int) (a.zout - a.zout_start);
+   return a.zout_start;
+}
+
+// Decode a zlib stream (with header) into the fixed-size buffer obuffer.
+// Returns the number of bytes written, or -1 on failure.
+STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
+{
+   stbi__zbuf a;
+   a.zbuffer = (stbi_uc *) ibuffer;
+   a.zbuffer_end = a.zbuffer + ilen;
+   if (!stbi__do_zlib(&a, obuffer, olen, 0, 1))
+      return -1;
+   return (int) (a.zout - a.zout_start);
+}
+
+// Decode raw DEFLATE data (no zlib header) into a growable buffer that starts
+// at 16384 bytes. Returns the output buffer and stores its length in *outlen
+// (if non-NULL); returns NULL on failure.
+STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
+{
+   stbi__zbuf a;
+   char *p = (char *) stbi__malloc(16384);
+   if (p == NULL) return NULL;
+   a.zbuffer = (stbi_uc *) buffer;
+   a.zbuffer_end = a.zbuffer + len;
+   if (!stbi__do_zlib(&a, p, 16384, 1, 0)) {
+      // the buffer may have been reallocated, so free the current start pointer
+      STBI_FREE(a.zout_start);
+      return NULL;
+   }
+   if (outlen) *outlen = (int) (a.zout - a.zout_start);
+   return a.zout_start;
+}
+
+// Decode raw DEFLATE data (no zlib header) into the fixed-size buffer obuffer.
+// Returns the number of bytes written, or -1 on failure.
+STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
+{
+   stbi__zbuf a;
+   a.zbuffer = (stbi_uc *) ibuffer;
+   a.zbuffer_end = a.zbuffer + ilen;
+   if (!stbi__do_zlib(&a, obuffer, olen, 0, 0))
+      return -1;
+   return (int) (a.zout - a.zout_start);
+}
+#endif
+
+// public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18
+// simple implementation
+// - only 8-bit samples
+// - no CRC checking
+// - allocates lots of intermediate memory
+// - avoids problem of streaming data between subsystems
+// - avoids explicit window management
+// performance
+// - uses stb_zlib, a PD zlib implementation with fast huffman decoding
+
+#ifndef STBI_NO_PNG
+// the two 32-bit fields at the start of a chunk, as read by stbi__get_chunk_header
+typedef struct
+{
+ stbi__uint32 length; // first value read from the chunk header
+ stbi__uint32 type; // second value read from the chunk header
+} stbi__pngchunk;
+
+// read the next chunk header from s: length first, then type
+static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
+{
+   stbi__pngchunk hdr;
+   hdr.length = stbi__get32be(s);
+   hdr.type   = stbi__get32be(s);
+   return hdr;
+}
+
+// consume bytes from s while they match the 8-byte PNG signature;
+// returns 1 on a full match, 0 (via stbi__err) at the first mismatch
+static int stbi__check_png_header(stbi__context *s)
+{
+   static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
+   const stbi_uc *expected;
+   for (expected = png_sig; expected != png_sig + 8; ++expected) {
+      if (stbi__get8(s) != *expected)
+         return stbi__err("bad png sig","Not a PNG");
+   }
+   return 1;
+}
+
+// PNG decoder state
+typedef struct
+{
+ stbi__context *s; // input stream and image dimensions / component count
+ stbi_uc *idata, *expanded, *out; // out: decoded pixels (allocated in stbi__create_png_image_raw); idata/expanded are used outside this section - presumably raw IDAT bytes and inflated data, TODO confirm
+ int depth; // NOTE(review): presumably bits per sample - confirm against the PNG parser
+} stbi__png;
+
+
+// scanline filter types; values 0..4 are the filter byte that precedes each scanline
+enum {
+ STBI__F_none=0,
+ STBI__F_sub=1,
+ STBI__F_up=2,
+ STBI__F_avg=3,
+ STBI__F_paeth=4,
+ // synthetic filter used for first scanline to avoid needing a dummy row of 0s
+ STBI__F_avg_first
+};
+
+// Filter to apply on the first scanline, indexed by the filter byte read from
+// the stream: variants that do not sample the (nonexistent) previous row.
+// Read-only lookup table, hence const.
+static const stbi_uc first_row_filter[5] =
+{
+   STBI__F_none,
+   STBI__F_sub,
+   STBI__F_none,      // up with an all-zero previous row adds nothing
+   STBI__F_avg_first,
+   STBI__F_sub        // Paeth with b=c=0 turns out to be equivalent to sub
+};
+
+// Paeth predictor: choose a, b or c following the PNG rule.
+// Written with a single threshold instead of the three distances used by the
+// reference formulation in the PNG spec; the result is the same.
+static int stbi__paeth(int a, int b, int c)
+{
+   int thresh = c*3 - (a + b);
+   int lo, hi;
+   if (a < b) { lo = a; hi = b; }
+   else       { lo = b; hi = a; }
+   if (thresh <= lo) return hi;
+   if (hi <= thresh) return lo;
+   return c;
+}
+
+static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 }; // indexed by bit depth: multiplier that scales a grayscale sample to the 0..255 range
+
+// append an alpha byte of 255 to each of x pixels
+// img_n is the channel count of src and must be 1 or 3; dest gets img_n+1 channels
+// dest == src is legal, which is why pixels are processed from last to first
+static void stbi__create_png_alpha_expand8(stbi_uc *dest, stbi_uc *src, stbi__uint32 x, int img_n)
+{
+   int px = x-1;
+   if (img_n == 1) {
+      while (px >= 0) {
+         dest[px*2+1] = 255;
+         dest[px*2+0] = src[px];
+         --px;
+      }
+      return;
+   }
+   STBI_ASSERT(img_n == 3);
+   while (px >= 0) {
+      dest[px*4+3] = 255;
+      dest[px*4+2] = src[px*3+2];
+      dest[px*4+1] = src[px*3+1];
+      dest[px*4+0] = src[px*3+0];
+      --px;
+   }
+}
+
+// create the png data from post-deflated data
+// Unfilters x by y pixels of scanline data from raw (raw_len bytes available)
+// and expands them into a newly allocated a->out with out_n channels per pixel.
+// depth is the bit depth per sample; color is only tested against 0 here, to
+// decide whether low-bit-depth samples are scaled up as grayscale.
+// Returns 1 on success, 0 (via stbi__err) on error.
+static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
+{
+ int bytes = (depth == 16 ? 2 : 1);
+ stbi__context *s = a->s;
+ stbi__uint32 i,j,stride = x*out_n*bytes;
+ stbi__uint32 img_len, img_width_bytes;
+ stbi_uc *filter_buf;
+ int all_ok = 1;
+ int k;
+ int img_n = s->img_n; // copy it into a local for later
+
+ int output_bytes = out_n*bytes;
+ int filter_bytes = img_n*bytes; // distance in bytes to the corresponding byte of the previous pixel
+ int width = x;
+
+ STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
+ a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
+ if (!a->out) return stbi__err("outofmem", "Out of memory");
+
+ // note: error exits here don't need to clean up a->out individually,
+ // stbi__do_png always does on error.
+ if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG");
+ // bytes per packed scanline, excluding the leading filter byte
+ img_width_bytes = (((img_n * x * depth) + 7) >> 3);
+ if (!stbi__mad2sizes_valid(img_width_bytes, y, img_width_bytes)) return stbi__err("too large", "Corrupt PNG");
+ img_len = (img_width_bytes + 1) * y;
+
+ // we used to check for exact match between raw_len and img_len on non-interlaced PNGs,
+ // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros),
+ // so just check for raw_len < img_len always.
+ if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");
+
+ // Allocate two scan lines worth of filter workspace buffer.
+ filter_buf = (stbi_uc *) stbi__malloc_mad2(img_width_bytes, 2, 0);
+ if (!filter_buf) return stbi__err("outofmem", "Out of memory");
+
+ // Filtering for low-bit-depth images
+ // (filters then operate on packed bytes, one byte back)
+ if (depth < 8) {
+ filter_bytes = 1;
+ width = img_width_bytes;
+ }
+
+ for (j=0; j < y; ++j) {
+ // cur/prior filter buffers alternate
+ stbi_uc *cur = filter_buf + (j & 1)*img_width_bytes;
+ stbi_uc *prior = filter_buf + (~j & 1)*img_width_bytes;
+ stbi_uc *dest = a->out + stride*j;
+ int nk = width * filter_bytes; // number of filtered bytes in this scanline
+ int filter = *raw++;
+
+ // check filter type
+ if (filter > 4) {
+ all_ok = stbi__err("invalid filter","Corrupt PNG");
+ break;
+ }
+
+ // if first row, use special filter that doesn't sample previous row
+ if (j == 0) filter = first_row_filter[filter];
+
+ // perform actual filtering
+ switch (filter) {
+ case STBI__F_none:
+ memcpy(cur, raw, nk);
+ break;
+ case STBI__F_sub:
+ memcpy(cur, raw, filter_bytes);
+ for (k = filter_bytes; k < nk; ++k)
+ cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]);
+ break;
+ case STBI__F_up:
+ for (k = 0; k < nk; ++k)
+ cur[k] = STBI__BYTECAST(raw[k] + prior[k]);
+ break;
+ case STBI__F_avg:
+ for (k = 0; k < filter_bytes; ++k)
+ cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1));
+ for (k = filter_bytes; k < nk; ++k)
+ cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1));
+ break;
+ case STBI__F_paeth:
+ for (k = 0; k < filter_bytes; ++k)
+ cur[k] = STBI__BYTECAST(raw[k] + prior[k]); // prior[k] == stbi__paeth(0,prior[k],0)
+ for (k = filter_bytes; k < nk; ++k)
+ cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes], prior[k], prior[k-filter_bytes]));
+ break;
+ case STBI__F_avg_first:
+ memcpy(cur, raw, filter_bytes);
+ for (k = filter_bytes; k < nk; ++k)
+ cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1));
+ break;
+ }
+
+ raw += nk;
+
+ // expand decoded bits in cur to dest, also adding an extra alpha channel if desired
+ if (depth < 8) {
+ stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
+ stbi_uc *in = cur;
+ stbi_uc *out = dest;
+ stbi_uc inb = 0;
+ stbi__uint32 nsmp = x*img_n;
+
+ // expand bits to bytes first
+ // (samples are taken from the most significant bits of each input byte first)
+ if (depth == 4) {
+ for (i=0; i < nsmp; ++i) {
+ if ((i & 1) == 0) inb = *in++;
+ *out++ = scale * (inb >> 4);
+ inb <<= 4;
+ }
+ } else if (depth == 2) {
+ for (i=0; i < nsmp; ++i) {
+ if ((i & 3) == 0) inb = *in++;
+ *out++ = scale * (inb >> 6);
+ inb <<= 2;
+ }
+ } else {
+ STBI_ASSERT(depth == 1);
+ for (i=0; i < nsmp; ++i) {
+ if ((i & 7) == 0) inb = *in++;
+ *out++ = scale * (inb >> 7);
+ inb <<= 1;
+ }
+ }
+
+ // insert alpha=255 values if desired
+ if (img_n != out_n)
+ stbi__create_png_alpha_expand8(dest, dest, x, img_n);
+ } else if (depth == 8) {
+ if (img_n == out_n)
+ memcpy(dest, cur, x*img_n);
+ else
+ stbi__create_png_alpha_expand8(dest, cur, x, img_n);
+ } else if (depth == 16) {
+ // convert the image data from big-endian to platform-native
+ stbi__uint16 *dest16 = (stbi__uint16*)dest;
+ stbi__uint32 nsmp = x*img_n;
+
+ if (img_n == out_n) {
+ for (i = 0; i < nsmp; ++i, ++dest16, cur += 2)
+ *dest16 = (cur[0] << 8) | cur[1];
+ } else {
+ // one extra channel: append alpha = 0xffff to every pixel
+ STBI_ASSERT(img_n+1 == out_n);
+ if (img_n == 1) {
+ for (i = 0; i < x; ++i, dest16 += 2, cur += 2) {
+ dest16[0] = (cur[0] << 8) | cur[1];
+ dest16[1] = 0xffff;
+ }
+ } else {
+ STBI_ASSERT(img_n == 3);
+ for (i = 0; i < x; ++i, dest16 += 4, cur += 6) {
+ dest16[0] = (cur[0] << 8) | cur[1];
+ dest16[1] = (cur[2] << 8) | cur[3];
+ dest16[2] = (cur[4] << 8) | cur[5];
+ dest16[3] = 0xffff;
+ }
+ }
+ }
+ }
+ }
+
+ STBI_FREE(filter_buf);
+ if (!all_ok) return 0;
+
+ return 1;
+}
+
+// Build the final image in a->out from the inflated data.
+// Non-interlaced images are decoded directly by stbi__create_png_image_raw.
+// Interlaced images are decoded as seven reduced sub-images, one per pass,
+// whose pixels are scattered into their positions in the full image.
+// Returns 1 on success, 0 on error.
+static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
+{
+ int bytes = (depth == 16 ? 2 : 1);
+ int out_bytes = out_n * bytes; // bytes per output pixel
+ stbi_uc *final;
+ int p;
+ if (!interlaced)
+ return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);
+
+ // de-interlacing
+ final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
+ if (!final) return stbi__err("outofmem", "Out of memory");
+ for (p=0; p < 7; ++p) {
+ // per pass: coordinates of its first pixel and the spacing between its pixels
+ int xorig[] = { 0,4,0,2,0,1,0 };
+ int yorig[] = { 0,0,4,0,2,0,1 };
+ int xspc[] = { 8,8,4,4,2,2,1 };
+ int yspc[] = { 8,8,8,4,4,2,2 };
+ int i,j,x,y;
+ // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
+ // size of this pass's sub-image (rounded up)
+ x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p];
+ y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p];
+ if (x && y) {
+ // bytes this pass occupies in image_data, including one filter byte per row
+ stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
+ if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
+ STBI_FREE(final);
+ return 0;
+ }
+ // copy each decoded pixel of the pass to its place in the full image
+ for (j=0; j < y; ++j) {
+ for (i=0; i < x; ++i) {
+ int out_y = j*yspc[p]+yorig[p];
+ int out_x = i*xspc[p]+xorig[p];
+ memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes,
+ a->out + (j*x+i)*out_bytes, out_bytes);
+ }
+ }
+ STBI_FREE(a->out);
+ image_data += img_len;
+ image_data_len -= img_len;
+ }
+ }
+ a->out = final;
+
+ return 1;
+}
+
+// Apply colour-key transparency to the 8-bit image in z->out, whose alpha
+// channel is assumed to already hold 255. out_n must be 2 (grey+alpha) or
+// 4 (RGBA); pixels equal to tc get alpha 0. Always returns 1.
+static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n)
+{
+   stbi__uint32 remaining = z->s->img_x * z->s->img_y;
+   stbi_uc *px = z->out;
+
+   STBI_ASSERT(out_n == 2 || out_n == 4);
+
+   if (out_n == 2) {
+      // grey+alpha: alpha is rewritten for every pixel
+      for (; remaining > 0; --remaining, px += 2)
+         px[1] = (px[0] == tc[0]) ? 0 : 255;
+      return 1;
+   }
+
+   // RGBA: only matching pixels are touched
+   for (; remaining > 0; --remaining, px += 4) {
+      if (px[0] == tc[0] && px[1] == tc[1] && px[2] == tc[2])
+         px[3] = 0;
+   }
+   return 1;
+}
+
+// 16-bit counterpart of stbi__compute_transparency: z->out is treated as an
+// array of 16-bit samples whose alpha is assumed to already hold 65535.
+// out_n must be 2 or 4; pixels equal to tc get alpha 0. Always returns 1.
+static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n)
+{
+   stbi__uint32 remaining = z->s->img_x * z->s->img_y;
+   stbi__uint16 *px = (stbi__uint16*) z->out;
+
+   STBI_ASSERT(out_n == 2 || out_n == 4);
+
+   if (out_n == 2) {
+      // grey+alpha: alpha is rewritten for every pixel
+      for (; remaining > 0; --remaining, px += 2)
+         px[1] = (px[0] == tc[0]) ? 0 : 65535;
+      return 1;
+   }
+
+   // RGBA: only matching pixels are touched
+   for (; remaining > 0; --remaining, px += 4) {
+      if (px[0] == tc[0] && px[1] == tc[1] && px[2] == tc[2])
+         px[3] = 0;
+   }
+   return 1;
+}
+
+// Replace the one-byte-per-pixel index image in a->out with an expanded
+// RGB (pal_img_n == 3) or RGBA (otherwise) image looked up in `palette`,
+// which stores 4 bytes per entry. `len` is unused. Returns 1 on success.
+static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
+{
+   stbi__uint32 idx, pixel_count = a->s->img_x * a->s->img_y;
+   stbi_uc *indices = a->out;
+   stbi_uc *expanded, *dst;
+   int stride = (pal_img_n == 3) ? 3 : 4;
+
+   expanded = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0);
+   if (expanded == NULL) return stbi__err("outofmem", "Out of memory");
+
+   // no early exits from here until the old buffer is released
+   dst = expanded;
+   for (idx = 0; idx < pixel_count; ++idx) {
+      int entry = indices[idx]*4;
+      dst[0] = palette[entry  ];
+      dst[1] = palette[entry+1];
+      dst[2] = palette[entry+2];
+      if (stride == 4)
+         dst[3] = palette[entry+3];
+      dst += stride;
+   }
+
+   STBI_FREE(a->out);
+   a->out = expanded;
+
+   STBI_NOTUSED(len);
+
+   return 1;
+}
+
+static int stbi__unpremultiply_on_load_global = 0; // process-wide setting read through the stbi__unpremultiply_on_load macro below
+static int stbi__de_iphone_flag_global = 0; // process-wide setting read through the stbi__de_iphone_flag macro below
+
+STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply) // public setter for the global unpremultiply flag
+{
+ stbi__unpremultiply_on_load_global = flag_true_if_should_unpremultiply;
+}
+
+STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert) // public setter for the global de-iphone flag
+{
+ stbi__de_iphone_flag_global = flag_true_if_should_convert;
+}
+
+#ifndef STBI_THREAD_LOCAL // no thread-local storage available: the two flag names alias the globals directly
+#define stbi__unpremultiply_on_load stbi__unpremultiply_on_load_global
+#define stbi__de_iphone_flag stbi__de_iphone_flag_global
+#else // thread-local storage available: each thread may override the globals
+static STBI_THREAD_LOCAL int stbi__unpremultiply_on_load_local, stbi__unpremultiply_on_load_set; // per-thread value, and whether this thread has set it
+static STBI_THREAD_LOCAL int stbi__de_iphone_flag_local, stbi__de_iphone_flag_set; // per-thread value, and whether this thread has set it
+
+STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply) // per-thread setter; also marks the override as set
+{
+ stbi__unpremultiply_on_load_local = flag_true_if_should_unpremultiply;
+ stbi__unpremultiply_on_load_set = 1;
+}
+
+STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert) // per-thread setter; also marks the override as set
+{
+ stbi__de_iphone_flag_local = flag_true_if_should_convert;
+ stbi__de_iphone_flag_set = 1;
+}
+
+#define stbi__unpremultiply_on_load (stbi__unpremultiply_on_load_set \
+ ? stbi__unpremultiply_on_load_local \
+ : stbi__unpremultiply_on_load_global)
+#define stbi__de_iphone_flag (stbi__de_iphone_flag_set \
+ ? stbi__de_iphone_flag_local \
+ : stbi__de_iphone_flag_global)
+#endif // STBI_THREAD_LOCAL
+
+// Convert the decoded image in z->out from BGR(A) to RGB(A) byte order,
+// in place. For 4-component output the colour channels are additionally
+// un-premultiplied by alpha when stbi__unpremultiply_on_load is set.
+static void stbi__de_iphone(stbi__png *z)
+{
+   stbi__context *ctx = z->s;
+   stbi__uint32 n, total = ctx->img_x * ctx->img_y;
+   stbi_uc *px = z->out;
+
+   if (ctx->img_out_n == 3) {
+      // three components: swap first and third byte of every pixel
+      for (n = 0; n < total; ++n, px += 3) {
+         stbi_uc first = px[0];
+         px[0] = px[2];
+         px[2] = first;
+      }
+      return;
+   }
+
+   STBI_ASSERT(ctx->img_out_n == 4);
+
+   if (!stbi__unpremultiply_on_load) {
+      // four components, plain swap; alpha is left untouched
+      for (n = 0; n < total; ++n, px += 4) {
+         stbi_uc first = px[0];
+         px[0] = px[2];
+         px[2] = first;
+      }
+      return;
+   }
+
+   // four components: swap and divide by alpha, rounding to nearest
+   for (n = 0; n < total; ++n, px += 4) {
+      stbi_uc alpha = px[3];
+      stbi_uc first = px[0];
+      if (alpha == 0) {
+         // zero alpha: nothing to divide by, just swap
+         px[0] = px[2];
+         px[2] = first;
+      } else {
+         stbi_uc half = alpha / 2;
+         px[0] = (px[2] * 255 + half) / alpha;
+         px[1] = (px[1] * 255 + half) / alpha;
+         px[2] = ( first * 255 + half) / alpha;
+      }
+   }
+}
+
+#define STBI__PNG_TYPE(a,b,c,d) (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d)) // packs four chunk-name characters into one unsigned value (first character in the top byte) for use as a case label against a chunk's type
+
+static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) // walks the PNG chunk stream of z->s; `scan` (STBI__SCAN_type / STBI__SCAN_header / STBI__SCAN_load) selects how far to go; returns 1 on success, error paths return 0 or the value of stbi__err(...)
+{
+ stbi_uc palette[1024], pal_img_n=0; // palette: up to 256 entries of 4 bytes each; pal_img_n: 0 for non-paletted images, otherwise 3 (PLTE only) or 4 (PLTE + tRNS)
+ stbi_uc has_trans=0, tc[3]={0}; // has_trans: a colour-key tRNS chunk was seen on a non-paletted image; tc: that key for depths below 16
+ stbi__uint16 tc16[3]; // colour key for 16-bit images; written only in the tRNS branch when depth == 16
+ stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0; // ioff: IDAT bytes gathered so far; idata_limit: current capacity of z->idata; pal_len: number of palette entries read
+ int first=1,k,interlace=0, color=0, is_iphone=0; // first: no IHDR seen yet
+ stbi__context *s = z->s;
+
+ z->expanded = NULL;
+ z->idata = NULL;
+ z->out = NULL;
+
+ if (!stbi__check_png_header(s)) return 0;
+
+ if (scan == STBI__SCAN_type) return 1; // caller only wanted to know whether the signature matches
+
+ for (;;) { // one iteration per chunk; left only through the return statements below
+ stbi__pngchunk c = stbi__get_chunk_header(s);
+ switch (c.type) {
+ case STBI__PNG_TYPE('C','g','B','I'): // CgBI chunk: marks the file as an iPhone-style PNG; payload is skipped
+ is_iphone = 1;
+ stbi__skip(s, c.length);
+ break;
+ case STBI__PNG_TYPE('I','H','D','R'): { // image header: dimensions, bit depth, colour type, interlace flag
+ int comp,filter;
+ if (!first) return stbi__err("multiple IHDR","Corrupt PNG");
+ first = 0;
+ if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG");
+ s->img_x = stbi__get32be(s);
+ s->img_y = stbi__get32be(s);
+ if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
+ if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
+ z->depth = stbi__get8(s); if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16) return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only");
+ color = stbi__get8(s); if (color > 6) return stbi__err("bad ctype","Corrupt PNG");
+ if (color == 3 && z->depth == 16) return stbi__err("bad ctype","Corrupt PNG");
+ if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG"); // colour type 3 = paletted; any other type with bit 0 set is rejected
+ comp = stbi__get8(s); if (comp) return stbi__err("bad comp method","Corrupt PNG");
+ filter= stbi__get8(s); if (filter) return stbi__err("bad filter method","Corrupt PNG");
+ interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG");
+ if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG");
+ if (!pal_img_n) {
+ s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); // bit 1 of the colour type selects RGB over grey, bit 2 adds alpha
+ if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode"); // keeps img_x*img_y*img_n within 2^30
+ } else {
+ // if paletted, then pal_n is our final components, and
+ // img_n is # components to decompress/filter.
+ s->img_n = 1;
+ if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG"); // sized for the worst case of 4 output bytes per pixel
+ }
+ // even with SCAN_header, have to scan to see if we have a tRNS
+ break;
+ }
+
+ case STBI__PNG_TYPE('P','L','T','E'): { // palette: RGB triples stored into 4-byte entries with alpha preset to 255
+ if (first) return stbi__err("first not IHDR", "Corrupt PNG");
+ if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG");
+ pal_len = c.length / 3;
+ if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG"); // length must be a multiple of 3
+ for (i=0; i < pal_len; ++i) {
+ palette[i*4+0] = stbi__get8(s);
+ palette[i*4+1] = stbi__get8(s);
+ palette[i*4+2] = stbi__get8(s);
+ palette[i*4+3] = 255;
+ }
+ break;
+ }
+
+ case STBI__PNG_TYPE('t','R','N','S'): { // transparency: per-entry alpha for paletted images, a single colour key otherwise
+ if (first) return stbi__err("first not IHDR", "Corrupt PNG");
+ if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG");
+ if (pal_img_n) {
+ if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; } // header scan: paletted + tRNS is reported as 4 components
+ if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG");
+ if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG");
+ pal_img_n = 4;
+ for (i=0; i < c.length; ++i)
+ palette[i*4+3] = stbi__get8(s); // overwrite the alpha byte of the first c.length entries
+ } else {
+ if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG"); // an even component count means the image already has alpha
+ if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG"); // one 16-bit value per component
+ has_trans = 1;
+ // non-paletted with tRNS = constant alpha. if header-scanning, we can stop now.
+ if (scan == STBI__SCAN_header) { ++s->img_n; return 1; }
+ if (z->depth == 16) {
+ for (k = 0; k < s->img_n && k < 3; ++k) // extra loop test to suppress false GCC warning
+ tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
+ } else {
+ for (k = 0; k < s->img_n && k < 3; ++k)
+ tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
+ }
+ }
+ break;
+ }
+
+ case STBI__PNG_TYPE('I','D','A','T'): { // compressed image data: appended to the growing z->idata buffer
+ if (first) return stbi__err("first not IHDR", "Corrupt PNG");
+ if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG");
+ if (scan == STBI__SCAN_header) {
+ // header scan definitely stops at first IDAT
+ if (pal_img_n)
+ s->img_n = pal_img_n;
+ return 1;
+ }
+ if (c.length > (1u << 30)) return stbi__err("IDAT size limit", "IDAT section larger than 2^30 bytes");
+ if ((int)(ioff + c.length) < (int)ioff) return 0; // accumulated size would wrap when viewed as a signed int
+ if (ioff + c.length > idata_limit) { // buffer too small: grow by doubling until the new chunk fits
+ stbi__uint32 idata_limit_old = idata_limit;
+ stbi_uc *p;
+ if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
+ while (ioff + c.length > idata_limit)
+ idata_limit *= 2;
+ STBI_NOTUSED(idata_limit_old);
+ p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
+ z->idata = p;
+ }
+ if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG");
+ ioff += c.length;
+ break;
+ }
+
+ case STBI__PNG_TYPE('I','E','N','D'): { // end of file: inflate the gathered IDAT data and build the output image
+ stbi__uint32 raw_len, bpl;
+ if (first) return stbi__err("first not IHDR", "Corrupt PNG");
+ if (scan != STBI__SCAN_load) return 1;
+ if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG");
+ // initial guess for decoded data size to avoid unnecessary reallocs
+ bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
+ raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
+ z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone); // raw_len is passed as the size guess and also receives the output length; last argument is 0 for iPhone-style files
+ if (z->expanded == NULL) return 0; // zlib should set error
+ STBI_FREE(z->idata); z->idata = NULL;
+ if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans) // add an alpha channel during decode when the caller wants exactly one more component, or when a colour key must be applied
+ s->img_out_n = s->img_n+1;
+ else
+ s->img_out_n = s->img_n;
+ if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0;
+ if (has_trans) {
+ if (z->depth == 16) {
+ if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0;
+ } else {
+ if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
+ }
+ }
+ if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
+ stbi__de_iphone(z);
+ if (pal_img_n) {
+ // pal_img_n == 3 or 4
+ s->img_n = pal_img_n; // record the actual colors we had
+ s->img_out_n = pal_img_n;
+ if (req_comp >= 3) s->img_out_n = req_comp;
+ if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
+ return 0;
+ } else if (has_trans) {
+ // non-paletted image with tRNS -> source image has (constant) alpha
+ ++s->img_n;
+ }
+ STBI_FREE(z->expanded); z->expanded = NULL;
+ // end of PNG chunk, read and skip CRC
+ stbi__get32be(s);
+ return 1;
+ }
+
+ default:
+ // if critical, fail
+ if (first) return stbi__err("first not IHDR", "Corrupt PNG");
+ if ((c.type & (1 << 29)) == 0) { // bit 29 clear: the chunk is treated as critical, so an unknown one is an error
+ #ifndef STBI_NO_FAILURE_STRINGS
+ // not threadsafe
+ static char invalid_chunk[] = "XXXX PNG chunk not known";
+ invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
+ invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
+ invalid_chunk[2] = STBI__BYTECAST(c.type >> 8);
+ invalid_chunk[3] = STBI__BYTECAST(c.type >> 0);
+ #endif
+ return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
+ }
+ stbi__skip(s, c.length); // other unknown chunks are skipped
+ break;
+ }
+ // end of PNG chunk, read and skip CRC
+ stbi__get32be(s);
+ }
+}
+
+// Run a full PNG decode on `p` and hand the pixel buffer to the caller.
+// req_comp must be 0..4; when it is non-zero and differs from the decoded
+// component count the image is converted. On success *x/*y receive the
+// dimensions, *n (if non-NULL) the source component count, and
+// ri->bits_per_channel is 8 or 16. The decoder's scratch buffers are
+// released before the normal return.
+static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri)
+{
+   void *pixels = NULL;
+
+   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
+
+   if (!stbi__parse_png_file(p, STBI__SCAN_load, req_comp))
+      goto release;
+
+   if (p->depth <= 8)
+      ri->bits_per_channel = 8;
+   else if (p->depth == 16)
+      ri->bits_per_channel = 16;
+   else
+      return stbi__errpuc("bad bits_per_channel", "PNG not supported: unsupported color depth");
+
+   // take ownership of the decoded image
+   pixels = p->out;
+   p->out = NULL;
+
+   if (req_comp && req_comp != p->s->img_out_n) {
+      if (ri->bits_per_channel == 8)
+         pixels = stbi__convert_format((unsigned char *) pixels, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
+      else
+         pixels = stbi__convert_format16((stbi__uint16 *) pixels, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
+      p->s->img_out_n = req_comp;
+      if (pixels == NULL) return pixels;
+   }
+
+   *x = p->s->img_x;
+   *y = p->s->img_y;
+   if (n) *n = p->s->img_n;
+
+release:
+   STBI_FREE(p->out); p->out = NULL;
+   STBI_FREE(p->expanded); p->expanded = NULL;
+   STBI_FREE(p->idata); p->idata = NULL;
+
+   return pixels;
+}
+
+// Loader entry point for PNG: binds a fresh decoder state to `s` and
+// forwards everything to stbi__do_png.
+static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
+{
+   stbi__png decoder;
+   decoder.s = s;
+   return stbi__do_png(&decoder, x, y, comp, req_comp, ri);
+}
+
+// Report the result of stbi__check_png_header for `s`, rewinding the
+// stream afterwards regardless of the outcome.
+static int stbi__png_test(stbi__context *s)
+{
+   int header_ok = stbi__check_png_header(s);
+   stbi__rewind(s);
+   return header_ok;
+}
+
+// Header-only scan of a PNG. On success stores width, height and
+// component count through whichever of x/y/comp are non-NULL and returns 1.
+// On failure rewinds the stream and returns 0.
+static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
+{
+   if (stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
+      if (x) *x = p->s->img_x;
+      if (y) *y = p->s->img_y;
+      if (comp) *comp = p->s->img_n;
+      return 1;
+   }
+   stbi__rewind( p->s );
+   return 0;
+}
+
+// Info entry point for PNG: binds a fresh decoder state to `s` and
+// forwards to stbi__png_info_raw.
+static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   stbi__png decoder;
+   decoder.s = s;
+   return stbi__png_info_raw(&decoder, x, y, comp);
+}
+
+// Returns 1 when `s` holds a PNG whose header declares 16 bits per channel,
+// otherwise 0. The stream is rewound when the header parses but the depth
+// is not 16 (stbi__png_info_raw rewinds on its own failure).
+static int stbi__png_is16(stbi__context *s)
+{
+   stbi__png probe;
+   probe.s = s;
+   if (!stbi__png_info_raw(&probe, NULL, NULL, NULL))
+      return 0;
+   if (probe.depth == 16)
+      return 1;
+   stbi__rewind(probe.s);
+   return 0;
+}
+#endif
+
+// Microsoft/Windows BMP image
+
+#ifndef STBI_NO_BMP
+// Check for a BMP signature without rewinding: 'B','M', then skip the
+// file-size, reserved and data-offset fields and accept only the known
+// info-header sizes. Returns 1 on a match, 0 otherwise.
+static int stbi__bmp_test_raw(stbi__context *s)
+{
+   int header_size;
+   if (stbi__get8(s) != 'B') return 0;
+   if (stbi__get8(s) != 'M') return 0;
+   stbi__get32le(s); // discard filesize
+   stbi__get16le(s); // discard reserved
+   stbi__get16le(s); // discard reserved
+   stbi__get32le(s); // discard data offset
+   header_size = stbi__get32le(s);
+   switch (header_size) {
+      case 12:
+      case 40:
+      case 56:
+      case 108:
+      case 124:
+         return 1;
+      default:
+         return 0;
+   }
+}
+
+// Rewinding wrapper around stbi__bmp_test_raw.
+static int stbi__bmp_test(stbi__context *s)
+{
+   int is_bmp = stbi__bmp_test_raw(s);
+   stbi__rewind(s);
+   return is_bmp;
+}
+
+
+// Index (0..31) of the most significant set bit of z, or -1 when z is 0.
+static int stbi__high_bit(unsigned int z)
+{
+   int bit = 0;
+   int step;
+   if (z == 0) return -1;
+   // binary search: halve the window while anything remains above it
+   for (step = 16; step > 0; step >>= 1) {
+      if ((z >> step) != 0) {
+         bit += step;
+         z >>= step;
+      }
+   }
+   return bit;
+}
+
+// Number of set bits in a.
+static int stbi__bitcount(unsigned int a)
+{
+   int count = 0;
+   // each iteration clears the lowest set bit
+   while (a != 0) {
+      a &= a - 1;
+      ++count;
+   }
+   return count;
+}
+
+// Take the masked field `v`, move it so its top bit sits at bit 7
+// (`shift` > 0 shifts right, < 0 shifts left), keep its `bits` significant
+// bits and widen them to a full 8-bit value by replicating the bit pattern
+// (multiply, then shift back down).
+static int stbi__shiftsigned(unsigned int v, int shift, int bits)
+{
+   // per-width multiplier that tiles the field across 8 or more bits ...
+   static const unsigned int replicate_mul[9] = {
+      0,
+      0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/,
+      0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/,
+   };
+   // ... and the right shift that trims the product back to 8 bits
+   static const unsigned int replicate_shr[9] = {
+      0, 0,0,1,0,2,4,6,0,
+   };
+   int replicated;
+
+   v = (shift < 0) ? (v << -shift) : (v >> shift);
+   STBI_ASSERT(v < 256);
+   v >>= (8-bits);
+   STBI_ASSERT(bits >= 0 && bits <= 8);
+   replicated = (int) ((unsigned) v * replicate_mul[bits]);
+   return replicated >> replicate_shr[bits];
+}
+
+typedef struct // BMP header fields filled in by stbi__bmp_parse_header and consumed by stbi__bmp_load
+{
+ int bpp, offset, hsz; // bits per pixel; value of the file header's data-offset field; size of the info header (12/40/56/108/124)
+ unsigned int mr,mg,mb,ma, all_a; // red/green/blue/alpha channel bit masks; all_a seeds the OR-accumulator of alpha values in stbi__bmp_load
+ int extra_read; // header bytes not counted in hsz: starts at 14, plus 12 when separate BI_BITFIELDS masks are read
+} stbi__bmp_data;
+
+// Fill in the default channel masks for an uncompressed BMP.
+// compress == 3 (BI_BITFIELDS) leaves the masks alone and succeeds;
+// compress == 0 sets 5-5-5 masks for 16 bpp, 8-8-8-8 masks for 32 bpp and
+// all-zero masks otherwise. Any other compression value returns 0.
+static int stbi__bmp_set_mask_defaults(stbi__bmp_data *info, int compress)
+{
+   if (compress == 3)
+      return 1; // masks were given explicitly, keep them
+   if (compress != 0)
+      return 0; // error
+
+   switch (info->bpp) {
+      case 16:
+         info->mr = 31u << 10;
+         info->mg = 31u << 5;
+         info->mb = 31u << 0;
+         break;
+      case 32:
+         info->mr = 0xffu << 16;
+         info->mg = 0xffu << 8;
+         info->mb = 0xffu << 0;
+         info->ma = 0xffu << 24;
+         info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
+         break;
+      default:
+         // every other depth: no masks
+         info->mr = info->mg = info->mb = info->ma = 0;
+         break;
+   }
+   return 1;
+}
+
+static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info) // reads the BMP file header and info header from s into s->img_x/img_y and *info; returns (void *) 1 on success, otherwise the result of stbi__errpuc(...)
+{
+ int hsz;
+ if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP");
+ stbi__get32le(s); // discard filesize
+ stbi__get16le(s); // discard reserved
+ stbi__get16le(s); // discard reserved
+ info->offset = stbi__get32le(s);
+ info->hsz = hsz = stbi__get32le(s);
+ info->mr = info->mg = info->mb = info->ma = 0;
+ info->extra_read = 14; // the 14 bytes consumed above, before the info header's size field
+
+ if (info->offset < 0) return stbi__errpuc("bad BMP", "bad BMP");
+
+ if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
+ if (hsz == 12) { // 12-byte header stores the dimensions as 16-bit values
+ s->img_x = stbi__get16le(s);
+ s->img_y = stbi__get16le(s);
+ } else {
+ s->img_x = stbi__get32le(s);
+ s->img_y = stbi__get32le(s);
+ }
+ if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP"); // this 16-bit field must be 1
+ info->bpp = stbi__get16le(s);
+ if (hsz != 12) { // everything below exists only in the larger headers
+ int compress = stbi__get32le(s);
+ if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
+ if (compress >= 4) return stbi__errpuc("BMP JPEG/PNG", "BMP type not supported: unsupported compression"); // this includes PNG/JPEG modes
+ if (compress == 3 && info->bpp != 16 && info->bpp != 32) return stbi__errpuc("bad BMP", "bad BMP"); // bitfields requires 16 or 32 bits/pixel
+ stbi__get32le(s); // discard sizeof
+ stbi__get32le(s); // discard hres
+ stbi__get32le(s); // discard vres
+ stbi__get32le(s); // discard colorsused
+ stbi__get32le(s); // discard max important
+ if (hsz == 40 || hsz == 56) {
+ if (hsz == 56) { // the 56-byte header carries four extra 32-bit fields, which are discarded
+ stbi__get32le(s);
+ stbi__get32le(s);
+ stbi__get32le(s);
+ stbi__get32le(s);
+ }
+ if (info->bpp == 16 || info->bpp == 32) {
+ if (compress == 0) {
+ stbi__bmp_set_mask_defaults(info, compress);
+ } else if (compress == 3) { // BI_BITFIELDS: three 32-bit masks follow
+ info->mr = stbi__get32le(s);
+ info->mg = stbi__get32le(s);
+ info->mb = stbi__get32le(s);
+ info->extra_read += 12; // account for the 12 mask bytes just read
+ // not documented, but generated by photoshop and handled by mspaint
+ if (info->mr == info->mg && info->mg == info->mb) {
+ // ?!?!?
+ return stbi__errpuc("bad BMP", "bad BMP");
+ }
+ } else
+ return stbi__errpuc("bad BMP", "bad BMP");
+ }
+ } else {
+ // V4/V5 header
+ int i;
+ if (hsz != 108 && hsz != 124)
+ return stbi__errpuc("bad BMP", "bad BMP");
+ info->mr = stbi__get32le(s);
+ info->mg = stbi__get32le(s);
+ info->mb = stbi__get32le(s);
+ info->ma = stbi__get32le(s);
+ if (compress != 3) // override mr/mg/mb unless in BI_BITFIELDS mode, as per docs
+ stbi__bmp_set_mask_defaults(info, compress);
+ stbi__get32le(s); // discard color space
+ for (i=0; i < 12; ++i)
+ stbi__get32le(s); // discard color space parameters
+ if (hsz == 124) {
+ stbi__get32le(s); // discard rendering intent
+ stbi__get32le(s); // discard offset of profile data
+ stbi__get32le(s); // discard size of profile data
+ stbi__get32le(s); // discard reserved
+ }
+ }
+ }
+ return (void *) 1;
+}
+
+
+// Decode a BMP image from `s` into a newly allocated buffer of 8-bit samples.
+//
+//   x, y     receive the image width and height
+//   comp     if non-NULL, receives the source component count (3 or 4)
+//   req_comp 0 to keep the source layout, or the desired component count;
+//            3 and 4 are decoded directly, other values are post-converted
+//   ri       unused
+//
+// Returns the pixel buffer on success. Error paths return NULL or the
+// result of stbi__errpuc(...); `out` is freed on every error path after
+// it has been allocated.
+static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
+{
+   stbi_uc *out;
+   unsigned int mr=0,mg=0,mb=0,ma=0, all_a;
+   // Zero-filled up front: only `psize` entries are read from the file, but
+   // pixel indices are not range-checked against psize, so an index past
+   // the stored palette must not pick up indeterminate stack bytes.
+   stbi_uc pal[256][4] = {{0}};
+   int psize=0,i,j,width;
+   int flip_vertically, pad, target;
+   stbi__bmp_data info;
+   STBI_NOTUSED(ri);
+
+   info.all_a = 255;
+   if (stbi__bmp_parse_header(s, &info) == NULL)
+      return NULL; // error code already set
+
+   // a positive stored height means the rows must be flipped after decoding
+   flip_vertically = ((int) s->img_y) > 0;
+   s->img_y = abs((int) s->img_y);
+
+   if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+   if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+
+   mr = info.mr;
+   mg = info.mg;
+   mb = info.mb;
+   ma = info.ma;
+   all_a = info.all_a;
+
+   // palette entry count, derived from the gap between header and pixel data
+   if (info.hsz == 12) {
+      if (info.bpp < 24)
+         psize = (info.offset - info.extra_read - 24) / 3;
+   } else {
+      if (info.bpp < 16)
+         psize = (info.offset - info.extra_read - info.hsz) >> 2;
+   }
+   if (psize == 0) {
+      // accept some number of extra bytes after the header, but if the offset points either to before
+      // the header ends or implies a large amount of extra data, reject the file as malformed
+      int bytes_read_so_far = s->callback_already_read + (int)(s->img_buffer - s->img_buffer_original);
+      int header_limit = 1024; // max we actually read is below 256 bytes currently.
+      int extra_data_limit = 256*4; // what ordinarily goes here is a palette; 256 entries*4 bytes is its max size.
+      if (bytes_read_so_far <= 0 || bytes_read_so_far > header_limit) {
+         return stbi__errpuc("bad header", "Corrupt BMP");
+      }
+      // we established that bytes_read_so_far is positive and sensible.
+      // the first half of this test rejects offsets that are either too small positives, or
+      // negative, and guarantees that info.offset >= bytes_read_so_far > 0. this in turn
+      // ensures the number computed in the second half of the test can't overflow.
+      if (info.offset < bytes_read_so_far || info.offset - bytes_read_so_far > extra_data_limit) {
+         return stbi__errpuc("bad offset", "Corrupt BMP");
+      } else {
+         stbi__skip(s, info.offset - bytes_read_so_far);
+      }
+   }
+
+   if (info.bpp == 24 && ma == 0xff000000)
+      s->img_n = 3;
+   else
+      s->img_n = ma ? 4 : 3;
+   if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
+      target = req_comp;
+   else
+      target = s->img_n; // if they want monochrome, we'll post-convert
+
+   // sanity-check size
+   if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
+      return stbi__errpuc("too large", "Corrupt BMP");
+
+   out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
+   if (!out) return stbi__errpuc("outofmem", "Out of memory");
+   if (info.bpp < 16) {
+      // paletted image: read the palette, then map indices to colours
+      int z=0;
+      if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
+      for (i=0; i < psize; ++i) {
+         // file order is B,G,R; stored here as R,G,B,A
+         pal[i][2] = stbi__get8(s);
+         pal[i][1] = stbi__get8(s);
+         pal[i][0] = stbi__get8(s);
+         if (info.hsz != 12) stbi__get8(s); // 4-byte entries carry a pad byte
+         pal[i][3] = 255;
+      }
+      stbi__skip(s, info.offset - info.extra_read - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
+      if (info.bpp == 1) width = (s->img_x + 7) >> 3;
+      else if (info.bpp == 4) width = (s->img_x + 1) >> 1;
+      else if (info.bpp == 8) width = s->img_x;
+      else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
+      pad = (-width)&3; // rows are padded to a multiple of 4 bytes
+      if (info.bpp == 1) {
+         for (j=0; j < (int) s->img_y; ++j) {
+            int bit_offset = 7, v = stbi__get8(s);
+            for (i=0; i < (int) s->img_x; ++i) {
+               int color = (v>>bit_offset)&0x1;
+               out[z++] = pal[color][0];
+               out[z++] = pal[color][1];
+               out[z++] = pal[color][2];
+               if (target == 4) out[z++] = 255;
+               if (i+1 == (int) s->img_x) break;
+               if((--bit_offset) < 0) {
+                  bit_offset = 7;
+                  v = stbi__get8(s);
+               }
+            }
+            stbi__skip(s, pad);
+         }
+      } else {
+         // 4 or 8 bpp: two pixels are produced per loop iteration
+         for (j=0; j < (int) s->img_y; ++j) {
+            for (i=0; i < (int) s->img_x; i += 2) {
+               int v=stbi__get8(s),v2=0;
+               if (info.bpp == 4) {
+                  v2 = v & 15;
+                  v >>= 4;
+               }
+               out[z++] = pal[v][0];
+               out[z++] = pal[v][1];
+               out[z++] = pal[v][2];
+               if (target == 4) out[z++] = 255;
+               if (i+1 == (int) s->img_x) break;
+               v = (info.bpp == 8) ? stbi__get8(s) : v2;
+               out[z++] = pal[v][0];
+               out[z++] = pal[v][1];
+               out[z++] = pal[v][2];
+               if (target == 4) out[z++] = 255;
+            }
+            stbi__skip(s, pad);
+         }
+      }
+   } else {
+      // direct-colour image: 16, 24 or 32 bits per pixel
+      int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
+      int z = 0;
+      int easy=0;
+      stbi__skip(s, info.offset - info.extra_read - info.hsz);
+      if (info.bpp == 24) width = 3 * s->img_x;
+      else if (info.bpp == 16) width = 2*s->img_x;
+      else /* bpp = 32 and pad = 0 */ width=0;
+      pad = (-width) & 3;
+      // "easy" layouts are plain byte triples/quads and need no mask arithmetic
+      if (info.bpp == 24) {
+         easy = 1;
+      } else if (info.bpp == 32) {
+         if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
+            easy = 2;
+      }
+      if (!easy) {
+         if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
+         // right shift amt to put high bit in position #7
+         rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr);
+         gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
+         bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb);
+         ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma);
+         if (rcount > 8 || gcount > 8 || bcount > 8 || acount > 8) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
+      }
+      for (j=0; j < (int) s->img_y; ++j) {
+         if (easy) {
+            for (i=0; i < (int) s->img_x; ++i) {
+               unsigned char a;
+               out[z+2] = stbi__get8(s);
+               out[z+1] = stbi__get8(s);
+               out[z+0] = stbi__get8(s);
+               z += 3;
+               a = (easy == 2 ? stbi__get8(s) : 255);
+               all_a |= a;
+               if (target == 4) out[z++] = a;
+            }
+         } else {
+            int bpp = info.bpp;
+            for (i=0; i < (int) s->img_x; ++i) {
+               stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s));
+               unsigned int a;
+               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
+               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
+               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
+               a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
+               all_a |= a;
+               if (target == 4) out[z++] = STBI__BYTECAST(a);
+            }
+         }
+         stbi__skip(s, pad);
+      }
+   }
+
+   // if alpha channel is all 0s, replace with all 255s
+   if (target == 4 && all_a == 0)
+      for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4)
+         out[i] = 255;
+
+   if (flip_vertically) {
+      // swap row j with its mirror row, byte by byte
+      stbi_uc t;
+      for (j=0; j < (int) s->img_y>>1; ++j) {
+         stbi_uc *p1 = out + j *s->img_x*target;
+         stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
+         for (i=0; i < (int) s->img_x*target; ++i) {
+            t = p1[i]; p1[i] = p2[i]; p2[i] = t;
+         }
+      }
+   }
+
+   if (req_comp && req_comp != target) {
+      out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
+      if (out == NULL) return out; // stbi__convert_format frees input on failure
+   }
+
+   *x = s->img_x;
+   *y = s->img_y;
+   if (comp) *comp = s->img_n;
+   return out;
+}
+#endif
+
+// Targa Truevision - TGA
+// by Jonathan Dummer
+#ifndef STBI_NO_TGA
+// Map a TGA bits-per-pixel value to a component count (STBI_grey,
+// STBI_grey_alpha, STBI_rgb, or bits/8 for 24 and 32), or 0 when the depth
+// is unsupported. If is_rgb16 is non-NULL it is set to 1 for 15/16-bit
+// colour data and to 0 otherwise.
+static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
+{
+   if (is_rgb16) *is_rgb16 = 0;
+
+   if (bits_per_pixel == 8)
+      return STBI_grey;
+   if (bits_per_pixel == 16 && is_grey)
+      return STBI_grey_alpha;
+   if (bits_per_pixel == 15 || bits_per_pixel == 16) {
+      // packed 15/16-bit colour
+      if (is_rgb16) *is_rgb16 = 1;
+      return STBI_rgb;
+   }
+   if (bits_per_pixel == 24 || bits_per_pixel == 32)
+      return bits_per_pixel/8;
+   return 0;
+}
+
+// Read a TGA header and report width, height and component count through
+// whichever of x/y/comp are non-NULL. Returns 1 on success; on any
+// rejected field the stream is rewound and 0 is returned.
+static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   int width, height, components, image_type, pixel_bits, palette_bits;
+   int colormap_type;
+
+   stbi__get8(s); // discard Offset
+   colormap_type = stbi__get8(s);
+   if (colormap_type > 1)
+      goto reject; // only RGB or indexed allowed
+
+   image_type = stbi__get8(s);
+   if (colormap_type == 1) {
+      // colormapped (paletted) image
+      if (image_type != 1 && image_type != 9)
+         goto reject;
+      stbi__skip(s,4); // skip index of first colormap entry and number of entries
+      palette_bits = stbi__get8(s); // bits per palette color entry
+      if ( (palette_bits != 8) && (palette_bits != 15) && (palette_bits != 16) && (palette_bits != 24) && (palette_bits != 32) )
+         goto reject;
+      stbi__skip(s,4); // skip image x and y origin
+   } else {
+      // no colormap: only RGB or grey allowed, +/- RLE
+      if ( (image_type != 2) && (image_type != 3) && (image_type != 10) && (image_type != 11) )
+         goto reject;
+      stbi__skip(s,9); // skip colormap specification and image x/y origin
+      palette_bits = 0;
+   }
+
+   width = stbi__get16le(s);
+   if (width < 1)
+      goto reject;
+   height = stbi__get16le(s);
+   if (height < 1)
+      goto reject;
+
+   pixel_bits = stbi__get8(s);
+   stbi__get8(s); // ignore alpha bits
+   if (palette_bits != 0) {
+      // with a colormap, pixel_bits is the size of an index: 8 or 16 only
+      if ((pixel_bits != 8) && (pixel_bits != 16))
+         goto reject;
+      components = stbi__tga_get_comp(palette_bits, 0, NULL);
+   } else {
+      components = stbi__tga_get_comp(pixel_bits, (image_type == 3) || (image_type == 11), NULL);
+   }
+   if (!components)
+      goto reject;
+
+   if (x) *x = width;
+   if (y) *y = height;
+   if (comp) *comp = components;
+   return 1; // seems to have passed everything
+
+reject:
+   stbi__rewind(s);
+   return 0;
+}
+
+// Heuristic TGA detection: validates the header fields in order and
+// returns 1 only if all of them are acceptable. The stream is always
+// rewound before returning.
+static int stbi__tga_test(stbi__context *s)
+{
+   int looks_like_tga = 0;
+   int colormap_type, image_type, bits;
+
+   stbi__get8(s); // discard Offset
+   colormap_type = stbi__get8(s);
+   if (colormap_type > 1) goto done; // only RGB or indexed allowed
+
+   image_type = stbi__get8(s);
+   if (colormap_type == 1) {
+      // colormapped (paletted) image: type must be 1 or 9
+      if (image_type != 1 && image_type != 9) goto done;
+      stbi__skip(s,4); // skip index of first colormap entry and number of entries
+      bits = stbi__get8(s); // bits per palette color entry
+      if ( (bits != 8) && (bits != 15) && (bits != 16) && (bits != 24) && (bits != 32) ) goto done;
+      stbi__skip(s,4); // skip image x and y origin
+   } else {
+      // no colormap: only RGB or grey allowed, +/- RLE
+      switch (image_type) {
+         case 2:
+         case 3:
+         case 10:
+         case 11:
+            break;
+         default:
+            goto done;
+      }
+      stbi__skip(s,9); // skip colormap specification and image x/y origin
+   }
+
+   if (stbi__get16le(s) < 1) goto done; // width
+   if (stbi__get16le(s) < 1) goto done; // height
+
+   bits = stbi__get8(s); // bits per pixel
+   if ( (colormap_type == 1) && (bits != 8) && (bits != 16) ) goto done; // for colormapped images, bpp is size of an index
+   if ( (bits != 8) && (bits != 15) && (bits != 16) && (bits != 24) && (bits != 32) ) goto done;
+
+   looks_like_tga = 1;
+
+done:
+   stbi__rewind(s);
+   return looks_like_tga;
+}
+
+// Read one little-endian 16-bit pixel and expand its three 5-bit channels
+// to 8 bits each, written to out[0..2] in R,G,B order (so no swap is needed
+// later). The top bit is ignored: 15- and 16-bit TGAs are both treated as
+// RGB without alpha.
+static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
+{
+   stbi__uint16 px = (stbi__uint16)stbi__get16le(s);
+   int channel, shift;
+
+   // red sits in bits 10..14, green in 5..9, blue in 0..4
+   for (channel = 0, shift = 10; channel < 3; ++channel, shift -= 5) {
+      int five_bit = (px >> shift) & 31;
+      out[channel] = (stbi_uc)((five_bit * 255)/31);
+   }
+}
+
+// Decode a TGA image: raw or RLE, color-mapped or direct, 8/15/16/24/32 bit.
+//   s        - input stream positioned at the start of the TGA header
+//   x, y     - receive the image width and height
+//   comp     - if non-NULL, receives the component count found in the file
+//   req_comp - desired component count, or 0 to keep the file's own
+//   ri       - unused by this loader
+// Returns a heap buffer of pixels, or the result of stbi__errpuc() on failure.
+// Rows are flipped unless bit 5 of the image descriptor byte is set.
+static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
+{
+   // read in the TGA header stuff
+   int tga_offset = stbi__get8(s);
+   int tga_indexed = stbi__get8(s);
+   int tga_image_type = stbi__get8(s);
+   int tga_is_RLE = 0;
+   int tga_palette_start = stbi__get16le(s);
+   int tga_palette_len = stbi__get16le(s);
+   int tga_palette_bits = stbi__get8(s);
+   int tga_x_origin = stbi__get16le(s);
+   int tga_y_origin = stbi__get16le(s);
+   int tga_width = stbi__get16le(s);
+   int tga_height = stbi__get16le(s);
+   int tga_bits_per_pixel = stbi__get8(s);
+   int tga_comp, tga_rgb16=0;
+   int tga_inverted = stbi__get8(s);
+   // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?)
+   // image data
+   unsigned char *tga_data;
+   unsigned char *tga_palette = NULL;
+   int i, j;
+   unsigned char raw_data[4] = {0};
+   int RLE_count = 0;
+   int RLE_repeating = 0;
+   int read_next_pixel = 1;
+   STBI_NOTUSED(ri);
+   STBI_NOTUSED(tga_x_origin); // @TODO
+   STBI_NOTUSED(tga_y_origin); // @TODO
+
+   if (tga_height > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+   if (tga_width > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+
+   // do a tiny bit of preprocessing: image types >= 8 are the RLE variants
+   if ( tga_image_type >= 8 )
+   {
+      tga_image_type -= 8;
+      tga_is_RLE = 1;
+   }
+   tga_inverted = 1 - ((tga_inverted >> 5) & 1);
+
+   // If I'm paletted, then I'll use the number of bits from the palette
+   if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
+   else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);
+
+   if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
+      return stbi__errpuc("bad format", "Can't find out TGA pixelformat");
+
+   // tga info
+   *x = tga_width;
+   *y = tga_height;
+   if (comp) *comp = tga_comp;
+
+   if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
+      return stbi__errpuc("too large", "Corrupt TGA");
+
+   tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
+   if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");
+
+   // skip to the data's starting position (offset usually = 0)
+   stbi__skip(s, tga_offset );
+
+   if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) {
+      // fast path: raw scanlines can be read straight into their final row
+      for (i=0; i < tga_height; ++i) {
+         int row = tga_inverted ? tga_height -i - 1 : i;
+         stbi_uc *tga_row = tga_data + row*tga_width*tga_comp;
+         // a short read would leave this malloc'ed row uninitialized, so
+         // fail instead of handing stale heap contents back as pixels
+         if (!stbi__getn(s, tga_row, tga_width * tga_comp)) {
+            STBI_FREE(tga_data);
+            return stbi__errpuc("bad file", "Truncated TGA");
+         }
+      }
+   } else {
+      // do I need to load a palette?
+      if ( tga_indexed)
+      {
+         if (tga_palette_len == 0) { /* you have to have at least one entry! */
+            STBI_FREE(tga_data);
+            return stbi__errpuc("bad palette", "Corrupt TGA");
+         }
+
+         // any data to skip? (offset usually = 0)
+         stbi__skip(s, tga_palette_start );
+         // load the palette
+         tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
+         if (!tga_palette) {
+            STBI_FREE(tga_data);
+            return stbi__errpuc("outofmem", "Out of memory");
+         }
+         if (tga_rgb16) {
+            stbi_uc *pal_entry = tga_palette;
+            STBI_ASSERT(tga_comp == STBI_rgb);
+            for (i=0; i < tga_palette_len; ++i) {
+               stbi__tga_read_rgb16(s, pal_entry);
+               pal_entry += tga_comp;
+            }
+         } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
+            STBI_FREE(tga_data);
+            STBI_FREE(tga_palette);
+            return stbi__errpuc("bad palette", "Corrupt TGA");
+         }
+      }
+      // load the data
+      for (i=0; i < tga_width * tga_height; ++i)
+      {
+         // if I'm in RLE mode, do I need to get a new RLE packet?
+         if ( tga_is_RLE )
+         {
+            if ( RLE_count == 0 )
+            {
+               // yep, get the next byte as a RLE command
+               int RLE_cmd = stbi__get8(s);
+               RLE_count = 1 + (RLE_cmd & 127);
+               RLE_repeating = RLE_cmd >> 7;
+               read_next_pixel = 1;
+            } else if ( !RLE_repeating )
+            {
+               read_next_pixel = 1;
+            }
+         } else
+         {
+            read_next_pixel = 1;
+         }
+         // OK, if I need to read a pixel, do it now
+         if ( read_next_pixel )
+         {
+            // load however much data we did have
+            if ( tga_indexed )
+            {
+               // read in index, then perform the lookup
+               int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s);
+               if ( pal_idx >= tga_palette_len ) {
+                  // invalid index
+                  pal_idx = 0;
+               }
+               pal_idx *= tga_comp;
+               for (j = 0; j < tga_comp; ++j) {
+                  raw_data[j] = tga_palette[pal_idx+j];
+               }
+            } else if(tga_rgb16) {
+               STBI_ASSERT(tga_comp == STBI_rgb);
+               stbi__tga_read_rgb16(s, raw_data);
+            } else {
+               // read in the data raw
+               for (j = 0; j < tga_comp; ++j) {
+                  raw_data[j] = stbi__get8(s);
+               }
+            }
+            // clear the reading flag for the next pixel
+            read_next_pixel = 0;
+         } // end of reading a pixel
+
+         // copy data
+         for (j = 0; j < tga_comp; ++j)
+            tga_data[i*tga_comp+j] = raw_data[j];
+
+         // in case we're in RLE mode, keep counting down
+         --RLE_count;
+      }
+      // do I need to invert the image?
+      if ( tga_inverted )
+      {
+         for (j = 0; j*2 < tga_height; ++j)
+         {
+            int index1 = j * tga_width * tga_comp;
+            int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
+            for (i = tga_width * tga_comp; i > 0; --i)
+            {
+               unsigned char temp = tga_data[index1];
+               tga_data[index1] = tga_data[index2];
+               tga_data[index2] = temp;
+               ++index1;
+               ++index2;
+            }
+         }
+      }
+      // clear my palette, if I had one
+      if ( tga_palette != NULL )
+      {
+         STBI_FREE( tga_palette );
+      }
+   }
+
+   // swap RGB - if the source data was RGB16, it already is in the right order
+   if (tga_comp >= 3 && !tga_rgb16)
+   {
+      unsigned char* tga_pixel = tga_data;
+      for (i=0; i < tga_width * tga_height; ++i)
+      {
+         unsigned char temp = tga_pixel[0];
+         tga_pixel[0] = tga_pixel[2];
+         tga_pixel[2] = temp;
+         tga_pixel += tga_comp;
+      }
+   }
+
+   // convert to target component count
+   if (req_comp && req_comp != tga_comp)
+      tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);
+
+   // the things I do to get rid of an error message, and yet keep
+   // Microsoft's C compilers happy... [8^(
+   tga_palette_start = tga_palette_len = tga_palette_bits =
+         tga_x_origin = tga_y_origin = 0;
+   STBI_NOTUSED(tga_palette_start);
+   // OK, done
+   return tga_data;
+}
+#endif
+
+// *************************************************************************************************
+// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
+
+#ifndef STBI_NO_PSD
+// Returns 1 if the stream starts with the big-endian value 0x38425053 ("8BPS").
+// The stream is rewound afterwards either way.
+static int stbi__psd_test(stbi__context *s)
+{
+   int is_psd = 0;
+   if (stbi__get32be(s) == 0x38425053)
+      is_psd = 1;
+   stbi__rewind(s);
+   return is_psd;
+}
+
+// Decode one RLE-compressed channel of pixelCount samples. Each decoded byte
+// is stored at p and p then advances by 4, so one channel of an interleaved
+// 4-component buffer is filled.
+// Returns 1 on success, 0 if a run would write past pixelCount (corrupt data).
+static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount)
+{
+   int done = 0;
+
+   while (done < pixelCount) {
+      int remaining = pixelCount - done;
+      int ctrl = stbi__get8(s);
+      int run, k;
+
+      if (ctrl == 128)
+         continue;                          // no-op marker
+
+      if (ctrl < 128) {
+         // literal run: the next ctrl+1 source bytes are copied as-is
+         run = ctrl + 1;
+         if (run > remaining) return 0;     // corrupt data
+         for (k = 0; k < run; ++k, p += 4)
+            *p = stbi__get8(s);
+      } else {
+         // repeat run: ctrl read as a negative 8-bit int, -ctrl+1 copies of one byte
+         stbi_uc fill;
+         run = 257 - ctrl;
+         if (run > remaining) return 0;     // corrupt data
+         fill = stbi__get8(s);
+         for (k = 0; k < run; ++k, p += 4)
+            *p = fill;
+      }
+      done += run;
+   }
+
+   return 1;
+}
+
+static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) /* Decode an RGB-mode PSD (version 1, 8 or 16 bit, raw or RLE) into a 4-channel buffer.
+ * x, y receive width and height; comp (if non-NULL) is set to 4.
+ * req_comp: desired component count; 0 or 4 keeps the 4-channel result.
+ * bpc: when 16 and the file is uncompressed 16-bit, the output is 16 bits per channel
+ * and ri->bits_per_channel is set to 16; otherwise the output is 8 bits per channel.
+ * Returns the pixel buffer, or the result of stbi__errpuc() on failure.
+ */
+{
+ int pixelCount;
+ int channelCount, compression;
+ int channel, i;
+ int bitdepth;
+ int w,h;
+ stbi_uc *out;
+ STBI_NOTUSED(ri);
+
+ // Check identifier
+ if (stbi__get32be(s) != 0x38425053) // "8BPS"
+ return stbi__errpuc("not PSD", "Corrupt PSD image");
+
+ // Check file type version.
+ if (stbi__get16be(s) != 1)
+ return stbi__errpuc("wrong version", "Unsupported version of PSD image");
+
+ // Skip 6 reserved bytes.
+ stbi__skip(s, 6 );
+
+ // Read the number of channels (R, G, B, A, etc).
+ channelCount = stbi__get16be(s);
+ if (channelCount < 0 || channelCount > 16)
+ return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");
+
+ // Read the rows and columns of the image.
+ h = stbi__get32be(s);
+ w = stbi__get32be(s);
+
+ if (h > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+ if (w > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+
+ // Make sure the depth is 8 or 16 bits.
+ bitdepth = stbi__get16be(s);
+ if (bitdepth != 8 && bitdepth != 16)
+ return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");
+
+ // Make sure the color mode is RGB.
+ // Valid options are:
+ // 0: Bitmap
+ // 1: Grayscale
+ // 2: Indexed color
+ // 3: RGB color
+ // 4: CMYK color
+ // 7: Multichannel
+ // 8: Duotone
+ // 9: Lab color
+ if (stbi__get16be(s) != 3)
+ return stbi__errpuc("wrong color format", "PSD is not in RGB color format");
+
+ // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.)
+ stbi__skip(s,stbi__get32be(s) );
+
+ // Skip the image resources. (resolution, pen tool paths, etc)
+ stbi__skip(s, stbi__get32be(s) );
+
+ // Skip the reserved data.
+ stbi__skip(s, stbi__get32be(s) );
+
+ // Find out if the data is compressed.
+ // Known values:
+ // 0: no compression
+ // 1: RLE compressed
+ compression = stbi__get16be(s);
+ if (compression > 1)
+ return stbi__errpuc("bad compression", "PSD has an unknown compression format");
+
+ // Check size
+ if (!stbi__mad3sizes_valid(4, w, h, 0))
+ return stbi__errpuc("too large", "Corrupt PSD");
+
+ // Create the destination image.
+
+ if (!compression && bitdepth == 16 && bpc == 16) { // 16-bit output only for uncompressed 16-bit input when the caller asked for it
+ out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0);
+ ri->bits_per_channel = 16;
+ } else
+ out = (stbi_uc *) stbi__malloc(4 * w*h);
+
+ if (!out) return stbi__errpuc("outofmem", "Out of memory");
+ pixelCount = w*h;
+
+ // Initialize the data to zero.
+ //memset( out, 0, pixelCount * 4 );
+
+ // Finally, the image data.
+ if (compression) {
+ // RLE as used by .PSD and .TIFF
+ // Loop until you get the number of unpacked bytes you are expecting:
+ // Read the next source byte into n.
+ // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
+ // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
+ // Else if n is 128, noop.
+ // Endloop
+
+ // The RLE-compressed data is preceded by a 2-byte data count for each row in the data,
+ // which we're going to just skip.
+ stbi__skip(s, h * channelCount * 2 );
+
+ // Read the RLE data by channel.
+ for (channel = 0; channel < 4; channel++) {
+ stbi_uc *p;
+
+ p = out+channel;
+ if (channel >= channelCount) {
+ // Fill this channel with default data.
+ for (i = 0; i < pixelCount; i++, p += 4)
+ *p = (channel == 3 ? 255 : 0);
+ } else {
+ // Read the RLE data.
+ if (!stbi__psd_decode_rle(s, p, pixelCount)) {
+ STBI_FREE(out);
+ return stbi__errpuc("corrupt", "bad RLE data");
+ }
+ }
+ }
+
+ } else {
+ // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...)
+ // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image.
+
+ // Read the data by channel.
+ for (channel = 0; channel < 4; channel++) {
+ if (channel >= channelCount) {
+ // Fill this channel with default data.
+ if (bitdepth == 16 && bpc == 16) {
+ stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
+ stbi__uint16 val = channel == 3 ? 65535 : 0;
+ for (i = 0; i < pixelCount; i++, q += 4)
+ *q = val;
+ } else {
+ stbi_uc *p = out+channel;
+ stbi_uc val = channel == 3 ? 255 : 0;
+ for (i = 0; i < pixelCount; i++, p += 4)
+ *p = val;
+ }
+ } else {
+ if (ri->bits_per_channel == 16) { // output bpc
+ stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
+ for (i = 0; i < pixelCount; i++, q += 4)
+ *q = (stbi__uint16) stbi__get16be(s);
+ } else {
+ stbi_uc *p = out+channel;
+ if (bitdepth == 16) { // input bpc
+ for (i = 0; i < pixelCount; i++, p += 4)
+ *p = (stbi_uc) (stbi__get16be(s) >> 8); // keep only the high byte of each 16-bit sample
+ } else {
+ for (i = 0; i < pixelCount; i++, p += 4)
+ *p = stbi__get8(s);
+ }
+ }
+ }
+ }
+ }
+
+ // remove weird white matte from PSD
+ if (channelCount >= 4) {
+ if (ri->bits_per_channel == 16) {
+ for (i=0; i < w*h; ++i) {
+ stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i;
+ if (pixel[3] != 0 && pixel[3] != 65535) { // only partially transparent pixels are adjusted
+ float a = pixel[3] / 65535.0f;
+ float ra = 1.0f / a;
+ float inv_a = 65535.0f * (1 - ra);
+ pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a);
+ pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a);
+ pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a);
+ }
+ }
+ } else {
+ for (i=0; i < w*h; ++i) {
+ unsigned char *pixel = out + 4*i;
+ if (pixel[3] != 0 && pixel[3] != 255) { // only partially transparent pixels are adjusted
+ float a = pixel[3] / 255.0f;
+ float ra = 1.0f / a;
+ float inv_a = 255.0f * (1 - ra);
+ pixel[0] = (unsigned char) (pixel[0]*ra + inv_a);
+ pixel[1] = (unsigned char) (pixel[1]*ra + inv_a);
+ pixel[2] = (unsigned char) (pixel[2]*ra + inv_a);
+ }
+ }
+ }
+ }
+
+ // convert to desired output format
+ if (req_comp && req_comp != 4) {
+ if (ri->bits_per_channel == 16)
+ out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h);
+ else
+ out = stbi__convert_format(out, 4, req_comp, w, h);
+ if (out == NULL) return out; // stbi__convert_format frees input on failure
+ }
+
+ if (comp) *comp = 4;
+ *y = h;
+ *x = w;
+
+ return out;
+}
+#endif
+
+// *************************************************************************************************
+// Softimage PIC loader
+// by Tom Seddon
+//
+// See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
+// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
+
+#ifndef STBI_NO_PIC
+// Read up to four bytes from the stream and compare them with str[0..3].
+// Returns 1 if all four match; returns 0 at the first mismatch (bytes after
+// the mismatch are left unread).
+static int stbi__pic_is4(stbi__context *s,const char *str)
+{
+   const char *cursor = str;
+   const char *stop = str + 4;
+
+   while (cursor != stop) {
+      if (stbi__get8(s) != (stbi_uc)*cursor)
+         return 0;
+      ++cursor;
+   }
+   return 1;
+}
+
+// Check for the PIC signature: the 4-byte magic, 84 bytes that are skipped,
+// then the tag "PICT". Does not rewind; the caller is responsible for that.
+// Returns 1 on match, 0 otherwise.
+static int stbi__pic_test_core(stbi__context *s)
+{
+   int skipped = 0;
+
+   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34"))
+      return 0;
+
+   while (skipped < 84) {
+      stbi__get8(s);
+      ++skipped;
+   }
+
+   return stbi__pic_is4(s,"PICT") ? 1 : 0;
+}
+
+typedef struct
+{
+ stbi_uc size,type,channel; // size: bits per channel (the loader requires 8); type: 0 uncompressed, 1 pure RLE, 2 mixed RLE; channel: bitmask of channels carried (0x80 >> i selects byte i, 0x10 means alpha is present)
+} stbi__pic_packet;
+
+// Read one byte from the stream for every channel selected in `channel`
+// (bit 0x80 >> i selects dest[i], i = 0..3); unselected bytes are untouched.
+// Returns dest, or the result of stbi__errpuc() if the stream is at EOF
+// before a selected byte is read.
+static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
+{
+   int bit = 0x80;
+   int slot = 0;
+
+   while (slot < 4) {
+      if (channel & bit) {
+         if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short");
+         dest[slot] = stbi__get8(s);
+      }
+      ++slot;
+      bit >>= 1;
+   }
+
+   return dest;
+}
+
+// Copy src[i] to dest[i] for every channel selected in `channel`
+// (bit 0x80 >> i selects byte i, i = 0..3).
+static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src)
+{
+   int slot;
+
+   for (slot = 0; slot < 4; ++slot) {
+      if (channel & (0x80 >> slot))
+         dest[slot] = src[slot];
+   }
+}
+
+static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result) /* Read the chained packet descriptors, then decode `height` scanlines into `result`,
+ * which is addressed as width*4 bytes per row (4 bytes per pixel).
+ * *comp is set to 4 if any packet's channel mask has bit 0x10, otherwise 3.
+ * Returns `result` on success, or 0 / the result of stbi__errpuc() on failure;
+ * `result` is not freed here.
+ */
+{
+ int act_comp=0,num_packets=0,y,chained;
+ stbi__pic_packet packets[10];
+
+ // this will (should...) cater for even some bizarre stuff like having data
+ // for the same channel in multiple packets.
+ do {
+ stbi__pic_packet *packet;
+
+ if (num_packets==sizeof(packets)/sizeof(packets[0]))
+ return stbi__errpuc("bad format","too many packets");
+
+ packet = &packets[num_packets++];
+
+ chained = stbi__get8(s); // nonzero means another packet descriptor follows
+ packet->size = stbi__get8(s);
+ packet->type = stbi__get8(s);
+ packet->channel = stbi__get8(s);
+
+ act_comp |= packet->channel;
+
+ if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (reading packets)");
+ if (packet->size != 8) return stbi__errpuc("bad format","packet isn't 8bpp");
+ } while (chained);
+
+ *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?
+
+ for(y=0; y<height; ++y) {
+ int packet_idx;
+
+ for(packet_idx=0; packet_idx < num_packets; ++packet_idx) { // every packet contributes its channels to the same scanline
+ stbi__pic_packet *packet = &packets[packet_idx];
+ stbi_uc *dest = result+y*width*4;
+
+ switch (packet->type) {
+ default:
+ return stbi__errpuc("bad format","packet has bad compression type");
+
+ case 0: {//uncompressed
+ int x;
+
+ for(x=0;x<width;++x, dest+=4)
+ if (!stbi__readval(s,packet->channel,dest))
+ return 0;
+ break;
+ }
+
+ case 1://Pure RLE
+ {
+ int left=width, i;
+
+ while (left>0) {
+ stbi_uc count,value[4];
+
+ count=stbi__get8(s);
+ if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pure read count)");
+
+ if (count > left) // clamp the run to the pixels remaining in this scanline
+ count = (stbi_uc) left;
+
+ if (!stbi__readval(s,packet->channel,value)) return 0;
+
+ for(i=0; i<count; ++i,dest+=4)
+ stbi__copyval(packet->channel,dest,value);
+ left -= count;
+ }
+ }
+ break;
+
+ case 2: {//Mixed RLE
+ int left=width;
+ while (left>0) {
+ int count = stbi__get8(s), i;
+ if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (mixed read count)");
+
+ if (count >= 128) { // Repeated
+ stbi_uc value[4];
+
+ if (count==128) // 128 means a 16-bit big-endian run length follows
+ count = stbi__get16be(s);
+ else
+ count -= 127;
+ if (count > left)
+ return stbi__errpuc("bad file","scanline overrun");
+
+ if (!stbi__readval(s,packet->channel,value))
+ return 0;
+
+ for(i=0;i<count;++i, dest += 4)
+ stbi__copyval(packet->channel,dest,value);
+ } else { // Raw
+ ++count;
+ if (count>left) return stbi__errpuc("bad file","scanline overrun");
+
+ for(i=0;i<count;++i, dest+=4)
+ if (!stbi__readval(s,packet->channel,dest))
+ return 0;
+ }
+ left-=count;
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ return result;
+}
+
+// Decode a Softimage PIC image.
+//   s        - input stream positioned at the start of the file
+//   px, py   - receive the image width and height on success
+//   comp     - if non-NULL, receives 3 or 4 depending on whether alpha is present
+//   req_comp - desired component count, or 0 to use the file's own
+//   ri       - unused by this loader
+// Returns the pixel buffer, or 0 / the result of stbi__errpuc() on failure.
+static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri)
+{
+   stbi_uc *result;
+   int i, x,y, internal_comp;
+   STBI_NOTUSED(ri);
+
+   if (!comp) comp = &internal_comp;
+
+   // skip the part of the header before the dimensions
+   for (i=0; i<92; ++i)
+      stbi__get8(s);
+
+   x = stbi__get16be(s);
+   y = stbi__get16be(s);
+
+   if (y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+   if (x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+
+   if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pic header)");
+   if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode");
+
+   stbi__get32be(s); //skip `ratio'
+   stbi__get16be(s); //skip `fields'
+   stbi__get16be(s); //skip `pad'
+
+   // intermediate buffer is RGBA
+   result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0);
+   if (!result) return stbi__errpuc("outofmem", "Out of memory");
+   memset(result, 0xff, x*y*4);
+
+   if (!stbi__pic_load_core(s,x,y,comp, result)) {
+      // Bail out here: continuing would hand a NULL buffer (and possibly an
+      // unset *comp) to stbi__convert_format, which dereferences its input.
+      STBI_FREE(result);
+      return 0;
+   }
+   *px = x;
+   *py = y;
+   if (req_comp == 0) req_comp = *comp;
+   result=stbi__convert_format(result,4,req_comp,x,y);
+
+   return result;
+}
+
+// Returns the result of stbi__pic_test_core() and rewinds the stream.
+static int stbi__pic_test(stbi__context *s)
+{
+   int matched;
+
+   matched = stbi__pic_test_core(s);
+   stbi__rewind(s);
+
+   return matched;
+}
+#endif
+
+// *************************************************************************************************
+// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
+
+#ifndef STBI_NO_GIF
+typedef struct
+{
+ stbi__int16 prefix; // code of the string this entry extends, or -1 for an initial (root) entry
+ stbi_uc first; // first value of the string this code expands to
+ stbi_uc suffix; // value appended by this entry; used as the color-table index when the code is output
+} stbi__gif_lzw;
+
+typedef struct
+{
+ int w,h; // image size read from the GIF header
+ stbi_uc *out; // output buffer (always 4 components)
+ stbi_uc *background; // The current "background" as far as a gif is concerned
+ stbi_uc *history; // one byte per pixel, set to 1 when the pixel is written during the current frame
+ int flags, bgindex, ratio, transparent, eflags; // header flags, background index and ratio byte; transparent palette index or -1; graphic control extension flags
+ stbi_uc pal[256][4]; // color table read from the header (when flags & 0x80)
+ stbi_uc lpal[256][4]; // color table read from an image descriptor (when lflags & 0x80)
+ stbi__gif_lzw codes[8192]; // LZW code table
+ stbi_uc *color_table; // table used for the current image: lpal or pal
+ int parse, step; // remaining interlace passes and byte distance between rows written
+ int lflags; // flags byte of the current image descriptor
+ int start_x, start_y; // frame rectangle origin as byte offsets (x*4, y*line_size)
+ int max_x, max_y; // frame rectangle end as byte offsets
+ int cur_x, cur_y; // current write position as byte offsets; cur_x + cur_y indexes out
+ int line_size; // bytes per output row (w * 4)
+ int delay; // frame delay, stored in 1/1000ths of a second
+} stbi__gif;
+
+// Check for the GIF signature "GIF87a" or "GIF89a". Reading stops at the
+// first mismatching byte and the stream is not rewound here.
+// Returns 1 on match, 0 otherwise.
+static int stbi__gif_test_raw(stbi__context *s)
+{
+   int version;
+
+   if (stbi__get8(s) != 'G') return 0;
+   if (stbi__get8(s) != 'I') return 0;
+   if (stbi__get8(s) != 'F') return 0;
+   if (stbi__get8(s) != '8') return 0;
+
+   version = stbi__get8(s);
+   if (!(version == '9' || version == '7')) return 0;
+
+   return stbi__get8(s) == 'a';
+}
+
+// Returns the result of stbi__gif_test_raw() and rewinds the stream.
+static int stbi__gif_test(stbi__context *s)
+{
+   int is_gif;
+
+   is_gif = stbi__gif_test_raw(s);
+   stbi__rewind(s);
+
+   return is_gif;
+}
+
+// Read num_entries 3-byte colors from the stream into pal. The three bytes
+// of each entry are stored at positions 2, 1, 0 in read order, and byte 3
+// (alpha) is 0 for the entry whose index equals transp and 255 for all others.
+static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp)
+{
+   int idx = 0;
+
+   while (idx < num_entries) {
+      stbi_uc *entry = pal[idx];
+      entry[2] = stbi__get8(s);
+      entry[1] = stbi__get8(s);
+      entry[0] = stbi__get8(s);
+      entry[3] = (idx == transp) ? 0 : 255;
+      ++idx;
+   }
+}
+
+// Parse the GIF signature and the header fields that follow it into *g.
+//   comp    - if non-NULL, set to 4
+//   is_info - nonzero: return after the dimensions, without reading the color table
+// Returns 1 on success; on failure returns the result of stbi__err().
+static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
+{
+   stbi_uc ver;
+
+   // signature is "GIF8", then '7' or '9', then 'a'; stop at the first mismatch
+   if (stbi__get8(s) != 'G') return stbi__err("not GIF", "Corrupt GIF");
+   if (stbi__get8(s) != 'I') return stbi__err("not GIF", "Corrupt GIF");
+   if (stbi__get8(s) != 'F') return stbi__err("not GIF", "Corrupt GIF");
+   if (stbi__get8(s) != '8') return stbi__err("not GIF", "Corrupt GIF");
+
+   ver = stbi__get8(s);
+   if (!(ver == '7' || ver == '9')) return stbi__err("not GIF", "Corrupt GIF");
+   if (stbi__get8(s) != 'a') return stbi__err("not GIF", "Corrupt GIF");
+
+   stbi__g_failure_reason = "";
+   g->transparent = -1;               // no transparent index known yet
+   g->w = stbi__get16le(s);
+   g->h = stbi__get16le(s);
+   g->flags = stbi__get8(s);
+   g->bgindex = stbi__get8(s);
+   g->ratio = stbi__get8(s);
+
+   if (g->w > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
+   if (g->h > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
+
+   // can't actually tell whether it's 3 or 4 until we parse the comments
+   if (comp != 0) *comp = 4;
+
+   if (is_info) return 1;
+
+   // bit 0x80 of flags means a color table follows; its size is 2 << (flags & 7)
+   if ((g->flags & 0x80) != 0)
+      stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1);
+
+   return 1;
+}
+
+// Read only the GIF header and report its dimensions.
+//   x, y - if non-NULL, receive width and height on success
+//   comp - forwarded to stbi__gif_header
+// Returns 1 on success. On a header failure the stream is rewound and 0 is
+// returned; on allocation failure the result of stbi__err() is returned.
+static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
+{
+   int parsed;
+   stbi__gif *hdr = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
+   if (!hdr) return stbi__err("outofmem", "Out of memory");
+
+   parsed = stbi__gif_header(s, hdr, comp, 1);
+   if (parsed) {
+      if (x) *x = hdr->w;
+      if (y) *y = hdr->h;
+   }
+   STBI_FREE(hdr);
+
+   if (!parsed) {
+      stbi__rewind( s );
+      return 0;
+   }
+   return 1;
+}
+
+static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code) /* Write the pixels that LZW `code` expands to into g->out at the current position.
+ * The prefix chain is emitted first (recursively), then this entry's own pixel;
+ * cur_x/cur_y and the interlace state (parse/step) are advanced as pixels are written.
+ */
+{
+ stbi_uc *p, *c;
+ int idx;
+
+ // recurse to decode the prefixes, since the linked-list is backwards,
+ // and working backwards through an interleaved image would be nasty
+ if (g->codes[code].prefix >= 0)
+ stbi__out_gif_code(g, g->codes[code].prefix);
+
+ if (g->cur_y >= g->max_y) return; // past the end of the frame rectangle: drop the pixel
+
+ idx = g->cur_x + g->cur_y;
+ p = &g->out[idx];
+ g->history[idx / 4] = 1; // mark this pixel as touched in the current frame
+
+ c = &g->color_table[g->codes[code].suffix * 4];
+ if (c[3] > 128) { // don't render transparent pixels;
+ p[0] = c[2];
+ p[1] = c[1];
+ p[2] = c[0];
+ p[3] = c[3];
+ }
+ g->cur_x += 4;
+
+ if (g->cur_x >= g->max_x) { // end of row: wrap to the start of the next row to write
+ g->cur_x = g->start_x;
+ g->cur_y += g->step;
+
+ while (g->cur_y >= g->max_y && g->parse > 0) { // interlaced: move on to the next pass
+ g->step = (1 << g->parse) * g->line_size;
+ g->cur_y = g->start_y + (g->step >> 1);
+ --g->parse;
+ }
+ }
+}
+
+static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g) /* Decode the LZW-compressed sub-blocks of one image and write its pixels through stbi__out_gif_code().
+ * Returns g->out when the end-of-stream code or a zero-length block is reached,
+ * NULL if the initial code size is above 12, or the result of stbi__errpuc() on corrupt data.
+ */
+{
+ stbi_uc lzw_cs;
+ stbi__int32 len, init_code;
+ stbi__uint32 first;
+ stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
+ stbi__gif_lzw *p;
+
+ lzw_cs = stbi__get8(s);
+ if (lzw_cs > 12) return NULL;
+ clear = 1 << lzw_cs;
+ first = 1;
+ codesize = lzw_cs + 1;
+ codemask = (1 << codesize) - 1;
+ bits = 0;
+ valid_bits = 0;
+ for (init_code = 0; init_code < clear; init_code++) { // root entries: each code below `clear` stands for itself
+ g->codes[init_code].prefix = -1;
+ g->codes[init_code].first = (stbi_uc) init_code;
+ g->codes[init_code].suffix = (stbi_uc) init_code;
+ }
+
+ // support no starting clear code
+ avail = clear+2;
+ oldcode = -1;
+
+ len = 0;
+ for(;;) {
+ if (valid_bits < codesize) { // not enough bits buffered for a code: pull in another byte
+ if (len == 0) {
+ len = stbi__get8(s); // start new block
+ if (len == 0)
+ return g->out;
+ }
+ --len;
+ bits |= (stbi__int32) stbi__get8(s) << valid_bits;
+ valid_bits += 8;
+ } else {
+ stbi__int32 code = bits & codemask;
+ bits >>= codesize;
+ valid_bits -= codesize;
+ // @OPTIMIZE: is there some way we can accelerate the non-clear path?
+ if (code == clear) { // clear code
+ codesize = lzw_cs + 1;
+ codemask = (1 << codesize) - 1;
+ avail = clear + 2;
+ oldcode = -1;
+ first = 0;
+ } else if (code == clear + 1) { // end of stream code
+ stbi__skip(s, len);
+ while ((len = stbi__get8(s)) > 0)
+ stbi__skip(s,len);
+ return g->out;
+ } else if (code <= avail) {
+ if (first) {
+ return stbi__errpuc("no clear code", "Corrupt GIF");
+ }
+
+ if (oldcode >= 0) { // add a table entry: previous string plus the first value of this one
+ p = &g->codes[avail++];
+ if (avail > 8192) {
+ return stbi__errpuc("too many codes", "Corrupt GIF");
+ }
+
+ p->prefix = (stbi__int16) oldcode;
+ p->first = g->codes[oldcode].first;
+ p->suffix = (code == avail) ? p->first : g->codes[code].first;
+ } else if (code == avail)
+ return stbi__errpuc("illegal code in raster", "Corrupt GIF");
+
+ stbi__out_gif_code(g, (stbi__uint16) code);
+
+ if ((avail & codemask) == 0 && avail <= 0x0FFF) { // table filled the current code width: use one more bit
+ codesize++;
+ codemask = (1 << codesize) - 1;
+ }
+
+ oldcode = code;
+ } else {
+ return stbi__errpuc("illegal code in raster", "Corrupt GIF");
+ }
+ }
+ }
+}
+
+// Decode the next frame of a GIF into g->out; the first call (g->out == 0) also parses the header and allocates the buffers.
+// two_back is the image from two frames ago, used only for disposal method 3 (may be 0).
+static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back) /* Returns the frame buffer on success,
+ * (stbi_uc *) s as a sentinel when the stream terminator (0x3B) is reached,
+ * and 0 or the result of stbi__errpuc() on failure. Buffers already stored in g are not freed here.
+ * req_comp is unused.
+ */
+{
+ int dispose;
+ int first_frame;
+ int pi;
+ int pcount;
+ STBI_NOTUSED(req_comp);
+
+ // on first frame, any non-written pixels get the background colour (non-transparent)
+ first_frame = 0;
+ if (g->out == 0) {
+ if (!stbi__gif_header(s, g, comp,0)) return 0; // stbi__g_failure_reason set by stbi__gif_header
+ if (!stbi__mad3sizes_valid(4, g->w, g->h, 0))
+ return stbi__errpuc("too large", "GIF image is too large");
+ pcount = g->w * g->h;
+ g->out = (stbi_uc *) stbi__malloc(4 * pcount);
+ g->background = (stbi_uc *) stbi__malloc(4 * pcount);
+ g->history = (stbi_uc *) stbi__malloc(pcount);
+ if (!g->out || !g->background || !g->history)
+ return stbi__errpuc("outofmem", "Out of memory");
+
+ // image is treated as "transparent" at the start - ie, nothing overwrites the current background;
+ // background colour is only used for pixels that are not rendered first frame, after that "background"
+ // color refers to the color that was there the previous frame.
+ memset(g->out, 0x00, 4 * pcount);
+ memset(g->background, 0x00, 4 * pcount); // state of the background (starts transparent)
+ memset(g->history, 0x00, pcount); // pixels that were affected previous frame
+ first_frame = 1;
+ } else {
+ // second frame - how do we dispose of the previous one?
+ dispose = (g->eflags & 0x1C) >> 2;
+ pcount = g->w * g->h;
+
+ if ((dispose == 3) && (two_back == 0)) {
+ dispose = 2; // if I don't have an image to revert back to, default to the old background
+ }
+
+ if (dispose == 3) { // use previous graphic
+ for (pi = 0; pi < pcount; ++pi) {
+ if (g->history[pi]) {
+ memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 );
+ }
+ }
+ } else if (dispose == 2) {
+ // restore what was changed last frame to background before that frame;
+ for (pi = 0; pi < pcount; ++pi) {
+ if (g->history[pi]) {
+ memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 );
+ }
+ }
+ } else {
+ // This is a non-disposal case either way, so just
+ // leave the pixels as is, and they will become the new background
+ // 1: do not dispose
+ // 0: not specified.
+ }
+
+ // background is what out is after the undoing of the previous frame;
+ memcpy( g->background, g->out, 4 * g->w * g->h );
+ }
+
+ // clear my history;
+ memset( g->history, 0x00, g->w * g->h ); // pixels that were affected previous frame
+
+ for (;;) {
+ int tag = stbi__get8(s);
+ switch (tag) {
+ case 0x2C: /* Image Descriptor */
+ {
+ stbi__int32 x, y, w, h;
+ stbi_uc *o;
+
+ x = stbi__get16le(s);
+ y = stbi__get16le(s);
+ w = stbi__get16le(s);
+ h = stbi__get16le(s);
+ if (((x + w) > (g->w)) || ((y + h) > (g->h)))
+ return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");
+
+ g->line_size = g->w * 4;
+ g->start_x = x * 4;
+ g->start_y = y * g->line_size;
+ g->max_x = g->start_x + w * 4;
+ g->max_y = g->start_y + h * g->line_size;
+ g->cur_x = g->start_x;
+ g->cur_y = g->start_y;
+
+ // if the width of the specified rectangle is 0, that means
+ // we may not see *any* pixels or the image is malformed;
+ // to make sure this is caught, move the current y down to
+ // max_y (which is what out_gif_code checks).
+ if (w == 0)
+ g->cur_y = g->max_y;
+
+ g->lflags = stbi__get8(s);
+
+ if (g->lflags & 0x40) {
+ g->step = 8 * g->line_size; // first interlaced spacing
+ g->parse = 3;
+ } else {
+ g->step = g->line_size;
+ g->parse = 0;
+ }
+
+ if (g->lflags & 0x80) { // this image carries its own color table
+ stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
+ g->color_table = (stbi_uc *) g->lpal;
+ } else if (g->flags & 0x80) { // otherwise fall back to the table read with the header
+ g->color_table = (stbi_uc *) g->pal;
+ } else
+ return stbi__errpuc("missing color table", "Corrupt GIF");
+
+ o = stbi__process_gif_raster(s, g);
+ if (!o) return NULL;
+
+ // if this was the first frame,
+ pcount = g->w * g->h;
+ if (first_frame && (g->bgindex > 0)) {
+ // if first frame, any pixel not drawn to gets the background color
+ for (pi = 0; pi < pcount; ++pi) {
+ if (g->history[pi] == 0) {
+ g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be;
+ memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 );
+ }
+ }
+ }
+
+ return o;
+ }
+
+ case 0x21: // Extension block; only the Graphic Control Extension (0xF9) is interpreted, the rest is skipped.
+ {
+ int len;
+ int ext = stbi__get8(s);
+ if (ext == 0xF9) { // Graphic Control Extension.
+ len = stbi__get8(s);
+ if (len == 4) {
+ g->eflags = stbi__get8(s);
+ g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths.
+
+ // unset old transparent
+ if (g->transparent >= 0) {
+ g->pal[g->transparent][3] = 255;
+ }
+ if (g->eflags & 0x01) {
+ g->transparent = stbi__get8(s);
+ if (g->transparent >= 0) {
+ g->pal[g->transparent][3] = 0;
+ }
+ } else {
+ // don't need transparent
+ stbi__skip(s, 1);
+ g->transparent = -1;
+ }
+ } else {
+ stbi__skip(s, len);
+ break;
+ }
+ }
+ while ((len = stbi__get8(s)) != 0) { // skip the remaining sub-blocks up to the zero-length terminator
+ stbi__skip(s, len);
+ }
+ break;
+ }
+
+ case 0x3B: // gif stream termination code
+ return (stbi_uc *) s; // using '1' causes warning on some compilers
+
+ default:
+ return stbi__errpuc("unknown code", "Corrupt GIF");
+ }
+ }
+}
+
+// Shared failure path for stbi__load_gif_main: frees the decoder's working
+// buffers, the accumulated frame buffer `out` and the delays array, then
+// reports out-of-memory through stbi__errpuc().
+// *delays is reset to 0 after being freed so the caller is not left holding
+// a dangling pointer that it might free again.
+static void *stbi__load_gif_main_outofmem(stbi__gif *g, stbi_uc *out, int **delays)
+{
+   STBI_FREE(g->out);
+   STBI_FREE(g->history);
+   STBI_FREE(g->background);
+
+   if (out) STBI_FREE(out);
+   if (delays && *delays) {
+      STBI_FREE(*delays);
+      *delays = 0;
+   }
+   return stbi__errpuc("outofmem", "Out of memory");
+}
+
+// Load every frame of a GIF into one contiguous buffer, frame after frame.
+//   delays   - if non-NULL, receives a heap array holding g.delay for each frame
+//   x, y     - receive width and height once at least one frame has decoded
+//   z        - receives the number of frames
+//   comp     - forwarded to stbi__gif_load_next
+//   req_comp - desired component count; 0 or 4 keeps the 4-channel frames
+// Returns the frame buffer, or the result of stbi__errpuc() if the stream is
+// not a GIF or memory runs out.
+static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
+{
+   if (stbi__gif_test(s)) {
+      int layers = 0;
+      stbi_uc *u = 0;
+      stbi_uc *out = 0;
+      stbi_uc *two_back = 0;
+      stbi__gif g;
+      int stride;
+      int out_size = 0;
+      int delays_size = 0;
+
+      STBI_NOTUSED(out_size);
+      STBI_NOTUSED(delays_size);
+
+      memset(&g, 0, sizeof(g));
+      if (delays) {
+         *delays = 0;
+      }
+
+      do {
+         u = stbi__gif_load_next(s, &g, comp, req_comp, two_back);
+         if (u == (stbi_uc *) s) u = 0; // end of animated gif marker
+
+         if (u) {
+            *x = g.w;
+            *y = g.h;
+            ++layers;
+            stride = g.w * g.h * 4;
+
+            if (out) {
+               // grow via a temporary so `out` stays valid if realloc fails
+               void *tmp = (stbi_uc*) STBI_REALLOC_SIZED( out, out_size, layers * stride );
+               if (!tmp)
+                  return stbi__load_gif_main_outofmem(&g, out, delays);
+               else {
+                  out = (stbi_uc*) tmp;
+                  out_size = layers * stride;
+               }
+
+               if (delays) {
+                  int *new_delays = (int*) STBI_REALLOC_SIZED( *delays, delays_size, sizeof(int) * layers );
+                  if (!new_delays)
+                     return stbi__load_gif_main_outofmem(&g, out, delays);
+                  *delays = new_delays;
+                  delays_size = layers * sizeof(int);
+               }
+            } else {
+               out = (stbi_uc*)stbi__malloc( layers * stride );
+               if (!out)
+                  return stbi__load_gif_main_outofmem(&g, out, delays);
+               out_size = layers * stride;
+               if (delays) {
+                  *delays = (int*) stbi__malloc( layers * sizeof(int) );
+                  if (!*delays)
+                     return stbi__load_gif_main_outofmem(&g, out, delays);
+                  delays_size = layers * sizeof(int);
+               }
+            }
+            memcpy( out + ((layers - 1) * stride), u, stride );
+            if (layers >= 2) {
+               // The next call needs the frame before the one just stored,
+               // i.e. index layers-2 inside `out`. (`out - 2 * stride` pointed
+               // before the start of the allocation.) Recomputed every
+               // iteration because realloc may move `out`.
+               two_back = out + (layers - 2) * stride;
+            }
+
+            if (delays) {
+               (*delays)[layers - 1U] = g.delay;
+            }
+         }
+      } while (u != 0);
+
+      // free temp buffer;
+      STBI_FREE(g.out);
+      STBI_FREE(g.history);
+      STBI_FREE(g.background);
+
+      // do the final conversion after loading everything;
+      if (req_comp && req_comp != 4)
+         out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h);
+
+      *z = layers;
+      return out;
+   } else {
+      return stbi__errpuc("not GIF", "Image was not as a gif type.");
+   }
+}
+
+// Decodes the first frame of a GIF.
+//
+// On success writes the frame size to *x / *y and returns the pixel buffer,
+// converted from 4 channels to req_comp when req_comp is neither 0 nor 4.
+// Returns null when no frame could be produced. 'ri' is unused.
+static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
+{
+   stbi__gif gif;
+   stbi_uc *frame;
+
+   STBI_NOTUSED(ri);
+   memset(&gif, 0, sizeof(gif));
+
+   frame = stbi__gif_load_next(s, &gif, comp, req_comp, 0);
+   if (frame == (stbi_uc *) s) {
+      // the stream pointer doubles as the "end of animation" marker
+      frame = 0;
+   }
+
+   if (frame != 0) {
+      *x = gif.w;
+      *y = gif.h;
+
+      // conversion happens only after a successful decode
+      if (req_comp != 0 && req_comp != 4)
+         frame = stbi__convert_format(frame, 4, req_comp, gif.w, gif.h);
+   } else if (gif.out != 0) {
+      // decode failed after the image buffer was allocated: release it
+      STBI_FREE(gif.out);
+   }
+
+   // these buffers only matter when loading multiple frames
+   STBI_FREE(gif.history);
+   STBI_FREE(gif.background);
+
+   return frame;
+}
+
+// Thin wrapper: forwards to stbi__gif_info_raw and returns its result.
+static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   int found = stbi__gif_info_raw(s, x, y, comp);
+   return found;
+}
+#endif
+
+// *************************************************************************************************
+// Radiance RGBE HDR loader
+// originally by Nicolas Schulz
+#ifndef STBI_NO_HDR
+// Compares the next bytes of the stream against 'signature'.
+// Returns 0 at the first mismatch (stream left where the mismatch was read);
+// on a full match rewinds the stream and returns 1.
+static int stbi__hdr_test_core(stbi__context *s, const char *signature)
+{
+   const char *expected;
+   for (expected = signature; *expected; ++expected) {
+      if (stbi__get8(s) != *expected)
+         return 0;
+   }
+   stbi__rewind(s);
+   return 1;
+}
+
+// Returns non-zero when the stream starts with either accepted HDR
+// signature ("#?RADIANCE\n" or "#?RGBE\n"). The stream is rewound after
+// each attempt.
+static int stbi__hdr_test(stbi__context* s)
+{
+   int matched = stbi__hdr_test_core(s, "#?RADIANCE\n");
+   stbi__rewind(s);
+   if (matched)
+      return matched;
+
+   matched = stbi__hdr_test_core(s, "#?RGBE\n");
+   stbi__rewind(s);
+   return matched;
+}
+
+#define STBI__HDR_BUFLEN  1024
+// Reads one '\n'-terminated line from the stream into 'buffer' (which must
+// hold STBI__HDR_BUFLEN bytes) and NUL-terminates it; the newline itself is
+// not stored. Lines that fill the buffer are truncated and the remainder of
+// the line is discarded. Returns 'buffer'.
+static char *stbi__hdr_gettoken(stbi__context *z, char *buffer)
+{
+   int used = 0;
+   char ch = (char) stbi__get8(z);
+
+   for (;;) {
+      if (stbi__at_eof(z) || ch == '\n')
+         break;
+
+      buffer[used] = ch;
+      ++used;
+
+      if (used == STBI__HDR_BUFLEN-1) {
+         // buffer full: swallow the rest of the line
+         while (!stbi__at_eof(z) && stbi__get8(z) != '\n') {
+         }
+         break;
+      }
+      ch = (char) stbi__get8(z);
+   }
+
+   buffer[used] = 0;
+   return buffer;
+}
+
+// Expands one 4-byte RGBE pixel ('input') into req_comp floats at 'output'.
+//
+// input[3] is the shared exponent. When it is zero the colour channels are
+// written as 0; otherwise each mantissa byte is scaled by
+// 2^(input[3] - 136). For req_comp <= 2 the three colour bytes are averaged
+// into output[0]. For req_comp 2 and 4 the last channel is set to 1.
+static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
+{
+   if (input[3] == 0) {
+      // zero exponent: colour channels become 0
+      if (req_comp == 4 || req_comp == 3) {
+         if (req_comp == 4) output[3] = 1;
+         output[0] = output[1] = output[2] = 0;
+      } else if (req_comp == 2 || req_comp == 1) {
+         if (req_comp == 2) output[1] = 1;
+         output[0] = 0;
+      }
+      return;
+   }
+
+   {
+      // scale factor derived from the exponent byte
+      float scale = (float) ldexp(1.0f, input[3] - (int)(128 + 8));
+
+      if (req_comp <= 2) {
+         output[0] = (input[0] + input[1] + input[2]) * scale / 3;
+      } else {
+         output[0] = input[0] * scale;
+         output[1] = input[1] * scale;
+         output[2] = input[2] * scale;
+      }
+      if (req_comp == 2) output[1] = 1;
+      if (req_comp == 4) output[3] = 1;
+   }
+}
+
+// Decodes a Radiance RGBE (.hdr) stream into a float image.
+//
+//   s        - input stream, read from its current position
+//   x, y     - receive the width and height parsed from the "-Y <h> +X <w>" line
+//   comp     - optional; receives 3 when non-null
+//   req_comp - floats written per output pixel; 0 is treated as 3
+//   ri       - unused
+//
+// Returns the buffer obtained from stbi__malloc_mad4(width, height, req_comp,
+// sizeof(float), 0), or the result of stbi__errpf() when the header is not
+// recognised, a size check fails, an allocation fails, or the RLE data is bad.
+static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
+{
+ char buffer[STBI__HDR_BUFLEN];
+ char *token;
+ int valid = 0;
+ int width, height;
+ stbi_uc *scanline;
+ float *hdr_data;
+ int len;
+ unsigned char count, value;
+ int i, j, k, c1,c2, z;
+ const char *headerToken;
+ STBI_NOTUSED(ri);
+
+ // Check identifier
+ headerToken = stbi__hdr_gettoken(s,buffer);
+ if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)
+ return stbi__errpf("not HDR", "Corrupt HDR image");
+
+ // Parse header: read lines until an empty one, remembering whether the
+ // supported FORMAT line was seen
+ for(;;) {
+ token = stbi__hdr_gettoken(s,buffer);
+ if (token[0] == 0) break;
+ if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
+ }
+
+ if (!valid) return stbi__errpf("unsupported format", "Unsupported HDR format");
+
+ // Parse width and height
+ // can't use sscanf() if we're not using stdio!
+ token = stbi__hdr_gettoken(s,buffer);
+ if (strncmp(token, "-Y ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format");
+ token += 3;
+ height = (int) strtol(token, &token, 10);
+ while (*token == ' ') ++token;
+ if (strncmp(token, "+X ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format");
+ token += 3;
+ width = (int) strtol(token, NULL, 10);
+
+ if (height > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)");
+ if (width > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)");
+
+ *x = width;
+ *y = height;
+
+ if (comp) *comp = 3;
+ if (req_comp == 0) req_comp = 3;
+
+ if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
+ return stbi__errpf("too large", "HDR image is too large");
+
+ // Read data
+ hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
+ if (!hdr_data)
+ return stbi__errpf("outofmem", "Out of memory");
+
+ // Load image data
+ // image data is stored as some number of scanlines
+ if ( width < 8 || width >= 32768) {
+ // Read flat data
+ for (j=0; j < height; ++j) {
+ for (i=0; i < width; ++i) {
+ stbi_uc rgbe[4];
+ // target of the goto in the RLE branch below, which enters this
+ // loop with i and j already set
+ main_decode_loop:
+ // NOTE(review): the return value of stbi__getn is not checked here
+ stbi__getn(s, rgbe, 4);
+ stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
+ }
+ }
+ } else {
+ // Read RLE-encoded data
+ scanline = NULL;
+
+ for (j = 0; j < height; ++j) {
+ // each RLE scanline is expected to start with bytes 2, 2 followed by
+ // a 16-bit length (high byte first) whose top bit is clear
+ c1 = stbi__get8(s);
+ c2 = stbi__get8(s);
+ len = stbi__get8(s);
+ if (c1 != 2 || c2 != 2 || (len & 0x80)) {
+ // not run-length encoded, so we have to actually use THIS data as a decoded
+ // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
+ stbi_uc rgbe[4];
+ rgbe[0] = (stbi_uc) c1;
+ rgbe[1] = (stbi_uc) c2;
+ rgbe[2] = (stbi_uc) len;
+ rgbe[3] = (stbi_uc) stbi__get8(s);
+ stbi__hdr_convert(hdr_data, rgbe, req_comp);
+ // pixel 0 of row 0 was just written; continue in the flat loop at
+ // pixel 1 of row 0
+ i = 1;
+ j = 0;
+ STBI_FREE(scanline);
+ goto main_decode_loop; // yes, this makes no sense
+ }
+ len <<= 8;
+ len |= stbi__get8(s);
+ if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
+ // the scanline buffer (width * 4 bytes) is allocated once and reused
+ if (scanline == NULL) {
+ scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0);
+ if (!scanline) {
+ STBI_FREE(hdr_data);
+ return stbi__errpf("outofmem", "Out of memory");
+ }
+ }
+
+ // the four byte planes are stored one after another; plane k is
+ // scattered into byte k of each 4-byte scanline entry
+ for (k = 0; k < 4; ++k) {
+ int nleft;
+ i = 0;
+ while ((nleft = width - i) > 0) {
+ count = stbi__get8(s);
+ if (count > 128) {
+ // Run
+ value = stbi__get8(s);
+ count -= 128;
+ if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
+ for (z = 0; z < count; ++z)
+ scanline[i++ * 4 + k] = value;
+ } else {
+ // Dump
+ if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
+ for (z = 0; z < count; ++z)
+ scanline[i++ * 4 + k] = stbi__get8(s);
+ }
+ }
+ }
+ // convert the reassembled RGBE scanline to floats
+ for (i=0; i < width; ++i)
+ stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
+ }
+ if (scanline)
+ STBI_FREE(scanline);
+ }
+
+ return hdr_data;
+}
+
+// Reads only the HDR header. On success stores the parsed height and width
+// in *y / *x, sets *comp to 3 and returns 1. On any failure the stream is
+// rewound and 0 is returned. Null output pointers are accepted.
+static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   char line[STBI__HDR_BUFLEN];
+   char *cursor;
+   int format_seen = 0;
+   int unused;
+
+   if (x == NULL) x = &unused;
+   if (y == NULL) y = &unused;
+   if (comp == NULL) comp = &unused;
+
+   if (stbi__hdr_test(s) == 0)
+      goto not_hdr;
+
+   // scan header lines up to the first empty one
+   for (;;) {
+      cursor = stbi__hdr_gettoken(s, line);
+      if (cursor[0] == 0)
+         break;
+      if (strcmp(cursor, "FORMAT=32-bit_rle_rgbe") == 0)
+         format_seen = 1;
+   }
+   if (!format_seen)
+      goto not_hdr;
+
+   // resolution line: "-Y <height> +X <width>"
+   cursor = stbi__hdr_gettoken(s, line);
+   if (strncmp(cursor, "-Y ", 3) != 0)
+      goto not_hdr;
+   cursor += 3;
+   *y = (int) strtol(cursor, &cursor, 10);
+   while (*cursor == ' ')
+      ++cursor;
+   if (strncmp(cursor, "+X ", 3) != 0)
+      goto not_hdr;
+   cursor += 3;
+   *x = (int) strtol(cursor, NULL, 10);
+   *comp = 3;
+   return 1;
+
+not_hdr:
+   stbi__rewind( s );
+   return 0;
+}
+#endif // STBI_NO_HDR
+
+#ifndef STBI_NO_BMP
+// Parses a BMP header and reports image size and channel count.
+// Returns 0 (after rewinding) when stbi__bmp_parse_header fails, else 1.
+// *comp is 3 when there is no alpha mask, or for 24-bpp data with mask
+// 0xff000000; otherwise 4. Null output pointers are skipped.
+static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   stbi__bmp_data header;
+
+   header.all_a = 255;
+   if (stbi__bmp_parse_header(s, &header) == NULL) {
+      stbi__rewind( s );
+      return 0;
+   }
+
+   if (x) *x = s->img_x;
+   if (y) *y = s->img_y;
+   if (comp) {
+      int opaque = (header.bpp == 24 && header.ma == 0xff000000) || !header.ma;
+      *comp = opaque ? 3 : 4;
+   }
+   return 1;
+}
+#endif
+
+#ifndef STBI_NO_PSD
+// Validates a PSD header and reports the image size.
+// On success stores height/width in *y / *x, sets *comp to 4 and returns 1.
+// On any failed check the stream is rewound and 0 is returned (*y and *x may
+// already have been written). Null output pointers are accepted.
+static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   int channels, bits, unused;
+
+   if (!x) x = &unused;
+   if (!y) y = &unused;
+   if (!comp) comp = &unused;
+
+   // signature: the bytes '8' 'B' 'P' 'S'
+   if (stbi__get32be(s) != 0x38425053)
+      goto reject;
+   if (stbi__get16be(s) != 1)
+      goto reject;
+
+   stbi__skip(s, 6);
+
+   channels = stbi__get16be(s);
+   if (channels < 0 || channels > 16)
+      goto reject;
+
+   *y = stbi__get32be(s);
+   *x = stbi__get32be(s);
+
+   bits = stbi__get16be(s);
+   if (bits != 8 && bits != 16)
+      goto reject;
+   if (stbi__get16be(s) != 3)
+      goto reject;
+
+   *comp = 4;
+   return 1;
+
+reject:
+   stbi__rewind( s );
+   return 0;
+}
+
+// Returns 1 when the stream holds a PSD header whose depth field is 16,
+// otherwise rewinds the stream and returns 0.
+//
+// The header is walked in the same order as stbi__psd_info: signature,
+// version, 6 skipped bytes, channel count, two 32-bit size fields, depth.
+static int stbi__psd_is16(stbi__context *s)
+{
+   int channelCount, depth;
+   if (stbi__get32be(s) != 0x38425053) {
+      stbi__rewind( s );
+      return 0;
+   }
+   if (stbi__get16be(s) != 1) {
+      stbi__rewind( s );
+      return 0;
+   }
+   stbi__skip(s, 6);
+   channelCount = stbi__get16be(s);
+   if (channelCount < 0 || channelCount > 16) {
+      stbi__rewind( s );
+      return 0;
+   }
+   // The two size fields must actually be consumed so that 'depth' is read
+   // from the right offset. Use plain (void) calls rather than wrapping the
+   // reads in STBI_NOTUSED: that macro is defined elsewhere and may expand to
+   // a form that does not evaluate its argument.
+   (void) stbi__get32be(s);
+   (void) stbi__get32be(s);
+   depth = stbi__get16be(s);
+   if (depth != 16) {
+      stbi__rewind( s );
+      return 0;
+   }
+   return 1;
+}
+#endif
+
+#ifndef STBI_NO_PIC
+// Reads a Softimage PIC header and reports size and channel count.
+//
+// On success stores width/height in *x / *y, sets *comp to 4 when any packet's
+// channel mask has bit 0x10 set (3 otherwise) and returns 1. On every failure
+// the stream is rewound and 0 is returned. Null output pointers are accepted.
+static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   int act_comp=0,num_packets=0,chained,dummy;
+   stbi__pic_packet packets[10];
+
+   if (!x) x = &dummy;
+   if (!y) y = &dummy;
+   if (!comp) comp = &dummy;
+
+   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) {
+      stbi__rewind(s);
+      return 0;
+   }
+
+   stbi__skip(s, 88);
+
+   *x = stbi__get16be(s);
+   *y = stbi__get16be(s);
+   if (stbi__at_eof(s)) {
+      stbi__rewind( s);
+      return 0;
+   }
+   // reject images whose pixel count would exceed 1 << 28
+   if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) {
+      stbi__rewind( s );
+      return 0;
+   }
+
+   stbi__skip(s, 8);
+
+   // walk the chained packet descriptors, accumulating the channel mask
+   do {
+      stbi__pic_packet *packet;
+
+      if (num_packets==sizeof(packets)/sizeof(packets[0])) {
+         // too many packets: rewind like every other failure path so the
+         // next format probe starts from the beginning of the stream
+         stbi__rewind( s );
+         return 0;
+      }
+
+      packet = &packets[num_packets++];
+      chained = stbi__get8(s);
+      packet->size    = stbi__get8(s);
+      packet->type    = stbi__get8(s);
+      packet->channel = stbi__get8(s);
+      act_comp |= packet->channel;
+
+      if (stbi__at_eof(s)) {
+         stbi__rewind( s );
+         return 0;
+      }
+      if (packet->size != 8) {
+         stbi__rewind( s );
+         return 0;
+      }
+   } while (chained);
+
+   *comp = (act_comp & 0x10 ? 4 : 3);
+
+   return 1;
+}
+#endif
+
+// *************************************************************************************************
+// Portable Gray Map and Portable Pixel Map loader
+// by Ken Miller
+//
+// PGM: http://netpbm.sourceforge.net/doc/pgm.html
+// PPM: http://netpbm.sourceforge.net/doc/ppm.html
+//
+// Known limitations:
+// Comments in the header section ('#' to end of line) are skipped (supported since 2.09)
+// Does not support ASCII image data (formats P2 and P3)
+
+#ifndef STBI_NO_PNM
+
+// Returns 1 when the stream starts with "P5" or "P6". Otherwise rewinds the
+// stream and returns 0.
+static int stbi__pnm_test(stbi__context *s)
+{
+   char magic, variant;
+
+   magic   = (char) stbi__get8(s);
+   variant = (char) stbi__get8(s);
+   if (magic == 'P' && (variant == '5' || variant == '6'))
+      return 1;
+
+   stbi__rewind( s );
+   return 0;
+}
+
+// Loads a binary PGM/PPM image.
+//
+// The header is parsed by stbi__pnm_info, whose result (8 or 16) is stored in
+// ri->bits_per_channel; 0 from it makes this function return 0. The raw
+// samples are then read in one stbi__getn call and, when req_comp is non-zero
+// and differs from the file's channel count, converted to req_comp channels.
+// *x / *y receive the size; *comp (if non-null) the file's channel count.
+static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
+{
+   stbi_uc *pixels;
+   int bytes_per_channel;
+   STBI_NOTUSED(ri);
+
+   ri->bits_per_channel = stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n);
+   if (ri->bits_per_channel == 0)
+      return 0;
+
+   if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+   if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+
+   *x = s->img_x;
+   *y = s->img_y;
+   if (comp) *comp = s->img_n;
+
+   bytes_per_channel = ri->bits_per_channel / 8;
+
+   if (!stbi__mad4sizes_valid(s->img_n, s->img_x, s->img_y, bytes_per_channel, 0))
+      return stbi__errpuc("too large", "PNM too large");
+
+   pixels = (stbi_uc *) stbi__malloc_mad4(s->img_n, s->img_x, s->img_y, bytes_per_channel, 0);
+   if (pixels == NULL)
+      return stbi__errpuc("outofmem", "Out of memory");
+
+   if (!stbi__getn(s, pixels, s->img_n * s->img_x * s->img_y * bytes_per_channel)) {
+      STBI_FREE(pixels);
+      return stbi__errpuc("bad PNM", "PNM file truncated");
+   }
+
+   // no conversion requested, or already in the requested layout
+   if (req_comp == 0 || req_comp == s->img_n)
+      return pixels;
+
+   // the converters' result is returned as-is, including null on failure
+   if (ri->bits_per_channel == 16)
+      return (stbi_uc *) stbi__convert_format16((stbi__uint16 *) pixels, s->img_n, req_comp, s->img_x, s->img_y);
+   return stbi__convert_format(pixels, s->img_n, req_comp, s->img_x, s->img_y);
+}
+
+// Returns 1 for space, tab, newline, vertical tab, form feed and carriage
+// return; 0 for every other character.
+static int stbi__pnm_isspace(char c)
+{
+   switch (c) {
+      case ' ':
+      case '\t':
+      case '\n':
+      case '\v':
+      case '\f':
+      case '\r':
+         return 1;
+      default:
+         return 0;
+   }
+}
+
+// Advances past whitespace and '#' comments. *c holds the current lookahead
+// character on entry and the first character that was not skipped on return
+// (or whatever was read last if the stream ran out).
+static void stbi__pnm_skip_whitespace(stbi__context *s, char *c)
+{
+   while (1) {
+      // leading whitespace
+      while (!stbi__at_eof(s) && stbi__pnm_isspace(*c)) {
+         *c = (char) stbi__get8(s);
+      }
+
+      // anything other than the start of a comment ends the skip
+      if (stbi__at_eof(s) || *c != '#')
+         return;
+
+      // comment: discard up to the end of the line
+      while (!stbi__at_eof(s) && !(*c == '\n' || *c == '\r')) {
+         *c = (char) stbi__get8(s);
+      }
+   }
+}
+
+// Returns 1 when c is one of '0'..'9', otherwise 0.
+static int stbi__pnm_isdigit(char c)
+{
+   return !(c < '0' || c > '9');
+}
+
+// Parses an unsigned decimal integer starting at the lookahead character *c.
+// On return *c holds the first character after the digits. Returns the
+// parsed value (0 when no digit was present), or the result of stbi__err()
+// when the overflow guard trips.
+static int stbi__pnm_getinteger(stbi__context *s, char *c)
+{
+   int total = 0;
+
+   for (;;) {
+      if (stbi__at_eof(s) || !stbi__pnm_isdigit(*c))
+         break;
+
+      total = total*10 + (*c - '0');
+      *c = (char) stbi__get8(s);
+
+      // guard evaluated against the freshly read lookahead character
+      if (total >= 214748364) {
+         if (total > 214748364 || *c > '7')
+            return stbi__err("integer parse overflow", "Parsing an integer in the PPM header overflowed a 32-bit int");
+      }
+   }
+
+   return total;
+}
+
+// Parses a binary PGM ("P5") / PPM ("P6") header.
+//
+// The stream is rewound first, so this can be called at any position.
+// Stores width, height and channel count (1 for P5, 3 for P6) through
+// x / y / comp; null pointers are accepted.
+//
+// Returns 0 (after rewinding) when the magic does not match, 16 when the
+// max value is above 255, 8 otherwise, or the result of stbi__err() for a
+// zero/overflowing width or height or a max value above 65535.
+static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   int maxv, dummy;
+   char c, p, t;
+
+   if (!x) x = &dummy;
+   if (!y) y = &dummy;
+   if (!comp) comp = &dummy;
+
+   stbi__rewind(s);
+
+   // Get identifier
+   p = (char) stbi__get8(s);
+   t = (char) stbi__get8(s);
+   if (p != 'P' || (t != '5' && t != '6')) {
+       stbi__rewind(s);
+       return 0;
+   }
+
+   *comp = (t == '6') ? 3 : 1;  // '5' is 1-component .pgm; '6' is 3-component .ppm
+
+   c = (char) stbi__get8(s);
+   stbi__pnm_skip_whitespace(s, &c);
+
+   *x = stbi__pnm_getinteger(s, &c); // read width
+   if(*x == 0)
+       return stbi__err("invalid width", "PPM image header had zero or overflowing width");
+   stbi__pnm_skip_whitespace(s, &c);
+
+   *y = stbi__pnm_getinteger(s, &c); // read height
+   if (*y == 0)
+       // report the field that actually failed (this used to say "width")
+       return stbi__err("invalid height", "PPM image header had zero or overflowing height");
+   stbi__pnm_skip_whitespace(s, &c);
+
+   maxv = stbi__pnm_getinteger(s, &c);  // read max value
+   if (maxv > 65535)
+      return stbi__err("max value > 65535", "PPM image supports only 8-bit and 16-bit images");
+   else if (maxv > 255)
+      return 16;
+   else
+      return 8;
+}
+
+// Returns 1 when stbi__pnm_info reports 16 for this stream, otherwise 0.
+static int stbi__pnm_is16(stbi__context *s)
+{
+   int bits = stbi__pnm_info(s, NULL, NULL, NULL);
+   return bits == 16;
+}
+#endif
+
+// Format-independent "info" entry point: tries the *_info probe of every
+// format that is compiled in, in the fixed order below, and returns 1 as soon
+// as one of them returns non-zero. If none does, returns the result of
+// stbi__err("unknown image type", ...). x, y and comp are passed through to
+// the probes unchanged.
+static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
+{
+ #ifndef STBI_NO_JPEG
+ if (stbi__jpeg_info(s, x, y, comp)) return 1;
+ #endif
+
+ #ifndef STBI_NO_PNG
+ if (stbi__png_info(s, x, y, comp)) return 1;
+ #endif
+
+ #ifndef STBI_NO_GIF
+ if (stbi__gif_info(s, x, y, comp)) return 1;
+ #endif
+
+ #ifndef STBI_NO_BMP
+ if (stbi__bmp_info(s, x, y, comp)) return 1;
+ #endif
+
+ #ifndef STBI_NO_PSD
+ if (stbi__psd_info(s, x, y, comp)) return 1;
+ #endif
+
+ #ifndef STBI_NO_PIC
+ if (stbi__pic_info(s, x, y, comp)) return 1;
+ #endif
+
+ // stbi__pnm_info returns 8 or 16 on success; any non-zero value counts
+ #ifndef STBI_NO_PNM
+ if (stbi__pnm_info(s, x, y, comp)) return 1;
+ #endif
+
+ #ifndef STBI_NO_HDR
+ if (stbi__hdr_info(s, x, y, comp)) return 1;
+ #endif
+
+ // test tga last because it's a crappy test!
+ #ifndef STBI_NO_TGA
+ if (stbi__tga_info(s, x, y, comp))
+ return 1;
+ #endif
+ return stbi__err("unknown image type", "Image not of any known type, or corrupt");
+}
+
+// Returns 1 when any compiled-in 16-bit probe (PNG, PSD, PNM, tried in that
+// order) reports a 16-bit image for this stream, otherwise 0.
+static int stbi__is_16_main(stbi__context *s)
+{
+ #ifndef STBI_NO_PNG
+ if (stbi__png_is16(s)) return 1;
+ #endif
+
+ #ifndef STBI_NO_PSD
+ if (stbi__psd_is16(s)) return 1;
+ #endif
+
+ #ifndef STBI_NO_PNM
+ if (stbi__pnm_is16(s)) return 1;
+ #endif
+ return 0;
+}
+
+#ifndef STBI_NO_STDIO
+// Opens 'filename' in binary mode and queries its image header via
+// stbi_info_from_file. Returns that call's result, or the result of
+// stbi__err() if the file cannot be opened. The file is closed before
+// returning.
+STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
+{
+    int status;
+    FILE *fp = stbi__fopen(filename, "rb");
+
+    if (fp == NULL)
+        return stbi__err("can't fopen", "Unable to open file");
+
+    status = stbi_info_from_file(fp, x, y, comp);
+    fclose(fp);
+    return status;
+}
+
+// Queries the image header of an already-open file. The file position
+// reported by ftell on entry is restored with fseek before returning.
+// Returns the result of stbi__info_main.
+STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
+{
+   stbi__context ctx;
+   int status;
+   long start = ftell(f);
+
+   stbi__start_file(&ctx, f);
+   status = stbi__info_main(&ctx, x, y, comp);
+   fseek(f, start, SEEK_SET);
+
+   return status;
+}
+
+// Opens 'filename' in binary mode and asks stbi_is_16_bit_from_file whether
+// it holds a 16-bit image. Returns that call's result, or the result of
+// stbi__err() if the file cannot be opened. The file is closed before
+// returning.
+STBIDEF int stbi_is_16_bit(char const *filename)
+{
+    int status;
+    FILE *fp = stbi__fopen(filename, "rb");
+
+    if (fp == NULL)
+        return stbi__err("can't fopen", "Unable to open file");
+
+    status = stbi_is_16_bit_from_file(fp);
+    fclose(fp);
+    return status;
+}
+
+// 16-bit query for an already-open file. The file position reported by
+// ftell on entry is restored with fseek before returning. Returns the
+// result of stbi__is_16_main.
+STBIDEF int stbi_is_16_bit_from_file(FILE *f)
+{
+   stbi__context ctx;
+   int status;
+   long start = ftell(f);
+
+   stbi__start_file(&ctx, f);
+   status = stbi__is_16_main(&ctx);
+   fseek(f, start, SEEK_SET);
+
+   return status;
+}
+#endif // !STBI_NO_STDIO
+
+// Queries the image header of an in-memory file of 'len' bytes.
+// Returns the result of stbi__info_main.
+STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
+{
+   stbi__context ctx;
+   int status;
+
+   stbi__start_mem(&ctx, buffer, len);
+   status = stbi__info_main(&ctx, x, y, comp);
+   return status;
+}
+
+// Queries the image header of a stream read through user I/O callbacks;
+// 'user' is handed to stbi__start_callbacks together with the callback table.
+// Returns the result of stbi__info_main.
+STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp)
+{
+   stbi__context ctx;
+   int status;
+
+   stbi__start_callbacks(&ctx, (stbi_io_callbacks *) c, user);
+   status = stbi__info_main(&ctx, x, y, comp);
+   return status;
+}
+
+// 16-bit query for an in-memory file of 'len' bytes.
+// Returns the result of stbi__is_16_main.
+STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len)
+{
+   stbi__context ctx;
+   int status;
+
+   stbi__start_mem(&ctx, buffer, len);
+   status = stbi__is_16_main(&ctx);
+   return status;
+}
+
+// 16-bit query for a stream read through user I/O callbacks; 'user' is
+// handed to stbi__start_callbacks together with the callback table.
+// Returns the result of stbi__is_16_main.
+STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user)
+{
+   stbi__context ctx;
+   int status;
+
+   stbi__start_callbacks(&ctx, (stbi_io_callbacks *) c, user);
+   status = stbi__is_16_main(&ctx);
+   return status;
+}
+
+#endif // STB_IMAGE_IMPLEMENTATION
+
+/*
+ revision history:
+ 2.20 (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs
+ 2.19 (2018-02-11) fix warning
+ 2.18 (2018-01-30) fix warnings
+ 2.17 (2018-01-29) change stbi__shiftsigned to avoid clang -O2 bug
+ 1-bit BMP
+ *_is_16_bit api
+ avoid warnings
+ 2.16 (2017-07-23) all functions have 16-bit variants;
+ STBI_NO_STDIO works again;
+ compilation fixes;
+ fix rounding in unpremultiply;
+ optimize vertical flip;
+ disable raw_len validation;
+ documentation fixes
+ 2.15 (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode;
+ warning fixes; disable run-time SSE detection on gcc;
+ uniform handling of optional "return" values;
+ thread-safe initialization of zlib tables
+ 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
+ 2.13 (2016-11-29) add 16-bit API, only supported for PNG right now
+ 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
+ 2.11 (2016-04-02) allocate large structures on the stack
+ remove white matting for transparent PSD
+ fix reported channel count for PNG & BMP
+ re-enable SSE2 in non-gcc 64-bit
+ support RGB-formatted JPEG
+ read 16-bit PNGs (only as 8-bit)
+ 2.10 (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED
+ 2.09 (2016-01-16) allow comments in PNM files
+ 16-bit-per-pixel TGA (not bit-per-component)
+ info() for TGA could break due to .hdr handling
+ info() for BMP now shares code instead of sloppy parse
+ can use STBI_REALLOC_SIZED if allocator doesn't support realloc
+ code cleanup
+ 2.08 (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
+ 2.07 (2015-09-13) fix compiler warnings
+ partial animated GIF support
+ limited 16-bpc PSD support
+ #ifdef unused functions
+ bug with < 92 byte PIC,PNM,HDR,TGA
+ 2.06 (2015-04-19) fix bug where PSD returns wrong '*comp' value
+ 2.05 (2015-04-19) fix bug in progressive JPEG handling, fix warning
+ 2.04 (2015-04-15) try to re-enable SIMD on MinGW 64-bit
+ 2.03 (2015-04-12) extra corruption checking (mmozeiko)
+ stbi_set_flip_vertically_on_load (nguillemot)
+ fix NEON support; fix mingw support
+ 2.02 (2015-01-19) fix incorrect assert, fix warning
+ 2.01 (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2
+ 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
+ 2.00 (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg)
+ progressive JPEG (stb)
+ PGM/PPM support (Ken Miller)
+ STBI_MALLOC,STBI_REALLOC,STBI_FREE
+ GIF bugfix -- seemingly never worked
+ STBI_NO_*, STBI_ONLY_*
+ 1.48 (2014-12-14) fix incorrectly-named assert()
+ 1.47 (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb)
+ optimize PNG (ryg)
+ fix bug in interlaced PNG with user-specified channel count (stb)
+ 1.46 (2014-08-26)
+ fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG
+ 1.45 (2014-08-16)
+ fix MSVC-ARM internal compiler error by wrapping malloc
+ 1.44 (2014-08-07)
+ various warning fixes from Ronny Chevalier
+ 1.43 (2014-07-15)
+ fix MSVC-only compiler problem in code changed in 1.42
+ 1.42 (2014-07-09)
+ don't define _CRT_SECURE_NO_WARNINGS (affects user code)
+ fixes to stbi__cleanup_jpeg path
+ added STBI_ASSERT to avoid requiring assert.h
+ 1.41 (2014-06-25)
+ fix search&replace from 1.36 that messed up comments/error messages
+ 1.40 (2014-06-22)
+ fix gcc struct-initialization warning
+ 1.39 (2014-06-15)
+ fix to TGA optimization when req_comp != number of components in TGA;
+ fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
+ add support for BMP version 5 (more ignored fields)
+ 1.38 (2014-06-06)
+ suppress MSVC warnings on integer casts truncating values
+ fix accidental rename of 'skip' field of I/O
+ 1.37 (2014-06-04)
+ remove duplicate typedef
+ 1.36 (2014-06-03)
+ convert to header file single-file library
+ if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
+ 1.35 (2014-05-27)
+ various warnings
+ fix broken STBI_SIMD path
+ fix bug where stbi_load_from_file no longer left file pointer in correct place
+ fix broken non-easy path for 32-bit BMP (possibly never used)
+ TGA optimization by Arseny Kapoulkine
+ 1.34 (unknown)
+ use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case
+ 1.33 (2011-07-14)
+ make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
+ 1.32 (2011-07-13)
+ support for "info" function for all supported filetypes (SpartanJ)
+ 1.31 (2011-06-20)
+ a few more leak fixes, bug in PNG handling (SpartanJ)
+ 1.30 (2011-06-11)
+ added ability to load files via callbacks to accommodate custom input streams (Ben Wenger)
+ removed deprecated format-specific test/load functions
+ removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway
+ error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha)
+ fix inefficiency in decoding 32-bit BMP (David Woo)
+ 1.29 (2010-08-16)
+ various warning fixes from Aurelien Pocheville
+ 1.28 (2010-08-01)
+ fix bug in GIF palette transparency (SpartanJ)
+ 1.27 (2010-08-01)
+ cast-to-stbi_uc to fix warnings
+ 1.26 (2010-07-24)
+ fix bug in file buffering for PNG reported by SpartanJ
+ 1.25 (2010-07-17)
+ refix trans_data warning (Won Chun)
+ 1.24 (2010-07-12)
+ perf improvements reading from files on platforms with lock-heavy fgetc()
+ minor perf improvements for jpeg
+ deprecated type-specific functions so we'll get feedback if they're needed
+ attempt to fix trans_data warning (Won Chun)
+ 1.23 fixed bug in iPhone support
+ 1.22 (2010-07-10)
+ removed image *writing* support
+ stbi_info support from Jetro Lauha
+ GIF support from Jean-Marc Lienher
+ iPhone PNG-extensions from James Brown
+ warning-fixes from Nicolas Schulz and Janez Zemva (i.e. Janez (U+017D)emva)
+ 1.21 fix use of 'stbi_uc' in header (reported by jon blow)
+ 1.20 added support for Softimage PIC, by Tom Seddon
+ 1.19 bug in interlaced PNG corruption check (found by ryg)
+ 1.18 (2008-08-02)
+ fix a threading bug (local mutable static)
+ 1.17 support interlaced PNG
+ 1.16 major bugfix - stbi__convert_format converted one too many pixels
+ 1.15 initialize some fields for thread safety
+ 1.14 fix threadsafe conversion bug
+ header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
+ 1.13 threadsafe
+ 1.12 const qualifiers in the API
+ 1.11 Support installable IDCT, colorspace conversion routines
+ 1.10 Fixes for 64-bit (don't use "unsigned long")
+ optimized upsampling by Fabian "ryg" Giesen
+ 1.09 Fix format-conversion for PSD code (bad global variables!)
+ 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz
+ 1.07 attempt to fix C++ warning/errors again
+ 1.06 attempt to fix C++ warning/errors again
+ 1.05 fix TGA loading to return correct *comp and use good luminance calc
+ 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free
+ 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR
+ 1.02 support for (subset of) HDR files, float interface for preferred access to them
+ 1.01 fix bug: possible bug in handling right-side up bmps... not sure
+ fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
+ 1.00 interface to zlib that skips zlib header
+ 0.99 correct handling of alpha in palette
+ 0.98 TGA loader by lonesock; dynamically add loaders (untested)
+ 0.97 jpeg errors on too large a file; also catch another malloc failure
+ 0.96 fix detection of invalid v value - particleman@mollyrocket forum
+ 0.95 during header scan, seek to markers in case of padding
+ 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same
+ 0.93 handle jpegtran output; verbose errors
+ 0.92 read 4,8,16,24,32-bit BMP files of several formats
+ 0.91 output 24-bit Windows 3.0 BMP files
+ 0.90 fix a few more warnings; bump version number to approach 1.0
+ 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd
+ 0.60 fix compiling as c++
+ 0.59 fix warnings: merge Dave Moore's -Wall fixes
+ 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian
+ 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
+ 0.56 fix bug: zlib uncompressed mode len vs. nlen
+ 0.55 fix bug: restart_interval not initialized to 0
+ 0.54 allow NULL for 'int *comp'
+ 0.53 fix bug in png 3->4; speedup png decoding
+ 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments
+ 0.51 obey req_comp requests, 1-component jpegs return as 1-component,
+ on 'test' only check type, not whether we support this variant
+ 0.50 (2006-11-19)
+ first released version
+*/
+
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
diff --git a/vendor/tiny_obj_loader_c.h b/vendor/tiny_obj_loader_c.h
new file mode 100644
index 0000000..09087fe
--- /dev/null
+++ b/vendor/tiny_obj_loader_c.h
@@ -0,0 +1,1793 @@
+/*
+ The MIT License (MIT)
+
+ Copyright (c) 2016 - 2019 Syoyo Fujita and many contributors.
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+ */
+#ifndef TINOBJ_LOADER_C_H_
+#define TINOBJ_LOADER_C_H_
+
+/* @todo { Remove stddef dependency. size_t? } */
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct { /* One material read from a .mtl file. Each member is tagged
+                  * with the .mtl statement that the parser in this file
+                  * stores into it. */
+ char *name; /* newmtl */
+
+ float ambient[3]; /* Ka */
+ float diffuse[3]; /* Kd */
+ float specular[3]; /* Ks */
+ float transmittance[3]; /* Kt */
+ float emission[3]; /* Ke */
+ float shininess; /* Ns */
+ float ior; /* index of refraction (Ni) */
+ float dissolve; /* 1 == opaque; 0 == fully transparent (d, or 1 - Tr) */
+ /* illumination model (see http://www.fileformat.info/format/material/) */
+ int illum;
+
+ int pad0; /* padding member; initMaterial() leaves it unset */
+
+ char *ambient_texname; /* map_Ka */
+ char *diffuse_texname; /* map_Kd */
+ char *specular_texname; /* map_Ks */
+ char *specular_highlight_texname; /* map_Ns */
+ char *bump_texname; /* map_bump, bump */
+ char *displacement_texname; /* disp */
+ char *alpha_texname; /* map_d */
+} tinyobj_material_t;
+
+typedef struct { /* A named run of faces. */
+ char *name; /* group name or object name. */
+ unsigned int face_offset; /* presumably the index of the shape's first face
+                            * -- TODO confirm against tinyobj_parse_obj */
+ unsigned int length; /* presumably the number of faces in the shape
+                       * -- TODO confirm against tinyobj_parse_obj */
+} tinyobj_shape_t;
+
+typedef struct { /* Index triple of one face corner, parsed from "v/vt/vn". */
+ int v_idx, vt_idx, vn_idx; /* position, texcoord and normal index;
+                             * parseRawTriple() leaves vt_idx / vn_idx at
+                             * (int)0x80000000 when the field is absent */
+} tinyobj_vertex_index_t;
+
+typedef struct { /* Flattened geometry arrays produced by tinyobj_parse_obj().
+                  * NOTE(review): the code filling this struct is outside the
+                  * visible part of the file; the per-member notes below are
+                  * assumptions to confirm. */
+ unsigned int num_vertices; /* presumably number of positions in `vertices` */
+ unsigned int num_normals; /* presumably number of normals in `normals` */
+ unsigned int num_texcoords; /* presumably number of entries in `texcoords` */
+ unsigned int num_faces; /* presumably number of entries in `faces` */
+ unsigned int num_face_num_verts; /* presumably length of `face_num_verts` */
+
+ int pad0; /* padding member */
+
+ float *vertices;
+ float *normals;
+ float *texcoords;
+ tinyobj_vertex_index_t *faces;
+ int *face_num_verts;
+ int *material_ids;
+} tinyobj_attrib_t;
+
+#define TINYOBJ_FLAG_TRIANGULATE (1 << 0) /* bit for the `flags` argument of tinyobj_parse_obj() */
+
+#define TINYOBJ_INVALID_INDEX (0x80000000) /* same bit pattern parseRawTriple() stores for a missing index */
+
+#define TINYOBJ_SUCCESS (0) /* return codes of the parse functions */
+#define TINYOBJ_ERROR_EMPTY (-1) /* input contained no lines */
+#define TINYOBJ_ERROR_INVALID_PARAMETER (-2) /* NULL output pointer, or the reader produced no data */
+#define TINYOBJ_ERROR_FILE_OPERATION (-3) /* not returned by the code visible in this part of the file */
+
+/* Provide a callback that can read text file without any parsing or
+ * modification. The obj and mtl parser is going to read all the necessary data:
+ * tinyobj_parse_obj
+ * tinyobj_parse_mtl_file
+ *
+ * The MTL loader in this file treats `*len < 1` or `*buf == NULL` after the
+ * call as a failure, so a callback that cannot read the file should leave
+ * both outputs untouched or zeroed.
+ *
+ * @param[in] ctx User provided context.
+ * @param[in] filename Filename to be loaded.
+ * @param[in] is_mtl 1 when the callback is invoked for loading .mtl. 0 for .obj
+ * @param[in] obj_filename .obj filename. Useful when you load .mtl from same
+ * location of .obj. When the callback is called to load .obj, `filename` and
+ * `obj_filename` are same.
+ * @param[out] buf Content of loaded file
+ * @param[out] len Size of content(file)
+ */
+typedef void (*file_reader_callback)(void *ctx, const char *filename,
+ int is_mtl, const char *obj_filename,
+ char **buf, size_t *len);
+
+/* Parse wavefront .obj
+ * @param[out] attrib Attributes
+ * @param[out] shapes Array of parsed shapes
+ * @param[out] num_shapes Array length of `shapes`
+ * @param[out] materials Array of parsed materials
+ * @param[out] num_materials Array length of `materials`
+ * @param[in] file_name File name of .obj
+ * @param[in] file_reader File reader callback function(to read .obj and .mtl).
+ * @param[in] ctx Context pointer passed to the file_reader_callback.
+ * @param[in] flags combination of TINYOBJ_FLAG_***
+ *
+ * Returns TINYOBJ_SUCCESS if things go well.
+ * Returns TINYOBJ_ERROR_*** when there is an error.
+ */
+extern int tinyobj_parse_obj(tinyobj_attrib_t *attrib, tinyobj_shape_t **shapes,
+ size_t *num_shapes, tinyobj_material_t **materials,
+ size_t *num_materials, const char *file_name,
+ file_reader_callback file_reader, void *ctx,
+ unsigned int flags);
+
+/* Parse wavefront .mtl
+ *
+ * @param[out] materials_out
+ * @param[out] num_materials_out
+ * @param[in] filename .mtl filename
+ * @param[in] obj_filename .obj filename. Could be NULL if you just want to
+ parse the .mtl file.
+ * @param[in] file_reader File reader callback
+ * @param[in] ctx Context pointer passed to the file_reader callback.
+
+ * Returns TINYOBJ_SUCCESS if things go well.
+ * Returns TINYOBJ_ERROR_*** when there is an error.
+ */
+extern int tinyobj_parse_mtl_file(tinyobj_material_t **materials_out,
+ size_t *num_materials_out,
+ const char *filename,
+ const char *obj_filename,
+ file_reader_callback file_reader, void *ctx);
+
+extern void tinyobj_attrib_init(tinyobj_attrib_t *attrib);
+extern void tinyobj_attrib_free(tinyobj_attrib_t *attrib);
+extern void tinyobj_shapes_free(tinyobj_shape_t *shapes, size_t num_shapes);
+extern void tinyobj_materials_free(tinyobj_material_t *materials,
+ size_t num_materials);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* TINOBJ_LOADER_C_H_ */
+
+#ifdef TINYOBJ_LOADER_C_IMPLEMENTATION
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+
+#if defined(TINYOBJ_MALLOC) && defined(TINYOBJ_CALLOC) && \
+ defined(TINYOBJ_FREE) && \
+ (defined(TINYOBJ_REALLOC) || defined(TINYOBJ_REALLOC_SIZED))
+/* ok: the includer supplied a complete custom allocator set */
+#elif !defined(TINYOBJ_MALLOC) && !defined(TINYOBJ_CALLOC) && \
+ !defined(TINYOBJ_FREE) && !defined(TINYOBJ_REALLOC) && \
+ !defined(TINYOBJ_REALLOC_SIZED)
+/* ok: nothing supplied, the <stdlib.h> defaults below are used */
+#else
+#error \
+ "Must define all or none of TINYOBJ_MALLOC, TINYOBJ_CALLOC, TINYOBJ_FREE, and TINYOBJ_REALLOC (or TINYOBJ_REALLOC_SIZED)."
+#endif
+
+#ifndef TINYOBJ_MALLOC /* default allocator: the C standard library */
+#include <stdlib.h>
+#define TINYOBJ_MALLOC malloc
+#define TINYOBJ_REALLOC realloc
+#define TINYOBJ_CALLOC calloc
+#define TINYOBJ_FREE free
+#endif
+
+#ifndef TINYOBJ_REALLOC_SIZED /* sized variant falls back to plain realloc, dropping the old size */
+#define TINYOBJ_REALLOC_SIZED(p, oldsz, newsz) TINYOBJ_REALLOC(p, newsz)
+#endif
+
+#define TINYOBJ_MAX_FACES_PER_F_LINE (16) /* capacity of the per-line index arrays in Command */
+#define TINYOBJ_MAX_FILEPATH (8192) /* not referenced in the visible part of this file */
+
+#define IS_SPACE(x) (((x) == ' ') || ((x) == '\t')) /* blank or tab only; CR/LF are not spaces here */
+#define IS_DIGIT(x) ((unsigned int)((x) - '0') < (unsigned int)(10)) /* unsigned wrap makes chars below '0' fail too */
+#define IS_NEW_LINE(x) (((x) == '\r') || ((x) == '\n') || ((x) == '\0')) /* NUL counts as end of line */
+
+/* Advance *token past any leading run of spaces and tabs. */
+static void skip_space(const char **token) {
+  const char *cursor = *token;
+
+  for (; *cursor == ' ' || *cursor == '\t'; ++cursor) {
+  }
+  *token = cursor;
+}
+
+/* Advance *token past any leading run of spaces, tabs and carriage returns. */
+static void skip_space_and_cr(const char **token) {
+  const char *cursor = *token;
+
+  for (;;) {
+    const char c = *cursor;
+    if (c != ' ' && c != '\t' && c != '\r') {
+      break;
+    }
+    ++cursor;
+  }
+  *token = cursor;
+}
+
+/* Return the number of characters before the first space, tab, carriage
+ * return or NUL in `token`. */
+static int until_space(const char *token) {
+  size_t count = 0;
+
+  for (;;) {
+    const char c = token[count];
+    if (c == '\0' || c == ' ' || c == '\t' || c == '\r') {
+      break;
+    }
+    count++;
+  }
+
+  return (int)count;
+}
+
+/* Return the number of characters in `token` before the end of the line,
+ * looking at no more than the first n - 1 characters (token[n - 1] is assumed
+ * to be the terminating '\0').
+ *
+ * A '\n' ends the line. A '\r' ends the line only when another character
+ * follows it inside the scanned range and that character is not '\n'; a '\r'
+ * that starts a "\r\n" pair is counted as part of the line.
+ *
+ * n == 0 describes an empty range and yields 0. Without that guard `n - 1`
+ * would wrap around and the scan would run past the end of the buffer.
+ */
+static size_t length_until_newline(const char *token, size_t n) {
+  size_t len = 0;
+
+  if (n == 0) {
+    return 0;
+  }
+
+  /* Assume token[n-1] = '\0' */
+  for (len = 0; len < n - 1; len++) {
+    if (token[len] == '\n') {
+      break;
+    }
+    /* len + 2 < n is the wrap-free form of len < n - 2 */
+    if ((token[len] == '\r') && ((len + 2) < n) && (token[len + 1] != '\n')) {
+      break;
+    }
+  }
+
+  return len;
+}
+
+/* Return the number of characters in `token` before the first '\n', '\r' or
+ * '\0', looking at no more than n characters.
+ *
+ * Stopping at '\0' keeps the scan inside the string when `n` is only an upper
+ * bound (my_strdup() is called that way), instead of reading whatever bytes
+ * follow the terminator.
+ */
+static size_t length_until_line_feed(const char *token, size_t n) {
+  size_t len = 0;
+
+  for (len = 0; len < n; len++) {
+    if ((token[len] == '\n') || (token[len] == '\r') || (token[len] == '\0')) {
+      break;
+    }
+  }
+
+  return len;
+}
+
+/* Minimal atoi: an optional '+' or '-' followed by decimal digits. Parsing
+ * stops at the first non-digit; no digits at all yields 0. Leading whitespace
+ * is not skipped.
+ * (after http://stackoverflow.com/questions/5710091/how-does-atoi-function-in-c-work)
+ */
+static int my_atoi(const char *c) {
+  const char *p = c;
+  int negative = 0;
+  int magnitude = 0;
+
+  if (*p == '-') {
+    negative = 1;
+    p++;
+  } else if (*p == '+') {
+    p++;
+  }
+
+  for (; (*p >= '0') && (*p <= '9'); p++) {
+    magnitude = magnitude * 10 + (int)(*p - '0');
+  }
+
+  return negative ? magnitude * -1 : magnitude;
+}
+
+/* Convert an OBJ index to a zero-based one. Positive indices are 1-based,
+ * 0 is passed through, and negative indices count back from `n`, the number
+ * of elements defined so far. */
+static int fixIndex(int idx, size_t n) {
+  if (idx < 0) {
+    return (int)n + idx; /* negative value = relative */
+  }
+  return (idx == 0) ? 0 : idx - 1;
+}
+
+/* Advance *token to the next '/', space, tab, carriage return or NUL. */
+static void skip_triple_field(const char **token) {
+  const char *p = *token;
+
+  while (*p != '\0' && *p != '/' && *p != ' ' && *p != '\t' && *p != '\r') {
+    p++;
+  }
+  *token = p;
+}
+
+/* Parse one raw face-corner triple in any of the forms i, i/j/k, i//k, i/j.
+ * Indices are returned exactly as written (still 1-based / relative); the
+ * texcoord and normal index stay at (int)0x80000000 when not present.
+ * On return *token points just past the triple. */
+static tinyobj_vertex_index_t parseRawTriple(const char **token) {
+  tinyobj_vertex_index_t vi;
+
+  /* 0x80000000 = -2147483648 = invalid */
+  vi.vt_idx = (int)(0x80000000);
+  vi.vn_idx = (int)(0x80000000);
+
+  vi.v_idx = my_atoi(*token);
+  skip_triple_field(token);
+  if (**token != '/') {
+    return vi; /* i */
+  }
+  (*token)++;
+
+  if (**token == '/') {
+    /* i//k */
+    (*token)++;
+    vi.vn_idx = my_atoi(*token);
+    skip_triple_field(token);
+    return vi;
+  }
+
+  vi.vt_idx = my_atoi(*token);
+  skip_triple_field(token);
+  if (**token != '/') {
+    return vi; /* i/j */
+  }
+
+  /* i/j/k */
+  (*token)++;
+  vi.vn_idx = my_atoi(*token);
+  skip_triple_field(token);
+  return vi;
+}
+
+/* Skip leading blanks, read one integer with my_atoi() and move *token to
+ * the end of the whitespace-delimited word it was read from. */
+static int parseInt(const char **token) {
+  int value;
+
+  skip_space(token);
+  value = my_atoi(*token);
+  *token += until_space(*token);
+  return value;
+}
+
+/*
+ * Tries to parse a floating point number located at s.
+ *
+ * s_end should be a location in the string where reading should absolutely
+ * stop. For example at the end of the string, to prevent buffer overflows.
+ * No character at or beyond s_end is ever read.
+ *
+ * Parses the following EBNF grammar:
+ * sign = "+" | "-" ;
+ * END = ? anything not in digit ?
+ * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
+ * integer = [sign] , digit , {digit} ;
+ * decimal = integer , ["." , integer] ;
+ * float = ( decimal , END ) | ( decimal , ("E" | "e") , integer , END ) ;
+ *
+ * Valid strings are for example:
+ * -0 +3.1417e+2 -0.0E-3 1.0324 -1.41 11e2
+ *
+ * If the parsing is a success, result is set to the parsed value and 1
+ * is returned. On failure 0 is returned and *result is left untouched.
+ *
+ * The function is greedy and will parse until any of the following happens:
+ * - a non-conforming character is encountered.
+ * - s_end is reached.
+ *
+ * The following situations triggers a failure:
+ * - s >= s_end.
+ * - parse failure (no leading digit, or an 'e'/'E' without exponent digits).
+ */
+static int tryParseDouble(const char *s, const char *s_end, double *result) {
+  double mantissa = 0.0;
+  /* Decimal exponent as written in the text. The value is assembled below as
+   * mantissa * 5^exponent * 2^exponent. */
+  int exponent = 0;
+  char sign = '+';
+  char exp_sign = '+';
+  char const *curr = s;
+  /* How many characters were read in a loop. */
+  int read = 0;
+
+  if (s >= s_end) {
+    return 0; /* fail */
+  }
+
+  /* Find out what sign we've got. */
+  if (*curr == '+' || *curr == '-') {
+    sign = *curr;
+    curr++;
+  } else if (!IS_DIGIT(*curr)) {
+    return 0;
+  }
+
+  /* Read the integer part. */
+  while (curr != s_end && IS_DIGIT(*curr)) {
+    mantissa *= 10;
+    mantissa += (int)(*curr - 0x30);
+    curr++;
+    read++;
+  }
+
+  /* We must make sure we actually got something. */
+  if (read == 0) {
+    return 0;
+  }
+  /* We allow numbers of form "#", "###" etc. */
+  if (curr == s_end) {
+    goto assemble;
+  }
+
+  /* Read the decimal part. */
+  if (*curr == '.') {
+    /* Running 10^-k scale. Multiplying once per digit gives the same value
+     * as rebuilding the power from 1.0 for every digit. */
+    double frac_value = 1.0;
+    curr++;
+    while (curr != s_end && IS_DIGIT(*curr)) {
+      frac_value *= 0.1;
+      mantissa += (int)(*curr - 0x30) * frac_value;
+      curr++;
+    }
+  } else if (*curr != 'e' && *curr != 'E') {
+    goto assemble;
+  }
+
+  if (curr == s_end) {
+    goto assemble;
+  }
+
+  /* Read the exponent part. */
+  if (*curr == 'e' || *curr == 'E') {
+    curr++;
+    /* Empty E is not allowed. Check the bound first so that *s_end is
+     * never inspected. */
+    if (curr == s_end) {
+      return 0;
+    }
+    if (*curr == '+' || *curr == '-') {
+      exp_sign = *curr;
+      curr++;
+    } else if (!IS_DIGIT(*curr)) {
+      return 0;
+    }
+
+    read = 0;
+    while (curr != s_end && IS_DIGIT(*curr)) {
+      /* Stop accumulating once the exponent is far beyond the range of a
+       * double. Every exponent >= 1024 already drives the result to
+       * infinity (or to zero when negative), so the value is unchanged
+       * while int overflow and huge loops below are avoided. */
+      if (exponent < 10000) {
+        exponent = exponent * 10 + (int)(*curr - 0x30);
+      }
+      curr++;
+      read++;
+    }
+    if (read == 0) {
+      return 0;
+    }
+  }
+
+assemble:
+
+{
+  double a = 1.0; /* = pow(5.0, exponent); */
+  double b = 1.0; /* = 2.0^exponent */
+  int i;
+  for (i = 0; i < exponent; i++) {
+    a = a * 5.0;
+    b = b * 2.0;
+  }
+
+  if (exp_sign == '-') {
+    a = 1.0 / a;
+    b = 1.0 / b;
+  }
+
+  *result = (sign == '+' ? 1 : -1) * (mantissa * a * b);
+}
+
+  return 1;
+}
+
+/* Skip leading blanks and parse the next whitespace-delimited word as a
+ * float. A word that is not a valid number yields 0.0f. *token is moved to
+ * the end of the word either way. */
+static float parseFloat(const char **token) {
+  double parsed = 0.0;
+  const char *stop;
+
+  skip_space(token);
+  stop = *token + until_space(*token);
+  tryParseDouble(*token, stop, &parsed);
+  *token = stop;
+
+  return (float)parsed;
+}
+
+/* Parse two consecutive floats from *token into *x then *y. */
+static void parseFloat2(float *x, float *y, const char **token) {
+  const float first = parseFloat(token);
+  const float second = parseFloat(token);
+
+  *x = first;
+  *y = second;
+}
+
+/* Parse three consecutive floats from *token into *x, *y, then *z. */
+static void parseFloat3(float *x, float *y, float *z, const char **token) {
+  const float first = parseFloat(token);
+  const float second = parseFloat(token);
+  const float third = parseFloat(token);
+
+  *x = first;
+  *y = second;
+  *z = third;
+}
+
+/* Length of `s`, looking at no more than the first n characters. */
+static size_t my_strnlen(const char *s, size_t n) {
+  size_t i = 0;
+
+  while (i < n && s[i] != '\0') {
+    i++;
+  }
+  return i;
+}
+
+/* Duplicate the first line of `s`: the copy stops at the first line-ending
+ * character found by length_until_line_feed() or after `max_length`
+ * characters, whichever comes first, and is always NUL-terminated.
+ *
+ * Returns a buffer obtained from TINYOBJ_MALLOC that the caller must release
+ * with TINYOBJ_FREE, or NULL when `s` is NULL or the allocation fails (the
+ * same convention my_strndup() follows).
+ */
+static char *my_strdup(const char *s, size_t max_length) {
+  char *d;
+  size_t len;
+
+  if (s == NULL)
+    return NULL;
+
+  /* Do not consider CRLF line ending(#19) */
+  len = length_until_line_feed(s, max_length);
+
+  /* trim line ending and append '\0' */
+  d = (char *)TINYOBJ_MALLOC(len + 1); /* + '\0' */
+  if (d == NULL)
+    return NULL;
+  memcpy(d, s, len);
+  d[len] = '\0';
+
+  return d;
+}
+
+/* Duplicate at most `len` characters of `s` into a fresh NUL-terminated
+ * buffer from TINYOBJ_MALLOC. Returns NULL when `s` is NULL, `len` is 0, or
+ * the allocation fails. */
+static char *my_strndup(const char *s, size_t len) {
+  size_t copy_len;
+  char *copy;
+
+  if (s == NULL || len == 0) {
+    return NULL;
+  }
+
+  copy_len = my_strnlen(s, len);
+  copy = (char *)TINYOBJ_MALLOC(copy_len + 1); /* + '\0' */
+  if (copy != NULL) {
+    memcpy(copy, s, copy_len);
+    copy[copy_len] = '\0';
+  }
+
+  return copy;
+}
+
+/* Read one whole line from `file` into the growable buffer *buf (of *size
+ * bytes), doubling the buffer until the line fits.
+ *
+ * Returns a non-NULL pointer when a line was read, including a final line
+ * that ends at end-of-file without '\n'. Returns NULL when nothing could be
+ * read, on a read error, or when the buffer could not be grown; in the last
+ * case *buf and *size still describe the old, valid buffer.
+ */
+char *dynamic_fgets(char **buf, size_t *size, FILE *file) {
+  char *ret;
+  size_t used;
+
+  if (!(ret = fgets(*buf, (int)*size, file))) {
+    return ret;
+  }
+
+  if (NULL != strchr(*buf, '\n')) {
+    return ret;
+  }
+
+  do {
+    size_t old_size = *size;
+    size_t new_size = old_size * 2;
+    char *grown;
+
+    /* Append where the text actually ends. The buffer is not necessarily
+     * full, e.g. when the previous read stopped at end-of-file. */
+    used = strlen(*buf);
+
+    grown = (char *)TINYOBJ_REALLOC_SIZED(*buf, old_size, new_size);
+    if (grown == NULL) {
+      return NULL; /* keep the old buffer; the caller still owns it */
+    }
+    *buf = grown;
+    *size = new_size;
+
+    ret = fgets(grown + used, (int)(new_size - used), file);
+  } while (ret && (NULL == strchr(*buf + used, '\n')));
+
+  if (ret == NULL && ferror(file)) {
+    return NULL;
+  }
+
+  /* Either the line is complete, or end-of-file was hit after some text was
+   * already read. Both count as a line for the caller. */
+  return *buf;
+}
+
+/* Reset a material to the parser defaults: no name or textures, black
+ * colours, illum 0, and dissolve / shininess / ior of 1. */
+static void initMaterial(tinyobj_material_t *material) {
+  int channel;
+
+  for (channel = 0; channel < 3; channel++) {
+    material->ambient[channel] = 0.f;
+    material->diffuse[channel] = 0.f;
+    material->specular[channel] = 0.f;
+    material->transmittance[channel] = 0.f;
+    material->emission[channel] = 0.f;
+  }
+
+  material->shininess = 1.f;
+  material->ior = 1.f;
+  material->dissolve = 1.f;
+  material->illum = 0;
+
+  material->name = NULL;
+  material->ambient_texname = NULL;
+  material->diffuse_texname = NULL;
+  material->specular_texname = NULL;
+  material->specular_highlight_texname = NULL;
+  material->bump_texname = NULL;
+  material->displacement_texname = NULL;
+  material->alpha_texname = NULL;
+}
+
+/* Implementation of string to int hashtable.
+ * Entries are keyed by the djb2 hash of the string only; the string itself is
+ * not stored, so two names with the same hash share one entry. */
+
+#define HASH_TABLE_ERROR 1
+#define HASH_TABLE_SUCCESS 0
+
+#define HASH_TABLE_DEFAULT_SIZE 10 /* capacity used when create_hash_table() is given 0 */
+
+typedef struct hash_table_entry_t {
+ unsigned long hash; /* key: hash of the inserted name */
+ int filled; /* non-zero once the slot holds a value */
+ int pad0;
+ long value;
+
+ struct hash_table_entry_t *next; /* link followed by hash_table_find(), starting at the home slot */
+} hash_table_entry_t;
+
+typedef struct {
+ unsigned long *hashes; /* hashes in insertion order, `n` of them in use */
+ hash_table_entry_t *entries; /* `capacity` slots */
+ size_t capacity;
+ size_t n; /* number of successful insertions */
+} hash_table_t;
+
+/* djb2 string hash: start at 5381 and fold each byte in as
+ * hash * 33 + byte. */
+static unsigned long hash_djb2(const unsigned char *str) {
+  unsigned long hash = 5381;
+  const unsigned char *p;
+
+  for (p = str; *p != 0; p++) {
+    hash = hash * 33 + (unsigned long)(*p);
+  }
+
+  return hash;
+}
+
+/* Initialise `hash_table` as an empty table with room for `start_capacity`
+ * entries (HASH_TABLE_DEFAULT_SIZE when 0 is given). Allocation results are
+ * stored as returned, without a check. */
+static void create_hash_table(size_t start_capacity, hash_table_t *hash_table) {
+  const size_t capacity =
+      (start_capacity < 1) ? HASH_TABLE_DEFAULT_SIZE : start_capacity;
+
+  hash_table->hashes =
+      (unsigned long *)TINYOBJ_MALLOC(capacity * sizeof(unsigned long));
+  hash_table->entries =
+      (hash_table_entry_t *)TINYOBJ_CALLOC(capacity, sizeof(hash_table_entry_t));
+  hash_table->n = 0;
+  hash_table->capacity = capacity;
+}
+
+/* Release the two arrays owned by `hash_table`. The struct itself belongs to
+ * the caller and its members are left as they were. */
+static void destroy_hash_table(hash_table_t *hash_table) {
+  hash_table_entry_t *slots = hash_table->entries;
+  unsigned long *keys = hash_table->hashes;
+
+  TINYOBJ_FREE(slots);
+  TINYOBJ_FREE(keys);
+}
+
+/* Store (hash, value) in the first free slot found by quadratic probing from
+ * the home slot hash % capacity. A slot other than the home slot is linked
+ * in right behind the home slot so hash_table_find() can reach it.
+ * Returns HASH_TABLE_ERROR when capacity - 1 probes find no free slot. */
+static int hash_table_insert_value(unsigned long hash, long value,
+                                   hash_table_t *hash_table) {
+  const size_t capacity = hash_table->capacity;
+  const size_t home = hash % capacity;
+  hash_table_entry_t *home_entry = hash_table->entries + home;
+  hash_table_entry_t *slot_entry;
+  size_t slot = home;
+  size_t step = 1;
+
+  while (hash_table->entries[slot].filled) {
+    if (step >= hash_table->capacity) {
+      return HASH_TABLE_ERROR;
+    }
+    slot = (home + (step * step)) % hash_table->capacity;
+    step++;
+  }
+
+  slot_entry = hash_table->entries + slot;
+  slot_entry->hash = hash;
+  slot_entry->filled = 1;
+  slot_entry->value = value;
+
+  if (slot != home) {
+    /* Displaced entry: splice it in directly after the home slot. */
+    slot_entry->next = home_entry->next;
+    home_entry->next = slot_entry;
+  }
+
+  return HASH_TABLE_SUCCESS;
+}
+
+/* Insert (hash, value) and, on success, append the hash to the table's
+ * insertion-ordered `hashes` list. Returns the status of the insertion. */
+static int hash_table_insert(unsigned long hash, long value,
+                             hash_table_t *hash_table) {
+  const int status = hash_table_insert_value(hash, value, hash_table);
+
+  if (status != HASH_TABLE_SUCCESS) {
+    return status;
+  }
+
+  hash_table->hashes[hash_table->n] = hash;
+  hash_table->n += 1;
+  return status;
+}
+
+/* Walk the chain that starts at the home slot of `hash` and return the first
+ * filled entry with that hash, or NULL when there is none. */
+static hash_table_entry_t *hash_table_find(unsigned long hash,
+                                           hash_table_t *hash_table) {
+  hash_table_entry_t *cursor;
+
+  for (cursor = hash_table->entries + (hash % hash_table->capacity);
+       cursor != NULL; cursor = cursor->next) {
+    if (cursor->hash == hash && cursor->filled) {
+      return cursor;
+    }
+  }
+  return NULL;
+}
+
+/* Double the capacity of `hash_table`: the `hashes` array is reallocated in
+ * place, a fresh zeroed entry array is allocated, and every filled entry of
+ * the old array is re-inserted into the new one. */
+static void hash_table_grow(hash_table_t *hash_table) {
+  const size_t old_capacity = hash_table->capacity;
+  const size_t new_capacity = 2 * old_capacity;
+  hash_table_t grown;
+  size_t slot;
+
+  /* Not using create_hash_table() because the hash array must be
+   * reallocated, keeping its contents. */
+  hash_table->hashes = (unsigned long *)TINYOBJ_REALLOC_SIZED(
+      (void *)hash_table->hashes, sizeof(unsigned long) * old_capacity,
+      sizeof(unsigned long) * new_capacity);
+  grown.hashes = hash_table->hashes;
+  grown.entries = (hash_table_entry_t *)TINYOBJ_CALLOC(
+      new_capacity, sizeof(hash_table_entry_t));
+  grown.capacity = new_capacity;
+  grown.n = hash_table->n;
+
+  /* Rehash */
+  for (slot = 0; slot < old_capacity; slot++) {
+    const hash_table_entry_t *old_entry = &hash_table->entries[slot];
+    if (!old_entry->filled) {
+      continue;
+    }
+    hash_table_insert_value(old_entry->hash, old_entry->value, &grown);
+  }
+
+  TINYOBJ_FREE(hash_table->entries);
+  *hash_table = grown;
+}
+
+/* Return 1 when an entry for `name` (by its djb2 hash) is present, else 0. */
+static int hash_table_exists(const char *name, hash_table_t *hash_table) {
+  const unsigned long key = hash_djb2((const unsigned char *)name);
+  const hash_table_entry_t *found = hash_table_find(key, hash_table);
+
+  return found != NULL;
+}
+
+/* Map `name` to `val`: overwrite the value when the name's hash is already
+ * present, otherwise insert it, growing the table until the insertion
+ * succeeds. */
+static void hash_table_set(const char *name, size_t val,
+                           hash_table_t *hash_table) {
+  const unsigned long key = hash_djb2((const unsigned char *)name);
+  hash_table_entry_t *existing = hash_table_find(key, hash_table);
+
+  if (existing != NULL) {
+    existing->value = (long)val;
+    return;
+  }
+
+  for (;;) {
+    if (hash_table_insert(key, (long)val, hash_table) == HASH_TABLE_SUCCESS) {
+      break;
+    }
+    hash_table_grow(hash_table);
+  }
+}
+
+/* Return the value stored for `name`, or -1 when the name is not in the
+ * table. Values are stored from size_t counts by hash_table_set(), so -1 can
+ * never be a stored value; it replaces a NULL-pointer dereference on a
+ * missing key. */
+static long hash_table_get(const char *name, hash_table_t *hash_table) {
+  hash_table_entry_t *ret =
+      hash_table_find(hash_djb2((const unsigned char *)(name)), hash_table);
+  if (ret == NULL) {
+    return -1;
+  }
+  return ret->value;
+}
+
+/* Grow the material array `prev` (holding `num_materials` elements) by one
+ * slot and copy *new_mat into it. The char* members are copied as pointers,
+ * so ownership of the strings moves into the array. Returns the possibly
+ * relocated array. */
+static tinyobj_material_t *tinyobj_material_add(tinyobj_material_t *prev,
+                                                size_t num_materials,
+                                                tinyobj_material_t *new_mat) {
+  const size_t old_bytes = sizeof(tinyobj_material_t) * num_materials;
+  const size_t new_bytes = old_bytes + sizeof(tinyobj_material_t);
+  tinyobj_material_t *grown =
+      (tinyobj_material_t *)TINYOBJ_REALLOC_SIZED(prev, old_bytes, new_bytes);
+
+  grown[num_materials] = *new_mat;
+  return grown;
+}
+
+/* Return 1 when p[i] terminates a line: a NUL, a '\n' (which also closes a
+ * "\r\n" pair), or a lone '\r' that is followed inside [0, end_i) by a
+ * character other than '\n'. Return 0 otherwise. */
+static int is_line_ending(const char *p, size_t i, size_t end_i) {
+  const char c = p[i];
+
+  if (c == '\0' || c == '\n') {
+    return 1;
+  }
+  /* detect only \r case */
+  return (c == '\r') && ((i + 1) < end_i) && (p[i + 1] != '\n');
+}
+
+typedef struct { /* Location of one line inside the file buffer. */
+ size_t pos; /* offset of the line's first character */
+ size_t len; /* number of characters, excluding the line-ending character */
+} LineInfo;
+
+/* Split buf[0, buf_len) into lines and describe them in a newly allocated
+ * LineInfo array.
+ *
+ * A line ends at every position accepted by is_line_ending(). Text after the
+ * last line ending, or the whole buffer when it holds no line ending at all,
+ * forms one final line.
+ *
+ * @param[out] line_infos Receives the array (TINYOBJ_MALLOC'ed; release it
+ *                        with TINYOBJ_FREE). Left untouched when the buffer
+ *                        is empty.
+ * @param[out] num_lines  Receives the number of lines; 0 on failure.
+ *
+ * Returns 0 on success. Returns TINYOBJ_ERROR_EMPTY when there are no lines
+ * or when the array cannot be allocated (there is no dedicated
+ * out-of-memory code).
+ */
+static int get_line_infos(const char *buf, size_t buf_len,
+                          LineInfo **line_infos, size_t *num_lines) {
+  size_t i;
+  size_t count = 0;
+  size_t line_start = 0; /* offset just past the most recent line ending */
+  size_t line_no = 0;
+  LineInfo *infos;
+
+  *num_lines = 0;
+
+  /* Count # of lines. */
+  for (i = 0; i < buf_len; i++) {
+    if (is_line_ending(buf, i, buf_len)) {
+      count++;
+      line_start = i + 1;
+    }
+  }
+  /* The last char from the input may not be a line ending character, so
+   * count the trailing text as one more line. Comparing against line_start
+   * (0 when no ending was seen) tells "no line ending at all" apart from
+   * "line ending at index 0". */
+  if (line_start < buf_len) {
+    count++;
+  }
+
+  if (count == 0)
+    return TINYOBJ_ERROR_EMPTY;
+
+  infos = (LineInfo *)TINYOBJ_MALLOC(sizeof(LineInfo) * count);
+  if (infos == NULL) {
+    *line_infos = NULL;
+    return TINYOBJ_ERROR_EMPTY;
+  }
+
+  /* Fill line infos. */
+  line_start = 0;
+  for (i = 0; i < buf_len; i++) {
+    if (is_line_ending(buf, i, buf_len)) {
+      infos[line_no].pos = line_start;
+      infos[line_no].len = i - line_start;
+      line_start = i + 1;
+      line_no++;
+    }
+  }
+  if (line_start < buf_len) {
+    infos[line_no].pos = line_start;
+    infos[line_no].len = buf_len - line_start;
+  }
+
+  *line_infos = infos;
+  *num_lines = count;
+
+  return 0;
+}
+
+/* Return non-zero when `token` begins with the `kw_len`-character keyword
+ * `kw` immediately followed by a space or tab. */
+static int mtl_has_keyword(const char *token, const char *kw, size_t kw_len) {
+  return (0 == strncmp(token, kw, kw_len)) && IS_SPACE(token[kw_len]);
+}
+
+/* Store a copy of the text in [src, line_end) into *dst, first releasing a
+ * string that an earlier statement of the same material stored there. */
+static void mtl_set_string(char **dst, const char *src, const char *line_end) {
+  if (*dst != NULL) {
+    TINYOBJ_FREE(*dst);
+  }
+  *dst = my_strdup(src, (size_t)(line_end - src));
+}
+
+/* Load the .mtl file `mtl_filename` through `file_reader` and parse it into
+ * an array of materials.
+ *
+ * @param[out] materials_out     Receives the material array (NULL when the
+ *                               file defines no material).
+ * @param[out] num_materials_out Receives the number of materials.
+ * @param[in]  mtl_filename      Name handed to `file_reader` as the file.
+ * @param[in]  obj_filename      Name handed to `file_reader` as the .obj.
+ * @param[in]  file_reader       Callback that supplies the file contents.
+ * @param[in]  ctx               Opaque pointer passed through to the callback.
+ * @param[in]  material_table    Optional (may be NULL). When given, every
+ *                               `newmtl` name is mapped to its array index.
+ *
+ * Returns TINYOBJ_SUCCESS, TINYOBJ_ERROR_INVALID_PARAMETER (NULL output
+ * pointer or callback, or the callback produced no data) or
+ * TINYOBJ_ERROR_EMPTY (no lines).
+ *
+ * Lines that do not fit the 4096-byte line buffer are skipped rather than
+ * copied, and unknown statements are ignored.
+ */
+static int tinyobj_parse_and_index_mtl_file(
+    tinyobj_material_t **materials_out, size_t *num_materials_out,
+    const char *mtl_filename, const char *obj_filename,
+    file_reader_callback file_reader, void *ctx, hash_table_t *material_table) {
+  tinyobj_material_t material;
+  size_t num_materials = 0;
+  tinyobj_material_t *materials = NULL;
+  int has_previous_material = 0;
+  size_t num_lines = 0;
+  LineInfo *line_infos = NULL;
+  size_t i = 0;
+  char *buf = NULL;
+  size_t len = 0;
+
+  if (materials_out == NULL || num_materials_out == NULL) {
+    return TINYOBJ_ERROR_INVALID_PARAMETER;
+  }
+
+  (*materials_out) = NULL;
+  (*num_materials_out) = 0;
+
+  if (file_reader == NULL) {
+    return TINYOBJ_ERROR_INVALID_PARAMETER;
+  }
+
+  file_reader(ctx, mtl_filename, 1, obj_filename, &buf, &len);
+  if (len < 1 || buf == NULL) {
+    return TINYOBJ_ERROR_INVALID_PARAMETER;
+  }
+
+  if (get_line_infos(buf, len, &line_infos, &num_lines) != 0) {
+    TINYOBJ_FREE(line_infos);
+    return TINYOBJ_ERROR_EMPTY;
+  }
+
+  /* Create a default material */
+  initMaterial(&material);
+
+  for (i = 0; i < num_lines; i++) {
+    const char *p = &buf[line_infos[i].pos];
+    size_t p_len = line_infos[i].len;
+    char linebuf[4096];
+    const char *token;
+    const char *line_end;
+
+    /* Runtime bound (not just an assert) so release builds cannot overflow
+     * linebuf. The accepted lengths are the same as before: < 4095. */
+    if (p_len >= sizeof(linebuf) - 1) {
+      continue;
+    }
+
+    memcpy(linebuf, p, p_len);
+    linebuf[p_len] = '\0';
+
+    token = linebuf;
+    line_end = token + p_len;
+
+    /* Skip leading space. */
+    token += strspn(token, " \t");
+
+    if (token[0] == '\0' || token[0] == '#') {
+      continue; /* empty line or comment line */
+    }
+
+    /* new mtl */
+    if (mtl_has_keyword(token, "newmtl", 6)) {
+      char namebuf[4096];
+      /* An absent name leaves the scan below without a match; start from
+       * an empty string instead of uninitialised bytes. */
+      namebuf[0] = '\0';
+
+      /* flush previous material. */
+      if (has_previous_material) {
+        materials = tinyobj_material_add(materials, num_materials, &material);
+        num_materials++;
+      } else {
+        has_previous_material = 1;
+      }
+
+      /* initial temporary material */
+      initMaterial(&material);
+
+      /* set new mtl name */
+      token += 7;
+#ifdef _MSC_VER
+      sscanf_s(token, "%s", namebuf, (unsigned)_countof(namebuf));
+#else
+      sscanf(token, "%4095s", namebuf);
+#endif
+      /* namebuf is NUL-terminated and holds no line ending, so its own
+       * length is the exact amount to copy. */
+      material.name = my_strdup(namebuf, strlen(namebuf));
+
+      /* Add material to material table */
+      if (material_table && material.name) {
+        hash_table_set(material.name, num_materials, material_table);
+      }
+
+      continue;
+    }
+
+    /* ambient */
+    if (mtl_has_keyword(token, "Ka", 2)) {
+      token += 2;
+      parseFloat3(&material.ambient[0], &material.ambient[1],
+                  &material.ambient[2], &token);
+      continue;
+    }
+
+    /* diffuse */
+    if (mtl_has_keyword(token, "Kd", 2)) {
+      token += 2;
+      parseFloat3(&material.diffuse[0], &material.diffuse[1],
+                  &material.diffuse[2], &token);
+      continue;
+    }
+
+    /* specular */
+    if (mtl_has_keyword(token, "Ks", 2)) {
+      token += 2;
+      parseFloat3(&material.specular[0], &material.specular[1],
+                  &material.specular[2], &token);
+      continue;
+    }
+
+    /* transmittance */
+    if (mtl_has_keyword(token, "Kt", 2)) {
+      token += 2;
+      parseFloat3(&material.transmittance[0], &material.transmittance[1],
+                  &material.transmittance[2], &token);
+      continue;
+    }
+
+    /* ior(index of refraction) */
+    if (mtl_has_keyword(token, "Ni", 2)) {
+      token += 2;
+      material.ior = parseFloat(&token);
+      continue;
+    }
+
+    /* emission */
+    if (mtl_has_keyword(token, "Ke", 2)) {
+      token += 2;
+      parseFloat3(&material.emission[0], &material.emission[1],
+                  &material.emission[2], &token);
+      continue;
+    }
+
+    /* shininess */
+    if (mtl_has_keyword(token, "Ns", 2)) {
+      token += 2;
+      material.shininess = parseFloat(&token);
+      continue;
+    }
+
+    /* illum model */
+    if (mtl_has_keyword(token, "illum", 5)) {
+      token += 6;
+      material.illum = parseInt(&token);
+      continue;
+    }
+
+    /* dissolve */
+    if (mtl_has_keyword(token, "d", 1)) {
+      token += 1;
+      material.dissolve = parseFloat(&token);
+      continue;
+    }
+    if (mtl_has_keyword(token, "Tr", 2)) {
+      token += 2;
+      /* Invert value of Tr(assume Tr is in range [0, 1]) */
+      material.dissolve = 1.0f - parseFloat(&token);
+      continue;
+    }
+
+    /* ambient texture */
+    if (mtl_has_keyword(token, "map_Ka", 6)) {
+      mtl_set_string(&material.ambient_texname, token + 7, line_end);
+      continue;
+    }
+
+    /* diffuse texture */
+    if (mtl_has_keyword(token, "map_Kd", 6)) {
+      mtl_set_string(&material.diffuse_texname, token + 7, line_end);
+      continue;
+    }
+
+    /* specular texture */
+    if (mtl_has_keyword(token, "map_Ks", 6)) {
+      mtl_set_string(&material.specular_texname, token + 7, line_end);
+      continue;
+    }
+
+    /* specular highlight texture */
+    if (mtl_has_keyword(token, "map_Ns", 6)) {
+      mtl_set_string(&material.specular_highlight_texname, token + 7,
+                     line_end);
+      continue;
+    }
+
+    /* bump texture */
+    if (mtl_has_keyword(token, "map_bump", 8)) {
+      mtl_set_string(&material.bump_texname, token + 9, line_end);
+      continue;
+    }
+
+    /* alpha texture */
+    if (mtl_has_keyword(token, "map_d", 5)) {
+      mtl_set_string(&material.alpha_texname, token + 6, line_end);
+      continue;
+    }
+
+    /* bump texture */
+    if (mtl_has_keyword(token, "bump", 4)) {
+      mtl_set_string(&material.bump_texname, token + 5, line_end);
+      continue;
+    }
+
+    /* displacement texture */
+    if (mtl_has_keyword(token, "disp", 4)) {
+      mtl_set_string(&material.displacement_texname, token + 5, line_end);
+      continue;
+    }
+
+    /* @todo { unknown parameter } */
+  }
+
+  TINYOBJ_FREE(line_infos);
+
+  if (material.name) {
+    /* Flush last material element */
+    materials = tinyobj_material_add(materials, num_materials, &material);
+    num_materials++;
+  }
+
+  (*num_materials_out) = num_materials;
+  (*materials_out) = materials;
+
+  return TINYOBJ_SUCCESS;
+}
+
+/* Public entry point for parsing a .mtl file on its own: same as the
+ * internal parser, but without building a name -> index table. */
+int tinyobj_parse_mtl_file(tinyobj_material_t **materials_out,
+                           size_t *num_materials_out, const char *mtl_filename,
+                           const char *obj_filename,
+                           file_reader_callback file_reader, void *ctx) {
+  hash_table_t *const no_material_table = NULL;
+
+  return tinyobj_parse_and_index_mtl_file(materials_out, num_materials_out,
+                                          mtl_filename, obj_filename,
+                                          file_reader, ctx, no_material_table);
+}
+
+typedef enum { /* Kind of statement parseLine() recognised on a line. */
+ COMMAND_EMPTY, /* blank line, comment, or unrecognised statement */
+ COMMAND_V, /* "v"  vertex position */
+ COMMAND_VN, /* "vn" normal */
+ COMMAND_VT, /* "vt" texture coordinate */
+ COMMAND_F, /* "f"  face */
+ COMMAND_G, /* "g"  group name */
+ COMMAND_O, /* "o"  object name */
+ COMMAND_USEMTL, /* "usemtl" */
+ COMMAND_MTLLIB /* "mtllib" */
+
+} CommandType;
+
+typedef struct { /* Result of parsing one .obj line. Only the members that
+                  * belong to `type` are written by parseLine(). */
+ float vx, vy, vz; /* COMMAND_V */
+ float nx, ny, nz; /* COMMAND_VN */
+ float tx, ty; /* COMMAND_VT */
+
+ /* @todo { Use dynamic array } */
+ tinyobj_vertex_index_t f[TINYOBJ_MAX_FACES_PER_F_LINE]; /* COMMAND_F: face corners */
+ size_t num_f; /* number of used entries in `f` */
+
+ int f_num_verts[TINYOBJ_MAX_FACES_PER_F_LINE]; /* corner count of each face emitted for the line */
+ size_t num_f_num_verts; /* number of used entries in `f_num_verts` */
+
+ const char *group_name; /* COMMAND_G: points into the line passed to parseLine(), not NUL-terminated */
+ unsigned int group_name_len;
+ int pad0;
+
+ const char *object_name; /* COMMAND_O: points into the line passed to parseLine() */
+ unsigned int object_name_len;
+ int pad1;
+
+ const char *material_name; /* COMMAND_USEMTL: points into the line passed to parseLine() */
+ unsigned int material_name_len;
+ int pad2;
+
+ const char *mtllib_name; /* COMMAND_MTLLIB: points into the line passed to parseLine() */
+ unsigned int mtllib_name_len;
+
+ CommandType type;
+} Command;
+
+/* Length stored for a g / o / mtllib name: length_until_newline() + 1 over
+ * the `remaining` characters of the line, or 0 when nothing follows the
+ * keyword (a zero-length range must not reach length_until_newline()). */
+static unsigned int parse_line_name_len(const char *token, size_t remaining) {
+  if (remaining == 0) {
+    return 0;
+  }
+  return (unsigned int)length_until_newline(token, remaining) + 1;
+}
+
+/* Parse one .obj line into `command`.
+ *
+ * @param[out] command     Receives the statement type and its operands.
+ * @param[in]  p           Start of the line (need not be NUL-terminated).
+ * @param[in]  p_len       Number of characters in the line.
+ * @param[in]  triangulate Non-zero to split a face into a triangle fan
+ *                         (corner 0, corner k-1, corner k).
+ *
+ * Returns 1 when a statement was recognised and stored, 0 otherwise; in the
+ * 0 case command->type is COMMAND_EMPTY. Name pointers stored in `command`
+ * point into `p`, not into a copy.
+ *
+ * Input that does not fit the fixed-size buffers is rejected (return 0)
+ * rather than written out of bounds: a line of 4095 characters or more, a
+ * face with more than TINYOBJ_MAX_FACES_PER_F_LINE corners, or a face whose
+ * triangle fan needs more than TINYOBJ_MAX_FACES_PER_F_LINE indices.
+ */
+static int parseLine(Command *command, const char *p, size_t p_len,
+                     int triangulate) {
+  char linebuf[4096];
+  const char *token;
+
+  command->type = COMMAND_EMPTY;
+
+  if (p_len >= sizeof(linebuf) - 1) {
+    return 0; /* would overflow linebuf */
+  }
+
+  memcpy(linebuf, p, p_len);
+  linebuf[p_len] = '\0';
+
+  token = linebuf;
+
+  /* Skip leading space. */
+  skip_space(&token);
+
+  if (token[0] == '\0') { /* empty line */
+    return 0;
+  }
+
+  if (token[0] == '#') { /* comment line */
+    return 0;
+  }
+
+  /* vertex */
+  if (token[0] == 'v' && IS_SPACE((token[1]))) {
+    float x, y, z;
+    token += 2;
+    parseFloat3(&x, &y, &z, &token);
+    command->vx = x;
+    command->vy = y;
+    command->vz = z;
+    command->type = COMMAND_V;
+    return 1;
+  }
+
+  /* normal */
+  if (token[0] == 'v' && token[1] == 'n' && IS_SPACE((token[2]))) {
+    float x, y, z;
+    token += 3;
+    parseFloat3(&x, &y, &z, &token);
+    command->nx = x;
+    command->ny = y;
+    command->nz = z;
+    command->type = COMMAND_VN;
+    return 1;
+  }
+
+  /* texcoord */
+  if (token[0] == 'v' && token[1] == 't' && IS_SPACE((token[2]))) {
+    float x, y;
+    token += 3;
+    parseFloat2(&x, &y, &token);
+    command->tx = x;
+    command->ty = y;
+    command->type = COMMAND_VT;
+    return 1;
+  }
+
+  /* face */
+  if (token[0] == 'f' && IS_SPACE((token[1]))) {
+    tinyobj_vertex_index_t f[TINYOBJ_MAX_FACES_PER_F_LINE];
+    size_t num_f = 0;
+    size_t k;
+
+    token += 2;
+    skip_space(&token);
+
+    while (!IS_NEW_LINE(token[0])) {
+      tinyobj_vertex_index_t vi;
+      if (num_f >= TINYOBJ_MAX_FACES_PER_F_LINE) {
+        return 0; /* more corners than f[] can hold */
+      }
+      vi = parseRawTriple(&token);
+      skip_space_and_cr(&token);
+
+      f[num_f] = vi;
+      num_f++;
+    }
+
+    if (triangulate) {
+      size_t n = 0;
+
+      /* A fan over num_f corners emits 3 * (num_f - 2) indices. */
+      if (num_f > 2 && 3 * (num_f - 2) > TINYOBJ_MAX_FACES_PER_F_LINE) {
+        return 0; /* would overflow command->f */
+      }
+
+      for (k = 2; k < num_f; k++) {
+        command->f[3 * n + 0] = f[0];
+        command->f[3 * n + 1] = f[k - 1];
+        command->f[3 * n + 2] = f[k];
+
+        command->f_num_verts[n] = 3;
+        n++;
+      }
+      command->num_f = 3 * n;
+      command->num_f_num_verts = n;
+
+    } else {
+      for (k = 0; k < num_f; k++) {
+        command->f[k] = f[k];
+      }
+
+      command->num_f = num_f;
+      command->f_num_verts[0] = (int)num_f;
+      command->num_f_num_verts = 1;
+    }
+
+    command->type = COMMAND_F;
+    return 1;
+  }
+
+  /* use mtl */
+  if ((0 == strncmp(token, "usemtl", 6)) && IS_SPACE((token[6]))) {
+    token += 7;
+
+    skip_space(&token);
+    command->material_name = p + (token - linebuf);
+    command->material_name_len = (unsigned int)length_until_newline(
+        token, (p_len - (size_t)(token - linebuf)) + 1);
+    command->type = COMMAND_USEMTL;
+
+    return 1;
+  }
+
+  /* load mtl */
+  if ((0 == strncmp(token, "mtllib", 6)) && IS_SPACE((token[6]))) {
+    /* By specification, `mtllib` should be appear only once in .obj */
+    token += 7;
+
+    skip_space(&token);
+    command->mtllib_name = p + (token - linebuf);
+    command->mtllib_name_len =
+        parse_line_name_len(token, p_len - (size_t)(token - linebuf));
+    command->type = COMMAND_MTLLIB;
+
+    return 1;
+  }
+
+  /* group name */
+  if (token[0] == 'g' && IS_SPACE((token[1]))) {
+    /* @todo { multiple group name. } */
+    token += 2;
+
+    command->group_name = p + (token - linebuf);
+    command->group_name_len =
+        parse_line_name_len(token, p_len - (size_t)(token - linebuf));
+    command->type = COMMAND_G;
+
+    return 1;
+  }
+
+  /* object name */
+  if (token[0] == 'o' && IS_SPACE((token[1]))) {
+    /* @todo { multiple object name? } */
+    token += 2;
+
+    command->object_name = p + (token - linebuf);
+    command->object_name_len =
+        parse_line_name_len(token, p_len - (size_t)(token - linebuf));
+    command->type = COMMAND_O;
+
+    return 1;
+  }
+
+  return 0;
+}
+
+/* Return the length of the last path component of `filename`, counting the
+ * NUL terminator.
+ *
+ * `filename_length` must itself include the terminator (strlen + 1). When the
+ * name contains no directory separator the whole `filename_length` is
+ * returned. '/' is always a separator; on Windows '\\' is accepted as well.
+ *
+ * The scan never reads before `filename`: the previous implementation looked
+ * at p[-1] one byte in front of the buffer on Windows for separator-less
+ * names, and stepped before the buffer for an empty name on other platforms.
+ * A NULL pointer or a zero length yields 0.
+ */
+static size_t basename_len(const char *filename, size_t filename_length) {
+  size_t i;
+
+  if (filename == NULL || filename_length == 0) {
+    return 0;
+  }
+
+  /* filename[filename_length - 1] is the terminator; walk the real
+   * characters from the last one back to the first. */
+  for (i = filename_length - 1; i > 0; i--) {
+    const char c = filename[i - 1];
+    int is_separator = (c == '/');
+#if defined(_WIN32)
+    is_separator = is_separator || (c == '\\');
+#endif
+    if (is_separator) {
+      /* Everything after the separator, terminator included. */
+      return filename_length - i;
+    }
+  }
+
+  return filename_length;
+}
+
+/* Build a heap-allocated path to the material library by replacing the
+ * basename of `obj_filename` with `mtllib_name`.
+ *
+ * This lets the .mtl file live next to the .obj file: the mtllib name found
+ * in the OBJ is treated as a path relative to the OBJ's directory. Absolute
+ * mtllib paths are not supported.
+ *
+ * Both length arguments are used as sizes that already include the NUL
+ * terminator. The result is allocated with TINYOBJ_MALLOC.
+ */
+static char *generate_mtl_filename(const char *obj_filename,
+                                   size_t obj_filename_length,
+                                   const char *mtllib_name,
+                                   size_t mtllib_name_length) {
+  /* Number of leading bytes of obj_filename that form its directory part. */
+  const size_t dir_length =
+      obj_filename_length - basename_len(obj_filename, obj_filename_length);
+  const size_t total_length = dir_length + mtllib_name_length;
+  char *result = (char *)TINYOBJ_MALLOC(total_length);
+
+  /* Directory prefix first, then the library name (with its terminator)
+   * directly behind it. */
+  memcpy(result, obj_filename, dir_length);
+  strcpy(result + dir_length, mtllib_name);
+
+  return result;
+}
+/*
+ * Parse the OBJ file `obj_filename` into flat attribute arrays, a list of
+ * shapes and (when an mtllib line is present) a list of materials.
+ *
+ * attrib            - receives vertices, normals, texcoords, face indices,
+ *                     per-face vertex counts and per-face material ids.
+ * shapes/num_shapes - receive a TINYOBJ_MALLOC'ed array of shapes and the
+ *                     number of entries actually filled in.
+ * materials_out/num_materials_out - receive the materials produced by
+ *                     tinyobj_parse_and_index_mtl_file (NULL / 0 when no
+ *                     mtllib line was found).
+ * file_reader/ctx   - callback used to obtain the file contents; it is called
+ *                     with ctx, the file name, an is_mtl flag and the OBJ name.
+ * flags             - only TINYOBJ_FLAG_TRIANGULATE is tested here.
+ *
+ * Returns TINYOBJ_SUCCESS, TINYOBJ_ERROR_INVALID_PARAMETER when an output
+ * pointer is NULL or the reader produced no data, or TINYOBJ_ERROR_EMPTY when
+ * get_line_infos() fails. A failure to load the material file is only
+ * reported on stderr and does not fail the parse.
+ *
+ * NOTE(review): the parameter checks run after file_reader has already been
+ * called, and none of the TINYOBJ_MALLOC results are checked for NULL.
+ */
+int tinyobj_parse_obj(tinyobj_attrib_t *attrib, tinyobj_shape_t **shapes,
+ size_t *num_shapes, tinyobj_material_t **materials_out,
+ size_t *num_materials_out, const char *obj_filename,
+ file_reader_callback file_reader, void *ctx,
+ unsigned int flags) {
+ LineInfo *line_infos = NULL;
+ Command *commands = NULL;
+ size_t num_lines = 0;
+
+ size_t num_v = 0;
+ size_t num_vn = 0;
+ size_t num_vt = 0;
+ size_t num_f = 0; /* total face indices over all 'f' lines */
+ size_t num_faces = 0; /* total faces (triangles when triangulating) */
+
+ int mtllib_line_index = -1; /* -1 = no mtllib line seen */
+
+ tinyobj_material_t *materials = NULL;
+ size_t num_materials = 0;
+
+ hash_table_t material_table;
+
+ char *buf = NULL;
+ size_t len = 0;
+ file_reader(ctx, obj_filename, /* is_mtl */ 0, obj_filename, &buf, &len); /* callback is expected to fill buf/len */
+
+ if (len < 1)
+ return TINYOBJ_ERROR_INVALID_PARAMETER;
+ if (attrib == NULL)
+ return TINYOBJ_ERROR_INVALID_PARAMETER;
+ if (shapes == NULL)
+ return TINYOBJ_ERROR_INVALID_PARAMETER;
+ if (num_shapes == NULL)
+ return TINYOBJ_ERROR_INVALID_PARAMETER;
+ if (buf == NULL)
+ return TINYOBJ_ERROR_INVALID_PARAMETER;
+ if (materials_out == NULL)
+ return TINYOBJ_ERROR_INVALID_PARAMETER;
+ if (num_materials_out == NULL)
+ return TINYOBJ_ERROR_INVALID_PARAMETER;
+
+ tinyobj_attrib_init(attrib);
+
+ /* 1. create line data: get_line_infos() yields one (pos, len) entry per
+ * line of buf; these are consumed by the parse loop below. */
+ if (get_line_infos(buf, len, &line_infos, &num_lines) != 0) {
+ return TINYOBJ_ERROR_EMPTY;
+ }
+
+ commands = (Command *)TINYOBJ_MALLOC(sizeof(Command) * num_lines); /* one Command per line */
+
+ create_hash_table(HASH_TABLE_DEFAULT_SIZE, &material_table);
+
+ /* 2. parse each line, tallying element counts so the output arrays can be
+ * sized exactly, and remember the last mtllib line encountered. */
+ {
+ size_t i = 0;
+ for (i = 0; i < num_lines; i++) {
+ int ret = parseLine(&commands[i], &buf[line_infos[i].pos],
+ line_infos[i].len, flags & TINYOBJ_FLAG_TRIANGULATE);
+ if (ret) {
+ if (commands[i].type == COMMAND_V) {
+ num_v++;
+ } else if (commands[i].type == COMMAND_VN) {
+ num_vn++;
+ } else if (commands[i].type == COMMAND_VT) {
+ num_vt++;
+ } else if (commands[i].type == COMMAND_F) {
+ num_f += commands[i].num_f;
+ num_faces += commands[i].num_f_num_verts;
+ }
+
+ if (commands[i].type == COMMAND_MTLLIB) {
+ mtllib_line_index = (int)i;
+ }
+ }
+ }
+ }
+
+ /* line_infos are not used anymore. Release memory. */
+ if (line_infos) {
+ TINYOBJ_FREE(line_infos);
+ }
+
+ /* Load material (if it exists) */
+ if (mtllib_line_index >= 0 && commands[mtllib_line_index].mtllib_name &&
+ commands[mtllib_line_index].mtllib_name_len > 0) {
+ /* Maximum length allowed by Linux - higher than Windows and macOS */
+ size_t obj_filename_len = my_strnlen(obj_filename, 4096 + 255) + 1; /* +1: count the NUL */
+ char *mtl_filename;
+ char *mtllib_name;
+ size_t mtllib_name_len = 0;
+ int ret;
+
+ mtllib_name_len =
+ length_until_line_feed(commands[mtllib_line_index].mtllib_name,
+ commands[mtllib_line_index].mtllib_name_len);
+
+ mtllib_name =
+ my_strndup(commands[mtllib_line_index].mtllib_name, mtllib_name_len);
+
+ /* allow for NUL terminator */
+ mtllib_name_len++;
+ mtl_filename = generate_mtl_filename(obj_filename, obj_filename_len,
+ mtllib_name, mtllib_name_len);
+
+ ret = tinyobj_parse_and_index_mtl_file(&materials, &num_materials,
+ mtl_filename, obj_filename,
+ file_reader, ctx, &material_table);
+
+ if (ret != TINYOBJ_SUCCESS) {
+ /* warning. */
+ fprintf(stderr, "TINYOBJ: Failed to parse material file '%s': %d\n",
+ mtl_filename, ret);
+ }
+ TINYOBJ_FREE(mtl_filename);
+ TINYOBJ_FREE(mtllib_name);
+ }
+
+ /* Construct attributes: allocate the output arrays from the counts gathered
+ * in step 2, then replay the commands in file order to fill them. */
+
+ {
+ size_t v_count = 0;
+ size_t n_count = 0;
+ size_t t_count = 0;
+ size_t f_count = 0;
+ size_t face_count = 0;
+ int material_id = -1; /* -1 = default unknown material. */
+ size_t i = 0;
+
+ attrib->vertices = (float *)TINYOBJ_MALLOC(sizeof(float) * num_v * 3);
+ attrib->num_vertices = (unsigned int)num_v;
+ attrib->normals = (float *)TINYOBJ_MALLOC(sizeof(float) * num_vn * 3);
+ attrib->num_normals = (unsigned int)num_vn;
+ attrib->texcoords = (float *)TINYOBJ_MALLOC(sizeof(float) * num_vt * 2);
+ attrib->num_texcoords = (unsigned int)num_vt;
+ attrib->faces = (tinyobj_vertex_index_t *)TINYOBJ_MALLOC(
+ sizeof(tinyobj_vertex_index_t) * num_f);
+ attrib->num_faces = (unsigned int)num_f;
+ attrib->face_num_verts = (int *)TINYOBJ_MALLOC(sizeof(int) * num_faces);
+ attrib->material_ids = (int *)TINYOBJ_MALLOC(sizeof(int) * num_faces);
+ attrib->num_face_num_verts = (unsigned int)num_faces;
+
+ for (i = 0; i < num_lines; i++) {
+ if (commands[i].type == COMMAND_EMPTY) {
+ continue;
+ } else if (commands[i].type == COMMAND_USEMTL) {
+ /* Switch the current material: look the name up in material_table.
+ * A name that is not in the table selects the "unknown" id -1; an
+ * empty name leaves the current material unchanged. */
+ if (commands[i].material_name && commands[i].material_name_len > 0) {
+ /* Create a null terminated string */
+ char *material_name_null_term =
+ (char *)TINYOBJ_MALLOC(commands[i].material_name_len + 1);
+ memcpy((void *)material_name_null_term,
+ (const void *)commands[i].material_name,
+ commands[i].material_name_len);
+ material_name_null_term[commands[i].material_name_len] = 0;
+
+ if (hash_table_exists(material_name_null_term, &material_table))
+ material_id =
+ (int)hash_table_get(material_name_null_term, &material_table);
+ else
+ material_id = -1;
+
+ TINYOBJ_FREE(material_name_null_term);
+ }
+ } else if (commands[i].type == COMMAND_V) {
+ attrib->vertices[3 * v_count + 0] = commands[i].vx;
+ attrib->vertices[3 * v_count + 1] = commands[i].vy;
+ attrib->vertices[3 * v_count + 2] = commands[i].vz;
+ v_count++;
+ } else if (commands[i].type == COMMAND_VN) {
+ attrib->normals[3 * n_count + 0] = commands[i].nx;
+ attrib->normals[3 * n_count + 1] = commands[i].ny;
+ attrib->normals[3 * n_count + 2] = commands[i].nz;
+ n_count++;
+ } else if (commands[i].type == COMMAND_VT) {
+ attrib->texcoords[2 * t_count + 0] = commands[i].tx;
+ attrib->texcoords[2 * t_count + 1] = commands[i].ty;
+ t_count++;
+ } else if (commands[i].type == COMMAND_F) {
+ size_t k = 0;
+ for (k = 0; k < commands[i].num_f; k++) {
+ tinyobj_vertex_index_t vi = commands[i].f[k];
+ int v_idx = fixIndex(vi.v_idx, v_count); /* passes the count seen so far; presumably resolves relative indices -- verify in fixIndex */
+ int vn_idx = fixIndex(vi.vn_idx, n_count);
+ int vt_idx = fixIndex(vi.vt_idx, t_count);
+ attrib->faces[f_count + k].v_idx = v_idx;
+ attrib->faces[f_count + k].vn_idx = vn_idx;
+ attrib->faces[f_count + k].vt_idx = vt_idx;
+ }
+
+ for (k = 0; k < commands[i].num_f_num_verts; k++) {
+ attrib->material_ids[face_count + k] = material_id; /* material active when the face was declared */
+ attrib->face_num_verts[face_count + k] = commands[i].f_num_verts[k];
+ }
+
+ f_count += commands[i].num_f;
+ face_count += commands[i].num_f_num_verts;
+ }
+ }
+ }
+
+ /* 5. Construct shape information: every 'o' or 'g' line starts a new shape
+ * and the faces seen since the previous one are flushed into *shapes.
+ * NOTE(review): face_count in this block advances once per 'f' line, while
+ * the attribute pass above advances by num_f_num_verts, so shape offsets
+ * and lengths are in 'f'-line units -- confirm this is intended when
+ * triangulating polygons with more than three vertices. */
+ {
+ unsigned int face_count = 0;
+ size_t i = 0;
+ size_t n = 0;
+ size_t shape_idx = 0;
+
+ const char *shape_name = NULL;
+ unsigned int shape_name_len = 0;
+ const char *prev_shape_name = NULL;
+ unsigned int prev_shape_name_len = 0;
+ unsigned int prev_shape_face_offset = 0; /* only read by the final length check */
+ unsigned int prev_face_offset = 0;
+ tinyobj_shape_t prev_shape = {NULL, 0, 0}; /* never modified below, so its face_offset stays 0 */
+
+ /* Find the number of shapes in .obj */
+ for (i = 0; i < num_lines; i++) {
+ if (commands[i].type == COMMAND_O || commands[i].type == COMMAND_G) {
+ n++;
+ }
+ }
+
+ /* Allocate array of shapes with the maximum possible size (+1 for an
+ * unnamed group/object).
+ * The actual number of shapes found in the .obj is determined below. */
+ (*shapes) =
+ (tinyobj_shape_t *)TINYOBJ_MALLOC(sizeof(tinyobj_shape_t) * (n + 1));
+
+ for (i = 0; i < num_lines; i++) {
+ if (commands[i].type == COMMAND_O || commands[i].type == COMMAND_G) {
+ if (commands[i].type == COMMAND_O) {
+ shape_name = commands[i].object_name;
+ shape_name_len = commands[i].object_name_len;
+ } else {
+ shape_name = commands[i].group_name;
+ shape_name_len = commands[i].group_name_len;
+ }
+
+ if (face_count == 0) {
+ /* 'o' or 'g' appears before any 'f' */
+ prev_shape_name = shape_name;
+ prev_shape_name_len = shape_name_len;
+ prev_shape_face_offset = face_count;
+ prev_face_offset = face_count;
+ } else {
+ if (shape_idx == 0) {
+ /* First shape to be flushed: faces were seen before this 'o' or
+ * 'g' line and nothing has been emitted yet. */
+ (*shapes)[shape_idx].name = my_strndup(
+ prev_shape_name, prev_shape_name_len); /* may be NULL */
+ (*shapes)[shape_idx].face_offset = prev_shape.face_offset;
+ (*shapes)[shape_idx].length = face_count - prev_face_offset;
+ shape_idx++;
+
+ prev_face_offset = face_count;
+
+ } else {
+ if ((face_count - prev_face_offset) > 0) { /* skip shapes that collected no faces */
+ (*shapes)[shape_idx].name =
+ my_strndup(prev_shape_name, prev_shape_name_len);
+ (*shapes)[shape_idx].face_offset = prev_face_offset;
+ (*shapes)[shape_idx].length = face_count - prev_face_offset;
+ shape_idx++;
+ prev_face_offset = face_count;
+ }
+ }
+
+ /* Record shape info for succeeding 'o' or 'g' command. */
+ prev_shape_name = shape_name;
+ prev_shape_name_len = shape_name_len;
+ prev_shape_face_offset = face_count;
+ }
+ }
+ if (commands[i].type == COMMAND_F) {
+ face_count++;
+ }
+ }
+
+ if ((face_count - prev_face_offset) > 0) { /* flush the trailing shape */
+ size_t length = face_count - prev_shape_face_offset;
+ if (length > 0) {
+ (*shapes)[shape_idx].name =
+ my_strndup(prev_shape_name, prev_shape_name_len);
+ (*shapes)[shape_idx].face_offset = prev_face_offset;
+ (*shapes)[shape_idx].length = face_count - prev_face_offset;
+ shape_idx++;
+ }
+ } else {
+ /* No 'f' line was seen since the last flushed shape, so the pending
+ * shape information is discarded. */
+ }
+
+ (*num_shapes) = shape_idx;
+ }
+
+ if (commands) {
+ TINYOBJ_FREE(commands);
+ }
+
+ destroy_hash_table(&material_table);
+
+ (*materials_out) = materials;
+ (*num_materials_out) = num_materials;
+
+ return TINYOBJ_SUCCESS;
+}
+
+/* Put `attrib` into the empty state: every array pointer is NULL and every
+ * element count is zero. Nothing is freed here. */
+void tinyobj_attrib_init(tinyobj_attrib_t *attrib) {
+  /* Element counts. */
+  attrib->num_vertices = 0;
+  attrib->num_normals = 0;
+  attrib->num_texcoords = 0;
+  attrib->num_faces = 0;
+  attrib->num_face_num_verts = 0;
+
+  /* Data arrays. */
+  attrib->vertices = NULL;
+  attrib->normals = NULL;
+  attrib->texcoords = NULL;
+  attrib->faces = NULL;
+  attrib->face_num_verts = NULL;
+  attrib->material_ids = NULL;
+}
+
+/* Release every array owned by `attrib` and return it to the empty state.
+ *
+ * The pointers are reset after freeing so that calling this function twice on
+ * the same object, or reusing the object afterwards, no longer touches freed
+ * memory. A NULL `attrib` is ignored.
+ *
+ * The per-pointer NULL guards are kept because TINYOBJ_FREE may be mapped to
+ * a custom deallocator that does not accept NULL.
+ */
+void tinyobj_attrib_free(tinyobj_attrib_t *attrib) {
+  if (attrib == NULL) {
+    return;
+  }
+
+  if (attrib->vertices) {
+    TINYOBJ_FREE(attrib->vertices);
+  }
+  if (attrib->normals) {
+    TINYOBJ_FREE(attrib->normals);
+  }
+  if (attrib->texcoords) {
+    TINYOBJ_FREE(attrib->texcoords);
+  }
+  if (attrib->faces) {
+    TINYOBJ_FREE(attrib->faces);
+  }
+  if (attrib->face_num_verts) {
+    TINYOBJ_FREE(attrib->face_num_verts);
+  }
+  if (attrib->material_ids) {
+    TINYOBJ_FREE(attrib->material_ids);
+  }
+
+  /* Drop the dangling pointers and stale counts. */
+  tinyobj_attrib_init(attrib);
+}
+
+/* Free the name of each of the first `num_shapes` entries of `shapes`, then
+ * the array itself. A NULL array is ignored. */
+void tinyobj_shapes_free(tinyobj_shape_t *shapes, size_t num_shapes) {
+  size_t idx = 0;
+
+  if (!shapes) {
+    return;
+  }
+
+  while (idx < num_shapes) {
+    if (shapes[idx].name) {
+      TINYOBJ_FREE(shapes[idx].name);
+    }
+    idx++;
+  }
+
+  TINYOBJ_FREE(shapes);
+}
+
+/* Free the name and all texture-name strings of each of the first
+ * `num_materials` entries of `materials`, then the array itself. A NULL
+ * array is ignored. */
+void tinyobj_materials_free(tinyobj_material_t *materials,
+                            size_t num_materials) {
+  tinyobj_material_t *mat;
+  tinyobj_material_t *end;
+
+  if (!materials) {
+    return;
+  }
+
+  end = materials + num_materials;
+  for (mat = materials; mat != end; mat++) {
+    if (mat->name) {
+      TINYOBJ_FREE(mat->name);
+    }
+    if (mat->ambient_texname) {
+      TINYOBJ_FREE(mat->ambient_texname);
+    }
+    if (mat->diffuse_texname) {
+      TINYOBJ_FREE(mat->diffuse_texname);
+    }
+    if (mat->specular_texname) {
+      TINYOBJ_FREE(mat->specular_texname);
+    }
+    if (mat->specular_highlight_texname) {
+      TINYOBJ_FREE(mat->specular_highlight_texname);
+    }
+    if (mat->bump_texname) {
+      TINYOBJ_FREE(mat->bump_texname);
+    }
+    if (mat->displacement_texname) {
+      TINYOBJ_FREE(mat->displacement_texname);
+    }
+    if (mat->alpha_texname) {
+      TINYOBJ_FREE(mat->alpha_texname);
+    }
+  }
+
+  TINYOBJ_FREE(materials);
+}
+#endif /* TINYOBJ_LOADER_C_IMPLEMENTATION */
diff --git a/vendor/vk_mem_alloc.h b/vendor/vk_mem_alloc.h
new file mode 100644
index 0000000..6f71d5b
--- /dev/null
+++ b/vendor/vk_mem_alloc.h
@@ -0,0 +1,19111 @@
+//
+// Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+
+#ifndef AMD_VULKAN_MEMORY_ALLOCATOR_H
+#define AMD_VULKAN_MEMORY_ALLOCATOR_H
+
+/** \mainpage Vulkan Memory Allocator
+
+<b>Version 3.2.1</b>
+
+Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. \n
+License: MIT \n
+See also: [product page on GPUOpen](https://gpuopen.com/gaming-product/vulkan-memory-allocator/),
+[repository on GitHub](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator)
+
+
+<b>API documentation divided into groups:</b> [Topics](topics.html)
+
+<b>General documentation chapters:</b>
+
+- <b>User guide</b>
+ - \subpage quick_start
+ - [Project setup](@ref quick_start_project_setup)
+ - [Initialization](@ref quick_start_initialization)
+ - [Resource allocation](@ref quick_start_resource_allocation)
+ - \subpage choosing_memory_type
+ - [Usage](@ref choosing_memory_type_usage)
+ - [Required and preferred flags](@ref choosing_memory_type_required_preferred_flags)
+ - [Explicit memory types](@ref choosing_memory_type_explicit_memory_types)
+ - [Custom memory pools](@ref choosing_memory_type_custom_memory_pools)
+ - [Dedicated allocations](@ref choosing_memory_type_dedicated_allocations)
+ - \subpage memory_mapping
+ - [Copy functions](@ref memory_mapping_copy_functions)
+ - [Mapping functions](@ref memory_mapping_mapping_functions)
+ - [Persistently mapped memory](@ref memory_mapping_persistently_mapped_memory)
+ - [Cache flush and invalidate](@ref memory_mapping_cache_control)
+ - \subpage staying_within_budget
+ - [Querying for budget](@ref staying_within_budget_querying_for_budget)
+ - [Controlling memory usage](@ref staying_within_budget_controlling_memory_usage)
+ - \subpage resource_aliasing
+ - \subpage custom_memory_pools
+ - [Choosing memory type index](@ref custom_memory_pools_MemTypeIndex)
+ - [When not to use custom pools](@ref custom_memory_pools_when_not_use)
+ - [Linear allocation algorithm](@ref linear_algorithm)
+ - [Free-at-once](@ref linear_algorithm_free_at_once)
+ - [Stack](@ref linear_algorithm_stack)
+ - [Double stack](@ref linear_algorithm_double_stack)
+ - [Ring buffer](@ref linear_algorithm_ring_buffer)
+ - \subpage defragmentation
+ - \subpage statistics
+ - [Numeric statistics](@ref statistics_numeric_statistics)
+ - [JSON dump](@ref statistics_json_dump)
+ - \subpage allocation_annotation
+ - [Allocation user data](@ref allocation_user_data)
+ - [Allocation names](@ref allocation_names)
+ - \subpage virtual_allocator
+ - \subpage debugging_memory_usage
+ - [Memory initialization](@ref debugging_memory_usage_initialization)
+ - [Margins](@ref debugging_memory_usage_margins)
+ - [Corruption detection](@ref debugging_memory_usage_corruption_detection)
+ - [Leak detection features](@ref debugging_memory_usage_leak_detection)
+ - \subpage other_api_interop
+- \subpage usage_patterns
+ - [GPU-only resource](@ref usage_patterns_gpu_only)
+ - [Staging copy for upload](@ref usage_patterns_staging_copy_upload)
+ - [Readback](@ref usage_patterns_readback)
+ - [Advanced data uploading](@ref usage_patterns_advanced_data_uploading)
+ - [Other use cases](@ref usage_patterns_other_use_cases)
+- \subpage configuration
+ - [Pointers to Vulkan functions](@ref config_Vulkan_functions)
+ - [Custom host memory allocator](@ref custom_memory_allocator)
+ - [Device memory allocation callbacks](@ref allocation_callbacks)
+ - [Device heap memory limit](@ref heap_memory_limit)
+- <b>Extension support</b>
+ - \subpage vk_khr_dedicated_allocation
+ - \subpage enabling_buffer_device_address
+ - \subpage vk_ext_memory_priority
+ - \subpage vk_amd_device_coherent_memory
+ - \subpage vk_khr_external_memory_win32
+- \subpage general_considerations
+ - [Thread safety](@ref general_considerations_thread_safety)
+ - [Versioning and compatibility](@ref general_considerations_versioning_and_compatibility)
+ - [Validation layer warnings](@ref general_considerations_validation_layer_warnings)
+ - [Allocation algorithm](@ref general_considerations_allocation_algorithm)
+ - [Features not supported](@ref general_considerations_features_not_supported)
+
+\defgroup group_init Library initialization
+
+\brief API elements related to the initialization and management of the entire library, especially #VmaAllocator object.
+
+\defgroup group_alloc Memory allocation
+
+\brief API elements related to the allocation, deallocation, and management of Vulkan memory, buffers, images.
+Most basic ones being: vmaCreateBuffer(), vmaCreateImage().
+
+\defgroup group_virtual Virtual allocator
+
+\brief API elements related to the mechanism of \ref virtual_allocator - using the core allocation algorithm
+for user-defined purpose without allocating any real GPU memory.
+
+\defgroup group_stats Statistics
+
+\brief API elements that query current status of the allocator, from memory usage, budget, to full dump of the internal state in JSON format.
+See documentation chapter: \ref statistics.
+*/
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if !defined(VULKAN_H_)
+#include <vulkan/vulkan.h>
+#endif
+
+#if !defined(VMA_VULKAN_VERSION) // May be predefined by the user; otherwise taken from the newest VK_VERSION_1_x macro that is defined.
+ #if defined(VK_VERSION_1_4)
+ #define VMA_VULKAN_VERSION 1004000 // Vulkan 1.4
+ #elif defined(VK_VERSION_1_3)
+ #define VMA_VULKAN_VERSION 1003000 // Vulkan 1.3
+ #elif defined(VK_VERSION_1_2)
+ #define VMA_VULKAN_VERSION 1002000 // Vulkan 1.2
+ #elif defined(VK_VERSION_1_1)
+ #define VMA_VULKAN_VERSION 1001000 // Vulkan 1.1
+ #else
+ #define VMA_VULKAN_VERSION 1000000 // Vulkan 1.0: none of the newer version macros is defined
+ #endif
+#endif
+
+#if defined(__ANDROID__) && defined(VK_NO_PROTOTYPES) && VMA_STATIC_VULKAN_FUNCTIONS // Android only: declare the Vulkan entry points as extern function pointers -- presumably because VK_NO_PROTOTYPES leaves them undeclared; verify against the Vulkan headers.
+ extern PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr;
+ extern PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr;
+ extern PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties;
+ extern PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties;
+ extern PFN_vkAllocateMemory vkAllocateMemory;
+ extern PFN_vkFreeMemory vkFreeMemory;
+ extern PFN_vkMapMemory vkMapMemory;
+ extern PFN_vkUnmapMemory vkUnmapMemory;
+ extern PFN_vkFlushMappedMemoryRanges vkFlushMappedMemoryRanges;
+ extern PFN_vkInvalidateMappedMemoryRanges vkInvalidateMappedMemoryRanges;
+ extern PFN_vkBindBufferMemory vkBindBufferMemory;
+ extern PFN_vkBindImageMemory vkBindImageMemory;
+ extern PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements;
+ extern PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements;
+ extern PFN_vkCreateBuffer vkCreateBuffer;
+ extern PFN_vkDestroyBuffer vkDestroyBuffer;
+ extern PFN_vkCreateImage vkCreateImage;
+ extern PFN_vkDestroyImage vkDestroyImage;
+ extern PFN_vkCmdCopyBuffer vkCmdCopyBuffer;
+ #if VMA_VULKAN_VERSION >= 1001000 // entry points declared only when VMA_VULKAN_VERSION is at least 1001000 (Vulkan 1.1)
+ extern PFN_vkGetBufferMemoryRequirements2 vkGetBufferMemoryRequirements2;
+ extern PFN_vkGetImageMemoryRequirements2 vkGetImageMemoryRequirements2;
+ extern PFN_vkBindBufferMemory2 vkBindBufferMemory2;
+ extern PFN_vkBindImageMemory2 vkBindImageMemory2;
+ extern PFN_vkGetPhysicalDeviceMemoryProperties2 vkGetPhysicalDeviceMemoryProperties2;
+ #endif // #if VMA_VULKAN_VERSION >= 1001000
+#endif // #if defined(__ANDROID__) && VMA_STATIC_VULKAN_FUNCTIONS && VK_NO_PROTOTYPES
+
+#if !defined(VMA_DEDICATED_ALLOCATION) // Unless predefined: 1 when both VK_KHR_get_memory_requirements2 and VK_KHR_dedicated_allocation are non-zero, else 0.
+ #if VK_KHR_get_memory_requirements2 && VK_KHR_dedicated_allocation
+ #define VMA_DEDICATED_ALLOCATION 1
+ #else
+ #define VMA_DEDICATED_ALLOCATION 0
+ #endif
+#endif
+
+#if !defined(VMA_BIND_MEMORY2) // Unless predefined: 1 when VK_KHR_bind_memory2 is non-zero, else 0.
+ #if VK_KHR_bind_memory2
+ #define VMA_BIND_MEMORY2 1
+ #else
+ #define VMA_BIND_MEMORY2 0
+ #endif
+#endif
+
+#if !defined(VMA_MEMORY_BUDGET) // Unless predefined: 1 when VK_EXT_memory_budget is non-zero and either VK_KHR_get_physical_device_properties2 is non-zero or VMA_VULKAN_VERSION >= 1001000.
+ #if VK_EXT_memory_budget && (VK_KHR_get_physical_device_properties2 || VMA_VULKAN_VERSION >= 1001000)
+ #define VMA_MEMORY_BUDGET 1
+ #else
+ #define VMA_MEMORY_BUDGET 0
+ #endif
+#endif
+
+// Defined to 1 when VK_KHR_buffer_device_address device extension or equivalent core Vulkan 1.2 feature is defined in its headers.
+#if !defined(VMA_BUFFER_DEVICE_ADDRESS)
+ #if VK_KHR_buffer_device_address || VMA_VULKAN_VERSION >= 1002000
+ #define VMA_BUFFER_DEVICE_ADDRESS 1
+ #else
+ #define VMA_BUFFER_DEVICE_ADDRESS 0
+ #endif
+#endif
+
+// Defined to 1 when VK_EXT_memory_priority device extension is defined in Vulkan headers.
+#if !defined(VMA_MEMORY_PRIORITY)
+ #if VK_EXT_memory_priority
+ #define VMA_MEMORY_PRIORITY 1
+ #else
+ #define VMA_MEMORY_PRIORITY 0
+ #endif
+#endif
+
+// Defined to 1 when VK_KHR_maintenance4 device extension is defined in Vulkan headers.
+#if !defined(VMA_KHR_MAINTENANCE4)
+ #if VK_KHR_maintenance4
+ #define VMA_KHR_MAINTENANCE4 1
+ #else
+ #define VMA_KHR_MAINTENANCE4 0
+ #endif
+#endif
+
+// Defined to 1 when VK_KHR_maintenance5 device extension is defined in Vulkan headers.
+#if !defined(VMA_KHR_MAINTENANCE5)
+ #if VK_KHR_maintenance5
+ #define VMA_KHR_MAINTENANCE5 1
+ #else
+ #define VMA_KHR_MAINTENANCE5 0
+ #endif
+#endif
+
+
+// Defined to 1 when VK_KHR_external_memory device extension is defined in Vulkan headers.
+#if !defined(VMA_EXTERNAL_MEMORY)
+ #if VK_KHR_external_memory
+ #define VMA_EXTERNAL_MEMORY 1
+ #else
+ #define VMA_EXTERNAL_MEMORY 0
+ #endif
+#endif
+
+// Defined to 1 when VK_KHR_external_memory_win32 device extension is defined in Vulkan headers.
+#if !defined(VMA_EXTERNAL_MEMORY_WIN32)
+ #if VK_KHR_external_memory_win32
+ #define VMA_EXTERNAL_MEMORY_WIN32 1
+ #else
+ #define VMA_EXTERNAL_MEMORY_WIN32 0
+ #endif
+#endif
+
+// Define these macros to decorate all public functions with additional code,
+// before and after returned type, appropriately. This may be useful for
+// exporting the functions when compiling VMA as a separate library. Example:
+// #define VMA_CALL_PRE __declspec(dllexport)
+// #define VMA_CALL_POST __cdecl
+#ifndef VMA_CALL_PRE
+ #define VMA_CALL_PRE
+#endif
+#ifndef VMA_CALL_POST
+ #define VMA_CALL_POST
+#endif
+
+// Define this macro to decorate pNext pointers with an attribute specifying the Vulkan
+// structure that will be extended via the pNext chain.
+#ifndef VMA_EXTENDS_VK_STRUCT
+ #define VMA_EXTENDS_VK_STRUCT(vkStruct)
+#endif
+
+// Define this macro to decorate pointers with an attribute specifying the
+// length of the array they point to if they are not null.
+//
+// The length may be one of
+// - The name of another parameter in the argument list where the pointer is declared
+// - The name of another member in the struct where the pointer is declared
+// - The name of a member of a struct type, meaning the value of that member in
+// the context of the call. For example
+// VMA_LEN_IF_NOT_NULL("VkPhysicalDeviceMemoryProperties::memoryHeapCount"),
+// this means the number of memory heaps available in the device associated
+// with the VmaAllocator being dealt with.
+#ifndef VMA_LEN_IF_NOT_NULL
+ #define VMA_LEN_IF_NOT_NULL(len)
+#endif
+
+// The VMA_NULLABLE macro is defined to be _Nullable when compiling with Clang.
+// see: https://clang.llvm.org/docs/AttributeReference.html#nullable
+#ifndef VMA_NULLABLE
+ #ifdef __clang__
+ #define VMA_NULLABLE _Nullable
+ #else
+ #define VMA_NULLABLE
+ #endif
+#endif
+
+// The VMA_NOT_NULL macro is defined to be _Nonnull when compiling with Clang.
+// see: https://clang.llvm.org/docs/AttributeReference.html#nonnull
+#ifndef VMA_NOT_NULL
+ #ifdef __clang__
+ #define VMA_NOT_NULL _Nonnull
+ #else
+ #define VMA_NOT_NULL
+ #endif
+#endif
+
+// If non-dispatchable handles are represented as pointers then we can give
+// them nullability annotations
+#ifndef VMA_NOT_NULL_NON_DISPATCHABLE
+ #if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__) ) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(__powerpc64__)
+ #define VMA_NOT_NULL_NON_DISPATCHABLE VMA_NOT_NULL
+ #else
+ #define VMA_NOT_NULL_NON_DISPATCHABLE
+ #endif
+#endif
+
+#ifndef VMA_NULLABLE_NON_DISPATCHABLE
+ #if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__) ) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(__powerpc64__)
+ #define VMA_NULLABLE_NON_DISPATCHABLE VMA_NULLABLE
+ #else
+ #define VMA_NULLABLE_NON_DISPATCHABLE
+ #endif
+#endif
+
+#ifndef VMA_STATS_STRING_ENABLED // Defaults to 1 unless predefined; presumably gates the statistics-string (JSON dump) API -- verify where the macro is used.
+ #define VMA_STATS_STRING_ENABLED 1
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+//
+// INTERFACE
+//
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+
+// Sections for managing code placement in file, only for development purposes e.g. for convenient folding inside an IDE.
+#ifndef _VMA_ENUM_DECLARATIONS
+
+/**
+\addtogroup group_init
+@{
+*/
+
+/// Flags for created #VmaAllocator.
+typedef enum VmaAllocatorCreateFlagBits
+{
+ /** \brief Allocator and all objects created from it will not be synchronized internally, so you must guarantee they are used from only one thread at a time or synchronized externally by you.
+
+ Using this flag may increase performance because internal mutexes are not used.
+ */
+ VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT = 0x00000001,
+ /** \brief Enables usage of VK_KHR_dedicated_allocation extension.
+
+ The flag works only if VmaAllocatorCreateInfo::vulkanApiVersion `== VK_API_VERSION_1_0`.
+ When it is `VK_API_VERSION_1_1`, the flag is ignored because the extension has been promoted to Vulkan 1.1.
+
+ Using this extension will automatically allocate dedicated blocks of memory for
+ some buffers and images instead of suballocating place for them out of bigger
+ memory blocks (as if you explicitly used #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT
+ flag) when it is recommended by the driver. It may improve performance on some
+ GPUs.
+
+ You may set this flag only if you found out that following device extensions are
+ supported, you enabled them while creating Vulkan device passed as
+ VmaAllocatorCreateInfo::device, and you want them to be used internally by this
+ library:
+
+ - VK_KHR_get_memory_requirements2 (device extension)
+ - VK_KHR_dedicated_allocation (device extension)
+
+ When this flag is set, you can experience following warnings reported by Vulkan
+ validation layer. You can ignore them.
+
+ > vkBindBufferMemory(): Binding memory to buffer 0x2d but vkGetBufferMemoryRequirements() has not been called on that buffer.
+ */
+ VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT = 0x00000002,
+ /**
+ Enables usage of VK_KHR_bind_memory2 extension.
+
+ The flag works only if VmaAllocatorCreateInfo::vulkanApiVersion `== VK_API_VERSION_1_0`.
+ When it is `VK_API_VERSION_1_1`, the flag is ignored because the extension has been promoted to Vulkan 1.1.
+
+ You may set this flag only if you found out that this device extension is supported,
+ you enabled it while creating Vulkan device passed as VmaAllocatorCreateInfo::device,
+ and you want it to be used internally by this library.
+
+ The extension provides functions `vkBindBufferMemory2KHR` and `vkBindImageMemory2KHR`,
+ which allow to pass a chain of `pNext` structures while binding.
+ This flag is required if you use `pNext` parameter in vmaBindBufferMemory2() or vmaBindImageMemory2().
+ */
+ VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT = 0x00000004,
+ /**
+ Enables usage of VK_EXT_memory_budget extension.
+
+ You may set this flag only if you found out that this device extension is supported,
+ you enabled it while creating Vulkan device passed as VmaAllocatorCreateInfo::device,
+ and you want it to be used internally by this library, along with another instance extension
+ VK_KHR_get_physical_device_properties2, which is required by it (or Vulkan 1.1, where this extension is promoted).
+
+ The extension provides query for current memory usage and budget, which will probably
+ be more accurate than an estimation used by the library otherwise.
+ */
+ VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT = 0x00000008,
+ /**
+ Enables usage of VK_AMD_device_coherent_memory extension.
+
+ You may set this flag only if you:
+
+ - found out that this device extension is supported and enabled it while creating Vulkan device passed as VmaAllocatorCreateInfo::device,
+ - checked that `VkPhysicalDeviceCoherentMemoryFeaturesAMD::deviceCoherentMemory` is true and set it while creating the Vulkan device,
+ - want it to be used internally by this library.
+
+ The extension and accompanying device feature provide access to memory types with
+ `VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD` and `VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD` flags.
+ They are useful mostly for writing breadcrumb markers - a common method for debugging GPU crash/hang/TDR.
+
+ When the extension is not enabled, such memory types are still enumerated, but their usage is illegal.
+ To protect from this error, if you don't create the allocator with this flag, it will refuse to allocate any memory or create a custom pool in such memory type,
+ returning `VK_ERROR_FEATURE_NOT_PRESENT`.
+ */
+ VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT = 0x00000010,
+ /**
+ Enables usage of "buffer device address" feature, which allows you to use function
+ `vkGetBufferDeviceAddress*` to get raw GPU pointer to a buffer and pass it for usage inside a shader.
+
+ You may set this flag only if you:
+
+ 1. (For Vulkan version < 1.2) Found as available and enabled device extension
+ VK_KHR_buffer_device_address.
+ This extension is promoted to core Vulkan 1.2.
+ 2. Found as available and enabled device feature `VkPhysicalDeviceBufferDeviceAddressFeatures::bufferDeviceAddress`.
+
+ When this flag is set, you can create buffers with `VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT` using VMA.
+ The library automatically adds `VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT` to
+ allocated memory blocks wherever it might be needed.
+
+ For more information, see documentation chapter \ref enabling_buffer_device_address.
+ */
+ VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT = 0x00000020,
+ /**
+ Enables usage of VK_EXT_memory_priority extension in the library.
+
+ You may set this flag only if you found available and enabled this device extension,
+ along with `VkPhysicalDeviceMemoryPriorityFeaturesEXT::memoryPriority == VK_TRUE`,
+ while creating Vulkan device passed as VmaAllocatorCreateInfo::device.
+
+ When this flag is used, VmaAllocationCreateInfo::priority and VmaPoolCreateInfo::priority
+ are used to set priorities of allocated Vulkan memory. Without it, these variables are ignored.
+
+ A priority must be a floating-point value between 0 and 1, indicating the priority of the allocation relative to other memory allocations.
+ Larger values are higher priority. The granularity of the priorities is implementation-dependent.
+ It is automatically passed to every call to `vkAllocateMemory` done by the library using structure `VkMemoryPriorityAllocateInfoEXT`.
+ The value to be used for default priority is 0.5.
+ For more details, see the documentation of the VK_EXT_memory_priority extension.
+ */
+ VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT = 0x00000040,
+ /**
+ Enables usage of VK_KHR_maintenance4 extension in the library.
+
+ You may set this flag only if you found available and enabled this device extension,
+ while creating Vulkan device passed as VmaAllocatorCreateInfo::device.
+ */
+ VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT = 0x00000080,
+ /**
+ Enables usage of VK_KHR_maintenance5 extension in the library.
+
+ You should set this flag if you found available and enabled this device extension,
+ while creating Vulkan device passed as VmaAllocatorCreateInfo::device.
+ */
+ VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT = 0x00000100,
+
+ /**
+ Enables usage of VK_KHR_external_memory_win32 extension in the library.
+
+ You should set this flag if you found available and enabled this device extension,
+ while creating Vulkan device passed as VmaAllocatorCreateInfo::device.
+ For more information, see \ref vk_khr_external_memory_win32.
+ */
+ VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT = 0x00000200,
+
+ VMA_ALLOCATOR_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VmaAllocatorCreateFlagBits;
+/// See #VmaAllocatorCreateFlagBits.
+typedef VkFlags VmaAllocatorCreateFlags;
+
+/** @} */
+
+/**
+\addtogroup group_alloc
+@{
+*/
+
+/// \brief Intended usage of the allocated memory.
+typedef enum VmaMemoryUsage
+{
+ /** No intended memory usage specified.
+ Use other members of VmaAllocationCreateInfo to specify your requirements.
+ */
+ VMA_MEMORY_USAGE_UNKNOWN = 0,
+ /**
+ \deprecated Obsolete, preserved for backward compatibility.
+ Prefers `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`.
+ */
+ VMA_MEMORY_USAGE_GPU_ONLY = 1,
+ /**
+ \deprecated Obsolete, preserved for backward compatibility.
+ Guarantees `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` and `VK_MEMORY_PROPERTY_HOST_COHERENT_BIT`.
+ */
+ VMA_MEMORY_USAGE_CPU_ONLY = 2,
+ /**
+ \deprecated Obsolete, preserved for backward compatibility.
+ Guarantees `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`, prefers `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`.
+ */
+ VMA_MEMORY_USAGE_CPU_TO_GPU = 3,
+ /**
+ \deprecated Obsolete, preserved for backward compatibility.
+ Guarantees `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`, prefers `VK_MEMORY_PROPERTY_HOST_CACHED_BIT`.
+ */
+ VMA_MEMORY_USAGE_GPU_TO_CPU = 4,
+ /**
+ \deprecated Obsolete, preserved for backward compatibility.
+ Prefers memory types without `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`.
+ */
+ VMA_MEMORY_USAGE_CPU_COPY = 5,
+ /**
+ Lazily allocated GPU memory having `VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT`.
+ Exists mostly on mobile platforms. Using it on desktop PC or other GPUs with no such memory type present will fail the allocation.
+
+ Usage: Memory for transient attachment images (color attachments, depth attachments etc.), created with `VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT`.
+
+ Allocations with this usage are always created as dedicated - it implies #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT.
+ */
+ VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED = 6,
+ /**
+ Selects best memory type automatically.
+ This value is recommended for most common use cases.
+
+ When using this value, if you want to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT),
+ you must pass one of the flags: #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT
+ in VmaAllocationCreateInfo::flags.
+
+ It can be used only with functions that let the library know `VkBufferCreateInfo` or `VkImageCreateInfo`, e.g.
+ vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo()
+ and not with generic memory allocation functions.
+ */
+ VMA_MEMORY_USAGE_AUTO = 7,
+ /**
+ Selects best memory type automatically with preference for GPU (device) memory.
+
+ When using this value, if you want to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT),
+ you must pass one of the flags: #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT
+ in VmaAllocationCreateInfo::flags.
+
+ It can be used only with functions that let the library know `VkBufferCreateInfo` or `VkImageCreateInfo`, e.g.
+ vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo()
+ and not with generic memory allocation functions.
+ */
+ VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE = 8,
+ /**
+ Selects best memory type automatically with preference for CPU (host) memory.
+
+ When using this value, if you want to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT),
+ you must pass one of the flags: #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT
+ in VmaAllocationCreateInfo::flags.
+
+ It can be used only with functions that let the library know `VkBufferCreateInfo` or `VkImageCreateInfo`, e.g.
+ vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo()
+ and not with generic memory allocation functions.
+ */
+ VMA_MEMORY_USAGE_AUTO_PREFER_HOST = 9,
+
+ VMA_MEMORY_USAGE_MAX_ENUM = 0x7FFFFFFF
+} VmaMemoryUsage;
+
+/// Flags to be passed as VmaAllocationCreateInfo::flags.
+typedef enum VmaAllocationCreateFlagBits
+{
+ /** \brief Set this flag if the allocation should have its own memory block.
+
+ Use it for special, big resources, like fullscreen images used as attachments.
+
+ If you use this flag while creating a buffer or an image, `VkMemoryDedicatedAllocateInfo`
+ structure is applied if possible.
+ */
+ VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT = 0x00000001,
+
+ /** \brief Set this flag to only try to allocate from existing `VkDeviceMemory` blocks and never create new such block.
+
+ If new allocation cannot be placed in any of the existing blocks, allocation
+ fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY` error.
+
+ You should not use #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT and
+ #VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT at the same time. Combining them makes no sense.
+ */
+ VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT = 0x00000002,
+ /** \brief Set this flag to use a memory that will be persistently mapped and retrieve pointer to it.
+
+ Pointer to mapped memory will be returned through VmaAllocationInfo::pMappedData.
+
+ It is valid to use this flag for allocation made from memory type that is not
+ `HOST_VISIBLE`. This flag is then ignored and memory is not mapped. This is
+ useful if you need an allocation that is efficient to use on GPU
+ (`DEVICE_LOCAL`) and still want to map it directly if possible on platforms that
+ support it (e.g. Intel GPU).
+ */
+ VMA_ALLOCATION_CREATE_MAPPED_BIT = 0x00000004,
+ /** \deprecated Preserved for backward compatibility. Consider using vmaSetAllocationName() instead.
+
+ Set this flag to treat VmaAllocationCreateInfo::pUserData as pointer to a
+ null-terminated string. Instead of copying pointer value, a local copy of the
+ string is made and stored in allocation's `pName`. The string is automatically
+ freed together with the allocation. It is also used in vmaBuildStatsString().
+ */
+ VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT = 0x00000020,
+ /** \brief Allocation will be created from upper stack in a double stack pool.
+
+ This flag is only allowed for custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT flag.
+ */
+ VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT = 0x00000040,
+ /** Create both buffer/image and allocation, but don't bind them together.
+ It is useful when you want to bind them yourself to do some more advanced binding, e.g. using some extensions.
+ The flag is meaningful only with functions that bind by default: vmaCreateBuffer(), vmaCreateImage().
+ Otherwise it is ignored.
+
+ If you want to make sure the new buffer/image is not tied to the new memory allocation
+ through `VkMemoryDedicatedAllocateInfoKHR` structure in case the allocation ends up in its own memory block,
+ use also flag #VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT.
+ */
+ VMA_ALLOCATION_CREATE_DONT_BIND_BIT = 0x00000080,
+ /** Create allocation only if additional device memory required for it, if any, won't exceed
+ memory budget. Otherwise return `VK_ERROR_OUT_OF_DEVICE_MEMORY`.
+ */
+ VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT = 0x00000100,
+ /** \brief Set this flag if the allocated memory will have aliasing resources.
+
+ Usage of this flag prevents supplying `VkMemoryDedicatedAllocateInfoKHR` when #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT is specified.
+ Otherwise created dedicated memory will not be suitable for aliasing resources, resulting in Vulkan Validation Layer errors.
+ */
+ VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT = 0x00000200,
+ /**
+ Requests possibility to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT).
+
+ - If you use #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` value,
+ you must use this flag to be able to map the allocation. Otherwise, mapping is incorrect.
+ - If you use other value of #VmaMemoryUsage, this flag is ignored and mapping is always possible in memory types that are `HOST_VISIBLE`.
+ This includes allocations created in \ref custom_memory_pools.
+
+ Declares that mapped memory will only be written sequentially, e.g. using `memcpy()` or a loop writing number-by-number,
+ never read or accessed randomly, so a memory type can be selected that is uncached and write-combined.
+
+ \warning Violating this declaration may work correctly, but will likely be very slow.
+ Watch out for implicit reads introduced by doing e.g. `pMappedData[i] += x;`
+ Better prepare your data in a local variable and `memcpy()` it to the mapped pointer all at once.
+ */
+ VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT = 0x00000400,
+ /**
+ Requests possibility to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT).
+
+ - If you use #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` value,
+ you must use this flag to be able to map the allocation. Otherwise, mapping is incorrect.
+ - If you use other value of #VmaMemoryUsage, this flag is ignored and mapping is always possible in memory types that are `HOST_VISIBLE`.
+ This includes allocations created in \ref custom_memory_pools.
+
+ Declares that mapped memory can be read, written, and accessed in random order,
+ so a `HOST_CACHED` memory type is preferred.
+ */
+ VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT = 0x00000800,
+ /**
+ Together with #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT,
+ it says that despite request for host access, a not-`HOST_VISIBLE` memory type can be selected
+ if it may improve performance.
+
+ By using this flag, you declare that you will check if the allocation ended up in a `HOST_VISIBLE` memory type
+ (e.g. using vmaGetAllocationMemoryProperties()) and if not, you will create some "staging" buffer and
+ issue an explicit transfer to write/read your data.
+ To prepare for this possibility, don't forget to add appropriate flags like
+ `VK_BUFFER_USAGE_TRANSFER_DST_BIT`, `VK_BUFFER_USAGE_TRANSFER_SRC_BIT` to the parameters of created buffer or image.
+ */
+ VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT = 0x00001000,
+ /** Allocation strategy that chooses smallest possible free range for the allocation
+ to minimize memory usage and fragmentation, possibly at the expense of allocation time.
+ */
+ VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT = 0x00010000,
+ /** Allocation strategy that chooses first suitable free range for the allocation -
+ not necessarily in terms of the smallest offset but the one that is easiest and fastest to find
+ to minimize allocation time, possibly at the expense of allocation quality.
+ */
+ VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT = 0x00020000,
+ /** Allocation strategy that always chooses the lowest offset in available space.
+ This is not the most efficient strategy but achieves highly packed data.
+ Used internally by defragmentation, not recommended in typical usage.
+ */
+ VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT = 0x00040000,
+ /** Alias to #VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT.
+ */
+ VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT,
+ /** Alias to #VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT.
+ */
+ VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT,
+ /** A bit mask to extract only `STRATEGY` bits from entire set of flags.
+ */
+ VMA_ALLOCATION_CREATE_STRATEGY_MASK =
+ VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT |
+ VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT |
+ VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT,
+
+ VMA_ALLOCATION_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VmaAllocationCreateFlagBits;
+/// See #VmaAllocationCreateFlagBits.
+typedef VkFlags VmaAllocationCreateFlags;
+
+/// Flags to be passed as VmaPoolCreateInfo::flags.
+typedef enum VmaPoolCreateFlagBits
+{
+ /** \brief Use this flag if you always allocate only buffers and linear images or only optimal images out of this pool and so Buffer-Image Granularity can be ignored.
+
+ This is an optional optimization flag.
+
+ If you always allocate using vmaCreateBuffer(), vmaCreateImage(),
+ vmaAllocateMemoryForBuffer(), then you don't need to use it because allocator
+ knows exact type of your allocations so it can handle Buffer-Image Granularity
+ in the optimal way.
+
+ If you also allocate using vmaAllocateMemoryForImage() or vmaAllocateMemory(),
+ exact type of such allocations is not known, so allocator must be conservative
+ in handling Buffer-Image Granularity, which can lead to suboptimal allocation
+ (wasted memory). In that case, if you can make sure you always allocate only
+ buffers and linear images or only optimal images out of this pool, use this flag
+ to make allocator disregard Buffer-Image Granularity and so make allocations
+ faster and more optimal.
+ */
+ VMA_POOL_CREATE_IGNORE_BUFFER_IMAGE_GRANULARITY_BIT = 0x00000002,
+
+ /** \brief Enables alternative, linear allocation algorithm in this pool.
+
+ Specify this flag to enable linear allocation algorithm, which always creates
+ new allocations after last one and doesn't reuse space from allocations freed in
+ between. It trades memory consumption for simplified algorithm and data
+ structure, which has better performance and uses less memory for metadata.
+
+ By using this flag, you can achieve behavior of free-at-once, stack,
+ ring buffer, and double stack.
+ For details, see documentation chapter \ref linear_algorithm.
+ */
+ VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT = 0x00000004,
+
+ /** \brief Bit mask to extract only `ALGORITHM` bits from entire set of flags.
+ */
+ VMA_POOL_CREATE_ALGORITHM_MASK =
+ VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT,
+
+ VMA_POOL_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VmaPoolCreateFlagBits;
+/// Flags to be passed as VmaPoolCreateInfo::flags. See #VmaPoolCreateFlagBits.
+typedef VkFlags VmaPoolCreateFlags;
+
+/// Flags to be passed as VmaDefragmentationInfo::flags.
+typedef enum VmaDefragmentationFlagBits
+{
+ /** \brief Use simple but fast algorithm for defragmentation.
+ May not achieve best results but will require least time to compute and least allocations to copy.
+ */
+ VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT = 0x1,
+ /** \brief Default defragmentation algorithm, applied also when no `ALGORITHM` flag is specified.
+ Offers a balance between defragmentation quality and the amount of allocations and bytes that need to be moved.
+ */
+ VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT = 0x2,
+ /** \brief Perform full defragmentation of memory.
+ Can result in notably more time to compute and allocations to copy, but will achieve best memory packing.
+ */
+ VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT = 0x4,
+ /** \brief Use the most robust algorithm at the cost of time to compute and number of copies to make.
+ Only available when bufferImageGranularity is greater than 1, since it aims to reduce
+ alignment issues between different types of resources.
+ Otherwise falls back to same behavior as #VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT.
+ */
+ VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT = 0x8,
+
+ /// A bit mask to extract only `ALGORITHM` bits from entire set of flags.
+ VMA_DEFRAGMENTATION_FLAG_ALGORITHM_MASK =
+ VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT |
+ VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT |
+ VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT |
+ VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT,
+
+ VMA_DEFRAGMENTATION_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VmaDefragmentationFlagBits;
+/// See #VmaDefragmentationFlagBits.
+typedef VkFlags VmaDefragmentationFlags;
+
+/// Operation performed on single defragmentation move. See structure #VmaDefragmentationMove.
+typedef enum VmaDefragmentationMoveOperation
+{
+ /// Buffer/image has been recreated at `dstTmpAllocation`, data has been copied, old buffer/image has been destroyed. `srcAllocation` should be changed to point to the new place. This is the default value set by vmaBeginDefragmentationPass().
+ VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY = 0,
+ /// Set this value if you cannot move the allocation. New place reserved at `dstTmpAllocation` will be freed. `srcAllocation` will remain unchanged.
+ VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE = 1,
+ /// Set this value if you decide to abandon the allocation and you destroyed the buffer/image. New place reserved at `dstTmpAllocation` will be freed, along with `srcAllocation`, which will be destroyed.
+ VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY = 2,
+} VmaDefragmentationMoveOperation;
+
+/** @} */
+
+/**
+\addtogroup group_virtual
+@{
+*/
+
+/// Flags to be passed as VmaVirtualBlockCreateInfo::flags.
+typedef enum VmaVirtualBlockCreateFlagBits
+{
+ /** \brief Enables alternative, linear allocation algorithm in this virtual block.
+
+ Specify this flag to enable linear allocation algorithm, which always creates
+ new allocations after last one and doesn't reuse space from allocations freed in
+ between. It trades memory consumption for simplified algorithm and data
+ structure, which has better performance and uses less memory for metadata.
+
+ By using this flag, you can achieve behavior of free-at-once, stack,
+ ring buffer, and double stack.
+ For details, see documentation chapter \ref linear_algorithm.
+ */
+ VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT = 0x00000001,
+
+ /** \brief Bit mask to extract only `ALGORITHM` bits from entire set of flags.
+ */
+ VMA_VIRTUAL_BLOCK_CREATE_ALGORITHM_MASK =
+ VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT,
+
+ VMA_VIRTUAL_BLOCK_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VmaVirtualBlockCreateFlagBits;
+/// Flags to be passed as VmaVirtualBlockCreateInfo::flags. See #VmaVirtualBlockCreateFlagBits.
+typedef VkFlags VmaVirtualBlockCreateFlags;
+
+/// Flags to be passed as VmaVirtualAllocationCreateInfo::flags.
+typedef enum VmaVirtualAllocationCreateFlagBits
+{
+ /** \brief Allocation will be created from upper stack in a double stack pool.
+
+ This flag is only allowed for virtual blocks created with #VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT flag.
+ */
+ VMA_VIRTUAL_ALLOCATION_CREATE_UPPER_ADDRESS_BIT = VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT,
+ /** \brief Allocation strategy that tries to minimize memory usage.
+ */
+ VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT,
+ /** \brief Allocation strategy that tries to minimize allocation time.
+ */
+ VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT,
+ /** \brief Allocation strategy that always chooses the lowest offset in available space.
+ This is not the most efficient strategy but achieves highly packed data.
+ */
+ VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT,
+ /** \brief A bit mask to extract only `STRATEGY` bits from entire set of flags.
+
+ These strategy flags are binary compatible with equivalent flags in #VmaAllocationCreateFlagBits.
+ */
+ VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MASK = VMA_ALLOCATION_CREATE_STRATEGY_MASK,
+
+ VMA_VIRTUAL_ALLOCATION_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VmaVirtualAllocationCreateFlagBits;
+/// Flags to be passed as VmaVirtualAllocationCreateInfo::flags. See #VmaVirtualAllocationCreateFlagBits.
+typedef VkFlags VmaVirtualAllocationCreateFlags;
+
+/** @} */
+
+#endif // _VMA_ENUM_DECLARATIONS
+
+#ifndef _VMA_DATA_TYPES_DECLARATIONS
+
+/**
+\addtogroup group_init
+@{ */
+
+/** \struct VmaAllocator
+\brief Represents the main, initialized object of this library.
+
+Fill structure #VmaAllocatorCreateInfo and call function vmaCreateAllocator() to create it.
+Call function vmaDestroyAllocator() to destroy it.
+
+It is recommended to create just one object of this type per `VkDevice` object,
+right after Vulkan is initialized, and to keep it alive until just before the Vulkan device is destroyed.
+*/
+VK_DEFINE_HANDLE(VmaAllocator)
+
+/** @} */
+
+/**
+\addtogroup group_alloc
+@{
+*/
+
+/** \struct VmaPool
+\brief Represents a custom memory pool.
+
+Fill structure VmaPoolCreateInfo and call function vmaCreatePool() to create it.
+Call function vmaDestroyPool() to destroy it.
+
+For more information see [Custom memory pools](@ref choosing_memory_type_custom_memory_pools).
+*/
+VK_DEFINE_HANDLE(VmaPool)
+
+/** \struct VmaAllocation
+\brief Represents single memory allocation.
+
+It may be either a dedicated block of `VkDeviceMemory` or a specific region of a bigger block of this type
+plus a unique offset.
+
+There are multiple ways to create such object.
+You need to fill structure VmaAllocationCreateInfo.
+For more information see [Choosing memory type](@ref choosing_memory_type).
+
+Although the library provides convenience functions that create Vulkan buffer or image,
+allocate memory for it and bind them together,
+binding of the allocation to a buffer or an image is out of scope of the allocation itself.
+Allocation object can exist without buffer/image bound,
+binding can be done manually by the user, and destruction of it can be done
+independently of destruction of the allocation.
+
+The object also remembers its size and some other information.
+To retrieve this information, use function vmaGetAllocationInfo() and inspect
+returned structure VmaAllocationInfo.
+*/
+VK_DEFINE_HANDLE(VmaAllocation)
+
+/** \struct VmaDefragmentationContext
+\brief An opaque object that represents started defragmentation process.
+
+Fill structure #VmaDefragmentationInfo and call function vmaBeginDefragmentation() to create it.
+Call function vmaEndDefragmentation() to destroy it.
+*/
+VK_DEFINE_HANDLE(VmaDefragmentationContext)
+
+/** @} */
+
+/**
+\addtogroup group_virtual
+@{
+*/
+
+/** \struct VmaVirtualAllocation
+\brief Represents single memory allocation done inside VmaVirtualBlock.
+
+Use it as a unique identifier to virtual allocation within the single block.
+
+Use value `VK_NULL_HANDLE` to represent a null/invalid allocation.
+*/
+VK_DEFINE_NON_DISPATCHABLE_HANDLE(VmaVirtualAllocation)
+
+/** @} */
+
+/**
+\addtogroup group_virtual
+@{
+*/
+
+/** \struct VmaVirtualBlock
+\brief Handle to a virtual block object that allows to use core allocation algorithm without allocating any real GPU memory.
+
+Fill in #VmaVirtualBlockCreateInfo structure and use vmaCreateVirtualBlock() to create it. Use vmaDestroyVirtualBlock() to destroy it.
+For more information, see documentation chapter \ref virtual_allocator.
+
+This object is not thread-safe - should not be used from multiple threads simultaneously, must be synchronized externally.
+*/
+VK_DEFINE_HANDLE(VmaVirtualBlock)
+
+/** @} */
+
+/**
+\addtogroup group_init
+@{
+*/
+
+/// Callback function called after successful vkAllocateMemory.
+typedef void (VKAPI_PTR* PFN_vmaAllocateDeviceMemoryFunction)(
+ VmaAllocator VMA_NOT_NULL allocator,
+ uint32_t memoryType,
+ VkDeviceMemory VMA_NOT_NULL_NON_DISPATCHABLE memory,
+ VkDeviceSize size,
+ void* VMA_NULLABLE pUserData);
+
+/// Callback function called before vkFreeMemory.
+typedef void (VKAPI_PTR* PFN_vmaFreeDeviceMemoryFunction)(
+ VmaAllocator VMA_NOT_NULL allocator,
+ uint32_t memoryType,
+ VkDeviceMemory VMA_NOT_NULL_NON_DISPATCHABLE memory,
+ VkDeviceSize size,
+ void* VMA_NULLABLE pUserData);
+
+/** \brief Set of callbacks that the library will call for `vkAllocateMemory` and `vkFreeMemory`.
+
+Provided for informative purpose, e.g. to gather statistics about number of
+allocations or total amount of memory allocated in Vulkan.
+
+Used in VmaAllocatorCreateInfo::pDeviceMemoryCallbacks.
+*/
+typedef struct VmaDeviceMemoryCallbacks
+{
+ /// Called after a successful `vkAllocateMemory`. Optional, can be null.
+ PFN_vmaAllocateDeviceMemoryFunction VMA_NULLABLE pfnAllocate;
+ /// Called before `vkFreeMemory`. Optional, can be null.
+ PFN_vmaFreeDeviceMemoryFunction VMA_NULLABLE pfnFree;
+ /// Optional, can be null. Presumably forwarded as the `pUserData` argument of both callbacks - confirm in the implementation.
+ void* VMA_NULLABLE pUserData;
+} VmaDeviceMemoryCallbacks;
+
+/** \brief Pointers to some Vulkan functions - a subset used by the library.
+
+Used in VmaAllocatorCreateInfo::pVulkanFunctions.
+*/
+typedef struct VmaVulkanFunctions
+{
+ /// Required when using VMA_DYNAMIC_VULKAN_FUNCTIONS.
+ PFN_vkGetInstanceProcAddr VMA_NULLABLE vkGetInstanceProcAddr;
+ /// Required when using VMA_DYNAMIC_VULKAN_FUNCTIONS.
+ PFN_vkGetDeviceProcAddr VMA_NULLABLE vkGetDeviceProcAddr;
+ PFN_vkGetPhysicalDeviceProperties VMA_NULLABLE vkGetPhysicalDeviceProperties;
+ PFN_vkGetPhysicalDeviceMemoryProperties VMA_NULLABLE vkGetPhysicalDeviceMemoryProperties;
+ PFN_vkAllocateMemory VMA_NULLABLE vkAllocateMemory;
+ PFN_vkFreeMemory VMA_NULLABLE vkFreeMemory;
+ PFN_vkMapMemory VMA_NULLABLE vkMapMemory;
+ PFN_vkUnmapMemory VMA_NULLABLE vkUnmapMemory;
+ PFN_vkFlushMappedMemoryRanges VMA_NULLABLE vkFlushMappedMemoryRanges;
+ PFN_vkInvalidateMappedMemoryRanges VMA_NULLABLE vkInvalidateMappedMemoryRanges;
+ PFN_vkBindBufferMemory VMA_NULLABLE vkBindBufferMemory;
+ PFN_vkBindImageMemory VMA_NULLABLE vkBindImageMemory;
+ PFN_vkGetBufferMemoryRequirements VMA_NULLABLE vkGetBufferMemoryRequirements;
+ PFN_vkGetImageMemoryRequirements VMA_NULLABLE vkGetImageMemoryRequirements;
+ PFN_vkCreateBuffer VMA_NULLABLE vkCreateBuffer;
+ PFN_vkDestroyBuffer VMA_NULLABLE vkDestroyBuffer;
+ PFN_vkCreateImage VMA_NULLABLE vkCreateImage;
+ PFN_vkDestroyImage VMA_NULLABLE vkDestroyImage;
+ PFN_vkCmdCopyBuffer VMA_NULLABLE vkCmdCopyBuffer;
+#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000
+ /// Fetch "vkGetBufferMemoryRequirements2" on Vulkan >= 1.1, fetch "vkGetBufferMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension.
+ PFN_vkGetBufferMemoryRequirements2KHR VMA_NULLABLE vkGetBufferMemoryRequirements2KHR;
+ /// Fetch "vkGetImageMemoryRequirements2" on Vulkan >= 1.1, fetch "vkGetImageMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension.
+ PFN_vkGetImageMemoryRequirements2KHR VMA_NULLABLE vkGetImageMemoryRequirements2KHR;
+#endif // VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000
+#if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000
+ /// Fetch "vkBindBufferMemory2" on Vulkan >= 1.1, fetch "vkBindBufferMemory2KHR" when using VK_KHR_bind_memory2 extension.
+ PFN_vkBindBufferMemory2KHR VMA_NULLABLE vkBindBufferMemory2KHR;
+ /// Fetch "vkBindImageMemory2" on Vulkan >= 1.1, fetch "vkBindImageMemory2KHR" when using VK_KHR_bind_memory2 extension.
+ PFN_vkBindImageMemory2KHR VMA_NULLABLE vkBindImageMemory2KHR;
+#endif // VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000
+#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000
+ /// Fetch from "vkGetPhysicalDeviceMemoryProperties2" on Vulkan >= 1.1, but you can also fetch it from "vkGetPhysicalDeviceMemoryProperties2KHR" if you enabled extension VK_KHR_get_physical_device_properties2.
+ PFN_vkGetPhysicalDeviceMemoryProperties2KHR VMA_NULLABLE vkGetPhysicalDeviceMemoryProperties2KHR;
+#endif // VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000
+#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000
+ /// Fetch from "vkGetDeviceBufferMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceBufferMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4.
+ PFN_vkGetDeviceBufferMemoryRequirementsKHR VMA_NULLABLE vkGetDeviceBufferMemoryRequirements;
+ /// Fetch from "vkGetDeviceImageMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceImageMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4.
+ PFN_vkGetDeviceImageMemoryRequirementsKHR VMA_NULLABLE vkGetDeviceImageMemoryRequirements;
+#endif // VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000
+#if VMA_EXTERNAL_MEMORY_WIN32
+ PFN_vkGetMemoryWin32HandleKHR VMA_NULLABLE vkGetMemoryWin32HandleKHR;
+#else
+ void* VMA_NULLABLE vkGetMemoryWin32HandleKHR; // Untyped placeholder: the member is declared even when VMA_EXTERNAL_MEMORY_WIN32 is off.
+#endif // VMA_EXTERNAL_MEMORY_WIN32
+} VmaVulkanFunctions;
+
+/// Description of an Allocator to be created.
+typedef struct VmaAllocatorCreateInfo
+{
+ /// Flags for created allocator. Use #VmaAllocatorCreateFlagBits enum.
+ VmaAllocatorCreateFlags flags;
+ /// Vulkan physical device.
+ /** It must be valid throughout whole lifetime of created allocator. */
+ VkPhysicalDevice VMA_NOT_NULL physicalDevice;
+ /// Vulkan device.
+ /** It must be valid throughout whole lifetime of created allocator. */
+ VkDevice VMA_NOT_NULL device;
+ /// Preferred size of a single `VkDeviceMemory` block to be allocated from large heaps > 1 GiB. Optional.
+ /** Set to 0 to use default, which is currently 256 MiB. */
+ VkDeviceSize preferredLargeHeapBlockSize;
+ /// Custom CPU memory allocation callbacks. Optional.
+ /** Optional, can be null. When specified, will also be used for all CPU-side memory allocations. */
+ const VkAllocationCallbacks* VMA_NULLABLE pAllocationCallbacks;
+ /// Informative callbacks for `vkAllocateMemory`, `vkFreeMemory`. Optional.
+ /** Optional, can be null. */
+ const VmaDeviceMemoryCallbacks* VMA_NULLABLE pDeviceMemoryCallbacks;
+ /** \brief Either null or a pointer to an array of limits on maximum number of bytes that can be allocated out of particular Vulkan memory heap.
+
+ If not NULL, it must be a pointer to an array of
+ `VkPhysicalDeviceMemoryProperties::memoryHeapCount` elements, defining limit on
+ maximum number of bytes that can be allocated out of particular Vulkan memory
+ heap.
+
+ Any of the elements may be equal to `VK_WHOLE_SIZE`, which means no limit on that
+ heap. This is also the default in case of `pHeapSizeLimit` = NULL.
+
+ If there is a limit defined for a heap:
+
+ - If user tries to allocate more memory from that heap using this allocator,
+ the allocation fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY`.
+ - If the limit is smaller than heap size reported in `VkMemoryHeap::size`, the
+ value of this limit will be reported instead when using vmaGetMemoryProperties().
+
+ Warning! Using this feature may not be equivalent to installing a GPU with
+ smaller amount of memory, because graphics driver doesn't necessarily fail new
+ allocations with `VK_ERROR_OUT_OF_DEVICE_MEMORY` result when memory capacity is
+ exceeded. It may return success and just silently migrate some device memory
+ blocks to system RAM. This driver behavior can also be controlled using
+ VK_AMD_memory_overallocation_behavior extension.
+ */
+ const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL("VkPhysicalDeviceMemoryProperties::memoryHeapCount") pHeapSizeLimit;
+
+ /** \brief Pointers to Vulkan functions. Can be null.
+
+ For details see [Pointers to Vulkan functions](@ref config_Vulkan_functions).
+ */
+ const VmaVulkanFunctions* VMA_NULLABLE pVulkanFunctions;
+ /** \brief Handle to Vulkan instance object.
+
+ Starting from version 3.0.0 this member is no longer optional, it must be set!
+ */
+ VkInstance VMA_NOT_NULL instance;
+ /** \brief Optional. Vulkan version that the application uses.
+
+ It must be a value in the format as created by macro `VK_MAKE_VERSION` or a constant like: `VK_API_VERSION_1_1`, `VK_API_VERSION_1_0`.
+ The patch version number specified is ignored. Only the major and minor versions are considered.
+ Only versions 1.0...1.4 are supported by the current implementation.
+ Leaving it initialized to zero is equivalent to `VK_API_VERSION_1_0`.
+ It must match the Vulkan version used by the application and supported on the selected physical device,
+ so it must be no higher than `VkApplicationInfo::apiVersion` passed to `vkCreateInstance`
+ and no higher than `VkPhysicalDeviceProperties::apiVersion` found on the physical device used.
+ */
+ uint32_t vulkanApiVersion;
+#if VMA_EXTERNAL_MEMORY
+ /** \brief Either null or a pointer to an array of external memory handle types for each Vulkan memory type.
+
+ If not NULL, it must be a pointer to an array of `VkPhysicalDeviceMemoryProperties::memoryTypeCount`
+ elements, defining external memory handle types of particular Vulkan memory type,
+ to be passed using `VkExportMemoryAllocateInfoKHR`.
+
+ Any of the elements may be equal to 0, which means not to use `VkExportMemoryAllocateInfoKHR` on this memory type.
+ This is also the default in case of `pTypeExternalMemoryHandleTypes` = NULL.
+ */
+ const VkExternalMemoryHandleTypeFlagsKHR* VMA_NULLABLE VMA_LEN_IF_NOT_NULL("VkPhysicalDeviceMemoryProperties::memoryTypeCount") pTypeExternalMemoryHandleTypes;
+#endif // #if VMA_EXTERNAL_MEMORY
+} VmaAllocatorCreateInfo;
+
+/// Information about existing #VmaAllocator object.
+typedef struct VmaAllocatorInfo
+{
+ /** \brief Handle to Vulkan instance object.
+
+ This is the same value as has been passed through VmaAllocatorCreateInfo::instance.
+ */
+ VkInstance VMA_NOT_NULL instance;
+ /** \brief Handle to Vulkan physical device object.
+
+ This is the same value as has been passed through VmaAllocatorCreateInfo::physicalDevice.
+ */
+ VkPhysicalDevice VMA_NOT_NULL physicalDevice;
+ /** \brief Handle to Vulkan device object.
+
+ This is the same value as has been passed through VmaAllocatorCreateInfo::device.
+ */
+ VkDevice VMA_NOT_NULL device;
+} VmaAllocatorInfo;
+
+/** @} */
+
+/**
+\addtogroup group_stats
+@{
+*/
+
+/** \brief Calculated statistics of memory usage e.g. in a specific memory type, heap, custom pool, or total.
+
+These are fast to calculate.
+See functions: vmaGetHeapBudgets(), vmaGetPoolStatistics().
+*/
+typedef struct VmaStatistics
+{
+ /** \brief Number of `VkDeviceMemory` objects - Vulkan memory blocks allocated.
+ */
+ uint32_t blockCount;
+ /** \brief Number of #VmaAllocation objects allocated.
+
+ Dedicated allocations have their own blocks, so each one adds 1 to `allocationCount` as well as `blockCount`.
+ */
+ uint32_t allocationCount;
+ /** \brief Number of bytes allocated in `VkDeviceMemory` blocks.
+
+ \note To avoid confusion, please be aware that what Vulkan calls an "allocation" - a whole `VkDeviceMemory` object
+ (e.g. as in `VkPhysicalDeviceLimits::maxMemoryAllocationCount`) is called a "block" in VMA, while VMA calls
+ "allocation" a #VmaAllocation object that represents a memory region sub-allocated from such block, usually for a single buffer or image.
+ */
+ VkDeviceSize blockBytes;
+ /** \brief Total number of bytes occupied by all #VmaAllocation objects.
+
+ Always less than or equal to `blockBytes`.
+ Difference `(blockBytes - allocationBytes)` is the amount of memory allocated from Vulkan
+ but unused by any #VmaAllocation.
+ */
+ VkDeviceSize allocationBytes;
+} VmaStatistics;
+
+/** \brief More detailed statistics than #VmaStatistics.
+
+These are slower to calculate. Use for debugging purposes.
+See functions: vmaCalculateStatistics(), vmaCalculatePoolStatistics().
+
+Previous version of the statistics API provided averages, but they have been removed
+because they can be easily calculated as follows (check that `allocationCount` and `unusedRangeCount` are nonzero before dividing):
+
+\code
+VkDeviceSize allocationSizeAvg = detailedStats.statistics.allocationBytes / detailedStats.statistics.allocationCount;
+VkDeviceSize unusedBytes = detailedStats.statistics.blockBytes - detailedStats.statistics.allocationBytes;
+VkDeviceSize unusedRangeSizeAvg = unusedBytes / detailedStats.unusedRangeCount;
+\endcode
+*/
+typedef struct VmaDetailedStatistics
+{
+ /// Basic statistics.
+ VmaStatistics statistics;
+ /// Number of free ranges of memory between allocations.
+ uint32_t unusedRangeCount;
+ /// Smallest allocation size. `VK_WHOLE_SIZE` if there are 0 allocations.
+ VkDeviceSize allocationSizeMin;
+ /// Largest allocation size. 0 if there are 0 allocations.
+ VkDeviceSize allocationSizeMax;
+ /// Smallest empty range size. `VK_WHOLE_SIZE` if there are 0 empty ranges.
+ VkDeviceSize unusedRangeSizeMin;
+ /// Largest empty range size. 0 if there are 0 empty ranges.
+ VkDeviceSize unusedRangeSizeMax;
+} VmaDetailedStatistics;
+
+/** \brief General statistics from current state of the Allocator -
+total memory usage across all memory heaps and types.
+
+These are slower to calculate. Use for debugging purposes.
+See function vmaCalculateStatistics().
+*/
+typedef struct VmaTotalStatistics
+{
+ VmaDetailedStatistics memoryType[VK_MAX_MEMORY_TYPES];
+ VmaDetailedStatistics memoryHeap[VK_MAX_MEMORY_HEAPS];
+ VmaDetailedStatistics total;
+} VmaTotalStatistics;
+
+/** \brief Statistics of current memory usage and available budget for a specific memory heap.
+
+These are fast to calculate.
+See function vmaGetHeapBudgets().
+*/
+typedef struct VmaBudget
+{
+ /** \brief Statistics fetched from the library.
+ */
+ VmaStatistics statistics;
+ /** \brief Estimated current memory usage of the program, in bytes.
+
+ Fetched from system using VK_EXT_memory_budget extension if enabled.
+
+ It might be different than `statistics.blockBytes` (usually higher) due to additional implicit objects
+ also occupying the memory, like swapchain, pipelines, descriptor heaps, command buffers, or
+ `VkDeviceMemory` blocks allocated outside of this library, if any.
+ */
+ VkDeviceSize usage;
+ /** \brief Estimated amount of memory available to the program, in bytes.
+
+ Fetched from system using VK_EXT_memory_budget extension if enabled.
+
+ It might be different (most probably smaller) than `VkMemoryHeap::size[heapIndex]` due to factors
+ external to the program, decided by the operating system.
+ Difference `budget - usage` is the amount of additional memory that can probably
+ be allocated without problems. Exceeding the budget may result in various problems.
+ */
+ VkDeviceSize budget;
+} VmaBudget;
+
+/** @} */
+
+/**
+\addtogroup group_alloc
+@{
+*/
+
+/** \brief Parameters of new #VmaAllocation.
+
+To be used with functions like vmaCreateBuffer(), vmaCreateImage(), and many others.
+*/
+typedef struct VmaAllocationCreateInfo
+{
+ /// Use #VmaAllocationCreateFlagBits enum.
+ VmaAllocationCreateFlags flags;
+ /** \brief Intended usage of memory.
+
+ You can leave #VMA_MEMORY_USAGE_UNKNOWN if you specify memory requirements in other way. \n
+ If `pool` is not null, this member is ignored.
+ */
+ VmaMemoryUsage usage;
+ /** \brief Flags that must be set in a Memory Type chosen for an allocation.
+
+ Leave 0 if you specify memory requirements in other way. \n
+ If `pool` is not null, this member is ignored.*/
+ VkMemoryPropertyFlags requiredFlags;
+ /** \brief Flags that preferably should be set in a memory type chosen for an allocation.
+
+ Set to 0 if no additional flags are preferred. \n
+ If `pool` is not null, this member is ignored. */
+ VkMemoryPropertyFlags preferredFlags;
+ /** \brief Bitmask containing one bit set for every memory type acceptable for this allocation.
+
+ Value 0 is equivalent to `UINT32_MAX` - it means any memory type is accepted if
+ it meets other requirements specified by this structure, with no further
+ restrictions on memory type index. \n
+ If `pool` is not null, this member is ignored.
+ */
+ uint32_t memoryTypeBits;
+ /** \brief Pool that this allocation should be created in.
+
+ Leave `VK_NULL_HANDLE` to allocate from default pool. If not null, members:
+ `usage`, `requiredFlags`, `preferredFlags`, `memoryTypeBits` are ignored.
+ */
+ VmaPool VMA_NULLABLE pool;
+ /** \brief Custom general-purpose pointer that will be stored in #VmaAllocation, can be read as VmaAllocationInfo::pUserData and changed using vmaSetAllocationUserData().
+
+ If #VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT is used, it must be either
+ null or pointer to a null-terminated string. The string will be then copied to
+ internal buffer, so it doesn't need to be valid after allocation call.
+ */
+ void* VMA_NULLABLE pUserData;
+ /** \brief A floating-point value between 0 and 1, indicating the priority of the allocation relative to other memory allocations.
+
+ It is used only when #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT flag was used during creation of the #VmaAllocator object
+ and this allocation ends up as dedicated or is explicitly forced as dedicated using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT.
+ Otherwise, it has the priority of a memory block where it is placed and this variable is ignored.
+ */
+ float priority;
+} VmaAllocationCreateInfo;
+
+/// Describes parameters of created #VmaPool.
+typedef struct VmaPoolCreateInfo
+{
+ /** \brief Vulkan memory type index to allocate this pool from.
+ */
+ uint32_t memoryTypeIndex;
+ /** \brief Use combination of #VmaPoolCreateFlagBits.
+ */
+ VmaPoolCreateFlags flags;
+ /** \brief Size of a single `VkDeviceMemory` block to be allocated as part of this pool, in bytes. Optional.
+
+ Specify nonzero to set explicit, constant size of memory blocks used by this
+ pool.
+
+ Leave 0 to use default and let the library manage block sizes automatically.
+ Sizes of particular blocks may vary.
+ In this case, the pool will also support dedicated allocations.
+ */
+ VkDeviceSize blockSize;
+ /** \brief Minimum number of blocks to be always allocated in this pool, even if they stay empty.
+
+ Set to 0 to have no preallocated blocks and allow the pool to be completely empty.
+ */
+ size_t minBlockCount;
+ /** \brief Maximum number of blocks that can be allocated in this pool. Optional.
+
+ Set to 0 to use default, which is `SIZE_MAX`, which means no limit.
+
+ Set to same value as VmaPoolCreateInfo::minBlockCount to have fixed amount of memory allocated
+ throughout whole lifetime of this pool.
+ */
+ size_t maxBlockCount;
+ /** \brief A floating-point value between 0 and 1, indicating the priority of the allocations in this pool relative to other memory allocations.
+
+ It is used only when #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT flag was used during creation of the #VmaAllocator object.
+ Otherwise, this variable is ignored.
+ */
+ float priority;
+ /** \brief Additional minimum alignment to be used for all allocations created from this pool. Can be 0.
+
+ Leave 0 (default) not to impose any additional alignment. If not 0, it must be a power of two.
+ It can be useful in cases where alignment returned by Vulkan by functions like `vkGetBufferMemoryRequirements` is not enough,
+ e.g. when doing interop with OpenGL.
+ */
+ VkDeviceSize minAllocationAlignment;
+ /** \brief Additional `pNext` chain to be attached to `VkMemoryAllocateInfo` used for every allocation made by this pool. Optional.
+
+ Optional, can be null. If not null, it must point to a `pNext` chain of structures that can be attached to `VkMemoryAllocateInfo`.
+ It can be useful for special needs such as adding `VkExportMemoryAllocateInfoKHR`.
+ Structures pointed by this member must remain alive and unchanged for the whole lifetime of the custom pool.
+
+ Please note that some structures, e.g. `VkMemoryPriorityAllocateInfoEXT`, `VkMemoryDedicatedAllocateInfoKHR`,
+ can be attached automatically by this library when using other, more convenient features of the library.
+ */
+ void* VMA_NULLABLE VMA_EXTENDS_VK_STRUCT(VkMemoryAllocateInfo) pMemoryAllocateNext;
+} VmaPoolCreateInfo;
+
+/** @} */
+
+/**
+\addtogroup group_alloc
+@{
+*/
+
+/**
+Parameters of #VmaAllocation objects, that can be retrieved using function vmaGetAllocationInfo().
+
+There is also an extended version of this structure that carries additional parameters: #VmaAllocationInfo2.
+*/
+typedef struct VmaAllocationInfo
+{
+ /** \brief Memory type index that this allocation was allocated from.
+
+ It never changes.
+ */
+ uint32_t memoryType;
+ /** \brief Handle to Vulkan memory object.
+
+ Same memory object can be shared by multiple allocations.
+
+ It can change after the allocation is moved during \ref defragmentation.
+ */
+ VkDeviceMemory VMA_NULLABLE_NON_DISPATCHABLE deviceMemory;
+ /** \brief Offset in `VkDeviceMemory` object to the beginning of this allocation, in bytes. `(deviceMemory, offset)` pair is unique to this allocation.
+
+ You usually don't need to use this offset. If you create a buffer or an image together with the allocation using e.g. function
+ vmaCreateBuffer(), vmaCreateImage(), functions that operate on these resources refer to the beginning of the buffer or image,
+ not entire device memory block. Functions like vmaMapMemory(), vmaBindBufferMemory() also refer to the beginning of the allocation
+ and apply this offset automatically.
+
+ It can change after the allocation is moved during \ref defragmentation.
+ */
+ VkDeviceSize offset;
+ /** \brief Size of this allocation, in bytes.
+
+ It never changes.
+
+ \note Allocation size returned in this variable may be greater than the size
+ requested for the resource e.g. as `VkBufferCreateInfo::size`. Whole size of the
+ allocation is accessible for operations on memory e.g. using a pointer after
+ mapping with vmaMapMemory(), but operations on the resource e.g. using
+ `vkCmdCopyBuffer` must be limited to the size of the resource.
+ */
+ VkDeviceSize size;
+ /** \brief Pointer to the beginning of this allocation as mapped data.
+
+ If the allocation hasn't been mapped using vmaMapMemory() and hasn't been
+ created with #VMA_ALLOCATION_CREATE_MAPPED_BIT flag, this value is null.
+
+ It can change after call to vmaMapMemory(), vmaUnmapMemory().
+ It can also change after the allocation is moved during \ref defragmentation.
+ */
+ void* VMA_NULLABLE pMappedData;
+ /** \brief Custom general-purpose pointer that was passed as VmaAllocationCreateInfo::pUserData or set using vmaSetAllocationUserData().
+
+ It can change after call to vmaSetAllocationUserData() for this allocation.
+ */
+ void* VMA_NULLABLE pUserData;
+ /** \brief Custom allocation name that was set with vmaSetAllocationName().
+
+ It can change after call to vmaSetAllocationName() for this allocation.
+
+ Another way to set custom name is to pass it in VmaAllocationCreateInfo::pUserData with
+ additional flag #VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT set [DEPRECATED].
+ */
+ const char* VMA_NULLABLE pName;
+} VmaAllocationInfo;
+
+/// Extended parameters of a #VmaAllocation object that can be retrieved using function vmaGetAllocationInfo2().
+typedef struct VmaAllocationInfo2
+{
+ /** \brief Basic parameters of the allocation.
+
+ If you need only these, you can use function vmaGetAllocationInfo() and structure #VmaAllocationInfo instead.
+ */
+ VmaAllocationInfo allocationInfo;
+ /** \brief Size of the `VkDeviceMemory` block that the allocation belongs to.
+
+ In case of an allocation with dedicated memory, it will be equal to `allocationInfo.size`.
+ */
+ VkDeviceSize blockSize;
+ /** \brief `VK_TRUE` if the allocation has dedicated memory, `VK_FALSE` if it was placed as part of a larger memory block.
+
+ When `VK_TRUE`, it also means `VkMemoryDedicatedAllocateInfo` was used when creating the allocation
+ (if VK_KHR_dedicated_allocation extension or Vulkan version >= 1.1 is enabled).
+ */
+ VkBool32 dedicatedMemory;
+} VmaAllocationInfo2;
+
+/** Callback function called during vmaBeginDefragmentation() to check custom criterion about ending current defragmentation pass.
+
+Should return true if the defragmentation needs to stop current pass.
+*/
+typedef VkBool32 (VKAPI_PTR* PFN_vmaCheckDefragmentationBreakFunction)(void* VMA_NULLABLE pUserData);
+
+/** \brief Parameters for defragmentation.
+
+To be used with function vmaBeginDefragmentation().
+*/
+typedef struct VmaDefragmentationInfo
+{
+ /// \brief Use combination of #VmaDefragmentationFlagBits.
+ VmaDefragmentationFlags flags;
+ /** \brief Custom pool to be defragmented.
+
+ If null then default pools will undergo defragmentation process.
+ */
+ VmaPool VMA_NULLABLE pool;
+ /** \brief Maximum number of bytes that can be copied during single pass, while moving allocations to different places.
+
+ `0` means no limit.
+ */
+ VkDeviceSize maxBytesPerPass;
+ /** \brief Maximum number of allocations that can be moved during single pass to a different place.
+
+ `0` means no limit.
+ */
+ uint32_t maxAllocationsPerPass;
+ /** \brief Optional custom callback for stopping vmaBeginDefragmentation().
+
+ Must return true to break the current defragmentation pass.
+ */
+ PFN_vmaCheckDefragmentationBreakFunction VMA_NULLABLE pfnBreakCallback;
+ /// \brief Optional data to pass to custom callback for stopping pass of defragmentation.
+ void* VMA_NULLABLE pBreakCallbackUserData;
+} VmaDefragmentationInfo;
+
+/// Single move of an allocation to be done for defragmentation.
+typedef struct VmaDefragmentationMove
+{
+ /// Operation to be performed on the allocation by vmaEndDefragmentationPass(). Default value is #VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY. You can modify it.
+ VmaDefragmentationMoveOperation operation;
+ /// Allocation that should be moved.
+ VmaAllocation VMA_NOT_NULL srcAllocation;
+ /** \brief Temporary allocation pointing to destination memory that will replace `srcAllocation`.
+
+ \warning Do not store this allocation in your data structures! It exists only temporarily, for the duration of the defragmentation pass,
+ to be used for binding new buffer/image to the destination memory using e.g. vmaBindBufferMemory().
+ vmaEndDefragmentationPass() will destroy it and make `srcAllocation` point to this memory.
+ */
+ VmaAllocation VMA_NOT_NULL dstTmpAllocation;
+} VmaDefragmentationMove;
+
+/** \brief Parameters for incremental defragmentation steps.
+
+To be used with function vmaBeginDefragmentationPass().
+*/
+typedef struct VmaDefragmentationPassMoveInfo
+{
+ /// Number of elements in the `pMoves` array.
+ uint32_t moveCount;
+ /** \brief Array of moves to be performed by the user in the current defragmentation pass.
+
+ Pointer to an array of `moveCount` elements, owned by VMA, created in vmaBeginDefragmentationPass(), destroyed in vmaEndDefragmentationPass().
+
+ For each element, you should:
+
+ 1. Create a new buffer/image and bind it to the destination memory represented by VmaDefragmentationMove::dstTmpAllocation, e.g. using vmaBindBufferMemory().
+ 2. Copy data from the VmaDefragmentationMove::srcAllocation e.g. using `vkCmdCopyBuffer`, `vkCmdCopyImage`.
+ 3. Make sure these commands finished executing on the GPU.
+ 4. Destroy the old buffer/image.
+
+ Only then you can finish defragmentation pass by calling vmaEndDefragmentationPass().
+ After this call, the allocation will point to the new place in memory.
+
+ Alternatively, if you cannot move specific allocation, you can set VmaDefragmentationMove::operation to #VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE.
+
+ Alternatively, if you decide you want to completely remove the allocation:
+
+ 1. Destroy its buffer/image.
+ 2. Set VmaDefragmentationMove::operation to #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY.
+
+ Then, after vmaEndDefragmentationPass() the allocation will be freed.
+ */
+ VmaDefragmentationMove* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(moveCount) pMoves;
+} VmaDefragmentationPassMoveInfo;
+
+/// Statistics returned for defragmentation process in function vmaEndDefragmentation().
+typedef struct VmaDefragmentationStats
+{
+ /// Total number of bytes that have been copied while moving allocations to different places.
+ VkDeviceSize bytesMoved;
+ /// Total number of bytes that have been released to the system by freeing empty `VkDeviceMemory` objects.
+ VkDeviceSize bytesFreed;
+ /// Number of allocations that have been moved to different places.
+ uint32_t allocationsMoved;
+ /// Number of empty `VkDeviceMemory` objects that have been released to the system.
+ uint32_t deviceMemoryBlocksFreed;
+} VmaDefragmentationStats;
+
+/** @} */
+
+/**
+\addtogroup group_virtual
+@{
+*/
+
+/// Parameters of created #VmaVirtualBlock object to be passed to vmaCreateVirtualBlock().
+typedef struct VmaVirtualBlockCreateInfo
+{
+ /** \brief Total size of the virtual block.
+
+ Sizes can be expressed in bytes or any units you want as long as you are consistent in using them.
+ For example, if you allocate from some array of structures, 1 can mean single instance of entire structure.
+ */
+ VkDeviceSize size;
+
+ /** \brief Use combination of #VmaVirtualBlockCreateFlagBits.
+ */
+ VmaVirtualBlockCreateFlags flags;
+
+ /** \brief Custom CPU memory allocation callbacks. Optional.
+
+ Optional, can be null. When specified, they will be used for all CPU-side memory allocations.
+ */
+ const VkAllocationCallbacks* VMA_NULLABLE pAllocationCallbacks;
+} VmaVirtualBlockCreateInfo;
+
+/// Parameters of created virtual allocation to be passed to vmaVirtualAllocate().
+typedef struct VmaVirtualAllocationCreateInfo
+{
+ /** \brief Size of the allocation.
+
+ Cannot be zero.
+ */
+ VkDeviceSize size;
+ /** \brief Required alignment of the allocation. Optional.
+
+ Must be power of two. Special value 0 has the same meaning as 1 - means no special alignment is required, so allocation can start at any offset.
+ */
+ VkDeviceSize alignment;
+ /** \brief Use combination of #VmaVirtualAllocationCreateFlagBits.
+ */
+ VmaVirtualAllocationCreateFlags flags;
+ /** \brief Custom pointer to be associated with the allocation. Optional.
+
+ It can be any value and can be used for user-defined purposes. It can be fetched or changed later.
+ */
+ void* VMA_NULLABLE pUserData;
+} VmaVirtualAllocationCreateInfo;
+
+/// Parameters of an existing virtual allocation, returned by vmaGetVirtualAllocationInfo().
+typedef struct VmaVirtualAllocationInfo
+{
+ /** \brief Offset of the allocation.
+
+ Offset at which the allocation was made.
+ */
+ VkDeviceSize offset;
+ /** \brief Size of the allocation.
+
+ Same value as passed in VmaVirtualAllocationCreateInfo::size.
+ */
+ VkDeviceSize size;
+ /** \brief Custom pointer associated with the allocation.
+
+ Same value as passed in VmaVirtualAllocationCreateInfo::pUserData or to vmaSetVirtualAllocationUserData().
+ */
+ void* VMA_NULLABLE pUserData;
+} VmaVirtualAllocationInfo;
+
+/** @} */
+
+#endif // _VMA_DATA_TYPES_DECLARATIONS
+
+#ifndef _VMA_FUNCTION_HEADERS
+
+/**
+\addtogroup group_init
+@{
+*/
+
+/// Creates #VmaAllocator object.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAllocator(
+ const VmaAllocatorCreateInfo* VMA_NOT_NULL pCreateInfo,
+ VmaAllocator VMA_NULLABLE* VMA_NOT_NULL pAllocator);
+
+/// Destroys allocator object.
+VMA_CALL_PRE void VMA_CALL_POST vmaDestroyAllocator(
+ VmaAllocator VMA_NULLABLE allocator);
+
+/** \brief Returns information about existing #VmaAllocator object - handle to Vulkan device etc.
+
+It might be useful if you want to keep just the #VmaAllocator handle and fetch other required handles to
+`VkPhysicalDevice`, `VkDevice` etc. every time using this function.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocatorInfo(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaAllocatorInfo* VMA_NOT_NULL pAllocatorInfo);
+
+/**
+PhysicalDeviceProperties are fetched from physicalDevice by the allocator.
+You can access it here, without fetching it again on your own.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaGetPhysicalDeviceProperties(
+ VmaAllocator VMA_NOT_NULL allocator,
+ const VkPhysicalDeviceProperties* VMA_NULLABLE* VMA_NOT_NULL ppPhysicalDeviceProperties);
+
+/**
+PhysicalDeviceMemoryProperties are fetched from physicalDevice by the allocator.
+You can access it here, without fetching it again on your own.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryProperties(
+ VmaAllocator VMA_NOT_NULL allocator,
+ const VkPhysicalDeviceMemoryProperties* VMA_NULLABLE* VMA_NOT_NULL ppPhysicalDeviceMemoryProperties);
+
+/**
+\brief Given Memory Type Index, returns Property Flags of this memory type.
+
+This is just a convenience function. Same information can be obtained using
+vmaGetMemoryProperties().
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryTypeProperties(
+ VmaAllocator VMA_NOT_NULL allocator,
+ uint32_t memoryTypeIndex,
+ VkMemoryPropertyFlags* VMA_NOT_NULL pFlags);
+
+/** \brief Sets index of the current frame.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaSetCurrentFrameIndex(
+ VmaAllocator VMA_NOT_NULL allocator,
+ uint32_t frameIndex);
+
+/** @} */
+
+/**
+\addtogroup group_stats
+@{
+*/
+
+/** \brief Retrieves statistics from current state of the Allocator.
+
+This function is called "calculate" not "get" because it has to traverse all
+internal data structures, so it may be quite slow. Use it for debugging purposes.
+For faster but more brief statistics suitable to be called every frame or every allocation,
+use vmaGetHeapBudgets().
+
+Note that when using allocator from multiple threads, returned information may immediately
+become outdated.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaCalculateStatistics(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaTotalStatistics* VMA_NOT_NULL pStats);
+
+/** \brief Retrieves information about current memory usage and budget for all memory heaps.
+
+\param allocator
+\param[out] pBudgets Must point to array with number of elements at least equal to number of memory heaps in physical device used.
+
+This function is called "get" not "calculate" because it is very fast, suitable to be called
+every frame or every allocation. For more detailed statistics use vmaCalculateStatistics().
+
+Note that when using allocator from multiple threads, returned information may immediately
+become outdated.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaGetHeapBudgets(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaBudget* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL("VkPhysicalDeviceMemoryProperties::memoryHeapCount") pBudgets);
+
+/** @} */
+
+/**
+\addtogroup group_alloc
+@{
+*/
+
+/**
+\brief Helps to find memoryTypeIndex, given memoryTypeBits and VmaAllocationCreateInfo.
+
+This algorithm tries to find a memory type that:
+
+- Is allowed by memoryTypeBits.
+- Contains all the flags from pAllocationCreateInfo->requiredFlags.
+- Matches intended usage.
+- Has as many flags from pAllocationCreateInfo->preferredFlags as possible.
+
+\return Returns VK_ERROR_FEATURE_NOT_PRESENT if not found. Receiving such result
+from this function or any other allocating function probably means that your
+device doesn't support any memory type with requested features for the specific
+type of resource you want to use it for. Please check parameters of your
+resource, like image layout (OPTIMAL versus LINEAR) or mip level count.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndex(
+ VmaAllocator VMA_NOT_NULL allocator,
+ uint32_t memoryTypeBits,
+ const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo,
+ uint32_t* VMA_NOT_NULL pMemoryTypeIndex);
+
+/**
+\brief Helps to find memoryTypeIndex, given VkBufferCreateInfo and VmaAllocationCreateInfo.
+
+It can be useful e.g. to determine value to be used as VmaPoolCreateInfo::memoryTypeIndex.
+It internally creates a temporary, dummy buffer that never has memory bound.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo(
+ VmaAllocator VMA_NOT_NULL allocator,
+ const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo,
+ const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo,
+ uint32_t* VMA_NOT_NULL pMemoryTypeIndex);
+
+/**
+\brief Helps to find memoryTypeIndex, given VkImageCreateInfo and VmaAllocationCreateInfo.
+
+It can be useful e.g. to determine value to be used as VmaPoolCreateInfo::memoryTypeIndex.
+It internally creates a temporary, dummy image that never has memory bound.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForImageInfo(
+ VmaAllocator VMA_NOT_NULL allocator,
+ const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo,
+ const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo,
+ uint32_t* VMA_NOT_NULL pMemoryTypeIndex);
+
+/** \brief Allocates Vulkan device memory and creates #VmaPool object.
+
+\param allocator Allocator object.
+\param pCreateInfo Parameters of pool to create.
+\param[out] pPool Handle to created pool.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreatePool(
+ VmaAllocator VMA_NOT_NULL allocator,
+ const VmaPoolCreateInfo* VMA_NOT_NULL pCreateInfo,
+ VmaPool VMA_NULLABLE* VMA_NOT_NULL pPool);
+
+/** \brief Destroys #VmaPool object and frees Vulkan device memory.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaDestroyPool(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaPool VMA_NULLABLE pool);
+
+/** @} */
+
+/**
+\addtogroup group_stats
+@{
+*/
+
+/** \brief Retrieves statistics of existing #VmaPool object.
+
+\param allocator Allocator object.
+\param pool Pool object.
+\param[out] pPoolStats Statistics of specified pool.
+
+Note that when using the pool from multiple threads, returned information may immediately
+become outdated.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolStatistics(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaPool VMA_NOT_NULL pool,
+ VmaStatistics* VMA_NOT_NULL pPoolStats);
+
+/** \brief Retrieves detailed statistics of existing #VmaPool object.
+
+\param allocator Allocator object.
+\param pool Pool object.
+\param[out] pPoolStats Statistics of specified pool.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaCalculatePoolStatistics(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaPool VMA_NOT_NULL pool,
+ VmaDetailedStatistics* VMA_NOT_NULL pPoolStats);
+
+/** @} */
+
+/**
+\addtogroup group_alloc
+@{
+*/
+
+/** \brief Checks magic number in margins around all allocations in given memory pool in search for corruptions.
+
+Corruption detection is enabled only when `VMA_DEBUG_DETECT_CORRUPTION` macro is defined to nonzero,
+`VMA_DEBUG_MARGIN` is defined to nonzero and the pool is created in memory type that is
+`HOST_VISIBLE` and `HOST_COHERENT`. For more information, see [Corruption detection](@ref debugging_memory_usage_corruption_detection).
+
+Possible return values:
+
+- `VK_ERROR_FEATURE_NOT_PRESENT` - corruption detection is not enabled for specified pool.
+- `VK_SUCCESS` - corruption detection has been performed and succeeded.
+- `VK_ERROR_UNKNOWN` - corruption detection has been performed and found memory corruptions around one of the allocations.
+ `VMA_ASSERT` is also fired in that case.
+- Other value: Error returned by Vulkan, e.g. memory mapping failure.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckPoolCorruption(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaPool VMA_NOT_NULL pool);
+
+/** \brief Retrieves name of a custom pool.
+
+After the call `*ppName` is either null or points to an internally-owned null-terminated string
+containing name of the pool that was previously set. The pointer becomes invalid when the pool is
+destroyed or its name is changed using vmaSetPoolName().
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolName(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaPool VMA_NOT_NULL pool,
+ const char* VMA_NULLABLE* VMA_NOT_NULL ppName);
+
+/** \brief Sets name of a custom pool.
+
+`pName` can be either null or pointer to a null-terminated string with new name for the pool.
+Function makes internal copy of the string, so it can be changed or freed immediately after this call.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaSetPoolName(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaPool VMA_NOT_NULL pool,
+ const char* VMA_NULLABLE pName);
+
+/** \brief General purpose memory allocation.
+
+\param allocator
+\param pVkMemoryRequirements
+\param pCreateInfo
+\param[out] pAllocation Handle to allocated memory.
+\param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo().
+
+You should free the memory using vmaFreeMemory() or vmaFreeMemoryPages().
+
+It is recommended to use vmaAllocateMemoryForBuffer(), vmaAllocateMemoryForImage(),
+vmaCreateBuffer(), vmaCreateImage() instead whenever possible.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemory(
+ VmaAllocator VMA_NOT_NULL allocator,
+ const VkMemoryRequirements* VMA_NOT_NULL pVkMemoryRequirements,
+ const VmaAllocationCreateInfo* VMA_NOT_NULL pCreateInfo,
+ VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation,
+ VmaAllocationInfo* VMA_NULLABLE pAllocationInfo);
+
+/** \brief General purpose memory allocation for multiple allocation objects at once.
+
+\param allocator Allocator object.
+\param pVkMemoryRequirements Memory requirements for each allocation.
+\param pCreateInfo Creation parameters for each allocation.
+\param allocationCount Number of allocations to make.
+\param[out] pAllocations Pointer to array that will be filled with handles to created allocations.
+\param[out] pAllocationInfo Optional. Pointer to array that will be filled with parameters of created allocations.
+
+You should free the memory using vmaFreeMemory() or vmaFreeMemoryPages().
+
+Word "pages" is just a suggestion to use this function to allocate pieces of memory needed for sparse binding.
+It is just a general purpose allocation function able to make multiple allocations at once.
+It may be internally optimized to be more efficient than calling vmaAllocateMemory() `allocationCount` times.
+
+All allocations are made using same parameters. All of them are created out of the same memory pool and type.
+If any allocation fails, all allocations already made within this function call are also freed, so that when
+returned result is not `VK_SUCCESS`, `pAllocations` array is always entirely filled with `VK_NULL_HANDLE`.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryPages(
+ VmaAllocator VMA_NOT_NULL allocator,
+ const VkMemoryRequirements* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pVkMemoryRequirements,
+ const VmaAllocationCreateInfo* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pCreateInfo,
+ size_t allocationCount,
+ VmaAllocation VMA_NULLABLE* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pAllocations,
+ VmaAllocationInfo* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) pAllocationInfo);
+
+/** \brief Allocates memory suitable for given `VkBuffer`.
+
+\param allocator
+\param buffer
+\param pCreateInfo
+\param[out] pAllocation Handle to allocated memory.
+\param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo().
+
+It only creates #VmaAllocation. To bind the memory to the buffer, use vmaBindBufferMemory().
+
+This is a special-purpose function. In most cases you should use vmaCreateBuffer().
+
+You must free the allocation using vmaFreeMemory() when no longer needed.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForBuffer(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VkBuffer VMA_NOT_NULL_NON_DISPATCHABLE buffer,
+ const VmaAllocationCreateInfo* VMA_NOT_NULL pCreateInfo,
+ VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation,
+ VmaAllocationInfo* VMA_NULLABLE pAllocationInfo);
+
+/** \brief Allocates memory suitable for given `VkImage`.
+
+\param allocator
+\param image
+\param pCreateInfo
+\param[out] pAllocation Handle to allocated memory.
+\param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo().
+
+It only creates #VmaAllocation. To bind the memory to the image, use vmaBindImageMemory().
+
+This is a special-purpose function. In most cases you should use vmaCreateImage().
+
+You must free the allocation using vmaFreeMemory() when no longer needed.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForImage(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VkImage VMA_NOT_NULL_NON_DISPATCHABLE image,
+ const VmaAllocationCreateInfo* VMA_NOT_NULL pCreateInfo,
+ VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation,
+ VmaAllocationInfo* VMA_NULLABLE pAllocationInfo);
+
+/** \brief Frees memory previously allocated using vmaAllocateMemory(), vmaAllocateMemoryForBuffer(), or vmaAllocateMemoryForImage().
+
+Passing `VK_NULL_HANDLE` as `allocation` is valid. Such function call is just skipped.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemory(
+ VmaAllocator VMA_NOT_NULL allocator,
+ const VmaAllocation VMA_NULLABLE allocation);
+
+/** \brief Frees memory and destroys multiple allocations.
+
+Word "pages" is just a suggestion to use this function to free pieces of memory used for sparse binding.
+It is just a general purpose function to free memory and destroy allocations made using e.g. vmaAllocateMemory(),
+vmaAllocateMemoryPages() and other functions.
+It may be internally optimized to be more efficient than calling vmaFreeMemory() `allocationCount` times.
+
+Allocations in `pAllocations` array can come from any memory pools and types.
+Passing `VK_NULL_HANDLE` as elements of `pAllocations` array is valid. Such entries are just skipped.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemoryPages(
+ VmaAllocator VMA_NOT_NULL allocator,
+ size_t allocationCount,
+ const VmaAllocation VMA_NULLABLE* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pAllocations);
+
+/** \brief Returns current information about specified allocation.
+
+Current parameters of given allocation are returned in `pAllocationInfo`.
+
+Although this function doesn't lock any mutex, so it should be quite efficient,
+you should avoid calling it too often.
+You can retrieve same VmaAllocationInfo structure while creating your resource, from function
+vmaCreateBuffer(), vmaCreateImage(). You can remember it if you are sure parameters don't change
+(e.g. due to defragmentation).
+
+There is also a new function vmaGetAllocationInfo2() that offers extended information
+about the allocation, returned using new structure #VmaAllocationInfo2.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaAllocation VMA_NOT_NULL allocation,
+ VmaAllocationInfo* VMA_NOT_NULL pAllocationInfo);
+
+/** \brief Returns extended information about specified allocation.
+
+Current parameters of given allocation are returned in `pAllocationInfo`.
+Extended parameters in structure #VmaAllocationInfo2 include memory block size
+and a flag telling whether the allocation has dedicated memory.
+It can be useful e.g. for interop with OpenGL.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo2(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaAllocation VMA_NOT_NULL allocation,
+ VmaAllocationInfo2* VMA_NOT_NULL pAllocationInfo);
+
+/** \brief Sets pUserData in given allocation to new value.
+
+The value of pointer `pUserData` is copied to allocation's `pUserData`.
+It is opaque, so you can use it however you want - e.g.
+as a pointer, ordinal number or some handle to your own data.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationUserData(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaAllocation VMA_NOT_NULL allocation,
+ void* VMA_NULLABLE pUserData);
+
+/** \brief Sets pName in given allocation to new value.
+
+`pName` must be either null, or pointer to a null-terminated string. The function
+makes local copy of the string and sets it as allocation's `pName`. String
+passed as pName doesn't need to be valid for whole lifetime of the allocation -
+you can free it after this call. String previously pointed by allocation's
+`pName` is freed from memory.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationName(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaAllocation VMA_NOT_NULL allocation,
+ const char* VMA_NULLABLE pName);
+
+/**
+\brief Given an allocation, returns Property Flags of its memory type.
+
+This is just a convenience function. Same information can be obtained using
+vmaGetAllocationInfo() + vmaGetMemoryProperties().
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationMemoryProperties(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaAllocation VMA_NOT_NULL allocation,
+ VkMemoryPropertyFlags* VMA_NOT_NULL pFlags);
+
+
+#if VMA_EXTERNAL_MEMORY_WIN32
+/**
+\brief Given an allocation, returns Win32 handle that may be imported by other processes or APIs.
+
+\param hTargetProcess Must be a valid handle to target process or null. If it's null, the function returns
+ handle for the current process.
+\param[out] pHandle Output parameter that returns the handle.
+
+The function fills `pHandle` with handle that can be used in target process.
+The handle is fetched using function `vkGetMemoryWin32HandleKHR`.
+When no longer needed, you must close it using:
+
+\code
+CloseHandle(handle);
+\endcode
+
+You can close it any time, before or after destroying the allocation object.
+It is reference-counted internally by Windows.
+
+Note the handle is returned for the entire `VkDeviceMemory` block that the allocation belongs to.
+If the allocation is sub-allocated from a larger block, you may need to consider the offset of the allocation
+(VmaAllocationInfo::offset).
+
+If the function fails with `VK_ERROR_FEATURE_NOT_PRESENT` error code, please double-check
+that VmaVulkanFunctions::vkGetMemoryWin32HandleKHR function pointer is set, e.g. either by using `VMA_DYNAMIC_VULKAN_FUNCTIONS`
+or by manually passing it through VmaAllocatorCreateInfo::pVulkanFunctions.
+
+For more information, see chapter \ref vk_khr_external_memory_win32.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaGetMemoryWin32Handle(VmaAllocator VMA_NOT_NULL allocator,
+ VmaAllocation VMA_NOT_NULL allocation, HANDLE hTargetProcess, HANDLE* VMA_NOT_NULL pHandle);
+#endif // VMA_EXTERNAL_MEMORY_WIN32
+
+/** \brief Maps memory represented by given allocation and returns pointer to it.
+
+Maps memory represented by given allocation to make it accessible to CPU code.
+When succeeded, `*ppData` contains pointer to first byte of this memory.
+
+\warning
+If the allocation is part of a bigger `VkDeviceMemory` block, returned pointer is
+correctly offset to the beginning of region assigned to this particular allocation.
+Unlike the result of `vkMapMemory`, it points to the allocation, not to the beginning of the whole block.
+You should not add VmaAllocationInfo::offset to it!
+
+Mapping is internally reference-counted and synchronized, so despite raw Vulkan
+function `vkMapMemory()` cannot be used to map same block of `VkDeviceMemory`
+multiple times simultaneously, it is safe to call this function on allocations
+assigned to the same memory block. Actual Vulkan memory will be mapped on first
+mapping and unmapped on last unmapping.
+
+If the function succeeded, you must call vmaUnmapMemory() to unmap the
+allocation when mapping is no longer needed or before freeing the allocation, at
+the latest.
+
+It is also safe to call this function multiple times on the same allocation. You
+must call vmaUnmapMemory() same number of times as you called vmaMapMemory().
+
+It is also safe to call this function on allocation created with
+#VMA_ALLOCATION_CREATE_MAPPED_BIT flag. Its memory stays mapped all the time.
+You must still call vmaUnmapMemory() same number of times as you called
+vmaMapMemory(). You must not call vmaUnmapMemory() additional time to free the
+"0-th" mapping made automatically due to #VMA_ALLOCATION_CREATE_MAPPED_BIT flag.
+
+This function fails when used on allocation made in memory type that is not
+`HOST_VISIBLE`.
+
+This function doesn't automatically flush or invalidate caches.
+If the allocation is made from a memory type that is not `HOST_COHERENT`,
+you also need to use vmaInvalidateAllocation() / vmaFlushAllocation(), as required by Vulkan specification.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaMapMemory(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaAllocation VMA_NOT_NULL allocation,
+ void* VMA_NULLABLE* VMA_NOT_NULL ppData);
+
+/** \brief Unmaps memory represented by given allocation, mapped previously using vmaMapMemory().
+
+For details, see description of vmaMapMemory().
+
+This function doesn't automatically flush or invalidate caches.
+If the allocation is made from a memory type that is not `HOST_COHERENT`,
+you also need to use vmaInvalidateAllocation() / vmaFlushAllocation(), as required by Vulkan specification.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaUnmapMemory(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaAllocation VMA_NOT_NULL allocation);
+
+/** \brief Flushes memory of given allocation.
+
+Calls `vkFlushMappedMemoryRanges()` for memory associated with given range of given allocation.
+It needs to be called after writing to a mapped memory for memory types that are not `HOST_COHERENT`.
+Unmap operation doesn't do that automatically.
+
+- `offset` must be relative to the beginning of allocation.
+- `size` can be `VK_WHOLE_SIZE`. It means all memory from `offset` to the end of given allocation.
+- `offset` and `size` don't have to be aligned.
+ They are internally rounded down/up to a multiple of `nonCoherentAtomSize`.
+- If `size` is 0, this call is ignored.
+- If memory type that the `allocation` belongs to is not `HOST_VISIBLE` or it is `HOST_COHERENT`,
+ this call is ignored.
+
+Warning! `offset` and `size` are relative to the contents of given `allocation`.
+If you mean whole allocation, you can pass 0 and `VK_WHOLE_SIZE`, respectively.
+Do not pass allocation's offset as `offset`!!!
+
+This function returns the `VkResult` from `vkFlushMappedMemoryRanges` if it is
+called, otherwise `VK_SUCCESS`.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocation(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaAllocation VMA_NOT_NULL allocation,
+ VkDeviceSize offset,
+ VkDeviceSize size);
+
+/** \brief Invalidates memory of given allocation.
+
+Calls `vkInvalidateMappedMemoryRanges()` for memory associated with given range of given allocation.
+It needs to be called before reading from a mapped memory for memory types that are not `HOST_COHERENT`.
+Map operation doesn't do that automatically.
+
+- `offset` must be relative to the beginning of allocation.
+- `size` can be `VK_WHOLE_SIZE`. It means all memory from `offset` to the end of given allocation.
+- `offset` and `size` don't have to be aligned.
+ They are internally rounded down/up to a multiple of `nonCoherentAtomSize`.
+- If `size` is 0, this call is ignored.
+- If memory type that the `allocation` belongs to is not `HOST_VISIBLE` or it is `HOST_COHERENT`,
+ this call is ignored.
+
+Warning! `offset` and `size` are relative to the contents of given `allocation`.
+If you mean whole allocation, you can pass 0 and `VK_WHOLE_SIZE`, respectively.
+Do not pass allocation's offset as `offset`!!!
+
+This function returns the `VkResult` from `vkInvalidateMappedMemoryRanges` if
+it is called, otherwise `VK_SUCCESS`.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocation(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaAllocation VMA_NOT_NULL allocation,
+ VkDeviceSize offset,
+ VkDeviceSize size);
+
+/** \brief Flushes memory of given set of allocations.
+
+Calls `vkFlushMappedMemoryRanges()` for memory associated with given ranges of given allocations.
+For more information, see documentation of vmaFlushAllocation().
+
+\param allocator
+\param allocationCount
+\param allocations
+\param offsets If not null, it must point to an array of offsets of regions to flush, relative to the beginning of respective allocations. Null means all offsets are zero.
+\param sizes If not null, it must point to an array of sizes of regions to flush in respective allocations. Null means `VK_WHOLE_SIZE` for all allocations.
+
+This function returns the `VkResult` from `vkFlushMappedMemoryRanges` if it is
+called, otherwise `VK_SUCCESS`.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocations(
+ VmaAllocator VMA_NOT_NULL allocator,
+ uint32_t allocationCount,
+ const VmaAllocation VMA_NOT_NULL* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) allocations,
+ const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) offsets,
+ const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) sizes);
+
+/** \brief Invalidates memory of given set of allocations.
+
+Calls `vkInvalidateMappedMemoryRanges()` for memory associated with given ranges of given allocations.
+For more information, see documentation of vmaInvalidateAllocation().
+
+\param allocator
+\param allocationCount
+\param allocations
+\param offsets If not null, it must point to an array of offsets of regions to invalidate, relative to the beginning of respective allocations. Null means all offsets are zero.
+\param sizes If not null, it must point to an array of sizes of regions to invalidate in respective allocations. Null means `VK_WHOLE_SIZE` for all allocations.
+
+This function returns the `VkResult` from `vkInvalidateMappedMemoryRanges` if it is
+called, otherwise `VK_SUCCESS`.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocations(
+ VmaAllocator VMA_NOT_NULL allocator,
+ uint32_t allocationCount,
+ const VmaAllocation VMA_NOT_NULL* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) allocations,
+ const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) offsets,
+ const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) sizes);
+
+/** \brief Maps the allocation temporarily if needed, copies data from specified host pointer to it, and flushes the memory from the host caches if needed.
+
+\param allocator
+\param pSrcHostPointer Pointer to the host data that becomes source of the copy.
+\param dstAllocation Handle to the allocation that becomes destination of the copy.
+\param dstAllocationLocalOffset Offset within `dstAllocation` where to write copied data, in bytes.
+\param size Number of bytes to copy.
+
+This is a convenience function that allows to copy data from a host pointer to an allocation easily.
+Same behavior can be achieved by calling vmaMapMemory(), `memcpy()`, vmaUnmapMemory(), vmaFlushAllocation().
+
+This function can be called only for allocations created in a memory type that has `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` flag.
+It can be ensured e.g. by using #VMA_MEMORY_USAGE_AUTO and #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or
+#VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT.
+Otherwise, the function will fail and generate a Validation Layers error.
+
+`dstAllocationLocalOffset` is relative to the contents of given `dstAllocation`.
+If you mean whole allocation, you should pass 0.
+Do not pass allocation's offset within device memory block as this parameter!
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyMemoryToAllocation(
+ VmaAllocator VMA_NOT_NULL allocator,
+ const void* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(size) pSrcHostPointer,
+ VmaAllocation VMA_NOT_NULL dstAllocation,
+ VkDeviceSize dstAllocationLocalOffset,
+ VkDeviceSize size);
+
+/** \brief Invalidates memory in the host caches if needed, maps the allocation temporarily if needed, and copies data from it to a specified host pointer.
+
+\param allocator
+\param srcAllocation Handle to the allocation that becomes source of the copy.
+\param srcAllocationLocalOffset Offset within `srcAllocation` where to read copied data, in bytes.
+\param pDstHostPointer Pointer to the host memory that becomes destination of the copy.
+\param size Number of bytes to copy.
+
+This is a convenience function that allows to copy data from an allocation to a host pointer easily.
+Same behavior can be achieved by calling vmaInvalidateAllocation(), vmaMapMemory(), `memcpy()`, vmaUnmapMemory().
+
+This function should be called only for allocations created in a memory type that has `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`
+and `VK_MEMORY_PROPERTY_HOST_CACHED_BIT` flags.
+It can be ensured e.g. by using #VMA_MEMORY_USAGE_AUTO and #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT.
+Otherwise, the function may fail and generate a Validation Layers error.
+It may also work very slowly when reading from an uncached memory.
+
+`srcAllocationLocalOffset` is relative to the contents of given `srcAllocation`.
+If you mean whole allocation, you should pass 0.
+Do not pass allocation's offset within device memory block as this parameter!
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyAllocationToMemory(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaAllocation VMA_NOT_NULL srcAllocation,
+ VkDeviceSize srcAllocationLocalOffset,
+ void* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(size) pDstHostPointer,
+ VkDeviceSize size);
+
+/** \brief Checks magic number in margins around all allocations in given memory types (in both default and custom pools) in search for corruptions.
+
+\param allocator
+\param memoryTypeBits Bit mask, where each bit set means that a memory type with that index should be checked.
+
+Corruption detection is enabled only when `VMA_DEBUG_DETECT_CORRUPTION` macro is defined to nonzero,
+`VMA_DEBUG_MARGIN` is defined to nonzero and only for memory types that are
+`HOST_VISIBLE` and `HOST_COHERENT`. For more information, see [Corruption detection](@ref debugging_memory_usage_corruption_detection).
+
+Possible return values:
+
+- `VK_ERROR_FEATURE_NOT_PRESENT` - corruption detection is not enabled for any of specified memory types.
+- `VK_SUCCESS` - corruption detection has been performed and succeeded.
+- `VK_ERROR_UNKNOWN` - corruption detection has been performed and found memory corruptions around one of the allocations.
+ `VMA_ASSERT` is also fired in that case.
+- Other value: Error returned by Vulkan, e.g. memory mapping failure.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckCorruption(
+ VmaAllocator VMA_NOT_NULL allocator,
+ uint32_t memoryTypeBits);
+
+/** \brief Begins defragmentation process.
+
+\param allocator Allocator object.
+\param pInfo Structure filled with parameters of defragmentation.
+\param[out] pContext Context object that must be passed to vmaEndDefragmentation() to finish defragmentation.
+\returns
+- `VK_SUCCESS` if defragmentation can begin.
+- `VK_ERROR_FEATURE_NOT_PRESENT` if defragmentation is not supported.
+
+For more information about defragmentation, see documentation chapter:
+[Defragmentation](@ref defragmentation).
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentation(
+ VmaAllocator VMA_NOT_NULL allocator,
+ const VmaDefragmentationInfo* VMA_NOT_NULL pInfo,
+ VmaDefragmentationContext VMA_NULLABLE* VMA_NOT_NULL pContext);
+
+/** \brief Ends defragmentation process.
+
+\param allocator Allocator object.
+\param context Context object that has been created by vmaBeginDefragmentation().
+\param[out] pStats Optional stats for the defragmentation. Can be null.
+
+Use this function to finish defragmentation started by vmaBeginDefragmentation().
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaEndDefragmentation(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaDefragmentationContext VMA_NOT_NULL context,
+ VmaDefragmentationStats* VMA_NULLABLE pStats);
+
+/** \brief Starts single defragmentation pass.
+
+\param allocator Allocator object.
+\param context Context object that has been created by vmaBeginDefragmentation().
+\param[out] pPassInfo Computed information for current pass.
+\returns
+- `VK_SUCCESS` if no more moves are possible. Then you can omit call to vmaEndDefragmentationPass() and simply end whole defragmentation.
+- `VK_INCOMPLETE` if there are pending moves returned in `pPassInfo`. You need to perform them, call vmaEndDefragmentationPass(),
+ and then preferably try another pass with vmaBeginDefragmentationPass().
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentationPass(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaDefragmentationContext VMA_NOT_NULL context,
+ VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo);
+
+/** \brief Ends single defragmentation pass.
+
+\param allocator Allocator object.
+\param context Context object that has been created by vmaBeginDefragmentation().
+\param pPassInfo Computed information for current pass filled by vmaBeginDefragmentationPass() and possibly modified by you.
+
+Returns `VK_SUCCESS` if no more moves are possible or `VK_INCOMPLETE` if more defragmentations are possible.
+
+Ends incremental defragmentation pass and commits all defragmentation moves from `pPassInfo`.
+After this call:
+
+- Allocations at `pPassInfo[i].srcAllocation` that had `pPassInfo[i].operation ==` #VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY
+ (which is the default) will be pointing to the new destination place.
+- Allocation at `pPassInfo[i].srcAllocation` that had `pPassInfo[i].operation ==` #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY
+ will be freed.
+
+If no more moves are possible you can end whole defragmentation.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaEndDefragmentationPass(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaDefragmentationContext VMA_NOT_NULL context,
+ VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo);
+
+/** \brief Binds buffer to allocation.
+
+Binds specified buffer to region of memory represented by specified allocation.
+Gets `VkDeviceMemory` handle and offset from the allocation.
+If you want to create a buffer, allocate memory for it and bind them together separately,
+you should use this function for binding instead of standard `vkBindBufferMemory()`,
+because it ensures proper synchronization so that when a `VkDeviceMemory` object is used by multiple
+allocations, calls to `vkBind*Memory()` or `vkMapMemory()` won't happen from multiple threads simultaneously
+(which is illegal in Vulkan).
+
+It is recommended to use function vmaCreateBuffer() instead of this one.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaAllocation VMA_NOT_NULL allocation,
+ VkBuffer VMA_NOT_NULL_NON_DISPATCHABLE buffer);
+
+/** \brief Binds buffer to allocation with additional parameters.
+
+\param allocator
+\param allocation
+\param allocationLocalOffset Additional offset to be added while binding, relative to the beginning of the `allocation`. Normally it should be 0.
+\param buffer
+\param pNext A chain of structures to be attached to `VkBindBufferMemoryInfoKHR` structure used internally. Normally it should be null.
+
+This function is similar to vmaBindBufferMemory(), but it provides additional parameters.
+
+If `pNext` is not null, #VmaAllocator object must have been created with #VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT flag
+or with VmaAllocatorCreateInfo::vulkanApiVersion `>= VK_API_VERSION_1_1`. Otherwise the call fails.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory2(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaAllocation VMA_NOT_NULL allocation,
+ VkDeviceSize allocationLocalOffset,
+ VkBuffer VMA_NOT_NULL_NON_DISPATCHABLE buffer,
+ const void* VMA_NULLABLE VMA_EXTENDS_VK_STRUCT(VkBindBufferMemoryInfoKHR) pNext);
+
+/** \brief Binds image to allocation.
+
+Binds specified image to region of memory represented by specified allocation.
+Gets `VkDeviceMemory` handle and offset from the allocation.
+If you want to create an image, allocate memory for it and bind them together separately,
+you should use this function for binding instead of standard `vkBindImageMemory()`,
+because it ensures proper synchronization so that when a `VkDeviceMemory` object is used by multiple
+allocations, calls to `vkBind*Memory()` or `vkMapMemory()` won't happen from multiple threads simultaneously
+(which is illegal in Vulkan).
+
+It is recommended to use function vmaCreateImage() instead of this one.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaAllocation VMA_NOT_NULL allocation,
+ VkImage VMA_NOT_NULL_NON_DISPATCHABLE image);
+
+/** \brief Binds image to allocation with additional parameters.
+
+\param allocator
+\param allocation
+\param allocationLocalOffset Additional offset to be added while binding, relative to the beginning of the `allocation`. Normally it should be 0.
+\param image
+\param pNext A chain of structures to be attached to `VkBindImageMemoryInfoKHR` structure used internally. Normally it should be null.
+
+This function is similar to vmaBindImageMemory(), but it provides additional parameters.
+
+If `pNext` is not null, #VmaAllocator object must have been created with #VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT flag
+or with VmaAllocatorCreateInfo::vulkanApiVersion `>= VK_API_VERSION_1_1`. Otherwise the call fails.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory2(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaAllocation VMA_NOT_NULL allocation,
+ VkDeviceSize allocationLocalOffset,
+ VkImage VMA_NOT_NULL_NON_DISPATCHABLE image,
+ const void* VMA_NULLABLE VMA_EXTENDS_VK_STRUCT(VkBindImageMemoryInfoKHR) pNext);
+
+/** \brief Creates a new `VkBuffer`, allocates and binds memory for it.
+
+\param allocator
+\param pBufferCreateInfo
+\param pAllocationCreateInfo
+\param[out] pBuffer Buffer that was created.
+\param[out] pAllocation Allocation that was created.
+\param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo().
+
+This function automatically:
+
+-# Creates buffer.
+-# Allocates appropriate memory for it.
+-# Binds the buffer with the memory.
+
+If any of these operations fail, buffer and allocation are not created,
+returned value is negative error code, `*pBuffer` and `*pAllocation` are null.
+
+If the function succeeded, you must destroy both buffer and allocation when you
+no longer need them using either convenience function vmaDestroyBuffer() or
+separately, using `vkDestroyBuffer()` and vmaFreeMemory().
+
+If #VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT flag was used,
+VK_KHR_dedicated_allocation extension is used internally to query driver whether
+it requires or prefers the new buffer to have dedicated allocation. If yes,
+and if dedicated allocation is possible
+(#VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT is not used), it creates dedicated
+allocation for this buffer, just like when using
+#VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT.
+
+\note This function creates a new `VkBuffer`. Sub-allocation of parts of one large buffer,
+although recommended as a good practice, is out of scope of this library and could be implemented
+by the user as a higher-level logic on top of VMA.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBuffer(
+ VmaAllocator VMA_NOT_NULL allocator,
+ const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo,
+ const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo,
+ VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer,
+ VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation,
+ VmaAllocationInfo* VMA_NULLABLE pAllocationInfo);
+
+/** \brief Creates a buffer with additional minimum alignment.
+
+Similar to vmaCreateBuffer(), but provides an additional parameter `minAlignment` that specifies a custom
+minimum alignment to be used when placing the buffer inside a larger memory block, which may be needed e.g.
+for interop with OpenGL.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBufferWithAlignment(
+ VmaAllocator VMA_NOT_NULL allocator,
+ const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo,
+ const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo,
+ VkDeviceSize minAlignment,
+ VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer,
+ VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation,
+ VmaAllocationInfo* VMA_NULLABLE pAllocationInfo);
+
+/** \brief Creates a new `VkBuffer`, binds already created memory for it.
+
+\param allocator
+\param allocation Allocation that provides memory to be used for binding new buffer to it.
+\param pBufferCreateInfo
+\param[out] pBuffer Buffer that was created.
+
+This function automatically:
+
+-# Creates buffer.
+-# Binds the buffer with the supplied memory.
+
+If any of these operations fail, buffer is not created,
+returned value is negative error code and `*pBuffer` is null.
+
+If the function succeeded, you must destroy the buffer when you
+no longer need it using `vkDestroyBuffer()`. If you want to also destroy the corresponding
+allocation you can use convenience function vmaDestroyBuffer().
+
+\note There is a new version of this function augmented with parameter `allocationLocalOffset` - see vmaCreateAliasingBuffer2().
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaAllocation VMA_NOT_NULL allocation,
+ const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo,
+ VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer);
+
+/** \brief Creates a new `VkBuffer`, binds already created memory for it.
+
+\param allocator
+\param allocation Allocation that provides memory to be used for binding new buffer to it.
+\param allocationLocalOffset Additional offset to be added while binding, relative to the beginning of the allocation. Normally it should be 0.
+\param pBufferCreateInfo
+\param[out] pBuffer Buffer that was created.
+
+This function automatically:
+
+-# Creates buffer.
+-# Binds the buffer with the supplied memory.
+
+If any of these operations fail, buffer is not created,
+returned value is negative error code and `*pBuffer` is null.
+
+If the function succeeded, you must destroy the buffer when you
+no longer need it using `vkDestroyBuffer()`. If you want to also destroy the corresponding
+allocation you can use convenience function vmaDestroyBuffer().
+
+\note This is a new version of the function augmented with parameter `allocationLocalOffset`.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer2(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaAllocation VMA_NOT_NULL allocation,
+ VkDeviceSize allocationLocalOffset,
+ const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo,
+ VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer);
+
+/** \brief Destroys Vulkan buffer and frees allocated memory.
+
+This is just a convenience function equivalent to:
+
+\code
+vkDestroyBuffer(device, buffer, allocationCallbacks);
+vmaFreeMemory(allocator, allocation);
+\endcode
+
+It is safe to pass null as buffer and/or allocation.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaDestroyBuffer(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VkBuffer VMA_NULLABLE_NON_DISPATCHABLE buffer,
+ VmaAllocation VMA_NULLABLE allocation);
+
+/// Function similar to vmaCreateBuffer().
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage(
+ VmaAllocator VMA_NOT_NULL allocator,
+ const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo,
+ const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo,
+ VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage,
+ VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation,
+ VmaAllocationInfo* VMA_NULLABLE pAllocationInfo);
+
+/// Function similar to vmaCreateAliasingBuffer() but for images.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaAllocation VMA_NOT_NULL allocation,
+ const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo,
+ VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage);
+
+/// Function similar to vmaCreateAliasingBuffer2() but for images.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage2(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaAllocation VMA_NOT_NULL allocation,
+ VkDeviceSize allocationLocalOffset,
+ const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo,
+ VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage);
+
+/** \brief Destroys Vulkan image and frees allocated memory.
+
+This is just a convenience function equivalent to:
+
+\code
+vkDestroyImage(device, image, allocationCallbacks);
+vmaFreeMemory(allocator, allocation);
+\endcode
+
+It is safe to pass null as image and/or allocation.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaDestroyImage(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VkImage VMA_NULLABLE_NON_DISPATCHABLE image,
+ VmaAllocation VMA_NULLABLE allocation);
+
+/** @} */
+
+/**
+\addtogroup group_virtual
+@{
+*/
+
+/** \brief Creates new #VmaVirtualBlock object.
+
+\param pCreateInfo Parameters for creation.
+\param[out] pVirtualBlock Returned virtual block object or `VMA_NULL` if creation failed.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateVirtualBlock(
+ const VmaVirtualBlockCreateInfo* VMA_NOT_NULL pCreateInfo,
+ VmaVirtualBlock VMA_NULLABLE* VMA_NOT_NULL pVirtualBlock);
+
+/** \brief Destroys #VmaVirtualBlock object.
+
+Please note that you should consciously handle virtual allocations that could remain unfreed in the block.
+You should either free them individually using vmaVirtualFree() or call vmaClearVirtualBlock()
+if you are sure this is what you want. If you do neither, an assert is called.
+
+If you keep pointers to some additional metadata associated with your virtual allocations in their `pUserData`,
+don't forget to free them.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaDestroyVirtualBlock(
+ VmaVirtualBlock VMA_NULLABLE virtualBlock);
+
+/** \brief Returns true if the #VmaVirtualBlock is empty - contains 0 virtual allocations and has all its space available for new allocations.
+*/
+VMA_CALL_PRE VkBool32 VMA_CALL_POST vmaIsVirtualBlockEmpty(
+ VmaVirtualBlock VMA_NOT_NULL virtualBlock);
+
+/** \brief Returns information about a specific virtual allocation within a virtual block, like its size and `pUserData` pointer.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualAllocationInfo(
+ VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+ VmaVirtualAllocation VMA_NOT_NULL_NON_DISPATCHABLE allocation, VmaVirtualAllocationInfo* VMA_NOT_NULL pVirtualAllocInfo);
+
+/** \brief Allocates new virtual allocation inside given #VmaVirtualBlock.
+
+If the allocation fails due to not enough free space available, `VK_ERROR_OUT_OF_DEVICE_MEMORY` is returned
+(even though the function never allocates actual GPU memory).
+`pAllocation` is then set to `VK_NULL_HANDLE` and `pOffset`, if not null, is set to `UINT64_MAX`.
+
+\param virtualBlock Virtual block
+\param pCreateInfo Parameters for the allocation
+\param[out] pAllocation Returned handle of the new allocation
+\param[out] pOffset Returned offset of the new allocation. Optional, can be null.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaVirtualAllocate(
+ VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+ const VmaVirtualAllocationCreateInfo* VMA_NOT_NULL pCreateInfo,
+ VmaVirtualAllocation VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pAllocation,
+ VkDeviceSize* VMA_NULLABLE pOffset);
+
+/** \brief Frees virtual allocation inside given #VmaVirtualBlock.
+
+It is correct to call this function with `allocation == VK_NULL_HANDLE` - it does nothing.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaVirtualFree(
+ VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+ VmaVirtualAllocation VMA_NULLABLE_NON_DISPATCHABLE allocation);
+
+/** \brief Frees all virtual allocations inside given #VmaVirtualBlock.
+
+You must either call this function or free each virtual allocation individually with vmaVirtualFree()
+before destroying a virtual block. Otherwise, an assert is called.
+
+If you keep pointer to some additional metadata associated with your virtual allocation in its `pUserData`,
+don't forget to free it as well.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaClearVirtualBlock(
+ VmaVirtualBlock VMA_NOT_NULL virtualBlock);
+
+/** \brief Changes custom pointer associated with given virtual allocation.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaSetVirtualAllocationUserData(
+ VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+ VmaVirtualAllocation VMA_NOT_NULL_NON_DISPATCHABLE allocation,
+ void* VMA_NULLABLE pUserData);
+
+/** \brief Calculates and returns statistics about virtual allocations and memory usage in given #VmaVirtualBlock.
+
+This function is fast to call. For more detailed statistics, see vmaCalculateVirtualBlockStatistics().
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualBlockStatistics(
+ VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+ VmaStatistics* VMA_NOT_NULL pStats);
+
+/** \brief Calculates and returns detailed statistics about virtual allocations and memory usage in given #VmaVirtualBlock.
+
+This function is slow to call. Use for debugging purposes.
+For less detailed statistics, see vmaGetVirtualBlockStatistics().
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaCalculateVirtualBlockStatistics(
+ VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+ VmaDetailedStatistics* VMA_NOT_NULL pStats);
+
+/** @} */
+
+#if VMA_STATS_STRING_ENABLED
+/**
+\addtogroup group_stats
+@{
+*/
+
+/** \brief Builds and returns a null-terminated string in JSON format with information about given #VmaVirtualBlock.
+\param virtualBlock Virtual block.
+\param[out] ppStatsString Returned string.
+\param detailedMap Pass `VK_FALSE` to only obtain statistics as returned by vmaCalculateVirtualBlockStatistics(). Pass `VK_TRUE` to also obtain full list of allocations and free spaces.
+
+Returned string must be freed using vmaFreeVirtualBlockStatsString().
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaBuildVirtualBlockStatsString(
+ VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+ char* VMA_NULLABLE* VMA_NOT_NULL ppStatsString,
+ VkBool32 detailedMap);
+
+/// Frees a string returned by vmaBuildVirtualBlockStatsString().
+VMA_CALL_PRE void VMA_CALL_POST vmaFreeVirtualBlockStatsString(
+ VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+ char* VMA_NULLABLE pStatsString);
+
+/** \brief Builds and returns statistics as a null-terminated string in JSON format.
+\param allocator
+\param[out] ppStatsString Must be freed using vmaFreeStatsString() function.
+\param detailedMap
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaBuildStatsString(
+ VmaAllocator VMA_NOT_NULL allocator,
+ char* VMA_NULLABLE* VMA_NOT_NULL ppStatsString,
+ VkBool32 detailedMap);
+
+/// Frees a string returned by vmaBuildStatsString().
+VMA_CALL_PRE void VMA_CALL_POST vmaFreeStatsString(
+ VmaAllocator VMA_NOT_NULL allocator,
+ char* VMA_NULLABLE pStatsString);
+
+/** @} */
+
+#endif // VMA_STATS_STRING_ENABLED
+
+#endif // _VMA_FUNCTION_HEADERS
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // AMD_VULKAN_MEMORY_ALLOCATOR_H
+
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+//
+// IMPLEMENTATION
+//
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+
+// For Visual Studio IntelliSense.
+#if defined(__cplusplus) && defined(__INTELLISENSE__)
+#define VMA_IMPLEMENTATION
+#endif
+
+#ifdef VMA_IMPLEMENTATION
+#undef VMA_IMPLEMENTATION
+
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <cinttypes>
+#include <utility>
+#include <type_traits>
+
+#if !defined(VMA_CPP20)
+ #if __cplusplus >= 202002L || _MSVC_LANG >= 202002L // C++20
+ #define VMA_CPP20 1
+ #else
+ #define VMA_CPP20 0
+ #endif
+#endif
+
+#ifdef _MSC_VER
+ #include <intrin.h> // For functions like __popcnt, _BitScanForward etc.
+#endif
+#if VMA_CPP20
+ #include <bit>
+#endif
+
+#if VMA_STATS_STRING_ENABLED
+ #include <cstdio> // For snprintf
+#endif
+
+/*******************************************************************************
+CONFIGURATION SECTION
+
+Define some of these macros before each #include of this header or change them
+here if you need other than default behavior depending on your environment.
+*/
+#ifndef _VMA_CONFIGURATION
+
+/*
+Define this macro to 1 to make the library fetch pointers to Vulkan functions
+internally, like:
+
+ vulkanFunctions.vkAllocateMemory = &vkAllocateMemory;
+*/
+#if !defined(VMA_STATIC_VULKAN_FUNCTIONS) && !defined(VK_NO_PROTOTYPES)
+ #define VMA_STATIC_VULKAN_FUNCTIONS 1
+#endif
+
+/*
+Define this macro to 1 to make the library fetch pointers to Vulkan functions
+internally, like:
+
+ vulkanFunctions.vkAllocateMemory = (PFN_vkAllocateMemory)vkGetDeviceProcAddr(device, "vkAllocateMemory");
+
+To use this feature in new versions of VMA you now have to pass
+VmaVulkanFunctions::vkGetInstanceProcAddr and vkGetDeviceProcAddr as
+VmaAllocatorCreateInfo::pVulkanFunctions. Other members can be null.
+*/
+#if !defined(VMA_DYNAMIC_VULKAN_FUNCTIONS)
+ #define VMA_DYNAMIC_VULKAN_FUNCTIONS 1
+#endif
+
+#ifndef VMA_USE_STL_SHARED_MUTEX
+ #if __cplusplus >= 201703L || _MSVC_LANG >= 201703L // C++17
+ #define VMA_USE_STL_SHARED_MUTEX 1
+ // Visual Studio defines __cplusplus properly only when passed the additional parameter /Zc:__cplusplus.
+ // Otherwise it is always 199711L, even though shared_mutex has worked since Visual Studio 2015 Update 2.
+ #elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 190023918 && __cplusplus == 199711L && _MSVC_LANG >= 201703L
+ #define VMA_USE_STL_SHARED_MUTEX 1
+ #else
+ #define VMA_USE_STL_SHARED_MUTEX 0
+ #endif
+#endif
+
+/*
+Define this macro to include custom header files without having to edit this file directly, e.g.:
+
+ // Inside of "my_vma_configuration_user_includes.h":
+
+ #include "my_custom_assert.h" // for MY_CUSTOM_ASSERT
+ #include "my_custom_min.h" // for my_custom_min
+ #include <algorithm>
+ #include <mutex>
+
+ // Inside a different file, which includes "vk_mem_alloc.h":
+
+ #define VMA_CONFIGURATION_USER_INCLUDES_H "my_vma_configuration_user_includes.h"
+ #define VMA_ASSERT(expr) MY_CUSTOM_ASSERT(expr)
+ #define VMA_MIN(v1, v2) (my_custom_min(v1, v2))
+ #include "vk_mem_alloc.h"
+ ...
+
+The following headers are used in this CONFIGURATION section only, so feel free to
+remove them if not needed.
+*/
+#if !defined(VMA_CONFIGURATION_USER_INCLUDES_H)
+ #include <cassert> // for assert
+ #include <algorithm> // for min, max, swap
+ #include <mutex>
+#else
+ #include VMA_CONFIGURATION_USER_INCLUDES_H
+#endif
+
+#ifndef VMA_NULL
+ // Value used as null pointer. Define it to e.g.: nullptr, NULL, 0, (void*)0.
+ #define VMA_NULL nullptr
+#endif
+
+#ifndef VMA_FALLTHROUGH
+ #if __cplusplus >= 201703L || _MSVC_LANG >= 201703L // C++17
+ #define VMA_FALLTHROUGH [[fallthrough]]
+ #else
+ #define VMA_FALLTHROUGH
+ #endif
+#endif
+
+// Normal assert to check for programmer's errors, especially in Debug configuration.
+#ifndef VMA_ASSERT
+ #ifdef NDEBUG
+ #define VMA_ASSERT(expr)
+ #else
+ #define VMA_ASSERT(expr) assert(expr)
+ #endif
+#endif
+
+// Assert that will be called very often, like inside data structures e.g. operator[].
+// Making it non-empty can make program slow.
+#ifndef VMA_HEAVY_ASSERT
+ #ifdef NDEBUG
+ #define VMA_HEAVY_ASSERT(expr)
+ #else
+ #define VMA_HEAVY_ASSERT(expr) //VMA_ASSERT(expr)
+ #endif
+#endif
+
+// Assert used for reporting memory leaks - unfreed allocations.
+#ifndef VMA_ASSERT_LEAK
+ #define VMA_ASSERT_LEAK(expr) VMA_ASSERT(expr)
+#endif
+
+// If your compiler is not compatible with C++17 and definition of
+// aligned_alloc() function is missing, uncommenting following line may help:
+
+//#include <malloc.h>
+
+// vma_aligned_alloc(alignment, size): default backend of VMA_SYSTEM_ALIGNED_MALLOC.
+// Exactly one of the definitions below is compiled, selected by platform and
+// language-standard macros. Note the parameter order: alignment first, then size.
+#if defined(__ANDROID_API__) && (__ANDROID_API__ < 16)
+// Android API level < 16: allocate with memalign().
+#include <cstdlib>
+static void* vma_aligned_alloc(size_t alignment, size_t size)
+{
+ // alignment must be >= sizeof(void*)
+ if(alignment < sizeof(void*))
+ {
+ alignment = sizeof(void*);
+ }
+
+ return memalign(alignment, size);
+}
+#elif defined(__APPLE__) || defined(__ANDROID__) || (defined(__linux__) && defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC))
+// Apple, Android, and Linux/libstdc++ builds without aligned_alloc: allocate with posix_memalign().
+#include <cstdlib>
+
+#if defined(__APPLE__)
+#include <AvailabilityMacros.h>
+#endif
+
+static void* vma_aligned_alloc(size_t alignment, size_t size)
+{
+ // Unfortunately, aligned_alloc causes VMA to crash due to it returning null pointers. (At least under 11.4)
+ // Therefore, for now disable this specific exception until a proper solution is found.
+ //#if defined(__APPLE__) && (defined(MAC_OS_X_VERSION_10_16) || defined(__IPHONE_14_0))
+ //#if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_16 || __IPHONE_OS_VERSION_MAX_ALLOWED >= __IPHONE_14_0
+ // // For C++14, usr/include/malloc/_malloc.h declares aligned_alloc()) only
+ // // with the MacOSX11.0 SDK in Xcode 12 (which is what adds
+ // // MAC_OS_X_VERSION_10_16), even though the function is marked
+ // // available for 10.15. That is why the preprocessor checks for 10.16 but
+ // // the __builtin_available checks for 10.15.
+ // // People who use C++17 could call aligned_alloc with the 10.15 SDK already.
+ // if (__builtin_available(macOS 10.15, iOS 13, *))
+ // return aligned_alloc(alignment, size);
+ //#endif
+ //#endif
+
+ // alignment must be >= sizeof(void*)
+ if(alignment < sizeof(void*))
+ {
+ alignment = sizeof(void*);
+ }
+
+ // posix_memalign() reports success with 0; any other result is mapped to a null return.
+ void *pointer;
+ if(posix_memalign(&pointer, alignment, size) == 0)
+ return pointer;
+ return VMA_NULL;
+}
+#elif defined(_WIN32)
+// Windows: _aligned_malloc() takes (size, alignment), the reverse of this function's parameter order.
+static void* vma_aligned_alloc(size_t alignment, size_t size)
+{
+ return _aligned_malloc(size, alignment);
+}
+#elif __cplusplus >= 201703L || _MSVC_LANG >= 201703L // C++17
+// C++17 or later on the remaining platforms: forward directly to aligned_alloc().
+// NOTE(review): alignment and size are passed through unmodified here; confirm the target
+// C library accepts sizes that are not a multiple of the alignment.
+static void* vma_aligned_alloc(size_t alignment, size_t size)
+{
+ return aligned_alloc(alignment, size);
+}
+#else
+// No known aligned allocator: trip VMA_ASSERT and return null.
+static void* vma_aligned_alloc(size_t alignment, size_t size)
+{
+ VMA_ASSERT(0 && "Could not implement aligned_alloc automatically. Please enable C++17 or later in your compiler or provide custom implementation of macro VMA_SYSTEM_ALIGNED_MALLOC (and VMA_SYSTEM_ALIGNED_FREE if needed) using the API of your system.");
+ return VMA_NULL;
+}
+#endif
+
+// vma_aligned_free(ptr): default backend of VMA_SYSTEM_ALIGNED_FREE.
+// Windows releases the block with _aligned_free(); every other platform uses free().
+#if defined(_WIN32)
+static void vma_aligned_free(void* ptr)
+{
+ _aligned_free(ptr);
+}
+#else
+static void vma_aligned_free(void* VMA_NULLABLE ptr)
+{
+ free(ptr);
+}
+#endif
+
+#ifndef VMA_ALIGN_OF
+ #define VMA_ALIGN_OF(type) (alignof(type))
+#endif
+
+#ifndef VMA_SYSTEM_ALIGNED_MALLOC
+ #define VMA_SYSTEM_ALIGNED_MALLOC(size, alignment) vma_aligned_alloc((alignment), (size))
+#endif
+
+#ifndef VMA_SYSTEM_ALIGNED_FREE
+ // VMA_SYSTEM_FREE is the old name, but might have been defined by the user
+ #if defined(VMA_SYSTEM_FREE)
+ #define VMA_SYSTEM_ALIGNED_FREE(ptr) VMA_SYSTEM_FREE(ptr)
+ #else
+ #define VMA_SYSTEM_ALIGNED_FREE(ptr) vma_aligned_free(ptr)
+ #endif
+#endif
+
+#ifndef VMA_COUNT_BITS_SET
+ // Returns number of bits set to 1 in (v)
+ #define VMA_COUNT_BITS_SET(v) VmaCountBitsSet(v)
+#endif
+
+#ifndef VMA_BITSCAN_LSB
+ // Scans integer for index of first nonzero value from the Least Significant Bit (LSB). If mask is 0 then returns UINT8_MAX
+ #define VMA_BITSCAN_LSB(mask) VmaBitScanLSB(mask)
+#endif
+
+#ifndef VMA_BITSCAN_MSB
+ // Scans integer for index of first nonzero value from the Most Significant Bit (MSB). If mask is 0 then returns UINT8_MAX
+ #define VMA_BITSCAN_MSB(mask) VmaBitScanMSB(mask)
+#endif
+
+#ifndef VMA_MIN
+ #define VMA_MIN(v1, v2) ((std::min)((v1), (v2)))
+#endif
+
+#ifndef VMA_MAX
+ #define VMA_MAX(v1, v2) ((std::max)((v1), (v2)))
+#endif
+
+#ifndef VMA_SORT
+ #define VMA_SORT(beg, end, cmp) std::sort(beg, end, cmp)
+#endif
+
+#ifndef VMA_DEBUG_LOG_FORMAT
+ #define VMA_DEBUG_LOG_FORMAT(format, ...)
+ /*
+ #define VMA_DEBUG_LOG_FORMAT(format, ...) do { \
+ printf((format), __VA_ARGS__); \
+ printf("\n"); \
+ } while(false)
+ */
+#endif
+
+#ifndef VMA_DEBUG_LOG
+ #define VMA_DEBUG_LOG(str) VMA_DEBUG_LOG_FORMAT("%s", (str))
+#endif
+
+#ifndef VMA_LEAK_LOG_FORMAT
+ #define VMA_LEAK_LOG_FORMAT(format, ...) VMA_DEBUG_LOG_FORMAT(format, __VA_ARGS__)
+#endif
+
+#ifndef VMA_CLASS_NO_COPY
+ #define VMA_CLASS_NO_COPY(className) \
+ private: \
+ className(const className&) = delete; \
+ className& operator=(const className&) = delete;
+#endif
+#ifndef VMA_CLASS_NO_COPY_NO_MOVE
+ #define VMA_CLASS_NO_COPY_NO_MOVE(className) \
+ private: \
+ className(const className&) = delete; \
+ className(className&&) = delete; \
+ className& operator=(const className&) = delete; \
+ className& operator=(className&&) = delete;
+#endif
+
+// Define this macro to 1 to enable functions: vmaBuildStatsString, vmaFreeStatsString.
+#if VMA_STATS_STRING_ENABLED
+ // String-formatting helpers, compiled only when VMA_STATS_STRING_ENABLED is nonzero.
+ // Each one writes into `outStr` through snprintf() with `strLen` as the buffer size.
+ // The snprintf() result is discarded, so truncation is not reported to the caller.
+
+ // Formats `num` as an unsigned decimal number.
+ static inline void VmaUint32ToStr(char* VMA_NOT_NULL outStr, size_t strLen, uint32_t num)
+ {
+ snprintf(outStr, strLen, "%" PRIu32, num);
+ }
+ // Formats `num` as an unsigned decimal number.
+ static inline void VmaUint64ToStr(char* VMA_NOT_NULL outStr, size_t strLen, uint64_t num)
+ {
+ snprintf(outStr, strLen, "%" PRIu64, num);
+ }
+ // Formats `ptr` with the "%p" conversion (textual form is implementation-defined).
+ static inline void VmaPtrToStr(char* VMA_NOT_NULL outStr, size_t strLen, const void* ptr)
+ {
+ snprintf(outStr, strLen, "%p", ptr);
+ }
+#endif
+
+#ifndef VMA_MUTEX
+ // Default mutex type, used only when the user has not defined VMA_MUTEX.
+ // Thin wrapper around std::mutex; copying and moving are deleted.
+ class VmaMutex
+ {
+ VMA_CLASS_NO_COPY_NO_MOVE(VmaMutex)
+ public:
+ VmaMutex() { }
+ // Blocks until the mutex is acquired.
+ void Lock() { m_Mutex.lock(); }
+ void Unlock() { m_Mutex.unlock(); }
+ // Attempts to acquire without blocking; returns true on success.
+ bool TryLock() { return m_Mutex.try_lock(); }
+ private:
+ std::mutex m_Mutex;
+ };
+ #define VMA_MUTEX VmaMutex
+#endif
+
+// Read-write mutex, where "read" is shared access, "write" is exclusive access.
+#ifndef VMA_RW_MUTEX
+ // Default read-write mutex, used only when the user has not defined VMA_RW_MUTEX.
+ // Three alternative implementations follow; all expose the same six methods:
+ // LockRead/UnlockRead/TryLockRead and LockWrite/UnlockWrite/TryLockWrite.
+ #if VMA_USE_STL_SHARED_MUTEX
+ // Use std::shared_mutex from C++17.
+ #include <shared_mutex>
+ class VmaRWMutex
+ {
+ public:
+ void LockRead() { m_Mutex.lock_shared(); }
+ void UnlockRead() { m_Mutex.unlock_shared(); }
+ bool TryLockRead() { return m_Mutex.try_lock_shared(); }
+ void LockWrite() { m_Mutex.lock(); }
+ void UnlockWrite() { m_Mutex.unlock(); }
+ bool TryLockWrite() { return m_Mutex.try_lock(); }
+ private:
+ std::shared_mutex m_Mutex;
+ };
+ #define VMA_RW_MUTEX VmaRWMutex
+ #elif defined(_WIN32) && defined(WINVER) && defined(SRWLOCK_INIT) && WINVER >= 0x0600
+ // Use SRWLOCK from WinAPI.
+ // Minimum supported client = Windows Vista, server = Windows Server 2008.
+ class VmaRWMutex
+ {
+ public:
+ VmaRWMutex() { InitializeSRWLock(&m_Lock); }
+ void LockRead() { AcquireSRWLockShared(&m_Lock); }
+ void UnlockRead() { ReleaseSRWLockShared(&m_Lock); }
+ // The WinAPI try-acquire calls return a BOOLEAN; compare against FALSE to get a C++ bool.
+ bool TryLockRead() { return TryAcquireSRWLockShared(&m_Lock) != FALSE; }
+ void LockWrite() { AcquireSRWLockExclusive(&m_Lock); }
+ void UnlockWrite() { ReleaseSRWLockExclusive(&m_Lock); }
+ bool TryLockWrite() { return TryAcquireSRWLockExclusive(&m_Lock) != FALSE; }
+ private:
+ SRWLOCK m_Lock;
+ };
+ #define VMA_RW_MUTEX VmaRWMutex
+ #else
+ // Less efficient fallback: Use normal mutex.
+ // Read and write operations take the same exclusive VMA_MUTEX, so readers do not run concurrently.
+ class VmaRWMutex
+ {
+ public:
+ void LockRead() { m_Mutex.Lock(); }
+ void UnlockRead() { m_Mutex.Unlock(); }
+ bool TryLockRead() { return m_Mutex.TryLock(); }
+ void LockWrite() { m_Mutex.Lock(); }
+ void UnlockWrite() { m_Mutex.Unlock(); }
+ bool TryLockWrite() { return m_Mutex.TryLock(); }
+ private:
+ VMA_MUTEX m_Mutex;
+ };
+ #define VMA_RW_MUTEX VmaRWMutex
+ #endif // #if VMA_USE_STL_SHARED_MUTEX
+#endif // #ifndef VMA_RW_MUTEX
+
+/*
+If providing your own implementation, you need to implement a subset of std::atomic.
+*/
+#ifndef VMA_ATOMIC_UINT32
+ #include <atomic>
+ #define VMA_ATOMIC_UINT32 std::atomic<uint32_t>
+#endif
+
+#ifndef VMA_ATOMIC_UINT64
+ #include <atomic>
+ #define VMA_ATOMIC_UINT64 std::atomic<uint64_t>
+#endif
+
+#ifndef VMA_DEBUG_ALWAYS_DEDICATED_MEMORY
+ /**
+ Every allocation will have its own memory block.
+ Define to 1 for debugging purposes only.
+ */
+ #define VMA_DEBUG_ALWAYS_DEDICATED_MEMORY (0)
+#endif
+
+#ifndef VMA_MIN_ALIGNMENT
+ /**
+ Minimum alignment of all allocations, in bytes.
+ Set to more than 1 for debugging purposes. Must be power of two.
+ */
+ #ifdef VMA_DEBUG_ALIGNMENT // Old name
+ #define VMA_MIN_ALIGNMENT VMA_DEBUG_ALIGNMENT
+ #else
+ #define VMA_MIN_ALIGNMENT (1)
+ #endif
+#endif
+
+#ifndef VMA_DEBUG_MARGIN
+ /**
+ Minimum margin after every allocation, in bytes.
+ Set nonzero for debugging purposes only.
+ */
+ #define VMA_DEBUG_MARGIN (0)
+#endif
+
+#ifndef VMA_DEBUG_INITIALIZE_ALLOCATIONS
+ /**
+ Define this macro to 1 to automatically fill new allocations and destroyed
+ allocations with some bit pattern.
+ */
+ #define VMA_DEBUG_INITIALIZE_ALLOCATIONS (0)
+#endif
+
+#ifndef VMA_DEBUG_DETECT_CORRUPTION
+ /**
+ Define this macro to 1 together with non-zero value of VMA_DEBUG_MARGIN to
+ enable writing magic value to the margin after every allocation and
+ validating it, so that memory corruptions (out-of-bounds writes) are detected.
+ */
+ #define VMA_DEBUG_DETECT_CORRUPTION (0)
+#endif
+
+#ifndef VMA_DEBUG_GLOBAL_MUTEX
+ /**
+ Set this to 1 for debugging purposes only, to enable single mutex protecting all
+ entry calls to the library. Can be useful for debugging multithreading issues.
+ */
+ #define VMA_DEBUG_GLOBAL_MUTEX (0)
+#endif
+
+#ifndef VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY
+ /**
+ Minimum value for VkPhysicalDeviceLimits::bufferImageGranularity.
+ Set to more than 1 for debugging purposes only. Must be power of two.
+ */
+ #define VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY (1)
+#endif
+
+#ifndef VMA_DEBUG_DONT_EXCEED_MAX_MEMORY_ALLOCATION_COUNT
+ /*
+ Set this to 1 to make VMA never exceed VkPhysicalDeviceLimits::maxMemoryAllocationCount
+ and return error instead of leaving up to Vulkan implementation what to do in such cases.
+ */
+ #define VMA_DEBUG_DONT_EXCEED_MAX_MEMORY_ALLOCATION_COUNT (0)
+#endif
+
+#ifndef VMA_SMALL_HEAP_MAX_SIZE
+ /// Maximum size of a memory heap in Vulkan to consider it "small".
+ #define VMA_SMALL_HEAP_MAX_SIZE (1024ull * 1024 * 1024)
+#endif
+
+#ifndef VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE
+ /// Default size of a block allocated as single VkDeviceMemory from a "large" heap.
+ #define VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE (256ull * 1024 * 1024)
+#endif
+
+/*
+Mapping hysteresis is a logic that launches when vmaMapMemory/vmaUnmapMemory is called
+or a persistently mapped allocation is created and destroyed several times in a row.
+It keeps additional +1 mapping of a device memory block to prevent calling actual
+vkMapMemory/vkUnmapMemory too many times, which may improve performance and help
+tools like RenderDoc.
+*/
+#ifndef VMA_MAPPING_HYSTERESIS_ENABLED
+ #define VMA_MAPPING_HYSTERESIS_ENABLED 1
+#endif
+
+#define VMA_VALIDATE(cond) do { if(!(cond)) { \
+ VMA_ASSERT(0 && "Validation failed: " #cond); \
+ return false; \
+ } } while(false)
+
+/*******************************************************************************
+END OF CONFIGURATION
+*/
+#endif // _VMA_CONFIGURATION
+
+
+static const uint8_t VMA_ALLOCATION_FILL_PATTERN_CREATED = 0xDC;
+static const uint8_t VMA_ALLOCATION_FILL_PATTERN_DESTROYED = 0xEF;
+// Decimal 2139416166, float NaN, little-endian binary 66 E6 84 7F.
+static const uint32_t VMA_CORRUPTION_DETECTION_MAGIC_VALUE = 0x7F84E666;
+
+// Copy of some Vulkan definitions so we don't need to check their existence just to handle few constants.
+static const uint32_t VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY = 0x00000040;
+static const uint32_t VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY = 0x00000080;
+static const uint32_t VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY = 0x00020000;
+static const uint32_t VK_IMAGE_CREATE_DISJOINT_BIT_COPY = 0x00000200;
+static const int32_t VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT_COPY = 1000158000;
+static const uint32_t VMA_ALLOCATION_INTERNAL_STRATEGY_MIN_OFFSET = 0x10000000u;
+static const uint32_t VMA_ALLOCATION_TRY_COUNT = 32;
+static const uint32_t VMA_VENDOR_ID_AMD = 4098;
+
+// This one is tricky. Vulkan specification defines this code as available since
+// Vulkan 1.0, but doesn't actually define it in Vulkan SDK earlier than 1.2.131.
+// See pull request #207.
+#define VK_ERROR_UNKNOWN_COPY ((VkResult)-13)
+
+
+#if VMA_STATS_STRING_ENABLED
+// Correspond to values of enum VmaSuballocationType.
+static const char* VMA_SUBALLOCATION_TYPE_NAMES[] =
+{
+ "FREE",
+ "UNKNOWN",
+ "BUFFER",
+ "IMAGE_UNKNOWN",
+ "IMAGE_LINEAR",
+ "IMAGE_OPTIMAL",
+};
+#endif
+
+static VkAllocationCallbacks VmaEmptyAllocationCallbacks =
+ { VMA_NULL, VMA_NULL, VMA_NULL, VMA_NULL, VMA_NULL, VMA_NULL };
+
+
+#ifndef _VMA_ENUM_DECLARATIONS
+
+// Classifies a suballocation (free, unknown, buffer, or one of the image variants).
+// The string table VMA_SUBALLOCATION_TYPE_NAMES is documented as corresponding to
+// these values, so keep both lists in the same order when adding an enumerator.
+enum VmaSuballocationType
+{
+ VMA_SUBALLOCATION_TYPE_FREE = 0,
+ VMA_SUBALLOCATION_TYPE_UNKNOWN = 1,
+ VMA_SUBALLOCATION_TYPE_BUFFER = 2,
+ VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN = 3,
+ VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR = 4,
+ VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL = 5,
+ VMA_SUBALLOCATION_TYPE_MAX_ENUM = 0x7FFFFFFF
+};
+
+// Selects one of two cache operations: flush or invalidate.
+// NOTE(review): the code that consumes this enum is not visible in this part of the file.
+enum VMA_CACHE_OPERATION
+{
+ VMA_CACHE_FLUSH,
+ VMA_CACHE_INVALIDATE
+};
+
+// Tag describing the kind of an allocation request.
+// NOTE(review): presumably stored in VmaAllocationRequest by the block-metadata algorithms - confirm against its users.
+enum class VmaAllocationRequestType
+{
+ Normal,
+ TLSF,
+ // Used by "Linear" algorithm.
+ UpperAddress,
+ EndOf1st,
+ EndOf2nd,
+};
+
+#endif // _VMA_ENUM_DECLARATIONS
+
+#ifndef _VMA_FORWARD_DECLARATIONS
+// Opaque handle used by allocation algorithms to identify single allocation in any conforming way.
+VK_DEFINE_NON_DISPATCHABLE_HANDLE(VmaAllocHandle);
+
+struct VmaMutexLock;
+struct VmaMutexLockRead;
+struct VmaMutexLockWrite;
+
+template<typename T>
+struct AtomicTransactionalIncrement;
+
+template<typename T>
+struct VmaStlAllocator;
+
+template<typename T, typename AllocatorT>
+class VmaVector;
+
+template<typename T, typename AllocatorT, size_t N>
+class VmaSmallVector;
+
+template<typename T>
+class VmaPoolAllocator;
+
+template<typename T>
+struct VmaListItem;
+
+template<typename T>
+class VmaRawList;
+
+template<typename T, typename AllocatorT>
+class VmaList;
+
+template<typename ItemTypeTraits>
+class VmaIntrusiveLinkedList;
+
+#if VMA_STATS_STRING_ENABLED
+class VmaStringBuilder;
+class VmaJsonWriter;
+#endif
+
+class VmaDeviceMemoryBlock;
+
+struct VmaDedicatedAllocationListItemTraits;
+class VmaDedicatedAllocationList;
+
+struct VmaSuballocation;
+struct VmaSuballocationOffsetLess;
+struct VmaSuballocationOffsetGreater;
+struct VmaSuballocationItemSizeLess;
+
+typedef VmaList<VmaSuballocation, VmaStlAllocator<VmaSuballocation>> VmaSuballocationList;
+
+struct VmaAllocationRequest;
+
+class VmaBlockMetadata;
+class VmaBlockMetadata_Linear;
+class VmaBlockMetadata_TLSF;
+
+class VmaBlockVector;
+
+struct VmaPoolListItemTraits;
+
+struct VmaCurrentBudgetData;
+
+class VmaAllocationObjectAllocator;
+
+#endif // _VMA_FORWARD_DECLARATIONS
+
+
+#ifndef _VMA_FUNCTIONS
+
+/*
+Counts the bits that are set to 1 in (v).
+
+When VMA_CPP20 is enabled, std::popcount is used. Otherwise the count is
+computed without branches by summing neighbouring bit groups in parallel.
+
+Platform intrinsics are a possible alternative, e.g. __popcnt(v) in Visual
+Studio or static_cast<uint32_t>(__builtin_popcount(v)) in GCC and Clang:
+define macro VMA_COUNT_BITS_SET to provide your optimized implementation.
+You then need to check at runtime whether the user's CPU supports these,
+as some old processors don't.
+*/
+static inline uint32_t VmaCountBitsSet(uint32_t v)
+{
+#if VMA_CPP20
+    return std::popcount(v);
+#else
+    // Fold pairs of bits into 2-bit sums, then 4-bit, 8-bit, 16-bit and finally a single total.
+    uint32_t sum = v - ((v >> 1) & 0x55555555);
+    sum = (sum & 0x33333333) + ((sum >> 2) & 0x33333333);
+    sum = (sum + (sum >> 4)) & 0x0F0F0F0F;
+    sum = (sum + (sum >> 8)) & 0x00FF00FF;
+    sum = (sum + (sum >> 16)) & 0x0000FFFF;
+    return sum;
+#endif
+}
+
+// Returns the zero-based index of the least significant set bit of the 64-bit (mask),
+// or UINT8_MAX when (mask) is 0. The implementation is chosen per compiler.
+static inline uint8_t VmaBitScanLSB(uint64_t mask)
+{
+#if defined(_MSC_VER) && defined(_WIN64)
+ unsigned long pos;
+ if (_BitScanForward64(&pos, mask))
+ return static_cast<uint8_t>(pos);
+ return UINT8_MAX;
+#elif VMA_CPP20
+ if(mask)
+ return static_cast<uint8_t>(std::countr_zero(mask));
+ return UINT8_MAX;
+#elif defined __GNUC__ || defined __clang__
+ // __builtin_ffsll gives a one-based index and 0 for an empty mask. Subtracting 1
+ // makes it zero-based; for the empty mask the result wraps and narrows to UINT8_MAX.
+ return static_cast<uint8_t>(__builtin_ffsll(mask)) - 1U;
+#else
+ // Portable fallback: test bits 0..63 from the lowest one upward.
+ uint8_t pos = 0;
+ uint64_t bit = 1;
+ do
+ {
+ if (mask & bit)
+ return pos;
+ bit <<= 1;
+ } while (pos++ < 63);
+ return UINT8_MAX;
+#endif
+}
+
+// Returns the zero-based index of the least significant set bit of the 32-bit (mask),
+// or UINT8_MAX when (mask) is 0. The implementation is chosen per compiler.
+static inline uint8_t VmaBitScanLSB(uint32_t mask)
+{
+#ifdef _MSC_VER
+ unsigned long pos;
+ if (_BitScanForward(&pos, mask))
+ return static_cast<uint8_t>(pos);
+ return UINT8_MAX;
+#elif VMA_CPP20
+ if(mask)
+ return static_cast<uint8_t>(std::countr_zero(mask));
+ return UINT8_MAX;
+#elif defined __GNUC__ || defined __clang__
+ // __builtin_ffs gives a one-based index and 0 for an empty mask. Subtracting 1
+ // makes it zero-based; for the empty mask the result wraps and narrows to UINT8_MAX.
+ return static_cast<uint8_t>(__builtin_ffs(mask)) - 1U;
+#else
+ // Portable fallback: test bits 0..31 from the lowest one upward.
+ uint8_t pos = 0;
+ uint32_t bit = 1;
+ do
+ {
+ if (mask & bit)
+ return pos;
+ bit <<= 1;
+ } while (pos++ < 31);
+ return UINT8_MAX;
+#endif
+}
+
+// Returns the zero-based index of the most significant set bit of the 64-bit (mask),
+// or UINT8_MAX when (mask) is 0. Every compiler-specific branch falls through to the
+// common `return UINT8_MAX` at the end when no bit is set.
+static inline uint8_t VmaBitScanMSB(uint64_t mask)
+{
+#if defined(_MSC_VER) && defined(_WIN64)
+ unsigned long pos;
+ if (_BitScanReverse64(&pos, mask))
+ return static_cast<uint8_t>(pos);
+#elif VMA_CPP20
+ if(mask)
+ return 63 - static_cast<uint8_t>(std::countl_zero(mask));
+#elif defined __GNUC__ || defined __clang__
+ // The mask is checked first because __builtin_clzll is undefined for 0.
+ if (mask)
+ return 63 - static_cast<uint8_t>(__builtin_clzll(mask));
+#else
+ // Portable fallback: test bits 63..0 from the highest one downward.
+ uint8_t pos = 63;
+ uint64_t bit = 1ULL << 63;
+ do
+ {
+ if (mask & bit)
+ return pos;
+ bit >>= 1;
+ } while (pos-- > 0);
+#endif
+ return UINT8_MAX;
+}
+
+// Returns the zero-based index of the most significant set bit of the 32-bit (mask),
+// or UINT8_MAX when (mask) is 0. Every compiler-specific branch falls through to the
+// common `return UINT8_MAX` at the end when no bit is set.
+static inline uint8_t VmaBitScanMSB(uint32_t mask)
+{
+#ifdef _MSC_VER
+ unsigned long pos;
+ if (_BitScanReverse(&pos, mask))
+ return static_cast<uint8_t>(pos);
+#elif VMA_CPP20
+ if(mask)
+ return 31 - static_cast<uint8_t>(std::countl_zero(mask));
+#elif defined __GNUC__ || defined __clang__
+ // The mask is checked first because __builtin_clz is undefined for 0.
+ if (mask)
+ return 31 - static_cast<uint8_t>(__builtin_clz(mask));
+#else
+ // Portable fallback: test bits 31..0 from the highest one downward.
+ uint8_t pos = 31;
+ uint32_t bit = 1UL << 31;
+ do
+ {
+ if (mask & bit)
+ return pos;
+ bit >>= 1;
+ } while (pos-- > 0);
+#endif
+ return UINT8_MAX;
+}
+
+/*
+Tells whether (x) is a power of two.
+T must be an unsigned integer type, or a signed integer type holding a nonnegative value.
+Note that 0 is reported as a power of two.
+*/
+template <typename T>
+inline bool VmaIsPow2(T x)
+{
+    // Clearing the lowest set bit leaves nothing behind only when at most one bit was set.
+    const T withoutLowestBit = x & (x - 1);
+    return withoutLowestBit == 0;
+}
+
+// Rounds (val) up to the nearest multiple of (alignment), which must be a power of two.
+// For example: VmaAlignUp(11, 8) = 16.
+// Use types like uint32_t, uint64_t as T.
+template <typename T>
+static inline T VmaAlignUp(T val, T alignment)
+{
+    VMA_HEAVY_ASSERT(VmaIsPow2(alignment));
+    const T lowBitsMask = alignment - 1;
+    return (val + lowBitsMask) & ~lowBitsMask;
+}
+
+// Rounds (val) down to the nearest multiple of (alignment), which must be a power of two.
+// For example: VmaAlignDown(11, 8) = 8.
+// Use types like uint32_t, uint64_t as T.
+template <typename T>
+static inline T VmaAlignDown(T val, T alignment)
+{
+    VMA_HEAVY_ASSERT(VmaIsPow2(alignment));
+    const T lowBitsMask = alignment - 1;
+    return val & ~lowBitsMask;
+}
+
+// Divides (x) by (y), rounding the quotient to the nearest integer.
+template <typename T>
+static inline T VmaRoundDiv(T x, T y)
+{
+    // Adding half of the divisor before dividing turns truncation into rounding.
+    const T halfDivisor = y / (T)2;
+    return (x + halfDivisor) / y;
+}
+
+// Divides (x) by (y), rounding the quotient up to the next integer.
+template <typename T>
+static inline T VmaDivideRoundingUp(T x, T y)
+{
+    const T one = (T)1;
+    return (x + y - one) / y;
+}
+
+// Returns smallest power of 2 greater or equal to v (32-bit overload).
+static inline uint32_t VmaNextPow2(uint32_t v)
+{
+    v--;
+    // Smear the highest set bit into every lower position, doubling the reach each step.
+    for (uint32_t shift = 1; shift < 32; shift <<= 1)
+    {
+        v |= v >> shift;
+    }
+    v++;
+    return v;
+}
+
+// 64-bit overload: returns smallest power of 2 greater or equal to v.
+static inline uint64_t VmaNextPow2(uint64_t v)
+{
+    v--;
+    // Smear the highest set bit into every lower position, doubling the reach each step.
+    for (uint32_t shift = 1; shift < 64; shift <<= 1)
+    {
+        v |= v >> shift;
+    }
+    v++;
+    return v;
+}
+
+// Returns largest power of 2 less or equal to v (32-bit overload).
+static inline uint32_t VmaPrevPow2(uint32_t v)
+{
+    // Smear the highest set bit into every lower position, doubling the reach each step.
+    for (uint32_t shift = 1; shift < 32; shift <<= 1)
+    {
+        v |= v >> shift;
+    }
+    // Every bit up to the highest one is now set; drop all of them except the top one.
+    v ^= v >> 1;
+    return v;
+}
+
+// 64-bit overload: returns largest power of 2 less or equal to v.
+static inline uint64_t VmaPrevPow2(uint64_t v)
+{
+    // Smear the highest set bit into every lower position, doubling the reach each step.
+    for (uint32_t shift = 1; shift < 64; shift <<= 1)
+    {
+        v |= v >> shift;
+    }
+    // Every bit up to the highest one is now set; drop all of them except the top one.
+    v ^= v >> 1;
+    return v;
+}
+
+// Returns true when (pStr) is null or points at an empty string.
+static inline bool VmaStrIsEmpty(const char* pStr)
+{
+    if (pStr == VMA_NULL)
+    {
+        return true;
+    }
+    return pStr[0] == '\0';
+}
+
+/*
+Checks whether the last byte of resource A and the first byte of resource B
+fall into the same page of size (pageSize).
+Resource A must lie at a lower memory offset than resource B and must not overlap it.
+
+Algorithm is based on "Vulkan 1.0.39 - A Specification (with all registered Vulkan extensions)"
+chapter 11.6 "Resource Memory Association", paragraph "Buffer-Image Granularity".
+*/
+static inline bool VmaBlocksOnSamePage(
+    VkDeviceSize resourceAOffset,
+    VkDeviceSize resourceASize,
+    VkDeviceSize resourceBOffset,
+    VkDeviceSize pageSize)
+{
+    VMA_ASSERT(resourceAOffset + resourceASize <= resourceBOffset && resourceASize > 0 && pageSize > 0);
+    // Masking with ~(pageSize - 1) rounds an offset down to the start of its page
+    // (this presumes pageSize is a power of two).
+    const VkDeviceSize pageStartMask = ~(pageSize - 1);
+    const VkDeviceSize lastByteOfA = resourceAOffset + resourceASize - 1;
+    const VkDeviceSize pageOfA = lastByteOfA & pageStartMask;
+    const VkDeviceSize pageOfB = resourceBOffset & pageStartMask;
+    return pageOfA == pageOfB;
+}
+
+/*
+Tells whether two suballocation types could conflict and therefore must respect
+VkPhysicalDeviceLimits::bufferImageGranularity. A conflict exists when one is a
+buffer or linear image and the other is an optimal image. Unknown types are
+treated conservatively. The order of the two arguments does not matter.
+*/
+static inline bool VmaIsBufferImageGranularityConflict(
+    VmaSuballocationType suballocType1,
+    VmaSuballocationType suballocType2)
+{
+    // Order the pair so that only one half of the symmetric table has to be spelled out.
+    const bool alreadyOrdered = !(suballocType1 > suballocType2);
+    const VmaSuballocationType lowerType = alreadyOrdered ? suballocType1 : suballocType2;
+    const VmaSuballocationType higherType = alreadyOrdered ? suballocType2 : suballocType1;
+
+    switch (lowerType)
+    {
+    case VMA_SUBALLOCATION_TYPE_FREE:
+        return false;
+    case VMA_SUBALLOCATION_TYPE_UNKNOWN:
+        return true;
+    case VMA_SUBALLOCATION_TYPE_BUFFER:
+        return higherType == VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN
+            || higherType == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL;
+    case VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN:
+        return higherType == VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN
+            || higherType == VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR
+            || higherType == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL;
+    case VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR:
+        return higherType == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL;
+    case VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL:
+        return false;
+    default:
+        VMA_ASSERT(0);
+        return true;
+    }
+}
+
+// Fills the debug margin that starts (offset) bytes into (pData) with
+// VMA_CORRUPTION_DETECTION_MAGIC_VALUE, written as consecutive uint32_t values.
+// Does nothing unless VMA_DEBUG_MARGIN > 0 and VMA_DEBUG_DETECT_CORRUPTION are both enabled.
+static void VmaWriteMagicValue(void* pData, VkDeviceSize offset)
+{
+#if VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_DETECT_CORRUPTION
+ uint32_t* pDst = (uint32_t*)((char*)pData + offset);
+ // Number of whole 32-bit words that fit in the margin.
+ const size_t numberCount = VMA_DEBUG_MARGIN / sizeof(uint32_t);
+ for (size_t i = 0; i < numberCount; ++i, ++pDst)
+ {
+ *pDst = VMA_CORRUPTION_DETECTION_MAGIC_VALUE;
+ }
+#else
+ // no-op
+#endif
+}
+
+// Checks that the debug margin starting (offset) bytes into (pData) still holds
+// VMA_CORRUPTION_DETECTION_MAGIC_VALUE in every 32-bit word.
+// Returns false at the first mismatching word, true otherwise. Always returns true
+// when VMA_DEBUG_MARGIN is 0 or VMA_DEBUG_DETECT_CORRUPTION is disabled.
+static bool VmaValidateMagicValue(const void* pData, VkDeviceSize offset)
+{
+#if VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_DETECT_CORRUPTION
+ const uint32_t* pSrc = (const uint32_t*)((const char*)pData + offset);
+ // Number of whole 32-bit words that fit in the margin.
+ const size_t numberCount = VMA_DEBUG_MARGIN / sizeof(uint32_t);
+ for (size_t i = 0; i < numberCount; ++i, ++pSrc)
+ {
+ if (*pSrc != VMA_CORRUPTION_DETECTION_MAGIC_VALUE)
+ {
+ return false;
+ }
+ }
+#endif
+ return true;
+}
+
+/*
+Writes into (outBufCreateInfo) the parameters of an example buffer to be used
+for transfers during GPU memory defragmentation. The structure is zeroed first,
+so every field not set explicitly below is 0.
+*/
+static void VmaFillGpuDefragmentationBufferCreateInfo(VkBufferCreateInfo& outBufCreateInfo)
+{
+    memset(&outBufCreateInfo, 0, sizeof(outBufCreateInfo));
+
+    outBufCreateInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+    outBufCreateInfo.size = (VkDeviceSize)VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE; // Example size.
+    // The buffer acts as both source and destination of copy operations.
+    outBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+}
+
+
+/*
+Binary search over the sorted range [beg, end).
+
+Returns an iterator to the first element that is not less than (key) according
+to (cmp), i.e. either the matching element, if present in the collection, or
+the place where a new element with value (key) should be inserted.
+
+Cmp should return true if first argument is less than second argument.
+*/
+template <typename CmpLess, typename IterT, typename KeyT>
+static IterT VmaBinaryFindFirstNotLess(IterT beg, IterT end, const KeyT& key, const CmpLess& cmp)
+{
+    size_t lo = 0;
+    size_t hi = size_t(end - beg);
+    while (lo < hi)
+    {
+        const size_t middle = lo + (hi - lo) / 2; // Overflow-safe midpoint calculation
+        const bool middleIsLess = cmp(*(beg + middle), key);
+        if (!middleIsLess)
+        {
+            // The answer is at `middle` or before it.
+            hi = middle;
+        }
+        else
+        {
+            // Everything up to and including `middle` is too small.
+            lo = middle + 1;
+        }
+    }
+    return beg + lo;
+}
+
+// Looks up (value) in the sorted range [beg, end) using binary search.
+// Returns an iterator to an element equivalent to (value) under (cmp), or (end) when there is none.
+template<typename CmpLess, typename IterT, typename KeyT>
+IterT VmaBinaryFindSorted(const IterT& beg, const IterT& end, const KeyT& value, const CmpLess& cmp)
+{
+    IterT candidate = VmaBinaryFindFirstNotLess<CmpLess, IterT, KeyT>(
+        beg, end, value, cmp);
+    // A candidate inside the range that orders before or after (value) is not a match.
+    if (candidate != end &&
+        (cmp(*candidate, value) || cmp(value, *candidate)))
+    {
+        return end;
+    }
+    return candidate;
+}
+
+/*
+Checks that the (count) pointers stored in (arr) are all non-null and pairwise distinct.
+Warning! O(n^2) complexity. Use only inside VMA_HEAVY_ASSERT.
+T must be pointer type, e.g. VmaAllocation, VmaPool.
+*/
+template<typename T>
+static bool VmaValidatePointerArray(uint32_t count, const T* arr)
+{
+    for (uint32_t first = 0; first < count; ++first)
+    {
+        const T current = arr[first];
+        if (current == VMA_NULL)
+        {
+            return false;
+        }
+        // Only later elements need checking; earlier pairs were compared already.
+        uint32_t second = first + 1;
+        while (second < count)
+        {
+            if (arr[second] == current)
+            {
+                return false;
+            }
+            ++second;
+        }
+    }
+    return true;
+}
+
+// Inserts (newStruct) at the front of the pNext chain of (mainStruct):
+// the new structure takes over the old chain and becomes the first link.
+template<typename MainT, typename NewT>
+static inline void VmaPnextChainPushFront(MainT* mainStruct, NewT* newStruct)
+{
+    auto* const previousHead = mainStruct->pNext;
+    newStruct->pNext = previousHead;
+    mainStruct->pNext = newStruct;
+}
+// Walks the mainStruct->pNext chain looking for a structure whose sType equals (sType).
+// Returns a pointer to the first match cast to FindT, or null when the chain has no such structure.
+template<typename FindT, typename MainT>
+static inline const FindT* VmaPnextChainFind(const MainT* mainStruct, VkStructureType sType)
+{
+    const VkBaseInStructure* link = (const VkBaseInStructure*)mainStruct->pNext;
+    while (link != VMA_NULL)
+    {
+        if (link->sType == sType)
+        {
+            return (const FindT*)link;
+        }
+        link = link->pNext;
+    }
+    return VMA_NULL;
+}
+
+// An abstraction over buffer or image `usage` flags, depending on available extensions.
+// The flags are stored in (Value), whose width depends on VMA_KHR_MAINTENANCE5.
+struct VmaBufferImageUsage
+{
+#if VMA_KHR_MAINTENANCE5
+ typedef uint64_t BaseType; // VkFlags64
+#else
+ typedef uint32_t BaseType; // VkFlags32
+#endif
+
+ // Sentinel with no flags set (Value == 0), defined right after the struct.
+ static const VmaBufferImageUsage UNKNOWN;
+
+ BaseType Value;
+
+ // Default-constructed object is a copy of UNKNOWN.
+ VmaBufferImageUsage() { *this = UNKNOWN; }
+ explicit VmaBufferImageUsage(BaseType usage) : Value(usage) { }
+ // Takes the usage of a buffer; see the out-of-line definition for how (useKhrMaintenance5) is used.
+ VmaBufferImageUsage(const VkBufferCreateInfo &createInfo, bool useKhrMaintenance5);
+ // Takes the usage of an image.
+ explicit VmaBufferImageUsage(const VkImageCreateInfo &createInfo);
+
+ bool operator==(const VmaBufferImageUsage& rhs) const { return Value == rhs.Value; }
+ bool operator!=(const VmaBufferImageUsage& rhs) const { return Value != rhs.Value; }
+
+ // True when at least one bit of (flag) is set in Value.
+ bool Contains(BaseType flag) const { return (Value & flag) != 0; }
+ // True when any flag other than the two transfer flags is set.
+ bool ContainsDeviceAccess() const
+ {
+ // This relies on values of VK_IMAGE_USAGE_TRANSFER* being the same as VK_BUFFER_USAGE_TRANSFER*.
+ return (Value & ~BaseType(VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT)) != 0;
+ }
+};
+
+// Definition of the "no usage known" sentinel: all flags cleared.
+const VmaBufferImageUsage VmaBufferImageUsage::UNKNOWN = VmaBufferImageUsage(0);
+
+// Initializes Value from the usage of a buffer.
+// When VMA_KHR_MAINTENANCE5 is compiled in and (useKhrMaintenance5) is true, a
+// VkBufferUsageFlags2CreateInfoKHR found in createInfo.pNext takes precedence.
+// In every other case Value is taken from createInfo.usage.
+VmaBufferImageUsage::VmaBufferImageUsage(const VkBufferCreateInfo &createInfo,
+ bool useKhrMaintenance5)
+{
+#if VMA_KHR_MAINTENANCE5
+ if(useKhrMaintenance5)
+ {
+ // If VkBufferCreateInfo::pNext chain contains VkBufferUsageFlags2CreateInfoKHR,
+ // take usage from it and ignore VkBufferCreateInfo::usage, per specification
+ // of the VK_KHR_maintenance5 extension.
+ const VkBufferUsageFlags2CreateInfoKHR* const usageFlags2 =
+ VmaPnextChainFind<VkBufferUsageFlags2CreateInfoKHR>(&createInfo, VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR);
+ if(usageFlags2)
+ {
+ this->Value = usageFlags2->usage;
+ return;
+ }
+ }
+#endif
+
+ // Fallback: the classic usage field of the create info.
+ this->Value = (BaseType)createInfo.usage;
+}
+
+// Initializes Value from createInfo.usage of an image.
+// Maybe in the future there will be VK_KHR_maintenanceN extension with structure
+// VkImageUsageFlags2CreateInfoKHR, like the one for buffers...
+VmaBufferImageUsage::VmaBufferImageUsage(const VkImageCreateInfo &createInfo)
+    : Value((BaseType)createInfo.usage)
+{
+}
+
+// This is the main algorithm that guides the selection of a memory type best for an allocation -
+// converts usage to required/preferred/not preferred flags.
+//
+// The three output flag sets start from allocCreateInfo.requiredFlags,
+// allocCreateInfo.preferredFlags and 0 respectively, and are then extended
+// according to allocCreateInfo.usage (and, for the VMA_MEMORY_USAGE_AUTO*
+// values, according to allocCreateInfo.flags and bufImgUsage).
+//
+// Returns false only when a VMA_MEMORY_USAGE_AUTO* usage is combined with
+// bufImgUsage == VmaBufferImageUsage::UNKNOWN; returns true otherwise.
+static bool FindMemoryPreferences(
+ bool isIntegratedGPU,
+ const VmaAllocationCreateInfo& allocCreateInfo,
+ VmaBufferImageUsage bufImgUsage,
+ VkMemoryPropertyFlags& outRequiredFlags,
+ VkMemoryPropertyFlags& outPreferredFlags,
+ VkMemoryPropertyFlags& outNotPreferredFlags)
+{
+ outRequiredFlags = allocCreateInfo.requiredFlags;
+ outPreferredFlags = allocCreateInfo.preferredFlags;
+ outNotPreferredFlags = 0;
+
+ switch(allocCreateInfo.usage)
+ {
+ case VMA_MEMORY_USAGE_UNKNOWN:
+ break;
+ case VMA_MEMORY_USAGE_GPU_ONLY:
+ // On an integrated GPU an explicit HOST_VISIBLE preference suppresses the DEVICE_LOCAL preference.
+ if(!isIntegratedGPU || (outPreferredFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0)
+ {
+ outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ }
+ break;
+ case VMA_MEMORY_USAGE_CPU_ONLY:
+ outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+ break;
+ case VMA_MEMORY_USAGE_CPU_TO_GPU:
+ outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+ if(!isIntegratedGPU || (outPreferredFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0)
+ {
+ outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ }
+ break;
+ case VMA_MEMORY_USAGE_GPU_TO_CPU:
+ outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+ outPreferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+ break;
+ case VMA_MEMORY_USAGE_CPU_COPY:
+ outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ break;
+ case VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED:
+ outRequiredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT;
+ break;
+ case VMA_MEMORY_USAGE_AUTO:
+ case VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE:
+ case VMA_MEMORY_USAGE_AUTO_PREFER_HOST:
+ {
+ // The automatic modes need to know how the resource is going to be used.
+ if(bufImgUsage == VmaBufferImageUsage::UNKNOWN)
+ {
+ VMA_ASSERT(0 && "VMA_MEMORY_USAGE_AUTO* values can only be used with functions like vmaCreateBuffer, vmaCreateImage so that the details of the created resource are known."
+ " Maybe you use VkBufferUsageFlags2CreateInfoKHR but forgot to use VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT?" );
+ return false;
+ }
+
+ const bool deviceAccess = bufImgUsage.ContainsDeviceAccess();
+ const bool hostAccessSequentialWrite = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT) != 0;
+ const bool hostAccessRandom = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT) != 0;
+ const bool hostAccessAllowTransferInstead = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT) != 0;
+ const bool preferDevice = allocCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
+ const bool preferHost = allocCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_HOST;
+
+ // CPU random access - e.g. a buffer written to or transferred from GPU to read back on CPU.
+ if(hostAccessRandom)
+ {
+ // Prefer cached. Cannot require it, because some platforms don't have it (e.g. Raspberry Pi - see #362)!
+ outPreferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+
+ if (!isIntegratedGPU && deviceAccess && hostAccessAllowTransferInstead && !preferHost)
+ {
+ // Nice if it will end up in HOST_VISIBLE, but more importantly prefer DEVICE_LOCAL.
+ // Omitting HOST_VISIBLE here is intentional.
+ // In case there is DEVICE_LOCAL | HOST_VISIBLE | HOST_CACHED, it will pick that one.
+ // Otherwise, this will give same weight to DEVICE_LOCAL as HOST_VISIBLE | HOST_CACHED and select the former if occurs first on the list.
+ outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ }
+ else
+ {
+ // Always CPU memory.
+ outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+ }
+ }
+ // CPU sequential write - may be CPU or host-visible GPU memory, uncached and write-combined.
+ else if(hostAccessSequentialWrite)
+ {
+ // Want uncached and write-combined.
+ outNotPreferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+
+ if(!isIntegratedGPU && deviceAccess && hostAccessAllowTransferInstead && !preferHost)
+ {
+ outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+ }
+ else
+ {
+ outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+ // Direct GPU access, CPU sequential write (e.g. a dynamic uniform buffer updated every frame)
+ if(deviceAccess)
+ {
+ // Could go to CPU memory or GPU BAR/unified. Up to the user to decide. If no preference, choose GPU memory.
+ if(preferHost)
+ outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ else
+ outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ }
+ // GPU no direct access, CPU sequential write (e.g. an upload buffer to be transferred to the GPU)
+ else
+ {
+ // Could go to CPU memory or GPU BAR/unified. Up to the user to decide. If no preference, choose CPU memory.
+ if(preferDevice)
+ outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ else
+ outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ }
+ }
+ }
+ // No CPU access
+ else
+ {
+ // if(deviceAccess)
+ //
+ // GPU access, no CPU access (e.g. a color attachment image) - prefer GPU memory,
+ // unless there is a clear preference from the user not to do so.
+ //
+ // else:
+ //
+ // No direct GPU access, no CPU access, just transfers.
+ // It may be staging copy intended for e.g. preserving image for next frame (then better GPU memory) or
+ // a "swap file" copy to free some GPU memory (then better CPU memory).
+ // Up to the user to decide. If no preference, assume the former and choose GPU memory.
+
+ if(preferHost)
+ outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ else
+ outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ }
+ break;
+ }
+ default:
+ VMA_ASSERT(0);
+ }
+
+ // Avoid DEVICE_COHERENT unless explicitly requested.
+ // Note that only the DEVICE_UNCACHED bit is actually added to the not-preferred set below.
+ if(((allocCreateInfo.requiredFlags | allocCreateInfo.preferredFlags) &
+ (VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY | VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY)) == 0)
+ {
+ outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY;
+ }
+
+ return true;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Memory allocation
+
+// Allocates (size) bytes of CPU memory aligned to (alignment).
+// Uses pAllocationCallbacks->pfnAllocation when both the callbacks structure and
+// that function pointer are non-null, otherwise VMA_SYSTEM_ALIGNED_MALLOC.
+// Asserts that the allocation succeeded and returns the resulting pointer.
+static void* VmaMalloc(const VkAllocationCallbacks* pAllocationCallbacks, size_t size, size_t alignment)
+{
+    const bool useSystemAllocator =
+        (pAllocationCallbacks == VMA_NULL) ||
+        (pAllocationCallbacks->pfnAllocation == VMA_NULL);
+
+    void* result = VMA_NULL;
+    if (useSystemAllocator)
+    {
+        result = VMA_SYSTEM_ALIGNED_MALLOC(size, alignment);
+    }
+    else
+    {
+        result = (*pAllocationCallbacks->pfnAllocation)(
+            pAllocationCallbacks->pUserData,
+            size,
+            alignment,
+            VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+    }
+    VMA_ASSERT(result != VMA_NULL && "CPU memory allocation failed.");
+    return result;
+}
+
+// Releases CPU memory at (ptr).
+// Uses pAllocationCallbacks->pfnFree when both the callbacks structure and that
+// function pointer are non-null, otherwise VMA_SYSTEM_ALIGNED_FREE.
+static void VmaFree(const VkAllocationCallbacks* pAllocationCallbacks, void* ptr)
+{
+    const bool useSystemAllocator =
+        (pAllocationCallbacks == VMA_NULL) ||
+        (pAllocationCallbacks->pfnFree == VMA_NULL);
+
+    if (useSystemAllocator)
+    {
+        VMA_SYSTEM_ALIGNED_FREE(ptr);
+        return;
+    }
+    (*pAllocationCallbacks->pfnFree)(pAllocationCallbacks->pUserData, ptr);
+}
+
+// Allocates raw, suitably aligned storage for a single object of type T.
+// No constructor is run.
+template<typename T>
+static T* VmaAllocate(const VkAllocationCallbacks* pAllocationCallbacks)
+{
+    void* const storage = VmaMalloc(pAllocationCallbacks, sizeof(T), VMA_ALIGN_OF(T));
+    return (T*)storage;
+}
+
+// Allocates raw, suitably aligned storage for (count) objects of type T.
+// No constructors are run.
+template<typename T>
+static T* VmaAllocateArray(const VkAllocationCallbacks* pAllocationCallbacks, size_t count)
+{
+    void* const storage = VmaMalloc(pAllocationCallbacks, sizeof(T) * count, VMA_ALIGN_OF(T));
+    return (T*)storage;
+}
+
+// Placement-new of a single (type) in storage obtained from VmaAllocate<type>(allocator).
+// The expansion ends with `(type)`, so constructor arguments can follow the macro invocation.
+#define vma_new(allocator, type) new(VmaAllocate<type>(allocator))(type)
+
+// Placement-new of one (type) at the start of storage sized for (count) elements,
+// obtained from VmaAllocateArray<type>. Only the first element is constructed by this expression.
+#define vma_new_array(allocator, type, count) new(VmaAllocateArray<type>((allocator), (count)))(type)
+
+// Destroys the object at (ptr) and returns its storage through VmaFree().
+// A null (ptr) is ignored, matching the behavior of vma_delete_array(); previously
+// the destructor was invoked even on a null pointer.
+template<typename T>
+static void vma_delete(const VkAllocationCallbacks* pAllocationCallbacks, T* ptr)
+{
+    if (ptr != VMA_NULL)
+    {
+        ptr->~T();
+        VmaFree(pAllocationCallbacks, ptr);
+    }
+}
+
+// Destroys (count) objects stored at (ptr), last element first, and then returns
+// the storage through VmaFree(). A null (ptr) is ignored.
+template<typename T>
+static void vma_delete_array(const VkAllocationCallbacks* pAllocationCallbacks, T* ptr, size_t count)
+{
+    if (ptr == VMA_NULL)
+    {
+        return;
+    }
+
+    size_t remaining = count;
+    while (remaining > 0)
+    {
+        --remaining;
+        ptr[remaining].~T();
+    }
+    VmaFree(pAllocationCallbacks, ptr);
+}
+
+// Duplicates the null-terminated string (srcStr) into memory obtained through (allocs).
+// Returns null when (srcStr) is null. Release the copy with VmaFreeString().
+static char* VmaCreateStringCopy(const VkAllocationCallbacks* allocs, const char* srcStr)
+{
+    if (srcStr == VMA_NULL)
+    {
+        return VMA_NULL;
+    }
+
+    // The byte count includes the terminating null character.
+    const size_t byteCount = strlen(srcStr) + 1;
+    char* const copy = vma_new_array(allocs, char, byteCount);
+    memcpy(copy, srcStr, byteCount);
+    return copy;
+}
+
+#if VMA_STATS_STRING_ENABLED
+// Duplicates the first (strLen) characters of (srcStr) into memory obtained through
+// (allocs) and appends a terminating null character.
+// Returns null when (srcStr) is null.
+static char* VmaCreateStringCopy(const VkAllocationCallbacks* allocs, const char* srcStr, size_t strLen)
+{
+    if (srcStr == VMA_NULL)
+    {
+        return VMA_NULL;
+    }
+
+    char* const copy = vma_new_array(allocs, char, strLen + 1);
+    memcpy(copy, srcStr, strLen);
+    copy[strLen] = '\0';
+    return copy;
+}
+#endif // VMA_STATS_STRING_ENABLED
+
+// Releases the null-terminated string (str) through vma_delete_array().
+// A null (str) is ignored.
+static void VmaFreeString(const VkAllocationCallbacks* allocs, char* str)
+{
+    if (str == VMA_NULL)
+    {
+        return;
+    }
+
+    // The element count passed on includes the terminating null character.
+    const size_t charCount = strlen(str) + 1;
+    vma_delete_array(allocs, str, charCount);
+}
+
+// Inserts (value) into (vector), which must already be sorted according to CmpLess,
+// at the first position whose element is not less than (value).
+// Returns the index at which the value was inserted.
+template<typename CmpLess, typename VectorT>
+size_t VmaVectorInsertSorted(VectorT& vector, const typename VectorT::value_type& value)
+{
+    const auto first = vector.data();
+    const auto last = vector.data() + vector.size();
+    const size_t indexToInsert = VmaBinaryFindFirstNotLess(first, last, value, CmpLess()) - first;
+    VmaVectorInsert(vector, indexToInsert, value);
+    return indexToInsert;
+}
+
+// Removes one element equivalent to (value) from (vector), which must be sorted
+// according to CmpLess. Returns true when an element was removed, false when no
+// equivalent element exists.
+template<typename CmpLess, typename VectorT>
+bool VmaVectorRemoveSorted(VectorT& vector, const typename VectorT::value_type& value)
+{
+    CmpLess comparator;
+    typename VectorT::iterator found = VmaBinaryFindFirstNotLess(
+        vector.begin(),
+        vector.end(),
+        value,
+        comparator);
+
+    // Nothing at or after the insertion point, or the element there is not equivalent.
+    if ((found == vector.end()) || comparator(*found, value) || comparator(value, *found))
+    {
+        return false;
+    }
+
+    size_t indexToRemove = found - vector.begin();
+    VmaVectorRemove(vector, indexToRemove);
+    return true;
+}
+#endif // _VMA_FUNCTIONS
+
+#ifndef _VMA_STATISTICS_FUNCTIONS
+
+// Resets all four counters of (outStats) to zero.
+static void VmaClearStatistics(VmaStatistics& outStats)
+{
+    // Byte totals.
+    outStats.blockBytes = 0;
+    outStats.allocationBytes = 0;
+    // Object counts.
+    outStats.blockCount = 0;
+    outStats.allocationCount = 0;
+}
+
+// Accumulates every counter of (src) into the matching counter of (inoutStats).
+static void VmaAddStatistics(VmaStatistics& inoutStats, const VmaStatistics& src)
+{
+    // Byte totals.
+    inoutStats.blockBytes += src.blockBytes;
+    inoutStats.allocationBytes += src.allocationBytes;
+    // Object counts.
+    inoutStats.blockCount += src.blockCount;
+    inoutStats.allocationCount += src.allocationCount;
+}
+
+// Resets (outStats) to the neutral state for accumulation: counters at zero,
+// maxima at 0 and minima at VK_WHOLE_SIZE so that the first sample replaces them.
+static void VmaClearDetailedStatistics(VmaDetailedStatistics& outStats)
+{
+    VmaClearStatistics(outStats.statistics);
+    outStats.unusedRangeCount = 0;
+
+    outStats.allocationSizeMax = 0;
+    outStats.unusedRangeSizeMax = 0;
+
+    outStats.allocationSizeMin = VK_WHOLE_SIZE;
+    outStats.unusedRangeSizeMin = VK_WHOLE_SIZE;
+}
+
+// Records one allocation of (size) bytes in (inoutStats): bumps the allocation
+// count and byte total and widens the min/max allocation size range.
+static void VmaAddDetailedStatisticsAllocation(VmaDetailedStatistics& inoutStats, VkDeviceSize size)
+{
+    VmaStatistics& basic = inoutStats.statistics;
+    basic.allocationCount++;
+    basic.allocationBytes += size;
+
+    inoutStats.allocationSizeMin = VMA_MIN(inoutStats.allocationSizeMin, size);
+    inoutStats.allocationSizeMax = VMA_MAX(inoutStats.allocationSizeMax, size);
+}
+
+// Records one unused range of (size) bytes in (inoutStats): bumps the unused
+// range count and widens the min/max unused range size.
+static void VmaAddDetailedStatisticsUnusedRange(VmaDetailedStatistics& inoutStats, VkDeviceSize size)
+{
+    inoutStats.unusedRangeSizeMin = VMA_MIN(inoutStats.unusedRangeSizeMin, size);
+    inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, size);
+    inoutStats.unusedRangeCount++;
+}
+
+// Merges (src) into (inoutStats): counters are summed, minima take the smaller
+// value and maxima take the larger value.
+static void VmaAddDetailedStatistics(VmaDetailedStatistics& inoutStats, const VmaDetailedStatistics& src)
+{
+    VmaAddStatistics(inoutStats.statistics, src.statistics);
+    inoutStats.unusedRangeCount += src.unusedRangeCount;
+
+    // Allocation size range.
+    inoutStats.allocationSizeMin = VMA_MIN(inoutStats.allocationSizeMin, src.allocationSizeMin);
+    inoutStats.allocationSizeMax = VMA_MAX(inoutStats.allocationSizeMax, src.allocationSizeMax);
+
+    // Unused range size range.
+    inoutStats.unusedRangeSizeMin = VMA_MIN(inoutStats.unusedRangeSizeMin, src.unusedRangeSizeMin);
+    inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, src.unusedRangeSizeMax);
+}
+
+#endif // _VMA_STATISTICS_FUNCTIONS
+
+#ifndef _VMA_MUTEX_LOCK
+// Scope guard for VMA_MUTEX: calls Lock() in the constructor and Unlock() in the
+// destructor. When (useMutex) is false the guard holds no mutex and does nothing.
+struct VmaMutexLock
+{
+    VMA_CLASS_NO_COPY_NO_MOVE(VmaMutexLock)
+public:
+    VmaMutexLock(VMA_MUTEX& mutex, bool useMutex = true)
+        : m_pMutex(useMutex ? &mutex : VMA_NULL)
+    {
+        if (m_pMutex)
+        {
+            m_pMutex->Lock();
+        }
+    }
+
+    ~VmaMutexLock()
+    {
+        if (m_pMutex)
+        {
+            m_pMutex->Unlock();
+        }
+    }
+
+private:
+    // Null when locking was disabled at construction.
+    VMA_MUTEX* m_pMutex;
+};
+
+// Scope guard for VMA_RW_MUTEX in read mode: calls LockRead() in the constructor
+// and UnlockRead() in the destructor. When (useMutex) is false the guard holds no
+// mutex and does nothing.
+struct VmaMutexLockRead
+{
+    VMA_CLASS_NO_COPY_NO_MOVE(VmaMutexLockRead)
+public:
+    VmaMutexLockRead(VMA_RW_MUTEX& mutex, bool useMutex)
+        : m_pMutex(useMutex ? &mutex : VMA_NULL)
+    {
+        if (m_pMutex)
+        {
+            m_pMutex->LockRead();
+        }
+    }
+
+    ~VmaMutexLockRead()
+    {
+        if (m_pMutex)
+        {
+            m_pMutex->UnlockRead();
+        }
+    }
+
+private:
+    // Null when locking was disabled at construction.
+    VMA_RW_MUTEX* m_pMutex;
+};
+
+// Scope guard for VMA_RW_MUTEX in write mode: calls LockWrite() in the constructor
+// and UnlockWrite() in the destructor. When (useMutex) is false the guard holds no
+// mutex and does nothing.
+struct VmaMutexLockWrite
+{
+    VMA_CLASS_NO_COPY_NO_MOVE(VmaMutexLockWrite)
+public:
+    VmaMutexLockWrite(VMA_RW_MUTEX& mutex, bool useMutex)
+        : m_pMutex(useMutex ? &mutex : VMA_NULL)
+    {
+        if (m_pMutex)
+        {
+            m_pMutex->LockWrite();
+        }
+    }
+
+    ~VmaMutexLockWrite()
+    {
+        if (m_pMutex)
+        {
+            m_pMutex->UnlockWrite();
+        }
+    }
+
+private:
+    // Null when locking was disabled at construction.
+    VMA_RW_MUTEX* m_pMutex;
+};
+
+// When VMA_DEBUG_GLOBAL_MUTEX is enabled, VMA_DEBUG_GLOBAL_MUTEX_LOCK declares a
+// VmaMutexLock on one file-level mutex, held until the end of the enclosing scope.
+// Otherwise the macro expands to nothing.
+#if VMA_DEBUG_GLOBAL_MUTEX
+ static VMA_MUTEX gDebugGlobalMutex;
+ #define VMA_DEBUG_GLOBAL_MUTEX_LOCK VmaMutexLock debugGlobalMutexLock(gDebugGlobalMutex, true);
+#else
+ #define VMA_DEBUG_GLOBAL_MUTEX_LOCK
+#endif
+#endif // _VMA_MUTEX_LOCK
+
+#ifndef _VMA_ATOMIC_TRANSACTIONAL_INCREMENT
+// Transaction-style increment of an atomic counter: Increment() adds 1 to the
+// given atomic, and the destructor takes that 1 back unless Commit() was called
+// in the meantime.
+template<typename AtomicT>
+struct AtomicTransactionalIncrement
+{
+public:
+    using T = decltype(AtomicT().load());
+
+    ~AtomicTransactionalIncrement()
+    {
+        // Still tracking an atomic means the transaction was never committed: roll back.
+        if (m_Atomic)
+        {
+            --(*m_Atomic);
+        }
+    }
+
+    // Makes the increment permanent by forgetting the tracked atomic.
+    void Commit()
+    {
+        m_Atomic = VMA_NULL;
+    }
+
+    // Starts tracking (atomic), adds 1 to it and returns the value it held before.
+    T Increment(AtomicT* atomic)
+    {
+        m_Atomic = atomic;
+        return m_Atomic->fetch_add(1);
+    }
+
+private:
+    AtomicT* m_Atomic = VMA_NULL;
+};
+#endif // _VMA_ATOMIC_TRANSACTIONAL_INCREMENT
+
+#ifndef _VMA_STL_ALLOCATOR
+// STL-compatible allocator that routes allocate/deallocate through
+// VmaAllocateArray() and VmaFree() using the stored VkAllocationCallbacks.
+// Two allocators compare equal when they share the same callbacks pointer.
+template<typename T>
+struct VmaStlAllocator
+{
+    const VkAllocationCallbacks* const m_pCallbacks;
+    typedef T value_type;
+
+    VmaStlAllocator(const VkAllocationCallbacks* pCallbacks)
+        : m_pCallbacks(pCallbacks)
+    {
+    }
+
+    // Rebinding constructor: adopts the callbacks of an allocator for another element type.
+    template<typename U>
+    VmaStlAllocator(const VmaStlAllocator<U>& src)
+        : m_pCallbacks(src.m_pCallbacks)
+    {
+    }
+
+    VmaStlAllocator(const VmaStlAllocator&) = default;
+    VmaStlAllocator& operator=(const VmaStlAllocator&) = delete;
+
+    // Returns uninitialized storage for (n) objects of type T.
+    T* allocate(size_t n)
+    {
+        return VmaAllocateArray<T>(m_pCallbacks, n);
+    }
+
+    // Releases storage obtained from allocate(); the element count is not needed.
+    void deallocate(T* p, size_t n)
+    {
+        VmaFree(m_pCallbacks, p);
+    }
+
+    template<typename U>
+    bool operator==(const VmaStlAllocator<U>& rhs) const
+    {
+        return m_pCallbacks == rhs.m_pCallbacks;
+    }
+
+    template<typename U>
+    bool operator!=(const VmaStlAllocator<U>& rhs) const
+    {
+        return m_pCallbacks != rhs.m_pCallbacks;
+    }
+};
+#endif // _VMA_STL_ALLOCATOR
+
+#ifndef _VMA_VECTOR
+/* Class with interface compatible with subset of std::vector.
+T must be POD because constructors and destructors are not called and memcpy is
+used for these objects.
+Storage is obtained through the callbacks held by the allocator (m_pCallbacks)
+and released with VmaFree(). */
+template<typename T, typename AllocatorT>
+class VmaVector
+{
+public:
+ typedef T value_type;
+ typedef T* iterator;
+ typedef const T* const_iterator;
+
+ // Creates an empty vector that owns no storage.
+ VmaVector(const AllocatorT& allocator);
+ // Creates a vector of (count) elements whose contents are left uninitialized.
+ VmaVector(size_t count, const AllocatorT& allocator);
+ // This version of the constructor is here for compatibility with pre-C++14 std::vector.
+ // value is unused.
+ VmaVector(size_t count, const T& value, const AllocatorT& allocator) : VmaVector(count, allocator) {}
+ VmaVector(const VmaVector<T, AllocatorT>& src);
+ VmaVector& operator=(const VmaVector& rhs);
+ ~VmaVector() { VmaFree(m_Allocator.m_pCallbacks, m_pArray); }
+
+ bool empty() const { return m_Count == 0; }
+ size_t size() const { return m_Count; }
+ T* data() { return m_pArray; }
+ T& front() { VMA_HEAVY_ASSERT(m_Count > 0); return m_pArray[0]; }
+ T& back() { VMA_HEAVY_ASSERT(m_Count > 0); return m_pArray[m_Count - 1]; }
+ const T* data() const { return m_pArray; }
+ const T& front() const { VMA_HEAVY_ASSERT(m_Count > 0); return m_pArray[0]; }
+ const T& back() const { VMA_HEAVY_ASSERT(m_Count > 0); return m_pArray[m_Count - 1]; }
+
+ // Iterators are plain pointers into the element array.
+ iterator begin() { return m_pArray; }
+ iterator end() { return m_pArray + m_Count; }
+ const_iterator cbegin() const { return m_pArray; }
+ const_iterator cend() const { return m_pArray + m_Count; }
+ const_iterator begin() const { return cbegin(); }
+ const_iterator end() const { return cend(); }
+
+ void pop_front() { VMA_HEAVY_ASSERT(m_Count > 0); remove(0); }
+ void pop_back() { VMA_HEAVY_ASSERT(m_Count > 0); resize(size() - 1); }
+ void push_front(const T& src) { insert(0, src); }
+
+ void push_back(const T& src);
+ // Changes the capacity; it only shrinks when (freeMemory) is true and never drops below size().
+ void reserve(size_t newCapacity, bool freeMemory = false);
+ // Changes the element count; new elements are left uninitialized.
+ void resize(size_t newCount);
+ // Sets the element count to 0; the storage is kept.
+ void clear() { resize(0); }
+ void shrink_to_fit();
+ void insert(size_t index, const T& src);
+ void remove(size_t index);
+
+ T& operator[](size_t index) { VMA_HEAVY_ASSERT(index < m_Count); return m_pArray[index]; }
+ const T& operator[](size_t index) const { VMA_HEAVY_ASSERT(index < m_Count); return m_pArray[index]; }
+
+private:
+ AllocatorT m_Allocator;
+ T* m_pArray; // Element storage, or null when the capacity is 0.
+ size_t m_Count; // Number of elements in use.
+ size_t m_Capacity; // Number of elements the storage can hold.
+};
+
+#ifndef _VMA_VECTOR_FUNCTIONS
+// Creates an empty vector that owns no storage.
+template<typename T, typename AllocatorT>
+VmaVector<T, AllocatorT>::VmaVector(const AllocatorT& allocator)
+    : m_Allocator(allocator)
+    , m_pArray(VMA_NULL)
+    , m_Count(0)
+    , m_Capacity(0)
+{
+}
+
+// Creates a vector of (count) elements. Storage is allocated only when (count)
+// is nonzero, and the elements are left uninitialized.
+template<typename T, typename AllocatorT>
+VmaVector<T, AllocatorT>::VmaVector(size_t count, const AllocatorT& allocator)
+    : m_Allocator(allocator)
+    , m_pArray(count ? (T*)VmaAllocateArray<T>(allocator.m_pCallbacks, count) : VMA_NULL)
+    , m_Count(count)
+    , m_Capacity(count)
+{
+}
+
+// Copy constructor: allocates exactly src.size() elements through the allocator
+// of (src) and copies the elements with memcpy.
+template<typename T, typename AllocatorT>
+VmaVector<T, AllocatorT>::VmaVector(const VmaVector& src)
+    : m_Allocator(src.m_Allocator)
+    , m_pArray(src.m_Count ? (T*)VmaAllocateArray<T>(src.m_Allocator.m_pCallbacks, src.m_Count) : VMA_NULL)
+    , m_Count(src.m_Count)
+    , m_Capacity(src.m_Count)
+{
+    if (m_Count == 0)
+    {
+        return;
+    }
+    memcpy(m_pArray, src.m_pArray, m_Count * sizeof(T));
+}
+
+// Copy assignment: resizes this vector to rhs.size() and copies the elements
+// with memcpy. Self-assignment is a no-op. The allocator is not reassigned.
+template<typename T, typename AllocatorT>
+VmaVector<T, AllocatorT>& VmaVector<T, AllocatorT>::operator=(const VmaVector& rhs)
+{
+    if (&rhs == this)
+    {
+        return *this;
+    }
+
+    resize(rhs.m_Count);
+    if (m_Count != 0)
+    {
+        memcpy(m_pArray, rhs.m_pArray, m_Count * sizeof(T));
+    }
+    return *this;
+}
+
+// Appends a copy of (src) at the end, growing the storage through resize() when needed.
+template<typename T, typename AllocatorT>
+void VmaVector<T, AllocatorT>::push_back(const T& src)
+{
+    const size_t appendedIndex = size();
+    resize(appendedIndex + 1);
+    m_pArray[appendedIndex] = src;
+}
+
+// Changes the capacity of the vector without changing its size.
+//
+// newCapacity - requested capacity; it is clamped so it never drops below size().
+// freeMemory  - when false, a request smaller than the current capacity is
+//               ignored; when true, the storage may shrink.
+//
+// When the capacity actually changes, new storage is allocated, the existing
+// elements are copied with memcpy and the old storage is freed.
+template<typename T, typename AllocatorT>
+void VmaVector<T, AllocatorT>::reserve(size_t newCapacity, bool freeMemory)
+{
+    newCapacity = VMA_MAX(newCapacity, m_Count);
+
+    if ((newCapacity < m_Capacity) && !freeMemory)
+    {
+        newCapacity = m_Capacity;
+    }
+
+    if (newCapacity != m_Capacity)
+    {
+        // VmaAllocateArray expects the VkAllocationCallbacks pointer, not the allocator
+        // object itself; passing m_Allocator broke compilation once this template was instantiated.
+        T* const newArray = newCapacity ? VmaAllocateArray<T>(m_Allocator.m_pCallbacks, newCapacity) : VMA_NULL;
+        if (m_Count != 0)
+        {
+            memcpy(newArray, m_pArray, m_Count * sizeof(T));
+        }
+        VmaFree(m_Allocator.m_pCallbacks, m_pArray);
+        m_Capacity = newCapacity;
+        m_pArray = newArray;
+    }
+}
+
+// Sets the element count to `newCount`. Storage only ever grows here: when
+// `newCount` exceeds the capacity, the new capacity is the largest of
+// `newCount`, 1.5x the old capacity, and 8. Shrinking keeps the current
+// allocation. Surviving elements are relocated with memcpy.
+template<typename T, typename AllocatorT>
+void VmaVector<T, AllocatorT>::resize(size_t newCount)
+{
+    const size_t targetCapacity = (newCount > m_Capacity)
+        ? VMA_MAX(newCount, VMA_MAX(m_Capacity * 3 / 2, (size_t)8))
+        : m_Capacity;
+
+    if (targetCapacity != m_Capacity)
+    {
+        T* const grownArray = targetCapacity
+            ? VmaAllocateArray<T>(m_Allocator.m_pCallbacks, targetCapacity)
+            : VMA_NULL;
+        const size_t keptCount = VMA_MIN(m_Count, newCount);
+        if (keptCount != 0)
+        {
+            memcpy(grownArray, m_pArray, keptCount * sizeof(T));
+        }
+        VmaFree(m_Allocator.m_pCallbacks, m_pArray);
+        m_pArray = grownArray;
+        m_Capacity = targetCapacity;
+    }
+
+    m_Count = newCount;
+}
+
+// Reduces the capacity to exactly m_Count. When the vector is empty the
+// storage is released and m_pArray becomes null. Does nothing if there is no
+// spare capacity.
+template<typename T, typename AllocatorT>
+void VmaVector<T, AllocatorT>::shrink_to_fit()
+{
+    if (m_Capacity <= m_Count)
+    {
+        return;
+    }
+
+    T* tightArray = VMA_NULL;
+    if (m_Count > 0)
+    {
+        tightArray = VmaAllocateArray<T>(m_Allocator.m_pCallbacks, m_Count);
+        memcpy(tightArray, m_pArray, m_Count * sizeof(T));
+    }
+    VmaFree(m_Allocator.m_pCallbacks, m_pArray);
+    m_pArray = tightArray;
+    m_Capacity = m_Count;
+}
+
+// Inserts a copy of `src` at position `index` (0 <= index <= size()).
+// Elements at and after `index` are shifted one slot towards the end with
+// memmove before the new value is assigned.
+template<typename T, typename AllocatorT>
+void VmaVector<T, AllocatorT>::insert(size_t index, const T& src)
+{
+    VMA_HEAVY_ASSERT(index <= m_Count);
+    const size_t countBefore = size();
+    resize(countBefore + 1);
+    if (index < countBefore)
+    {
+        T* const pSlot = m_pArray + index;
+        memmove(pSlot + 1, pSlot, (countBefore - index) * sizeof(T));
+    }
+    m_pArray[index] = src;
+}
+
+// Removes the element at `index` (index < size()). Elements after it are
+// shifted one slot towards the front with memmove, then the count is reduced
+// by one. The capacity is not changed.
+template<typename T, typename AllocatorT>
+void VmaVector<T, AllocatorT>::remove(size_t index)
+{
+    VMA_HEAVY_ASSERT(index < m_Count);
+    const size_t countBefore = size();
+    if (index < countBefore - 1)
+    {
+        T* const pSlot = m_pArray + index;
+        memmove(pSlot, pSlot + 1, (countBefore - index - 1) * sizeof(T));
+    }
+    resize(countBefore - 1);
+}
+#endif // _VMA_VECTOR_FUNCTIONS
+
+// Free-function form of VmaVector::insert: inserts `item` into `vec` at `index`.
+template<typename T, typename allocatorT>
+static void VmaVectorInsert(VmaVector<T, allocatorT>& vec, size_t index, const T& item)
+{ vec.insert(index, item); }
+
+// Free-function form of VmaVector::remove: removes the element of `vec` at `index`.
+template<typename T, typename allocatorT>
+static void VmaVectorRemove(VmaVector<T, allocatorT>& vec, size_t index)
+{ vec.remove(index); }
+#endif // _VMA_VECTOR
+
+#ifndef _VMA_SMALL_VECTOR
+/*
+This is a vector (a variable-sized array), optimized for the case when the array is small.
+
+It contains some number of elements in-place, which allows it to avoid heap allocation
+when the actual number of elements is below that threshold. This allows normal "small"
+cases to be fast without losing generality for large inputs.
+
+Template parameters:
+T          - element type. Elements are relocated with memcpy/memmove.
+AllocatorT - allocator type forwarded to the internal VmaVector.
+N          - number of elements stored in-place; the heap-backed array is used
+             only while the element count is greater than N.
+*/
+template<typename T, typename AllocatorT, size_t N>
+class VmaSmallVector
+{
+public:
+ typedef T value_type;
+ typedef T* iterator;
+
+ VmaSmallVector(const AllocatorT& allocator);
+ VmaSmallVector(size_t count, const AllocatorT& allocator);
+ // Copying (from any VmaSmallVector instantiation) is explicitly disabled.
+ template<typename SrcT, typename SrcAllocatorT, size_t SrcN>
+ VmaSmallVector(const VmaSmallVector<SrcT, SrcAllocatorT, SrcN>&) = delete;
+ template<typename SrcT, typename SrcAllocatorT, size_t SrcN>
+ VmaSmallVector<T, AllocatorT, N>& operator=(const VmaSmallVector<SrcT, SrcAllocatorT, SrcN>&) = delete;
+ ~VmaSmallVector() = default;
+
+ bool empty() const { return m_Count == 0; }
+ size_t size() const { return m_Count; }
+ // data() selects the active storage: the dynamic array when m_Count > N,
+ // the in-place array otherwise.
+ T* data() { return m_Count > N ? m_DynamicArray.data() : m_StaticArray; }
+ T& front() { VMA_HEAVY_ASSERT(m_Count > 0); return data()[0]; }
+ T& back() { VMA_HEAVY_ASSERT(m_Count > 0); return data()[m_Count - 1]; }
+ const T* data() const { return m_Count > N ? m_DynamicArray.data() : m_StaticArray; }
+ const T& front() const { VMA_HEAVY_ASSERT(m_Count > 0); return data()[0]; }
+ const T& back() const { VMA_HEAVY_ASSERT(m_Count > 0); return data()[m_Count - 1]; }
+
+ iterator begin() { return data(); }
+ iterator end() { return data() + m_Count; }
+
+ void pop_front() { VMA_HEAVY_ASSERT(m_Count > 0); remove(0); }
+ void pop_back() { VMA_HEAVY_ASSERT(m_Count > 0); resize(size() - 1); }
+ void push_front(const T& src) { insert(0, src); }
+
+ void push_back(const T& src);
+ // freeMemory: when true, spare capacity of the dynamic array is released
+ // (via shrink_to_fit) after the operation.
+ void resize(size_t newCount, bool freeMemory = false);
+ void clear(bool freeMemory = false);
+ void insert(size_t index, const T& src);
+ void remove(size_t index);
+
+ T& operator[](size_t index) { VMA_HEAVY_ASSERT(index < m_Count); return data()[index]; }
+ const T& operator[](size_t index) const { VMA_HEAVY_ASSERT(index < m_Count); return data()[index]; }
+
+private:
+ size_t m_Count;
+ T m_StaticArray[N]; // Used when m_Count <= N
+ VmaVector<T, AllocatorT> m_DynamicArray; // Used when m_Count > N
+};
+
+#ifndef _VMA_SMALL_VECTOR_FUNCTIONS
+// Constructs an empty small vector; the dynamic array is created empty with
+// the given allocator.
+template<typename T, typename AllocatorT, size_t N>
+VmaSmallVector<T, AllocatorT, N>::VmaSmallVector(const AllocatorT& allocator)
+    : m_Count(0), m_DynamicArray(allocator)
+{
+}
+
+// Constructs a small vector with `count` elements. The dynamic array is sized
+// to `count` only when `count` exceeds the in-place capacity N; otherwise it
+// is created empty and the in-place array is used.
+template<typename T, typename AllocatorT, size_t N>
+VmaSmallVector<T, AllocatorT, N>::VmaSmallVector(size_t count, const AllocatorT& allocator)
+    : m_Count(count), m_DynamicArray(count > N ? count : 0, allocator)
+{
+}
+
+// Appends a copy of `src`. The vector is grown by one via resize(), which may
+// switch storage from the in-place array to the dynamic one, so data() is
+// queried only after the resize.
+template<typename T, typename AllocatorT, size_t N>
+void VmaSmallVector<T, AllocatorT, N>::push_back(const T& src)
+{
+    const size_t slot = size();
+    resize(slot + 1);
+    T* const pStorage = data();
+    pStorage[slot] = src;
+}
+
+// Sets the element count to `newCount`, migrating elements between the
+// in-place array and the dynamic array whenever the count crosses N.
+// `freeMemory` asks the dynamic array to drop spare capacity; it has an effect
+// only when the vector was already using the dynamic array before the call.
+template<typename T, typename AllocatorT, size_t N>
+void VmaSmallVector<T, AllocatorT, N>::resize(size_t newCount, bool freeMemory)
+{
+    const bool wasDynamic = m_Count > N;
+    const bool willBeDynamic = newCount > N;
+
+    if (willBeDynamic)
+    {
+        m_DynamicArray.resize(newCount);
+        if (wasDynamic)
+        {
+            // Stayed in the dynamic array (grew or shrank).
+            if (freeMemory)
+            {
+                m_DynamicArray.shrink_to_fit();
+            }
+        }
+        else if (m_Count > 0)
+        {
+            // Grew past N: carry the in-place elements over to the dynamic array.
+            memcpy(m_DynamicArray.data(), m_StaticArray, m_Count * sizeof(T));
+        }
+    }
+    else if (wasDynamic)
+    {
+        // Shrank to N or fewer: bring the surviving elements back in-place.
+        if (newCount > 0)
+        {
+            memcpy(m_StaticArray, m_DynamicArray.data(), newCount * sizeof(T));
+        }
+        m_DynamicArray.resize(0);
+        if (freeMemory)
+        {
+            m_DynamicArray.shrink_to_fit();
+        }
+    }
+    // Otherwise both old and new counts fit in-place: only the count changes.
+
+    m_Count = newCount;
+}
+
+// Removes all elements. The dynamic array is always emptied; its storage is
+// released only when `freeMemory` is true.
+template<typename T, typename AllocatorT, size_t N>
+void VmaSmallVector<T, AllocatorT, N>::clear(bool freeMemory)
+{
+    m_DynamicArray.clear();
+    if (freeMemory) { m_DynamicArray.shrink_to_fit(); }
+    m_Count = 0;
+}
+
+// Inserts a copy of `src` at `index` (0 <= index <= size()). The vector is
+// grown first (possibly switching storage), then the tail starting at `index`
+// is shifted one slot towards the end inside the active storage.
+template<typename T, typename AllocatorT, size_t N>
+void VmaSmallVector<T, AllocatorT, N>::insert(size_t index, const T& src)
+{
+    VMA_HEAVY_ASSERT(index <= m_Count);
+    const size_t countBefore = size();
+    resize(countBefore + 1);
+    T* const pStorage = data();
+    T* const pSlot = pStorage + index;
+    if (index < countBefore)
+    {
+        // Not optimal when the elements were just copied from m_StaticArray to
+        // m_DynamicArray: that copy and this shift could be a single pass.
+        memmove(pSlot + 1, pSlot, (countBefore - index) * sizeof(T));
+    }
+    pStorage[index] = src;
+}
+
+// Removes the element at `index` (index < size()). The tail is shifted one
+// slot towards the front inside the currently active storage, then the vector
+// is shrunk by one (which may move the elements back in-place).
+template<typename T, typename AllocatorT, size_t N>
+void VmaSmallVector<T, AllocatorT, N>::remove(size_t index)
+{
+    VMA_HEAVY_ASSERT(index < m_Count);
+    const size_t countBefore = size();
+    if (index < countBefore - 1)
+    {
+        // Not optimal when the shrink below copies from m_DynamicArray to
+        // m_StaticArray: this shift and that copy could be a single pass.
+        T* const pSlot = data() + index;
+        memmove(pSlot, pSlot + 1, (countBefore - index - 1) * sizeof(T));
+    }
+    resize(countBefore - 1);
+}
+#endif // _VMA_SMALL_VECTOR_FUNCTIONS
+#endif // _VMA_SMALL_VECTOR
+
+#ifndef _VMA_POOL_ALLOCATOR
+/*
+Allocator for objects of type T using a list of arrays (pools) to speed up
+allocation. Number of elements that can be allocated is not bounded because
+allocator can create multiple blocks.
+
+Free items inside a block form a singly linked list threaded through the items
+themselves by index; UINT32_MAX marks the end of that list.
+*/
+template<typename T>
+class VmaPoolAllocator
+{
+ VMA_CLASS_NO_COPY_NO_MOVE(VmaPoolAllocator)
+public:
+ // firstBlockCapacity: number of items in the first block; asserted to be > 1.
+ VmaPoolAllocator(const VkAllocationCallbacks* pAllocationCallbacks, uint32_t firstBlockCapacity);
+ ~VmaPoolAllocator();
+ // Constructs a T in a free item, forwarding `args` to its constructor.
+ template<typename... Types> T* Alloc(Types&&... args);
+ // Destroys *ptr and returns its item to the owning block's free list.
+ void Free(T* ptr);
+
+private:
+ // One slot of a block: either the index of the next free slot (while free)
+ // or the storage of a live T (while allocated).
+ union Item
+ {
+ uint32_t NextFreeIndex;
+ alignas(T) char Value[sizeof(T)];
+ };
+ struct ItemBlock
+ {
+ Item* pItems;
+ uint32_t Capacity;
+ // Index of the first free item, or UINT32_MAX when the block is full.
+ uint32_t FirstFreeIndex;
+ };
+
+ const VkAllocationCallbacks* m_pAllocationCallbacks;
+ const uint32_t m_FirstBlockCapacity;
+ VmaVector<ItemBlock, VmaStlAllocator<ItemBlock>> m_ItemBlocks;
+
+ // Appends a new block (1.5x the capacity of the last one) and returns it.
+ ItemBlock& CreateNewBlock();
+};
+
+#ifndef _VMA_POOL_ALLOCATOR_FUNCTIONS
+// Stores the allocation callbacks and the capacity of the first block. No
+// block is allocated here. The first block capacity must be greater than 1.
+template<typename T>
+VmaPoolAllocator<T>::VmaPoolAllocator(const VkAllocationCallbacks* pAllocationCallbacks, uint32_t firstBlockCapacity)
+    : m_pAllocationCallbacks(pAllocationCallbacks)
+    , m_FirstBlockCapacity(firstBlockCapacity)
+    , m_ItemBlocks(VmaStlAllocator<ItemBlock>(pAllocationCallbacks))
+{
+    VMA_ASSERT(m_FirstBlockCapacity > 1);
+}
+
+// Releases the item arrays of all blocks, last block first, then empties the
+// block list.
+template<typename T>
+VmaPoolAllocator<T>::~VmaPoolAllocator()
+{
+    size_t blockIndex = m_ItemBlocks.size();
+    while (blockIndex-- != 0)
+    {
+        vma_delete_array(m_pAllocationCallbacks, m_ItemBlocks[blockIndex].pItems, m_ItemBlocks[blockIndex].Capacity);
+    }
+    m_ItemBlocks.clear();
+}
+
+// Constructs a new T in pooled storage and returns it.
+// Blocks are searched from the newest to the oldest for one with a free item;
+// if none has any, a new block is created. `args` are forwarded to T's
+// constructor, which is invoked with placement new.
+template<typename T>
+template<typename... Types> T* VmaPoolAllocator<T>::Alloc(Types&&... args)
+{
+    ItemBlock* pBlock = VMA_NULL;
+    size_t blockIndex = m_ItemBlocks.size();
+    while (blockIndex-- != 0)
+    {
+        ItemBlock& candidate = m_ItemBlocks[blockIndex];
+        if (candidate.FirstFreeIndex != UINT32_MAX)
+        {
+            pBlock = &candidate;
+            break;
+        }
+    }
+
+    if (pBlock == VMA_NULL)
+    {
+        // Every block is full. A fresh block has FirstFreeIndex == 0, so the
+        // common code below hands out its first item.
+        pBlock = &CreateNewBlock();
+    }
+
+    // Pop the head of the block's free list and construct the object in place.
+    Item* const pFreeItem = &pBlock->pItems[pBlock->FirstFreeIndex];
+    pBlock->FirstFreeIndex = pFreeItem->NextFreeIndex;
+    T* const pObject = (T*)&pFreeItem->Value;
+    new(pObject) T(std::forward<Types>(args)...); // Explicit constructor call.
+    return pObject;
+}
+
+// Destroys the object at `ptr` and returns its item to the free list of the
+// block that contains it. Blocks are searched from the newest to the oldest.
+// Asserts if `ptr` does not lie inside any block of this pool.
+template<typename T>
+void VmaPoolAllocator<T>::Free(T* ptr)
+{
+    // Reinterpret the object pointer as a pointer to its enclosing union item.
+    // memcpy is used instead of a cast between the unrelated pointer types.
+    Item* pItem;
+    memcpy(&pItem, &ptr, sizeof(pItem));
+
+    size_t blockIndex = m_ItemBlocks.size();
+    while (blockIndex-- != 0)
+    {
+        ItemBlock& block = m_ItemBlocks[blockIndex];
+        Item* const pBlockBegin = block.pItems;
+        Item* const pBlockEnd = block.pItems + block.Capacity;
+        if (pItem < pBlockBegin || pItem >= pBlockEnd)
+        {
+            continue; // Not in the address range of this block.
+        }
+
+        ptr->~T(); // Explicit destructor call.
+        // Push the item onto the front of the block's free list.
+        const uint32_t itemIndex = static_cast<uint32_t>(pItem - pBlockBegin);
+        pItem->NextFreeIndex = block.FirstFreeIndex;
+        block.FirstFreeIndex = itemIndex;
+        return;
+    }
+    VMA_ASSERT(0 && "Pointer doesn't belong to this memory pool.");
+}
+
+// Allocates a new block of items, appends it to m_ItemBlocks and returns a
+// reference to the stored block. The first block has m_FirstBlockCapacity
+// items; each later block has 3/2 of the capacity of the previous one. All
+// items of the new block are chained into its free list in index order.
+template<typename T>
+typename VmaPoolAllocator<T>::ItemBlock& VmaPoolAllocator<T>::CreateNewBlock()
+{
+    uint32_t capacity = m_FirstBlockCapacity;
+    if (!m_ItemBlocks.empty())
+    {
+        capacity = m_ItemBlocks.back().Capacity * 3 / 2;
+    }
+
+    Item* const pItems = vma_new_array(m_pAllocationCallbacks, Item, capacity);
+    const ItemBlock freshBlock = { pItems, capacity, 0 };
+    m_ItemBlocks.push_back(freshBlock);
+
+    // Setup singly-linked list of all free items in this block:
+    // item i points at i + 1, the last item terminates the list.
+    const uint32_t lastIndex = capacity - 1;
+    for (uint32_t i = 0; i < lastIndex; ++i)
+    {
+        pItems[i].NextFreeIndex = i + 1;
+    }
+    pItems[lastIndex].NextFreeIndex = UINT32_MAX;
+
+    return m_ItemBlocks.back();
+}
+#endif // _VMA_POOL_ALLOCATOR_FUNCTIONS
+#endif // _VMA_POOL_ALLOCATOR
+
+#ifndef _VMA_RAW_LIST
+// Node of VmaRawList: links to the neighbouring nodes plus the stored value.
+// A null pPrev / pNext marks the first / last node of the list.
+template<typename T>
+struct VmaListItem
+{
+ VmaListItem* pPrev;
+ VmaListItem* pNext;
+ T Value;
+};
+
+// Doubly linked list.
+// Nodes (VmaListItem<T>) are obtained from an internal VmaPoolAllocator and
+// handed out to the caller as raw pointers.
+template<typename T>
+class VmaRawList
+{
+ VMA_CLASS_NO_COPY_NO_MOVE(VmaRawList)
+public:
+ typedef VmaListItem<T> ItemType;
+
+ VmaRawList(const VkAllocationCallbacks* pAllocationCallbacks);
+ // Intentionally not calling Clear, because that would be unnecessary
+ // computations to return all items to m_ItemAllocator as free.
+ ~VmaRawList() = default;
+
+ size_t GetCount() const { return m_Count; }
+ bool IsEmpty() const { return m_Count == 0; }
+
+ ItemType* Front() { return m_pFront; }
+ ItemType* Back() { return m_pBack; }
+ const ItemType* Front() const { return m_pFront; }
+ const ItemType* Back() const { return m_pBack; }
+
+ // The overloads without a value link a new node and return it; they do not
+ // assign its Value. The overloads taking `value` assign it afterwards.
+ ItemType* PushFront();
+ ItemType* PushBack();
+ ItemType* PushFront(const T& value);
+ ItemType* PushBack(const T& value);
+ void PopFront();
+ void PopBack();
+
+ // Item can be null - it means PushBack.
+ ItemType* InsertBefore(ItemType* pItem);
+ // Item can be null - it means PushFront.
+ ItemType* InsertAfter(ItemType* pItem);
+ ItemType* InsertBefore(ItemType* pItem, const T& value);
+ ItemType* InsertAfter(ItemType* pItem, const T& value);
+
+ void Clear();
+ void Remove(ItemType* pItem);
+
+private:
+ const VkAllocationCallbacks* const m_pAllocationCallbacks;
+ VmaPoolAllocator<ItemType> m_ItemAllocator;
+ ItemType* m_pFront;
+ ItemType* m_pBack;
+ size_t m_Count;
+};
+
+#ifndef _VMA_RAW_LIST_FUNCTIONS
+// Constructs an empty list. The node pool is created with a first block
+// capacity of 128 items.
+template<typename T>
+VmaRawList<T>::VmaRawList(const VkAllocationCallbacks* pAllocationCallbacks)
+    : m_pAllocationCallbacks(pAllocationCallbacks)
+    , m_ItemAllocator(pAllocationCallbacks, 128)
+    , m_pFront(VMA_NULL)
+    , m_pBack(VMA_NULL)
+    , m_Count(0)
+{
+}
+
+// Allocates a new node and links it as the first node of the list.
+// The node's Value is not assigned here. Returns the new node.
+template<typename T>
+VmaListItem<T>* VmaRawList<T>::PushFront()
+{
+    ItemType* const pNode = m_ItemAllocator.Alloc();
+    pNode->pPrev = VMA_NULL;
+    if (!IsEmpty())
+    {
+        pNode->pNext = m_pFront;
+        m_pFront->pPrev = pNode;
+        m_pFront = pNode;
+        ++m_Count;
+        return pNode;
+    }
+
+    // First node of an empty list: it is both the front and the back.
+    pNode->pNext = VMA_NULL;
+    m_pFront = pNode;
+    m_pBack = pNode;
+    m_Count = 1;
+    return pNode;
+}
+
+// Allocates a new node and links it as the last node of the list.
+// The node's Value is not assigned here. Returns the new node.
+template<typename T>
+VmaListItem<T>* VmaRawList<T>::PushBack()
+{
+    ItemType* const pNode = m_ItemAllocator.Alloc();
+    pNode->pNext = VMA_NULL;
+    if (!IsEmpty())
+    {
+        pNode->pPrev = m_pBack;
+        m_pBack->pNext = pNode;
+        m_pBack = pNode;
+        ++m_Count;
+        return pNode;
+    }
+
+    // First node of an empty list: it is both the front and the back.
+    pNode->pPrev = VMA_NULL;
+    m_pFront = pNode;
+    m_pBack = pNode;
+    m_Count = 1;
+    return pNode;
+}
+
+// Links a new node at the front of the list and assigns `value` to it.
+template<typename T>
+VmaListItem<T>* VmaRawList<T>::PushFront(const T& value)
+{
+    ItemType* const pNode = PushFront();
+    pNode->Value = value;
+    return pNode;
+}
+
+// Links a new node at the back of the list and assigns `value` to it.
+template<typename T>
+VmaListItem<T>* VmaRawList<T>::PushBack(const T& value)
+{
+    ItemType* const pNode = PushBack();
+    pNode->Value = value;
+    return pNode;
+}
+
+// Unlinks the first node and returns it to the node pool.
+// The list must not be empty.
+template<typename T>
+void VmaRawList<T>::PopFront()
+{
+    VMA_HEAVY_ASSERT(m_Count > 0);
+    ItemType* const pFrontItem = m_pFront;
+    ItemType* const pNextItem = pFrontItem->pNext;
+    if (pNextItem != VMA_NULL)
+    {
+        pNextItem->pPrev = VMA_NULL;
+    }
+    else
+    {
+        // The only node is being removed: clear the back pointer as well so
+        // Back() does not return a node that was handed back to the pool
+        // (Remove() and Clear() keep both ends consistent in the same way).
+        m_pBack = VMA_NULL;
+    }
+    m_pFront = pNextItem;
+    m_ItemAllocator.Free(pFrontItem);
+    --m_Count;
+}
+
+// Unlinks the last node and returns it to the node pool.
+// The list must not be empty.
+template<typename T>
+void VmaRawList<T>::PopBack()
+{
+    VMA_HEAVY_ASSERT(m_Count > 0);
+    ItemType* const pBackItem = m_pBack;
+    ItemType* const pPrevItem = pBackItem->pPrev;
+    if (pPrevItem != VMA_NULL)
+    {
+        pPrevItem->pNext = VMA_NULL;
+    }
+    else
+    {
+        // The only node is being removed: clear the front pointer as well so
+        // Front() does not return a node that was handed back to the pool
+        // (Remove() and Clear() keep both ends consistent in the same way).
+        m_pFront = VMA_NULL;
+    }
+    m_pBack = pPrevItem;
+    m_ItemAllocator.Free(pBackItem);
+    --m_Count;
+}
+
+// Returns every node to the node pool, walking from the back to the front,
+// and resets the list to the empty state. No-op on an empty list.
+template<typename T>
+void VmaRawList<T>::Clear()
+{
+    if (IsEmpty())
+    {
+        return;
+    }
+
+    for (ItemType* pNode = m_pBack; pNode != VMA_NULL; )
+    {
+        // Read the link before the node is released.
+        ItemType* const pPrevNode = pNode->pPrev;
+        m_ItemAllocator.Free(pNode);
+        pNode = pPrevNode;
+    }
+    m_pFront = VMA_NULL;
+    m_pBack = VMA_NULL;
+    m_Count = 0;
+}
+
+// Unlinks `pItem` from the list and returns it to the node pool.
+// `pItem` must be a non-null node of this (non-empty) list. Front/back
+// pointers are updated when the removed node was at either end.
+template<typename T>
+void VmaRawList<T>::Remove(ItemType* pItem)
+{
+    VMA_HEAVY_ASSERT(pItem != VMA_NULL);
+    VMA_HEAVY_ASSERT(m_Count > 0);
+
+    ItemType* const pBefore = pItem->pPrev;
+    ItemType* const pAfter = pItem->pNext;
+
+    if (pBefore == VMA_NULL)
+    {
+        VMA_HEAVY_ASSERT(m_pFront == pItem);
+        m_pFront = pAfter;
+    }
+    else
+    {
+        pBefore->pNext = pAfter;
+    }
+
+    if (pAfter == VMA_NULL)
+    {
+        VMA_HEAVY_ASSERT(m_pBack == pItem);
+        m_pBack = pBefore;
+    }
+    else
+    {
+        pAfter->pPrev = pBefore;
+    }
+
+    m_ItemAllocator.Free(pItem);
+    --m_Count;
+}
+
+// Links a new node immediately before `pItem` and returns it. A null `pItem`
+// means "before the end", i.e. the node is appended with PushBack().
+// The new node's Value is not assigned here.
+template<typename T>
+VmaListItem<T>* VmaRawList<T>::InsertBefore(ItemType* pItem)
+{
+    if (pItem == VMA_NULL)
+    {
+        return PushBack();
+    }
+
+    ItemType* const pBefore = pItem->pPrev;
+    ItemType* const pNode = m_ItemAllocator.Alloc();
+    pNode->pPrev = pBefore;
+    pNode->pNext = pItem;
+    pItem->pPrev = pNode;
+    if (pBefore == VMA_NULL)
+    {
+        // pItem was the first node, so the new node becomes the front.
+        VMA_HEAVY_ASSERT(m_pFront == pItem);
+        m_pFront = pNode;
+    }
+    else
+    {
+        pBefore->pNext = pNode;
+    }
+    ++m_Count;
+    return pNode;
+}
+
+// Links a new node immediately after `pItem` and returns it. A null `pItem`
+// means "after the beginning", i.e. the node is prepended with PushFront().
+// The new node's Value is not assigned here.
+template<typename T>
+VmaListItem<T>* VmaRawList<T>::InsertAfter(ItemType* pItem)
+{
+    if (pItem == VMA_NULL)
+    {
+        return PushFront();
+    }
+
+    ItemType* const pAfter = pItem->pNext;
+    ItemType* const pNode = m_ItemAllocator.Alloc();
+    pNode->pNext = pAfter;
+    pNode->pPrev = pItem;
+    pItem->pNext = pNode;
+    if (pAfter == VMA_NULL)
+    {
+        // pItem was the last node, so the new node becomes the back.
+        VMA_HEAVY_ASSERT(m_pBack == pItem);
+        m_pBack = pNode;
+    }
+    else
+    {
+        pAfter->pPrev = pNode;
+    }
+    ++m_Count;
+    return pNode;
+}
+
+// Links a new node before `pItem` (null means append) and assigns `value` to it.
+template<typename T>
+VmaListItem<T>* VmaRawList<T>::InsertBefore(ItemType* pItem, const T& value)
+{
+    ItemType* const pNode = InsertBefore(pItem);
+    pNode->Value = value;
+    return pNode;
+}
+
+// Links a new node after `pItem` (null means prepend) and assigns `value` to it.
+template<typename T>
+VmaListItem<T>* VmaRawList<T>::InsertAfter(ItemType* pItem, const T& value)
+{
+    ItemType* const pNode = InsertAfter(pItem);
+    pNode->Value = value;
+    return pNode;
+}
+#endif // _VMA_RAW_LIST_FUNCTIONS
+#endif // _VMA_RAW_LIST
+
+#ifndef _VMA_LIST
+// Doubly linked list with STL-like iterators, implemented on top of VmaRawList<T>.
+// Only allocator.m_pCallbacks is taken from the AllocatorT passed to the constructor.
+//
+// All four iterator kinds hold a pointer to the raw list and a pointer to the
+// current node; a null node pointer is the past-the-end position (end()/rend()).
+template<typename T, typename AllocatorT>
+class VmaList
+{
+ VMA_CLASS_NO_COPY_NO_MOVE(VmaList)
+public:
+ class reverse_iterator;
+ class const_iterator;
+ class const_reverse_iterator;
+
+ // Forward mutable iterator: ++ follows pNext.
+ class iterator
+ {
+ friend class const_iterator;
+ friend class VmaList<T, AllocatorT>;
+ public:
+ iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {}
+ // Conversion keeps pointing at the same node (no off-by-one adjustment).
+ iterator(const reverse_iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {}
+
+ T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; }
+ T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; }
+
+ bool operator==(const iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem == rhs.m_pItem; }
+ bool operator!=(const iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; }
+
+ iterator operator++(int) { iterator result = *this; ++*this; return result; }
+ iterator operator--(int) { iterator result = *this; --*this; return result; }
+
+ iterator& operator++() { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); m_pItem = m_pItem->pNext; return *this; }
+ iterator& operator--();
+
+ private:
+ VmaRawList<T>* m_pList;
+ VmaListItem<T>* m_pItem;
+
+ iterator(VmaRawList<T>* pList, VmaListItem<T>* pItem) : m_pList(pList), m_pItem(pItem) {}
+ };
+ // Reverse mutable iterator: ++ follows pPrev.
+ class reverse_iterator
+ {
+ friend class const_reverse_iterator;
+ friend class VmaList<T, AllocatorT>;
+ public:
+ reverse_iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {}
+ // Conversion keeps pointing at the same node (no off-by-one adjustment).
+ reverse_iterator(const iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {}
+
+ T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; }
+ T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; }
+
+ bool operator==(const reverse_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem == rhs.m_pItem; }
+ bool operator!=(const reverse_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; }
+
+ reverse_iterator operator++(int) { reverse_iterator result = *this; ++* this; return result; }
+ reverse_iterator operator--(int) { reverse_iterator result = *this; --* this; return result; }
+
+ reverse_iterator& operator++() { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); m_pItem = m_pItem->pPrev; return *this; }
+ reverse_iterator& operator--();
+
+ private:
+ VmaRawList<T>* m_pList;
+ VmaListItem<T>* m_pItem;
+
+ reverse_iterator(VmaRawList<T>* pList, VmaListItem<T>* pItem) : m_pList(pList), m_pItem(pItem) {}
+ };
+ // Forward read-only iterator: ++ follows pNext.
+ class const_iterator
+ {
+ friend class VmaList<T, AllocatorT>;
+ public:
+ const_iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {}
+ const_iterator(const iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {}
+ const_iterator(const reverse_iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {}
+
+ // Returns a mutable iterator to the same node by casting away constness.
+ iterator drop_const() { return { const_cast<VmaRawList<T>*>(m_pList), const_cast<VmaListItem<T>*>(m_pItem) }; }
+
+ const T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; }
+ const T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; }
+
+ bool operator==(const const_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem == rhs.m_pItem; }
+ bool operator!=(const const_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; }
+
+ const_iterator operator++(int) { const_iterator result = *this; ++* this; return result; }
+ const_iterator operator--(int) { const_iterator result = *this; --* this; return result; }
+
+ const_iterator& operator++() { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); m_pItem = m_pItem->pNext; return *this; }
+ const_iterator& operator--();
+
+ private:
+ const VmaRawList<T>* m_pList;
+ const VmaListItem<T>* m_pItem;
+
+ const_iterator(const VmaRawList<T>* pList, const VmaListItem<T>* pItem) : m_pList(pList), m_pItem(pItem) {}
+ };
+ // Reverse read-only iterator: ++ follows pPrev.
+ class const_reverse_iterator
+ {
+ friend class VmaList<T, AllocatorT>;
+ public:
+ const_reverse_iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {}
+ const_reverse_iterator(const reverse_iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {}
+ const_reverse_iterator(const iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {}
+
+ // Returns a mutable reverse iterator to the same node by casting away constness.
+ reverse_iterator drop_const() { return { const_cast<VmaRawList<T>*>(m_pList), const_cast<VmaListItem<T>*>(m_pItem) }; }
+
+ const T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; }
+ const T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; }
+
+ bool operator==(const const_reverse_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem == rhs.m_pItem; }
+ bool operator!=(const const_reverse_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; }
+
+ const_reverse_iterator operator++(int) { const_reverse_iterator result = *this; ++* this; return result; }
+ const_reverse_iterator operator--(int) { const_reverse_iterator result = *this; --* this; return result; }
+
+ const_reverse_iterator& operator++() { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); m_pItem = m_pItem->pPrev; return *this; }
+ const_reverse_iterator& operator--();
+
+ private:
+ const VmaRawList<T>* m_pList;
+ const VmaListItem<T>* m_pItem;
+
+ const_reverse_iterator(const VmaRawList<T>* pList, const VmaListItem<T>* pItem) : m_pList(pList), m_pItem(pItem) {}
+ };
+
+ VmaList(const AllocatorT& allocator) : m_RawList(allocator.m_pCallbacks) {}
+
+ bool empty() const { return m_RawList.IsEmpty(); }
+ size_t size() const { return m_RawList.GetCount(); }
+
+ iterator begin() { return iterator(&m_RawList, m_RawList.Front()); }
+ iterator end() { return iterator(&m_RawList, VMA_NULL); }
+
+ const_iterator cbegin() const { return const_iterator(&m_RawList, m_RawList.Front()); }
+ const_iterator cend() const { return const_iterator(&m_RawList, VMA_NULL); }
+
+ const_iterator begin() const { return cbegin(); }
+ const_iterator end() const { return cend(); }
+
+ reverse_iterator rbegin() { return reverse_iterator(&m_RawList, m_RawList.Back()); }
+ reverse_iterator rend() { return reverse_iterator(&m_RawList, VMA_NULL); }
+
+ const_reverse_iterator crbegin() const { return const_reverse_iterator(&m_RawList, m_RawList.Back()); }
+ const_reverse_iterator crend() const { return const_reverse_iterator(&m_RawList, VMA_NULL); }
+
+ const_reverse_iterator rbegin() const { return crbegin(); }
+ const_reverse_iterator rend() const { return crend(); }
+
+ void push_back(const T& value) { m_RawList.PushBack(value); }
+ // Inserts `value` before `it`; with it == end() (null node) the raw list
+ // appends the value. Returns an iterator to the inserted element.
+ iterator insert(iterator it, const T& value) { return iterator(&m_RawList, m_RawList.InsertBefore(it.m_pItem, value)); }
+
+ void clear() { m_RawList.Clear(); }
+ // `it` must refer to an element (not end()); it is invalid afterwards.
+ void erase(iterator it) { m_RawList.Remove(it.m_pItem); }
+
+private:
+ VmaRawList<T> m_RawList;
+};
+
+#ifndef _VMA_LIST_FUNCTIONS
+// Pre-decrement. From end() (null node) it moves to the last element of the
+// list, which must not be empty; otherwise it moves to the previous node.
+template<typename T, typename AllocatorT>
+typename VmaList<T, AllocatorT>::iterator& VmaList<T, AllocatorT>::iterator::operator--()
+{
+    if (m_pItem == VMA_NULL)
+    {
+        VMA_HEAVY_ASSERT(!m_pList->IsEmpty());
+        m_pItem = m_pList->Back();
+        return *this;
+    }
+    m_pItem = m_pItem->pPrev;
+    return *this;
+}
+
+// Pre-decrement. From rend() (null node) it moves to the first element of the
+// list, which must not be empty; otherwise it moves to the next node.
+template<typename T, typename AllocatorT>
+typename VmaList<T, AllocatorT>::reverse_iterator& VmaList<T, AllocatorT>::reverse_iterator::operator--()
+{
+    if (m_pItem == VMA_NULL)
+    {
+        VMA_HEAVY_ASSERT(!m_pList->IsEmpty());
+        m_pItem = m_pList->Front();
+        return *this;
+    }
+    m_pItem = m_pItem->pNext;
+    return *this;
+}
+
+// Pre-decrement. From cend() (null node) it moves to the last element of the
+// list, which must not be empty; otherwise it moves to the previous node.
+template<typename T, typename AllocatorT>
+typename VmaList<T, AllocatorT>::const_iterator& VmaList<T, AllocatorT>::const_iterator::operator--()
+{
+    if (m_pItem == VMA_NULL)
+    {
+        VMA_HEAVY_ASSERT(!m_pList->IsEmpty());
+        m_pItem = m_pList->Back();
+        return *this;
+    }
+    m_pItem = m_pItem->pPrev;
+    return *this;
+}
+
+// Pre-decrement of a reverse iterator, i.e. one step towards the back of the
+// list. From crend() (null node) it moves to the first element of the list,
+// which must not be empty; otherwise it moves to the next node.
+template<typename T, typename AllocatorT>
+typename VmaList<T, AllocatorT>::const_reverse_iterator& VmaList<T, AllocatorT>::const_reverse_iterator::operator--()
+{
+    if (m_pItem != VMA_NULL)
+    {
+        m_pItem = m_pItem->pNext;
+    }
+    else
+    {
+        VMA_HEAVY_ASSERT(!m_pList->IsEmpty());
+        // The element just before rend() in reverse order is the FRONT of the
+        // list: incrementing from Front() follows pPrev and reaches the null
+        // end position. This matches reverse_iterator::operator--().
+        m_pItem = m_pList->Front();
+    }
+    return *this;
+}
+#endif // _VMA_LIST_FUNCTIONS
+#endif // _VMA_LIST
+
+#ifndef _VMA_INTRUSIVE_LINKED_LIST
+/*
+Doubly linked list that stores no nodes of its own: the prev/next pointers live
+inside the items and are reached through ItemTypeTraits. The list never
+allocates or frees items.
+
+Expected interface of ItemTypeTraits:
+struct MyItemTypeTraits
+{
+ typedef MyItem ItemType;
+ static ItemType* GetPrev(const ItemType* item) { return item->myPrevPtr; }
+ static ItemType* GetNext(const ItemType* item) { return item->myNextPtr; }
+ static ItemType*& AccessPrev(ItemType* item) { return item->myPrevPtr; }
+ static ItemType*& AccessNext(ItemType* item) { return item->myNextPtr; }
+};
+*/
+template<typename ItemTypeTraits>
+class VmaIntrusiveLinkedList
+{
+public:
+ typedef typename ItemTypeTraits::ItemType ItemType;
+ static ItemType* GetPrev(const ItemType* item) { return ItemTypeTraits::GetPrev(item); }
+ static ItemType* GetNext(const ItemType* item) { return ItemTypeTraits::GetNext(item); }
+
+ // Movable, not copyable.
+ VmaIntrusiveLinkedList() = default;
+ VmaIntrusiveLinkedList(VmaIntrusiveLinkedList && src);
+ VmaIntrusiveLinkedList(const VmaIntrusiveLinkedList&) = delete;
+ VmaIntrusiveLinkedList& operator=(VmaIntrusiveLinkedList&& src);
+ VmaIntrusiveLinkedList& operator=(const VmaIntrusiveLinkedList&) = delete;
+ // The list is expected to be empty when destroyed (checked by assert only).
+ ~VmaIntrusiveLinkedList() { VMA_HEAVY_ASSERT(IsEmpty()); }
+
+ size_t GetCount() const { return m_Count; }
+ bool IsEmpty() const { return m_Count == 0; }
+ ItemType* Front() { return m_Front; }
+ ItemType* Back() { return m_Back; }
+ const ItemType* Front() const { return m_Front; }
+ const ItemType* Back() const { return m_Back; }
+
+ // Items passed to the Push*/Insert* functions must be unlinked, i.e. have
+ // null prev and next pointers (checked by assert only).
+ void PushBack(ItemType* item);
+ void PushFront(ItemType* item);
+ // Pop* unlink and return the item; its prev/next pointers are reset to null.
+ ItemType* PopBack();
+ ItemType* PopFront();
+
+ // MyItem can be null - it means PushBack.
+ void InsertBefore(ItemType* existingItem, ItemType* newItem);
+ // MyItem can be null - it means PushFront.
+ void InsertAfter(ItemType* existingItem, ItemType* newItem);
+ void Remove(ItemType* item);
+ void RemoveAll();
+
+private:
+ ItemType* m_Front = VMA_NULL;
+ ItemType* m_Back = VMA_NULL;
+ size_t m_Count = 0;
+};
+
+#ifndef _VMA_INTRUSIVE_LINKED_LIST_FUNCTIONS
+// Move constructor: takes over the items of `src` and leaves `src` empty.
+template<typename ItemTypeTraits>
+VmaIntrusiveLinkedList<ItemTypeTraits>::VmaIntrusiveLinkedList(VmaIntrusiveLinkedList&& src)
+    : m_Front(src.m_Front)
+    , m_Back(src.m_Back)
+    , m_Count(src.m_Count)
+{
+    src.m_Front = VMA_NULL;
+    src.m_Back = VMA_NULL;
+    src.m_Count = 0;
+}
+
+// Move assignment: takes over the items of `src` and leaves `src` empty.
+// This list is expected to be empty beforehand (checked by assert only).
+// Self-assignment is a no-op.
+template<typename ItemTypeTraits>
+VmaIntrusiveLinkedList<ItemTypeTraits>& VmaIntrusiveLinkedList<ItemTypeTraits>::operator=(VmaIntrusiveLinkedList&& src)
+{
+    if (this == &src)
+    {
+        return *this;
+    }
+
+    VMA_HEAVY_ASSERT(IsEmpty());
+    m_Front = src.m_Front;
+    m_Back = src.m_Back;
+    m_Count = src.m_Count;
+    src.m_Front = VMA_NULL;
+    src.m_Back = VMA_NULL;
+    src.m_Count = 0;
+    return *this;
+}
+
+// Links `item` as the last item of the list. `item` must currently be
+// unlinked (null prev and next).
+template<typename ItemTypeTraits>
+void VmaIntrusiveLinkedList<ItemTypeTraits>::PushBack(ItemType* item)
+{
+    VMA_HEAVY_ASSERT(ItemTypeTraits::GetPrev(item) == VMA_NULL && ItemTypeTraits::GetNext(item) == VMA_NULL);
+    if (!IsEmpty())
+    {
+        ItemTypeTraits::AccessPrev(item) = m_Back;
+        ItemTypeTraits::AccessNext(m_Back) = item;
+        m_Back = item;
+        ++m_Count;
+        return;
+    }
+
+    // First item of an empty list: it is both the front and the back.
+    m_Front = item;
+    m_Back = item;
+    m_Count = 1;
+}
+
+// Links `item` as the first item of the list. `item` must currently be
+// unlinked (null prev and next).
+template<typename ItemTypeTraits>
+void VmaIntrusiveLinkedList<ItemTypeTraits>::PushFront(ItemType* item)
+{
+    VMA_HEAVY_ASSERT(ItemTypeTraits::GetPrev(item) == VMA_NULL && ItemTypeTraits::GetNext(item) == VMA_NULL);
+    if (!IsEmpty())
+    {
+        ItemTypeTraits::AccessNext(item) = m_Front;
+        ItemTypeTraits::AccessPrev(m_Front) = item;
+        m_Front = item;
+        ++m_Count;
+        return;
+    }
+
+    // First item of an empty list: it is both the front and the back.
+    m_Front = item;
+    m_Back = item;
+    m_Count = 1;
+}
+
+// Unlinks and returns the last item of the list, which must not be empty.
+// The returned item has its prev and next pointers reset to null.
+template<typename ItemTypeTraits>
+typename VmaIntrusiveLinkedList<ItemTypeTraits>::ItemType* VmaIntrusiveLinkedList<ItemTypeTraits>::PopBack()
+{
+    VMA_HEAVY_ASSERT(m_Count > 0);
+    ItemType* const backItem = m_Back;
+    ItemType* const prevItem = ItemTypeTraits::GetPrev(backItem);
+    if (prevItem != VMA_NULL)
+    {
+        ItemTypeTraits::AccessNext(prevItem) = VMA_NULL;
+    }
+    else
+    {
+        // The only item is being removed: clear the front pointer too, so
+        // Front() does not keep returning an item that is no longer in the
+        // list (Remove() keeps both ends consistent in the same way).
+        m_Front = VMA_NULL;
+    }
+    m_Back = prevItem;
+    --m_Count;
+    ItemTypeTraits::AccessPrev(backItem) = VMA_NULL;
+    ItemTypeTraits::AccessNext(backItem) = VMA_NULL;
+    return backItem;
+}
+
+// Unlinks and returns the first item of the list, which must not be empty.
+// The returned item has its prev and next pointers reset to null.
+template<typename ItemTypeTraits>
+typename VmaIntrusiveLinkedList<ItemTypeTraits>::ItemType* VmaIntrusiveLinkedList<ItemTypeTraits>::PopFront()
+{
+    VMA_HEAVY_ASSERT(m_Count > 0);
+    ItemType* const frontItem = m_Front;
+    ItemType* const nextItem = ItemTypeTraits::GetNext(frontItem);
+    if (nextItem != VMA_NULL)
+    {
+        ItemTypeTraits::AccessPrev(nextItem) = VMA_NULL;
+    }
+    else
+    {
+        // The only item is being removed: clear the back pointer too, so
+        // Back() does not keep returning an item that is no longer in the
+        // list (Remove() keeps both ends consistent in the same way).
+        m_Back = VMA_NULL;
+    }
+    m_Front = nextItem;
+    --m_Count;
+    ItemTypeTraits::AccessPrev(frontItem) = VMA_NULL;
+    ItemTypeTraits::AccessNext(frontItem) = VMA_NULL;
+    return frontItem;
+}
+
+// Links `newItem` immediately before `existingItem`. A null `existingItem`
+// means the item is appended with PushBack(). `newItem` must be non-null and
+// currently unlinked.
+template<typename ItemTypeTraits>
+void VmaIntrusiveLinkedList<ItemTypeTraits>::InsertBefore(ItemType* existingItem, ItemType* newItem)
+{
+    VMA_HEAVY_ASSERT(newItem != VMA_NULL && ItemTypeTraits::GetPrev(newItem) == VMA_NULL && ItemTypeTraits::GetNext(newItem) == VMA_NULL);
+    if (existingItem == VMA_NULL)
+    {
+        PushBack(newItem);
+        return;
+    }
+
+    ItemType* const before = ItemTypeTraits::GetPrev(existingItem);
+    ItemTypeTraits::AccessPrev(newItem) = before;
+    ItemTypeTraits::AccessNext(newItem) = existingItem;
+    ItemTypeTraits::AccessPrev(existingItem) = newItem;
+    if (before == VMA_NULL)
+    {
+        // existingItem was the first item, so newItem becomes the front.
+        VMA_HEAVY_ASSERT(m_Front == existingItem);
+        m_Front = newItem;
+    }
+    else
+    {
+        ItemTypeTraits::AccessNext(before) = newItem;
+    }
+    ++m_Count;
+}
+
+// Links `newItem` immediately after `existingItem`. A null `existingItem`
+// means the item is prepended with PushFront(). `newItem` must be non-null and
+// currently unlinked.
+template<typename ItemTypeTraits>
+void VmaIntrusiveLinkedList<ItemTypeTraits>::InsertAfter(ItemType* existingItem, ItemType* newItem)
+{
+    VMA_HEAVY_ASSERT(newItem != VMA_NULL && ItemTypeTraits::GetPrev(newItem) == VMA_NULL && ItemTypeTraits::GetNext(newItem) == VMA_NULL);
+    if (existingItem == VMA_NULL)
+    {
+        PushFront(newItem);
+        return;
+    }
+
+    ItemType* const after = ItemTypeTraits::GetNext(existingItem);
+    ItemTypeTraits::AccessNext(newItem) = after;
+    ItemTypeTraits::AccessPrev(newItem) = existingItem;
+    ItemTypeTraits::AccessNext(existingItem) = newItem;
+    if (after == VMA_NULL)
+    {
+        // existingItem was the last item, so newItem becomes the back.
+        VMA_HEAVY_ASSERT(m_Back == existingItem);
+        m_Back = newItem;
+    }
+    else
+    {
+        ItemTypeTraits::AccessPrev(after) = newItem;
+    }
+    ++m_Count;
+}
+
+// Unlinks `item` from the list. `item` must be a non-null member of this
+// (non-empty) list. Front/back are updated when the item was at either end,
+// and the item's own prev/next pointers are reset to null.
+template<typename ItemTypeTraits>
+void VmaIntrusiveLinkedList<ItemTypeTraits>::Remove(ItemType* item)
+{
+    VMA_HEAVY_ASSERT(item != VMA_NULL && m_Count > 0);
+
+    if (ItemTypeTraits::GetPrev(item) == VMA_NULL)
+    {
+        VMA_HEAVY_ASSERT(m_Front == item);
+        m_Front = ItemTypeTraits::GetNext(item);
+    }
+    else
+    {
+        ItemType* const before = ItemTypeTraits::AccessPrev(item);
+        ItemTypeTraits::AccessNext(before) = ItemTypeTraits::GetNext(item);
+    }
+
+    if (ItemTypeTraits::GetNext(item) == VMA_NULL)
+    {
+        VMA_HEAVY_ASSERT(m_Back == item);
+        m_Back = ItemTypeTraits::GetPrev(item);
+    }
+    else
+    {
+        ItemType* const after = ItemTypeTraits::AccessNext(item);
+        ItemTypeTraits::AccessPrev(after) = ItemTypeTraits::GetPrev(item);
+    }
+
+    ItemTypeTraits::AccessPrev(item) = VMA_NULL;
+    ItemTypeTraits::AccessNext(item) = VMA_NULL;
+    --m_Count;
+}
+
+// Detaches every item (walking from the back towards the front and clearing
+// each item's links) and resets the list to the empty state.
+template<typename ItemTypeTraits>
+void VmaIntrusiveLinkedList<ItemTypeTraits>::RemoveAll()
+{
+    if (IsEmpty())
+    {
+        return;
+    }
+
+    for (ItemType* current = m_Back; current != VMA_NULL; )
+    {
+        // Remember the predecessor before the links are wiped.
+        ItemType* const previous = ItemTypeTraits::AccessPrev(current);
+        ItemTypeTraits::AccessPrev(current) = VMA_NULL;
+        ItemTypeTraits::AccessNext(current) = VMA_NULL;
+        current = previous;
+    }
+
+    m_Front = VMA_NULL;
+    m_Back = VMA_NULL;
+    m_Count = 0;
+}
+#endif // _VMA_INTRUSIVE_LINKED_LIST_FUNCTIONS
+#endif // _VMA_INTRUSIVE_LINKED_LIST
+
+#if !defined(_VMA_STRING_BUILDER) && VMA_STATS_STRING_ENABLED
+// Growable character buffer used to assemble text piece by piece.
+// The characters are kept in a VmaVector<char> that allocates through the
+// VkAllocationCallbacks passed to the constructor.
+class VmaStringBuilder
+{
+public:
+ VmaStringBuilder(const VkAllocationCallbacks* allocationCallbacks) : m_Data(VmaStlAllocator<char>(allocationCallbacks)) {}
+ ~VmaStringBuilder() = default;
+
+ // Number of characters accumulated so far.
+ size_t GetLength() const { return m_Data.size(); }
+ // Pointer to the accumulated characters. The inline Add(char) below stores only the
+ // given character, so pair this pointer with GetLength() rather than relying on a
+ // terminating '\0'.
+ const char* GetData() const { return m_Data.data(); }
+ // Appends a single '\n'.
+ void AddNewLine() { Add('\n'); }
+ // Appends one character.
+ void Add(char ch) { m_Data.push_back(ch); }
+
+ // Appends a C string.
+ void Add(const char* pStr);
+ // Appends a number as text.
+ void AddNumber(uint32_t num);
+ void AddNumber(uint64_t num);
+ // Appends a textual representation of a pointer value.
+ void AddPointer(const void* ptr);
+
+private:
+ VmaVector<char, VmaStlAllocator<char>> m_Data;
+};
+
+#ifndef _VMA_STRING_BUILDER_FUNCTIONS
+// Appends the characters of the NUL-terminated string pStr (without the
+// terminator) to the buffer. An empty string leaves the buffer untouched.
+void VmaStringBuilder::Add(const char* pStr)
+{
+    const size_t appendedLen = strlen(pStr);
+    if (appendedLen == 0)
+    {
+        return;
+    }
+
+    // Grow first, then copy into the freshly added tail.
+    const size_t writeOffset = m_Data.size();
+    m_Data.resize(writeOffset + appendedLen);
+    memcpy(m_Data.data() + writeOffset, pStr, appendedLen);
+}
+
+// Appends num in decimal notation.
+void VmaStringBuilder::AddNumber(uint32_t num)
+{
+    // A 32-bit value needs at most 10 decimal digits, plus the terminator.
+    char digits[11];
+    char* cursor = digits + 10;
+    *cursor = '\0';
+
+    // Produce digits from least to most significant, filling the array backwards.
+    do
+    {
+        const uint32_t lastDigit = num % 10;
+        num /= 10;
+        --cursor;
+        *cursor = '0' + (char)lastDigit;
+    } while (num != 0);
+
+    Add(cursor);
+}
+
+// Appends num in decimal notation.
+void VmaStringBuilder::AddNumber(uint64_t num)
+{
+    // A 64-bit value needs at most 20 decimal digits, plus the terminator.
+    char digits[21];
+    char* cursor = digits + 20;
+    *cursor = '\0';
+
+    // Produce digits from least to most significant, filling the array backwards.
+    do
+    {
+        const uint64_t lastDigit = num % 10;
+        num /= 10;
+        --cursor;
+        *cursor = '0' + (char)lastDigit;
+    } while (num != 0);
+
+    Add(cursor);
+}
+
+// Appends the text produced by VmaPtrToStr for ptr.
+void VmaStringBuilder::AddPointer(const void* ptr)
+{
+    char text[21];
+    VmaPtrToStr(text, sizeof(text), ptr);
+    Add(text);
+}
+#endif //_VMA_STRING_BUILDER_FUNCTIONS
+#endif // _VMA_STRING_BUILDER
+
+#if !defined(_VMA_JSON_WRITER) && VMA_STATS_STRING_ENABLED
+/*
+Allows to conveniently build a correct JSON document to be written to the
+VmaStringBuilder passed to the constructor.
+*/
+class VmaJsonWriter
+{
+ VMA_CLASS_NO_COPY_NO_MOVE(VmaJsonWriter)
+public:
+ // sb - string builder to write the document to. Must remain alive for the whole lifetime of this object.
+ VmaJsonWriter(const VkAllocationCallbacks* pAllocationCallbacks, VmaStringBuilder& sb);
+ ~VmaJsonWriter();
+
+ // Begins object by writing "{".
+ // Inside an object, you must call pairs of WriteString and a value, e.g.:
+ // j.BeginObject(true); j.WriteString("A"); j.WriteNumber(1); j.WriteString("B"); j.WriteNumber(2); j.EndObject();
+ // Will write: { "A": 1, "B": 2 }
+ void BeginObject(bool singleLine = false);
+ // Ends object by writing "}".
+ void EndObject();
+
+ // Begins array by writing "[".
+ // Inside an array, you can write a sequence of any values.
+ void BeginArray(bool singleLine = false);
+ // Ends array by writing "]".
+ void EndArray();
+
+ // Writes a string value inside "".
+ // pStr can contain any ANSI characters, including '"', new line etc. - they will be properly escaped.
+ void WriteString(const char* pStr);
+
+ // Begins writing a string value.
+ // Call BeginString, ContinueString, ContinueString, ..., EndString instead of
+ // WriteString to conveniently build the string content incrementally, made of
+ // parts including numbers.
+ void BeginString(const char* pStr = VMA_NULL);
+ // Posts next part of an open string.
+ void ContinueString(const char* pStr);
+ // Posts next part of an open string. The number is converted to decimal characters.
+ void ContinueString(uint32_t n);
+ void ContinueString(uint64_t n);
+ // Posts next part of an open string. Pointer value is converted to characters
+ // using "%p" formatting - shown as hexadecimal number, e.g.: 000000081276Ad00
+ void ContinueString_Pointer(const void* ptr);
+ // Ends writing a string value by writing '"'.
+ void EndString(const char* pStr = VMA_NULL);
+
+ // Writes a number value.
+ void WriteNumber(uint32_t n);
+ void WriteNumber(uint64_t n);
+ // Writes a boolean value - false or true.
+ void WriteBool(bool b);
+ // Writes a null value.
+ void WriteNull();
+
+private:
+ // Kind of collection that is currently open.
+ enum COLLECTION_TYPE
+ {
+ COLLECTION_TYPE_OBJECT,
+ COLLECTION_TYPE_ARRAY,
+ };
+ // One entry per currently open object/array.
+ struct StackItem
+ {
+ COLLECTION_TYPE type;
+ // Number of values written into this collection so far.
+ uint32_t valueCount;
+ // Copied from the singleLine argument of BeginObject/BeginArray.
+ bool singleLineMode;
+ };
+
+ // Text written once per nesting level when indenting.
+ static const char* const INDENT;
+
+ VmaStringBuilder& m_SB;
+ VmaVector< StackItem, VmaStlAllocator<StackItem> > m_Stack;
+ // True between BeginString and EndString.
+ bool m_InsideString;
+
+ // Called before every value is written.
+ void BeginValue(bool isString);
+ void WriteIndent(bool oneLess = false);
+};
+const char* const VmaJsonWriter::INDENT = " ";
+
+#ifndef _VMA_JSON_WRITER_FUNCTIONS
+// Binds the writer to sb and creates an empty collection stack that allocates
+// through pAllocationCallbacks. No string is open initially.
+VmaJsonWriter::VmaJsonWriter(const VkAllocationCallbacks* pAllocationCallbacks, VmaStringBuilder& sb)
+ : m_SB(sb),
+ m_Stack(VmaStlAllocator<StackItem>(pAllocationCallbacks)),
+ m_InsideString(false) {}
+
+// Asserts that the document was finished: no string is still open and every
+// BeginObject/BeginArray was matched (the stack is empty).
+VmaJsonWriter::~VmaJsonWriter()
+{
+ VMA_ASSERT(!m_InsideString);
+ VMA_ASSERT(m_Stack.empty());
+}
+
+// Opens a JSON object: emits whatever separator/indent the enclosing collection
+// needs, writes '{' and pushes a fresh object scope onto the stack.
+void VmaJsonWriter::BeginObject(bool singleLine)
+{
+    VMA_ASSERT(!m_InsideString);
+
+    BeginValue(false);
+    m_SB.Add('{');
+
+    // New scope starts with zero values written.
+    const StackItem objectScope = { COLLECTION_TYPE_OBJECT, 0, singleLine };
+    m_Stack.push_back(objectScope);
+}
+
+// Closes the innermost JSON object: writes the indent for the closing line
+// (one level less than the members), then '}', and pops the object scope.
+void VmaJsonWriter::EndObject()
+{
+    VMA_ASSERT(!m_InsideString);
+
+    const bool oneLevelLess = true;
+    WriteIndent(oneLevelLess);
+    m_SB.Add('}');
+
+    // The scope being closed must exist and must be an object.
+    VMA_ASSERT(!m_Stack.empty() && m_Stack.back().type == COLLECTION_TYPE_OBJECT);
+    m_Stack.pop_back();
+}
+
+// Opens a JSON array: emits whatever separator/indent the enclosing collection
+// needs, writes '[' and pushes a fresh array scope onto the stack.
+void VmaJsonWriter::BeginArray(bool singleLine)
+{
+    VMA_ASSERT(!m_InsideString);
+
+    BeginValue(false);
+    m_SB.Add('[');
+
+    // New scope starts with zero values written.
+    const StackItem arrayScope = { COLLECTION_TYPE_ARRAY, 0, singleLine };
+    m_Stack.push_back(arrayScope);
+}
+
+// Closes the innermost JSON array: writes the indent for the closing line
+// (one level less than the elements), then ']', and pops the array scope.
+void VmaJsonWriter::EndArray()
+{
+    VMA_ASSERT(!m_InsideString);
+
+    const bool oneLevelLess = true;
+    WriteIndent(oneLevelLess);
+    m_SB.Add(']');
+
+    // The scope being closed must exist and must be an array.
+    VMA_ASSERT(!m_Stack.empty() && m_Stack.back().type == COLLECTION_TYPE_ARRAY);
+    m_Stack.pop_back();
+}
+
+// Writes pStr as one complete string value: opens the string with pStr as its
+// content and closes it immediately without adding anything further.
+void VmaJsonWriter::WriteString(const char* pStr)
+{
+    BeginString(pStr);
+    EndString(VMA_NULL);
+}
+
+// Opens a string value: emits the separator/indent required by the enclosing
+// collection, writes the opening '"' and, when pStr is a non-empty string,
+// posts it as the first part of the content.
+void VmaJsonWriter::BeginString(const char* pStr)
+{
+    VMA_ASSERT(!m_InsideString);
+
+    BeginValue(true);
+    m_SB.Add('"');
+    m_InsideString = true;
+
+    const bool hasInitialText = (pStr != VMA_NULL) && (pStr[0] != '\0');
+    if (hasInitialText)
+    {
+        ContinueString(pStr);
+    }
+}
+
+// Posts pStr as the next part of the currently open string value, escaping it
+// for JSON on the way:
+//  - '\\' and '"' are written with a preceding backslash,
+//  - '\b', '\f', '\n', '\r', '\t' are written as their two-character escapes,
+//  - any other character below 32 is written as a generic \u00XX escape
+//    (previously such characters triggered an assert and were dropped from the output),
+//  - every other character is copied unchanged.
+// Must only be called between BeginString and EndString.
+void VmaJsonWriter::ContinueString(const char* pStr)
+{
+    VMA_ASSERT(m_InsideString);
+
+    static const char HEX_DIGITS[] = "0123456789ABCDEF";
+
+    for (const char* it = pStr; *it != '\0'; ++it)
+    {
+        const char ch = *it;
+        switch (ch)
+        {
+        case '\\':
+            m_SB.Add("\\\\");
+            break;
+        case '"':
+            m_SB.Add("\\\"");
+            break;
+        case '\b':
+            m_SB.Add("\\b");
+            break;
+        case '\f':
+            m_SB.Add("\\f");
+            break;
+        case '\n':
+            m_SB.Add("\\n");
+            break;
+        case '\r':
+            m_SB.Add("\\r");
+            break;
+        case '\t':
+            m_SB.Add("\\t");
+            break;
+        default:
+            if ((uint8_t)ch >= 32)
+            {
+                // Printable or non-ASCII byte: passed through as-is.
+                m_SB.Add(ch);
+            }
+            else
+            {
+                // Remaining control characters have no short escape in JSON,
+                // so spell them out as \u00XX.
+                const uint8_t code = (uint8_t)ch;
+                m_SB.Add("\\u00");
+                m_SB.Add(HEX_DIGITS[code >> 4]);
+                m_SB.Add(HEX_DIGITS[code & 0x0F]);
+            }
+            break;
+        }
+    }
+}
+
+// Posts n, converted to text by VmaStringBuilder::AddNumber, as the next part of the open string.
+void VmaJsonWriter::ContinueString(uint32_t n)
+{
+ VMA_ASSERT(m_InsideString);
+ m_SB.AddNumber(n);
+}
+
+// 64-bit variant of the overload above.
+void VmaJsonWriter::ContinueString(uint64_t n)
+{
+ VMA_ASSERT(m_InsideString);
+ m_SB.AddNumber(n);
+}
+
+// Posts ptr, converted to text by VmaStringBuilder::AddPointer, as the next part of the open string.
+void VmaJsonWriter::ContinueString_Pointer(const void* ptr)
+{
+ VMA_ASSERT(m_InsideString);
+ m_SB.AddPointer(ptr);
+}
+
+// Closes the currently open string value. When pStr is a non-empty string it
+// is posted as the final part of the content before the closing '"' is written.
+void VmaJsonWriter::EndString(const char* pStr)
+{
+    VMA_ASSERT(m_InsideString);
+
+    const bool hasFinalText = (pStr != VMA_NULL) && (pStr[0] != '\0');
+    if (hasFinalText)
+    {
+        ContinueString(pStr);
+    }
+
+    m_SB.Add('"');
+    m_InsideString = false;
+}
+
+// Writes n as a number value. BeginValue first emits whatever separator or
+// indentation the enclosing collection requires.
+void VmaJsonWriter::WriteNumber(uint32_t n)
+{
+ VMA_ASSERT(!m_InsideString);
+ BeginValue(false);
+ m_SB.AddNumber(n);
+}
+
+// 64-bit variant of the overload above.
+void VmaJsonWriter::WriteNumber(uint64_t n)
+{
+ VMA_ASSERT(!m_InsideString);
+ BeginValue(false);
+ m_SB.AddNumber(n);
+}
+
+// Writes the literal true or false.
+void VmaJsonWriter::WriteBool(bool b)
+{
+ VMA_ASSERT(!m_InsideString);
+ BeginValue(false);
+ m_SB.Add(b ? "true" : "false");
+}
+
+// Writes the literal null.
+void VmaJsonWriter::WriteNull()
+{
+ VMA_ASSERT(!m_InsideString);
+ BeginValue(false);
+ m_SB.Add("null");
+}
+
+// Emits the punctuation that has to precede the next value and counts that
+// value in the innermost open collection. Does nothing at document top level.
+//  - Inside an object, values at even positions are keys (asserted to be
+//    strings) and values at odd positions are preceded by ": ".
+//  - Otherwise the value is preceded by ", " when it is not the first one,
+//    followed by the line break/indent produced by WriteIndent().
+void VmaJsonWriter::BeginValue(bool isString)
+{
+    if (m_Stack.empty())
+    {
+        return;
+    }
+
+    StackItem& scope = m_Stack.back();
+    const bool insideObject = (scope.type == COLLECTION_TYPE_OBJECT);
+    const bool atKeyPosition = insideObject && (scope.valueCount % 2 == 0);
+    const bool atMemberValuePosition = insideObject && (scope.valueCount % 2 != 0);
+
+    if (atKeyPosition)
+    {
+        VMA_ASSERT(isString);
+    }
+
+    if (atMemberValuePosition)
+    {
+        m_SB.Add(": ");
+    }
+    else
+    {
+        if (scope.valueCount > 0)
+        {
+            m_SB.Add(", ");
+        }
+        WriteIndent();
+    }
+
+    ++scope.valueCount;
+}
+
+// Starts a new line and writes INDENT once per open collection (one fewer
+// when oneLess is set). Nothing is written at document top level or when the
+// innermost collection is in single-line mode.
+void VmaJsonWriter::WriteIndent(bool oneLess)
+{
+    if (m_Stack.empty() || m_Stack.back().singleLineMode)
+    {
+        return;
+    }
+
+    m_SB.AddNewLine();
+
+    const size_t depth = m_Stack.size();
+    const size_t levels = (depth > 0 && oneLess) ? depth - 1 : depth;
+    for (size_t remaining = levels; remaining > 0; --remaining)
+    {
+        m_SB.Add(INDENT);
+    }
+}
+#endif // _VMA_JSON_WRITER_FUNCTIONS
+
+// Writes stat to json as one object with the keys BlockCount, BlockBytes,
+// AllocationCount, AllocationBytes and UnusedRangeCount. The min/max size
+// pairs are added only when the respective count is greater than one.
+static void VmaPrintDetailedStatistics(VmaJsonWriter& json, const VmaDetailedStatistics& stat)
+{
+    const auto& basic = stat.statistics;
+    const bool writeAllocationSizeRange = basic.allocationCount > 1;
+    const bool writeUnusedRangeSizeRange = stat.unusedRangeCount > 1;
+
+    json.BeginObject();
+
+    json.WriteString("BlockCount");
+    json.WriteNumber(basic.blockCount);
+
+    json.WriteString("BlockBytes");
+    json.WriteNumber(basic.blockBytes);
+
+    json.WriteString("AllocationCount");
+    json.WriteNumber(basic.allocationCount);
+
+    json.WriteString("AllocationBytes");
+    json.WriteNumber(basic.allocationBytes);
+
+    json.WriteString("UnusedRangeCount");
+    json.WriteNumber(stat.unusedRangeCount);
+
+    if (writeAllocationSizeRange)
+    {
+        json.WriteString("AllocationSizeMin");
+        json.WriteNumber(stat.allocationSizeMin);
+
+        json.WriteString("AllocationSizeMax");
+        json.WriteNumber(stat.allocationSizeMax);
+    }
+
+    if (writeUnusedRangeSizeRange)
+    {
+        json.WriteString("UnusedRangeSizeMin");
+        json.WriteNumber(stat.unusedRangeSizeMin);
+
+        json.WriteString("UnusedRangeSizeMax");
+        json.WriteNumber(stat.unusedRangeSizeMax);
+    }
+
+    json.EndObject();
+}
+#endif // _VMA_JSON_WRITER
+
+#ifndef _VMA_MAPPING_HYSTERESIS
+
+// Tracks Map/Unmap/Alloc/Free events in two counters and decides when an "extra"
+// mapping reference (m_ExtraMapping, 0 or 1) should be switched on or off.
+// The counter logic is compiled in only when VMA_MAPPING_HYSTERESIS_ENABLED is non-zero;
+// otherwise PostMap()/PostFree() always return false and the other hooks do nothing.
+class VmaMappingHysteresis
+{
+ VMA_CLASS_NO_COPY_NO_MOVE(VmaMappingHysteresis)
+public:
+ VmaMappingHysteresis() = default;
+
+ // Current extra mapping reference count: 0 or 1.
+ uint32_t GetExtraMapping() const { return m_ExtraMapping; }
+
+ // Call when Map was called.
+ // Returns true if switched to extra +1 mapping reference count.
+ bool PostMap()
+ {
+#if VMA_MAPPING_HYSTERESIS_ENABLED
+ if(m_ExtraMapping == 0)
+ {
+ ++m_MajorCounter;
+ // Enough map events accumulated: switch the extra mapping on and start counting afresh.
+ if(m_MajorCounter >= COUNTER_MIN_EXTRA_MAPPING)
+ {
+ m_ExtraMapping = 1;
+ m_MajorCounter = 0;
+ m_MinorCounter = 0;
+ return true;
+ }
+ }
+ else // m_ExtraMapping == 1
+ PostMinorCounter();
+#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED
+ return false;
+ }
+
+ // Call when Unmap was called.
+ void PostUnmap()
+ {
+#if VMA_MAPPING_HYSTERESIS_ENABLED
+ if(m_ExtraMapping == 0)
+ ++m_MajorCounter;
+ else // m_ExtraMapping == 1
+ PostMinorCounter();
+#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED
+ }
+
+ // Call when allocation was made from the memory block.
+ void PostAlloc()
+ {
+#if VMA_MAPPING_HYSTERESIS_ENABLED
+ if(m_ExtraMapping == 1)
+ ++m_MajorCounter;
+ else // m_ExtraMapping == 0
+ PostMinorCounter();
+#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED
+ }
+
+ // Call when allocation was freed from the memory block.
+ // Returns true if switched to extra -1 mapping reference count.
+ bool PostFree()
+ {
+#if VMA_MAPPING_HYSTERESIS_ENABLED
+ if(m_ExtraMapping == 1)
+ {
+ ++m_MajorCounter;
+ // Switch the extra mapping off only when the major counter both reached the
+ // threshold and is ahead of the minor counter by more than one.
+ if(m_MajorCounter >= COUNTER_MIN_EXTRA_MAPPING &&
+ m_MajorCounter > m_MinorCounter + 1)
+ {
+ m_ExtraMapping = 0;
+ m_MajorCounter = 0;
+ m_MinorCounter = 0;
+ return true;
+ }
+ }
+ else // m_ExtraMapping == 0
+ PostMinorCounter();
+#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED
+ return false;
+ }
+
+private:
+ // Threshold the major counter has to reach before the extra mapping state may change.
+ // NOTE(review): declared int32_t but compared against the uint32_t m_MajorCounter - confirm this is intended.
+ static const int32_t COUNTER_MIN_EXTRA_MAPPING = 7;
+
+ uint32_t m_MinorCounter = 0;
+ uint32_t m_MajorCounter = 0;
+ uint32_t m_ExtraMapping = 0; // 0 or 1.
+
+ // Registers an event that counts against the pending state switch: the minor counter
+ // grows until it reaches the major counter; after that both counters are decremented
+ // together (as long as the major counter is above zero).
+ void PostMinorCounter()
+ {
+ if(m_MinorCounter < m_MajorCounter)
+ {
+ ++m_MinorCounter;
+ }
+ else if(m_MajorCounter > 0)
+ {
+ --m_MajorCounter;
+ --m_MinorCounter;
+ }
+ }
+};
+
+#endif // _VMA_MAPPING_HYSTERESIS
+
+#if VMA_EXTERNAL_MEMORY_WIN32
+// Owns a Win32 HANDLE obtained for a VkDeviceMemory and hands out duplicates of it.
+// The stored handle is closed with CloseHandle in the destructor.
+class VmaWin32Handle
+{
+public:
+ VmaWin32Handle() noexcept : m_hHandle(VMA_NULL) { }
+ explicit VmaWin32Handle(HANDLE hHandle) noexcept : m_hHandle(hHandle) { }
+ ~VmaWin32Handle() noexcept { if (m_hHandle != VMA_NULL) { ::CloseHandle(m_hHandle); } }
+ VMA_CLASS_NO_COPY_NO_MOVE(VmaWin32Handle)
+
+public:
+ // Strengthened
+ // Writes to *pHandle a duplicate of the stored handle, creating the stored handle first
+ // (via pvkGetMemoryWin32HandleKHR) when it does not exist yet.
+ // Returns VK_SUCCESS when the handle already existed, otherwise the result of Create().
+ // *pHandle is VMA_NULL when no handle is stored after the call.
+ // NOTE(review): the first check of m_hHandle below is made without holding m_Mutex - confirm
+ // that an unsynchronized read is acceptable here.
+ VkResult GetHandle(VkDevice device, VkDeviceMemory memory, PFN_vkGetMemoryWin32HandleKHR pvkGetMemoryWin32HandleKHR, HANDLE hTargetProcess, bool useMutex, HANDLE* pHandle) noexcept
+ {
+ *pHandle = VMA_NULL;
+ // Try to get handle first.
+ if (m_hHandle != VMA_NULL)
+ {
+ *pHandle = Duplicate(hTargetProcess);
+ return VK_SUCCESS;
+ }
+
+ VkResult res = VK_SUCCESS;
+ // If failed, try to create it.
+ {
+ VmaMutexLockWrite lock(m_Mutex, useMutex);
+ // Re-check under the lock so the handle is created only once.
+ if (m_hHandle == VMA_NULL)
+ {
+ res = Create(device, memory, pvkGetMemoryWin32HandleKHR, &m_hHandle);
+ }
+ }
+
+ *pHandle = Duplicate(hTargetProcess);
+ return res;
+ }
+
+ // True when a handle is stored.
+ operator bool() const noexcept { return m_hHandle != VMA_NULL; }
+private:
+ // Not atomic
+ // Requests an opaque Win32 handle for memory through pvkGetMemoryWin32HandleKHR.
+ // Returns VK_ERROR_FEATURE_NOT_PRESENT when the function pointer is null.
+ static VkResult Create(VkDevice device, VkDeviceMemory memory, PFN_vkGetMemoryWin32HandleKHR pvkGetMemoryWin32HandleKHR, HANDLE* pHandle) noexcept
+ {
+ VkResult res = VK_ERROR_FEATURE_NOT_PRESENT;
+ if (pvkGetMemoryWin32HandleKHR != VMA_NULL)
+ {
+ VkMemoryGetWin32HandleInfoKHR handleInfo{ };
+ handleInfo.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR;
+ handleInfo.memory = memory;
+ handleInfo.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR;
+ res = pvkGetMemoryWin32HandleKHR(device, &handleInfo, pHandle);
+ }
+ return res;
+ }
+ // Duplicates the stored handle into hTargetProcess (the current process when
+ // hTargetProcess is null) with the same access rights. Returns the stored (null)
+ // handle unchanged when nothing is stored; asserts when DuplicateHandle fails.
+ HANDLE Duplicate(HANDLE hTargetProcess = VMA_NULL) const noexcept
+ {
+ if (!m_hHandle)
+ return m_hHandle;
+
+ HANDLE hCurrentProcess = ::GetCurrentProcess();
+ HANDLE hDupHandle = VMA_NULL;
+ if (!::DuplicateHandle(hCurrentProcess, m_hHandle, hTargetProcess ? hTargetProcess : hCurrentProcess, &hDupHandle, 0, FALSE, DUPLICATE_SAME_ACCESS))
+ {
+ VMA_ASSERT(0 && "Failed to duplicate handle.");
+ }
+ return hDupHandle;
+ }
+private:
+ HANDLE m_hHandle;
+ VMA_RW_MUTEX m_Mutex; // Protects access to m_hHandle
+};
+#else
+// Stand-in used when VMA_EXTERNAL_MEMORY_WIN32 is disabled. It carries a pointer
+// and a VMA_RW_MUTEX, the same member kinds as the Win32 variant, and has no behavior.
+class VmaWin32Handle
+{
+ // ABI compatibility
+ void* placeholder = VMA_NULL;
+ VMA_RW_MUTEX placeholder2;
+};
+#endif // VMA_EXTERNAL_MEMORY_WIN32
+
+
+#ifndef _VMA_DEVICE_MEMORY_BLOCK
+/*
+Represents a single block of device memory (`VkDeviceMemory`) with all the
+data about its regions (aka suballocations, #VmaAllocation), assigned and free.
+
+Thread-safety:
+- Access to m_pMetadata must be externally synchronized.
+- Map, Unmap, Bind* are synchronized internally.
+*/
+class VmaDeviceMemoryBlock
+{
+ VMA_CLASS_NO_COPY_NO_MOVE(VmaDeviceMemoryBlock)
+public:
+ // Bookkeeping of the regions inside this block. See the thread-safety note above.
+ VmaBlockMetadata* m_pMetadata;
+
+ VmaDeviceMemoryBlock(VmaAllocator hAllocator);
+ ~VmaDeviceMemoryBlock();
+
+ // Always call after construction.
+ void Init(
+ VmaAllocator hAllocator,
+ VmaPool hParentPool,
+ uint32_t newMemoryTypeIndex,
+ VkDeviceMemory newMemory,
+ VkDeviceSize newSize,
+ uint32_t id,
+ uint32_t algorithm,
+ VkDeviceSize bufferImageGranularity);
+ // Always call before destruction.
+ void Destroy(VmaAllocator allocator);
+
+ // Simple accessors for the private members below.
+ VmaPool GetParentPool() const { return m_hParentPool; }
+ VkDeviceMemory GetDeviceMemory() const { return m_hMemory; }
+ uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; }
+ uint32_t GetId() const { return m_Id; }
+ void* GetMappedData() const { return m_pMappedData; }
+ uint32_t GetMapRefCount() const { return m_MapCount; }
+
+ // Call when allocation/free was made from m_pMetadata.
+ // Used for m_MappingHysteresis.
+ void PostAlloc(VmaAllocator hAllocator);
+ void PostFree(VmaAllocator hAllocator);
+
+ // Validates all data structures inside this object. If not valid, returns false.
+ bool Validate() const;
+ VkResult CheckCorruption(VmaAllocator hAllocator);
+
+ // ppData can be null.
+ VkResult Map(VmaAllocator hAllocator, uint32_t count, void** ppData);
+ void Unmap(VmaAllocator hAllocator, uint32_t count);
+
+ VkResult WriteMagicValueAfterAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize);
+ VkResult ValidateMagicValueAfterAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize);
+
+ VkResult BindBufferMemory(
+ const VmaAllocator hAllocator,
+ const VmaAllocation hAllocation,
+ VkDeviceSize allocationLocalOffset,
+ VkBuffer hBuffer,
+ const void* pNext);
+ VkResult BindImageMemory(
+ const VmaAllocator hAllocator,
+ const VmaAllocation hAllocation,
+ VkDeviceSize allocationLocalOffset,
+ VkImage hImage,
+ const void* pNext);
+#if VMA_EXTERNAL_MEMORY_WIN32
+ // Available only when Win32 external memory support is compiled in.
+ VkResult CreateWin32Handle(
+ const VmaAllocator hAllocator,
+ PFN_vkGetMemoryWin32HandleKHR pvkGetMemoryWin32HandleKHR,
+ HANDLE hTargetProcess,
+ HANDLE* pHandle)noexcept;
+#endif // VMA_EXTERNAL_MEMORY_WIN32
+private:
+ VmaPool m_hParentPool; // VK_NULL_HANDLE if not belongs to custom pool.
+ uint32_t m_MemoryTypeIndex;
+ uint32_t m_Id;
+ VkDeviceMemory m_hMemory;
+
+ /*
+ Protects access to m_hMemory so it is not used by multiple threads simultaneously, e.g. vkMapMemory, vkBindBufferMemory.
+ Also protects m_MapCount, m_pMappedData.
+ Allocations, deallocations, any change in m_pMetadata is protected by parent's VmaBlockVector::m_Mutex.
+ */
+ VMA_MUTEX m_MapAndBindMutex;
+ VmaMappingHysteresis m_MappingHysteresis;
+ uint32_t m_MapCount;
+ void* m_pMappedData;
+
+ // Win32 handle wrapper; a placeholder type when VMA_EXTERNAL_MEMORY_WIN32 is disabled.
+ VmaWin32Handle m_Handle;
+};
+#endif // _VMA_DEVICE_MEMORY_BLOCK
+
+#ifndef _VMA_ALLOCATION_T
+// Additional per-allocation state, referenced through a pointer from
+// VmaAllocation_T::DedicatedAllocation::m_ExtraData.
+struct VmaAllocationExtraData
+{
+ void* m_pMappedData = VMA_NULL; // Not null means memory is mapped.
+ VmaWin32Handle m_Handle;
+};
+
+// Object behind a VmaAllocation handle. It describes either a region inside a
+// VmaDeviceMemoryBlock (ALLOCATION_TYPE_BLOCK) or memory with its own VkDeviceMemory
+// (ALLOCATION_TYPE_DEDICATED); the type-specific data share storage in a union.
+struct VmaAllocation_T
+{
+ // The list item traits access m_DedicatedAllocation.m_Prev / m_Next directly.
+ friend struct VmaDedicatedAllocationListItemTraits;
+
+ // Bits stored in m_Flags.
+ enum FLAGS
+ {
+ FLAG_PERSISTENT_MAP = 0x01,
+ FLAG_MAPPING_ALLOWED = 0x02,
+ };
+
+public:
+ // Value stored in m_Type.
+ enum ALLOCATION_TYPE
+ {
+ ALLOCATION_TYPE_NONE,
+ ALLOCATION_TYPE_BLOCK,
+ ALLOCATION_TYPE_DEDICATED,
+ };
+
+ // This struct is allocated using VmaPoolAllocator.
+ VmaAllocation_T(bool mappingAllowed);
+ ~VmaAllocation_T();
+
+ void InitBlockAllocation(
+ VmaDeviceMemoryBlock* block,
+ VmaAllocHandle allocHandle,
+ VkDeviceSize alignment,
+ VkDeviceSize size,
+ uint32_t memoryTypeIndex,
+ VmaSuballocationType suballocationType,
+ bool mapped);
+ // pMappedData not null means allocation is created with MAPPED flag.
+ void InitDedicatedAllocation(
+ VmaAllocator allocator,
+ VmaPool hParentPool,
+ uint32_t memoryTypeIndex,
+ VkDeviceMemory hMemory,
+ VmaSuballocationType suballocationType,
+ void* pMappedData,
+ VkDeviceSize size);
+ void Destroy(VmaAllocator allocator);
+
+ // Simple accessors; the uint8_t members are cast back to their enum types.
+ ALLOCATION_TYPE GetType() const { return (ALLOCATION_TYPE)m_Type; }
+ VkDeviceSize GetAlignment() const { return m_Alignment; }
+ VkDeviceSize GetSize() const { return m_Size; }
+ void* GetUserData() const { return m_pUserData; }
+ const char* GetName() const { return m_pName; }
+ VmaSuballocationType GetSuballocationType() const { return (VmaSuballocationType)m_SuballocationType; }
+
+ // Valid only for block allocations (asserted).
+ VmaDeviceMemoryBlock* GetBlock() const { VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK); return m_BlockAllocation.m_Block; }
+ uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; }
+ bool IsPersistentMap() const { return (m_Flags & FLAG_PERSISTENT_MAP) != 0; }
+ bool IsMappingAllowed() const { return (m_Flags & FLAG_MAPPING_ALLOWED) != 0; }
+
+ // Stores the pointer only; hAllocator is not used by this inline implementation.
+ void SetUserData(VmaAllocator hAllocator, void* pUserData) { m_pUserData = pUserData; }
+ void SetName(VmaAllocator hAllocator, const char* pName);
+ void FreeName(VmaAllocator hAllocator);
+ uint8_t SwapBlockAllocation(VmaAllocator hAllocator, VmaAllocation allocation);
+ VmaAllocHandle GetAllocHandle() const;
+ VkDeviceSize GetOffset() const;
+ VmaPool GetParentPool() const;
+ VkDeviceMemory GetMemory() const;
+ void* GetMappedData() const;
+
+ void BlockAllocMap();
+ void BlockAllocUnmap();
+ VkResult DedicatedAllocMap(VmaAllocator hAllocator, void** ppData);
+ void DedicatedAllocUnmap(VmaAllocator hAllocator);
+
+#if VMA_STATS_STRING_ENABLED
+ VmaBufferImageUsage GetBufferImageUsage() const { return m_BufferImageUsage; }
+ // Records the usage of the buffer this allocation is created for.
+ // May be called only while the usage is still UNKNOWN (asserted).
+ void InitBufferUsage(const VkBufferCreateInfo &createInfo, bool useKhrMaintenance5)
+ {
+ VMA_ASSERT(m_BufferImageUsage == VmaBufferImageUsage::UNKNOWN);
+ m_BufferImageUsage = VmaBufferImageUsage(createInfo, useKhrMaintenance5);
+ }
+ // Records the usage of the image this allocation is created for.
+ // May be called only while the usage is still UNKNOWN (asserted).
+ void InitImageUsage(const VkImageCreateInfo &createInfo)
+ {
+ VMA_ASSERT(m_BufferImageUsage == VmaBufferImageUsage::UNKNOWN);
+ m_BufferImageUsage = VmaBufferImageUsage(createInfo);
+ }
+ void PrintParameters(class VmaJsonWriter& json) const;
+#endif
+
+#if VMA_EXTERNAL_MEMORY_WIN32
+ VkResult GetWin32Handle(VmaAllocator hAllocator, HANDLE hTargetProcess, HANDLE* hHandle) noexcept;
+#endif // VMA_EXTERNAL_MEMORY_WIN32
+
+private:
+ // Allocation out of VmaDeviceMemoryBlock.
+ struct BlockAllocation
+ {
+ VmaDeviceMemoryBlock* m_Block;
+ VmaAllocHandle m_AllocHandle;
+ };
+ // Allocation for an object that has its own private VkDeviceMemory.
+ struct DedicatedAllocation
+ {
+ VmaPool m_hParentPool; // VK_NULL_HANDLE if not belongs to custom pool.
+ VkDeviceMemory m_hMemory;
+ VmaAllocationExtraData* m_ExtraData;
+ // Intrusive list links, accessed through VmaDedicatedAllocationListItemTraits.
+ VmaAllocation_T* m_Prev;
+ VmaAllocation_T* m_Next;
+ };
+ union
+ {
+ // Allocation out of VmaDeviceMemoryBlock.
+ BlockAllocation m_BlockAllocation;
+ // Allocation for an object that has its own private VkDeviceMemory.
+ DedicatedAllocation m_DedicatedAllocation;
+ };
+
+ VkDeviceSize m_Alignment;
+ VkDeviceSize m_Size;
+ void* m_pUserData;
+ char* m_pName;
+ uint32_t m_MemoryTypeIndex;
+ uint8_t m_Type; // ALLOCATION_TYPE
+ uint8_t m_SuballocationType; // VmaSuballocationType
+ // Reference counter for vmaMapMemory()/vmaUnmapMemory().
+ uint8_t m_MapCount;
+ uint8_t m_Flags; // enum FLAGS
+#if VMA_STATS_STRING_ENABLED
+ VmaBufferImageUsage m_BufferImageUsage; // 0 if unknown.
+#endif
+
+ void EnsureExtraData(VmaAllocator hAllocator);
+};
+#endif // _VMA_ALLOCATION_T
+
+#ifndef _VMA_DEDICATED_ALLOCATION_LIST_ITEM_TRAITS
+// Item traits that let VmaIntrusiveLinkedList chain VmaAllocation_T objects
+// through the m_Prev / m_Next members of their dedicated-allocation data.
+// Every accessor asserts (heavy assert) that the item is a dedicated allocation.
+struct VmaDedicatedAllocationListItemTraits
+{
+    typedef VmaAllocation_T ItemType;
+
+    // Read-only access to the previous-item link.
+    static ItemType* GetPrev(const ItemType* item)
+    {
+        VMA_HEAVY_ASSERT(item->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED);
+        const auto& links = item->m_DedicatedAllocation;
+        return links.m_Prev;
+    }
+
+    // Read-only access to the next-item link.
+    static ItemType* GetNext(const ItemType* item)
+    {
+        VMA_HEAVY_ASSERT(item->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED);
+        const auto& links = item->m_DedicatedAllocation;
+        return links.m_Next;
+    }
+
+    // Writable reference to the previous-item link.
+    static ItemType*& AccessPrev(ItemType* item)
+    {
+        VMA_HEAVY_ASSERT(item->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED);
+        auto& links = item->m_DedicatedAllocation;
+        return links.m_Prev;
+    }
+
+    // Writable reference to the next-item link.
+    static ItemType*& AccessNext(ItemType* item)
+    {
+        VMA_HEAVY_ASSERT(item->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED);
+        auto& links = item->m_DedicatedAllocation;
+        return links.m_Next;
+    }
+};
+#endif // _VMA_DEDICATED_ALLOCATION_LIST_ITEM_TRAITS
+
+#ifndef _VMA_DEDICATED_ALLOCATION_LIST
+/*
+Stores linked list of VmaAllocation_T objects.
+Thread-safe, synchronized internally.
+*/
+class VmaDedicatedAllocationList
+{
+ VMA_CLASS_NO_COPY_NO_MOVE(VmaDedicatedAllocationList)
+public:
+ VmaDedicatedAllocationList() {}
+ ~VmaDedicatedAllocationList();
+
+ // Chooses whether the member functions actually lock m_Mutex.
+ void Init(bool useMutex) { m_UseMutex = useMutex; }
+ // Checks that the stored item count matches the number of linked items.
+ bool Validate();
+
+ // Accumulate the registered allocations into inoutStats.
+ void AddDetailedStatistics(VmaDetailedStatistics& inoutStats);
+ void AddStatistics(VmaStatistics& inoutStats);
+#if VMA_STATS_STRING_ENABLED
+ // Writes JSON array with the list of allocations.
+ void BuildStatsString(VmaJsonWriter& json);
+#endif
+
+ bool IsEmpty();
+ // Add / remove an allocation to / from the list.
+ void Register(VmaAllocation alloc);
+ void Unregister(VmaAllocation alloc);
+
+private:
+ typedef VmaIntrusiveLinkedList<VmaDedicatedAllocationListItemTraits> DedicatedAllocationLinkedList;
+
+ // Locking is on by default; Init() can switch it off.
+ bool m_UseMutex = true;
+ VMA_RW_MUTEX m_Mutex;
+ DedicatedAllocationLinkedList m_AllocationList;
+};
+
+#ifndef _VMA_DEDICATED_ALLOCATION_LIST_FUNCTIONS
+
+// Validates the list (heavy assert only) and reports a leak when allocations
+// are still registered at destruction time.
+VmaDedicatedAllocationList::~VmaDedicatedAllocationList()
+{
+    VMA_HEAVY_ASSERT(Validate());
+
+    const bool hasUnfreedAllocations = !m_AllocationList.IsEmpty();
+    if (hasUnfreedAllocations)
+    {
+        VMA_ASSERT_LEAK(false && "Unfreed dedicated allocations found!");
+    }
+}
+
+// Checks that the item count stored by the list matches the number of items
+// actually reachable by walking it front to back.
+// Returns true when consistent; VMA_VALIDATE handles the failing case.
+bool VmaDedicatedAllocationList::Validate()
+{
+    // Take the read lock before touching the list at all, so the declared count
+    // and the walk below observe the same state even while other threads
+    // register or unregister allocations.
+    VmaMutexLockRead lock(m_Mutex, m_UseMutex);
+
+    const size_t declaredCount = m_AllocationList.GetCount();
+    size_t actualCount = 0;
+    for (VmaAllocation alloc = m_AllocationList.Front();
+        alloc != VMA_NULL; alloc = m_AllocationList.GetNext(alloc))
+    {
+        ++actualCount;
+    }
+    VMA_VALIDATE(actualCount == declaredCount);
+
+    return true;
+}
+
+// Adds every registered allocation to inoutStats. Each one counts as one block
+// of its own size and is also passed to VmaAddDetailedStatisticsAllocation.
+void VmaDedicatedAllocationList::AddDetailedStatistics(VmaDetailedStatistics& inoutStats)
+{
+    // Hold the read lock while walking the list, like AddStatistics() does, so
+    // concurrent Register()/Unregister() calls cannot relink items mid-iteration.
+    VmaMutexLockRead lock(m_Mutex, m_UseMutex);
+
+    for(auto* item = m_AllocationList.Front(); item != VMA_NULL; item = DedicatedAllocationLinkedList::GetNext(item))
+    {
+        const VkDeviceSize size = item->GetSize();
+        inoutStats.statistics.blockCount++;
+        inoutStats.statistics.blockBytes += size;
+        VmaAddDetailedStatisticsAllocation(inoutStats, size);
+    }
+}
+
+// Adds the registered allocations to inoutStats under the read lock: each
+// allocation counts as one block and one allocation, and its size is added
+// to both byte totals.
+void VmaDedicatedAllocationList::AddStatistics(VmaStatistics& inoutStats)
+{
+    VmaMutexLockRead lock(m_Mutex, m_UseMutex);
+
+    // Sum the sizes first, then apply the totals in one go.
+    VkDeviceSize totalBytes = 0;
+    auto* current = m_AllocationList.Front();
+    while (current != VMA_NULL)
+    {
+        totalBytes += current->GetSize();
+        current = DedicatedAllocationLinkedList::GetNext(current);
+    }
+
+    const uint32_t dedicatedCount = (uint32_t)m_AllocationList.GetCount();
+    inoutStats.blockCount += dedicatedCount;
+    inoutStats.allocationCount += dedicatedCount;
+    inoutStats.blockBytes += totalBytes;
+    inoutStats.allocationBytes += totalBytes;
+}
+
+#if VMA_STATS_STRING_ENABLED
+// Writes a JSON array holding one single-line object per registered
+// allocation; each object is filled by the allocation's PrintParameters().
+// The list is read under the read lock.
+void VmaDedicatedAllocationList::BuildStatsString(VmaJsonWriter& json)
+{
+    VmaMutexLockRead lock(m_Mutex, m_UseMutex);
+
+    json.BeginArray();
+    VmaAllocation current = m_AllocationList.Front();
+    while (current != VMA_NULL)
+    {
+        json.BeginObject(true);
+        current->PrintParameters(json);
+        json.EndObject();
+
+        current = m_AllocationList.GetNext(current);
+    }
+    json.EndArray();
+}
+#endif // VMA_STATS_STRING_ENABLED
+
+// Returns whether no allocation is registered. Reads the list under the read lock.
+bool VmaDedicatedAllocationList::IsEmpty()
+{
+ VmaMutexLockRead lock(m_Mutex, m_UseMutex);
+ return m_AllocationList.IsEmpty();
+}
+
+// Appends alloc to the back of the list under the write lock.
+void VmaDedicatedAllocationList::Register(VmaAllocation alloc)
+{
+ VmaMutexLockWrite lock(m_Mutex, m_UseMutex);
+ m_AllocationList.PushBack(alloc);
+}
+
+// Removes alloc from the list under the write lock.
+void VmaDedicatedAllocationList::Unregister(VmaAllocation alloc)
+{
+ VmaMutexLockWrite lock(m_Mutex, m_UseMutex);
+ m_AllocationList.Remove(alloc);
+}
+#endif // _VMA_DEDICATED_ALLOCATION_LIST_FUNCTIONS
+#endif // _VMA_DEDICATED_ALLOCATION_LIST
+
+#ifndef _VMA_SUBALLOCATION
+/*
+Represents a region of VmaDeviceMemoryBlock that is either assigned and returned as
+allocated memory block or free.
+*/
+struct VmaSuballocation
+{
+ // Position of the region (compared by the offset comparators).
+ VkDeviceSize offset;
+ // Extent of the region (compared by the size comparator).
+ VkDeviceSize size;
+ void* userData;
+ VmaSuballocationType type;
+};
+
+// Orders suballocations by ascending offset.
+struct VmaSuballocationOffsetLess
+{
+    bool operator()(const VmaSuballocation& lhs, const VmaSuballocation& rhs) const
+    {
+        return rhs.offset > lhs.offset;
+    }
+};
+
+// Orders suballocations by descending offset.
+struct VmaSuballocationOffsetGreater
+{
+    bool operator()(const VmaSuballocation& lhs, const VmaSuballocation& rhs) const
+    {
+        return rhs.offset < lhs.offset;
+    }
+};
+
+// Orders suballocation list iterators by ascending size of the suballocation
+// they refer to. The second overload compares an iterator against a plain size.
+struct VmaSuballocationItemSizeLess
+{
+    bool operator()(const VmaSuballocationList::iterator lhs,
+        const VmaSuballocationList::iterator rhs) const
+    {
+        return rhs->size > lhs->size;
+    }
+
+    bool operator()(const VmaSuballocationList::iterator lhs,
+        VkDeviceSize rhsSize) const
+    {
+        return rhsSize > lhs->size;
+    }
+};
+#endif // _VMA_SUBALLOCATION
+
+#ifndef _VMA_ALLOCATION_REQUEST
+/*
+Parameters of planned allocation inside a VmaDeviceMemoryBlock.
+item points to a FREE suballocation.
+*/
+struct VmaAllocationRequest
+{
+ VmaAllocHandle allocHandle;
+ VkDeviceSize size;
+ VmaSuballocationList::iterator item;
+ // NOTE(review): the meaning of customData and algorithmData is not visible here -
+ // presumably private to the metadata implementation that fills the request; confirm.
+ void* customData;
+ uint64_t algorithmData;
+ VmaAllocationRequestType type;
+};
+#endif // _VMA_ALLOCATION_REQUEST
+
+#ifndef _VMA_BLOCK_METADATA
+/*
+Data structure used for bookkeeping of allocations and unused ranges of memory
+in a single VkDeviceMemory block.
+
+Abstract base class: almost all operations are pure virtual.
+*/
+class VmaBlockMetadata
+{
+ VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockMetadata)
+public:
+ // pAllocationCallbacks, if not null, must be owned externally - alive and unchanged for the whole lifetime of this object.
+ VmaBlockMetadata(const VkAllocationCallbacks* pAllocationCallbacks,
+ VkDeviceSize bufferImageGranularity, bool isVirtual);
+ virtual ~VmaBlockMetadata() = default;
+
+ // Base implementation only records the size of the block.
+ virtual void Init(VkDeviceSize size) { m_Size = size; }
+ bool IsVirtual() const { return m_IsVirtual; }
+ VkDeviceSize GetSize() const { return m_Size; }
+
+ // Validates all data structures inside this object. If not valid, returns false.
+ virtual bool Validate() const = 0;
+ virtual size_t GetAllocationCount() const = 0;
+ virtual size_t GetFreeRegionsCount() const = 0;
+ virtual VkDeviceSize GetSumFreeSize() const = 0;
+ // Returns true if this block is empty - contains only single free suballocation.
+ virtual bool IsEmpty() const = 0;
+ virtual void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) = 0;
+ virtual VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const = 0;
+ virtual void* GetAllocationUserData(VmaAllocHandle allocHandle) const = 0;
+
+ // Iteration over the allocations in this block.
+ virtual VmaAllocHandle GetAllocationListBegin() const = 0;
+ virtual VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const = 0;
+ virtual VkDeviceSize GetNextFreeRegionSize(VmaAllocHandle alloc) const = 0;
+
+ // Shouldn't modify blockCount.
+ virtual void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const = 0;
+ virtual void AddStatistics(VmaStatistics& inoutStats) const = 0;
+
+#if VMA_STATS_STRING_ENABLED
+ virtual void PrintDetailedMap(class VmaJsonWriter& json) const = 0;
+#endif
+
+ // Tries to find a place for suballocation with given parameters inside this block.
+ // If succeeded, fills pAllocationRequest and returns true.
+ // If failed, returns false.
+ virtual bool CreateAllocationRequest(
+ VkDeviceSize allocSize,
+ VkDeviceSize allocAlignment,
+ bool upperAddress,
+ VmaSuballocationType allocType,
+ // Always one of VMA_ALLOCATION_CREATE_STRATEGY_* or VMA_ALLOCATION_INTERNAL_STRATEGY_* flags.
+ uint32_t strategy,
+ VmaAllocationRequest* pAllocationRequest) = 0;
+
+ virtual VkResult CheckCorruption(const void* pBlockData) = 0;
+
+ // Makes actual allocation based on request. Request must already be checked and valid.
+ virtual void Alloc(
+ const VmaAllocationRequest& request,
+ VmaSuballocationType type,
+ void* userData) = 0;
+
+ // Frees suballocation assigned to given memory region.
+ virtual void Free(VmaAllocHandle allocHandle) = 0;
+
+ // Frees all allocations.
+ // Careful! Don't call it if there are VmaAllocation objects owned by userData of cleared allocations!
+ virtual void Clear() = 0;
+
+ virtual void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) = 0;
+ virtual void DebugLogAllAllocations() const = 0;
+
+protected:
+ const VkAllocationCallbacks* GetAllocationCallbacks() const { return m_pAllocationCallbacks; }
+ VkDeviceSize GetBufferImageGranularity() const { return m_BufferImageGranularity; }
+ // Virtual blocks use no debug margin; all others use VMA_DEBUG_MARGIN.
+ VkDeviceSize GetDebugMargin() const { return VkDeviceSize(IsVirtual() ? 0 : VMA_DEBUG_MARGIN); }
+
+ void DebugLogAllocation(VkDeviceSize offset, VkDeviceSize size, void* userData) const;
+#if VMA_STATS_STRING_ENABLED
+ // mapRefCount == UINT32_MAX means unspecified.
+ // NOTE(review): none of the declarations below takes a mapRefCount parameter -
+ // the line above looks stale; confirm and remove.
+ void PrintDetailedMap_Begin(class VmaJsonWriter& json,
+ VkDeviceSize unusedBytes,
+ size_t allocationCount,
+ size_t unusedRangeCount) const;
+ void PrintDetailedMap_Allocation(class VmaJsonWriter& json,
+ VkDeviceSize offset, VkDeviceSize size, void* userData) const;
+ void PrintDetailedMap_UnusedRange(class VmaJsonWriter& json,
+ VkDeviceSize offset,
+ VkDeviceSize size) const;
+ void PrintDetailedMap_End(class VmaJsonWriter& json) const;
+#endif
+
+private:
+ // Set by Init(); zero until then.
+ VkDeviceSize m_Size;
+ const VkAllocationCallbacks* m_pAllocationCallbacks;
+ const VkDeviceSize m_BufferImageGranularity;
+ const bool m_IsVirtual;
+};
+
+#ifndef _VMA_BLOCK_METADATA_FUNCTIONS
+// Stores the allocation callbacks, the buffer/image granularity and the
+// virtual-block flag. The block size starts out as 0.
+VmaBlockMetadata::VmaBlockMetadata(
+    const VkAllocationCallbacks* pAllocationCallbacks,
+    VkDeviceSize bufferImageGranularity,
+    bool isVirtual)
+    : m_Size(0)
+    , m_pAllocationCallbacks(pAllocationCallbacks)
+    , m_BufferImageGranularity(bufferImageGranularity)
+    , m_IsVirtual(isVirtual)
+{
+}
+
+// Logs a single unfreed allocation through VMA_LEAK_LOG_FORMAT.
+// For virtual blocks userData is printed as-is. For real blocks userData must be
+// the owning VmaAllocation; its user data, name and suballocation type are logged
+// (plus the buffer/image usage when VMA_STATS_STRING_ENABLED is set).
+void VmaBlockMetadata::DebugLogAllocation(VkDeviceSize offset, VkDeviceSize size, void* userData) const
+{
+    if (IsVirtual())
+    {
+        VMA_LEAK_LOG_FORMAT("UNFREED VIRTUAL ALLOCATION; Offset: %" PRIu64 "; Size: %" PRIu64 "; UserData: %p", offset, size, userData);
+        return;
+    }
+
+    VMA_ASSERT(userData != VMA_NULL);
+    VmaAllocation hAlloc = reinterpret_cast<VmaAllocation>(userData);
+
+    // From here on report the allocation's own user data, not the handle itself.
+    void* const allocUserData = hAlloc->GetUserData();
+    const char* const allocName = hAlloc->GetName();
+    const char* const printedName = allocName ? allocName : "vma_empty";
+
+#if VMA_STATS_STRING_ENABLED
+    VMA_LEAK_LOG_FORMAT("UNFREED ALLOCATION; Offset: %" PRIu64 "; Size: %" PRIu64 "; UserData: %p; Name: %s; Type: %s; Usage: %" PRIu64,
+        offset, size, allocUserData, printedName,
+        VMA_SUBALLOCATION_TYPE_NAMES[hAlloc->GetSuballocationType()],
+        (uint64_t)hAlloc->GetBufferImageUsage().Value);
+#else
+    VMA_LEAK_LOG_FORMAT("UNFREED ALLOCATION; Offset: %" PRIu64 "; Size: %" PRIu64 "; UserData: %p; Name: %s; Type: %u",
+        offset, size, allocUserData, printedName,
+        (unsigned)hAlloc->GetSuballocationType());
+#endif // VMA_STATS_STRING_ENABLED
+}
+
+#if VMA_STATS_STRING_ENABLED
+// Writes the block summary (total bytes, unused bytes, allocation count, unused
+// range count) and opens the "Suballocations" array. The array is left open.
+void VmaBlockMetadata::PrintDetailedMap_Begin(
+    class VmaJsonWriter& json,
+    VkDeviceSize unusedBytes,
+    size_t allocationCount,
+    size_t unusedRangeCount) const
+{
+    // Byte totals.
+    json.WriteString("TotalBytes");
+    json.WriteNumber(GetSize());
+    json.WriteString("UnusedBytes");
+    json.WriteNumber(unusedBytes);
+
+    // Item counts, written as 64-bit numbers.
+    json.WriteString("Allocations");
+    json.WriteNumber(static_cast<uint64_t>(allocationCount));
+    json.WriteString("UnusedRanges");
+    json.WriteNumber(static_cast<uint64_t>(unusedRangeCount));
+
+    // Entries are appended by the caller after this point.
+    json.WriteString("Suballocations");
+    json.BeginArray();
+}
+
+// Writes one allocation as a single-line JSON object.
+// Real blocks delegate everything except the offset to the VmaAllocation stored
+// in userData; virtual blocks print the size and, if non-null, the userData pointer.
+void VmaBlockMetadata::PrintDetailedMap_Allocation(
+    class VmaJsonWriter& json,
+    VkDeviceSize offset,
+    VkDeviceSize size,
+    void* userData) const
+{
+    json.BeginObject(true);
+
+    json.WriteString("Offset");
+    json.WriteNumber(offset);
+
+    if (!IsVirtual())
+    {
+        // userData is the allocation handle for non-virtual blocks.
+        VmaAllocation hAlloc = static_cast<VmaAllocation>(userData);
+        hAlloc->PrintParameters(json);
+    }
+    else
+    {
+        json.WriteString("Size");
+        json.WriteNumber(size);
+
+        if (userData != VMA_NULL)
+        {
+            json.WriteString("CustomData");
+            json.BeginString();
+            json.ContinueString_Pointer(userData);
+            json.EndString();
+        }
+    }
+
+    json.EndObject();
+}
+
+// Writes one unused range as a single-line JSON object with its offset,
+// the FREE suballocation type name and its size.
+void VmaBlockMetadata::PrintDetailedMap_UnusedRange(
+    class VmaJsonWriter& json,
+    VkDeviceSize offset,
+    VkDeviceSize size) const
+{
+    const char* const freeTypeName = VMA_SUBALLOCATION_TYPE_NAMES[VMA_SUBALLOCATION_TYPE_FREE];
+
+    json.BeginObject(true);
+    json.WriteString("Offset");
+    json.WriteNumber(offset);
+    json.WriteString("Type");
+    json.WriteString(freeTypeName);
+    json.WriteString("Size");
+    json.WriteNumber(size);
+    json.EndObject();
+}
+
+// Closes the JSON array that PrintDetailedMap_Begin() opened for "Suballocations".
+void VmaBlockMetadata::PrintDetailedMap_End(
+    class VmaJsonWriter& json) const
+{
+    json.EndArray();
+}
+#endif // VMA_STATS_STRING_ENABLED
+#endif // _VMA_BLOCK_METADATA_FUNCTIONS
+#endif // _VMA_BLOCK_METADATA
+
+#ifndef _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY
+// Tracks, per bufferImageGranularity-sized page, the suballocation type and allocation count. Before deleting an object of this class remember to call 'Destroy()'.
+class VmaBlockBufferImageGranularity final
+{
+public:
+ struct ValidationContext // Scratch state passed between StartValidation(), Validate() and FinishValidation().
+ {
+ const VkAllocationCallbacks* allocCallbacks; // Callbacks used to allocate and later free pageAllocs.
+ uint16_t* pageAllocs; // One counter per page; stays VMA_NULL when StartValidation() skips tracking.
+ };
+
+ VmaBlockBufferImageGranularity(VkDeviceSize bufferImageGranularity);
+ ~VmaBlockBufferImageGranularity();
+
+ bool IsEnabled() const { return m_BufferImageGranularity > MAX_LOW_BUFFER_IMAGE_GRANULARITY; } // Page tracking is used only above the threshold; lower values are handled by RoundupAllocRequest().
+
+ void Init(const VkAllocationCallbacks* pAllocationCallbacks, VkDeviceSize size);
+ // Before destroying the object you must call Destroy() to free its memory.
+ void Destroy(const VkAllocationCallbacks* pAllocationCallbacks);
+
+ void RoundupAllocRequest(VmaSuballocationType allocType,
+ VkDeviceSize& inOutAllocSize,
+ VkDeviceSize& inOutAllocAlignment) const;
+
+ bool CheckConflictAndAlignUp(VkDeviceSize& inOutAllocOffset,
+ VkDeviceSize allocSize,
+ VkDeviceSize blockOffset,
+ VkDeviceSize blockSize,
+ VmaSuballocationType allocType) const; // Returns true when the allocation cannot be placed (conflict or no room after aligning up).
+
+ void AllocPages(uint8_t allocType, VkDeviceSize offset, VkDeviceSize size);
+ void FreePages(VkDeviceSize offset, VkDeviceSize size);
+ void Clear();
+
+ ValidationContext StartValidation(const VkAllocationCallbacks* pAllocationCallbacks,
+ bool isVirutal) const;
+ bool Validate(ValidationContext& ctx, VkDeviceSize offset, VkDeviceSize size) const;
+ bool FinishValidation(ValidationContext& ctx) const;
+
+private:
+ static const uint16_t MAX_LOW_BUFFER_IMAGE_GRANULARITY = 256; // Threshold used by IsEnabled() and RoundupAllocRequest().
+
+ struct RegionInfo
+ {
+ uint8_t allocType; // A VmaSuballocationType value stored as a byte.
+ uint16_t allocCount; // Number of allocations whose first or last page is this page.
+ };
+
+ VkDeviceSize m_BufferImageGranularity;
+ uint32_t m_RegionCount; // Number of entries in m_RegionInfo.
+ RegionInfo* m_RegionInfo; // Allocated by Init() when IsEnabled(), released by Destroy().
+
+ uint32_t GetStartPage(VkDeviceSize offset) const { return OffsetToPageIndex(offset & ~(m_BufferImageGranularity - 1)); } // Mask assumes the granularity is a power of two - TODO confirm.
+ uint32_t GetEndPage(VkDeviceSize offset, VkDeviceSize size) const { return OffsetToPageIndex((offset + size - 1) & ~(m_BufferImageGranularity - 1)); } // Page holding the last byte of the range.
+
+ uint32_t OffsetToPageIndex(VkDeviceSize offset) const;
+ void AllocPage(RegionInfo& page, uint8_t allocType);
+};
+
+#ifndef _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY_FUNCTIONS
+// Remembers the granularity; the per-page table is not allocated here but in Init().
+VmaBlockBufferImageGranularity::VmaBlockBufferImageGranularity(
+    VkDeviceSize bufferImageGranularity)
+    : m_BufferImageGranularity(bufferImageGranularity)
+    , m_RegionCount(0)
+    , m_RegionInfo(VMA_NULL)
+{
+}
+
+// Frees nothing itself: it only asserts that Destroy() has already released
+// the per-page table.
+VmaBlockBufferImageGranularity::~VmaBlockBufferImageGranularity()
+{
+    // The message names Destroy(), the method that actually releases m_RegionInfo
+    // (this class has no method called Free).
+    VMA_ASSERT(m_RegionInfo == VMA_NULL && "Destroy() not called before destroying object!");
+}
+
+// Allocates and zeroes one RegionInfo per granularity-sized page of a block of
+// `size` bytes. Does nothing when page tracking is disabled.
+void VmaBlockBufferImageGranularity::Init(const VkAllocationCallbacks* pAllocationCallbacks, VkDeviceSize size)
+{
+    if (!IsEnabled())
+        return;
+
+    // Round up so a trailing partial page still gets an entry.
+    m_RegionCount = static_cast<uint32_t>(VmaDivideRoundingUp(size, m_BufferImageGranularity));
+    m_RegionInfo = vma_new_array(pAllocationCallbacks, RegionInfo, m_RegionCount);
+
+    const size_t tableBytes = m_RegionCount * sizeof(RegionInfo);
+    memset(m_RegionInfo, 0, tableBytes);
+}
+
+// Releases the per-page table, if any, and nulls the pointer so the destructor's
+// assertion passes. Safe to call when the table was never allocated.
+void VmaBlockBufferImageGranularity::Destroy(const VkAllocationCallbacks* pAllocationCallbacks)
+{
+    if (m_RegionInfo == VMA_NULL)
+        return;
+
+    vma_delete_array(pAllocationCallbacks, m_RegionInfo, m_RegionCount);
+    m_RegionInfo = VMA_NULL;
+}
+
+// For granularities in the range (1, MAX_LOW_BUFFER_IMAGE_GRANULARITY], raises the
+// alignment to at least the granularity and rounds the size up to a multiple of it.
+// This applies only to unknown, image-unknown and optimal-image allocations;
+// other types and other granularities leave both values untouched.
+void VmaBlockBufferImageGranularity::RoundupAllocRequest(VmaSuballocationType allocType,
+    VkDeviceSize& inOutAllocSize,
+    VkDeviceSize& inOutAllocAlignment) const
+{
+    const bool lowGranularity =
+        m_BufferImageGranularity > 1 &&
+        m_BufferImageGranularity <= MAX_LOW_BUFFER_IMAGE_GRANULARITY;
+    if (!lowGranularity)
+        return;
+
+    switch (allocType)
+    {
+    case VMA_SUBALLOCATION_TYPE_UNKNOWN:
+    case VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN:
+    case VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL:
+        inOutAllocAlignment = VMA_MAX(inOutAllocAlignment, m_BufferImageGranularity);
+        inOutAllocSize = VmaAlignUp(inOutAllocSize, m_BufferImageGranularity);
+        break;
+    default:
+        break;
+    }
+}
+
+// Checks the first and last page of a proposed allocation for a buffer/image
+// granularity conflict. A conflict on the first page is resolved by aligning
+// inOutAllocOffset up to the next page. Returns true when the allocation cannot
+// be placed: either it no longer fits in the block after aligning, or its last
+// page conflicts. Returns false otherwise, including when tracking is disabled.
+bool VmaBlockBufferImageGranularity::CheckConflictAndAlignUp(VkDeviceSize& inOutAllocOffset,
+    VkDeviceSize allocSize,
+    VkDeviceSize blockOffset,
+    VkDeviceSize blockSize,
+    VmaSuballocationType allocType) const
+{
+    if (!IsEnabled())
+        return false;
+
+    uint32_t firstPage = GetStartPage(inOutAllocOffset);
+    const RegionInfo& firstRegion = m_RegionInfo[firstPage];
+    if (firstRegion.allocCount > 0 &&
+        VmaIsBufferImageGranularityConflict(static_cast<VmaSuballocationType>(firstRegion.allocType), allocType))
+    {
+        // Skip the conflicting page entirely.
+        inOutAllocOffset = VmaAlignUp(inOutAllocOffset, m_BufferImageGranularity);
+        if (blockSize < allocSize + inOutAllocOffset - blockOffset)
+            return true;
+        ++firstPage;
+    }
+
+    const uint32_t lastPage = GetEndPage(inOutAllocOffset, allocSize);
+    if (lastPage == firstPage)
+        return false;
+
+    const RegionInfo& lastRegion = m_RegionInfo[lastPage];
+    return lastRegion.allocCount > 0 &&
+        VmaIsBufferImageGranularityConflict(static_cast<VmaSuballocationType>(lastRegion.allocType), allocType);
+}
+
+// Registers an allocation on the page holding its first byte and, if different,
+// on the page holding its last byte. Pages in between are not touched.
+void VmaBlockBufferImageGranularity::AllocPages(uint8_t allocType, VkDeviceSize offset, VkDeviceSize size)
+{
+    if (!IsEnabled())
+        return;
+
+    const uint32_t firstPage = GetStartPage(offset);
+    AllocPage(m_RegionInfo[firstPage], allocType);
+
+    const uint32_t lastPage = GetEndPage(offset, size);
+    if (lastPage != firstPage)
+    {
+        AllocPage(m_RegionInfo[lastPage], allocType);
+    }
+}
+
+// Reverses AllocPages(): decrements the counter of the first page and, if
+// different, the last page of the range. A page whose counter reaches zero is
+// marked VMA_SUBALLOCATION_TYPE_FREE.
+void VmaBlockBufferImageGranularity::FreePages(VkDeviceSize offset, VkDeviceSize size)
+{
+    if (!IsEnabled())
+        return;
+
+    const uint32_t firstPage = GetStartPage(offset);
+    RegionInfo& firstRegion = m_RegionInfo[firstPage];
+    --firstRegion.allocCount;
+    if (firstRegion.allocCount == 0)
+        firstRegion.allocType = VMA_SUBALLOCATION_TYPE_FREE;
+
+    const uint32_t lastPage = GetEndPage(offset, size);
+    if (lastPage == firstPage)
+        return;
+
+    RegionInfo& lastRegion = m_RegionInfo[lastPage];
+    --lastRegion.allocCount;
+    if (lastRegion.allocCount == 0)
+        lastRegion.allocType = VMA_SUBALLOCATION_TYPE_FREE;
+}
+
+// Zeroes every page entry while keeping the table allocated.
+// Does nothing when the table does not exist.
+void VmaBlockBufferImageGranularity::Clear()
+{
+    if (m_RegionInfo == VMA_NULL)
+        return;
+
+    memset(m_RegionInfo, 0, m_RegionCount * sizeof(RegionInfo));
+}
+
+// Creates the context for a validation pass. For non-virtual blocks with page
+// tracking enabled it allocates a zeroed per-page counter array; otherwise
+// pageAllocs stays VMA_NULL.
+VmaBlockBufferImageGranularity::ValidationContext VmaBlockBufferImageGranularity::StartValidation(
+    const VkAllocationCallbacks* pAllocationCallbacks, bool isVirutal) const
+{
+    ValidationContext ctx{ pAllocationCallbacks, VMA_NULL };
+    if (isVirutal || !IsEnabled())
+        return ctx;
+
+    ctx.pageAllocs = vma_new_array(pAllocationCallbacks, uint16_t, m_RegionCount);
+    memset(ctx.pageAllocs, 0, m_RegionCount * sizeof(uint16_t));
+    return ctx;
+}
+
+// Records one allocation in the validation context: bumps the expected counter
+// of its first page and, if different, its last page, and checks that the live
+// table already has a non-zero count on each of them.
+bool VmaBlockBufferImageGranularity::Validate(ValidationContext& ctx,
+    VkDeviceSize offset, VkDeviceSize size) const
+{
+    if (!IsEnabled())
+        return true;
+
+    const uint32_t firstPage = GetStartPage(offset);
+    ++ctx.pageAllocs[firstPage];
+    VMA_VALIDATE(m_RegionInfo[firstPage].allocCount > 0);
+
+    const uint32_t lastPage = GetEndPage(offset, size);
+    if (lastPage != firstPage)
+    {
+        ++ctx.pageAllocs[lastPage];
+        VMA_VALIDATE(m_RegionInfo[lastPage].allocCount > 0);
+    }
+
+    return true;
+}
+
+// Finishes a validation pass: compares the per-page counters accumulated by
+// Validate() with the live table, then releases the counter array.
+// The array is released before the result is reported, so a mismatch no longer
+// leaves ctx.pageAllocs allocated.
+// NOTE(review): this presumes VMA_VALIDATE exits the function on failure; its
+// definition is outside this chunk - confirm.
+bool VmaBlockBufferImageGranularity::FinishValidation(ValidationContext& ctx) const
+{
+    if (!IsEnabled())
+        return true;
+
+    VMA_ASSERT(ctx.pageAllocs != VMA_NULL && "Validation context not initialized!");
+
+    // Check proper page structure; stop at the first mismatch.
+    bool pageCountsMatch = true;
+    for (uint32_t page = 0; page < m_RegionCount && pageCountsMatch; ++page)
+    {
+        pageCountsMatch = (ctx.pageAllocs[page] == m_RegionInfo[page].allocCount);
+    }
+
+    // Always give the scratch array back, whatever the outcome.
+    vma_delete_array(ctx.allocCallbacks, ctx.pageAllocs, m_RegionCount);
+    ctx.pageAllocs = VMA_NULL;
+
+    VMA_VALIDATE(pageCountsMatch);
+    return true;
+}
+
+// Converts a byte offset to a page index by shifting right by the position of
+// the granularity's most significant set bit.
+uint32_t VmaBlockBufferImageGranularity::OffsetToPageIndex(VkDeviceSize offset) const
+{
+    const auto pageShift = VMA_BITSCAN_MSB(m_BufferImageGranularity);
+    return static_cast<uint32_t>(offset >> pageShift);
+}
+
+// Adds one allocation to a page. The page takes the new type only when it has no
+// allocations or is still marked FREE; otherwise the existing type is kept.
+void VmaBlockBufferImageGranularity::AllocPage(RegionInfo& page, uint8_t allocType)
+{
+    // allocCount is unsigned, so "not zero" already means "greater than zero".
+    const bool typeCanBeOverridden =
+        page.allocCount == 0 ||
+        page.allocType == VMA_SUBALLOCATION_TYPE_FREE;
+    if (typeCanBeOverridden)
+    {
+        page.allocType = allocType;
+    }
+
+    ++page.allocCount;
+}
+#endif // _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY_FUNCTIONS
+#endif // _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY
+
+#ifndef _VMA_BLOCK_METADATA_LINEAR
+/*
+Allocations and their references in internal data structure look like this:
+
+if(m_2ndVectorMode == SECOND_VECTOR_EMPTY):
+
+ 0 +-------+
+ | |
+ | |
+ | |
+ +-------+
+ | Alloc | 1st[m_1stNullItemsBeginCount]
+ +-------+
+ | Alloc | 1st[m_1stNullItemsBeginCount + 1]
+ +-------+
+ | ... |
+ +-------+
+ | Alloc | 1st[1st.size() - 1]
+ +-------+
+ | |
+ | |
+ | |
+GetSize() +-------+
+
+if(m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER):
+
+ 0 +-------+
+ | Alloc | 2nd[0]
+ +-------+
+ | Alloc | 2nd[1]
+ +-------+
+ | ... |
+ +-------+
+ | Alloc | 2nd[2nd.size() - 1]
+ +-------+
+ | |
+ | |
+ | |
+ +-------+
+ | Alloc | 1st[m_1stNullItemsBeginCount]
+ +-------+
+ | Alloc | 1st[m_1stNullItemsBeginCount + 1]
+ +-------+
+ | ... |
+ +-------+
+ | Alloc | 1st[1st.size() - 1]
+ +-------+
+ | |
+GetSize() +-------+
+
+if(m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK):
+
+ 0 +-------+
+ | |
+ | |
+ | |
+ +-------+
+ | Alloc | 1st[m_1stNullItemsBeginCount]
+ +-------+
+ | Alloc | 1st[m_1stNullItemsBeginCount + 1]
+ +-------+
+ | ... |
+ +-------+
+ | Alloc | 1st[1st.size() - 1]
+ +-------+
+ | |
+ | |
+ | |
+ +-------+
+ | Alloc | 2nd[2nd.size() - 1]
+ +-------+
+ | ... |
+ +-------+
+ | Alloc | 2nd[1]
+ +-------+
+ | Alloc | 2nd[0]
+GetSize() +-------+
+
+*/
+class VmaBlockMetadata_Linear : public VmaBlockMetadata // Block metadata that keeps suballocations in two vectors (plain, ring buffer or double stack).
+{
+ VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockMetadata_Linear)
+public:
+ VmaBlockMetadata_Linear(const VkAllocationCallbacks* pAllocationCallbacks,
+ VkDeviceSize bufferImageGranularity, bool isVirtual);
+ virtual ~VmaBlockMetadata_Linear() = default;
+
+ VkDeviceSize GetSumFreeSize() const override { return m_SumFreeSize; }
+ bool IsEmpty() const override { return GetAllocationCount() == 0; }
+ VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const override { return (VkDeviceSize)allocHandle - 1; } // The handle value is the offset plus one.
+
+ void Init(VkDeviceSize size) override;
+ bool Validate() const override;
+ size_t GetAllocationCount() const override;
+ size_t GetFreeRegionsCount() const override; // Asserts and returns SIZE_MAX: not supported by this algorithm.
+
+ void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const override;
+ void AddStatistics(VmaStatistics& inoutStats) const override;
+
+#if VMA_STATS_STRING_ENABLED
+ void PrintDetailedMap(class VmaJsonWriter& json) const override;
+#endif
+
+ bool CreateAllocationRequest(
+ VkDeviceSize allocSize,
+ VkDeviceSize allocAlignment,
+ bool upperAddress,
+ VmaSuballocationType allocType,
+ uint32_t strategy,
+ VmaAllocationRequest* pAllocationRequest) override;
+
+ VkResult CheckCorruption(const void* pBlockData) override;
+
+ void Alloc(
+ const VmaAllocationRequest& request,
+ VmaSuballocationType type,
+ void* userData) override;
+
+ void Free(VmaAllocHandle allocHandle) override;
+ void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) override;
+ void* GetAllocationUserData(VmaAllocHandle allocHandle) const override;
+ VmaAllocHandle GetAllocationListBegin() const override;
+ VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const override;
+ VkDeviceSize GetNextFreeRegionSize(VmaAllocHandle alloc) const override;
+ void Clear() override;
+ void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) override;
+ void DebugLogAllAllocations() const override;
+
+private:
+ /*
+ There are two suballocation vectors, used in a ping-pong way.
+ The one with index m_1stVectorIndex is called 1st.
+ The one with index (m_1stVectorIndex ^ 1) is called 2nd.
+ 2nd can be non-empty only when 1st is not empty.
+ When 2nd is not empty, m_2ndVectorMode indicates its mode of operation.
+ */
+ typedef VmaVector<VmaSuballocation, VmaStlAllocator<VmaSuballocation>> SuballocationVectorType;
+
+ enum SECOND_VECTOR_MODE
+ {
+ SECOND_VECTOR_EMPTY,
+ /*
+ Suballocations in 2nd vector are created later than the ones in 1st, but they
+ all have smaller offset.
+ */
+ SECOND_VECTOR_RING_BUFFER,
+ /*
+ Suballocations in 2nd vector are upper side of double stack.
+ They all have offsets higher than those in 1st vector.
+ Top of this stack means smaller offsets, but higher indices in this vector.
+ */
+ SECOND_VECTOR_DOUBLE_STACK,
+ };
+
+ VkDeviceSize m_SumFreeSize; // Block size minus the sum of used suballocation sizes (checked in Validate()).
+ SuballocationVectorType m_Suballocations0, m_Suballocations1;
+ uint32_t m_1stVectorIndex; // 0 or 1: selects which of the two vectors currently plays the role of 1st.
+ SECOND_VECTOR_MODE m_2ndVectorMode;
+ // Number of items in 1st vector with hAllocation = null at the beginning.
+ size_t m_1stNullItemsBeginCount;
+ // Number of other items in 1st vector with hAllocation = null somewhere in the middle.
+ size_t m_1stNullItemsMiddleCount;
+ // Number of items in 2nd vector with hAllocation = null.
+ size_t m_2ndNullItemsCount;
+
+ SuballocationVectorType& AccessSuballocations1st() { return m_1stVectorIndex ? m_Suballocations1 : m_Suballocations0; }
+ SuballocationVectorType& AccessSuballocations2nd() { return m_1stVectorIndex ? m_Suballocations0 : m_Suballocations1; }
+ const SuballocationVectorType& AccessSuballocations1st() const { return m_1stVectorIndex ? m_Suballocations1 : m_Suballocations0; }
+ const SuballocationVectorType& AccessSuballocations2nd() const { return m_1stVectorIndex ? m_Suballocations0 : m_Suballocations1; }
+
+ VmaSuballocation& FindSuballocation(VkDeviceSize offset) const;
+ bool ShouldCompact1st() const;
+ void CleanupAfterFree();
+
+ bool CreateAllocationRequest_LowerAddress(
+ VkDeviceSize allocSize,
+ VkDeviceSize allocAlignment,
+ VmaSuballocationType allocType,
+ uint32_t strategy,
+ VmaAllocationRequest* pAllocationRequest);
+ bool CreateAllocationRequest_UpperAddress(
+ VkDeviceSize allocSize,
+ VkDeviceSize allocAlignment,
+ VmaSuballocationType allocType,
+ uint32_t strategy,
+ VmaAllocationRequest* pAllocationRequest);
+};
+
+#ifndef _VMA_BLOCK_METADATA_LINEAR_FUNCTIONS
+// Builds empty linear metadata: both suballocation vectors use the given
+// allocation callbacks, vector 0 acts as 1st, the 2nd vector is in EMPTY mode
+// and all null-item counters are zero.
+VmaBlockMetadata_Linear::VmaBlockMetadata_Linear(
+    const VkAllocationCallbacks* pAllocationCallbacks,
+    VkDeviceSize bufferImageGranularity,
+    bool isVirtual)
+    : VmaBlockMetadata(pAllocationCallbacks, bufferImageGranularity, isVirtual)
+    , m_SumFreeSize(0)
+    , m_Suballocations0(VmaStlAllocator<VmaSuballocation>(pAllocationCallbacks))
+    , m_Suballocations1(VmaStlAllocator<VmaSuballocation>(pAllocationCallbacks))
+    , m_1stVectorIndex(0)
+    , m_2ndVectorMode(SECOND_VECTOR_EMPTY)
+    , m_1stNullItemsBeginCount(0)
+    , m_1stNullItemsMiddleCount(0)
+    , m_2ndNullItemsCount(0)
+{
+}
+
+// Forwards the size to the base class and records the whole block as free.
+void VmaBlockMetadata_Linear::Init(VkDeviceSize size)
+{
+    VmaBlockMetadata::Init(size);
+
+    // Nothing has been allocated yet, so every byte counts as free.
+    m_SumFreeSize = size;
+}
+
+bool VmaBlockMetadata_Linear::Validate() const // Consistency check of both suballocation vectors, the null-item counters and m_SumFreeSize.
+{
+ const SuballocationVectorType& suballocations1st = AccessSuballocations1st();
+ const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd();
+
+ VMA_VALIDATE(suballocations2nd.empty() == (m_2ndVectorMode == SECOND_VECTOR_EMPTY)); // The mode is EMPTY exactly when 2nd holds no items.
+ VMA_VALIDATE(!suballocations1st.empty() ||
+ suballocations2nd.empty() ||
+ m_2ndVectorMode != SECOND_VECTOR_RING_BUFFER); // Ring-buffer mode requires a non-empty 1st vector.
+
+ if (!suballocations1st.empty())
+ {
+ // Null item at the beginning should be accounted into m_1stNullItemsBeginCount.
+ VMA_VALIDATE(suballocations1st[m_1stNullItemsBeginCount].type != VMA_SUBALLOCATION_TYPE_FREE);
+ // Null item at the end should be just pop_back().
+ VMA_VALIDATE(suballocations1st.back().type != VMA_SUBALLOCATION_TYPE_FREE);
+ }
+ if (!suballocations2nd.empty())
+ {
+ // Null item at the end should be just pop_back().
+ VMA_VALIDATE(suballocations2nd.back().type != VMA_SUBALLOCATION_TYPE_FREE);
+ }
+
+ VMA_VALIDATE(m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount <= suballocations1st.size());
+ VMA_VALIDATE(m_2ndNullItemsCount <= suballocations2nd.size());
+
+ VkDeviceSize sumUsedSize = 0;
+ const size_t suballoc1stCount = suballocations1st.size();
+ const VkDeviceSize debugMargin = GetDebugMargin();
+ VkDeviceSize offset = 0; // Lowest offset the next visited suballocation may start at; carried across all three passes.
+
+ if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) // Pass 1: ring-buffer items of 2nd, visited in index order.
+ {
+ const size_t suballoc2ndCount = suballocations2nd.size();
+ size_t nullItem2ndCount = 0;
+ for (size_t i = 0; i < suballoc2ndCount; ++i)
+ {
+ const VmaSuballocation& suballoc = suballocations2nd[i];
+ const bool currFree = (suballoc.type == VMA_SUBALLOCATION_TYPE_FREE);
+
+ VmaAllocation const alloc = (VmaAllocation)suballoc.userData;
+ if (!IsVirtual())
+ {
+ VMA_VALIDATE(currFree == (alloc == VK_NULL_HANDLE)); // Free items carry no allocation handle, used ones must.
+ }
+ VMA_VALIDATE(suballoc.offset >= offset);
+
+ if (!currFree)
+ {
+ if (!IsVirtual())
+ {
+ VMA_VALIDATE((VkDeviceSize)alloc->GetAllocHandle() == suballoc.offset + 1); // Handle value is offset + 1.
+ VMA_VALIDATE(alloc->GetSize() == suballoc.size);
+ }
+ sumUsedSize += suballoc.size;
+ }
+ else
+ {
+ ++nullItem2ndCount;
+ }
+
+ offset = suballoc.offset + suballoc.size + debugMargin;
+ }
+
+ VMA_VALIDATE(nullItem2ndCount == m_2ndNullItemsCount);
+ }
+
+ for (size_t i = 0; i < m_1stNullItemsBeginCount; ++i) // Leading null items of 1st must be free and carry no user data.
+ {
+ const VmaSuballocation& suballoc = suballocations1st[i];
+ VMA_VALIDATE(suballoc.type == VMA_SUBALLOCATION_TYPE_FREE &&
+ suballoc.userData == VMA_NULL);
+ }
+
+ size_t nullItem1stCount = m_1stNullItemsBeginCount;
+
+ for (size_t i = m_1stNullItemsBeginCount; i < suballoc1stCount; ++i) // Pass 2: remaining items of 1st.
+ {
+ const VmaSuballocation& suballoc = suballocations1st[i];
+ const bool currFree = (suballoc.type == VMA_SUBALLOCATION_TYPE_FREE);
+
+ VmaAllocation const alloc = (VmaAllocation)suballoc.userData;
+ if (!IsVirtual())
+ {
+ VMA_VALIDATE(currFree == (alloc == VK_NULL_HANDLE));
+ }
+ VMA_VALIDATE(suballoc.offset >= offset);
+ VMA_VALIDATE(i >= m_1stNullItemsBeginCount || currFree); // NOTE(review): i starts at m_1stNullItemsBeginCount, so the left operand is always true here.
+
+ if (!currFree)
+ {
+ if (!IsVirtual())
+ {
+ VMA_VALIDATE((VkDeviceSize)alloc->GetAllocHandle() == suballoc.offset + 1);
+ VMA_VALIDATE(alloc->GetSize() == suballoc.size);
+ }
+ sumUsedSize += suballoc.size;
+ }
+ else
+ {
+ ++nullItem1stCount;
+ }
+
+ offset = suballoc.offset + suballoc.size + debugMargin;
+ }
+ VMA_VALIDATE(nullItem1stCount == m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount);
+
+ if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) // Pass 3: upper stack, visited from the highest index down (ascending offsets).
+ {
+ const size_t suballoc2ndCount = suballocations2nd.size();
+ size_t nullItem2ndCount = 0;
+ for (size_t i = suballoc2ndCount; i--; )
+ {
+ const VmaSuballocation& suballoc = suballocations2nd[i];
+ const bool currFree = (suballoc.type == VMA_SUBALLOCATION_TYPE_FREE);
+
+ VmaAllocation const alloc = (VmaAllocation)suballoc.userData;
+ if (!IsVirtual())
+ {
+ VMA_VALIDATE(currFree == (alloc == VK_NULL_HANDLE));
+ }
+ VMA_VALIDATE(suballoc.offset >= offset);
+
+ if (!currFree)
+ {
+ if (!IsVirtual())
+ {
+ VMA_VALIDATE((VkDeviceSize)alloc->GetAllocHandle() == suballoc.offset + 1);
+ VMA_VALIDATE(alloc->GetSize() == suballoc.size);
+ }
+ sumUsedSize += suballoc.size;
+ }
+ else
+ {
+ ++nullItem2ndCount;
+ }
+
+ offset = suballoc.offset + suballoc.size + debugMargin;
+ }
+
+ VMA_VALIDATE(nullItem2ndCount == m_2ndNullItemsCount);
+ }
+
+ VMA_VALIDATE(offset <= GetSize()); // The last visited suballocation (plus debug margin) must end inside the block.
+ VMA_VALIDATE(m_SumFreeSize == GetSize() - sumUsedSize);
+
+ return true;
+}
+
+// Number of live allocations: the items of both vectors minus the items
+// recorded as null (leading and middle nulls in 1st, all nulls in 2nd).
+size_t VmaBlockMetadata_Linear::GetAllocationCount() const
+{
+    const size_t live1st =
+        AccessSuballocations1st().size() - m_1stNullItemsBeginCount - m_1stNullItemsMiddleCount;
+    const size_t live2nd =
+        AccessSuballocations2nd().size() - m_2ndNullItemsCount;
+    return live1st + live2nd;
+}
+
+// Not supported by the linear algorithm: the function is only used for
+// defragmentation, which is disabled for it. Asserts and returns SIZE_MAX.
+size_t VmaBlockMetadata_Linear::GetFreeRegionsCount() const
+{
+    VMA_ASSERT(0);
+    return SIZE_MAX;
+}
+
+void VmaBlockMetadata_Linear::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const // Adds this block, its allocations and its unused ranges to inoutStats, walking the block in ascending offset order.
+{
+ const VkDeviceSize size = GetSize();
+ const SuballocationVectorType& suballocations1st = AccessSuballocations1st();
+ const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd();
+ const size_t suballoc1stCount = suballocations1st.size();
+ const size_t suballoc2ndCount = suballocations2nd.size();
+
+ inoutStats.statistics.blockCount++;
+ inoutStats.statistics.blockBytes += size;
+
+ VkDeviceSize lastOffset = 0; // End of the region processed so far; carried across all three passes.
+
+ if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) // Pass 1: ring-buffer items of 2nd, which lie below the first live item of 1st.
+ {
+ const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; // This pass stops where the first live item of 1st begins.
+ size_t nextAlloc2ndIndex = 0;
+ while (lastOffset < freeSpace2ndTo1stEnd)
+ {
+ // Find next non-null allocation or move nextAlloc2ndIndex to the end.
+ while (nextAlloc2ndIndex < suballoc2ndCount &&
+ suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL)
+ {
+ ++nextAlloc2ndIndex;
+ }
+
+ // Found non-null allocation.
+ if (nextAlloc2ndIndex < suballoc2ndCount)
+ {
+ const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex];
+
+ // 1. Process free space before this allocation.
+ if (lastOffset < suballoc.offset)
+ {
+ // There is free space from lastOffset to suballoc.offset.
+ const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset;
+ VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize);
+ }
+
+ // 2. Process this allocation.
+ // There is allocation with suballoc.offset, suballoc.size.
+ VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size);
+
+ // 3. Prepare for next iteration.
+ lastOffset = suballoc.offset + suballoc.size;
+ ++nextAlloc2ndIndex;
+ }
+ // We are at the end.
+ else
+ {
+ // There is free space from lastOffset to freeSpace2ndTo1stEnd.
+ if (lastOffset < freeSpace2ndTo1stEnd)
+ {
+ const VkDeviceSize unusedRangeSize = freeSpace2ndTo1stEnd - lastOffset;
+ VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize);
+ }
+
+ // End of loop.
+ lastOffset = freeSpace2ndTo1stEnd;
+ }
+ }
+ }
+
+ size_t nextAlloc1stIndex = m_1stNullItemsBeginCount; // Pass 2: items of 1st, skipping its leading null items.
+ const VkDeviceSize freeSpace1stTo2ndEnd =
+ m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? suballocations2nd.back().offset : size; // Upper bound: lowest item of the upper stack, or the end of the block.
+ while (lastOffset < freeSpace1stTo2ndEnd)
+ {
+ // Find next non-null allocation or move nextAlloc1stIndex to the end.
+ while (nextAlloc1stIndex < suballoc1stCount &&
+ suballocations1st[nextAlloc1stIndex].userData == VMA_NULL)
+ {
+ ++nextAlloc1stIndex;
+ }
+
+ // Found non-null allocation.
+ if (nextAlloc1stIndex < suballoc1stCount)
+ {
+ const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex];
+
+ // 1. Process free space before this allocation.
+ if (lastOffset < suballoc.offset)
+ {
+ // There is free space from lastOffset to suballoc.offset.
+ const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset;
+ VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize);
+ }
+
+ // 2. Process this allocation.
+ // There is allocation with suballoc.offset, suballoc.size.
+ VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size);
+
+ // 3. Prepare for next iteration.
+ lastOffset = suballoc.offset + suballoc.size;
+ ++nextAlloc1stIndex;
+ }
+ // We are at the end.
+ else
+ {
+ // There is free space from lastOffset to freeSpace1stTo2ndEnd.
+ if (lastOffset < freeSpace1stTo2ndEnd)
+ {
+ const VkDeviceSize unusedRangeSize = freeSpace1stTo2ndEnd - lastOffset;
+ VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize);
+ }
+
+ // End of loop.
+ lastOffset = freeSpace1stTo2ndEnd;
+ }
+ }
+
+ if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) // Pass 3: upper stack, visited from the highest index down (ascending offsets).
+ {
+ size_t nextAlloc2ndIndex = suballocations2nd.size() - 1;
+ while (lastOffset < size)
+ {
+ // Find next non-null allocation or move nextAlloc2ndIndex past the beginning.
+ while (nextAlloc2ndIndex != SIZE_MAX &&
+ suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL)
+ {
+ --nextAlloc2ndIndex; // Decrementing past 0 wraps to SIZE_MAX, which ends the backward scan.
+ }
+
+ // Found non-null allocation.
+ if (nextAlloc2ndIndex != SIZE_MAX)
+ {
+ const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex];
+
+ // 1. Process free space before this allocation.
+ if (lastOffset < suballoc.offset)
+ {
+ // There is free space from lastOffset to suballoc.offset.
+ const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset;
+ VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize);
+ }
+
+ // 2. Process this allocation.
+ // There is allocation with suballoc.offset, suballoc.size.
+ VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size);
+
+ // 3. Prepare for next iteration.
+ lastOffset = suballoc.offset + suballoc.size;
+ --nextAlloc2ndIndex;
+ }
+ // We are at the end.
+ else
+ {
+ // There is free space from lastOffset to size.
+ if (lastOffset < size)
+ {
+ const VkDeviceSize unusedRangeSize = size - lastOffset;
+ VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize);
+ }
+
+ // End of loop.
+ lastOffset = size;
+ }
+ }
+ }
+}
+
+// Adds this block to the basic statistics: one block, its byte size, the bytes
+// in use (size minus m_SumFreeSize) and the number of live allocations.
+// Allocations are counted by walking the block in ascending offset order:
+//   1. ring-buffer items of the 2nd vector (if in RING_BUFFER mode),
+//   2. items of the 1st vector,
+//   3. upper-stack items of the 2nd vector (if in DOUBLE_STACK mode).
+// Items whose userData is VMA_NULL are skipped.
+void VmaBlockMetadata_Linear::AddStatistics(VmaStatistics& inoutStats) const
+{
+    const SuballocationVectorType& suballocations1st = AccessSuballocations1st();
+    const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd();
+    const VkDeviceSize size = GetSize();
+    const size_t suballoc1stCount = suballocations1st.size();
+    const size_t suballoc2ndCount = suballocations2nd.size();
+
+    inoutStats.blockCount++;
+    inoutStats.blockBytes += size;
+    inoutStats.allocationBytes += size - m_SumFreeSize;
+
+    // End of the region processed so far; carried across all three passes.
+    VkDeviceSize lastOffset = 0;
+
+    if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER)
+    {
+        // The ring-buffer part ends where the first live item of 1st begins.
+        const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset;
+        // The scan of the 2nd vector starts at its first item. It previously started
+        // at m_1stNullItemsBeginCount, a counter that belongs to the 1st vector, which
+        // skipped leading allocations of 2nd and under-reported allocationCount.
+        size_t nextAlloc2ndIndex = 0;
+        while (lastOffset < freeSpace2ndTo1stEnd)
+        {
+            // Find next non-null allocation or move nextAlloc2ndIndex to the end.
+            while (nextAlloc2ndIndex < suballoc2ndCount &&
+                suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL)
+            {
+                ++nextAlloc2ndIndex;
+            }
+
+            // Found non-null allocation.
+            if (nextAlloc2ndIndex < suballoc2ndCount)
+            {
+                const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex];
+
+                // Process this allocation.
+                // There is allocation with suballoc.offset, suballoc.size.
+                ++inoutStats.allocationCount;
+
+                // Prepare for next iteration.
+                lastOffset = suballoc.offset + suballoc.size;
+                ++nextAlloc2ndIndex;
+            }
+            // We are at the end.
+            else
+            {
+                // End of loop.
+                lastOffset = freeSpace2ndTo1stEnd;
+            }
+        }
+    }
+
+    // The 1st vector is scanned from its first live item up to either the lowest
+    // item of the upper stack or the end of the block.
+    size_t nextAlloc1stIndex = m_1stNullItemsBeginCount;
+    const VkDeviceSize freeSpace1stTo2ndEnd =
+        m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? suballocations2nd.back().offset : size;
+    while (lastOffset < freeSpace1stTo2ndEnd)
+    {
+        // Find next non-null allocation or move nextAlloc1stIndex to the end.
+        while (nextAlloc1stIndex < suballoc1stCount &&
+            suballocations1st[nextAlloc1stIndex].userData == VMA_NULL)
+        {
+            ++nextAlloc1stIndex;
+        }
+
+        // Found non-null allocation.
+        if (nextAlloc1stIndex < suballoc1stCount)
+        {
+            const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex];
+
+            // Process this allocation.
+            // There is allocation with suballoc.offset, suballoc.size.
+            ++inoutStats.allocationCount;
+
+            // Prepare for next iteration.
+            lastOffset = suballoc.offset + suballoc.size;
+            ++nextAlloc1stIndex;
+        }
+        // We are at the end.
+        else
+        {
+            // End of loop.
+            lastOffset = freeSpace1stTo2ndEnd;
+        }
+    }
+
+    if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK)
+    {
+        // The upper stack is visited from the highest index down, i.e. in ascending
+        // offset order. Decrementing the index past 0 wraps to SIZE_MAX, which ends
+        // the scan.
+        size_t nextAlloc2ndIndex = suballocations2nd.size() - 1;
+        while (lastOffset < size)
+        {
+            // Find next non-null allocation or move nextAlloc2ndIndex past the beginning.
+            while (nextAlloc2ndIndex != SIZE_MAX &&
+                suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL)
+            {
+                --nextAlloc2ndIndex;
+            }
+
+            // Found non-null allocation.
+            if (nextAlloc2ndIndex != SIZE_MAX)
+            {
+                const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex];
+
+                // Process this allocation.
+                // There is allocation with suballoc.offset, suballoc.size.
+                ++inoutStats.allocationCount;
+
+                // Prepare for next iteration.
+                lastOffset = suballoc.offset + suballoc.size;
+                --nextAlloc2ndIndex;
+            }
+            // We are at the end.
+            else
+            {
+                // End of loop.
+                lastOffset = size;
+            }
+        }
+    }
+}
+
+#if VMA_STATS_STRING_ENABLED
+// Writes a detailed map of this block to `json`.
+//
+// The function walks the suballocations twice in the same order:
+// - FIRST PASS only counts: number of unused ranges, number of allocations and
+// sum of their sizes. These totals are passed to PrintDetailedMap_Begin().
+// - SECOND PASS emits one PrintDetailedMap_UnusedRange() per gap and one
+// PrintDetailedMap_Allocation() per item, then calls PrintDetailedMap_End().
+//
+// Traversal order within each pass:
+// 1. 2nd vector in ring-buffer mode (forward, up to the offset of the first item of 1st vector),
+// 2. 1st vector (forward, starting at m_1stNullItemsBeginCount),
+// 3. 2nd vector in double-stack mode (backward by index, up to the block size).
+// Items whose userData is VMA_NULL are skipped and are not counted as allocations.
+void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json) const
+{
+ const VkDeviceSize size = GetSize();
+ const SuballocationVectorType& suballocations1st = AccessSuballocations1st();
+ const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd();
+ const size_t suballoc1stCount = suballocations1st.size();
+ const size_t suballoc2ndCount = suballocations2nd.size();
+
+ // FIRST PASS
+
+ size_t unusedRangeCount = 0;
+ VkDeviceSize usedBytes = 0;
+
+ // End offset of the most recently processed item; start of the next potential gap.
+ VkDeviceSize lastOffset = 0;
+
+ size_t alloc2ndCount = 0;
+ if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER)
+ {
+ // In ring-buffer mode the 2nd vector is scanned only up to the offset of the first item of 1st vector.
+ const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset;
+ size_t nextAlloc2ndIndex = 0;
+ while (lastOffset < freeSpace2ndTo1stEnd)
+ {
+ // Find next non-null allocation or move nextAlloc2ndIndex to the end.
+ while (nextAlloc2ndIndex < suballoc2ndCount &&
+ suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL)
+ {
+ ++nextAlloc2ndIndex;
+ }
+
+ // Found non-null allocation.
+ if (nextAlloc2ndIndex < suballoc2ndCount)
+ {
+ const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex];
+
+ // 1. Process free space before this allocation.
+ if (lastOffset < suballoc.offset)
+ {
+ // There is free space from lastOffset to suballoc.offset.
+ ++unusedRangeCount;
+ }
+
+ // 2. Process this allocation.
+ // There is allocation with suballoc.offset, suballoc.size.
+ ++alloc2ndCount;
+ usedBytes += suballoc.size;
+
+ // 3. Prepare for next iteration.
+ lastOffset = suballoc.offset + suballoc.size;
+ ++nextAlloc2ndIndex;
+ }
+ // We are at the end.
+ else
+ {
+ if (lastOffset < freeSpace2ndTo1stEnd)
+ {
+ // There is free space from lastOffset to freeSpace2ndTo1stEnd.
+ ++unusedRangeCount;
+ }
+
+ // End of loop.
+ lastOffset = freeSpace2ndTo1stEnd;
+ }
+ }
+ }
+
+ size_t nextAlloc1stIndex = m_1stNullItemsBeginCount;
+ size_t alloc1stCount = 0;
+ // The 1st vector is scanned up to the last item of 2nd vector (double-stack mode) or up to the block size.
+ const VkDeviceSize freeSpace1stTo2ndEnd =
+ m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? suballocations2nd.back().offset : size;
+ while (lastOffset < freeSpace1stTo2ndEnd)
+ {
+ // Find next non-null allocation or move nextAllocIndex to the end.
+ while (nextAlloc1stIndex < suballoc1stCount &&
+ suballocations1st[nextAlloc1stIndex].userData == VMA_NULL)
+ {
+ ++nextAlloc1stIndex;
+ }
+
+ // Found non-null allocation.
+ if (nextAlloc1stIndex < suballoc1stCount)
+ {
+ const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex];
+
+ // 1. Process free space before this allocation.
+ if (lastOffset < suballoc.offset)
+ {
+ // There is free space from lastOffset to suballoc.offset.
+ ++unusedRangeCount;
+ }
+
+ // 2. Process this allocation.
+ // There is allocation with suballoc.offset, suballoc.size.
+ ++alloc1stCount;
+ usedBytes += suballoc.size;
+
+ // 3. Prepare for next iteration.
+ lastOffset = suballoc.offset + suballoc.size;
+ ++nextAlloc1stIndex;
+ }
+ // We are at the end.
+ else
+ {
+ if (lastOffset < freeSpace1stTo2ndEnd)
+ {
+ // There is free space from lastOffset to freeSpace1stTo2ndEnd.
+ ++unusedRangeCount;
+ }
+
+ // End of loop.
+ lastOffset = freeSpace1stTo2ndEnd;
+ }
+ }
+
+ if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK)
+ {
+ // Walk the 2nd vector from its last index down to 0. Decrementing the unsigned
+ // index past 0 wraps to SIZE_MAX, which the loops below use as the "no more items" sentinel.
+ size_t nextAlloc2ndIndex = suballocations2nd.size() - 1;
+ while (lastOffset < size)
+ {
+ // Find next non-null allocation or move nextAlloc2ndIndex to the end.
+ while (nextAlloc2ndIndex != SIZE_MAX &&
+ suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL)
+ {
+ --nextAlloc2ndIndex;
+ }
+
+ // Found non-null allocation.
+ if (nextAlloc2ndIndex != SIZE_MAX)
+ {
+ const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex];
+
+ // 1. Process free space before this allocation.
+ if (lastOffset < suballoc.offset)
+ {
+ // There is free space from lastOffset to suballoc.offset.
+ ++unusedRangeCount;
+ }
+
+ // 2. Process this allocation.
+ // There is allocation with suballoc.offset, suballoc.size.
+ ++alloc2ndCount;
+ usedBytes += suballoc.size;
+
+ // 3. Prepare for next iteration.
+ lastOffset = suballoc.offset + suballoc.size;
+ --nextAlloc2ndIndex;
+ }
+ // We are at the end.
+ else
+ {
+ if (lastOffset < size)
+ {
+ // There is free space from lastOffset to size.
+ ++unusedRangeCount;
+ }
+
+ // End of loop.
+ lastOffset = size;
+ }
+ }
+ }
+
+ // Header of the map: everything not covered by a counted allocation is reported as unused.
+ const VkDeviceSize unusedBytes = size - usedBytes;
+ PrintDetailedMap_Begin(json, unusedBytes, alloc1stCount + alloc2ndCount, unusedRangeCount);
+
+ // SECOND PASS
+ // Same traversal as the first pass, but emitting entries instead of counting them.
+ lastOffset = 0;
+
+ if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER)
+ {
+ const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset;
+ size_t nextAlloc2ndIndex = 0;
+ while (lastOffset < freeSpace2ndTo1stEnd)
+ {
+ // Find next non-null allocation or move nextAlloc2ndIndex to the end.
+ while (nextAlloc2ndIndex < suballoc2ndCount &&
+ suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL)
+ {
+ ++nextAlloc2ndIndex;
+ }
+
+ // Found non-null allocation.
+ if (nextAlloc2ndIndex < suballoc2ndCount)
+ {
+ const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex];
+
+ // 1. Process free space before this allocation.
+ if (lastOffset < suballoc.offset)
+ {
+ // There is free space from lastOffset to suballoc.offset.
+ const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset;
+ PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize);
+ }
+
+ // 2. Process this allocation.
+ // There is allocation with suballoc.offset, suballoc.size.
+ PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.userData);
+
+ // 3. Prepare for next iteration.
+ lastOffset = suballoc.offset + suballoc.size;
+ ++nextAlloc2ndIndex;
+ }
+ // We are at the end.
+ else
+ {
+ if (lastOffset < freeSpace2ndTo1stEnd)
+ {
+ // There is free space from lastOffset to freeSpace2ndTo1stEnd.
+ const VkDeviceSize unusedRangeSize = freeSpace2ndTo1stEnd - lastOffset;
+ PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize);
+ }
+
+ // End of loop.
+ lastOffset = freeSpace2ndTo1stEnd;
+ }
+ }
+ }
+
+ nextAlloc1stIndex = m_1stNullItemsBeginCount;
+ while (lastOffset < freeSpace1stTo2ndEnd)
+ {
+ // Find next non-null allocation or move nextAllocIndex to the end.
+ while (nextAlloc1stIndex < suballoc1stCount &&
+ suballocations1st[nextAlloc1stIndex].userData == VMA_NULL)
+ {
+ ++nextAlloc1stIndex;
+ }
+
+ // Found non-null allocation.
+ if (nextAlloc1stIndex < suballoc1stCount)
+ {
+ const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex];
+
+ // 1. Process free space before this allocation.
+ if (lastOffset < suballoc.offset)
+ {
+ // There is free space from lastOffset to suballoc.offset.
+ const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset;
+ PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize);
+ }
+
+ // 2. Process this allocation.
+ // There is allocation with suballoc.offset, suballoc.size.
+ PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.userData);
+
+ // 3. Prepare for next iteration.
+ lastOffset = suballoc.offset + suballoc.size;
+ ++nextAlloc1stIndex;
+ }
+ // We are at the end.
+ else
+ {
+ if (lastOffset < freeSpace1stTo2ndEnd)
+ {
+ // There is free space from lastOffset to freeSpace1stTo2ndEnd.
+ const VkDeviceSize unusedRangeSize = freeSpace1stTo2ndEnd - lastOffset;
+ PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize);
+ }
+
+ // End of loop.
+ lastOffset = freeSpace1stTo2ndEnd;
+ }
+ }
+
+ if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK)
+ {
+ // Backward walk by index; SIZE_MAX (wrap-around of 0 - 1) marks exhaustion.
+ size_t nextAlloc2ndIndex = suballocations2nd.size() - 1;
+ while (lastOffset < size)
+ {
+ // Find next non-null allocation or move nextAlloc2ndIndex to the end.
+ while (nextAlloc2ndIndex != SIZE_MAX &&
+ suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL)
+ {
+ --nextAlloc2ndIndex;
+ }
+
+ // Found non-null allocation.
+ if (nextAlloc2ndIndex != SIZE_MAX)
+ {
+ const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex];
+
+ // 1. Process free space before this allocation.
+ if (lastOffset < suballoc.offset)
+ {
+ // There is free space from lastOffset to suballoc.offset.
+ const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset;
+ PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize);
+ }
+
+ // 2. Process this allocation.
+ // There is allocation with suballoc.offset, suballoc.size.
+ PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.userData);
+
+ // 3. Prepare for next iteration.
+ lastOffset = suballoc.offset + suballoc.size;
+ --nextAlloc2ndIndex;
+ }
+ // We are at the end.
+ else
+ {
+ if (lastOffset < size)
+ {
+ // There is free space from lastOffset to size.
+ const VkDeviceSize unusedRangeSize = size - lastOffset;
+ PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize);
+ }
+
+ // End of loop.
+ lastOffset = size;
+ }
+ }
+ }
+
+ PrintDetailedMap_End(json);
+}
+#endif // VMA_STATS_STRING_ENABLED
+
+// Entry point for building an allocation request in a linear block.
+// Rejects requests larger than the block, records the requested size in
+// *pAllocationRequest, then delegates to the upper- or lower-address variant
+// depending on `upperAddress`. Returns the delegate's result.
+bool VmaBlockMetadata_Linear::CreateAllocationRequest(
+    VkDeviceSize allocSize,
+    VkDeviceSize allocAlignment,
+    bool upperAddress,
+    VmaSuballocationType allocType,
+    uint32_t strategy,
+    VmaAllocationRequest* pAllocationRequest)
+{
+    VMA_ASSERT(allocSize > 0);
+    VMA_ASSERT(allocType != VMA_SUBALLOCATION_TYPE_FREE);
+    VMA_ASSERT(pAllocationRequest != VMA_NULL);
+    VMA_HEAVY_ASSERT(Validate());
+
+    // A request bigger than the whole block can never fit.
+    if (GetSize() < allocSize)
+    {
+        return false;
+    }
+
+    pAllocationRequest->size = allocSize;
+
+    if (upperAddress)
+    {
+        return CreateAllocationRequest_UpperAddress(
+            allocSize, allocAlignment, allocType, strategy, pAllocationRequest);
+    }
+    return CreateAllocationRequest_LowerAddress(
+        allocSize, allocAlignment, allocType, strategy, pAllocationRequest);
+}
+
+// Validates the magic value located right after every non-free suballocation
+// (at offset + size) in both vectors. Returns VK_ERROR_UNKNOWN_COPY on the first
+// mismatch, VK_SUCCESS when every checked item passes. Must not be called on a virtual block.
+VkResult VmaBlockMetadata_Linear::CheckCorruption(const void* pBlockData)
+{
+    VMA_ASSERT(!IsVirtual());
+
+    // 1st vector: leading null items are skipped.
+    SuballocationVectorType& firstVec = AccessSuballocations1st();
+    const size_t firstCount = firstVec.size();
+    for (size_t index = m_1stNullItemsBeginCount; index < firstCount; ++index)
+    {
+        const VmaSuballocation& item = firstVec[index];
+        if (item.type == VMA_SUBALLOCATION_TYPE_FREE)
+        {
+            continue;
+        }
+        if (!VmaValidateMagicValue(pBlockData, item.offset + item.size))
+        {
+            VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!");
+            return VK_ERROR_UNKNOWN_COPY;
+        }
+    }
+
+    // 2nd vector: checked from the first index.
+    SuballocationVectorType& secondVec = AccessSuballocations2nd();
+    const size_t secondCount = secondVec.size();
+    for (size_t index = 0; index < secondCount; ++index)
+    {
+        const VmaSuballocation& item = secondVec[index];
+        if (item.type == VMA_SUBALLOCATION_TYPE_FREE)
+        {
+            continue;
+        }
+        if (!VmaValidateMagicValue(pBlockData, item.offset + item.size))
+        {
+            VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!");
+            return VK_ERROR_UNKNOWN_COPY;
+        }
+    }
+
+    return VK_SUCCESS;
+}
+
+// Commits an allocation described by `request`: appends a new suballocation
+// (offset decoded from request.allocHandle, size from request.size) to the vector
+// selected by request.type, updates m_2ndVectorMode where needed, and subtracts
+// the size from m_SumFreeSize.
+void VmaBlockMetadata_Linear::Alloc(
+    const VmaAllocationRequest& request,
+    VmaSuballocationType type,
+    void* userData)
+{
+    // The handle stores offset + 1; undo that bias here.
+    const VkDeviceSize offset = (VkDeviceSize)request.allocHandle - 1;
+    const VmaSuballocation entry = { offset, request.size, userData, type };
+
+    switch (request.type)
+    {
+    case VmaAllocationRequestType::UpperAddress:
+    {
+        // Upper-address allocations go to the 2nd vector, which becomes a stack.
+        VMA_ASSERT(m_2ndVectorMode != SECOND_VECTOR_RING_BUFFER &&
+            "CRITICAL ERROR: Trying to use linear allocator as double stack while it was already used as ring buffer.");
+        AccessSuballocations2nd().push_back(entry);
+        m_2ndVectorMode = SECOND_VECTOR_DOUBLE_STACK;
+        break;
+    }
+    case VmaAllocationRequestType::EndOf1st:
+    {
+        SuballocationVectorType& suballocations1st = AccessSuballocations1st();
+
+        // New item must start at or after the end of the current last item...
+        VMA_ASSERT(suballocations1st.empty() ||
+            offset >= suballocations1st.back().offset + suballocations1st.back().size);
+        // ...and must end within the block.
+        VMA_ASSERT(offset + request.size <= GetSize());
+
+        suballocations1st.push_back(entry);
+        break;
+    }
+    case VmaAllocationRequestType::EndOf2nd:
+    {
+        SuballocationVectorType& suballocations1st = AccessSuballocations1st();
+        // The new item is placed in the 2nd vector and must end before the first item of 1st vector.
+        VMA_ASSERT(!suballocations1st.empty() &&
+            offset + request.size <= suballocations1st[m_1stNullItemsBeginCount].offset);
+        SuballocationVectorType& suballocations2nd = AccessSuballocations2nd();
+
+        if (m_2ndVectorMode == SECOND_VECTOR_EMPTY)
+        {
+            // This is the first item of the ring-buffer part.
+            VMA_ASSERT(suballocations2nd.empty());
+            m_2ndVectorMode = SECOND_VECTOR_RING_BUFFER;
+        }
+        else if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER)
+        {
+            // Ring buffer already in use.
+            VMA_ASSERT(!suballocations2nd.empty());
+        }
+        else if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK)
+        {
+            VMA_ASSERT(0 && "CRITICAL ERROR: Trying to use linear allocator as ring buffer while it was already used as double stack.");
+        }
+        else
+        {
+            VMA_ASSERT(0);
+        }
+
+        suballocations2nd.push_back(entry);
+        break;
+    }
+    default:
+        VMA_ASSERT(0 && "CRITICAL INTERNAL ERROR.");
+    }
+
+    m_SumFreeSize -= entry.size;
+}
+
+// Releases the suballocation identified by `allocHandle` (which encodes offset + 1).
+//
+// Lookup order:
+// 1. first item of 1st vector (at m_1stNullItemsBeginCount),
+// 2. last item of 2nd vector (ring-buffer or double-stack mode), or
+//    last item of 1st vector (when 2nd vector is unused),
+// 3. binary search in the remainder of 1st vector,
+// 4. binary search in 2nd vector.
+// On a match the item is removed or marked free, m_SumFreeSize is increased by its
+// size, and CleanupAfterFree() is called. If nothing matches, an assert fires.
+void VmaBlockMetadata_Linear::Free(VmaAllocHandle allocHandle)
+{
+    SuballocationVectorType& suballocations1st = AccessSuballocations1st();
+    SuballocationVectorType& suballocations2nd = AccessSuballocations2nd();
+    VkDeviceSize offset = (VkDeviceSize)allocHandle - 1;
+
+    if (!suballocations1st.empty())
+    {
+        // First allocation: Mark it as next empty at the beginning.
+        VmaSuballocation& firstSuballoc = suballocations1st[m_1stNullItemsBeginCount];
+        if (firstSuballoc.offset == offset)
+        {
+            firstSuballoc.type = VMA_SUBALLOCATION_TYPE_FREE;
+            firstSuballoc.userData = VMA_NULL;
+            m_SumFreeSize += firstSuballoc.size;
+            ++m_1stNullItemsBeginCount;
+            CleanupAfterFree();
+            return;
+        }
+    }
+
+    // Last allocation in 2-part ring buffer or top of upper stack (same logic).
+    if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER ||
+        m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK)
+    {
+        VmaSuballocation& lastSuballoc = suballocations2nd.back();
+        if (lastSuballoc.offset == offset)
+        {
+            m_SumFreeSize += lastSuballoc.size;
+            suballocations2nd.pop_back();
+            CleanupAfterFree();
+            return;
+        }
+    }
+    // Last allocation in 1st vector.
+    // The emptiness check prevents calling back() on an empty vector when Free()
+    // is invoked with a stale handle on a block that holds no allocations; in that
+    // case control falls through to the "not found" assert at the bottom.
+    else if (m_2ndVectorMode == SECOND_VECTOR_EMPTY && !suballocations1st.empty())
+    {
+        VmaSuballocation& lastSuballoc = suballocations1st.back();
+        if (lastSuballoc.offset == offset)
+        {
+            m_SumFreeSize += lastSuballoc.size;
+            suballocations1st.pop_back();
+            CleanupAfterFree();
+            return;
+        }
+    }
+
+    VmaSuballocation refSuballoc;
+    refSuballoc.offset = offset;
+    // Rest of members stays uninitialized intentionally for better performance.
+
+    // Item from the middle of 1st vector.
+    {
+        const SuballocationVectorType::iterator it = VmaBinaryFindSorted(
+            suballocations1st.begin() + m_1stNullItemsBeginCount,
+            suballocations1st.end(),
+            refSuballoc,
+            VmaSuballocationOffsetLess());
+        if (it != suballocations1st.end())
+        {
+            // Items in the middle are only marked free; CleanupAfterFree() decides about compaction.
+            it->type = VMA_SUBALLOCATION_TYPE_FREE;
+            it->userData = VMA_NULL;
+            ++m_1stNullItemsMiddleCount;
+            m_SumFreeSize += it->size;
+            CleanupAfterFree();
+            return;
+        }
+    }
+
+    if (m_2ndVectorMode != SECOND_VECTOR_EMPTY)
+    {
+        // Item from the middle of 2nd vector.
+        // The comparator depends on the mode: "less" for ring buffer, "greater" for double stack.
+        const SuballocationVectorType::iterator it = m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER ?
+            VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetLess()) :
+            VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetGreater());
+        if (it != suballocations2nd.end())
+        {
+            it->type = VMA_SUBALLOCATION_TYPE_FREE;
+            it->userData = VMA_NULL;
+            ++m_2ndNullItemsCount;
+            m_SumFreeSize += it->size;
+            CleanupAfterFree();
+            return;
+        }
+    }
+
+    VMA_ASSERT(0 && "Allocation to free not found in linear allocator!");
+}
+
+// Fills `outInfo` with the offset, size and user data of the suballocation
+// identified by `allocHandle` (the handle encodes offset + 1).
+void VmaBlockMetadata_Linear::GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo)
+{
+    const VkDeviceSize decodedOffset = (VkDeviceSize)allocHandle - 1;
+    outInfo.offset = decodedOffset;
+
+    const VmaSuballocation& found = FindSuballocation(decodedOffset);
+    outInfo.size = found.size;
+    outInfo.pUserData = found.userData;
+}
+
+// Returns the user data stored with the suballocation identified by `allocHandle`.
+void* VmaBlockMetadata_Linear::GetAllocationUserData(VmaAllocHandle allocHandle) const
+{
+    // The handle encodes offset + 1.
+    const VkDeviceSize decodedOffset = (VkDeviceSize)allocHandle - 1;
+    const VmaSuballocation& found = FindSuballocation(decodedOffset);
+    return found.userData;
+}
+
+// Not supported by the linear algorithm: always asserts and returns VK_NULL_HANDLE.
+VmaAllocHandle VmaBlockMetadata_Linear::GetAllocationListBegin() const
+{
+    // Only defragmentation needs this function, and defragmentation is disabled for this algorithm.
+    VMA_ASSERT(0);
+    return VK_NULL_HANDLE;
+}
+
+// Not supported by the linear algorithm: always asserts and returns VK_NULL_HANDLE.
+// `prevAlloc` is ignored.
+VmaAllocHandle VmaBlockMetadata_Linear::GetNextAllocation(VmaAllocHandle prevAlloc) const
+{
+    // Only defragmentation needs this function, and defragmentation is disabled for this algorithm.
+    VMA_ASSERT(0);
+    return VK_NULL_HANDLE;
+}
+
+// Not supported by the linear algorithm: always asserts and returns 0.
+// `alloc` is ignored.
+VkDeviceSize VmaBlockMetadata_Linear::GetNextFreeRegionSize(VmaAllocHandle alloc) const
+{
+    // Only defragmentation needs this function, and defragmentation is disabled for this algorithm.
+    VMA_ASSERT(0);
+    return 0;
+}
+
+// Resets the metadata to the "whole block is free" state: both suballocation
+// vectors are emptied, all null-item counters are zeroed and the 2nd vector is marked unused.
+void VmaBlockMetadata_Linear::Clear()
+{
+    // Drop every tracked suballocation.
+    m_Suballocations0.clear();
+    m_Suballocations1.clear();
+
+    // Reset bookkeeping. m_1stVectorIndex is intentionally left as is - it doesn't matter.
+    m_1stNullItemsBeginCount = 0;
+    m_1stNullItemsMiddleCount = 0;
+    m_2ndNullItemsCount = 0;
+    m_2ndVectorMode = SECOND_VECTOR_EMPTY;
+
+    // The entire block is free again.
+    m_SumFreeSize = GetSize();
+}
+
+// Replaces the user data stored with the suballocation identified by `allocHandle`.
+void VmaBlockMetadata_Linear::SetAllocationUserData(VmaAllocHandle allocHandle, void* userData)
+{
+    // The handle encodes offset + 1.
+    const VkDeviceSize decodedOffset = (VkDeviceSize)allocHandle - 1;
+    FindSuballocation(decodedOffset).userData = userData;
+}
+
+// Calls DebugLogAllocation() for every non-free suballocation: first those of the
+// 1st vector (skipping the leading null items), then those of the 2nd vector.
+void VmaBlockMetadata_Linear::DebugLogAllAllocations() const
+{
+    const SuballocationVectorType& firstVec = AccessSuballocations1st();
+    for (size_t index = m_1stNullItemsBeginCount; index < firstVec.size(); ++index)
+    {
+        const VmaSuballocation& item = firstVec[index];
+        if (item.type != VMA_SUBALLOCATION_TYPE_FREE)
+        {
+            DebugLogAllocation(item.offset, item.size, item.userData);
+        }
+    }
+
+    const SuballocationVectorType& secondVec = AccessSuballocations2nd();
+    for (size_t index = 0; index < secondVec.size(); ++index)
+    {
+        const VmaSuballocation& item = secondVec[index];
+        if (item.type != VMA_SUBALLOCATION_TYPE_FREE)
+        {
+            DebugLogAllocation(item.offset, item.size, item.userData);
+        }
+    }
+}
+
+// Looks up the suballocation starting at `offset` by binary search: first in the
+// 1st vector (past the leading null items), then, if the 2nd vector is in use, in the
+// 2nd vector using the comparator that matches its mode.
+// Asserts when nothing is found; in that case the last item of the 1st vector is returned.
+VmaSuballocation& VmaBlockMetadata_Linear::FindSuballocation(VkDeviceSize offset) const
+{
+    const SuballocationVectorType& firstVec = AccessSuballocations1st();
+    const SuballocationVectorType& secondVec = AccessSuballocations2nd();
+
+    // Search key: only `offset` is compared, so the other members are deliberately left uninitialized.
+    VmaSuballocation key;
+    key.offset = offset;
+
+    // Try the 1st vector.
+    const SuballocationVectorType::const_iterator hit1st = VmaBinaryFindSorted(
+        firstVec.begin() + m_1stNullItemsBeginCount,
+        firstVec.end(),
+        key,
+        VmaSuballocationOffsetLess());
+    if (hit1st != firstVec.end())
+    {
+        return const_cast<VmaSuballocation&>(*hit1st);
+    }
+
+    // Try the 2nd vector, if it is in use.
+    if (m_2ndVectorMode != SECOND_VECTOR_EMPTY)
+    {
+        SuballocationVectorType::const_iterator hit2nd;
+        if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER)
+        {
+            hit2nd = VmaBinaryFindSorted(secondVec.begin(), secondVec.end(), key, VmaSuballocationOffsetLess());
+        }
+        else
+        {
+            hit2nd = VmaBinaryFindSorted(secondVec.begin(), secondVec.end(), key, VmaSuballocationOffsetGreater());
+        }
+        if (hit2nd != secondVec.end())
+        {
+            return const_cast<VmaSuballocation&>(*hit2nd);
+        }
+    }
+
+    VMA_ASSERT(0 && "Allocation not found in linear allocator!");
+    return const_cast<VmaSuballocation&>(firstVec.back()); // Should never occur.
+}
+
+// Decides whether the 1st vector should be compacted: true when it holds more
+// than 32 items and null items are at least 1.5 times as numerous as non-null ones.
+bool VmaBlockMetadata_Linear::ShouldCompact1st() const
+{
+    const size_t totalCount = AccessSuballocations1st().size();
+    if (totalCount <= 32)
+    {
+        return false;
+    }
+
+    const size_t nullCount = m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount;
+    const size_t liveCount = totalCount - nullCount;
+    return nullCount * 2 >= liveCount * 3;
+}
+
+// Housekeeping run after every successful Free().
+//
+// If the block is empty, both vectors and all counters are reset. Otherwise:
+// - free items at the beginning of 1st vector are absorbed into m_1stNullItemsBeginCount,
+// - free items at the end of 1st vector and at both ends of 2nd vector are removed,
+// - 1st vector is compacted when ShouldCompact1st() says so,
+// - m_2ndVectorMode is reset when 2nd vector becomes empty,
+// - when 1st vector has no items left and 2nd vector is a ring buffer, the roles of
+// the two vectors are exchanged by toggling m_1stVectorIndex.
+void VmaBlockMetadata_Linear::CleanupAfterFree()
+{
+ SuballocationVectorType& suballocations1st = AccessSuballocations1st();
+ SuballocationVectorType& suballocations2nd = AccessSuballocations2nd();
+
+ if (IsEmpty())
+ {
+ suballocations1st.clear();
+ suballocations2nd.clear();
+ m_1stNullItemsBeginCount = 0;
+ m_1stNullItemsMiddleCount = 0;
+ m_2ndNullItemsCount = 0;
+ m_2ndVectorMode = SECOND_VECTOR_EMPTY;
+ }
+ else
+ {
+ // Snapshot taken before any trimming below. Their difference (used later as
+ // nonNullItemCount) is unaffected by the trimming: moving an item from "middle" to
+ // "begin" keeps the sum, and each pop_back() lowers both the size and the null count by one.
+ const size_t suballoc1stCount = suballocations1st.size();
+ const size_t nullItem1stCount = m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount;
+ VMA_ASSERT(nullItem1stCount <= suballoc1stCount);
+
+ // Find more null items at the beginning of 1st vector.
+ while (m_1stNullItemsBeginCount < suballoc1stCount &&
+ suballocations1st[m_1stNullItemsBeginCount].type == VMA_SUBALLOCATION_TYPE_FREE)
+ {
+ ++m_1stNullItemsBeginCount;
+ --m_1stNullItemsMiddleCount;
+ }
+
+ // Find more null items at the end of 1st vector.
+ while (m_1stNullItemsMiddleCount > 0 &&
+ suballocations1st.back().type == VMA_SUBALLOCATION_TYPE_FREE)
+ {
+ --m_1stNullItemsMiddleCount;
+ suballocations1st.pop_back();
+ }
+
+ // Find more null items at the end of 2nd vector.
+ while (m_2ndNullItemsCount > 0 &&
+ suballocations2nd.back().type == VMA_SUBALLOCATION_TYPE_FREE)
+ {
+ --m_2ndNullItemsCount;
+ suballocations2nd.pop_back();
+ }
+
+ // Find more null items at the beginning of 2nd vector.
+ while (m_2ndNullItemsCount > 0 &&
+ suballocations2nd[0].type == VMA_SUBALLOCATION_TYPE_FREE)
+ {
+ --m_2ndNullItemsCount;
+ VmaVectorRemove(suballocations2nd, 0);
+ }
+
+ if (ShouldCompact1st())
+ {
+ // Move every non-free item to the front, preserving order, then shrink the vector.
+ const size_t nonNullItemCount = suballoc1stCount - nullItem1stCount;
+ size_t srcIndex = m_1stNullItemsBeginCount;
+ for (size_t dstIndex = 0; dstIndex < nonNullItemCount; ++dstIndex)
+ {
+ while (suballocations1st[srcIndex].type == VMA_SUBALLOCATION_TYPE_FREE)
+ {
+ ++srcIndex;
+ }
+ if (dstIndex != srcIndex)
+ {
+ suballocations1st[dstIndex] = suballocations1st[srcIndex];
+ }
+ ++srcIndex;
+ }
+ suballocations1st.resize(nonNullItemCount);
+ m_1stNullItemsBeginCount = 0;
+ m_1stNullItemsMiddleCount = 0;
+ }
+
+ // 2nd vector became empty.
+ if (suballocations2nd.empty())
+ {
+ m_2ndVectorMode = SECOND_VECTOR_EMPTY;
+ }
+
+ // 1st vector became empty.
+ if (suballocations1st.size() - m_1stNullItemsBeginCount == 0)
+ {
+ suballocations1st.clear();
+ m_1stNullItemsBeginCount = 0;
+
+ if (!suballocations2nd.empty() && m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER)
+ {
+ // Swap 1st with 2nd. Now 2nd is empty.
+ m_2ndVectorMode = SECOND_VECTOR_EMPTY;
+ // Null items of the old 2nd vector become null items of the new 1st vector;
+ // the loop below splits them into "begin" and "middle" counts.
+ m_1stNullItemsMiddleCount = m_2ndNullItemsCount;
+ while (m_1stNullItemsBeginCount < suballocations2nd.size() &&
+ suballocations2nd[m_1stNullItemsBeginCount].type == VMA_SUBALLOCATION_TYPE_FREE)
+ {
+ ++m_1stNullItemsBeginCount;
+ --m_1stNullItemsMiddleCount;
+ }
+ m_2ndNullItemsCount = 0;
+ // NOTE(review): toggling this index presumably changes which member vector
+ // AccessSuballocations1st()/2nd() return - confirm against their definitions.
+ m_1stVectorIndex ^= 1;
+ }
+ }
+ }
+
+ VMA_HEAVY_ASSERT(Validate());
+}
+
+// Tries to find a place for a new allocation growing from lower addresses.
+//
+// Two placements are attempted, in this order:
+// 1. After the last item of 1st vector (when 2nd vector is empty or a double stack).
+// The free space ends at the last item of 2nd vector (double stack) or at the block end.
+// 2. After the last item of 2nd vector (when 2nd vector is empty or a ring buffer).
+// The free space ends at the first item of 1st vector.
+// In both cases the candidate offset is aligned up to allocAlignment, possibly raised
+// further to bufferImageGranularity when a conflicting neighbor shares the same page, and
+// rejected if a conflicting item follows on the same page.
+//
+// On success fills pAllocationRequest->allocHandle (offset + 1) and ->type, and returns true.
+// Returns false when no placement fits. `strategy` is not used in this function.
+bool VmaBlockMetadata_Linear::CreateAllocationRequest_LowerAddress(
+ VkDeviceSize allocSize,
+ VkDeviceSize allocAlignment,
+ VmaSuballocationType allocType,
+ uint32_t strategy,
+ VmaAllocationRequest* pAllocationRequest)
+{
+ const VkDeviceSize blockSize = GetSize();
+ const VkDeviceSize debugMargin = GetDebugMargin();
+ const VkDeviceSize bufferImageGranularity = GetBufferImageGranularity();
+ SuballocationVectorType& suballocations1st = AccessSuballocations1st();
+ SuballocationVectorType& suballocations2nd = AccessSuballocations2nd();
+
+ if (m_2ndVectorMode == SECOND_VECTOR_EMPTY || m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK)
+ {
+ // Try to allocate at the end of 1st vector.
+
+ VkDeviceSize resultBaseOffset = 0;
+ if (!suballocations1st.empty())
+ {
+ const VmaSuballocation& lastSuballoc = suballocations1st.back();
+ resultBaseOffset = lastSuballoc.offset + lastSuballoc.size + debugMargin;
+ }
+
+ // Start from offset equal to beginning of free space.
+ VkDeviceSize resultOffset = resultBaseOffset;
+
+ // Apply alignment.
+ resultOffset = VmaAlignUp(resultOffset, allocAlignment);
+
+ // Check previous suballocations for BufferImageGranularity conflicts.
+ // Make bigger alignment if necessary.
+ if (bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment && !suballocations1st.empty())
+ {
+ bool bufferImageGranularityConflict = false;
+ // Iterates from the last index down to 0 (the post-decrement stops the loop after index 0).
+ for (size_t prevSuballocIndex = suballocations1st.size(); prevSuballocIndex--; )
+ {
+ const VmaSuballocation& prevSuballoc = suballocations1st[prevSuballocIndex];
+ if (VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity))
+ {
+ if (VmaIsBufferImageGranularityConflict(prevSuballoc.type, allocType))
+ {
+ bufferImageGranularityConflict = true;
+ break;
+ }
+ }
+ else
+ // Already on previous page.
+ break;
+ }
+ if (bufferImageGranularityConflict)
+ {
+ resultOffset = VmaAlignUp(resultOffset, bufferImageGranularity);
+ }
+ }
+
+ const VkDeviceSize freeSpaceEnd = m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ?
+ suballocations2nd.back().offset : blockSize;
+
+ // There is enough free space at the end after alignment.
+ if (resultOffset + allocSize + debugMargin <= freeSpaceEnd)
+ {
+ // Check next suballocations for BufferImageGranularity conflicts.
+ // If conflict exists, allocation cannot be made here.
+ if ((allocSize % bufferImageGranularity || resultOffset % bufferImageGranularity) && m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK)
+ {
+ for (size_t nextSuballocIndex = suballocations2nd.size(); nextSuballocIndex--; )
+ {
+ const VmaSuballocation& nextSuballoc = suballocations2nd[nextSuballocIndex];
+ if (VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity))
+ {
+ if (VmaIsBufferImageGranularityConflict(allocType, nextSuballoc.type))
+ {
+ return false;
+ }
+ }
+ else
+ {
+ // Already on previous page.
+ break;
+ }
+ }
+ }
+
+ // All tests passed: Success.
+ // The handle stores offset + 1.
+ pAllocationRequest->allocHandle = (VmaAllocHandle)(resultOffset + 1);
+ // pAllocationRequest->item, customData unused.
+ pAllocationRequest->type = VmaAllocationRequestType::EndOf1st;
+ return true;
+ }
+ }
+
+ // Wrap-around to end of 2nd vector. Try to allocate there, watching for the
+ // beginning of 1st vector as the end of free space.
+ if (m_2ndVectorMode == SECOND_VECTOR_EMPTY || m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER)
+ {
+ VMA_ASSERT(!suballocations1st.empty());
+
+ VkDeviceSize resultBaseOffset = 0;
+ if (!suballocations2nd.empty())
+ {
+ const VmaSuballocation& lastSuballoc = suballocations2nd.back();
+ resultBaseOffset = lastSuballoc.offset + lastSuballoc.size + debugMargin;
+ }
+
+ // Start from offset equal to beginning of free space.
+ VkDeviceSize resultOffset = resultBaseOffset;
+
+ // Apply alignment.
+ resultOffset = VmaAlignUp(resultOffset, allocAlignment);
+
+ // Check previous suballocations for BufferImageGranularity conflicts.
+ // Make bigger alignment if necessary.
+ if (bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment && !suballocations2nd.empty())
+ {
+ bool bufferImageGranularityConflict = false;
+ for (size_t prevSuballocIndex = suballocations2nd.size(); prevSuballocIndex--; )
+ {
+ const VmaSuballocation& prevSuballoc = suballocations2nd[prevSuballocIndex];
+ if (VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity))
+ {
+ if (VmaIsBufferImageGranularityConflict(prevSuballoc.type, allocType))
+ {
+ bufferImageGranularityConflict = true;
+ break;
+ }
+ }
+ else
+ // Already on previous page.
+ break;
+ }
+ if (bufferImageGranularityConflict)
+ {
+ resultOffset = VmaAlignUp(resultOffset, bufferImageGranularity);
+ }
+ }
+
+ // Index of the first item of 1st vector that is not a leading null item.
+ size_t index1st = m_1stNullItemsBeginCount;
+
+ // There is enough free space at the end after alignment.
+ if ((index1st == suballocations1st.size() && resultOffset + allocSize + debugMargin <= blockSize) ||
+ (index1st < suballocations1st.size() && resultOffset + allocSize + debugMargin <= suballocations1st[index1st].offset))
+ {
+ // Check next suballocations for BufferImageGranularity conflicts.
+ // If conflict exists, allocation cannot be made here.
+ if (allocSize % bufferImageGranularity || resultOffset % bufferImageGranularity)
+ {
+ for (size_t nextSuballocIndex = index1st;
+ nextSuballocIndex < suballocations1st.size();
+ nextSuballocIndex++)
+ {
+ const VmaSuballocation& nextSuballoc = suballocations1st[nextSuballocIndex];
+ if (VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity))
+ {
+ if (VmaIsBufferImageGranularityConflict(allocType, nextSuballoc.type))
+ {
+ return false;
+ }
+ }
+ else
+ {
+ // Already on next page.
+ break;
+ }
+ }
+ }
+
+ // All tests passed: Success.
+ // The handle stores offset + 1.
+ pAllocationRequest->allocHandle = (VmaAllocHandle)(resultOffset + 1);
+ pAllocationRequest->type = VmaAllocationRequestType::EndOf2nd;
+ // pAllocationRequest->item, customData unused.
+ return true;
+ }
+ }
+
+ return false;
+}
+
+// Tries to find a place for a new allocation growing down from the upper end of the
+// block (2nd vector used as the upper stack of a double stack).
+//
+// The candidate offset starts just below the last item of 2nd vector (or at the block
+// end when 2nd vector is empty), is lowered by the debug margin, aligned down to
+// allocAlignment, and possibly aligned down further to bufferImageGranularity when a
+// conflicting item of 2nd vector shares the same page. The request succeeds only if the
+// result does not overlap the end of 1st vector (plus debug margin) and no conflicting
+// item of 1st vector shares the same page.
+//
+// On success fills pAllocationRequest->allocHandle (offset + 1) and ->type, and returns true.
+// Asserts and returns false if the 2nd vector is already used as a ring buffer.
+// `strategy` is not used in this function.
+bool VmaBlockMetadata_Linear::CreateAllocationRequest_UpperAddress(
+ VkDeviceSize allocSize,
+ VkDeviceSize allocAlignment,
+ VmaSuballocationType allocType,
+ uint32_t strategy,
+ VmaAllocationRequest* pAllocationRequest)
+{
+ const VkDeviceSize blockSize = GetSize();
+ const VkDeviceSize bufferImageGranularity = GetBufferImageGranularity();
+ SuballocationVectorType& suballocations1st = AccessSuballocations1st();
+ SuballocationVectorType& suballocations2nd = AccessSuballocations2nd();
+
+ if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER)
+ {
+ VMA_ASSERT(0 && "Trying to use pool with linear algorithm as double stack, while it is already being used as ring buffer.");
+ return false;
+ }
+
+ // Try to allocate before 2nd.back(), or end of block if 2nd.empty().
+ if (allocSize > blockSize)
+ {
+ return false;
+ }
+ VkDeviceSize resultBaseOffset = blockSize - allocSize;
+ if (!suballocations2nd.empty())
+ {
+ const VmaSuballocation& lastSuballoc = suballocations2nd.back();
+ // This subtraction may wrap around when allocSize > lastSuballoc.offset;
+ // the check right below rejects that case before the value is used.
+ resultBaseOffset = lastSuballoc.offset - allocSize;
+ if (allocSize > lastSuballoc.offset)
+ {
+ return false;
+ }
+ }
+
+ // Start from offset equal to end of free space.
+ VkDeviceSize resultOffset = resultBaseOffset;
+
+ const VkDeviceSize debugMargin = GetDebugMargin();
+
+ // Apply debugMargin at the end.
+ if (debugMargin > 0)
+ {
+ // Guard against unsigned underflow of the subtraction below.
+ if (resultOffset < debugMargin)
+ {
+ return false;
+ }
+ resultOffset -= debugMargin;
+ }
+
+ // Apply alignment.
+ resultOffset = VmaAlignDown(resultOffset, allocAlignment);
+
+ // Check next suballocations from 2nd for BufferImageGranularity conflicts.
+ // Make bigger alignment if necessary.
+ if (bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment && !suballocations2nd.empty())
+ {
+ bool bufferImageGranularityConflict = false;
+ // Iterates from the last index down to 0 (the post-decrement stops the loop after index 0).
+ for (size_t nextSuballocIndex = suballocations2nd.size(); nextSuballocIndex--; )
+ {
+ const VmaSuballocation& nextSuballoc = suballocations2nd[nextSuballocIndex];
+ if (VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity))
+ {
+ if (VmaIsBufferImageGranularityConflict(nextSuballoc.type, allocType))
+ {
+ bufferImageGranularityConflict = true;
+ break;
+ }
+ }
+ else
+ // Already on previous page.
+ break;
+ }
+ if (bufferImageGranularityConflict)
+ {
+ resultOffset = VmaAlignDown(resultOffset, bufferImageGranularity);
+ }
+ }
+
+ // There is enough free space.
+ const VkDeviceSize endOf1st = !suballocations1st.empty() ?
+ suballocations1st.back().offset + suballocations1st.back().size :
+ 0;
+ if (endOf1st + debugMargin <= resultOffset)
+ {
+ // Check previous suballocations for BufferImageGranularity conflicts.
+ // If conflict exists, allocation cannot be made here.
+ if (bufferImageGranularity > 1)
+ {
+ for (size_t prevSuballocIndex = suballocations1st.size(); prevSuballocIndex--; )
+ {
+ const VmaSuballocation& prevSuballoc = suballocations1st[prevSuballocIndex];
+ if (VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity))
+ {
+ if (VmaIsBufferImageGranularityConflict(allocType, prevSuballoc.type))
+ {
+ return false;
+ }
+ }
+ else
+ {
+ // Already on next page.
+ break;
+ }
+ }
+ }
+
+ // All tests passed: Success.
+ // The handle stores offset + 1.
+ pAllocationRequest->allocHandle = (VmaAllocHandle)(resultOffset + 1);
+ // pAllocationRequest->item unused.
+ pAllocationRequest->type = VmaAllocationRequestType::UpperAddress;
+ return true;
+ }
+
+ return false;
+}
+#endif // _VMA_BLOCK_METADATA_LINEAR_FUNCTIONS
+#endif // _VMA_BLOCK_METADATA_LINEAR
+
+#ifndef _VMA_BLOCK_METADATA_TLSF
+// To avoid searching the current larger region when the first allocation attempt fails, and to skip
+// to a smaller range instead, use VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT as strategy in CreateAllocationRequest().
+// When fragmentation and reuse of previous blocks don't matter, use
+// VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT for the fastest possible allocation time.
+ /* Block metadata that tracks suballocations with a TLSF (two-level segregated fit) scheme.
+  * Taken and free regions form a doubly linked "physical" chain of Block nodes; free
+  * blocks are additionally linked into size-bucketed free lists (m_FreeList).
+  */ class VmaBlockMetadata_TLSF : public VmaBlockMetadata
+ {
+ VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockMetadata_TLSF)
+ public:
+ VmaBlockMetadata_TLSF(const VkAllocationCallbacks* pAllocationCallbacks,
+ VkDeviceSize bufferImageGranularity, bool isVirtual);
+ virtual ~VmaBlockMetadata_TLSF();
+
+ size_t GetAllocationCount() const override { return m_AllocCount; }
+ size_t GetFreeRegionsCount() const override { return m_BlocksFreeCount + 1; } // +1 counts the null block
+ VkDeviceSize GetSumFreeSize() const override { return m_BlocksFreeSize + m_NullBlock->size; } // listed free blocks plus the null block
+ bool IsEmpty() const override { return m_NullBlock->offset == 0; } // null block starting at offset 0 means nothing precedes it
+ VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const override { return ((Block*)allocHandle)->offset; } // the handle is a Block pointer
+
+ void Init(VkDeviceSize size) override;
+ bool Validate() const override;
+
+ void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const override;
+ void AddStatistics(VmaStatistics& inoutStats) const override;
+
+ #if VMA_STATS_STRING_ENABLED
+ void PrintDetailedMap(class VmaJsonWriter& json) const override;
+ #endif
+
+ bool CreateAllocationRequest(
+ VkDeviceSize allocSize,
+ VkDeviceSize allocAlignment,
+ bool upperAddress,
+ VmaSuballocationType allocType,
+ uint32_t strategy,
+ VmaAllocationRequest* pAllocationRequest) override;
+
+ VkResult CheckCorruption(const void* pBlockData) override;
+ void Alloc(
+ const VmaAllocationRequest& request,
+ VmaSuballocationType type,
+ void* userData) override;
+
+ void Free(VmaAllocHandle allocHandle) override;
+ void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) override;
+ void* GetAllocationUserData(VmaAllocHandle allocHandle) const override;
+ VmaAllocHandle GetAllocationListBegin() const override;
+ VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const override;
+ VkDeviceSize GetNextFreeRegionSize(VmaAllocHandle alloc) const override;
+ void Clear() override;
+ void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) override;
+ void DebugLogAllAllocations() const override;
+
+ private:
+ // According to original paper it should be preferable 4 or 5:
+ // M. Masmano, I. Ripoll, A. Crespo, and J. Real "TLSF: a New Dynamic Memory Allocator for Real-Time Systems"
+ // http://www.gii.upv.es/tlsf/files/ecrts04_tlsf.pdf
+ static const uint8_t SECOND_LEVEL_INDEX = 5; // log2 of the second-level list count per memory class (1 << 5 lists)
+ static const uint16_t SMALL_BUFFER_SIZE = 256; // sizes up to this value map to memory class 0 (see SizeToMemoryClass)
+ static const uint32_t INITIAL_BLOCK_ALLOC_COUNT = 16; // passed to the m_BlockAllocator constructor
+ static const uint8_t MEMORY_CLASS_SHIFT = 7; // subtracted from the highest-set-bit index to obtain the memory class
+ static const uint8_t MAX_MEMORY_CLASSES = 65 - MEMORY_CLASS_SHIFT; // capacity of m_InnerIsFreeBitmap
+
+ class Block // one node of the physical chain; either taken or free
+ {
+ public:
+ VkDeviceSize offset;
+ VkDeviceSize size;
+ Block* prevPhysical;
+ Block* nextPhysical;
+
+ void MarkFree() { prevFree = VMA_NULL; }
+ void MarkTaken() { prevFree = this; } // self-pointer is the "taken" sentinel
+ bool IsFree() const { return prevFree != this; }
+ void*& UserData() { VMA_HEAVY_ASSERT(!IsFree()); return userData; } // valid only while taken (shares storage with nextFree)
+ Block*& PrevFree() { return prevFree; }
+ Block*& NextFree() { VMA_HEAVY_ASSERT(IsFree()); return nextFree; } // valid only while free (shares storage with userData)
+
+ private:
+ Block* prevFree; // Address of the same block here indicates that block is taken
+ union
+ {
+ Block* nextFree;
+ void* userData;
+ };
+ };
+
+ size_t m_AllocCount;
+ // Total number of free blocks besides null block
+ size_t m_BlocksFreeCount;
+ // Total size of free blocks excluding null block
+ VkDeviceSize m_BlocksFreeSize;
+ uint32_t m_IsFreeBitmap; // bit N set: memory class N has a non-empty free list (kept by Insert/RemoveFreeBlock)
+ uint8_t m_MemoryClasses; // set by Init() to the memory class of the block size plus 2
+ uint32_t m_InnerIsFreeBitmap[MAX_MEMORY_CLASSES]; // per memory class: bit N set when second-level list N is non-empty
+ uint32_t m_ListsCount; // number of entries in m_FreeList
+ /*
+ * 0: 0-3 lists for small buffers
+ * 1+: 0-(2^SLI-1) lists for normal buffers
+ */
+ Block** m_FreeList; // array of free-list heads, allocated in Init()
+ VmaPoolAllocator<Block> m_BlockAllocator;
+ Block* m_NullBlock; // last block of the physical chain; always free and never stored in m_FreeList
+ VmaBlockBufferImageGranularity m_GranularityHandler; // only initialised and consulted when !IsVirtual()
+
+ uint8_t SizeToMemoryClass(VkDeviceSize size) const;
+ uint16_t SizeToSecondIndex(VkDeviceSize size, uint8_t memoryClass) const;
+ uint32_t GetListIndex(uint8_t memoryClass, uint16_t secondIndex) const;
+ uint32_t GetListIndex(VkDeviceSize size) const;
+
+ void RemoveFreeBlock(Block* block);
+ void InsertFreeBlock(Block* block);
+ void MergeBlock(Block* block, Block* prev);
+
+ Block* FindFreeBlock(VkDeviceSize size, uint32_t& listIndex) const;
+ bool CheckBlock(
+ Block& block,
+ uint32_t listIndex,
+ VkDeviceSize allocSize,
+ VkDeviceSize allocAlignment,
+ VmaSuballocationType allocType,
+ VmaAllocationRequest* pAllocationRequest);
+ };
+
+#ifndef _VMA_BLOCK_METADATA_TLSF_FUNCTIONS
+ // Creates empty TLSF metadata. Counters and bitmaps start at zero and no free-list
+ // array or null block exists yet; both are created by Init().
+ VmaBlockMetadata_TLSF::VmaBlockMetadata_TLSF(
+     const VkAllocationCallbacks* pAllocationCallbacks,
+     VkDeviceSize bufferImageGranularity,
+     bool isVirtual)
+     : VmaBlockMetadata(pAllocationCallbacks, bufferImageGranularity, isVirtual)
+     , m_AllocCount(0)
+     , m_BlocksFreeCount(0)
+     , m_BlocksFreeSize(0)
+     , m_IsFreeBitmap(0)
+     , m_MemoryClasses(0)
+     , m_ListsCount(0)
+     , m_FreeList(VMA_NULL)
+     , m_BlockAllocator(pAllocationCallbacks, INITIAL_BLOCK_ALLOC_COUNT)
+     , m_NullBlock(VMA_NULL)
+     , m_GranularityHandler(bufferImageGranularity)
+ {
+ }
+
+ // Releases the free-list head array (if Init() created one) and the granularity handler's storage.
+ VmaBlockMetadata_TLSF::~VmaBlockMetadata_TLSF()
+ {
+     if (m_FreeList != VMA_NULL)
+     {
+         vma_delete_array(GetAllocationCallbacks(), m_FreeList, m_ListsCount);
+     }
+
+     m_GranularityHandler.Destroy(GetAllocationCallbacks());
+ }
+
+ // Prepares the metadata for a block of `size` bytes: creates the null block that
+ // spans the whole range and allocates a zeroed array of free-list heads sized for
+ // the largest bucket a block of this size can fall into.
+ void VmaBlockMetadata_TLSF::Init(VkDeviceSize size)
+ {
+     VmaBlockMetadata::Init(size);
+
+     const bool isVirtual = IsVirtual();
+     if (!isVirtual)
+     {
+         m_GranularityHandler.Init(GetAllocationCallbacks(), size);
+     }
+
+     // The null block initially covers the entire range and has no physical neighbours.
+     Block* const nullBlock = m_BlockAllocator.Alloc();
+     nullBlock->size = size;
+     nullBlock->offset = 0;
+     nullBlock->prevPhysical = VMA_NULL;
+     nullBlock->nextPhysical = VMA_NULL;
+     nullBlock->MarkFree();
+     nullBlock->NextFree() = VMA_NULL;
+     nullBlock->PrevFree() = VMA_NULL;
+     m_NullBlock = nullBlock;
+
+     const uint8_t memoryClass = SizeToMemoryClass(size);
+     const uint16_t sli = SizeToSecondIndex(size, memoryClass);
+
+     // Lists for the regular memory classes, up to and including the bucket of `size` itself...
+     m_ListsCount = (memoryClass == 0 ? 0 : (memoryClass - 1) * (1UL << SECOND_LEVEL_INDEX) + sli) + 1;
+     // ...plus the lists reserved for memory class 0 (small buffers).
+     if (isVirtual)
+     {
+         m_ListsCount += 1UL << SECOND_LEVEL_INDEX;
+     }
+     else
+     {
+         m_ListsCount += 4;
+     }
+
+     m_MemoryClasses = memoryClass + uint8_t(2);
+     memset(m_InnerIsFreeBitmap, 0, MAX_MEMORY_CLASSES * sizeof(uint32_t));
+
+     m_FreeList = vma_new_array(GetAllocationCallbacks(), Block*, m_ListsCount);
+     memset(m_FreeList, 0, m_ListsCount * sizeof(Block*));
+ }
+
+ /* Debug consistency check of the whole TLSF structure.
+  * First verifies the links of every free list, then walks the physical block chain
+  * backwards from the null block and cross-checks offsets, sizes and the cached counters.
+  * Returns true when every VMA_VALIDATE condition holds.
+  * NOTE(review): what happens on a failed condition is defined by the VMA_VALIDATE
+  * macro, which is outside this view - presumably it returns false; confirm.
+  */ bool VmaBlockMetadata_TLSF::Validate() const
+ {
+ VMA_VALIDATE(GetSumFreeSize() <= GetSize());
+
+ VkDeviceSize calculatedSize = m_NullBlock->size; // running sum of all block sizes, null block included
+ VkDeviceSize calculatedFreeSize = m_NullBlock->size; // running sum of free block sizes, null block included
+ size_t allocCount = 0;
+ size_t freeCount = 0;
+
+ // Check integrity of free lists
+ for (uint32_t list = 0; list < m_ListsCount; ++list)
+ {
+ Block* block = m_FreeList[list];
+ if (block != VMA_NULL)
+ {
+ VMA_VALIDATE(block->IsFree());
+ VMA_VALIDATE(block->PrevFree() == VMA_NULL); // list head has no predecessor
+ while (block->NextFree())
+ {
+ VMA_VALIDATE(block->NextFree()->IsFree());
+ VMA_VALIDATE(block->NextFree()->PrevFree() == block); // back link must mirror the forward link
+ block = block->NextFree();
+ }
+ }
+ }
+
+ VkDeviceSize nextOffset = m_NullBlock->offset; // offset at which the previously visited (physically next) block starts
+ auto validateCtx = m_GranularityHandler.StartValidation(GetAllocationCallbacks(), IsVirtual());
+
+ VMA_VALIDATE(m_NullBlock->nextPhysical == VMA_NULL); // null block is always the last physical block
+ if (m_NullBlock->prevPhysical)
+ {
+ VMA_VALIDATE(m_NullBlock->prevPhysical->nextPhysical == m_NullBlock);
+ }
+ // Check all blocks
+ for (Block* prev = m_NullBlock->prevPhysical; prev != VMA_NULL; prev = prev->prevPhysical)
+ {
+ VMA_VALIDATE(prev->offset + prev->size == nextOffset); // blocks must tile the range without gaps or overlap
+ nextOffset = prev->offset;
+ calculatedSize += prev->size;
+
+ uint32_t listIndex = GetListIndex(prev->size);
+ if (prev->IsFree())
+ {
+ ++freeCount;
+ // Check if free block belongs to free list
+ Block* freeBlock = m_FreeList[listIndex];
+ VMA_VALIDATE(freeBlock != VMA_NULL);
+
+ bool found = false;
+ do
+ {
+ if (freeBlock == prev)
+ found = true;
+
+ freeBlock = freeBlock->NextFree();
+ } while (!found && freeBlock != VMA_NULL);
+
+ VMA_VALIDATE(found);
+ calculatedFreeSize += prev->size;
+ }
+ else
+ {
+ ++allocCount;
+ // Check if taken block is not on a free list
+ Block* freeBlock = m_FreeList[listIndex];
+ while (freeBlock)
+ {
+ VMA_VALIDATE(freeBlock != prev);
+ freeBlock = freeBlock->NextFree();
+ }
+
+ if (!IsVirtual())
+ {
+ VMA_VALIDATE(m_GranularityHandler.Validate(validateCtx, prev->offset, prev->size));
+ }
+ }
+
+ if (prev->prevPhysical)
+ {
+ VMA_VALIDATE(prev->prevPhysical->nextPhysical == prev);
+ }
+ }
+
+ if (!IsVirtual())
+ {
+ VMA_VALIDATE(m_GranularityHandler.FinishValidation(validateCtx));
+ }
+
+ VMA_VALIDATE(nextOffset == 0); // the walk must end at the start of the range
+ VMA_VALIDATE(calculatedSize == GetSize());
+ VMA_VALIDATE(calculatedFreeSize == GetSumFreeSize());
+ VMA_VALIDATE(allocCount == m_AllocCount);
+ VMA_VALIDATE(freeCount == m_BlocksFreeCount);
+
+ return true;
+ }
+
+ // Accumulates this block into `inoutStats`: one block of GetSize() bytes, the null
+ // block (when non-empty) as an unused range, and every other physical block as either
+ // an unused range or an allocation.
+ void VmaBlockMetadata_TLSF::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const
+ {
+     ++inoutStats.statistics.blockCount;
+     inoutStats.statistics.blockBytes += GetSize();
+
+     if (m_NullBlock->size > 0)
+     {
+         VmaAddDetailedStatisticsUnusedRange(inoutStats, m_NullBlock->size);
+     }
+
+     // Walk the physical chain backwards, starting just before the null block.
+     Block* cursor = m_NullBlock->prevPhysical;
+     while (cursor != VMA_NULL)
+     {
+         if (cursor->IsFree())
+         {
+             VmaAddDetailedStatisticsUnusedRange(inoutStats, cursor->size);
+         }
+         else
+         {
+             VmaAddDetailedStatisticsAllocation(inoutStats, cursor->size);
+         }
+         cursor = cursor->prevPhysical;
+     }
+ }
+
+ // Accumulates the cheap summary counters for this block into `inoutStats`.
+ // Allocated bytes are derived as total size minus total free size.
+ void VmaBlockMetadata_TLSF::AddStatistics(VmaStatistics& inoutStats) const
+ {
+     ++inoutStats.blockCount;
+     inoutStats.allocationCount += (uint32_t)m_AllocCount;
+
+     inoutStats.blockBytes += GetSize();
+     inoutStats.allocationBytes += GetSize() - GetSumFreeSize();
+ }
+
+#if VMA_STATS_STRING_ENABLED
+ // Writes a JSON description of every physical block, in ascending offset order,
+ // followed by the null block when it is non-empty.
+ void VmaBlockMetadata_TLSF::PrintDetailedMap(class VmaJsonWriter& json) const
+ {
+     const size_t blockCount = m_AllocCount + m_BlocksFreeCount;
+     VmaStlAllocator<Block*> allocator(GetAllocationCallbacks());
+     VmaVector<Block*, VmaStlAllocator<Block*>> blockList(blockCount, allocator);
+
+     // The chain is only walkable backwards from the null block, so fill the
+     // vector from its end to obtain ascending order.
+     size_t slot = blockCount;
+     Block* cursor = m_NullBlock->prevPhysical;
+     while (cursor != VMA_NULL)
+     {
+         blockList[--slot] = cursor;
+         cursor = cursor->prevPhysical;
+     }
+     VMA_ASSERT(slot == 0);
+
+     VmaDetailedStatistics stats;
+     VmaClearDetailedStatistics(stats);
+     AddDetailedStatistics(stats);
+
+     PrintDetailedMap_Begin(json,
+         stats.statistics.blockBytes - stats.statistics.allocationBytes,
+         stats.statistics.allocationCount,
+         stats.unusedRangeCount);
+
+     for (; slot < blockCount; ++slot)
+     {
+         Block* const entry = blockList[slot];
+         if (!entry->IsFree())
+         {
+             PrintDetailedMap_Allocation(json, entry->offset, entry->size, entry->UserData());
+         }
+         else
+         {
+             PrintDetailedMap_UnusedRange(json, entry->offset, entry->size);
+         }
+     }
+
+     if (m_NullBlock->size > 0)
+     {
+         PrintDetailedMap_UnusedRange(json, m_NullBlock->offset, m_NullBlock->size);
+     }
+
+     PrintDetailedMap_End(json);
+ }
+#endif
+
+ /* Searches for a free block able to hold allocSize bytes at allocAlignment.
+  * On success CheckBlock() fills *pAllocationRequest and this function returns true;
+  * it returns false when no candidate block passes CheckBlock().
+  * upperAddress must be false (asserted). The strategy bits only change the order in
+  * which the best-fit bucket, the next larger bucket and the null block are tried.
+  */ bool VmaBlockMetadata_TLSF::CreateAllocationRequest(
+ VkDeviceSize allocSize,
+ VkDeviceSize allocAlignment,
+ bool upperAddress,
+ VmaSuballocationType allocType,
+ uint32_t strategy,
+ VmaAllocationRequest* pAllocationRequest)
+ {
+ VMA_ASSERT(allocSize > 0 && "Cannot allocate empty block!");
+ VMA_ASSERT(!upperAddress && "VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT can be used only with linear algorithm.");
+
+ // For small granularity round up
+ if (!IsVirtual())
+ m_GranularityHandler.RoundupAllocRequest(allocType, allocSize, allocAlignment);
+
+ allocSize += GetDebugMargin(); // search for room including the margin; CheckBlock() subtracts it again for request.size
+ // Quick check for too small pool
+ if (allocSize > GetSumFreeSize())
+ return false;
+
+ // If no free blocks in pool then check only null block
+ if (m_BlocksFreeCount == 0)
+ return CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest); // m_ListsCount as list index means "not on any free list"
+
+ // Round up to the next block
+ VkDeviceSize sizeForNextList = allocSize; // a size that maps to the bucket above the best-fit one
+ VkDeviceSize smallSizeStep = VkDeviceSize(SMALL_BUFFER_SIZE / (IsVirtual() ? 1 << SECOND_LEVEL_INDEX : 4)); // width of one memory-class-0 bucket
+ if (allocSize > SMALL_BUFFER_SIZE)
+ {
+ sizeForNextList += (1ULL << (VMA_BITSCAN_MSB(allocSize) - SECOND_LEVEL_INDEX));
+ }
+ else if (allocSize > SMALL_BUFFER_SIZE - smallSizeStep)
+ sizeForNextList = SMALL_BUFFER_SIZE + 1;
+ else
+ sizeForNextList += smallSizeStep;
+
+ uint32_t nextListIndex = m_ListsCount; // stays at m_ListsCount when FindFreeBlock() finds nothing
+ uint32_t prevListIndex = m_ListsCount;
+ Block* nextListBlock = VMA_NULL;
+ Block* prevListBlock = VMA_NULL;
+
+ // Check blocks according to strategies
+ if (strategy & VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT)
+ {
+ // Quick check for larger block first
+ nextListBlock = FindFreeBlock(sizeForNextList, nextListIndex);
+ if (nextListBlock != VMA_NULL && CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest))
+ return true;
+
+ // If not fitted then null block
+ if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest))
+ return true;
+
+ // Null block failed, search larger bucket
+ while (nextListBlock)
+ {
+ if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest))
+ return true;
+ nextListBlock = nextListBlock->NextFree();
+ }
+
+ // Failed again, check best fit bucket
+ prevListBlock = FindFreeBlock(allocSize, prevListIndex);
+ while (prevListBlock)
+ {
+ if (CheckBlock(*prevListBlock, prevListIndex, allocSize, allocAlignment, allocType, pAllocationRequest))
+ return true;
+ prevListBlock = prevListBlock->NextFree();
+ }
+ }
+ else if (strategy & VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT)
+ {
+ // Check best fit bucket
+ prevListBlock = FindFreeBlock(allocSize, prevListIndex);
+ while (prevListBlock)
+ {
+ if (CheckBlock(*prevListBlock, prevListIndex, allocSize, allocAlignment, allocType, pAllocationRequest))
+ return true;
+ prevListBlock = prevListBlock->NextFree();
+ }
+
+ // If failed check null block
+ if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest))
+ return true;
+
+ // Check larger bucket
+ nextListBlock = FindFreeBlock(sizeForNextList, nextListIndex);
+ while (nextListBlock)
+ {
+ if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest))
+ return true;
+ nextListBlock = nextListBlock->NextFree();
+ }
+ }
+ else if (strategy & VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT )
+ {
+ // Perform search from the start
+ VmaStlAllocator<Block*> allocator(GetAllocationCallbacks());
+ VmaVector<Block*, VmaStlAllocator<Block*>> blockList(m_BlocksFreeCount, allocator);
+
+ size_t i = m_BlocksFreeCount;
+ for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical) // chain is walked backwards, so candidates are stored from the end of the vector
+ {
+ if (block->IsFree() && block->size >= allocSize)
+ blockList[--i] = block;
+ }
+
+ for (; i < m_BlocksFreeCount; ++i) // i now indexes the lowest-offset candidate; entries before it were never filled
+ {
+ Block& block = *blockList[i];
+ if (CheckBlock(block, GetListIndex(block.size), allocSize, allocAlignment, allocType, pAllocationRequest))
+ return true;
+ }
+
+ // If failed check null block
+ if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest))
+ return true;
+
+ // Whole range searched, no more memory
+ return false;
+ }
+ else
+ {
+ // Check larger bucket
+ nextListBlock = FindFreeBlock(sizeForNextList, nextListIndex);
+ while (nextListBlock)
+ {
+ if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest))
+ return true;
+ nextListBlock = nextListBlock->NextFree();
+ }
+
+ // If failed check null block
+ if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest))
+ return true;
+
+ // Check best fit bucket
+ prevListBlock = FindFreeBlock(allocSize, prevListIndex);
+ while (prevListBlock)
+ {
+ if (CheckBlock(*prevListBlock, prevListIndex, allocSize, allocAlignment, allocType, pAllocationRequest))
+ return true;
+ prevListBlock = prevListBlock->NextFree();
+ }
+ }
+
+ // Worst case, full search has to be done
+ while (++nextListIndex < m_ListsCount) // continues with the lists after the "larger bucket" list already tried above
+ {
+ nextListBlock = m_FreeList[nextListIndex];
+ while (nextListBlock)
+ {
+ if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest))
+ return true;
+ nextListBlock = nextListBlock->NextFree();
+ }
+ }
+
+ // No more memory sadly
+ return false;
+ }
+
+ // Checks the magic value stored directly after every taken block in the mapped
+ // block data. Returns VK_SUCCESS when all values are intact, otherwise asserts and
+ // returns VK_ERROR_UNKNOWN_COPY at the first mismatch.
+ VkResult VmaBlockMetadata_TLSF::CheckCorruption(const void* pBlockData)
+ {
+     Block* cursor = m_NullBlock->prevPhysical;
+     while (cursor != VMA_NULL)
+     {
+         // Only taken blocks are checked.
+         if (!cursor->IsFree() &&
+             !VmaValidateMagicValue(pBlockData, cursor->offset + cursor->size))
+         {
+             VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!");
+             return VK_ERROR_UNKNOWN_COPY;
+         }
+         cursor = cursor->prevPhysical;
+     }
+
+     return VK_SUCCESS;
+ }
+
+ /* Commits a request produced by CreateAllocationRequest()/CheckBlock().
+  * request.allocHandle is the chosen free Block and request.algorithmData the aligned
+  * offset inside it. The block is taken off its free list, alignment padding in front
+  * of it is given to the previous block or to a new free block, the unused tail becomes
+  * a new free block (or the new null block), and an extra debug-margin block is appended
+  * when GetDebugMargin() > 0. userData is stored in the now-taken block.
+  * The `type` parameter is not read here; the suballocation type handed to the
+  * granularity handler comes from request.customData.
+  */ void VmaBlockMetadata_TLSF::Alloc(
+ const VmaAllocationRequest& request,
+ VmaSuballocationType type,
+ void* userData)
+ {
+ VMA_ASSERT(request.type == VmaAllocationRequestType::TLSF);
+
+ // Get block and pop it from the free list
+ Block* currentBlock = (Block*)request.allocHandle;
+ VkDeviceSize offset = request.algorithmData;
+ VMA_ASSERT(currentBlock != VMA_NULL);
+ VMA_ASSERT(currentBlock->offset <= offset);
+
+ if (currentBlock != m_NullBlock) // the null block is never on a free list
+ RemoveFreeBlock(currentBlock);
+
+ VkDeviceSize debugMargin = GetDebugMargin();
+ VkDeviceSize missingAlignment = offset - currentBlock->offset; // padding bytes between block start and aligned offset
+
+ // Append missing alignment to prev block or create new one
+ if (missingAlignment)
+ {
+ Block* prevBlock = currentBlock->prevPhysical;
+ VMA_ASSERT(prevBlock != VMA_NULL && "There should be no missing alignment at offset 0!");
+
+ if (prevBlock->IsFree() && prevBlock->size != debugMargin) // free blocks whose size equals the margin are not grown
+ {
+ uint32_t oldList = GetListIndex(prevBlock->size);
+ prevBlock->size += missingAlignment;
+ // Check if new size crosses list bucket
+ if (oldList != GetListIndex(prevBlock->size))
+ {
+ prevBlock->size -= missingAlignment; // restore the old size so RemoveFreeBlock() finds the list it is on
+ RemoveFreeBlock(prevBlock);
+ prevBlock->size += missingAlignment;
+ InsertFreeBlock(prevBlock);
+ }
+ else
+ m_BlocksFreeSize += missingAlignment; // same list: only the cached free size changes
+ }
+ else
+ {
+ Block* newBlock = m_BlockAllocator.Alloc();
+ currentBlock->prevPhysical = newBlock;
+ prevBlock->nextPhysical = newBlock;
+ newBlock->prevPhysical = prevBlock;
+ newBlock->nextPhysical = currentBlock;
+ newBlock->size = missingAlignment;
+ newBlock->offset = currentBlock->offset;
+ newBlock->MarkTaken(); // InsertFreeBlock() asserts the block is not already marked free
+
+ InsertFreeBlock(newBlock);
+ }
+
+ currentBlock->size -= missingAlignment;
+ currentBlock->offset += missingAlignment;
+ }
+
+ VkDeviceSize size = request.size + debugMargin;
+ if (currentBlock->size == size)
+ {
+ if (currentBlock == m_NullBlock)
+ {
+ // Setup new null block
+ m_NullBlock = m_BlockAllocator.Alloc();
+ m_NullBlock->size = 0;
+ m_NullBlock->offset = currentBlock->offset + size;
+ m_NullBlock->prevPhysical = currentBlock;
+ m_NullBlock->nextPhysical = VMA_NULL;
+ m_NullBlock->MarkFree();
+ m_NullBlock->PrevFree() = VMA_NULL;
+ m_NullBlock->NextFree() = VMA_NULL;
+ currentBlock->nextPhysical = m_NullBlock;
+ currentBlock->MarkTaken();
+ }
+ }
+ else
+ {
+ VMA_ASSERT(currentBlock->size > size && "Proper block already found, shouldn't find smaller one!");
+
+ // Create new free block
+ Block* newBlock = m_BlockAllocator.Alloc();
+ newBlock->size = currentBlock->size - size;
+ newBlock->offset = currentBlock->offset + size;
+ newBlock->prevPhysical = currentBlock;
+ newBlock->nextPhysical = currentBlock->nextPhysical;
+ currentBlock->nextPhysical = newBlock;
+ currentBlock->size = size;
+
+ if (currentBlock == m_NullBlock)
+ {
+ m_NullBlock = newBlock; // the remainder becomes the new null block and stays off the free lists
+ m_NullBlock->MarkFree();
+ m_NullBlock->NextFree() = VMA_NULL;
+ m_NullBlock->PrevFree() = VMA_NULL;
+ currentBlock->MarkTaken();
+ }
+ else
+ {
+ newBlock->nextPhysical->prevPhysical = newBlock;
+ newBlock->MarkTaken(); // InsertFreeBlock() asserts the block is not already marked free
+ InsertFreeBlock(newBlock);
+ }
+ }
+ currentBlock->UserData() = userData;
+
+ if (debugMargin > 0)
+ {
+ currentBlock->size -= debugMargin; // carve the margin off the end of the allocation into its own block
+ Block* newBlock = m_BlockAllocator.Alloc();
+ newBlock->size = debugMargin;
+ newBlock->offset = currentBlock->offset + currentBlock->size;
+ newBlock->prevPhysical = currentBlock;
+ newBlock->nextPhysical = currentBlock->nextPhysical;
+ newBlock->MarkTaken();
+ currentBlock->nextPhysical->prevPhysical = newBlock;
+ currentBlock->nextPhysical = newBlock;
+ InsertFreeBlock(newBlock);
+ }
+
+ if (!IsVirtual())
+ m_GranularityHandler.AllocPages((uint8_t)(uintptr_t)request.customData, // customData carries the allocType stored by CheckBlock()
+ currentBlock->offset, currentBlock->size);
+ ++m_AllocCount;
+ }
+
+ /* Releases the allocation identified by allocHandle (a Block pointer).
+  * The block is merged with its debug-margin block (when margins are enabled) and with
+  * free physical neighbours; the result is either inserted into a free list or
+  * absorbed into the null block.
+  */ void VmaBlockMetadata_TLSF::Free(VmaAllocHandle allocHandle)
+ {
+ Block* block = (Block*)allocHandle;
+ Block* next = block->nextPhysical;
+ VMA_ASSERT(!block->IsFree() && "Block is already free!");
+
+ if (!IsVirtual())
+ m_GranularityHandler.FreePages(block->offset, block->size);
+ --m_AllocCount;
+
+ VkDeviceSize debugMargin = GetDebugMargin();
+ if (debugMargin > 0)
+ {
+ RemoveFreeBlock(next); // next is the margin block that Alloc() inserted right after the allocation
+ MergeBlock(next, block); // MergeBlock() frees its second argument, so continue with `next`
+ block = next;
+ next = next->nextPhysical;
+ }
+
+ // Try merging
+ Block* prev = block->prevPhysical;
+ if (prev != VMA_NULL && prev->IsFree() && prev->size != debugMargin) // free blocks whose size equals the margin are left alone
+ {
+ RemoveFreeBlock(prev);
+ MergeBlock(block, prev);
+ }
+
+ if (!next->IsFree())
+ InsertFreeBlock(block); // following block is taken: publish as a standalone free block
+ else if (next == m_NullBlock)
+ MergeBlock(m_NullBlock, block); // grow the null block backwards; it stays off the free lists
+ else
+ {
+ RemoveFreeBlock(next); // size changes, so the block must be re-bucketed
+ MergeBlock(next, block);
+ InsertFreeBlock(next);
+ }
+ }
+
+ // Copies offset, size and user data of the taken block behind `allocHandle` into `outInfo`.
+ void VmaBlockMetadata_TLSF::GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo)
+ {
+     Block* const taken = (Block*)allocHandle;
+     VMA_ASSERT(!taken->IsFree() && "Cannot get allocation info for free block!");
+
+     outInfo.offset = taken->offset;
+     outInfo.size = taken->size;
+     outInfo.pUserData = taken->UserData();
+ }
+
+ // Returns the user data pointer stored in the taken block behind `allocHandle`.
+ void* VmaBlockMetadata_TLSF::GetAllocationUserData(VmaAllocHandle allocHandle) const
+ {
+     Block* const taken = (Block*)allocHandle;
+     VMA_ASSERT(!taken->IsFree() && "Cannot get user data for free block!");
+
+     return taken->UserData();
+ }
+
+ // Returns the first allocation of the iteration order used by GetNextAllocation():
+ // the taken block closest to the null block. Returns VK_NULL_HANDLE when there are
+ // no allocations.
+ VmaAllocHandle VmaBlockMetadata_TLSF::GetAllocationListBegin() const
+ {
+     if (m_AllocCount != 0)
+     {
+         Block* cursor = m_NullBlock->prevPhysical;
+         while (cursor != VMA_NULL)
+         {
+             if (!cursor->IsFree())
+             {
+                 return (VmaAllocHandle)cursor;
+             }
+             cursor = cursor->prevPhysical;
+         }
+         VMA_ASSERT(false && "If m_AllocCount > 0 then should find any allocation!");
+     }
+
+     return VK_NULL_HANDLE;
+ }
+
+ // Returns the next taken block physically before `prevAlloc`, or VK_NULL_HANDLE
+ // when no taken block precedes it.
+ VmaAllocHandle VmaBlockMetadata_TLSF::GetNextAllocation(VmaAllocHandle prevAlloc) const
+ {
+     Block* const startBlock = (Block*)prevAlloc;
+     VMA_ASSERT(!startBlock->IsFree() && "Incorrect block!");
+
+     // Skip over free blocks while moving towards offset 0.
+     Block* cursor = startBlock->prevPhysical;
+     while (cursor != VMA_NULL && cursor->IsFree())
+     {
+         cursor = cursor->prevPhysical;
+     }
+
+     if (cursor == VMA_NULL)
+     {
+         return VK_NULL_HANDLE;
+     }
+     return (VmaAllocHandle)cursor;
+ }
+
+ // Returns the size of the free block physically preceding the taken block `alloc`,
+ // or 0 when there is no preceding block or it is taken.
+ VkDeviceSize VmaBlockMetadata_TLSF::GetNextFreeRegionSize(VmaAllocHandle alloc) const
+ {
+     Block* const taken = (Block*)alloc;
+     VMA_ASSERT(!taken->IsFree() && "Incorrect block!");
+
+     const Block* const neighbour = taken->prevPhysical;
+     if (neighbour != VMA_NULL && neighbour->IsFree())
+     {
+         return neighbour->size;
+     }
+     return 0;
+ }
+
+ // Drops every allocation and free block: the null block is stretched over the
+ // whole range again, all other Block nodes are returned to the pool allocator, and
+ // counters, bitmaps, free-list heads and the granularity handler are reset.
+ void VmaBlockMetadata_TLSF::Clear()
+ {
+     m_AllocCount = 0;
+     m_BlocksFreeCount = 0;
+     m_BlocksFreeSize = 0;
+     m_IsFreeBitmap = 0;
+
+     m_NullBlock->offset = 0;
+     m_NullBlock->size = GetSize();
+
+     // Detach the chain from the null block, then release it node by node.
+     Block* doomed = m_NullBlock->prevPhysical;
+     m_NullBlock->prevPhysical = VMA_NULL;
+     while (doomed != VMA_NULL)
+     {
+         Block* const earlier = doomed->prevPhysical;
+         m_BlockAllocator.Free(doomed);
+         doomed = earlier;
+     }
+
+     memset(m_FreeList, 0, m_ListsCount * sizeof(Block*));
+     memset(m_InnerIsFreeBitmap, 0, m_MemoryClasses * sizeof(uint32_t));
+     m_GranularityHandler.Clear();
+ }
+
+ // Replaces the user data pointer stored in the taken block behind `allocHandle`.
+ void VmaBlockMetadata_TLSF::SetAllocationUserData(VmaAllocHandle allocHandle, void* userData)
+ {
+     Block* const taken = (Block*)allocHandle;
+     VMA_ASSERT(!taken->IsFree() && "Trying to set user data for not allocated block!");
+
+     taken->UserData() = userData;
+ }
+
+ // Passes every taken block (offset, size, user data) to DebugLogAllocation(),
+ // walking the physical chain backwards from the null block.
+ void VmaBlockMetadata_TLSF::DebugLogAllAllocations() const
+ {
+     Block* cursor = m_NullBlock->prevPhysical;
+     while (cursor != VMA_NULL)
+     {
+         if (!cursor->IsFree())
+         {
+             DebugLogAllocation(cursor->offset, cursor->size, cursor->UserData());
+         }
+         cursor = cursor->prevPhysical;
+     }
+ }
+
+ // Maps a size to its first-level index: 0 for sizes up to SMALL_BUFFER_SIZE,
+ // otherwise the index of the highest set bit minus MEMORY_CLASS_SHIFT.
+ uint8_t VmaBlockMetadata_TLSF::SizeToMemoryClass(VkDeviceSize size) const
+ {
+     if (size <= SMALL_BUFFER_SIZE)
+     {
+         return 0;
+     }
+     return uint8_t(VMA_BITSCAN_MSB(size) - MEMORY_CLASS_SHIFT);
+ }
+
+ // Maps a size to its second-level index within `memoryClass`.
+ // Memory class 0 uses linear buckets (8 bytes wide for virtual blocks, 64 otherwise);
+ // other classes use the SECOND_LEVEL_INDEX bits directly below the size's top bit.
+ uint16_t VmaBlockMetadata_TLSF::SizeToSecondIndex(VkDeviceSize size, uint8_t memoryClass) const
+ {
+     if (memoryClass != 0)
+     {
+         // The shift leaves the top bit at position SECOND_LEVEL_INDEX; the XOR clears it.
+         return static_cast<uint16_t>((size >> (memoryClass + MEMORY_CLASS_SHIFT - SECOND_LEVEL_INDEX)) ^ (1U << SECOND_LEVEL_INDEX));
+     }
+
+     return IsVirtual()
+         ? static_cast<uint16_t>((size - 1) / 8)
+         : static_cast<uint16_t>((size - 1) / 64);
+ }
+
+ // Flattens (memoryClass, secondIndex) into an index into m_FreeList.
+ // Memory class 0 occupies the first lists (1 << SECOND_LEVEL_INDEX of them for
+ // virtual blocks, 4 otherwise); every further class occupies 1 << SECOND_LEVEL_INDEX lists.
+ uint32_t VmaBlockMetadata_TLSF::GetListIndex(uint8_t memoryClass, uint16_t secondIndex) const
+ {
+     if (memoryClass == 0)
+     {
+         return secondIndex;
+     }
+
+     const uint32_t index = static_cast<uint32_t>(memoryClass - 1) * (1 << SECOND_LEVEL_INDEX) + secondIndex;
+     return IsVirtual() ? index + (1 << SECOND_LEVEL_INDEX) : index + 4;
+ }
+
+ // Convenience overload: returns the m_FreeList index of the bucket that `size` falls into.
+ uint32_t VmaBlockMetadata_TLSF::GetListIndex(VkDeviceSize size) const
+ {
+     const uint8_t firstLevel = SizeToMemoryClass(size);
+     const uint16_t secondLevel = SizeToSecondIndex(size, firstLevel);
+     return GetListIndex(firstLevel, secondLevel);
+ }
+
+ // Unlinks `block` from the free list matching its current size, marks it taken with
+ // null user data, and updates the free counters. When the list becomes empty the
+ // corresponding bits in the second-level and first-level bitmaps are cleared.
+ // `block` must be free and must not be the null block (both asserted).
+ void VmaBlockMetadata_TLSF::RemoveFreeBlock(Block* block)
+ {
+     VMA_ASSERT(block != m_NullBlock);
+     VMA_ASSERT(block->IsFree());
+
+     if (block->NextFree() != VMA_NULL)
+         block->NextFree()->PrevFree() = block->PrevFree();
+     if (block->PrevFree() != VMA_NULL)
+         block->PrevFree()->NextFree() = block->NextFree();
+     else
+     {
+         // No predecessor: the block is the head of its list.
+         uint8_t memClass = SizeToMemoryClass(block->size);
+         uint16_t secondIndex = SizeToSecondIndex(block->size, memClass);
+         uint32_t index = GetListIndex(memClass, secondIndex);
+         VMA_ASSERT(m_FreeList[index] == block);
+         m_FreeList[index] = block->NextFree();
+         if (block->NextFree() == VMA_NULL)
+         {
+             m_InnerIsFreeBitmap[memClass] &= ~(1U << secondIndex);
+             if (m_InnerIsFreeBitmap[memClass] == 0)
+             {
+                 // Shift in 64 bits: memClass can reach MAX_MEMORY_CLASSES - 1, and shifting
+                 // a 32-bit `unsigned long` (e.g. on LLP64 targets) by >= 32 is undefined.
+                 // m_IsFreeBitmap itself is 32 bits wide, so classes >= 32 map to no bit.
+                 m_IsFreeBitmap &= ~static_cast<uint32_t>(1ULL << memClass);
+             }
+         }
+     }
+     block->MarkTaken();
+     block->UserData() = VMA_NULL;
+     --m_BlocksFreeCount;
+     m_BlocksFreeSize -= block->size;
+ }
+
+ // Pushes `block` onto the front of the free list matching its size and updates the
+ // free counters. When the list was empty, the corresponding bits in the second-level
+ // and first-level bitmaps are set. `block` must currently be marked taken and must
+ // not be the null block (both asserted).
+ void VmaBlockMetadata_TLSF::InsertFreeBlock(Block* block)
+ {
+     VMA_ASSERT(block != m_NullBlock);
+     VMA_ASSERT(!block->IsFree() && "Cannot insert block twice!");
+
+     uint8_t memClass = SizeToMemoryClass(block->size);
+     uint16_t secondIndex = SizeToSecondIndex(block->size, memClass);
+     uint32_t index = GetListIndex(memClass, secondIndex);
+     VMA_ASSERT(index < m_ListsCount);
+     // Writing PrevFree() first marks the block free, which NextFree() requires.
+     block->PrevFree() = VMA_NULL;
+     block->NextFree() = m_FreeList[index];
+     m_FreeList[index] = block;
+     if (block->NextFree() != VMA_NULL)
+         block->NextFree()->PrevFree() = block;
+     else
+     {
+         m_InnerIsFreeBitmap[memClass] |= 1U << secondIndex;
+         // Shift in 64 bits: memClass can reach MAX_MEMORY_CLASSES - 1, and shifting
+         // a 32-bit `unsigned long` (e.g. on LLP64 targets) by >= 32 is undefined.
+         // m_IsFreeBitmap itself is 32 bits wide, so classes >= 32 map to no bit.
+         m_IsFreeBitmap |= static_cast<uint32_t>(1ULL << memClass);
+     }
+     ++m_BlocksFreeCount;
+     m_BlocksFreeSize += block->size;
+ }
+
+ // Extends `block` backwards over its physical predecessor `prev` and returns the
+ // `prev` node to the pool allocator. `prev` must already be off the free lists
+ // (asserted via !IsFree()).
+ void VmaBlockMetadata_TLSF::MergeBlock(Block* block, Block* prev)
+ {
+     VMA_ASSERT(block->prevPhysical == prev && "Cannot merge separate physical regions!");
+     VMA_ASSERT(!prev->IsFree() && "Cannot merge block that belongs to free list!");
+
+     Block* const newPredecessor = prev->prevPhysical;
+
+     block->offset = prev->offset;
+     block->size += prev->size;
+     block->prevPhysical = newPredecessor;
+     if (newPredecessor != VMA_NULL)
+     {
+         newPredecessor->nextPhysical = block;
+     }
+
+     m_BlockAllocator.Free(prev);
+ }
+
+ // Returns the head of the first non-empty free list at or above the bucket of `size`,
+ // using the two bitmap levels, and stores that list's index in `listIndex`.
+ // Returns VMA_NULL (leaving `listIndex` untouched) when the bitmaps show no such list.
+ VmaBlockMetadata_TLSF::Block* VmaBlockMetadata_TLSF::FindFreeBlock(VkDeviceSize size, uint32_t& listIndex) const
+ {
+     uint8_t memoryClass = SizeToMemoryClass(size);
+     // Second-level lists of the same class, starting at the bucket of `size`.
+     uint32_t innerFreeMap = m_InnerIsFreeBitmap[memoryClass] & (~0U << SizeToSecondIndex(size, memoryClass));
+     if (!innerFreeMap)
+     {
+         // Check higher levels for available blocks.
+         // Build the mask in 64 bits: memoryClass + 1 can be >= 32, and shifting a
+         // 32-bit `unsigned long` (e.g. on LLP64 targets) by that amount is undefined.
+         uint32_t freeMap = m_IsFreeBitmap & static_cast<uint32_t>(~0ULL << (memoryClass + 1));
+         if (!freeMap)
+             return VMA_NULL; // No more memory available
+
+         // Find lowest free region
+         memoryClass = VMA_BITSCAN_LSB(freeMap);
+         innerFreeMap = m_InnerIsFreeBitmap[memoryClass];
+         VMA_ASSERT(innerFreeMap != 0);
+     }
+     // Find lowest free subregion
+     listIndex = GetListIndex(memoryClass, VMA_BITSCAN_LSB(innerFreeMap));
+     VMA_ASSERT(m_FreeList[listIndex]);
+     return m_FreeList[listIndex];
+ }
+
+ // Tests whether the free `block` can hold `allocSize` bytes at `allocAlignment`
+ // (and, for non-virtual blocks, without a buffer/image granularity conflict).
+ // On success fills *pAllocationRequest, moves the block to the front of its free
+ // list, and returns true; otherwise returns false and leaves the request untouched.
+ //   listIndex - index of the free list holding `block`, or m_ListsCount for the
+ //               null block, which is on no list.
+ //   allocSize - requested size including the debug margin.
+ bool VmaBlockMetadata_TLSF::CheckBlock(
+     Block& block,
+     uint32_t listIndex,
+     VkDeviceSize allocSize,
+     VkDeviceSize allocAlignment,
+     VmaSuballocationType allocType,
+     VmaAllocationRequest* pAllocationRequest)
+ {
+     VMA_ASSERT(block.IsFree() && "Block is already taken!");
+
+     VkDeviceSize alignedOffset = VmaAlignUp(block.offset, allocAlignment);
+     // The block must hold the alignment padding plus the allocation itself.
+     if (block.size < allocSize + alignedOffset - block.offset)
+         return false;
+
+     // Check for granularity conflicts
+     if (!IsVirtual() &&
+         m_GranularityHandler.CheckConflictAndAlignUp(alignedOffset, allocSize, block.offset, block.size, allocType))
+         return false;
+
+     // Alloc successful
+     pAllocationRequest->type = VmaAllocationRequestType::TLSF;
+     pAllocationRequest->allocHandle = (VmaAllocHandle)&block;
+     pAllocationRequest->size = allocSize - GetDebugMargin();
+     // Encode the enum through uintptr_t, mirroring the (uintptr_t) decode in Alloc()
+     // and avoiding a direct cast between an integer and a pointer of different size.
+     pAllocationRequest->customData = (void*)(uintptr_t)allocType;
+     pAllocationRequest->algorithmData = alignedOffset;
+
+     // Place block at the start of list if it's normal block
+     if (listIndex != m_ListsCount && block.PrevFree())
+     {
+         block.PrevFree()->NextFree() = block.NextFree();
+         if (block.NextFree())
+             block.NextFree()->PrevFree() = block.PrevFree();
+         block.PrevFree() = VMA_NULL;
+         block.NextFree() = m_FreeList[listIndex];
+         m_FreeList[listIndex] = &block;
+         if (block.NextFree())
+             block.NextFree()->PrevFree() = &block;
+     }
+
+     return true;
+ }
+#endif // _VMA_BLOCK_METADATA_TLSF_FUNCTIONS
+#endif // _VMA_BLOCK_METADATA_TLSF
+
+#ifndef _VMA_BLOCK_VECTOR
+/*
+Sequence of VmaDeviceMemoryBlock. Represents memory blocks allocated for a specific
+Vulkan memory type.
+
+Synchronized internally with a mutex.
+*/
+ class VmaBlockVector // declaration only; the member functions are defined outside this part of the file
+ {
+ friend struct VmaDefragmentationContext_T;
+ VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockVector)
+ public:
+ VmaBlockVector(
+ VmaAllocator hAllocator,
+ VmaPool hParentPool,
+ uint32_t memoryTypeIndex,
+ VkDeviceSize preferredBlockSize,
+ size_t minBlockCount,
+ size_t maxBlockCount,
+ VkDeviceSize bufferImageGranularity,
+ bool explicitBlockSize,
+ uint32_t algorithm,
+ float priority,
+ VkDeviceSize minAllocationAlignment,
+ void* pMemoryAllocateNext);
+ ~VmaBlockVector();
+
+ VmaAllocator GetAllocator() const { return m_hAllocator; }
+ VmaPool GetParentPool() const { return m_hParentPool; }
+ bool IsCustomPool() const { return m_hParentPool != VMA_NULL; } // true iff a parent pool handle was supplied
+ uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; }
+ VkDeviceSize GetPreferredBlockSize() const { return m_PreferredBlockSize; }
+ VkDeviceSize GetBufferImageGranularity() const { return m_BufferImageGranularity; }
+ uint32_t GetAlgorithm() const { return m_Algorithm; }
+ bool HasExplicitBlockSize() const { return m_ExplicitBlockSize; }
+ float GetPriority() const { return m_Priority; }
+ const void* GetAllocationNextPtr() const { return m_pMemoryAllocateNext; }
+ // To be used only while the m_Mutex is locked. Used during defragmentation.
+ size_t GetBlockCount() const { return m_Blocks.size(); }
+ // To be used only while the m_Mutex is locked. Used during defragmentation.
+ VmaDeviceMemoryBlock* GetBlock(size_t index) const { return m_Blocks[index]; }
+ VMA_RW_MUTEX &GetMutex() { return m_Mutex; }
+
+ VkResult CreateMinBlocks();
+ void AddStatistics(VmaStatistics& inoutStats);
+ void AddDetailedStatistics(VmaDetailedStatistics& inoutStats);
+ bool IsEmpty();
+ bool IsCorruptionDetectionEnabled() const;
+
+ VkResult Allocate(
+ VkDeviceSize size,
+ VkDeviceSize alignment,
+ const VmaAllocationCreateInfo& createInfo,
+ VmaSuballocationType suballocType,
+ size_t allocationCount,
+ VmaAllocation* pAllocations);
+
+ void Free(const VmaAllocation hAllocation);
+
+ #if VMA_STATS_STRING_ENABLED
+ void PrintDetailedMap(class VmaJsonWriter& json);
+ #endif
+
+ VkResult CheckCorruption();
+
+ private:
+ const VmaAllocator m_hAllocator; // the const members below are exposed read-only through the getters above
+ const VmaPool m_hParentPool;
+ const uint32_t m_MemoryTypeIndex;
+ const VkDeviceSize m_PreferredBlockSize;
+ const size_t m_MinBlockCount;
+ const size_t m_MaxBlockCount;
+ const VkDeviceSize m_BufferImageGranularity;
+ const bool m_ExplicitBlockSize;
+ const uint32_t m_Algorithm;
+ const float m_Priority;
+ const VkDeviceSize m_MinAllocationAlignment;
+
+ void* const m_pMemoryAllocateNext;
+ VMA_RW_MUTEX m_Mutex;
+ // Incrementally sorted by sumFreeSize, ascending.
+ VmaVector<VmaDeviceMemoryBlock*, VmaStlAllocator<VmaDeviceMemoryBlock*>> m_Blocks;
+ uint32_t m_NextBlockId;
+ bool m_IncrementalSort = true; // defaults to enabled; changed only through SetIncrementalSort()
+
+ void SetIncrementalSort(bool val) { m_IncrementalSort = val; }
+
+ VkDeviceSize CalcMaxBlockSize() const;
+ // Finds and removes given block from vector.
+ void Remove(VmaDeviceMemoryBlock* pBlock);
+ // Performs single step in sorting m_Blocks. They may not be fully sorted
+ // after this call.
+ void IncrementallySortBlocks();
+ void SortByFreeSize();
+
+ VkResult AllocatePage(
+ VkDeviceSize size,
+ VkDeviceSize alignment,
+ const VmaAllocationCreateInfo& createInfo,
+ VmaSuballocationType suballocType,
+ VmaAllocation* pAllocation);
+
+ VkResult AllocateFromBlock(
+ VmaDeviceMemoryBlock* pBlock,
+ VkDeviceSize size,
+ VkDeviceSize alignment,
+ VmaAllocationCreateFlags allocFlags,
+ void* pUserData,
+ VmaSuballocationType suballocType,
+ uint32_t strategy,
+ VmaAllocation* pAllocation);
+
+ VkResult CommitAllocationRequest(
+ VmaAllocationRequest& allocRequest,
+ VmaDeviceMemoryBlock* pBlock,
+ VkDeviceSize alignment,
+ VmaAllocationCreateFlags allocFlags,
+ void* pUserData,
+ VmaSuballocationType suballocType,
+ VmaAllocation* pAllocation);
+
+ VkResult CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIndex);
+ bool HasEmptyBlock();
+ };
+#endif // _VMA_BLOCK_VECTOR
+
+#ifndef _VMA_DEFRAGMENTATION_CONTEXT
+// State object for one defragmentation operation. Declares the per-pass
+// begin/end entry points and the private bookkeeping they share; the member
+// functions are only declared here, not defined.
+struct VmaDefragmentationContext_T
+{
+ VMA_CLASS_NO_COPY_NO_MOVE(VmaDefragmentationContext_T)
+public:
+ VmaDefragmentationContext_T(
+ VmaAllocator hAllocator,
+ const VmaDefragmentationInfo& info);
+ ~VmaDefragmentationContext_T();
+
+ // Copies the accumulated m_GlobalStats into outStats.
+ void GetStats(VmaDefragmentationStats& outStats) { outStats = m_GlobalStats; }
+
+ VkResult DefragmentPassBegin(VmaDefragmentationPassMoveInfo& moveInfo);
+ VkResult DefragmentPassEnd(VmaDefragmentationPassMoveInfo& moveInfo);
+
+private:
+ // Max number of allocations to ignore due to size constraints before ending single pass
+ static const uint8_t MAX_ALLOCS_TO_IGNORE = 16;
+ // Return type of CheckCounters().
+ enum class CounterStatus { Pass, Ignore, End };
+
+ struct FragmentedBlock
+ {
+ uint32_t data;
+ VmaDeviceMemoryBlock* block;
+ };
+ // Passed by reference to UpdateVectorStatistics().
+ struct StateBalanced
+ {
+ VkDeviceSize avgFreeSize = 0;
+ VkDeviceSize avgAllocSize = UINT64_MAX;
+ };
+ // NOTE(review): presumably the per-vector state used by
+ // ComputeDefragmentation_Extensive() - confirm against its definition.
+ struct StateExtensive
+ {
+ enum class Operation : uint8_t
+ {
+ FindFreeBlockBuffer, FindFreeBlockTexture, FindFreeBlockAll,
+ MoveBuffers, MoveTextures, MoveAll,
+ Cleanup, Done
+ };
+
+ Operation operation = Operation::FindFreeBlockTexture;
+ size_t firstFreeBlock = SIZE_MAX;
+ };
+ // Returned by GetMoveData() and passed to AllocInOtherBlock().
+ struct MoveAllocationData
+ {
+ VkDeviceSize size;
+ VkDeviceSize alignment;
+ VmaSuballocationType type;
+ VmaAllocationCreateFlags flags;
+ VmaDefragmentationMove move = {};
+ };
+
+ const VkDeviceSize m_MaxPassBytes;
+ const uint32_t m_MaxPassAllocations;
+ const PFN_vmaCheckDefragmentationBreakFunction m_BreakCallback;
+ void* m_BreakCallbackUserData;
+
+ VmaStlAllocator<VmaDefragmentationMove> m_MoveAllocator;
+ VmaVector<VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove>> m_Moves;
+
+ // NOTE(review): presumably compared against MAX_ALLOCS_TO_IGNORE - confirm in CheckCounters().
+ uint8_t m_IgnoredAllocs = 0;
+ uint32_t m_Algorithm;
+ uint32_t m_BlockVectorCount;
+ VmaBlockVector* m_PoolBlockVector;
+ VmaBlockVector** m_pBlockVectors;
+ size_t m_ImmovableBlockCount = 0;
+ VmaDefragmentationStats m_GlobalStats = { 0 };
+ VmaDefragmentationStats m_PassStats = { 0 };
+ // Untyped pointer, null by default. NOTE(review): what it points to and who
+ // frees it is not visible in this declaration - see the constructor/destructor.
+ void* m_AlgorithmState = VMA_NULL;
+
+ static MoveAllocationData GetMoveData(VmaAllocHandle handle, VmaBlockMetadata* metadata);
+ CounterStatus CheckCounters(VkDeviceSize bytes);
+ bool IncrementCounters(VkDeviceSize bytes);
+ bool ReallocWithinBlock(VmaBlockVector& vector, VmaDeviceMemoryBlock* block);
+ bool AllocInOtherBlock(size_t start, size_t end, MoveAllocationData& data, VmaBlockVector& vector);
+
+ bool ComputeDefragmentation(VmaBlockVector& vector, size_t index);
+ bool ComputeDefragmentation_Fast(VmaBlockVector& vector);
+ bool ComputeDefragmentation_Balanced(VmaBlockVector& vector, size_t index, bool update);
+ bool ComputeDefragmentation_Full(VmaBlockVector& vector);
+ bool ComputeDefragmentation_Extensive(VmaBlockVector& vector, size_t index);
+
+ void UpdateVectorStatistics(VmaBlockVector& vector, StateBalanced& state);
+ bool MoveDataToFreeBlocks(VmaSuballocationType currentType,
+ VmaBlockVector& vector, size_t firstFreeBlock,
+ bool& texturePresent, bool& bufferPresent, bool& otherPresent);
+};
+#endif // _VMA_DEFRAGMENTATION_CONTEXT
+
+#ifndef _VMA_POOL_T
+// Pool object: owns one VmaBlockVector and one VmaDedicatedAllocationList,
+// plus an id, a name pointer and the intrusive list links that
+// VmaPoolListItemTraits (a friend) reads and writes.
+struct VmaPool_T
+{
+ friend struct VmaPoolListItemTraits;
+ VMA_CLASS_NO_COPY_NO_MOVE(VmaPool_T)
+public:
+ VmaBlockVector m_BlockVector;
+ VmaDedicatedAllocationList m_DedicatedAllocations;
+
+ VmaPool_T(
+ VmaAllocator hAllocator,
+ const VmaPoolCreateInfo& createInfo,
+ VkDeviceSize preferredBlockSize);
+ ~VmaPool_T();
+
+ uint32_t GetId() const { return m_Id; }
+ // Asserts that the current id is still 0 before overwriting it.
+ void SetId(uint32_t id) { VMA_ASSERT(m_Id == 0); m_Id = id; }
+
+ const char* GetName() const { return m_Name; }
+ void SetName(const char* pName);
+
+#if VMA_STATS_STRING_ENABLED
+ //void PrintDetailedMap(class VmaStringBuilder& sb);
+#endif
+
+private:
+ uint32_t m_Id;
+ char* m_Name;
+ // Links of the intrusive pool list; accessed only via VmaPoolListItemTraits.
+ VmaPool_T* m_PrevPool = VMA_NULL;
+ VmaPool_T* m_NextPool = VMA_NULL;
+};
+
+// Traits type that exposes the private prev/next links of VmaPool_T; it is the
+// template argument of the VmaIntrusiveLinkedList that stores the pools.
+struct VmaPoolListItemTraits
+{
+    typedef VmaPool_T ItemType;
+
+    // Read-only access to the neighbouring items.
+    static ItemType* GetPrev(const ItemType* item)
+    {
+        return item->m_PrevPool;
+    }
+    static ItemType* GetNext(const ItemType* item)
+    {
+        return item->m_NextPool;
+    }
+
+    // Writable references to the link fields themselves.
+    static ItemType*& AccessPrev(ItemType* item)
+    {
+        return item->m_PrevPool;
+    }
+    static ItemType*& AccessNext(ItemType* item)
+    {
+        return item->m_NextPool;
+    }
+};
+#endif // _VMA_POOL_T
+
+#ifndef _VMA_CURRENT_BUDGET_DATA
+// Per-heap usage counters. Every array has VK_MAX_MEMORY_HEAPS entries and is
+// indexed by heap index; the constructor zeroes all of them.
+struct VmaCurrentBudgetData
+{
+ VMA_CLASS_NO_COPY_NO_MOVE(VmaCurrentBudgetData)
+public:
+
+ VMA_ATOMIC_UINT32 m_BlockCount[VK_MAX_MEMORY_HEAPS];
+ VMA_ATOMIC_UINT32 m_AllocationCount[VK_MAX_MEMORY_HEAPS];
+ VMA_ATOMIC_UINT64 m_BlockBytes[VK_MAX_MEMORY_HEAPS];
+ VMA_ATOMIC_UINT64 m_AllocationBytes[VK_MAX_MEMORY_HEAPS];
+
+#if VMA_MEMORY_BUDGET
+ // Incremented by both AddAllocation() and RemoveAllocation().
+ VMA_ATOMIC_UINT32 m_OperationsSinceBudgetFetch;
+ VMA_RW_MUTEX m_BudgetMutex;
+ uint64_t m_VulkanUsage[VK_MAX_MEMORY_HEAPS];
+ uint64_t m_VulkanBudget[VK_MAX_MEMORY_HEAPS];
+ uint64_t m_BlockBytesAtBudgetFetch[VK_MAX_MEMORY_HEAPS];
+#endif // VMA_MEMORY_BUDGET
+
+ VmaCurrentBudgetData();
+
+ // Update m_AllocationBytes / m_AllocationCount for the given heap.
+ void AddAllocation(uint32_t heapIndex, VkDeviceSize allocationSize);
+ void RemoveAllocation(uint32_t heapIndex, VkDeviceSize allocationSize);
+};
+
+#ifndef _VMA_CURRENT_BUDGET_DATA_FUNCTIONS
+// Zeroes every per-heap counter and, when VMA_MEMORY_BUDGET is enabled, the
+// cached budget arrays and the operation counter as well.
+VmaCurrentBudgetData::VmaCurrentBudgetData()
+{
+#if VMA_MEMORY_BUDGET
+    m_OperationsSinceBudgetFetch = 0;
+#endif
+
+    for (uint32_t heap = 0; heap != VK_MAX_MEMORY_HEAPS; ++heap)
+    {
+        // Block-level counters.
+        m_BlockCount[heap] = 0;
+        m_BlockBytes[heap] = 0;
+        // Allocation-level counters.
+        m_AllocationCount[heap] = 0;
+        m_AllocationBytes[heap] = 0;
+#if VMA_MEMORY_BUDGET
+        m_VulkanUsage[heap] = 0;
+        m_VulkanBudget[heap] = 0;
+        m_BlockBytesAtBudgetFetch[heap] = 0;
+#endif
+    }
+}
+
+// Records one new allocation of allocationSize bytes in heap heapIndex:
+// adds to the byte counter and increments the allocation count. With
+// VMA_MEMORY_BUDGET it also increments m_OperationsSinceBudgetFetch.
+// heapIndex is not range-checked here.
+void VmaCurrentBudgetData::AddAllocation(uint32_t heapIndex, VkDeviceSize allocationSize)
+{
+ m_AllocationBytes[heapIndex] += allocationSize;
+ ++m_AllocationCount[heapIndex];
+#if VMA_MEMORY_BUDGET
+ ++m_OperationsSinceBudgetFetch;
+#endif
+}
+
+// Reverses AddAllocation() for heap heapIndex: subtracts allocationSize from
+// the byte counter and decrements the allocation count. Each counter is
+// asserted to be large enough before it is decreased. With VMA_MEMORY_BUDGET
+// the operation counter is incremented (not decremented).
+void VmaCurrentBudgetData::RemoveAllocation(uint32_t heapIndex, VkDeviceSize allocationSize)
+{
+ VMA_ASSERT(m_AllocationBytes[heapIndex] >= allocationSize);
+ m_AllocationBytes[heapIndex] -= allocationSize;
+ VMA_ASSERT(m_AllocationCount[heapIndex] > 0);
+ --m_AllocationCount[heapIndex];
+#if VMA_MEMORY_BUDGET
+ ++m_OperationsSinceBudgetFetch;
+#endif
+}
+#endif // _VMA_CURRENT_BUDGET_DATA_FUNCTIONS
+#endif // _VMA_CURRENT_BUDGET_DATA
+
+#ifndef _VMA_ALLOCATION_OBJECT_ALLOCATOR
+/*
+Thread-safe wrapper over VmaPoolAllocator free list, for allocation of VmaAllocation_T objects.
+*/
+class VmaAllocationObjectAllocator
+{
+ VMA_CLASS_NO_COPY_NO_MOVE(VmaAllocationObjectAllocator)
+public:
+ // Forwards the callbacks and the literal 1024 to the VmaPoolAllocator constructor.
+ VmaAllocationObjectAllocator(const VkAllocationCallbacks* pAllocationCallbacks)
+ : m_Allocator(pAllocationCallbacks, 1024) {}
+
+ // Both take m_Mutex for the duration of the call.
+ template<typename... Types> VmaAllocation Allocate(Types&&... args);
+ void Free(VmaAllocation hAlloc);
+
+private:
+ VMA_MUTEX m_Mutex;
+ VmaPoolAllocator<VmaAllocation_T> m_Allocator;
+};
+
+// Creates one VmaAllocation_T from the pool allocator while holding m_Mutex,
+// perfectly forwarding args to VmaPoolAllocator::Alloc.
+template<typename... Types>
+VmaAllocation VmaAllocationObjectAllocator::Allocate(Types&&... args)
+{
+    VmaMutexLock guard(m_Mutex);
+    VmaAllocation created = m_Allocator.Alloc<Types...>(std::forward<Types>(args)...);
+    return created;
+}
+
+// Returns hAlloc to the pool allocator while holding m_Mutex.
+void VmaAllocationObjectAllocator::Free(VmaAllocation hAlloc)
+{
+    VmaMutexLock guard(m_Mutex);
+
+    m_Allocator.Free(hAlloc);
+}
+#endif // _VMA_ALLOCATION_OBJECT_ALLOCATOR
+
+#ifndef _VMA_VIRTUAL_BLOCK_T
+// Virtual block object: a thin wrapper that forwards every operation to a
+// heap-allocated VmaBlockMetadata (m_Metadata) chosen in the constructor.
+struct VmaVirtualBlock_T
+{
+ VMA_CLASS_NO_COPY_NO_MOVE(VmaVirtualBlock_T)
+public:
+ // True when createInfo.pAllocationCallbacks was non-null at construction.
+ const bool m_AllocationCallbacksSpecified;
+ // Copy of the user's callbacks, or VmaEmptyAllocationCallbacks when none were given.
+ const VkAllocationCallbacks m_AllocationCallbacks;
+
+ VmaVirtualBlock_T(const VmaVirtualBlockCreateInfo& createInfo);
+ ~VmaVirtualBlock_T();
+
+ // Always succeeds; performs no work.
+ VkResult Init() { return VK_SUCCESS; }
+ bool IsEmpty() const { return m_Metadata->IsEmpty(); }
+ void Free(VmaVirtualAllocation allocation) { m_Metadata->Free((VmaAllocHandle)allocation); }
+ void SetAllocationUserData(VmaVirtualAllocation allocation, void* userData) { m_Metadata->SetAllocationUserData((VmaAllocHandle)allocation, userData); }
+ void Clear() { m_Metadata->Clear(); }
+
+ // Returns &m_AllocationCallbacks when callbacks were specified, else VMA_NULL.
+ const VkAllocationCallbacks* GetAllocationCallbacks() const;
+ void GetAllocationInfo(VmaVirtualAllocation allocation, VmaVirtualAllocationInfo& outInfo);
+ // outOffset is optional (may be null).
+ VkResult Allocate(const VmaVirtualAllocationCreateInfo& createInfo, VmaVirtualAllocation& outAllocation,
+ VkDeviceSize* outOffset);
+ void GetStatistics(VmaStatistics& outStats) const;
+ void CalculateDetailedStatistics(VmaDetailedStatistics& outStats) const;
+#if VMA_STATS_STRING_ENABLED
+ void BuildStatsString(bool detailedMap, VmaStringBuilder& sb) const;
+#endif
+
+private:
+ // Created with vma_new in the constructor, released with vma_delete in the destructor.
+ VmaBlockMetadata* m_Metadata;
+};
+
+#ifndef _VMA_VIRTUAL_BLOCK_T_FUNCTIONS
+// Stores a copy of the user's allocation callbacks (or the empty set), creates
+// the metadata object selected by the algorithm bits of createInfo.flags, and
+// initializes it with createInfo.size. Unknown algorithm bits assert and fall
+// back to TLSF.
+VmaVirtualBlock_T::VmaVirtualBlock_T(const VmaVirtualBlockCreateInfo& createInfo)
+    : m_AllocationCallbacksSpecified(createInfo.pAllocationCallbacks != VMA_NULL),
+    m_AllocationCallbacks(createInfo.pAllocationCallbacks != VMA_NULL ? *createInfo.pAllocationCallbacks : VmaEmptyAllocationCallbacks)
+{
+    const uint32_t algorithm = createInfo.flags & VMA_VIRTUAL_BLOCK_CREATE_ALGORITHM_MASK;
+
+    if (algorithm == VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT)
+    {
+        m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_Linear)(VK_NULL_HANDLE, 1, true);
+    }
+    else
+    {
+        // 0 selects TLSF; any other value is invalid but still gets TLSF.
+        if (algorithm != 0)
+        {
+            VMA_ASSERT(0);
+        }
+        m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_TLSF)(VK_NULL_HANDLE, 1, true);
+    }
+
+    m_Metadata->Init(createInfo.size);
+}
+
+// Logs any allocations still present, asserts (leak assert) that the block is
+// empty, then deletes the metadata object with the block's own callbacks.
+VmaVirtualBlock_T::~VmaVirtualBlock_T()
+{
+ // Define macro VMA_DEBUG_LOG_FORMAT or more specialized VMA_LEAK_LOG_FORMAT
+ // to receive the list of the unfreed allocations.
+ if (!m_Metadata->IsEmpty())
+ m_Metadata->DebugLogAllAllocations();
+ // This is the most important assert in the entire library.
+ // Hitting it means you have some memory leak - unreleased virtual allocations.
+ VMA_ASSERT_LEAK(m_Metadata->IsEmpty() && "Some virtual allocations were not freed before destruction of this virtual block!");
+
+ vma_delete(GetAllocationCallbacks(), m_Metadata);
+}
+
+// Returns the stored callbacks only when the user supplied some at creation;
+// otherwise returns VMA_NULL.
+const VkAllocationCallbacks* VmaVirtualBlock_T::GetAllocationCallbacks() const
+{
+    if (!m_AllocationCallbacksSpecified)
+    {
+        return VMA_NULL;
+    }
+    return &m_AllocationCallbacks;
+}
+
+// Fills outInfo for the given virtual allocation by delegating to the metadata.
+void VmaVirtualBlock_T::GetAllocationInfo(VmaVirtualAllocation allocation, VmaVirtualAllocationInfo& outInfo)
+{
+    const VmaAllocHandle handle = (VmaAllocHandle)allocation;
+    m_Metadata->GetAllocationInfo(handle, outInfo);
+}
+
+// Tries to place a virtual allocation described by createInfo.
+// On success: commits it in the metadata, writes the handle to outAllocation,
+// writes its offset to *outOffset when outOffset is non-null, returns VK_SUCCESS.
+// On failure: outAllocation becomes VK_NULL_HANDLE, *outOffset (if given)
+// becomes UINT64_MAX, and VK_ERROR_OUT_OF_DEVICE_MEMORY is returned.
+VkResult VmaVirtualBlock_T::Allocate(const VmaVirtualAllocationCreateInfo& createInfo, VmaVirtualAllocation& outAllocation,
+    VkDeviceSize* outOffset)
+{
+    // An alignment of 0 is treated as 1.
+    const VkDeviceSize allocAlignment = VMA_MAX(createInfo.alignment, (VkDeviceSize)1);
+    const bool upperAddress = (createInfo.flags & VMA_VIRTUAL_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0;
+
+    VmaAllocationRequest request = {};
+    const bool placed = m_Metadata->CreateAllocationRequest(
+        createInfo.size,
+        allocAlignment,
+        upperAddress,
+        VMA_SUBALLOCATION_TYPE_UNKNOWN, // allocType - unimportant
+        createInfo.flags & VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MASK, // strategy
+        &request);
+
+    if (!placed)
+    {
+        outAllocation = (VmaVirtualAllocation)VK_NULL_HANDLE;
+        if (outOffset)
+        {
+            *outOffset = UINT64_MAX;
+        }
+        return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+    }
+
+    m_Metadata->Alloc(request,
+        VMA_SUBALLOCATION_TYPE_UNKNOWN, // type - unimportant
+        createInfo.pUserData);
+    outAllocation = (VmaVirtualAllocation)request.allocHandle;
+    if (outOffset)
+    {
+        *outOffset = m_Metadata->GetAllocationOffset(request.allocHandle);
+    }
+    return VK_SUCCESS;
+}
+
+// Resets outStats, then lets the metadata add this block's numbers to it.
+void VmaVirtualBlock_T::GetStatistics(VmaStatistics& outStats) const
+{
+    VmaClearStatistics(outStats);
+
+    this->m_Metadata->AddStatistics(outStats);
+}
+
+// Resets outStats, then lets the metadata add this block's detailed numbers to it.
+void VmaVirtualBlock_T::CalculateDetailedStatistics(VmaDetailedStatistics& outStats) const
+{
+    VmaClearDetailedStatistics(outStats);
+
+    this->m_Metadata->AddDetailedStatistics(outStats);
+}
+
+#if VMA_STATS_STRING_ENABLED
+// Writes a JSON object into sb containing a "Stats" entry and, when
+// detailedMap is true, a "Details" object produced by the metadata.
+void VmaVirtualBlock_T::BuildStatsString(bool detailedMap, VmaStringBuilder& sb) const
+{
+    // Gather the numbers first; this does not touch the string builder.
+    VmaDetailedStatistics stats;
+    CalculateDetailedStatistics(stats);
+
+    VmaJsonWriter json(GetAllocationCallbacks(), sb);
+    json.BeginObject();
+    {
+        json.WriteString("Stats");
+        VmaPrintDetailedStatistics(json, stats);
+
+        if (detailedMap)
+        {
+            json.WriteString("Details");
+            json.BeginObject();
+            m_Metadata->PrintDetailedMap(json);
+            json.EndObject();
+        }
+    }
+    json.EndObject();
+}
+#endif // VMA_STATS_STRING_ENABLED
+#endif // _VMA_VIRTUAL_BLOCK_T_FUNCTIONS
+#endif // _VMA_VIRTUAL_BLOCK_T
+
+
+// Main allocator object.
+// Holds the device/instance handles, the enabled-extension flags, cached
+// physical-device and memory properties, the per-memory-type default block
+// vectors and dedicated-allocation lists, budget counters, and the list of
+// pools. Member functions other than the small inline getters are only
+// declared here.
+struct VmaAllocator_T
+{
+ VMA_CLASS_NO_COPY_NO_MOVE(VmaAllocator_T)
+public:
+ const bool m_UseMutex;
+ const uint32_t m_VulkanApiVersion;
+ bool m_UseKhrDedicatedAllocation; // Can be set only if m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0).
+ bool m_UseKhrBindMemory2; // Can be set only if m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0).
+ bool m_UseExtMemoryBudget;
+ bool m_UseAmdDeviceCoherentMemory;
+ bool m_UseKhrBufferDeviceAddress;
+ bool m_UseExtMemoryPriority;
+ bool m_UseKhrMaintenance4;
+ bool m_UseKhrMaintenance5;
+ bool m_UseKhrExternalMemoryWin32;
+ const VkDevice m_hDevice;
+ const VkInstance m_hInstance;
+ const bool m_AllocationCallbacksSpecified;
+ const VkAllocationCallbacks m_AllocationCallbacks;
+ VmaDeviceMemoryCallbacks m_DeviceMemoryCallbacks;
+ VmaAllocationObjectAllocator m_AllocationObjectAllocator;
+
+ // Each bit (1 << i) is set if HeapSizeLimit is enabled for that heap, so cannot allocate more than the heap size.
+ uint32_t m_HeapSizeLimitMask;
+
+ VkPhysicalDeviceProperties m_PhysicalDeviceProperties;
+ VkPhysicalDeviceMemoryProperties m_MemProps;
+
+ // Default pools.
+ VmaBlockVector* m_pBlockVectors[VK_MAX_MEMORY_TYPES];
+ VmaDedicatedAllocationList m_DedicatedAllocations[VK_MAX_MEMORY_TYPES];
+
+ VmaCurrentBudgetData m_Budget;
+ VMA_ATOMIC_UINT32 m_DeviceMemoryCount; // Total number of VkDeviceMemory objects.
+
+ VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo);
+ VkResult Init(const VmaAllocatorCreateInfo* pCreateInfo);
+ ~VmaAllocator_T();
+
+ // Returns the stored callbacks only when they were specified, else VMA_NULL.
+ const VkAllocationCallbacks* GetAllocationCallbacks() const
+ {
+ return m_AllocationCallbacksSpecified ? &m_AllocationCallbacks : VMA_NULL;
+ }
+ const VmaVulkanFunctions& GetVulkanFunctions() const
+ {
+ return m_VulkanFunctions;
+ }
+
+ VkPhysicalDevice GetPhysicalDevice() const { return m_PhysicalDevice; }
+
+ // Larger of the debug minimum (VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY) and
+ // the device's limits.bufferImageGranularity.
+ VkDeviceSize GetBufferImageGranularity() const
+ {
+ return VMA_MAX(
+ static_cast<VkDeviceSize>(VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY),
+ m_PhysicalDeviceProperties.limits.bufferImageGranularity);
+ }
+
+ uint32_t GetMemoryHeapCount() const { return m_MemProps.memoryHeapCount; }
+ uint32_t GetMemoryTypeCount() const { return m_MemProps.memoryTypeCount; }
+
+ // Asserts that memTypeIndex is below the memory type count.
+ uint32_t MemoryTypeIndexToHeapIndex(uint32_t memTypeIndex) const
+ {
+ VMA_ASSERT(memTypeIndex < m_MemProps.memoryTypeCount);
+ return m_MemProps.memoryTypes[memTypeIndex].heapIndex;
+ }
+ // True when specific memory type is HOST_VISIBLE but not HOST_COHERENT.
+ // Note: memTypeIndex is not range-checked here.
+ bool IsMemoryTypeNonCoherent(uint32_t memTypeIndex) const
+ {
+ return (m_MemProps.memoryTypes[memTypeIndex].propertyFlags & (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) ==
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+ }
+ // Minimum alignment for all allocations in specific memory type.
+ // For non-coherent types this is at least limits.nonCoherentAtomSize.
+ VkDeviceSize GetMemoryTypeMinAlignment(uint32_t memTypeIndex) const
+ {
+ return IsMemoryTypeNonCoherent(memTypeIndex) ?
+ VMA_MAX((VkDeviceSize)VMA_MIN_ALIGNMENT, m_PhysicalDeviceProperties.limits.nonCoherentAtomSize) :
+ (VkDeviceSize)VMA_MIN_ALIGNMENT;
+ }
+
+ bool IsIntegratedGpu() const
+ {
+ return m_PhysicalDeviceProperties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
+ }
+
+ uint32_t GetGlobalMemoryTypeBits() const { return m_GlobalMemoryTypeBits; }
+
+ void GetBufferMemoryRequirements(
+ VkBuffer hBuffer,
+ VkMemoryRequirements& memReq,
+ bool& requiresDedicatedAllocation,
+ bool& prefersDedicatedAllocation) const;
+ void GetImageMemoryRequirements(
+ VkImage hImage,
+ VkMemoryRequirements& memReq,
+ bool& requiresDedicatedAllocation,
+ bool& prefersDedicatedAllocation) const;
+ VkResult FindMemoryTypeIndex(
+ uint32_t memoryTypeBits,
+ const VmaAllocationCreateInfo* pAllocationCreateInfo,
+ VmaBufferImageUsage bufImgUsage,
+ uint32_t* pMemoryTypeIndex) const;
+
+ // Main allocation function.
+ VkResult AllocateMemory(
+ const VkMemoryRequirements& vkMemReq,
+ bool requiresDedicatedAllocation,
+ bool prefersDedicatedAllocation,
+ VkBuffer dedicatedBuffer,
+ VkImage dedicatedImage,
+ VmaBufferImageUsage dedicatedBufferImageUsage,
+ const VmaAllocationCreateInfo& createInfo,
+ VmaSuballocationType suballocType,
+ size_t allocationCount,
+ VmaAllocation* pAllocations);
+
+ // Main deallocation function.
+ void FreeMemory(
+ size_t allocationCount,
+ const VmaAllocation* pAllocations);
+
+ void CalculateStatistics(VmaTotalStatistics* pStats);
+
+ void GetHeapBudgets(
+ VmaBudget* outBudgets, uint32_t firstHeap, uint32_t heapCount);
+
+#if VMA_STATS_STRING_ENABLED
+ void PrintDetailedMap(class VmaJsonWriter& json);
+#endif
+
+ void GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationInfo* pAllocationInfo);
+ void GetAllocationInfo2(VmaAllocation hAllocation, VmaAllocationInfo2* pAllocationInfo);
+
+ VkResult CreatePool(const VmaPoolCreateInfo* pCreateInfo, VmaPool* pPool);
+ void DestroyPool(VmaPool pool);
+ void GetPoolStatistics(VmaPool pool, VmaStatistics* pPoolStats);
+ void CalculatePoolStatistics(VmaPool pool, VmaDetailedStatistics* pPoolStats);
+
+ void SetCurrentFrameIndex(uint32_t frameIndex);
+ // Atomic load of m_CurrentFrameIndex.
+ uint32_t GetCurrentFrameIndex() const { return m_CurrentFrameIndex.load(); }
+
+ VkResult CheckPoolCorruption(VmaPool hPool);
+ VkResult CheckCorruption(uint32_t memoryTypeBits);
+
+ // Call to Vulkan function vkAllocateMemory with accompanying bookkeeping.
+ VkResult AllocateVulkanMemory(const VkMemoryAllocateInfo* pAllocateInfo, VkDeviceMemory* pMemory);
+ // Call to Vulkan function vkFreeMemory with accompanying bookkeeping.
+ void FreeVulkanMemory(uint32_t memoryType, VkDeviceSize size, VkDeviceMemory hMemory);
+ // Call to Vulkan function vkBindBufferMemory or vkBindBufferMemory2KHR.
+ VkResult BindVulkanBuffer(
+ VkDeviceMemory memory,
+ VkDeviceSize memoryOffset,
+ VkBuffer buffer,
+ const void* pNext);
+ // Call to Vulkan function vkBindImageMemory or vkBindImageMemory2KHR.
+ VkResult BindVulkanImage(
+ VkDeviceMemory memory,
+ VkDeviceSize memoryOffset,
+ VkImage image,
+ const void* pNext);
+
+ VkResult Map(VmaAllocation hAllocation, void** ppData);
+ void Unmap(VmaAllocation hAllocation);
+
+ VkResult BindBufferMemory(
+ VmaAllocation hAllocation,
+ VkDeviceSize allocationLocalOffset,
+ VkBuffer hBuffer,
+ const void* pNext);
+ VkResult BindImageMemory(
+ VmaAllocation hAllocation,
+ VkDeviceSize allocationLocalOffset,
+ VkImage hImage,
+ const void* pNext);
+
+ VkResult FlushOrInvalidateAllocation(
+ VmaAllocation hAllocation,
+ VkDeviceSize offset, VkDeviceSize size,
+ VMA_CACHE_OPERATION op);
+ VkResult FlushOrInvalidateAllocations(
+ uint32_t allocationCount,
+ const VmaAllocation* allocations,
+ const VkDeviceSize* offsets, const VkDeviceSize* sizes,
+ VMA_CACHE_OPERATION op);
+
+ VkResult CopyMemoryToAllocation(
+ const void* pSrcHostPointer,
+ VmaAllocation dstAllocation,
+ VkDeviceSize dstAllocationLocalOffset,
+ VkDeviceSize size);
+ VkResult CopyAllocationToMemory(
+ VmaAllocation srcAllocation,
+ VkDeviceSize srcAllocationLocalOffset,
+ void* pDstHostPointer,
+ VkDeviceSize size);
+
+ void FillAllocation(const VmaAllocation hAllocation, uint8_t pattern);
+
+ /*
+ Returns bit mask of memory types that can support defragmentation on GPU as
+ they support creation of required buffer for copy operations.
+ */
+ uint32_t GetGpuDefragmentationMemoryTypeBits();
+
+#if VMA_EXTERNAL_MEMORY
+ // Note: memTypeIndex is not range-checked here.
+ VkExternalMemoryHandleTypeFlagsKHR GetExternalMemoryHandleTypeFlags(uint32_t memTypeIndex) const
+ {
+ return m_TypeExternalMemoryHandleTypes[memTypeIndex];
+ }
+#endif // #if VMA_EXTERNAL_MEMORY
+
+private:
+ VkDeviceSize m_PreferredLargeHeapBlockSize;
+
+ VkPhysicalDevice m_PhysicalDevice;
+ VMA_ATOMIC_UINT32 m_CurrentFrameIndex;
+ VMA_ATOMIC_UINT32 m_GpuDefragmentationMemoryTypeBits; // UINT32_MAX means uninitialized.
+#if VMA_EXTERNAL_MEMORY
+ VkExternalMemoryHandleTypeFlagsKHR m_TypeExternalMemoryHandleTypes[VK_MAX_MEMORY_TYPES];
+#endif // #if VMA_EXTERNAL_MEMORY
+
+ VMA_RW_MUTEX m_PoolsMutex;
+ typedef VmaIntrusiveLinkedList<VmaPoolListItemTraits> PoolList;
+ // Protected by m_PoolsMutex.
+ PoolList m_Pools;
+ uint32_t m_NextPoolId;
+
+ VmaVulkanFunctions m_VulkanFunctions;
+
+ // Global bit mask AND-ed with any memoryTypeBits to disallow certain memory types.
+ uint32_t m_GlobalMemoryTypeBits;
+
+ void ImportVulkanFunctions(const VmaVulkanFunctions* pVulkanFunctions);
+
+#if VMA_STATIC_VULKAN_FUNCTIONS == 1
+ void ImportVulkanFunctions_Static();
+#endif
+
+ void ImportVulkanFunctions_Custom(const VmaVulkanFunctions* pVulkanFunctions);
+
+#if VMA_DYNAMIC_VULKAN_FUNCTIONS == 1
+ void ImportVulkanFunctions_Dynamic();
+#endif
+
+ void ValidateVulkanFunctions();
+
+ VkDeviceSize CalcPreferredBlockSize(uint32_t memTypeIndex);
+
+ VkResult AllocateMemoryOfType(
+ VmaPool pool,
+ VkDeviceSize size,
+ VkDeviceSize alignment,
+ bool dedicatedPreferred,
+ VkBuffer dedicatedBuffer,
+ VkImage dedicatedImage,
+ VmaBufferImageUsage dedicatedBufferImageUsage,
+ const VmaAllocationCreateInfo& createInfo,
+ uint32_t memTypeIndex,
+ VmaSuballocationType suballocType,
+ VmaDedicatedAllocationList& dedicatedAllocations,
+ VmaBlockVector& blockVector,
+ size_t allocationCount,
+ VmaAllocation* pAllocations);
+
+ // Helper function only to be used inside AllocateDedicatedMemory.
+ VkResult AllocateDedicatedMemoryPage(
+ VmaPool pool,
+ VkDeviceSize size,
+ VmaSuballocationType suballocType,
+ uint32_t memTypeIndex,
+ const VkMemoryAllocateInfo& allocInfo,
+ bool map,
+ bool isUserDataString,
+ bool isMappingAllowed,
+ void* pUserData,
+ VmaAllocation* pAllocation);
+
+ // Allocates and registers new VkDeviceMemory specifically for dedicated allocations.
+ VkResult AllocateDedicatedMemory(
+ VmaPool pool,
+ VkDeviceSize size,
+ VmaSuballocationType suballocType,
+ VmaDedicatedAllocationList& dedicatedAllocations,
+ uint32_t memTypeIndex,
+ bool map,
+ bool isUserDataString,
+ bool isMappingAllowed,
+ bool canAliasMemory,
+ void* pUserData,
+ float priority,
+ VkBuffer dedicatedBuffer,
+ VkImage dedicatedImage,
+ VmaBufferImageUsage dedicatedBufferImageUsage,
+ size_t allocationCount,
+ VmaAllocation* pAllocations,
+ const void* pNextChain = VMA_NULL);
+
+ void FreeDedicatedMemory(const VmaAllocation allocation);
+
+ VkResult CalcMemTypeParams(
+ VmaAllocationCreateInfo& outCreateInfo,
+ uint32_t memTypeIndex,
+ VkDeviceSize size,
+ size_t allocationCount);
+ VkResult CalcAllocationParams(
+ VmaAllocationCreateInfo& outCreateInfo,
+ bool dedicatedRequired,
+ bool dedicatedPreferred);
+
+ /*
+ Calculates and returns bit mask of memory types that can support defragmentation
+ on GPU as they support creation of required buffer for copy operations.
+ */
+ uint32_t CalculateGpuDefragmentationMemoryTypeBits() const;
+ uint32_t CalculateGlobalMemoryTypeBits() const;
+
+ bool GetFlushOrInvalidateRange(
+ VmaAllocation allocation,
+ VkDeviceSize offset, VkDeviceSize size,
+ VkMappedMemoryRange& outRange) const;
+
+#if VMA_MEMORY_BUDGET
+ void UpdateVulkanBudget();
+#endif // #if VMA_MEMORY_BUDGET
+};
+
+
+#ifndef _VMA_MEMORY_FUNCTIONS
+// Allocator-handle overload: forwards to the VkAllocationCallbacks overload
+// using the callbacks stored in the allocator.
+static void* VmaMalloc(VmaAllocator hAllocator, size_t size, size_t alignment)
+{
+    const VkAllocationCallbacks* const callbacks = &hAllocator->m_AllocationCallbacks;
+    return VmaMalloc(callbacks, size, alignment);
+}
+
+// Allocator-handle overload: forwards to the VkAllocationCallbacks overload
+// using the callbacks stored in the allocator.
+static void VmaFree(VmaAllocator hAllocator, void* ptr)
+{
+    const VkAllocationCallbacks* const callbacks = &hAllocator->m_AllocationCallbacks;
+    VmaFree(callbacks, ptr);
+}
+
+// Returns raw storage sized and aligned for one T. No constructor is run.
+template<typename T>
+static T* VmaAllocate(VmaAllocator hAllocator)
+{
+    void* const storage = VmaMalloc(hAllocator, sizeof(T), VMA_ALIGN_OF(T));
+    return (T*)storage;
+}
+
+// Returns raw storage for count objects of type T, aligned for T.
+// No constructors are run.
+template<typename T>
+static T* VmaAllocateArray(VmaAllocator hAllocator, size_t count)
+{
+    void* const storage = VmaMalloc(hAllocator, sizeof(T) * count, VMA_ALIGN_OF(T));
+    return (T*)storage;
+}
+
+// Destroys *ptr and releases its storage through the allocator.
+// A null ptr is ignored.
+template<typename T>
+static void vma_delete(VmaAllocator hAllocator, T* ptr)
+{
+    if(ptr == VMA_NULL)
+    {
+        return;
+    }
+
+    ptr->~T();
+    VmaFree(hAllocator, ptr);
+}
+
+// Destroys count elements of the array at ptr in reverse index order, then
+// releases the storage through the allocator. A null ptr is ignored.
+template<typename T>
+static void vma_delete_array(VmaAllocator hAllocator, T* ptr, size_t count)
+{
+    if(ptr == VMA_NULL)
+    {
+        return;
+    }
+
+    size_t remaining = count;
+    while(remaining != 0)
+    {
+        --remaining;
+        ptr[remaining].~T();
+    }
+    VmaFree(hAllocator, ptr);
+}
+#endif // _VMA_MEMORY_FUNCTIONS
+
+#ifndef _VMA_DEVICE_MEMORY_BLOCK_FUNCTIONS
+// Constructs an empty block: no metadata, no VkDeviceMemory, not mapped.
+// The hAllocator argument is not used by this constructor.
+VmaDeviceMemoryBlock::VmaDeviceMemoryBlock(VmaAllocator hAllocator)
+    : m_pMetadata(VMA_NULL)
+    , m_MemoryTypeIndex(UINT32_MAX)
+    , m_Id(0)
+    , m_hMemory(VK_NULL_HANDLE)
+    , m_MapCount(0)
+    , m_pMappedData(VMA_NULL)
+{
+}
+
+// Performs no cleanup itself; only asserts (leak asserts) that the block is
+// no longer mapped and that m_hMemory is already VK_NULL_HANDLE, which
+// Destroy() sets after freeing the memory.
+VmaDeviceMemoryBlock::~VmaDeviceMemoryBlock()
+{
+ VMA_ASSERT_LEAK(m_MapCount == 0 && "VkDeviceMemory block is being destroyed while it is still mapped.");
+ VMA_ASSERT_LEAK(m_hMemory == VK_NULL_HANDLE);
+}
+
+// Attaches an already-allocated VkDeviceMemory to this block and creates the
+// metadata object that will track suballocations inside it.
+// algorithm selects the metadata type: VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT
+// gives the linear metadata, 0 gives TLSF; any other value asserts and falls
+// back to TLSF. The block must not already own memory (asserted).
+void VmaDeviceMemoryBlock::Init(
+    VmaAllocator hAllocator,
+    VmaPool hParentPool,
+    uint32_t newMemoryTypeIndex,
+    VkDeviceMemory newMemory,
+    VkDeviceSize newSize,
+    uint32_t id,
+    uint32_t algorithm,
+    VkDeviceSize bufferImageGranularity)
+{
+    VMA_ASSERT(m_hMemory == VK_NULL_HANDLE);
+
+    m_hParentPool = hParentPool;
+    m_MemoryTypeIndex = newMemoryTypeIndex;
+    m_Id = id;
+    m_hMemory = newMemory;
+
+    if (algorithm == VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT)
+    {
+        m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_Linear)(hAllocator->GetAllocationCallbacks(),
+            bufferImageGranularity, false); // isVirtual
+    }
+    else
+    {
+        // 0 is the regular TLSF choice; anything else is unexpected.
+        if (algorithm != 0)
+        {
+            VMA_ASSERT(0);
+        }
+        m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_TLSF)(hAllocator->GetAllocationCallbacks(),
+            bufferImageGranularity, false); // isVirtual
+    }
+
+    m_pMetadata->Init(newSize);
+}
+
+// Releases the block's VkDeviceMemory through the allocator and deletes its
+// metadata. Logs and asserts (leak assert) if allocations are still present.
+void VmaDeviceMemoryBlock::Destroy(VmaAllocator allocator)
+{
+    // Define macro VMA_DEBUG_LOG_FORMAT or more specialized VMA_LEAK_LOG_FORMAT
+    // to receive the list of the unfreed allocations.
+    const bool hasLiveAllocations = !m_pMetadata->IsEmpty();
+    if (hasLiveAllocations)
+    {
+        m_pMetadata->DebugLogAllAllocations();
+    }
+    // This is the most important assert in the entire library.
+    // Hitting it means you have some memory leak - unreleased VmaAllocation objects.
+    VMA_ASSERT_LEAK(m_pMetadata->IsEmpty() && "Some allocations were not freed before destruction of this memory block!");
+
+    VMA_ASSERT_LEAK(m_hMemory != VK_NULL_HANDLE);
+
+    // Free the device memory first; the size comes from the metadata, so the
+    // metadata must still be alive at this point.
+    const VkDeviceSize blockSize = m_pMetadata->GetSize();
+    allocator->FreeVulkanMemory(m_MemoryTypeIndex, blockSize, m_hMemory);
+    m_hMemory = VK_NULL_HANDLE;
+
+    vma_delete(allocator, m_pMetadata);
+    m_pMetadata = VMA_NULL;
+}
+
+// Notifies the mapping hysteresis of an allocation, under m_MapAndBindMutex.
+void VmaDeviceMemoryBlock::PostAlloc(VmaAllocator hAllocator)
+{
+    VmaMutexLock guard(m_MapAndBindMutex, hAllocator->m_UseMutex);
+
+    m_MappingHysteresis.PostAlloc();
+}
+
+// Notifies the mapping hysteresis of a free, under m_MapAndBindMutex. When the
+// hysteresis reports true and no user mapping remains (m_MapCount == 0), the
+// memory is unmapped.
+void VmaDeviceMemoryBlock::PostFree(VmaAllocator hAllocator)
+{
+    VmaMutexLock guard(m_MapAndBindMutex, hAllocator->m_UseMutex);
+
+    if(!m_MappingHysteresis.PostFree())
+    {
+        return;
+    }
+
+    VMA_ASSERT(m_MappingHysteresis.GetExtraMapping() == 0);
+    if (m_MapCount != 0)
+    {
+        return;
+    }
+
+    m_pMappedData = VMA_NULL;
+    (*hAllocator->GetVulkanFunctions().vkUnmapMemory)(hAllocator->m_hDevice, m_hMemory);
+}
+
+// Checks that the block owns memory and has a non-zero size, then returns the
+// result of the metadata's own Validate().
+// NOTE(review): VMA_VALIDATE is defined elsewhere; presumably it returns false
+// from this function when the condition fails - confirm against the macro.
+bool VmaDeviceMemoryBlock::Validate() const
+{
+ VMA_VALIDATE((m_hMemory != VK_NULL_HANDLE) &&
+ (m_pMetadata->GetSize() != 0));
+
+ return m_pMetadata->Validate();
+}
+
+// Maps the block, asks the metadata to check the mapped data for corruption,
+// unmaps, and returns the check's result. A mapping failure is returned as-is.
+VkResult VmaDeviceMemoryBlock::CheckCorruption(VmaAllocator hAllocator)
+{
+    void* pData = VMA_NULL;
+    const VkResult mapResult = Map(hAllocator, 1, &pData);
+    if (mapResult != VK_SUCCESS)
+    {
+        return mapResult;
+    }
+
+    const VkResult checkResult = m_pMetadata->CheckCorruption(pData);
+    Unmap(hAllocator, 1);
+    return checkResult;
+}
+
+// Adds count to the block's map reference count, calling vkMapMemory on the
+// whole VkDeviceMemory only when the block was not mapped at all (neither by
+// users nor by the hysteresis' extra mapping).
+// ppData is optional; when non-null it receives the mapped base pointer.
+// Returns VK_SUCCESS, or the vkMapMemory error (state is left unchanged then).
+VkResult VmaDeviceMemoryBlock::Map(VmaAllocator hAllocator, uint32_t count, void** ppData)
+{
+    if (count == 0)
+    {
+        return VK_SUCCESS;
+    }
+
+    VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex);
+
+    const uint32_t prevTotalMapCount = m_MapCount + m_MappingHysteresis.GetExtraMapping();
+    if (prevTotalMapCount == 0)
+    {
+        // First mapping of this block.
+        const VkResult mapResult = (*hAllocator->GetVulkanFunctions().vkMapMemory)(
+            hAllocator->m_hDevice,
+            m_hMemory,
+            0, // offset
+            VK_WHOLE_SIZE,
+            0, // flags
+            &m_pMappedData);
+        if (mapResult != VK_SUCCESS)
+        {
+            return mapResult;
+        }
+
+        VMA_ASSERT(m_pMappedData != VMA_NULL);
+        m_MappingHysteresis.PostMap();
+        m_MapCount = count;
+    }
+    else
+    {
+        // Already mapped: just bump the counters.
+        VMA_ASSERT(m_pMappedData != VMA_NULL);
+        m_MappingHysteresis.PostMap();
+        m_MapCount += count;
+    }
+
+    if (ppData != VMA_NULL)
+    {
+        *ppData = m_pMappedData;
+    }
+    return VK_SUCCESS;
+}
+
+// Subtracts count from the block's map reference count and calls
+// vkUnmapMemory when neither user mappings nor the hysteresis' extra mapping
+// remain. Asserts (and changes nothing) if count exceeds the current count.
+void VmaDeviceMemoryBlock::Unmap(VmaAllocator hAllocator, uint32_t count)
+{
+    if (count == 0)
+    {
+        return;
+    }
+
+    VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex);
+
+    if (m_MapCount < count)
+    {
+        VMA_ASSERT(0 && "VkDeviceMemory block is being unmapped while it was not previously mapped.");
+        return;
+    }
+
+    m_MapCount -= count;
+    if (m_MapCount + m_MappingHysteresis.GetExtraMapping() == 0)
+    {
+        m_pMappedData = VMA_NULL;
+        (*hAllocator->GetVulkanFunctions().vkUnmapMemory)(hAllocator->m_hDevice, m_hMemory);
+    }
+    // The hysteresis is notified after the unmap decision above.
+    m_MappingHysteresis.PostUnmap();
+}
+
+// Temporarily maps the block and writes the magic value at the offset just
+// past the allocation (allocOffset + allocSize). Returns the mapping error if
+// mapping fails, VK_SUCCESS otherwise.
+VkResult VmaDeviceMemoryBlock::WriteMagicValueAfterAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize)
+{
+    VMA_ASSERT(VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_MARGIN % 4 == 0 && VMA_DEBUG_DETECT_CORRUPTION);
+
+    void* pData;
+    const VkResult mapResult = Map(hAllocator, 1, &pData);
+    if (mapResult != VK_SUCCESS)
+    {
+        return mapResult;
+    }
+
+    const VkDeviceSize endOffset = allocOffset + allocSize;
+    VmaWriteMagicValue(pData, endOffset);
+
+    Unmap(hAllocator, 1);
+    return VK_SUCCESS;
+}
+
+// Temporarily maps the block and checks the magic value at the offset just
+// past the allocation (allocOffset + allocSize). A mismatch only triggers an
+// assert; the function still returns VK_SUCCESS. A mapping error is returned
+// as-is.
+VkResult VmaDeviceMemoryBlock::ValidateMagicValueAfterAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize)
+{
+    VMA_ASSERT(VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_MARGIN % 4 == 0 && VMA_DEBUG_DETECT_CORRUPTION);
+
+    void* pData;
+    const VkResult mapResult = Map(hAllocator, 1, &pData);
+    if (mapResult != VK_SUCCESS)
+    {
+        return mapResult;
+    }
+
+    const bool magicIntact = VmaValidateMagicValue(pData, allocOffset + allocSize);
+    if (!magicIntact)
+    {
+        VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER FREED ALLOCATION!");
+    }
+
+    Unmap(hAllocator, 1);
+    return VK_SUCCESS;
+}
+
+// Binds hBuffer to this block's memory at the allocation's offset plus
+// allocationLocalOffset. The allocation must be a block allocation living in
+// this block, and the local offset must be inside it (both asserted).
+VkResult VmaDeviceMemoryBlock::BindBufferMemory(
+    const VmaAllocator hAllocator,
+    const VmaAllocation hAllocation,
+    VkDeviceSize allocationLocalOffset,
+    VkBuffer hBuffer,
+    const void* pNext)
+{
+    VMA_ASSERT(hAllocation->GetType() == VmaAllocation_T::ALLOCATION_TYPE_BLOCK &&
+        hAllocation->GetBlock() == this);
+    VMA_ASSERT(allocationLocalOffset < hAllocation->GetSize() &&
+        "Invalid allocationLocalOffset. Did you forget that this offset is relative to the beginning of the allocation, not the whole memory block?");
+
+    // Translate the allocation-relative offset into a block-relative one.
+    const VkDeviceSize offsetInBlock = hAllocation->GetOffset() + allocationLocalOffset;
+
+    // This lock is important so that we don't call vkBind... and/or vkMap... simultaneously on the same VkDeviceMemory from multiple threads.
+    VmaMutexLock guard(m_MapAndBindMutex, hAllocator->m_UseMutex);
+    const VkResult bindResult = hAllocator->BindVulkanBuffer(m_hMemory, offsetInBlock, hBuffer, pNext);
+    return bindResult;
+}
+
+// Binds hImage to this block's memory at the allocation's offset plus
+// allocationLocalOffset. The allocation must be a block allocation living in
+// this block, and the local offset must be inside it (both asserted).
+VkResult VmaDeviceMemoryBlock::BindImageMemory(
+    const VmaAllocator hAllocator,
+    const VmaAllocation hAllocation,
+    VkDeviceSize allocationLocalOffset,
+    VkImage hImage,
+    const void* pNext)
+{
+    VMA_ASSERT(hAllocation->GetType() == VmaAllocation_T::ALLOCATION_TYPE_BLOCK &&
+        hAllocation->GetBlock() == this);
+    VMA_ASSERT(allocationLocalOffset < hAllocation->GetSize() &&
+        "Invalid allocationLocalOffset. Did you forget that this offset is relative to the beginning of the allocation, not the whole memory block?");
+
+    // Translate the allocation-relative offset into a block-relative one.
+    const VkDeviceSize offsetInBlock = hAllocation->GetOffset() + allocationLocalOffset;
+
+    // This lock is important so that we don't call vkBind... and/or vkMap... simultaneously on the same VkDeviceMemory from multiple threads.
+    VmaMutexLock guard(m_MapAndBindMutex, hAllocator->m_UseMutex);
+    const VkResult bindResult = hAllocator->BindVulkanImage(m_hMemory, offsetInBlock, hImage, pNext);
+    return bindResult;
+}
+
+#if VMA_EXTERNAL_MEMORY_WIN32
+// Obtains a Win32 handle for this block's VkDeviceMemory by forwarding to m_Handle.GetHandle().
+// pHandle must not be null (asserted); it is passed through as the output argument.
+VkResult VmaDeviceMemoryBlock::CreateWin32Handle(const VmaAllocator hAllocator, PFN_vkGetMemoryWin32HandleKHR pvkGetMemoryWin32HandleKHR, HANDLE hTargetProcess, HANDLE* pHandle) noexcept
+{
+ VMA_ASSERT(pHandle);
+ return m_Handle.GetHandle(hAllocator->m_hDevice, m_hMemory, pvkGetMemoryWin32HandleKHR, hTargetProcess, hAllocator->m_UseMutex, pHandle);
+}
+#endif // VMA_EXTERNAL_MEMORY_WIN32
+#endif // _VMA_DEVICE_MEMORY_BLOCK_FUNCTIONS
+
+#ifndef _VMA_ALLOCATION_T_FUNCTIONS
+// Constructs an allocation object in the ALLOCATION_TYPE_NONE state with zero size,
+// no user data, no name and a map count of 0.
+// mappingAllowed - when true, FLAG_MAPPING_ALLOWED is set in m_Flags.
+VmaAllocation_T::VmaAllocation_T(bool mappingAllowed)
+ : m_Alignment{ 1 },
+ m_Size{ 0 },
+ m_pUserData{ VMA_NULL },
+ m_pName{ VMA_NULL },
+ m_MemoryTypeIndex{ 0 },
+ m_Type{ (uint8_t)ALLOCATION_TYPE_NONE },
+ m_SuballocationType{ (uint8_t)VMA_SUBALLOCATION_TYPE_UNKNOWN },
+ m_MapCount{ 0 },
+ m_Flags{ 0 }
+{
+ if(mappingAllowed)
+ m_Flags |= (uint8_t)FLAG_MAPPING_ALLOWED;
+}
+
+// Destructor. Performs no cleanup itself; it only asserts that the allocation
+// has been unmapped and that its name string was already released.
+VmaAllocation_T::~VmaAllocation_T()
+{
+ VMA_ASSERT_LEAK(m_MapCount == 0 && "Allocation was not unmapped before destruction.");
+
+ // Check if owned string was freed.
+ VMA_ASSERT(m_pName == VMA_NULL);
+}
+
+// Turns an object that is still ALLOCATION_TYPE_NONE into a sub-allocation of `block`,
+// identified inside that block by `allocHandle`.
+// When `mapped` is true the allocation is flagged as persistently mapped; this requires
+// that mapping was allowed at construction time (asserted).
+void VmaAllocation_T::InitBlockAllocation(
+    VmaDeviceMemoryBlock* block,
+    VmaAllocHandle allocHandle,
+    VkDeviceSize alignment,
+    VkDeviceSize size,
+    uint32_t memoryTypeIndex,
+    VmaSuballocationType suballocationType,
+    bool mapped)
+{
+    VMA_ASSERT(m_Type == ALLOCATION_TYPE_NONE);
+    VMA_ASSERT(block != VMA_NULL);
+
+    // Basic properties.
+    m_Type = static_cast<uint8_t>(ALLOCATION_TYPE_BLOCK);
+    m_Alignment = alignment;
+    m_Size = size;
+    m_MemoryTypeIndex = memoryTypeIndex;
+
+    if (mapped)
+    {
+        VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it.");
+        m_Flags = static_cast<uint8_t>(m_Flags | FLAG_PERSISTENT_MAP);
+    }
+
+    // Location inside the owning block.
+    m_SuballocationType = static_cast<uint8_t>(suballocationType);
+    m_BlockAllocation.m_Block = block;
+    m_BlockAllocation.m_AllocHandle = allocHandle;
+}
+
+// Turns an object that is still ALLOCATION_TYPE_NONE into a dedicated allocation that
+// owns `hMemory` entirely. Alignment is recorded as 0 and the list links are cleared.
+// If `pMappedData` is non-null the allocation is flagged as persistently mapped and the
+// pointer is stored in the (lazily created) extra-data record.
+void VmaAllocation_T::InitDedicatedAllocation(
+    VmaAllocator allocator,
+    VmaPool hParentPool,
+    uint32_t memoryTypeIndex,
+    VkDeviceMemory hMemory,
+    VmaSuballocationType suballocationType,
+    void* pMappedData,
+    VkDeviceSize size)
+{
+    VMA_ASSERT(m_Type == ALLOCATION_TYPE_NONE);
+    VMA_ASSERT(hMemory != VK_NULL_HANDLE);
+
+    m_Type = static_cast<uint8_t>(ALLOCATION_TYPE_DEDICATED);
+    m_Alignment = 0;
+    m_Size = size;
+    m_MemoryTypeIndex = memoryTypeIndex;
+    m_SuballocationType = static_cast<uint8_t>(suballocationType);
+
+    m_DedicatedAllocation.m_ExtraData = VMA_NULL;
+    m_DedicatedAllocation.m_hParentPool = hParentPool;
+    m_DedicatedAllocation.m_hMemory = hMemory;
+    m_DedicatedAllocation.m_Prev = VMA_NULL;
+    m_DedicatedAllocation.m_Next = VMA_NULL;
+
+    // Nothing more to do unless the memory arrives already mapped.
+    if (pMappedData == VMA_NULL)
+    {
+        return;
+    }
+
+    VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it.");
+    m_Flags = static_cast<uint8_t>(m_Flags | FLAG_PERSISTENT_MAP);
+    EnsureExtraData(allocator);
+    m_DedicatedAllocation.m_ExtraData->m_pMappedData = pMappedData;
+}
+
+// Releases the resources owned by this allocation object: the name string and,
+// for dedicated allocations, the extra-data record.
+// NOTE(review): m_ExtraData can still be VMA_NULL here if EnsureExtraData() was never
+// called - confirm that vma_delete tolerates a null pointer.
+void VmaAllocation_T::Destroy(VmaAllocator allocator)
+{
+ FreeName(allocator);
+
+ if (GetType() == ALLOCATION_TYPE_DEDICATED)
+ {
+ vma_delete(allocator, m_DedicatedAllocation.m_ExtraData);
+ }
+}
+
+// Replaces the allocation's name. The previous name is always freed; if pName is
+// non-null, a copy of it is stored. Passing the currently stored pointer is not
+// allowed (asserted), because it is freed before the copy would be taken.
+void VmaAllocation_T::SetName(VmaAllocator hAllocator, const char* pName)
+{
+    VMA_ASSERT(pName == VMA_NULL || pName != m_pName);
+
+    FreeName(hAllocator);
+
+    if (pName == VMA_NULL)
+    {
+        return;
+    }
+
+    m_pName = VmaCreateStringCopy(hAllocator->GetAllocationCallbacks(), pName);
+}
+
+// Exchanges the block placement (m_BlockAllocation) of this allocation with that of
+// `allocation`. Both must be block allocations (asserted).
+// Returns this allocation's map count, which is left unchanged by the swap.
+uint8_t VmaAllocation_T::SwapBlockAllocation(VmaAllocator hAllocator, VmaAllocation allocation)
+{
+ VMA_ASSERT(allocation != VMA_NULL);
+ VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK);
+ VMA_ASSERT(allocation->m_Type == ALLOCATION_TYPE_BLOCK);
+
+ // Drop this allocation's map references from the block it is about to leave.
+ if (m_MapCount != 0)
+ m_BlockAllocation.m_Block->Unmap(hAllocator, m_MapCount);
+
+ // Re-point the metadata user data of both regions so each refers to its new owner.
+ m_BlockAllocation.m_Block->m_pMetadata->SetAllocationUserData(m_BlockAllocation.m_AllocHandle, allocation);
+ std::swap(m_BlockAllocation, allocation->m_BlockAllocation);
+ m_BlockAllocation.m_Block->m_pMetadata->SetAllocationUserData(m_BlockAllocation.m_AllocHandle, this);
+
+#if VMA_STATS_STRING_ENABLED
+ std::swap(m_BufferImageUsage, allocation->m_BufferImageUsage);
+#endif
+ return m_MapCount;
+}
+
+// Returns the handle of this allocation inside its block's metadata.
+// Dedicated allocations have no such handle and yield VK_NULL_HANDLE;
+// any other type is a logic error (asserted) and also yields VK_NULL_HANDLE.
+VmaAllocHandle VmaAllocation_T::GetAllocHandle() const
+{
+    if (m_Type == ALLOCATION_TYPE_BLOCK)
+    {
+        return m_BlockAllocation.m_AllocHandle;
+    }
+    if (m_Type == ALLOCATION_TYPE_DEDICATED)
+    {
+        return VK_NULL_HANDLE;
+    }
+
+    VMA_ASSERT(0);
+    return VK_NULL_HANDLE;
+}
+
+// Returns the offset of this allocation within its VkDeviceMemory.
+// Block allocations query the block's metadata; dedicated allocations always start at 0.
+// Any other type is a logic error (asserted) and yields 0.
+VkDeviceSize VmaAllocation_T::GetOffset() const
+{
+    if (m_Type == ALLOCATION_TYPE_BLOCK)
+    {
+        return m_BlockAllocation.m_Block->m_pMetadata->GetAllocationOffset(m_BlockAllocation.m_AllocHandle);
+    }
+    if (m_Type == ALLOCATION_TYPE_DEDICATED)
+    {
+        return 0;
+    }
+
+    VMA_ASSERT(0);
+    return 0;
+}
+
+// Returns the pool this allocation belongs to: the owning block's parent pool for
+// block allocations, or the stored parent pool for dedicated allocations.
+// Any other type is a logic error (asserted) and yields VK_NULL_HANDLE.
+VmaPool VmaAllocation_T::GetParentPool() const
+{
+    if (m_Type == ALLOCATION_TYPE_BLOCK)
+    {
+        return m_BlockAllocation.m_Block->GetParentPool();
+    }
+    if (m_Type == ALLOCATION_TYPE_DEDICATED)
+    {
+        return m_DedicatedAllocation.m_hParentPool;
+    }
+
+    VMA_ASSERT(0);
+    return VK_NULL_HANDLE;
+}
+
+// Returns the VkDeviceMemory backing this allocation: the owning block's memory for
+// block allocations, or the allocation's own memory for dedicated allocations.
+// Any other type is a logic error (asserted) and yields VK_NULL_HANDLE.
+VkDeviceMemory VmaAllocation_T::GetMemory() const
+{
+    if (m_Type == ALLOCATION_TYPE_BLOCK)
+    {
+        return m_BlockAllocation.m_Block->GetDeviceMemory();
+    }
+    if (m_Type == ALLOCATION_TYPE_DEDICATED)
+    {
+        return m_DedicatedAllocation.m_hMemory;
+    }
+
+    VMA_ASSERT(0);
+    return VK_NULL_HANDLE;
+}
+
+// Returns the CPU pointer to the start of this allocation, or VMA_NULL when it is not mapped.
+// Block allocation: the block's mapped pointer advanced by the allocation's offset, but only
+// while the allocation has a non-zero map count or is persistently mapped.
+// Dedicated allocation: the pointer stored in the extra-data record, if that record exists.
+void* VmaAllocation_T::GetMappedData() const
+{
+    if (m_Type == ALLOCATION_TYPE_BLOCK)
+    {
+        const bool isMapped = (m_MapCount != 0) || IsPersistentMap();
+        if (!isMapped)
+        {
+            return VMA_NULL;
+        }
+
+        void* pBlockData = m_BlockAllocation.m_Block->GetMappedData();
+        VMA_ASSERT(pBlockData != VMA_NULL);
+        return (char*)pBlockData + GetOffset();
+    }
+
+    if (m_Type == ALLOCATION_TYPE_DEDICATED)
+    {
+        // A stored pointer must exist exactly when the allocation counts as mapped.
+        VMA_ASSERT((m_DedicatedAllocation.m_ExtraData != VMA_NULL && m_DedicatedAllocation.m_ExtraData->m_pMappedData != VMA_NULL) ==
+            (m_MapCount != 0 || IsPersistentMap()));
+        if (m_DedicatedAllocation.m_ExtraData == VMA_NULL)
+        {
+            return VMA_NULL;
+        }
+        return m_DedicatedAllocation.m_ExtraData->m_pMappedData;
+    }
+
+    VMA_ASSERT(0);
+    return VMA_NULL;
+}
+
+// Registers one more mapping of this block allocation by incrementing its map count.
+// The count saturates at 0xFF; exceeding it is reported through VMA_ASSERT and the
+// count is left unchanged.
+void VmaAllocation_T::BlockAllocMap()
+{
+    VMA_ASSERT(GetType() == ALLOCATION_TYPE_BLOCK);
+    VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it.");
+
+    if (m_MapCount >= 0xFF)
+    {
+        VMA_ASSERT(0 && "Allocation mapped too many times simultaneously.");
+        return;
+    }
+
+    ++m_MapCount;
+}
+
+// Removes one mapping of this block allocation by decrementing its map count.
+// Unmapping with a count of zero is reported through VMA_ASSERT and otherwise ignored.
+void VmaAllocation_T::BlockAllocUnmap()
+{
+    VMA_ASSERT(GetType() == ALLOCATION_TYPE_BLOCK);
+
+    if (m_MapCount == 0)
+    {
+        VMA_ASSERT(0 && "Unmapping allocation not previously mapped.");
+        return;
+    }
+
+    --m_MapCount;
+}
+
+// Maps a dedicated allocation and writes the CPU pointer to *ppData.
+// If the memory is already mapped (map count > 0 or persistently mapped), the cached
+// pointer is returned and the map count is incremented, up to a limit of 0xFF; beyond
+// that VK_ERROR_MEMORY_MAP_FAILED is returned.
+// Otherwise vkMapMemory is called on the whole memory object; on success the pointer
+// is cached and the map count becomes 1. The vkMapMemory result is returned.
+VkResult VmaAllocation_T::DedicatedAllocMap(VmaAllocator hAllocator, void** ppData)
+{
+    VMA_ASSERT(GetType() == ALLOCATION_TYPE_DEDICATED);
+    VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it.");
+
+    EnsureExtraData(hAllocator);
+
+    const bool alreadyMapped = (m_MapCount != 0) || IsPersistentMap();
+    if (!alreadyMapped)
+    {
+        // First mapping: ask Vulkan for the pointer.
+        const VkResult mapResult = (*hAllocator->GetVulkanFunctions().vkMapMemory)(
+            hAllocator->m_hDevice,
+            m_DedicatedAllocation.m_hMemory,
+            0, // offset
+            VK_WHOLE_SIZE,
+            0, // flags
+            ppData);
+        if (mapResult == VK_SUCCESS)
+        {
+            m_DedicatedAllocation.m_ExtraData->m_pMappedData = *ppData;
+            m_MapCount = 1;
+        }
+        return mapResult;
+    }
+
+    if (m_MapCount >= 0xFF)
+    {
+        VMA_ASSERT(0 && "Dedicated allocation mapped too many times simultaneously.");
+        return VK_ERROR_MEMORY_MAP_FAILED;
+    }
+
+    // Nested mapping: hand out the cached pointer.
+    VMA_ASSERT(m_DedicatedAllocation.m_ExtraData->m_pMappedData != VMA_NULL);
+    *ppData = m_DedicatedAllocation.m_ExtraData->m_pMappedData;
+    ++m_MapCount;
+    return VK_SUCCESS;
+}
+
+// Removes one mapping of a dedicated allocation. When the map count drops to zero and
+// the allocation is not persistently mapped, the cached pointer is cleared and
+// vkUnmapMemory is called. Unmapping with a count of zero is reported through VMA_ASSERT.
+void VmaAllocation_T::DedicatedAllocUnmap(VmaAllocator hAllocator)
+{
+    VMA_ASSERT(GetType() == ALLOCATION_TYPE_DEDICATED);
+
+    if (m_MapCount == 0)
+    {
+        VMA_ASSERT(0 && "Unmapping dedicated allocation not previously mapped.");
+        return;
+    }
+
+    --m_MapCount;
+
+    const bool stillMapped = (m_MapCount != 0) || IsPersistentMap();
+    if (stillMapped)
+    {
+        return;
+    }
+
+    // Last reference gone: forget the pointer and release the Vulkan mapping.
+    VMA_ASSERT(m_DedicatedAllocation.m_ExtraData != VMA_NULL);
+    m_DedicatedAllocation.m_ExtraData->m_pMappedData = VMA_NULL;
+    (*hAllocator->GetVulkanFunctions().vkUnmapMemory)(
+        hAllocator->m_hDevice,
+        m_DedicatedAllocation.m_hMemory);
+}
+
+#if VMA_STATS_STRING_ENABLED
+// Writes this allocation's parameters as alternating key/value entries into `json`:
+// "Type", "Size", "Usage", plus "CustomData" (the user-data pointer) and "Name" when set.
+void VmaAllocation_T::PrintParameters(class VmaJsonWriter& json) const
+{
+    json.WriteString("Type");
+    json.WriteString(VMA_SUBALLOCATION_TYPE_NAMES[m_SuballocationType]);
+
+    json.WriteString("Size");
+    json.WriteNumber(m_Size);
+    json.WriteString("Usage");
+    json.WriteNumber(m_BufferImageUsage.Value); // It may be uint32_t or uint64_t.
+
+    if (m_pUserData != VMA_NULL)
+    {
+        json.WriteString("CustomData");
+        json.BeginString();
+        json.ContinueString_Pointer(m_pUserData);
+        json.EndString();
+    }
+    if (m_pName != VMA_NULL)
+    {
+        json.WriteString("Name");
+        json.WriteString(m_pName);
+    }
+}
+#endif // VMA_STATS_STRING_ENABLED
+
+// Deliberately outside the VMA_STATS_STRING_ENABLED guard: exporting a Win32 handle does
+// not depend on statistics strings, and nesting it there would leave the function
+// undefined when VMA_STATS_STRING_ENABLED is 0.
+#if VMA_EXTERNAL_MEMORY_WIN32
+// Obtains a Win32 handle for the VkDeviceMemory backing this allocation and writes it to
+// *pHandle. Block allocations delegate to the owning block; dedicated allocations use the
+// handle object in their (lazily created) extra-data record.
+// Any other allocation type is a logic error (asserted) and yields VK_ERROR_FEATURE_NOT_PRESENT.
+VkResult VmaAllocation_T::GetWin32Handle(VmaAllocator hAllocator, HANDLE hTargetProcess, HANDLE* pHandle) noexcept
+{
+    auto pvkGetMemoryWin32HandleKHR = hAllocator->GetVulkanFunctions().vkGetMemoryWin32HandleKHR;
+    switch (m_Type)
+    {
+    case ALLOCATION_TYPE_BLOCK:
+        return m_BlockAllocation.m_Block->CreateWin32Handle(hAllocator, pvkGetMemoryWin32HandleKHR, hTargetProcess, pHandle);
+    case ALLOCATION_TYPE_DEDICATED:
+        EnsureExtraData(hAllocator);
+        return m_DedicatedAllocation.m_ExtraData->m_Handle.GetHandle(hAllocator->m_hDevice, m_DedicatedAllocation.m_hMemory, pvkGetMemoryWin32HandleKHR, hTargetProcess, hAllocator->m_UseMutex, pHandle);
+    default:
+        VMA_ASSERT(0);
+        return VK_ERROR_FEATURE_NOT_PRESENT;
+    }
+}
+#endif // VMA_EXTERNAL_MEMORY_WIN32
+
+// Lazily creates the VmaAllocationExtraData record of a dedicated allocation.
+// Does nothing if the record already exists.
+void VmaAllocation_T::EnsureExtraData(VmaAllocator hAllocator)
+{
+ if (m_DedicatedAllocation.m_ExtraData == VMA_NULL)
+ {
+ m_DedicatedAllocation.m_ExtraData = vma_new(hAllocator, VmaAllocationExtraData)();
+ }
+}
+
+// Frees the owned name string, if any, and resets m_pName to VMA_NULL.
+void VmaAllocation_T::FreeName(VmaAllocator hAllocator)
+{
+    if (!m_pName)
+    {
+        return;
+    }
+
+    VmaFreeString(hAllocator->GetAllocationCallbacks(), m_pName);
+    m_pName = VMA_NULL;
+}
+#endif // _VMA_ALLOCATION_T_FUNCTIONS
+
+#ifndef _VMA_BLOCK_VECTOR_FUNCTIONS
+// Constructs an empty block vector for one memory type. All parameters are stored
+// verbatim in the corresponding members; no device memory is allocated here
+// (see CreateMinBlocks() / CreateBlock()). Block ids start at 0.
+VmaBlockVector::VmaBlockVector(
+ VmaAllocator hAllocator,
+ VmaPool hParentPool,
+ uint32_t memoryTypeIndex,
+ VkDeviceSize preferredBlockSize,
+ size_t minBlockCount,
+ size_t maxBlockCount,
+ VkDeviceSize bufferImageGranularity,
+ bool explicitBlockSize,
+ uint32_t algorithm,
+ float priority,
+ VkDeviceSize minAllocationAlignment,
+ void* pMemoryAllocateNext)
+ : m_hAllocator(hAllocator),
+ m_hParentPool(hParentPool),
+ m_MemoryTypeIndex(memoryTypeIndex),
+ m_PreferredBlockSize(preferredBlockSize),
+ m_MinBlockCount(minBlockCount),
+ m_MaxBlockCount(maxBlockCount),
+ m_BufferImageGranularity(bufferImageGranularity),
+ m_ExplicitBlockSize(explicitBlockSize),
+ m_Algorithm(algorithm),
+ m_Priority(priority),
+ m_MinAllocationAlignment(minAllocationAlignment),
+ m_pMemoryAllocateNext(pMemoryAllocateNext),
+ m_Blocks(VmaStlAllocator<VmaDeviceMemoryBlock*>(hAllocator->GetAllocationCallbacks())),
+ m_NextBlockId(0) {}
+
+// Destroys and deletes every remaining block, from the last one to the first.
+VmaBlockVector::~VmaBlockVector()
+{
+    size_t remaining = m_Blocks.size();
+    while (remaining > 0)
+    {
+        --remaining;
+        m_Blocks[remaining]->Destroy(m_hAllocator);
+        vma_delete(m_hAllocator, m_Blocks[remaining]);
+    }
+}
+
+// Creates m_MinBlockCount blocks of m_PreferredBlockSize each.
+// Stops at the first failure and returns its result; blocks created before the
+// failure are kept. Returns VK_SUCCESS when all blocks were created.
+VkResult VmaBlockVector::CreateMinBlocks()
+{
+    size_t createdCount = 0;
+    while (createdCount < m_MinBlockCount)
+    {
+        const VkResult createResult = CreateBlock(m_PreferredBlockSize, VMA_NULL);
+        if (createResult != VK_SUCCESS)
+        {
+            return createResult;
+        }
+        ++createdCount;
+    }
+    return VK_SUCCESS;
+}
+
+// Accumulates the basic statistics of every block into inoutStats.
+// Takes a read lock on the block list for the duration of the walk.
+void VmaBlockVector::AddStatistics(VmaStatistics& inoutStats)
+{
+    VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex);
+
+    const size_t blockCount = m_Blocks.size();
+    // The index is size_t so it has the same width as blockCount.
+    for (size_t blockIndex = 0; blockIndex < blockCount; ++blockIndex)
+    {
+        const VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex];
+        VMA_ASSERT(pBlock);
+        VMA_HEAVY_ASSERT(pBlock->Validate());
+        pBlock->m_pMetadata->AddStatistics(inoutStats);
+    }
+}
+
+// Accumulates the detailed statistics of every block into inoutStats.
+// Takes a read lock on the block list for the duration of the walk.
+void VmaBlockVector::AddDetailedStatistics(VmaDetailedStatistics& inoutStats)
+{
+    VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex);
+
+    const size_t blockCount = m_Blocks.size();
+    // The index is size_t so it has the same width as blockCount.
+    for (size_t blockIndex = 0; blockIndex < blockCount; ++blockIndex)
+    {
+        const VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex];
+        VMA_ASSERT(pBlock);
+        VMA_HEAVY_ASSERT(pBlock->Validate());
+        pBlock->m_pMetadata->AddDetailedStatistics(inoutStats);
+    }
+}
+
+// Returns true when this vector currently holds no blocks. Takes a read lock on the block list.
+bool VmaBlockVector::IsEmpty()
+{
+ VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex);
+ return m_Blocks.empty();
+}
+
+// Tells whether magic-value corruption detection applies to this block vector.
+// All of the following must hold: VMA_DEBUG_DETECT_CORRUPTION is non-zero,
+// VMA_DEBUG_MARGIN is positive, the algorithm is the default (0) or linear one, and
+// the memory type is both HOST_VISIBLE and HOST_COHERENT.
+bool VmaBlockVector::IsCorruptionDetectionEnabled() const
+{
+    if (!(VMA_DEBUG_DETECT_CORRUPTION != 0))
+    {
+        return false;
+    }
+    if (!(VMA_DEBUG_MARGIN > 0))
+    {
+        return false;
+    }
+    if (m_Algorithm != 0 && m_Algorithm != VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT)
+    {
+        return false;
+    }
+
+    const uint32_t requiredMemFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+    return (m_hAllocator->m_MemProps.memoryTypes[m_MemoryTypeIndex].propertyFlags & requiredMemFlags) == requiredMemFlags;
+}
+
+// Creates allocationCount allocations of the given size and alignment and stores them in
+// pAllocations. The operation is all-or-nothing: if any allocation fails, the ones already
+// created are freed in reverse order, the whole output array is zeroed and the failing
+// result is returned.
+// The alignment is raised to at least m_MinAllocationAlignment. With corruption detection
+// enabled, size and alignment are rounded up to a multiple of the magic value's size.
+VkResult VmaBlockVector::Allocate(
+    VkDeviceSize size,
+    VkDeviceSize alignment,
+    const VmaAllocationCreateInfo& createInfo,
+    VmaSuballocationType suballocType,
+    size_t allocationCount,
+    VmaAllocation* pAllocations)
+{
+    alignment = VMA_MAX(alignment, m_MinAllocationAlignment);
+
+    if (IsCorruptionDetectionEnabled())
+    {
+        size = VmaAlignUp<VkDeviceSize>(size, sizeof(VMA_CORRUPTION_DETECTION_MAGIC_VALUE));
+        alignment = VmaAlignUp<VkDeviceSize>(alignment, sizeof(VMA_CORRUPTION_DETECTION_MAGIC_VALUE));
+    }
+
+    VkResult result = VK_SUCCESS;
+    size_t createdCount = 0;
+
+    // Scope for the write lock: Free() below takes the lock itself.
+    {
+        VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex);
+        while (createdCount < allocationCount)
+        {
+            result = AllocatePage(
+                size,
+                alignment,
+                createInfo,
+                suballocType,
+                pAllocations + createdCount);
+            if (result != VK_SUCCESS)
+            {
+                break;
+            }
+            ++createdCount;
+        }
+    }
+
+    if (result == VK_SUCCESS)
+    {
+        return result;
+    }
+
+    // Roll back: free everything created so far, newest first.
+    while (createdCount > 0)
+    {
+        --createdCount;
+        Free(pAllocations[createdCount]);
+    }
+    memset(pAllocations, 0, sizeof(VmaAllocation) * allocationCount);
+
+    return result;
+}
+
+// Creates a single allocation of the given size and alignment.
+// Strategy:
+//   1. Try to place it in an existing block (which blocks, and in which order, depends on
+//      the pool algorithm and the allocation strategy flags).
+//   2. If that fails and creating a block is permitted, create a new block - possibly
+//      smaller than m_PreferredBlockSize - and allocate from it.
+// Returns VK_SUCCESS with *pAllocation set, VK_ERROR_FEATURE_NOT_PRESENT for an unsupported
+// upper-address request, or VK_ERROR_OUT_OF_DEVICE_MEMORY otherwise.
+// This function takes no lock itself.
+VkResult VmaBlockVector::AllocatePage(
+ VkDeviceSize size,
+ VkDeviceSize alignment,
+ const VmaAllocationCreateInfo& createInfo,
+ VmaSuballocationType suballocType,
+ VmaAllocation* pAllocation)
+{
+ const bool isUpperAddress = (createInfo.flags & VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0;
+
+ // Remaining budget of the heap this memory type belongs to (0 when usage already meets or exceeds budget).
+ VkDeviceSize freeMemory;
+ {
+ const uint32_t heapIndex = m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex);
+ VmaBudget heapBudget = {};
+ m_hAllocator->GetHeapBudgets(&heapBudget, heapIndex, 1);
+ freeMemory = (heapBudget.usage < heapBudget.budget) ? (heapBudget.budget - heapBudget.usage) : 0;
+ }
+
+ const bool canFallbackToDedicated = !HasExplicitBlockSize() &&
+ (createInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0;
+ // A new block may be created only if allocation of new memory is allowed, the block limit
+ // is not reached, and either the budget has room or there is no dedicated fallback.
+ const bool canCreateNewBlock =
+ ((createInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0) &&
+ (m_Blocks.size() < m_MaxBlockCount) &&
+ (freeMemory >= size || !canFallbackToDedicated);
+ uint32_t strategy = createInfo.flags & VMA_ALLOCATION_CREATE_STRATEGY_MASK;
+
+ // Upper address can only be used with linear allocator and within single memory block.
+ if (isUpperAddress &&
+ (m_Algorithm != VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT || m_MaxBlockCount > 1))
+ {
+ return VK_ERROR_FEATURE_NOT_PRESENT;
+ }
+
+ // Early reject: requested allocation size is larger that maximum block size for this block vector.
+ if (size + VMA_DEBUG_MARGIN > m_PreferredBlockSize)
+ {
+ return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ }
+
+ // 1. Search existing allocations. Try to allocate.
+ if (m_Algorithm == VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT)
+ {
+ // Use only last block.
+ if (!m_Blocks.empty())
+ {
+ VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks.back();
+ VMA_ASSERT(pCurrBlock);
+ VkResult res = AllocateFromBlock(
+ pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation);
+ if (res == VK_SUCCESS)
+ {
+ VMA_DEBUG_LOG_FORMAT(" Returned from last block #%" PRIu32, pCurrBlock->GetId());
+ IncrementallySortBlocks();
+ return VK_SUCCESS;
+ }
+ }
+ }
+ else
+ {
+ if (strategy != VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT) // MIN_MEMORY or default
+ {
+ const bool isHostVisible =
+ (m_hAllocator->m_MemProps.memoryTypes[m_MemoryTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0;
+ if(isHostVisible)
+ {
+ const bool isMappingAllowed = (createInfo.flags &
+ (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0;
+ /*
+ For non-mappable allocations, check blocks that are not mapped first.
+ For mappable allocations, check blocks that are already mapped first.
+ This way, having many blocks, we will separate mappable and non-mappable allocations,
+ hopefully limiting the number of blocks that are mapped, which will help tools like RenderDoc.
+ */
+ // Pass 0 visits blocks whose mapped state matches isMappingAllowed; pass 1 visits the rest.
+ for(size_t mappingI = 0; mappingI < 2; ++mappingI)
+ {
+ // Forward order in m_Blocks - prefer blocks with smallest amount of free space.
+ for (size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex)
+ {
+ VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex];
+ VMA_ASSERT(pCurrBlock);
+ const bool isBlockMapped = pCurrBlock->GetMappedData() != VMA_NULL;
+ if((mappingI == 0) == (isMappingAllowed == isBlockMapped))
+ {
+ VkResult res = AllocateFromBlock(
+ pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation);
+ if (res == VK_SUCCESS)
+ {
+ VMA_DEBUG_LOG_FORMAT(" Returned from existing block #%" PRIu32, pCurrBlock->GetId());
+ IncrementallySortBlocks();
+ return VK_SUCCESS;
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ // Forward order in m_Blocks - prefer blocks with smallest amount of free space.
+ for (size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex)
+ {
+ VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex];
+ VMA_ASSERT(pCurrBlock);
+ VkResult res = AllocateFromBlock(
+ pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation);
+ if (res == VK_SUCCESS)
+ {
+ VMA_DEBUG_LOG_FORMAT(" Returned from existing block #%" PRIu32, pCurrBlock->GetId());
+ IncrementallySortBlocks();
+ return VK_SUCCESS;
+ }
+ }
+ }
+ }
+ else // VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT
+ {
+ // Backward order in m_Blocks - prefer blocks with largest amount of free space.
+ for (size_t blockIndex = m_Blocks.size(); blockIndex--; )
+ {
+ VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex];
+ VMA_ASSERT(pCurrBlock);
+ VkResult res = AllocateFromBlock(pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation);
+ if (res == VK_SUCCESS)
+ {
+ VMA_DEBUG_LOG_FORMAT(" Returned from existing block #%" PRIu32, pCurrBlock->GetId());
+ IncrementallySortBlocks();
+ return VK_SUCCESS;
+ }
+ }
+ }
+ }
+
+ // 2. Try to create new block.
+ if (canCreateNewBlock)
+ {
+ // Calculate optimal size for new block.
+ VkDeviceSize newBlockSize = m_PreferredBlockSize;
+ uint32_t newBlockSizeShift = 0;
+ // At most three halvings in total, i.e. down to 1/8 of the preferred size.
+ const uint32_t NEW_BLOCK_SIZE_SHIFT_MAX = 3;
+
+ if (!m_ExplicitBlockSize)
+ {
+ // Allocate 1/8, 1/4, 1/2 as first blocks.
+ const VkDeviceSize maxExistingBlockSize = CalcMaxBlockSize();
+ for (uint32_t i = 0; i < NEW_BLOCK_SIZE_SHIFT_MAX; ++i)
+ {
+ const VkDeviceSize smallerNewBlockSize = newBlockSize / 2;
+ // Halve only while the result stays larger than every existing block and can hold the request twice.
+ if (smallerNewBlockSize > maxExistingBlockSize && smallerNewBlockSize >= size * 2)
+ {
+ newBlockSize = smallerNewBlockSize;
+ ++newBlockSizeShift;
+ }
+ else
+ {
+ break;
+ }
+ }
+ }
+
+ size_t newBlockIndex = 0;
+ // Skip the Vulkan call when the block would exceed the budget and a dedicated fallback exists.
+ VkResult res = (newBlockSize <= freeMemory || !canFallbackToDedicated) ?
+ CreateBlock(newBlockSize, &newBlockIndex) : VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ // Allocation of this size failed? Try 1/2, 1/4, 1/8 of m_PreferredBlockSize.
+ if (!m_ExplicitBlockSize)
+ {
+ while (res < 0 && newBlockSizeShift < NEW_BLOCK_SIZE_SHIFT_MAX)
+ {
+ const VkDeviceSize smallerNewBlockSize = newBlockSize / 2;
+ if (smallerNewBlockSize >= size)
+ {
+ newBlockSize = smallerNewBlockSize;
+ ++newBlockSizeShift;
+ res = (newBlockSize <= freeMemory || !canFallbackToDedicated) ?
+ CreateBlock(newBlockSize, &newBlockIndex) : VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ }
+ else
+ {
+ break;
+ }
+ }
+ }
+
+ if (res == VK_SUCCESS)
+ {
+ VmaDeviceMemoryBlock* const pBlock = m_Blocks[newBlockIndex];
+ VMA_ASSERT(pBlock->m_pMetadata->GetSize() >= size);
+
+ res = AllocateFromBlock(
+ pBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation);
+ if (res == VK_SUCCESS)
+ {
+ VMA_DEBUG_LOG_FORMAT(" Created new block #%" PRIu32 " Size=%" PRIu64, pBlock->GetId(), newBlockSize);
+ IncrementallySortBlocks();
+ return VK_SUCCESS;
+ }
+ else
+ {
+ // Allocation from new block failed, possibly due to VMA_DEBUG_MARGIN or alignment.
+ return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ }
+ }
+ }
+
+ return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+}
+
+// Frees a block allocation: validates the corruption margin (if enabled), drops the
+// persistent mapping, releases the region in the block's metadata, updates the budget and
+// destroys the allocation object.
+// At most one empty block is removed from the vector as a side effect; its destruction is
+// performed after the write lock has been released.
+void VmaBlockVector::Free(const VmaAllocation hAllocation)
+{
+ VmaDeviceMemoryBlock* pBlockToDelete = VMA_NULL;
+
+ // Sampled before taking the lock: is the heap's usage already at or above its budget?
+ bool budgetExceeded = false;
+ {
+ const uint32_t heapIndex = m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex);
+ VmaBudget heapBudget = {};
+ m_hAllocator->GetHeapBudgets(&heapBudget, heapIndex, 1);
+ budgetExceeded = heapBudget.usage >= heapBudget.budget;
+ }
+
+ // Scope for lock.
+ {
+ VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex);
+
+ VmaDeviceMemoryBlock* pBlock = hAllocation->GetBlock();
+
+ if (IsCorruptionDetectionEnabled())
+ {
+ VkResult res = pBlock->ValidateMagicValueAfterAllocation(m_hAllocator, hAllocation->GetOffset(), hAllocation->GetSize());
+ VMA_ASSERT(res == VK_SUCCESS && "Couldn't map block memory to validate magic value.");
+ }
+
+ // Release the map reference taken for a persistently mapped allocation.
+ if (hAllocation->IsPersistentMap())
+ {
+ pBlock->Unmap(m_hAllocator, 1);
+ }
+
+ // Must be evaluated before the region is freed, so it reflects the state prior to this call.
+ const bool hadEmptyBlockBeforeFree = HasEmptyBlock();
+ pBlock->m_pMetadata->Free(hAllocation->GetAllocHandle());
+ pBlock->PostFree(m_hAllocator);
+ VMA_HEAVY_ASSERT(pBlock->Validate());
+
+ VMA_DEBUG_LOG_FORMAT(" Freed from MemoryTypeIndex=%" PRIu32, m_MemoryTypeIndex);
+
+ // Never shrink to m_MinBlockCount blocks or fewer.
+ const bool canDeleteBlock = m_Blocks.size() > m_MinBlockCount;
+ // pBlock became empty after this deallocation.
+ if (pBlock->m_pMetadata->IsEmpty())
+ {
+ // Already had empty block. We don't want to have two, so delete this one.
+ if ((hadEmptyBlockBeforeFree || budgetExceeded) && canDeleteBlock)
+ {
+ pBlockToDelete = pBlock;
+ Remove(pBlock);
+ }
+ // else: We now have one empty block - leave it. A hysteresis to avoid allocating whole block back and forth.
+ }
+ // pBlock didn't become empty, but we have another empty block - find and free that one.
+ // (This is optional, heuristics.)
+ else if (hadEmptyBlockBeforeFree && canDeleteBlock)
+ {
+ // Only the last block is examined here.
+ VmaDeviceMemoryBlock* pLastBlock = m_Blocks.back();
+ if (pLastBlock->m_pMetadata->IsEmpty())
+ {
+ pBlockToDelete = pLastBlock;
+ m_Blocks.pop_back();
+ }
+ }
+
+ IncrementallySortBlocks();
+
+ m_hAllocator->m_Budget.RemoveAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), hAllocation->GetSize());
+ hAllocation->Destroy(m_hAllocator);
+ m_hAllocator->m_AllocationObjectAllocator.Free(hAllocation);
+ }
+
+ // Destruction of a free block. Deferred until this point, outside of mutex
+ // lock, for performance reason.
+ if (pBlockToDelete != VMA_NULL)
+ {
+ VMA_DEBUG_LOG_FORMAT(" Deleted empty block #%" PRIu32, pBlockToDelete->GetId());
+ pBlockToDelete->Destroy(m_hAllocator);
+ vma_delete(m_hAllocator, pBlockToDelete);
+ }
+}
+
+// Returns the size of the largest existing block, or 0 when there are none.
+// Blocks are scanned from last to first; the scan stops early once a block of at
+// least m_PreferredBlockSize has been seen.
+VkDeviceSize VmaBlockVector::CalcMaxBlockSize() const
+{
+    VkDeviceSize largest = 0;
+    size_t index = m_Blocks.size();
+    while (index > 0)
+    {
+        --index;
+        largest = VMA_MAX(largest, m_Blocks[index]->m_pMetadata->GetSize());
+        if (largest >= m_PreferredBlockSize)
+        {
+            return largest;
+        }
+    }
+    return largest;
+}
+
+// Removes pBlock from m_Blocks without destroying it. The block is expected to be
+// present; not finding it is a logic error (asserted).
+void VmaBlockVector::Remove(VmaDeviceMemoryBlock* pBlock)
+{
+    // The index is size_t so it has the same width as m_Blocks.size().
+    for (size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex)
+    {
+        if (m_Blocks[blockIndex] == pBlock)
+        {
+            VmaVectorRemove(m_Blocks, blockIndex);
+            return;
+        }
+    }
+    VMA_ASSERT(0);
+}
+
+// Performs one step of a bubble sort that orders blocks by ascending free size:
+// the first adjacent pair found out of order is swapped and the function returns.
+// Does nothing when incremental sorting is disabled or the linear algorithm is in use.
+void VmaBlockVector::IncrementallySortBlocks()
+{
+    if (!m_IncrementalSort)
+    {
+        return;
+    }
+    if (m_Algorithm == VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT)
+    {
+        return;
+    }
+
+    for (size_t next = 1; next < m_Blocks.size(); ++next)
+    {
+        const size_t prev = next - 1;
+        if (m_Blocks[prev]->m_pMetadata->GetSumFreeSize() > m_Blocks[next]->m_pMetadata->GetSumFreeSize())
+        {
+            std::swap(m_Blocks[prev], m_Blocks[next]);
+            return;
+        }
+    }
+}
+
+// Fully sorts m_Blocks by ascending total free size (blocks with the least free space first).
+void VmaBlockVector::SortByFreeSize()
+{
+ VMA_SORT(m_Blocks.begin(), m_Blocks.end(),
+ [](VmaDeviceMemoryBlock* b1, VmaDeviceMemoryBlock* b2) -> bool
+ {
+ return b1->m_pMetadata->GetSumFreeSize() < b2->m_pMetadata->GetSumFreeSize();
+ });
+}
+
+// Tries to place an allocation inside pBlock. Asks the block's metadata for an allocation
+// request and, if one is found, commits it via CommitAllocationRequest().
+// Returns VK_ERROR_OUT_OF_DEVICE_MEMORY when the block has no suitable free region,
+// otherwise the result of the commit.
+VkResult VmaBlockVector::AllocateFromBlock(
+    VmaDeviceMemoryBlock* pBlock,
+    VkDeviceSize size,
+    VkDeviceSize alignment,
+    VmaAllocationCreateFlags allocFlags,
+    void* pUserData,
+    VmaSuballocationType suballocType,
+    uint32_t strategy,
+    VmaAllocation* pAllocation)
+{
+    const bool isUpperAddress = (allocFlags & VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0;
+
+    VmaAllocationRequest request = {};
+    const bool requestFound = pBlock->m_pMetadata->CreateAllocationRequest(
+        size,
+        alignment,
+        isUpperAddress,
+        suballocType,
+        strategy,
+        &request);
+    if (!requestFound)
+    {
+        return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+    }
+
+    return CommitAllocationRequest(request, pBlock, alignment, allocFlags, pUserData, suballocType, pAllocation);
+}
+
+// Turns a prepared allocation request into a live allocation inside pBlock.
+// Maps the block first when VMA_ALLOCATION_CREATE_MAPPED_BIT is set, then creates the
+// allocation object, registers it in the block's metadata, applies user data or name,
+// updates the budget and optionally fills the memory / writes the corruption-detection
+// magic value.
+// Returns the mapping error if the block could not be mapped (no allocation object is
+// created in that case), VK_SUCCESS otherwise.
+VkResult VmaBlockVector::CommitAllocationRequest(
+ VmaAllocationRequest& allocRequest,
+ VmaDeviceMemoryBlock* pBlock,
+ VkDeviceSize alignment,
+ VmaAllocationCreateFlags allocFlags,
+ void* pUserData,
+ VmaSuballocationType suballocType,
+ VmaAllocation* pAllocation)
+{
+ const bool mapped = (allocFlags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0;
+ const bool isUserDataString = (allocFlags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0;
+ const bool isMappingAllowed = (allocFlags &
+ (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0;
+
+ // NOTE(review): PostAlloc() runs before Map(); if Map() fails below, the function returns
+ // without a matching PostFree() - confirm this is intended.
+ pBlock->PostAlloc(m_hAllocator);
+ // Allocate from pCurrBlock.
+ if (mapped)
+ {
+ VkResult res = pBlock->Map(m_hAllocator, 1, VMA_NULL);
+ if (res != VK_SUCCESS)
+ {
+ return res;
+ }
+ }
+
+ *pAllocation = m_hAllocator->m_AllocationObjectAllocator.Allocate(isMappingAllowed);
+ pBlock->m_pMetadata->Alloc(allocRequest, suballocType, *pAllocation);
+ (*pAllocation)->InitBlockAllocation(
+ pBlock,
+ allocRequest.allocHandle,
+ alignment,
+ allocRequest.size, // Not size, as actual allocation size may be larger than requested!
+ m_MemoryTypeIndex,
+ suballocType,
+ mapped);
+ VMA_HEAVY_ASSERT(pBlock->Validate());
+ // With the COPY_STRING flag, pUserData is treated as a C string and stored as the name.
+ if (isUserDataString)
+ (*pAllocation)->SetName(m_hAllocator, (const char*)pUserData);
+ else
+ (*pAllocation)->SetUserData(m_hAllocator, pUserData);
+ m_hAllocator->m_Budget.AddAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), allocRequest.size);
+ if (VMA_DEBUG_INITIALIZE_ALLOCATIONS)
+ {
+ m_hAllocator->FillAllocation(*pAllocation, VMA_ALLOCATION_FILL_PATTERN_CREATED);
+ }
+ if (IsCorruptionDetectionEnabled())
+ {
+ // A failure here is only reported through the assert; the function still returns VK_SUCCESS.
+ VkResult res = pBlock->WriteMagicValueAfterAllocation(m_hAllocator, (*pAllocation)->GetOffset(), allocRequest.size);
+ VMA_ASSERT(res == VK_SUCCESS && "Couldn't map block memory to write magic value.");
+ }
+ return VK_SUCCESS;
+}
+
+// Allocates a new VkDeviceMemory of blockSize bytes for this vector's memory type, wraps it
+// in a VmaDeviceMemoryBlock and appends it to m_Blocks.
+// pNewBlockIndex - optional; receives the index of the new block in m_Blocks.
+// Returns the (negative) error from AllocateVulkanMemory on failure, VK_SUCCESS otherwise.
+VkResult VmaBlockVector::CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIndex)
+{
+ VkMemoryAllocateInfo allocInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO };
+ allocInfo.pNext = m_pMemoryAllocateNext;
+ allocInfo.memoryTypeIndex = m_MemoryTypeIndex;
+ allocInfo.allocationSize = blockSize;
+
+ // The extension structs below are locals linked into allocInfo's pNext chain, so they
+ // must stay alive until AllocateVulkanMemory() has been called.
+#if VMA_BUFFER_DEVICE_ADDRESS
+ // Every standalone block can potentially contain a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT - always enable the feature.
+ VkMemoryAllocateFlagsInfoKHR allocFlagsInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO_KHR };
+ if (m_hAllocator->m_UseKhrBufferDeviceAddress)
+ {
+ allocFlagsInfo.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR;
+ VmaPnextChainPushFront(&allocInfo, &allocFlagsInfo);
+ }
+#endif // VMA_BUFFER_DEVICE_ADDRESS
+
+#if VMA_MEMORY_PRIORITY
+ VkMemoryPriorityAllocateInfoEXT priorityInfo = { VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT };
+ if (m_hAllocator->m_UseExtMemoryPriority)
+ {
+ VMA_ASSERT(m_Priority >= 0.f && m_Priority <= 1.f);
+ priorityInfo.priority = m_Priority;
+ VmaPnextChainPushFront(&allocInfo, &priorityInfo);
+ }
+#endif // VMA_MEMORY_PRIORITY
+
+#if VMA_EXTERNAL_MEMORY
+ // Attach VkExportMemoryAllocateInfoKHR if necessary.
+ VkExportMemoryAllocateInfoKHR exportMemoryAllocInfo = { VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR };
+ exportMemoryAllocInfo.handleTypes = m_hAllocator->GetExternalMemoryHandleTypeFlags(m_MemoryTypeIndex);
+ if (exportMemoryAllocInfo.handleTypes != 0)
+ {
+ VmaPnextChainPushFront(&allocInfo, &exportMemoryAllocInfo);
+ }
+#endif // VMA_EXTERNAL_MEMORY
+
+ VkDeviceMemory mem = VK_NULL_HANDLE;
+ VkResult res = m_hAllocator->AllocateVulkanMemory(&allocInfo, &mem);
+ if (res < 0)
+ {
+ return res;
+ }
+
+ // New VkDeviceMemory successfully created.
+
+ // Create new Allocation for it.
+ VmaDeviceMemoryBlock* const pBlock = vma_new(m_hAllocator, VmaDeviceMemoryBlock)(m_hAllocator);
+ pBlock->Init(
+ m_hAllocator,
+ m_hParentPool,
+ m_MemoryTypeIndex,
+ mem,
+ allocInfo.allocationSize,
+ m_NextBlockId++,
+ m_Algorithm,
+ m_BufferImageGranularity);
+
+ // New blocks are always appended at the end of m_Blocks.
+ m_Blocks.push_back(pBlock);
+ if (pNewBlockIndex != VMA_NULL)
+ {
+ *pNewBlockIndex = m_Blocks.size() - 1;
+ }
+
+ return VK_SUCCESS;
+}
+
+// Returns true if at least one block in this vector contains no allocations.
+bool VmaBlockVector::HasEmptyBlock()
+{
+    const size_t blockCount = m_Blocks.size();
+    size_t index = 0;
+    // Advance past every non-empty block; stop at the first empty one.
+    while (index < blockCount && !m_Blocks[index]->m_pMetadata->IsEmpty())
+    {
+        ++index;
+    }
+    return index < blockCount;
+}
+
+#if VMA_STATS_STRING_ENABLED
+// Writes a JSON object with one entry per block. Each entry is keyed by the block id and
+// holds the block's "MapRefCount" followed by the detailed map of the block's metadata.
+// Takes a read lock on the block list while writing.
+void VmaBlockVector::PrintDetailedMap(class VmaJsonWriter& json)
+{
+    VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex);
+
+    json.BeginObject();
+    for (size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex)
+    {
+        VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex];
+
+        // Key: the block id rendered as a string.
+        json.BeginString();
+        json.ContinueString(pBlock->GetId());
+        json.EndString();
+
+        // Value: an object describing the block.
+        json.BeginObject();
+        json.WriteString("MapRefCount");
+        json.WriteNumber(pBlock->GetMapRefCount());
+
+        pBlock->m_pMetadata->PrintDetailedMap(json);
+        json.EndObject();
+    }
+    json.EndObject();
+}
+#endif // VMA_STATS_STRING_ENABLED
+
+// Checks every block of this vector for corrupted margins.
+// Returns VK_ERROR_FEATURE_NOT_PRESENT when corruption detection does not apply to this
+// vector, the first non-success result reported by a block, or VK_SUCCESS if all pass.
+// Takes a read lock on the block list while checking.
+VkResult VmaBlockVector::CheckCorruption()
+{
+    if (!IsCorruptionDetectionEnabled())
+    {
+        return VK_ERROR_FEATURE_NOT_PRESENT;
+    }
+
+    VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex);
+    // The index is size_t so it has the same width as m_Blocks.size().
+    for (size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex)
+    {
+        VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex];
+        VMA_ASSERT(pBlock);
+        VkResult res = pBlock->CheckCorruption(m_hAllocator);
+        if (res != VK_SUCCESS)
+        {
+            return res;
+        }
+    }
+    return VK_SUCCESS;
+}
+
+#endif // _VMA_BLOCK_VECTOR_FUNCTIONS
+
+#ifndef _VMA_DEFRAGMENTATION_CONTEXT_FUNCTIONS
+// Sets up a defragmentation context.
+// Per-pass limits of 0 in `info` mean "unlimited" (VK_WHOLE_SIZE bytes / UINT32_MAX allocations).
+// If info.pool is set, only that pool's block vector is processed; otherwise all of the
+// allocator's default block vectors are. Every participating vector has incremental
+// sorting switched off and is fully sorted by free size once.
+// Finally the per-vector state array for the selected algorithm is allocated.
+VmaDefragmentationContext_T::VmaDefragmentationContext_T(
+ VmaAllocator hAllocator,
+ const VmaDefragmentationInfo& info)
+ : m_MaxPassBytes(info.maxBytesPerPass == 0 ? VK_WHOLE_SIZE : info.maxBytesPerPass),
+ m_MaxPassAllocations(info.maxAllocationsPerPass == 0 ? UINT32_MAX : info.maxAllocationsPerPass),
+ m_BreakCallback(info.pfnBreakCallback),
+ m_BreakCallbackUserData(info.pBreakCallbackUserData),
+ m_MoveAllocator(hAllocator->GetAllocationCallbacks()),
+ m_Moves(m_MoveAllocator)
+{
+ m_Algorithm = info.flags & VMA_DEFRAGMENTATION_FLAG_ALGORITHM_MASK;
+
+ if (info.pool != VMA_NULL)
+ {
+ // Custom pool: m_pBlockVectors points at the single-element "array" m_PoolBlockVector.
+ m_BlockVectorCount = 1;
+ m_PoolBlockVector = &info.pool->m_BlockVector;
+ m_pBlockVectors = &m_PoolBlockVector;
+ m_PoolBlockVector->SetIncrementalSort(false);
+ m_PoolBlockVector->SortByFreeSize();
+ }
+ else
+ {
+ // Default pools: one block vector per memory type; entries may be null.
+ m_BlockVectorCount = hAllocator->GetMemoryTypeCount();
+ m_PoolBlockVector = VMA_NULL;
+ m_pBlockVectors = hAllocator->m_pBlockVectors;
+ for (uint32_t i = 0; i < m_BlockVectorCount; ++i)
+ {
+ VmaBlockVector* vector = m_pBlockVectors[i];
+ if (vector != VMA_NULL)
+ {
+ vector->SetIncrementalSort(false);
+ vector->SortByFreeSize();
+ }
+ }
+ }
+
+ switch (m_Algorithm)
+ {
+ case 0: // Default algorithm
+ m_Algorithm = VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT;
+ m_AlgorithmState = vma_new_array(hAllocator, StateBalanced, m_BlockVectorCount);
+ break;
+ case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT:
+ m_AlgorithmState = vma_new_array(hAllocator, StateBalanced, m_BlockVectorCount);
+ break;
+ case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT:
+ // State is allocated only when buffer-image granularity is greater than 1.
+ // NOTE(review): otherwise m_AlgorithmState keeps whatever its in-class default is - confirm it is null.
+ if (hAllocator->GetBufferImageGranularity() > 1)
+ {
+ m_AlgorithmState = vma_new_array(hAllocator, StateExtensive, m_BlockVectorCount);
+ }
+ break;
+ }
+}
+
+// Re-enables incremental sorting on every vector this context touched and
+// releases the per-vector algorithm state, if any was allocated.
+VmaDefragmentationContext_T::~VmaDefragmentationContext_T()
+{
+    if (m_PoolBlockVector == VMA_NULL)
+    {
+        for (uint32_t vectorIndex = 0; vectorIndex < m_BlockVectorCount; ++vectorIndex)
+        {
+            VmaBlockVector* const pVector = m_pBlockVectors[vectorIndex];
+            if (pVector != VMA_NULL)
+                pVector->SetIncrementalSort(true);
+        }
+    }
+    else
+    {
+        m_PoolBlockVector->SetIncrementalSort(true);
+    }
+
+    if (!m_AlgorithmState)
+        return;
+
+    // The state array must be destroyed through the element type it was created with.
+    if (m_Algorithm == VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT)
+    {
+        vma_delete_array(m_MoveAllocator.m_pCallbacks, reinterpret_cast<StateBalanced*>(m_AlgorithmState), m_BlockVectorCount);
+    }
+    else if (m_Algorithm == VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT)
+    {
+        vma_delete_array(m_MoveAllocator.m_pCallbacks, reinterpret_cast<StateExtensive*>(m_AlgorithmState), m_BlockVectorCount);
+    }
+    else
+    {
+        VMA_ASSERT(0);
+    }
+}
+
+// Plans the moves for one defragmentation pass and hands them out via moveInfo.
+// Each participating block vector is processed under its write lock: vectors
+// with more than one block go through ComputeDefragmentation, vectors with
+// exactly one block are only compacted in place. In the multi-vector case,
+// planning stops at the first vector whose planning call returns true.
+// Returns VK_INCOMPLETE when moves were planned (moveInfo.pMoves points at
+// them), otherwise VK_SUCCESS with moveInfo.pMoves set to null.
+VkResult VmaDefragmentationContext_T::DefragmentPassBegin(VmaDefragmentationPassMoveInfo& moveInfo)
+{
+    if (m_PoolBlockVector != VMA_NULL)
+    {
+        VmaMutexLockWrite lock(m_PoolBlockVector->GetMutex(), m_PoolBlockVector->GetAllocator()->m_UseMutex);
+
+        const size_t blockCount = m_PoolBlockVector->GetBlockCount();
+        if (blockCount > 1)
+            ComputeDefragmentation(*m_PoolBlockVector, 0);
+        else if (blockCount == 1)
+            ReallocWithinBlock(*m_PoolBlockVector, m_PoolBlockVector->GetBlock(0));
+    }
+    else
+    {
+        for (uint32_t vectorIndex = 0; vectorIndex < m_BlockVectorCount; ++vectorIndex)
+        {
+            VmaBlockVector* const pVector = m_pBlockVectors[vectorIndex];
+            if (pVector == VMA_NULL)
+                continue;
+
+            VmaMutexLockWrite lock(pVector->GetMutex(), pVector->GetAllocator()->m_UseMutex);
+
+            const size_t blockCount = pVector->GetBlockCount();
+            bool stopPlanning = false;
+            if (blockCount > 1)
+                stopPlanning = ComputeDefragmentation(*pVector, vectorIndex);
+            else if (blockCount == 1)
+                stopPlanning = ReallocWithinBlock(*pVector, pVector->GetBlock(0));
+
+            if (stopPlanning)
+                break;
+        }
+    }
+
+    moveInfo.moveCount = static_cast<uint32_t>(m_Moves.size());
+    if (moveInfo.moveCount == 0)
+    {
+        moveInfo.pMoves = VMA_NULL;
+        return VK_SUCCESS;
+    }
+
+    moveInfo.pMoves = m_Moves.data();
+    return VK_INCOMPLETE;
+}
+
+VkResult VmaDefragmentationContext_T::DefragmentPassEnd(VmaDefragmentationPassMoveInfo& moveInfo)
+{ /* Finishes a defragmentation pass: applies the `operation` stored in every entry of
+ * moveInfo.pMoves (COPY, IGNORE or DESTROY), resets moveInfo and m_Moves, folds the pass
+ * statistics into the global statistics, repositions blocks holding IGNORE'd allocations,
+ * and finally maps the blocks that received persistently mapped allocations.
+ * Returns VK_INCOMPLETE when at least one COPY or DESTROY move was processed, or when the
+ * extensive algorithm swapped an immovable block; otherwise VK_SUCCESS. */
+ VMA_ASSERT(moveInfo.moveCount > 0 ? moveInfo.pMoves != VMA_NULL : true);
+
+ VkResult result = VK_SUCCESS;
+ VmaStlAllocator<FragmentedBlock> blockAllocator(m_MoveAllocator.m_pCallbacks);
+ VmaVector<FragmentedBlock, VmaStlAllocator<FragmentedBlock>> immovableBlocks(blockAllocator); // entries: { block vector index, block }
+ VmaVector<FragmentedBlock, VmaStlAllocator<FragmentedBlock>> mappedBlocks(blockAllocator); // entries: { accumulated map count, block }
+
+ VmaAllocator allocator = VMA_NULL; // set only when some COPY move reports a non-zero map count
+ for (uint32_t i = 0; i < moveInfo.moveCount; ++i)
+ {
+ VmaDefragmentationMove& move = moveInfo.pMoves[i];
+ size_t prevCount = 0, currentCount = 0; // block counts before / after the frees below
+ VkDeviceSize freedBlockSize = 0;
+
+ uint32_t vectorIndex;
+ VmaBlockVector* vector;
+ if (m_PoolBlockVector != VMA_NULL)
+ {
+ vectorIndex = 0;
+ vector = m_PoolBlockVector;
+ }
+ else
+ {
+ vectorIndex = move.srcAllocation->GetMemoryTypeIndex();
+ vector = m_pBlockVectors[vectorIndex];
+ VMA_ASSERT(vector != VMA_NULL);
+ }
+
+ switch (move.operation)
+ {
+ case VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY:
+ {
+ uint8_t mapCount = move.srcAllocation->SwapBlockAllocation(vector->m_hAllocator, move.dstTmpAllocation);
+ if (mapCount > 0)
+ {
+ allocator = vector->m_hAllocator;
+ VmaDeviceMemoryBlock* newMapBlock = move.srcAllocation->GetBlock();
+ bool notPresent = true;
+ for (FragmentedBlock& block : mappedBlocks)
+ {
+ if (block.block == newMapBlock)
+ {
+ notPresent = false;
+ block.data += mapCount; // one entry per block; map counts are summed
+ break;
+ }
+ }
+ if (notPresent)
+ mappedBlocks.push_back({ mapCount, newMapBlock });
+ }
+
+ // Scope for the locks; Free() takes its own lock
+ {
+ VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex);
+ prevCount = vector->GetBlockCount();
+ freedBlockSize = move.dstTmpAllocation->GetBlock()->m_pMetadata->GetSize();
+ }
+ vector->Free(move.dstTmpAllocation);
+ {
+ VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex);
+ currentCount = vector->GetBlockCount();
+ }
+
+ result = VK_INCOMPLETE;
+ break;
+ }
+ case VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE:
+ {
+ m_PassStats.bytesMoved -= move.srcAllocation->GetSize(); // undo the counters recorded when the move was planned
+ --m_PassStats.allocationsMoved;
+ vector->Free(move.dstTmpAllocation);
+
+ VmaDeviceMemoryBlock* newBlock = move.srcAllocation->GetBlock();
+ bool notPresent = true;
+ for (const FragmentedBlock& block : immovableBlocks)
+ {
+ if (block.block == newBlock)
+ {
+ notPresent = false;
+ break;
+ }
+ }
+ if (notPresent)
+ immovableBlocks.push_back({ vectorIndex, newBlock });
+ break;
+ }
+ case VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY:
+ {
+ m_PassStats.bytesMoved -= move.srcAllocation->GetSize(); // undo the counters recorded when the move was planned
+ --m_PassStats.allocationsMoved;
+ // Scope for the locks; Free() takes its own lock
+ {
+ VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex);
+ prevCount = vector->GetBlockCount();
+ freedBlockSize = move.srcAllocation->GetBlock()->m_pMetadata->GetSize();
+ }
+ vector->Free(move.srcAllocation);
+ {
+ VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex);
+ currentCount = vector->GetBlockCount();
+ }
+ freedBlockSize *= prevCount - currentCount; // becomes 0 when freeing the source did not release a block
+
+ VkDeviceSize dstBlockSize;
+ {
+ VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex);
+ dstBlockSize = move.dstTmpAllocation->GetBlock()->m_pMetadata->GetSize();
+ }
+ vector->Free(move.dstTmpAllocation);
+ {
+ VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex);
+ freedBlockSize += dstBlockSize * (currentCount - vector->GetBlockCount());
+ currentCount = vector->GetBlockCount();
+ }
+
+ result = VK_INCOMPLETE;
+ break;
+ }
+ default:
+ VMA_ASSERT(0);
+ }
+
+ if (prevCount > currentCount)
+ {
+ size_t freedBlocks = prevCount - currentCount;
+ m_PassStats.deviceMemoryBlocksFreed += static_cast<uint32_t>(freedBlocks);
+ m_PassStats.bytesFreed += freedBlockSize;
+ }
+
+ if(m_Algorithm == VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT &&
+ m_AlgorithmState != VMA_NULL)
+ {
+ // Avoid unnecessary attempts to allocate when a new free block is available
+ StateExtensive& state = reinterpret_cast<StateExtensive*>(m_AlgorithmState)[vectorIndex];
+ if (state.firstFreeBlock != SIZE_MAX)
+ {
+ const size_t diff = prevCount - currentCount;
+ if (state.firstFreeBlock >= diff)
+ {
+ state.firstFreeBlock -= diff;
+ if (state.firstFreeBlock != 0)
+ state.firstFreeBlock -= vector->GetBlock(state.firstFreeBlock - 1)->m_pMetadata->IsEmpty(); // bool used as 0/1: step back once more if the preceding block is empty
+ }
+ else
+ state.firstFreeBlock = 0;
+ }
+ }
+ }
+ moveInfo.moveCount = 0;
+ moveInfo.pMoves = VMA_NULL;
+ m_Moves.clear();
+
+ // Fold this pass's statistics into the global ones, then reset the pass statistics
+ m_GlobalStats.allocationsMoved += m_PassStats.allocationsMoved;
+ m_GlobalStats.bytesFreed += m_PassStats.bytesFreed;
+ m_GlobalStats.bytesMoved += m_PassStats.bytesMoved;
+ m_GlobalStats.deviceMemoryBlocksFreed += m_PassStats.deviceMemoryBlocksFreed;
+ m_PassStats = { 0 };
+
+ // Move blocks with immovable allocations according to algorithm
+ if (immovableBlocks.size() > 0)
+ {
+ do // single-iteration loop: lets the extensive path `break` past the default handling below
+ {
+ if(m_Algorithm == VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT)
+ {
+ if (m_AlgorithmState != VMA_NULL)
+ {
+ bool swapped = false;
+ // Move to the start of free blocks range
+ for (const FragmentedBlock& block : immovableBlocks)
+ {
+ StateExtensive& state = reinterpret_cast<StateExtensive*>(m_AlgorithmState)[block.data];
+ if (state.operation != StateExtensive::Operation::Cleanup)
+ {
+ VmaBlockVector* vector = m_pBlockVectors[block.data];
+ VmaMutexLockWrite lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex);
+
+ for (size_t i = 0, count = vector->GetBlockCount() - m_ImmovableBlockCount; i < count; ++i)
+ {
+ if (vector->GetBlock(i) == block.block)
+ {
+ std::swap(vector->m_Blocks[i], vector->m_Blocks[vector->GetBlockCount() - ++m_ImmovableBlockCount]); // park the block at the tail of the vector
+ if (state.firstFreeBlock != SIZE_MAX)
+ {
+ if (i + 1 < state.firstFreeBlock)
+ {
+ if (state.firstFreeBlock > 1)
+ std::swap(vector->m_Blocks[i], vector->m_Blocks[--state.firstFreeBlock]);
+ else
+ --state.firstFreeBlock;
+ }
+ }
+ swapped = true;
+ break;
+ }
+ }
+ }
+ }
+ if (swapped)
+ result = VK_INCOMPLETE;
+ break;
+ }
+ }
+
+ // Move to the beginning
+ for (const FragmentedBlock& block : immovableBlocks)
+ {
+ VmaBlockVector* vector = m_pBlockVectors[block.data];
+ VmaMutexLockWrite lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex);
+
+ for (size_t i = m_ImmovableBlockCount; i < vector->GetBlockCount(); ++i)
+ {
+ if (vector->GetBlock(i) == block.block)
+ {
+ std::swap(vector->m_Blocks[i], vector->m_Blocks[m_ImmovableBlockCount++]);
+ break;
+ }
+ }
+ }
+ } while (false);
+ }
+
+ // Bulk-map destination blocks
+ for (const FragmentedBlock& block : mappedBlocks)
+ {
+ VkResult res = block.block->Map(allocator, block.data, VMA_NULL);
+ VMA_ASSERT(res == VK_SUCCESS); // NOTE(review): a Map failure is only asserted, it is not reflected in the return value
+ }
+ return result;
+}
+
+// Dispatches to the planning routine selected by m_Algorithm and forwards its
+// result. An unrecognised algorithm value asserts and then falls back to the
+// balanced routine.
+bool VmaDefragmentationContext_T::ComputeDefragmentation(VmaBlockVector& vector, size_t index)
+{
+    if (m_Algorithm == VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT)
+        return ComputeDefragmentation_Fast(vector);
+
+    if (m_Algorithm == VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT)
+        return ComputeDefragmentation_Balanced(vector, index, true);
+
+    if (m_Algorithm == VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT)
+        return ComputeDefragmentation_Full(vector);
+
+    if (m_Algorithm == VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT)
+        return ComputeDefragmentation_Extensive(vector, index);
+
+    VMA_ASSERT(0);
+    return ComputeDefragmentation_Balanced(vector, index, true);
+}
+
+// Builds the description of a candidate move for the allocation identified by
+// `handle` in `metadata`: its size, alignment and suballocation type are
+// copied, and the creation flags are derived from whether the allocation is
+// persistently mapped and whether mapping is allowed for it.
+VmaDefragmentationContext_T::MoveAllocationData VmaDefragmentationContext_T::GetMoveData(
+    VmaAllocHandle handle, VmaBlockMetadata* metadata)
+{
+    MoveAllocationData moveData;
+    moveData.move.srcAllocation = (VmaAllocation)metadata->GetAllocationUserData(handle);
+
+    const VmaAllocation source = moveData.move.srcAllocation;
+    moveData.size = source->GetSize();
+    moveData.alignment = source->GetAlignment();
+    moveData.type = source->GetSuballocationType();
+
+    moveData.flags = 0;
+    if (source->IsPersistentMap())
+    {
+        moveData.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT;
+    }
+    if (source->IsMappingAllowed())
+    {
+        moveData.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
+    }
+
+    return moveData;
+}
+
+// Decides whether an allocation of `bytes` may still be moved in this pass.
+//  - End:    the user break callback asked to stop, or too many consecutive
+//            allocations have been skipped for exceeding the byte budget.
+//  - Ignore: this allocation would exceed the pass byte budget; skip it.
+//  - Pass:   the allocation fits; the skipped-allocation counter is reset.
+VmaDefragmentationContext_T::CounterStatus VmaDefragmentationContext_T::CheckCounters(VkDeviceSize bytes)
+{
+    // Check custom criteria if exists
+    if (m_BreakCallback && m_BreakCallback(m_BreakCallbackUserData))
+        return CounterStatus::End;
+
+    const bool fitsInBudget = m_PassStats.bytesMoved + bytes <= m_MaxPassBytes;
+    if (fitsInBudget)
+    {
+        m_IgnoredAllocs = 0;
+        return CounterStatus::Pass;
+    }
+
+    // Ignore allocation if will exceed max size for copy
+    ++m_IgnoredAllocs;
+    return (m_IgnoredAllocs < MAX_ALLOCS_TO_IGNORE) ? CounterStatus::Ignore : CounterStatus::End;
+}
+
+// Records one planned move of `bytes` in the pass statistics.
+// Returns true when either pass limit (allocation count or byte count) has
+// been reached, which tells the caller to stop planning further moves.
+bool VmaDefragmentationContext_T::IncrementCounters(VkDeviceSize bytes)
+{
+    m_PassStats.bytesMoved += bytes;
+    ++m_PassStats.allocationsMoved;
+
+    const bool limitReached =
+        m_PassStats.allocationsMoved >= m_MaxPassAllocations ||
+        m_PassStats.bytesMoved >= m_MaxPassBytes;
+    if (!limitReached)
+        return false;
+
+    // Early return when max found
+    VMA_ASSERT((m_PassStats.allocationsMoved == m_MaxPassAllocations ||
+        m_PassStats.bytesMoved == m_MaxPassBytes) && "Exceeded maximal pass threshold!");
+    return true;
+}
+
+// Tries to move each allocation of `block` to a lower offset inside the same
+// block. A move is planned only when the allocation is not already at offset
+// 0, the block has at least as much total free space as the allocation needs,
+// and the min-offset allocation request lands strictly below the current
+// offset. Returns true when planning must stop (a pass limit was hit or
+// CheckCounters reported End), false after the whole block has been walked.
+bool VmaDefragmentationContext_T::ReallocWithinBlock(VmaBlockVector& vector, VmaDeviceMemoryBlock* block)
+{
+    VmaBlockMetadata* const metadata = block->m_pMetadata;
+
+    VmaAllocHandle handle = metadata->GetAllocationListBegin();
+    for (; handle != VK_NULL_HANDLE; handle = metadata->GetNextAllocation(handle))
+    {
+        MoveAllocationData moveData = GetMoveData(handle, metadata);
+
+        // Ignore newly created allocations by defragmentation algorithm
+        if (moveData.move.srcAllocation->GetUserData() == this)
+            continue;
+
+        switch (CheckCounters(moveData.move.srcAllocation->GetSize()))
+        {
+        case CounterStatus::Ignore:
+            continue;
+        case CounterStatus::End:
+            return true;
+        case CounterStatus::Pass:
+            break;
+        default:
+            VMA_ASSERT(0);
+        }
+
+        const VkDeviceSize currentOffset = moveData.move.srcAllocation->GetOffset();
+        if (currentOffset == 0 || metadata->GetSumFreeSize() < moveData.size)
+            continue;
+
+        VmaAllocationRequest request = {};
+        const bool requestCreated = metadata->CreateAllocationRequest(
+            moveData.size,
+            moveData.alignment,
+            false,
+            moveData.type,
+            VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT,
+            &request);
+        if (!requestCreated)
+            continue;
+
+        // Only worth moving when the new place is strictly lower in the block.
+        if (metadata->GetAllocationOffset(request.allocHandle) >= currentOffset)
+            continue;
+
+        const VkResult commitResult = vector.CommitAllocationRequest(
+            request,
+            block,
+            moveData.alignment,
+            moveData.flags,
+            this,
+            moveData.type,
+            &moveData.move.dstTmpAllocation);
+        if (commitResult != VK_SUCCESS)
+            continue;
+
+        m_Moves.push_back(moveData.move);
+        if (IncrementCounters(moveData.size))
+            return true;
+    }
+    return false;
+}
+
+// Looks through blocks [start, end) of `vector` for the first one that
+// accepts a temporary destination allocation for `data`. On success the move
+// is appended to m_Moves and the search stops. Returns true only when
+// recording that move reached a pass limit; false otherwise, including when
+// no block accepted the allocation.
+bool VmaDefragmentationContext_T::AllocInOtherBlock(size_t start, size_t end, MoveAllocationData& data, VmaBlockVector& vector)
+{
+    for (size_t blockIndex = start; blockIndex < end; ++blockIndex)
+    {
+        VmaDeviceMemoryBlock* const dstBlock = vector.GetBlock(blockIndex);
+        if (dstBlock->m_pMetadata->GetSumFreeSize() < data.size)
+            continue;
+
+        const VkResult allocResult = vector.AllocateFromBlock(dstBlock,
+            data.size,
+            data.alignment,
+            data.flags,
+            this,
+            data.type,
+            0,
+            &data.move.dstTmpAllocation);
+        if (allocResult != VK_SUCCESS)
+            continue;
+
+        m_Moves.push_back(data.move);
+        return IncrementCounters(data.size);
+    }
+    return false;
+}
+
+// Fast algorithm: moves allocations between blocks only. Walks source blocks
+// from the last one down to (but not including) index m_ImmovableBlockCount
+// and tries to place each of their allocations in any block with a lower
+// index. Returns true when planning must stop for this pass, false once all
+// candidate blocks have been visited.
+bool VmaDefragmentationContext_T::ComputeDefragmentation_Fast(VmaBlockVector& vector)
+{
+    // Go through allocations in last blocks and try to fit them inside first ones
+    size_t srcIndex = vector.GetBlockCount() - 1;
+    while (srcIndex > m_ImmovableBlockCount)
+    {
+        VmaBlockMetadata* const metadata = vector.GetBlock(srcIndex)->m_pMetadata;
+
+        VmaAllocHandle handle = metadata->GetAllocationListBegin();
+        for (; handle != VK_NULL_HANDLE; handle = metadata->GetNextAllocation(handle))
+        {
+            MoveAllocationData moveData = GetMoveData(handle, metadata);
+
+            // Ignore newly created allocations by defragmentation algorithm
+            if (moveData.move.srcAllocation->GetUserData() == this)
+                continue;
+
+            switch (CheckCounters(moveData.move.srcAllocation->GetSize()))
+            {
+            case CounterStatus::Ignore:
+                continue;
+            case CounterStatus::End:
+                return true;
+            case CounterStatus::Pass:
+                break;
+            default:
+                VMA_ASSERT(0);
+            }
+
+            // Check all previous blocks for free space
+            if (AllocInOtherBlock(0, srcIndex, moveData, vector))
+                return true;
+        }
+
+        --srcIndex;
+    }
+    return false;
+}
+
+bool VmaDefragmentationContext_T::ComputeDefragmentation_Balanced(VmaBlockVector& vector, size_t index, bool update)
+{ /* Balanced algorithm. `index` selects this vector's StateBalanced entry in m_AlgorithmState;
+ * `update` allows the cached averages to be recomputed when avgAllocSize holds the
+ * UINT64_MAX sentinel. Returns true when planning must stop for this pass. */
+ // Go over every allocation and try to fit it into previous blocks at the lowest offsets;
+ // if that is not possible, realloc within the same block to minimize the offset (excluding offset == 0),
+ // but only if there are noticeable gaps between allocations (a heuristic, e.g. the average allocation size in the block)
+ VMA_ASSERT(m_AlgorithmState != VMA_NULL);
+
+ StateBalanced& vectorState = reinterpret_cast<StateBalanced*>(m_AlgorithmState)[index];
+ if (update && vectorState.avgAllocSize == UINT64_MAX)
+ UpdateVectorStatistics(vector, vectorState);
+
+ const size_t startMoveCount = m_Moves.size();
+ VkDeviceSize minimalFreeRegion = vectorState.avgFreeSize / 2; // a neighbouring gap counts as "noticeable" from half the average free-region size
+ for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i)
+ {
+ VmaDeviceMemoryBlock* block = vector.GetBlock(i);
+ VmaBlockMetadata* metadata = block->m_pMetadata;
+ VkDeviceSize prevFreeRegionSize = 0;
+
+ for (VmaAllocHandle handle = metadata->GetAllocationListBegin();
+ handle != VK_NULL_HANDLE;
+ handle = metadata->GetNextAllocation(handle))
+ {
+ MoveAllocationData moveData = GetMoveData(handle, metadata);
+ // Ignore allocations newly created by the defragmentation algorithm
+ if (moveData.move.srcAllocation->GetUserData() == this)
+ continue;
+ switch (CheckCounters(moveData.move.srcAllocation->GetSize()))
+ {
+ case CounterStatus::Ignore:
+ continue;
+ case CounterStatus::End:
+ return true;
+ case CounterStatus::Pass:
+ break;
+ default:
+ VMA_ASSERT(0);
+ }
+
+ // Check all previous blocks for free space
+ const size_t prevMoveCount = m_Moves.size();
+ if (AllocInOtherBlock(0, i, moveData, vector))
+ return true;
+
+ VkDeviceSize nextFreeRegionSize = metadata->GetNextFreeRegionSize(handle);
+ // If no room was found, realloc within the block for a lower offset
+ VkDeviceSize offset = moveData.move.srcAllocation->GetOffset();
+ if (prevMoveCount == m_Moves.size() && offset != 0 && metadata->GetSumFreeSize() >= moveData.size)
+ {
+ // Check whether the realloc makes sense
+ if (prevFreeRegionSize >= minimalFreeRegion ||
+ nextFreeRegionSize >= minimalFreeRegion ||
+ moveData.size <= vectorState.avgFreeSize ||
+ moveData.size <= vectorState.avgAllocSize)
+ {
+ VmaAllocationRequest request = {};
+ if (metadata->CreateAllocationRequest(
+ moveData.size,
+ moveData.alignment,
+ false,
+ moveData.type,
+ VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT,
+ &request))
+ {
+ if (metadata->GetAllocationOffset(request.allocHandle) < offset) // move only to a strictly lower offset
+ {
+ if (vector.CommitAllocationRequest(
+ request,
+ block,
+ moveData.alignment,
+ moveData.flags,
+ this,
+ moveData.type,
+ &moveData.move.dstTmpAllocation) == VK_SUCCESS)
+ {
+ m_Moves.push_back(moveData.move);
+ if (IncrementCounters(moveData.size))
+ return true;
+ }
+ }
+ }
+ }
+ }
+ prevFreeRegionSize = nextFreeRegionSize;
+ }
+ }
+
+ // No moves performed, update statistics to current vector state
+ if (startMoveCount == m_Moves.size() && !update) // NOTE(review): the dispatcher in this file passes update == true, so this branch looks unreachable from there; it re-enters with identical arguments - confirm intent
+ {
+ vectorState.avgAllocSize = UINT64_MAX;
+ return ComputeDefragmentation_Balanced(vector, index, false);
+ }
+ return false;
+}
+
+// Full algorithm: for every allocation in the blocks above index
+// m_ImmovableBlockCount (walking from the last block downwards), first try to
+// place it in any block with a lower index; when that planned nothing, try to
+// move it to a strictly lower offset inside its own block (allocations
+// already at offset 0 are left alone). Returns true when planning must stop
+// for this pass.
+bool VmaDefragmentationContext_T::ComputeDefragmentation_Full(VmaBlockVector& vector)
+{
+    size_t srcIndex = vector.GetBlockCount() - 1;
+    while (srcIndex > m_ImmovableBlockCount)
+    {
+        VmaDeviceMemoryBlock* const block = vector.GetBlock(srcIndex);
+        VmaBlockMetadata* const metadata = block->m_pMetadata;
+
+        VmaAllocHandle handle = metadata->GetAllocationListBegin();
+        for (; handle != VK_NULL_HANDLE; handle = metadata->GetNextAllocation(handle))
+        {
+            MoveAllocationData moveData = GetMoveData(handle, metadata);
+
+            // Ignore newly created allocations by defragmentation algorithm
+            if (moveData.move.srcAllocation->GetUserData() == this)
+                continue;
+
+            switch (CheckCounters(moveData.move.srcAllocation->GetSize()))
+            {
+            case CounterStatus::Ignore:
+                continue;
+            case CounterStatus::End:
+                return true;
+            case CounterStatus::Pass:
+                break;
+            default:
+                VMA_ASSERT(0);
+            }
+
+            // Check all previous blocks for free space
+            const size_t movesBefore = m_Moves.size();
+            if (AllocInOtherBlock(0, srcIndex, moveData, vector))
+                return true;
+
+            // A move to another block was planned; nothing more to do for this allocation.
+            if (movesBefore != m_Moves.size())
+                continue;
+
+            // If no room found then realloc within block for lower offset
+            const VkDeviceSize currentOffset = moveData.move.srcAllocation->GetOffset();
+            if (currentOffset == 0 || metadata->GetSumFreeSize() < moveData.size)
+                continue;
+
+            VmaAllocationRequest request = {};
+            const bool requestCreated = metadata->CreateAllocationRequest(
+                moveData.size,
+                moveData.alignment,
+                false,
+                moveData.type,
+                VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT,
+                &request);
+            if (!requestCreated)
+                continue;
+
+            if (metadata->GetAllocationOffset(request.allocHandle) >= currentOffset)
+                continue;
+
+            const VkResult commitResult = vector.CommitAllocationRequest(
+                request,
+                block,
+                moveData.alignment,
+                moveData.flags,
+                this,
+                moveData.type,
+                &moveData.move.dstTmpAllocation);
+            if (commitResult != VK_SUCCESS)
+                continue;
+
+            m_Moves.push_back(moveData.move);
+            if (IncrementCounters(moveData.size))
+                return true;
+        }
+
+        --srcIndex;
+    }
+    return false;
+}
+
+bool VmaDefragmentationContext_T::ComputeDefragmentation_Extensive(VmaBlockVector& vector, size_t index)
+{ /* Extensive algorithm, driven by the per-vector state machine stored in
+ * m_AlgorithmState[index] (StateExtensive::operation and firstFreeBlock).
+ * Returns true when planning must stop for this pass. */
+ // First free a single block, then populate it to the brim, then free another block, and so on
+
+ // Fall back to the full algorithm, since without granularity conflicts it can achieve max packing
+ if (vector.m_BufferImageGranularity == 1)
+ return ComputeDefragmentation_Full(vector);
+
+ VMA_ASSERT(m_AlgorithmState != VMA_NULL);
+
+ StateExtensive& vectorState = reinterpret_cast<StateExtensive*>(m_AlgorithmState)[index];
+
+ bool texturePresent = false, bufferPresent = false, otherPresent = false; // filled in by MoveDataToFreeBlocks
+ switch (vectorState.operation)
+ {
+ case StateExtensive::Operation::Done: // Vector defragmented
+ return false;
+ case StateExtensive::Operation::FindFreeBlockBuffer:
+ case StateExtensive::Operation::FindFreeBlockTexture:
+ case StateExtensive::Operation::FindFreeBlockAll:
+ {
+ // No more blocks to free, just perform fast realloc and move to cleanup
+ if (vectorState.firstFreeBlock == 0)
+ {
+ vectorState.operation = StateExtensive::Operation::Cleanup;
+ return ComputeDefragmentation_Fast(vector);
+ }
+
+ // No free blocks, have to clear last one
+ size_t last = (vectorState.firstFreeBlock == SIZE_MAX ? vector.GetBlockCount() : vectorState.firstFreeBlock) - 1; // SIZE_MAX is treated as "no free block recorded yet"
+ VmaBlockMetadata* freeMetadata = vector.GetBlock(last)->m_pMetadata;
+
+ const size_t prevMoveCount = m_Moves.size();
+ for (VmaAllocHandle handle = freeMetadata->GetAllocationListBegin();
+ handle != VK_NULL_HANDLE;
+ handle = freeMetadata->GetNextAllocation(handle))
+ {
+ MoveAllocationData moveData = GetMoveData(handle, freeMetadata); // NOTE(review): unlike the other planning loops, no GetUserData() == this check here - confirm intended
+ switch (CheckCounters(moveData.move.srcAllocation->GetSize()))
+ {
+ case CounterStatus::Ignore:
+ continue;
+ case CounterStatus::End:
+ return true;
+ case CounterStatus::Pass:
+ break;
+ default:
+ VMA_ASSERT(0);
+ }
+
+ // Check all previous blocks for free space
+ if (AllocInOtherBlock(0, last, moveData, vector))
+ {
+ // Full clear performed already
+ if (prevMoveCount != m_Moves.size() && freeMetadata->GetNextAllocation(handle) == VK_NULL_HANDLE)
+ vectorState.firstFreeBlock = last;
+ return true;
+ }
+ }
+
+ if (prevMoveCount == m_Moves.size())
+ {
+ // Cannot perform full clear, have to move data in other blocks around
+ if (last != 0)
+ {
+ for (size_t i = last - 1; i; --i) // visits blocks last-1 down to 1; block 0 is not visited by this loop
+ {
+ if (ReallocWithinBlock(vector, vector.GetBlock(i)))
+ return true;
+ }
+ }
+
+ if (prevMoveCount == m_Moves.size())
+ {
+ // No possible reallocs within blocks, try to move them around fast
+ return ComputeDefragmentation_Fast(vector);
+ }
+ }
+ else
+ {
+ switch (vectorState.operation)
+ {
+ case StateExtensive::Operation::FindFreeBlockBuffer:
+ vectorState.operation = StateExtensive::Operation::MoveBuffers;
+ break;
+ case StateExtensive::Operation::FindFreeBlockTexture:
+ vectorState.operation = StateExtensive::Operation::MoveTextures;
+ break;
+ case StateExtensive::Operation::FindFreeBlockAll:
+ vectorState.operation = StateExtensive::Operation::MoveAll;
+ break;
+ default:
+ VMA_ASSERT(0);
+ vectorState.operation = StateExtensive::Operation::MoveTextures;
+ }
+ vectorState.firstFreeBlock = last;
+ // Nothing done, block found without reallocations, can perform more reallocs in the same pass
+ return ComputeDefragmentation_Extensive(vector, index);
+ }
+ break;
+ }
+ case StateExtensive::Operation::MoveTextures:
+ {
+ if (MoveDataToFreeBlocks(VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL, vector,
+ vectorState.firstFreeBlock, texturePresent, bufferPresent, otherPresent))
+ {
+ if (texturePresent)
+ {
+ vectorState.operation = StateExtensive::Operation::FindFreeBlockTexture;
+ return ComputeDefragmentation_Extensive(vector, index);
+ }
+
+ if (!bufferPresent && !otherPresent)
+ {
+ vectorState.operation = StateExtensive::Operation::Cleanup;
+ break;
+ }
+
+ // No more textures to move, check buffers
+ vectorState.operation = StateExtensive::Operation::MoveBuffers;
+ bufferPresent = false;
+ otherPresent = false;
+ }
+ else
+ break;
+ VMA_FALLTHROUGH; // Fallthrough
+ }
+ case StateExtensive::Operation::MoveBuffers:
+ {
+ if (MoveDataToFreeBlocks(VMA_SUBALLOCATION_TYPE_BUFFER, vector,
+ vectorState.firstFreeBlock, texturePresent, bufferPresent, otherPresent))
+ {
+ if (bufferPresent)
+ {
+ vectorState.operation = StateExtensive::Operation::FindFreeBlockBuffer;
+ return ComputeDefragmentation_Extensive(vector, index);
+ }
+
+ if (!otherPresent)
+ {
+ vectorState.operation = StateExtensive::Operation::Cleanup;
+ break;
+ }
+
+ // No more buffers to move, check all others
+ vectorState.operation = StateExtensive::Operation::MoveAll;
+ otherPresent = false;
+ }
+ else
+ break;
+ VMA_FALLTHROUGH; // Fallthrough
+ }
+ case StateExtensive::Operation::MoveAll:
+ {
+ if (MoveDataToFreeBlocks(VMA_SUBALLOCATION_TYPE_FREE, vector,
+ vectorState.firstFreeBlock, texturePresent, bufferPresent, otherPresent))
+ {
+ if (otherPresent)
+ {
+ vectorState.operation = StateExtensive::Operation::FindFreeBlockBuffer; // NOTE(review): FindFreeBlockAll is never assigned in this function; confirm FindFreeBlockBuffer is intended here
+ return ComputeDefragmentation_Extensive(vector, index);
+ }
+ // Everything moved
+ vectorState.operation = StateExtensive::Operation::Cleanup;
+ }
+ break;
+ }
+ case StateExtensive::Operation::Cleanup:
+ // Cleanup is handled below so that other operations may reuse the cleanup code. This case is here to prevent the unhandled enum value warning (C4062).
+ break;
+ }
+
+ if (vectorState.operation == StateExtensive::Operation::Cleanup)
+ {
+ // All other work done, pack data in blocks even tighter if possible
+ const size_t prevMoveCount = m_Moves.size();
+ for (size_t i = 0; i < vector.GetBlockCount(); ++i)
+ {
+ if (ReallocWithinBlock(vector, vector.GetBlock(i)))
+ return true;
+ }
+
+ if (prevMoveCount == m_Moves.size()) // cleanup planned nothing: mark this vector as finished
+ vectorState.operation = StateExtensive::Operation::Done;
+ }
+ return false;
+}
+
+// Recomputes the averages cached in `state` for `vector`:
+//  - avgAllocSize = (total block size - total free size) / number of allocations
+//  - avgFreeSize  = total free size / number of free regions
+// Both counts are summed over every block of the vector. When a count is zero
+// (for example when every block is empty) the matching average is set to 0
+// rather than dividing by zero, which is undefined behaviour for integers.
+void VmaDefragmentationContext_T::UpdateVectorStatistics(VmaBlockVector& vector, StateBalanced& state)
+{
+    size_t allocCount = 0;
+    size_t freeCount = 0;
+    state.avgFreeSize = 0;
+    state.avgAllocSize = 0;
+
+    // While accumulating, avgFreeSize holds the free-byte total and
+    // avgAllocSize holds the total size of all blocks.
+    for (size_t i = 0; i < vector.GetBlockCount(); ++i)
+    {
+        VmaBlockMetadata* metadata = vector.GetBlock(i)->m_pMetadata;
+
+        allocCount += metadata->GetAllocationCount();
+        freeCount += metadata->GetFreeRegionsCount();
+        state.avgFreeSize += metadata->GetSumFreeSize();
+        state.avgAllocSize += metadata->GetSize();
+    }
+
+    if (allocCount != 0)
+        state.avgAllocSize = (state.avgAllocSize - state.avgFreeSize) / allocCount;
+    else
+        state.avgAllocSize = 0;
+
+    if (freeCount != 0)
+        state.avgFreeSize /= freeCount;
+    else
+        state.avgFreeSize = 0;
+}
+
+// Walks the blocks below `firstFreeBlock` from the highest index down to 0 and
+// tries to move allocations that do not granularity-conflict with
+// `currentType` into the blocks from `firstFreeBlock` to the end of the vector.
+// For each allocation examined that does not end the call early, one of
+// texturePresent / bufferPresent / otherPresent is set according to its type.
+// Returns true when CheckCounters reports End, or when the walk finishes
+// without planning any move; returns false when a planned move reached a pass
+// limit, or when moves were planned during a completed walk.
+bool VmaDefragmentationContext_T::MoveDataToFreeBlocks(VmaSuballocationType currentType,
+    VmaBlockVector& vector, size_t firstFreeBlock,
+    bool& texturePresent, bool& bufferPresent, bool& otherPresent)
+{
+    const size_t movesBefore = m_Moves.size();
+
+    size_t blockIndex = firstFreeBlock;
+    while (blockIndex != 0)
+    {
+        --blockIndex;
+        VmaBlockMetadata* const metadata = vector.GetBlock(blockIndex)->m_pMetadata;
+
+        VmaAllocHandle handle = metadata->GetAllocationListBegin();
+        for (; handle != VK_NULL_HANDLE; handle = metadata->GetNextAllocation(handle))
+        {
+            MoveAllocationData moveData = GetMoveData(handle, metadata);
+
+            // Ignore newly created allocations by defragmentation algorithm
+            if (moveData.move.srcAllocation->GetUserData() == this)
+                continue;
+
+            switch (CheckCounters(moveData.move.srcAllocation->GetSize()))
+            {
+            case CounterStatus::Ignore:
+                continue;
+            case CounterStatus::End:
+                return true;
+            case CounterStatus::Pass:
+                break;
+            default:
+                VMA_ASSERT(0);
+            }
+
+            // Move only single type of resources at once;
+            // try to fit allocation into free blocks
+            if (!VmaIsBufferImageGranularityConflict(moveData.type, currentType) &&
+                AllocInOtherBlock(firstFreeBlock, vector.GetBlockCount(), moveData, vector))
+            {
+                return false;
+            }
+
+            // Classify what is present in the occupied blocks.
+            if (!VmaIsBufferImageGranularityConflict(moveData.type, VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL))
+            {
+                texturePresent = true;
+            }
+            else if (!VmaIsBufferImageGranularityConflict(moveData.type, VMA_SUBALLOCATION_TYPE_BUFFER))
+            {
+                bufferPresent = true;
+            }
+            else
+            {
+                otherPresent = true;
+            }
+        }
+    }
+    return movesBefore == m_Moves.size();
+}
+#endif // _VMA_DEFRAGMENTATION_CONTEXT_FUNCTIONS
+
+#ifndef _VMA_POOL_T_FUNCTIONS
+VmaPool_T::VmaPool_T( // Builds the pool's block vector from createInfo; the pool starts with ID 0 and no name.
+ VmaAllocator hAllocator,
+ const VmaPoolCreateInfo& createInfo,
+ VkDeviceSize preferredBlockSize) // used as the block size only when createInfo.blockSize is 0
+ : m_BlockVector(
+ hAllocator,
+ this, // hParentPool
+ createInfo.memoryTypeIndex,
+ createInfo.blockSize != 0 ? createInfo.blockSize : preferredBlockSize,
+ createInfo.minBlockCount,
+ createInfo.maxBlockCount,
+ (createInfo.flags& VMA_POOL_CREATE_IGNORE_BUFFER_IMAGE_GRANULARITY_BIT) != 0 ? 1 : hAllocator->GetBufferImageGranularity(), // granularity forced to 1 when the pool opts out of it
+ createInfo.blockSize != 0, // explicitBlockSize
+ createInfo.flags & VMA_POOL_CREATE_ALGORITHM_MASK, // algorithm
+ createInfo.priority,
+ VMA_MAX(hAllocator->GetMemoryTypeMinAlignment(createInfo.memoryTypeIndex), createInfo.minAllocationAlignment), // the stricter of the two alignments
+ createInfo.pMemoryAllocateNext),
+ m_Id(0),
+ m_Name(VMA_NULL) {}
+
+// Asserts that the pool is no longer linked to neighbouring pools, then frees
+// the pool name using the allocator's allocation callbacks.
+VmaPool_T::~VmaPool_T()
+{
+    VMA_ASSERT(m_PrevPool == VMA_NULL && m_NextPool == VMA_NULL);
+
+    VmaFreeString(m_BlockVector.GetAllocator()->GetAllocationCallbacks(), m_Name);
+}
+
+// Replaces the pool name. The previous name is always freed; a non-null
+// pName is stored as a copy made by VmaCreateStringCopy, a null pName clears the name.
+void VmaPool_T::SetName(const char* pName)
+{
+    const VkAllocationCallbacks* allocs = m_BlockVector.GetAllocator()->GetAllocationCallbacks();
+    VmaFreeString(allocs, m_Name);
+
+    if (pName == VMA_NULL)
+    {
+        m_Name = VMA_NULL;
+        return;
+    }
+    m_Name = VmaCreateStringCopy(allocs, pName);
+}
+#endif // _VMA_POOL_T_FUNCTIONS
+
+#ifndef _VMA_ALLOCATOR_T_FUNCTIONS
+// Constructs the allocator from pCreateInfo:
+//  - latches the feature flags, handles and CPU allocation callbacks from the create info,
+//  - asserts that every requested feature is compiled in,
+//  - imports the Vulkan function pointers and queries device/memory properties through them,
+//  - applies the optional per-heap size limits,
+//  - creates one default VmaBlockVector for each memory type present in m_GlobalMemoryTypeBits.
+VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) :
+    m_UseMutex((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT) == 0),
+    m_VulkanApiVersion(pCreateInfo->vulkanApiVersion != 0 ? pCreateInfo->vulkanApiVersion : VK_API_VERSION_1_0),
+    m_UseKhrDedicatedAllocation((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT) != 0),
+    m_UseKhrBindMemory2((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT) != 0),
+    m_UseExtMemoryBudget((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT) != 0),
+    m_UseAmdDeviceCoherentMemory((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT) != 0),
+    m_UseKhrBufferDeviceAddress((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT) != 0),
+    m_UseExtMemoryPriority((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT) != 0),
+    m_UseKhrMaintenance4((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT) != 0),
+    m_UseKhrMaintenance5((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT) != 0),
+    m_UseKhrExternalMemoryWin32((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT) != 0),
+    m_hDevice(pCreateInfo->device),
+    m_hInstance(pCreateInfo->instance),
+    m_AllocationCallbacksSpecified(pCreateInfo->pAllocationCallbacks != VMA_NULL),
+    m_AllocationCallbacks(pCreateInfo->pAllocationCallbacks ?
+        *pCreateInfo->pAllocationCallbacks : VmaEmptyAllocationCallbacks),
+    m_AllocationObjectAllocator(&m_AllocationCallbacks),
+    m_HeapSizeLimitMask(0),
+    m_DeviceMemoryCount(0),
+    m_PreferredLargeHeapBlockSize(0),
+    m_PhysicalDevice(pCreateInfo->physicalDevice),
+    m_GpuDefragmentationMemoryTypeBits(UINT32_MAX),
+    m_NextPoolId(0),
+    m_GlobalMemoryTypeBits(UINT32_MAX)
+{
+    // With Vulkan 1.1 or newer the KHR extension flags are cleared; the code paths that need this
+    // functionality also test m_VulkanApiVersion.
+    if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0))
+    {
+        m_UseKhrDedicatedAllocation = false;
+        m_UseKhrBindMemory2 = false;
+    }
+
+    if(VMA_DEBUG_DETECT_CORRUPTION)
+    {
+        // Needs to be a multiple of uint32_t size because we are going to write VMA_CORRUPTION_DETECTION_MAGIC_VALUE to it.
+        VMA_ASSERT(VMA_DEBUG_MARGIN % sizeof(uint32_t) == 0);
+    }
+
+    VMA_ASSERT(pCreateInfo->physicalDevice && pCreateInfo->device && pCreateInfo->instance);
+
+    // The checks below assert when a feature was requested through flags or vulkanApiVersion
+    // but its support is compiled out by a preprocessor macro.
+    if(m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0))
+    {
+#if !(VMA_DEDICATED_ALLOCATION)
+        if((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT) != 0)
+        {
+            VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT set but required extensions are disabled by preprocessor macros.");
+        }
+#endif
+#if !(VMA_BIND_MEMORY2)
+        if((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT) != 0)
+        {
+            VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT set but required extension is disabled by preprocessor macros.");
+        }
+#endif
+    }
+#if !(VMA_MEMORY_BUDGET)
+    if((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT) != 0)
+    {
+        VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT set but required extension is disabled by preprocessor macros.");
+    }
+#endif
+#if !(VMA_BUFFER_DEVICE_ADDRESS)
+    if(m_UseKhrBufferDeviceAddress)
+    {
+        VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT is set but required extension or Vulkan 1.2 is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro.");
+    }
+#endif
+#if VMA_VULKAN_VERSION < 1004000
+    VMA_ASSERT(m_VulkanApiVersion < VK_MAKE_VERSION(1, 4, 0) && "vulkanApiVersion >= VK_API_VERSION_1_4 but required Vulkan version is disabled by preprocessor macros.");
+#endif
+#if VMA_VULKAN_VERSION < 1003000
+    VMA_ASSERT(m_VulkanApiVersion < VK_MAKE_VERSION(1, 3, 0) && "vulkanApiVersion >= VK_API_VERSION_1_3 but required Vulkan version is disabled by preprocessor macros.");
+#endif
+#if VMA_VULKAN_VERSION < 1002000
+    VMA_ASSERT(m_VulkanApiVersion < VK_MAKE_VERSION(1, 2, 0) && "vulkanApiVersion >= VK_API_VERSION_1_2 but required Vulkan version is disabled by preprocessor macros.");
+#endif
+#if VMA_VULKAN_VERSION < 1001000
+    VMA_ASSERT(m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0) && "vulkanApiVersion >= VK_API_VERSION_1_1 but required Vulkan version is disabled by preprocessor macros.");
+#endif
+#if !(VMA_MEMORY_PRIORITY)
+    if(m_UseExtMemoryPriority)
+    {
+        VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro.");
+    }
+#endif
+#if !(VMA_KHR_MAINTENANCE4)
+    if(m_UseKhrMaintenance4)
+    {
+        VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro.");
+    }
+#endif
+#if !(VMA_KHR_MAINTENANCE5)
+    if(m_UseKhrMaintenance5)
+    {
+        VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro.");
+    }
+#endif
+
+#if !(VMA_EXTERNAL_MEMORY_WIN32)
+    if(m_UseKhrExternalMemoryWin32)
+    {
+        VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro.");
+    }
+#endif
+
+    // Zero the plain-data members before they are filled in below.
+    memset(&m_DeviceMemoryCallbacks, 0 ,sizeof(m_DeviceMemoryCallbacks));
+    memset(&m_PhysicalDeviceProperties, 0, sizeof(m_PhysicalDeviceProperties));
+    memset(&m_MemProps, 0, sizeof(m_MemProps));
+
+    memset(&m_pBlockVectors, 0, sizeof(m_pBlockVectors));
+    memset(&m_VulkanFunctions, 0, sizeof(m_VulkanFunctions));
+
+#if VMA_EXTERNAL_MEMORY
+    memset(&m_TypeExternalMemoryHandleTypes, 0, sizeof(m_TypeExternalMemoryHandleTypes));
+#endif // #if VMA_EXTERNAL_MEMORY
+
+    if(pCreateInfo->pDeviceMemoryCallbacks != VMA_NULL)
+    {
+        m_DeviceMemoryCallbacks.pUserData = pCreateInfo->pDeviceMemoryCallbacks->pUserData;
+        m_DeviceMemoryCallbacks.pfnAllocate = pCreateInfo->pDeviceMemoryCallbacks->pfnAllocate;
+        m_DeviceMemoryCallbacks.pfnFree = pCreateInfo->pDeviceMemoryCallbacks->pfnFree;
+    }
+
+    // Function pointers must be imported first: the property queries below go through them.
+    ImportVulkanFunctions(pCreateInfo->pVulkanFunctions);
+
+    (*m_VulkanFunctions.vkGetPhysicalDeviceProperties)(m_PhysicalDevice, &m_PhysicalDeviceProperties);
+    (*m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties)(m_PhysicalDevice, &m_MemProps);
+
+    VMA_ASSERT(VmaIsPow2(VMA_MIN_ALIGNMENT));
+    VMA_ASSERT(VmaIsPow2(VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY));
+    VMA_ASSERT(VmaIsPow2(m_PhysicalDeviceProperties.limits.bufferImageGranularity));
+    VMA_ASSERT(VmaIsPow2(m_PhysicalDeviceProperties.limits.nonCoherentAtomSize));
+
+    m_PreferredLargeHeapBlockSize = (pCreateInfo->preferredLargeHeapBlockSize != 0) ?
+        pCreateInfo->preferredLargeHeapBlockSize : static_cast<VkDeviceSize>(VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE);
+
+    m_GlobalMemoryTypeBits = CalculateGlobalMemoryTypeBits();
+
+#if VMA_EXTERNAL_MEMORY
+    if(pCreateInfo->pTypeExternalMemoryHandleTypes != VMA_NULL)
+    {
+        // One entry per memory type is copied from the caller's array.
+        memcpy(m_TypeExternalMemoryHandleTypes, pCreateInfo->pTypeExternalMemoryHandleTypes,
+            sizeof(VkExternalMemoryHandleTypeFlagsKHR) * GetMemoryTypeCount());
+    }
+#endif // #if VMA_EXTERNAL_MEMORY
+
+    if(pCreateInfo->pHeapSizeLimit != VMA_NULL)
+    {
+        for(uint32_t heapIndex = 0; heapIndex < GetMemoryHeapCount(); ++heapIndex)
+        {
+            const VkDeviceSize limit = pCreateInfo->pHeapSizeLimit[heapIndex];
+            // VK_WHOLE_SIZE means "no limit" for this heap.
+            if(limit != VK_WHOLE_SIZE)
+            {
+                m_HeapSizeLimitMask |= 1u << heapIndex;
+                // The limit can only shrink the heap size stored in m_MemProps, never grow it.
+                if(limit < m_MemProps.memoryHeaps[heapIndex].size)
+                {
+                    m_MemProps.memoryHeaps[heapIndex].size = limit;
+                }
+            }
+        }
+    }
+
+    for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex)
+    {
+        // Create only supported types
+        if((m_GlobalMemoryTypeBits & (1u << memTypeIndex)) != 0)
+        {
+            const VkDeviceSize preferredBlockSize = CalcPreferredBlockSize(memTypeIndex);
+            m_pBlockVectors[memTypeIndex] = vma_new(this, VmaBlockVector)(
+                this,
+                VK_NULL_HANDLE, // hParentPool
+                memTypeIndex,
+                preferredBlockSize,
+                0, // minBlockCount
+                SIZE_MAX,
+                GetBufferImageGranularity(),
+                false, // explicitBlockSize
+                0, // algorithm
+                0.5f, // priority (0.5 is the default per Vulkan spec)
+                GetMemoryTypeMinAlignment(memTypeIndex), // minAllocationAlignment
+                VMA_NULL); // pMemoryAllocateNext
+            // No need to call m_pBlockVectors[memTypeIndex]->CreateMinBlocks here,
+            // because minBlockCount is 0.
+        }
+    }
+}
+
+// Second-phase initialization. When VMA_MEMORY_BUDGET is compiled in and m_UseExtMemoryBudget is set,
+// it calls UpdateVulkanBudget(). pCreateInfo is not read here. Always returns VK_SUCCESS.
+VkResult VmaAllocator_T::Init(const VmaAllocatorCreateInfo* pCreateInfo)
+{
+#if VMA_MEMORY_BUDGET
+    if(m_UseExtMemoryBudget)
+        UpdateVulkanBudget();
+#endif // #if VMA_MEMORY_BUDGET
+
+    return VK_SUCCESS;
+}
+
+// Destroys the default block vectors. All custom pools are expected to be gone already (asserted).
+VmaAllocator_T::~VmaAllocator_T()
+{
+    VMA_ASSERT(m_Pools.IsEmpty());
+
+    // Walk the memory types from the highest index down to 0.
+    size_t remaining = GetMemoryTypeCount();
+    while(remaining > 0)
+    {
+        --remaining;
+        vma_delete(this, m_pBlockVectors[remaining]);
+    }
+}
+
+void VmaAllocator_T::ImportVulkanFunctions(const VmaVulkanFunctions* pVulkanFunctions) // Fills m_VulkanFunctions from up to three sources, in the order below, then validates the result.
+{
+#if VMA_STATIC_VULKAN_FUNCTIONS == 1
+ ImportVulkanFunctions_Static(); // 1) Start from the statically linked Vulkan entry points.
+#endif
+
+ if(pVulkanFunctions != VMA_NULL)
+ {
+ ImportVulkanFunctions_Custom(pVulkanFunctions); // 2) Every non-null caller-supplied pointer overwrites the stored one.
+ }
+
+#if VMA_DYNAMIC_VULKAN_FUNCTIONS == 1
+ ImportVulkanFunctions_Dynamic(); // 3) Fetch by name only the pointers that are still null.
+#endif
+
+ ValidateVulkanFunctions(); // Assert that the required pointers ended up non-null.
+}
+
+#if VMA_STATIC_VULKAN_FUNCTIONS == 1
+
+// Stores the addresses of the statically linked Vulkan entry points in m_VulkanFunctions.
+// Core 1.1 and 1.3 entry points are stored only when both the compile-time VMA_VULKAN_VERSION
+// and the runtime m_VulkanApiVersion allow them.
+void VmaAllocator_T::ImportVulkanFunctions_Static()
+{
+    auto& fn = m_VulkanFunctions;
+
+    // Vulkan 1.0
+    fn.vkGetInstanceProcAddr = (PFN_vkGetInstanceProcAddr)vkGetInstanceProcAddr;
+    fn.vkGetDeviceProcAddr = (PFN_vkGetDeviceProcAddr)vkGetDeviceProcAddr;
+    fn.vkGetPhysicalDeviceProperties = (PFN_vkGetPhysicalDeviceProperties)vkGetPhysicalDeviceProperties;
+    fn.vkGetPhysicalDeviceMemoryProperties = (PFN_vkGetPhysicalDeviceMemoryProperties)vkGetPhysicalDeviceMemoryProperties;
+    fn.vkAllocateMemory = (PFN_vkAllocateMemory)vkAllocateMemory;
+    fn.vkFreeMemory = (PFN_vkFreeMemory)vkFreeMemory;
+    fn.vkMapMemory = (PFN_vkMapMemory)vkMapMemory;
+    fn.vkUnmapMemory = (PFN_vkUnmapMemory)vkUnmapMemory;
+    fn.vkFlushMappedMemoryRanges = (PFN_vkFlushMappedMemoryRanges)vkFlushMappedMemoryRanges;
+    fn.vkInvalidateMappedMemoryRanges = (PFN_vkInvalidateMappedMemoryRanges)vkInvalidateMappedMemoryRanges;
+    fn.vkBindBufferMemory = (PFN_vkBindBufferMemory)vkBindBufferMemory;
+    fn.vkBindImageMemory = (PFN_vkBindImageMemory)vkBindImageMemory;
+    fn.vkGetBufferMemoryRequirements = (PFN_vkGetBufferMemoryRequirements)vkGetBufferMemoryRequirements;
+    fn.vkGetImageMemoryRequirements = (PFN_vkGetImageMemoryRequirements)vkGetImageMemoryRequirements;
+    fn.vkCreateBuffer = (PFN_vkCreateBuffer)vkCreateBuffer;
+    fn.vkDestroyBuffer = (PFN_vkDestroyBuffer)vkDestroyBuffer;
+    fn.vkCreateImage = (PFN_vkCreateImage)vkCreateImage;
+    fn.vkDestroyImage = (PFN_vkDestroyImage)vkDestroyImage;
+    fn.vkCmdCopyBuffer = (PFN_vkCmdCopyBuffer)vkCmdCopyBuffer;
+
+    // Vulkan 1.1: the core functions are stored in the *KHR-named members.
+#if VMA_VULKAN_VERSION >= 1001000
+    if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0))
+    {
+        fn.vkGetBufferMemoryRequirements2KHR = (PFN_vkGetBufferMemoryRequirements2)vkGetBufferMemoryRequirements2;
+        fn.vkGetImageMemoryRequirements2KHR = (PFN_vkGetImageMemoryRequirements2)vkGetImageMemoryRequirements2;
+        fn.vkBindBufferMemory2KHR = (PFN_vkBindBufferMemory2)vkBindBufferMemory2;
+        fn.vkBindImageMemory2KHR = (PFN_vkBindImageMemory2)vkBindImageMemory2;
+        fn.vkGetPhysicalDeviceMemoryProperties2KHR = (PFN_vkGetPhysicalDeviceMemoryProperties2)vkGetPhysicalDeviceMemoryProperties2;
+    }
+#endif
+
+    // Vulkan 1.3
+#if VMA_VULKAN_VERSION >= 1003000
+    if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 3, 0))
+    {
+        fn.vkGetDeviceBufferMemoryRequirements = (PFN_vkGetDeviceBufferMemoryRequirements)vkGetDeviceBufferMemoryRequirements;
+        fn.vkGetDeviceImageMemoryRequirements = (PFN_vkGetDeviceImageMemoryRequirements)vkGetDeviceImageMemoryRequirements;
+    }
+#endif
+}
+
+#endif // VMA_STATIC_VULKAN_FUNCTIONS == 1
+
+void VmaAllocator_T::ImportVulkanFunctions_Custom(const VmaVulkanFunctions* pVulkanFunctions) // Copies every non-null pointer from *pVulkanFunctions into m_VulkanFunctions; null members leave the stored value untouched.
+{
+ VMA_ASSERT(pVulkanFunctions != VMA_NULL); // The caller is expected to have checked for null already.
+
+#define VMA_COPY_IF_NOT_NULL(funcName) \
+ if(pVulkanFunctions->funcName != VMA_NULL) m_VulkanFunctions.funcName = pVulkanFunctions->funcName;
+
+ VMA_COPY_IF_NOT_NULL(vkGetInstanceProcAddr); // Vulkan 1.0 entry points.
+ VMA_COPY_IF_NOT_NULL(vkGetDeviceProcAddr);
+ VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceProperties);
+ VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceMemoryProperties);
+ VMA_COPY_IF_NOT_NULL(vkAllocateMemory);
+ VMA_COPY_IF_NOT_NULL(vkFreeMemory);
+ VMA_COPY_IF_NOT_NULL(vkMapMemory);
+ VMA_COPY_IF_NOT_NULL(vkUnmapMemory);
+ VMA_COPY_IF_NOT_NULL(vkFlushMappedMemoryRanges);
+ VMA_COPY_IF_NOT_NULL(vkInvalidateMappedMemoryRanges);
+ VMA_COPY_IF_NOT_NULL(vkBindBufferMemory);
+ VMA_COPY_IF_NOT_NULL(vkBindImageMemory);
+ VMA_COPY_IF_NOT_NULL(vkGetBufferMemoryRequirements);
+ VMA_COPY_IF_NOT_NULL(vkGetImageMemoryRequirements);
+ VMA_COPY_IF_NOT_NULL(vkCreateBuffer);
+ VMA_COPY_IF_NOT_NULL(vkDestroyBuffer);
+ VMA_COPY_IF_NOT_NULL(vkCreateImage);
+ VMA_COPY_IF_NOT_NULL(vkDestroyImage);
+ VMA_COPY_IF_NOT_NULL(vkCmdCopyBuffer);
+
+#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000
+ VMA_COPY_IF_NOT_NULL(vkGetBufferMemoryRequirements2KHR); // The groups below are copied only when the matching feature is compiled in.
+ VMA_COPY_IF_NOT_NULL(vkGetImageMemoryRequirements2KHR);
+#endif
+
+#if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000
+ VMA_COPY_IF_NOT_NULL(vkBindBufferMemory2KHR);
+ VMA_COPY_IF_NOT_NULL(vkBindImageMemory2KHR);
+#endif
+
+#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000
+ VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceMemoryProperties2KHR);
+#endif
+
+#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000
+ VMA_COPY_IF_NOT_NULL(vkGetDeviceBufferMemoryRequirements);
+ VMA_COPY_IF_NOT_NULL(vkGetDeviceImageMemoryRequirements);
+#endif
+#if VMA_EXTERNAL_MEMORY_WIN32
+ VMA_COPY_IF_NOT_NULL(vkGetMemoryWin32HandleKHR);
+#endif
+#undef VMA_COPY_IF_NOT_NULL
+}
+
+#if VMA_DYNAMIC_VULKAN_FUNCTIONS == 1
+
+// Fetches, by name, every Vulkan function pointer in m_VulkanFunctions that is still null.
+// Instance-level functions go through vkGetInstanceProcAddr(m_hInstance, ...), device-level ones
+// through vkGetDeviceProcAddr(m_hDevice, ...). Both loader functions must already be set (asserted).
+// Pointers that are already non-null are never overwritten.
+void VmaAllocator_T::ImportVulkanFunctions_Dynamic()
+{
+    VMA_ASSERT(m_VulkanFunctions.vkGetInstanceProcAddr && m_VulkanFunctions.vkGetDeviceProcAddr &&
+        "To use VMA_DYNAMIC_VULKAN_FUNCTIONS in new versions of VMA you now have to pass "
+        "VmaVulkanFunctions::vkGetInstanceProcAddr and vkGetDeviceProcAddr as VmaAllocatorCreateInfo::pVulkanFunctions. "
+        "Other members can be null.");
+
+#define VMA_FETCH_INSTANCE_FUNC(memberName, functionPointerType, functionNameString) \
+    if(m_VulkanFunctions.memberName == VMA_NULL) \
+        m_VulkanFunctions.memberName = \
+            (functionPointerType)m_VulkanFunctions.vkGetInstanceProcAddr(m_hInstance, functionNameString);
+#define VMA_FETCH_DEVICE_FUNC(memberName, functionPointerType, functionNameString) \
+    if(m_VulkanFunctions.memberName == VMA_NULL) \
+        m_VulkanFunctions.memberName = \
+            (functionPointerType)m_VulkanFunctions.vkGetDeviceProcAddr(m_hDevice, functionNameString);
+
+    // Vulkan 1.0
+    VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceProperties, PFN_vkGetPhysicalDeviceProperties, "vkGetPhysicalDeviceProperties");
+    VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties, PFN_vkGetPhysicalDeviceMemoryProperties, "vkGetPhysicalDeviceMemoryProperties");
+    VMA_FETCH_DEVICE_FUNC(vkAllocateMemory, PFN_vkAllocateMemory, "vkAllocateMemory");
+    VMA_FETCH_DEVICE_FUNC(vkFreeMemory, PFN_vkFreeMemory, "vkFreeMemory");
+    VMA_FETCH_DEVICE_FUNC(vkMapMemory, PFN_vkMapMemory, "vkMapMemory");
+    VMA_FETCH_DEVICE_FUNC(vkUnmapMemory, PFN_vkUnmapMemory, "vkUnmapMemory");
+    VMA_FETCH_DEVICE_FUNC(vkFlushMappedMemoryRanges, PFN_vkFlushMappedMemoryRanges, "vkFlushMappedMemoryRanges");
+    VMA_FETCH_DEVICE_FUNC(vkInvalidateMappedMemoryRanges, PFN_vkInvalidateMappedMemoryRanges, "vkInvalidateMappedMemoryRanges");
+    VMA_FETCH_DEVICE_FUNC(vkBindBufferMemory, PFN_vkBindBufferMemory, "vkBindBufferMemory");
+    VMA_FETCH_DEVICE_FUNC(vkBindImageMemory, PFN_vkBindImageMemory, "vkBindImageMemory");
+    VMA_FETCH_DEVICE_FUNC(vkGetBufferMemoryRequirements, PFN_vkGetBufferMemoryRequirements, "vkGetBufferMemoryRequirements");
+    VMA_FETCH_DEVICE_FUNC(vkGetImageMemoryRequirements, PFN_vkGetImageMemoryRequirements, "vkGetImageMemoryRequirements");
+    VMA_FETCH_DEVICE_FUNC(vkCreateBuffer, PFN_vkCreateBuffer, "vkCreateBuffer");
+    VMA_FETCH_DEVICE_FUNC(vkDestroyBuffer, PFN_vkDestroyBuffer, "vkDestroyBuffer");
+    VMA_FETCH_DEVICE_FUNC(vkCreateImage, PFN_vkCreateImage, "vkCreateImage");
+    VMA_FETCH_DEVICE_FUNC(vkDestroyImage, PFN_vkDestroyImage, "vkDestroyImage");
+    VMA_FETCH_DEVICE_FUNC(vkCmdCopyBuffer, PFN_vkCmdCopyBuffer, "vkCmdCopyBuffer");
+
+    // Vulkan 1.1 core names, stored in the *KHR-named members.
+#if VMA_VULKAN_VERSION >= 1001000
+    if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0))
+    {
+        VMA_FETCH_DEVICE_FUNC(vkGetBufferMemoryRequirements2KHR, PFN_vkGetBufferMemoryRequirements2, "vkGetBufferMemoryRequirements2");
+        VMA_FETCH_DEVICE_FUNC(vkGetImageMemoryRequirements2KHR, PFN_vkGetImageMemoryRequirements2, "vkGetImageMemoryRequirements2");
+        VMA_FETCH_DEVICE_FUNC(vkBindBufferMemory2KHR, PFN_vkBindBufferMemory2, "vkBindBufferMemory2");
+        VMA_FETCH_DEVICE_FUNC(vkBindImageMemory2KHR, PFN_vkBindImageMemory2, "vkBindImageMemory2");
+    }
+#endif
+
+    // This is the only place vkGetPhysicalDeviceMemoryProperties2KHR is fetched. Each branch tries
+    // both spellings, so repeating a lookup later in this function could not produce a different result.
+#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000
+    if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0))
+    {
+        VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2");
+        // Try to fetch the pointer from the other name, based on suspected driver bug - see issue #410.
+        VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2KHR");
+    }
+    else if(m_UseExtMemoryBudget)
+    {
+        VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2KHR");
+        // Try to fetch the pointer from the other name, based on suspected driver bug - see issue #410.
+        VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2");
+    }
+#endif
+
+#if VMA_DEDICATED_ALLOCATION
+    if(m_UseKhrDedicatedAllocation)
+    {
+        VMA_FETCH_DEVICE_FUNC(vkGetBufferMemoryRequirements2KHR, PFN_vkGetBufferMemoryRequirements2KHR, "vkGetBufferMemoryRequirements2KHR");
+        VMA_FETCH_DEVICE_FUNC(vkGetImageMemoryRequirements2KHR, PFN_vkGetImageMemoryRequirements2KHR, "vkGetImageMemoryRequirements2KHR");
+    }
+#endif
+
+#if VMA_BIND_MEMORY2
+    if(m_UseKhrBindMemory2)
+    {
+        VMA_FETCH_DEVICE_FUNC(vkBindBufferMemory2KHR, PFN_vkBindBufferMemory2KHR, "vkBindBufferMemory2KHR");
+        VMA_FETCH_DEVICE_FUNC(vkBindImageMemory2KHR, PFN_vkBindImageMemory2KHR, "vkBindImageMemory2KHR");
+    }
+#endif // #if VMA_BIND_MEMORY2
+
+#if VMA_VULKAN_VERSION >= 1003000
+    if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 3, 0))
+    {
+        VMA_FETCH_DEVICE_FUNC(vkGetDeviceBufferMemoryRequirements, PFN_vkGetDeviceBufferMemoryRequirements, "vkGetDeviceBufferMemoryRequirements");
+        VMA_FETCH_DEVICE_FUNC(vkGetDeviceImageMemoryRequirements, PFN_vkGetDeviceImageMemoryRequirements, "vkGetDeviceImageMemoryRequirements");
+    }
+#endif
+#if VMA_KHR_MAINTENANCE4
+    if(m_UseKhrMaintenance4)
+    {
+        VMA_FETCH_DEVICE_FUNC(vkGetDeviceBufferMemoryRequirements, PFN_vkGetDeviceBufferMemoryRequirementsKHR, "vkGetDeviceBufferMemoryRequirementsKHR");
+        VMA_FETCH_DEVICE_FUNC(vkGetDeviceImageMemoryRequirements, PFN_vkGetDeviceImageMemoryRequirementsKHR, "vkGetDeviceImageMemoryRequirementsKHR");
+    }
+#endif
+#if VMA_EXTERNAL_MEMORY_WIN32
+    if (m_UseKhrExternalMemoryWin32)
+    {
+        VMA_FETCH_DEVICE_FUNC(vkGetMemoryWin32HandleKHR, PFN_vkGetMemoryWin32HandleKHR, "vkGetMemoryWin32HandleKHR");
+    }
+#endif
+#undef VMA_FETCH_DEVICE_FUNC
+#undef VMA_FETCH_INSTANCE_FUNC
+}
+
+#endif // VMA_DYNAMIC_VULKAN_FUNCTIONS == 1
+
+void VmaAllocator_T::ValidateVulkanFunctions() // Asserts that every function pointer needed for the enabled features is non-null; has no effect besides the assertions.
+{
+ VMA_ASSERT(m_VulkanFunctions.vkGetPhysicalDeviceProperties != VMA_NULL); // Vulkan 1.0 entry points: checked unconditionally.
+ VMA_ASSERT(m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties != VMA_NULL);
+ VMA_ASSERT(m_VulkanFunctions.vkAllocateMemory != VMA_NULL);
+ VMA_ASSERT(m_VulkanFunctions.vkFreeMemory != VMA_NULL);
+ VMA_ASSERT(m_VulkanFunctions.vkMapMemory != VMA_NULL);
+ VMA_ASSERT(m_VulkanFunctions.vkUnmapMemory != VMA_NULL);
+ VMA_ASSERT(m_VulkanFunctions.vkFlushMappedMemoryRanges != VMA_NULL);
+ VMA_ASSERT(m_VulkanFunctions.vkInvalidateMappedMemoryRanges != VMA_NULL);
+ VMA_ASSERT(m_VulkanFunctions.vkBindBufferMemory != VMA_NULL);
+ VMA_ASSERT(m_VulkanFunctions.vkBindImageMemory != VMA_NULL);
+ VMA_ASSERT(m_VulkanFunctions.vkGetBufferMemoryRequirements != VMA_NULL);
+ VMA_ASSERT(m_VulkanFunctions.vkGetImageMemoryRequirements != VMA_NULL);
+ VMA_ASSERT(m_VulkanFunctions.vkCreateBuffer != VMA_NULL);
+ VMA_ASSERT(m_VulkanFunctions.vkDestroyBuffer != VMA_NULL);
+ VMA_ASSERT(m_VulkanFunctions.vkCreateImage != VMA_NULL);
+ VMA_ASSERT(m_VulkanFunctions.vkDestroyImage != VMA_NULL);
+ VMA_ASSERT(m_VulkanFunctions.vkCmdCopyBuffer != VMA_NULL);
+
+#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000
+ if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0) || m_UseKhrDedicatedAllocation) // Needed with Vulkan 1.1+ or when the dedicated-allocation extension is in use.
+ {
+ VMA_ASSERT(m_VulkanFunctions.vkGetBufferMemoryRequirements2KHR != VMA_NULL);
+ VMA_ASSERT(m_VulkanFunctions.vkGetImageMemoryRequirements2KHR != VMA_NULL);
+ }
+#endif
+
+#if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000
+ if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0) || m_UseKhrBindMemory2) // Needed with Vulkan 1.1+ or when the bind-memory2 extension is in use.
+ {
+ VMA_ASSERT(m_VulkanFunctions.vkBindBufferMemory2KHR != VMA_NULL);
+ VMA_ASSERT(m_VulkanFunctions.vkBindImageMemory2KHR != VMA_NULL);
+ }
+#endif
+
+#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000
+ if(m_UseExtMemoryBudget || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) // Needed with Vulkan 1.1+ or when the memory-budget extension is in use.
+ {
+ VMA_ASSERT(m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties2KHR != VMA_NULL);
+ }
+#endif
+#if VMA_EXTERNAL_MEMORY_WIN32
+ if (m_UseKhrExternalMemoryWin32)
+ {
+ VMA_ASSERT(m_VulkanFunctions.vkGetMemoryWin32HandleKHR != VMA_NULL);
+ }
+#endif
+
+ // Not validating these due to suspected driver bugs with these function
+ // pointers being null despite correct extension or Vulkan version is enabled.
+ // See issue #397. Their usage in VMA is optional anyway.
+ //
+ // VMA_ASSERT(m_VulkanFunctions.vkGetDeviceBufferMemoryRequirements != VMA_NULL);
+ // VMA_ASSERT(m_VulkanFunctions.vkGetDeviceImageMemoryRequirements != VMA_NULL);
+}
+
+// Returns the preferred block size for the given memory type, aligned up to 32:
+// one eighth of the heap size when the heap is no larger than VMA_SMALL_HEAP_MAX_SIZE,
+// m_PreferredLargeHeapBlockSize otherwise.
+VkDeviceSize VmaAllocator_T::CalcPreferredBlockSize(uint32_t memTypeIndex)
+{
+    const VkDeviceSize heapSize = m_MemProps.memoryHeaps[MemoryTypeIndexToHeapIndex(memTypeIndex)].size;
+
+    VkDeviceSize blockSize = m_PreferredLargeHeapBlockSize;
+    if(heapSize <= VMA_SMALL_HEAP_MAX_SIZE)
+    {
+        blockSize = heapSize / 8;
+    }
+    return VmaAlignUp(blockSize, (VkDeviceSize)32);
+}
+
+// Creates `allocationCount` allocations of `size` bytes in memory type `memTypeIndex`, writing the
+// handles to pAllocations.
+//
+// Strategy:
+//  1. CalcMemTypeParams() may adjust a copy of createInfo or fail; a failure is returned as is.
+//  2. If the resulting flags contain VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT, only a dedicated
+//     allocation is attempted.
+//  3. Otherwise, when dedicated memory is allowed and preferred, it is tried first, then
+//     blockVector.Allocate(), then dedicated memory as a last resort if it has not been tried yet.
+//
+// Returns VK_SUCCESS on the first attempt that succeeds, otherwise the result of the last attempt.
+VkResult VmaAllocator_T::AllocateMemoryOfType(
+    VmaPool pool,
+    VkDeviceSize size,
+    VkDeviceSize alignment,
+    bool dedicatedPreferred,
+    VkBuffer dedicatedBuffer,
+    VkImage dedicatedImage,
+    VmaBufferImageUsage dedicatedBufferImageUsage,
+    const VmaAllocationCreateInfo& createInfo,
+    uint32_t memTypeIndex,
+    VmaSuballocationType suballocType,
+    VmaDedicatedAllocationList& dedicatedAllocations,
+    VmaBlockVector& blockVector,
+    size_t allocationCount,
+    VmaAllocation* pAllocations)
+{
+    VMA_ASSERT(pAllocations != VMA_NULL);
+    VMA_DEBUG_LOG_FORMAT(" AllocateMemory: MemoryTypeIndex=%" PRIu32 ", AllocationCount=%zu, Size=%" PRIu64, memTypeIndex, allocationCount, size);
+
+    VmaAllocationCreateInfo finalCreateInfo = createInfo;
+    VkResult res = CalcMemTypeParams(
+        finalCreateInfo,
+        memTypeIndex,
+        size,
+        allocationCount);
+    if(res != VK_SUCCESS)
+        return res;
+
+    // Every dedicated attempt below uses exactly the same arguments, so the call is spelled once.
+    // finalCreateInfo is not modified after this point.
+    const auto allocateDedicated = [&]() -> VkResult
+    {
+        return AllocateDedicatedMemory(
+            pool,
+            size,
+            suballocType,
+            dedicatedAllocations,
+            memTypeIndex,
+            (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0,
+            (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0,
+            (finalCreateInfo.flags &
+                (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0,
+            (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT) != 0,
+            finalCreateInfo.pUserData,
+            finalCreateInfo.priority,
+            dedicatedBuffer,
+            dedicatedImage,
+            dedicatedBufferImageUsage,
+            allocationCount,
+            pAllocations,
+            blockVector.GetAllocationNextPtr());
+    };
+
+    // Dedicated memory explicitly requested: no fallback to block allocation.
+    if((finalCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0)
+    {
+        return allocateDedicated();
+    }
+
+    const bool canAllocateDedicated =
+        (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0 &&
+        (pool == VK_NULL_HANDLE || !blockVector.HasExplicitBlockSize());
+
+    if(canAllocateDedicated)
+    {
+        // Heuristics: Allocate dedicated memory if requested size is greater than half of preferred block size.
+        if(size > blockVector.GetPreferredBlockSize() / 2)
+        {
+            dedicatedPreferred = true;
+        }
+        // Protection against creating each allocation as dedicated when we reach or exceed heap size/budget,
+        // which can quickly deplete maxMemoryAllocationCount: Don't prefer dedicated allocations when above
+        // 3/4 of the maximum allocation count.
+        if(m_PhysicalDeviceProperties.limits.maxMemoryAllocationCount < UINT32_MAX / 4 &&
+            m_DeviceMemoryCount.load() > m_PhysicalDeviceProperties.limits.maxMemoryAllocationCount * 3 / 4)
+        {
+            dedicatedPreferred = false;
+        }
+
+        if(dedicatedPreferred)
+        {
+            res = allocateDedicated();
+            if(res == VK_SUCCESS)
+            {
+                // Succeeded: AllocateDedicatedMemory function already filled pAllocations, nothing more to do here.
+                VMA_DEBUG_LOG(" Allocated as DedicatedMemory");
+                return VK_SUCCESS;
+            }
+        }
+    }
+
+    res = blockVector.Allocate(
+        size,
+        alignment,
+        finalCreateInfo,
+        suballocType,
+        allocationCount,
+        pAllocations);
+    if(res == VK_SUCCESS)
+        return VK_SUCCESS;
+
+    // Try dedicated memory, unless it was already attempted above.
+    if(canAllocateDedicated && !dedicatedPreferred)
+    {
+        res = allocateDedicated();
+        if(res == VK_SUCCESS)
+        {
+            // Succeeded: AllocateDedicatedMemory function already filled pAllocations, nothing more to do here.
+            VMA_DEBUG_LOG(" Allocated as DedicatedMemory");
+            return VK_SUCCESS;
+        }
+    }
+    // Everything failed: Return error code.
+    VMA_DEBUG_LOG(" vkAllocateMemory FAILED");
+    return res;
+}
+
+VkResult VmaAllocator_T::AllocateDedicatedMemory( // Creates allocationCount dedicated allocations, all or nothing: on any failure the ones already created are freed and pAllocations is zeroed.
+ VmaPool pool, // Forwarded unchanged to AllocateDedicatedMemoryPage.
+ VkDeviceSize size, // Becomes VkMemoryAllocateInfo::allocationSize for every allocation.
+ VmaSuballocationType suballocType, // Forwarded unchanged to AllocateDedicatedMemoryPage.
+ VmaDedicatedAllocationList& dedicatedAllocations, // On success every new allocation is registered here.
+ uint32_t memTypeIndex, // Becomes VkMemoryAllocateInfo::memoryTypeIndex.
+ bool map, // Forwarded unchanged to AllocateDedicatedMemoryPage.
+ bool isUserDataString, // Forwarded unchanged to AllocateDedicatedMemoryPage.
+ bool isMappingAllowed, // Forwarded unchanged to AllocateDedicatedMemoryPage.
+ bool canAliasMemory, // When true, no VkMemoryDedicatedAllocateInfoKHR is attached to the allocate info.
+ void* pUserData, // Forwarded unchanged to AllocateDedicatedMemoryPage.
+ float priority, // Used only when m_UseExtMemoryPriority is set; asserted to lie in [0, 1].
+ VkBuffer dedicatedBuffer, // Optional buffer the memory is dedicated to; may be VK_NULL_HANDLE.
+ VkImage dedicatedImage, // Optional image the memory is dedicated to; asserted null when dedicatedBuffer is set.
+ VmaBufferImageUsage dedicatedBufferImageUsage, // Consulted only to decide whether the device-address flag is requested.
+ size_t allocationCount, // Number of allocations to create; asserted to be greater than 0.
+ VmaAllocation* pAllocations, // Output array with allocationCount entries; asserted non-null.
+ const void* pNextChain) // Initial pNext of the VkMemoryAllocateInfo; more structs are added through VmaPnextChainPushFront.
+{
+ VMA_ASSERT(allocationCount > 0 && pAllocations);
+
+ VkMemoryAllocateInfo allocInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO };
+ allocInfo.memoryTypeIndex = memTypeIndex;
+ allocInfo.allocationSize = size;
+ allocInfo.pNext = pNextChain;
+
+#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000
+ VkMemoryDedicatedAllocateInfoKHR dedicatedAllocInfo = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR }; // Declared at function scope because allocInfo may point to it until the allocation loop below has run.
+ if(!canAliasMemory)
+ {
+ if(m_UseKhrDedicatedAllocation || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0))
+ {
+ if(dedicatedBuffer != VK_NULL_HANDLE) // The buffer takes precedence; the image branch is only reached when no buffer was given.
+ {
+ VMA_ASSERT(dedicatedImage == VK_NULL_HANDLE);
+ dedicatedAllocInfo.buffer = dedicatedBuffer;
+ VmaPnextChainPushFront(&allocInfo, &dedicatedAllocInfo);
+ }
+ else if(dedicatedImage != VK_NULL_HANDLE)
+ {
+ dedicatedAllocInfo.image = dedicatedImage;
+ VmaPnextChainPushFront(&allocInfo, &dedicatedAllocInfo);
+ }
+ }
+ }
+#endif // #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000
+
+#if VMA_BUFFER_DEVICE_ADDRESS
+ VkMemoryAllocateFlagsInfoKHR allocFlagsInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO_KHR };
+ if(m_UseKhrBufferDeviceAddress)
+ {
+ bool canContainBufferWithDeviceAddress = true; // Default when neither a dedicated buffer nor a dedicated image is given.
+ if(dedicatedBuffer != VK_NULL_HANDLE)
+ {
+ canContainBufferWithDeviceAddress = dedicatedBufferImageUsage == VmaBufferImageUsage::UNKNOWN || // Unknown usage is treated as "may need a device address".
+ dedicatedBufferImageUsage.Contains(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT);
+ }
+ else if(dedicatedImage != VK_NULL_HANDLE)
+ {
+ canContainBufferWithDeviceAddress = false; // Memory dedicated to an image never gets the device-address flag.
+ }
+ if(canContainBufferWithDeviceAddress)
+ {
+ allocFlagsInfo.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR;
+ VmaPnextChainPushFront(&allocInfo, &allocFlagsInfo);
+ }
+ }
+#endif // #if VMA_BUFFER_DEVICE_ADDRESS
+
+#if VMA_MEMORY_PRIORITY
+ VkMemoryPriorityAllocateInfoEXT priorityInfo = { VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT };
+ if(m_UseExtMemoryPriority)
+ {
+ VMA_ASSERT(priority >= 0.f && priority <= 1.f);
+ priorityInfo.priority = priority;
+ VmaPnextChainPushFront(&allocInfo, &priorityInfo);
+ }
+#endif // #if VMA_MEMORY_PRIORITY
+
+#if VMA_EXTERNAL_MEMORY
+ // Attach VkExportMemoryAllocateInfoKHR if necessary.
+ VkExportMemoryAllocateInfoKHR exportMemoryAllocInfo = { VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR };
+ exportMemoryAllocInfo.handleTypes = GetExternalMemoryHandleTypeFlags(memTypeIndex);
+ if(exportMemoryAllocInfo.handleTypes != 0) // Attached only when some handle type is configured for this memory type.
+ {
+ VmaPnextChainPushFront(&allocInfo, &exportMemoryAllocInfo);
+ }
+#endif // #if VMA_EXTERNAL_MEMORY
+
+ size_t allocIndex; // Declared outside the loop: after a failure it holds the number of allocations that were created.
+ VkResult res = VK_SUCCESS;
+ for(allocIndex = 0; allocIndex < allocationCount; ++allocIndex)
+ {
+ res = AllocateDedicatedMemoryPage(
+ pool,
+ size,
+ suballocType,
+ memTypeIndex,
+ allocInfo,
+ map,
+ isUserDataString,
+ isMappingAllowed,
+ pUserData,
+ pAllocations + allocIndex);
+ if(res != VK_SUCCESS)
+ {
+ break; // Stop at the first failure; cleanup happens below.
+ }
+ }
+
+ if(res == VK_SUCCESS)
+ {
+ for (allocIndex = 0; allocIndex < allocationCount; ++allocIndex) // Registration is deferred until every allocation has succeeded.
+ {
+ dedicatedAllocations.Register(pAllocations[allocIndex]);
+ }
+ VMA_DEBUG_LOG_FORMAT(" Allocated DedicatedMemory Count=%zu, MemoryTypeIndex=#%" PRIu32, allocationCount, memTypeIndex);
+ }
+ else
+ {
+ // Free all already created allocations.
+ while(allocIndex--) // Walks backwards over the entries [0, allocIndex) that were successfully created.
+ {
+ VmaAllocation currAlloc = pAllocations[allocIndex];
+ VkDeviceMemory hMemory = currAlloc->GetMemory();
+
+ /*
+ There is no need to call this, because Vulkan spec allows to skip vkUnmapMemory
+ before vkFreeMemory.
+
+ if(currAlloc->GetMappedData() != VMA_NULL)
+ {
+ (*m_VulkanFunctions.vkUnmapMemory)(m_hDevice, hMemory);
+ }
+ */
+
+ FreeVulkanMemory(memTypeIndex, currAlloc->GetSize(), hMemory);
+ m_Budget.RemoveAllocation(MemoryTypeIndexToHeapIndex(memTypeIndex), currAlloc->GetSize()); // Reverts the AddAllocation done in AllocateDedicatedMemoryPage.
+ m_AllocationObjectAllocator.Free(currAlloc);
+ }
+
+ memset(pAllocations, 0, sizeof(VmaAllocation) * allocationCount); // Zero the whole output array so the caller sees no stale handles.
+ }
+
+ return res;
+}
+
+VkResult VmaAllocator_T::AllocateDedicatedMemoryPage( // Creates one dedicated allocation: allocates device memory, optionally maps it, then builds and initializes the VmaAllocation object.
+ VmaPool pool, // Passed to InitDedicatedAllocation.
+ VkDeviceSize size, // Size recorded in the allocation and in the budget; also used when freeing after a failed map.
+ VmaSuballocationType suballocType, // Passed to InitDedicatedAllocation.
+ uint32_t memTypeIndex, // Memory type of the allocation; also selects the heap for budget accounting.
+ const VkMemoryAllocateInfo& allocInfo, // Fully prepared allocate info handed to AllocateVulkanMemory.
+ bool map, // When true the whole memory is mapped right after allocation.
+ bool isUserDataString, // Selects whether pUserData is stored through SetName (as a C string) or SetUserData.
+ bool isMappingAllowed, // Passed to m_AllocationObjectAllocator.Allocate.
+ void* pUserData, // User data or name string, depending on isUserDataString.
+ VmaAllocation* pAllocation) // Output: receives the new allocation handle on success.
+{
+ VkDeviceMemory hMemory = VK_NULL_HANDLE;
+ VkResult res = AllocateVulkanMemory(&allocInfo, &hMemory);
+ if(res < 0) // Negative VkResult values are errors.
+ {
+ VMA_DEBUG_LOG(" vkAllocateMemory FAILED");
+ return res;
+ }
+
+ void* pMappedData = VMA_NULL; // Stays null unless mapping was requested and succeeded.
+ if(map)
+ {
+ res = (*m_VulkanFunctions.vkMapMemory)(
+ m_hDevice,
+ hMemory,
+ 0,
+ VK_WHOLE_SIZE,
+ 0,
+ &pMappedData);
+ if(res < 0)
+ {
+ VMA_DEBUG_LOG(" vkMapMemory FAILED");
+ FreeVulkanMemory(memTypeIndex, size, hMemory); // Release the memory allocated above so a failed map does not leak it.
+ return res;
+ }
+ }
+
+ *pAllocation = m_AllocationObjectAllocator.Allocate(isMappingAllowed);
+ (*pAllocation)->InitDedicatedAllocation(this, pool, memTypeIndex, hMemory, suballocType, pMappedData, size);
+ if (isUserDataString)
+ (*pAllocation)->SetName(this, (const char*)pUserData);
+ else
+ (*pAllocation)->SetUserData(this, pUserData);
+ m_Budget.AddAllocation(MemoryTypeIndexToHeapIndex(memTypeIndex), size); // Budget is tracked per heap, not per memory type.
+ if(VMA_DEBUG_INITIALIZE_ALLOCATIONS)
+ {
+ FillAllocation(*pAllocation, VMA_ALLOCATION_FILL_PATTERN_CREATED);
+ }
+
+ return VK_SUCCESS;
+}
+
+// Queries memory requirements of hBuffer into memReq.
+// When the dedicated-allocation path is compiled in and enabled at runtime (KHR extension
+// flag or API version >= 1.1), the "2" query is used and the two output booleans reflect
+// what the implementation reports. Otherwise the basic query is used and both are false.
+void VmaAllocator_T::GetBufferMemoryRequirements(
+    VkBuffer hBuffer,
+    VkMemoryRequirements& memReq,
+    bool& requiresDedicatedAllocation,
+    bool& prefersDedicatedAllocation) const
+{
+#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000
+    if(m_UseKhrDedicatedAllocation || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0))
+    {
+        VkBufferMemoryRequirementsInfo2KHR bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2_KHR };
+        bufferInfo.buffer = hBuffer;
+
+        // The dedicated-requirements struct is chained into the output struct so the query fills both.
+        VkMemoryDedicatedRequirementsKHR dedicatedReq = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR };
+        VkMemoryRequirements2KHR requirements2 = { VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR };
+        VmaPnextChainPushFront(&requirements2, &dedicatedReq);
+
+        (*m_VulkanFunctions.vkGetBufferMemoryRequirements2KHR)(m_hDevice, &bufferInfo, &requirements2);
+
+        memReq = requirements2.memoryRequirements;
+        requiresDedicatedAllocation = (dedicatedReq.requiresDedicatedAllocation != VK_FALSE);
+        prefersDedicatedAllocation = (dedicatedReq.prefersDedicatedAllocation != VK_FALSE);
+        return;
+    }
+#endif // #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000
+
+    // Basic query: no information about dedicated allocation is available.
+    (*m_VulkanFunctions.vkGetBufferMemoryRequirements)(m_hDevice, hBuffer, &memReq);
+    requiresDedicatedAllocation = false;
+    prefersDedicatedAllocation = false;
+}
+
+// Queries memory requirements of hImage into memReq.
+// When the dedicated-allocation path is compiled in and enabled at runtime (KHR extension
+// flag or API version >= 1.1), the "2" query is used and the two output booleans reflect
+// what the implementation reports. Otherwise the basic query is used and both are false.
+void VmaAllocator_T::GetImageMemoryRequirements(
+    VkImage hImage,
+    VkMemoryRequirements& memReq,
+    bool& requiresDedicatedAllocation,
+    bool& prefersDedicatedAllocation) const
+{
+#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000
+    if(m_UseKhrDedicatedAllocation || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0))
+    {
+        VkImageMemoryRequirementsInfo2KHR imageInfo = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2_KHR };
+        imageInfo.image = hImage;
+
+        // The dedicated-requirements struct is chained into the output struct so the query fills both.
+        VkMemoryDedicatedRequirementsKHR dedicatedReq = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR };
+        VkMemoryRequirements2KHR requirements2 = { VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR };
+        VmaPnextChainPushFront(&requirements2, &dedicatedReq);
+
+        (*m_VulkanFunctions.vkGetImageMemoryRequirements2KHR)(m_hDevice, &imageInfo, &requirements2);
+
+        memReq = requirements2.memoryRequirements;
+        requiresDedicatedAllocation = (dedicatedReq.requiresDedicatedAllocation != VK_FALSE);
+        prefersDedicatedAllocation = (dedicatedReq.prefersDedicatedAllocation != VK_FALSE);
+        return;
+    }
+#endif // #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000
+
+    // Basic query: no information about dedicated allocation is available.
+    (*m_VulkanFunctions.vkGetImageMemoryRequirements)(m_hDevice, hImage, &memReq);
+    requiresDedicatedAllocation = false;
+    prefersDedicatedAllocation = false;
+}
+
+// Picks the memory type that best matches pAllocationCreateInfo among the types allowed by
+// memoryTypeBits (further restricted by the global mask and, if non-zero, by
+// pAllocationCreateInfo->memoryTypeBits).
+// A candidate must contain all required property flags. Its cost is the number of preferred
+// flags it lacks plus the number of not-preferred flags it has; the lowest cost wins and the
+// first candidate with that cost is kept.
+// Returns VK_SUCCESS and writes the index to *pMemoryTypeIndex, or VK_ERROR_FEATURE_NOT_PRESENT.
+// If FindMemoryPreferences fails, *pMemoryTypeIndex is left untouched; if no type matches,
+// it is set to UINT32_MAX.
+VkResult VmaAllocator_T::FindMemoryTypeIndex(
+    uint32_t memoryTypeBits,
+    const VmaAllocationCreateInfo* pAllocationCreateInfo,
+    VmaBufferImageUsage bufImgUsage,
+    uint32_t* pMemoryTypeIndex) const
+{
+    memoryTypeBits &= GetGlobalMemoryTypeBits();
+    if(pAllocationCreateInfo->memoryTypeBits != 0)
+    {
+        memoryTypeBits &= pAllocationCreateInfo->memoryTypeBits;
+    }
+
+    VkMemoryPropertyFlags requiredFlags = 0;
+    VkMemoryPropertyFlags preferredFlags = 0;
+    VkMemoryPropertyFlags notPreferredFlags = 0;
+    const bool preferencesFound = FindMemoryPreferences(
+        IsIntegratedGpu(),
+        *pAllocationCreateInfo,
+        bufImgUsage,
+        requiredFlags, preferredFlags, notPreferredFlags);
+    if(!preferencesFound)
+    {
+        return VK_ERROR_FEATURE_NOT_PRESENT;
+    }
+
+    uint32_t bestIndex = UINT32_MAX;
+    uint32_t bestCost = UINT32_MAX;
+    const uint32_t typeCount = GetMemoryTypeCount();
+    for(uint32_t typeIndex = 0; typeIndex < typeCount; ++typeIndex)
+    {
+        // Skip memory types excluded by the bitmask.
+        if(((1u << typeIndex) & memoryTypeBits) == 0)
+        {
+            continue;
+        }
+
+        const VkMemoryPropertyFlags typeFlags = m_MemProps.memoryTypes[typeIndex].propertyFlags;
+        // Skip memory types missing any of the required flags.
+        if((requiredFlags & ~typeFlags) != 0)
+        {
+            continue;
+        }
+
+        const uint32_t cost =
+            VMA_COUNT_BITS_SET(preferredFlags & ~typeFlags) +
+            VMA_COUNT_BITS_SET(typeFlags & notPreferredFlags);
+        if(cost < bestCost)
+        {
+            bestIndex = typeIndex;
+            bestCost = cost;
+            if(cost == 0)
+            {
+                // Perfect match, nothing can beat it.
+                break;
+            }
+        }
+    }
+
+    *pMemoryTypeIndex = bestIndex;
+    return (bestIndex != UINT32_MAX) ? VK_SUCCESS : VK_ERROR_FEATURE_NOT_PRESENT;
+}
+
+// Adjusts inoutCreateInfo for the chosen memory type and performs the budget pre-check.
+// - Clears VMA_ALLOCATION_CREATE_MAPPED_BIT when the memory type is not HOST_VISIBLE.
+// - When both DEDICATED_MEMORY and WITHIN_BUDGET are requested, returns
+//   VK_ERROR_OUT_OF_DEVICE_MEMORY if usage + size * allocationCount would exceed the heap budget.
+// Returns VK_SUCCESS otherwise.
+VkResult VmaAllocator_T::CalcMemTypeParams(
+    VmaAllocationCreateInfo& inoutCreateInfo,
+    uint32_t memTypeIndex,
+    VkDeviceSize size,
+    size_t allocationCount)
+{
+    if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0)
+    {
+        const VkMemoryPropertyFlags typeFlags = m_MemProps.memoryTypes[memTypeIndex].propertyFlags;
+        if((typeFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0)
+        {
+            // Memory that is not host-visible cannot be mapped, so the request is dropped.
+            inoutCreateInfo.flags &= ~VMA_ALLOCATION_CREATE_MAPPED_BIT;
+        }
+    }
+
+    const uint32_t dedicatedWithinBudget =
+        VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT | VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT;
+    if((inoutCreateInfo.flags & dedicatedWithinBudget) != dedicatedWithinBudget)
+    {
+        return VK_SUCCESS;
+    }
+
+    VmaBudget heapBudget = {};
+    GetHeapBudgets(&heapBudget, MemoryTypeIndexToHeapIndex(memTypeIndex), 1);
+    if(heapBudget.usage + size * allocationCount > heapBudget.budget)
+    {
+        return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+    }
+    return VK_SUCCESS;
+}
+
+// Validates and normalizes allocation creation flags before a memory type is chosen.
+// - Asserts on inconsistent HOST_ACCESS_* / MAPPED flag combinations.
+// - Forces DEDICATED_MEMORY when dedicatedRequired is set or usage is GPU_LAZILY_ALLOCATED.
+// - For custom pools: rejects DEDICATED_MEMORY when the pool has an explicit block size and
+//   takes the priority from the pool.
+// - Rejects DEDICATED_MEMORY combined with NEVER_ALLOCATE.
+// - With VMA_DEBUG_ALWAYS_DEDICATED_MEMORY, forces DEDICATED_MEMORY on every allocation that
+//   is allowed to allocate new memory (NEVER_ALLOCATE not set).
+// - Adds HOST_ACCESS_RANDOM for non-AUTO usages that specify no HOST_ACCESS flag.
+// Returns VK_SUCCESS or VK_ERROR_FEATURE_NOT_PRESENT.
+// dedicatedPreferred is not used by this function.
+VkResult VmaAllocator_T::CalcAllocationParams(
+    VmaAllocationCreateInfo& inoutCreateInfo,
+    bool dedicatedRequired,
+    bool dedicatedPreferred)
+{
+    VMA_ASSERT((inoutCreateInfo.flags &
+        (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) !=
+        (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT) &&
+        "Specifying both flags VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT and VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT is incorrect.");
+    VMA_ASSERT((((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT) == 0 ||
+        (inoutCreateInfo.flags & (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0)) &&
+        "Specifying VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT requires also VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT.");
+    if(inoutCreateInfo.usage == VMA_MEMORY_USAGE_AUTO || inoutCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE || inoutCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_HOST)
+    {
+        if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0)
+        {
+            VMA_ASSERT((inoutCreateInfo.flags & (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0 &&
+                "When using VMA_ALLOCATION_CREATE_MAPPED_BIT and usage = VMA_MEMORY_USAGE_AUTO*, you must also specify VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT.");
+        }
+    }
+
+    // If memory is lazily allocated, it should be always dedicated.
+    if(dedicatedRequired ||
+        inoutCreateInfo.usage == VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED)
+    {
+        inoutCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
+    }
+
+    if(inoutCreateInfo.pool != VK_NULL_HANDLE)
+    {
+        if(inoutCreateInfo.pool->m_BlockVector.HasExplicitBlockSize() &&
+            (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0)
+        {
+            VMA_ASSERT(0 && "Specifying VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT while current custom pool doesn't support dedicated allocations.");
+            return VK_ERROR_FEATURE_NOT_PRESENT;
+        }
+        // Allocations made from a custom pool always use the pool's priority.
+        inoutCreateInfo.priority = inoutCreateInfo.pool->m_BlockVector.GetPriority();
+    }
+
+    if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0 &&
+        (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) != 0)
+    {
+        VMA_ASSERT(0 && "Specifying VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT together with VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT makes no sense.");
+        return VK_ERROR_FEATURE_NOT_PRESENT;
+    }
+
+    // Debug override: make every allocation dedicated, except those that must not allocate
+    // new device memory. The test has to be "NEVER_ALLOCATE not set": forcing DEDICATED
+    // together with NEVER_ALLOCATE would produce exactly the combination rejected above.
+    if(VMA_DEBUG_ALWAYS_DEDICATED_MEMORY &&
+        (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0)
+    {
+        inoutCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
+    }
+
+    // Non-auto USAGE values imply HOST_ACCESS flags.
+    // And so does VMA_MEMORY_USAGE_UNKNOWN because it is used with custom pools.
+    // Which specific flag is used doesn't matter. They change things only when used with VMA_MEMORY_USAGE_AUTO*.
+    // Otherwise they just protect from assert on mapping.
+    if(inoutCreateInfo.usage != VMA_MEMORY_USAGE_AUTO &&
+        inoutCreateInfo.usage != VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE &&
+        inoutCreateInfo.usage != VMA_MEMORY_USAGE_AUTO_PREFER_HOST)
+    {
+        if((inoutCreateInfo.flags & (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) == 0)
+        {
+            inoutCreateInfo.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
+        }
+    }
+
+    return VK_SUCCESS;
+}
+
+// Main allocation entry point: creates allocationCount allocations matching vkMemReq and
+// createInfo and writes them to pAllocations (the array is zeroed first).
+// - Zero-sized requests fail with VK_ERROR_INITIALIZATION_FAILED.
+// - With a custom pool, the pool's memory type and block vector are used directly.
+// - Otherwise memory types are tried one by one in the order FindMemoryTypeIndex proposes,
+//   removing each failed type from the candidate mask. If all candidates fail, the result is
+//   VK_ERROR_OUT_OF_DEVICE_MEMORY.
+VkResult VmaAllocator_T::AllocateMemory(
+    const VkMemoryRequirements& vkMemReq,
+    bool requiresDedicatedAllocation,
+    bool prefersDedicatedAllocation,
+    VkBuffer dedicatedBuffer,
+    VkImage dedicatedImage,
+    VmaBufferImageUsage dedicatedBufferImageUsage,
+    const VmaAllocationCreateInfo& createInfo,
+    VmaSuballocationType suballocType,
+    size_t allocationCount,
+    VmaAllocation* pAllocations)
+{
+    memset(pAllocations, 0, sizeof(VmaAllocation) * allocationCount);
+
+    VMA_ASSERT(VmaIsPow2(vkMemReq.alignment));
+
+    if(vkMemReq.size == 0)
+    {
+        return VK_ERROR_INITIALIZATION_FAILED;
+    }
+
+    // Work on a copy, because the parameters may be adjusted.
+    VmaAllocationCreateInfo createInfoFinal = createInfo;
+    const VkResult paramsRes = CalcAllocationParams(createInfoFinal, requiresDedicatedAllocation, prefersDedicatedAllocation);
+    if(paramsRes != VK_SUCCESS)
+    {
+        return paramsRes;
+    }
+
+    // Custom pool: exactly one memory type is possible.
+    if(createInfoFinal.pool != VK_NULL_HANDLE)
+    {
+        VmaBlockVector& poolBlockVector = createInfoFinal.pool->m_BlockVector;
+        return AllocateMemoryOfType(
+            createInfoFinal.pool,
+            vkMemReq.size,
+            vkMemReq.alignment,
+            prefersDedicatedAllocation,
+            dedicatedBuffer,
+            dedicatedImage,
+            dedicatedBufferImageUsage,
+            createInfoFinal,
+            poolBlockVector.GetMemoryTypeIndex(),
+            suballocType,
+            createInfoFinal.pool->m_DedicatedAllocations,
+            poolBlockVector,
+            allocationCount,
+            pAllocations);
+    }
+
+    // Default pools: bit mask of Vulkan memory types still acceptable for this allocation.
+    uint32_t candidateTypeBits = vkMemReq.memoryTypeBits;
+    uint32_t memTypeIndex = UINT32_MAX;
+    VkResult findRes = FindMemoryTypeIndex(candidateTypeBits, &createInfoFinal, dedicatedBufferImageUsage, &memTypeIndex);
+    // Can't find any single memory type matching requirements. findRes is VK_ERROR_FEATURE_NOT_PRESENT.
+    if(findRes != VK_SUCCESS)
+    {
+        return findRes;
+    }
+
+    while(findRes == VK_SUCCESS)
+    {
+        VmaBlockVector* const pTypeBlockVector = m_pBlockVectors[memTypeIndex];
+        VMA_ASSERT(pTypeBlockVector && "Trying to use unsupported memory type!");
+        const VkResult allocRes = AllocateMemoryOfType(
+            VK_NULL_HANDLE,
+            vkMemReq.size,
+            vkMemReq.alignment,
+            requiresDedicatedAllocation || prefersDedicatedAllocation,
+            dedicatedBuffer,
+            dedicatedImage,
+            dedicatedBufferImageUsage,
+            createInfoFinal,
+            memTypeIndex,
+            suballocType,
+            m_DedicatedAllocations[memTypeIndex],
+            *pTypeBlockVector,
+            allocationCount,
+            pAllocations);
+        if(allocRes == VK_SUCCESS)
+        {
+            return VK_SUCCESS;
+        }
+
+        // This memory type failed: drop it and look for an alternative.
+        candidateTypeBits &= ~(1u << memTypeIndex);
+        findRes = FindMemoryTypeIndex(candidateTypeBits, &createInfoFinal, dedicatedBufferImageUsage, &memTypeIndex);
+    }
+
+    // No other matching memory type index could be found.
+    // Not returning findRes, which is VK_ERROR_FEATURE_NOT_PRESENT, because we already failed to allocate once.
+    return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+}
+
+// Frees allocationCount allocations from pAllocations, iterating from the last element to
+// the first. Null handles are skipped. Block allocations are returned to their block vector
+// (custom pool or default pool of their memory type); dedicated allocations are released
+// through FreeDedicatedMemory.
+void VmaAllocator_T::FreeMemory(
+    size_t allocationCount,
+    const VmaAllocation* pAllocations)
+{
+    VMA_ASSERT(pAllocations);
+
+    size_t remaining = allocationCount;
+    while(remaining > 0)
+    {
+        --remaining;
+        VmaAllocation allocation = pAllocations[remaining];
+        if(allocation == VK_NULL_HANDLE)
+        {
+            continue;
+        }
+
+        if(VMA_DEBUG_INITIALIZE_ALLOCATIONS)
+        {
+            FillAllocation(allocation, VMA_ALLOCATION_FILL_PATTERN_DESTROYED);
+        }
+
+        switch(allocation->GetType())
+        {
+        case VmaAllocation_T::ALLOCATION_TYPE_BLOCK:
+            {
+                VmaBlockVector* pOwningVector = VMA_NULL;
+                VmaPool parentPool = allocation->GetParentPool();
+                if(parentPool == VK_NULL_HANDLE)
+                {
+                    // Allocation lives in the default pool of its memory type.
+                    const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex();
+                    pOwningVector = m_pBlockVectors[memTypeIndex];
+                    VMA_ASSERT(pOwningVector && "Trying to free memory of unsupported type!");
+                }
+                else
+                {
+                    pOwningVector = &parentPool->m_BlockVector;
+                }
+                pOwningVector->Free(allocation);
+            }
+            break;
+        case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED:
+            FreeDedicatedMemory(allocation);
+            break;
+        default:
+            VMA_ASSERT(0);
+        }
+    }
+}
+
+// Fills *pStats with detailed statistics: per memory type (default pools, custom pools and
+// dedicated allocations), then summed per memory heap, then summed into the total.
+// The list of custom pools is read under a read lock on m_PoolsMutex.
+void VmaAllocator_T::CalculateStatistics(VmaTotalStatistics* pStats)
+{
+    const uint32_t typeCount = GetMemoryTypeCount();
+    const uint32_t heapCount = GetMemoryHeapCount();
+
+    // Start from cleared statistics everywhere.
+    VmaClearDetailedStatistics(pStats->total);
+    for(uint32_t typeIndex = 0; typeIndex < VK_MAX_MEMORY_TYPES; ++typeIndex)
+    {
+        VmaClearDetailedStatistics(pStats->memoryType[typeIndex]);
+    }
+    for(uint32_t heapIndex = 0; heapIndex < VK_MAX_MEMORY_HEAPS; ++heapIndex)
+    {
+        VmaClearDetailedStatistics(pStats->memoryHeap[heapIndex]);
+    }
+
+    // Default pools. A memory type may have no block vector.
+    for(uint32_t typeIndex = 0; typeIndex < typeCount; ++typeIndex)
+    {
+        VmaBlockVector* const pDefaultVector = m_pBlockVectors[typeIndex];
+        if(pDefaultVector != VMA_NULL)
+        {
+            pDefaultVector->AddDetailedStatistics(pStats->memoryType[typeIndex]);
+        }
+    }
+
+    // Custom pools: both their blocks and their dedicated allocations.
+    {
+        VmaMutexLockRead lock(m_PoolsMutex, m_UseMutex);
+        for(VmaPool pool = m_Pools.Front(); pool != VMA_NULL; pool = m_Pools.GetNext(pool))
+        {
+            VmaDetailedStatistics& typeStats = pStats->memoryType[pool->m_BlockVector.GetMemoryTypeIndex()];
+            pool->m_BlockVector.AddDetailedStatistics(typeStats);
+            pool->m_DedicatedAllocations.AddDetailedStatistics(typeStats);
+        }
+    }
+
+    // Dedicated allocations outside of custom pools.
+    for(uint32_t typeIndex = 0; typeIndex < typeCount; ++typeIndex)
+    {
+        m_DedicatedAllocations[typeIndex].AddDetailedStatistics(pStats->memoryType[typeIndex]);
+    }
+
+    // Memory types -> memory heaps.
+    for(uint32_t typeIndex = 0; typeIndex < typeCount; ++typeIndex)
+    {
+        const uint32_t owningHeap = m_MemProps.memoryTypes[typeIndex].heapIndex;
+        VmaAddDetailedStatistics(pStats->memoryHeap[owningHeap], pStats->memoryType[typeIndex]);
+    }
+
+    // Memory heaps -> total.
+    for(uint32_t heapIndex = 0; heapIndex < heapCount; ++heapIndex)
+    {
+        VmaAddDetailedStatistics(pStats->total, pStats->memoryHeap[heapIndex]);
+    }
+
+    VMA_ASSERT(pStats->total.statistics.allocationCount == 0 ||
+        pStats->total.allocationSizeMax >= pStats->total.allocationSizeMin);
+    VMA_ASSERT(pStats->total.unusedRangeCount == 0 ||
+        pStats->total.unusedRangeSizeMax >= pStats->total.unusedRangeSizeMin);
+}
+
+// Writes budget information for heaps [firstHeap, firstHeap + heapCount) into outBudgets[0..heapCount).
+// With the memory-budget extension in use, values come from the last fetched Vulkan budget,
+// corrected by the change in block bytes since that fetch; when 30 or more operations were
+// counted since the fetch, the budget is refreshed first and the function calls itself again.
+// Without the extension, usage is the number of block bytes and budget is 80% of the heap size.
+void VmaAllocator_T::GetHeapBudgets(VmaBudget* outBudgets, uint32_t firstHeap, uint32_t heapCount)
+{
+#if VMA_MEMORY_BUDGET
+    if(m_UseExtMemoryBudget)
+    {
+        if(m_Budget.m_OperationsSinceBudgetFetch >= 30)
+        {
+            UpdateVulkanBudget(); // Outside of mutex lock
+            GetHeapBudgets(outBudgets, firstHeap, heapCount); // Recursion
+            return;
+        }
+
+        VmaMutexLockRead lockRead(m_Budget.m_BudgetMutex, m_UseMutex);
+        for(uint32_t i = 0; i < heapCount; ++i)
+        {
+            const uint32_t heapIndex = firstHeap + i;
+            VmaBudget& dst = outBudgets[i];
+
+            dst.statistics.blockCount = m_Budget.m_BlockCount[heapIndex];
+            dst.statistics.allocationCount = m_Budget.m_AllocationCount[heapIndex];
+            dst.statistics.blockBytes = m_Budget.m_BlockBytes[heapIndex];
+            dst.statistics.allocationBytes = m_Budget.m_AllocationBytes[heapIndex];
+
+            // Usage reported by Vulkan at fetch time, adjusted by how block bytes changed since then.
+            // Clamped at zero so the subtraction cannot wrap around.
+            const VkDeviceSize usageWithCurrentBlocks = m_Budget.m_VulkanUsage[heapIndex] + dst.statistics.blockBytes;
+            const VkDeviceSize blockBytesAtFetch = m_Budget.m_BlockBytesAtBudgetFetch[heapIndex];
+            dst.usage = (usageWithCurrentBlocks > blockBytesAtFetch) ?
+                (usageWithCurrentBlocks - blockBytesAtFetch) : 0;
+
+            // Have to take MIN with heap size because explicit HeapSizeLimit is included in it.
+            dst.budget = VMA_MIN(
+                m_Budget.m_VulkanBudget[heapIndex], m_MemProps.memoryHeaps[heapIndex].size);
+        }
+        return;
+    }
+#endif
+
+    for(uint32_t i = 0; i < heapCount; ++i)
+    {
+        const uint32_t heapIndex = firstHeap + i;
+        VmaBudget& dst = outBudgets[i];
+
+        dst.statistics.blockCount = m_Budget.m_BlockCount[heapIndex];
+        dst.statistics.allocationCount = m_Budget.m_AllocationCount[heapIndex];
+        dst.statistics.blockBytes = m_Budget.m_BlockBytes[heapIndex];
+        dst.statistics.allocationBytes = m_Budget.m_AllocationBytes[heapIndex];
+
+        dst.usage = dst.statistics.blockBytes;
+        dst.budget = m_MemProps.memoryHeaps[heapIndex].size * 8 / 10; // 80% heuristics.
+    }
+}
+
+// Copies the current properties of hAllocation (memory type, device memory handle, offset,
+// size, mapped pointer, user data and name) into *pAllocationInfo.
+void VmaAllocator_T::GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationInfo* pAllocationInfo)
+{
+    VmaAllocationInfo& info = *pAllocationInfo;
+    info.memoryType = hAllocation->GetMemoryTypeIndex();
+    info.deviceMemory = hAllocation->GetMemory();
+    info.offset = hAllocation->GetOffset();
+    info.size = hAllocation->GetSize();
+    info.pMappedData = hAllocation->GetMappedData();
+    info.pUserData = hAllocation->GetUserData();
+    info.pName = hAllocation->GetName();
+}
+
+// Extended variant of GetAllocationInfo: fills the basic info and additionally reports the
+// size of the memory block holding the allocation and whether that memory is dedicated.
+// For a dedicated allocation the block size equals the allocation size.
+void VmaAllocator_T::GetAllocationInfo2(VmaAllocation hAllocation, VmaAllocationInfo2* pAllocationInfo)
+{
+    VmaAllocationInfo2& info2 = *pAllocationInfo;
+    GetAllocationInfo(hAllocation, &info2.allocationInfo);
+
+    switch(hAllocation->GetType())
+    {
+    case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED:
+        info2.blockSize = info2.allocationInfo.size;
+        info2.dedicatedMemory = VK_TRUE;
+        break;
+    case VmaAllocation_T::ALLOCATION_TYPE_BLOCK:
+        info2.blockSize = hAllocation->GetBlock()->m_pMetadata->GetSize();
+        info2.dedicatedMemory = VK_FALSE;
+        break;
+    default:
+        VMA_ASSERT(0);
+    }
+}
+
+// Creates a custom pool described by *pCreateInfo and stores its handle in *pPool.
+// - maxBlockCount == 0 is treated as "unlimited" (SIZE_MAX).
+// - Returns VK_ERROR_INITIALIZATION_FAILED when minBlockCount > maxBlockCount.
+// - Returns VK_ERROR_FEATURE_NOT_PRESENT when the memory type index is out of range or
+//   excluded by m_GlobalMemoryTypeBits.
+// - If creating the minimum number of blocks fails, the pool is destroyed, *pPool is set to
+//   null and that error is returned.
+// On success the pool receives an id and is appended to m_Pools under a write lock.
+VkResult VmaAllocator_T::CreatePool(const VmaPoolCreateInfo* pCreateInfo, VmaPool* pPool)
+{
+    VMA_DEBUG_LOG_FORMAT(" CreatePool: MemoryTypeIndex=%" PRIu32 ", flags=%" PRIu32, pCreateInfo->memoryTypeIndex, pCreateInfo->flags);
+
+    VmaPoolCreateInfo poolInfo = *pCreateInfo;
+
+    // Protection against uninitialized new structure member. If garbage data are left there, this pointer dereference would crash.
+    if(pCreateInfo->pMemoryAllocateNext)
+    {
+        VMA_ASSERT(((const VkBaseInStructure*)pCreateInfo->pMemoryAllocateNext)->sType != 0);
+    }
+
+    if(poolInfo.maxBlockCount == 0)
+    {
+        poolInfo.maxBlockCount = SIZE_MAX;
+    }
+    if(poolInfo.minBlockCount > poolInfo.maxBlockCount)
+    {
+        return VK_ERROR_INITIALIZATION_FAILED;
+    }
+
+    // Memory type index out of range or forbidden.
+    if(pCreateInfo->memoryTypeIndex >= GetMemoryTypeCount())
+    {
+        return VK_ERROR_FEATURE_NOT_PRESENT;
+    }
+    if(((1u << pCreateInfo->memoryTypeIndex) & m_GlobalMemoryTypeBits) == 0)
+    {
+        return VK_ERROR_FEATURE_NOT_PRESENT;
+    }
+
+    if(poolInfo.minAllocationAlignment > 0)
+    {
+        VMA_ASSERT(VmaIsPow2(poolInfo.minAllocationAlignment));
+    }
+
+    const VkDeviceSize preferredBlockSize = CalcPreferredBlockSize(poolInfo.memoryTypeIndex);
+
+    *pPool = vma_new(this, VmaPool_T)(this, poolInfo, preferredBlockSize);
+
+    const VkResult minBlocksRes = (*pPool)->m_BlockVector.CreateMinBlocks();
+    if(minBlocksRes != VK_SUCCESS)
+    {
+        vma_delete(this, *pPool);
+        *pPool = VMA_NULL;
+        return minBlocksRes;
+    }
+
+    // Register the pool in m_Pools.
+    {
+        VmaMutexLockWrite lock(m_PoolsMutex, m_UseMutex);
+        (*pPool)->SetId(m_NextPoolId++);
+        m_Pools.PushBack(*pPool);
+    }
+
+    return VK_SUCCESS;
+}
+
+// Unregisters pool from m_Pools (under a write lock) and then deletes the pool object.
+// The deletion itself happens after the lock has been released.
+void VmaAllocator_T::DestroyPool(VmaPool pool)
+{
+    {
+        VmaMutexLockWrite poolsLock(m_PoolsMutex, m_UseMutex);
+        m_Pools.Remove(pool);
+    }
+
+    vma_delete(this, pool);
+}
+
+// Fills *pPoolStats with basic statistics of pool: the structure is cleared, then the
+// pool's block vector and its dedicated allocations are accumulated into it.
+void VmaAllocator_T::GetPoolStatistics(VmaPool pool, VmaStatistics* pPoolStats)
+{
+    VmaStatistics& stats = *pPoolStats;
+    VmaClearStatistics(stats);
+    pool->m_BlockVector.AddStatistics(stats);
+    pool->m_DedicatedAllocations.AddStatistics(stats);
+}
+
+// Fills *pPoolStats with detailed statistics of pool: the structure is cleared, then the
+// pool's block vector and its dedicated allocations are accumulated into it.
+void VmaAllocator_T::CalculatePoolStatistics(VmaPool pool, VmaDetailedStatistics* pPoolStats)
+{
+    VmaDetailedStatistics& stats = *pPoolStats;
+    VmaClearDetailedStatistics(stats);
+    pool->m_BlockVector.AddDetailedStatistics(stats);
+    pool->m_DedicatedAllocations.AddDetailedStatistics(stats);
+}
+
+// Stores the new frame index. When the memory-budget extension is compiled in and in use,
+// the Vulkan budget is refreshed as well.
+void VmaAllocator_T::SetCurrentFrameIndex(uint32_t frameIndex)
+{
+    m_CurrentFrameIndex.store(frameIndex);
+
+#if VMA_MEMORY_BUDGET
+    if(!m_UseExtMemoryBudget)
+    {
+        return;
+    }
+    UpdateVulkanBudget();
+#endif // #if VMA_MEMORY_BUDGET
+}
+
+// Runs the corruption check on the block vector of the given custom pool and returns its result.
+VkResult VmaAllocator_T::CheckPoolCorruption(VmaPool hPool)
+{
+    VmaBlockVector& poolBlocks = hPool->m_BlockVector;
+    return poolBlocks.CheckCorruption();
+}
+
+// Runs the corruption check on every default pool and every custom pool whose memory type
+// is selected in memoryTypeBits (bit i set means memory type i is checked).
+// Returns:
+// - VK_SUCCESS if at least one checked block vector reported success and none reported an error,
+// - VK_ERROR_FEATURE_NOT_PRESENT if no selected block vector supports the check,
+// - otherwise the first other result returned by a block vector (checking stops there).
+VkResult VmaAllocator_T::CheckCorruption(uint32_t memoryTypeBits)
+{
+    VkResult finalRes = VK_ERROR_FEATURE_NOT_PRESENT;
+
+    // Process default pools.
+    for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex)
+    {
+        // Honor the caller's mask here too, consistently with the custom pools below.
+        if(((1u << memTypeIndex) & memoryTypeBits) == 0)
+        {
+            continue;
+        }
+
+        VmaBlockVector* const pBlockVector = m_pBlockVectors[memTypeIndex];
+        if(pBlockVector != VMA_NULL)
+        {
+            VkResult localRes = pBlockVector->CheckCorruption();
+            switch(localRes)
+            {
+            case VK_ERROR_FEATURE_NOT_PRESENT:
+                break;
+            case VK_SUCCESS:
+                finalRes = VK_SUCCESS;
+                break;
+            default:
+                return localRes;
+            }
+        }
+    }
+
+    // Process custom pools.
+    {
+        VmaMutexLockRead lock(m_PoolsMutex, m_UseMutex);
+        for(VmaPool pool = m_Pools.Front(); pool != VMA_NULL; pool = m_Pools.GetNext(pool))
+        {
+            if(((1u << pool->m_BlockVector.GetMemoryTypeIndex()) & memoryTypeBits) != 0)
+            {
+                VkResult localRes = pool->m_BlockVector.CheckCorruption();
+                switch(localRes)
+                {
+                case VK_ERROR_FEATURE_NOT_PRESENT:
+                    break;
+                case VK_SUCCESS:
+                    finalRes = VK_SUCCESS;
+                    break;
+                default:
+                    return localRes;
+                }
+            }
+        }
+    }
+
+    return finalRes;
+}
+
+// Wrapper around vkAllocateMemory that keeps the allocator's bookkeeping consistent.
+// - The device memory counter is incremented through a transactional helper and committed
+//   only after vkAllocateMemory succeeds.
+// - Block bytes and block count of the target heap are reserved before the Vulkan call and
+//   rolled back if the call fails.
+// - When a heap size limit is in effect for the heap, the reservation uses a
+//   compare-exchange loop and fails with VK_ERROR_OUT_OF_DEVICE_MEMORY if the heap size
+//   would be exceeded.
+// - On success the informative pfnAllocate callback is invoked, if set.
+VkResult VmaAllocator_T::AllocateVulkanMemory(const VkMemoryAllocateInfo* pAllocateInfo, VkDeviceMemory* pMemory)
+{
+    AtomicTransactionalIncrement<VMA_ATOMIC_UINT32> deviceMemoryCountIncrement;
+    const uint64_t prevDeviceMemoryCount = deviceMemoryCountIncrement.Increment(&m_DeviceMemoryCount);
+#if VMA_DEBUG_DONT_EXCEED_MAX_MEMORY_ALLOCATION_COUNT
+    if(prevDeviceMemoryCount >= m_PhysicalDeviceProperties.limits.maxMemoryAllocationCount)
+    {
+        return VK_ERROR_TOO_MANY_OBJECTS;
+    }
+#endif
+
+    const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(pAllocateInfo->memoryTypeIndex);
+    const bool heapSizeLimited = (m_HeapSizeLimitMask & (1u << heapIndex)) != 0;
+
+    if(heapSizeLimited)
+    {
+        // Reserve the bytes atomically, but only if the result still fits in the heap.
+        const VkDeviceSize heapSize = m_MemProps.memoryHeaps[heapIndex].size;
+        VkDeviceSize expectedBytes = m_Budget.m_BlockBytes[heapIndex];
+        VkDeviceSize desiredBytes = 0;
+        do
+        {
+            desiredBytes = expectedBytes + pAllocateInfo->allocationSize;
+            if(desiredBytes > heapSize)
+            {
+                return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+            }
+            // On failure compare_exchange_strong reloads expectedBytes with the current value.
+        } while(!m_Budget.m_BlockBytes[heapIndex].compare_exchange_strong(expectedBytes, desiredBytes));
+    }
+    else
+    {
+        m_Budget.m_BlockBytes[heapIndex] += pAllocateInfo->allocationSize;
+    }
+    ++m_Budget.m_BlockCount[heapIndex];
+
+    // VULKAN CALL vkAllocateMemory.
+    const VkResult res = (*m_VulkanFunctions.vkAllocateMemory)(m_hDevice, pAllocateInfo, GetAllocationCallbacks(), pMemory);
+
+    if(res != VK_SUCCESS)
+    {
+        // Roll back the reservation made above.
+        --m_Budget.m_BlockCount[heapIndex];
+        m_Budget.m_BlockBytes[heapIndex] -= pAllocateInfo->allocationSize;
+        return res;
+    }
+
+#if VMA_MEMORY_BUDGET
+    ++m_Budget.m_OperationsSinceBudgetFetch;
+#endif
+
+    // Informative callback.
+    if(m_DeviceMemoryCallbacks.pfnAllocate != VMA_NULL)
+    {
+        (*m_DeviceMemoryCallbacks.pfnAllocate)(this, pAllocateInfo->memoryTypeIndex, *pMemory, pAllocateInfo->allocationSize, m_DeviceMemoryCallbacks.pUserData);
+    }
+
+    deviceMemoryCountIncrement.Commit();
+    return res;
+}
+
+// Wrapper around vkFreeMemory: invokes the informative pfnFree callback (if set) before the
+// memory is freed, then decrements the heap's block count and block bytes and the device
+// memory counter.
+void VmaAllocator_T::FreeVulkanMemory(uint32_t memoryType, VkDeviceSize size, VkDeviceMemory hMemory)
+{
+    // Informative callback, called while hMemory is still valid.
+    if(m_DeviceMemoryCallbacks.pfnFree != VMA_NULL)
+    {
+        (*m_DeviceMemoryCallbacks.pfnFree)(this, memoryType, hMemory, size, m_DeviceMemoryCallbacks.pUserData);
+    }
+
+    // VULKAN CALL vkFreeMemory.
+    (*m_VulkanFunctions.vkFreeMemory)(m_hDevice, hMemory, GetAllocationCallbacks());
+
+    // Update per-heap bookkeeping.
+    const uint32_t owningHeap = MemoryTypeIndexToHeapIndex(memoryType);
+    --m_Budget.m_BlockCount[owningHeap];
+    m_Budget.m_BlockBytes[owningHeap] -= size;
+
+    --m_DeviceMemoryCount;
+}
+
+// Binds buffer to memory at memoryOffset.
+// Without a pNext chain the basic vkBindBufferMemory is used. With a pNext chain the "2"
+// variant is required; if it is not compiled in, not enabled, or its function pointer is
+// null, VK_ERROR_EXTENSION_NOT_PRESENT is returned.
+VkResult VmaAllocator_T::BindVulkanBuffer(
+    VkDeviceMemory memory,
+    VkDeviceSize memoryOffset,
+    VkBuffer buffer,
+    const void* pNext)
+{
+    if(pNext == VMA_NULL)
+    {
+        return (*m_VulkanFunctions.vkBindBufferMemory)(m_hDevice, buffer, memory, memoryOffset);
+    }
+
+#if VMA_VULKAN_VERSION >= 1001000 || VMA_BIND_MEMORY2
+    if((m_UseKhrBindMemory2 || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) &&
+        m_VulkanFunctions.vkBindBufferMemory2KHR != VMA_NULL)
+    {
+        VkBindBufferMemoryInfoKHR bindInfo = { VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR };
+        bindInfo.pNext = pNext;
+        bindInfo.buffer = buffer;
+        bindInfo.memory = memory;
+        bindInfo.memoryOffset = memoryOffset;
+        return (*m_VulkanFunctions.vkBindBufferMemory2KHR)(m_hDevice, 1, &bindInfo);
+    }
+#endif // #if VMA_VULKAN_VERSION >= 1001000 || VMA_BIND_MEMORY2
+
+    // A pNext chain was supplied but there is no function able to consume it.
+    return VK_ERROR_EXTENSION_NOT_PRESENT;
+}
+
+// Binds image to memory at memoryOffset.
+// Without a pNext chain the basic vkBindImageMemory is used. With a pNext chain the "2"
+// variant is required; if it is not compiled in, not enabled, or its function pointer is
+// null, VK_ERROR_EXTENSION_NOT_PRESENT is returned.
+VkResult VmaAllocator_T::BindVulkanImage(
+    VkDeviceMemory memory,
+    VkDeviceSize memoryOffset,
+    VkImage image,
+    const void* pNext)
+{
+    if(pNext == VMA_NULL)
+    {
+        return (*m_VulkanFunctions.vkBindImageMemory)(m_hDevice, image, memory, memoryOffset);
+    }
+
+#if VMA_VULKAN_VERSION >= 1001000 || VMA_BIND_MEMORY2
+    if((m_UseKhrBindMemory2 || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) &&
+        m_VulkanFunctions.vkBindImageMemory2KHR != VMA_NULL)
+    {
+        VkBindImageMemoryInfoKHR bindInfo = { VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO_KHR };
+        bindInfo.pNext = pNext;
+        bindInfo.image = image;
+        bindInfo.memory = memory;
+        bindInfo.memoryOffset = memoryOffset;
+        return (*m_VulkanFunctions.vkBindImageMemory2KHR)(m_hDevice, 1, &bindInfo);
+    }
+#endif // #if VMA_VULKAN_VERSION >= 1001000 || VMA_BIND_MEMORY2
+
+    // A pNext chain was supplied but there is no function able to consume it.
+    return VK_ERROR_EXTENSION_NOT_PRESENT;
+}
+
+// Maps hAllocation and returns a pointer to its data in *ppData.
+// For a block allocation the owning block is mapped (with count 1) and the returned pointer
+// is the block's base pointer plus the allocation offset; *ppData is written only on success.
+// For a dedicated allocation the call is forwarded to DedicatedAllocMap.
+VkResult VmaAllocator_T::Map(VmaAllocation hAllocation, void** ppData)
+{
+    switch(hAllocation->GetType())
+    {
+    case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED:
+        return hAllocation->DedicatedAllocMap(this, ppData);
+    case VmaAllocation_T::ALLOCATION_TYPE_BLOCK:
+        {
+            VmaDeviceMemoryBlock* const pOwningBlock = hAllocation->GetBlock();
+            char* pBlockBase = VMA_NULL;
+            const VkResult mapRes = pOwningBlock->Map(this, 1, (void**)&pBlockBase);
+            if(mapRes != VK_SUCCESS)
+            {
+                return mapRes;
+            }
+            *ppData = pBlockBase + (ptrdiff_t)hAllocation->GetOffset();
+            hAllocation->BlockAllocMap();
+            return mapRes;
+        }
+    default:
+        VMA_ASSERT(0);
+        return VK_ERROR_MEMORY_MAP_FAILED;
+    }
+}
+
+// Reverses one Map call on hAllocation.
+// For a block allocation the allocation's own map state is updated first, then the owning
+// block is unmapped (with count 1). A dedicated allocation forwards to DedicatedAllocUnmap.
+void VmaAllocator_T::Unmap(VmaAllocation hAllocation)
+{
+    switch(hAllocation->GetType())
+    {
+    case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED:
+        hAllocation->DedicatedAllocUnmap(this);
+        break;
+    case VmaAllocation_T::ALLOCATION_TYPE_BLOCK:
+        {
+            VmaDeviceMemoryBlock* const pOwningBlock = hAllocation->GetBlock();
+            hAllocation->BlockAllocUnmap();
+            pOwningBlock->Unmap(this, 1);
+        }
+        break;
+    default:
+        VMA_ASSERT(0);
+    }
+}
+
+// Binds hBuffer to the memory of hAllocation at allocationLocalOffset.
+// A dedicated allocation is bound directly through BindVulkanBuffer; a block allocation is
+// bound through its owning block. An unknown allocation type asserts and yields
+// VK_ERROR_UNKNOWN_COPY.
+VkResult VmaAllocator_T::BindBufferMemory(
+    VmaAllocation hAllocation,
+    VkDeviceSize allocationLocalOffset,
+    VkBuffer hBuffer,
+    const void* pNext)
+{
+    switch(hAllocation->GetType())
+    {
+    case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED:
+        return BindVulkanBuffer(hAllocation->GetMemory(), allocationLocalOffset, hBuffer, pNext);
+    case VmaAllocation_T::ALLOCATION_TYPE_BLOCK:
+        {
+            VmaDeviceMemoryBlock* const pOwningBlock = hAllocation->GetBlock();
+            VMA_ASSERT(pOwningBlock && "Binding buffer to allocation that doesn't belong to any block.");
+            return pOwningBlock->BindBufferMemory(this, hAllocation, allocationLocalOffset, hBuffer, pNext);
+        }
+    default:
+        VMA_ASSERT(0);
+        break;
+    }
+    return VK_ERROR_UNKNOWN_COPY;
+}
+
+// Binds hImage to the memory of hAllocation at allocationLocalOffset.
+// A dedicated allocation is bound directly through BindVulkanImage; a block allocation is
+// bound through its owning block. An unknown allocation type asserts and yields
+// VK_ERROR_UNKNOWN_COPY.
+VkResult VmaAllocator_T::BindImageMemory(
+    VmaAllocation hAllocation,
+    VkDeviceSize allocationLocalOffset,
+    VkImage hImage,
+    const void* pNext)
+{
+    switch(hAllocation->GetType())
+    {
+    case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED:
+        return BindVulkanImage(hAllocation->GetMemory(), allocationLocalOffset, hImage, pNext);
+    case VmaAllocation_T::ALLOCATION_TYPE_BLOCK:
+        {
+            VmaDeviceMemoryBlock* pOwningBlock = hAllocation->GetBlock();
+            VMA_ASSERT(pOwningBlock && "Binding image to allocation that doesn't belong to any block.");
+            return pOwningBlock->BindImageMemory(this, hAllocation, allocationLocalOffset, hImage, pNext);
+        }
+    default:
+        VMA_ASSERT(0);
+        break;
+    }
+    return VK_ERROR_UNKNOWN_COPY;
+}
+
+// Flushes or invalidates (depending on op) the given range of hAllocation.
+// GetFlushOrInvalidateRange decides whether a Vulkan call is needed and computes the range;
+// when it returns false the call is ignored and VK_SUCCESS is returned.
+VkResult VmaAllocator_T::FlushOrInvalidateAllocation(
+    VmaAllocation hAllocation,
+    VkDeviceSize offset, VkDeviceSize size,
+    VMA_CACHE_OPERATION op)
+{
+    VkMappedMemoryRange range = {};
+    if(!GetFlushOrInvalidateRange(hAllocation, offset, size, range))
+    {
+        // Nothing to do for this allocation: just ignore this call.
+        return VK_SUCCESS;
+    }
+
+    switch(op)
+    {
+    case VMA_CACHE_FLUSH:
+        return (*GetVulkanFunctions().vkFlushMappedMemoryRanges)(m_hDevice, 1, &range);
+    case VMA_CACHE_INVALIDATE:
+        return (*GetVulkanFunctions().vkInvalidateMappedMemoryRanges)(m_hDevice, 1, &range);
+    default:
+        VMA_ASSERT(0);
+        break;
+    }
+    return VK_SUCCESS;
+}
+
+// Batched variant of FlushOrInvalidateAllocation: collects the ranges of all allocations
+// that need the operation and issues a single Vulkan call for them.
+// - offsets may be null, meaning offset 0 for every allocation.
+// - sizes may be null, meaning VK_WHOLE_SIZE for every allocation.
+// Returns VK_SUCCESS without calling Vulkan when no range was collected.
+VkResult VmaAllocator_T::FlushOrInvalidateAllocations(
+    uint32_t allocationCount,
+    const VmaAllocation* allocations,
+    const VkDeviceSize* offsets, const VkDeviceSize* sizes,
+    VMA_CACHE_OPERATION op)
+{
+    typedef VmaStlAllocator<VkMappedMemoryRange> RangeAllocator;
+    typedef VmaSmallVector<VkMappedMemoryRange, RangeAllocator, 16> RangeVector;
+    RangeVector collected = RangeVector(RangeAllocator(GetAllocationCallbacks()));
+
+    for(uint32_t i = 0; i < allocationCount; ++i)
+    {
+        const VkDeviceSize rangeOffset = (offsets != VMA_NULL) ? offsets[i] : 0;
+        const VkDeviceSize rangeSize = (sizes != VMA_NULL) ? sizes[i] : VK_WHOLE_SIZE;
+        VkMappedMemoryRange candidate;
+        // candidate is pushed only when the helper reports that it is needed.
+        if(GetFlushOrInvalidateRange(allocations[i], rangeOffset, rangeSize, candidate))
+        {
+            collected.push_back(candidate);
+        }
+    }
+
+    if(collected.empty())
+    {
+        // Nothing to flush or invalidate: just ignore this call.
+        return VK_SUCCESS;
+    }
+
+    switch(op)
+    {
+    case VMA_CACHE_FLUSH:
+        return (*GetVulkanFunctions().vkFlushMappedMemoryRanges)(m_hDevice, (uint32_t)collected.size(), collected.data());
+    case VMA_CACHE_INVALIDATE:
+        return (*GetVulkanFunctions().vkInvalidateMappedMemoryRanges)(m_hDevice, (uint32_t)collected.size(), collected.data());
+    default:
+        VMA_ASSERT(0);
+        break;
+    }
+    return VK_SUCCESS;
+}
+
+// Copies size bytes from pSrcHostPointer into dstAllocation at dstAllocationLocalOffset.
+// The allocation is mapped, written with memcpy, unmapped, and then the written range is
+// flushed. Returns the Map error if mapping fails, otherwise the result of the flush.
+VkResult VmaAllocator_T::CopyMemoryToAllocation(
+    const void* pSrcHostPointer,
+    VmaAllocation dstAllocation,
+    VkDeviceSize dstAllocationLocalOffset,
+    VkDeviceSize size)
+{
+    void* pMappedDst = VMA_NULL;
+    const VkResult mapRes = Map(dstAllocation, &pMappedDst);
+    if(mapRes != VK_SUCCESS)
+    {
+        return mapRes;
+    }
+
+    char* const pWriteBegin = (char*)pMappedDst + dstAllocationLocalOffset;
+    memcpy(pWriteBegin, pSrcHostPointer, (size_t)size);
+    Unmap(dstAllocation);
+
+    return FlushOrInvalidateAllocation(dstAllocation, dstAllocationLocalOffset, size, VMA_CACHE_FLUSH);
+}
+
+// Copies size bytes from srcAllocation at srcAllocationLocalOffset into pDstHostPointer.
+// The allocation is mapped, the range is invalidated, the data is read with memcpy, and the
+// allocation is unmapped again.
+// Returns the Map error if mapping fails, otherwise the result of the invalidation.
+// The destination is written only when the invalidation succeeded.
+VkResult VmaAllocator_T::CopyAllocationToMemory(
+    VmaAllocation srcAllocation,
+    VkDeviceSize srcAllocationLocalOffset,
+    void* pDstHostPointer,
+    VkDeviceSize size)
+{
+    void* srcMappedData = VMA_NULL;
+    VkResult res = Map(srcAllocation, &srcMappedData);
+    if(res != VK_SUCCESS)
+    {
+        return res;
+    }
+
+    res = FlushOrInvalidateAllocation(srcAllocation, srcAllocationLocalOffset, size, VMA_CACHE_INVALIDATE);
+    if(res == VK_SUCCESS)
+    {
+        memcpy(pDstHostPointer, (const char*)srcMappedData + srcAllocationLocalOffset, (size_t)size);
+    }
+
+    // Map succeeded, so it must be balanced by Unmap on every path, including a failed
+    // invalidation. Otherwise the mapping reference taken above would never be released.
+    Unmap(srcAllocation);
+    return res;
+}
+
+// Releases a dedicated allocation: unregisters it from the list that tracks it
+// (the parent custom pool's list, or the allocator's per-memory-type list),
+// frees its VkDeviceMemory, updates the budget and destroys the allocation object.
+void VmaAllocator_T::FreeDedicatedMemory(const VmaAllocation allocation)
+{
+    VMA_ASSERT(allocation && allocation->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED);
+
+    const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex();
+    const VkDeviceSize allocSize = allocation->GetSize();
+
+    const VmaPool owningPool = allocation->GetParentPool();
+    if(owningPool != VK_NULL_HANDLE)
+    {
+        // Allocation made in a custom pool.
+        owningPool->m_DedicatedAllocations.Unregister(allocation);
+    }
+    else
+    {
+        // Allocation made in the default pool of its memory type.
+        m_DedicatedAllocations[memTypeIndex].Unregister(allocation);
+    }
+
+    // vkUnmapMemory is deliberately not called here, even if the allocation is
+    // still mapped: the Vulkan spec allows skipping it before vkFreeMemory.
+    FreeVulkanMemory(memTypeIndex, allocSize, allocation->GetMemory());
+
+    m_Budget.RemoveAllocation(MemoryTypeIndexToHeapIndex(memTypeIndex), allocSize);
+    allocation->Destroy(this);
+    m_AllocationObjectAllocator.Free(allocation);
+
+    VMA_DEBUG_LOG_FORMAT(" Freed DedicatedMemory MemoryTypeIndex=%" PRIu32, memTypeIndex);
+}
+
+// Determines which memory types can back the buffer described by
+// VmaFillGpuDefragmentationBufferCreateInfo(). A temporary buffer is created,
+// its memory requirements are queried, and the buffer is destroyed again.
+// Returns the memoryTypeBits of that buffer, or 0 when the buffer could not be created.
+uint32_t VmaAllocator_T::CalculateGpuDefragmentationMemoryTypeBits() const
+{
+    VkBufferCreateInfo dummyBufCreateInfo;
+    VmaFillGpuDefragmentationBufferCreateInfo(dummyBufCreateInfo);
+
+    VkBuffer probeBuffer = VK_NULL_HANDLE;
+    const VkResult createRes = (*GetVulkanFunctions().vkCreateBuffer)(
+        m_hDevice, &dummyBufCreateInfo, GetAllocationCallbacks(), &probeBuffer);
+    if(createRes != VK_SUCCESS)
+    {
+        return 0;
+    }
+
+    // Query for supported memory types.
+    VkMemoryRequirements memReq;
+    (*GetVulkanFunctions().vkGetBufferMemoryRequirements)(m_hDevice, probeBuffer, &memReq);
+    const uint32_t supportedTypeBits = memReq.memoryTypeBits;
+
+    // The buffer was needed only for the query.
+    (*GetVulkanFunctions().vkDestroyBuffer)(m_hDevice, probeBuffer, GetAllocationCallbacks());
+
+    return supportedTypeBits;
+}
+
+// Returns a bit mask with one bit per memory type index. All bits start set;
+// unless m_UseAmdDeviceCoherentMemory is enabled, the bits of memory types that
+// have VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD are cleared.
+uint32_t VmaAllocator_T::CalculateGlobalMemoryTypeBits() const
+{
+    // Make sure memory information is already fetched.
+    VMA_ASSERT(GetMemoryTypeCount() > 0);
+
+    uint32_t allowedTypeBits = UINT32_MAX;
+    if(m_UseAmdDeviceCoherentMemory)
+    {
+        return allowedTypeBits;
+    }
+
+    const uint32_t typeCount = GetMemoryTypeCount();
+    for(uint32_t typeIndex = 0; typeIndex < typeCount; ++typeIndex)
+    {
+        const VkMemoryPropertyFlags typeFlags = m_MemProps.memoryTypes[typeIndex].propertyFlags;
+        if((typeFlags & VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY) != 0)
+        {
+            allowedTypeBits &= ~(1u << typeIndex);
+        }
+    }
+
+    return allowedTypeBits;
+}
+
+// Computes the VkMappedMemoryRange to flush or invalidate for the part of
+// `allocation` described by `offset` and `size` (both relative to the
+// allocation; `size` may be VK_WHOLE_SIZE).
+//
+// Returns false, leaving outRange untouched, when size is 0 or the memory type
+// of the allocation is not non-coherent. Otherwise fills outRange and returns
+// true. The range start is aligned down and the range size aligned up to
+// nonCoherentAtomSize, then the size is clamped to the end of the dedicated
+// allocation or of the owning block.
+bool VmaAllocator_T::GetFlushOrInvalidateRange(
+    VmaAllocation allocation,
+    VkDeviceSize offset, VkDeviceSize size,
+    VkMappedMemoryRange& outRange) const
+{
+    const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex();
+    if(size == 0 || !IsMemoryTypeNonCoherent(memTypeIndex))
+    {
+        return false;
+    }
+
+    const VkDeviceSize atomSize = m_PhysicalDeviceProperties.limits.nonCoherentAtomSize;
+    const VkDeviceSize allocSize = allocation->GetSize();
+    VMA_ASSERT(offset <= allocSize);
+
+    outRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
+    outRange.pNext = VMA_NULL;
+    outRange.memory = allocation->GetMemory();
+
+    switch(allocation->GetType())
+    {
+    case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED:
+    {
+        const VkDeviceSize alignedOffset = VmaAlignDown(offset, atomSize);
+        const VkDeviceSize bytesToEnd = allocSize - alignedOffset;
+        outRange.offset = alignedOffset;
+        if(size == VK_WHOLE_SIZE)
+        {
+            outRange.size = bytesToEnd;
+        }
+        else
+        {
+            VMA_ASSERT(offset + size <= allocSize);
+            // Grow by the bytes added in front by aligning the offset down,
+            // round up to the atom size, but never past the end of the allocation.
+            outRange.size = VMA_MIN(
+                VmaAlignUp(size + (offset - alignedOffset), atomSize),
+                bytesToEnd);
+        }
+        break;
+    }
+    case VmaAllocation_T::ALLOCATION_TYPE_BLOCK:
+    {
+        // 1. Range expressed relative to this allocation.
+        const VkDeviceSize alignedOffset = VmaAlignDown(offset, atomSize);
+        if(size == VK_WHOLE_SIZE)
+        {
+            size = allocSize - offset;
+        }
+        else
+        {
+            VMA_ASSERT(offset + size <= allocSize);
+        }
+        const VkDeviceSize alignedSize = VmaAlignUp(size + (offset - alignedOffset), atomSize);
+
+        // 2. Translate to the whole block and clamp to its end.
+        const VkDeviceSize allocationOffset = allocation->GetOffset();
+        VMA_ASSERT(allocationOffset % atomSize == 0);
+        const VkDeviceSize blockSize = allocation->GetBlock()->m_pMetadata->GetSize();
+        outRange.offset = alignedOffset + allocationOffset;
+        outRange.size = VMA_MIN(alignedSize, blockSize - outRange.offset);
+
+        break;
+    }
+    default:
+        VMA_ASSERT(0);
+    }
+    return true;
+}
+
+#if VMA_MEMORY_BUDGET
+// Queries per-heap usage and budget through vkGetPhysicalDeviceMemoryProperties2KHR
+// with VkPhysicalDeviceMemoryBudgetPropertiesEXT chained in, and stores the results
+// in m_Budget under its write lock. Implausible driver values are corrected before
+// they are stored, and the operations-since-fetch counter is reset.
+void VmaAllocator_T::UpdateVulkanBudget()
+{
+    VMA_ASSERT(m_UseExtMemoryBudget);
+
+    VkPhysicalDeviceMemoryProperties2KHR memProps = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2_KHR };
+
+    VkPhysicalDeviceMemoryBudgetPropertiesEXT budgetProps = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT };
+    VmaPnextChainPushFront(&memProps, &budgetProps);
+
+    GetVulkanFunctions().vkGetPhysicalDeviceMemoryProperties2KHR(m_PhysicalDevice, &memProps);
+
+    VmaMutexLockWrite lockWrite(m_Budget.m_BudgetMutex, m_UseMutex);
+
+    const uint32_t heapCount = GetMemoryHeapCount();
+    for(uint32_t heapIndex = 0; heapIndex < heapCount; ++heapIndex)
+    {
+        const uint64_t heapSize = m_MemProps.memoryHeaps[heapIndex].size;
+        uint64_t usage = budgetProps.heapUsage[heapIndex];
+        uint64_t budget = budgetProps.heapBudget[heapIndex];
+        const uint64_t blockBytes = m_Budget.m_BlockBytes[heapIndex].load();
+
+        // Some bugged drivers return the budget incorrectly, e.g. 0 or much bigger than heap size.
+        if(budget == 0)
+        {
+            budget = heapSize * 8 / 10; // 80% heuristics.
+        }
+        else if(budget > heapSize)
+        {
+            budget = heapSize;
+        }
+
+        // A reported usage of 0 while blocks are allocated is replaced by the block bytes.
+        if(usage == 0 && blockBytes > 0)
+        {
+            usage = blockBytes;
+        }
+
+        m_Budget.m_VulkanUsage[heapIndex] = usage;
+        m_Budget.m_VulkanBudget[heapIndex] = budget;
+        m_Budget.m_BlockBytesAtBudgetFetch[heapIndex] = blockBytes;
+    }
+    m_Budget.m_OperationsSinceBudgetFetch = 0;
+}
+#endif // VMA_MEMORY_BUDGET
+
+// Debug helper: fills the whole allocation with `pattern`.
+// Does nothing unless VMA_DEBUG_INITIALIZE_ALLOCATIONS is enabled, mapping is
+// allowed for the allocation and its memory type is HOST_VISIBLE. The written
+// bytes are flushed before the allocation is unmapped. Asserts when mapping fails.
+void VmaAllocator_T::FillAllocation(const VmaAllocation hAllocation, uint8_t pattern)
+{
+    if(!VMA_DEBUG_INITIALIZE_ALLOCATIONS || !hAllocation->IsMappingAllowed())
+    {
+        return;
+    }
+
+    const VkMemoryPropertyFlags memFlags =
+        m_MemProps.memoryTypes[hAllocation->GetMemoryTypeIndex()].propertyFlags;
+    if((memFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0)
+    {
+        return;
+    }
+
+    void* pData = VMA_NULL;
+    const VkResult mapRes = Map(hAllocation, &pData);
+    if(mapRes != VK_SUCCESS)
+    {
+        VMA_ASSERT(0 && "VMA_DEBUG_INITIALIZE_ALLOCATIONS is enabled, but couldn't map memory to fill allocation.");
+        return;
+    }
+
+    memset(pData, (int)pattern, (size_t)hAllocation->GetSize());
+    FlushOrInvalidateAllocation(hAllocation, 0, VK_WHOLE_SIZE, VMA_CACHE_FLUSH);
+    Unmap(hAllocation);
+}
+
+// Returns the cached GPU-defragmentation memory type bits. UINT32_MAX in the
+// cache means "not computed yet": the value is then calculated, stored and returned.
+uint32_t VmaAllocator_T::GetGpuDefragmentationMemoryTypeBits()
+{
+    const uint32_t cachedBits = m_GpuDefragmentationMemoryTypeBits.load();
+    if(cachedBits != UINT32_MAX)
+    {
+        return cachedBits;
+    }
+
+    const uint32_t computedBits = CalculateGpuDefragmentationMemoryTypeBits();
+    m_GpuDefragmentationMemoryTypeBits.store(computedBits);
+    return computedBits;
+}
+
+#if VMA_STATS_STRING_ENABLED
+// Writes two JSON members to `json`:
+//  - "DefaultPools": an object with one "Type <index>" entry per memory type that
+//    has a default block vector, holding its preferred block size, its blocks and
+//    its dedicated allocations.
+//  - "CustomPools": an object with one "Type <index>" array per memory type used
+//    by at least one custom pool; each element describes one pool.
+// The custom pool list is read under m_PoolsMutex.
+void VmaAllocator_T::PrintDetailedMap(VmaJsonWriter& json)
+{
+    json.WriteString("DefaultPools");
+    json.BeginObject();
+    for (uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex)
+    {
+        VmaBlockVector* const pDefaultVector = m_pBlockVectors[memTypeIndex];
+        if (pDefaultVector == VMA_NULL)
+        {
+            continue;
+        }
+        VmaDedicatedAllocationList& dedicatedList = m_DedicatedAllocations[memTypeIndex];
+
+        json.BeginString("Type ");
+        json.ContinueString(memTypeIndex);
+        json.EndString();
+
+        json.BeginObject();
+        json.WriteString("PreferredBlockSize");
+        json.WriteNumber(pDefaultVector->GetPreferredBlockSize());
+
+        json.WriteString("Blocks");
+        pDefaultVector->PrintDetailedMap(json);
+
+        json.WriteString("DedicatedAllocations");
+        dedicatedList.BuildStatsString(json);
+        json.EndObject();
+    }
+    json.EndObject();
+
+    json.WriteString("CustomPools");
+    json.BeginObject();
+    {
+        VmaMutexLockRead lock(m_PoolsMutex, m_UseMutex);
+        if (!m_Pools.IsEmpty())
+        {
+            for (uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex)
+            {
+                // The "Type <index>" array is opened lazily, on the first matching pool.
+                bool arrayOpened = false;
+                size_t poolOrdinal = 0;
+                for (VmaPool pool = m_Pools.Front(); pool != VMA_NULL; pool = m_Pools.GetNext(pool))
+                {
+                    VmaBlockVector& poolVector = pool->m_BlockVector;
+                    if (poolVector.GetMemoryTypeIndex() != memTypeIndex)
+                    {
+                        continue;
+                    }
+
+                    if (!arrayOpened)
+                    {
+                        json.BeginString("Type ");
+                        json.ContinueString(memTypeIndex);
+                        json.EndString();
+                        json.BeginArray();
+                        arrayOpened = true;
+                    }
+
+                    json.BeginObject();
+
+                    // Name: ordinal within this memory type, optionally followed by the pool name.
+                    json.WriteString("Name");
+                    json.BeginString();
+                    json.ContinueString((uint64_t)poolOrdinal++);
+                    if (pool->GetName())
+                    {
+                        json.ContinueString(" - ");
+                        json.ContinueString(pool->GetName());
+                    }
+                    json.EndString();
+
+                    json.WriteString("PreferredBlockSize");
+                    json.WriteNumber(poolVector.GetPreferredBlockSize());
+
+                    json.WriteString("Blocks");
+                    poolVector.PrintDetailedMap(json);
+
+                    json.WriteString("DedicatedAllocations");
+                    pool->m_DedicatedAllocations.BuildStatsString(json);
+
+                    json.EndObject();
+                }
+
+                if (arrayOpened)
+                {
+                    json.EndArray();
+                }
+            }
+        }
+    }
+    json.EndObject();
+}
+#endif // VMA_STATS_STRING_ENABLED
+#endif // _VMA_ALLOCATOR_T_FUNCTIONS
+
+
+#ifndef _VMA_PUBLIC_INTERFACE
+// Creates a VmaAllocator_T with the allocation callbacks from pCreateInfo and
+// initializes it. When Init() returns a negative result the object is deleted
+// and *pAllocator is set to VK_NULL_HANDLE. Returns the result of Init().
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAllocator(
+    const VmaAllocatorCreateInfo* pCreateInfo,
+    VmaAllocator* pAllocator)
+{
+    VMA_ASSERT(pCreateInfo && pAllocator);
+    VMA_ASSERT(pCreateInfo->vulkanApiVersion == 0 ||
+        (VK_VERSION_MAJOR(pCreateInfo->vulkanApiVersion) == 1 && VK_VERSION_MINOR(pCreateInfo->vulkanApiVersion) <= 4));
+    VMA_DEBUG_LOG("vmaCreateAllocator");
+
+    VmaAllocator newAllocator = vma_new(pCreateInfo->pAllocationCallbacks, VmaAllocator_T)(pCreateInfo);
+    const VkResult initResult = newAllocator->Init(pCreateInfo);
+    if(initResult < 0)
+    {
+        vma_delete(pCreateInfo->pAllocationCallbacks, newAllocator);
+        newAllocator = VK_NULL_HANDLE;
+    }
+    *pAllocator = newAllocator;
+    return initResult;
+}
+
+// Destroys the allocator object. Passing VK_NULL_HANDLE is a no-op.
+VMA_CALL_PRE void VMA_CALL_POST vmaDestroyAllocator(
+    VmaAllocator allocator)
+{
+    if(allocator == VK_NULL_HANDLE)
+    {
+        return;
+    }
+
+    VMA_DEBUG_LOG("vmaDestroyAllocator");
+    // The callbacks are copied out first because the object that stores them
+    // is the one being deleted.
+    VkAllocationCallbacks callbacksCopy = allocator->m_AllocationCallbacks;
+    vma_delete(&callbacksCopy, allocator);
+}
+
+// Fills *pAllocatorInfo with the instance, physical device and device handles
+// the allocator holds.
+VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocatorInfo(VmaAllocator allocator, VmaAllocatorInfo* pAllocatorInfo)
+{
+    VMA_ASSERT(allocator && pAllocatorInfo);
+
+    VmaAllocatorInfo& info = *pAllocatorInfo;
+    info.instance = allocator->m_hInstance;
+    info.physicalDevice = allocator->GetPhysicalDevice();
+    info.device = allocator->m_hDevice;
+}
+
+// Stores in *ppPhysicalDeviceProperties a pointer to the VkPhysicalDeviceProperties
+// held inside the allocator (no copy is made).
+VMA_CALL_PRE void VMA_CALL_POST vmaGetPhysicalDeviceProperties(
+    VmaAllocator allocator,
+    const VkPhysicalDeviceProperties **ppPhysicalDeviceProperties)
+{
+    VMA_ASSERT(allocator && ppPhysicalDeviceProperties);
+
+    const VkPhysicalDeviceProperties& storedProps = allocator->m_PhysicalDeviceProperties;
+    *ppPhysicalDeviceProperties = &storedProps;
+}
+
+// Stores in *ppPhysicalDeviceMemoryProperties a pointer to the
+// VkPhysicalDeviceMemoryProperties held inside the allocator (no copy is made).
+VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryProperties(
+    VmaAllocator allocator,
+    const VkPhysicalDeviceMemoryProperties** ppPhysicalDeviceMemoryProperties)
+{
+    VMA_ASSERT(allocator && ppPhysicalDeviceMemoryProperties);
+
+    const VkPhysicalDeviceMemoryProperties& storedMemProps = allocator->m_MemProps;
+    *ppPhysicalDeviceMemoryProperties = &storedMemProps;
+}
+
+// Writes to *pFlags the property flags of the memory type with index
+// memoryTypeIndex, which must be lower than the allocator's memory type count.
+VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryTypeProperties(
+    VmaAllocator allocator,
+    uint32_t memoryTypeIndex,
+    VkMemoryPropertyFlags* pFlags)
+{
+    VMA_ASSERT(allocator && pFlags);
+    VMA_ASSERT(memoryTypeIndex < allocator->GetMemoryTypeCount());
+
+    const VkMemoryType& memType = allocator->m_MemProps.memoryTypes[memoryTypeIndex];
+    *pFlags = memType.propertyFlags;
+}
+
+// Forwards frameIndex to VmaAllocator_T::SetCurrentFrameIndex().
+VMA_CALL_PRE void VMA_CALL_POST vmaSetCurrentFrameIndex(
+    VmaAllocator allocator,
+    uint32_t frameIndex)
+{
+    VMA_ASSERT(allocator);
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+    allocator->SetCurrentFrameIndex(frameIndex);
+}
+
+// Fills *pStats by calling VmaAllocator_T::CalculateStatistics().
+VMA_CALL_PRE void VMA_CALL_POST vmaCalculateStatistics(
+    VmaAllocator allocator,
+    VmaTotalStatistics* pStats)
+{
+    VMA_ASSERT(allocator && pStats);
+
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    allocator->CalculateStatistics(pStats);
+}
+
+// Fills pBudgets with one VmaBudget per memory heap, for heaps 0 up to the
+// allocator's memory heap count.
+VMA_CALL_PRE void VMA_CALL_POST vmaGetHeapBudgets(
+    VmaAllocator allocator,
+    VmaBudget* pBudgets)
+{
+    VMA_ASSERT(allocator && pBudgets);
+
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    const uint32_t firstHeap = 0;
+    allocator->GetHeapBudgets(pBudgets, firstHeap, allocator->GetMemoryHeapCount());
+}
+
+#if VMA_STATS_STRING_ENABLED
+
+// Builds a JSON document describing the allocator and stores a newly allocated
+// copy of it in *ppStatsString (created with VmaCreateStringCopy using the
+// allocator's allocation callbacks).
+//
+// The document contains "General" (device properties and limits), "Total"
+// (overall statistics) and "MemoryInfo" (per heap: flags, size, budget,
+// statistics and the memory types that live in the heap). When detailedMap is
+// VK_TRUE, the output of VmaAllocator_T::PrintDetailedMap() is appended.
+//
+// In the "Flags" arrays, known bits are written by name; whatever bits remain
+// afterwards are written as a single number.
+VMA_CALL_PRE void VMA_CALL_POST vmaBuildStatsString(
+    VmaAllocator allocator,
+    char** ppStatsString,
+    VkBool32 detailedMap)
+{
+    VMA_ASSERT(allocator && ppStatsString);
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    VmaStringBuilder sb(allocator->GetAllocationCallbacks());
+    {
+        VmaBudget budgets[VK_MAX_MEMORY_HEAPS];
+        allocator->GetHeapBudgets(budgets, 0, allocator->GetMemoryHeapCount());
+
+        VmaTotalStatistics stats;
+        allocator->CalculateStatistics(&stats);
+
+        VmaJsonWriter json(allocator->GetAllocationCallbacks(), sb);
+        json.BeginObject();
+        {
+            json.WriteString("General");
+            json.BeginObject();
+            {
+                const VkPhysicalDeviceProperties& deviceProperties = allocator->m_PhysicalDeviceProperties;
+                const VkPhysicalDeviceMemoryProperties& memoryProperties = allocator->m_MemProps;
+
+                json.WriteString("API");
+                json.WriteString("Vulkan");
+
+                json.WriteString("apiVersion");
+                json.BeginString();
+                json.ContinueString(VK_VERSION_MAJOR(deviceProperties.apiVersion));
+                json.ContinueString(".");
+                json.ContinueString(VK_VERSION_MINOR(deviceProperties.apiVersion));
+                json.ContinueString(".");
+                json.ContinueString(VK_VERSION_PATCH(deviceProperties.apiVersion));
+                json.EndString();
+
+                json.WriteString("GPU");
+                json.WriteString(deviceProperties.deviceName);
+                json.WriteString("deviceType");
+                json.WriteNumber(static_cast<uint32_t>(deviceProperties.deviceType));
+
+                json.WriteString("maxMemoryAllocationCount");
+                json.WriteNumber(deviceProperties.limits.maxMemoryAllocationCount);
+                json.WriteString("bufferImageGranularity");
+                json.WriteNumber(deviceProperties.limits.bufferImageGranularity);
+                json.WriteString("nonCoherentAtomSize");
+                json.WriteNumber(deviceProperties.limits.nonCoherentAtomSize);
+
+                json.WriteString("memoryHeapCount");
+                json.WriteNumber(memoryProperties.memoryHeapCount);
+                json.WriteString("memoryTypeCount");
+                json.WriteNumber(memoryProperties.memoryTypeCount);
+            }
+            json.EndObject();
+        }
+        {
+            json.WriteString("Total");
+            VmaPrintDetailedStatistics(json, stats.total);
+        }
+        {
+            json.WriteString("MemoryInfo");
+            json.BeginObject();
+            {
+                for (uint32_t heapIndex = 0; heapIndex < allocator->GetMemoryHeapCount(); ++heapIndex)
+                {
+                    json.BeginString("Heap ");
+                    json.ContinueString(heapIndex);
+                    json.EndString();
+                    json.BeginObject();
+                    {
+                        const VkMemoryHeap& heapInfo = allocator->m_MemProps.memoryHeaps[heapIndex];
+                        json.WriteString("Flags");
+                        json.BeginArray(true);
+                        {
+                            if (heapInfo.flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT)
+                                json.WriteString("DEVICE_LOCAL");
+                        #if VMA_VULKAN_VERSION >= 1001000
+                            if (heapInfo.flags & VK_MEMORY_HEAP_MULTI_INSTANCE_BIT)
+                                json.WriteString("MULTI_INSTANCE");
+                        #endif
+
+                            // Bits not reported by name above are written as a number.
+                            VkMemoryHeapFlags flags = heapInfo.flags &
+                                ~(VK_MEMORY_HEAP_DEVICE_LOCAL_BIT
+                        #if VMA_VULKAN_VERSION >= 1001000
+                                    | VK_MEMORY_HEAP_MULTI_INSTANCE_BIT
+                        #endif
+                                    );
+                            if (flags != 0)
+                                json.WriteNumber(flags);
+                        }
+                        json.EndArray();
+
+                        json.WriteString("Size");
+                        json.WriteNumber(heapInfo.size);
+
+                        json.WriteString("Budget");
+                        json.BeginObject();
+                        {
+                            json.WriteString("BudgetBytes");
+                            json.WriteNumber(budgets[heapIndex].budget);
+                            json.WriteString("UsageBytes");
+                            json.WriteNumber(budgets[heapIndex].usage);
+                        }
+                        json.EndObject();
+
+                        json.WriteString("Stats");
+                        VmaPrintDetailedStatistics(json, stats.memoryHeap[heapIndex]);
+
+                        json.WriteString("MemoryPools");
+                        json.BeginObject();
+                        {
+                            for (uint32_t typeIndex = 0; typeIndex < allocator->GetMemoryTypeCount(); ++typeIndex)
+                            {
+                                if (allocator->MemoryTypeIndexToHeapIndex(typeIndex) == heapIndex)
+                                {
+                                    json.BeginString("Type ");
+                                    json.ContinueString(typeIndex);
+                                    json.EndString();
+                                    json.BeginObject();
+                                    {
+                                        json.WriteString("Flags");
+                                        json.BeginArray(true);
+                                        {
+                                            VkMemoryPropertyFlags flags = allocator->m_MemProps.memoryTypes[typeIndex].propertyFlags;
+                                            if (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
+                                                json.WriteString("DEVICE_LOCAL");
+                                            if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
+                                                json.WriteString("HOST_VISIBLE");
+                                            if (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+                                                json.WriteString("HOST_COHERENT");
+                                            if (flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT)
+                                                json.WriteString("HOST_CACHED");
+                                            if (flags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT)
+                                                json.WriteString("LAZILY_ALLOCATED");
+                                        #if VMA_VULKAN_VERSION >= 1001000
+                                            if (flags & VK_MEMORY_PROPERTY_PROTECTED_BIT)
+                                                json.WriteString("PROTECTED");
+                                        #endif
+                                        #if VK_AMD_device_coherent_memory
+                                            if (flags & VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY)
+                                                json.WriteString("DEVICE_COHERENT_AMD");
+                                            if (flags & VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY)
+                                                json.WriteString("DEVICE_UNCACHED_AMD");
+                                        #endif
+
+                                            // Clear exactly the bits reported by name above, under the
+                                            // same preprocessor guards, so a named bit is never written
+                                            // a second time as part of the leftover number.
+                                            flags &= ~(VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
+                                                | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
+                                                | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
+                                                | VK_MEMORY_PROPERTY_HOST_CACHED_BIT
+                                                | VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT
+                                        #if VMA_VULKAN_VERSION >= 1001000
+                                                | VK_MEMORY_PROPERTY_PROTECTED_BIT
+                                        #endif
+                                        #if VK_AMD_device_coherent_memory
+                                                | VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY
+                                                | VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY
+                                        #endif
+                                                );
+                                            if (flags != 0)
+                                                json.WriteNumber(flags);
+                                        }
+                                        json.EndArray();
+
+                                        json.WriteString("Stats");
+                                        VmaPrintDetailedStatistics(json, stats.memoryType[typeIndex]);
+                                    }
+                                    json.EndObject();
+                                }
+                            }
+
+                        }
+                        json.EndObject();
+                    }
+                    json.EndObject();
+                }
+            }
+            json.EndObject();
+        }
+
+        if (detailedMap == VK_TRUE)
+            allocator->PrintDetailedMap(json);
+
+        json.EndObject();
+    }
+
+    *ppStatsString = VmaCreateStringCopy(allocator->GetAllocationCallbacks(), sb.GetData(), sb.GetLength());
+}
+
+// Frees a string with VmaFreeString() using the allocator's allocation
+// callbacks. A null pStatsString is ignored.
+VMA_CALL_PRE void VMA_CALL_POST vmaFreeStatsString(
+    VmaAllocator allocator,
+    char* pStatsString)
+{
+    if(pStatsString == VMA_NULL)
+    {
+        return;
+    }
+
+    VMA_ASSERT(allocator);
+    VmaFreeString(allocator->GetAllocationCallbacks(), pStatsString);
+}
+
+#endif // VMA_STATS_STRING_ENABLED
+
+/*
+Finds a memory type index for the given memoryTypeBits and allocation create
+info by forwarding to VmaAllocator_T::FindMemoryTypeIndex() with an unknown
+buffer/image usage.
+
+This function is not protected by any mutex because it just reads immutable data.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndex(
+    VmaAllocator allocator,
+    uint32_t memoryTypeBits,
+    const VmaAllocationCreateInfo* pAllocationCreateInfo,
+    uint32_t* pMemoryTypeIndex)
+{
+    VMA_ASSERT(allocator != VK_NULL_HANDLE);
+    VMA_ASSERT(pAllocationCreateInfo != VMA_NULL);
+    VMA_ASSERT(pMemoryTypeIndex != VMA_NULL);
+
+    const VkResult findRes = allocator->FindMemoryTypeIndex(
+        memoryTypeBits,
+        pAllocationCreateInfo,
+        VmaBufferImageUsage::UNKNOWN,
+        pMemoryTypeIndex);
+    return findRes;
+}
+
+// Finds a memory type index suitable for a buffer described by *pBufferCreateInfo.
+//
+// When vkGetDeviceBufferMemoryRequirements is available, the memory requirements
+// are queried directly from the create info. Otherwise a temporary buffer is
+// created, queried and destroyed; if creating it fails, that error is returned.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo(
+    VmaAllocator allocator,
+    const VkBufferCreateInfo* pBufferCreateInfo,
+    const VmaAllocationCreateInfo* pAllocationCreateInfo,
+    uint32_t* pMemoryTypeIndex)
+{
+    VMA_ASSERT(allocator != VK_NULL_HANDLE);
+    VMA_ASSERT(pBufferCreateInfo != VMA_NULL);
+    VMA_ASSERT(pAllocationCreateInfo != VMA_NULL);
+    VMA_ASSERT(pMemoryTypeIndex != VMA_NULL);
+
+    const VkDevice hDev = allocator->m_hDevice;
+    const VmaVulkanFunctions& vkFuncs = allocator->GetVulkanFunctions();
+
+#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000
+    if(vkFuncs.vkGetDeviceBufferMemoryRequirements)
+    {
+        // Can query straight from VkBufferCreateInfo :)
+        VkDeviceBufferMemoryRequirementsKHR devBufMemReq = {VK_STRUCTURE_TYPE_DEVICE_BUFFER_MEMORY_REQUIREMENTS_KHR};
+        devBufMemReq.pCreateInfo = pBufferCreateInfo;
+
+        VkMemoryRequirements2 memReq2 = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
+        (*vkFuncs.vkGetDeviceBufferMemoryRequirements)(hDev, &devBufMemReq, &memReq2);
+
+        return allocator->FindMemoryTypeIndex(
+            memReq2.memoryRequirements.memoryTypeBits, pAllocationCreateInfo,
+            VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), pMemoryTypeIndex);
+    }
+#endif // VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000
+
+    // Must create a dummy buffer to query :(
+    VkBuffer hBuffer = VK_NULL_HANDLE;
+    VkResult res = vkFuncs.vkCreateBuffer(
+        hDev, pBufferCreateInfo, allocator->GetAllocationCallbacks(), &hBuffer);
+    if(res != VK_SUCCESS)
+    {
+        return res;
+    }
+
+    VkMemoryRequirements memReq = {};
+    vkFuncs.vkGetBufferMemoryRequirements(hDev, hBuffer, &memReq);
+
+    res = allocator->FindMemoryTypeIndex(
+        memReq.memoryTypeBits, pAllocationCreateInfo,
+        VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), pMemoryTypeIndex);
+
+    vkFuncs.vkDestroyBuffer(
+        hDev, hBuffer, allocator->GetAllocationCallbacks());
+    return res;
+}
+
+// Finds a memory type index suitable for an image described by *pImageCreateInfo.
+//
+// When vkGetDeviceImageMemoryRequirements is available, the memory requirements
+// are queried directly from the create info (asserting that the image is neither
+// DRM-format-modifier tiled nor disjoint). Otherwise a temporary image is
+// created, queried and destroyed; if creating it fails, that error is returned.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForImageInfo(
+    VmaAllocator allocator,
+    const VkImageCreateInfo* pImageCreateInfo,
+    const VmaAllocationCreateInfo* pAllocationCreateInfo,
+    uint32_t* pMemoryTypeIndex)
+{
+    VMA_ASSERT(allocator != VK_NULL_HANDLE);
+    VMA_ASSERT(pImageCreateInfo != VMA_NULL);
+    VMA_ASSERT(pAllocationCreateInfo != VMA_NULL);
+    VMA_ASSERT(pMemoryTypeIndex != VMA_NULL);
+
+    const VkDevice hDev = allocator->m_hDevice;
+    const VmaVulkanFunctions& vkFuncs = allocator->GetVulkanFunctions();
+
+#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000
+    if(vkFuncs.vkGetDeviceImageMemoryRequirements)
+    {
+        // Can query straight from VkImageCreateInfo :)
+        VkDeviceImageMemoryRequirementsKHR devImgMemReq = {VK_STRUCTURE_TYPE_DEVICE_IMAGE_MEMORY_REQUIREMENTS_KHR};
+        devImgMemReq.pCreateInfo = pImageCreateInfo;
+        VMA_ASSERT(pImageCreateInfo->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT_COPY && (pImageCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT_COPY) == 0 &&
+            "Cannot use this VkImageCreateInfo with vmaFindMemoryTypeIndexForImageInfo as I don't know what to pass as VkDeviceImageMemoryRequirements::planeAspect.");
+
+        VkMemoryRequirements2 memReq2 = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
+        (*vkFuncs.vkGetDeviceImageMemoryRequirements)(hDev, &devImgMemReq, &memReq2);
+
+        return allocator->FindMemoryTypeIndex(
+            memReq2.memoryRequirements.memoryTypeBits, pAllocationCreateInfo,
+            VmaBufferImageUsage(*pImageCreateInfo), pMemoryTypeIndex);
+    }
+#endif // VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000
+
+    // Must create a dummy image to query :(
+    VkImage hImage = VK_NULL_HANDLE;
+    VkResult res = vkFuncs.vkCreateImage(
+        hDev, pImageCreateInfo, allocator->GetAllocationCallbacks(), &hImage);
+    if(res != VK_SUCCESS)
+    {
+        return res;
+    }
+
+    VkMemoryRequirements memReq = {};
+    vkFuncs.vkGetImageMemoryRequirements(hDev, hImage, &memReq);
+
+    res = allocator->FindMemoryTypeIndex(
+        memReq.memoryTypeBits, pAllocationCreateInfo,
+        VmaBufferImageUsage(*pImageCreateInfo), pMemoryTypeIndex);
+
+    vkFuncs.vkDestroyImage(
+        hDev, hImage, allocator->GetAllocationCallbacks());
+    return res;
+}
+
+// Creates a custom pool by forwarding to VmaAllocator_T::CreatePool() and
+// returns its result.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreatePool(
+    VmaAllocator allocator,
+    const VmaPoolCreateInfo* pCreateInfo,
+    VmaPool* pPool)
+{
+    VMA_ASSERT(allocator && pCreateInfo && pPool);
+
+    VMA_DEBUG_LOG("vmaCreatePool");
+
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    const VkResult createRes = allocator->CreatePool(pCreateInfo, pPool);
+    return createRes;
+}
+
+// Destroys a custom pool via VmaAllocator_T::DestroyPool().
+// Passing VK_NULL_HANDLE as pool is a no-op.
+VMA_CALL_PRE void VMA_CALL_POST vmaDestroyPool(
+    VmaAllocator allocator,
+    VmaPool pool)
+{
+    VMA_ASSERT(allocator);
+
+    if(pool != VK_NULL_HANDLE)
+    {
+        VMA_DEBUG_LOG("vmaDestroyPool");
+
+        VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+        allocator->DestroyPool(pool);
+    }
+}
+
+// Fills *pPoolStats by calling VmaAllocator_T::GetPoolStatistics() for `pool`.
+VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolStatistics(
+    VmaAllocator allocator,
+    VmaPool pool,
+    VmaStatistics* pPoolStats)
+{
+    VMA_ASSERT(allocator && pool && pPoolStats);
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+    allocator->GetPoolStatistics(pool, pPoolStats);
+}
+
+// Fills *pPoolStats by calling VmaAllocator_T::CalculatePoolStatistics() for `pool`.
+VMA_CALL_PRE void VMA_CALL_POST vmaCalculatePoolStatistics(
+    VmaAllocator allocator,
+    VmaPool pool,
+    VmaDetailedStatistics* pPoolStats)
+{
+    VMA_ASSERT(allocator && pool && pPoolStats);
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+    allocator->CalculatePoolStatistics(pool, pPoolStats);
+}
+
+// Returns the result of VmaAllocator_T::CheckPoolCorruption() for `pool`.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckPoolCorruption(VmaAllocator allocator, VmaPool pool)
+{
+    VMA_ASSERT(allocator && pool);
+
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    VMA_DEBUG_LOG("vmaCheckPoolCorruption");
+
+    const VkResult checkRes = allocator->CheckPoolCorruption(pool);
+    return checkRes;
+}
+
+// Stores in *ppName the pointer returned by pool->GetName().
+VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolName(
+    VmaAllocator allocator,
+    VmaPool pool,
+    const char** ppName)
+{
+    VMA_ASSERT(allocator && pool && ppName);
+
+    VMA_DEBUG_LOG("vmaGetPoolName");
+
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    const char* const poolName = pool->GetName();
+    *ppName = poolName;
+}
+
+// Passes pName to pool->SetName(). pName is not asserted to be non-null.
+VMA_CALL_PRE void VMA_CALL_POST vmaSetPoolName(
+    VmaAllocator allocator,
+    VmaPool pool,
+    const char* pName)
+{
+    VMA_ASSERT(allocator && pool);
+    VMA_DEBUG_LOG("vmaSetPoolName");
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+    pool->SetName(pName);
+}
+
+// Allocates a single allocation for the given memory requirements, with no
+// dedicated buffer or image and an unknown suballocation type. On success, and
+// when pAllocationInfo is not null, *pAllocationInfo is filled for the new
+// allocation. Returns the result of VmaAllocator_T::AllocateMemory().
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemory(
+    VmaAllocator allocator,
+    const VkMemoryRequirements* pVkMemoryRequirements,
+    const VmaAllocationCreateInfo* pCreateInfo,
+    VmaAllocation* pAllocation,
+    VmaAllocationInfo* pAllocationInfo)
+{
+    VMA_ASSERT(allocator && pVkMemoryRequirements && pCreateInfo && pAllocation);
+
+    VMA_DEBUG_LOG("vmaAllocateMemory");
+
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    const VkResult allocRes = allocator->AllocateMemory(
+        *pVkMemoryRequirements,
+        false, // requiresDedicatedAllocation
+        false, // prefersDedicatedAllocation
+        VK_NULL_HANDLE, // dedicatedBuffer
+        VK_NULL_HANDLE, // dedicatedImage
+        VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage
+        *pCreateInfo,
+        VMA_SUBALLOCATION_TYPE_UNKNOWN,
+        1, // allocationCount
+        pAllocation);
+
+    const bool wantsInfo = (pAllocationInfo != VMA_NULL);
+    if(wantsInfo && allocRes == VK_SUCCESS)
+    {
+        allocator->GetAllocationInfo(*pAllocation, pAllocationInfo);
+    }
+
+    return allocRes;
+}
+
+// Allocates allocationCount allocations that share the same memory requirements
+// and create info, writing them to pAllocations. Returns VK_SUCCESS immediately
+// when allocationCount is 0. On success, and when pAllocationInfo is not null,
+// pAllocationInfo[i] is filled for every pAllocations[i].
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryPages(
+    VmaAllocator allocator,
+    const VkMemoryRequirements* pVkMemoryRequirements,
+    const VmaAllocationCreateInfo* pCreateInfo,
+    size_t allocationCount,
+    VmaAllocation* pAllocations,
+    VmaAllocationInfo* pAllocationInfo)
+{
+    if(allocationCount == 0)
+    {
+        return VK_SUCCESS;
+    }
+
+    VMA_ASSERT(allocator && pVkMemoryRequirements && pCreateInfo && pAllocations);
+
+    VMA_DEBUG_LOG("vmaAllocateMemoryPages");
+
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    const VkResult allocRes = allocator->AllocateMemory(
+        *pVkMemoryRequirements,
+        false, // requiresDedicatedAllocation
+        false, // prefersDedicatedAllocation
+        VK_NULL_HANDLE, // dedicatedBuffer
+        VK_NULL_HANDLE, // dedicatedImage
+        VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage
+        *pCreateInfo,
+        VMA_SUBALLOCATION_TYPE_UNKNOWN,
+        allocationCount,
+        pAllocations);
+
+    if(allocRes != VK_SUCCESS || pAllocationInfo == VMA_NULL)
+    {
+        return allocRes;
+    }
+
+    for(size_t pageIndex = 0; pageIndex < allocationCount; ++pageIndex)
+    {
+        allocator->GetAllocationInfo(pAllocations[pageIndex], &pAllocationInfo[pageIndex]);
+    }
+
+    return allocRes;
+}
+
+// Allocates memory suitable for `buffer`: queries the buffer's memory
+// requirements and dedicated-allocation hints, then allocates a single
+// allocation with `buffer` as the dedicated buffer. On success, and when
+// pAllocationInfo is not null, *pAllocationInfo is filled.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForBuffer(
+    VmaAllocator allocator,
+    VkBuffer buffer,
+    const VmaAllocationCreateInfo* pCreateInfo,
+    VmaAllocation* pAllocation,
+    VmaAllocationInfo* pAllocationInfo)
+{
+    VMA_ASSERT(allocator && buffer != VK_NULL_HANDLE && pCreateInfo && pAllocation);
+
+    VMA_DEBUG_LOG("vmaAllocateMemoryForBuffer");
+
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    VkMemoryRequirements bufferMemReq = {};
+    bool requiresDedicated = false;
+    bool prefersDedicated = false;
+    allocator->GetBufferMemoryRequirements(
+        buffer, bufferMemReq, requiresDedicated, prefersDedicated);
+
+    const VkResult allocRes = allocator->AllocateMemory(
+        bufferMemReq,
+        requiresDedicated,
+        prefersDedicated,
+        buffer, // dedicatedBuffer
+        VK_NULL_HANDLE, // dedicatedImage
+        VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage
+        *pCreateInfo,
+        VMA_SUBALLOCATION_TYPE_BUFFER,
+        1, // allocationCount
+        pAllocation);
+
+    if(allocRes == VK_SUCCESS && pAllocationInfo)
+    {
+        allocator->GetAllocationInfo(*pAllocation, pAllocationInfo);
+    }
+
+    return allocRes;
+}
+
+// Allocates memory suitable for `image`: queries the image's memory
+// requirements and dedicated-allocation hints, then allocates a single
+// allocation with `image` as the dedicated image. On success, and when
+// pAllocationInfo is not null, *pAllocationInfo is filled.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForImage(
+    VmaAllocator allocator,
+    VkImage image,
+    const VmaAllocationCreateInfo* pCreateInfo,
+    VmaAllocation* pAllocation,
+    VmaAllocationInfo* pAllocationInfo)
+{
+    VMA_ASSERT(allocator && image != VK_NULL_HANDLE && pCreateInfo && pAllocation);
+
+    VMA_DEBUG_LOG("vmaAllocateMemoryForImage");
+
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    VkMemoryRequirements imageMemReq = {};
+    bool requiresDedicated = false;
+    bool prefersDedicated = false;
+    allocator->GetImageMemoryRequirements(
+        image, imageMemReq, requiresDedicated, prefersDedicated);
+
+    const VkResult allocRes = allocator->AllocateMemory(
+        imageMemReq,
+        requiresDedicated,
+        prefersDedicated,
+        VK_NULL_HANDLE, // dedicatedBuffer
+        image, // dedicatedImage
+        VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage
+        *pCreateInfo,
+        VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN,
+        1, // allocationCount
+        pAllocation);
+
+    if(allocRes == VK_SUCCESS && pAllocationInfo)
+    {
+        allocator->GetAllocationInfo(*pAllocation, pAllocationInfo);
+    }
+
+    return allocRes;
+}
+
+// Frees a single allocation via VmaAllocator_T::FreeMemory().
+// Passing VK_NULL_HANDLE as allocation is a no-op.
+VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemory(
+    VmaAllocator allocator,
+    VmaAllocation allocation)
+{
+    VMA_ASSERT(allocator);
+
+    if(allocation != VK_NULL_HANDLE)
+    {
+        VMA_DEBUG_LOG("vmaFreeMemory");
+
+        VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+        const size_t allocationCount = 1;
+        allocator->FreeMemory(allocationCount, &allocation);
+    }
+}
+
+// Frees allocationCount allocations from pAllocations via
+// VmaAllocator_T::FreeMemory(). Does nothing when allocationCount is 0.
+VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemoryPages(
+    VmaAllocator allocator,
+    size_t allocationCount,
+    const VmaAllocation* pAllocations)
+{
+    if(allocationCount != 0)
+    {
+        VMA_ASSERT(allocator);
+
+        VMA_DEBUG_LOG("vmaFreeMemoryPages");
+
+        VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+        allocator->FreeMemory(allocationCount, pAllocations);
+    }
+}
+
+// Fills *pAllocationInfo by calling VmaAllocator_T::GetAllocationInfo().
+VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo(
+    VmaAllocator allocator,
+    VmaAllocation allocation,
+    VmaAllocationInfo* pAllocationInfo)
+{
+    VMA_ASSERT(allocator && allocation && pAllocationInfo);
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+    allocator->GetAllocationInfo(allocation, pAllocationInfo);
+}
+
+// Fills *pAllocationInfo by calling VmaAllocator_T::GetAllocationInfo2().
+VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo2(
+    VmaAllocator allocator,
+    VmaAllocation allocation,
+    VmaAllocationInfo2* pAllocationInfo)
+{
+    VMA_ASSERT(allocator && allocation && pAllocationInfo);
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+    allocator->GetAllocationInfo2(allocation, pAllocationInfo);
+}
+
+// Passes pUserData to allocation->SetUserData(). pUserData is not asserted
+// to be non-null.
+VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationUserData(
+    VmaAllocator allocator,
+    VmaAllocation allocation,
+    void* pUserData)
+{
+    VMA_ASSERT(allocator && allocation);
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+    allocation->SetUserData(allocator, pUserData);
+}
+
+// Passes pName (which may be null) to allocation->SetName().
+//
+// Validates its handles and takes the debug global mutex in the same way as
+// vmaSetAllocationUserData(), so that this entry point is also covered when
+// VMA_DEBUG_GLOBAL_MUTEX serialization is enabled.
+VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationName(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    const char* VMA_NULLABLE pName)
+{
+    VMA_ASSERT(allocator && allocation);
+
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    allocation->SetName(allocator, pName);
+}
+
+// Writes to *pFlags the property flags of the memory type that `allocation`
+// was made from.
+VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationMemoryProperties(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    VkMemoryPropertyFlags* VMA_NOT_NULL pFlags)
+{
+    VMA_ASSERT(allocator && allocation && pFlags);
+
+    const VkMemoryType& memType =
+        allocator->m_MemProps.memoryTypes[allocation->GetMemoryTypeIndex()];
+    *pFlags = memType.propertyFlags;
+}
+
+// Maps `allocation` via VmaAllocator_T::Map(), which receives ppData as its
+// output pointer. Returns the result of Map().
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaMapMemory(
+    VmaAllocator allocator,
+    VmaAllocation allocation,
+    void** ppData)
+{
+    VMA_ASSERT(allocator && allocation && ppData);
+
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    const VkResult mapRes = allocator->Map(allocation, ppData);
+    return mapRes;
+}
+
+// Unmaps `allocation` via VmaAllocator_T::Unmap().
+VMA_CALL_PRE void VMA_CALL_POST vmaUnmapMemory(
+    VmaAllocator allocator,
+    VmaAllocation allocation)
+{
+    VMA_ASSERT(allocator && allocation);
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+    allocator->Unmap(allocation);
+}
+
+// Flushes the given range of `allocation` by calling
+// FlushOrInvalidateAllocation() with VMA_CACHE_FLUSH, and returns its result.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocation(
+    VmaAllocator allocator,
+    VmaAllocation allocation,
+    VkDeviceSize offset,
+    VkDeviceSize size)
+{
+    VMA_ASSERT(allocator && allocation);
+
+    VMA_DEBUG_LOG("vmaFlushAllocation");
+
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    const VkResult flushRes = allocator->FlushOrInvalidateAllocation(
+        allocation, offset, size, VMA_CACHE_FLUSH);
+    return flushRes;
+}
+
+// Invalidates the given range of `allocation` by calling
+// FlushOrInvalidateAllocation() with VMA_CACHE_INVALIDATE, and returns its result.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocation(
+    VmaAllocator allocator,
+    VmaAllocation allocation,
+    VkDeviceSize offset,
+    VkDeviceSize size)
+{
+    VMA_ASSERT(allocator && allocation);
+
+    VMA_DEBUG_LOG("vmaInvalidateAllocation");
+
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    const VkResult invalidateRes = allocator->FlushOrInvalidateAllocation(
+        allocation, offset, size, VMA_CACHE_INVALIDATE);
+    return invalidateRes;
+}
+
+// Batch variant: forwards the arrays to allocator->FlushOrInvalidateAllocations()
+// with VMA_CACHE_FLUSH and returns its VkResult.
+// allocationCount == 0 returns VK_SUCCESS immediately, before `allocations` is asserted.
+// offsets and sizes are passed through without any null check in this function.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocations(
+ VmaAllocator allocator,
+ uint32_t allocationCount,
+ const VmaAllocation* allocations,
+ const VkDeviceSize* offsets,
+ const VkDeviceSize* sizes)
+{
+ VMA_ASSERT(allocator);
+
+ if(allocationCount == 0)
+ {
+ return VK_SUCCESS;
+ }
+
+ VMA_ASSERT(allocations);
+
+ VMA_DEBUG_LOG("vmaFlushAllocations");
+
+ VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+ return allocator->FlushOrInvalidateAllocations(allocationCount, allocations, offsets, sizes, VMA_CACHE_FLUSH);
+}
+
+// Batch variant: forwards the arrays to allocator->FlushOrInvalidateAllocations()
+// with VMA_CACHE_INVALIDATE and returns its VkResult.
+// allocationCount == 0 returns VK_SUCCESS immediately, before `allocations` is asserted.
+// offsets and sizes are passed through without any null check in this function.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocations(
+ VmaAllocator allocator,
+ uint32_t allocationCount,
+ const VmaAllocation* allocations,
+ const VkDeviceSize* offsets,
+ const VkDeviceSize* sizes)
+{
+ VMA_ASSERT(allocator);
+
+ if(allocationCount == 0)
+ {
+ return VK_SUCCESS;
+ }
+
+ VMA_ASSERT(allocations);
+
+ VMA_DEBUG_LOG("vmaInvalidateAllocations");
+
+ VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+ return allocator->FlushOrInvalidateAllocations(allocationCount, allocations, offsets, sizes, VMA_CACHE_INVALIDATE);
+}
+
+// Forwards to allocator->CopyMemoryToAllocation() and returns its VkResult.
+// size == 0 is a no-op that returns VK_SUCCESS (the pointer assertion still runs first).
+// dstAllocationLocalOffset and size are not range-checked in this function.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyMemoryToAllocation(
+ VmaAllocator allocator,
+ const void* pSrcHostPointer,
+ VmaAllocation dstAllocation,
+ VkDeviceSize dstAllocationLocalOffset,
+ VkDeviceSize size)
+{
+ VMA_ASSERT(allocator && pSrcHostPointer && dstAllocation);
+
+ if(size == 0)
+ {
+ return VK_SUCCESS;
+ }
+
+ VMA_DEBUG_LOG("vmaCopyMemoryToAllocation");
+
+ VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+ return allocator->CopyMemoryToAllocation(pSrcHostPointer, dstAllocation, dstAllocationLocalOffset, size);
+}
+
+// Forwards to allocator->CopyAllocationToMemory() and returns its VkResult.
+// size == 0 is a no-op that returns VK_SUCCESS (the pointer assertion still runs first).
+// srcAllocationLocalOffset and size are not range-checked in this function.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyAllocationToMemory(
+ VmaAllocator allocator,
+ VmaAllocation srcAllocation,
+ VkDeviceSize srcAllocationLocalOffset,
+ void* pDstHostPointer,
+ VkDeviceSize size)
+{
+ VMA_ASSERT(allocator && srcAllocation && pDstHostPointer);
+
+ if(size == 0)
+ {
+ return VK_SUCCESS;
+ }
+
+ VMA_DEBUG_LOG("vmaCopyAllocationToMemory");
+
+ VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+ return allocator->CopyAllocationToMemory(srcAllocation, srcAllocationLocalOffset, pDstHostPointer, size);
+}
+
+// Forwards memoryTypeBits to allocator->CheckCorruption() and returns its VkResult.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckCorruption(
+ VmaAllocator allocator,
+ uint32_t memoryTypeBits)
+{
+ VMA_ASSERT(allocator);
+
+ VMA_DEBUG_LOG("vmaCheckCorruption");
+
+ VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+ return allocator->CheckCorruption(memoryTypeBits);
+}
+
+// Creates a new VmaDefragmentationContext_T from *pInfo and returns it through *pContext.
+// Returns VK_ERROR_FEATURE_NOT_PRESENT (leaving *pContext untouched) when pInfo->pool is
+// set and that pool's block vector uses VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT;
+// otherwise returns VK_SUCCESS.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentation(
+    VmaAllocator allocator,
+    const VmaDefragmentationInfo* pInfo,
+    VmaDefragmentationContext* pContext)
+{
+    VMA_ASSERT(allocator && pInfo && pContext);
+
+    VMA_DEBUG_LOG("vmaBeginDefragmentation");
+
+    // Check if run on supported algorithms (only relevant when a custom pool is given).
+    const bool hasCustomPool = pInfo->pool != VMA_NULL;
+    if (hasCustomPool &&
+        (pInfo->pool->m_BlockVector.GetAlgorithm() & VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT) != 0)
+    {
+        return VK_ERROR_FEATURE_NOT_PRESENT;
+    }
+
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    VmaDefragmentationContext newContext =
+        vma_new(allocator, VmaDefragmentationContext_T)(allocator, *pInfo);
+    *pContext = newContext;
+    return VK_SUCCESS;
+}
+
+// Destroys a defragmentation context with vma_delete(). When pStats is non-null the
+// context's statistics are copied into *pStats first via context->GetStats().
+VMA_CALL_PRE void VMA_CALL_POST vmaEndDefragmentation(
+    VmaAllocator allocator,
+    VmaDefragmentationContext context,
+    VmaDefragmentationStats* pStats)
+{
+    VMA_ASSERT(allocator && context);
+
+    VMA_DEBUG_LOG("vmaEndDefragmentation");
+
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    // Statistics are optional and must be read before the context is deleted.
+    if (pStats != VMA_NULL)
+    {
+        context->GetStats(*pStats);
+    }
+
+    vma_delete(allocator, context);
+}
+
+// Forwards to context->DefragmentPassBegin(*pPassInfo) and returns its VkResult.
+// NOTE(review): `allocator` is neither asserted nor referenced in this body.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentationPass(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaDefragmentationContext VMA_NOT_NULL context,
+ VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo)
+{
+ VMA_ASSERT(context && pPassInfo);
+
+ VMA_DEBUG_LOG("vmaBeginDefragmentationPass");
+
+ VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+ return context->DefragmentPassBegin(*pPassInfo);
+}
+
+// Forwards to context->DefragmentPassEnd(*pPassInfo) and returns its VkResult.
+// NOTE(review): `allocator` is neither asserted nor referenced in this body.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaEndDefragmentationPass(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaDefragmentationContext VMA_NOT_NULL context,
+ VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo)
+{
+ VMA_ASSERT(context && pPassInfo);
+
+ VMA_DEBUG_LOG("vmaEndDefragmentationPass");
+
+ VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+ return context->DefragmentPassEnd(*pPassInfo);
+}
+
+// Binds `buffer` to `allocation` by calling allocator->BindBufferMemory() with an
+// allocation-local offset of 0 and a null pNext; returns that call's VkResult.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory(
+ VmaAllocator allocator,
+ VmaAllocation allocation,
+ VkBuffer buffer)
+{
+ VMA_ASSERT(allocator && allocation && buffer);
+
+ VMA_DEBUG_LOG("vmaBindBufferMemory");
+
+ VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+ return allocator->BindBufferMemory(allocation, 0, buffer, VMA_NULL);
+}
+
+// Extended form of vmaBindBufferMemory(): forwards the caller's allocationLocalOffset
+// and pNext to allocator->BindBufferMemory() and returns its VkResult.
+// pNext may be null; it is not asserted.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory2(
+ VmaAllocator allocator,
+ VmaAllocation allocation,
+ VkDeviceSize allocationLocalOffset,
+ VkBuffer buffer,
+ const void* pNext)
+{
+ VMA_ASSERT(allocator && allocation && buffer);
+
+ VMA_DEBUG_LOG("vmaBindBufferMemory2");
+
+ VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+ return allocator->BindBufferMemory(allocation, allocationLocalOffset, buffer, pNext);
+}
+
+// Binds `image` to `allocation` by calling allocator->BindImageMemory() with an
+// allocation-local offset of 0 and a null pNext; returns that call's VkResult.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory(
+ VmaAllocator allocator,
+ VmaAllocation allocation,
+ VkImage image)
+{
+ VMA_ASSERT(allocator && allocation && image);
+
+ VMA_DEBUG_LOG("vmaBindImageMemory");
+
+ VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+ return allocator->BindImageMemory(allocation, 0, image, VMA_NULL);
+}
+
+// Extended form of vmaBindImageMemory(): forwards the caller's allocationLocalOffset
+// and pNext to allocator->BindImageMemory() and returns its VkResult.
+// pNext may be null; it is not asserted.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory2(
+ VmaAllocator allocator,
+ VmaAllocation allocation,
+ VkDeviceSize allocationLocalOffset,
+ VkImage image,
+ const void* pNext)
+{
+ VMA_ASSERT(allocator && allocation && image);
+
+ VMA_DEBUG_LOG("vmaBindImageMemory2");
+
+ VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+ return allocator->BindImageMemory(allocation, allocationLocalOffset, image, pNext);
+}
+
+// Creates a VkBuffer, allocates memory for it through the allocator and, unless
+// VMA_ALLOCATION_CREATE_DONT_BIND_BIT is set, binds the two together.
+//
+// Outputs: *pBuffer and *pAllocation are reset to VK_NULL_HANDLE before anything else,
+// so they are well-defined on every failure path, including the argument-validation
+// failures below. *pAllocationInfo is filled only on success and only if non-null.
+//
+// Returns VK_SUCCESS when all steps succeed, VK_ERROR_INITIALIZATION_FAILED for a
+// zero-sized buffer or for device-address usage without
+// VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT, otherwise the negative VkResult of
+// the step that failed. Everything created before the failing step is released.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBuffer(
+    VmaAllocator allocator,
+    const VkBufferCreateInfo* pBufferCreateInfo,
+    const VmaAllocationCreateInfo* pAllocationCreateInfo,
+    VkBuffer* pBuffer,
+    VmaAllocation* pAllocation,
+    VmaAllocationInfo* pAllocationInfo)
+{
+    VMA_ASSERT(allocator && pBufferCreateInfo && pAllocationCreateInfo && pBuffer && pAllocation);
+
+    // Define the outputs up front so that early returns never leave them uninitialized.
+    *pBuffer = VK_NULL_HANDLE;
+    *pAllocation = VK_NULL_HANDLE;
+
+    if(pBufferCreateInfo->size == 0)
+    {
+        return VK_ERROR_INITIALIZATION_FAILED;
+    }
+    if((pBufferCreateInfo->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY) != 0 &&
+        !allocator->m_UseKhrBufferDeviceAddress)
+    {
+        VMA_ASSERT(0 && "Creating a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT is not valid if VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT was not used.");
+        return VK_ERROR_INITIALIZATION_FAILED;
+    }
+
+    VMA_DEBUG_LOG("vmaCreateBuffer");
+
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    // 1. Create VkBuffer.
+    VkResult res = (*allocator->GetVulkanFunctions().vkCreateBuffer)(
+        allocator->m_hDevice,
+        pBufferCreateInfo,
+        allocator->GetAllocationCallbacks(),
+        pBuffer);
+    if(res < 0)
+    {
+        return res;
+    }
+
+    // 2. vkGetBufferMemoryRequirements.
+    VkMemoryRequirements vkMemReq = {};
+    bool requiresDedicatedAllocation = false;
+    bool prefersDedicatedAllocation = false;
+    allocator->GetBufferMemoryRequirements(*pBuffer, vkMemReq,
+        requiresDedicatedAllocation, prefersDedicatedAllocation);
+
+    // 3. Allocate memory using allocator.
+    res = allocator->AllocateMemory(
+        vkMemReq,
+        requiresDedicatedAllocation,
+        prefersDedicatedAllocation,
+        *pBuffer, // dedicatedBuffer
+        VK_NULL_HANDLE, // dedicatedImage
+        VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), // dedicatedBufferImageUsage
+        *pAllocationCreateInfo,
+        VMA_SUBALLOCATION_TYPE_BUFFER,
+        1, // allocationCount
+        pAllocation);
+    if(res < 0)
+    {
+        // Allocation failed: only the buffer exists and has to be destroyed.
+        (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks());
+        *pBuffer = VK_NULL_HANDLE;
+        return res;
+    }
+
+    // 4. Bind buffer with memory, unless the caller opted out.
+    if((pAllocationCreateInfo->flags & VMA_ALLOCATION_CREATE_DONT_BIND_BIT) == 0)
+    {
+        res = allocator->BindBufferMemory(*pAllocation, 0, *pBuffer, VMA_NULL);
+    }
+    if(res < 0)
+    {
+        // Binding failed: release the allocation first, then the buffer.
+        allocator->FreeMemory(
+            1, // allocationCount
+            pAllocation);
+        *pAllocation = VK_NULL_HANDLE;
+        (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks());
+        *pBuffer = VK_NULL_HANDLE;
+        return res;
+    }
+
+    // All steps succeeded.
+#if VMA_STATS_STRING_ENABLED
+    (*pAllocation)->InitBufferUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5);
+#endif
+    if(pAllocationInfo != VMA_NULL)
+    {
+        allocator->GetAllocationInfo(*pAllocation, pAllocationInfo);
+    }
+
+    return VK_SUCCESS;
+}
+
+// Same sequence as vmaCreateBuffer(), but the alignment requested from the allocator
+// is the larger of the buffer's own requirement and minAlignment (asserted to be a
+// power of two).
+//
+// Outputs: *pBuffer and *pAllocation are reset to VK_NULL_HANDLE before anything else,
+// so they are well-defined on every failure path, including the argument-validation
+// failures below. *pAllocationInfo is filled only on success and only if non-null.
+//
+// Returns VK_SUCCESS when all steps succeed, VK_ERROR_INITIALIZATION_FAILED for a
+// zero-sized buffer or for device-address usage without
+// VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT, otherwise the negative VkResult of
+// the step that failed. Everything created before the failing step is released.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBufferWithAlignment(
+    VmaAllocator allocator,
+    const VkBufferCreateInfo* pBufferCreateInfo,
+    const VmaAllocationCreateInfo* pAllocationCreateInfo,
+    VkDeviceSize minAlignment,
+    VkBuffer* pBuffer,
+    VmaAllocation* pAllocation,
+    VmaAllocationInfo* pAllocationInfo)
+{
+    VMA_ASSERT(allocator && pBufferCreateInfo && pAllocationCreateInfo && VmaIsPow2(minAlignment) && pBuffer && pAllocation);
+
+    // Define the outputs up front so that early returns never leave them uninitialized.
+    *pBuffer = VK_NULL_HANDLE;
+    *pAllocation = VK_NULL_HANDLE;
+
+    if(pBufferCreateInfo->size == 0)
+    {
+        return VK_ERROR_INITIALIZATION_FAILED;
+    }
+    if((pBufferCreateInfo->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY) != 0 &&
+        !allocator->m_UseKhrBufferDeviceAddress)
+    {
+        VMA_ASSERT(0 && "Creating a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT is not valid if VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT was not used.");
+        return VK_ERROR_INITIALIZATION_FAILED;
+    }
+
+    VMA_DEBUG_LOG("vmaCreateBufferWithAlignment");
+
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    // 1. Create VkBuffer.
+    VkResult res = (*allocator->GetVulkanFunctions().vkCreateBuffer)(
+        allocator->m_hDevice,
+        pBufferCreateInfo,
+        allocator->GetAllocationCallbacks(),
+        pBuffer);
+    if(res < 0)
+    {
+        return res;
+    }
+
+    // 2. vkGetBufferMemoryRequirements.
+    VkMemoryRequirements vkMemReq = {};
+    bool requiresDedicatedAllocation = false;
+    bool prefersDedicatedAllocation = false;
+    allocator->GetBufferMemoryRequirements(*pBuffer, vkMemReq,
+        requiresDedicatedAllocation, prefersDedicatedAllocation);
+
+    // 2a. Include minAlignment
+    vkMemReq.alignment = VMA_MAX(vkMemReq.alignment, minAlignment);
+
+    // 3. Allocate memory using allocator.
+    res = allocator->AllocateMemory(
+        vkMemReq,
+        requiresDedicatedAllocation,
+        prefersDedicatedAllocation,
+        *pBuffer, // dedicatedBuffer
+        VK_NULL_HANDLE, // dedicatedImage
+        VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), // dedicatedBufferImageUsage
+        *pAllocationCreateInfo,
+        VMA_SUBALLOCATION_TYPE_BUFFER,
+        1, // allocationCount
+        pAllocation);
+    if(res < 0)
+    {
+        // Allocation failed: only the buffer exists and has to be destroyed.
+        (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks());
+        *pBuffer = VK_NULL_HANDLE;
+        return res;
+    }
+
+    // 4. Bind buffer with memory, unless the caller opted out.
+    if((pAllocationCreateInfo->flags & VMA_ALLOCATION_CREATE_DONT_BIND_BIT) == 0)
+    {
+        res = allocator->BindBufferMemory(*pAllocation, 0, *pBuffer, VMA_NULL);
+    }
+    if(res < 0)
+    {
+        // Binding failed: release the allocation first, then the buffer.
+        allocator->FreeMemory(
+            1, // allocationCount
+            pAllocation);
+        *pAllocation = VK_NULL_HANDLE;
+        (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks());
+        *pBuffer = VK_NULL_HANDLE;
+        return res;
+    }
+
+    // All steps succeeded.
+#if VMA_STATS_STRING_ENABLED
+    (*pAllocation)->InitBufferUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5);
+#endif
+    if(pAllocationInfo != VMA_NULL)
+    {
+        allocator->GetAllocationInfo(*pAllocation, pAllocationInfo);
+    }
+
+    return VK_SUCCESS;
+}
+
+// Convenience overload: delegates to vmaCreateAliasingBuffer2() with an
+// allocation-local offset of 0 and returns its result. No checks are done here.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaAllocation VMA_NOT_NULL allocation,
+ const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo,
+ VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer)
+{
+ return vmaCreateAliasingBuffer2(allocator, allocation, 0, pBufferCreateInfo, pBuffer);
+}
+
+// Creates a VkBuffer and binds it to an already existing allocation at
+// allocationLocalOffset; no new memory is allocated.
+//
+// *pBuffer is VK_NULL_HANDLE on every failure path. In particular, when binding
+// fails the freshly created buffer is destroyed and the output is cleared, so the
+// caller never receives a handle to a destroyed buffer.
+//
+// Returns VK_SUCCESS on success, VK_ERROR_INITIALIZATION_FAILED for a zero-sized
+// buffer or for device-address usage without
+// VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT, otherwise the negative VkResult
+// of vkCreateBuffer or of the bind.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer2(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    VkDeviceSize allocationLocalOffset,
+    const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo,
+    VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer)
+{
+    VMA_ASSERT(allocator && pBufferCreateInfo && pBuffer && allocation);
+    // The aliased range must fit inside the allocation.
+    VMA_ASSERT(allocationLocalOffset + pBufferCreateInfo->size <= allocation->GetSize());
+
+    VMA_DEBUG_LOG("vmaCreateAliasingBuffer2");
+
+    *pBuffer = VK_NULL_HANDLE;
+
+    if (pBufferCreateInfo->size == 0)
+    {
+        return VK_ERROR_INITIALIZATION_FAILED;
+    }
+    if ((pBufferCreateInfo->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY) != 0 &&
+        !allocator->m_UseKhrBufferDeviceAddress)
+    {
+        VMA_ASSERT(0 && "Creating a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT is not valid if VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT was not used.");
+        return VK_ERROR_INITIALIZATION_FAILED;
+    }
+
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    // 1. Create VkBuffer.
+    VkResult res = (*allocator->GetVulkanFunctions().vkCreateBuffer)(
+        allocator->m_hDevice,
+        pBufferCreateInfo,
+        allocator->GetAllocationCallbacks(),
+        pBuffer);
+    if (res >= 0)
+    {
+        // 2. Bind buffer with memory.
+        res = allocator->BindBufferMemory(allocation, allocationLocalOffset, *pBuffer, VMA_NULL);
+        if (res >= 0)
+        {
+            return VK_SUCCESS;
+        }
+        (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks());
+        // Do not hand a destroyed handle back to the caller.
+        *pBuffer = VK_NULL_HANDLE;
+    }
+    return res;
+}
+
+// Destroys `buffer` (if not VK_NULL_HANDLE) with vkDestroyBuffer and then frees
+// `allocation` (if not VK_NULL_HANDLE) through the allocator. Either handle may be
+// null independently; when both are null the call does nothing after the assertion.
+VMA_CALL_PRE void VMA_CALL_POST vmaDestroyBuffer(
+    VmaAllocator allocator,
+    VkBuffer buffer,
+    VmaAllocation allocation)
+{
+    VMA_ASSERT(allocator);
+
+    const bool hasBuffer = buffer != VK_NULL_HANDLE;
+    const bool hasAllocation = allocation != VK_NULL_HANDLE;
+    if(!hasBuffer && !hasAllocation)
+    {
+        return;
+    }
+
+    VMA_DEBUG_LOG("vmaDestroyBuffer");
+
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    // The buffer goes first, then the memory that backed it.
+    if(hasBuffer)
+    {
+        (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, buffer, allocator->GetAllocationCallbacks());
+    }
+    if(hasAllocation)
+    {
+        allocator->FreeMemory(
+            1, // allocationCount
+            &allocation);
+    }
+}
+
+// Creates a VkImage, allocates memory for it through the allocator and, unless
+// VMA_ALLOCATION_CREATE_DONT_BIND_BIT is set, binds the two together.
+//
+// Outputs: *pImage and *pAllocation are reset to VK_NULL_HANDLE before anything else,
+// so they are well-defined on every failure path, including the argument-validation
+// failure below. *pAllocationInfo is filled only on success and only if non-null.
+//
+// Returns VK_SUCCESS when all steps succeed, VK_ERROR_INITIALIZATION_FAILED when any
+// extent component, mipLevels or arrayLayers is 0, otherwise the VkResult of the step
+// that did not return VK_SUCCESS. After a failed allocation or bind, everything
+// created so far is released.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage(
+    VmaAllocator allocator,
+    const VkImageCreateInfo* pImageCreateInfo,
+    const VmaAllocationCreateInfo* pAllocationCreateInfo,
+    VkImage* pImage,
+    VmaAllocation* pAllocation,
+    VmaAllocationInfo* pAllocationInfo)
+{
+    VMA_ASSERT(allocator && pImageCreateInfo && pAllocationCreateInfo && pImage && pAllocation);
+
+    // Define the outputs up front so that early returns never leave them uninitialized.
+    *pImage = VK_NULL_HANDLE;
+    *pAllocation = VK_NULL_HANDLE;
+
+    if(pImageCreateInfo->extent.width == 0 ||
+        pImageCreateInfo->extent.height == 0 ||
+        pImageCreateInfo->extent.depth == 0 ||
+        pImageCreateInfo->mipLevels == 0 ||
+        pImageCreateInfo->arrayLayers == 0)
+    {
+        return VK_ERROR_INITIALIZATION_FAILED;
+    }
+
+    VMA_DEBUG_LOG("vmaCreateImage");
+
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    // 1. Create VkImage.
+    VkResult res = (*allocator->GetVulkanFunctions().vkCreateImage)(
+        allocator->m_hDevice,
+        pImageCreateInfo,
+        allocator->GetAllocationCallbacks(),
+        pImage);
+    if(res != VK_SUCCESS)
+    {
+        return res;
+    }
+
+    // The suballocation type follows the image tiling.
+    VmaSuballocationType suballocType = pImageCreateInfo->tiling == VK_IMAGE_TILING_OPTIMAL ?
+        VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL :
+        VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR;
+
+    // 2. Allocate memory using allocator.
+    VkMemoryRequirements vkMemReq = {};
+    bool requiresDedicatedAllocation = false;
+    bool prefersDedicatedAllocation = false;
+    allocator->GetImageMemoryRequirements(*pImage, vkMemReq,
+        requiresDedicatedAllocation, prefersDedicatedAllocation);
+
+    res = allocator->AllocateMemory(
+        vkMemReq,
+        requiresDedicatedAllocation,
+        prefersDedicatedAllocation,
+        VK_NULL_HANDLE, // dedicatedBuffer
+        *pImage, // dedicatedImage
+        VmaBufferImageUsage(*pImageCreateInfo), // dedicatedBufferImageUsage
+        *pAllocationCreateInfo,
+        suballocType,
+        1, // allocationCount
+        pAllocation);
+    if(res != VK_SUCCESS)
+    {
+        // Allocation failed: only the image exists and has to be destroyed.
+        (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, *pImage, allocator->GetAllocationCallbacks());
+        *pImage = VK_NULL_HANDLE;
+        return res;
+    }
+
+    // 3. Bind image with memory, unless the caller opted out.
+    if((pAllocationCreateInfo->flags & VMA_ALLOCATION_CREATE_DONT_BIND_BIT) == 0)
+    {
+        res = allocator->BindImageMemory(*pAllocation, 0, *pImage, VMA_NULL);
+    }
+    if(res != VK_SUCCESS)
+    {
+        // Binding failed: release the allocation first, then the image.
+        allocator->FreeMemory(
+            1, // allocationCount
+            pAllocation);
+        *pAllocation = VK_NULL_HANDLE;
+        (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, *pImage, allocator->GetAllocationCallbacks());
+        *pImage = VK_NULL_HANDLE;
+        return res;
+    }
+
+    // All steps succeeded.
+#if VMA_STATS_STRING_ENABLED
+    (*pAllocation)->InitImageUsage(*pImageCreateInfo);
+#endif
+    if(pAllocationInfo != VMA_NULL)
+    {
+        allocator->GetAllocationInfo(*pAllocation, pAllocationInfo);
+    }
+
+    return VK_SUCCESS;
+}
+
+// Convenience overload: delegates to vmaCreateAliasingImage2() with an
+// allocation-local offset of 0 and returns its result. No checks are done here.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage(
+ VmaAllocator VMA_NOT_NULL allocator,
+ VmaAllocation VMA_NOT_NULL allocation,
+ const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo,
+ VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage)
+{
+ return vmaCreateAliasingImage2(allocator, allocation, 0, pImageCreateInfo, pImage);
+}
+
+// Creates a VkImage and binds it to an already existing allocation at
+// allocationLocalOffset; no new memory is allocated.
+//
+// *pImage is VK_NULL_HANDLE on every failure path. In particular, when binding
+// fails the freshly created image is destroyed and the output is cleared, so the
+// caller never receives a handle to a destroyed image.
+//
+// Returns VK_SUCCESS on success, VK_ERROR_INITIALIZATION_FAILED when any extent
+// component, mipLevels or arrayLayers is 0, otherwise the negative VkResult of
+// vkCreateImage or of the bind.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage2(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    VkDeviceSize allocationLocalOffset,
+    const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo,
+    VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage)
+{
+    VMA_ASSERT(allocator && pImageCreateInfo && pImage && allocation);
+
+    *pImage = VK_NULL_HANDLE;
+
+    // Log under this function's own name so debug traces can be attributed correctly.
+    VMA_DEBUG_LOG("vmaCreateAliasingImage2");
+
+    if (pImageCreateInfo->extent.width == 0 ||
+        pImageCreateInfo->extent.height == 0 ||
+        pImageCreateInfo->extent.depth == 0 ||
+        pImageCreateInfo->mipLevels == 0 ||
+        pImageCreateInfo->arrayLayers == 0)
+    {
+        return VK_ERROR_INITIALIZATION_FAILED;
+    }
+
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    // 1. Create VkImage.
+    VkResult res = (*allocator->GetVulkanFunctions().vkCreateImage)(
+        allocator->m_hDevice,
+        pImageCreateInfo,
+        allocator->GetAllocationCallbacks(),
+        pImage);
+    if (res >= 0)
+    {
+        // 2. Bind image with memory.
+        res = allocator->BindImageMemory(allocation, allocationLocalOffset, *pImage, VMA_NULL);
+        if (res >= 0)
+        {
+            return VK_SUCCESS;
+        }
+        (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, *pImage, allocator->GetAllocationCallbacks());
+        // Do not hand a destroyed handle back to the caller.
+        *pImage = VK_NULL_HANDLE;
+    }
+    return res;
+}
+
+// Destroys `image` (if not VK_NULL_HANDLE) with vkDestroyImage and then frees
+// `allocation` (if not VK_NULL_HANDLE) through the allocator. Either handle may be
+// null independently; when both are null the call does nothing after the assertion.
+VMA_CALL_PRE void VMA_CALL_POST vmaDestroyImage(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VkImage VMA_NULLABLE_NON_DISPATCHABLE image,
+    VmaAllocation VMA_NULLABLE allocation)
+{
+    VMA_ASSERT(allocator);
+
+    const bool hasImage = image != VK_NULL_HANDLE;
+    const bool hasAllocation = allocation != VK_NULL_HANDLE;
+    if(!hasImage && !hasAllocation)
+    {
+        return;
+    }
+
+    VMA_DEBUG_LOG("vmaDestroyImage");
+
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    // The image goes first, then the memory that backed it.
+    if(hasImage)
+    {
+        (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, image, allocator->GetAllocationCallbacks());
+    }
+    if(hasAllocation)
+    {
+        allocator->FreeMemory(
+            1, // allocationCount
+            &allocation);
+    }
+}
+
+// Constructs a VmaVirtualBlock_T from *pCreateInfo using the caller's allocation
+// callbacks, stores it in *pVirtualBlock and runs its Init().
+// If Init() returns a negative VkResult the block is deleted again and
+// *pVirtualBlock is reset to VK_NULL_HANDLE. Init()'s result is returned either way.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateVirtualBlock(
+    const VmaVirtualBlockCreateInfo* VMA_NOT_NULL pCreateInfo,
+    VmaVirtualBlock VMA_NULLABLE * VMA_NOT_NULL pVirtualBlock)
+{
+    VMA_ASSERT(pCreateInfo && pVirtualBlock);
+    VMA_ASSERT(pCreateInfo->size > 0);
+    VMA_DEBUG_LOG("vmaCreateVirtualBlock");
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK;
+
+    VmaVirtualBlock newBlock = vma_new(pCreateInfo->pAllocationCallbacks, VmaVirtualBlock_T)(*pCreateInfo);
+    *pVirtualBlock = newBlock;
+
+    const VkResult initResult = newBlock->Init();
+    if(initResult >= 0)
+    {
+        return initResult;
+    }
+
+    // Initialization failed: tear the half-built block down again.
+    vma_delete(pCreateInfo->pAllocationCallbacks, newBlock);
+    *pVirtualBlock = VK_NULL_HANDLE;
+    return initResult;
+}
+
+// Deletes a virtual block with vma_delete(). Passing VK_NULL_HANDLE is a no-op.
+VMA_CALL_PRE void VMA_CALL_POST vmaDestroyVirtualBlock(VmaVirtualBlock VMA_NULLABLE virtualBlock)
+{
+    if(virtualBlock == VK_NULL_HANDLE)
+    {
+        return;
+    }
+
+    VMA_DEBUG_LOG("vmaDestroyVirtualBlock");
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK;
+
+    // The callbacks live inside the object being destroyed, so take a copy that
+    // stays valid for the duration of vma_delete().
+    VkAllocationCallbacks callbacksCopy = virtualBlock->m_AllocationCallbacks;
+    vma_delete(&callbacksCopy, virtualBlock);
+}
+
+// Converts the bool returned by virtualBlock->IsEmpty() into VK_TRUE / VK_FALSE.
+VMA_CALL_PRE VkBool32 VMA_CALL_POST vmaIsVirtualBlockEmpty(VmaVirtualBlock VMA_NOT_NULL virtualBlock)
+{
+ VMA_ASSERT(virtualBlock != VK_NULL_HANDLE);
+ VMA_DEBUG_LOG("vmaIsVirtualBlockEmpty");
+ VMA_DEBUG_GLOBAL_MUTEX_LOCK;
+ return virtualBlock->IsEmpty() ? VK_TRUE : VK_FALSE;
+}
+
+// Fills *pVirtualAllocInfo by forwarding to virtualBlock->GetAllocationInfo().
+// `allocation` itself is not checked in this function.
+VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualAllocationInfo(VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+ VmaVirtualAllocation VMA_NOT_NULL_NON_DISPATCHABLE allocation, VmaVirtualAllocationInfo* VMA_NOT_NULL pVirtualAllocInfo)
+{
+ VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pVirtualAllocInfo != VMA_NULL);
+ VMA_DEBUG_LOG("vmaGetVirtualAllocationInfo");
+ VMA_DEBUG_GLOBAL_MUTEX_LOCK;
+ virtualBlock->GetAllocationInfo(allocation, *pVirtualAllocInfo);
+}
+
+// Forwards to virtualBlock->Allocate(*pCreateInfo, *pAllocation, pOffset) and returns
+// its VkResult. pOffset is optional (VMA_NULLABLE) and is passed through unchecked.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaVirtualAllocate(VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+ const VmaVirtualAllocationCreateInfo* VMA_NOT_NULL pCreateInfo, VmaVirtualAllocation VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pAllocation,
+ VkDeviceSize* VMA_NULLABLE pOffset)
+{
+ VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pCreateInfo != VMA_NULL && pAllocation != VMA_NULL);
+ VMA_DEBUG_LOG("vmaVirtualAllocate");
+ VMA_DEBUG_GLOBAL_MUTEX_LOCK;
+ return virtualBlock->Allocate(*pCreateInfo, *pAllocation, pOffset);
+}
+
+// Releases a virtual allocation via virtualBlock->Free(). A VK_NULL_HANDLE
+// allocation is ignored, in which case virtualBlock is not even asserted.
+VMA_CALL_PRE void VMA_CALL_POST vmaVirtualFree(VmaVirtualBlock VMA_NOT_NULL virtualBlock, VmaVirtualAllocation VMA_NULLABLE_NON_DISPATCHABLE allocation)
+{
+    if(allocation == VK_NULL_HANDLE)
+    {
+        return;
+    }
+
+    VMA_ASSERT(virtualBlock != VK_NULL_HANDLE);
+    VMA_DEBUG_LOG("vmaVirtualFree");
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK;
+    virtualBlock->Free(allocation);
+}
+
+// Forwards to virtualBlock->Clear().
+VMA_CALL_PRE void VMA_CALL_POST vmaClearVirtualBlock(VmaVirtualBlock VMA_NOT_NULL virtualBlock)
+{
+ VMA_ASSERT(virtualBlock != VK_NULL_HANDLE);
+ VMA_DEBUG_LOG("vmaClearVirtualBlock");
+ VMA_DEBUG_GLOBAL_MUTEX_LOCK;
+ virtualBlock->Clear();
+}
+
+// Forwards pUserData for `allocation` to virtualBlock->SetAllocationUserData().
+// Only virtualBlock is asserted; allocation and pUserData are passed through unchecked.
+VMA_CALL_PRE void VMA_CALL_POST vmaSetVirtualAllocationUserData(VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+ VmaVirtualAllocation VMA_NOT_NULL_NON_DISPATCHABLE allocation, void* VMA_NULLABLE pUserData)
+{
+ VMA_ASSERT(virtualBlock != VK_NULL_HANDLE);
+ VMA_DEBUG_LOG("vmaSetVirtualAllocationUserData");
+ VMA_DEBUG_GLOBAL_MUTEX_LOCK;
+ virtualBlock->SetAllocationUserData(allocation, pUserData);
+}
+
+// Fills *pStats by forwarding to virtualBlock->GetStatistics().
+VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualBlockStatistics(VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+ VmaStatistics* VMA_NOT_NULL pStats)
+{
+ VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pStats != VMA_NULL);
+ VMA_DEBUG_LOG("vmaGetVirtualBlockStatistics");
+ VMA_DEBUG_GLOBAL_MUTEX_LOCK;
+ virtualBlock->GetStatistics(*pStats);
+}
+
+// Fills *pStats by forwarding to virtualBlock->CalculateDetailedStatistics().
+VMA_CALL_PRE void VMA_CALL_POST vmaCalculateVirtualBlockStatistics(VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+ VmaDetailedStatistics* VMA_NOT_NULL pStats)
+{
+ VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pStats != VMA_NULL);
+ VMA_DEBUG_LOG("vmaCalculateVirtualBlockStatistics");
+ VMA_DEBUG_GLOBAL_MUTEX_LOCK;
+ virtualBlock->CalculateDetailedStatistics(*pStats);
+}
+
+#if VMA_STATS_STRING_ENABLED
+
+// Builds the block's statistics text into a VmaStringBuilder and returns a copy of it
+// through *ppStatsString. The copy is made with VmaCreateStringCopy() using the block's
+// own allocation callbacks; vmaFreeVirtualBlockStatsString() releases such a string.
+// detailedMap is converted to bool (anything other than VK_FALSE counts as true).
+VMA_CALL_PRE void VMA_CALL_POST vmaBuildVirtualBlockStatsString(VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+ char* VMA_NULLABLE * VMA_NOT_NULL ppStatsString, VkBool32 detailedMap)
+{
+ VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && ppStatsString != VMA_NULL);
+ VMA_DEBUG_GLOBAL_MUTEX_LOCK;
+ const VkAllocationCallbacks* allocationCallbacks = virtualBlock->GetAllocationCallbacks();
+ VmaStringBuilder sb(allocationCallbacks);
+ virtualBlock->BuildStatsString(detailedMap != VK_FALSE, sb);
+ *ppStatsString = VmaCreateStringCopy(allocationCallbacks, sb.GetData(), sb.GetLength());
+}
+
+// Releases a stats string with VmaFreeString() using the block's allocation
+// callbacks. A null pStatsString is ignored, in which case virtualBlock is not
+// even asserted.
+VMA_CALL_PRE void VMA_CALL_POST vmaFreeVirtualBlockStatsString(VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+    char* VMA_NULLABLE pStatsString)
+{
+    if(pStatsString == VMA_NULL)
+    {
+        return;
+    }
+
+    VMA_ASSERT(virtualBlock != VK_NULL_HANDLE);
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK;
+    VmaFreeString(virtualBlock->GetAllocationCallbacks(), pStatsString);
+}
+#endif // VMA_STATS_STRING_ENABLED
+
+#if VMA_EXTERNAL_MEMORY_WIN32
+// Forwards to allocation->GetWin32Handle(allocator, hTargetProcess, pHandle) and
+// returns its VkResult. allocator, allocation and pHandle are asserted non-null.
+// This definition depends only on VMA_EXTERNAL_MEMORY_WIN32; it is deliberately kept
+// outside the VMA_STATS_STRING_ENABLED section so that it is still compiled when
+// stats strings are disabled.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaGetMemoryWin32Handle(VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation, HANDLE hTargetProcess, HANDLE* VMA_NOT_NULL pHandle)
+{
+    VMA_ASSERT(allocator && allocation && pHandle);
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK;
+    return allocation->GetWin32Handle(allocator, hTargetProcess, pHandle);
+}
+#endif // VMA_EXTERNAL_MEMORY_WIN32
+#endif // _VMA_PUBLIC_INTERFACE
+#endif // VMA_IMPLEMENTATION
+
+/**
+\page quick_start Quick start
+
+\section quick_start_project_setup Project setup
+
+Vulkan Memory Allocator comes in the form of a "stb-style" single header file.
+Although you can pull the entire repository (e.g. as a Git submodule) and a CMake script is also provided,
+you don't need to build it as a separate library project.
+You can add file "vk_mem_alloc.h" directly to your project and submit it to code repository next to your other source files.
+
+"Single header" doesn't mean that everything is contained in C/C++ declarations,
+like it tends to be in case of inline functions or C++ templates.
+It means that the implementation is bundled with the interface in a single file and needs to be extracted using a preprocessor macro.
+If you don't do it properly, it will result in linker errors.
+
+To do it properly:
+
+-# Include "vk_mem_alloc.h" file in each CPP file where you want to use the library.
+ This includes declarations of all members of the library.
+-# In exactly one CPP file, define the following macro before this include.
+ It also enables the internal definitions.
+
+\code
+#define VMA_IMPLEMENTATION
+#include "vk_mem_alloc.h"
+\endcode
+
+It may be a good idea to create dedicated CPP file just for this purpose, e.g. "VmaUsage.cpp".
+
+This library includes header `<vulkan/vulkan.h>`, which in turn
+includes `<windows.h>` on Windows. If you need some specific macros defined
+before including these headers (like `WIN32_LEAN_AND_MEAN` or
+`WINVER` for Windows, `VK_USE_PLATFORM_WIN32_KHR` for Vulkan), you must define
+them before every `#include` of this library.
+It may be a good idea to create a dedicated header file for this purpose, e.g. "VmaUsage.h",
+that will be included in other source files instead of VMA header directly.
+
+This library is written in C++, but has C-compatible interface.
+Thus, you can include and use "vk_mem_alloc.h" in C or C++ code, but full
+implementation with `VMA_IMPLEMENTATION` macro must be compiled as C++, NOT as C.
+Some features of C++14 are used and required. Features of C++20 are used optionally when available.
+Some headers of standard C and C++ library are used, but STL containers, RTTI, or C++ exceptions are not used.
+
+
+\section quick_start_initialization Initialization
+
+VMA offers library interface in a style similar to Vulkan, with object handles like #VmaAllocation,
+structures describing parameters of objects to be created like #VmaAllocationCreateInfo,
+and error codes returned from functions using `VkResult` type.
+
+The first and the main object that needs to be created is #VmaAllocator.
+It represents the initialization of the entire library.
+Only one such object should be created per `VkDevice`.
+You should create it at program startup, after `VkDevice` was created, and before any device memory allocation needs to be made.
+It must be destroyed before `VkDevice` is destroyed.
+
+At program startup:
+
+-# Initialize Vulkan to have `VkInstance`, `VkPhysicalDevice`, `VkDevice` objects.
+-# Fill VmaAllocatorCreateInfo structure and call vmaCreateAllocator() to create #VmaAllocator object.
+
+Only members `physicalDevice`, `device`, `instance` are required.
+However, you should inform the library which Vulkan version you use by setting
+VmaAllocatorCreateInfo::vulkanApiVersion and which extensions you enabled
+by setting VmaAllocatorCreateInfo::flags.
+Otherwise, VMA would use only features of Vulkan 1.0 core with no extensions.
+See below for details.
+
+\subsection quick_start_initialization_selecting_vulkan_version Selecting Vulkan version
+
+VMA supports Vulkan version down to 1.0, for backward compatibility.
+If you want to use higher version, you need to inform the library about it.
+This is a two-step process.
+
+<b>Step 1: Compile time.</b> By default, VMA compiles with code supporting the highest
+Vulkan version found in the included `<vulkan/vulkan.h>` that is also supported by the library.
+If this is OK, you don't need to do anything.
+However, if you want to compile VMA as if only some lower Vulkan version was available,
+define macro `VMA_VULKAN_VERSION` before every `#include "vk_mem_alloc.h"`.
+It should have decimal numeric value in form of ABBBCCC, where A = major, BBB = minor, CCC = patch Vulkan version.
+For example, to compile against Vulkan 1.2:
+
+\code
+#define VMA_VULKAN_VERSION 1002000 // Vulkan 1.2
+#include "vk_mem_alloc.h"
+\endcode
+
+<b>Step 2: Runtime.</b> Even when compiled with higher Vulkan version available,
+VMA can use only features of a lower version, which is configurable during creation of the #VmaAllocator object.
+By default, only Vulkan 1.0 is used.
+To initialize the allocator with support for higher Vulkan version, you need to set member
+VmaAllocatorCreateInfo::vulkanApiVersion to an appropriate value, e.g. using constants like `VK_API_VERSION_1_2`.
+See code sample below.
+
+\subsection quick_start_initialization_importing_vulkan_functions Importing Vulkan functions
+
+You may need to configure importing Vulkan functions. There are 3 ways to do this:
+
+-# **If you link with Vulkan static library** (e.g. "vulkan-1.lib" on Windows):
+ - You don't need to do anything.
+ - VMA will use these, as macro `VMA_STATIC_VULKAN_FUNCTIONS` is defined to 1 by default.
+-# **If you want VMA to fetch pointers to Vulkan functions dynamically** using `vkGetInstanceProcAddr`,
+ `vkGetDeviceProcAddr` (this is the option presented in the example below):
+ - Define `VMA_STATIC_VULKAN_FUNCTIONS` to 0, `VMA_DYNAMIC_VULKAN_FUNCTIONS` to 1.
+ - Provide pointers to these two functions via VmaVulkanFunctions::vkGetInstanceProcAddr,
+ VmaVulkanFunctions::vkGetDeviceProcAddr.
+ - The library will fetch pointers to all other functions it needs internally.
+-# **If you fetch pointers to all Vulkan functions in a custom way**, e.g. using some loader like
+ [Volk](https://github.com/zeux/volk):
+ - Define `VMA_STATIC_VULKAN_FUNCTIONS` and `VMA_DYNAMIC_VULKAN_FUNCTIONS` to 0.
+ - Pass these pointers via structure #VmaVulkanFunctions.
+
+\subsection quick_start_initialization_enabling_extensions Enabling extensions
+
+VMA can automatically use following Vulkan extensions.
+If you found them available on the selected physical device and you enabled them
+while creating `VkInstance` / `VkDevice` object, inform VMA about their availability
+by setting appropriate flags in VmaAllocatorCreateInfo::flags.
+
+Vulkan extension | VMA flag
+------------------------------|-----------------------------------------------------
+VK_KHR_dedicated_allocation | #VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT
+VK_KHR_bind_memory2 | #VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT
+VK_KHR_maintenance4 | #VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT
+VK_KHR_maintenance5 | #VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT
+VK_EXT_memory_budget | #VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT
+VK_KHR_buffer_device_address | #VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT
+VK_EXT_memory_priority | #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT
+VK_AMD_device_coherent_memory | #VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT
+VK_KHR_external_memory_win32 | #VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT
+
+Example with fetching pointers to Vulkan functions dynamically:
+
+\code
+#define VMA_STATIC_VULKAN_FUNCTIONS 0
+#define VMA_DYNAMIC_VULKAN_FUNCTIONS 1
+#include "vk_mem_alloc.h"
+
+...
+
+VmaVulkanFunctions vulkanFunctions = {};
+vulkanFunctions.vkGetInstanceProcAddr = &vkGetInstanceProcAddr;
+vulkanFunctions.vkGetDeviceProcAddr = &vkGetDeviceProcAddr;
+
+VmaAllocatorCreateInfo allocatorCreateInfo = {};
+allocatorCreateInfo.flags = VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT;
+allocatorCreateInfo.vulkanApiVersion = VK_API_VERSION_1_2;
+allocatorCreateInfo.physicalDevice = physicalDevice;
+allocatorCreateInfo.device = device;
+allocatorCreateInfo.instance = instance;
+allocatorCreateInfo.pVulkanFunctions = &vulkanFunctions;
+
+VmaAllocator allocator;
+vmaCreateAllocator(&allocatorCreateInfo, &allocator);
+
+// Entire program...
+
+// At the end, don't forget to:
+vmaDestroyAllocator(allocator);
+\endcode
+
+
+\subsection quick_start_initialization_other_config Other configuration options
+
+There are additional configuration options available through preprocessor macros that you can define
+before including VMA header and through parameters passed in #VmaAllocatorCreateInfo.
+They include a possibility to use your own callbacks for host memory allocations (`VkAllocationCallbacks`),
+callbacks for device memory allocations (instead of `vkAllocateMemory`, `vkFreeMemory`),
+or your custom `VMA_ASSERT` macro, among others.
+For more information, see: @ref configuration.
+
+
+\section quick_start_resource_allocation Resource allocation
+
+When you want to create a buffer or image:
+
+-# Fill `VkBufferCreateInfo` / `VkImageCreateInfo` structure.
+-# Fill VmaAllocationCreateInfo structure.
+-# Call vmaCreateBuffer() / vmaCreateImage() to get `VkBuffer`/`VkImage` with memory
+ already allocated and bound to it, plus a #VmaAllocation object that represents its underlying memory.
+
+\code
+VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufferInfo.size = 65536;
+bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VmaAllocationCreateInfo allocInfo = {};
+allocInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+VkBuffer buffer;
+VmaAllocation allocation;
+vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr);
+\endcode
+
+Don't forget to destroy your buffer and allocation objects when no longer needed:
+
+\code
+vmaDestroyBuffer(allocator, buffer, allocation);
+\endcode
+
+If you need to map the buffer, you must set flag
+#VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT
+in VmaAllocationCreateInfo::flags.
+There are many additional parameters that can control the choice of memory type to be used for the allocation
+and other features.
+For more information, see documentation chapters: @ref choosing_memory_type, @ref memory_mapping.
+
+
+\page choosing_memory_type Choosing memory type
+
+Physical devices in Vulkan support various combinations of memory heaps and
+types. Help with choosing correct and optimal memory type for your specific
+resource is one of the key features of this library. You can use it by filling
+appropriate members of VmaAllocationCreateInfo structure, as described below.
+You can also combine multiple methods.
+
+-# If you just want to find memory type index that meets your requirements, you
+ can use function: vmaFindMemoryTypeIndexForBufferInfo(),
+ vmaFindMemoryTypeIndexForImageInfo(), vmaFindMemoryTypeIndex().
+-# If you want to allocate a region of device memory without association with any
+ specific image or buffer, you can use function vmaAllocateMemory(). Usage of
+ this function is not recommended and usually not needed.
+ vmaAllocateMemoryPages() function is also provided for creating multiple allocations at once,
+ which may be useful for sparse binding.
+-# If you already have a buffer or an image created, you want to allocate memory
+ for it and then you will bind it yourself, you can use function
+ vmaAllocateMemoryForBuffer(), vmaAllocateMemoryForImage().
+ For binding you should use functions: vmaBindBufferMemory(), vmaBindImageMemory()
+ or their extended versions: vmaBindBufferMemory2(), vmaBindImageMemory2().
+-# If you want to create a buffer or an image, allocate memory for it, and bind
+ them together, all in one call, you can use function vmaCreateBuffer(),
+ vmaCreateImage().
+ <b>This is the easiest and recommended way to use this library!</b>
+
+When using 3. or 4., the library internally queries Vulkan for memory types
+supported for that buffer or image (function `vkGetBufferMemoryRequirements()`)
+and uses only one of these types.
+
+If no memory type can be found that meets all the requirements, these functions
+return `VK_ERROR_FEATURE_NOT_PRESENT`.
+
+You can leave VmaAllocationCreateInfo structure completely filled with zeros.
+It means no requirements are specified for memory type.
+It is valid, although not very useful.
+
+\section choosing_memory_type_usage Usage
+
+The easiest way to specify memory requirements is to fill member
+VmaAllocationCreateInfo::usage using one of the values of enum #VmaMemoryUsage.
+It defines high level, common usage types.
+Since version 3 of the library, it is recommended to use #VMA_MEMORY_USAGE_AUTO to let it select best memory type for your resource automatically.
+
+For example, if you want to create a uniform buffer that will be filled using
+transfer only once or infrequently and then used for rendering every frame as a uniform buffer, you can
+do it using following code. The buffer will most likely end up in a memory type with
+`VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT` to be fast to access by the GPU device.
+
+\code
+VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufferInfo.size = 65536;
+bufferInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VmaAllocationCreateInfo allocInfo = {};
+allocInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+VkBuffer buffer;
+VmaAllocation allocation;
+vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr);
+\endcode
+
+If you have a preference for putting the resource in GPU (device) memory or CPU (host) memory
+on systems with discrete graphics card that have the memories separate, you can use
+#VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE or #VMA_MEMORY_USAGE_AUTO_PREFER_HOST.
+
+When using `VMA_MEMORY_USAGE_AUTO*` while you want to map the allocated memory,
+you also need to specify one of the host access flags:
+#VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT.
+This will help the library decide about preferred memory type to ensure it has `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`
+so you can map it.
+
+For example, a staging buffer that will be filled via mapped pointer and then
+used as a source of transfer to the buffer described previously can be created like this.
+It will likely end up in a memory type that is `HOST_VISIBLE` and `HOST_COHERENT`
+but not `HOST_CACHED` (meaning uncached, write-combined) and not `DEVICE_LOCAL` (meaning system RAM).
+
+\code
+VkBufferCreateInfo stagingBufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+stagingBufferInfo.size = 65536;
+stagingBufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+
+VmaAllocationCreateInfo stagingAllocInfo = {};
+stagingAllocInfo.usage = VMA_MEMORY_USAGE_AUTO;
+stagingAllocInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
+
+VkBuffer stagingBuffer;
+VmaAllocation stagingAllocation;
+vmaCreateBuffer(allocator, &stagingBufferInfo, &stagingAllocInfo, &stagingBuffer, &stagingAllocation, nullptr);
+\endcode
+
+For more examples of creating different kinds of resources, see chapter \ref usage_patterns.
+See also: @ref memory_mapping.
+
+Usage values `VMA_MEMORY_USAGE_AUTO*` are legal to use only when the library knows
+about the resource being created by having `VkBufferCreateInfo` / `VkImageCreateInfo` passed,
+so they work with functions like: vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo() etc.
+If you allocate raw memory using function vmaAllocateMemory(), you have to use other means of selecting
+memory type, as described below.
+
+\note
+Old usage values (`VMA_MEMORY_USAGE_GPU_ONLY`, `VMA_MEMORY_USAGE_CPU_ONLY`,
+`VMA_MEMORY_USAGE_CPU_TO_GPU`, `VMA_MEMORY_USAGE_GPU_TO_CPU`, `VMA_MEMORY_USAGE_CPU_COPY`)
+are still available and work the same way as in previous versions of the library
+for backward compatibility, but they are deprecated.
+
+\section choosing_memory_type_required_preferred_flags Required and preferred flags
+
+You can specify more detailed requirements by filling members
+VmaAllocationCreateInfo::requiredFlags and VmaAllocationCreateInfo::preferredFlags
+with a combination of bits from enum `VkMemoryPropertyFlags`. For example,
+if you want to create a buffer that will be persistently mapped on host (so it
+must be `HOST_VISIBLE`) and preferably will also be `HOST_COHERENT` and `HOST_CACHED`,
+use following code:
+
+\code
+VmaAllocationCreateInfo allocInfo = {};
+allocInfo.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+allocInfo.preferredFlags = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+allocInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT;
+
+VkBuffer buffer;
+VmaAllocation allocation;
+vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr);
+\endcode
+
+A memory type is chosen that has all the required flags and as many preferred
+flags set as possible.
+
+Value passed in VmaAllocationCreateInfo::usage is internally converted to a set of required and preferred flags,
+plus some extra "magic" (heuristics).
+
+\section choosing_memory_type_explicit_memory_types Explicit memory types
+
+If you inspected memory types available on the physical device and <b>you have
+a preference for memory types that you want to use</b>, you can fill member
+VmaAllocationCreateInfo::memoryTypeBits. It is a bit mask, where each bit set
+means that a memory type with that index is allowed to be used for the
+allocation. Special value 0, just like `UINT32_MAX`, means there are no
+restrictions to memory type index.
+
+Please note that this member is NOT just a memory type index.
+Still you can use it to choose just one, specific memory type.
+For example, if you already determined that your buffer should be created in
+memory type 2, use following code:
+
+\code
+uint32_t memoryTypeIndex = 2;
+
+VmaAllocationCreateInfo allocInfo = {};
+allocInfo.memoryTypeBits = 1u << memoryTypeIndex;
+
+VkBuffer buffer;
+VmaAllocation allocation;
+vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr);
+\endcode
+
+You can also use this parameter to <b>exclude some memory types</b>.
+If you inspect memory heaps and types available on the current physical device and
+you determine that for some reason you don't want to use a specific memory type for the allocation,
+you can enable automatic memory type selection but exclude certain memory type or types
+by setting all bits of `memoryTypeBits` to 1 except the ones you choose.
+
+\code
+// ...
+uint32_t excludedMemoryTypeIndex = 2;
+VmaAllocationCreateInfo allocInfo = {};
+allocInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocInfo.memoryTypeBits = ~(1u << excludedMemoryTypeIndex);
+// ...
+\endcode
+
+
+\section choosing_memory_type_custom_memory_pools Custom memory pools
+
+If you allocate from custom memory pool, all the ways of specifying memory
+requirements described above are not applicable and the aforementioned members
+of VmaAllocationCreateInfo structure are ignored. Memory type is selected
+explicitly when creating the pool and then used to make all the allocations from
+that pool. For further details, see \ref custom_memory_pools.
+
+\section choosing_memory_type_dedicated_allocations Dedicated allocations
+
+Memory for allocations is reserved out of larger block of `VkDeviceMemory`
+allocated from Vulkan internally. That is the main feature of this whole library.
+You can still request a separate memory block to be created for an allocation,
+just like you would do in a trivial solution without using any allocator.
+In that case, a buffer or image is always bound to that memory at offset 0.
+This is called a "dedicated allocation".
+You can explicitly request it by using flag #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT.
+The library can also internally decide to use dedicated allocation in some cases, e.g.:
+
+- When the size of the allocation is large.
+- When [VK_KHR_dedicated_allocation](@ref vk_khr_dedicated_allocation) extension is enabled
+ and it reports that dedicated allocation is required or recommended for the resource.
+- When allocation of next big memory block fails due to not enough device memory,
+ but allocation with the exact requested size succeeds.
+
+
+\page memory_mapping Memory mapping
+
+To "map memory" in Vulkan means to obtain a CPU pointer to `VkDeviceMemory`,
+to be able to read from it or write to it in CPU code.
+Mapping is possible only of memory allocated from a memory type that has
+`VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` flag.
+Functions `vkMapMemory()`, `vkUnmapMemory()` are designed for this purpose.
+You can use them directly with memory allocated by this library,
+but it is not recommended because of following issue:
+Mapping the same `VkDeviceMemory` block multiple times is illegal - only one mapping at a time is allowed.
+This includes mapping disjoint regions. Mapping is not reference-counted internally by Vulkan.
+It is also not thread-safe.
+Because of this, Vulkan Memory Allocator provides following facilities:
+
+\note If you want to be able to map an allocation, you need to specify one of the flags
+#VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT
+in VmaAllocationCreateInfo::flags. These flags are required for an allocation to be mappable
+when using #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` enum values.
+For other usage values they are ignored and every such allocation made in `HOST_VISIBLE` memory type is mappable,
+but these flags can still be used for consistency.
+
+\section memory_mapping_copy_functions Copy functions
+
+The easiest way to copy data from a host pointer to an allocation is to use convenience function vmaCopyMemoryToAllocation().
+It automatically maps the Vulkan memory temporarily (if not already mapped), performs `memcpy`,
+and calls `vkFlushMappedMemoryRanges` (if required - if memory type is not `HOST_COHERENT`).
+
+It is also the safest one, because using `memcpy` avoids a risk of accidentally introducing memory reads
+(e.g. by doing `pMappedVectors[i] += v`), which may be very slow on memory types that are not `HOST_CACHED`.
+
+\code
+struct ConstantBuffer
+{
+ ...
+};
+ConstantBuffer constantBufferData = ...
+
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = sizeof(ConstantBuffer);
+bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
+
+VkBuffer buf;
+VmaAllocation alloc;
+vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr);
+
+vmaCopyMemoryToAllocation(allocator, &constantBufferData, alloc, 0, sizeof(ConstantBuffer));
+\endcode
+
+Copy in the other direction - from an allocation to a host pointer can be performed the same way using function vmaCopyAllocationToMemory().
+
+\section memory_mapping_mapping_functions Mapping functions
+
+The library provides following functions for mapping of a specific allocation: vmaMapMemory(), vmaUnmapMemory().
+They are safer and more convenient to use than standard Vulkan functions.
+You can map an allocation multiple times simultaneously - mapping is reference-counted internally.
+You can also map different allocations simultaneously regardless of whether they use the same `VkDeviceMemory` block.
+The way it is implemented is that the library always maps entire memory block, not just region of the allocation.
+For further details, see description of vmaMapMemory() function.
+Example:
+
+\code
+// Having these objects initialized:
+struct ConstantBuffer
+{
+ ...
+};
+ConstantBuffer constantBufferData = ...
+
+VmaAllocator allocator = ...
+VkBuffer constantBuffer = ...
+VmaAllocation constantBufferAllocation = ...
+
+// You can map and fill your buffer using following code:
+
+void* mappedData;
+vmaMapMemory(allocator, constantBufferAllocation, &mappedData);
+memcpy(mappedData, &constantBufferData, sizeof(constantBufferData));
+vmaUnmapMemory(allocator, constantBufferAllocation);
+\endcode
+
+When mapping, you may see a warning from Vulkan validation layer similar to this one:
+
+<i>Mapping an image with layout VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL can result in undefined behavior if this memory is used by the device. Only GENERAL or PREINITIALIZED should be used.</i>
+
+It happens because the library maps entire `VkDeviceMemory` block, where different
+types of images and buffers may end up together, especially on GPUs with unified memory like Intel.
+You can safely ignore it if you are sure you access only memory of the intended
+object that you wanted to map.
+
+
+\section memory_mapping_persistently_mapped_memory Persistently mapped memory
+
+Keeping your memory persistently mapped is generally OK in Vulkan.
+You don't need to unmap it before using its data on the GPU.
+The library provides a special feature designed for that:
+Allocations made with #VMA_ALLOCATION_CREATE_MAPPED_BIT flag set in
+VmaAllocationCreateInfo::flags stay mapped all the time,
+so you can just access CPU pointer to it any time
+without a need to call any "map" or "unmap" function.
+Example:
+
+\code
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = sizeof(ConstantBuffer);
+bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
+ VMA_ALLOCATION_CREATE_MAPPED_BIT;
+
+VkBuffer buf;
+VmaAllocation alloc;
+VmaAllocationInfo allocInfo;
+vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
+
+// Buffer is already mapped. You can access its memory.
+memcpy(allocInfo.pMappedData, &constantBufferData, sizeof(constantBufferData));
+\endcode
+
+\note #VMA_ALLOCATION_CREATE_MAPPED_BIT by itself doesn't guarantee that the allocation will end up
+in a mappable memory type.
+For this, you need to also specify #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or
+#VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT.
+#VMA_ALLOCATION_CREATE_MAPPED_BIT only guarantees that if the memory is `HOST_VISIBLE`, the allocation will be mapped on creation.
+For an example of how to make use of this fact, see section \ref usage_patterns_advanced_data_uploading.
+
+\section memory_mapping_cache_control Cache flush and invalidate
+
+Memory in Vulkan doesn't need to be unmapped before using it on GPU,
+but unless a memory type has `VK_MEMORY_PROPERTY_HOST_COHERENT_BIT` flag set,
+you need to manually **invalidate** cache before reading of mapped pointer
+and **flush** cache after writing to mapped pointer.
+Map/unmap operations don't do that automatically.
+Vulkan provides following functions for this purpose `vkFlushMappedMemoryRanges()`,
+`vkInvalidateMappedMemoryRanges()`, but this library provides more convenient
+functions that refer to given allocation object: vmaFlushAllocation(),
+vmaInvalidateAllocation(),
+or multiple objects at once: vmaFlushAllocations(), vmaInvalidateAllocations().
+
+Regions of memory specified for flush/invalidate must be aligned to
+`VkPhysicalDeviceLimits::nonCoherentAtomSize`. This is automatically ensured by the library.
+In any memory type that is `HOST_VISIBLE` but not `HOST_COHERENT`, all allocations
+within blocks are aligned to this value, so their offsets are always a multiple of
+`nonCoherentAtomSize` and two different allocations never share same "line" of this size.
+
+Also, Windows drivers from all 3 PC GPU vendors (AMD, Intel, NVIDIA)
+currently provide `HOST_COHERENT` flag on all memory types that are
+`HOST_VISIBLE`, so on PC you may not need to bother.
+
+
+\page staying_within_budget Staying within budget
+
+When developing a graphics-intensive game or program, it is important to avoid allocating
+more GPU memory than is physically available. When the memory is over-committed,
+various bad things can happen, depending on the specific GPU, graphics driver, and
+operating system:
+
+- It may just work without any problems.
+- The application may slow down because some memory blocks are moved to system RAM
+ and the GPU has to access them through PCI Express bus.
+- A new allocation may take a very long time to complete, even a few seconds, and possibly
+ freeze entire system.
+- The new allocation may fail with `VK_ERROR_OUT_OF_DEVICE_MEMORY`.
+- It may even result in GPU crash (TDR), observed as `VK_ERROR_DEVICE_LOST`
+ returned somewhere later.
+
+\section staying_within_budget_querying_for_budget Querying for budget
+
+To query for current memory usage and available budget, use function vmaGetHeapBudgets().
+Returned structure #VmaBudget contains quantities expressed in bytes, per Vulkan memory heap.
+
+Please note that this function returns different information and works faster than
+vmaCalculateStatistics(). vmaGetHeapBudgets() can be called every frame or even before every
+allocation, while vmaCalculateStatistics() is intended to be used rarely,
+only to obtain statistical information, e.g. for debugging purposes.
+
+It is recommended to use <b>VK_EXT_memory_budget</b> device extension to obtain information
+about the budget from Vulkan device. VMA is able to use this extension automatically.
+When not enabled, the allocator behaves the same way, but then it estimates current usage
+and available budget based on its internal information and Vulkan memory heap sizes,
+which may be less precise. In order to use this extension:
+
+1. Make sure extensions VK_EXT_memory_budget and VK_KHR_get_physical_device_properties2
+ required by it are available and enable them. Please note that the first is a device
+ extension and the second is instance extension!
+2. Use flag #VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT when creating #VmaAllocator object.
+3. Make sure to call vmaSetCurrentFrameIndex() every frame. Budget is queried from
+ Vulkan inside of it to avoid overhead of querying it with every allocation.
+
+\section staying_within_budget_controlling_memory_usage Controlling memory usage
+
+There are many ways in which you can try to stay within the budget.
+
+First, when making new allocation requires allocating a new memory block, the library
+tries not to exceed the budget automatically. If a block with default recommended size
+(e.g. 256 MB) would go over budget, a smaller block is allocated, possibly even
+dedicated memory for just this resource.
+
+If the size of the requested resource plus current memory usage is more than the
+budget, by default the library still tries to create it, leaving it to the Vulkan
+implementation whether the allocation succeeds or fails. You can change this behavior
+by using #VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT flag. With it, the allocation is
+not made if it would exceed the budget or if the budget is already exceeded.
+VMA then tries to make the allocation from the next eligible Vulkan memory type.
+If all of them fail, the call then fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY`.
+Example usage pattern may be to pass the #VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT flag
+when creating resources that are not essential for the application (e.g. the texture
+of a specific object) and not to pass it when creating critically important resources
+(e.g. render targets).
+
+On AMD graphics cards there is a custom vendor extension available: <b>VK_AMD_memory_overallocation_behavior</b>
+that allows controlling the behavior of the Vulkan implementation in out-of-memory cases -
+whether it should fail with an error code or still allow the allocation.
+Usage of this extension involves only passing extra structure on Vulkan device creation,
+so it is out of scope of this library.
+
+Finally, you can also use #VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT flag to make sure
+a new allocation is created only when it fits inside one of the existing memory blocks.
+If it would require allocating a new block, it fails instead with `VK_ERROR_OUT_OF_DEVICE_MEMORY`.
+This also ensures that the function call is very fast because it never goes to Vulkan
+to obtain a new block.
+
+\note Creating \ref custom_memory_pools with VmaPoolCreateInfo::minBlockCount
+set to more than 0 will currently try to allocate memory blocks without checking whether they
+fit within budget.
+
+
+\page resource_aliasing Resource aliasing (overlap)
+
+New explicit graphics APIs (Vulkan and Direct3D 12), thanks to manual memory
+management, give an opportunity to alias (overlap) multiple resources in the
+same region of memory - a feature not available in the old APIs (Direct3D 11, OpenGL).
+It can be useful to save video memory, but it must be used with caution.
+
+For example, if you know the flow of your whole render frame in advance, you
+are going to use some intermediate textures or buffers only during a small range of render passes,
+and you know these ranges don't overlap in time, you can bind these resources to
+the same place in memory, even if they have completely different parameters (width, height, format etc.).
+
+![Resource aliasing (overlap)](../gfx/Aliasing.png)
+
+Such scenario is possible using VMA, but you need to create your images manually.
+Then you need to calculate parameters of an allocation to be made using formula:
+
+- allocation size = max(size of each image)
+- allocation alignment = max(alignment of each image)
+- allocation memoryTypeBits = bitwise AND(memoryTypeBits of each image)
+
+Following example shows two different images bound to the same place in memory,
+allocated to fit largest of them.
+
+\code
+// A 512x512 texture to be sampled.
+VkImageCreateInfo img1CreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
+img1CreateInfo.imageType = VK_IMAGE_TYPE_2D;
+img1CreateInfo.extent.width = 512;
+img1CreateInfo.extent.height = 512;
+img1CreateInfo.extent.depth = 1;
+img1CreateInfo.mipLevels = 10;
+img1CreateInfo.arrayLayers = 1;
+img1CreateInfo.format = VK_FORMAT_R8G8B8A8_SRGB;
+img1CreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
+img1CreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+img1CreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
+img1CreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
+
+// A full screen texture to be used as color attachment.
+VkImageCreateInfo img2CreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
+img2CreateInfo.imageType = VK_IMAGE_TYPE_2D;
+img2CreateInfo.extent.width = 1920;
+img2CreateInfo.extent.height = 1080;
+img2CreateInfo.extent.depth = 1;
+img2CreateInfo.mipLevels = 1;
+img2CreateInfo.arrayLayers = 1;
+img2CreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
+img2CreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
+img2CreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+img2CreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+img2CreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
+
+VkImage img1;
+res = vkCreateImage(device, &img1CreateInfo, nullptr, &img1);
+VkImage img2;
+res = vkCreateImage(device, &img2CreateInfo, nullptr, &img2);
+
+VkMemoryRequirements img1MemReq;
+vkGetImageMemoryRequirements(device, img1, &img1MemReq);
+VkMemoryRequirements img2MemReq;
+vkGetImageMemoryRequirements(device, img2, &img2MemReq);
+
+VkMemoryRequirements finalMemReq = {};
+finalMemReq.size = std::max(img1MemReq.size, img2MemReq.size);
+finalMemReq.alignment = std::max(img1MemReq.alignment, img2MemReq.alignment);
+finalMemReq.memoryTypeBits = img1MemReq.memoryTypeBits & img2MemReq.memoryTypeBits;
+// Validate if(finalMemReq.memoryTypeBits != 0)
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+
+VmaAllocation alloc;
+res = vmaAllocateMemory(allocator, &finalMemReq, &allocCreateInfo, &alloc, nullptr);
+
+res = vmaBindImageMemory(allocator, alloc, img1);
+res = vmaBindImageMemory(allocator, alloc, img2);
+
+// You can use img1, img2 here, but not at the same time!
+
+vmaFreeMemory(allocator, alloc);
+vkDestroyImage(device, img2, nullptr);
+vkDestroyImage(device, img1, nullptr);
+\endcode
+
+VMA also provides convenience functions that create a buffer or image and bind it to memory
+represented by an existing #VmaAllocation:
+vmaCreateAliasingBuffer(), vmaCreateAliasingBuffer2(),
+vmaCreateAliasingImage(), vmaCreateAliasingImage2().
+Versions with "2" offer additional parameter `allocationLocalOffset`.
+
+Remember that using resources that alias in memory requires proper synchronization.
+You need to issue a memory barrier to make sure commands that use `img1` and `img2`
+don't overlap on GPU timeline.
+You also need to treat a resource after aliasing as uninitialized - containing garbage data.
+For example, if you use `img1` and then want to use `img2`, you need to issue
+an image memory barrier for `img2` with `oldLayout` = `VK_IMAGE_LAYOUT_UNDEFINED`.
+
+Additional considerations:
+
+- Vulkan also allows interpreting the contents of memory between aliasing resources consistently in some cases.
+See chapter 11.8. "Memory Aliasing" of Vulkan specification or `VK_IMAGE_CREATE_ALIAS_BIT` flag.
+- You can create more complex layout where different images and buffers are bound
+at different offsets inside one large allocation. For example, one can imagine
+a big texture used in some render passes, aliasing with a set of many small buffers
+used in some further passes. To bind a resource at a non-zero offset in an allocation,
+use vmaBindBufferMemory2() / vmaBindImageMemory2().
+- Before allocating memory for the resources you want to alias, check `memoryTypeBits`
+returned in memory requirements of each resource to make sure the bits overlap.
+Some GPUs may expose multiple memory types suitable e.g. only for buffers or
+images with `COLOR_ATTACHMENT` usage, so the sets of memory types supported by your
+resources may be disjoint. Aliasing them is not possible in that case.
+
+
+\page custom_memory_pools Custom memory pools
+
+A memory pool contains a number of `VkDeviceMemory` blocks.
+The library automatically creates and manages default pool for each memory type available on the device.
+Default memory pool automatically grows in size.
+Size of allocated blocks is also variable and managed automatically.
+You are using default pools whenever you leave VmaAllocationCreateInfo::pool = null.
+
+You can create custom pool and allocate memory out of it.
+It can be useful if you want to:
+
+- Keep certain kind of allocations separate from others.
+- Enforce particular, fixed size of Vulkan memory blocks.
+- Limit maximum amount of Vulkan memory allocated for that pool.
+- Reserve minimum or fixed amount of Vulkan memory always preallocated for that pool.
+- Use extra parameters for a set of your allocations that are available in #VmaPoolCreateInfo but not in
+ #VmaAllocationCreateInfo - e.g., custom minimum alignment, custom `pNext` chain.
+- Perform defragmentation on a specific subset of your allocations.
+
+To use custom memory pools:
+
+-# Fill VmaPoolCreateInfo structure.
+-# Call vmaCreatePool() to obtain #VmaPool handle.
+-# When making an allocation, set VmaAllocationCreateInfo::pool to this handle.
+ You don't need to specify any other parameters of this structure, like `usage`.
+
+Example:
+
+\code
+// Find memoryTypeIndex for the pool.
+VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+sampleBufCreateInfo.size = 0x10000; // Doesn't matter.
+sampleBufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VmaAllocationCreateInfo sampleAllocCreateInfo = {};
+sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+uint32_t memTypeIndex;
+VkResult res = vmaFindMemoryTypeIndexForBufferInfo(allocator,
+ &sampleBufCreateInfo, &sampleAllocCreateInfo, &memTypeIndex);
+// Check res...
+
+// Create a pool that can have at most 2 blocks, 128 MiB each.
+VmaPoolCreateInfo poolCreateInfo = {};
+poolCreateInfo.memoryTypeIndex = memTypeIndex;
+poolCreateInfo.blockSize = 128ull * 1024 * 1024;
+poolCreateInfo.maxBlockCount = 2;
+
+VmaPool pool;
+res = vmaCreatePool(allocator, &poolCreateInfo, &pool);
+// Check res...
+
+// Allocate a buffer out of it.
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = 1024;
+bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.pool = pool;
+
+VkBuffer buf;
+VmaAllocation alloc;
+res = vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr);
+// Check res...
+\endcode
+
+You have to free all allocations made from this pool before destroying it.
+
+\code
+vmaDestroyBuffer(allocator, buf, alloc);
+vmaDestroyPool(allocator, pool);
+\endcode
+
+New versions of this library support creating dedicated allocations in custom pools.
+It is supported only when VmaPoolCreateInfo::blockSize = 0.
+To use this feature, set VmaAllocationCreateInfo::pool to the pointer to your custom pool and
+VmaAllocationCreateInfo::flags to #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT.
+
+
+\section custom_memory_pools_MemTypeIndex Choosing memory type index
+
+When creating a pool, you must explicitly specify memory type index.
+To find the one suitable for your buffers or images, you can use helper functions
+vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo().
+You need to provide structures with example parameters of buffers or images
+that you are going to create in that pool.
+
+\code
+VkBufferCreateInfo exampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+exampleBufCreateInfo.size = 1024; // Doesn't matter
+exampleBufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+uint32_t memTypeIndex;
+vmaFindMemoryTypeIndexForBufferInfo(allocator, &exampleBufCreateInfo, &allocCreateInfo, &memTypeIndex);
+
+VmaPoolCreateInfo poolCreateInfo = {};
+poolCreateInfo.memoryTypeIndex = memTypeIndex;
+// ...
+\endcode
+
+When creating buffers/images allocated in that pool, provide following parameters:
+
+- `VkBufferCreateInfo`: Prefer to pass same parameters as above.
+ Otherwise you risk creating resources in a memory type that is not suitable for them, which may result in undefined behavior.
+ Using different `VK_BUFFER_USAGE_` flags may work, but you shouldn't create images in a pool intended for buffers
+ or the other way around.
+- VmaAllocationCreateInfo: You don't need to pass same parameters. Fill only `pool` member.
+ Other members are ignored anyway.
+
+
+\section custom_memory_pools_when_not_use When not to use custom pools
+
+Custom pools are commonly overused by VMA users.
+While it may feel natural to keep some logical groups of resources separate in memory,
+in most cases it does more harm than good.
+Using custom pool shouldn't be your first choice.
+Instead, please make all allocations from default pools first and only use custom pools
+if you can prove and measure that it is beneficial in some way,
+e.g. it results in lower memory usage, better performance, etc.
+
+Using custom pools has disadvantages:
+
+- Each pool has its own collection of `VkDeviceMemory` blocks.
+ Some of them may be partially or even completely empty.
+ Spreading allocations across multiple pools increases the amount of wasted (allocated but unbound) memory.
+- You must manually choose specific memory type to be used by a custom pool (set as VmaPoolCreateInfo::memoryTypeIndex).
+ When using default pools, best memory type for each of your allocations can be selected automatically
+ using a carefully designed algorithm that works across all kinds of GPUs.
+- If an allocation from a custom pool at specific memory type fails, entire allocation operation returns failure.
+ When using default pools, VMA tries another compatible memory type.
+- If you set VmaPoolCreateInfo::blockSize != 0, each memory block has the same size,
+ while default pools start from small blocks and only allocate next blocks larger and larger
+ up to the preferred block size.
+
+Many of the common concerns can be addressed in a different way than using custom pools:
+
+- If you want to keep your allocations of certain size (small versus large) or certain lifetime (transient versus long lived)
+ separate, you likely don't need to.
+ VMA uses a high quality allocation algorithm that manages memory well in various cases.
+ Please measure and check if using custom pools provides a benefit.
+- If you want to keep your images and buffers separate, you don't need to.
+ VMA respects `bufferImageGranularity` limit automatically.
+- If you want to keep your mapped and not mapped allocations separate, you don't need to.
+ VMA respects `nonCoherentAtomSize` limit automatically.
+ It also maps only those `VkDeviceMemory` blocks that need to map any allocation.
+ It even tries to keep mappable and non-mappable allocations in separate blocks to minimize the amount of mapped memory.
+- If you want to choose a custom size for the default memory block, you can set it globally instead
+ using VmaAllocatorCreateInfo::preferredLargeHeapBlockSize.
+- If you want to select specific memory type for your allocation,
+ you can set VmaAllocationCreateInfo::memoryTypeBits to `(1u << myMemoryTypeIndex)` instead.
+- If you need to create a buffer with certain minimum alignment, you can still do it
+ using default pools with dedicated function vmaCreateBufferWithAlignment().
+
+
+\section linear_algorithm Linear allocation algorithm
+
+Each Vulkan memory block managed by this library has accompanying metadata that
+keeps track of used and unused regions. By default, the metadata structure and
+algorithm tries to find best place for new allocations among free regions to
+optimize memory usage. This way you can allocate and free objects in any order.
+
+![Default allocation algorithm](../gfx/Linear_allocator_1_algo_default.png)
+
+Sometimes there is a need to use simpler, linear allocation algorithm. You can
+create custom pool that uses such algorithm by adding flag
+#VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT to VmaPoolCreateInfo::flags while creating
+#VmaPool object. Then an alternative metadata management is used. It always
+creates new allocations after last one and doesn't reuse free regions after
+allocations freed in the middle. It results in better allocation performance and
+less memory consumed by metadata.
+
+![Linear allocation algorithm](../gfx/Linear_allocator_2_algo_linear.png)
+
+With this one flag, you can create a custom pool that can be used in many ways:
+free-at-once, stack, double stack, and ring buffer. See below for details.
+You don't need to specify explicitly which of these options you are going to use - it is detected automatically.
+
+\subsection linear_algorithm_free_at_once Free-at-once
+
+In a pool that uses linear algorithm, you still need to free all the allocations
+individually, e.g. by using vmaFreeMemory() or vmaDestroyBuffer(). You can free
+them in any order. New allocations are always made after last one - free space
+in the middle is not reused. However, when you release all the allocations and
+the pool becomes empty, allocation starts from the beginning again. This way you
+can use linear algorithm to speed up creation of allocations that you are going
+to release all at once.
+
+![Free-at-once](../gfx/Linear_allocator_3_free_at_once.png)
+
+This mode is also available for pools created with VmaPoolCreateInfo::maxBlockCount
+value that allows multiple memory blocks.
+
+\subsection linear_algorithm_stack Stack
+
+When you free an allocation that was created last, its space can be reused.
+Thanks to this, if you always release allocations in the order opposite to their
+creation (LIFO - Last In First Out), you can achieve behavior of a stack.
+
+![Stack](../gfx/Linear_allocator_4_stack.png)
+
+This mode is also available for pools created with VmaPoolCreateInfo::maxBlockCount
+value that allows multiple memory blocks.
+
+\subsection linear_algorithm_double_stack Double stack
+
+The space reserved by a custom pool with linear algorithm may be used by two
+stacks:
+
+- First, default one, growing up from offset 0.
+- Second, "upper" one, growing down from the end towards lower offsets.
+
+To make allocation from the upper stack, add flag #VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT
+to VmaAllocationCreateInfo::flags.
+
+![Double stack](../gfx/Linear_allocator_7_double_stack.png)
+
+Double stack is available only in pools with one memory block -
+VmaPoolCreateInfo::maxBlockCount must be 1. Otherwise behavior is undefined.
+
+When the two stacks' ends meet so there is not enough space between them for a
+new allocation, such allocation fails with usual
+`VK_ERROR_OUT_OF_DEVICE_MEMORY` error.
+
+\subsection linear_algorithm_ring_buffer Ring buffer
+
+When you free some allocations from the beginning and there is not enough free space
+for a new one at the end of a pool, allocator's "cursor" wraps around to the
+beginning and starts allocation there. Thanks to this, if you always release
+allocations in the same order as you created them (FIFO - First In First Out),
+you can achieve behavior of a ring buffer / queue.
+
+![Ring buffer](../gfx/Linear_allocator_5_ring_buffer.png)
+
+Ring buffer is available only in pools with one memory block -
+VmaPoolCreateInfo::maxBlockCount must be 1. Otherwise behavior is undefined.
+
+\note \ref defragmentation is not supported in custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT.
+
+
+\page defragmentation Defragmentation
+
+Interleaved allocations and deallocations of many objects of varying size can
+cause fragmentation over time, which can lead to a situation where the library is unable
+to find a continuous range of free memory for a new allocation even though there is
+enough free space, just scattered across many small free ranges between existing
+allocations.
+
+To mitigate this problem, you can use defragmentation feature.
+It doesn't happen automatically though and needs your cooperation,
+because VMA is a low level library that only allocates memory.
+It cannot recreate buffers and images in a new place as it doesn't remember the contents of `VkBufferCreateInfo` / `VkImageCreateInfo` structures.
+It cannot copy their contents as it doesn't record any commands to a command buffer.
+
+Example:
+
+\code
+VmaDefragmentationInfo defragInfo = {};
+defragInfo.pool = myPool;
+defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT;
+
+VmaDefragmentationContext defragCtx;
+VkResult res = vmaBeginDefragmentation(allocator, &defragInfo, &defragCtx);
+// Check res...
+
+for(;;)
+{
+ VmaDefragmentationPassMoveInfo pass;
+ res = vmaBeginDefragmentationPass(allocator, defragCtx, &pass);
+ if(res == VK_SUCCESS)
+ break;
+ else if(res != VK_INCOMPLETE)
+ // Handle error...
+
+ for(uint32_t i = 0; i < pass.moveCount; ++i)
+ {
+ // Inspect pass.pMoves[i].srcAllocation, identify what buffer/image it represents.
+ VmaAllocationInfo allocInfo;
+ vmaGetAllocationInfo(allocator, pass.pMoves[i].srcAllocation, &allocInfo);
+ MyEngineResourceData* resData = (MyEngineResourceData*)allocInfo.pUserData;
+
+ // Recreate and bind this buffer/image at: pass.pMoves[i].dstMemory, pass.pMoves[i].dstOffset.
+ VkImageCreateInfo imgCreateInfo = ...
+ VkImage newImg;
+ res = vkCreateImage(device, &imgCreateInfo, nullptr, &newImg);
+ // Check res...
+ res = vmaBindImageMemory(allocator, pass.pMoves[i].dstTmpAllocation, newImg);
+ // Check res...
+
+ // Issue a vkCmdCopyBuffer/vkCmdCopyImage to copy its content to the new place.
+ vkCmdCopyImage(cmdBuf, resData->img, ..., newImg, ...);
+ }
+
+ // Make sure the copy commands finished executing.
+ vkWaitForFences(...);
+
+ // Destroy old buffers/images bound with pass.pMoves[i].srcAllocation.
+ for(uint32_t i = 0; i < pass.moveCount; ++i)
+ {
+ // ...
+ vkDestroyImage(device, resData->img, nullptr);
+ }
+
+ // Update appropriate descriptors to point to the new places...
+
+ res = vmaEndDefragmentationPass(allocator, defragCtx, &pass);
+ if(res == VK_SUCCESS)
+ break;
+ else if(res != VK_INCOMPLETE)
+ // Handle error...
+}
+
+vmaEndDefragmentation(allocator, defragCtx, nullptr);
+\endcode
+
+Although functions like vmaCreateBuffer(), vmaCreateImage(), vmaDestroyBuffer(), vmaDestroyImage()
+create/destroy an allocation and a buffer/image at once, these are just a shortcut for
+creating the resource, allocating memory, and binding them together.
+Defragmentation works on memory allocations only. You must handle the rest manually.
+Defragmentation is an iterative process that should repeat "passes" as long as related functions
+return `VK_INCOMPLETE` not `VK_SUCCESS`.
+In each pass:
+
+1. vmaBeginDefragmentationPass() function call:
+ - Calculates and returns the list of allocations to be moved in this pass.
+ Note this can be a time-consuming process.
+ - Reserves destination memory for them by creating temporary destination allocations
+ that you can query for their `VkDeviceMemory` + offset using vmaGetAllocationInfo().
+2. Inside the pass, **you should**:
+ - Inspect the returned list of allocations to be moved.
+ - Create new buffers/images and bind them at the returned destination temporary allocations.
+ - Copy data from source to destination resources if necessary.
+ - Destroy the source buffers/images, but NOT their allocations.
+3. vmaEndDefragmentationPass() function call:
+ - Frees the source memory reserved for the allocations that are moved.
+ - Modifies source #VmaAllocation objects that are moved to point to the destination reserved memory.
+ - Frees `VkDeviceMemory` blocks that became empty.
+
+Unlike in previous iterations of the defragmentation API, there is no list of "movable" allocations passed as a parameter.
+Defragmentation algorithm tries to move all suitable allocations.
+You can, however, refuse to move some of them inside a defragmentation pass, by setting
+`pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE.
+This is not recommended and may result in suboptimal packing of the allocations after defragmentation.
+If you cannot ensure any allocation can be moved, it is better to keep movable allocations separate in a custom pool.
+
+Inside a pass, for each allocation that should be moved:
+
+- You should copy its data from the source to the destination place by calling e.g. `vkCmdCopyBuffer()`, `vkCmdCopyImage()`.
+ - You need to make sure these commands finished executing before destroying the source buffers/images and before calling vmaEndDefragmentationPass().
+- If a resource doesn't contain any meaningful data, e.g. it is a transient color attachment image to be cleared,
+ filled, and used temporarily in each rendering frame, you can just recreate this image
+ without copying its data.
+- If the resource is in `HOST_VISIBLE` and `HOST_CACHED` memory, you can copy its data on the CPU
+ using `memcpy()`.
+- If you cannot move the allocation, you can set `pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE.
+ This will cancel the move.
+ - vmaEndDefragmentationPass() will then free the destination memory
+ not the source memory of the allocation, leaving it unchanged.
+- If you decide the allocation is unimportant and can be destroyed instead of moved (e.g. it wasn't used for long time),
+ you can set `pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY.
+ - vmaEndDefragmentationPass() will then free both source and destination memory, and will destroy the source #VmaAllocation object.
+
+You can defragment a specific custom pool by setting VmaDefragmentationInfo::pool
+(like in the example above) or all the default pools by setting this member to null.
+
+Defragmentation is always performed in each pool separately.
+Allocations are never moved between different Vulkan memory types.
+The size of the destination memory reserved for a moved allocation is the same as the original one.
+Alignment of an allocation as it was determined using `vkGetBufferMemoryRequirements()` etc. is also respected after defragmentation.
+Buffers/images should be recreated with the same `VkBufferCreateInfo` / `VkImageCreateInfo` parameters as the original ones.
+
+You can perform the defragmentation incrementally to limit the number of allocations and bytes to be moved
+in each pass, e.g. to call it in sync with render frames and not to experience too big hitches.
+See members: VmaDefragmentationInfo::maxBytesPerPass, VmaDefragmentationInfo::maxAllocationsPerPass.
+
+It is also safe to perform the defragmentation asynchronously to render frames and other Vulkan and VMA
+usage, possibly from multiple threads, with the exception that allocations
+returned in VmaDefragmentationPassMoveInfo::pMoves shouldn't be destroyed until the defragmentation pass is ended.
+
+<b>Mapping</b> is preserved on allocations that are moved during defragmentation.
+Whether through #VMA_ALLOCATION_CREATE_MAPPED_BIT or vmaMapMemory(), the allocations
+are mapped at their new place. Of course, pointer to the mapped data changes, so it needs to be queried
+using VmaAllocationInfo::pMappedData.
+
+\note Defragmentation is not supported in custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT.
+
+
+\page statistics Statistics
+
+This library contains several functions that return information about its internal state,
+especially the amount of memory allocated from Vulkan.
+
+\section statistics_numeric_statistics Numeric statistics
+
+If you need to obtain basic statistics about memory usage per heap, together with current budget,
+you can call function vmaGetHeapBudgets() and inspect structure #VmaBudget.
+This is useful to keep track of memory usage and stay within budget
+(see also \ref staying_within_budget).
+Example:
+
+\code
+uint32_t heapIndex = ...
+
+VmaBudget budgets[VK_MAX_MEMORY_HEAPS];
+vmaGetHeapBudgets(allocator, budgets);
+
+printf("My heap currently has %u allocations taking %llu B,\n",
+ budgets[heapIndex].statistics.allocationCount,
+ budgets[heapIndex].statistics.allocationBytes);
+printf("allocated out of %u Vulkan device memory blocks taking %llu B,\n",
+ budgets[heapIndex].statistics.blockCount,
+ budgets[heapIndex].statistics.blockBytes);
+printf("Vulkan reports total usage %llu B with budget %llu B.\n",
+ budgets[heapIndex].usage,
+ budgets[heapIndex].budget);
+\endcode
+
+You can query for more detailed statistics per memory heap, type, and totals,
+including minimum and maximum allocation size and unused range size,
+by calling function vmaCalculateStatistics() and inspecting structure #VmaTotalStatistics.
+This function is slower though, as it has to traverse all the internal data structures,
+so it should be used only for debugging purposes.
+
+You can query for statistics of a custom pool using function vmaGetPoolStatistics()
+or vmaCalculatePoolStatistics().
+
+You can query for information about a specific allocation using function vmaGetAllocationInfo().
+It fills structure #VmaAllocationInfo.
+
+\section statistics_json_dump JSON dump
+
+You can dump internal state of the allocator to a string in JSON format using function vmaBuildStatsString().
+The result is guaranteed to be correct JSON.
+It uses ANSI encoding.
+Any strings provided by user (see [Allocation names](@ref allocation_names))
+are copied as-is and properly escaped for JSON, so if they use UTF-8, ISO-8859-2 or any other encoding,
+this JSON string can be treated as using this encoding.
+It must be freed using function vmaFreeStatsString().
+
+The format of this JSON string is not part of official documentation of the library,
+but it will not change in backward-incompatible way without increasing library major version number
+and appropriate mention in changelog.
+
+The JSON string contains all the data that can be obtained using vmaCalculateStatistics().
+It can also contain detailed map of allocated memory blocks and their regions -
+free and occupied by allocations.
+This allows e.g. to visualize the memory or assess fragmentation.
+
+
+\page allocation_annotation Allocation names and user data
+
+\section allocation_user_data Allocation user data
+
+You can annotate allocations with your own information, e.g. for debugging purposes.
+To do that, fill VmaAllocationCreateInfo::pUserData field when creating
+an allocation. It is an opaque `void*` pointer. You can use it e.g. as a pointer,
+some handle, index, key, ordinal number or any other value that would associate
+the allocation with your custom metadata.
+It is useful to identify appropriate data structures in your engine given #VmaAllocation,
+e.g. when doing \ref defragmentation.
+
+\code
+VkBufferCreateInfo bufCreateInfo = ...
+
+MyBufferMetadata* pMetadata = CreateBufferMetadata();
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.pUserData = pMetadata;
+
+VkBuffer buffer;
+VmaAllocation allocation;
+vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buffer, &allocation, nullptr);
+\endcode
+
+The pointer may be later retrieved as VmaAllocationInfo::pUserData:
+
+\code
+VmaAllocationInfo allocInfo;
+vmaGetAllocationInfo(allocator, allocation, &allocInfo);
+MyBufferMetadata* pMetadata = (MyBufferMetadata*)allocInfo.pUserData;
+\endcode
+
+It can also be changed using function vmaSetAllocationUserData().
+
+Values of (non-zero) allocations' `pUserData` are printed in JSON report created by
+vmaBuildStatsString() in hexadecimal form.
+
+\section allocation_names Allocation names
+
+An allocation can also carry a null-terminated string, giving a name to the allocation.
+To set it, call vmaSetAllocationName().
+The library creates internal copy of the string, so the pointer you pass doesn't need
+to be valid for whole lifetime of the allocation. You can free it after the call.
+
+\code
+std::string imageName = "Texture: ";
+imageName += fileName;
+vmaSetAllocationName(allocator, allocation, imageName.c_str());
+\endcode
+
+The string can be later retrieved by inspecting VmaAllocationInfo::pName.
+It is also printed in JSON report created by vmaBuildStatsString().
+
+\note Setting string name to VMA allocation doesn't automatically set it to the Vulkan buffer or image created with it.
+You must do it manually using an extension like VK_EXT_debug_utils, which is independent of this library.
+
+
+\page virtual_allocator Virtual allocator
+
+As an extra feature, the core allocation algorithm of the library is exposed through a simple and convenient API of "virtual allocator".
+It doesn't allocate any real GPU memory. It just keeps track of used and free regions of a "virtual block".
+You can use it to allocate your own memory or other objects, even completely unrelated to Vulkan.
+A common use case is sub-allocation of pieces of one large GPU buffer.
+
+\section virtual_allocator_creating_virtual_block Creating virtual block
+
+To use this functionality, there is no main "allocator" object.
+You don't need to have #VmaAllocator object created.
+All you need to do is to create a separate #VmaVirtualBlock object for each block of memory you want to be managed by the allocator:
+
+-# Fill in #VmaVirtualBlockCreateInfo structure.
+-# Call vmaCreateVirtualBlock(). Get new #VmaVirtualBlock object.
+
+Example:
+
+\code
+VmaVirtualBlockCreateInfo blockCreateInfo = {};
+blockCreateInfo.size = 1048576; // 1 MB
+
+VmaVirtualBlock block;
+VkResult res = vmaCreateVirtualBlock(&blockCreateInfo, &block);
+\endcode
+
+\section virtual_allocator_making_virtual_allocations Making virtual allocations
+
+#VmaVirtualBlock object contains internal data structure that keeps track of free and occupied regions
+using the same code as the main Vulkan memory allocator.
+Similarly to #VmaAllocation for standard GPU allocations, there is #VmaVirtualAllocation type
+that represents an opaque handle to an allocation within the virtual block.
+
+In order to make such allocation:
+
+-# Fill in #VmaVirtualAllocationCreateInfo structure.
+-# Call vmaVirtualAllocate(). Get new #VmaVirtualAllocation object that represents the allocation.
+ You can also receive `VkDeviceSize offset` that was assigned to the allocation.
+
+Example:
+
+\code
+VmaVirtualAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.size = 4096; // 4 KB
+
+VmaVirtualAllocation alloc;
+VkDeviceSize offset;
+res = vmaVirtualAllocate(block, &allocCreateInfo, &alloc, &offset);
+if(res == VK_SUCCESS)
+{
+ // Use the 4 KB of your memory starting at offset.
+}
+else
+{
+ // Allocation failed - no space for it could be found. Handle this error!
+}
+\endcode
+
+\section virtual_allocator_deallocation Deallocation
+
+When no longer needed, an allocation can be freed by calling vmaVirtualFree().
+You can only pass to this function an allocation that was previously returned by vmaVirtualAllocate()
+called for the same #VmaVirtualBlock.
+
+When whole block is no longer needed, the block object can be released by calling vmaDestroyVirtualBlock().
+All allocations must be freed before the block is destroyed, which is checked internally by an assert.
+However, if you don't want to call vmaVirtualFree() for each allocation, you can use vmaClearVirtualBlock() to free them all at once -
+a feature not available in normal Vulkan memory allocator. Example:
+
+\code
+vmaVirtualFree(block, alloc);
+vmaDestroyVirtualBlock(block);
+\endcode
+
+\section virtual_allocator_allocation_parameters Allocation parameters
+
+You can attach a custom pointer to each allocation by using vmaSetVirtualAllocationUserData().
+Its default value is null.
+It can be used to store any data that needs to be associated with that allocation - e.g. an index, a handle, or a pointer to some
+larger data structure containing more information. Example:
+
+\code
+struct CustomAllocData
+{
+ std::string m_AllocName;
+};
+CustomAllocData* allocData = new CustomAllocData();
+allocData->m_AllocName = "My allocation 1";
+vmaSetVirtualAllocationUserData(block, alloc, allocData);
+\endcode
+
+The pointer can later be fetched, along with allocation offset and size, by passing the allocation handle to function
+vmaGetVirtualAllocationInfo() and inspecting returned structure #VmaVirtualAllocationInfo.
+If you allocated a new object to be used as the custom pointer, don't forget to delete that object before freeing the allocation!
+Example:
+
+\code
+VmaVirtualAllocationInfo allocInfo;
+vmaGetVirtualAllocationInfo(block, alloc, &allocInfo);
+delete (CustomAllocData*)allocInfo.pUserData;
+
+vmaVirtualFree(block, alloc);
+\endcode
+
+\section virtual_allocator_alignment_and_units Alignment and units
+
+It feels natural to express sizes and offsets in bytes.
+If an offset of an allocation needs to be aligned to a multiple of some number (e.g. 4 bytes), you can fill optional member
+VmaVirtualAllocationCreateInfo::alignment to request it. Example:
+
+\code
+VmaVirtualAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.size = 4096; // 4 KB
+allocCreateInfo.alignment = 4; // Returned offset must be a multiple of 4 B
+
+VmaVirtualAllocation alloc;
+res = vmaVirtualAllocate(block, &allocCreateInfo, &alloc, nullptr);
+\endcode
+
+Alignments of different allocations made from one block may vary.
+However, if all alignments and sizes are always multiples of some size e.g. 4 B or `sizeof(MyDataStruct)`,
+you can express all sizes, alignments, and offsets in multiples of that size instead of individual bytes.
+It might be more convenient, but you need to make sure to use this new unit consistently in all the places:
+
+- VmaVirtualBlockCreateInfo::size
+- VmaVirtualAllocationCreateInfo::size and VmaVirtualAllocationCreateInfo::alignment
+- Using offset returned by vmaVirtualAllocate() or in VmaVirtualAllocationInfo::offset
+
+\section virtual_allocator_statistics Statistics
+
+You can obtain statistics of a virtual block using vmaGetVirtualBlockStatistics()
+(to get brief statistics that are fast to calculate)
+or vmaCalculateVirtualBlockStatistics() (to get more detailed statistics, slower to calculate).
+The functions fill structures #VmaStatistics, #VmaDetailedStatistics respectively - same as used by the normal Vulkan memory allocator.
+Example:
+
+\code
+VmaStatistics stats;
+vmaGetVirtualBlockStatistics(block, &stats);
+printf("My virtual block has %llu bytes used by %u virtual allocations\n",
+ stats.allocationBytes, stats.allocationCount);
+\endcode
+
+You can also request a full list of allocations and free regions as a string in JSON format by calling
+vmaBuildVirtualBlockStatsString().
+Returned string must be later freed using vmaFreeVirtualBlockStatsString().
+The format of this string differs from the one returned by the main Vulkan allocator, but it is similar.
+
+\section virtual_allocator_additional_considerations Additional considerations
+
+The "virtual allocator" functionality is implemented on a level of individual memory blocks.
+Keeping track of a whole collection of blocks, allocating new ones when out of free space,
+deleting empty ones, and deciding which one to try first for a new allocation must be implemented by the user.
+
+Alternative allocation algorithms are supported, just like in custom pools of the real GPU memory.
+See enum #VmaVirtualBlockCreateFlagBits to learn how to specify them (e.g. #VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT).
+You can find their description in chapter \ref custom_memory_pools.
+Allocation strategies are also supported.
+See enum #VmaVirtualAllocationCreateFlagBits to learn how to specify them (e.g. #VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT).
+
+Following features are supported only by the allocator of the real GPU memory and not by virtual allocations:
+buffer-image granularity, `VMA_DEBUG_MARGIN`, `VMA_MIN_ALIGNMENT`.
+
+
+\page debugging_memory_usage Debugging incorrect memory usage
+
+If you suspect a bug with memory usage, like usage of uninitialized memory or
+memory being overwritten out of bounds of an allocation,
+you can use debug features of this library to verify this.
+
+\section debugging_memory_usage_initialization Memory initialization
+
+If you experience a bug with incorrect and nondeterministic data in your program and you suspect uninitialized memory to be used,
+you can enable automatic memory initialization to verify this.
+To do it, define macro `VMA_DEBUG_INITIALIZE_ALLOCATIONS` to 1.
+
+\code
+#define VMA_DEBUG_INITIALIZE_ALLOCATIONS 1
+#include "vk_mem_alloc.h"
+\endcode
+
+It makes memory of new allocations initialized to bit pattern `0xDCDCDCDC`.
+Before an allocation is destroyed, its memory is filled with bit pattern `0xEFEFEFEF`.
+Memory is automatically mapped and unmapped if necessary.
+
+If you find these values while debugging your program, good chances are that you incorrectly
+read Vulkan memory that is allocated but not initialized, or already freed, respectively.
+
+Memory initialization works only with memory types that are `HOST_VISIBLE` and with allocations that can be mapped.
+It works also with dedicated allocations.
+
+\section debugging_memory_usage_margins Margins
+
+By default, allocations are laid out in memory blocks next to each other if possible
+(considering required alignment, `bufferImageGranularity`, and `nonCoherentAtomSize`).
+
+![Allocations without margin](../gfx/Margins_1.png)
+
+Define macro `VMA_DEBUG_MARGIN` to some non-zero value (e.g. 16) to enforce specified
+number of bytes as a margin after every allocation.
+
+\code
+#define VMA_DEBUG_MARGIN 16
+#include "vk_mem_alloc.h"
+\endcode
+
+![Allocations with margin](../gfx/Margins_2.png)
+
+If your bug goes away after enabling margins, it means it may be caused by memory
+being overwritten outside of allocation boundaries. It is not 100% certain though.
+Change in application behavior may also be caused by different order and distribution
+of allocations across memory blocks after margins are applied.
+
+Margins work with all types of memory.
+
+Margin is applied only to allocations made out of memory blocks and not to dedicated
+allocations, which have their own memory block of specific size.
+It is thus not applied to allocations made using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT flag
+or those that the library automatically decides to place in dedicated allocations, e.g. due to their
+large size or as recommended by the VK_KHR_dedicated_allocation extension.
+
+Margins appear in [JSON dump](@ref statistics_json_dump) as part of free space.
+
+Note that enabling margins increases memory usage and fragmentation.
+
+Margins do not apply to \ref virtual_allocator.
+
+\section debugging_memory_usage_corruption_detection Corruption detection
+
+You can additionally define macro `VMA_DEBUG_DETECT_CORRUPTION` to 1 to enable validation
+of contents of the margins.
+
+\code
+#define VMA_DEBUG_MARGIN 16
+#define VMA_DEBUG_DETECT_CORRUPTION 1
+#include "vk_mem_alloc.h"
+\endcode
+
+When this feature is enabled, number of bytes specified as `VMA_DEBUG_MARGIN`
+(it must be a multiple of 4) after every allocation is filled with a magic number.
+This idea is also known as "canary".
+Memory is automatically mapped and unmapped if necessary.
+
+This number is validated automatically when the allocation is destroyed.
+If it is not equal to the expected value, `VMA_ASSERT()` is executed.
+It clearly means that either CPU or GPU has overwritten the memory outside of boundaries of the allocation,
+which indicates a serious bug.
+
+You can also explicitly request checking margins of all allocations in all memory blocks
+that belong to specified memory types by using function vmaCheckCorruption(),
+or in memory blocks that belong to specified custom pool, by using function
+vmaCheckPoolCorruption().
+
+Margin validation (corruption detection) works only for memory types that are
+`HOST_VISIBLE` and `HOST_COHERENT`.
+
+
+\section debugging_memory_usage_leak_detection Leak detection features
+
+At allocation and allocator destruction time VMA checks for unfreed and unmapped blocks using
+`VMA_ASSERT_LEAK()`. This macro defaults to an assertion, triggering a typically fatal error in Debug
+builds, and doing nothing in Release builds. You can provide your own definition of `VMA_ASSERT_LEAK()`
+to change this behavior.
+
+At memory block destruction time VMA lists out all unfreed allocations using the `VMA_LEAK_LOG_FORMAT()`
+macro, which defaults to `VMA_DEBUG_LOG_FORMAT`, which in turn defaults to a no-op.
+If you're having trouble with leaks - for example, the aforementioned assertion triggers, but you don't
+quite know \em why -, overriding this macro to print out the leaking blocks, combined with assigning
+individual names to allocations using vmaSetAllocationName(), can greatly aid in fixing them.
+
+\page other_api_interop Interop with other graphics APIs
+
+VMA provides some features that help with interoperability with other graphics APIs, e.g. OpenGL.
+
+\section opengl_interop_exporting_memory Exporting memory
+
+If you want to attach `VkExportMemoryAllocateInfoKHR` or other structure to `pNext` chain of memory allocations made by the library:
+
+You can create \ref custom_memory_pools for such allocations.
+Define and fill in your `VkExportMemoryAllocateInfoKHR` structure and attach it to VmaPoolCreateInfo::pMemoryAllocateNext
+while creating the custom pool.
+Please note that the structure must remain alive and unchanged for the whole lifetime of the #VmaPool,
+not only while creating it, as no copy of the structure is made,
+but its original pointer is used for each allocation instead.
+
+If you want to export all memory allocated by VMA from certain memory types,
+also dedicated allocations or other allocations made from default pools,
+an alternative solution is to fill in VmaAllocatorCreateInfo::pTypeExternalMemoryHandleTypes.
+It should point to an array with `VkExternalMemoryHandleTypeFlagsKHR` to be automatically passed by the library
+through `VkExportMemoryAllocateInfoKHR` on each allocation made from a specific memory type.
+Please note that new versions of the library also support dedicated allocations created in custom pools.
+
+You should not mix these two methods in a way that allows to apply both to the same memory type.
+Otherwise, `VkExportMemoryAllocateInfoKHR` structure would be attached twice to the `pNext` chain of `VkMemoryAllocateInfo`.
+
+
+\section opengl_interop_custom_alignment Custom alignment
+
+Buffers or images exported to a different API like OpenGL may require a different alignment,
+higher than the one used by the library automatically, queried from functions like `vkGetBufferMemoryRequirements`.
+To impose such alignment:
+
+You can create \ref custom_memory_pools for such allocations.
+Set VmaPoolCreateInfo::minAllocationAlignment member to the minimum alignment required for each allocation
+to be made out of this pool.
+The alignment actually used will be the maximum of this member and the alignment returned for the specific buffer or image
+from a function like `vkGetBufferMemoryRequirements`, which is called by VMA automatically.
+
+If you want to create a buffer with a specific minimum alignment out of default pools,
+use special function vmaCreateBufferWithAlignment(), which takes additional parameter `minAlignment`.
+
+Note the problem of alignment affects only resources placed inside bigger `VkDeviceMemory` blocks and not dedicated
+allocations, as these, by definition, always have alignment = 0 because the resource is bound to the beginning of its dedicated block.
+You can ensure that an allocation is created as dedicated by using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT.
+Contrary to Direct3D 12, Vulkan doesn't have a concept of alignment of the entire memory block passed on its allocation.
+
+\section opengl_interop_extended_allocation_information Extended allocation information
+
+If you want to rely on VMA to allocate your buffers and images inside larger memory blocks,
+but you need to know the size of the entire block and whether the allocation was made
+with its own dedicated memory, use function vmaGetAllocationInfo2() to retrieve
+extended allocation information in structure #VmaAllocationInfo2.
+
+
+
+\page usage_patterns Recommended usage patterns
+
+Vulkan gives great flexibility in memory allocation.
+This chapter shows the most common patterns.
+
+See also slides from talk:
+[Sawicki, Adam. Advanced Graphics Techniques Tutorial: Memory management in Vulkan and DX12. Game Developers Conference, 2018](https://www.gdcvault.com/play/1025458/Advanced-Graphics-Techniques-Tutorial-New)
+
+
+\section usage_patterns_gpu_only GPU-only resource
+
+<b>When:</b>
+Any resources that you frequently write and read on GPU,
+e.g. images used as color attachments (aka "render targets"), depth-stencil attachments,
+images/buffers used as storage image/buffer (aka "Unordered Access View (UAV)").
+
+<b>What to do:</b>
+Let the library select the optimal memory type, which will likely have `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`.
+
+\code
+VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
+imgCreateInfo.imageType = VK_IMAGE_TYPE_2D;
+imgCreateInfo.extent.width = 3840;
+imgCreateInfo.extent.height = 2160;
+imgCreateInfo.extent.depth = 1;
+imgCreateInfo.mipLevels = 1;
+imgCreateInfo.arrayLayers = 1;
+imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
+imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
+imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+imgCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
+allocCreateInfo.priority = 1.0f;
+
+VkImage img;
+VmaAllocation alloc;
+vmaCreateImage(allocator, &imgCreateInfo, &allocCreateInfo, &img, &alloc, nullptr);
+\endcode
+
+<b>Also consider:</b>
+Consider creating them as dedicated allocations using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT,
+especially if they are large or if you plan to destroy and recreate them with different sizes
+e.g. when display resolution changes.
+Prefer to create such resources first and all other GPU resources (like textures and vertex buffers) later.
+When VK_EXT_memory_priority extension is enabled, it is also worth setting high priority to such allocation
+to decrease chances to be evicted to system memory by the operating system.
+
+\section usage_patterns_staging_copy_upload Staging copy for upload
+
+<b>When:</b>
+A "staging" buffer that you want to map and fill from CPU code, then use as a source of transfer
+to some GPU resource.
+
+<b>What to do:</b>
+Use flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT.
+Let the library select the optimal memory type, which will always have `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`.
+
+\code
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = 65536;
+bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
+ VMA_ALLOCATION_CREATE_MAPPED_BIT;
+
+VkBuffer buf;
+VmaAllocation alloc;
+VmaAllocationInfo allocInfo;
+vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
+
+...
+
+memcpy(allocInfo.pMappedData, myData, myDataSize);
+\endcode
+
+<b>Also consider:</b>
+You can map the allocation using vmaMapMemory() or you can create it as persistently mapped
+using #VMA_ALLOCATION_CREATE_MAPPED_BIT, as in the example above.
+
+
+\section usage_patterns_readback Readback
+
+<b>When:</b>
+Buffers for data written by or transferred from the GPU that you want to read back on the CPU,
+e.g. results of some computations.
+
+<b>What to do:</b>
+Use flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT.
+Let the library select the optimal memory type, which will always have `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`
+and `VK_MEMORY_PROPERTY_HOST_CACHED_BIT`.
+
+\code
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = 65536;
+bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT |
+ VMA_ALLOCATION_CREATE_MAPPED_BIT;
+
+VkBuffer buf;
+VmaAllocation alloc;
+VmaAllocationInfo allocInfo;
+vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
+
+...
+
+const float* downloadedData = (const float*)allocInfo.pMappedData;
+\endcode
+
+
+\section usage_patterns_advanced_data_uploading Advanced data uploading
+
+For resources that you frequently write on CPU via mapped pointer and
+frequently read on GPU e.g. as a uniform buffer (also called "dynamic"), multiple options are possible:
+
+-# Easiest solution is to have one copy of the resource in `HOST_VISIBLE` memory,
+ even if it means system RAM (not `DEVICE_LOCAL`) on systems with a discrete graphics card,
+ and make the device reach out to that resource directly.
+ - Reads performed by the device will then go through PCI Express bus.
+ The performance of this access may be limited, but it may be fine depending on the size
+ of this resource (whether it is small enough to quickly end up in GPU cache) and the sparsity
+ of access.
+-# On systems with unified memory (e.g. AMD APU or Intel integrated graphics, mobile chips),
+ a memory type may be available that is both `HOST_VISIBLE` (available for mapping) and `DEVICE_LOCAL`
+ (fast to access from the GPU). Then, it is likely the best choice for such type of resource.
+-# Systems with a discrete graphics card and separate video memory may or may not expose
+ a memory type that is both `HOST_VISIBLE` and `DEVICE_LOCAL`, also known as Base Address Register (BAR).
+ If they do, it represents a piece of VRAM (or entire VRAM, if ReBAR is enabled in the motherboard BIOS)
+ that is available to CPU for mapping.
+ - Writes performed by the host to that memory go through PCI Express bus.
+ The performance of these writes may be limited, but it may be fine, especially on PCIe 4.0,
+ as long as rules of using uncached and write-combined memory are followed - only sequential writes and no reads.
+-# Finally, you may need or prefer to create a separate copy of the resource in `DEVICE_LOCAL` memory,
+ a separate "staging" copy in `HOST_VISIBLE` memory and perform an explicit transfer command between them.
+
+Thankfully, VMA offers an aid to create and use such resources in the way optimal
+for the current Vulkan device. To help the library make the best choice,
+use flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT together with
+#VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT.
+It will then prefer a memory type that is both `DEVICE_LOCAL` and `HOST_VISIBLE` (integrated memory or BAR),
+but if no such memory type is available or allocation from it fails
+(PC graphics cards have only 256 MB of BAR by default, unless ReBAR is supported and enabled in BIOS),
+it will fall back to `DEVICE_LOCAL` memory for fast GPU access.
+It is then up to you to detect that the allocation ended up in a memory type that is not `HOST_VISIBLE`,
+so you need to create another "staging" allocation and perform explicit transfers.
+
+\code
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = 65536;
+bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
+ VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT |
+ VMA_ALLOCATION_CREATE_MAPPED_BIT;
+
+VkBuffer buf;
+VmaAllocation alloc;
+VmaAllocationInfo allocInfo;
+VkResult result = vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
+// Check result...
+
+VkMemoryPropertyFlags memPropFlags;
+vmaGetAllocationMemoryProperties(allocator, alloc, &memPropFlags);
+
+if(memPropFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
+{
+ // Allocation ended up in a mappable memory and is already mapped - write to it directly.
+
+ // [Executed in runtime]:
+ memcpy(allocInfo.pMappedData, myData, myDataSize);
+ result = vmaFlushAllocation(allocator, alloc, 0, VK_WHOLE_SIZE);
+ // Check result...
+
+ VkBufferMemoryBarrier bufMemBarrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER };
+ bufMemBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
+ bufMemBarrier.dstAccessMask = VK_ACCESS_UNIFORM_READ_BIT;
+ bufMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+ bufMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+ bufMemBarrier.buffer = buf;
+ bufMemBarrier.offset = 0;
+ bufMemBarrier.size = VK_WHOLE_SIZE;
+
+ vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
+ 0, 0, nullptr, 1, &bufMemBarrier, 0, nullptr);
+}
+else
+{
+ // Allocation ended up in a non-mappable memory - a transfer using a staging buffer is required.
+ VkBufferCreateInfo stagingBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+ stagingBufCreateInfo.size = 65536;
+ stagingBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+
+ VmaAllocationCreateInfo stagingAllocCreateInfo = {};
+ stagingAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+ stagingAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
+ VMA_ALLOCATION_CREATE_MAPPED_BIT;
+
+ VkBuffer stagingBuf;
+ VmaAllocation stagingAlloc;
+ VmaAllocationInfo stagingAllocInfo;
+ result = vmaCreateBuffer(allocator, &stagingBufCreateInfo, &stagingAllocCreateInfo,
+ &stagingBuf, &stagingAlloc, &stagingAllocInfo);
+ // Check result...
+
+ // [Executed in runtime]:
+ memcpy(stagingAllocInfo.pMappedData, myData, myDataSize);
+ result = vmaFlushAllocation(allocator, stagingAlloc, 0, VK_WHOLE_SIZE);
+ // Check result...
+
+ VkBufferMemoryBarrier bufMemBarrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER };
+ bufMemBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
+ bufMemBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
+ bufMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+ bufMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+ bufMemBarrier.buffer = stagingBuf;
+ bufMemBarrier.offset = 0;
+ bufMemBarrier.size = VK_WHOLE_SIZE;
+
+ vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ 0, 0, nullptr, 1, &bufMemBarrier, 0, nullptr);
+
+ VkBufferCopy bufCopy = {
+ 0, // srcOffset
+ 0, // dstOffset,
+ myDataSize, // size
+ };
+
+ vkCmdCopyBuffer(cmdBuf, stagingBuf, buf, 1, &bufCopy);
+
+ VkBufferMemoryBarrier bufMemBarrier2 = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER };
+ bufMemBarrier2.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+ bufMemBarrier2.dstAccessMask = VK_ACCESS_UNIFORM_READ_BIT; // We created a uniform buffer
+ bufMemBarrier2.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+ bufMemBarrier2.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+ bufMemBarrier2.buffer = buf;
+ bufMemBarrier2.offset = 0;
+ bufMemBarrier2.size = VK_WHOLE_SIZE;
+
+ vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
+ 0, 0, nullptr, 1, &bufMemBarrier2, 0, nullptr);
+}
+\endcode
+
+\section usage_patterns_other_use_cases Other use cases
+
+Here are some other, less obvious use cases and their recommended settings:
+
+- An image that is used only as transfer source and destination, but it should stay on the device,
+ as it is used to temporarily store a copy of some texture, e.g. from the current to the next frame,
+ for temporal antialiasing or other temporal effects.
+ - Use `VkImageCreateInfo::usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT`
+ - Use VmaAllocationCreateInfo::usage = #VMA_MEMORY_USAGE_AUTO
+- An image that is used only as transfer source and destination, but it should be placed
+ in the system RAM even though it doesn't need to be mapped, because it serves as a "swap" copy to evict
+ least recently used textures from VRAM.
+ - Use `VkImageCreateInfo::usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT`
+ - Use VmaAllocationCreateInfo::usage = #VMA_MEMORY_USAGE_AUTO_PREFER_HOST,
+ as VMA needs a hint here to differentiate from the previous case.
+- A buffer that you want to map and write from the CPU, directly read from the GPU
+ (e.g. as a uniform or vertex buffer), but you have a clear preference to place it in device or
+ host memory due to its large size.
+ - Use `VkBufferCreateInfo::usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT`
+ - Use VmaAllocationCreateInfo::usage = #VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE or #VMA_MEMORY_USAGE_AUTO_PREFER_HOST
+ - Use VmaAllocationCreateInfo::flags = #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT
+
+
+\page configuration Configuration
+
+Please check "CONFIGURATION SECTION" in the code to find macros that you can define
+before each include of this file or change directly in this file to provide
+your own implementation of basic facilities like assert, `min()` and `max()` functions,
+mutex, atomic etc.
+
+For example, define `VMA_ASSERT(expr)` before including the library to provide
+custom implementation of the assertion, compatible with your project.
+By default it is defined to standard C `assert(expr)` in `_DEBUG` configuration
+and empty otherwise.
+
+Similarly, you can define `VMA_LEAK_LOG_FORMAT` macro to enable printing of leaked (unfreed) allocations,
+including their names and other parameters. Example:
+
+\code
+#define VMA_LEAK_LOG_FORMAT(format, ...) do { \
+ printf((format), __VA_ARGS__); \
+ printf("\n"); \
+ } while(false)
+\endcode
+
+\section config_Vulkan_functions Pointers to Vulkan functions
+
+There are multiple ways to import pointers to Vulkan functions in the library.
+In the simplest case you don't need to do anything.
+If the compilation or linking of your program or the initialization of the #VmaAllocator
+doesn't work for you, you can try to reconfigure it.
+
+First, the allocator tries to fetch pointers to Vulkan functions linked statically,
+like this:
+
+\code
+m_VulkanFunctions.vkAllocateMemory = (PFN_vkAllocateMemory)vkAllocateMemory;
+\endcode
+
+If you want to disable this feature, set configuration macro: `#define VMA_STATIC_VULKAN_FUNCTIONS 0`.
+
+Second, you can provide the pointers yourself by setting member VmaAllocatorCreateInfo::pVulkanFunctions.
+You can fetch them e.g. using functions `vkGetInstanceProcAddr` and `vkGetDeviceProcAddr` or
+by using a helper library like [volk](https://github.com/zeux/volk).
+
+Third, VMA tries to fetch remaining pointers that are still null by calling
+`vkGetInstanceProcAddr` and `vkGetDeviceProcAddr` on its own.
+You need to only fill in VmaVulkanFunctions::vkGetInstanceProcAddr and VmaVulkanFunctions::vkGetDeviceProcAddr.
+Other pointers will be fetched automatically.
+If you want to disable this feature, set configuration macro: `#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0`.
+
+Finally, all the function pointers required by the library (considering selected
+Vulkan version and enabled extensions) are checked with `VMA_ASSERT` if they are not null.
+
+
+\section custom_memory_allocator Custom host memory allocator
+
+If you use custom allocator for CPU memory rather than default operator `new`
+and `delete` from C++, you can make this library use your allocator as well
+by filling optional member VmaAllocatorCreateInfo::pAllocationCallbacks. These
+functions will be passed to Vulkan, as well as used by the library itself to
+make any CPU-side allocations.
+
+\section allocation_callbacks Device memory allocation callbacks
+
+The library makes calls to `vkAllocateMemory()` and `vkFreeMemory()` internally.
+You can setup callbacks to be informed about these calls, e.g. for the purpose
+of gathering some statistics. To do it, fill optional member
+VmaAllocatorCreateInfo::pDeviceMemoryCallbacks.
+
+\section heap_memory_limit Device heap memory limit
+
+When device memory of certain heap runs out of free space, new allocations may
+fail (returning error code) or they may succeed, silently pushing some existing
+memory blocks from GPU VRAM to system RAM (which degrades performance). This
+behavior is implementation-dependent - it depends on GPU vendor and graphics
+driver.
+
+On AMD cards it can be controlled while creating Vulkan device object by using
+VK_AMD_memory_overallocation_behavior extension, if available.
+
+Alternatively, if you want to test how your program behaves with limited amount of Vulkan device
+memory available without switching your graphics card to one that really has
+smaller VRAM, you can use a feature of this library intended for this purpose.
+To do it, fill optional member VmaAllocatorCreateInfo::pHeapSizeLimit.
+
+
+
+\page vk_khr_dedicated_allocation VK_KHR_dedicated_allocation
+
+VK_KHR_dedicated_allocation is a Vulkan extension which can be used to improve
+performance on some GPUs. It augments Vulkan API with possibility to query
+driver whether it prefers particular buffer or image to have its own, dedicated
+allocation (separate `VkDeviceMemory` block) for better efficiency - to be able
+to do some internal optimizations. The extension is supported by this library.
+It will be used automatically when enabled.
+
+It has been promoted to core Vulkan 1.1, so if you use eligible Vulkan version
+and inform VMA about it by setting VmaAllocatorCreateInfo::vulkanApiVersion,
+you are all set.
+
+Otherwise, if you want to use it as an extension:
+
+1 . When creating Vulkan device, check if following 2 device extensions are
+supported (call `vkEnumerateDeviceExtensionProperties()`).
+If yes, enable them (fill `VkDeviceCreateInfo::ppEnabledExtensionNames`).
+
+- VK_KHR_get_memory_requirements2
+- VK_KHR_dedicated_allocation
+
+If you enabled these extensions:
+
+2 . Use #VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT flag when creating
+your #VmaAllocator to inform the library that you enabled required extensions
+and you want the library to use them.
+
+\code
+allocatorInfo.flags |= VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT;
+
+vmaCreateAllocator(&allocatorInfo, &allocator);
+\endcode
+
+That is all. The extension will be automatically used whenever you create a
+buffer using vmaCreateBuffer() or image using vmaCreateImage().
+
+When using the extension together with Vulkan Validation Layer, you will receive
+warnings like this:
+
+_vkBindBufferMemory(): Binding memory to buffer 0x33 but vkGetBufferMemoryRequirements() has not been called on that buffer._
+
+It is OK, you should just ignore it. It happens because you use function
+`vkGetBufferMemoryRequirements2KHR()` instead of standard
+`vkGetBufferMemoryRequirements()`, while the validation layer seems to be
+unaware of it.
+
+To learn more about this extension, see:
+
+- [VK_KHR_dedicated_allocation in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap50.html#VK_KHR_dedicated_allocation)
+- [VK_KHR_dedicated_allocation unofficial manual](http://asawicki.info/articles/VK_KHR_dedicated_allocation.php5)
+
+
+
+\page vk_ext_memory_priority VK_EXT_memory_priority
+
+VK_EXT_memory_priority is a device extension that allows to pass additional "priority"
+value to Vulkan memory allocations that the implementation may use to prefer certain
+buffers and images that are critical for performance to stay in device-local memory
+in cases when the memory is over-subscribed, while some others may be moved to the system memory.
+
+VMA offers convenient usage of this extension.
+If you enable it, you can pass "priority" parameter when creating allocations or custom pools
+and the library automatically passes the value to Vulkan using this extension.
+
+If you want to use this extension in connection with VMA, follow these steps:
+
+\section vk_ext_memory_priority_initialization Initialization
+
+1) Call `vkEnumerateDeviceExtensionProperties` for the physical device.
+Check if the extension is supported - if returned array of `VkExtensionProperties` contains "VK_EXT_memory_priority".
+
+2) Call `vkGetPhysicalDeviceFeatures2` for the physical device instead of old `vkGetPhysicalDeviceFeatures`.
+Attach additional structure `VkPhysicalDeviceMemoryPriorityFeaturesEXT` to `VkPhysicalDeviceFeatures2::pNext` to be returned.
+Check if the device feature is really supported - check if `VkPhysicalDeviceMemoryPriorityFeaturesEXT::memoryPriority` is true.
+
+3) While creating device with `vkCreateDevice`, enable this extension - add "VK_EXT_memory_priority"
+to the list passed as `VkDeviceCreateInfo::ppEnabledExtensionNames`.
+
+4) While creating the device, also don't set `VkDeviceCreateInfo::pEnabledFeatures`.
+Fill in `VkPhysicalDeviceFeatures2` structure instead and pass it as `VkDeviceCreateInfo::pNext`.
+Enable this device feature - attach additional structure `VkPhysicalDeviceMemoryPriorityFeaturesEXT` to
+`VkPhysicalDeviceFeatures2::pNext` chain and set its member `memoryPriority` to `VK_TRUE`.
+
+5) While creating #VmaAllocator with vmaCreateAllocator() inform VMA that you
+have enabled this extension and feature - add #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT
+to VmaAllocatorCreateInfo::flags.
+
+\section vk_ext_memory_priority_usage Usage
+
+When using this extension, you should initialize the following members:
+
+- VmaAllocationCreateInfo::priority when creating a dedicated allocation with #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT.
+- VmaPoolCreateInfo::priority when creating a custom pool.
+
+It should be a floating-point value between `0.0f` and `1.0f`, where recommended default is `0.5f`.
+Memory allocated with higher value can be treated by the Vulkan implementation as higher priority
+and so it can have lower chances of being pushed out to system memory, experiencing degraded performance.
+
+It might be a good idea to create performance-critical resources like color-attachment or depth-stencil images
+as dedicated and set high priority to them. For example:
+
+\code
+VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
+imgCreateInfo.imageType = VK_IMAGE_TYPE_2D;
+imgCreateInfo.extent.width = 3840;
+imgCreateInfo.extent.height = 2160;
+imgCreateInfo.extent.depth = 1;
+imgCreateInfo.mipLevels = 1;
+imgCreateInfo.arrayLayers = 1;
+imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
+imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
+imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+imgCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
+allocCreateInfo.priority = 1.0f;
+
+VkImage img;
+VmaAllocation alloc;
+vmaCreateImage(allocator, &imgCreateInfo, &allocCreateInfo, &img, &alloc, nullptr);
+\endcode
+
+`priority` member is ignored in the following situations:
+
+- Allocations created in custom pools: They inherit the priority, along with all other allocation parameters
+ from the parameters passed in #VmaPoolCreateInfo when the pool was created.
+- Allocations created in default pools: They inherit the priority from the parameters
+ VMA used when creating default pools, which means `priority == 0.5f`.
+
+
+\page vk_amd_device_coherent_memory VK_AMD_device_coherent_memory
+
+VK_AMD_device_coherent_memory is a device extension that enables access to
+additional memory types with `VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD` and
+`VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD` flag. It is useful mostly for
+allocation of buffers intended for writing "breadcrumb markers" in between passes
+or draw calls, which in turn are useful for debugging GPU crash/hang/TDR cases.
+
+When the extension is available but has not been enabled, Vulkan physical device
+still exposes those memory types, but their usage is forbidden. VMA automatically
+takes care of that - it returns `VK_ERROR_FEATURE_NOT_PRESENT` when an attempt
+to allocate memory of such type is made.
+
+If you want to use this extension in connection with VMA, follow these steps:
+
+\section vk_amd_device_coherent_memory_initialization Initialization
+
+1) Call `vkEnumerateDeviceExtensionProperties` for the physical device.
+Check if the extension is supported - if returned array of `VkExtensionProperties` contains "VK_AMD_device_coherent_memory".
+
+2) Call `vkGetPhysicalDeviceFeatures2` for the physical device instead of old `vkGetPhysicalDeviceFeatures`.
+Attach additional structure `VkPhysicalDeviceCoherentMemoryFeaturesAMD` to `VkPhysicalDeviceFeatures2::pNext` to be returned.
+Check if the device feature is really supported - check if `VkPhysicalDeviceCoherentMemoryFeaturesAMD::deviceCoherentMemory` is true.
+
+3) While creating device with `vkCreateDevice`, enable this extension - add "VK_AMD_device_coherent_memory"
+to the list passed as `VkDeviceCreateInfo::ppEnabledExtensionNames`.
+
+4) While creating the device, also don't set `VkDeviceCreateInfo::pEnabledFeatures`.
+Fill in `VkPhysicalDeviceFeatures2` structure instead and pass it as `VkDeviceCreateInfo::pNext`.
+Enable this device feature - attach additional structure `VkPhysicalDeviceCoherentMemoryFeaturesAMD` to
+`VkPhysicalDeviceFeatures2::pNext` and set its member `deviceCoherentMemory` to `VK_TRUE`.
+
+5) While creating #VmaAllocator with vmaCreateAllocator() inform VMA that you
+have enabled this extension and feature - add #VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT
+to VmaAllocatorCreateInfo::flags.
+
+\section vk_amd_device_coherent_memory_usage Usage
+
+After following steps described above, you can create VMA allocations and custom pools
+out of the special `DEVICE_COHERENT` and `DEVICE_UNCACHED` memory types on eligible
+devices. There are multiple ways to do it, for example:
+
+- You can request or prefer to allocate out of such memory types by adding
+ `VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD` to VmaAllocationCreateInfo::requiredFlags
+ or VmaAllocationCreateInfo::preferredFlags. Those flags can be freely mixed with
+ other ways of \ref choosing_memory_type, like setting VmaAllocationCreateInfo::usage.
+- If you manually found memory type index to use for this purpose, force allocation
+ from this specific index by setting VmaAllocationCreateInfo::memoryTypeBits `= 1u << index`.
+
+\section vk_amd_device_coherent_memory_more_information More information
+
+To learn more about this extension, see [VK_AMD_device_coherent_memory in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VK_AMD_device_coherent_memory.html)
+
+Example use of this extension can be found in the code of the sample and test suite
+accompanying this library.
+
+
+\page vk_khr_external_memory_win32 VK_KHR_external_memory_win32
+
+On Windows, the VK_KHR_external_memory_win32 device extension allows exporting a Win32 `HANDLE`
+of a `VkDeviceMemory` block, to be able to reference the memory on other Vulkan logical devices or instances,
+in multiple processes, and/or in multiple APIs.
+VMA offers support for it.
+
+\section vk_khr_external_memory_win32_initialization Initialization
+
+1) Make sure the extension is defined in the code by including the following header before including VMA:
+
+\code
+#include <vulkan/vulkan_win32.h>
+\endcode
+
+2) Check if "VK_KHR_external_memory_win32" is available among device extensions.
+Enable it when creating the `VkDevice` object.
+
+3) Enable the usage of this extension in VMA by setting flag #VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT
+when calling vmaCreateAllocator().
+
+4) Make sure that VMA has access to the `vkGetMemoryWin32HandleKHR` function by either enabling `VMA_DYNAMIC_VULKAN_FUNCTIONS` macro
+or setting VmaVulkanFunctions::vkGetMemoryWin32HandleKHR explicitly.
+For more information, see \ref quick_start_initialization_importing_vulkan_functions.
+
+\section vk_khr_external_memory_win32_preparations Preparations
+
+You can find example usage among tests, in file "Tests.cpp", function `TestWin32Handles()`.
+
+To use the extension, buffers need to be created with `VkExternalMemoryBufferCreateInfoKHR` attached to their `pNext` chain,
+and memory allocations need to be made with `VkExportMemoryAllocateInfoKHR` attached to their `pNext` chain.
+To make use of them, you need to use \ref custom_memory_pools. Example:
+
+\code
+// Define an example buffer and allocation parameters.
+VkExternalMemoryBufferCreateInfoKHR externalMemBufCreateInfo = {
+ VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR,
+ nullptr,
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
+};
+VkBufferCreateInfo exampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+exampleBufCreateInfo.size = 0x10000; // Doesn't matter here.
+exampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+exampleBufCreateInfo.pNext = &externalMemBufCreateInfo;
+
+VmaAllocationCreateInfo exampleAllocCreateInfo = {};
+exampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+// Find memory type index to use for the custom pool.
+uint32_t memTypeIndex;
+VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_Allocator,
+ &exampleBufCreateInfo, &exampleAllocCreateInfo, &memTypeIndex);
+// Check res...
+
+// Create a custom pool.
+constexpr static VkExportMemoryAllocateInfoKHR exportMemAllocInfo = {
+ VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR,
+ nullptr,
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
+};
+VmaPoolCreateInfo poolCreateInfo = {};
+poolCreateInfo.memoryTypeIndex = memTypeIndex;
+poolCreateInfo.pMemoryAllocateNext = (void*)&exportMemAllocInfo;
+
+VmaPool pool;
+res = vmaCreatePool(g_Allocator, &poolCreateInfo, &pool);
+// Check res...
+
+// YOUR OTHER CODE COMES HERE....
+
+// At the end, don't forget to destroy it!
+vmaDestroyPool(g_Allocator, pool);
+\endcode
+
+Note that the structure passed as VmaPoolCreateInfo::pMemoryAllocateNext must remain alive and unchanged
+for the whole lifetime of the custom pool, because it will be used when the pool allocates a new device memory block.
+No copy is made internally. This is why variable `exportMemAllocInfo` is defined as `static`.
+
+\section vk_khr_external_memory_win32_memory_allocation Memory allocation
+
+Finally, you can create a buffer with an allocation out of the custom pool.
+The buffer should use the same flags as the sample buffer used to find the memory type.
+It should also specify `VkExternalMemoryBufferCreateInfoKHR` in its `pNext` chain.
+
+\code
+VkExternalMemoryBufferCreateInfoKHR externalMemBufCreateInfo = {
+ VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR,
+ nullptr,
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
+};
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = // Your desired buffer size.
+bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+bufCreateInfo.pNext = &externalMemBufCreateInfo;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.pool = pool; // It is enough to set this one member.
+
+VkBuffer buf;
+VmaAllocation alloc;
+res = vmaCreateBuffer(g_Allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr);
+// Check res...
+
+// YOUR OTHER CODE COMES HERE....
+
+// At the end, don't forget to destroy it!
+vmaDestroyBuffer(g_Allocator, buf, alloc);
+\endcode
+
+If you need each allocation to have its own device memory block and start at offset 0, you can still do so
+by using the #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT flag. It also works with custom pools.
+
+\section vk_khr_external_memory_win32_exporting_win32_handle Exporting Win32 handle
+
+After the allocation is created, you can acquire a Win32 `HANDLE` to the `VkDeviceMemory` block it belongs to.
+VMA function vmaGetMemoryWin32Handle() is a replacement of the Vulkan function `vkGetMemoryWin32HandleKHR`.
+
+\code
+HANDLE handle;
+res = vmaGetMemoryWin32Handle(g_Allocator, alloc, nullptr, &handle);
+// Check res...
+
+// YOUR OTHER CODE COMES HERE....
+
+// At the end, you must close the handle.
+CloseHandle(handle);
+\endcode
+
+Documentation of the VK_KHR_external_memory_win32 extension states that:
+
+> If handleType is defined as an NT handle, vkGetMemoryWin32HandleKHR must be called no more than once for each valid unique combination of memory and handleType.
+
+This is ensured automatically inside VMA.
+The library fetches the handle on first use, remembers it internally, and closes it when the memory block or dedicated allocation is destroyed.
+Every time you call vmaGetMemoryWin32Handle(), VMA calls `DuplicateHandle` and returns a new handle that you need to close.
+
+For further information, please check documentation of the vmaGetMemoryWin32Handle() function.
+
+
+\page enabling_buffer_device_address Enabling buffer device address
+
+Device extension VK_KHR_buffer_device_address
+allows fetching a raw GPU pointer to a buffer and passing it for use in shader code.
+It has been promoted to core Vulkan 1.2.
+
+If you want to use this feature in connection with VMA, follow these steps:
+
+\section enabling_buffer_device_address_initialization Initialization
+
+1) (For Vulkan version < 1.2) Call `vkEnumerateDeviceExtensionProperties` for the physical device.
+Check if the extension is supported - if returned array of `VkExtensionProperties` contains
+"VK_KHR_buffer_device_address".
+
+2) Call `vkGetPhysicalDeviceFeatures2` for the physical device instead of old `vkGetPhysicalDeviceFeatures`.
+Attach additional structure `VkPhysicalDeviceBufferDeviceAddressFeatures*` to `VkPhysicalDeviceFeatures2::pNext` to be returned.
+Check if the device feature is really supported - check if `VkPhysicalDeviceBufferDeviceAddressFeatures::bufferDeviceAddress` is true.
+
+3) (For Vulkan version < 1.2) While creating device with `vkCreateDevice`, enable this extension - add
+"VK_KHR_buffer_device_address" to the list passed as `VkDeviceCreateInfo::ppEnabledExtensionNames`.
+
+4) While creating the device, also don't set `VkDeviceCreateInfo::pEnabledFeatures`.
+Fill in `VkPhysicalDeviceFeatures2` structure instead and pass it as `VkDeviceCreateInfo::pNext`.
+Enable this device feature - attach additional structure `VkPhysicalDeviceBufferDeviceAddressFeatures*` to
+`VkPhysicalDeviceFeatures2::pNext` and set its member `bufferDeviceAddress` to `VK_TRUE`.
+
+5) While creating #VmaAllocator with vmaCreateAllocator() inform VMA that you
+have enabled this feature - add #VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT
+to VmaAllocatorCreateInfo::flags.
+
+\section enabling_buffer_device_address_usage Usage
+
+After following steps described above, you can create buffers with `VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT*` using VMA.
+The library automatically adds `VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT*` to
+allocated memory blocks wherever it might be needed.
+
+Please note that the library supports only `VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT*`.
+The second part of this functionality related to "capture and replay" is not supported,
+as it is intended for usage in debugging tools like RenderDoc, not in everyday Vulkan usage.
+
+\section enabling_buffer_device_address_more_information More information
+
+To learn more about this extension, see [VK_KHR_buffer_device_address in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap46.html#VK_KHR_buffer_device_address)
+
+Example use of this extension can be found in the code of the sample and test suite
+accompanying this library.
+
+\page general_considerations General considerations
+
+\section general_considerations_thread_safety Thread safety
+
+- The library has no global state, so separate #VmaAllocator objects can be used
+ independently.
+ There should be no need to create multiple such objects though - one per `VkDevice` is enough.
+- By default, all calls to functions that take #VmaAllocator as first parameter
+ are safe to call from multiple threads simultaneously because they are
+ synchronized internally when needed.
+ This includes allocation and deallocation from default memory pool, as well as custom #VmaPool.
+- When the allocator is created with #VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT
+ flag, calls to functions that take such #VmaAllocator object must be
+ synchronized externally.
+- Access to a #VmaAllocation object must be externally synchronized. For example,
+ you must not call vmaGetAllocationInfo() and vmaMapMemory() from different
+ threads at the same time if you pass the same #VmaAllocation object to these
+ functions.
+- #VmaVirtualBlock is not safe to be used from multiple threads simultaneously.
+
+\section general_considerations_versioning_and_compatibility Versioning and compatibility
+
+The library uses [**Semantic Versioning**](https://semver.org/),
+which means version numbers follow convention: Major.Minor.Patch (e.g. 2.3.0), where:
+
+- Incremented Patch version means a release is backward- and forward-compatible,
+ introducing only some internal improvements, bug fixes, optimizations etc.
+ or changes that are out of scope of the official API described in this documentation.
+- Incremented Minor version means a release is backward-compatible,
+ so existing code that uses the library should continue to work, while some new
+ symbols could have been added: new structures, functions, new values in existing
+ enums and bit flags, new structure members, but not new function parameters.
+- Incrementing Major version means a release could break some backward compatibility.
+
+All changes between official releases are documented in file "CHANGELOG.md".
+
+\warning Backward compatibility is considered on the level of C++ source code, not binary linkage.
+Adding new members to existing structures is treated as backward compatible if initializing
+the new members to binary zero results in the old behavior.
+You should always fully initialize all library structures to zeros and not rely on their
+exact binary size.
+
+\section general_considerations_validation_layer_warnings Validation layer warnings
+
+When using this library, you may encounter the following types of warnings issued by
+the Vulkan validation layer. They don't necessarily indicate a bug, so you may need
+to just ignore them.
+
+- *vkBindBufferMemory(): Binding memory to buffer 0xeb8e4 but vkGetBufferMemoryRequirements() has not been called on that buffer.*
+ - It happens when VK_KHR_dedicated_allocation extension is enabled.
+ `vkGetBufferMemoryRequirements2KHR` function is used instead, while validation layer seems to be unaware of it.
+- *Mapping an image with layout VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL can result in undefined behavior if this memory is used by the device. Only GENERAL or PREINITIALIZED should be used.*
+ - It happens when you map a buffer or image, because the library maps entire
+ `VkDeviceMemory` block, where different types of images and buffers may end
+ up together, especially on GPUs with unified memory like Intel.
+- *Non-linear image 0xebc91 is aliased with linear buffer 0xeb8e4 which may indicate a bug.*
+ - It may happen when you use [defragmentation](@ref defragmentation).
+
+\section general_considerations_allocation_algorithm Allocation algorithm
+
+The library uses the following algorithm for allocation, in order:
+
+-# Try to find free range of memory in existing blocks.
+-# If failed, try to create a new block of `VkDeviceMemory`, with preferred block size.
+-# If failed, try to create such block with size / 2, size / 4, size / 8.
+-# If failed, try to allocate separate `VkDeviceMemory` for this allocation,
+ just like when you use #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT.
+-# If failed, choose other memory type that meets the requirements specified in
+ VmaAllocationCreateInfo and go to point 1.
+-# If failed, return `VK_ERROR_OUT_OF_DEVICE_MEMORY`.
+
+\section general_considerations_features_not_supported Features not supported
+
+Features deliberately excluded from the scope of this library:
+
+-# **Data transfer.** Uploading (streaming) and downloading data of buffers and images
+ between CPU and GPU memory and related synchronization is the responsibility of the user.
+ Defining some "texture" object that would automatically stream its data from a
+ staging copy in CPU memory to GPU memory would rather be a feature of another,
+ higher-level library implemented on top of VMA.
+ VMA doesn't record any commands to a `VkCommandBuffer`. It just allocates memory.
+-# **Recreation of buffers and images.** Although the library has functions for
+ buffer and image creation: vmaCreateBuffer(), vmaCreateImage(), you need to
+ recreate these objects yourself after defragmentation. That is because the big
+ structures `VkBufferCreateInfo`, `VkImageCreateInfo` are not stored in
+ #VmaAllocation object.
+-# **Handling CPU memory allocation failures.** When dynamically creating small C++
+ objects in CPU memory (not Vulkan memory), allocation failures are not checked
+ and handled gracefully, because that would complicate code significantly and
+ is usually not needed in desktop PC applications anyway.
+ Success of an allocation is just checked with an assert.
+-# **Code free of any compiler warnings.** Maintaining the library to compile and
+ work correctly on so many different platforms is hard enough. Being free of
+ any warnings, on any version of any compiler, is simply not feasible.
+ There are many preprocessor macros that make some variables unused, function parameters unreferenced,
+ or conditional expressions constant in some configurations.
+ The code of this library should not be bigger or more complicated just to silence these warnings.
+ It is recommended to disable such warnings instead.
+-# This is a C++ library with a C interface. **Bindings or ports to any other programming languages** are welcome as external projects but
+ are not going to be included into this repository.
+*/
Go back to lisible.xyz