-rw-r--r--  assets/model.obj            |    98
-rw-r--r--  assets/texture.png          |   bin 0 -> 5056 bytes
-rw-r--r--  meson.build                 |    48
-rw-r--r--  shaders/triangle.frag       |     5
-rw-r--r--  shaders/triangle.vert       |    26
-rw-r--r--  src/alloc.c                 |   171
-rw-r--r--  src/alloc.h                 |    40
-rw-r--r--  src/engine.c                |    17
-rw-r--r--  src/engine.h                |    14
-rw-r--r--  src/hash.c                  |    15
-rw-r--r--  src/hash.h                  |     9
-rw-r--r--  src/image.c                 |    20
-rw-r--r--  src/image.h                 |    21
-rw-r--r--  src/log.c                   |    13
-rw-r--r--  src/log.h                   |    21
-rw-r--r--  src/main.c                  |    28
-rw-r--r--  src/maths.c                 |   103
-rw-r--r--  src/maths.h                 |    49
-rw-r--r--  src/platform.c              |     5
-rw-r--r--  src/platform.h              |    98
-rw-r--r--  src/platform_sdl.c          |   124
-rw-r--r--  src/renderer.c              |  1470
-rw-r--r--  src/renderer.h              |    45
-rw-r--r--  src/renderer/renderer.c     |  2559
-rw-r--r--  src/renderer/renderer.h     |   126
-rw-r--r--  src/renderer/vma_usage.cpp  |     4
-rw-r--r--  src/renderer/vma_usage.h    |     6
-rw-r--r--  src/str.c                   |   181
-rw-r--r--  src/str.h                   |    62
-rw-r--r--  thirdpartylicenses.md       |    46
-rw-r--r--  vendor/stb_image.h          |  7988
-rw-r--r--  vendor/tiny_obj_loader_c.h  |  1793
-rw-r--r--  vendor/vk_mem_alloc.h       | 19111
33 files changed, 32614 insertions(+), 1702 deletions(-)
diff --git a/assets/model.obj b/assets/model.obj
new file mode 100644
index 0000000..ab344bb
--- /dev/null
+++ b/assets/model.obj
@@ -0,0 +1,98 @@
+# Blender 4.4.1
+# www.blender.org
+mtllib model.mtl
+o Cube
+v 1.000000 1.000000 -1.000000
+v 1.000000 -1.000000 -1.000000
+v 1.000000 1.000000 1.000000
+v 1.000000 -1.000000 1.000000
+v -1.000000 1.000000 -1.000000
+v -1.000000 -1.000000 -1.000000
+v -1.000000 1.000000 1.000000
+v -1.000000 -1.000000 1.000000
+v -1.637802 -1.000000 0.000000
+v 1.389749 1.000000 0.000000
+v -1.637802 1.000000 0.000000
+v 1.389749 -1.000000 0.000000
+v 0.000000 -1.000000 -1.000000
+v 0.000000 1.000000 1.000000
+v 0.000000 -1.000000 1.000000
+v 0.000000 1.000000 -1.000000
+v 0.000000 -1.000000 0.000000
+v 0.000000 1.905488 0.000000
+vn -0.3792 0.6859 0.6211
+vn -0.0000 -0.0000 1.0000
+vn -0.8431 -0.0000 -0.5377
+vn -0.0000 -1.0000 -0.0000
+vn 0.9317 -0.0000 0.3631
+vn -0.0000 -0.0000 -1.0000
+vn 0.9317 -0.0000 -0.3631
+vn -0.8431 -0.0000 0.5377
+vn -0.3792 0.6859 -0.6211
+vn -0.0000 1.0000 -0.0000
+vn 0.4349 0.6675 -0.6044
+vn 0.4349 0.6675 0.6044
+vt 0.875000 0.625000
+vt 0.750000 0.750000
+vt 0.750000 0.625000
+vt 0.625000 0.875000
+vt 0.375000 1.000000
+vt 0.375000 0.875000
+vt 0.625000 0.125000
+vt 0.375000 0.250000
+vt 0.375000 0.125000
+vt 0.375000 0.625000
+vt 0.250000 0.750000
+vt 0.250000 0.625000
+vt 0.625000 0.625000
+vt 0.375000 0.750000
+vt 0.625000 0.375000
+vt 0.375000 0.500000
+vt 0.375000 0.375000
+vt 0.625000 0.500000
+vt 0.250000 0.500000
+vt 0.625000 0.000000
+vt 0.375000 0.000000
+vt 0.750000 0.500000
+vt 0.125000 0.625000
+vt 0.125000 0.500000
+vt 0.625000 0.250000
+vt 0.125000 0.750000
+vt 0.625000 0.750000
+vt 0.875000 0.750000
+vt 0.625000 1.000000
+vt 0.875000 0.500000
+s 0
+usemtl Material
+f 11/1/1 14/2/1 18/3/1
+f 14/4/2 8/5/2 15/6/2
+f 11/7/3 6/8/3 9/9/3
+f 12/10/4 15/11/4 17/12/4
+f 10/13/5 4/14/5 12/10/5
+f 16/15/6 2/16/6 13/17/6
+f 1/18/7 12/10/7 2/16/7
+f 2/16/4 17/12/4 13/19/4
+f 7/20/8 9/9/8 8/21/8
+f 16/22/9 11/1/9 18/3/9
+f 16/22/10 10/13/10 1/18/10
+f 13/19/4 9/23/4 6/24/4
+f 5/25/6 13/17/6 6/8/6
+f 17/12/4 8/26/4 9/23/4
+f 3/27/2 15/6/2 4/14/2
+f 10/13/10 14/2/10 3/27/10
+f 11/1/10 7/28/10 14/2/10
+f 14/4/2 7/29/2 8/5/2
+f 11/7/3 5/25/3 6/8/3
+f 12/10/4 4/14/4 15/11/4
+f 10/13/5 3/27/5 4/14/5
+f 16/15/6 1/18/6 2/16/6
+f 1/18/7 10/13/7 12/10/7
+f 2/16/4 12/10/4 17/12/4
+f 7/20/8 11/7/8 9/9/8
+f 16/22/10 5/30/10 11/1/10
+f 16/22/11 18/3/11 10/13/11
+f 13/19/4 17/12/4 9/23/4
+f 5/25/6 16/15/6 13/17/6
+f 17/12/4 15/11/4 8/26/4
+f 3/27/2 14/4/2 15/6/2
+f 10/13/12 18/3/12 14/2/12
diff --git a/assets/texture.png b/assets/texture.png
Binary files differ
new file mode 100644
index 0000000..4065f75
--- /dev/null
+++ b/assets/texture.png
diff --git a/meson.build b/meson.build
index 6aa0ceb..8ea3009 100644
--- a/meson.build
+++ b/meson.build
@@ -1,8 +1,11 @@
-project('visible-gltf', 'c', default_options: ['warning_level=3', 'c_std=c23'])
+project('visiblegltf', [ 'c', 'cpp' ], default_options: ['c_std=c2x', 'cpp_std=c++20', 'warning_level=3'])
 
 build_type = get_option('buildtype')
 
 sdl3_dep = dependency('sdl3')
+
+vendor_incdir = include_directories('vendor', is_system: true)
+
 if host_machine.system() == 'darwin'
 moltenvk_library_path = '/Users/clements/dev/VulkanSDK/1.4.309.0/macOS/lib'
 moltenvk_include_path = '/Users/clements/dev/VulkanSDK/1.4.309.0/macOS/include'
@@ -10,7 +13,7 @@ vulkan_dep = declare_dependency(
   link_args: ['-L' + moltenvk_library_path, '-lvulkan'],
   include_directories:
include_directories(moltenvk_include_path)
 )
-else 
+else
 vulkan_dep = dependency('vulkan')
 endif
@@ -19,14 +22,39 @@ if build_type == 'debug'
   vgltf_c_args += '-DVGLTF_DEBUG'
 endif
 
-executable(
+if host_machine.system() == 'darwin'
+  vgltf_c_args += '-DVGLTF_PLATFORM_MACOS'
+elif host_machine.system() == 'linux'
+  vgltf_c_args += '-DVGLTF_PLATFORM_LINUX'
+elif host_machine.system() == 'windows'
+  vgltf_c_args += '-DVGLTF_PLATFORM_WINDOWS'
+endif
+
+vgltf_deps = [
+  sdl3_dep,
+  vulkan_dep,
+]
+
+vgltf_srcs = [
+  'src/main.c',
+  'src/log.c',
+  'src/maths.c',
+  'src/alloc.c',
+  'src/hash.c',
+  'src/str.c',
+  'src/platform.c',
+  'src/platform_sdl.c',
+  'src/image.c',
+  'src/renderer/renderer.c',
+  'src/renderer/vma_usage.cpp',
+  'src/engine.c',
+]
+
+vgltf_exe = executable(
   'vgltf',
-  [
-    'src/main.c',
-    'src/log.c',
-    'src/platform_sdl.c',
-    'src/renderer.c',
-  ],
+  vgltf_srcs,
   c_args: vgltf_c_args,
-  dependencies: [sdl3_dep, vulkan_dep],
+  dependencies: vgltf_deps,
+  link_language: 'cpp',
+  include_directories: [vendor_incdir]
 )
diff --git a/shaders/triangle.frag b/shaders/triangle.frag
index 7c5b0e7..c7d99f3 100644
--- a/shaders/triangle.frag
+++ b/shaders/triangle.frag
@@ -1,9 +1,12 @@
 #version 450
 
 layout(location = 0) in vec3 fragColor;
+layout(location = 1) in vec2 fragTextureCoordinates;
 
 layout(location = 0) out vec4 outColor;
 
+layout(binding = 1) uniform sampler2D textureSampler;
+
 void main() {
-    outColor = vec4(fragColor, 1.0);
+    outColor = vec4(fragColor * texture(textureSampler, fragTextureCoordinates).rgb, 1.0);
 }
diff --git a/shaders/triangle.vert b/shaders/triangle.vert
index f5b2f8d..bf93f44 100644
--- a/shaders/triangle.vert
+++ b/shaders/triangle.vert
@@ -1,20 +1,20 @@
 #version 450
 
-layout(location = 0) out vec3 fragColor;
+layout(location = 0) in vec3 inPosition;
+layout(location = 1) in vec3 inColor;
+layout(location = 2) in vec2 inTextureCoordinates;
 
-vec2 positions[3] = vec2[](
-    vec2(0.0, -0.5),
-    vec2(0.5, 0.5),
-    vec2(-0.5, 0.5)
-);
+layout(location = 0) out vec3 fragColor;
+layout(location = 1) out vec2 fragTextureCoordinates;
 
-vec3 colors[3] = vec3[](
-    vec3(1.0, 0.0, 0.0),
-    vec3(0.0, 1.0, 0.0),
-    vec3(0.0, 0.0, 1.0)
-);
+layout(set = 0, binding = 0) uniform UniformBufferObject {
+    mat4 model;
+    mat4 view;
+    mat4 projection;
+} ubo;
 
 void main() {
-    gl_Position = vec4(positions[gl_VertexIndex], 0.0, 1.0);
-    fragColor = colors[gl_VertexIndex];
+    gl_Position = ubo.projection * ubo.view * ubo.model * vec4(inPosition, 1.0);
+    fragColor = inColor;
+    fragTextureCoordinates = inTextureCoordinates;
 }
diff --git a/src/alloc.c b/src/alloc.c
new file mode 100644
index 0000000..2fb7a78
--- /dev/null
+++ b/src/alloc.c
@@ -0,0 +1,171 @@
+#include "alloc.h"
+#include "maths.h"
+#include "platform.h"
+#include <assert.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+void *vgltf_allocator_allocate(struct vgltf_allocator *allocator, size_t size) {
+  assert(allocator);
+  return allocator->allocate(size, allocator->ctx);
+}
+void *vgltf_allocator_allocate_aligned(struct vgltf_allocator *allocator,
+                                     size_t alignment, size_t size) {
+  assert(allocator);
+  return allocator->allocate_aligned(alignment, size, allocator->ctx);
+}
+void *vgltf_allocator_allocate_array(struct vgltf_allocator *allocator,
+                                   size_t count, size_t item_size) {
+  assert(allocator);
+  return
allocator->allocate_array(count, item_size, allocator->ctx); +} +void *vgltf_allocator_reallocate(struct vgltf_allocator *allocator, void *ptr, +                               size_t old_size, size_t new_size) { +  assert(allocator); +  return allocator->reallocate(ptr, old_size, new_size, allocator->ctx); +} +void vgltf_allocator_free(struct vgltf_allocator *allocator, void *ptr) { +  assert(allocator); +  allocator->free(ptr, allocator->ctx); +} + +static void *memory_allocate(size_t size, void *ctx) { +  (void)ctx; +  void *ptr = malloc(size); +  if (!ptr) { +    VGLTF_PANIC("Couldn't allocate memory (out of mem?)"); +  } +  return ptr; +} + +static void *memory_allocate_aligned(size_t alignment, size_t size, void *ctx) { +  (void)ctx; +#ifdef VGLTF_PLATFORM_WINDOWS +  void *ptr = _aligned_malloc(size, VGLTF_MAX(alignment, sizeof(void *))); +#else +  void *ptr = aligned_alloc(VGLTF_MAX(alignment, sizeof(void *)), size); +#endif +  if (!ptr) { +    VGLTF_PANIC("Couldn't allocate aligned memory (out of mem?)"); +  } +  return ptr; +} + +static void *memory_allocate_array(size_t count, size_t item_size, void *ctx) { +  (void)ctx; +  void *ptr = calloc(count, item_size); +  if (!ptr) { +    VGLTF_PANIC("Couldn't allocate memory (out of mem?)"); +  } +  return ptr; +} + +static void *memory_reallocate(void *ptr, size_t old_size, size_t new_size, +                               void *ctx) { +  (void)old_size; +  (void)ctx; +  ptr = realloc(ptr, new_size); +  if (!ptr) { +    VGLTF_PANIC("Couldn't allocate memory (out of mem?)"); +  } +  return ptr; +} + +static void memory_free(void *ptr, void *ctx) { +  (void)ctx; +  free(ptr); +} + +thread_local struct vgltf_allocator system_allocator = { +    .allocate = memory_allocate, +    .allocate_aligned = memory_allocate_aligned, +    .allocate_array = memory_allocate_array, +    .reallocate = memory_reallocate, +    .free = memory_free}; + +void vgltf_arena_init(struct vgltf_allocator *allocator, struct vgltf_arena *arena, +                    size_t size) { +  assert(allocator); +  assert(arena); +  arena->size = 0; +  arena->capacity = size; +  arena->data = vgltf_allocator_allocate(allocator, size); +} +void vgltf_arena_deinit(struct vgltf_allocator *allocator, +                      struct vgltf_arena *arena) { +  assert(allocator); +  assert(arena); +  vgltf_allocator_free(allocator, arena->data); +} +void *vgltf_arena_allocate(struct vgltf_arena *arena, size_t size) { +  assert(arena); +  assert(arena->size + size <= arena->capacity); +  void *ptr = arena->data + arena->size; +  arena->size += size; +  return ptr; +} + +void *vgltf_arena_allocate_array(struct vgltf_arena *arena, size_t count, +                               size_t item_size) { +  assert(arena); +  void *ptr = vgltf_arena_allocate(arena, count * item_size); +  memset(ptr, 0, count * item_size); +  return ptr; +} + +void vgltf_arena_reset(struct vgltf_arena *arena) { +  assert(arena); +  arena->size = 0; +} + +static void *arena_allocator_allocate(size_t size, void *ctx) { +  assert(ctx); +  return vgltf_arena_allocate(ctx, size); +} +static void *arena_allocator_allocate_aligned(size_t alignment, size_t size, +                                              void *ctx) { +  assert(ctx); +  if (alignment < sizeof(void *) || (alignment & (alignment - 1)) != 0) { +    return NULL; +  } + +  void *ptr = vgltf_arena_allocate(ctx, size + alignment - 1 + sizeof(void *)); +  if (!ptr) { +    return NULL; +  } + +  return (void *)(((uintptr_t)ptr + sizeof(void *) + alignment - 1) & +      
            ~(alignment - 1)); +} + +static void *arena_allocator_allocate_array(size_t count, size_t item_size, +                                            void *ctx) { +  assert(ctx); +  return vgltf_arena_allocate_array(ctx, count, item_size); +} + +static void *arena_allocator_reallocate(void *ptr, size_t old_size, +                                        size_t new_size, void *ctx) { +  assert(ptr); +  assert(ctx); + +  void *new_ptr = vgltf_arena_allocate(ctx, new_size); +  memcpy(new_ptr, ptr, old_size); +  return new_ptr; +} + +static void arena_allocator_free(void *ptr, void *ctx) { +  assert(ctx); +  (void)ptr; +} + +struct vgltf_allocator vgltf_arena_allocator(struct vgltf_arena *arena) { +  return (struct vgltf_allocator){ +      .ctx = arena, +      .allocate = arena_allocator_allocate, +      .allocate_aligned = arena_allocator_allocate_aligned, +      .allocate_array = arena_allocator_allocate_array, +      .reallocate = arena_allocator_reallocate, +      .free = arena_allocator_free}; +} diff --git a/src/alloc.h b/src/alloc.h new file mode 100644 index 0000000..bde1d55 --- /dev/null +++ b/src/alloc.h @@ -0,0 +1,40 @@ +#ifndef VGLTF_ALLOC_H +#define VGLTF_ALLOC_H + +#include <stddef.h> + +struct vgltf_allocator { +  void *(*allocate)(size_t size, void *ctx); +  void *(*allocate_aligned)(size_t alignment, size_t size, void *ctx); +  void *(*allocate_array)(size_t count, size_t item_size, void *ctx); +  void *(*reallocate)(void *ptr, size_t old_size, size_t new_size, void *ctx); +  void (*free)(void *ptr, void *ctx); +  void *ctx; +}; + +void *vgltf_allocator_allocate(struct vgltf_allocator *allocator, size_t size); +void *vgltf_allocator_allocate_aligned(struct vgltf_allocator *allocator, +                                     size_t alignment, size_t size); +void *vgltf_allocator_allocate_array(struct vgltf_allocator *allocator, +                                   size_t count, size_t item_size); +void *vgltf_allocator_reallocate(struct vgltf_allocator *allocator, void *ptr, +                               size_t old_size, size_t new_size); +void vgltf_allocator_free(struct vgltf_allocator *allocator, void *ptr); + +extern thread_local struct vgltf_allocator system_allocator; + +struct vgltf_arena { +  size_t capacity; +  size_t size; +  char *data; +}; +void vgltf_arena_init(struct vgltf_allocator *allocator, struct vgltf_arena *arena, +                    size_t size); +void vgltf_arena_deinit(struct vgltf_allocator *allocator, struct vgltf_arena *arena); +void *vgltf_arena_allocate(struct vgltf_arena *arena, size_t size); +void *vgltf_arena_allocate_array(struct vgltf_arena *arena, size_t count, +                               size_t item_size); +void vgltf_arena_reset(struct vgltf_arena *arena); +struct vgltf_allocator vgltf_arena_allocator(struct vgltf_arena *arena); + +#endif // VGLTF_ALLOC_H diff --git a/src/engine.c b/src/engine.c new file mode 100644 index 0000000..8904474 --- /dev/null +++ b/src/engine.c @@ -0,0 +1,17 @@ +#include "engine.h" + +bool vgltf_engine_init(struct vgltf_engine *engine, struct vgltf_platform *platform) { +  if (!vgltf_renderer_init(&engine->renderer, platform)) { +    goto err; +  } + +  return true; +err: +  return false; +} +void vgltf_engine_deinit(struct vgltf_engine *engine) { +  vgltf_renderer_deinit(&engine->renderer); +} +void vgltf_engine_run_frame(struct vgltf_engine *engine) { +  vgltf_renderer_render_frame(&engine->renderer); +} diff --git a/src/engine.h b/src/engine.h new file mode 100644 index 0000000..5a7bc2d --- /dev/null 
+++ b/src/engine.h @@ -0,0 +1,14 @@ +#ifndef VGLTF_ENGINE_H +#define VGLTF_ENGINE_H + +#include "renderer/renderer.h" + +struct vgltf_engine { +  struct vgltf_renderer renderer; +}; + +bool vgltf_engine_init(struct vgltf_engine *engine, struct vgltf_platform *platform); +void vgltf_engine_deinit(struct vgltf_engine *engine); +void vgltf_engine_run_frame(struct vgltf_engine *engine); + +#endif // VGLTF_ENGINE_H diff --git a/src/hash.c b/src/hash.c new file mode 100644 index 0000000..cfdafc3 --- /dev/null +++ b/src/hash.c @@ -0,0 +1,15 @@ +#include "hash.h" +#include <assert.h> + +uint64_t vgltf_hash_fnv_1a(const char *bytes, size_t nbytes) { +  assert(bytes); +  static const uint64_t FNV_OFFSET_BASIS = 14695981039346656037u; +  static const uint64_t FNV_PRIME = 1099511628211u; +  uint64_t hash = FNV_OFFSET_BASIS; +  for (size_t i = 0; i < nbytes; i++) { +    hash = hash ^ bytes[i]; +    hash = hash * FNV_PRIME; +  } + +  return hash; +} diff --git a/src/hash.h b/src/hash.h new file mode 100644 index 0000000..f4f8e76 --- /dev/null +++ b/src/hash.h @@ -0,0 +1,9 @@ +#ifndef VGLTF_HASH_H +#define VGLTF_HASH_H + +#include <stddef.h> +#include <stdint.h> + +uint64_t vgltf_hash_fnv_1a(const char *bytes, size_t nbytes); + +#endif // VGLTF_HASH_H diff --git a/src/image.c b/src/image.c new file mode 100644 index 0000000..a2d29c7 --- /dev/null +++ b/src/image.c @@ -0,0 +1,20 @@ +#include "image.h" + +#define STB_IMAGE_IMPLEMENTATION +#include <stb_image.h> + +bool vgltf_image_load_from_file(struct vgltf_image *image, +                              struct vgltf_string_view path) { +  int width; +  int height; +  int tex_channels; +  image->data = +      stbi_load(path.data, &width, &height, &tex_channels, STBI_rgb_alpha); +  image->width = width; +  image->height = height; +  image->format = VGLTF_IMAGE_FORMAT_R8G8B8A8; + +  return image->data != nullptr; +} + +void vgltf_image_deinit(struct vgltf_image *image) { stbi_image_free(image->data); } diff --git a/src/image.h b/src/image.h new file mode 100644 index 0000000..426d605 --- /dev/null +++ b/src/image.h @@ -0,0 +1,21 @@ +#ifndef VGLTF_IMAGE_H +#define VGLTF_IMAGE_H + +#include <stdint.h> +#include "str.h" + +enum vgltf_image_format { +  VGLTF_IMAGE_FORMAT_R8G8B8A8, +}; + +struct vgltf_image { +  unsigned char* data; +  uint32_t width; +  uint32_t height; +  enum vgltf_image_format format; +}; + +bool vgltf_image_load_from_file(struct vgltf_image* image, struct vgltf_string_view path); +void vgltf_image_deinit(struct vgltf_image* image); + +#endif // VGLTF_IMAGE_H @@ -1,12 +1,5 @@  #include "log.h" -const char *vgltf_log_level_to_str(enum vgltf_log_level level) { -  switch (level) { -  case VGLTF_LOG_ERROR: -    return "error"; -  case VGLTF_LOG_INFO: -    return "info"; -  case VGLTF_LOG_DEBUG: -    return "debug"; -  } -} +const char *vgltf_log_level_str[] = {[VGLTF_LOG_LEVEL_DBG] = "debug", +                                   [VGLTF_LOG_LEVEL_INFO] = "info", +                                   [VGLTF_LOG_LEVEL_ERR] = "error"}; @@ -1,25 +1,26 @@  #ifndef VGLTF_LOG_H  #define VGLTF_LOG_H -#include <stdio.h> +#include <stdio.h> // IWYU pragma: keep  enum vgltf_log_level { -  VGLTF_LOG_DEBUG, -  VGLTF_LOG_INFO, -  VGLTF_LOG_ERROR, +  VGLTF_LOG_LEVEL_DBG, +  VGLTF_LOG_LEVEL_INFO, +  VGLTF_LOG_LEVEL_ERR,  }; -const char *vgltf_log_level_to_str(enum vgltf_log_level level); -#define VGLTF_LOG(level, ...)                                                  \ +extern const char *vgltf_log_level_str[]; + +#define VGLTF_LOG(level, ...)                           
                         \    do {                                                                         \ -    fprintf(stderr, "[%s %s:%d] ", vgltf_log_level_to_str(level), __FILE__,    \ +    fprintf(stderr, "[%s %s:%d] ", vgltf_log_level_str[level], __FILE__,         \              __LINE__);                                                         \      fprintf(stderr, __VA_ARGS__);                                              \      fprintf(stderr, "\n");                                                     \    } while (0) -#define VGLTF_LOG_DBG(...) VGLTF_LOG(VGLTF_LOG_DEBUG, __VA_ARGS__) -#define VGLTF_LOG_INFO(...) VGLTF_LOG(VGLTF_LOG_INFO, __VA_ARGS__) -#define VGLTF_LOG_ERR(...) VGLTF_LOG(VGLTF_LOG_ERROR, __VA_ARGS__) +#define VGLTF_LOG_DBG(...) VGLTF_LOG(VGLTF_LOG_LEVEL_DBG, __VA_ARGS__) +#define VGLTF_LOG_INFO(...) VGLTF_LOG(VGLTF_LOG_LEVEL_INFO, __VA_ARGS__) +#define VGLTF_LOG_ERR(...) VGLTF_LOG(VGLTF_LOG_LEVEL_ERR, __VA_ARGS__)  #endif // VGLTF_LOG_H @@ -1,43 +1,39 @@ +#include "engine.h"  #include "log.h"  #include "platform.h" -#include "renderer.h"  int main(void) {    struct vgltf_platform platform = {};    if (!vgltf_platform_init(&platform)) { -    VGLTF_LOG_ERR("Couldn't initialize the platform layer"); +    VGLTF_LOG_ERR("Platform initialization failed");      goto err;    } -  struct vgltf_renderer renderer = {}; -  if (!vgltf_renderer_init(&renderer, &platform)) { -    VGLTF_LOG_ERR("Couldn't initialize the renderer"); +  struct vgltf_engine engine = {}; +  if (!vgltf_engine_init(&engine, &platform)) { +    VGLTF_LOG_ERR("Couldn't initialize the engine");      goto deinit_platform;    } +  VGLTF_LOG_INFO("Starting main loop");    while (true) {      struct vgltf_event event;      while (vgltf_platform_poll_event(&platform, &event)) { -      if (event.type == VGLTF_EVENT_QUIT || -          (event.type == VGLTF_EVENT_KEY_DOWN && -           event.key.key == VGLTF_KEY_ESCAPE)) { +      if (event.type == VGLTF_EVENT_QUIT || (event.type == VGLTF_EVENT_KEY_DOWN && +                                           event.key.key == VGLTF_KEY_ESCAPE)) {          goto out_main_loop; -      } else if (event.type == VGLTF_EVENT_WINDOW_RESIZED) { -        vgltf_renderer_on_window_resized( -            &renderer, -            (struct vgltf_window_size){.width = event.window_resized.width, -                                       .height = event.window_resized.height});        }      } -    vgltf_renderer_triangle_pass(&renderer); +    vgltf_engine_run_frame(&engine);    }  out_main_loop: -  vgltf_renderer_deinit(&renderer); +  VGLTF_LOG_INFO("Exiting main loop"); +  vgltf_engine_deinit(&engine);    vgltf_platform_deinit(&platform);    return 0;  deinit_platform:    vgltf_platform_deinit(&platform);  err: -  return 1; +  return -1;  } diff --git a/src/maths.c b/src/maths.c new file mode 100644 index 0000000..a79c68f --- /dev/null +++ b/src/maths.c @@ -0,0 +1,103 @@ +#include "maths.h" +#include <math.h> +#include <string.h> + +vgltf_vec3 vgltf_vec3_sub(vgltf_vec3 lhs, vgltf_vec3 rhs) { +  return (vgltf_vec3){.x = lhs.x - rhs.x, .y = lhs.y - rhs.y, .z = lhs.z - rhs.z}; +} +vgltf_vec3 vgltf_vec3_cross(vgltf_vec3 lhs, vgltf_vec3 rhs) { +  return (vgltf_vec3){.x = lhs.y * rhs.z - lhs.z * rhs.y, +                    .y = lhs.z * rhs.x - lhs.x * rhs.z, +                    .z = lhs.x * rhs.y - lhs.y * rhs.x}; +} +vgltf_vec_value_type vgltf_vec3_dot(vgltf_vec3 lhs, vgltf_vec3 rhs) { +  return lhs.x * rhs.x + lhs.y * rhs.y + lhs.z * rhs.z; +} +vgltf_vec_value_type vgltf_vec3_length(vgltf_vec3 
vec) { +  return sqrtf(vec.x * vec.x + vec.y * vec.y + vec.z * vec.z); +} +vgltf_vec3 vgltf_vec3_normalized(vgltf_vec3 vec) { +  vgltf_vec_value_type length = vgltf_vec3_length(vec); +  return (vgltf_vec3){ +      .x = vec.x / length, .y = vec.y / length, .z = vec.z / length}; +} +void vgltf_mat4_multiply(vgltf_mat4 out, vgltf_mat4 lhs, vgltf_mat4 rhs) { +  for (int i = 0; i < 4; ++i) { +    for (int j = 0; j < 4; ++j) { +      out[i * 4 + j] = +          lhs[i * 4 + 0] * rhs[0 * 4 + j] + lhs[i * 4 + 1] * rhs[1 * 4 + j] + +          lhs[i * 4 + 2] * rhs[2 * 4 + j] + lhs[i * 4 + 3] * rhs[3 * 4 + j]; +    } +  } +} +void vgltf_mat4_rotate(vgltf_mat4 out, vgltf_mat4 matrix, +                     vgltf_mat_value_type angle_radians, vgltf_vec3 axis) { +  vgltf_vec3 a = vgltf_vec3_normalized(axis); +  vgltf_vec_value_type c = cosf(angle_radians); +  vgltf_vec_value_type s = sinf(angle_radians); +  vgltf_vec_value_type t = 1.f - c; + +  vgltf_mat4 rotation_matrix = {t * a.x * a.x + c, +                              t * a.x * a.y - s * a.z, +                              t * a.x * a.z + s * a.y, +                              0.f, +                              t * a.x * a.y + s * a.z, +                              t * a.y * a.y + c, +                              t * a.y * a.z - s * a.x, +                              0.f, +                              t * a.x * a.z - s * a.y, +                              t * a.y * a.z + s * a.x, +                              t * a.z * a.z + c, +                              0.f, +                              0.f, +                              0.f, +                              0.f, +                              1.f}; + +  vgltf_mat4_multiply(out, matrix, rotation_matrix); +} +void vgltf_mat4_look_at(vgltf_mat4 out, vgltf_vec3 eye_position, +                      vgltf_vec3 target_position, vgltf_vec3 up_axis) { +  vgltf_vec3 forward = +      vgltf_vec3_normalized(vgltf_vec3_sub(target_position, eye_position)); +  vgltf_vec3 right = vgltf_vec3_normalized(vgltf_vec3_cross(forward, up_axis)); +  vgltf_vec3 camera_up = vgltf_vec3_cross(right, forward); + +  memcpy(out, (const vgltf_mat4)VGLTF_MAT4_IDENTITY, sizeof(vgltf_mat4)); +  out[0 * 4 + 0] = right.x; +  out[1 * 4 + 0] = right.y; +  out[2 * 4 + 0] = right.z; +  out[0 * 4 + 1] = camera_up.x; +  out[1 * 4 + 1] = camera_up.y; +  out[2 * 4 + 1] = camera_up.z; +  out[0 * 4 + 2] = -forward.x; +  out[1 * 4 + 2] = -forward.y; +  out[2 * 4 + 2] = -forward.z; +  out[3 * 4 + 0] = -vgltf_vec3_dot(right, eye_position); +  out[3 * 4 + 1] = -vgltf_vec3_dot(camera_up, eye_position); +  out[3 * 4 + 2] = vgltf_vec3_dot(forward, eye_position); +} +void vgltf_mat4_perspective(vgltf_mat4 out, vgltf_mat_value_type fov_radians, +                          vgltf_mat_value_type aspect_ratio, +                          vgltf_mat_value_type near, vgltf_mat_value_type far) { +  float tan_half_fovy = tanf(fov_radians / 2.0f); +  out[0] = 1.f / (aspect_ratio * tan_half_fovy); +  out[1] = 0.0f; +  out[2] = 0.0f; +  out[3] = 0.0f; + +  out[4] = 0.0f; +  out[5] = 1.f / tan_half_fovy; +  out[6] = 0.0f; +  out[7] = 0.0f; + +  out[8] = 0.0f; +  out[9] = 0.0f; +  out[10] = -(far + near) / (far - near); +  out[11] = -1.0f; + +  out[12] = 0.0f; +  out[13] = 0.0f; +  out[14] = -(2.0f * far * near) / (far - near); +  out[15] = 0.0f; +} diff --git a/src/maths.h b/src/maths.h new file mode 100644 index 0000000..d50f285 --- /dev/null +++ b/src/maths.h @@ -0,0 +1,49 @@ +#ifndef VGLTF_MATHS_H +#define VGLTF_MATHS_H + +typedef float 
vgltf_vec_value_type; + +constexpr double VGLTF_MATHS_PI = 3.14159265358979323846; +#define VGLTF_MATHS_DEG_TO_RAD(deg) (deg * VGLTF_MATHS_PI / 180.0) +#define VGLTF_MAX(x, y) ((x) > (y) ? (x) : (y)) + +typedef struct { +  vgltf_vec_value_type x; +  vgltf_vec_value_type y; +} vgltf_vec2; + +typedef struct { +  vgltf_vec_value_type x; +  vgltf_vec_value_type y; +  vgltf_vec_value_type z; +} vgltf_vec3; +vgltf_vec3 vgltf_vec3_sub(vgltf_vec3 lhs, vgltf_vec3 rhs); +vgltf_vec3 vgltf_vec3_cross(vgltf_vec3 lhs, vgltf_vec3 rhs); +vgltf_vec_value_type vgltf_vec3_dot(vgltf_vec3 lhs, vgltf_vec3 rhs); + +vgltf_vec_value_type vgltf_vec3_length(vgltf_vec3 vec); +vgltf_vec3 vgltf_vec3_normalized(vgltf_vec3 vec); + +typedef vgltf_vec_value_type vgltf_mat_value_type; + +// row major +typedef vgltf_mat_value_type vgltf_mat4[16]; +void vgltf_mat4_multiply(vgltf_mat4 out, vgltf_mat4 lhs, vgltf_mat4 rhs); +void vgltf_mat4_rotate(vgltf_mat4 out, vgltf_mat4 matrix, +                     vgltf_mat_value_type angle_radians, vgltf_vec3 axis); +void vgltf_mat4_look_at(vgltf_mat4 out, vgltf_vec3 eye_position, +                      vgltf_vec3 target_position, vgltf_vec3 up_axis); +void vgltf_mat4_perspective(vgltf_mat4 out, vgltf_mat_value_type fov, +                          vgltf_mat_value_type aspect_ratio, +                          vgltf_mat_value_type near, vgltf_mat_value_type far); + +// clang-format off +#define VGLTF_MAT4_IDENTITY { \ +  1, 0, 0, 0, \ +  0, 1, 0, 0, \ +  0, 0, 1, 0, \ +  0, 0, 0, 1, \ +} +// clang-format on + +#endif // VGLTF_MATHS_H diff --git a/src/platform.c b/src/platform.c new file mode 100644 index 0000000..da4d7d4 --- /dev/null +++ b/src/platform.c @@ -0,0 +1,5 @@ +#include "platform.h" + +#define VGLTF_GENERATE_KEY_STRING(KEY) #KEY, +const char *vgltf_key_str[] = {VGLTF_FOREACH_KEY(VGLTF_GENERATE_KEY_STRING)}; +#undef VGLTF_GENERATE_KEY_STRING diff --git a/src/platform.h b/src/platform.h index fe719d3..aff673f 100644 --- a/src/platform.h +++ b/src/platform.h @@ -3,66 +3,61 @@  #include "log.h"  #include <stdint.h> -#include <stdlib.h> +#include <stdlib.h> // IWYU pragma: keep -#define VGLTF_PANIC(...)                                                       \ +#define VGLTF_PANIC(...)                                                         
\    do {                                                                         \ -    VGLTF_LOG_ERR("panic: " __VA_ARGS__);                                      \ +    VGLTF_LOG_ERR("PANIC " __VA_ARGS__);                                         \      exit(1);                                                                   \    } while (0) -enum vgltf_event_type { -  VGLTF_EVENT_QUIT, -  VGLTF_EVENT_KEY_DOWN, -  VGLTF_EVENT_WINDOW_RESIZED, -  VGLTF_EVENT_UNKNOWN, -}; +#define VGLTF_FOREACH_KEY(_M)                                                    \ +  _M(A)                                                                        \ +  _M(B)                                                                        \ +  _M(C)                                                                        \ +  _M(D)                                                                        \ +  _M(E)                                                                        \ +  _M(F)                                                                        \ +  _M(G)                                                                        \ +  _M(H)                                                                        \ +  _M(I)                                                                        \ +  _M(J)                                                                        \ +  _M(K)                                                                        \ +  _M(L)                                                                        \ +  _M(M)                                                                        \ +  _M(N)                                                                        \ +  _M(O)                                                                        \ +  _M(P)                                                                        \ +  _M(Q)                                                                        \ +  _M(R)                                                                        \ +  _M(S)                                                                        \ +  _M(T)                                                                        \ +  _M(U)                                                                        \ +  _M(V)                                                                        \ +  _M(W)                                                                        \ +  _M(X)                                                                        \ +  _M(Y)                                                                        \ +  _M(Z)                                                                        \ +  _M(ESCAPE) +#define VGLTF_GENERATE_KEY_ENUM(KEY) VGLTF_KEY_##KEY,  enum vgltf_key { -  VGLTF_KEY_A, -  VGLTF_KEY_B, -  VGLTF_KEY_C, -  VGLTF_KEY_D, -  VGLTF_KEY_E, -  VGLTF_KEY_F, -  VGLTF_KEY_G, -  VGLTF_KEY_H, -  VGLTF_KEY_I, -  VGLTF_KEY_J, -  VGLTF_KEY_K, -  VGLTF_KEY_L, -  VGLTF_KEY_M, -  VGLTF_KEY_N, -  VGLTF_KEY_O, -  VGLTF_KEY_P, -  VGLTF_KEY_Q, -  VGLTF_KEY_R, -  VGLTF_KEY_S, -  VGLTF_KEY_T, -  VGLTF_KEY_U, -  VGLTF_KEY_V, -  VGLTF_KEY_W, -  VGLTF_KEY_X, -  VGLTF_KEY_Y, -  VGLTF_KEY_Z, -  VGLTF_KEY_ESCAPE, +  VGLTF_FOREACH_KEY(VGLTF_GENERATE_KEY_ENUM) VGLTF_KEY_COUNT,    VGLTF_KEY_UNKNOWN  }; +#undef VGLTF_GENERATE_KEY_ENUM +extern const char *vgltf_key_str[]; + +enum vgltf_event_type { VGLTF_EVENT_QUIT, VGLTF_EVENT_KEY_DOWN, VGLTF_EVENT_UNKNOWN };  struct vgltf_key_event {    enum vgltf_key key;  }; -struct vgltf_window_resized_event { 
-  int32_t width; -  int32_t height; -}; -  struct vgltf_event {    enum vgltf_event_type type;    union {      struct vgltf_key_event key; -    struct vgltf_window_resized_event window_resized;    };  }; @@ -75,18 +70,19 @@ struct vgltf_platform;  bool vgltf_platform_init(struct vgltf_platform *platform);  void vgltf_platform_deinit(struct vgltf_platform *platform);  bool vgltf_platform_poll_event(struct vgltf_platform *platform, -                               struct vgltf_event *event); +                             struct vgltf_event *event);  bool vgltf_platform_get_window_size(struct vgltf_platform *platform, -                                    struct vgltf_window_size *window_size); - -// Vulkan specifics -#include "vulkan/vulkan_core.h" -char const *const * +                                  struct vgltf_window_size *window_size); +bool vgltf_platform_get_current_time_nanoseconds(long *time); +char *vgltf_platform_read_file_to_string(const char *filepath, size_t *out_size); +const char *const *  vgltf_platform_get_vulkan_instance_extensions(struct vgltf_platform *platform, -                                              uint32_t *count); +                                            uint32_t *count); + +#include <vulkan/vulkan.h>  bool vgltf_platform_create_vulkan_surface(struct vgltf_platform *platform, -                                          VkInstance instance, -                                          VkSurfaceKHR *surface); +                                        VkInstance instance, +                                        VkSurfaceKHR *surface);  #include "platform_sdl.h" diff --git a/src/platform_sdl.c b/src/platform_sdl.c index 5cc6032..6593b9e 100644 --- a/src/platform_sdl.c +++ b/src/platform_sdl.c @@ -1,29 +1,25 @@ +#include "platform_sdl.h"  #include "log.h"  #include "platform.h" -#include "platform_sdl.h" -#include <SDL3/SDL_vulkan.h>  bool vgltf_platform_init(struct vgltf_platform *platform) { +  VGLTF_LOG_INFO("Initializing SDL platform..."); +    if (!SDL_Init(SDL_INIT_VIDEO)) {      VGLTF_LOG_ERR("SDL initialization failed: %s", SDL_GetError());      goto err;    } -  constexpr char WINDOW_TITLE[] = "VisibleGLTF"; -  constexpr int WINDOW_WIDTH = 800; -  constexpr int WINDOW_HEIGHT = 600; -  SDL_Window *window = -      SDL_CreateWindow(WINDOW_TITLE, WINDOW_WIDTH, WINDOW_HEIGHT, -                       SDL_WINDOW_VULKAN | SDL_WINDOW_RESIZABLE); -  if (!window) { -    VGLTF_LOG_ERR("SDL window creation failed: %s", SDL_GetError()); -    goto quit_sdl; +  platform->window = SDL_CreateWindow("vgltf", 800, 600, SDL_WINDOW_VULKAN); +  if (!platform->window) { +    VGLTF_LOG_ERR("Window creation failed: %s", SDL_GetError()); +    goto deinit_sdl;    } -  platform->window = window; - +  VGLTF_LOG_INFO("SDL platform initialized");    return true; -quit_sdl: + +deinit_sdl:    SDL_Quit();  err:    return false; @@ -31,67 +27,23 @@ err:  void vgltf_platform_deinit(struct vgltf_platform *platform) {    SDL_DestroyWindow(platform->window);    SDL_Quit(); +  VGLTF_LOG_INFO("SDL platform deinitialized");  } -static enum vgltf_key vgltf_key_from_sdl_keycode(SDL_Keycode keycode) { -  switch (keycode) { -  case SDLK_A: -    return VGLTF_KEY_A; -  case SDLK_B: -    return VGLTF_KEY_B; -  case SDLK_C: -    return VGLTF_KEY_C; -  case SDLK_D: -    return VGLTF_KEY_D; -  case SDLK_E: -    return VGLTF_KEY_E; -  case SDLK_F: -    return VGLTF_KEY_F; -  case SDLK_G: -    return VGLTF_KEY_G; -  case SDLK_H: -    return VGLTF_KEY_H; -  case SDLK_I: -    return VGLTF_KEY_I; -  case 
SDLK_J: -    return VGLTF_KEY_J; -  case SDLK_K: -    return VGLTF_KEY_K; -  case SDLK_L: -    return VGLTF_KEY_L; -  case SDLK_M: -    return VGLTF_KEY_M; -  case SDLK_N: -    return VGLTF_KEY_N; -  case SDLK_O: -    return VGLTF_KEY_O; -  case SDLK_P: -    return VGLTF_KEY_P; -  case SDLK_Q: -    return VGLTF_KEY_Q; -  case SDLK_R: -    return VGLTF_KEY_R; -  case SDLK_S: -    return VGLTF_KEY_S; -  case SDLK_T: -    return VGLTF_KEY_T; -  case SDLK_U: -    return VGLTF_KEY_U; -  case SDLK_V: -    return VGLTF_KEY_V; -  case SDLK_W: -    return VGLTF_KEY_W; -  case SDLK_X: -    return VGLTF_KEY_X; -  case SDLK_Y: -    return VGLTF_KEY_Y; -  case SDLK_Z: -    return VGLTF_KEY_Z; -  case SDLK_ESCAPE: -    return VGLTF_KEY_ESCAPE; + +#define VGLTF_GENERATE_SDL_KEYCODE_MAPPING(KEY)                                \ +  case SDLK_##KEY:                                                             \ +    return VGLTF_KEY_##KEY; + +static enum vgltf_key vgltf_key_from_sdl_keycode(SDL_Keycode key_code) { +  switch (key_code) { +    VGLTF_FOREACH_KEY(VGLTF_GENERATE_SDL_KEYCODE_MAPPING)    default:      return VGLTF_KEY_UNKNOWN;    }  } + +#undef VGLTF_GENERATE_SDL_KEYCODE_MAPPING +  bool vgltf_platform_poll_event(struct vgltf_platform *platform,                                 struct vgltf_event *event) {    (void)platform; @@ -106,16 +58,12 @@ bool vgltf_platform_poll_event(struct vgltf_platform *platform,        event->type = VGLTF_EVENT_KEY_DOWN;        event->key.key = vgltf_key_from_sdl_keycode(sdl_event.key.key);        break; -    case SDL_EVENT_WINDOW_RESIZED: -      event->type = VGLTF_EVENT_WINDOW_RESIZED; -      event->window_resized.width = sdl_event.display.data1; -      event->window_resized.height = sdl_event.display.data2; -      break;      default:        event->type = VGLTF_EVENT_UNKNOWN;        break;      }    } +    return pending_events;  }  bool vgltf_platform_get_window_size(struct vgltf_platform *platform, @@ -123,7 +71,31 @@ bool vgltf_platform_get_window_size(struct vgltf_platform *platform,    return SDL_GetWindowSize(platform->window, &window_size->width,                             &window_size->height);  } -char const *const * +bool vgltf_platform_get_current_time_nanoseconds(long *time) { +  if (!SDL_GetCurrentTime(time)) { +    VGLTF_LOG_ERR("'SDL_GetCurrentTime failed: %s", SDL_GetError()); +    goto err; +  } + +  return true; +err: +  return false; +} + +char *vgltf_platform_read_file_to_string(const char *filepath, +                                         size_t *out_size) { +  char *file_data = SDL_LoadFile(filepath, out_size); +  if (!file_data) { +    VGLTF_LOG_ERR("Couldn't load file: %s", SDL_GetError()); +    return NULL; +  } + +  return file_data; +} + +#include <SDL3/SDL_vulkan.h> + +const char *const *  vgltf_platform_get_vulkan_instance_extensions(struct vgltf_platform *platform,                                                uint32_t *count) {    (void)platform; diff --git a/src/renderer.c b/src/renderer.c deleted file mode 100644 index 7022af6..0000000 --- a/src/renderer.c +++ /dev/null @@ -1,1470 +0,0 @@ -#include "log.h" -#include "renderer.h" -#include "src/platform.h" -#include "vulkan/vulkan_core.h" -#include <assert.h> - -static const char *VALIDATION_LAYERS[] = {"VK_LAYER_KHRONOS_validation"}; -static constexpr int VALIDATION_LAYER_COUNT = -    sizeof(VALIDATION_LAYERS) / sizeof(VALIDATION_LAYERS[0]); - -#ifdef VGLTF_DEBUG -static constexpr bool enable_validation_layers = true; -#else -static constexpr bool enable_validation_layers = false; 
-#endif - -static VKAPI_ATTR VkBool32 VKAPI_CALL -debug_callback(VkDebugUtilsMessageSeverityFlagBitsEXT message_severity, -               VkDebugUtilsMessageTypeFlagBitsEXT message_type, -               const VkDebugUtilsMessengerCallbackDataEXT *callback_data, -               void *user_data) { -  (void)message_severity; -  (void)message_type; -  (void)user_data; -  VGLTF_LOG_DBG("validation layer: %s", callback_data->pMessage); -  return VK_FALSE; -} - -static constexpr int REQUIRED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY = 10; -struct required_instance_extensions { -  const char *extensions[REQUIRED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY]; -  uint32_t count; -}; -void required_instance_extensions_push( -    struct required_instance_extensions *required_instance_extensions, -    const char *required_instance_extension) { -  if (required_instance_extensions->count == -      REQUIRED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY) { -    VGLTF_PANIC("required instance extensions array is full"); -  } -  required_instance_extensions -      ->extensions[required_instance_extensions->count++] = -      required_instance_extension; -} - -static constexpr int SUPPORTED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY = 128; -struct supported_instance_extensions { -  VkExtensionProperties -      properties[SUPPORTED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY]; -  uint32_t count; -}; -bool supported_instance_extensions_init( -    struct supported_instance_extensions *supported_instance_extensions) { -  if (vkEnumerateInstanceExtensionProperties( -          nullptr, &supported_instance_extensions->count, nullptr) != -      VK_SUCCESS) { -    goto err; -  } - -  if (supported_instance_extensions->count > -      SUPPORTED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY) { -    VGLTF_LOG_ERR("supported instance extensions array cannot fit all the " -                  "VkExtensionProperties"); -    goto err; -  } - -  if (vkEnumerateInstanceExtensionProperties( -          nullptr, &supported_instance_extensions->count, -          supported_instance_extensions->properties) != VK_SUCCESS) { -    goto err; -  } -  return true; -err: -  return false; -} -void supported_instance_extensions_debug_print( -    const struct supported_instance_extensions *supported_instance_extensions) { -  VGLTF_LOG_DBG("Supported instance extensions:"); -  for (uint32_t i = 0; i < supported_instance_extensions->count; i++) { -    VGLTF_LOG_DBG("\t- %s", -                  supported_instance_extensions->properties[i].extensionName); -  } -} -bool supported_instance_extensions_includes( -    const struct supported_instance_extensions *supported_instance_extensions, -    const char *extension_name) { -  for (uint32_t supported_instance_extension_index = 0; -       supported_instance_extension_index < -       supported_instance_extensions->count; -       supported_instance_extension_index++) { -    const VkExtensionProperties *extension_properties = -        &supported_instance_extensions -             ->properties[supported_instance_extension_index]; -    if (strcmp(extension_properties->extensionName, extension_name) == 0) { -      return true; -    } -  } - -  return false; -} - -static constexpr uint32_t SUPPORTED_VALIDATION_LAYERS_ARRAY_CAPACITY = 64; -struct supported_validation_layers { -  VkLayerProperties properties[SUPPORTED_VALIDATION_LAYERS_ARRAY_CAPACITY]; -  uint32_t count; -}; -bool supported_validation_layers_init( -    struct supported_validation_layers *supported_validation_layers) { -  if (vkEnumerateInstanceLayerProperties(&supported_validation_layers->count, -      
                                   nullptr) != VK_SUCCESS) { -    goto err; -  } - -  if (supported_validation_layers->count > -      SUPPORTED_VALIDATION_LAYERS_ARRAY_CAPACITY) { -    VGLTF_LOG_ERR("supported validation layers array cannot fit all the " -                  "VkLayerProperties"); -    goto err; -  } - -  if (vkEnumerateInstanceLayerProperties( -          &supported_validation_layers->count, -          supported_validation_layers->properties) != VK_SUCCESS) { -    goto err; -  } - -  return true; -err: -  return false; -} - -static bool are_validation_layer_supported() { -  struct supported_validation_layers supported_layers = {}; -  if (!supported_validation_layers_init(&supported_layers)) { -    goto err; -  } - -  for (int requested_layer_index = 0; -       requested_layer_index < VALIDATION_LAYER_COUNT; -       requested_layer_index++) { -    const char *requested_layer_name = VALIDATION_LAYERS[requested_layer_index]; -    bool requested_layer_found = false; -    for (uint32_t supported_layer_index = 0; -         supported_layer_index < supported_layers.count; -         supported_layer_index++) { -      VkLayerProperties *supported_layer = -          &supported_layers.properties[supported_layer_index]; -      if (strcmp(requested_layer_name, supported_layer->layerName) == 0) { -        requested_layer_found = true; -        break; -      } -    } - -    if (!requested_layer_found) { -      goto err; -    } -  } - -  return true; -err: -  return false; -} - -static bool fetch_required_instance_extensions( -    struct required_instance_extensions *required_extensions, -    struct vgltf_platform *platform) { -  struct supported_instance_extensions supported_extensions = {}; -  if (!supported_instance_extensions_init(&supported_extensions)) { -    VGLTF_LOG_ERR( -        "Couldn't fetch supported instance extensions details (OOM?)"); -    goto err; -  } -  supported_instance_extensions_debug_print(&supported_extensions); - -  uint32_t platform_required_extension_count = 0; -  const char *const *platform_required_extensions = -      vgltf_platform_get_vulkan_instance_extensions( -          platform, &platform_required_extension_count); -  for (uint32_t platform_required_extension_index = 0; -       platform_required_extension_index < platform_required_extension_count; -       platform_required_extension_index++) { -    required_instance_extensions_push( -        required_extensions, -        platform_required_extensions[platform_required_extension_index]); -  } -  required_instance_extensions_push( -      required_extensions, VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME); - -  if (enable_validation_layers) { -    required_instance_extensions_push(required_extensions, -                                      VK_EXT_DEBUG_UTILS_EXTENSION_NAME); -  } - -  bool all_extensions_supported = true; -  for (uint32_t required_extension_index = 0; -       required_extension_index < required_extensions->count; -       required_extension_index++) { -    const char *required_extension_name = -        required_extensions->extensions[required_extension_index]; -    if (!supported_instance_extensions_includes(&supported_extensions, -                                                required_extension_name)) { -      VGLTF_LOG_ERR("Unsupported instance extension: %s", -                    required_extension_name); -      all_extensions_supported = false; -    } -  } - -  if (!all_extensions_supported) { -    VGLTF_LOG_ERR("Some required extensions are unsupported."); -    goto err; -  } - -  return 
true; -err: -  return false; -} - -static void populate_debug_messenger_create_info( -    VkDebugUtilsMessengerCreateInfoEXT *create_info) { -  *create_info = (VkDebugUtilsMessengerCreateInfoEXT){}; -  create_info->sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; -  create_info->messageSeverity = -      VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | -      VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | -      VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; -  create_info->messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | -                             VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | -                             VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; -  create_info->pfnUserCallback = debug_callback; -} - -static bool vgltf_renderer_create_instance(struct vgltf_renderer *renderer, -                                           struct vgltf_platform *platform) { -  VGLTF_LOG_INFO("Creating vulkan instance..."); -  if (enable_validation_layers && !are_validation_layer_supported()) { -    VGLTF_LOG_ERR("Requested validation layers aren't supported"); -    goto err; -  } - -  VkApplicationInfo application_info = { -      .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, -      .pApplicationName = "Visible GLTF", -      .applicationVersion = VK_MAKE_VERSION(0, 1, 0), -      .pEngineName = "No Engine", -      .engineVersion = VK_MAKE_VERSION(1, 0, 0), -      .apiVersion = VK_API_VERSION_1_2}; - -  struct required_instance_extensions required_extensions = {}; -  fetch_required_instance_extensions(&required_extensions, platform); - -  VkInstanceCreateInfo create_info = { -      .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, -      .pApplicationInfo = &application_info, -      .enabledExtensionCount = required_extensions.count, -      .ppEnabledExtensionNames = required_extensions.extensions, -      .flags = VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR}; - -  VkDebugUtilsMessengerCreateInfoEXT debug_create_info; -  if (enable_validation_layers) { -    create_info.enabledLayerCount = VALIDATION_LAYER_COUNT; -    create_info.ppEnabledLayerNames = VALIDATION_LAYERS; -    populate_debug_messenger_create_info(&debug_create_info); -    create_info.pNext = &debug_create_info; -  } - -  if (vkCreateInstance(&create_info, nullptr, &renderer->instance) != -      VK_SUCCESS) { -    VGLTF_LOG_ERR("Failed to create VkInstance"); -    goto err; -  } - -  return true; -err: -  return false; -} - -static VkResult create_debug_utils_messenger_ext( -    VkInstance instance, const VkDebugUtilsMessengerCreateInfoEXT *create_info, -    const VkAllocationCallbacks *allocator, -    VkDebugUtilsMessengerEXT *debug_messenger) { -  auto func = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr( -      instance, "vkCreateDebugUtilsMessengerEXT"); -  if (func != nullptr) { -    return func(instance, create_info, allocator, debug_messenger); -  } - -  return VK_ERROR_EXTENSION_NOT_PRESENT; -} - -static void -destroy_debug_utils_messenger_ext(VkInstance instance, -                                  VkDebugUtilsMessengerEXT debug_messenger, -                                  const VkAllocationCallbacks *allocator) { -  auto func = (PFN_vkDestroyDebugUtilsMessengerEXT)vkGetInstanceProcAddr( -      instance, "vkDestroyDebugUtilsMessengerEXT"); -  if (func != nullptr) { -    func(instance, debug_messenger, allocator); -  } -} - -static void -vgltf_renderer_setup_debug_messenger(struct vgltf_renderer *renderer) { -  if (!enable_validation_layers) -    return; -  
VkDebugUtilsMessengerCreateInfoEXT create_info; -  populate_debug_messenger_create_info(&create_info); -  create_debug_utils_messenger_ext(renderer->instance, &create_info, nullptr, -                                   &renderer->debug_messenger); -} - -static constexpr int AVAILABLE_PHYSICAL_DEVICE_ARRAY_CAPACITY = 128; -struct available_physical_devices { -  VkPhysicalDevice devices[AVAILABLE_PHYSICAL_DEVICE_ARRAY_CAPACITY]; -  uint32_t count; -}; -static bool -available_physical_devices_init(VkInstance instance, -                                struct available_physical_devices *devices) { - -  if (vkEnumeratePhysicalDevices(instance, &devices->count, nullptr) != -      VK_SUCCESS) { -    VGLTF_LOG_ERR("Couldn't enumerate physical devices"); -    goto err; -  } - -  if (devices->count == 0) { -    VGLTF_LOG_ERR("Failed to find any GPU with Vulkan support"); -    goto err; -  } - -  if (devices->count > AVAILABLE_PHYSICAL_DEVICE_ARRAY_CAPACITY) { -    VGLTF_LOG_ERR("available physical devices array cannot fit all available " -                  "physical devices"); -    goto err; -  } - -  if (vkEnumeratePhysicalDevices(instance, &devices->count, devices->devices) != -      VK_SUCCESS) { -    VGLTF_LOG_ERR("Couldn't enumerate physical devices"); -    goto err; -  } - -  return true; -err: -  return false; -} - -struct queue_family_indices { -  uint32_t graphics_family; -  uint32_t present_family; -  bool has_graphics_family; -  bool has_present_family; -}; -bool queue_family_indices_is_complete( -    const struct queue_family_indices *indices) { -  return indices->has_graphics_family && indices->has_present_family; -} -bool queue_family_indices_for_device(struct queue_family_indices *indices, -                                     VkPhysicalDevice device, -                                     VkSurfaceKHR surface) { -  static constexpr uint32_t QUEUE_FAMILY_PROPERTIES_ARRAY_CAPACITY = 64; -  uint32_t queue_family_count = 0; -  vkGetPhysicalDeviceQueueFamilyProperties(device, &queue_family_count, -                                           nullptr); - -  if (queue_family_count > QUEUE_FAMILY_PROPERTIES_ARRAY_CAPACITY) { -    VGLTF_LOG_ERR( -        "Queue family properties array cannot fit all queue family properties"); -    goto err; -  } - -  VkQueueFamilyProperties -      queue_family_properties[QUEUE_FAMILY_PROPERTIES_ARRAY_CAPACITY] = {}; -  vkGetPhysicalDeviceQueueFamilyProperties(device, &queue_family_count, -                                           queue_family_properties); - -  for (uint32_t queue_family_index = 0; queue_family_index < queue_family_count; -       queue_family_index++) { -    VkQueueFamilyProperties *queue_family = -        &queue_family_properties[queue_family_index]; - -    VkBool32 present_support; -    vkGetPhysicalDeviceSurfaceSupportKHR(device, queue_family_index, surface, -                                         &present_support); - -    if (queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT) { -      indices->graphics_family = queue_family_index; -      indices->has_graphics_family = true; -    } - -    if (present_support) { -      indices->present_family = queue_family_index; -      indices->has_present_family = true; -    } - -    if (queue_family_indices_is_complete(indices)) { -      break; -    } -  } - -  return true; -err: -  return false; -} - -static bool is_in_array(uint32_t *array, int length, uint32_t value) { -  for (int i = 0; i < length; i++) { -    if (array[i] == value) { -      return true; -    } -  } - -  return false; -} - -static 
constexpr uint32_t SUPPORTED_EXTENSIONS_ARRAY_CAPACITY = 128; -struct supported_extensions { -  VkExtensionProperties properties[SUPPORTED_EXTENSIONS_ARRAY_CAPACITY]; -  uint32_t count; -}; -bool supported_extensions_init( -    struct supported_extensions *supported_extensions, -    VkPhysicalDevice device) { -  if (vkEnumerateDeviceExtensionProperties(device, nullptr, -                                           &supported_extensions->count, -                                           nullptr) != VK_SUCCESS) { -    goto err; -  } - -  if (supported_extensions->count > SUPPORTED_EXTENSIONS_ARRAY_CAPACITY) { -    VGLTF_LOG_ERR( -        "supported extensions aarray cannot fit all the VkExtensionProperties"); -    goto err; -  } - -  if (vkEnumerateDeviceExtensionProperties( -          device, nullptr, &supported_extensions->count, -          supported_extensions->properties) != VK_SUCCESS) { -    goto err; -  } - -  return true; -err: -  return false; -} - -static bool supported_extensions_includes_extension( -    struct supported_extensions *supported_extensions, -    const char *extension_name) { -  for (uint32_t supported_extension_index = 0; -       supported_extension_index < supported_extensions->count; -       supported_extension_index++) { -    if (strcmp(supported_extensions->properties[supported_extension_index] -                   .extensionName, -               extension_name) == 0) { -      return true; -    } -  } -  return false; -} - -static const char *DEVICE_EXTENSIONS[] = {VK_KHR_SWAPCHAIN_EXTENSION_NAME, -                                          "VK_KHR_portability_subset"}; -static constexpr int DEVICE_EXTENSION_COUNT = -    sizeof(DEVICE_EXTENSIONS) / sizeof(DEVICE_EXTENSIONS[0]); -static bool are_device_extensions_supported(VkPhysicalDevice device) { -  struct supported_extensions supported_extensions = {}; -  if (!supported_extensions_init(&supported_extensions, device)) { -    goto err; -  } - -  for (uint32_t required_extension_index = 0; -       required_extension_index < DEVICE_EXTENSION_COUNT; -       required_extension_index++) { -    if (!supported_extensions_includes_extension( -            &supported_extensions, -            DEVICE_EXTENSIONS[required_extension_index])) { -      VGLTF_LOG_DBG("Unsupported: %s", -                    DEVICE_EXTENSIONS[required_extension_index]); -      goto err; -    } -  } - -  return true; - -err: -  return false; -} - -static constexpr int SWAPCHAIN_SUPPORT_DETAILS_MAX_SURFACE_FORMAT_COUNT = 256; -static constexpr int SWAPCHAIN_SUPPORT_DETAILS_MAX_PRESENT_MODE_COUNT = 256; -struct swapchain_support_details { -  VkSurfaceCapabilitiesKHR capabilities; -  VkSurfaceFormatKHR -      formats[SWAPCHAIN_SUPPORT_DETAILS_MAX_SURFACE_FORMAT_COUNT]; -  VkPresentModeKHR -      present_modes[SWAPCHAIN_SUPPORT_DETAILS_MAX_PRESENT_MODE_COUNT]; -  uint32_t format_count; -  uint32_t present_mode_count; -}; -bool swapchain_support_details_query_from_device( -    struct swapchain_support_details *swapchain_support_details, -    VkPhysicalDevice device, VkSurfaceKHR surface) { -  if (vkGetPhysicalDeviceSurfaceCapabilitiesKHR( -          device, surface, &swapchain_support_details->capabilities) != -      VK_SUCCESS) { -    goto err; -  } - -  if (vkGetPhysicalDeviceSurfaceFormatsKHR( -          device, surface, &swapchain_support_details->format_count, nullptr) != -      VK_SUCCESS) { -    goto err; -  } - -  if (swapchain_support_details->format_count != 0 && -      swapchain_support_details->format_count < -          
SWAPCHAIN_SUPPORT_DETAILS_MAX_SURFACE_FORMAT_COUNT) { -    if (vkGetPhysicalDeviceSurfaceFormatsKHR( -            device, surface, &swapchain_support_details->format_count, -            swapchain_support_details->formats) != VK_SUCCESS) { -      goto err; -    } -  } - -  if (vkGetPhysicalDeviceSurfacePresentModesKHR( -          device, surface, &swapchain_support_details->present_mode_count, -          nullptr) != VK_SUCCESS) { -    goto err; -  } - -  if (swapchain_support_details->present_mode_count != 0 && -      swapchain_support_details->present_mode_count < -          SWAPCHAIN_SUPPORT_DETAILS_MAX_PRESENT_MODE_COUNT) { -    if (vkGetPhysicalDeviceSurfacePresentModesKHR( -            device, surface, &swapchain_support_details->present_mode_count, -            swapchain_support_details->present_modes) != VK_SUCCESS) { -      goto err; -    } -  } - -  return true; -err: -  return false; -} - -static bool is_physical_device_suitable(VkPhysicalDevice device, -                                        VkSurfaceKHR surface) { -  struct queue_family_indices indices = {}; -  queue_family_indices_for_device(&indices, device, surface); - -  VGLTF_LOG_DBG("Checking for physical device extension support"); -  bool extensions_supported = are_device_extensions_supported(device); -  VGLTF_LOG_DBG("Supported: %d", extensions_supported); - -  bool swapchain_adequate = false; -  if (extensions_supported) { - -    VGLTF_LOG_DBG("Checking for swapchain support details"); -    struct swapchain_support_details swapchain_support_details = {}; -    if (!swapchain_support_details_query_from_device(&swapchain_support_details, -                                                     device, surface)) { -      VGLTF_LOG_ERR("Couldn't query swapchain support details from device"); -      goto err; -    } - -    swapchain_adequate = swapchain_support_details.format_count > 0 && -                         swapchain_support_details.present_mode_count > 0; -  } - -  return queue_family_indices_is_complete(&indices) && extensions_supported && -         swapchain_adequate; -err: -  return false; -} - -static bool -vgltf_renderer_pick_physical_device(struct vgltf_renderer *renderer) { -  VkPhysicalDevice physical_device = VK_NULL_HANDLE; - -  struct available_physical_devices available_physical_devices = {}; -  if (!available_physical_devices_init(renderer->instance, -                                       &available_physical_devices)) { -    VGLTF_LOG_ERR("Couldn't fetch available physical devices"); -    goto err; -  } - -  for (uint32_t available_physical_device_index = 0; -       available_physical_device_index < available_physical_devices.count; -       available_physical_device_index++) { -    VkPhysicalDevice available_physical_device = -        available_physical_devices.devices[available_physical_device_index]; -    if (is_physical_device_suitable(available_physical_device, -                                    renderer->surface)) { -      physical_device = available_physical_device; -      break; -    } -  } - -  if (physical_device == VK_NULL_HANDLE) { -    VGLTF_LOG_ERR("Failed to find a suitable GPU"); -    goto err; -  } - -  renderer->physical_device = physical_device; - -  return true; -err: -  return false; -} - -static bool -vgltf_renderer_create_logical_device(struct vgltf_renderer *renderer) { -  struct queue_family_indices queue_family_indices = {}; -  queue_family_indices_for_device(&queue_family_indices, -                                  renderer->physical_device, renderer->surface); -  static 
constexpr int MAX_QUEUE_FAMILY_COUNT = 2; - -  uint32_t unique_queue_families[MAX_QUEUE_FAMILY_COUNT] = {}; -  int unique_queue_family_count = 0; - -  if (!is_in_array(unique_queue_families, unique_queue_family_count, -                   queue_family_indices.graphics_family)) { -    assert(unique_queue_family_count < MAX_QUEUE_FAMILY_COUNT); -    unique_queue_families[unique_queue_family_count++] = -        queue_family_indices.graphics_family; -  } -  if (!is_in_array(unique_queue_families, unique_queue_family_count, -                   queue_family_indices.present_family)) { -    assert(unique_queue_family_count < MAX_QUEUE_FAMILY_COUNT); -    unique_queue_families[unique_queue_family_count++] = -        queue_family_indices.present_family; -  } - -  float queue_priority = 1.f; -  VkDeviceQueueCreateInfo queue_create_infos[MAX_QUEUE_FAMILY_COUNT] = {}; -  int queue_create_info_count = 0; -  for (int unique_queue_family_index = 0; -       unique_queue_family_index < unique_queue_family_count; -       unique_queue_family_index++) { -    queue_create_infos[queue_create_info_count++] = (VkDeviceQueueCreateInfo){ -        .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, -        .queueFamilyIndex = unique_queue_families[unique_queue_family_index], -        .queueCount = 1, -        .pQueuePriorities = &queue_priority}; -  } - -  VkPhysicalDeviceFeatures device_features = {}; -  VkDeviceCreateInfo create_info = { -      .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, -      .pQueueCreateInfos = queue_create_infos, -      .queueCreateInfoCount = queue_create_info_count, -      .pEnabledFeatures = &device_features, -      .ppEnabledExtensionNames = DEVICE_EXTENSIONS, -      .enabledExtensionCount = DEVICE_EXTENSION_COUNT}; -  if (vkCreateDevice(renderer->physical_device, &create_info, nullptr, -                     &renderer->device) != VK_SUCCESS) { -    VGLTF_LOG_ERR("Failed to create logical device"); -    goto err; -  } - -  vkGetDeviceQueue(renderer->device, queue_family_indices.graphics_family, 0, -                   &renderer->graphics_queue); -  vkGetDeviceQueue(renderer->device, queue_family_indices.present_family, 0, -                   &renderer->present_queue); - -  return true; -err: -  return false; -} - -static bool vgltf_renderer_create_surface(struct vgltf_renderer *renderer, -                                          struct vgltf_platform *platform) { -  if (!vgltf_platform_create_vulkan_surface(platform, renderer->instance, -                                            &renderer->surface)) { -    VGLTF_LOG_ERR("Couldn't create surface"); -    goto err; -  } - -  return true; -err: -  return false; -} - -static VkSurfaceFormatKHR -choose_swapchain_surface_format(VkSurfaceFormatKHR *available_formats, -                                uint32_t available_format_count) { -  for (uint32_t available_format_index = 0; -       available_format_index < available_format_count; -       available_format_index++) { -    VkSurfaceFormatKHR *available_format = -        &available_formats[available_format_index]; -    if (available_format->format == VK_FORMAT_B8G8R8A8_SRGB && -        available_format->colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) { -      return *available_format; -    } -  } - -  return available_formats[0]; -} - -static VkPresentModeKHR -choose_swapchain_present_mode(VkPresentModeKHR *available_modes, -                              uint32_t available_mode_count) { -  for (uint32_t available_mode_index = 0; -       available_mode_index < available_mode_count; 
available_mode_index++) { -    VkPresentModeKHR available_mode = available_modes[available_mode_index]; -    if (available_mode == VK_PRESENT_MODE_MAILBOX_KHR) { -      return available_mode; -    } -  } - -  return VK_PRESENT_MODE_FIFO_KHR; -} - -static uint32_t clamp_uint32(uint32_t min, uint32_t max, uint32_t value) { -  return value < min ? min : value > max ? max : value; -} - -static VkExtent2D -choose_swapchain_extent(const VkSurfaceCapabilitiesKHR *capabilities, int width, -                        int height) { -  if (capabilities->currentExtent.width != UINT32_MAX) { -    return capabilities->currentExtent; -  } else { -    VkExtent2D actual_extent = {width, height}; -    actual_extent.width = -        clamp_uint32(capabilities->minImageExtent.width, -                     capabilities->maxImageExtent.width, actual_extent.width); -    actual_extent.height = -        clamp_uint32(capabilities->minImageExtent.height, -                     capabilities->maxImageExtent.height, actual_extent.height); -    return actual_extent; -  } -} - -static bool vgltf_renderer_create_swapchain(struct vgltf_renderer *renderer) { -  struct swapchain_support_details swapchain_support_details = {}; -  swapchain_support_details_query_from_device( -      &swapchain_support_details, renderer->physical_device, renderer->surface); - -  VkSurfaceFormatKHR surface_format = -      choose_swapchain_surface_format(swapchain_support_details.formats, -                                      swapchain_support_details.format_count); -  VkPresentModeKHR present_mode = choose_swapchain_present_mode( -      swapchain_support_details.present_modes, -      swapchain_support_details.present_mode_count); - -  VkExtent2D extent = choose_swapchain_extent( -      &swapchain_support_details.capabilities, renderer->window_size.width, -      renderer->window_size.height); -  uint32_t image_count = -      swapchain_support_details.capabilities.minImageCount + 1; -  if (swapchain_support_details.capabilities.maxImageCount > 0 && -      image_count > swapchain_support_details.capabilities.maxImageCount) { -    image_count = swapchain_support_details.capabilities.maxImageCount; -  } - -  VkSwapchainCreateInfoKHR create_info = { -      .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, -      .surface = renderer->surface, -      .minImageCount = image_count, -      .imageFormat = surface_format.format, -      .imageColorSpace = surface_format.colorSpace, -      .imageExtent = extent, -      .imageArrayLayers = 1, -      .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT}; -  struct queue_family_indices indices = {}; -  queue_family_indices_for_device(&indices, renderer->physical_device, -                                  renderer->surface); -  uint32_t queue_family_indices[] = {indices.graphics_family, -                                     indices.present_family}; -  if (indices.graphics_family != indices.present_family) { -    create_info.imageSharingMode = VK_SHARING_MODE_CONCURRENT; -    create_info.queueFamilyIndexCount = 2; -    create_info.pQueueFamilyIndices = queue_family_indices; -  } else { -    create_info.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; -  } - -  create_info.preTransform = -      swapchain_support_details.capabilities.currentTransform; -  create_info.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; -  create_info.presentMode = present_mode; -  create_info.clipped = VK_TRUE; -  create_info.oldSwapchain = VK_NULL_HANDLE; - -  if (vkCreateSwapchainKHR(renderer->device, &create_info, nullptr, -                 
          &renderer->swapchain) != VK_SUCCESS) { -    VGLTF_LOG_ERR("Swapchain creation failed!"); -    goto err; -  } - -  if (vkGetSwapchainImagesKHR(renderer->device, renderer->swapchain, -                              &renderer->swapchain_image_count, -                              nullptr) != VK_SUCCESS) { -    VGLTF_LOG_ERR("Couldn't get swapchain image count"); -    goto destroy_swapchain; -  } - -  if (renderer->swapchain_image_count > -      VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT) { -    VGLTF_LOG_ERR("Swapchain image array cannot fit all %d swapchain images", -                  renderer->swapchain_image_count); -    goto destroy_swapchain; -  } - -  if (vkGetSwapchainImagesKHR(renderer->device, renderer->swapchain, -                              &renderer->swapchain_image_count, -                              renderer->swapchain_images) != VK_SUCCESS) { -    VGLTF_LOG_ERR("Couldn't get swapchain images"); -    goto destroy_swapchain; -  } - -  renderer->swapchain_image_format = surface_format.format; -  renderer->swapchain_extent = extent; - -  return true; -destroy_swapchain: -  vkDestroySwapchainKHR(renderer->device, renderer->swapchain, nullptr); -err: -  return false; -} - -static bool vgltf_renderer_create_image_views(struct vgltf_renderer *renderer) { -  uint32_t swapchain_image_index; -  for (swapchain_image_index = 0; -       swapchain_image_index < renderer->swapchain_image_count; -       swapchain_image_index++) { -    VkImage swapchain_image = renderer->swapchain_images[swapchain_image_index]; - -    VkImageViewCreateInfo create_info = { -        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, -        .image = swapchain_image, -        .viewType = VK_IMAGE_VIEW_TYPE_2D, -        .format = renderer->swapchain_image_format, -        .components = {VK_COMPONENT_SWIZZLE_IDENTITY, -                       VK_COMPONENT_SWIZZLE_IDENTITY, -                       VK_COMPONENT_SWIZZLE_IDENTITY, -                       VK_COMPONENT_SWIZZLE_IDENTITY}, -        .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, -                             .levelCount = 1, -                             .layerCount = 1}}; - -    if (vkCreateImageView( -            renderer->device, &create_info, nullptr, -            &renderer->swapchain_image_views[swapchain_image_index]) != -        VK_SUCCESS) { -      goto err; -    } -  } -  return true; -err: -  for (uint32_t to_remove_index = 0; to_remove_index < swapchain_image_index; -       to_remove_index++) { -    vkDestroyImageView(renderer->device, -                       renderer->swapchain_image_views[to_remove_index], -                       nullptr); -  } -  return false; -} - -static bool create_shader_module(VkDevice device, const unsigned char *code, -                                 int size, VkShaderModule *out) { -  VkShaderModuleCreateInfo create_info = { -      .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, -      .codeSize = size, -      .pCode = (const uint32_t *)code, -  }; -  if (vkCreateShaderModule(device, &create_info, nullptr, out) != VK_SUCCESS) { -    VGLTF_LOG_ERR("Couldn't create shader module"); -    goto err; -  } -  return true; -err: -  return false; -} - -static bool vgltf_renderer_create_render_pass(struct vgltf_renderer *renderer) { -  VkAttachmentDescription color_attachment = { -      .format = renderer->swapchain_image_format, -      .samples = VK_SAMPLE_COUNT_1_BIT, -      .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, -      .storeOp = VK_ATTACHMENT_STORE_OP_STORE, -      .stencilLoadOp = 
VK_ATTACHMENT_LOAD_OP_DONT_CARE, -      .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, -      .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, -      .finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR}; -  VkAttachmentReference color_attachment_ref = { -      .attachment = 0, -      .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, -  }; -  VkSubpassDescription subpass = {.pipelineBindPoint = -                                      VK_PIPELINE_BIND_POINT_GRAPHICS, -                                  .pColorAttachments = &color_attachment_ref, -                                  .colorAttachmentCount = 1}; -  VkSubpassDependency dependency = { -      .srcSubpass = VK_SUBPASS_EXTERNAL, -      .dstSubpass = 0, -      .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, -      .srcAccessMask = 0, -      .dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, -      .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT}; - -  VkRenderPassCreateInfo render_pass_info = { -      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, -      .attachmentCount = 1, -      .pAttachments = &color_attachment, -      .subpassCount = 1, -      .pSubpasses = &subpass, -      .dependencyCount = 1, -      .pDependencies = &dependency}; - -  if (vkCreateRenderPass(renderer->device, &render_pass_info, nullptr, -                         &renderer->render_pass) != VK_SUCCESS) { -    VGLTF_LOG_ERR("Failed to create render pass"); -    goto err; -  } - -  return true; -err: -  return false; -} - -static bool -vgltf_renderer_create_graphics_pipeline(struct vgltf_renderer *renderer) { -  static constexpr unsigned char triangle_shader_vert_code[] = { -#embed "../compiled_shaders/triangle.vert.spv" -  }; -  static constexpr unsigned char triangle_shader_frag_code[] = { -#embed "../compiled_shaders/triangle.frag.spv" -  }; - -  VkShaderModule triangle_shader_vert_module; -  if (!create_shader_module(renderer->device, triangle_shader_vert_code, -                            sizeof(triangle_shader_vert_code), -                            &triangle_shader_vert_module)) { -    VGLTF_LOG_ERR("Couldn't create triangle vert shader module"); -    goto err; -  } - -  VkShaderModule triangle_shader_frag_module; -  if (!create_shader_module(renderer->device, triangle_shader_frag_code, -                            sizeof(triangle_shader_frag_code), -                            &triangle_shader_frag_module)) { -    VGLTF_LOG_ERR("Couldn't create triangle frag shader module"); -    goto destroy_vert_shader_module; -  } - -  VkPipelineShaderStageCreateInfo triangle_shader_vert_stage_create_info = { -      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, -      .stage = VK_SHADER_STAGE_VERTEX_BIT, -      .module = triangle_shader_vert_module, -      .pName = "main"}; -  VkPipelineShaderStageCreateInfo triangle_shader_frag_stage_create_info = { -      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, -      .stage = VK_SHADER_STAGE_FRAGMENT_BIT, -      .module = triangle_shader_frag_module, -      .pName = "main"}; -  VkPipelineShaderStageCreateInfo shader_stages[] = { -      triangle_shader_vert_stage_create_info, -      triangle_shader_frag_stage_create_info}; - -  VkDynamicState dynamic_states[] = { -      VK_DYNAMIC_STATE_VIEWPORT, -      VK_DYNAMIC_STATE_SCISSOR, -  }; - -  VkPipelineDynamicStateCreateInfo dynamic_state = { -      .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, -      .dynamicStateCount = sizeof(dynamic_states) / sizeof(dynamic_states[0]), -      .pDynamicStates = 
dynamic_states}; - -  VkPipelineVertexInputStateCreateInfo vertex_input_info = { -      .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, -      .vertexBindingDescriptionCount = 0, -      .vertexAttributeDescriptionCount = 0, -  }; - -  VkPipelineInputAssemblyStateCreateInfo input_assembly = { -      .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, -      .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, -      .primitiveRestartEnable = VK_FALSE, -  }; - -  VkPipelineViewportStateCreateInfo viewport_state = { -      .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, -      .viewportCount = 1, -      .scissorCount = 1}; - -  VkPipelineRasterizationStateCreateInfo rasterizer = { -      .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, -      .depthClampEnable = VK_FALSE, -      .rasterizerDiscardEnable = VK_FALSE, -      .polygonMode = VK_POLYGON_MODE_FILL, -      .lineWidth = 1.f, -      .cullMode = VK_CULL_MODE_BACK_BIT, -      .frontFace = VK_FRONT_FACE_CLOCKWISE, -      .depthBiasEnable = VK_FALSE}; - -  VkPipelineMultisampleStateCreateInfo multisampling = { -      .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, -      .sampleShadingEnable = VK_FALSE, -      .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, -  }; - -  VkPipelineColorBlendAttachmentState color_blend_attachment = { -      .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | -                        VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, -      .blendEnable = VK_FALSE, -  }; - -  VkPipelineColorBlendStateCreateInfo color_blending = { -      .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, -      .logicOpEnable = VK_FALSE, -      .attachmentCount = 1, -      .pAttachments = &color_blend_attachment}; - -  VkPipelineLayoutCreateInfo pipeline_layout_info = { -      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, -  }; - -  if (vkCreatePipelineLayout(renderer->device, &pipeline_layout_info, nullptr, -                             &renderer->pipeline_layout) != VK_SUCCESS) { -    VGLTF_LOG_ERR("Couldn't create pipeline layout"); -    goto destroy_frag_shader_module; -  } - -  VkGraphicsPipelineCreateInfo pipeline_info = { -      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, -      .stageCount = 2, -      .pStages = shader_stages, -      .pVertexInputState = &vertex_input_info, -      .pInputAssemblyState = &input_assembly, -      .pViewportState = &viewport_state, -      .pRasterizationState = &rasterizer, -      .pMultisampleState = &multisampling, -      .pColorBlendState = &color_blending, -      .pDynamicState = &dynamic_state, -      .layout = renderer->pipeline_layout, -      .renderPass = renderer->render_pass, -      .subpass = 0, -  }; - -  if (vkCreateGraphicsPipelines(renderer->device, VK_NULL_HANDLE, 1, -                                &pipeline_info, nullptr, -                                &renderer->graphics_pipeline) != VK_SUCCESS) { -    VGLTF_LOG_ERR("Couldn't create pipeline"); -    goto destroy_pipeline_layout; -  } - -  vkDestroyShaderModule(renderer->device, triangle_shader_frag_module, nullptr); -  vkDestroyShaderModule(renderer->device, triangle_shader_vert_module, nullptr); -  return true; -destroy_pipeline_layout: -  vkDestroyPipelineLayout(renderer->device, renderer->pipeline_layout, nullptr); -destroy_frag_shader_module: -  vkDestroyShaderModule(renderer->device, triangle_shader_frag_module, nullptr); -destroy_vert_shader_module: -  
vkDestroyShaderModule(renderer->device, triangle_shader_vert_module, nullptr); -err: -  return false; -} - -static bool -vgltf_renderer_create_framebuffers(struct vgltf_renderer *renderer) { -  for (uint32_t i = 0; i < renderer->swapchain_image_count; i++) { -    VkImageView attachments[] = {renderer->swapchain_image_views[i]}; - -    VkFramebufferCreateInfo framebuffer_info = { -        .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, -        .renderPass = renderer->render_pass, -        .attachmentCount = 1, -        .pAttachments = attachments, -        .width = renderer->swapchain_extent.width, -        .height = renderer->swapchain_extent.height, -        .layers = 1}; - -    if (vkCreateFramebuffer(renderer->device, &framebuffer_info, nullptr, -                            &renderer->swapchain_framebuffers[i]) != -        VK_SUCCESS) { -      VGLTF_LOG_ERR("Failed to create framebuffer"); -      goto err; -    } -  } - -  return true; -err: -  return false; -} - -static bool -vgltf_renderer_create_command_pool(struct vgltf_renderer *renderer) { -  struct queue_family_indices queue_family_indices = {}; -  if (!queue_family_indices_for_device(&queue_family_indices, -                                       renderer->physical_device, -                                       renderer->surface)) { -    VGLTF_LOG_ERR("Couldn't fetch queue family indices"); -    goto err; -  } - -  VkCommandPoolCreateInfo pool_info = { -      .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, -      .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, -      .queueFamilyIndex = queue_family_indices.graphics_family}; - -  if (vkCreateCommandPool(renderer->device, &pool_info, nullptr, -                          &renderer->command_pool) != VK_SUCCESS) { -    VGLTF_LOG_ERR("Couldn't create command pool"); -    goto err; -  } - -  return true; -err: -  return false; -} - -static bool -vgltf_renderer_create_command_buffer(struct vgltf_renderer *renderer) { -  VkCommandBufferAllocateInfo allocate_info = { -      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, -      .commandPool = renderer->command_pool, -      .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, -      .commandBufferCount = VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT}; - -  if (vkAllocateCommandBuffers(renderer->device, &allocate_info, -                               renderer->command_buffer) != VK_SUCCESS) { -    VGLTF_LOG_ERR("Couldn't allocate command buffers"); -    goto err; -  } - -  return true; -err: -  return false; -} - -static bool -vgltf_renderer_create_sync_objects(struct vgltf_renderer *renderer) { -  VkSemaphoreCreateInfo semaphore_info = { -      .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, -  }; - -  VkFenceCreateInfo fence_info = {.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, -                                  .flags = VK_FENCE_CREATE_SIGNALED_BIT}; - -  int frame_in_flight_index = 0; -  for (; frame_in_flight_index < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; -       frame_in_flight_index++) { -    if (vkCreateSemaphore( -            renderer->device, &semaphore_info, nullptr, -            &renderer->image_available_semaphores[frame_in_flight_index]) != -            VK_SUCCESS || -        vkCreateSemaphore( -            renderer->device, &semaphore_info, nullptr, -            &renderer->render_finished_semaphores[frame_in_flight_index]) != -            VK_SUCCESS || -        vkCreateFence(renderer->device, &fence_info, nullptr, -                      &renderer->in_flight_fences[frame_in_flight_index]) != -            
VK_SUCCESS) { -      VGLTF_LOG_ERR("Couldn't create sync objects"); -      goto err; -    } -  } - -  return true; -err: -  for (int frame_in_flight_to_delete_index = 0; -       frame_in_flight_to_delete_index < frame_in_flight_index; -       frame_in_flight_to_delete_index++) { -    vkDestroyFence(renderer->device, -                   renderer->in_flight_fences[frame_in_flight_index], nullptr); -    vkDestroySemaphore( -        renderer->device, -        renderer->render_finished_semaphores[frame_in_flight_index], nullptr); -    vkDestroySemaphore( -        renderer->device, -        renderer->image_available_semaphores[frame_in_flight_index], nullptr); -  } -  return false; -} - -static void vgltf_renderer_cleanup_swapchain(struct vgltf_renderer *renderer) { -  for (uint32_t framebuffer_index = 0; -       framebuffer_index < renderer->swapchain_image_count; -       framebuffer_index++) { -    vkDestroyFramebuffer(renderer->device, -                         renderer->swapchain_framebuffers[framebuffer_index], -                         nullptr); -  } - -  for (uint32_t image_view_index = 0; -       image_view_index < renderer->swapchain_image_count; image_view_index++) { -    vkDestroyImageView(renderer->device, -                       renderer->swapchain_image_views[image_view_index], -                       nullptr); -  } - -  vkDestroySwapchainKHR(renderer->device, renderer->swapchain, nullptr); -} - -static bool vgltf_renderer_recreate_swapchain(struct vgltf_renderer *renderer) { -  vkDeviceWaitIdle(renderer->device); -  vgltf_renderer_cleanup_swapchain(renderer); - -  // TODO add error handling -  vgltf_renderer_create_swapchain(renderer); -  vgltf_renderer_create_image_views(renderer); -  vgltf_renderer_create_framebuffers(renderer); -  return true; -} - -bool vgltf_renderer_triangle_pass(struct vgltf_renderer *renderer) { -  vkWaitForFences(renderer->device, 1, -                  &renderer->in_flight_fences[renderer->current_frame], VK_TRUE, -                  UINT64_MAX); - -  uint32_t image_index; -  VkResult acquire_swapchain_image_result = vkAcquireNextImageKHR( -      renderer->device, renderer->swapchain, UINT64_MAX, -      renderer->image_available_semaphores[renderer->current_frame], -      VK_NULL_HANDLE, &image_index); -  if (acquire_swapchain_image_result == VK_ERROR_OUT_OF_DATE_KHR || -      acquire_swapchain_image_result == VK_SUBOPTIMAL_KHR || -      renderer->framebuffer_resized) { -    renderer->framebuffer_resized = false; -    vgltf_renderer_recreate_swapchain(renderer); -    return true; -  } else if (acquire_swapchain_image_result != VK_SUCCESS) { -    VGLTF_LOG_ERR("Failed to acquire a swapchain image"); -    goto err; -  } - -  vkResetFences(renderer->device, 1, -                &renderer->in_flight_fences[renderer->current_frame]); - -  vkResetCommandBuffer(renderer->command_buffer[renderer->current_frame], 0); -  VkCommandBufferBeginInfo begin_info = { -      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, -  }; - -  if (vkBeginCommandBuffer(renderer->command_buffer[renderer->current_frame], -                           &begin_info) != VK_SUCCESS) { -    VGLTF_LOG_ERR("Failed to begin recording command buffer"); -    goto err; -  } - -  VkRenderPassBeginInfo render_pass_info = { -      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, -      .renderPass = renderer->render_pass, -      .framebuffer = renderer->swapchain_framebuffers[image_index], -      .renderArea = {.offset = {}, .extent = renderer->swapchain_extent}, -      .clearValueCount = 1, -     
 .pClearValues = -          &(const VkClearValue){.color = {.float32 = {0.f, 0.f, 0.f, 1.f}}}, - -  }; - -  vkCmdBeginRenderPass(renderer->command_buffer[renderer->current_frame], -                       &render_pass_info, VK_SUBPASS_CONTENTS_INLINE); -  vkCmdBindPipeline(renderer->command_buffer[renderer->current_frame], -                    VK_PIPELINE_BIND_POINT_GRAPHICS, -                    renderer->graphics_pipeline); -  VkViewport viewport = {.x = 0.f, -                         .y = 0.f, -                         .width = (float)renderer->swapchain_extent.width, -                         .height = (float)renderer->swapchain_extent.height, -                         .minDepth = 0.f, -                         .maxDepth = 1.f}; -  vkCmdSetViewport(renderer->command_buffer[renderer->current_frame], 0, 1, -                   &viewport); -  VkRect2D scissor = {.offset = {}, .extent = renderer->swapchain_extent}; -  vkCmdSetScissor(renderer->command_buffer[renderer->current_frame], 0, 1, -                  &scissor); - -  vkCmdDraw(renderer->command_buffer[renderer->current_frame], 3, 1, 0, 0); - -  vkCmdEndRenderPass(renderer->command_buffer[renderer->current_frame]); - -  if (vkEndCommandBuffer(renderer->command_buffer[renderer->current_frame]) != -      VK_SUCCESS) { -    VGLTF_LOG_ERR("Failed to record command buffer"); -    goto err; -  } - -  VkSubmitInfo submit_info = { -      .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, -  }; - -  VkSemaphore wait_semaphores[] = { -      renderer->image_available_semaphores[renderer->current_frame]}; -  VkPipelineStageFlags wait_stages[] = { -      VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT}; -  submit_info.waitSemaphoreCount = 1; -  submit_info.pWaitSemaphores = wait_semaphores; -  submit_info.pWaitDstStageMask = wait_stages; -  submit_info.commandBufferCount = 1; -  submit_info.pCommandBuffers = -      &renderer->command_buffer[renderer->current_frame]; - -  VkSemaphore signal_semaphores[] = { -      renderer->render_finished_semaphores[renderer->current_frame]}; -  submit_info.signalSemaphoreCount = 1; -  submit_info.pSignalSemaphores = signal_semaphores; -  if (vkQueueSubmit(renderer->graphics_queue, 1, &submit_info, -                    renderer->in_flight_fences[renderer->current_frame]) != -      VK_SUCCESS) { -    VGLTF_LOG_ERR("Failed to submit draw command buffer"); -    goto err; -  } - -  VkPresentInfoKHR present_info = {.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, -                                   .waitSemaphoreCount = 1, -                                   .pWaitSemaphores = signal_semaphores}; - -  VkSwapchainKHR swapchains[] = {renderer->swapchain}; -  present_info.swapchainCount = 1; -  present_info.pSwapchains = swapchains; -  present_info.pImageIndices = &image_index; -  VkResult result = vkQueuePresentKHR(renderer->present_queue, &present_info); -  if (result == VK_ERROR_OUT_OF_DATE_KHR || result == VK_SUBOPTIMAL_KHR) { -    vgltf_renderer_recreate_swapchain(renderer); -  } else if (acquire_swapchain_image_result != VK_SUCCESS) { -    VGLTF_LOG_ERR("Failed to acquire a swapchain image"); -    goto err; -  } -  renderer->current_frame = -      (renderer->current_frame + 1) % VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; -  return true; -err: -  return false; -} - -bool vgltf_renderer_init(struct vgltf_renderer *renderer, -                         struct vgltf_platform *platform) { -  if (!vgltf_renderer_create_instance(renderer, platform)) { -    VGLTF_LOG_ERR("instance creation failed"); -    goto err; -  } -  
vgltf_renderer_setup_debug_messenger(renderer); -  if (!vgltf_renderer_create_surface(renderer, platform)) { -    goto destroy_instance; -  } - -  if (!vgltf_renderer_pick_physical_device(renderer)) { -    VGLTF_LOG_ERR("Couldn't pick physical device"); -    goto destroy_surface; -  } -  if (!vgltf_renderer_create_logical_device(renderer)) { -    VGLTF_LOG_ERR("Couldn't create logical device"); -    goto destroy_device; -  } - -  struct vgltf_window_size window_size = {800, 600}; -  if (!vgltf_platform_get_window_size(platform, &window_size)) { -    VGLTF_LOG_ERR("Couldn't get window size"); -    goto destroy_device; -  } -  renderer->window_size = window_size; - -  if (!vgltf_renderer_create_swapchain(renderer)) { -    VGLTF_LOG_ERR("Couldn't create swapchain"); -    goto destroy_device; -  } - -  if (!vgltf_renderer_create_image_views(renderer)) { -    VGLTF_LOG_ERR("Couldn't create image views"); -    goto destroy_swapchain; -  } - -  if (!vgltf_renderer_create_render_pass(renderer)) { -    VGLTF_LOG_ERR("Couldn't create render pass"); -    goto destroy_image_views; -  } - -  if (!vgltf_renderer_create_graphics_pipeline(renderer)) { -    VGLTF_LOG_ERR("Couldn't create graphics pipeline"); -    goto destroy_render_pass; -  } - -  if (!vgltf_renderer_create_framebuffers(renderer)) { -    VGLTF_LOG_ERR("Couldn't create framebuffers"); -    goto destroy_graphics_pipeline; -  } - -  if (!vgltf_renderer_create_command_pool(renderer)) { -    VGLTF_LOG_ERR("Couldn't create command pool"); -    goto destroy_frame_buffers; -  } - -  if (!vgltf_renderer_create_command_buffer(renderer)) { -    VGLTF_LOG_ERR("Couldn't create command buffer"); -    goto destroy_command_pool; -  } - -  if (!vgltf_renderer_create_sync_objects(renderer)) { -    VGLTF_LOG_ERR("Couldn't create sync objects"); -    goto destroy_command_pool; -  } - -  return true; - -destroy_command_pool: -  vkDestroyCommandPool(renderer->device, renderer->command_pool, nullptr); -destroy_frame_buffers: -  for (uint32_t swapchain_framebuffer_index = 0; -       swapchain_framebuffer_index < renderer->swapchain_image_count; -       swapchain_framebuffer_index++) { -    vkDestroyFramebuffer( -        renderer->device, -        renderer->swapchain_framebuffers[swapchain_framebuffer_index], nullptr); -  } -destroy_graphics_pipeline: -  vkDestroyPipeline(renderer->device, renderer->graphics_pipeline, nullptr); -  vkDestroyPipelineLayout(renderer->device, renderer->pipeline_layout, nullptr); -destroy_render_pass: -  vkDestroyRenderPass(renderer->device, renderer->render_pass, nullptr); -destroy_image_views: -  for (uint32_t swapchain_image_view_index = 0; -       swapchain_image_view_index < renderer->swapchain_image_count; -       swapchain_image_view_index++) { -    vkDestroyImageView( -        renderer->device, -        renderer->swapchain_image_views[swapchain_image_view_index], nullptr); -  } -destroy_swapchain: -  vkDestroySwapchainKHR(renderer->device, renderer->swapchain, nullptr); -destroy_device: -  vkDestroyDevice(renderer->device, nullptr); -destroy_surface: -  vkDestroySurfaceKHR(renderer->instance, renderer->surface, nullptr); -destroy_instance: -  if (enable_validation_layers) { -    destroy_debug_utils_messenger_ext(renderer->instance, -                                      renderer->debug_messenger, nullptr); -  } -  vkDestroyInstance(renderer->instance, nullptr); -err: -  return false; -} -void vgltf_renderer_deinit(struct vgltf_renderer *renderer) { -  vkDeviceWaitIdle(renderer->device); -  
vgltf_renderer_cleanup_swapchain(renderer);
-  vkDestroyPipeline(renderer->device, renderer->graphics_pipeline, nullptr);
-  vkDestroyPipelineLayout(renderer->device, renderer->pipeline_layout, nullptr);
-  vkDestroyRenderPass(renderer->device, renderer->render_pass, nullptr);
-  for (int i = 0; i < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; i++) {
-    vkDestroySemaphore(renderer->device,
-                       renderer->image_available_semaphores[i], nullptr);
-    vkDestroySemaphore(renderer->device,
-                       renderer->render_finished_semaphores[i], nullptr);
-    vkDestroyFence(renderer->device, renderer->in_flight_fences[i], nullptr);
-  }
-  vkDestroyCommandPool(renderer->device, renderer->command_pool, nullptr);
-  vkDestroyDevice(renderer->device, nullptr);
-  if (enable_validation_layers) {
-    destroy_debug_utils_messenger_ext(renderer->instance,
-                                      renderer->debug_messenger, nullptr);
-  }
-  vkDestroySurfaceKHR(renderer->instance, renderer->surface, nullptr);
-  vkDestroyInstance(renderer->instance, nullptr);
-}
-void vgltf_renderer_on_window_resized(struct vgltf_renderer *renderer,
-                                      struct vgltf_window_size size) {
-  if (size.width > 0 && size.height > 0 &&
-      size.width != renderer->window_size.width &&
-      size.height != renderer->window_size.height) {
-    renderer->window_size = size;
-    renderer->framebuffer_resized = true;
-  }
-}
diff --git a/src/renderer.h b/src/renderer.h
deleted file mode 100644
index a0417aa..0000000
--- a/src/renderer.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef VGLTF_RENDERER_H
-#define VGLTF_RENDERER_H
-
-#include "platform.h"
-#include <vulkan/vulkan.h>
-
-constexpr int VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT = 2;
-constexpr int VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT = 32;
-struct vgltf_renderer {
-  VkInstance instance;
-  VkPhysicalDevice physical_device;
-  VkDevice device;
-  VkQueue graphics_queue;
-  VkQueue present_queue;
-  VkDebugUtilsMessengerEXT debug_messenger;
-  VkSurfaceKHR surface;
-  VkSwapchainKHR swapchain;
-  VkImage swapchain_images[VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT];
-  VkImageView swapchain_image_views[VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT];
-  VkFormat swapchain_image_format;
-  VkExtent2D swapchain_extent;
-  uint32_t swapchain_image_count;
-  VkRenderPass render_pass;
-  VkPipelineLayout pipeline_layout;
-  VkPipeline graphics_pipeline;
-  VkFramebuffer
-      swapchain_framebuffers[VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT];
-  VkCommandPool command_pool;
-  VkCommandBuffer command_buffer[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT];
-  VkSemaphore
-      image_available_semaphores[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT];
-  VkSemaphore
-      render_finished_semaphores[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT];
-  VkFence in_flight_fences[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT];
-  struct vgltf_window_size window_size;
-  uint32_t current_frame;
-  bool framebuffer_resized;
-};
-bool vgltf_renderer_init(struct vgltf_renderer *renderer,
-                         struct vgltf_platform *platform);
-void vgltf_renderer_deinit(struct vgltf_renderer *renderer);
-bool vgltf_renderer_triangle_pass(struct vgltf_renderer *renderer);
-void vgltf_renderer_on_window_resized(struct vgltf_renderer *renderer,
-                                      struct vgltf_window_size size);
-#endif // VGLTF_RENDERER_H
diff --git a/src/renderer/renderer.c b/src/renderer/renderer.c
new file mode 100644
index 0000000..d34ef73
--- /dev/null
+++ b/src/renderer/renderer.c
@@ -0,0 +1,2559 @@
+#include "renderer.h"
+#include "../image.h"
+#include "../log.h"
+#include "../maths.h"
+#include "../platform.h"
+#include "vma_usage.h"
+#include <math.h>
+
+#define TINYOBJ_LOADER_C_IMPLEMENTATION
+#include "vendor/tiny_obj_loader_c.h"
+
+#include <assert.h>
+#include <vulkan/vulkan_core.h>
+
+static const char MODEL_PATH[] = "assets/model.obj";
+static const char TEXTURE_PATH[] = "assets/texture.png";
+
+VkVertexInputBindingDescription vgltf_vertex_binding_description() {
+  return (VkVertexInputBindingDescription){
+      .binding = 0,
+      .stride = sizeof(struct vgltf_vertex),
+      .inputRate = VK_VERTEX_INPUT_RATE_VERTEX};
+}
+struct vgltf_vertex_input_attribute_descriptions
+vgltf_vertex_attribute_descriptions(void) {
+  return (struct vgltf_vertex_input_attribute_descriptions){
+      .descriptions = {(VkVertexInputAttributeDescription){
+                           .binding = 0,
+                           .location = 0,
+                           .format = VK_FORMAT_R32G32B32_SFLOAT,
+                           .offset = offsetof(struct vgltf_vertex, position)},
+                       (VkVertexInputAttributeDescription){
+                           .binding = 0,
+                           .location = 1,
+                           .format = VK_FORMAT_R32G32B32_SFLOAT,
+                           .offset = offsetof(struct vgltf_vertex, color)},
+                       (VkVertexInputAttributeDescription){
+                           .binding = 0,
+                           .location = 2,
+                           .format = VK_FORMAT_R32G32_SFLOAT,
+                           .offset = offsetof(struct vgltf_vertex,
+                                              texture_coordinates)}},
+      .count = 3};
+}
+
+static const char *VALIDATION_LAYERS[] = {"VK_LAYER_KHRONOS_validation"};
+static constexpr int VALIDATION_LAYER_COUNT =
+    sizeof(VALIDATION_LAYERS) / sizeof(VALIDATION_LAYERS[0]);
+
+#ifdef VGLTF_DEBUG
+static constexpr bool enable_validation_layers = true;
+#else
+static constexpr bool enable_validation_layers = false;
+#endif
+
+static VKAPI_ATTR VkBool32 VKAPI_CALL
+debug_callback(VkDebugUtilsMessageSeverityFlagBitsEXT message_severity,
+               VkDebugUtilsMessageTypeFlagBitsEXT message_type,
+               const VkDebugUtilsMessengerCallbackDataEXT *callback_data,
+               void *user_data) {
+  (void)message_severity;
+  (void)message_type;
+  (void)user_data;
+  VGLTF_LOG_DBG("validation layer: %s", callback_data->pMessage);
+  return VK_FALSE;
+}
+
+static constexpr int REQUIRED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY = 10;
+struct required_instance_extensions {
+  const char *extensions[REQUIRED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY];
+  uint32_t count;
+};
+void required_instance_extensions_push(
+    struct required_instance_extensions *required_instance_extensions,
+    const char *required_instance_extension) {
+  if (required_instance_extensions->count ==
+      REQUIRED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY) {
+    VGLTF_PANIC("required instance extensions array is full");
+  }
+  required_instance_extensions
+      ->extensions[required_instance_extensions->count++] =
+      required_instance_extension;
+}
+
+static constexpr int SUPPORTED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY = 128;
+struct supported_instance_extensions {
+  VkExtensionProperties
+      properties[SUPPORTED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY];
+  uint32_t count;
+};
+bool supported_instance_extensions_init(
+    struct 
supported_instance_extensions *supported_instance_extensions) { +  if (vkEnumerateInstanceExtensionProperties( +          nullptr, &supported_instance_extensions->count, nullptr) != +      VK_SUCCESS) { +    goto err; +  } + +  if (supported_instance_extensions->count > +      SUPPORTED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY) { +    VGLTF_LOG_ERR("supported instance extensions array cannot fit all the " +                  "VkExtensionProperties"); +    goto err; +  } + +  if (vkEnumerateInstanceExtensionProperties( +          nullptr, &supported_instance_extensions->count, +          supported_instance_extensions->properties) != VK_SUCCESS) { +    goto err; +  } +  return true; +err: +  return false; +} +void supported_instance_extensions_debug_print( +    const struct supported_instance_extensions *supported_instance_extensions) { +  VGLTF_LOG_DBG("Supported instance extensions:"); +  for (uint32_t i = 0; i < supported_instance_extensions->count; i++) { +    VGLTF_LOG_DBG("\t- %s", +                  supported_instance_extensions->properties[i].extensionName); +  } +} +bool supported_instance_extensions_includes( +    const struct supported_instance_extensions *supported_instance_extensions, +    const char *extension_name) { +  for (uint32_t supported_instance_extension_index = 0; +       supported_instance_extension_index < +       supported_instance_extensions->count; +       supported_instance_extension_index++) { +    const VkExtensionProperties *extension_properties = +        &supported_instance_extensions +             ->properties[supported_instance_extension_index]; +    if (strcmp(extension_properties->extensionName, extension_name) == 0) { +      return true; +    } +  } + +  return false; +} + +static constexpr uint32_t SUPPORTED_VALIDATION_LAYERS_ARRAY_CAPACITY = 64; +struct supported_validation_layers { +  VkLayerProperties properties[SUPPORTED_VALIDATION_LAYERS_ARRAY_CAPACITY]; +  uint32_t count; +}; +bool supported_validation_layers_init( +    struct supported_validation_layers *supported_validation_layers) { +  if (vkEnumerateInstanceLayerProperties(&supported_validation_layers->count, +                                         nullptr) != VK_SUCCESS) { +    goto err; +  } + +  if (supported_validation_layers->count > +      SUPPORTED_VALIDATION_LAYERS_ARRAY_CAPACITY) { +    VGLTF_LOG_ERR("supported validation layers array cannot fit all the " +                  "VkLayerProperties"); +    goto err; +  } + +  if (vkEnumerateInstanceLayerProperties( +          &supported_validation_layers->count, +          supported_validation_layers->properties) != VK_SUCCESS) { +    goto err; +  } + +  return true; +err: +  return false; +} + +static bool are_validation_layer_supported() { +  struct supported_validation_layers supported_layers = {}; +  if (!supported_validation_layers_init(&supported_layers)) { +    goto err; +  } + +  for (int requested_layer_index = 0; +       requested_layer_index < VALIDATION_LAYER_COUNT; +       requested_layer_index++) { +    const char *requested_layer_name = VALIDATION_LAYERS[requested_layer_index]; +    bool requested_layer_found = false; +    for (uint32_t supported_layer_index = 0; +         supported_layer_index < supported_layers.count; +         supported_layer_index++) { +      VkLayerProperties *supported_layer = +          &supported_layers.properties[supported_layer_index]; +      if (strcmp(requested_layer_name, supported_layer->layerName) == 0) { +        requested_layer_found = true; +        break; +      } +    } + +    if 
(!requested_layer_found) { +      goto err; +    } +  } + +  return true; +err: +  return false; +} + +static bool fetch_required_instance_extensions( +    struct required_instance_extensions *required_extensions, +    struct vgltf_platform *platform) { +  struct supported_instance_extensions supported_extensions = {}; +  if (!supported_instance_extensions_init(&supported_extensions)) { +    VGLTF_LOG_ERR( +        "Couldn't fetch supported instance extensions details (OOM?)"); +    goto err; +  } +  supported_instance_extensions_debug_print(&supported_extensions); + +  uint32_t platform_required_extension_count = 0; +  const char *const *platform_required_extensions = +      vgltf_platform_get_vulkan_instance_extensions( +          platform, &platform_required_extension_count); +  for (uint32_t platform_required_extension_index = 0; +       platform_required_extension_index < platform_required_extension_count; +       platform_required_extension_index++) { +    required_instance_extensions_push( +        required_extensions, +        platform_required_extensions[platform_required_extension_index]); +  } +#ifdef VGLTF_PLATFORM_MACOS +  required_instance_extensions_push( +      required_extensions, VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME); +#endif // VGLTF_PLATFORM_MACOS + +  if (enable_validation_layers) { +    required_instance_extensions_push(required_extensions, +                                      VK_EXT_DEBUG_UTILS_EXTENSION_NAME); +  } + +  bool all_extensions_supported = true; +  for (uint32_t required_extension_index = 0; +       required_extension_index < required_extensions->count; +       required_extension_index++) { +    const char *required_extension_name = +        required_extensions->extensions[required_extension_index]; +    if (!supported_instance_extensions_includes(&supported_extensions, +                                                required_extension_name)) { +      VGLTF_LOG_ERR("Unsupported instance extension: %s", +                    required_extension_name); +      all_extensions_supported = false; +    } +  } + +  if (!all_extensions_supported) { +    VGLTF_LOG_ERR("Some required extensions are unsupported."); +    goto err; +  } + +  return true; +err: +  return false; +} + +static void populate_debug_messenger_create_info( +    VkDebugUtilsMessengerCreateInfoEXT *create_info) { +  *create_info = (VkDebugUtilsMessengerCreateInfoEXT){}; +  create_info->sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; +  create_info->messageSeverity = +      VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | +      VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | +      VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; +  create_info->messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | +                             VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | +                             VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; +  create_info->pfnUserCallback = debug_callback; +} + +static bool vgltf_vk_instance_init(struct vgltf_vk_instance *instance, +                                   struct vgltf_platform *platform) { +  VGLTF_LOG_INFO("Creating vulkan instance..."); +  if (enable_validation_layers && !are_validation_layer_supported()) { +    VGLTF_LOG_ERR("Requested validation layers aren't supported"); +    goto err; +  } + +  VkApplicationInfo application_info = { +      .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, +      .pApplicationName = "Visible GLTF", +      .applicationVersion = VK_MAKE_VERSION(0, 1, 0), +      
.pEngineName = "No Engine", +      .engineVersion = VK_MAKE_VERSION(1, 0, 0), +      .apiVersion = VK_API_VERSION_1_2}; + +  struct required_instance_extensions required_extensions = {}; +  fetch_required_instance_extensions(&required_extensions, platform); + +  VkInstanceCreateFlags flags = 0; +#ifdef VGLTF_PLATFORM_MACOS +  flags |= VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR; +#endif // VGLTF_PLATFORM_MACOS + +  VkInstanceCreateInfo create_info = { +      .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, +      .pApplicationInfo = &application_info, +      .enabledExtensionCount = required_extensions.count, +      .ppEnabledExtensionNames = required_extensions.extensions, +      .flags = flags}; + +  VkDebugUtilsMessengerCreateInfoEXT debug_create_info; +  if (enable_validation_layers) { +    create_info.enabledLayerCount = VALIDATION_LAYER_COUNT; +    create_info.ppEnabledLayerNames = VALIDATION_LAYERS; +    populate_debug_messenger_create_info(&debug_create_info); +    create_info.pNext = &debug_create_info; +  } + +  if (vkCreateInstance(&create_info, nullptr, &instance->instance) != +      VK_SUCCESS) { +    VGLTF_LOG_ERR("Failed to create VkInstance"); +    goto err; +  } + +  return true; +err: +  return false; +} +static void vgltf_vk_instance_deinit(struct vgltf_vk_instance *instance) { +  vkDestroyInstance(instance->instance, nullptr); +} + +static VkResult create_debug_utils_messenger_ext( +    VkInstance instance, const VkDebugUtilsMessengerCreateInfoEXT *create_info, +    const VkAllocationCallbacks *allocator, +    VkDebugUtilsMessengerEXT *debug_messenger) { +  auto func = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr( +      instance, "vkCreateDebugUtilsMessengerEXT"); +  if (func != nullptr) { +    return func(instance, create_info, allocator, debug_messenger); +  } + +  return VK_ERROR_EXTENSION_NOT_PRESENT; +} + +static void +destroy_debug_utils_messenger_ext(VkInstance instance, +                                  VkDebugUtilsMessengerEXT debug_messenger, +                                  const VkAllocationCallbacks *allocator) { +  auto func = (PFN_vkDestroyDebugUtilsMessengerEXT)vkGetInstanceProcAddr( +      instance, "vkDestroyDebugUtilsMessengerEXT"); +  if (func != nullptr) { +    func(instance, debug_messenger, allocator); +  } +} + +static void +vgltf_renderer_setup_debug_messenger(struct vgltf_renderer *renderer) { +  if (!enable_validation_layers) +    return; +  VkDebugUtilsMessengerCreateInfoEXT create_info; +  populate_debug_messenger_create_info(&create_info); +  create_debug_utils_messenger_ext(renderer->instance.instance, &create_info, +                                   nullptr, &renderer->debug_messenger); +} + +static constexpr int AVAILABLE_PHYSICAL_DEVICE_ARRAY_CAPACITY = 128; +struct available_physical_devices { +  VkPhysicalDevice devices[AVAILABLE_PHYSICAL_DEVICE_ARRAY_CAPACITY]; +  uint32_t count; +}; +static bool +available_physical_devices_init(VkInstance instance, +                                struct available_physical_devices *devices) { + +  if (vkEnumeratePhysicalDevices(instance, &devices->count, nullptr) != +      VK_SUCCESS) { +    VGLTF_LOG_ERR("Couldn't enumerate physical devices"); +    goto err; +  } + +  if (devices->count == 0) { +    VGLTF_LOG_ERR("Failed to find any GPU with Vulkan support"); +    goto err; +  } + +  if (devices->count > AVAILABLE_PHYSICAL_DEVICE_ARRAY_CAPACITY) { +    VGLTF_LOG_ERR("available physical devices array cannot fit all available " +                  "physical devices"); +    goto 
err; +  } + +  if (vkEnumeratePhysicalDevices(instance, &devices->count, devices->devices) != +      VK_SUCCESS) { +    VGLTF_LOG_ERR("Couldn't enumerate physical devices"); +    goto err; +  } + +  return true; +err: +  return false; +} + +struct queue_family_indices { +  uint32_t graphics_family; +  uint32_t present_family; +  bool has_graphics_family; +  bool has_present_family; +}; +bool queue_family_indices_is_complete( +    const struct queue_family_indices *indices) { +  return indices->has_graphics_family && indices->has_present_family; +} +bool queue_family_indices_for_device(struct queue_family_indices *indices, +                                     VkPhysicalDevice device, +                                     VkSurfaceKHR surface) { +  static constexpr uint32_t QUEUE_FAMILY_PROPERTIES_ARRAY_CAPACITY = 64; +  uint32_t queue_family_count = 0; +  vkGetPhysicalDeviceQueueFamilyProperties(device, &queue_family_count, +                                           nullptr); + +  if (queue_family_count > QUEUE_FAMILY_PROPERTIES_ARRAY_CAPACITY) { +    VGLTF_LOG_ERR( +        "Queue family properties array cannot fit all queue family properties"); +    goto err; +  } + +  VkQueueFamilyProperties +      queue_family_properties[QUEUE_FAMILY_PROPERTIES_ARRAY_CAPACITY] = {}; +  vkGetPhysicalDeviceQueueFamilyProperties(device, &queue_family_count, +                                           queue_family_properties); + +  for (uint32_t queue_family_index = 0; queue_family_index < queue_family_count; +       queue_family_index++) { +    VkQueueFamilyProperties *queue_family = +        &queue_family_properties[queue_family_index]; + +    VkBool32 present_support; +    vkGetPhysicalDeviceSurfaceSupportKHR(device, queue_family_index, surface, +                                         &present_support); + +    if (queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT) { +      indices->graphics_family = queue_family_index; +      indices->has_graphics_family = true; +    } + +    if (present_support) { +      indices->present_family = queue_family_index; +      indices->has_present_family = true; +    } + +    if (queue_family_indices_is_complete(indices)) { +      break; +    } +  } + +  return true; +err: +  return false; +} + +static bool is_in_array(uint32_t *array, int length, uint32_t value) { +  for (int i = 0; i < length; i++) { +    if (array[i] == value) { +      return true; +    } +  } + +  return false; +} + +static constexpr uint32_t SUPPORTED_EXTENSIONS_ARRAY_CAPACITY = 1024; +struct supported_extensions { +  VkExtensionProperties properties[SUPPORTED_EXTENSIONS_ARRAY_CAPACITY]; +  uint32_t count; +}; +bool supported_extensions_init( +    struct supported_extensions *supported_extensions, +    VkPhysicalDevice device) { +  if (vkEnumerateDeviceExtensionProperties(device, nullptr, +                                           &supported_extensions->count, +                                           nullptr) != VK_SUCCESS) { +    goto err; +  } + +  if (supported_extensions->count > SUPPORTED_EXTENSIONS_ARRAY_CAPACITY) { +    VGLTF_LOG_ERR("supported extensions array cannot fit all the supported " +                  "VkExtensionProperties (%u)", +                  supported_extensions->count); +    goto err; +  } + +  if (vkEnumerateDeviceExtensionProperties( +          device, nullptr, &supported_extensions->count, +          supported_extensions->properties) != VK_SUCCESS) { +    goto err; +  } + +  return true; +err: +  return false; +} + +static bool supported_extensions_includes_extension( 
+    struct supported_extensions *supported_extensions, +    const char *extension_name) { +  for (uint32_t supported_extension_index = 0; +       supported_extension_index < supported_extensions->count; +       supported_extension_index++) { +    if (strcmp(supported_extensions->properties[supported_extension_index] +                   .extensionName, +               extension_name) == 0) { +      return true; +    } +  } +  return false; +} + +static const char *DEVICE_EXTENSIONS[] = { +    VK_KHR_SWAPCHAIN_EXTENSION_NAME, +#ifdef VGLTF_PLATFORM_MACOS +    "VK_KHR_portability_subset", +#endif +}; +static constexpr int DEVICE_EXTENSION_COUNT = +    sizeof(DEVICE_EXTENSIONS) / sizeof(DEVICE_EXTENSIONS[0]); +static bool are_device_extensions_supported(VkPhysicalDevice device) { +  struct supported_extensions supported_extensions = {}; +  if (!supported_extensions_init(&supported_extensions, device)) { +    goto err; +  } + +  for (uint32_t required_extension_index = 0; +       required_extension_index < DEVICE_EXTENSION_COUNT; +       required_extension_index++) { +    if (!supported_extensions_includes_extension( +            &supported_extensions, +            DEVICE_EXTENSIONS[required_extension_index])) { +      VGLTF_LOG_DBG("Unsupported: %s", +                    DEVICE_EXTENSIONS[required_extension_index]); +      goto err; +    } +  } + +  return true; + +err: +  return false; +} + +static constexpr int SWAPCHAIN_SUPPORT_DETAILS_MAX_SURFACE_FORMAT_COUNT = 256; +static constexpr int SWAPCHAIN_SUPPORT_DETAILS_MAX_PRESENT_MODE_COUNT = 256; +struct swapchain_support_details { +  VkSurfaceCapabilitiesKHR capabilities; +  VkSurfaceFormatKHR +      formats[SWAPCHAIN_SUPPORT_DETAILS_MAX_SURFACE_FORMAT_COUNT]; +  VkPresentModeKHR +      present_modes[SWAPCHAIN_SUPPORT_DETAILS_MAX_PRESENT_MODE_COUNT]; +  uint32_t format_count; +  uint32_t present_mode_count; +}; +bool swapchain_support_details_query_from_device( +    struct swapchain_support_details *swapchain_support_details, +    VkPhysicalDevice device, VkSurfaceKHR surface) { +  if (vkGetPhysicalDeviceSurfaceCapabilitiesKHR( +          device, surface, &swapchain_support_details->capabilities) != +      VK_SUCCESS) { +    goto err; +  } + +  if (vkGetPhysicalDeviceSurfaceFormatsKHR( +          device, surface, &swapchain_support_details->format_count, nullptr) != +      VK_SUCCESS) { +    goto err; +  } + +  if (swapchain_support_details->format_count != 0 && +      swapchain_support_details->format_count < +          SWAPCHAIN_SUPPORT_DETAILS_MAX_SURFACE_FORMAT_COUNT) { +    if (vkGetPhysicalDeviceSurfaceFormatsKHR( +            device, surface, &swapchain_support_details->format_count, +            swapchain_support_details->formats) != VK_SUCCESS) { +      goto err; +    } +  } + +  if (vkGetPhysicalDeviceSurfacePresentModesKHR( +          device, surface, &swapchain_support_details->present_mode_count, +          nullptr) != VK_SUCCESS) { +    goto err; +  } + +  if (swapchain_support_details->present_mode_count != 0 && +      swapchain_support_details->present_mode_count < +          SWAPCHAIN_SUPPORT_DETAILS_MAX_PRESENT_MODE_COUNT) { +    if (vkGetPhysicalDeviceSurfacePresentModesKHR( +            device, surface, &swapchain_support_details->present_mode_count, +            swapchain_support_details->present_modes) != VK_SUCCESS) { +      goto err; +    } +  } + +  return true; +err: +  return false; +} + +static bool is_physical_device_suitable(VkPhysicalDevice device, +                                        VkSurfaceKHR surface) 
{ +  struct queue_family_indices indices = {}; +  queue_family_indices_for_device(&indices, device, surface); + +  VGLTF_LOG_DBG("Checking for physical device extension support"); +  bool extensions_supported = are_device_extensions_supported(device); +  VGLTF_LOG_DBG("Supported: %d", extensions_supported); + +  bool swapchain_adequate = false; +  if (extensions_supported) { + +    VGLTF_LOG_DBG("Checking for swapchain support details"); +    struct swapchain_support_details swapchain_support_details = {}; +    if (!swapchain_support_details_query_from_device(&swapchain_support_details, +                                                     device, surface)) { +      VGLTF_LOG_ERR("Couldn't query swapchain support details from device"); +      goto err; +    } + +    swapchain_adequate = swapchain_support_details.format_count > 0 && +                         swapchain_support_details.present_mode_count > 0; +  } + +  VkPhysicalDeviceFeatures supported_features; +  vkGetPhysicalDeviceFeatures(device, &supported_features); + +  return queue_family_indices_is_complete(&indices) && extensions_supported && +         swapchain_adequate && supported_features.samplerAnisotropy; +err: +  return false; +} + +static bool pick_physical_device(VkPhysicalDevice *physical_device, +                                 struct vgltf_vk_instance *instance, +                                 VkSurfaceKHR surface) { +  VkPhysicalDevice vk_physical_device = VK_NULL_HANDLE; +  struct available_physical_devices available_physical_devices = {}; +  if (!available_physical_devices_init(instance->instance, +                                       &available_physical_devices)) { +    VGLTF_LOG_ERR("Couldn't fetch available physical devices"); +    goto err; +  } + +  for (uint32_t available_physical_device_index = 0; +       available_physical_device_index < available_physical_devices.count; +       available_physical_device_index++) { +    VkPhysicalDevice available_physical_device = +        available_physical_devices.devices[available_physical_device_index]; +    if (is_physical_device_suitable(available_physical_device, surface)) { +      vk_physical_device = available_physical_device; +      break; +    } +  } + +  if (vk_physical_device == VK_NULL_HANDLE) { +    VGLTF_LOG_ERR("Failed to find a suitable GPU"); +    goto err; +  } + +  *physical_device = vk_physical_device; + +  return true; +err: +  return false; +} + +static bool create_logical_device(VkDevice *device, VkQueue *graphics_queue, +                                  VkQueue *present_queue, +                                  VkPhysicalDevice physical_device, +                                  VkSurfaceKHR surface) { +  struct queue_family_indices queue_family_indices = {}; +  queue_family_indices_for_device(&queue_family_indices, physical_device, +                                  surface); +  static constexpr int MAX_QUEUE_FAMILY_COUNT = 2; + +  uint32_t unique_queue_families[MAX_QUEUE_FAMILY_COUNT] = {}; +  int unique_queue_family_count = 0; + +  if (!is_in_array(unique_queue_families, unique_queue_family_count, +                   queue_family_indices.graphics_family)) { +    assert(unique_queue_family_count < MAX_QUEUE_FAMILY_COUNT); +    unique_queue_families[unique_queue_family_count++] = +        queue_family_indices.graphics_family; +  } +  if (!is_in_array(unique_queue_families, unique_queue_family_count, +                   queue_family_indices.present_family)) { +    assert(unique_queue_family_count < MAX_QUEUE_FAMILY_COUNT); +    
unique_queue_families[unique_queue_family_count++] = +        queue_family_indices.present_family; +  } + +  float queue_priority = 1.f; +  VkDeviceQueueCreateInfo queue_create_infos[MAX_QUEUE_FAMILY_COUNT] = {}; +  int queue_create_info_count = 0; +  for (int unique_queue_family_index = 0; +       unique_queue_family_index < unique_queue_family_count; +       unique_queue_family_index++) { +    queue_create_infos[queue_create_info_count++] = (VkDeviceQueueCreateInfo){ +        .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, +        .queueFamilyIndex = unique_queue_families[unique_queue_family_index], +        .queueCount = 1, +        .pQueuePriorities = &queue_priority}; +  } + +  VkPhysicalDeviceFeatures device_features = { +      .samplerAnisotropy = VK_TRUE, +  }; +  VkDeviceCreateInfo create_info = { +      .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, +      .pQueueCreateInfos = queue_create_infos, +      .queueCreateInfoCount = queue_create_info_count, +      .pEnabledFeatures = &device_features, +      .ppEnabledExtensionNames = DEVICE_EXTENSIONS, +      .enabledExtensionCount = DEVICE_EXTENSION_COUNT}; +  if (vkCreateDevice(physical_device, &create_info, nullptr, device) != +      VK_SUCCESS) { +    VGLTF_LOG_ERR("Failed to create logical device"); +    goto err; +  } + +  vkGetDeviceQueue(*device, queue_family_indices.graphics_family, 0, +                   graphics_queue); +  vkGetDeviceQueue(*device, queue_family_indices.present_family, 0, +                   present_queue); + +  return true; +err: +  return false; +} + +static bool create_allocator(VmaAllocator *allocator, +                             struct vgltf_vk_device *device, +                             struct vgltf_vk_instance *instance) { +  VmaAllocatorCreateInfo create_info = {.device = device->device, +                                        .instance = instance->instance, +                                        .physicalDevice = +                                            device->physical_device}; + +  if (vmaCreateAllocator(&create_info, allocator) != VK_SUCCESS) { +    VGLTF_LOG_ERR("Couldn't create VMA allocator"); +    goto err; +  } +  return true; +err: +  return false; +} + +static bool vgltf_vk_surface_init(struct vgltf_vk_surface *surface, +                                  struct vgltf_vk_instance *instance, +                                  struct vgltf_platform *platform) { +  if (!vgltf_platform_create_vulkan_surface(platform, instance->instance, +                                            &surface->surface)) { +    VGLTF_LOG_ERR("Couldn't create surface"); +    goto err; +  } + +  return true; +err: +  return false; +} + +static void vgltf_vk_surface_deinit(struct vgltf_vk_surface *surface, +                                    struct vgltf_vk_instance *instance) { +  vkDestroySurfaceKHR(instance->instance, surface->surface, nullptr); +} + +static VkSurfaceFormatKHR +choose_swapchain_surface_format(VkSurfaceFormatKHR *available_formats, +                                uint32_t available_format_count) { +  for (uint32_t available_format_index = 0; +       available_format_index < available_format_count; +       available_format_index++) { +    VkSurfaceFormatKHR *available_format = +        &available_formats[available_format_index]; +    if (available_format->format == VK_FORMAT_B8G8R8A8_SRGB && +        available_format->colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) { +      return *available_format; +    } +  } + +  return available_formats[0]; +} + +static VkPresentModeKHR 
+choose_swapchain_present_mode(VkPresentModeKHR *available_modes, +                              uint32_t available_mode_count) { +  for (uint32_t available_mode_index = 0; +       available_mode_index < available_mode_count; available_mode_index++) { +    VkPresentModeKHR available_mode = available_modes[available_mode_index]; +    if (available_mode == VK_PRESENT_MODE_MAILBOX_KHR) { +      return available_mode; +    } +  } + +  return VK_PRESENT_MODE_FIFO_KHR; +} + +static uint32_t clamp_uint32(uint32_t min, uint32_t max, uint32_t value) { +  return value < min ? min : value > max ? max : value; +} + +static VkExtent2D +choose_swapchain_extent(const VkSurfaceCapabilitiesKHR *capabilities, int width, +                        int height) { +  if (capabilities->currentExtent.width != UINT32_MAX) { +    return capabilities->currentExtent; +  } else { +    VkExtent2D actual_extent = {width, height}; +    actual_extent.width = +        clamp_uint32(capabilities->minImageExtent.width, +                     capabilities->maxImageExtent.width, actual_extent.width); +    actual_extent.height = +        clamp_uint32(capabilities->minImageExtent.height, +                     capabilities->maxImageExtent.height, actual_extent.height); +    return actual_extent; +  } +} + +static bool create_swapchain(struct vgltf_vk_swapchain *swapchain, +                             struct vgltf_vk_device *device, +                             struct vgltf_vk_surface *surface, +                             struct vgltf_window_size *window_size) { +  struct swapchain_support_details swapchain_support_details = {}; +  swapchain_support_details_query_from_device( +      &swapchain_support_details, device->physical_device, surface->surface); + +  VkSurfaceFormatKHR surface_format = +      choose_swapchain_surface_format(swapchain_support_details.formats, +                                      swapchain_support_details.format_count); +  VkPresentModeKHR present_mode = choose_swapchain_present_mode( +      swapchain_support_details.present_modes, +      swapchain_support_details.present_mode_count); + +  VkExtent2D extent = +      choose_swapchain_extent(&swapchain_support_details.capabilities, +                              window_size->width, window_size->height); +  uint32_t image_count = +      swapchain_support_details.capabilities.minImageCount + 1; +  if (swapchain_support_details.capabilities.maxImageCount > 0 && +      image_count > swapchain_support_details.capabilities.maxImageCount) { +    image_count = swapchain_support_details.capabilities.maxImageCount; +  } + +  VkSwapchainCreateInfoKHR create_info = { +      .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, +      .surface = surface->surface, +      .minImageCount = image_count, +      .imageFormat = surface_format.format, +      .imageColorSpace = surface_format.colorSpace, +      .imageExtent = extent, +      .imageArrayLayers = 1, +      .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT}; +  struct queue_family_indices indices = {}; +  queue_family_indices_for_device(&indices, device->physical_device, +                                  surface->surface); +  uint32_t queue_family_indices[] = {indices.graphics_family, +                                     indices.present_family}; +  if (indices.graphics_family != indices.present_family) { +    create_info.imageSharingMode = VK_SHARING_MODE_CONCURRENT; +    create_info.queueFamilyIndexCount = 2; +    create_info.pQueueFamilyIndices = queue_family_indices; +  } else { +    create_info.imageSharingMode 
= VK_SHARING_MODE_EXCLUSIVE; +  } + +  create_info.preTransform = +      swapchain_support_details.capabilities.currentTransform; +  create_info.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; +  create_info.presentMode = present_mode; +  create_info.clipped = VK_TRUE; +  create_info.oldSwapchain = VK_NULL_HANDLE; + +  if (vkCreateSwapchainKHR(device->device, &create_info, nullptr, +                           &swapchain->swapchain) != VK_SUCCESS) { +    VGLTF_LOG_ERR("Swapchain creation failed!"); +    goto err; +  } + +  if (vkGetSwapchainImagesKHR(device->device, swapchain->swapchain, +                              &swapchain->swapchain_image_count, +                              nullptr) != VK_SUCCESS) { +    VGLTF_LOG_ERR("Couldn't get swapchain image count"); +    goto destroy_swapchain; +  } + +  if (swapchain->swapchain_image_count > +      VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT) { +    VGLTF_LOG_ERR("Swapchain image array cannot fit all %d swapchain images", +                  swapchain->swapchain_image_count); +    goto destroy_swapchain; +  } + +  if (vkGetSwapchainImagesKHR(device->device, swapchain->swapchain, +                              &swapchain->swapchain_image_count, +                              swapchain->swapchain_images) != VK_SUCCESS) { +    VGLTF_LOG_ERR("Couldn't get swapchain images"); +    goto destroy_swapchain; +  } + +  swapchain->swapchain_image_format = surface_format.format; +  swapchain->swapchain_extent = extent; + +  return true; +destroy_swapchain: +  vkDestroySwapchainKHR(device->device, swapchain->swapchain, nullptr); +err: +  return false; +} + +static bool create_image_view(struct vgltf_vk_device *device, VkImage image, +                              VkFormat format, VkImageView *image_view, +                              VkImageAspectFlags aspect_flags, +                              uint32_t mip_level_count) { + +  VkImageViewCreateInfo create_info = { +      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, +      .image = image, +      .viewType = VK_IMAGE_VIEW_TYPE_2D, +      .format = format, +      .components = {VK_COMPONENT_SWIZZLE_IDENTITY, +                     VK_COMPONENT_SWIZZLE_IDENTITY, +                     VK_COMPONENT_SWIZZLE_IDENTITY, +                     VK_COMPONENT_SWIZZLE_IDENTITY}, +      .subresourceRange = {.aspectMask = aspect_flags, +                           .levelCount = mip_level_count, +                           .layerCount = 1}}; +  if (vkCreateImageView(device->device, &create_info, nullptr, image_view) != +      VK_SUCCESS) { +    return false; +  } + +  return true; +} + +static bool create_swapchain_image_views(struct vgltf_vk_swapchain *swapchain, +                                         struct vgltf_vk_device *device) { +  uint32_t swapchain_image_index; +  for (swapchain_image_index = 0; +       swapchain_image_index < swapchain->swapchain_image_count; +       swapchain_image_index++) { +    VkImage swapchain_image = +        swapchain->swapchain_images[swapchain_image_index]; + +    if (!create_image_view( +            device, swapchain_image, swapchain->swapchain_image_format, +            &swapchain->swapchain_image_views[swapchain_image_index], +            VK_IMAGE_ASPECT_COLOR_BIT, 1)) { +      goto err; +    } +  } +  return true; +err: +  for (uint32_t to_remove_index = 0; to_remove_index < swapchain_image_index; +       to_remove_index++) { +    vkDestroyImageView(device->device, +                       swapchain->swapchain_image_views[to_remove_index], +                       nullptr); 
+  } +  return false; +} + +static bool create_shader_module(VkDevice device, const unsigned char *code, +                                 int size, VkShaderModule *out) { +  VkShaderModuleCreateInfo create_info = { +      .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, +      .codeSize = size, +      .pCode = (const uint32_t *)code, +  }; +  if (vkCreateShaderModule(device, &create_info, nullptr, out) != VK_SUCCESS) { +    VGLTF_LOG_ERR("Couldn't create shader module"); +    goto err; +  } +  return true; +err: +  return false; +} + +static VkFormat find_supported_format(struct vgltf_renderer *renderer, +                                      const VkFormat *candidates, +                                      int candidate_count, VkImageTiling tiling, +                                      VkFormatFeatureFlags features) { +  for (int candidate_index = 0; candidate_index < candidate_count; +       candidate_index++) { +    VkFormat candidate = candidates[candidate_index]; +    VkFormatProperties properties; +    vkGetPhysicalDeviceFormatProperties(renderer->device.physical_device, +                                        candidate, &properties); +    if (tiling == VK_IMAGE_TILING_LINEAR && +        (properties.linearTilingFeatures & features) == features) { +      return candidate; +    } else if (tiling == VK_IMAGE_TILING_OPTIMAL && +               (properties.optimalTilingFeatures & features) == features) { +      return candidate; +    } +  } + +  return VK_FORMAT_UNDEFINED; +} + +static VkFormat find_depth_format(struct vgltf_renderer *renderer) { +  return find_supported_format(renderer, +                               (const VkFormat[]){VK_FORMAT_D32_SFLOAT, +                                                  VK_FORMAT_D32_SFLOAT_S8_UINT, +                                                  VK_FORMAT_D24_UNORM_S8_UINT}, +                               3, VK_IMAGE_TILING_OPTIMAL, +                               VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT); +} + +static bool vgltf_renderer_create_render_pass(struct vgltf_renderer *renderer) { +  VkAttachmentDescription color_attachment = { +      .format = renderer->swapchain.swapchain_image_format, +      .samples = VK_SAMPLE_COUNT_1_BIT, +      .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, +      .storeOp = VK_ATTACHMENT_STORE_OP_STORE, +      .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, +      .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, +      .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, +      .finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR}; +  VkAttachmentReference color_attachment_ref = { +      .attachment = 0, +      .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, +  }; +  VkAttachmentDescription depth_attachment = { +      .format = find_depth_format(renderer), +      .samples = VK_SAMPLE_COUNT_1_BIT, +      .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, +      .storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, +      .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, +      .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, +      .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, +      .finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL}; +  VkAttachmentReference depth_attachment_ref = { +      .attachment = 1, +      .layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, +  }; + +  VkSubpassDescription subpass = { +      .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, +      .pColorAttachments = &color_attachment_ref, +      .colorAttachmentCount = 1, +      .pDepthStencilAttachment = &depth_attachment_ref}; 
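/* Illustrative sketch, not part of this commit: find_supported_format() above
 * falls back to VK_FORMAT_UNDEFINED when no candidate matches, and the depth
 * attachment below feeds find_depth_format() straight into .format. A small
 * checked wrapper (reusing the existing logging macro) could look like: */
static VkFormat find_depth_format_or_log(struct vgltf_renderer *renderer) {
  VkFormat format = find_depth_format(renderer);
  if (format == VK_FORMAT_UNDEFINED) {
    VGLTF_LOG_ERR("No supported depth-stencil format on this device");
  }
  return format;
}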
+  VkSubpassDependency dependency = { +      .srcSubpass = VK_SUBPASS_EXTERNAL, +      .dstSubpass = 0, +      .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | +                      VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT, +      .srcAccessMask = 0, +      .dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | +                      VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT, +      .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | +                       VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT}; + +  VkAttachmentDescription attachments[] = {color_attachment, depth_attachment}; +  int attachment_count = sizeof(attachments) / sizeof(attachments[0]); +  VkRenderPassCreateInfo render_pass_info = { +      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, +      .attachmentCount = attachment_count, +      .pAttachments = attachments, +      .subpassCount = 1, +      .pSubpasses = &subpass, +      .dependencyCount = 1, +      .pDependencies = &dependency}; + +  if (vkCreateRenderPass(renderer->device.device, &render_pass_info, nullptr, +                         &renderer->render_pass) != VK_SUCCESS) { +    VGLTF_LOG_ERR("Failed to create render pass"); +    goto err; +  } + +  return true; +err: +  return false; +} + +static bool +vgltf_renderer_create_descriptor_set_layout(struct vgltf_renderer *renderer) { +  VkDescriptorSetLayoutBinding ubo_layout_binding = { +      .binding = 0, +      .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, +      .descriptorCount = 1, +      .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, +  }; +  VkDescriptorSetLayoutBinding sampler_layout_binding = { +      .binding = 1, +      .descriptorCount = 1, +      .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, +      .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, +  }; + +  VkDescriptorSetLayoutBinding bindings[] = {ubo_layout_binding, +                                             sampler_layout_binding}; +  int binding_count = sizeof(bindings) / sizeof(bindings[0]); + +  VkDescriptorSetLayoutCreateInfo layout_info = { +      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, +      .bindingCount = binding_count, +      .pBindings = bindings}; + +  if (vkCreateDescriptorSetLayout(renderer->device.device, &layout_info, +                                  nullptr, &renderer->descriptor_set_layout) != +      VK_SUCCESS) { +    VGLTF_LOG_ERR("Failed to create descriptor set layout"); +    goto err; +  } +  return true; +err: +  return false; +} + +static bool +vgltf_renderer_create_graphics_pipeline(struct vgltf_renderer *renderer) { +  static constexpr unsigned char triangle_shader_vert_code[] = { +#embed "../compiled_shaders/triangle.vert.spv" +  }; +  static constexpr unsigned char triangle_shader_frag_code[] = { +#embed "../compiled_shaders/triangle.frag.spv" +  }; + +  VkShaderModule triangle_shader_vert_module; +  if (!create_shader_module(renderer->device.device, triangle_shader_vert_code, +                            sizeof(triangle_shader_vert_code), +                            &triangle_shader_vert_module)) { +    VGLTF_LOG_ERR("Couldn't create triangle vert shader module"); +    goto err; +  } + +  VkShaderModule triangle_shader_frag_module; +  if (!create_shader_module(renderer->device.device, triangle_shader_frag_code, +                            sizeof(triangle_shader_frag_code), +                            &triangle_shader_frag_module)) { +    VGLTF_LOG_ERR("Couldn't create triangle frag shader module"); +    goto destroy_vert_shader_module; +  } 
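/* Illustrative sketch, not part of this commit: VkShaderModuleCreateInfo
 * requires codeSize to be a multiple of 4 and pCode to address 32-bit words,
 * while an unsigned char array filled by #embed only guarantees byte
 * alignment. Assuming a C23 compiler (alignas and static_assert are keywords
 * there, matching this project's c_std), the cast in create_shader_module()
 * can be made well defined like so; the array name here is hypothetical: */
static alignas(uint32_t) const unsigned char example_vert_spv[] = {
#embed "../compiled_shaders/triangle.vert.spv"
};
static_assert(sizeof(example_vert_spv) % 4 == 0,
              "a SPIR-V blob is a whole number of 32-bit words");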
+ +  VkPipelineShaderStageCreateInfo triangle_shader_vert_stage_create_info = { +      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, +      .stage = VK_SHADER_STAGE_VERTEX_BIT, +      .module = triangle_shader_vert_module, +      .pName = "main"}; +  VkPipelineShaderStageCreateInfo triangle_shader_frag_stage_create_info = { +      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, +      .stage = VK_SHADER_STAGE_FRAGMENT_BIT, +      .module = triangle_shader_frag_module, +      .pName = "main"}; +  VkPipelineShaderStageCreateInfo shader_stages[] = { +      triangle_shader_vert_stage_create_info, +      triangle_shader_frag_stage_create_info}; + +  VkDynamicState dynamic_states[] = { +      VK_DYNAMIC_STATE_VIEWPORT, +      VK_DYNAMIC_STATE_SCISSOR, +  }; + +  VkPipelineDynamicStateCreateInfo dynamic_state = { +      .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, +      .dynamicStateCount = sizeof(dynamic_states) / sizeof(dynamic_states[0]), +      .pDynamicStates = dynamic_states}; + +  VkVertexInputBindingDescription vertex_binding_description = +      vgltf_vertex_binding_description(); +  struct vgltf_vertex_input_attribute_descriptions +      vertex_attribute_descriptions = vgltf_vertex_attribute_descriptions(); + +  VkPipelineVertexInputStateCreateInfo vertex_input_info = { +      .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, +      .vertexBindingDescriptionCount = 1, +      .vertexAttributeDescriptionCount = vertex_attribute_descriptions.count, +      .pVertexBindingDescriptions = &vertex_binding_description, +      .pVertexAttributeDescriptions = +          vertex_attribute_descriptions.descriptions}; + +  VkPipelineInputAssemblyStateCreateInfo input_assembly = { +      .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, +      .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, +      .primitiveRestartEnable = VK_FALSE, +  }; + +  VkPipelineViewportStateCreateInfo viewport_state = { +      .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, +      .viewportCount = 1, +      .scissorCount = 1}; + +  VkPipelineRasterizationStateCreateInfo rasterizer = { +      .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, +      .depthClampEnable = VK_FALSE, +      .rasterizerDiscardEnable = VK_FALSE, +      .polygonMode = VK_POLYGON_MODE_FILL, +      .lineWidth = 1.f, +      .cullMode = VK_CULL_MODE_BACK_BIT, +      .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, +      .depthBiasEnable = VK_FALSE}; + +  VkPipelineMultisampleStateCreateInfo multisampling = { +      .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, +      .sampleShadingEnable = VK_FALSE, +      .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, +  }; + +  VkPipelineColorBlendAttachmentState color_blend_attachment = { +      .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | +                        VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, +      .blendEnable = VK_FALSE, +  }; + +  VkPipelineDepthStencilStateCreateInfo depth_stencil = { +      .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, +      .depthTestEnable = VK_TRUE, +      .depthWriteEnable = VK_TRUE, +      .depthCompareOp = VK_COMPARE_OP_LESS, +      .depthBoundsTestEnable = VK_FALSE, +      .stencilTestEnable = VK_FALSE, +  }; + +  VkPipelineColorBlendStateCreateInfo color_blending = { +      .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, +      .logicOpEnable = VK_FALSE, +    
  .attachmentCount = 1, +      .pAttachments = &color_blend_attachment}; + +  VkPipelineLayoutCreateInfo pipeline_layout_info = { +      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, +      .setLayoutCount = 1, +      .pSetLayouts = &renderer->descriptor_set_layout}; + +  if (vkCreatePipelineLayout(renderer->device.device, &pipeline_layout_info, +                             nullptr, +                             &renderer->pipeline_layout) != VK_SUCCESS) { +    VGLTF_LOG_ERR("Couldn't create pipeline layout"); +    goto destroy_frag_shader_module; +  } + +  VkGraphicsPipelineCreateInfo pipeline_info = { +      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, +      .stageCount = 2, +      .pStages = shader_stages, +      .pVertexInputState = &vertex_input_info, +      .pInputAssemblyState = &input_assembly, +      .pViewportState = &viewport_state, +      .pRasterizationState = &rasterizer, +      .pMultisampleState = &multisampling, +      .pColorBlendState = &color_blending, +      .pDepthStencilState = &depth_stencil, +      .pDynamicState = &dynamic_state, +      .layout = renderer->pipeline_layout, +      .renderPass = renderer->render_pass, +      .subpass = 0, +  }; + +  if (vkCreateGraphicsPipelines(renderer->device.device, VK_NULL_HANDLE, 1, +                                &pipeline_info, nullptr, +                                &renderer->graphics_pipeline) != VK_SUCCESS) { +    VGLTF_LOG_ERR("Couldn't create pipeline"); +    goto destroy_pipeline_layout; +  } + +  vkDestroyShaderModule(renderer->device.device, triangle_shader_frag_module, +                        nullptr); +  vkDestroyShaderModule(renderer->device.device, triangle_shader_vert_module, +                        nullptr); +  return true; +destroy_pipeline_layout: +  vkDestroyPipelineLayout(renderer->device.device, renderer->pipeline_layout, +                          nullptr); +destroy_frag_shader_module: +  vkDestroyShaderModule(renderer->device.device, triangle_shader_frag_module, +                        nullptr); +destroy_vert_shader_module: +  vkDestroyShaderModule(renderer->device.device, triangle_shader_vert_module, +                        nullptr); +err: +  return false; +} + +static bool +vgltf_renderer_create_framebuffers(struct vgltf_renderer *renderer) { +  for (uint32_t i = 0; i < renderer->swapchain.swapchain_image_count; i++) { +    VkImageView attachments[] = {renderer->swapchain.swapchain_image_views[i], +                                 renderer->depth_image_view}; +    int attachment_count = sizeof(attachments) / sizeof(attachments[0]); + +    VkFramebufferCreateInfo framebuffer_info = { +        .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, +        .renderPass = renderer->render_pass, +        .attachmentCount = attachment_count, +        .pAttachments = attachments, +        .width = renderer->swapchain.swapchain_extent.width, +        .height = renderer->swapchain.swapchain_extent.height, +        .layers = 1}; + +    if (vkCreateFramebuffer(renderer->device.device, &framebuffer_info, nullptr, +                            &renderer->swapchain_framebuffers[i]) != +        VK_SUCCESS) { +      VGLTF_LOG_ERR("Failed to create framebuffer"); +      goto err; +    } +  } + +  return true; +err: +  return false; +} + +static bool +vgltf_renderer_create_command_pool(struct vgltf_renderer *renderer) { +  struct queue_family_indices queue_family_indices = {}; +  if (!queue_family_indices_for_device(&queue_family_indices, +                                       
renderer->device.physical_device,
+                                       renderer->surface.surface)) {
+    VGLTF_LOG_ERR("Couldn't fetch queue family indices");
+    goto err;
+  }
+
+  VkCommandPoolCreateInfo pool_info = {
+      .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+      .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+      .queueFamilyIndex = queue_family_indices.graphics_family};
+
+  if (vkCreateCommandPool(renderer->device.device, &pool_info, nullptr,
+                          &renderer->command_pool) != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Couldn't create command pool");
+    goto err;
+  }
+
+  return true;
+err:
+  return false;
+}
+
+static VkCommandBuffer
+begin_single_time_commands(struct vgltf_renderer *renderer) {
+  VkCommandBufferAllocateInfo allocate_info = {
+      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+      .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+      .commandPool = renderer->command_pool,
+      .commandBufferCount = 1};
+
+  VkCommandBuffer command_buffer;
+  vkAllocateCommandBuffers(renderer->device.device, &allocate_info,
+                           &command_buffer);
+
+  VkCommandBufferBeginInfo begin_info = {
+      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+      .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};
+
+  vkBeginCommandBuffer(command_buffer, &begin_info);
+
+  return command_buffer;
+}
+
+static void end_single_time_commands(struct vgltf_renderer *renderer,
+                                     VkCommandBuffer command_buffer) {
+  vkEndCommandBuffer(command_buffer);
+  VkSubmitInfo submit_info = {.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+                              .commandBufferCount = 1,
+                              .pCommandBuffers = &command_buffer};
+
+  vkQueueSubmit(renderer->device.graphics_queue, 1, &submit_info,
+                VK_NULL_HANDLE);
+  vkQueueWaitIdle(renderer->device.graphics_queue);
+  vkFreeCommandBuffers(renderer->device.device, renderer->command_pool, 1,
+                       &command_buffer);
+}
+
+static bool vgltf_renderer_copy_buffer(struct vgltf_renderer *renderer,
+                                       VkBuffer src_buffer, VkBuffer dst_buffer,
+                                       VkDeviceSize size) {
+  VkCommandBuffer command_buffer = begin_single_time_commands(renderer);
+  VkBufferCopy copy_region = {.size = size};
+  vkCmdCopyBuffer(command_buffer, src_buffer, dst_buffer, 1, &copy_region);
+  end_single_time_commands(renderer, command_buffer);
+  return true;
+}
+
+static void vgltf_renderer_create_image(
+    struct vgltf_renderer *renderer, uint32_t width, uint32_t height,
+    uint32_t mip_level_count, VkFormat format, VkImageTiling tiling,
+    VkImageUsageFlags usage, VkMemoryPropertyFlags properties,
+    struct vgltf_renderer_allocated_image *image) {
+
+  vmaCreateImage(
+      renderer->device.allocator,
+      &(const VkImageCreateInfo){
+          .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+          .imageType = VK_IMAGE_TYPE_2D,
+          .extent = {width, height, 1},
+          .mipLevels = mip_level_count,
+          .arrayLayers = 1,
+          .format = format,
+          .tiling = tiling,
+          .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+          .usage = usage,
+          .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+          .samples = VK_SAMPLE_COUNT_1_BIT,
+      },
+      &(const VmaAllocationCreateInfo){.usage = VMA_MEMORY_USAGE_GPU_ONLY,
+                                       .requiredFlags = properties},
+  
    &image->image, &image->allocation, &image->info); +} + +static bool has_stencil_component(VkFormat format) { +  return format == VK_FORMAT_D32_SFLOAT_S8_UINT || +         format == VK_FORMAT_D24_UNORM_S8_UINT; +} + +static bool transition_image_layout(struct vgltf_renderer *renderer, +                                    VkImage image, VkFormat format, +                                    VkImageLayout old_layout, +                                    VkImageLayout new_layout, +                                    uint32_t mip_level_count) { +  (void)format; +  VkCommandBuffer command_buffer = begin_single_time_commands(renderer); +  VkImageMemoryBarrier barrier = { +      .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, +      .oldLayout = old_layout, +      .newLayout = new_layout, +      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +      .image = image, +      .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, +                           .baseMipLevel = 0, +                           .levelCount = mip_level_count, +                           .baseArrayLayer = 0, +                           .layerCount = 1}, +      .srcAccessMask = 0, +      .dstAccessMask = 0}; + +  if (new_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) { +    barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; + +    if (has_stencil_component(format)) { +      barrier.subresourceRange.aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT; +    } +  } else { +    barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; +  } + +  VkPipelineStageFlags source_stage; +  VkPipelineStageFlags destination_stage; +  if (old_layout == VK_IMAGE_LAYOUT_UNDEFINED && +      new_layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { +    barrier.srcAccessMask = 0; +    barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; +    source_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; +    destination_stage = VK_PIPELINE_STAGE_TRANSFER_BIT; +  } else if (old_layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && +             new_layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) { +    barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; +    barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; +    source_stage = VK_PIPELINE_STAGE_TRANSFER_BIT; +    destination_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; +  } else if (old_layout == VK_IMAGE_LAYOUT_UNDEFINED && +             new_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) { +    barrier.srcAccessMask = 0; +    barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | +                            VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; +    source_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; +    destination_stage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; +  } else { +    goto err; +  } + +  vkCmdPipelineBarrier(command_buffer, source_stage, destination_stage, 0, 0, +                       nullptr, 0, nullptr, 1, &barrier); + +  end_single_time_commands(renderer, command_buffer); +  return true; +err: +  return false; +} + +void copy_buffer_to_image(struct vgltf_renderer *renderer, VkBuffer buffer, +                          VkImage image, uint32_t width, uint32_t height) { +  VkCommandBuffer command_buffer = begin_single_time_commands(renderer); +  VkBufferImageCopy region = { +      .bufferOffset = 0, +      .bufferRowLength = 0, +      .bufferImageHeight = 0, +      .imageSubresource = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, +                           .mipLevel = 0, +  
                         .baseArrayLayer = 0,
+                           .layerCount = 1},
+      .imageOffset = {0, 0, 0},
+      .imageExtent = {width, height, 1}};
+
+  vkCmdCopyBufferToImage(command_buffer, buffer, image,
+                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region);
+
+  end_single_time_commands(renderer, command_buffer);
+}
+
+static bool
+vgltf_renderer_create_depth_resources(struct vgltf_renderer *renderer) {
+  VkFormat depth_format = find_depth_format(renderer);
+  vgltf_renderer_create_image(
+      renderer, renderer->swapchain.swapchain_extent.width,
+      renderer->swapchain.swapchain_extent.height, 1, depth_format,
+      VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &renderer->depth_image);
+  create_image_view(&renderer->device, renderer->depth_image.image,
+                    depth_format, &renderer->depth_image_view,
+                    VK_IMAGE_ASPECT_DEPTH_BIT, 1);
+
+  transition_image_layout(renderer, renderer->depth_image.image, depth_format,
+                          VK_IMAGE_LAYOUT_UNDEFINED,
+                          VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, 1);
+  return true;
+}
+
+static bool
+vgltf_renderer_create_buffer(struct vgltf_renderer *renderer, VkDeviceSize size,
+                             VkBufferUsageFlags usage,
+                             VkMemoryPropertyFlags properties,
+                             struct vgltf_renderer_allocated_buffer *buffer) {
+  VkBufferCreateInfo buffer_info = {.sType =
+                                        VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+                                    .size = size,
+                                    .usage = usage,
+                                    .sharingMode = VK_SHARING_MODE_EXCLUSIVE};
+  VmaAllocationCreateInfo alloc_info = {
+      .usage = VMA_MEMORY_USAGE_AUTO,
+      .flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT |
+               VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT,
+      .preferredFlags = properties};
+
+  if (vmaCreateBuffer(renderer->device.allocator, &buffer_info, &alloc_info,
+                      &buffer->buffer, &buffer->allocation,
+                      &buffer->info) != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Failed to create buffer");
+    goto err;
+  }
+
+  return true;
+err:
+  return false;
+}
+
+static void generate_mipmaps(struct vgltf_renderer *renderer, VkImage image,
+                             VkFormat image_format, int32_t texture_width,
+                             int32_t texture_height, uint32_t mip_levels) {
+  VkFormatProperties format_properties;
+  vkGetPhysicalDeviceFormatProperties(renderer->device.physical_device,
+                                      image_format, &format_properties);
+  if (!(format_properties.optimalTilingFeatures &
+        VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT)) {
+    VGLTF_PANIC("Texture image format does not support linear blitting!");
+  }
+
+  VkCommandBuffer command_buffer = begin_single_time_commands(renderer);
+  VkImageMemoryBarrier barrier = {
+      .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+      .image = image,
+      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+      .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                           .baseArrayLayer = 0,
+                           .layerCount = 1,
+                           .levelCount = 1}};
+
+  
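/* Illustrative sketch, not part of this commit: vgltf_renderer_create_buffer()
 * above already requests HOST_ACCESS_SEQUENTIAL_WRITE for staging uploads; an
 * alternative is to let VMA keep the allocation persistently mapped, so the
 * explicit vmaMapMemory/vmaUnmapMemory pairs used later can be dropped.
 * Assumes the allocation ends up in host-visible memory: */
static bool create_mapped_staging_buffer(VmaAllocator allocator,
                                         VkDeviceSize size, VkBuffer *buffer,
                                         VmaAllocation *allocation,
                                         VmaAllocationInfo *info) {
  VkBufferCreateInfo buffer_info = {.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
                                    .size = size,
                                    .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
                                    .sharingMode = VK_SHARING_MODE_EXCLUSIVE};
  VmaAllocationCreateInfo alloc_info = {
      .usage = VMA_MEMORY_USAGE_AUTO,
      .flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
               VMA_ALLOCATION_CREATE_MAPPED_BIT};
  /* On success, info->pMappedData points at the mapped bytes for the whole
   * lifetime of the allocation, so the caller can memcpy into it directly. */
  return vmaCreateBuffer(allocator, &buffer_info, &alloc_info, buffer,
                         allocation, info) == VK_SUCCESS;
}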
int32_t mip_width = texture_width; +  int32_t mip_height = texture_height; + +  for (uint32_t i = 1; i < mip_levels; i++) { +    barrier.subresourceRange.baseMipLevel = i - 1; +    barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; +    barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; +    barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; +    barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + +    vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, +                         VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, +                         nullptr, 1, &barrier); +    VkImageBlit blit = { +        .srcOffsets = {{0, 0, 0}, {mip_width, mip_height, 1}}, +        .srcSubresource = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, +                           .mipLevel = i - 1, +                           .baseArrayLayer = 0, +                           .layerCount = 1}, +        .dstOffsets = {{0, 0, 0}, +                       {mip_width > 1 ? mip_width / 2 : 1, +                        mip_height > 1 ? mip_height / 2 : 1, 1}}, +        .dstSubresource = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, +                           .mipLevel = i, +                           .baseArrayLayer = 0, +                           .layerCount = 1}, +    }; +    vkCmdBlitImage(command_buffer, image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, +                   image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &blit, +                   VK_FILTER_LINEAR); +    barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; +    barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; +    barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; +    barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + +    vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, +                         VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, +                         0, nullptr, 1, &barrier); +    if (mip_width > 1) +      mip_width /= 2; +    if (mip_height > 1) +      mip_height /= 2; +  } +  barrier.subresourceRange.baseMipLevel = mip_levels - 1; +  barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; +  barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; +  barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; +  barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; +  vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, +                       VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, 0, +                       nullptr, 1, &barrier); + +  end_single_time_commands(renderer, command_buffer); +} + +static bool +vgltf_renderer_create_texture_image(struct vgltf_renderer *renderer) { +  struct vgltf_image image; +  if (!vgltf_image_load_from_file(&image, SV(TEXTURE_PATH))) { +    VGLTF_LOG_ERR("Couldn't load image from file"); +    goto err; +  } +  renderer->mip_level_count = +      floor(log2(VGLTF_MAX(image.width, image.height))) + 1; + +  VkDeviceSize image_size = image.width * image.height * 4; +  struct vgltf_renderer_allocated_buffer staging_buffer = {}; +  if (!vgltf_renderer_create_buffer(renderer, image_size, +                                    VK_BUFFER_USAGE_TRANSFER_SRC_BIT, +                                    VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | +                                        VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, +                                    &staging_buffer)) { +    VGLTF_LOG_ERR("Couldn't create staging buffer"); +    goto deinit_image; +  } + +  void *data; +  
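/* Illustrative sketch, not part of this commit: the mip_level_count
 * assignment below implements floor(log2(max(width, height))) + 1. The same
 * count can be computed without floating point: */
static uint32_t mip_level_count_for(uint32_t width, uint32_t height) {
  uint32_t largest = width > height ? width : height;
  uint32_t levels = 1;
  while (largest > 1) { /* one extra level per halving, down to 1x1 */
    largest /= 2;
    levels++;
  }
  return levels;
}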
vmaMapMemory(renderer->device.allocator, staging_buffer.allocation, &data); +  memcpy(data, image.data, image_size); +  vmaUnmapMemory(renderer->device.allocator, staging_buffer.allocation); + +  vgltf_renderer_create_image( +      renderer, image.width, image.height, renderer->mip_level_count, +      VK_FORMAT_R8G8B8A8_SRGB, VK_IMAGE_TILING_OPTIMAL, +      VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | +          VK_IMAGE_USAGE_SAMPLED_BIT, +      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &renderer->texture_image); + +  transition_image_layout(renderer, renderer->texture_image.image, +                          VK_FORMAT_R8G8B8A8_SRGB, VK_IMAGE_LAYOUT_UNDEFINED, +                          VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, +                          renderer->mip_level_count); +  copy_buffer_to_image(renderer, staging_buffer.buffer, +                       renderer->texture_image.image, image.width, +                       image.height); + +  generate_mipmaps(renderer, renderer->texture_image.image, +                   VK_FORMAT_R8G8B8A8_SRGB, image.width, image.height, +                   renderer->mip_level_count); + +  vmaDestroyBuffer(renderer->device.allocator, staging_buffer.buffer, +                   staging_buffer.allocation); +  vgltf_image_deinit(&image); +  return true; +deinit_image: +  vgltf_image_deinit(&image); +err: +  return false; +} + +static bool +vgltf_renderer_create_texture_image_view(struct vgltf_renderer *renderer) { +  return create_image_view( +      &renderer->device, renderer->texture_image.image, VK_FORMAT_R8G8B8A8_SRGB, +      &renderer->texture_image_view, VK_IMAGE_ASPECT_COLOR_BIT, +      renderer->mip_level_count); +} + +static bool +vgltf_renderer_create_texture_sampler(struct vgltf_renderer *renderer) { +  VkPhysicalDeviceProperties properties = {}; +  vkGetPhysicalDeviceProperties(renderer->device.physical_device, &properties); + +  VkSamplerCreateInfo sampler_info = { +      .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, +      .magFilter = VK_FILTER_LINEAR, +      .minFilter = VK_FILTER_LINEAR, +      .addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT, +      .addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT, +      .addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT, +      .anisotropyEnable = VK_TRUE, +      .maxAnisotropy = properties.limits.maxSamplerAnisotropy, +      .borderColor = VK_BORDER_COLOR_INT_OPAQUE_BLACK, +      .unnormalizedCoordinates = VK_FALSE, +      .compareEnable = VK_FALSE, +      .compareOp = VK_COMPARE_OP_ALWAYS, +      .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR, +      .mipLodBias = 0.f, +      .minLod = 0.f, +      .maxLod = renderer->mip_level_count}; + +  if (vkCreateSampler(renderer->device.device, &sampler_info, nullptr, +                      &renderer->texture_sampler) != VK_SUCCESS) { +    goto err; +  } + +  return true; +err: +  return false; +} + +static void get_file_data(void *ctx, const char *filename, const int is_mtl, +                          const char *obj_filename, char **data, size_t *len) { +  (void)ctx; +  (void)is_mtl; + +  if (!filename) { +    VGLTF_LOG_ERR("Null filename"); +    *data = NULL; +    *len = 0; +    return; +  } +  *data = vgltf_platform_read_file_to_string(obj_filename, len); +} + +static bool load_model(struct vgltf_renderer *renderer) { +  tinyobj_attrib_t attrib; +  tinyobj_shape_t *shapes = nullptr; +  size_t shape_count; +  tinyobj_material_t *materials = nullptr; +  size_t material_count; + +  if ((tinyobj_parse_obj(&attrib, &shapes, &shape_count, &materials, +        
                 &material_count, MODEL_PATH, get_file_data, nullptr, +                         TINYOBJ_FLAG_TRIANGULATE)) != TINYOBJ_SUCCESS) { +    VGLTF_LOG_ERR("Couldn't load obj"); +    return false; +  } + +  for (size_t shape_index = 0; shape_index < shape_count; shape_index++) { +    tinyobj_shape_t *shape = &shapes[shape_index]; +    unsigned int face_offset = shape->face_offset; +    for (size_t face_index = face_offset; +         face_index < face_offset + shape->length; face_index++) { +      float v[3][3]; +      float t[3][2]; + +      tinyobj_vertex_index_t idx0 = attrib.faces[face_index * 3 + 0]; +      tinyobj_vertex_index_t idx1 = attrib.faces[face_index * 3 + 1]; +      tinyobj_vertex_index_t idx2 = attrib.faces[face_index * 3 + 2]; + +      for (int k = 0; k < 3; k++) { +        int f0 = idx0.v_idx; +        int f1 = idx1.v_idx; +        int f2 = idx2.v_idx; + +        v[0][k] = attrib.vertices[3 * (size_t)f0 + k]; +        v[1][k] = attrib.vertices[3 * (size_t)f1 + k]; +        v[2][k] = attrib.vertices[3 * (size_t)f2 + k]; +      } + +      for (int k = 0; k < 2; k++) { +        int t0 = idx0.vt_idx; +        int t1 = idx1.vt_idx; +        int t2 = idx2.vt_idx; + +        t[0][k] = attrib.texcoords[2 * (size_t)t0 + k]; +        t[1][k] = attrib.texcoords[2 * (size_t)t1 + k]; +        t[2][k] = attrib.texcoords[2 * (size_t)t2 + k]; +      } + +      for (int k = 0; k < 3; k++) { +        renderer->vertices[renderer->vertex_count++] = (struct vgltf_vertex){ +            .position = {v[k][0], v[k][1], v[k][2]}, +            .texture_coordinates = {t[k][0], 1.f - t[k][1]}, +            .color = {1.f, 1.f, 1.f}}; +        renderer->indices[renderer->index_count++] = renderer->index_count; +      } +    } +    tinyobj_attrib_free(&attrib); +    tinyobj_shapes_free(shapes, shape_count); +    tinyobj_materials_free(materials, material_count); +  } +  return true; +} + +static bool +vgltf_renderer_create_vertex_buffer(struct vgltf_renderer *renderer) { +  VkDeviceSize buffer_size = +      renderer->vertex_count * sizeof(struct vgltf_vertex); + +  struct vgltf_renderer_allocated_buffer staging_buffer = {}; +  if (!vgltf_renderer_create_buffer(renderer, buffer_size, +                                    VK_BUFFER_USAGE_TRANSFER_SRC_BIT, +                                    VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | +                                        VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, +                                    &staging_buffer)) { +    VGLTF_LOG_ERR("Failed to create transfer buffer"); +    goto err; +  } + +  void *data; +  vmaMapMemory(renderer->device.allocator, staging_buffer.allocation, &data); +  memcpy(data, renderer->vertices, +         renderer->vertex_count * sizeof(struct vgltf_vertex)); +  vmaUnmapMemory(renderer->device.allocator, staging_buffer.allocation); + +  if (!vgltf_renderer_create_buffer( +          renderer, buffer_size, +          VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, +          VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &renderer->vertex_buffer)) { +    VGLTF_LOG_ERR("Failed to create vertex buffer"); +    goto destroy_staging_buffer; +  } + +  vgltf_renderer_copy_buffer(renderer, staging_buffer.buffer, +                             renderer->vertex_buffer.buffer, buffer_size); +  vmaDestroyBuffer(renderer->device.allocator, staging_buffer.buffer, +                   staging_buffer.allocation); +  return true; +destroy_staging_buffer: +  vmaDestroyBuffer(renderer->device.allocator, staging_buffer.buffer, +            
       staging_buffer.allocation); +err: +  return false; +} + +static bool +vgltf_renderer_create_index_buffer(struct vgltf_renderer *renderer) { +  VkDeviceSize buffer_size = renderer->index_count * sizeof(uint16_t); +  struct vgltf_renderer_allocated_buffer staging_buffer = {}; +  if (!vgltf_renderer_create_buffer(renderer, buffer_size, +                                    VK_BUFFER_USAGE_TRANSFER_SRC_BIT, +                                    VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | +                                        VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, +                                    &staging_buffer)) { +    VGLTF_LOG_ERR("Failed to create transfer buffer"); +    goto err; +  } + +  void *data; +  vmaMapMemory(renderer->device.allocator, staging_buffer.allocation, &data); +  memcpy(data, renderer->indices, renderer->index_count * sizeof(uint16_t)); +  vmaUnmapMemory(renderer->device.allocator, staging_buffer.allocation); + +  if (!vgltf_renderer_create_buffer( +          renderer, buffer_size, +          VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT, +          VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &renderer->index_buffer)) { +    VGLTF_LOG_ERR("Failed to create index buffer"); +    goto destroy_staging_buffer; +  } +  vgltf_renderer_copy_buffer(renderer, staging_buffer.buffer, +                             renderer->index_buffer.buffer, buffer_size); +  vmaDestroyBuffer(renderer->device.allocator, staging_buffer.buffer, +                   staging_buffer.allocation); +  return true; + +destroy_staging_buffer: +  vmaDestroyBuffer(renderer->device.allocator, staging_buffer.buffer, +                   staging_buffer.allocation); +err: +  return false; +} + +static bool +vgltf_renderer_create_command_buffer(struct vgltf_renderer *renderer) { +  VkCommandBufferAllocateInfo allocate_info = { +      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, +      .commandPool = renderer->command_pool, +      .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, +      .commandBufferCount = VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT}; + +  if (vkAllocateCommandBuffers(renderer->device.device, &allocate_info, +                               renderer->command_buffer) != VK_SUCCESS) { +    VGLTF_LOG_ERR("Couldn't allocate command buffers"); +    goto err; +  } + +  return true; +err: +  return false; +} + +static bool +vgltf_renderer_create_sync_objects(struct vgltf_renderer *renderer) { +  VkSemaphoreCreateInfo semaphore_info = { +      .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, +  }; + +  VkFenceCreateInfo fence_info = {.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, +                                  .flags = VK_FENCE_CREATE_SIGNALED_BIT}; + +  int frame_in_flight_index = 0; +  for (; frame_in_flight_index < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; +       frame_in_flight_index++) { +    if (vkCreateSemaphore( +            renderer->device.device, &semaphore_info, nullptr, +            &renderer->image_available_semaphores[frame_in_flight_index]) != +            VK_SUCCESS || +        vkCreateSemaphore( +            renderer->device.device, &semaphore_info, nullptr, +            &renderer->render_finished_semaphores[frame_in_flight_index]) != +            VK_SUCCESS || +        vkCreateFence(renderer->device.device, &fence_info, nullptr, +                      &renderer->in_flight_fences[frame_in_flight_index]) != +            VK_SUCCESS) { +      VGLTF_LOG_ERR("Couldn't create sync objects"); +      goto err; +    } +  } + +  return true; +err: +  for (int 
frame_in_flight_to_delete_index = 0;
+       frame_in_flight_to_delete_index < frame_in_flight_index;
+       frame_in_flight_to_delete_index++) {
+    vkDestroyFence(renderer->device.device,
+                   renderer->in_flight_fences[frame_in_flight_to_delete_index],
+                   nullptr);
+    vkDestroySemaphore(
+        renderer->device.device,
+        renderer->render_finished_semaphores[frame_in_flight_to_delete_index],
+        nullptr);
+    vkDestroySemaphore(
+        renderer->device.device,
+        renderer->image_available_semaphores[frame_in_flight_to_delete_index],
+        nullptr);
+  }
+  return false;
+}
+
+static bool vgltf_vk_swapchain_init(struct vgltf_vk_swapchain *swapchain,
+                                    struct vgltf_vk_device *device,
+                                    struct vgltf_vk_surface *surface,
+                                    struct vgltf_window_size *window_size) {
+  if (!create_swapchain(swapchain, device, surface, window_size)) {
+    VGLTF_LOG_ERR("Couldn't create swapchain");
+    goto err;
+  }
+
+  if (!create_swapchain_image_views(swapchain, device)) {
+    VGLTF_LOG_ERR("Couldn't create image views");
+    goto destroy_swapchain;
+  }
+
+  return true;
+destroy_swapchain:
+  vkDestroySwapchainKHR(device->device, swapchain->swapchain, nullptr);
+err:
+  return false;
+}
+
+static void vgltf_vk_swapchain_deinit(struct vgltf_vk_swapchain *swapchain,
+                                      struct vgltf_vk_device *device) {
+  for (uint32_t swapchain_image_view_index = 0;
+       swapchain_image_view_index < swapchain->swapchain_image_count;
+       swapchain_image_view_index++) {
+    vkDestroyImageView(
+        device->device,
+        swapchain->swapchain_image_views[swapchain_image_view_index], nullptr);
+  }
+  vkDestroySwapchainKHR(device->device, swapchain->swapchain, nullptr);
+}
+
+static void vgltf_renderer_cleanup_swapchain(struct vgltf_renderer *renderer) {
+  vkDestroyImageView(renderer->device.device, renderer->depth_image_view,
+                     nullptr);
+  vmaDestroyImage(renderer->device.allocator, renderer->depth_image.image,
+                  renderer->depth_image.allocation);
+
+  for (uint32_t framebuffer_index = 0;
+       framebuffer_index < renderer->swapchain.swapchain_image_count;
+       framebuffer_index++) {
+    vkDestroyFramebuffer(renderer->device.device,
+                         renderer->swapchain_framebuffers[framebuffer_index],
+                         nullptr);
+  }
+
+  vgltf_vk_swapchain_deinit(&renderer->swapchain, &renderer->device);
+}
+
+static bool vgltf_renderer_recreate_swapchain(struct vgltf_renderer *renderer) {
+  vkDeviceWaitIdle(renderer->device.device);
+  vgltf_renderer_cleanup_swapchain(renderer);
+
+  // TODO add error handling
+  create_swapchain(&renderer->swapchain, &renderer->device, &renderer->surface,
+                   &renderer->window_size);
+  create_swapchain_image_views(&renderer->swapchain, &renderer->device);
+  vgltf_renderer_create_depth_resources(renderer);
+  vgltf_renderer_create_framebuffers(renderer);
+  return true;
+}
+
+static void vgltf_renderer_triangle_pass(struct vgltf_renderer *renderer,
+                                         uint32_t swapchain_image_index) {
+  VkRenderPassBeginInfo render_pass_info = {
+      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+      .renderPass = renderer->render_pass,
+      .framebuffer = renderer->swapchain_framebuffers[swapchain_image_index],
+      .renderArea = {.offset = {},
+                     .extent = renderer->swapchain.swapchain_extent},
+      
.clearValueCount = 2,
+      .pClearValues =
+          (const VkClearValue[]){{.color = {.float32 = {0.f, 0.f, 0.f, 1.f}}},
+                                 {.depthStencil = {1.0f, 0}}},
+
+  };
+
+  vkCmdBeginRenderPass(renderer->command_buffer[renderer->current_frame],
+                       &render_pass_info, VK_SUBPASS_CONTENTS_INLINE);
+  vkCmdBindPipeline(renderer->command_buffer[renderer->current_frame],
+                    VK_PIPELINE_BIND_POINT_GRAPHICS,
+                    renderer->graphics_pipeline);
+  VkViewport viewport = {
+      .x = 0.f,
+      .y = 0.f,
+      .width = (float)renderer->swapchain.swapchain_extent.width,
+      .height = (float)renderer->swapchain.swapchain_extent.height,
+      .minDepth = 0.f,
+      .maxDepth = 1.f};
+  vkCmdSetViewport(renderer->command_buffer[renderer->current_frame], 0, 1,
+                   &viewport);
+  VkRect2D scissor = {.offset = {},
+                      .extent = renderer->swapchain.swapchain_extent};
+  vkCmdSetScissor(renderer->command_buffer[renderer->current_frame], 0, 1,
+                  &scissor);
+
+  VkBuffer vertex_buffers[] = {renderer->vertex_buffer.buffer};
+  VkDeviceSize offsets[] = {0};
+  vkCmdBindVertexBuffers(renderer->command_buffer[renderer->current_frame], 0,
+                         1, vertex_buffers, offsets);
+  vkCmdBindIndexBuffer(renderer->command_buffer[renderer->current_frame],
+                       renderer->index_buffer.buffer, 0, VK_INDEX_TYPE_UINT16);
+
+  vkCmdBindDescriptorSets(
+      renderer->command_buffer[renderer->current_frame],
+      VK_PIPELINE_BIND_POINT_GRAPHICS, renderer->pipeline_layout, 0, 1,
+      &renderer->descriptor_sets[renderer->current_frame], 0, nullptr);
+  vkCmdDrawIndexed(renderer->command_buffer[renderer->current_frame],
+                   renderer->index_count, 1, 0, 0, 0);
+
+  vkCmdEndRenderPass(renderer->command_buffer[renderer->current_frame]);
+}
+
+static void update_uniform_buffer(struct vgltf_renderer *renderer,
+                                  uint32_t current_frame) {
+  static long long start_time_nanoseconds = 0;
+  if (start_time_nanoseconds == 0) {
+    if (!vgltf_platform_get_current_time_nanoseconds(&start_time_nanoseconds)) {
+      VGLTF_LOG_ERR("Couldn't get current time");
+    }
+  }
+
+  long long current_time_nanoseconds = 0;
+  if (!vgltf_platform_get_current_time_nanoseconds(&current_time_nanoseconds)) {
+    VGLTF_LOG_ERR("Couldn't get current time");
+  }
+
+  long long elapsed_time_nanoseconds =
+      current_time_nanoseconds - start_time_nanoseconds;
+  float elapsed_time_seconds = elapsed_time_nanoseconds / 1e9f;
+  VGLTF_LOG_INFO("Elapsed time: %f", elapsed_time_seconds);
+
+  vgltf_mat4 model_matrix;
+  vgltf_mat4_rotate(model_matrix, (vgltf_mat4)VGLTF_MAT4_IDENTITY,
+                    elapsed_time_seconds * VGLTF_MATHS_DEG_TO_RAD(90.0f),
+                    (vgltf_vec3){0.f, 0.f, 1.f});
+
+  vgltf_mat4 view_matrix;
+  vgltf_mat4_look_at(view_matrix, (vgltf_vec3){2.f, 2.f, 2.f},
+                     (vgltf_vec3){0.f, 0.f, 0.f}, (vgltf_vec3){0.f, 0.f, 1.f});
+
+  vgltf_mat4 projection_matrix;
+  vgltf_mat4_perspective(projection_matrix, VGLTF_MATHS_DEG_TO_RAD(45.f),
+                         (float)renderer->swapchain.swapchain_extent.width /
+                             (float)renderer->swapchain.swapchain_extent.height,
+                         0.1f, 10.f);
+  projection_matrix[1 * 4 + 1] *= -1;
+
+  struct vgltf_renderer_uniform_buffer_object ubo = {};
+  memcpy(ubo.model, model_matrix, sizeof(vgltf_mat4));
+  
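/* Illustrative sketch, not part of this commit: the three mat4 members copied
 * into the UBO here line up with GLSL std140 layout without padding (a mat4
 * occupies 64 bytes at 16-byte alignment). If a vec3 were ever added to the
 * block, the C side would need explicit 16-byte alignment; the struct name
 * and extra member below are hypothetical: */
struct example_ubo_with_light {
  vgltf_mat4 model;
  vgltf_mat4 view;
  vgltf_mat4 projection;
  alignas(16) float light_direction[3]; /* std140 rounds a vec3 up to 16 bytes */
};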
memcpy(ubo.view, view_matrix, sizeof(vgltf_mat4)); +  memcpy(ubo.projection, projection_matrix, sizeof(vgltf_mat4)); +  memcpy(renderer->mapped_uniform_buffers[current_frame], &ubo, sizeof(ubo)); +} + +bool vgltf_renderer_render_frame(struct vgltf_renderer *renderer) { +  vkWaitForFences(renderer->device.device, 1, +                  &renderer->in_flight_fences[renderer->current_frame], VK_TRUE, +                  UINT64_MAX); + +  uint32_t image_index; +  VkResult acquire_swapchain_image_result = vkAcquireNextImageKHR( +      renderer->device.device, renderer->swapchain.swapchain, UINT64_MAX, +      renderer->image_available_semaphores[renderer->current_frame], +      VK_NULL_HANDLE, &image_index); +  if (acquire_swapchain_image_result == VK_ERROR_OUT_OF_DATE_KHR || +      acquire_swapchain_image_result == VK_SUBOPTIMAL_KHR || +      renderer->framebuffer_resized) { +    renderer->framebuffer_resized = false; +    vgltf_renderer_recreate_swapchain(renderer); +    return true; +  } else if (acquire_swapchain_image_result != VK_SUCCESS) { +    VGLTF_LOG_ERR("Failed to acquire a swapchain image"); +    goto err; +  } + +  vkResetFences(renderer->device.device, 1, +                &renderer->in_flight_fences[renderer->current_frame]); + +  vkResetCommandBuffer(renderer->command_buffer[renderer->current_frame], 0); +  VkCommandBufferBeginInfo begin_info = { +      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, +  }; + +  if (vkBeginCommandBuffer(renderer->command_buffer[renderer->current_frame], +                           &begin_info) != VK_SUCCESS) { +    VGLTF_LOG_ERR("Failed to begin recording command buffer"); +    goto err; +  } + +  vgltf_renderer_triangle_pass(renderer, image_index); + +  if (vkEndCommandBuffer(renderer->command_buffer[renderer->current_frame]) != +      VK_SUCCESS) { +    VGLTF_LOG_ERR("Failed to record command buffer"); +    goto err; +  } + +  update_uniform_buffer(renderer, renderer->current_frame); + +  VkSubmitInfo submit_info = { +      .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, +  }; + +  VkSemaphore wait_semaphores[] = { +      renderer->image_available_semaphores[renderer->current_frame]}; +  VkPipelineStageFlags wait_stages[] = { +      VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT}; +  submit_info.waitSemaphoreCount = 1; +  submit_info.pWaitSemaphores = wait_semaphores; +  submit_info.pWaitDstStageMask = wait_stages; +  submit_info.commandBufferCount = 1; +  submit_info.pCommandBuffers = +      &renderer->command_buffer[renderer->current_frame]; + +  VkSemaphore signal_semaphores[] = { +      renderer->render_finished_semaphores[renderer->current_frame]}; +  submit_info.signalSemaphoreCount = 1; +  submit_info.pSignalSemaphores = signal_semaphores; +  if (vkQueueSubmit(renderer->device.graphics_queue, 1, &submit_info, +                    renderer->in_flight_fences[renderer->current_frame]) != +      VK_SUCCESS) { +    VGLTF_LOG_ERR("Failed to submit draw command buffer"); +    goto err; +  } + +  VkPresentInfoKHR present_info = {.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, +                                   .waitSemaphoreCount = 1, +                                   .pWaitSemaphores = signal_semaphores}; + +  VkSwapchainKHR swapchains[] = {renderer->swapchain.swapchain}; +  present_info.swapchainCount = 1; +  present_info.pSwapchains = swapchains; +  present_info.pImageIndices = &image_index; +  VkResult result = +      vkQueuePresentKHR(renderer->device.present_queue, &present_info); +  if (result == VK_ERROR_OUT_OF_DATE_KHR || result == 
VK_SUBOPTIMAL_KHR) {
+    vgltf_renderer_recreate_swapchain(renderer);
+  } else if (result != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Failed to present swapchain image");
+    goto err;
+  }
+  renderer->current_frame =
+      (renderer->current_frame + 1) % VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT;
+  return true;
+err:
+  return false;
+}
+static bool
+vgltf_renderer_create_uniform_buffers(struct vgltf_renderer *renderer) {
+  VkDeviceSize buffer_size =
+      sizeof(struct vgltf_renderer_uniform_buffer_object);
+
+  for (int i = 0; i < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; i++) {
+    vgltf_renderer_create_buffer(renderer, buffer_size,
+                                 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+                                 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                                     VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+                                 &renderer->uniform_buffers[i]);
+    vmaMapMemory(renderer->device.allocator,
+                 renderer->uniform_buffers[i].allocation,
+                 &renderer->mapped_uniform_buffers[i]);
+  }
+
+  return true;
+}
+
+static bool
+vgltf_renderer_create_descriptor_pool(struct vgltf_renderer *renderer) {
+  VkDescriptorPoolSize pool_sizes[] = {
+      (VkDescriptorPoolSize){.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+                             .descriptorCount =
+                                 VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT},
+      (VkDescriptorPoolSize){.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+                             .descriptorCount =
+                                 VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT}};
+  int pool_size_count = sizeof(pool_sizes) / sizeof(pool_sizes[0]);
+
+  VkDescriptorPoolCreateInfo pool_info = {
+      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+      .poolSizeCount = pool_size_count,
+      .pPoolSizes = pool_sizes,
+      .maxSets = VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT};
+
+  if (vkCreateDescriptorPool(renderer->device.device, &pool_info, nullptr,
+                             &renderer->descriptor_pool) != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Couldn't create uniform descriptor pool");
+    goto err;
+  }
+
+  return true;
+err:
+  return false;
+}
+static bool
+vgltf_renderer_create_descriptor_sets(struct vgltf_renderer *renderer) {
+  VkDescriptorSetLayout layouts[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT] = {};
+  for (int layout_index = 0;
+       layout_index < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT;
+       layout_index++) {
+    layouts[layout_index] = renderer->descriptor_set_layout;
+  }
+
+  VkDescriptorSetAllocateInfo alloc_info = {
+      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+      .descriptorPool = renderer->descriptor_pool,
+      .descriptorSetCount = VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT,
+      .pSetLayouts = layouts};
+
+  if (vkAllocateDescriptorSets(renderer->device.device, &alloc_info,
+                               renderer->descriptor_sets) != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Couldn't create descriptor sets");
+    goto err;
+  }
+
+  for (int set_index = 0; set_index < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT;
+       set_index++) {
+    VkDescriptorBufferInfo buffer_info = {
+        .buffer = renderer->uniform_buffers[set_index].buffer,
+        .offset = 0,
+        .range = sizeof(struct vgltf_renderer_uniform_buffer_object)};
+
+    VkDescriptorImageInfo image_info = {
+        .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
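+        // Binding 1 is a combined image sampler: the descriptor references the
+        // texture's image view and sampler created earlier during init.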
.imageView = renderer->texture_image_view, +        .sampler = renderer->texture_sampler, +    }; + +    VkWriteDescriptorSet descriptor_writes[] = { +        (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, +                               .dstSet = renderer->descriptor_sets[set_index], +                               .dstBinding = 0, +                               .dstArrayElement = 0, +                               .descriptorType = +                                   VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, +                               .descriptorCount = 1, +                               .pBufferInfo = &buffer_info}, + +        (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, +                               .dstSet = renderer->descriptor_sets[set_index], +                               .dstBinding = 1, +                               .dstArrayElement = 0, +                               .descriptorType = +                                   VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, +                               .descriptorCount = 1, +                               .pImageInfo = &image_info}}; +    int descriptor_write_count = +        sizeof(descriptor_writes) / sizeof(descriptor_writes[0]); + +    vkUpdateDescriptorSets(renderer->device.device, descriptor_write_count, +                           descriptor_writes, 0, nullptr); +  } + +  return true; +err: +  return false; +} + +static bool vgltf_vk_device_init(struct vgltf_vk_device *device, +                                 struct vgltf_vk_instance *instance, +                                 struct vgltf_vk_surface *surface) { +  if (!pick_physical_device(&device->physical_device, instance, +                            surface->surface)) { +    VGLTF_LOG_ERR("Couldn't pick physical device"); +    goto err; +  } + +  if (!create_logical_device(&device->device, &device->graphics_queue, +                             &device->present_queue, device->physical_device, +                             surface->surface)) { +    VGLTF_LOG_ERR("Couldn't pick logical device"); +    goto err; +  } + +  if (!create_allocator(&device->allocator, device, instance)) { +    VGLTF_LOG_ERR("Couldn't create allocator"); +    goto destroy_logical_device; +  } + +  return true; +destroy_logical_device: +  vkDestroyDevice(device->device, nullptr); +err: +  return false; +} + +static void vgltf_vk_device_deinit(struct vgltf_vk_device *device) { +  vmaDestroyAllocator(device->allocator); +  vkDestroyDevice(device->device, nullptr); +} + +bool vgltf_renderer_init(struct vgltf_renderer *renderer, +                         struct vgltf_platform *platform) { +  if (!vgltf_vk_instance_init(&renderer->instance, platform)) { +    VGLTF_LOG_ERR("instance creation failed"); +    goto err; +  } +  vgltf_renderer_setup_debug_messenger(renderer); +  if (!vgltf_vk_surface_init(&renderer->surface, &renderer->instance, +                             platform)) { +    goto destroy_instance; +  } + +  if (!vgltf_vk_device_init(&renderer->device, &renderer->instance, +                            &renderer->surface)) { +    VGLTF_LOG_ERR("Device creation failed"); +    goto destroy_surface; +  } + +  struct vgltf_window_size window_size = {800, 600}; +  if (!vgltf_platform_get_window_size(platform, &window_size)) { +    VGLTF_LOG_ERR("Couldn't get window size"); +    goto destroy_device; +  } +  renderer->window_size = window_size; + +  if (!vgltf_vk_swapchain_init(&renderer->swapchain, &renderer->device, +                             
  &renderer->surface, &renderer->window_size)) { +    VGLTF_LOG_ERR("Couldn't create swapchain"); +    goto destroy_device; +  } + +  if (!vgltf_renderer_create_render_pass(renderer)) { +    VGLTF_LOG_ERR("Couldn't create render pass"); +    goto destroy_swapchain; +  } + +  if (!vgltf_renderer_create_descriptor_set_layout(renderer)) { +    VGLTF_LOG_ERR("Couldn't create descriptor set layout"); +    goto destroy_render_pass; +  } + +  if (!vgltf_renderer_create_graphics_pipeline(renderer)) { +    VGLTF_LOG_ERR("Couldn't create graphics pipeline"); +    goto destroy_descriptor_set_layout; +  } + +  if (!vgltf_renderer_create_command_pool(renderer)) { +    VGLTF_LOG_ERR("Couldn't create command pool"); +    goto destroy_graphics_pipeline; +  } + +  if (!vgltf_renderer_create_depth_resources(renderer)) { +    VGLTF_LOG_ERR("Couldn't create depth resources"); +    goto destroy_command_pool; +  } + +  if (!vgltf_renderer_create_framebuffers(renderer)) { +    VGLTF_LOG_ERR("Couldn't create framebuffers"); +    goto destroy_depth_resources; +  } + +  if (!vgltf_renderer_create_texture_image(renderer)) { +    VGLTF_LOG_ERR("Couldn't create texture image"); +    goto destroy_frame_buffers; +  } + +  if (!vgltf_renderer_create_texture_image_view(renderer)) { +    VGLTF_LOG_ERR("Couldn't create texture image view"); +    goto destroy_texture_image; +  } + +  if (!vgltf_renderer_create_texture_sampler(renderer)) { +    VGLTF_LOG_ERR("Couldn't create texture sampler"); +    goto destroy_texture_image_view; +  } + +  if (!load_model(renderer)) { +    VGLTF_LOG_ERR("Couldn't load model"); +    goto destroy_texture_sampler; +  } + +  if (!vgltf_renderer_create_vertex_buffer(renderer)) { +    VGLTF_LOG_ERR("Couldn't create vertex buffer"); +    goto destroy_model; +  } + +  if (!vgltf_renderer_create_index_buffer(renderer)) { +    VGLTF_LOG_ERR("Couldn't create index buffer"); +    goto destroy_vertex_buffer; +  } + +  if (!vgltf_renderer_create_uniform_buffers(renderer)) { +    VGLTF_LOG_ERR("Couldn't create uniform buffers"); +    goto destroy_index_buffer; +  } + +  if (!vgltf_renderer_create_descriptor_pool(renderer)) { +    VGLTF_LOG_ERR("Couldn't create descriptor pool"); +    goto destroy_uniform_buffers; +  } + +  if (!vgltf_renderer_create_descriptor_sets(renderer)) { +    VGLTF_LOG_ERR("Couldn't create descriptor sets"); +    goto destroy_descriptor_pool; +  } + +  if (!vgltf_renderer_create_command_buffer(renderer)) { +    VGLTF_LOG_ERR("Couldn't create command buffer"); +    goto destroy_descriptor_pool; +  } + +  if (!vgltf_renderer_create_sync_objects(renderer)) { +    VGLTF_LOG_ERR("Couldn't create sync objects"); +    goto destroy_descriptor_pool; +  } + +  return true; + +destroy_descriptor_pool: +  vkDestroyDescriptorPool(renderer->device.device, renderer->descriptor_pool, +                          nullptr); +destroy_uniform_buffers: +  for (int i = 0; i < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; i++) { +    vmaDestroyBuffer(renderer->device.allocator, +                     renderer->uniform_buffers[i].buffer, +                     renderer->uniform_buffers[i].allocation); +  } +destroy_index_buffer: +  vmaDestroyBuffer(renderer->device.allocator, renderer->index_buffer.buffer, +                   renderer->index_buffer.allocation); +destroy_vertex_buffer: +  vmaDestroyBuffer(renderer->device.allocator, renderer->vertex_buffer.buffer, +                   renderer->vertex_buffer.allocation); +destroy_model: +  // TODO +destroy_texture_sampler: +  
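+  // Failure paths in init jump to the matching label and fall through the
+  // remaining cleanup labels below.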
vkDestroySampler(renderer->device.device, renderer->texture_sampler, nullptr); +destroy_texture_image_view: +  vkDestroyImageView(renderer->device.device, renderer->texture_image_view, +                     nullptr); +destroy_texture_image: +  vmaDestroyImage(renderer->device.allocator, renderer->texture_image.image, +                  renderer->texture_image.allocation); +destroy_depth_resources: +  vkDestroyImageView(renderer->device.device, renderer->depth_image_view, +                     nullptr); +  vmaDestroyImage(renderer->device.allocator, renderer->depth_image.image, +                  renderer->depth_image.allocation); +destroy_command_pool: +  vkDestroyCommandPool(renderer->device.device, renderer->command_pool, +                       nullptr); +destroy_frame_buffers: +  for (uint32_t swapchain_framebuffer_index = 0; +       swapchain_framebuffer_index < renderer->swapchain.swapchain_image_count; +       swapchain_framebuffer_index++) { +    vkDestroyFramebuffer( +        renderer->device.device, +        renderer->swapchain_framebuffers[swapchain_framebuffer_index], nullptr); +  } +destroy_graphics_pipeline: +  vkDestroyPipeline(renderer->device.device, renderer->graphics_pipeline, +                    nullptr); +  vkDestroyPipelineLayout(renderer->device.device, renderer->pipeline_layout, +                          nullptr); +destroy_descriptor_set_layout: +  vkDestroyDescriptorSetLayout(renderer->device.device, +                               renderer->descriptor_set_layout, nullptr); +destroy_render_pass: +  vkDestroyRenderPass(renderer->device.device, renderer->render_pass, nullptr); +destroy_swapchain: +  vgltf_vk_swapchain_deinit(&renderer->swapchain, &renderer->device); +destroy_device: +  vgltf_vk_device_deinit(&renderer->device); +destroy_surface: +  vgltf_vk_surface_deinit(&renderer->surface, &renderer->instance); +destroy_instance: +  if (enable_validation_layers) { +    destroy_debug_utils_messenger_ext(renderer->instance.instance, +                                      renderer->debug_messenger, nullptr); +  } +  vgltf_vk_instance_deinit(&renderer->instance); +err: +  return false; +} +void vgltf_renderer_deinit(struct vgltf_renderer *renderer) { +  vkDeviceWaitIdle(renderer->device.device); +  vgltf_renderer_cleanup_swapchain(renderer); +  for (int i = 0; i < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; i++) { +    vmaUnmapMemory(renderer->device.allocator, +                   renderer->uniform_buffers[i].allocation); +    vmaDestroyBuffer(renderer->device.allocator, +                     renderer->uniform_buffers[i].buffer, +                     renderer->uniform_buffers[i].allocation); +  } +  vmaDestroyBuffer(renderer->device.allocator, renderer->index_buffer.buffer, +                   renderer->index_buffer.allocation); +  vmaDestroyBuffer(renderer->device.allocator, renderer->vertex_buffer.buffer, +                   renderer->vertex_buffer.allocation); +  vkDestroySampler(renderer->device.device, renderer->texture_sampler, nullptr); +  vkDestroyImageView(renderer->device.device, renderer->texture_image_view, +                     nullptr); +  vmaDestroyImage(renderer->device.allocator, renderer->texture_image.image, +                  renderer->texture_image.allocation); +  vkDestroyPipeline(renderer->device.device, renderer->graphics_pipeline, +                    nullptr); +  vkDestroyPipelineLayout(renderer->device.device, renderer->pipeline_layout, +                          nullptr); +  vkDestroyDescriptorPool(renderer->device.device, 
renderer->descriptor_pool,
+                          nullptr);
+  vkDestroyDescriptorSetLayout(renderer->device.device,
+                               renderer->descriptor_set_layout, nullptr);
+  vkDestroyRenderPass(renderer->device.device, renderer->render_pass, nullptr);
+  for (int i = 0; i < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; i++) {
+    vkDestroySemaphore(renderer->device.device,
+                       renderer->image_available_semaphores[i], nullptr);
+    vkDestroySemaphore(renderer->device.device,
+                       renderer->render_finished_semaphores[i], nullptr);
+    vkDestroyFence(renderer->device.device, renderer->in_flight_fences[i],
+                   nullptr);
+  }
+  vkDestroyCommandPool(renderer->device.device, renderer->command_pool,
+                       nullptr);
+  vgltf_vk_device_deinit(&renderer->device);
+  vgltf_vk_surface_deinit(&renderer->surface, &renderer->instance);
+  if (enable_validation_layers) {
+    destroy_debug_utils_messenger_ext(renderer->instance.instance,
+                                      renderer->debug_messenger, nullptr);
+  }
+  vgltf_vk_instance_deinit(&renderer->instance);
+}
+void vgltf_renderer_on_window_resized(struct vgltf_renderer *renderer,
+                                      struct vgltf_window_size size) {
+  if (size.width > 0 && size.height > 0 &&
+      (size.width != renderer->window_size.width ||
+       size.height != renderer->window_size.height)) {
+    renderer->window_size = size;
+    renderer->framebuffer_resized = true;
+  }
+}
diff --git a/src/renderer/renderer.h b/src/renderer/renderer.h
new file mode 100644
index 0000000..79e1f3d
--- /dev/null
+++ b/src/renderer/renderer.h
@@ -0,0 +1,126 @@
+#ifndef VGLTF_RENDERER_H
+#define VGLTF_RENDERER_H
+
+#include "../maths.h"
+#include "../platform.h"
+#include "vma_usage.h"
+#include <vulkan/vulkan.h>
+
+struct vgltf_vertex {
+  vgltf_vec3 position;
+  vgltf_vec3 color;
+  vgltf_vec2 texture_coordinates;
+};
+VkVertexInputBindingDescription vgltf_vertex_binding_description(void);
+
+struct vgltf_vertex_input_attribute_descriptions {
+  VkVertexInputAttributeDescription descriptions[3];
+  uint32_t count;
+};
+struct vgltf_vertex_input_attribute_descriptions
+vgltf_vertex_attribute_descriptions(void);
+
+struct vgltf_renderer_uniform_buffer_object {
+  alignas(16) vgltf_mat4 model;
+  alignas(16) vgltf_mat4 view;
+  alignas(16) vgltf_mat4 projection;
+};
+
+struct vgltf_renderer_allocated_buffer {
+  VkBuffer buffer;
+  VmaAllocation allocation;
+  VmaAllocationInfo info;
+};
+
+struct vgltf_renderer_allocated_image {
+  VkImage image;
+  VmaAllocation allocation;
+  VmaAllocationInfo info;
+};
+
+struct vgltf_vk_instance {
+  VkInstance instance;
+};
+
+struct vgltf_vk_device {
+  VkPhysicalDevice physical_device;
+  VkDevice device;
+  VkQueue graphics_queue;
+  VkQueue present_queue;
+  VmaAllocator allocator;
+};
+
+struct vgltf_vk_surface {
+  VkSurfaceKHR surface;
+};
+
+constexpr int VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT = 32;
+struct vgltf_vk_swapchain {
+  VkSwapchainKHR swapchain;
+  VkFormat swapchain_image_format;
+  VkImage swapchain_images[VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT];
+  VkImageView swapchain_image_views[VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT];
+  VkExtent2D swapchain_extent;
+  uint32_t swapchain_image_count;
+};
+
+struct vgltf_vk_pipeline {
+  VkPipelineLayout layout;
+  VkPipeline pipeline;
+};
+
+constexpr int VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT = 2;
+struct vgltf_renderer {
+  struct vgltf_vk_instance 
instance; +  struct vgltf_vk_device device; +  VkDebugUtilsMessengerEXT debug_messenger; +  struct vgltf_vk_surface surface; +  struct vgltf_vk_swapchain swapchain; +  struct vgltf_renderer_allocated_image depth_image; +  VkImageView depth_image_view; + +  VkRenderPass render_pass; +  VkDescriptorSetLayout descriptor_set_layout; + +  VkDescriptorPool descriptor_pool; +  VkDescriptorSet descriptor_sets[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT]; +  VkPipelineLayout pipeline_layout; +  VkPipeline graphics_pipeline; + +  VkFramebuffer swapchain_framebuffers[VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT]; + +  VkCommandPool command_pool; +  VkCommandBuffer command_buffer[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT]; +  VkSemaphore +      image_available_semaphores[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT]; +  VkSemaphore +      render_finished_semaphores[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT]; +  VkFence in_flight_fences[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT]; + +  struct vgltf_renderer_allocated_buffer +      uniform_buffers[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT]; +  void *mapped_uniform_buffers[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT]; + +  uint32_t mip_level_count; +  struct vgltf_renderer_allocated_image texture_image; +  VkImageView texture_image_view; +  VkSampler texture_sampler; +  struct vgltf_vertex vertices[100000]; +  int vertex_count; +  uint16_t indices[100000]; +  int index_count; +  struct vgltf_renderer_allocated_buffer vertex_buffer; +  struct vgltf_renderer_allocated_buffer index_buffer; + +  struct vgltf_window_size window_size; +  uint32_t current_frame; +  bool framebuffer_resized; +}; +bool vgltf_renderer_init(struct vgltf_renderer *renderer, +                       struct vgltf_platform *platform); +void vgltf_renderer_deinit(struct vgltf_renderer *renderer); +bool vgltf_renderer_render_frame(struct vgltf_renderer *renderer); +void vgltf_renderer_on_window_resized(struct vgltf_renderer *renderer, +                                    struct vgltf_window_size size); + +#endif // VGLTF_RENDERER_H diff --git a/src/renderer/vma_usage.cpp b/src/renderer/vma_usage.cpp new file mode 100644 index 0000000..83006a1 --- /dev/null +++ b/src/renderer/vma_usage.cpp @@ -0,0 +1,4 @@ +#include "vma_usage.h" + +#define VMA_IMPLEMENTATION +#include <vk_mem_alloc.h> diff --git a/src/renderer/vma_usage.h b/src/renderer/vma_usage.h new file mode 100644 index 0000000..e9b5aa4 --- /dev/null +++ b/src/renderer/vma_usage.h @@ -0,0 +1,6 @@ +#ifndef VGLTF_VMA_USAGE_H +#define VGLTF_VMA_USAGE_H + +#include <vk_mem_alloc.h> + +#endif // VGLTF_VMA_USAGE_H diff --git a/src/str.c b/src/str.c new file mode 100644 index 0000000..9c68d43 --- /dev/null +++ b/src/str.c @@ -0,0 +1,181 @@ +#include "str.h" +#include "alloc.h" +#include "hash.h" +#include "platform.h" +#include <assert.h> +#include <stdarg.h> +#include <string.h> + +struct vgltf_string_view vgltf_string_view_from_literal(const char *str) { +  assert(str); +  size_t length = strlen(str); +  return (struct vgltf_string_view){.length = length, .data = str}; +} +struct vgltf_string_view vgltf_string_view_from_string(struct vgltf_string string) { +  return (struct vgltf_string_view){.length = string.length, .data = string.data}; +} +char vgltf_string_view_at(const struct vgltf_string_view *string_view, +                        size_t index) { +  assert(string_view); +  assert(index < string_view->length); +  return string_view->data[index]; +} +bool vgltf_string_view_eq(struct vgltf_string_view view, +                        struct 
vgltf_string_view other) { +  return view.length == other.length && +         (strncmp(view.data, other.data, view.length) == 0); +} +size_t vgltf_string_view_length(const struct vgltf_string_view *string_view) { +  assert(string_view); +  return string_view->length; +} + +uint64_t vgltf_string_view_hash(const struct vgltf_string_view view) { +  return vgltf_hash_fnv_1a(view.data, view.length); +} + +int vgltf_string_view_utf8_codepoint_at_offset(struct vgltf_string_view view, +                                             size_t offset, +                                             uint32_t *codepoint) { +  assert(codepoint); +  assert(offset < view.length); + +  const unsigned char *s = (unsigned char *)&view.data[offset]; + +  int size; +  if ((*s & 0x80) == 0) { +    *codepoint = *s; +    size = 1; +  } else if ((*s & 0xE0) == 0xC0) { +    *codepoint = *s & 0x1f; +    size = 2; +  } else if ((*s & 0xF0) == 0xE0) { +    *codepoint = *s & 0x0f; +    size = 3; +  } else if ((*s & 0xF8) == 0xF0) { +    *codepoint = *s & 0x07; +    size = 4; +  } else { +    VGLTF_LOG_ERR("Invalid UTF-8 sequence"); +    return 0; +  } + +  for (int i = 1; i < size; i++) { +    if ((s[i] & 0xC0) != 0x80) { +      VGLTF_LOG_ERR("Invalid UTF-8 continuation byte"); +      return 0; +    } + +    *codepoint = (*codepoint << 6) | (s[i] & 0x3F); +  } + +  return size; +} +int vgltf_string_utf8_encode_codepoint(uint32_t codepoint, +                                     char encoded_codepoint[4]) { +  assert(encoded_codepoint); +  if (codepoint > 0x10FFFF) { +    return -1; +  } + +  if (codepoint <= 0x7F) { +    encoded_codepoint[0] = (uint8_t)codepoint; +    return 1; +  } else if (codepoint <= 0x7FF) { +    encoded_codepoint[0] = 0xC0 | ((codepoint >> 6) & 0x1F); +    encoded_codepoint[1] = 0x80 | (codepoint & 0x3F); +    return 2; +  } else if (codepoint <= 0xFFFF) { +    encoded_codepoint[0] = 0xE0 | ((codepoint >> 12) & 0x0F); +    encoded_codepoint[1] = 0x80 | ((codepoint >> 6) & 0x3F); +    encoded_codepoint[2] = 0x80 | (codepoint & 0x3F); +    return 3; +  } else { +    encoded_codepoint[0] = 0xF0 | ((codepoint >> 18) & 0x07); +    encoded_codepoint[1] = 0x80 | ((codepoint >> 12) & 0x3F); +    encoded_codepoint[2] = 0x80 | ((codepoint >> 6) & 0x3F); +    encoded_codepoint[3] = 0x80 | (codepoint & 0x3F); +    return 4; +  } +} + +struct vgltf_string +vgltf_string_from_null_terminated(struct vgltf_allocator *allocator, +                                const char *str) { +  assert(allocator); +  assert(str); +  struct vgltf_string string; +  size_t length = strlen(str); +  char *data = vgltf_allocator_allocate(allocator, length + 1); +  if (!data) { +    VGLTF_PANIC("Couldn't allocate string"); +  } +  strncpy(data, str, length); +  string.length = length; +  string.data = data; +  return string; +} +struct vgltf_string vgltf_string_clone(struct vgltf_allocator *allocator, +                                   const struct vgltf_string string) { +  assert(allocator); + +  size_t length = string.length; +  char *data = vgltf_allocator_allocate(allocator, length + 1); +  memcpy(data, string.data, length); +  data[length] = '\0'; + +  return (struct vgltf_string){.data = data, .length = length}; +} +struct vgltf_string vgltf_string_concatenate(struct vgltf_allocator *allocator, +                                         struct vgltf_string_view head, +                                         struct vgltf_string_view tail) { +  assert(allocator); +  size_t length = head.length + tail.length; +  char *data = 
vgltf_allocator_allocate(allocator, length + 1); +  memcpy(data, head.data, head.length); +  memcpy(data + head.length, tail.data, tail.length); +  data[length] = '\0'; +  return (struct vgltf_string){.data = data, .length = length}; +} +struct vgltf_string vgltf_string_formatted(struct vgltf_allocator *allocator, +                                       struct vgltf_string_view fmt, ...) { +  va_list args; +  va_start(args, fmt); +  struct vgltf_string formatted_string = +      vgltf_string_vformatted(allocator, fmt, args); +  va_end(args); + +  return formatted_string; +} +struct vgltf_string vgltf_string_vformatted(struct vgltf_allocator *allocator, +                                        struct vgltf_string_view fmt, +                                        va_list args) { +  assert(allocator); +  char str[1024]; +  size_t length = vsnprintf(str, 1024, fmt.data, args); +  char *data = vgltf_allocator_allocate(allocator, length + 1); +  memcpy(data, str, length); +  data[length] = '\0'; +  return (struct vgltf_string){.data = data, .length = length}; +} +void vgltf_string_deinit(struct vgltf_allocator *allocator, +                       struct vgltf_string *string) { +  assert(allocator); +  assert(string); +  vgltf_allocator_free(allocator, string->data); +} +size_t vgltf_string_length(const struct vgltf_string *string) { +  return string->length; +} +bool vgltf_string_eq_view(const struct vgltf_string string, +                        const struct vgltf_string_view view) { +  return string.length == view.length && +         (strncmp(string.data, view.data, string.length) == 0); +} +uint64_t vgltf_string_hash(const struct vgltf_string string) { +  return vgltf_hash_fnv_1a(string.data, string.length); +} +bool vgltf_string_eq(struct vgltf_string string, struct vgltf_string other) { +  return string.length == other.length && +         (strncmp(string.data, other.data, string.length) == 0); +} diff --git a/src/str.h b/src/str.h new file mode 100644 index 0000000..c0e4e5c --- /dev/null +++ b/src/str.h @@ -0,0 +1,62 @@ +#ifndef VGLTF_STR_H +#define VGLTF_STR_H + +#include "alloc.h" +#include <stdarg.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> // IWYU pragma: keep + +#define SV(str)                                                                \ +  (struct vgltf_string_view) { .data = str, .length = strlen(str) } + +struct vgltf_string; +struct vgltf_string_view { +  const char *data; +  size_t length; +}; + +struct vgltf_string_view vgltf_string_view_from_literal(const char *str); +struct vgltf_string_view vgltf_string_view_from_string(struct vgltf_string string); +size_t vgltf_string_view_length(const struct vgltf_string_view *string_view); +char vgltf_string_view_at(const struct vgltf_string_view *string_view, +                        size_t index); +bool vgltf_string_view_eq(struct vgltf_string_view view, +                        struct vgltf_string_view other); +uint64_t vgltf_string_view_hash(const struct vgltf_string_view view); +// Fetches the next utf8 codepoint in the string at the given offset +// Returns the size of the codepoint in bytes, 0 in case of error +int vgltf_string_view_utf8_codepoint_at_offset(struct vgltf_string_view view, +                                             size_t offset, +                                             uint32_t *codepoint); +// codepoint has to be a char[4] +int vgltf_string_utf8_encode_codepoint(uint32_t codepoint, +                                     char encoded_codepoint[4]); + +struct 
vgltf_string { +  char *data; +  size_t length; +}; +struct vgltf_string +vgltf_string_from_null_terminated(struct vgltf_allocator *allocator, +                                const char *str); +struct vgltf_string vgltf_string_clone(struct vgltf_allocator *allocator, +                                   const struct vgltf_string string); +struct vgltf_string vgltf_string_concatenate(struct vgltf_allocator *allocator, +                                         struct vgltf_string_view head, +                                         struct vgltf_string_view tail); +struct vgltf_string vgltf_string_formatted(struct vgltf_allocator *allocator, +                                       struct vgltf_string_view fmt, ...); +struct vgltf_string vgltf_string_vformatted(struct vgltf_allocator *allocator, +                                        struct vgltf_string_view fmt, +                                        va_list args); +void vgltf_string_deinit(struct vgltf_allocator *allocator, +                       struct vgltf_string *string); +size_t vgltf_string_length(const struct vgltf_string *string); +bool vgltf_string_eq_view(const struct vgltf_string string, +                        const struct vgltf_string_view view); +uint64_t vgltf_string_hash(const struct vgltf_string string); +bool vgltf_string_eq(struct vgltf_string string, struct vgltf_string other); + +#endif // VGLTF_STR_H diff --git a/thirdpartylicenses.md b/thirdpartylicenses.md new file mode 100644 index 0000000..3d898b6 --- /dev/null +++ b/thirdpartylicenses.md @@ -0,0 +1,46 @@ +# stb_image: +Public Domain + +# vk_mem_alloc: +Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +# tinyobjloader_c: +The MIT License (MIT) + +Copyright (c) 2016 - 2019 Syoyo Fujita and many contributors. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/vendor/stb_image.h b/vendor/stb_image.h new file mode 100644 index 0000000..9eedabe --- /dev/null +++ b/vendor/stb_image.h @@ -0,0 +1,7988 @@ +/* stb_image - v2.30 - public domain image loader - http://nothings.org/stb +                                  no warranty implied; use at your own risk + +   Do this: +      #define STB_IMAGE_IMPLEMENTATION +   before you include this file in *one* C or C++ file to create the implementation. + +   // i.e. it should look like this: +   #include ... +   #include ... +   #include ... +   #define STB_IMAGE_IMPLEMENTATION +   #include "stb_image.h" + +   You can #define STBI_ASSERT(x) before the #include to avoid using assert.h. +   And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free + + +   QUICK NOTES: +      Primarily of interest to game developers and other people who can +          avoid problematic images and only need the trivial interface + +      JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib) +      PNG 1/2/4/8/16-bit-per-channel + +      TGA (not sure what subset, if a subset) +      BMP non-1bpp, non-RLE +      PSD (composited view only, no extra channels, 8/16 bit-per-channel) + +      GIF (*comp always reports as 4-channel) +      HDR (radiance rgbE format) +      PIC (Softimage PIC) +      PNM (PPM and PGM binary only) + +      Animated GIF still needs a proper API, but here's one way to do it: +          http://gist.github.com/urraka/685d9a6340b26b830d49 + +      - decode from memory or through FILE (define STBI_NO_STDIO to remove code) +      - decode from arbitrary I/O callbacks +      - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON) + +   Full documentation under "DOCUMENTATION" below. + + +LICENSE + +  See end of file for license information. 
+ +RECENT REVISION HISTORY: + +      2.30  (2024-05-31) avoid erroneous gcc warning +      2.29  (2023-05-xx) optimizations +      2.28  (2023-01-29) many error fixes, security errors, just tons of stuff +      2.27  (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes +      2.26  (2020-07-13) many minor fixes +      2.25  (2020-02-02) fix warnings +      2.24  (2020-02-02) fix warnings; thread-local failure_reason and flip_vertically +      2.23  (2019-08-11) fix clang static analysis warning +      2.22  (2019-03-04) gif fixes, fix warnings +      2.21  (2019-02-25) fix typo in comment +      2.20  (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs +      2.19  (2018-02-11) fix warning +      2.18  (2018-01-30) fix warnings +      2.17  (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings +      2.16  (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes +      2.15  (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC +      2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs +      2.13  (2016-12-04) experimental 16-bit API, only for PNG so far; fixes +      2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes +      2.11  (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64 +                         RGB-format JPEG; remove white matting in PSD; +                         allocate large structures on the stack; +                         correct channel count for PNG & BMP +      2.10  (2016-01-22) avoid warning introduced in 2.09 +      2.09  (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED + +   See end of file for full revision history. + + + ============================    Contributors    ========================= + + Image formats                          Extensions, features +    Sean Barrett (jpeg, png, bmp)          Jetro Lauha (stbi_info) +    Nicolas Schulz (hdr, psd)              Martin "SpartanJ" Golini (stbi_info) +    Jonathan Dummer (tga)                  James "moose2000" Brown (iPhone PNG) +    Jean-Marc Lienher (gif)                Ben "Disch" Wenger (io callbacks) +    Tom Seddon (pic)                       Omar Cornut (1/2/4-bit PNG) +    Thatcher Ulrich (psd)                  Nicolas Guillemot (vertical flip) +    Ken Miller (pgm, ppm)                  Richard Mitton (16-bit PSD) +    github:urraka (animated gif)           Junggon Kim (PNM comments) +    Christopher Forseth (animated gif)     Daniel Gibson (16-bit TGA) +                                           socks-the-fox (16-bit PNG) +                                           Jeremy Sawicki (handle all ImageNet JPGs) + Optimizations & bugfixes                  Mikhail Morozov (1-bit BMP) +    Fabian "ryg" Giesen                    Anael Seghezzi (is-16-bit query) +    Arseny Kapoulkine                      Simon Breuss (16-bit PNM) +    John-Mark Allen +    Carmelo J Fdez-Aguera + + Bug & warning fixes +    Marc LeBlanc            David Woo          Guillaume George     Martins Mozeiko +    Christpher Lloyd        Jerry Jansson      Joseph Thomson       Blazej Dariusz Roszkowski +    Phil Jordan                                Dave Moore           Roy Eltham +    Hayaki Saito            Nathan Reed        Won Chun +    Luke Graham             Johan Duparc       Nick Verigakis       the Horde3D community +    Thomas Ruf              Ronny Chevalier                         github:rlyeh +    Janez Zemva             John Bartholomew   Michal Cichon        
github:romigrou +    Jonathan Blow           Ken Hamada         Tero Hanninen        github:svdijk +    Eugene Golushkov        Laurent Gomila     Cort Stratton        github:snagar +    Aruelien Pocheville     Sergio Gonzalez    Thibault Reuille     github:Zelex +    Cass Everitt            Ryamond Barbiero                        github:grim210 +    Paul Du Bois            Engin Manap        Aldo Culquicondor    github:sammyhw +    Philipp Wiesemann       Dale Weiler        Oriol Ferrer Mesia   github:phprus +    Josh Tobin              Neil Bickford      Matthew Gregan       github:poppolopoppo +    Julian Raschke          Gregory Mullen     Christian Floisand   github:darealshinji +    Baldur Karlsson         Kevin Schmidt      JR Smith             github:Michaelangel007 +                            Brad Weinberger    Matvey Cherevko      github:mosra +    Luca Sas                Alexander Veselov  Zack Middleton       [reserved] +    Ryan C. Gordon          [reserved]                              [reserved] +                     DO NOT ADD YOUR NAME HERE + +                     Jacko Dirks + +  To add your name to the credits, pick a random blank space in the middle and fill it. +  80% of merge conflicts on stb PRs are due to people adding their name at the end +  of the credits. +*/ + +#ifndef STBI_INCLUDE_STB_IMAGE_H +#define STBI_INCLUDE_STB_IMAGE_H + +// DOCUMENTATION +// +// Limitations: +//    - no 12-bit-per-channel JPEG +//    - no JPEGs with arithmetic coding +//    - GIF always returns *comp=4 +// +// Basic usage (see HDR discussion below for HDR usage): +//    int x,y,n; +//    unsigned char *data = stbi_load(filename, &x, &y, &n, 0); +//    // ... process data if not NULL ... +//    // ... x = width, y = height, n = # 8-bit components per pixel ... +//    // ... replace '0' with '1'..'4' to force that many components per pixel +//    // ... but 'n' will always be the number that it would have been if you said 0 +//    stbi_image_free(data); +// +// Standard parameters: +//    int *x                 -- outputs image width in pixels +//    int *y                 -- outputs image height in pixels +//    int *channels_in_file  -- outputs # of image components in image file +//    int desired_channels   -- if non-zero, # of image components requested in result +// +// The return value from an image loader is an 'unsigned char *' which points +// to the pixel data, or NULL on an allocation failure or if the image is +// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels, +// with each pixel consisting of N interleaved 8-bit components; the first +// pixel pointed to is top-left-most in the image. There is no padding between +// image scanlines or between pixels, regardless of format. The number of +// components N is 'desired_channels' if desired_channels is non-zero, or +// *channels_in_file otherwise. If desired_channels is non-zero, +// *channels_in_file has the number of components that _would_ have been +// output otherwise. E.g. if you set desired_channels to 4, you will always +// get RGBA output, but you can check *channels_in_file to see if it's trivially +// opaque because e.g. there were only 3 channels in the source image. 
+// +// An output image with N components has the following components interleaved +// in this order in each pixel: +// +//     N=#comp     components +//       1           grey +//       2           grey, alpha +//       3           red, green, blue +//       4           red, green, blue, alpha +// +// If image loading fails for any reason, the return value will be NULL, +// and *x, *y, *channels_in_file will be unchanged. The function +// stbi_failure_reason() can be queried for an extremely brief, end-user +// unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS +// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly +// more user-friendly ones. +// +// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized. +// +// To query the width, height and component count of an image without having to +// decode the full file, you can use the stbi_info family of functions: +// +//   int x,y,n,ok; +//   ok = stbi_info(filename, &x, &y, &n); +//   // returns ok=1 and sets x, y, n if image is a supported format, +//   // 0 otherwise. +// +// Note that stb_image pervasively uses ints in its public API for sizes, +// including sizes of memory buffers. This is now part of the API and thus +// hard to change without causing breakage. As a result, the various image +// loaders all have certain limits on image size; these differ somewhat +// by format but generally boil down to either just under 2GB or just under +// 1GB. When the decoded image would be larger than this, stb_image decoding +// will fail. +// +// Additionally, stb_image will reject image files that have any of their +// dimensions set to a larger value than the configurable STBI_MAX_DIMENSIONS, +// which defaults to 2**24 = 16777216 pixels. Due to the above memory limit, +// the only way to have an image with such dimensions load correctly +// is for it to have a rather extreme aspect ratio. Either way, the +// assumption here is that such larger images are likely to be malformed +// or malicious. If you do need to load an image with individual dimensions +// larger than that, and it still fits in the overall size limit, you can +// #define STBI_MAX_DIMENSIONS on your own to be something larger. +// +// =========================================================================== +// +// UNICODE: +// +//   If compiling for Windows and you wish to use Unicode filenames, compile +//   with +//       #define STBI_WINDOWS_UTF8 +//   and pass utf8-encoded filenames. Call stbi_convert_wchar_to_utf8 to convert +//   Windows wchar_t filenames to utf8. +// +// =========================================================================== +// +// Philosophy +// +// stb libraries are designed with the following priorities: +// +//    1. easy to use +//    2. easy to maintain +//    3. good performance +// +// Sometimes I let "good performance" creep up in priority over "easy to maintain", +// and for best performance I may provide less-easy-to-use APIs that give higher +// performance, in addition to the easy-to-use ones. Nevertheless, it's important +// to keep in mind that from the standpoint of you, a client of this library, +// all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all. +// +// Some secondary priorities arise directly from the first two, some of which +// provide more explicit reasons why performance can't be emphasized. 
+// +//    - Portable ("ease of use") +//    - Small source code footprint ("easy to maintain") +//    - No dependencies ("ease of use") +// +// =========================================================================== +// +// I/O callbacks +// +// I/O callbacks allow you to read from arbitrary sources, like packaged +// files or some other source. Data read from callbacks are processed +// through a small internal buffer (currently 128 bytes) to try to reduce +// overhead. +// +// The three functions you must define are "read" (reads some bytes of data), +// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end). +// +// =========================================================================== +// +// SIMD support +// +// The JPEG decoder will try to automatically use SIMD kernels on x86 when +// supported by the compiler. For ARM Neon support, you must explicitly +// request it. +// +// (The old do-it-yourself SIMD API is no longer supported in the current +// code.) +// +// On x86, SSE2 will automatically be used when available based on a run-time +// test; if not, the generic C versions are used as a fall-back. On ARM targets, +// the typical path is to have separate builds for NEON and non-NEON devices +// (at least this is true for iOS and Android). Therefore, the NEON support is +// toggled by a build flag: define STBI_NEON to get NEON loops. +// +// If for some reason you do not want to use any of SIMD code, or if +// you have issues compiling it, you can disable it entirely by +// defining STBI_NO_SIMD. +// +// =========================================================================== +// +// HDR image support   (disable by defining STBI_NO_HDR) +// +// stb_image supports loading HDR images in general, and currently the Radiance +// .HDR file format specifically. You can still load any file through the existing +// interface; if you attempt to load an HDR file, it will be automatically remapped +// to LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1; +// both of these constants can be reconfigured through this interface: +// +//     stbi_hdr_to_ldr_gamma(2.2f); +//     stbi_hdr_to_ldr_scale(1.0f); +// +// (note, do not use _inverse_ constants; stbi_image will invert them +// appropriately). +// +// Additionally, there is a new, parallel interface for loading files as +// (linear) floats to preserve the full dynamic range: +// +//    float *data = stbi_loadf(filename, &x, &y, &n, 0); +// +// If you load LDR images through this interface, those images will +// be promoted to floating point values, run through the inverse of +// constants corresponding to the above: +// +//     stbi_ldr_to_hdr_scale(1.0f); +//     stbi_ldr_to_hdr_gamma(2.2f); +// +// Finally, given a filename (or an open file or memory block--see header +// file for details) containing image data, you can query for the "most +// appropriate" interface to use (that is, whether the image is HDR or +// not), using: +// +//     stbi_is_hdr(char *filename); +// +// =========================================================================== +// +// iPhone PNG support: +// +// We optionally support converting iPhone-formatted PNGs (which store +// premultiplied BGRA) back to RGB, even though they're internally encoded +// differently. To enable this conversion, call +// stbi_convert_iphone_png_to_rgb(1). 
+// +// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per +// pixel to remove any premultiplied alpha *only* if the image file explicitly +// says there's premultiplied data (currently only happens in iPhone images, +// and only if iPhone convert-to-rgb processing is on). +// +// =========================================================================== +// +// ADDITIONAL CONFIGURATION +// +//  - You can suppress implementation of any of the decoders to reduce +//    your code footprint by #defining one or more of the following +//    symbols before creating the implementation. +// +//        STBI_NO_JPEG +//        STBI_NO_PNG +//        STBI_NO_BMP +//        STBI_NO_PSD +//        STBI_NO_TGA +//        STBI_NO_GIF +//        STBI_NO_HDR +//        STBI_NO_PIC +//        STBI_NO_PNM   (.ppm and .pgm) +// +//  - You can request *only* certain decoders and suppress all other ones +//    (this will be more forward-compatible, as addition of new decoders +//    doesn't require you to disable them explicitly): +// +//        STBI_ONLY_JPEG +//        STBI_ONLY_PNG +//        STBI_ONLY_BMP +//        STBI_ONLY_PSD +//        STBI_ONLY_TGA +//        STBI_ONLY_GIF +//        STBI_ONLY_HDR +//        STBI_ONLY_PIC +//        STBI_ONLY_PNM   (.ppm and .pgm) +// +//   - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still +//     want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB +// +//  - If you define STBI_MAX_DIMENSIONS, stb_image will reject images greater +//    than that size (in either width or height) without further processing. +//    This is to let programs in the wild set an upper bound to prevent +//    denial-of-service attacks on untrusted data, as one could generate a +//    valid image of gigantic dimensions and force stb_image to allocate a +//    huge block of memory and spend disproportionate time decoding it. By +//    default this is set to (1 << 24), which is 16777216, but that's still +//    very big. + +#ifndef STBI_NO_STDIO +#include <stdio.h> +#endif // STBI_NO_STDIO + +#define STBI_VERSION 1 + +enum +{ +   STBI_default = 0, // only used for desired_channels + +   STBI_grey       = 1, +   STBI_grey_alpha = 2, +   STBI_rgb        = 3, +   STBI_rgb_alpha  = 4 +}; + +#include <stdlib.h> +typedef unsigned char stbi_uc; +typedef unsigned short stbi_us; + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef STBIDEF +#ifdef STB_IMAGE_STATIC +#define STBIDEF static +#else +#define STBIDEF extern +#endif +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// PRIMARY API - works on images of any type +// + +// +// load image by filename, open file, or memory buffer +// + +typedef struct +{ +   int      (*read)  (void *user,char *data,int size);   // fill 'data' with 'size' bytes.  
return number of bytes actually read +   void     (*skip)  (void *user,int n);                 // skip the next 'n' bytes, or 'unget' the last -n bytes if negative +   int      (*eof)   (void *user);                       // returns nonzero if we are at end of file/data +} stbi_io_callbacks; + +//////////////////////////////////// +// +// 8-bits-per-channel interface +// + +STBIDEF stbi_uc *stbi_load_from_memory   (stbi_uc           const *buffer, int len   , int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk  , void *user, int *x, int *y, int *channels_in_file, int desired_channels); + +#ifndef STBI_NO_STDIO +STBIDEF stbi_uc *stbi_load            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); +// for stbi_load_from_file, file pointer is left pointing immediately after image +#endif + +#ifndef STBI_NO_GIF +STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp); +#endif + +#ifdef STBI_WINDOWS_UTF8 +STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input); +#endif + +//////////////////////////////////// +// +// 16-bits-per-channel interface +// + +STBIDEF stbi_us *stbi_load_16_from_memory   (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); + +#ifndef STBI_NO_STDIO +STBIDEF stbi_us *stbi_load_16          (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); +#endif + +//////////////////////////////////// +// +// float-per-channel interface +// +#ifndef STBI_NO_LINEAR +   STBIDEF float *stbi_loadf_from_memory     (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); +   STBIDEF float *stbi_loadf_from_callbacks  (stbi_io_callbacks const *clbk, void *user, int *x, int *y,  int *channels_in_file, int desired_channels); + +   #ifndef STBI_NO_STDIO +   STBIDEF float *stbi_loadf            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +   STBIDEF float *stbi_loadf_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); +   #endif +#endif + +#ifndef STBI_NO_HDR +   STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma); +   STBIDEF void   stbi_hdr_to_ldr_scale(float scale); +#endif // STBI_NO_HDR + +#ifndef STBI_NO_LINEAR +   STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma); +   STBIDEF void   stbi_ldr_to_hdr_scale(float scale); +#endif // STBI_NO_LINEAR + +// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR +STBIDEF int    stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user); +STBIDEF int    stbi_is_hdr_from_memory(stbi_uc const *buffer, int len); +#ifndef STBI_NO_STDIO +STBIDEF int      stbi_is_hdr          (char const *filename); +STBIDEF int      stbi_is_hdr_from_file(FILE *f); +#endif // STBI_NO_STDIO + + +// get a VERY brief reason for failure +// on most compilers (and ALL modern mainstream compilers) this is threadsafe +STBIDEF const char *stbi_failure_reason  (void); + +// 
free the loaded image -- this is just free() +STBIDEF void     stbi_image_free      (void *retval_from_stbi_load); + +// get image dimensions & components without fully decoding +STBIDEF int      stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); +STBIDEF int      stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp); +STBIDEF int      stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len); +STBIDEF int      stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user); + +#ifndef STBI_NO_STDIO +STBIDEF int      stbi_info               (char const *filename,     int *x, int *y, int *comp); +STBIDEF int      stbi_info_from_file     (FILE *f,                  int *x, int *y, int *comp); +STBIDEF int      stbi_is_16_bit          (char const *filename); +STBIDEF int      stbi_is_16_bit_from_file(FILE *f); +#endif + + + +// for image formats that explicitly notate that they have premultiplied alpha, +// we just return the colors as stored in the file. set this flag to force +// unpremultiplication. results are undefined if the unpremultiply overflow. +STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply); + +// indicate whether we should process iphone images back to canonical format, +// or just pass them through "as-is" +STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert); + +// flip the image vertically, so the first pixel in the output array is the bottom left +STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip); + +// as above, but only applies to images loaded on the thread that calls the function +// this function is only available if your compiler supports thread-local variables; +// calling it will fail to link if your compiler doesn't +STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply); +STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert); +STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip); + +// ZLIB client - used by PNG, available for other purposes + +STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen); +STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header); +STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen); +STBIDEF int   stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + +STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen); +STBIDEF int   stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + + +#ifdef __cplusplus +} +#endif + +// +// +////   end header file   ///////////////////////////////////////////////////// +#endif // STBI_INCLUDE_STB_IMAGE_H + +#ifdef STB_IMAGE_IMPLEMENTATION + +#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \ +  || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \ +  || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \ +  || defined(STBI_ONLY_ZLIB) +   #ifndef STBI_ONLY_JPEG +   #define STBI_NO_JPEG +   #endif +   #ifndef STBI_ONLY_PNG +   #define STBI_NO_PNG +   #endif +   #ifndef STBI_ONLY_BMP +   #define STBI_NO_BMP +   #endif +   #ifndef STBI_ONLY_PSD +   #define STBI_NO_PSD +   #endif +   #ifndef STBI_ONLY_TGA +   #define 
STBI_NO_TGA +   #endif +   #ifndef STBI_ONLY_GIF +   #define STBI_NO_GIF +   #endif +   #ifndef STBI_ONLY_HDR +   #define STBI_NO_HDR +   #endif +   #ifndef STBI_ONLY_PIC +   #define STBI_NO_PIC +   #endif +   #ifndef STBI_ONLY_PNM +   #define STBI_NO_PNM +   #endif +#endif + +#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB) +#define STBI_NO_ZLIB +#endif + + +#include <stdarg.h> +#include <stddef.h> // ptrdiff_t on osx +#include <stdlib.h> +#include <string.h> +#include <limits.h> + +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) +#include <math.h>  // ldexp, pow +#endif + +#ifndef STBI_NO_STDIO +#include <stdio.h> +#endif + +#ifndef STBI_ASSERT +#include <assert.h> +#define STBI_ASSERT(x) assert(x) +#endif + +#ifdef __cplusplus +#define STBI_EXTERN extern "C" +#else +#define STBI_EXTERN extern +#endif + + +#ifndef _MSC_VER +   #ifdef __cplusplus +   #define stbi_inline inline +   #else +   #define stbi_inline +   #endif +#else +   #define stbi_inline __forceinline +#endif + +#ifndef STBI_NO_THREAD_LOCALS +   #if defined(__cplusplus) &&  __cplusplus >= 201103L +      #define STBI_THREAD_LOCAL       thread_local +   #elif defined(__GNUC__) && __GNUC__ < 5 +      #define STBI_THREAD_LOCAL       __thread +   #elif defined(_MSC_VER) +      #define STBI_THREAD_LOCAL       __declspec(thread) +   #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_THREADS__) +      #define STBI_THREAD_LOCAL       _Thread_local +   #endif + +   #ifndef STBI_THREAD_LOCAL +      #if defined(__GNUC__) +        #define STBI_THREAD_LOCAL       __thread +      #endif +   #endif +#endif + +#if defined(_MSC_VER) || defined(__SYMBIAN32__) +typedef unsigned short stbi__uint16; +typedef   signed short stbi__int16; +typedef unsigned int   stbi__uint32; +typedef   signed int   stbi__int32; +#else +#include <stdint.h> +typedef uint16_t stbi__uint16; +typedef int16_t  stbi__int16; +typedef uint32_t stbi__uint32; +typedef int32_t  stbi__int32; +#endif + +// should produce compiler error if size is wrong +typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1]; + +#ifdef _MSC_VER +#define STBI_NOTUSED(v)  (void)(v) +#else +#define STBI_NOTUSED(v)  (void)sizeof(v) +#endif + +#ifdef _MSC_VER +#define STBI_HAS_LROTL +#endif + +#ifdef STBI_HAS_LROTL +   #define stbi_lrot(x,y)  _lrotl(x,y) +#else +   #define stbi_lrot(x,y)  (((x) << (y)) | ((x) >> (-(y) & 31))) +#endif + +#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED)) +// ok +#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED) +// ok +#else +#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)." +#endif + +#ifndef STBI_MALLOC +#define STBI_MALLOC(sz)           malloc(sz) +#define STBI_REALLOC(p,newsz)     realloc(p,newsz) +#define STBI_FREE(p)              free(p) +#endif + +#ifndef STBI_REALLOC_SIZED +#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz) +#endif + +// x86/x64 detection +#if defined(__x86_64__) || defined(_M_X64) +#define STBI__X64_TARGET +#elif defined(__i386) || defined(_M_IX86) +#define STBI__X86_TARGET +#endif + +#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD) +// gcc doesn't support sse2 intrinsics unless you compile with -msse2, +// which in turn means it gets to use SSE2 everywhere. 
This is unfortunate, +// but previous attempts to provide the SSE2 functions with runtime +// detection caused numerous issues. The way architecture extensions are +// exposed in GCC/Clang is, sadly, not really suited for one-file libs. +// New behavior: if compiled with -msse2, we use SSE2 without any +// detection; if not, we don't use it at all. +#define STBI_NO_SIMD +#endif + +#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD) +// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET +// +// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the +// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant. +// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not +// simultaneously enabling "-mstackrealign". +// +// See https://github.com/nothings/stb/issues/81 for more information. +// +// So default to no SSE2 on 32-bit MinGW. If you've read this far and added +// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2. +#define STBI_NO_SIMD +#endif + +#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET)) +#define STBI_SSE2 +#include <emmintrin.h> + +#ifdef _MSC_VER + +#if _MSC_VER >= 1400  // not VC6 +#include <intrin.h> // __cpuid +static int stbi__cpuid3(void) +{ +   int info[4]; +   __cpuid(info,1); +   return info[3]; +} +#else +static int stbi__cpuid3(void) +{ +   int res; +   __asm { +      mov  eax,1 +      cpuid +      mov  res,edx +   } +   return res; +} +#endif + +#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name + +#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2) +static int stbi__sse2_available(void) +{ +   int info3 = stbi__cpuid3(); +   return ((info3 >> 26) & 1) != 0; +} +#endif + +#else // assume GCC-style if not VC++ +#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) + +#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2) +static int stbi__sse2_available(void) +{ +   // If we're even attempting to compile this on GCC/Clang, that means +   // -msse2 is on, which means the compiler is allowed to use SSE2 +   // instructions at will, and so are we. 
+   return 1; +} +#endif + +#endif +#endif + +// ARM NEON +#if defined(STBI_NO_SIMD) && defined(STBI_NEON) +#undef STBI_NEON +#endif + +#ifdef STBI_NEON +#include <arm_neon.h> +#ifdef _MSC_VER +#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name +#else +#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) +#endif +#endif + +#ifndef STBI_SIMD_ALIGN +#define STBI_SIMD_ALIGN(type, name) type name +#endif + +#ifndef STBI_MAX_DIMENSIONS +#define STBI_MAX_DIMENSIONS (1 << 24) +#endif + +/////////////////////////////////////////////// +// +//  stbi__context struct and start_xxx functions + +// stbi__context structure is our basic context used by all images, so it +// contains all the IO context, plus some basic image information +typedef struct +{ +   stbi__uint32 img_x, img_y; +   int img_n, img_out_n; + +   stbi_io_callbacks io; +   void *io_user_data; + +   int read_from_callbacks; +   int buflen; +   stbi_uc buffer_start[128]; +   int callback_already_read; + +   stbi_uc *img_buffer, *img_buffer_end; +   stbi_uc *img_buffer_original, *img_buffer_original_end; +} stbi__context; + + +static void stbi__refill_buffer(stbi__context *s); + +// initialize a memory-decode context +static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len) +{ +   s->io.read = NULL; +   s->read_from_callbacks = 0; +   s->callback_already_read = 0; +   s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer; +   s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len; +} + +// initialize a callback-based context +static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user) +{ +   s->io = *c; +   s->io_user_data = user; +   s->buflen = sizeof(s->buffer_start); +   s->read_from_callbacks = 1; +   s->callback_already_read = 0; +   s->img_buffer = s->img_buffer_original = s->buffer_start; +   stbi__refill_buffer(s); +   s->img_buffer_original_end = s->img_buffer_end; +} + +#ifndef STBI_NO_STDIO + +static int stbi__stdio_read(void *user, char *data, int size) +{ +   return (int) fread(data,1,size,(FILE*) user); +} + +static void stbi__stdio_skip(void *user, int n) +{ +   int ch; +   fseek((FILE*) user, n, SEEK_CUR); +   ch = fgetc((FILE*) user);  /* have to read a byte to reset feof()'s flag */ +   if (ch != EOF) { +      ungetc(ch, (FILE *) user);  /* push byte back onto stream if valid. 
*/ +   } +} + +static int stbi__stdio_eof(void *user) +{ +   return feof((FILE*) user) || ferror((FILE *) user); +} + +static stbi_io_callbacks stbi__stdio_callbacks = +{ +   stbi__stdio_read, +   stbi__stdio_skip, +   stbi__stdio_eof, +}; + +static void stbi__start_file(stbi__context *s, FILE *f) +{ +   stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f); +} + +//static void stop_file(stbi__context *s) { } + +#endif // !STBI_NO_STDIO + +static void stbi__rewind(stbi__context *s) +{ +   // conceptually rewind SHOULD rewind to the beginning of the stream, +   // but we just rewind to the beginning of the initial buffer, because +   // we only use it after doing 'test', which only ever looks at at most 92 bytes +   s->img_buffer = s->img_buffer_original; +   s->img_buffer_end = s->img_buffer_original_end; +} + +enum +{ +   STBI_ORDER_RGB, +   STBI_ORDER_BGR +}; + +typedef struct +{ +   int bits_per_channel; +   int num_channels; +   int channel_order; +} stbi__result_info; + +#ifndef STBI_NO_JPEG +static int      stbi__jpeg_test(stbi__context *s); +static void    *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int      stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PNG +static int      stbi__png_test(stbi__context *s); +static void    *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int      stbi__png_info(stbi__context *s, int *x, int *y, int *comp); +static int      stbi__png_is16(stbi__context *s); +#endif + +#ifndef STBI_NO_BMP +static int      stbi__bmp_test(stbi__context *s); +static void    *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int      stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_TGA +static int      stbi__tga_test(stbi__context *s); +static void    *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int      stbi__tga_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PSD +static int      stbi__psd_test(stbi__context *s); +static void    *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc); +static int      stbi__psd_info(stbi__context *s, int *x, int *y, int *comp); +static int      stbi__psd_is16(stbi__context *s); +#endif + +#ifndef STBI_NO_HDR +static int      stbi__hdr_test(stbi__context *s); +static float   *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int      stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PIC +static int      stbi__pic_test(stbi__context *s); +static void    *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int      stbi__pic_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_GIF +static int      stbi__gif_test(stbi__context *s); +static void    *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static void    *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp); +static int      stbi__gif_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PNM +static int      stbi__pnm_test(stbi__context *s); +static void    *stbi__pnm_load(stbi__context *s, 
int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp); +static int      stbi__pnm_is16(stbi__context *s); +#endif + +static +#ifdef STBI_THREAD_LOCAL +STBI_THREAD_LOCAL +#endif +const char *stbi__g_failure_reason; + +STBIDEF const char *stbi_failure_reason(void) +{ +   return stbi__g_failure_reason; +} + +#ifndef STBI_NO_FAILURE_STRINGS +static int stbi__err(const char *str) +{ +   stbi__g_failure_reason = str; +   return 0; +} +#endif + +static void *stbi__malloc(size_t size) +{ +    return STBI_MALLOC(size); +} + +// stb_image uses ints pervasively, including for offset calculations. +// therefore the largest decoded image size we can support with the +// current code, even on 64-bit targets, is INT_MAX. this is not a +// significant limitation for the intended use case. +// +// we do, however, need to make sure our size calculations don't +// overflow. hence a few helper functions for size calculations that +// multiply integers together, making sure that they're non-negative +// and no overflow occurs. + +// return 1 if the sum is valid, 0 on overflow. +// negative terms are considered invalid. +static int stbi__addsizes_valid(int a, int b) +{ +   if (b < 0) return 0; +   // now 0 <= b <= INT_MAX, hence also +   // 0 <= INT_MAX - b <= INTMAX. +   // And "a + b <= INT_MAX" (which might overflow) is the +   // same as a <= INT_MAX - b (no overflow) +   return a <= INT_MAX - b; +} + +// returns 1 if the product is valid, 0 on overflow. +// negative factors are considered invalid. +static int stbi__mul2sizes_valid(int a, int b) +{ +   if (a < 0 || b < 0) return 0; +   if (b == 0) return 1; // mul-by-0 is always safe +   // portable way to check for no overflows in a*b +   return a <= INT_MAX/b; +} + +#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR) +// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow +static int stbi__mad2sizes_valid(int a, int b, int add) +{ +   return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a*b, add); +} +#endif + +// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow +static int stbi__mad3sizes_valid(int a, int b, int c, int add) +{ +   return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && +      stbi__addsizes_valid(a*b*c, add); +} + +// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM) +static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add) +{ +   return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && +      stbi__mul2sizes_valid(a*b*c, d) && stbi__addsizes_valid(a*b*c*d, add); +} +#endif + +#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR) +// mallocs with size overflow checking +static void *stbi__malloc_mad2(int a, int b, int add) +{ +   if (!stbi__mad2sizes_valid(a, b, add)) return NULL; +   return stbi__malloc(a*b + add); +} +#endif + +static void *stbi__malloc_mad3(int a, int b, int c, int add) +{ +   if (!stbi__mad3sizes_valid(a, b, c, add)) return NULL; +   return stbi__malloc(a*b*c + add); +} + +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM) +static void *stbi__malloc_mad4(int a, int b, int c, int d, int add) +{ +   if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL; +   return stbi__malloc(a*b*c*d + add); +} 
+#endif + +// returns 1 if the sum of two signed ints is valid (between -2^31 and 2^31-1 inclusive), 0 on overflow. +static int stbi__addints_valid(int a, int b) +{ +   if ((a >= 0) != (b >= 0)) return 1; // a and b have different signs, so no overflow +   if (a < 0 && b < 0) return a >= INT_MIN - b; // same as a + b >= INT_MIN; INT_MIN - b cannot overflow since b < 0. +   return a <= INT_MAX - b; +} + +// returns 1 if the product of two ints fits in a signed short, 0 on overflow. +static int stbi__mul2shorts_valid(int a, int b) +{ +   if (b == 0 || b == -1) return 1; // multiplication by 0 is always 0; check for -1 so SHRT_MIN/b doesn't overflow +   if ((a >= 0) == (b >= 0)) return a <= SHRT_MAX/b; // product is positive, so similar to mul2sizes_valid +   if (b < 0) return a <= SHRT_MIN / b; // same as a * b >= SHRT_MIN +   return a >= SHRT_MIN / b; +} + +// stbi__err - error +// stbi__errpf - error returning pointer to float +// stbi__errpuc - error returning pointer to unsigned char + +#ifdef STBI_NO_FAILURE_STRINGS +   #define stbi__err(x,y)  0 +#elif defined(STBI_FAILURE_USERMSG) +   #define stbi__err(x,y)  stbi__err(y) +#else +   #define stbi__err(x,y)  stbi__err(x) +#endif + +#define stbi__errpf(x,y)   ((float *)(size_t) (stbi__err(x,y)?NULL:NULL)) +#define stbi__errpuc(x,y)  ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL)) + +STBIDEF void stbi_image_free(void *retval_from_stbi_load) +{ +   STBI_FREE(retval_from_stbi_load); +} + +#ifndef STBI_NO_LINEAR +static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp); +#endif + +#ifndef STBI_NO_HDR +static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp); +#endif + +static int stbi__vertically_flip_on_load_global = 0; + +STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip) +{ +   stbi__vertically_flip_on_load_global = flag_true_if_should_flip; +} + +#ifndef STBI_THREAD_LOCAL +#define stbi__vertically_flip_on_load  stbi__vertically_flip_on_load_global +#else +static STBI_THREAD_LOCAL int stbi__vertically_flip_on_load_local, stbi__vertically_flip_on_load_set; + +STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip) +{ +   stbi__vertically_flip_on_load_local = flag_true_if_should_flip; +   stbi__vertically_flip_on_load_set = 1; +} + +#define stbi__vertically_flip_on_load  (stbi__vertically_flip_on_load_set       \ +                                         ? 
stbi__vertically_flip_on_load_local  \ +                                         : stbi__vertically_flip_on_load_global) +#endif // STBI_THREAD_LOCAL + +static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) +{ +   memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields +   ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed +   ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order +   ri->num_channels = 0; + +   // test the formats with a very explicit header first (at least a FOURCC +   // or distinctive magic number first) +   #ifndef STBI_NO_PNG +   if (stbi__png_test(s))  return stbi__png_load(s,x,y,comp,req_comp, ri); +   #endif +   #ifndef STBI_NO_BMP +   if (stbi__bmp_test(s))  return stbi__bmp_load(s,x,y,comp,req_comp, ri); +   #endif +   #ifndef STBI_NO_GIF +   if (stbi__gif_test(s))  return stbi__gif_load(s,x,y,comp,req_comp, ri); +   #endif +   #ifndef STBI_NO_PSD +   if (stbi__psd_test(s))  return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc); +   #else +   STBI_NOTUSED(bpc); +   #endif +   #ifndef STBI_NO_PIC +   if (stbi__pic_test(s))  return stbi__pic_load(s,x,y,comp,req_comp, ri); +   #endif + +   // then the formats that can end up attempting to load with just 1 or 2 +   // bytes matching expectations; these are prone to false positives, so +   // try them later +   #ifndef STBI_NO_JPEG +   if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri); +   #endif +   #ifndef STBI_NO_PNM +   if (stbi__pnm_test(s))  return stbi__pnm_load(s,x,y,comp,req_comp, ri); +   #endif + +   #ifndef STBI_NO_HDR +   if (stbi__hdr_test(s)) { +      float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri); +      return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); +   } +   #endif + +   #ifndef STBI_NO_TGA +   // test tga last because it's a crappy test! 
+   if (stbi__tga_test(s)) +      return stbi__tga_load(s,x,y,comp,req_comp, ri); +   #endif + +   return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt"); +} + +static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels) +{ +   int i; +   int img_len = w * h * channels; +   stbi_uc *reduced; + +   reduced = (stbi_uc *) stbi__malloc(img_len); +   if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory"); + +   for (i = 0; i < img_len; ++i) +      reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling + +   STBI_FREE(orig); +   return reduced; +} + +static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels) +{ +   int i; +   int img_len = w * h * channels; +   stbi__uint16 *enlarged; + +   enlarged = (stbi__uint16 *) stbi__malloc(img_len*2); +   if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); + +   for (i = 0; i < img_len; ++i) +      enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff + +   STBI_FREE(orig); +   return enlarged; +} + +static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel) +{ +   int row; +   size_t bytes_per_row = (size_t)w * bytes_per_pixel; +   stbi_uc temp[2048]; +   stbi_uc *bytes = (stbi_uc *)image; + +   for (row = 0; row < (h>>1); row++) { +      stbi_uc *row0 = bytes + row*bytes_per_row; +      stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row; +      // swap row0 with row1 +      size_t bytes_left = bytes_per_row; +      while (bytes_left) { +         size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp); +         memcpy(temp, row0, bytes_copy); +         memcpy(row0, row1, bytes_copy); +         memcpy(row1, temp, bytes_copy); +         row0 += bytes_copy; +         row1 += bytes_copy; +         bytes_left -= bytes_copy; +      } +   } +} + +#ifndef STBI_NO_GIF +static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel) +{ +   int slice; +   int slice_size = w * h * bytes_per_pixel; + +   stbi_uc *bytes = (stbi_uc *)image; +   for (slice = 0; slice < z; ++slice) { +      stbi__vertical_flip(bytes, w, h, bytes_per_pixel); +      bytes += slice_size; +   } +} +#endif + +static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ +   stbi__result_info ri; +   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8); + +   if (result == NULL) +      return NULL; + +   // it is the responsibility of the loaders to make sure we get either 8 or 16 bit. +   STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16); + +   if (ri.bits_per_channel != 8) { +      result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp); +      ri.bits_per_channel = 8; +   } + +   // @TODO: move stbi__convert_format to here + +   if (stbi__vertically_flip_on_load) { +      int channels = req_comp ? 
req_comp : *comp; +      stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc)); +   } + +   return (unsigned char *) result; +} + +static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ +   stbi__result_info ri; +   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16); + +   if (result == NULL) +      return NULL; + +   // it is the responsibility of the loaders to make sure we get either 8 or 16 bit. +   STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16); + +   if (ri.bits_per_channel != 16) { +      result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp); +      ri.bits_per_channel = 16; +   } + +   // @TODO: move stbi__convert_format16 to here +   // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision + +   if (stbi__vertically_flip_on_load) { +      int channels = req_comp ? req_comp : *comp; +      stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16)); +   } + +   return (stbi__uint16 *) result; +} + +#if !defined(STBI_NO_HDR) && !defined(STBI_NO_LINEAR) +static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp) +{ +   if (stbi__vertically_flip_on_load && result != NULL) { +      int channels = req_comp ? req_comp : *comp; +      stbi__vertical_flip(result, *x, *y, channels * sizeof(float)); +   } +} +#endif + +#ifndef STBI_NO_STDIO + +#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8) +STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide); +STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default); +#endif + +#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8) +STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input) +{ +	return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL); +} +#endif + +static FILE *stbi__fopen(char const *filename, char const *mode) +{ +   FILE *f; +#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8) +   wchar_t wMode[64]; +   wchar_t wFilename[1024]; +	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename))) +      return 0; + +	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode))) +      return 0; + +#if defined(_MSC_VER) && _MSC_VER >= 1400 +	if (0 != _wfopen_s(&f, wFilename, wMode)) +		f = 0; +#else +   f = _wfopen(wFilename, wMode); +#endif + +#elif defined(_MSC_VER) && _MSC_VER >= 1400 +   if (0 != fopen_s(&f, filename, mode)) +      f=0; +#else +   f = fopen(filename, mode); +#endif +   return f; +} + + +STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ +   FILE *f = stbi__fopen(filename, "rb"); +   unsigned char *result; +   if (!f) return stbi__errpuc("can't fopen", "Unable to open file"); +   result = stbi_load_from_file(f,x,y,comp,req_comp); +   fclose(f); +   return result; +} + +STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ +   unsigned char *result; +   stbi__context s; +   stbi__start_file(&s,f); +   result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); +   if (result) { +      // need to 'unget' all the 
characters in the IO buffer +      fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); +   } +   return result; +} + +STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp) +{ +   stbi__uint16 *result; +   stbi__context s; +   stbi__start_file(&s,f); +   result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp); +   if (result) { +      // need to 'unget' all the characters in the IO buffer +      fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); +   } +   return result; +} + +STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp) +{ +   FILE *f = stbi__fopen(filename, "rb"); +   stbi__uint16 *result; +   if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file"); +   result = stbi_load_from_file_16(f,x,y,comp,req_comp); +   fclose(f); +   return result; +} + + +#endif //!STBI_NO_STDIO + +STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels) +{ +   stbi__context s; +   stbi__start_mem(&s,buffer,len); +   return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); +} + +STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels) +{ +   stbi__context s; +   stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user); +   return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); +} + +STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ +   stbi__context s; +   stbi__start_mem(&s,buffer,len); +   return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); +} + +STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) +{ +   stbi__context s; +   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); +   return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); +} + +#ifndef STBI_NO_GIF +STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp) +{ +   unsigned char *result; +   stbi__context s; +   stbi__start_mem(&s,buffer,len); + +   result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp); +   if (stbi__vertically_flip_on_load) { +      stbi__vertical_flip_slices( result, *x, *y, *z, *comp ); +   } + +   return result; +} +#endif + +#ifndef STBI_NO_LINEAR +static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ +   unsigned char *data; +   #ifndef STBI_NO_HDR +   if (stbi__hdr_test(s)) { +      stbi__result_info ri; +      float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri); +      if (hdr_data) +         stbi__float_postprocess(hdr_data,x,y,comp,req_comp); +      return hdr_data; +   } +   #endif +   data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp); +   if (data) +      return stbi__ldr_to_hdr(data, *x, *y, req_comp ? 
req_comp : *comp); +   return stbi__errpf("unknown image type", "Image not of any known type, or corrupt"); +} + +STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ +   stbi__context s; +   stbi__start_mem(&s,buffer,len); +   return stbi__loadf_main(&s,x,y,comp,req_comp); +} + +STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) +{ +   stbi__context s; +   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); +   return stbi__loadf_main(&s,x,y,comp,req_comp); +} + +#ifndef STBI_NO_STDIO +STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp) +{ +   float *result; +   FILE *f = stbi__fopen(filename, "rb"); +   if (!f) return stbi__errpf("can't fopen", "Unable to open file"); +   result = stbi_loadf_from_file(f,x,y,comp,req_comp); +   fclose(f); +   return result; +} + +STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ +   stbi__context s; +   stbi__start_file(&s,f); +   return stbi__loadf_main(&s,x,y,comp,req_comp); +} +#endif // !STBI_NO_STDIO + +#endif // !STBI_NO_LINEAR + +// these is-hdr-or-not is defined independent of whether STBI_NO_LINEAR is +// defined, for API simplicity; if STBI_NO_LINEAR is defined, it always +// reports false! + +STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len) +{ +   #ifndef STBI_NO_HDR +   stbi__context s; +   stbi__start_mem(&s,buffer,len); +   return stbi__hdr_test(&s); +   #else +   STBI_NOTUSED(buffer); +   STBI_NOTUSED(len); +   return 0; +   #endif +} + +#ifndef STBI_NO_STDIO +STBIDEF int      stbi_is_hdr          (char const *filename) +{ +   FILE *f = stbi__fopen(filename, "rb"); +   int result=0; +   if (f) { +      result = stbi_is_hdr_from_file(f); +      fclose(f); +   } +   return result; +} + +STBIDEF int stbi_is_hdr_from_file(FILE *f) +{ +   #ifndef STBI_NO_HDR +   long pos = ftell(f); +   int res; +   stbi__context s; +   stbi__start_file(&s,f); +   res = stbi__hdr_test(&s); +   fseek(f, pos, SEEK_SET); +   return res; +   #else +   STBI_NOTUSED(f); +   return 0; +   #endif +} +#endif // !STBI_NO_STDIO + +STBIDEF int      stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user) +{ +   #ifndef STBI_NO_HDR +   stbi__context s; +   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); +   return stbi__hdr_test(&s); +   #else +   STBI_NOTUSED(clbk); +   STBI_NOTUSED(user); +   return 0; +   #endif +} + +#ifndef STBI_NO_LINEAR +static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f; + +STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; } +STBIDEF void   stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; } +#endif + +static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f; + +STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; } +STBIDEF void   stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; } + + +////////////////////////////////////////////////////////////////////////////// +// +// Common code used by all image loaders +// + +enum +{ +   STBI__SCAN_load=0, +   STBI__SCAN_type, +   STBI__SCAN_header +}; + +static void stbi__refill_buffer(stbi__context *s) +{ +   int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen); +   s->callback_already_read += (int) (s->img_buffer - s->img_buffer_original); +   if (n == 0) { +      // at end of file, treat same as if from memory, but need to handle 
case +      // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file +      s->read_from_callbacks = 0; +      s->img_buffer = s->buffer_start; +      s->img_buffer_end = s->buffer_start+1; +      *s->img_buffer = 0; +   } else { +      s->img_buffer = s->buffer_start; +      s->img_buffer_end = s->buffer_start + n; +   } +} + +stbi_inline static stbi_uc stbi__get8(stbi__context *s) +{ +   if (s->img_buffer < s->img_buffer_end) +      return *s->img_buffer++; +   if (s->read_from_callbacks) { +      stbi__refill_buffer(s); +      return *s->img_buffer++; +   } +   return 0; +} + +#if defined(STBI_NO_JPEG) && defined(STBI_NO_HDR) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM) +// nothing +#else +stbi_inline static int stbi__at_eof(stbi__context *s) +{ +   if (s->io.read) { +      if (!(s->io.eof)(s->io_user_data)) return 0; +      // if feof() is true, check if buffer = end +      // special case: we've only got the special 0 character at the end +      if (s->read_from_callbacks == 0) return 1; +   } + +   return s->img_buffer >= s->img_buffer_end; +} +#endif + +#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) +// nothing +#else +static void stbi__skip(stbi__context *s, int n) +{ +   if (n == 0) return;  // already there! +   if (n < 0) { +      s->img_buffer = s->img_buffer_end; +      return; +   } +   if (s->io.read) { +      int blen = (int) (s->img_buffer_end - s->img_buffer); +      if (blen < n) { +         s->img_buffer = s->img_buffer_end; +         (s->io.skip)(s->io_user_data, n - blen); +         return; +      } +   } +   s->img_buffer += n; +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_TGA) && defined(STBI_NO_HDR) && defined(STBI_NO_PNM) +// nothing +#else +static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n) +{ +   if (s->io.read) { +      int blen = (int) (s->img_buffer_end - s->img_buffer); +      if (blen < n) { +         int res, count; + +         memcpy(buffer, s->img_buffer, blen); + +         count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen); +         res = (count == (n-blen)); +         s->img_buffer = s->img_buffer_end; +         return res; +      } +   } + +   if (s->img_buffer+n <= s->img_buffer_end) { +      memcpy(buffer, s->img_buffer, n); +      s->img_buffer += n; +      return 1; +   } else +      return 0; +} +#endif + +#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC) +// nothing +#else +static int stbi__get16be(stbi__context *s) +{ +   int z = stbi__get8(s); +   return (z << 8) + stbi__get8(s); +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC) +// nothing +#else +static stbi__uint32 stbi__get32be(stbi__context *s) +{ +   stbi__uint32 z = stbi__get16be(s); +   return (z << 16) + stbi__get16be(s); +} +#endif + +#if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) +// nothing +#else +static int stbi__get16le(stbi__context *s) +{ +   int z = stbi__get8(s); +   return z + (stbi__get8(s) << 8); +} +#endif + +#ifndef STBI_NO_BMP +static stbi__uint32 stbi__get32le(stbi__context *s) +{ +   stbi__uint32 z = stbi__get16le(s); +   z += (stbi__uint32)stbi__get16le(s) << 16; +   return z; +} +#endif + +#define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 255))  // truncate int to byte without warnings + +#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && 
defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM) +// nothing +#else +////////////////////////////////////////////////////////////////////////////// +// +//  generic converter from built-in img_n to req_comp +//    individual types do this automatically as much as possible (e.g. jpeg +//    does all cases internally since it needs to colorspace convert anyway, +//    and it never has alpha, so very few cases ). png can automatically +//    interleave an alpha=255 channel, but falls back to this for other cases +// +//  assume data buffer is malloced, so malloc a new one and free that one +//  only failure mode is malloc failing + +static stbi_uc stbi__compute_y(int r, int g, int b) +{ +   return (stbi_uc) (((r*77) + (g*150) +  (29*b)) >> 8); +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM) +// nothing +#else +static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y) +{ +   int i,j; +   unsigned char *good; + +   if (req_comp == img_n) return data; +   STBI_ASSERT(req_comp >= 1 && req_comp <= 4); + +   good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0); +   if (good == NULL) { +      STBI_FREE(data); +      return stbi__errpuc("outofmem", "Out of memory"); +   } + +   for (j=0; j < (int) y; ++j) { +      unsigned char *src  = data + j * x * img_n   ; +      unsigned char *dest = good + j * x * req_comp; + +      #define STBI__COMBO(a,b)  ((a)*8+(b)) +      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) +      // convert source image with img_n components to one with req_comp components; +      // avoid switch per pixel, so use switch per scanline and massive macros +      switch (STBI__COMBO(img_n, req_comp)) { +         STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=255;                                     } break; +         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break; +         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=255;                     } break; +         STBI__CASE(2,1) { dest[0]=src[0];                                                  } break; +         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break; +         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1];                  } break; +         STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=255;        } break; +         STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break; +         STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = 255;    } break; +         STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break; +         STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = src[3]; } break; +         STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];                    } break; +         default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return stbi__errpuc("unsupported", "Unsupported format conversion"); +      } +      #undef STBI__CASE +   } + +   STBI_FREE(data); +   return good; +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) +// nothing +#else +static stbi__uint16 stbi__compute_y_16(int r, int g, int b) +{ +   
return (stbi__uint16) (((r*77) + (g*150) +  (29*b)) >> 8); +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) +// nothing +#else +static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y) +{ +   int i,j; +   stbi__uint16 *good; + +   if (req_comp == img_n) return data; +   STBI_ASSERT(req_comp >= 1 && req_comp <= 4); + +   good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2); +   if (good == NULL) { +      STBI_FREE(data); +      return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); +   } + +   for (j=0; j < (int) y; ++j) { +      stbi__uint16 *src  = data + j * x * img_n   ; +      stbi__uint16 *dest = good + j * x * req_comp; + +      #define STBI__COMBO(a,b)  ((a)*8+(b)) +      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) +      // convert source image with img_n components to one with req_comp components; +      // avoid switch per pixel, so use switch per scanline and massive macros +      switch (STBI__COMBO(img_n, req_comp)) { +         STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=0xffff;                                     } break; +         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break; +         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=0xffff;                     } break; +         STBI__CASE(2,1) { dest[0]=src[0];                                                     } break; +         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break; +         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1];                     } break; +         STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=0xffff;        } break; +         STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break; +         STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = 0xffff; } break; +         STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break; +         STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = src[3]; } break; +         STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];                       } break; +         default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return (stbi__uint16*) stbi__errpuc("unsupported", "Unsupported format conversion"); +      } +      #undef STBI__CASE +   } + +   STBI_FREE(data); +   return good; +} +#endif + +#ifndef STBI_NO_LINEAR +static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp) +{ +   int i,k,n; +   float *output; +   if (!data) return NULL; +   output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0); +   if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); } +   // compute number of non-alpha components +   if (comp & 1) n = comp; else n = comp-1; +   for (i=0; i < x*y; ++i) { +      for (k=0; k < n; ++k) { +         output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale); +      } +   } +   if (n < comp) { +      for (i=0; i < x*y; ++i) { +         output[i*comp + n] = data[i*comp + n]/255.0f; +      } +   } +   STBI_FREE(data); +   return output; +} +#endif + +#ifndef STBI_NO_HDR +#define stbi__float2int(x)   ((int) (x)) +static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp) +{ +   int i,k,n; +   stbi_uc *output; +   
if (!data) return NULL; +   output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0); +   if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); } +   // compute number of non-alpha components +   if (comp & 1) n = comp; else n = comp-1; +   for (i=0; i < x*y; ++i) { +      for (k=0; k < n; ++k) { +         float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f; +         if (z < 0) z = 0; +         if (z > 255) z = 255; +         output[i*comp + k] = (stbi_uc) stbi__float2int(z); +      } +      if (k < comp) { +         float z = data[i*comp+k] * 255 + 0.5f; +         if (z < 0) z = 0; +         if (z > 255) z = 255; +         output[i*comp + k] = (stbi_uc) stbi__float2int(z); +      } +   } +   STBI_FREE(data); +   return output; +} +#endif + +////////////////////////////////////////////////////////////////////////////// +// +//  "baseline" JPEG/JFIF decoder +// +//    simple implementation +//      - doesn't support delayed output of y-dimension +//      - simple interface (only one output format: 8-bit interleaved RGB) +//      - doesn't try to recover corrupt jpegs +//      - doesn't allow partial loading, loading multiple at once +//      - still fast on x86 (copying globals into locals doesn't help x86) +//      - allocates lots of intermediate memory (full size of all components) +//        - non-interleaved case requires this anyway +//        - allows good upsampling (see next) +//    high-quality +//      - upsampled channels are bilinearly interpolated, even across blocks +//      - quality integer IDCT derived from IJG's 'slow' +//    performance +//      - fast huffman; reasonable integer IDCT +//      - some SIMD kernels for common paths on targets with SSE2/NEON +//      - uses a lot of intermediate memory, could cache poorly + +#ifndef STBI_NO_JPEG + +// huffman decoding acceleration +#define FAST_BITS   9  // larger handles more cases; smaller stomps less cache + +typedef struct +{ +   stbi_uc  fast[1 << FAST_BITS]; +   // weirdly, repacking this into AoS is a 10% speed loss, instead of a win +   stbi__uint16 code[256]; +   stbi_uc  values[256]; +   stbi_uc  size[257]; +   unsigned int maxcode[18]; +   int    delta[17];   // old 'firstsymbol' - old 'firstcode' +} stbi__huffman; + +typedef struct +{ +   stbi__context *s; +   stbi__huffman huff_dc[4]; +   stbi__huffman huff_ac[4]; +   stbi__uint16 dequant[4][64]; +   stbi__int16 fast_ac[4][1 << FAST_BITS]; + +// sizes for components, interleaved MCUs +   int img_h_max, img_v_max; +   int img_mcu_x, img_mcu_y; +   int img_mcu_w, img_mcu_h; + +// definition of jpeg image component +   struct +   { +      int id; +      int h,v; +      int tq; +      int hd,ha; +      int dc_pred; + +      int x,y,w2,h2; +      stbi_uc *data; +      void *raw_data, *raw_coeff; +      stbi_uc *linebuf; +      short   *coeff;   // progressive only +      int      coeff_w, coeff_h; // number of 8x8 coefficient blocks +   } img_comp[4]; + +   stbi__uint32   code_buffer; // jpeg entropy-coded buffer +   int            code_bits;   // number of valid bits +   unsigned char  marker;      // marker seen while filling entropy buffer +   int            nomore;      // flag if we saw a marker so must stop + +   int            progressive; +   int            spec_start; +   int            spec_end; +   int            succ_high; +   int            succ_low; +   int            eob_run; +   int            jfif; +   int            app14_color_transform; // Adobe APP14 tag +   int            rgb; 
+ +   int scan_n, order[4]; +   int restart_interval, todo; + +// kernels +   void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]); +   void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step); +   stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs); +} stbi__jpeg; + +static int stbi__build_huffman(stbi__huffman *h, int *count) +{ +   int i,j,k=0; +   unsigned int code; +   // build size list for each symbol (from JPEG spec) +   for (i=0; i < 16; ++i) { +      for (j=0; j < count[i]; ++j) { +         h->size[k++] = (stbi_uc) (i+1); +         if(k >= 257) return stbi__err("bad size list","Corrupt JPEG"); +      } +   } +   h->size[k] = 0; + +   // compute actual symbols (from jpeg spec) +   code = 0; +   k = 0; +   for(j=1; j <= 16; ++j) { +      // compute delta to add to code to compute symbol id +      h->delta[j] = k - code; +      if (h->size[k] == j) { +         while (h->size[k] == j) +            h->code[k++] = (stbi__uint16) (code++); +         if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG"); +      } +      // compute largest code + 1 for this size, preshifted as needed later +      h->maxcode[j] = code << (16-j); +      code <<= 1; +   } +   h->maxcode[j] = 0xffffffff; + +   // build non-spec acceleration table; 255 is flag for not-accelerated +   memset(h->fast, 255, 1 << FAST_BITS); +   for (i=0; i < k; ++i) { +      int s = h->size[i]; +      if (s <= FAST_BITS) { +         int c = h->code[i] << (FAST_BITS-s); +         int m = 1 << (FAST_BITS-s); +         for (j=0; j < m; ++j) { +            h->fast[c+j] = (stbi_uc) i; +         } +      } +   } +   return 1; +} + +// build a table that decodes both magnitude and value of small ACs in +// one go. +static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h) +{ +   int i; +   for (i=0; i < (1 << FAST_BITS); ++i) { +      stbi_uc fast = h->fast[i]; +      fast_ac[i] = 0; +      if (fast < 255) { +         int rs = h->values[fast]; +         int run = (rs >> 4) & 15; +         int magbits = rs & 15; +         int len = h->size[fast]; + +         if (magbits && len + magbits <= FAST_BITS) { +            // magnitude code followed by receive_extend code +            int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits); +            int m = 1 << (magbits - 1); +            if (k < m) k += (~0U << magbits) + 1; +            // if the result is small enough, we can fit it in fast_ac table +            if (k >= -128 && k <= 127) +               fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits)); +         } +      } +   } +} + +static void stbi__grow_buffer_unsafe(stbi__jpeg *j) +{ +   do { +      unsigned int b = j->nomore ? 
0 : stbi__get8(j->s); +      if (b == 0xff) { +         int c = stbi__get8(j->s); +         while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes +         if (c != 0) { +            j->marker = (unsigned char) c; +            j->nomore = 1; +            return; +         } +      } +      j->code_buffer |= b << (24 - j->code_bits); +      j->code_bits += 8; +   } while (j->code_bits <= 24); +} + +// (1 << n) - 1 +static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535}; + +// decode a jpeg huffman value from the bitstream +stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h) +{ +   unsigned int temp; +   int c,k; + +   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + +   // look at the top FAST_BITS and determine what symbol ID it is, +   // if the code is <= FAST_BITS +   c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); +   k = h->fast[c]; +   if (k < 255) { +      int s = h->size[k]; +      if (s > j->code_bits) +         return -1; +      j->code_buffer <<= s; +      j->code_bits -= s; +      return h->values[k]; +   } + +   // naive test is to shift the code_buffer down so k bits are +   // valid, then test against maxcode. To speed this up, we've +   // preshifted maxcode left so that it has (16-k) 0s at the +   // end; in other words, regardless of the number of bits, it +   // wants to be compared against something shifted to have 16; +   // that way we don't need to shift inside the loop. +   temp = j->code_buffer >> 16; +   for (k=FAST_BITS+1 ; ; ++k) +      if (temp < h->maxcode[k]) +         break; +   if (k == 17) { +      // error! code not found +      j->code_bits -= 16; +      return -1; +   } + +   if (k > j->code_bits) +      return -1; + +   // convert the huffman code to the symbol id +   c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k]; +   if(c < 0 || c >= 256) // symbol id out of bounds! +       return -1; +   STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]); + +   // convert the id to a symbol +   j->code_bits -= k; +   j->code_buffer <<= k; +   return h->values[c]; +} + +// bias[n] = (-1<<n) + 1 +static const int stbi__jbias[16] = {0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047,-4095,-8191,-16383,-32767}; + +// combined JPEG 'receive' and JPEG 'extend', since baseline +// always extends everything it receives. 
+stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n) +{ +   unsigned int k; +   int sgn; +   if (j->code_bits < n) stbi__grow_buffer_unsafe(j); +   if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s intead of continuing + +   sgn = j->code_buffer >> 31; // sign bit always in MSB; 0 if MSB clear (positive), 1 if MSB set (negative) +   k = stbi_lrot(j->code_buffer, n); +   j->code_buffer = k & ~stbi__bmask[n]; +   k &= stbi__bmask[n]; +   j->code_bits -= n; +   return k + (stbi__jbias[n] & (sgn - 1)); +} + +// get some unsigned bits +stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n) +{ +   unsigned int k; +   if (j->code_bits < n) stbi__grow_buffer_unsafe(j); +   if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s intead of continuing +   k = stbi_lrot(j->code_buffer, n); +   j->code_buffer = k & ~stbi__bmask[n]; +   k &= stbi__bmask[n]; +   j->code_bits -= n; +   return k; +} + +stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j) +{ +   unsigned int k; +   if (j->code_bits < 1) stbi__grow_buffer_unsafe(j); +   if (j->code_bits < 1) return 0; // ran out of bits from stream, return 0s intead of continuing +   k = j->code_buffer; +   j->code_buffer <<= 1; +   --j->code_bits; +   return k & 0x80000000; +} + +// given a value that's at position X in the zigzag stream, +// where does it appear in the 8x8 matrix coded as row-major? +static const stbi_uc stbi__jpeg_dezigzag[64+15] = +{ +    0,  1,  8, 16,  9,  2,  3, 10, +   17, 24, 32, 25, 18, 11,  4,  5, +   12, 19, 26, 33, 40, 48, 41, 34, +   27, 20, 13,  6,  7, 14, 21, 28, +   35, 42, 49, 56, 57, 50, 43, 36, +   29, 22, 15, 23, 30, 37, 44, 51, +   58, 59, 52, 45, 38, 31, 39, 46, +   53, 60, 61, 54, 47, 55, 62, 63, +   // let corrupt input sample past end +   63, 63, 63, 63, 63, 63, 63, 63, +   63, 63, 63, 63, 63, 63, 63 +}; + +// decode one 64-entry block-- +static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant) +{ +   int diff,dc,k; +   int t; + +   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); +   t = stbi__jpeg_huff_decode(j, hdc); +   if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG"); + +   // 0 all the ac values now so we can do it 32-bits at a time +   memset(data,0,64*sizeof(data[0])); + +   diff = t ? 
stbi__extend_receive(j, t) : 0; +   if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta","Corrupt JPEG"); +   dc = j->img_comp[b].dc_pred + diff; +   j->img_comp[b].dc_pred = dc; +   if (!stbi__mul2shorts_valid(dc, dequant[0])) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); +   data[0] = (short) (dc * dequant[0]); + +   // decode AC components, see JPEG spec +   k = 1; +   do { +      unsigned int zig; +      int c,r,s; +      if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); +      c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); +      r = fac[c]; +      if (r) { // fast-AC path +         k += (r >> 4) & 15; // run +         s = r & 15; // combined length +         if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available"); +         j->code_buffer <<= s; +         j->code_bits -= s; +         // decode into unzigzag'd location +         zig = stbi__jpeg_dezigzag[k++]; +         data[zig] = (short) ((r >> 8) * dequant[zig]); +      } else { +         int rs = stbi__jpeg_huff_decode(j, hac); +         if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); +         s = rs & 15; +         r = rs >> 4; +         if (s == 0) { +            if (rs != 0xf0) break; // end block +            k += 16; +         } else { +            k += r; +            // decode into unzigzag'd location +            zig = stbi__jpeg_dezigzag[k++]; +            data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]); +         } +      } +   } while (k < 64); +   return 1; +} + +static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b) +{ +   int diff,dc; +   int t; +   if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + +   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + +   if (j->succ_high == 0) { +      // first scan for DC coefficient, must be first +      memset(data,0,64*sizeof(data[0])); // 0 all the ac values now +      t = stbi__jpeg_huff_decode(j, hdc); +      if (t < 0 || t > 15) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); +      diff = t ? 
stbi__extend_receive(j, t) : 0; + +      if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta", "Corrupt JPEG"); +      dc = j->img_comp[b].dc_pred + diff; +      j->img_comp[b].dc_pred = dc; +      if (!stbi__mul2shorts_valid(dc, 1 << j->succ_low)) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); +      data[0] = (short) (dc * (1 << j->succ_low)); +   } else { +      // refinement scan for DC coefficient +      if (stbi__jpeg_get_bit(j)) +         data[0] += (short) (1 << j->succ_low); +   } +   return 1; +} + +// @OPTIMIZE: store non-zigzagged during the decode passes, +// and only de-zigzag when dequantizing +static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac) +{ +   int k; +   if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + +   if (j->succ_high == 0) { +      int shift = j->succ_low; + +      if (j->eob_run) { +         --j->eob_run; +         return 1; +      } + +      k = j->spec_start; +      do { +         unsigned int zig; +         int c,r,s; +         if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); +         c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); +         r = fac[c]; +         if (r) { // fast-AC path +            k += (r >> 4) & 15; // run +            s = r & 15; // combined length +            if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available"); +            j->code_buffer <<= s; +            j->code_bits -= s; +            zig = stbi__jpeg_dezigzag[k++]; +            data[zig] = (short) ((r >> 8) * (1 << shift)); +         } else { +            int rs = stbi__jpeg_huff_decode(j, hac); +            if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); +            s = rs & 15; +            r = rs >> 4; +            if (s == 0) { +               if (r < 15) { +                  j->eob_run = (1 << r); +                  if (r) +                     j->eob_run += stbi__jpeg_get_bits(j, r); +                  --j->eob_run; +                  break; +               } +               k += 16; +            } else { +               k += r; +               zig = stbi__jpeg_dezigzag[k++]; +               data[zig] = (short) (stbi__extend_receive(j,s) * (1 << shift)); +            } +         } +      } while (k <= j->spec_end); +   } else { +      // refinement scan for these AC coefficients + +      short bit = (short) (1 << j->succ_low); + +      if (j->eob_run) { +         --j->eob_run; +         for (k = j->spec_start; k <= j->spec_end; ++k) { +            short *p = &data[stbi__jpeg_dezigzag[k]]; +            if (*p != 0) +               if (stbi__jpeg_get_bit(j)) +                  if ((*p & bit)==0) { +                     if (*p > 0) +                        *p += bit; +                     else +                        *p -= bit; +                  } +         } +      } else { +         k = j->spec_start; +         do { +            int r,s; +            int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh +            if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); +            s = rs & 15; +            r = rs >> 4; +            if (s == 0) { +               if (r < 15) { +                  j->eob_run = (1 << r) - 1; +                  if (r) +                     j->eob_run += stbi__jpeg_get_bits(j, r); +                  r = 64; // force end of block +     
          } else { +                  // r=15 s=0 should write 16 0s, so we just do +                  // a run of 15 0s and then write s (which is 0), +                  // so we don't have to do anything special here +               } +            } else { +               if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG"); +               // sign bit +               if (stbi__jpeg_get_bit(j)) +                  s = bit; +               else +                  s = -bit; +            } + +            // advance by r +            while (k <= j->spec_end) { +               short *p = &data[stbi__jpeg_dezigzag[k++]]; +               if (*p != 0) { +                  if (stbi__jpeg_get_bit(j)) +                     if ((*p & bit)==0) { +                        if (*p > 0) +                           *p += bit; +                        else +                           *p -= bit; +                     } +               } else { +                  if (r == 0) { +                     *p = (short) s; +                     break; +                  } +                  --r; +               } +            } +         } while (k <= j->spec_end); +      } +   } +   return 1; +} + +// take a -128..127 value and stbi__clamp it and convert to 0..255 +stbi_inline static stbi_uc stbi__clamp(int x) +{ +   // trick to use a single test to catch both cases +   if ((unsigned int) x > 255) { +      if (x < 0) return 0; +      if (x > 255) return 255; +   } +   return (stbi_uc) x; +} + +#define stbi__f2f(x)  ((int) (((x) * 4096 + 0.5))) +#define stbi__fsh(x)  ((x) * 4096) + +// derived from jidctint -- DCT_ISLOW +#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \ +   int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \ +   p2 = s2;                                    \ +   p3 = s6;                                    \ +   p1 = (p2+p3) * stbi__f2f(0.5411961f);       \ +   t2 = p1 + p3*stbi__f2f(-1.847759065f);      \ +   t3 = p1 + p2*stbi__f2f( 0.765366865f);      \ +   p2 = s0;                                    \ +   p3 = s4;                                    \ +   t0 = stbi__fsh(p2+p3);                      \ +   t1 = stbi__fsh(p2-p3);                      \ +   x0 = t0+t3;                                 \ +   x3 = t0-t3;                                 \ +   x1 = t1+t2;                                 \ +   x2 = t1-t2;                                 \ +   t0 = s7;                                    \ +   t1 = s5;                                    \ +   t2 = s3;                                    \ +   t3 = s1;                                    \ +   p3 = t0+t2;                                 \ +   p4 = t1+t3;                                 \ +   p1 = t0+t3;                                 \ +   p2 = t1+t2;                                 \ +   p5 = (p3+p4)*stbi__f2f( 1.175875602f);      \ +   t0 = t0*stbi__f2f( 0.298631336f);           \ +   t1 = t1*stbi__f2f( 2.053119869f);           \ +   t2 = t2*stbi__f2f( 3.072711026f);           \ +   t3 = t3*stbi__f2f( 1.501321110f);           \ +   p1 = p5 + p1*stbi__f2f(-0.899976223f);      \ +   p2 = p5 + p2*stbi__f2f(-2.562915447f);      \ +   p3 = p3*stbi__f2f(-1.961570560f);           \ +   p4 = p4*stbi__f2f(-0.390180644f);           \ +   t3 += p1+p4;                                \ +   t2 += p2+p3;                                \ +   t1 += p2+p4;                                \ +   t0 += p1+p3; + +static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64]) +{ +   int i,val[64],*v=val; +   stbi_uc *o; +   short *d = data; + +   // 
columns +   for (i=0; i < 8; ++i,++d, ++v) { +      // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing +      if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 +           && d[40]==0 && d[48]==0 && d[56]==0) { +         //    no shortcut                 0     seconds +         //    (1|2|3|4|5|6|7)==0          0     seconds +         //    all separate               -0.047 seconds +         //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds +         int dcterm = d[0]*4; +         v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; +      } else { +         STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56]) +         // constants scaled things up by 1<<12; let's bring them back +         // down, but keep 2 extra bits of precision +         x0 += 512; x1 += 512; x2 += 512; x3 += 512; +         v[ 0] = (x0+t3) >> 10; +         v[56] = (x0-t3) >> 10; +         v[ 8] = (x1+t2) >> 10; +         v[48] = (x1-t2) >> 10; +         v[16] = (x2+t1) >> 10; +         v[40] = (x2-t1) >> 10; +         v[24] = (x3+t0) >> 10; +         v[32] = (x3-t0) >> 10; +      } +   } + +   for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) { +      // no fast case since the first 1D IDCT spread components out +      STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]) +      // constants scaled things up by 1<<12, plus we had 1<<2 from first +      // loop, plus horizontal and vertical each scale by sqrt(8) so together +      // we've got an extra 1<<3, so 1<<17 total we need to remove. +      // so we want to round that, which means adding 0.5 * 1<<17, +      // aka 65536. Also, we'll end up with -128 to 127 that we want +      // to encode as 0..255 by adding 128, so we'll add that before the shift +      x0 += 65536 + (128<<17); +      x1 += 65536 + (128<<17); +      x2 += 65536 + (128<<17); +      x3 += 65536 + (128<<17); +      // tried computing the shifts into temps, or'ing the temps to see +      // if any were out of range, but that was slower +      o[0] = stbi__clamp((x0+t3) >> 17); +      o[7] = stbi__clamp((x0-t3) >> 17); +      o[1] = stbi__clamp((x1+t2) >> 17); +      o[6] = stbi__clamp((x1-t2) >> 17); +      o[2] = stbi__clamp((x2+t1) >> 17); +      o[5] = stbi__clamp((x2-t1) >> 17); +      o[3] = stbi__clamp((x3+t0) >> 17); +      o[4] = stbi__clamp((x3-t0) >> 17); +   } +} + +#ifdef STBI_SSE2 +// sse2 integer IDCT. not the fastest possible implementation but it +// produces bit-identical results to the generic C version so it's +// fully "transparent". +static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) +{ +   // This is constructed to match our regular (generic) integer IDCT exactly. 
+   __m128i row0, row1, row2, row3, row4, row5, row6, row7; +   __m128i tmp; + +   // dot product constant: even elems=x, odd elems=y +   #define dct_const(x,y)  _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y)) + +   // out(0) = c0[even]*x + c0[odd]*y   (c0, x, y 16-bit, out 32-bit) +   // out(1) = c1[even]*x + c1[odd]*y +   #define dct_rot(out0,out1, x,y,c0,c1) \ +      __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \ +      __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \ +      __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \ +      __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \ +      __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \ +      __m128i out1##_h = _mm_madd_epi16(c0##hi, c1) + +   // out = in << 12  (in 16-bit, out 32-bit) +   #define dct_widen(out, in) \ +      __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \ +      __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4) + +   // wide add +   #define dct_wadd(out, a, b) \ +      __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \ +      __m128i out##_h = _mm_add_epi32(a##_h, b##_h) + +   // wide sub +   #define dct_wsub(out, a, b) \ +      __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \ +      __m128i out##_h = _mm_sub_epi32(a##_h, b##_h) + +   // butterfly a/b, add bias, then shift by "s" and pack +   #define dct_bfly32o(out0, out1, a,b,bias,s) \ +      { \ +         __m128i abiased_l = _mm_add_epi32(a##_l, bias); \ +         __m128i abiased_h = _mm_add_epi32(a##_h, bias); \ +         dct_wadd(sum, abiased, b); \ +         dct_wsub(dif, abiased, b); \ +         out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \ +         out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \ +      } + +   // 8-bit interleave step (for transposes) +   #define dct_interleave8(a, b) \ +      tmp = a; \ +      a = _mm_unpacklo_epi8(a, b); \ +      b = _mm_unpackhi_epi8(tmp, b) + +   // 16-bit interleave step (for transposes) +   #define dct_interleave16(a, b) \ +      tmp = a; \ +      a = _mm_unpacklo_epi16(a, b); \ +      b = _mm_unpackhi_epi16(tmp, b) + +   #define dct_pass(bias,shift) \ +      { \ +         /* even part */ \ +         dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \ +         __m128i sum04 = _mm_add_epi16(row0, row4); \ +         __m128i dif04 = _mm_sub_epi16(row0, row4); \ +         dct_widen(t0e, sum04); \ +         dct_widen(t1e, dif04); \ +         dct_wadd(x0, t0e, t3e); \ +         dct_wsub(x3, t0e, t3e); \ +         dct_wadd(x1, t1e, t2e); \ +         dct_wsub(x2, t1e, t2e); \ +         /* odd part */ \ +         dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \ +         dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \ +         __m128i sum17 = _mm_add_epi16(row1, row7); \ +         __m128i sum35 = _mm_add_epi16(row3, row5); \ +         dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \ +         dct_wadd(x4, y0o, y4o); \ +         dct_wadd(x5, y1o, y5o); \ +         dct_wadd(x6, y2o, y5o); \ +         dct_wadd(x7, y3o, y4o); \ +         dct_bfly32o(row0,row7, x0,x7,bias,shift); \ +         dct_bfly32o(row1,row6, x1,x6,bias,shift); \ +         dct_bfly32o(row2,row5, x2,x5,bias,shift); \ +         dct_bfly32o(row3,row4, x3,x4,bias,shift); \ +      } + +   __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f)); +   __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f)); +   __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + 
stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f)); +   __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f)); +   __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f)); +   __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f)); +   __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f)); +   __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f)); + +   // rounding biases in column/row passes, see stbi__idct_block for explanation. +   __m128i bias_0 = _mm_set1_epi32(512); +   __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17)); + +   // load +   row0 = _mm_load_si128((const __m128i *) (data + 0*8)); +   row1 = _mm_load_si128((const __m128i *) (data + 1*8)); +   row2 = _mm_load_si128((const __m128i *) (data + 2*8)); +   row3 = _mm_load_si128((const __m128i *) (data + 3*8)); +   row4 = _mm_load_si128((const __m128i *) (data + 4*8)); +   row5 = _mm_load_si128((const __m128i *) (data + 5*8)); +   row6 = _mm_load_si128((const __m128i *) (data + 6*8)); +   row7 = _mm_load_si128((const __m128i *) (data + 7*8)); + +   // column pass +   dct_pass(bias_0, 10); + +   { +      // 16bit 8x8 transpose pass 1 +      dct_interleave16(row0, row4); +      dct_interleave16(row1, row5); +      dct_interleave16(row2, row6); +      dct_interleave16(row3, row7); + +      // transpose pass 2 +      dct_interleave16(row0, row2); +      dct_interleave16(row1, row3); +      dct_interleave16(row4, row6); +      dct_interleave16(row5, row7); + +      // transpose pass 3 +      dct_interleave16(row0, row1); +      dct_interleave16(row2, row3); +      dct_interleave16(row4, row5); +      dct_interleave16(row6, row7); +   } + +   // row pass +   dct_pass(bias_1, 17); + +   { +      // pack +      __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7 +      __m128i p1 = _mm_packus_epi16(row2, row3); +      __m128i p2 = _mm_packus_epi16(row4, row5); +      __m128i p3 = _mm_packus_epi16(row6, row7); + +      // 8bit 8x8 transpose pass 1 +      dct_interleave8(p0, p2); // a0e0a1e1... +      dct_interleave8(p1, p3); // c0g0c1g1... + +      // transpose pass 2 +      dct_interleave8(p0, p1); // a0c0e0g0... +      dct_interleave8(p2, p3); // b0d0f0h0... + +      // transpose pass 3 +      dct_interleave8(p0, p2); // a0b0c0d0... +      dct_interleave8(p1, p3); // a4b4c4d4... + +      // store +      _mm_storel_epi64((__m128i *) out, p0); out += out_stride; +      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride; +      _mm_storel_epi64((__m128i *) out, p2); out += out_stride; +      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride; +      _mm_storel_epi64((__m128i *) out, p1); out += out_stride; +      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride; +      _mm_storel_epi64((__m128i *) out, p3); out += out_stride; +      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e)); +   } + +#undef dct_const +#undef dct_rot +#undef dct_widen +#undef dct_wadd +#undef dct_wsub +#undef dct_bfly32o +#undef dct_interleave8 +#undef dct_interleave16 +#undef dct_pass +} + +#endif // STBI_SSE2 + +#ifdef STBI_NEON + +// NEON integer IDCT. should produce bit-identical +// results to the generic C version. 
+static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) +{ +   int16x8_t row0, row1, row2, row3, row4, row5, row6, row7; + +   int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f)); +   int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f)); +   int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f)); +   int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f)); +   int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f)); +   int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f)); +   int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f)); +   int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f)); +   int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f)); +   int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f)); +   int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f)); +   int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f)); + +#define dct_long_mul(out, inq, coeff) \ +   int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \ +   int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff) + +#define dct_long_mac(out, acc, inq, coeff) \ +   int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \ +   int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff) + +#define dct_widen(out, inq) \ +   int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \ +   int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12) + +// wide add +#define dct_wadd(out, a, b) \ +   int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \ +   int32x4_t out##_h = vaddq_s32(a##_h, b##_h) + +// wide sub +#define dct_wsub(out, a, b) \ +   int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \ +   int32x4_t out##_h = vsubq_s32(a##_h, b##_h) + +// butterfly a/b, then shift using "shiftop" by "s" and pack +#define dct_bfly32o(out0,out1, a,b,shiftop,s) \ +   { \ +      dct_wadd(sum, a, b); \ +      dct_wsub(dif, a, b); \ +      out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \ +      out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \ +   } + +#define dct_pass(shiftop, shift) \ +   { \ +      /* even part */ \ +      int16x8_t sum26 = vaddq_s16(row2, row6); \ +      dct_long_mul(p1e, sum26, rot0_0); \ +      dct_long_mac(t2e, p1e, row6, rot0_1); \ +      dct_long_mac(t3e, p1e, row2, rot0_2); \ +      int16x8_t sum04 = vaddq_s16(row0, row4); \ +      int16x8_t dif04 = vsubq_s16(row0, row4); \ +      dct_widen(t0e, sum04); \ +      dct_widen(t1e, dif04); \ +      dct_wadd(x0, t0e, t3e); \ +      dct_wsub(x3, t0e, t3e); \ +      dct_wadd(x1, t1e, t2e); \ +      dct_wsub(x2, t1e, t2e); \ +      /* odd part */ \ +      int16x8_t sum15 = vaddq_s16(row1, row5); \ +      int16x8_t sum17 = vaddq_s16(row1, row7); \ +      int16x8_t sum35 = vaddq_s16(row3, row5); \ +      int16x8_t sum37 = vaddq_s16(row3, row7); \ +      int16x8_t sumodd = vaddq_s16(sum17, sum35); \ +      dct_long_mul(p5o, sumodd, rot1_0); \ +      dct_long_mac(p1o, p5o, sum17, rot1_1); \ +      dct_long_mac(p2o, p5o, sum35, rot1_2); \ +      dct_long_mul(p3o, sum37, rot2_0); \ +      dct_long_mul(p4o, sum15, rot2_1); \ +      dct_wadd(sump13o, p1o, p3o); \ +      dct_wadd(sump24o, p2o, p4o); \ +      dct_wadd(sump23o, p2o, p3o); \ +      dct_wadd(sump14o, p1o, p4o); \ +      dct_long_mac(x4, sump13o, row7, rot3_0); \ +      dct_long_mac(x5, sump24o, row5, rot3_1); \ +      dct_long_mac(x6, sump23o, row3, rot3_2); \ +      dct_long_mac(x7, sump14o, row1, rot3_3); \ +      dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \ +      dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \ +      dct_bfly32o(row2,row5, 
x2,x5,shiftop,shift); \ +      dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \ +   } + +   // load +   row0 = vld1q_s16(data + 0*8); +   row1 = vld1q_s16(data + 1*8); +   row2 = vld1q_s16(data + 2*8); +   row3 = vld1q_s16(data + 3*8); +   row4 = vld1q_s16(data + 4*8); +   row5 = vld1q_s16(data + 5*8); +   row6 = vld1q_s16(data + 6*8); +   row7 = vld1q_s16(data + 7*8); + +   // add DC bias +   row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0)); + +   // column pass +   dct_pass(vrshrn_n_s32, 10); + +   // 16bit 8x8 transpose +   { +// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively. +// whether compilers actually get this is another story, sadly. +#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; } +#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); } +#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); } + +      // pass 1 +      dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6 +      dct_trn16(row2, row3); +      dct_trn16(row4, row5); +      dct_trn16(row6, row7); + +      // pass 2 +      dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4 +      dct_trn32(row1, row3); +      dct_trn32(row4, row6); +      dct_trn32(row5, row7); + +      // pass 3 +      dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0 +      dct_trn64(row1, row5); +      dct_trn64(row2, row6); +      dct_trn64(row3, row7); + +#undef dct_trn16 +#undef dct_trn32 +#undef dct_trn64 +   } + +   // row pass +   // vrshrn_n_s32 only supports shifts up to 16, we need +   // 17. so do a non-rounding shift of 16 first then follow +   // up with a rounding shift by 1. +   dct_pass(vshrn_n_s32, 16); + +   { +      // pack and round +      uint8x8_t p0 = vqrshrun_n_s16(row0, 1); +      uint8x8_t p1 = vqrshrun_n_s16(row1, 1); +      uint8x8_t p2 = vqrshrun_n_s16(row2, 1); +      uint8x8_t p3 = vqrshrun_n_s16(row3, 1); +      uint8x8_t p4 = vqrshrun_n_s16(row4, 1); +      uint8x8_t p5 = vqrshrun_n_s16(row5, 1); +      uint8x8_t p6 = vqrshrun_n_s16(row6, 1); +      uint8x8_t p7 = vqrshrun_n_s16(row7, 1); + +      // again, these can translate into one instruction, but often don't. +#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; } +#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); } +#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); } + +      // sadly can't use interleaved stores here since we only write +      // 8 bytes to each scan line! 
+ +      // 8x8 8-bit transpose pass 1 +      dct_trn8_8(p0, p1); +      dct_trn8_8(p2, p3); +      dct_trn8_8(p4, p5); +      dct_trn8_8(p6, p7); + +      // pass 2 +      dct_trn8_16(p0, p2); +      dct_trn8_16(p1, p3); +      dct_trn8_16(p4, p6); +      dct_trn8_16(p5, p7); + +      // pass 3 +      dct_trn8_32(p0, p4); +      dct_trn8_32(p1, p5); +      dct_trn8_32(p2, p6); +      dct_trn8_32(p3, p7); + +      // store +      vst1_u8(out, p0); out += out_stride; +      vst1_u8(out, p1); out += out_stride; +      vst1_u8(out, p2); out += out_stride; +      vst1_u8(out, p3); out += out_stride; +      vst1_u8(out, p4); out += out_stride; +      vst1_u8(out, p5); out += out_stride; +      vst1_u8(out, p6); out += out_stride; +      vst1_u8(out, p7); + +#undef dct_trn8_8 +#undef dct_trn8_16 +#undef dct_trn8_32 +   } + +#undef dct_long_mul +#undef dct_long_mac +#undef dct_widen +#undef dct_wadd +#undef dct_wsub +#undef dct_bfly32o +#undef dct_pass +} + +#endif // STBI_NEON + +#define STBI__MARKER_none  0xff +// if there's a pending marker from the entropy stream, return that +// otherwise, fetch from the stream and get a marker. if there's no +// marker, return 0xff, which is never a valid marker value +static stbi_uc stbi__get_marker(stbi__jpeg *j) +{ +   stbi_uc x; +   if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; } +   x = stbi__get8(j->s); +   if (x != 0xff) return STBI__MARKER_none; +   while (x == 0xff) +      x = stbi__get8(j->s); // consume repeated 0xff fill bytes +   return x; +} + +// in each scan, we'll have scan_n components, and the order +// of the components is specified by order[] +#define STBI__RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7) + +// after a restart interval, stbi__jpeg_reset the entropy decoder and +// the dc prediction +static void stbi__jpeg_reset(stbi__jpeg *j) +{ +   j->code_bits = 0; +   j->code_buffer = 0; +   j->nomore = 0; +   j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0; +   j->marker = STBI__MARKER_none; +   j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff; +   j->eob_run = 0; +   // no more than 1<<31 MCUs if no restart_interal? 
that's plenty safe, +   // since we don't even allow 1<<30 pixels +} + +static int stbi__parse_entropy_coded_data(stbi__jpeg *z) +{ +   stbi__jpeg_reset(z); +   if (!z->progressive) { +      if (z->scan_n == 1) { +         int i,j; +         STBI_SIMD_ALIGN(short, data[64]); +         int n = z->order[0]; +         // non-interleaved data, we just need to process one block at a time, +         // in trivial scanline order +         // number of blocks to do just depends on how many actual "pixels" this +         // component has, independent of interleaved MCU blocking and such +         int w = (z->img_comp[n].x+7) >> 3; +         int h = (z->img_comp[n].y+7) >> 3; +         for (j=0; j < h; ++j) { +            for (i=0; i < w; ++i) { +               int ha = z->img_comp[n].ha; +               if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; +               z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); +               // every data block is an MCU, so countdown the restart interval +               if (--z->todo <= 0) { +                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); +                  // if it's NOT a restart, then just bail, so we get corrupt data +                  // rather than no data +                  if (!STBI__RESTART(z->marker)) return 1; +                  stbi__jpeg_reset(z); +               } +            } +         } +         return 1; +      } else { // interleaved +         int i,j,k,x,y; +         STBI_SIMD_ALIGN(short, data[64]); +         for (j=0; j < z->img_mcu_y; ++j) { +            for (i=0; i < z->img_mcu_x; ++i) { +               // scan an interleaved mcu... 
process scan_n components in order +               for (k=0; k < z->scan_n; ++k) { +                  int n = z->order[k]; +                  // scan out an mcu's worth of this component; that's just determined +                  // by the basic H and V specified for the component +                  for (y=0; y < z->img_comp[n].v; ++y) { +                     for (x=0; x < z->img_comp[n].h; ++x) { +                        int x2 = (i*z->img_comp[n].h + x)*8; +                        int y2 = (j*z->img_comp[n].v + y)*8; +                        int ha = z->img_comp[n].ha; +                        if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; +                        z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data); +                     } +                  } +               } +               // after all interleaved components, that's an interleaved MCU, +               // so now count down the restart interval +               if (--z->todo <= 0) { +                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); +                  if (!STBI__RESTART(z->marker)) return 1; +                  stbi__jpeg_reset(z); +               } +            } +         } +         return 1; +      } +   } else { +      if (z->scan_n == 1) { +         int i,j; +         int n = z->order[0]; +         // non-interleaved data, we just need to process one block at a time, +         // in trivial scanline order +         // number of blocks to do just depends on how many actual "pixels" this +         // component has, independent of interleaved MCU blocking and such +         int w = (z->img_comp[n].x+7) >> 3; +         int h = (z->img_comp[n].y+7) >> 3; +         for (j=0; j < h; ++j) { +            for (i=0; i < w; ++i) { +               short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); +               if (z->spec_start == 0) { +                  if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) +                     return 0; +               } else { +                  int ha = z->img_comp[n].ha; +                  if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha])) +                     return 0; +               } +               // every data block is an MCU, so countdown the restart interval +               if (--z->todo <= 0) { +                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); +                  if (!STBI__RESTART(z->marker)) return 1; +                  stbi__jpeg_reset(z); +               } +            } +         } +         return 1; +      } else { // interleaved +         int i,j,k,x,y; +         for (j=0; j < z->img_mcu_y; ++j) { +            for (i=0; i < z->img_mcu_x; ++i) { +               // scan an interleaved mcu... 
process scan_n components in order +               for (k=0; k < z->scan_n; ++k) { +                  int n = z->order[k]; +                  // scan out an mcu's worth of this component; that's just determined +                  // by the basic H and V specified for the component +                  for (y=0; y < z->img_comp[n].v; ++y) { +                     for (x=0; x < z->img_comp[n].h; ++x) { +                        int x2 = (i*z->img_comp[n].h + x); +                        int y2 = (j*z->img_comp[n].v + y); +                        short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w); +                        if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) +                           return 0; +                     } +                  } +               } +               // after all interleaved components, that's an interleaved MCU, +               // so now count down the restart interval +               if (--z->todo <= 0) { +                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); +                  if (!STBI__RESTART(z->marker)) return 1; +                  stbi__jpeg_reset(z); +               } +            } +         } +         return 1; +      } +   } +} + +static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant) +{ +   int i; +   for (i=0; i < 64; ++i) +      data[i] *= dequant[i]; +} + +static void stbi__jpeg_finish(stbi__jpeg *z) +{ +   if (z->progressive) { +      // dequantize and idct the data +      int i,j,n; +      for (n=0; n < z->s->img_n; ++n) { +         int w = (z->img_comp[n].x+7) >> 3; +         int h = (z->img_comp[n].y+7) >> 3; +         for (j=0; j < h; ++j) { +            for (i=0; i < w; ++i) { +               short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); +               stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]); +               z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); +            } +         } +      } +   } +} + +static int stbi__process_marker(stbi__jpeg *z, int m) +{ +   int L; +   switch (m) { +      case STBI__MARKER_none: // no marker found +         return stbi__err("expected marker","Corrupt JPEG"); + +      case 0xDD: // DRI - specify restart interval +         if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG"); +         z->restart_interval = stbi__get16be(z->s); +         return 1; + +      case 0xDB: // DQT - define quantization table +         L = stbi__get16be(z->s)-2; +         while (L > 0) { +            int q = stbi__get8(z->s); +            int p = q >> 4, sixteen = (p != 0); +            int t = q & 15,i; +            if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG"); +            if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG"); + +            for (i=0; i < 64; ++i) +               z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s)); +            L -= (sixteen ? 
129 : 65); +         } +         return L==0; + +      case 0xC4: // DHT - define huffman table +         L = stbi__get16be(z->s)-2; +         while (L > 0) { +            stbi_uc *v; +            int sizes[16],i,n=0; +            int q = stbi__get8(z->s); +            int tc = q >> 4; +            int th = q & 15; +            if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG"); +            for (i=0; i < 16; ++i) { +               sizes[i] = stbi__get8(z->s); +               n += sizes[i]; +            } +            if(n > 256) return stbi__err("bad DHT header","Corrupt JPEG"); // Loop over i < n would write past end of values! +            L -= 17; +            if (tc == 0) { +               if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0; +               v = z->huff_dc[th].values; +            } else { +               if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0; +               v = z->huff_ac[th].values; +            } +            for (i=0; i < n; ++i) +               v[i] = stbi__get8(z->s); +            if (tc != 0) +               stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th); +            L -= n; +         } +         return L==0; +   } + +   // check for comment block or APP blocks +   if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) { +      L = stbi__get16be(z->s); +      if (L < 2) { +         if (m == 0xFE) +            return stbi__err("bad COM len","Corrupt JPEG"); +         else +            return stbi__err("bad APP len","Corrupt JPEG"); +      } +      L -= 2; + +      if (m == 0xE0 && L >= 5) { // JFIF APP0 segment +         static const unsigned char tag[5] = {'J','F','I','F','\0'}; +         int ok = 1; +         int i; +         for (i=0; i < 5; ++i) +            if (stbi__get8(z->s) != tag[i]) +               ok = 0; +         L -= 5; +         if (ok) +            z->jfif = 1; +      } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment +         static const unsigned char tag[6] = {'A','d','o','b','e','\0'}; +         int ok = 1; +         int i; +         for (i=0; i < 6; ++i) +            if (stbi__get8(z->s) != tag[i]) +               ok = 0; +         L -= 6; +         if (ok) { +            stbi__get8(z->s); // version +            stbi__get16be(z->s); // flags0 +            stbi__get16be(z->s); // flags1 +            z->app14_color_transform = stbi__get8(z->s); // color transform +            L -= 6; +         } +      } + +      stbi__skip(z->s, L); +      return 1; +   } + +   return stbi__err("unknown marker","Corrupt JPEG"); +} + +// after we see SOS +static int stbi__process_scan_header(stbi__jpeg *z) +{ +   int i; +   int Ls = stbi__get16be(z->s); +   z->scan_n = stbi__get8(z->s); +   if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG"); +   if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG"); +   for (i=0; i < z->scan_n; ++i) { +      int id = stbi__get8(z->s), which; +      int q = stbi__get8(z->s); +      for (which = 0; which < z->s->img_n; ++which) +         if (z->img_comp[which].id == id) +            break; +      if (which == z->s->img_n) return 0; // no match +      z->img_comp[which].hd = q >> 4;   if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG"); +      z->img_comp[which].ha = q & 15;   if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG"); +      z->order[i] = which; +   } + +   { +      int aa; +      z->spec_start = stbi__get8(z->s); +      
z->spec_end   = stbi__get8(z->s); // should be 63, but might be 0 +      aa = stbi__get8(z->s); +      z->succ_high = (aa >> 4); +      z->succ_low  = (aa & 15); +      if (z->progressive) { +         if (z->spec_start > 63 || z->spec_end > 63  || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13) +            return stbi__err("bad SOS", "Corrupt JPEG"); +      } else { +         if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG"); +         if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG"); +         z->spec_end = 63; +      } +   } + +   return 1; +} + +static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why) +{ +   int i; +   for (i=0; i < ncomp; ++i) { +      if (z->img_comp[i].raw_data) { +         STBI_FREE(z->img_comp[i].raw_data); +         z->img_comp[i].raw_data = NULL; +         z->img_comp[i].data = NULL; +      } +      if (z->img_comp[i].raw_coeff) { +         STBI_FREE(z->img_comp[i].raw_coeff); +         z->img_comp[i].raw_coeff = 0; +         z->img_comp[i].coeff = 0; +      } +      if (z->img_comp[i].linebuf) { +         STBI_FREE(z->img_comp[i].linebuf); +         z->img_comp[i].linebuf = NULL; +      } +   } +   return why; +} + +static int stbi__process_frame_header(stbi__jpeg *z, int scan) +{ +   stbi__context *s = z->s; +   int Lf,p,i,q, h_max=1,v_max=1,c; +   Lf = stbi__get16be(s);         if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG +   p  = stbi__get8(s);            if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline +   s->img_y = stbi__get16be(s);   if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG +   s->img_x = stbi__get16be(s);   if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires +   if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); +   if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); +   c = stbi__get8(s); +   if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG"); +   s->img_n = c; +   for (i=0; i < c; ++i) { +      z->img_comp[i].data = NULL; +      z->img_comp[i].linebuf = NULL; +   } + +   if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG"); + +   z->rgb = 0; +   for (i=0; i < s->img_n; ++i) { +      static const unsigned char rgb[3] = { 'R', 'G', 'B' }; +      z->img_comp[i].id = stbi__get8(s); +      if (s->img_n == 3 && z->img_comp[i].id == rgb[i]) +         ++z->rgb; +      q = stbi__get8(s); +      z->img_comp[i].h = (q >> 4);  if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG"); +      z->img_comp[i].v = q & 15;    if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG"); +      z->img_comp[i].tq = stbi__get8(s);  if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG"); +   } + +   if (scan != STBI__SCAN_load) return 1; + +   if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode"); + +   for (i=0; i < s->img_n; ++i) { +      if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h; +      if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v; +   } + +   // check that plane subsampling factors are integer ratios; our resamplers can't deal with fractional ratios +   // and 
I've never seen a non-corrupted JPEG file actually use them +   for (i=0; i < s->img_n; ++i) { +      if (h_max % z->img_comp[i].h != 0) return stbi__err("bad H","Corrupt JPEG"); +      if (v_max % z->img_comp[i].v != 0) return stbi__err("bad V","Corrupt JPEG"); +   } + +   // compute interleaved mcu info +   z->img_h_max = h_max; +   z->img_v_max = v_max; +   z->img_mcu_w = h_max * 8; +   z->img_mcu_h = v_max * 8; +   // these sizes can't be more than 17 bits +   z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w; +   z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h; + +   for (i=0; i < s->img_n; ++i) { +      // number of effective pixels (e.g. for non-interleaved MCU) +      z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max; +      z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max; +      // to simplify generation, we'll allocate enough memory to decode +      // the bogus oversized data from using interleaved MCUs and their +      // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't +      // discard the extra data until colorspace conversion +      // +      // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier) +      // so these muls can't overflow with 32-bit ints (which we require) +      z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8; +      z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8; +      z->img_comp[i].coeff = 0; +      z->img_comp[i].raw_coeff = 0; +      z->img_comp[i].linebuf = NULL; +      z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15); +      if (z->img_comp[i].raw_data == NULL) +         return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); +      // align blocks for idct using mmx/sse +      z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15); +      if (z->progressive) { +         // w2, h2 are multiples of 8 (see above) +         z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8; +         z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8; +         z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15); +         if (z->img_comp[i].raw_coeff == NULL) +            return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); +         z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15); +      } +   } + +   return 1; +} + +// use comparisons since in some cases we handle more than one case (e.g. 
SOF) +#define stbi__DNL(x)         ((x) == 0xdc) +#define stbi__SOI(x)         ((x) == 0xd8) +#define stbi__EOI(x)         ((x) == 0xd9) +#define stbi__SOF(x)         ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2) +#define stbi__SOS(x)         ((x) == 0xda) + +#define stbi__SOF_progressive(x)   ((x) == 0xc2) + +static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan) +{ +   int m; +   z->jfif = 0; +   z->app14_color_transform = -1; // valid values are 0,1,2 +   z->marker = STBI__MARKER_none; // initialize cached marker to empty +   m = stbi__get_marker(z); +   if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG"); +   if (scan == STBI__SCAN_type) return 1; +   m = stbi__get_marker(z); +   while (!stbi__SOF(m)) { +      if (!stbi__process_marker(z,m)) return 0; +      m = stbi__get_marker(z); +      while (m == STBI__MARKER_none) { +         // some files have extra padding after their blocks, so ok, we'll scan +         if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG"); +         m = stbi__get_marker(z); +      } +   } +   z->progressive = stbi__SOF_progressive(m); +   if (!stbi__process_frame_header(z, scan)) return 0; +   return 1; +} + +static stbi_uc stbi__skip_jpeg_junk_at_end(stbi__jpeg *j) +{ +   // some JPEGs have junk at end, skip over it but if we find what looks +   // like a valid marker, resume there +   while (!stbi__at_eof(j->s)) { +      stbi_uc x = stbi__get8(j->s); +      while (x == 0xff) { // might be a marker +         if (stbi__at_eof(j->s)) return STBI__MARKER_none; +         x = stbi__get8(j->s); +         if (x != 0x00 && x != 0xff) { +            // not a stuffed zero or lead-in to another marker, looks +            // like an actual marker, return it +            return x; +         } +         // stuffed zero has x=0 now which ends the loop, meaning we go +         // back to regular scan loop. +         // repeated 0xff keeps trying to read the next byte of the marker. 
+      } +   } +   return STBI__MARKER_none; +} + +// decode image to YCbCr format +static int stbi__decode_jpeg_image(stbi__jpeg *j) +{ +   int m; +   for (m = 0; m < 4; m++) { +      j->img_comp[m].raw_data = NULL; +      j->img_comp[m].raw_coeff = NULL; +   } +   j->restart_interval = 0; +   if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0; +   m = stbi__get_marker(j); +   while (!stbi__EOI(m)) { +      if (stbi__SOS(m)) { +         if (!stbi__process_scan_header(j)) return 0; +         if (!stbi__parse_entropy_coded_data(j)) return 0; +         if (j->marker == STBI__MARKER_none ) { +         j->marker = stbi__skip_jpeg_junk_at_end(j); +            // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0 +         } +         m = stbi__get_marker(j); +         if (STBI__RESTART(m)) +            m = stbi__get_marker(j); +      } else if (stbi__DNL(m)) { +         int Ld = stbi__get16be(j->s); +         stbi__uint32 NL = stbi__get16be(j->s); +         if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG"); +         if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG"); +         m = stbi__get_marker(j); +      } else { +         if (!stbi__process_marker(j, m)) return 1; +         m = stbi__get_marker(j); +      } +   } +   if (j->progressive) +      stbi__jpeg_finish(j); +   return 1; +} + +// static jfif-centered resampling (across block boundaries) + +typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1, +                                    int w, int hs); + +#define stbi__div4(x) ((stbi_uc) ((x) >> 2)) + +static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ +   STBI_NOTUSED(out); +   STBI_NOTUSED(in_far); +   STBI_NOTUSED(w); +   STBI_NOTUSED(hs); +   return in_near; +} + +static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ +   // need to generate two samples vertically for every one in input +   int i; +   STBI_NOTUSED(hs); +   for (i=0; i < w; ++i) +      out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2); +   return out; +} + +static stbi_uc*  stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ +   // need to generate two samples horizontally for every one in input +   int i; +   stbi_uc *input = in_near; + +   if (w == 1) { +      // if only one sample, can't do any interpolation +      out[0] = out[1] = input[0]; +      return out; +   } + +   out[0] = input[0]; +   out[1] = stbi__div4(input[0]*3 + input[1] + 2); +   for (i=1; i < w-1; ++i) { +      int n = 3*input[i]+2; +      out[i*2+0] = stbi__div4(n+input[i-1]); +      out[i*2+1] = stbi__div4(n+input[i+1]); +   } +   out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2); +   out[i*2+1] = input[w-1]; + +   STBI_NOTUSED(in_far); +   STBI_NOTUSED(hs); + +   return out; +} + +#define stbi__div16(x) ((stbi_uc) ((x) >> 4)) + +static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ +   // need to generate 2x2 samples for every one in input +   int i,t0,t1; +   if (w == 1) { +      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); +      return out; +   } + +   t1 = 3*in_near[0] + in_far[0]; +   out[0] = stbi__div4(t1+2); +   for (i=1; i < w; ++i) { +      t0 = t1; +      t1 = 3*in_near[i]+in_far[i]; +      out[i*2-1] = stbi__div16(3*t0 + t1 + 8); +      out[i*2  ] = stbi__div16(3*t1 + t0 + 8); +   } +   out[w*2-1] = 
stbi__div4(t1+2); + +   STBI_NOTUSED(hs); + +   return out; +} + +#if defined(STBI_SSE2) || defined(STBI_NEON) +static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ +   // need to generate 2x2 samples for every one in input +   int i=0,t0,t1; + +   if (w == 1) { +      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); +      return out; +   } + +   t1 = 3*in_near[0] + in_far[0]; +   // process groups of 8 pixels for as long as we can. +   // note we can't handle the last pixel in a row in this loop +   // because we need to handle the filter boundary conditions. +   for (; i < ((w-1) & ~7); i += 8) { +#if defined(STBI_SSE2) +      // load and perform the vertical filtering pass +      // this uses 3*x + y = 4*x + (y - x) +      __m128i zero  = _mm_setzero_si128(); +      __m128i farb  = _mm_loadl_epi64((__m128i *) (in_far + i)); +      __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i)); +      __m128i farw  = _mm_unpacklo_epi8(farb, zero); +      __m128i nearw = _mm_unpacklo_epi8(nearb, zero); +      __m128i diff  = _mm_sub_epi16(farw, nearw); +      __m128i nears = _mm_slli_epi16(nearw, 2); +      __m128i curr  = _mm_add_epi16(nears, diff); // current row + +      // horizontal filter works the same based on shifted vers of current +      // row. "prev" is current row shifted right by 1 pixel; we need to +      // insert the previous pixel value (from t1). +      // "next" is current row shifted left by 1 pixel, with first pixel +      // of next block of 8 pixels added in. +      __m128i prv0 = _mm_slli_si128(curr, 2); +      __m128i nxt0 = _mm_srli_si128(curr, 2); +      __m128i prev = _mm_insert_epi16(prv0, t1, 0); +      __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7); + +      // horizontal filter, polyphase implementation since it's convenient: +      // even pixels = 3*cur + prev = cur*4 + (prev - cur) +      // odd  pixels = 3*cur + next = cur*4 + (next - cur) +      // note the shared term. +      __m128i bias  = _mm_set1_epi16(8); +      __m128i curs = _mm_slli_epi16(curr, 2); +      __m128i prvd = _mm_sub_epi16(prev, curr); +      __m128i nxtd = _mm_sub_epi16(next, curr); +      __m128i curb = _mm_add_epi16(curs, bias); +      __m128i even = _mm_add_epi16(prvd, curb); +      __m128i odd  = _mm_add_epi16(nxtd, curb); + +      // interleave even and odd pixels, then undo scaling. +      __m128i int0 = _mm_unpacklo_epi16(even, odd); +      __m128i int1 = _mm_unpackhi_epi16(even, odd); +      __m128i de0  = _mm_srli_epi16(int0, 4); +      __m128i de1  = _mm_srli_epi16(int1, 4); + +      // pack and write output +      __m128i outv = _mm_packus_epi16(de0, de1); +      _mm_storeu_si128((__m128i *) (out + i*2), outv); +#elif defined(STBI_NEON) +      // load and perform the vertical filtering pass +      // this uses 3*x + y = 4*x + (y - x) +      uint8x8_t farb  = vld1_u8(in_far + i); +      uint8x8_t nearb = vld1_u8(in_near + i); +      int16x8_t diff  = vreinterpretq_s16_u16(vsubl_u8(farb, nearb)); +      int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2)); +      int16x8_t curr  = vaddq_s16(nears, diff); // current row + +      // horizontal filter works the same based on shifted vers of current +      // row. "prev" is current row shifted right by 1 pixel; we need to +      // insert the previous pixel value (from t1). +      // "next" is current row shifted left by 1 pixel, with first pixel +      // of next block of 8 pixels added in. 
+      int16x8_t prv0 = vextq_s16(curr, curr, 7); +      int16x8_t nxt0 = vextq_s16(curr, curr, 1); +      int16x8_t prev = vsetq_lane_s16(t1, prv0, 0); +      int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7); + +      // horizontal filter, polyphase implementation since it's convenient: +      // even pixels = 3*cur + prev = cur*4 + (prev - cur) +      // odd  pixels = 3*cur + next = cur*4 + (next - cur) +      // note the shared term. +      int16x8_t curs = vshlq_n_s16(curr, 2); +      int16x8_t prvd = vsubq_s16(prev, curr); +      int16x8_t nxtd = vsubq_s16(next, curr); +      int16x8_t even = vaddq_s16(curs, prvd); +      int16x8_t odd  = vaddq_s16(curs, nxtd); + +      // undo scaling and round, then store with even/odd phases interleaved +      uint8x8x2_t o; +      o.val[0] = vqrshrun_n_s16(even, 4); +      o.val[1] = vqrshrun_n_s16(odd,  4); +      vst2_u8(out + i*2, o); +#endif + +      // "previous" value for next iter +      t1 = 3*in_near[i+7] + in_far[i+7]; +   } + +   t0 = t1; +   t1 = 3*in_near[i] + in_far[i]; +   out[i*2] = stbi__div16(3*t1 + t0 + 8); + +   for (++i; i < w; ++i) { +      t0 = t1; +      t1 = 3*in_near[i]+in_far[i]; +      out[i*2-1] = stbi__div16(3*t0 + t1 + 8); +      out[i*2  ] = stbi__div16(3*t1 + t0 + 8); +   } +   out[w*2-1] = stbi__div4(t1+2); + +   STBI_NOTUSED(hs); + +   return out; +} +#endif + +static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ +   // resample with nearest-neighbor +   int i,j; +   STBI_NOTUSED(in_far); +   for (i=0; i < w; ++i) +      for (j=0; j < hs; ++j) +         out[i*hs+j] = in_near[i]; +   return out; +} + +// this is a reduced-precision calculation of YCbCr-to-RGB introduced +// to make sure the code produces the same results in both SIMD and scalar +#define stbi__float2fixed(x)  (((int) ((x) * 4096.0f + 0.5f)) << 8) +static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step) +{ +   int i; +   for (i=0; i < count; ++i) { +      int y_fixed = (y[i] << 20) + (1<<19); // rounding +      int r,g,b; +      int cr = pcr[i] - 128; +      int cb = pcb[i] - 128; +      r = y_fixed +  cr* stbi__float2fixed(1.40200f); +      g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); +      b = y_fixed                                     +   cb* stbi__float2fixed(1.77200f); +      r >>= 20; +      g >>= 20; +      b >>= 20; +      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } +      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } +      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } +      out[0] = (stbi_uc)r; +      out[1] = (stbi_uc)g; +      out[2] = (stbi_uc)b; +      out[3] = 255; +      out += step; +   } +} + +#if defined(STBI_SSE2) || defined(STBI_NEON) +static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step) +{ +   int i = 0; + +#ifdef STBI_SSE2 +   // step == 3 is pretty ugly on the final interleave, and i'm not convinced +   // it's useful in practice (you wouldn't use it for textures, for example). +   // so just accelerate step == 4 case. +   if (step == 4) { +      // this is a fairly straightforward implementation and not super-optimized. 
+      __m128i signflip  = _mm_set1_epi8(-0x80); +      __m128i cr_const0 = _mm_set1_epi16(   (short) ( 1.40200f*4096.0f+0.5f)); +      __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f)); +      __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f)); +      __m128i cb_const1 = _mm_set1_epi16(   (short) ( 1.77200f*4096.0f+0.5f)); +      __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128); +      __m128i xw = _mm_set1_epi16(255); // alpha channel + +      for (; i+7 < count; i += 8) { +         // load +         __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i)); +         __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i)); +         __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i)); +         __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128 +         __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128 + +         // unpack to short (and left-shift cr, cb by 8) +         __m128i yw  = _mm_unpacklo_epi8(y_bias, y_bytes); +         __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased); +         __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased); + +         // color transform +         __m128i yws = _mm_srli_epi16(yw, 4); +         __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw); +         __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw); +         __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1); +         __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1); +         __m128i rws = _mm_add_epi16(cr0, yws); +         __m128i gwt = _mm_add_epi16(cb0, yws); +         __m128i bws = _mm_add_epi16(yws, cb1); +         __m128i gws = _mm_add_epi16(gwt, cr1); + +         // descale +         __m128i rw = _mm_srai_epi16(rws, 4); +         __m128i bw = _mm_srai_epi16(bws, 4); +         __m128i gw = _mm_srai_epi16(gws, 4); + +         // back to byte, set up for transpose +         __m128i brb = _mm_packus_epi16(rw, bw); +         __m128i gxb = _mm_packus_epi16(gw, xw); + +         // transpose to interleave channels +         __m128i t0 = _mm_unpacklo_epi8(brb, gxb); +         __m128i t1 = _mm_unpackhi_epi8(brb, gxb); +         __m128i o0 = _mm_unpacklo_epi16(t0, t1); +         __m128i o1 = _mm_unpackhi_epi16(t0, t1); + +         // store +         _mm_storeu_si128((__m128i *) (out + 0), o0); +         _mm_storeu_si128((__m128i *) (out + 16), o1); +         out += 32; +      } +   } +#endif + +#ifdef STBI_NEON +   // in this version, step=3 support would be easy to add. but is there demand? +   if (step == 4) { +      // this is a fairly straightforward implementation and not super-optimized. 
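In the SSE2 path above, the biased chroma bytes are widened into the high half of each 16-bit lane (roughly (cr - 128) << 8) and the constants are the same round(coef * 4096) values, so _mm_mulhi_epi16, which keeps the top 16 bits of the 32-bit product, leaves the result on the same 12.4 fixed-point scale as the luma term. A scalar sketch of a single lane (illustrative only; the Cr value is made up):

#include <stdio.h>

int main(void)
{
   short cr_const = (short)(1.40200f * 4096.0f + 0.5f);   // 5743, same as cr_const0 above
   short crw      = (short)((200 - 128) << 8);            // hypothetical Cr = 200, biased and shifted up
   int   lane     = ((int)crw * (int)cr_const) >> 16;     // what _mm_mulhi_epi16 keeps per lane
   printf("lane = %d, reference = %.1f\n", lane, (200 - 128) * 1.402f * 16.0f);
   return 0;
}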
+      uint8x8_t signflip = vdup_n_u8(0x80); +      int16x8_t cr_const0 = vdupq_n_s16(   (short) ( 1.40200f*4096.0f+0.5f)); +      int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f)); +      int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f)); +      int16x8_t cb_const1 = vdupq_n_s16(   (short) ( 1.77200f*4096.0f+0.5f)); + +      for (; i+7 < count; i += 8) { +         // load +         uint8x8_t y_bytes  = vld1_u8(y + i); +         uint8x8_t cr_bytes = vld1_u8(pcr + i); +         uint8x8_t cb_bytes = vld1_u8(pcb + i); +         int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip)); +         int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip)); + +         // expand to s16 +         int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4)); +         int16x8_t crw = vshll_n_s8(cr_biased, 7); +         int16x8_t cbw = vshll_n_s8(cb_biased, 7); + +         // color transform +         int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0); +         int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0); +         int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1); +         int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1); +         int16x8_t rws = vaddq_s16(yws, cr0); +         int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1); +         int16x8_t bws = vaddq_s16(yws, cb1); + +         // undo scaling, round, convert to byte +         uint8x8x4_t o; +         o.val[0] = vqrshrun_n_s16(rws, 4); +         o.val[1] = vqrshrun_n_s16(gws, 4); +         o.val[2] = vqrshrun_n_s16(bws, 4); +         o.val[3] = vdup_n_u8(255); + +         // store, interleaving r/g/b/a +         vst4_u8(out, o); +         out += 8*4; +      } +   } +#endif + +   for (; i < count; ++i) { +      int y_fixed = (y[i] << 20) + (1<<19); // rounding +      int r,g,b; +      int cr = pcr[i] - 128; +      int cb = pcb[i] - 128; +      r = y_fixed + cr* stbi__float2fixed(1.40200f); +      g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); +      b = y_fixed                                   +   cb* stbi__float2fixed(1.77200f); +      r >>= 20; +      g >>= 20; +      b >>= 20; +      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } +      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } +      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } +      out[0] = (stbi_uc)r; +      out[1] = (stbi_uc)g; +      out[2] = (stbi_uc)b; +      out[3] = 255; +      out += step; +   } +} +#endif + +// set up the kernels +static void stbi__setup_jpeg(stbi__jpeg *j) +{ +   j->idct_block_kernel = stbi__idct_block; +   j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row; +   j->resample_row_hv_2_kernel = stbi__resample_row_hv_2; + +#ifdef STBI_SSE2 +   if (stbi__sse2_available()) { +      j->idct_block_kernel = stbi__idct_simd; +      j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; +      j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; +   } +#endif + +#ifdef STBI_NEON +   j->idct_block_kernel = stbi__idct_simd; +   j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; +   j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; +#endif +} + +// clean up the temporary component buffers +static void stbi__cleanup_jpeg(stbi__jpeg *j) +{ +   stbi__free_jpeg_components(j, j->s->img_n, 0); +} + +typedef struct +{ +   resample_row_func resample; +   stbi_uc *line0,*line1; +   int hs,vs;   // expansion factor in each axis +   int w_lores; // horizontal pixels pre-expansion +   int ystep;   // how far through vertical 
expansion we are +   int ypos;    // which pre-expansion row we're on +} stbi__resample; + +// fast 0..255 * 0..255 => 0..255 rounded multiplication +static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y) +{ +   unsigned int t = x*y + 128; +   return (stbi_uc) ((t + (t >>8)) >> 8); +} + +static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp) +{ +   int n, decode_n, is_rgb; +   z->s->img_n = 0; // make stbi__cleanup_jpeg safe + +   // validate req_comp +   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); + +   // load a jpeg image from whichever source, but leave in YCbCr format +   if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; } + +   // determine actual number of components to generate +   n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1; + +   is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif)); + +   if (z->s->img_n == 3 && n < 3 && !is_rgb) +      decode_n = 1; +   else +      decode_n = z->s->img_n; + +   // nothing to do if no components requested; check this now to avoid +   // accessing uninitialized coutput[0] later +   if (decode_n <= 0) { stbi__cleanup_jpeg(z); return NULL; } + +   // resample and color-convert +   { +      int k; +      unsigned int i,j; +      stbi_uc *output; +      stbi_uc *coutput[4] = { NULL, NULL, NULL, NULL }; + +      stbi__resample res_comp[4]; + +      for (k=0; k < decode_n; ++k) { +         stbi__resample *r = &res_comp[k]; + +         // allocate line buffer big enough for upsampling off the edges +         // with upsample factor of 4 +         z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3); +         if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } + +         r->hs      = z->img_h_max / z->img_comp[k].h; +         r->vs      = z->img_v_max / z->img_comp[k].v; +         r->ystep   = r->vs >> 1; +         r->w_lores = (z->s->img_x + r->hs-1) / r->hs; +         r->ypos    = 0; +         r->line0   = r->line1 = z->img_comp[k].data; + +         if      (r->hs == 1 && r->vs == 1) r->resample = resample_row_1; +         else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2; +         else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2; +         else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel; +         else                               r->resample = stbi__resample_row_generic; +      } + +      // can't error after this so, this is safe +      output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1); +      if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } + +      // now go ahead and resample +      for (j=0; j < z->s->img_y; ++j) { +         stbi_uc *out = output + n * z->s->img_x * j; +         for (k=0; k < decode_n; ++k) { +            stbi__resample *r = &res_comp[k]; +            int y_bot = r->ystep >= (r->vs >> 1); +            coutput[k] = r->resample(z->img_comp[k].linebuf, +                                     y_bot ? r->line1 : r->line0, +                                     y_bot ? 
r->line0 : r->line1, +                                     r->w_lores, r->hs); +            if (++r->ystep >= r->vs) { +               r->ystep = 0; +               r->line0 = r->line1; +               if (++r->ypos < z->img_comp[k].y) +                  r->line1 += z->img_comp[k].w2; +            } +         } +         if (n >= 3) { +            stbi_uc *y = coutput[0]; +            if (z->s->img_n == 3) { +               if (is_rgb) { +                  for (i=0; i < z->s->img_x; ++i) { +                     out[0] = y[i]; +                     out[1] = coutput[1][i]; +                     out[2] = coutput[2][i]; +                     out[3] = 255; +                     out += n; +                  } +               } else { +                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); +               } +            } else if (z->s->img_n == 4) { +               if (z->app14_color_transform == 0) { // CMYK +                  for (i=0; i < z->s->img_x; ++i) { +                     stbi_uc m = coutput[3][i]; +                     out[0] = stbi__blinn_8x8(coutput[0][i], m); +                     out[1] = stbi__blinn_8x8(coutput[1][i], m); +                     out[2] = stbi__blinn_8x8(coutput[2][i], m); +                     out[3] = 255; +                     out += n; +                  } +               } else if (z->app14_color_transform == 2) { // YCCK +                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); +                  for (i=0; i < z->s->img_x; ++i) { +                     stbi_uc m = coutput[3][i]; +                     out[0] = stbi__blinn_8x8(255 - out[0], m); +                     out[1] = stbi__blinn_8x8(255 - out[1], m); +                     out[2] = stbi__blinn_8x8(255 - out[2], m); +                     out += n; +                  } +               } else { // YCbCr + alpha?  
Ignore the fourth channel for now +                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); +               } +            } else +               for (i=0; i < z->s->img_x; ++i) { +                  out[0] = out[1] = out[2] = y[i]; +                  out[3] = 255; // not used if n==3 +                  out += n; +               } +         } else { +            if (is_rgb) { +               if (n == 1) +                  for (i=0; i < z->s->img_x; ++i) +                     *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); +               else { +                  for (i=0; i < z->s->img_x; ++i, out += 2) { +                     out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); +                     out[1] = 255; +                  } +               } +            } else if (z->s->img_n == 4 && z->app14_color_transform == 0) { +               for (i=0; i < z->s->img_x; ++i) { +                  stbi_uc m = coutput[3][i]; +                  stbi_uc r = stbi__blinn_8x8(coutput[0][i], m); +                  stbi_uc g = stbi__blinn_8x8(coutput[1][i], m); +                  stbi_uc b = stbi__blinn_8x8(coutput[2][i], m); +                  out[0] = stbi__compute_y(r, g, b); +                  out[1] = 255; +                  out += n; +               } +            } else if (z->s->img_n == 4 && z->app14_color_transform == 2) { +               for (i=0; i < z->s->img_x; ++i) { +                  out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]); +                  out[1] = 255; +                  out += n; +               } +            } else { +               stbi_uc *y = coutput[0]; +               if (n == 1) +                  for (i=0; i < z->s->img_x; ++i) out[i] = y[i]; +               else +                  for (i=0; i < z->s->img_x; ++i) { *out++ = y[i]; *out++ = 255; } +            } +         } +      } +      stbi__cleanup_jpeg(z); +      *out_x = z->s->img_x; +      *out_y = z->s->img_y; +      if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output +      return output; +   } +} + +static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ +   unsigned char* result; +   stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg)); +   if (!j) return stbi__errpuc("outofmem", "Out of memory"); +   memset(j, 0, sizeof(stbi__jpeg)); +   STBI_NOTUSED(ri); +   j->s = s; +   stbi__setup_jpeg(j); +   result = load_jpeg_image(j, x,y,comp,req_comp); +   STBI_FREE(j); +   return result; +} + +static int stbi__jpeg_test(stbi__context *s) +{ +   int r; +   stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg)); +   if (!j) return stbi__err("outofmem", "Out of memory"); +   memset(j, 0, sizeof(stbi__jpeg)); +   j->s = s; +   stbi__setup_jpeg(j); +   r = stbi__decode_jpeg_header(j, STBI__SCAN_type); +   stbi__rewind(s); +   STBI_FREE(j); +   return r; +} + +static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp) +{ +   if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) { +      stbi__rewind( j->s ); +      return 0; +   } +   if (x) *x = j->s->img_x; +   if (y) *y = j->s->img_y; +   if (comp) *comp = j->s->img_n >= 3 ? 
3 : 1; +   return 1; +} + +static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp) +{ +   int result; +   stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg))); +   if (!j) return stbi__err("outofmem", "Out of memory"); +   memset(j, 0, sizeof(stbi__jpeg)); +   j->s = s; +   result = stbi__jpeg_info_raw(j, x, y, comp); +   STBI_FREE(j); +   return result; +} +#endif + +// public domain zlib decode    v0.2  Sean Barrett 2006-11-18 +//    simple implementation +//      - all input must be provided in an upfront buffer +//      - all output is written to a single output buffer (can malloc/realloc) +//    performance +//      - fast huffman + +#ifndef STBI_NO_ZLIB + +// fast-way is faster to check than jpeg huffman, but slow way is slower +#define STBI__ZFAST_BITS  9 // accelerate all cases in default tables +#define STBI__ZFAST_MASK  ((1 << STBI__ZFAST_BITS) - 1) +#define STBI__ZNSYMS 288 // number of symbols in literal/length alphabet + +// zlib-style huffman encoding +// (jpegs packs from left, zlib from right, so can't share code) +typedef struct +{ +   stbi__uint16 fast[1 << STBI__ZFAST_BITS]; +   stbi__uint16 firstcode[16]; +   int maxcode[17]; +   stbi__uint16 firstsymbol[16]; +   stbi_uc  size[STBI__ZNSYMS]; +   stbi__uint16 value[STBI__ZNSYMS]; +} stbi__zhuffman; + +stbi_inline static int stbi__bitreverse16(int n) +{ +  n = ((n & 0xAAAA) >>  1) | ((n & 0x5555) << 1); +  n = ((n & 0xCCCC) >>  2) | ((n & 0x3333) << 2); +  n = ((n & 0xF0F0) >>  4) | ((n & 0x0F0F) << 4); +  n = ((n & 0xFF00) >>  8) | ((n & 0x00FF) << 8); +  return n; +} + +stbi_inline static int stbi__bit_reverse(int v, int bits) +{ +   STBI_ASSERT(bits <= 16); +   // to bit reverse n bits, reverse 16 and shift +   // e.g. 11 bits, bit reverse and shift away 5 +   return stbi__bitreverse16(v) >> (16-bits); +} + +static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num) +{ +   int i,k=0; +   int code, next_code[16], sizes[17]; + +   // DEFLATE spec for generating codes +   memset(sizes, 0, sizeof(sizes)); +   memset(z->fast, 0, sizeof(z->fast)); +   for (i=0; i < num; ++i) +      ++sizes[sizelist[i]]; +   sizes[0] = 0; +   for (i=1; i < 16; ++i) +      if (sizes[i] > (1 << i)) +         return stbi__err("bad sizes", "Corrupt PNG"); +   code = 0; +   for (i=1; i < 16; ++i) { +      next_code[i] = code; +      z->firstcode[i] = (stbi__uint16) code; +      z->firstsymbol[i] = (stbi__uint16) k; +      code = (code + sizes[i]); +      if (sizes[i]) +         if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG"); +      z->maxcode[i] = code << (16-i); // preshift for inner loop +      code <<= 1; +      k += sizes[i]; +   } +   z->maxcode[16] = 0x10000; // sentinel +   for (i=0; i < num; ++i) { +      int s = sizelist[i]; +      if (s) { +         int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s]; +         stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i); +         z->size [c] = (stbi_uc     ) s; +         z->value[c] = (stbi__uint16) i; +         if (s <= STBI__ZFAST_BITS) { +            int j = stbi__bit_reverse(next_code[s],s); +            while (j < (1 << STBI__ZFAST_BITS)) { +               z->fast[j] = fastv; +               j += (1 << s); +            } +         } +         ++next_code[s]; +      } +   } +   return 1; +} + +// zlib-from-memory implementation for PNG reading +//    because PNG allows splitting the zlib stream arbitrarily, +//    and it's annoying structurally to have PNG call ZLIB call PNG, +//    we require PNG read 
all the IDATs and combine them into a single +//    memory buffer + +typedef struct +{ +   stbi_uc *zbuffer, *zbuffer_end; +   int num_bits; +   int hit_zeof_once; +   stbi__uint32 code_buffer; + +   char *zout; +   char *zout_start; +   char *zout_end; +   int   z_expandable; + +   stbi__zhuffman z_length, z_distance; +} stbi__zbuf; + +stbi_inline static int stbi__zeof(stbi__zbuf *z) +{ +   return (z->zbuffer >= z->zbuffer_end); +} + +stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z) +{ +   return stbi__zeof(z) ? 0 : *z->zbuffer++; +} + +static void stbi__fill_bits(stbi__zbuf *z) +{ +   do { +      if (z->code_buffer >= (1U << z->num_bits)) { +        z->zbuffer = z->zbuffer_end;  /* treat this as EOF so we fail. */ +        return; +      } +      z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits; +      z->num_bits += 8; +   } while (z->num_bits <= 24); +} + +stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n) +{ +   unsigned int k; +   if (z->num_bits < n) stbi__fill_bits(z); +   k = z->code_buffer & ((1 << n) - 1); +   z->code_buffer >>= n; +   z->num_bits -= n; +   return k; +} + +static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z) +{ +   int b,s,k; +   // not resolved by fast table, so compute it the slow way +   // use jpeg approach, which requires MSbits at top +   k = stbi__bit_reverse(a->code_buffer, 16); +   for (s=STBI__ZFAST_BITS+1; ; ++s) +      if (k < z->maxcode[s]) +         break; +   if (s >= 16) return -1; // invalid code! +   // code size is s, so: +   b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s]; +   if (b >= STBI__ZNSYMS) return -1; // some data was corrupt somewhere! +   if (z->size[b] != s) return -1;  // was originally an assert, but report failure instead. +   a->code_buffer >>= s; +   a->num_bits -= s; +   return z->value[b]; +} + +stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) +{ +   int b,s; +   if (a->num_bits < 16) { +      if (stbi__zeof(a)) { +         if (!a->hit_zeof_once) { +            // This is the first time we hit eof, insert 16 extra padding btis +            // to allow us to keep going; if we actually consume any of them +            // though, that is invalid data. This is caught later. +            a->hit_zeof_once = 1; +            a->num_bits += 16; // add 16 implicit zero bits +         } else { +            // We already inserted our extra 16 padding bits and are again +            // out, this stream is actually prematurely terminated. 
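The accelerated path above works because stbi__zbuild_huffman packs each short code's bit length and symbol into one 16-bit entry, (size << 9) | symbol, indexed by the next STBI__ZFAST_BITS bits of the stream; a single table lookup therefore yields both how many bits to consume and what was decoded. A tiny standalone illustration (the code length and symbol are hypothetical):

#include <stdio.h>

int main(void)
{
   unsigned short fastv = (unsigned short)((5 << 9) | 260);  // a 5-bit code mapping to symbol 260
   int size   = fastv >> 9;    // bits to drop from code_buffer
   int symbol = fastv & 511;   // decoded literal/length symbol
   printf("consume %d bits, symbol %d\n", size, symbol);
   return 0;
}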
+            return -1; +         } +      } else { +         stbi__fill_bits(a); +      } +   } +   b = z->fast[a->code_buffer & STBI__ZFAST_MASK]; +   if (b) { +      s = b >> 9; +      a->code_buffer >>= s; +      a->num_bits -= s; +      return b & 511; +   } +   return stbi__zhuffman_decode_slowpath(a, z); +} + +static int stbi__zexpand(stbi__zbuf *z, char *zout, int n)  // need to make room for n bytes +{ +   char *q; +   unsigned int cur, limit, old_limit; +   z->zout = zout; +   if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG"); +   cur   = (unsigned int) (z->zout - z->zout_start); +   limit = old_limit = (unsigned) (z->zout_end - z->zout_start); +   if (UINT_MAX - cur < (unsigned) n) return stbi__err("outofmem", "Out of memory"); +   while (cur + n > limit) { +      if(limit > UINT_MAX / 2) return stbi__err("outofmem", "Out of memory"); +      limit *= 2; +   } +   q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit); +   STBI_NOTUSED(old_limit); +   if (q == NULL) return stbi__err("outofmem", "Out of memory"); +   z->zout_start = q; +   z->zout       = q + cur; +   z->zout_end   = q + limit; +   return 1; +} + +static const int stbi__zlength_base[31] = { +   3,4,5,6,7,8,9,10,11,13, +   15,17,19,23,27,31,35,43,51,59, +   67,83,99,115,131,163,195,227,258,0,0 }; + +static const int stbi__zlength_extra[31]= +{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 }; + +static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, +257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0}; + +static const int stbi__zdist_extra[32] = +{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + +static int stbi__parse_huffman_block(stbi__zbuf *a) +{ +   char *zout = a->zout; +   for(;;) { +      int z = stbi__zhuffman_decode(a, &a->z_length); +      if (z < 256) { +         if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes +         if (zout >= a->zout_end) { +            if (!stbi__zexpand(a, zout, 1)) return 0; +            zout = a->zout; +         } +         *zout++ = (char) z; +      } else { +         stbi_uc *p; +         int len,dist; +         if (z == 256) { +            a->zout = zout; +            if (a->hit_zeof_once && a->num_bits < 16) { +               // The first time we hit zeof, we inserted 16 extra zero bits into our bit +               // buffer so the decoder can just do its speculative decoding. But if we +               // actually consumed any of those bits (which is the case when num_bits < 16), +               // the stream actually read past the end so it is malformed. 
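The base/extra tables above encode DEFLATE's match lengths and distances: a decoded length symbol in 257..285 selects a base length plus a count of extra bits read literally from the stream, and distances work the same way from their own alphabet. A short standalone example of the length side (the symbol and the extra bit read are made up):

#include <stdio.h>

static const int zlength_base[]  = { 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,
                                     67,83,99,115,131,163,195,227,258,0,0 };
static const int zlength_extra[] = { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };

int main(void)
{
   int symbol = 265;                    // hypothetical length symbol decoded from the stream
   int z      = symbol - 257;           // index into the tables, as in the block parser above
   int extra  = 1;                      // zlength_extra[z] == 1 bit; pretend we read a 1
   int len    = zlength_base[z] + extra;
   printf("match length = %d (base %d, %d extra bit)\n", len, zlength_base[z], zlength_extra[z]);
   return 0;
}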
+               return stbi__err("unexpected end","Corrupt PNG"); +            } +            return 1; +         } +         if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, length codes 286 and 287 must not appear in compressed data +         z -= 257; +         len = stbi__zlength_base[z]; +         if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]); +         z = stbi__zhuffman_decode(a, &a->z_distance); +         if (z < 0 || z >= 30) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, distance codes 30 and 31 must not appear in compressed data +         dist = stbi__zdist_base[z]; +         if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]); +         if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG"); +         if (len > a->zout_end - zout) { +            if (!stbi__zexpand(a, zout, len)) return 0; +            zout = a->zout; +         } +         p = (stbi_uc *) (zout - dist); +         if (dist == 1) { // run of one byte; common in images. +            stbi_uc v = *p; +            if (len) { do *zout++ = v; while (--len); } +         } else { +            if (len) { do *zout++ = *p++; while (--len); } +         } +      } +   } +} + +static int stbi__compute_huffman_codes(stbi__zbuf *a) +{ +   static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; +   stbi__zhuffman z_codelength; +   stbi_uc lencodes[286+32+137];//padding for maximum single op +   stbi_uc codelength_sizes[19]; +   int i,n; + +   int hlit  = stbi__zreceive(a,5) + 257; +   int hdist = stbi__zreceive(a,5) + 1; +   int hclen = stbi__zreceive(a,4) + 4; +   int ntot  = hlit + hdist; + +   memset(codelength_sizes, 0, sizeof(codelength_sizes)); +   for (i=0; i < hclen; ++i) { +      int s = stbi__zreceive(a,3); +      codelength_sizes[length_dezigzag[i]] = (stbi_uc) s; +   } +   if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0; + +   n = 0; +   while (n < ntot) { +      int c = stbi__zhuffman_decode(a, &z_codelength); +      if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG"); +      if (c < 16) +         lencodes[n++] = (stbi_uc) c; +      else { +         stbi_uc fill = 0; +         if (c == 16) { +            c = stbi__zreceive(a,2)+3; +            if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG"); +            fill = lencodes[n-1]; +         } else if (c == 17) { +            c = stbi__zreceive(a,3)+3; +         } else if (c == 18) { +            c = stbi__zreceive(a,7)+11; +         } else { +            return stbi__err("bad codelengths", "Corrupt PNG"); +         } +         if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG"); +         memset(lencodes+n, fill, c); +         n += c; +      } +   } +   if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG"); +   if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0; +   if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0; +   return 1; +} + +static int stbi__parse_uncompressed_block(stbi__zbuf *a) +{ +   stbi_uc header[4]; +   int len,nlen,k; +   if (a->num_bits & 7) +      stbi__zreceive(a, a->num_bits & 7); // discard +   // drain the bit-packed data into header +   k = 0; +   while (a->num_bits > 0) { +      header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check +      a->code_buffer >>= 8; +      a->num_bits -= 8; +   } +   if (a->num_bits < 0) return 
stbi__err("zlib corrupt","Corrupt PNG"); +   // now fill header the normal way +   while (k < 4) +      header[k++] = stbi__zget8(a); +   len  = header[1] * 256 + header[0]; +   nlen = header[3] * 256 + header[2]; +   if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG"); +   if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG"); +   if (a->zout + len > a->zout_end) +      if (!stbi__zexpand(a, a->zout, len)) return 0; +   memcpy(a->zout, a->zbuffer, len); +   a->zbuffer += len; +   a->zout += len; +   return 1; +} + +static int stbi__parse_zlib_header(stbi__zbuf *a) +{ +   int cmf   = stbi__zget8(a); +   int cm    = cmf & 15; +   /* int cinfo = cmf >> 4; */ +   int flg   = stbi__zget8(a); +   if (stbi__zeof(a)) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec +   if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec +   if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png +   if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png +   // window = 1 << (8 + cinfo)... but who cares, we fully buffer output +   return 1; +} + +static const stbi_uc stbi__zdefault_length[STBI__ZNSYMS] = +{ +   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, +   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, +   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, +   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, +   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, +   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, +   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, +   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, +   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8 +}; +static const stbi_uc stbi__zdefault_distance[32] = +{ +   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5 +}; +/* +Init algorithm: +{ +   int i;   // use <= to match clearly with spec +   for (i=0; i <= 143; ++i)     stbi__zdefault_length[i]   = 8; +   for (   ; i <= 255; ++i)     stbi__zdefault_length[i]   = 9; +   for (   ; i <= 279; ++i)     stbi__zdefault_length[i]   = 7; +   for (   ; i <= 287; ++i)     stbi__zdefault_length[i]   = 8; + +   for (i=0; i <=  31; ++i)     stbi__zdefault_distance[i] = 5; +} +*/ + +static int stbi__parse_zlib(stbi__zbuf *a, int parse_header) +{ +   int final, type; +   if (parse_header) +      if (!stbi__parse_zlib_header(a)) return 0; +   a->num_bits = 0; +   a->code_buffer = 0; +   a->hit_zeof_once = 0; +   do { +      final = stbi__zreceive(a,1); +      type = stbi__zreceive(a,2); +      if (type == 0) { +         if (!stbi__parse_uncompressed_block(a)) return 0; +      } else if (type == 3) { +         return 0; +      } else { +         if (type == 1) { +            // use fixed code lengths +            if (!stbi__zbuild_huffman(&a->z_length  , stbi__zdefault_length  , STBI__ZNSYMS)) return 0; +            if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance,  32)) return 0; +         } else { +            if (!stbi__compute_huffman_codes(a)) return 0; +         } +         if (!stbi__parse_huffman_block(a)) return 0; +      } +   } while (!final); +   return 1; +} + +static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header) +{ +   a->zout_start = obuf; +   a->zout       = obuf; +   a->zout_end  
 = obuf + olen; +   a->z_expandable = exp; + +   return stbi__parse_zlib(a, parse_header); +} + +STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen) +{ +   stbi__zbuf a; +   char *p = (char *) stbi__malloc(initial_size); +   if (p == NULL) return NULL; +   a.zbuffer = (stbi_uc *) buffer; +   a.zbuffer_end = (stbi_uc *) buffer + len; +   if (stbi__do_zlib(&a, p, initial_size, 1, 1)) { +      if (outlen) *outlen = (int) (a.zout - a.zout_start); +      return a.zout_start; +   } else { +      STBI_FREE(a.zout_start); +      return NULL; +   } +} + +STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen) +{ +   return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen); +} + +STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header) +{ +   stbi__zbuf a; +   char *p = (char *) stbi__malloc(initial_size); +   if (p == NULL) return NULL; +   a.zbuffer = (stbi_uc *) buffer; +   a.zbuffer_end = (stbi_uc *) buffer + len; +   if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) { +      if (outlen) *outlen = (int) (a.zout - a.zout_start); +      return a.zout_start; +   } else { +      STBI_FREE(a.zout_start); +      return NULL; +   } +} + +STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen) +{ +   stbi__zbuf a; +   a.zbuffer = (stbi_uc *) ibuffer; +   a.zbuffer_end = (stbi_uc *) ibuffer + ilen; +   if (stbi__do_zlib(&a, obuffer, olen, 0, 1)) +      return (int) (a.zout - a.zout_start); +   else +      return -1; +} + +STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen) +{ +   stbi__zbuf a; +   char *p = (char *) stbi__malloc(16384); +   if (p == NULL) return NULL; +   a.zbuffer = (stbi_uc *) buffer; +   a.zbuffer_end = (stbi_uc *) buffer+len; +   if (stbi__do_zlib(&a, p, 16384, 1, 0)) { +      if (outlen) *outlen = (int) (a.zout - a.zout_start); +      return a.zout_start; +   } else { +      STBI_FREE(a.zout_start); +      return NULL; +   } +} + +STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen) +{ +   stbi__zbuf a; +   a.zbuffer = (stbi_uc *) ibuffer; +   a.zbuffer_end = (stbi_uc *) ibuffer + ilen; +   if (stbi__do_zlib(&a, obuffer, olen, 0, 0)) +      return (int) (a.zout - a.zout_start); +   else +      return -1; +} +#endif + +// public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18 +//    simple implementation +//      - only 8-bit samples +//      - no CRC checking +//      - allocates lots of intermediate memory +//        - avoids problem of streaming data between subsystems +//        - avoids explicit window management +//    performance +//      - uses stb_zlib, a PD zlib implementation with fast huffman decoding + +#ifndef STBI_NO_PNG +typedef struct +{ +   stbi__uint32 length; +   stbi__uint32 type; +} stbi__pngchunk; + +static stbi__pngchunk stbi__get_chunk_header(stbi__context *s) +{ +   stbi__pngchunk c; +   c.length = stbi__get32be(s); +   c.type   = stbi__get32be(s); +   return c; +} + +static int stbi__check_png_header(stbi__context *s) +{ +   static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 }; +   int i; +   for (i=0; i < 8; ++i) +      if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG"); +   return 1; +} + +typedef struct +{ +   stbi__context *s; +   stbi_uc *idata, *expanded, *out; +   int depth; +} stbi__png; + + +enum { +   
STBI__F_none=0, +   STBI__F_sub=1, +   STBI__F_up=2, +   STBI__F_avg=3, +   STBI__F_paeth=4, +   // synthetic filter used for first scanline to avoid needing a dummy row of 0s +   STBI__F_avg_first +}; + +static stbi_uc first_row_filter[5] = +{ +   STBI__F_none, +   STBI__F_sub, +   STBI__F_none, +   STBI__F_avg_first, +   STBI__F_sub // Paeth with b=c=0 turns out to be equivalent to sub +}; + +static int stbi__paeth(int a, int b, int c) +{ +   // This formulation looks very different from the reference in the PNG spec, but is +   // actually equivalent and has favorable data dependencies and admits straightforward +   // generation of branch-free code, which helps performance significantly. +   int thresh = c*3 - (a + b); +   int lo = a < b ? a : b; +   int hi = a < b ? b : a; +   int t0 = (hi <= thresh) ? lo : c; +   int t1 = (thresh <= lo) ? hi : t0; +   return t1; +} + +static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 }; + +// adds an extra all-255 alpha channel +// dest == src is legal +// img_n must be 1 or 3 +static void stbi__create_png_alpha_expand8(stbi_uc *dest, stbi_uc *src, stbi__uint32 x, int img_n) +{ +   int i; +   // must process data backwards since we allow dest==src +   if (img_n == 1) { +      for (i=x-1; i >= 0; --i) { +         dest[i*2+1] = 255; +         dest[i*2+0] = src[i]; +      } +   } else { +      STBI_ASSERT(img_n == 3); +      for (i=x-1; i >= 0; --i) { +         dest[i*4+3] = 255; +         dest[i*4+2] = src[i*3+2]; +         dest[i*4+1] = src[i*3+1]; +         dest[i*4+0] = src[i*3+0]; +      } +   } +} + +// create the png data from post-deflated data +static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color) +{ +   int bytes = (depth == 16 ? 2 : 1); +   stbi__context *s = a->s; +   stbi__uint32 i,j,stride = x*out_n*bytes; +   stbi__uint32 img_len, img_width_bytes; +   stbi_uc *filter_buf; +   int all_ok = 1; +   int k; +   int img_n = s->img_n; // copy it into a local for later + +   int output_bytes = out_n*bytes; +   int filter_bytes = img_n*bytes; +   int width = x; + +   STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1); +   a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into +   if (!a->out) return stbi__err("outofmem", "Out of memory"); + +   // note: error exits here don't need to clean up a->out individually, +   // stbi__do_png always does on error. +   if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG"); +   img_width_bytes = (((img_n * x * depth) + 7) >> 3); +   if (!stbi__mad2sizes_valid(img_width_bytes, y, img_width_bytes)) return stbi__err("too large", "Corrupt PNG"); +   img_len = (img_width_bytes + 1) * y; + +   // we used to check for exact match between raw_len and img_len on non-interlaced PNGs, +   // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros), +   // so just check for raw_len < img_len always. +   if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG"); + +   // Allocate two scan lines worth of filter workspace buffer. 
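The comment above claims the formulation in stbi__paeth is equivalent to the PNG spec's Paeth predictor. The spec version below, together with a copy of the branch-free formulation, makes that easy to brute-force over all byte triples (a standalone reference sketch, not part of stb_image):

#include <assert.h>
#include <stdio.h>

// PNG-spec Paeth predictor: pick whichever of a (left), b (above), c (upper-left)
// is closest to p = a + b - c, preferring a, then b, then c on ties.
static int paeth_ref(int a, int b, int c)
{
   int p  = a + b - c;
   int pa = p > a ? p - a : a - p;
   int pb = p > b ? p - b : b - p;
   int pc = p > c ? p - c : c - p;
   if (pa <= pb && pa <= pc) return a;
   if (pb <= pc) return b;
   return c;
}

// Same formulation as stbi__paeth above, copied here so the check is self-contained.
static int paeth_fast(int a, int b, int c)
{
   int thresh = c*3 - (a + b);
   int lo = a < b ? a : b;
   int hi = a < b ? b : a;
   int t0 = (hi <= thresh) ? lo : c;
   int t1 = (thresh <= lo) ? hi : t0;
   return t1;
}

int main(void)
{
   for (int a = 0; a < 256; ++a)
      for (int b = 0; b < 256; ++b)
         for (int c = 0; c < 256; ++c)
            assert(paeth_ref(a, b, c) == paeth_fast(a, b, c));
   printf("both predictors agree on all byte triples\n");
   return 0;
}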
+   filter_buf = (stbi_uc *) stbi__malloc_mad2(img_width_bytes, 2, 0); +   if (!filter_buf) return stbi__err("outofmem", "Out of memory"); + +   // Filtering for low-bit-depth images +   if (depth < 8) { +      filter_bytes = 1; +      width = img_width_bytes; +   } + +   for (j=0; j < y; ++j) { +      // cur/prior filter buffers alternate +      stbi_uc *cur = filter_buf + (j & 1)*img_width_bytes; +      stbi_uc *prior = filter_buf + (~j & 1)*img_width_bytes; +      stbi_uc *dest = a->out + stride*j; +      int nk = width * filter_bytes; +      int filter = *raw++; + +      // check filter type +      if (filter > 4) { +         all_ok = stbi__err("invalid filter","Corrupt PNG"); +         break; +      } + +      // if first row, use special filter that doesn't sample previous row +      if (j == 0) filter = first_row_filter[filter]; + +      // perform actual filtering +      switch (filter) { +      case STBI__F_none: +         memcpy(cur, raw, nk); +         break; +      case STBI__F_sub: +         memcpy(cur, raw, filter_bytes); +         for (k = filter_bytes; k < nk; ++k) +            cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); +         break; +      case STBI__F_up: +         for (k = 0; k < nk; ++k) +            cur[k] = STBI__BYTECAST(raw[k] + prior[k]); +         break; +      case STBI__F_avg: +         for (k = 0; k < filter_bytes; ++k) +            cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); +         for (k = filter_bytes; k < nk; ++k) +            cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); +         break; +      case STBI__F_paeth: +         for (k = 0; k < filter_bytes; ++k) +            cur[k] = STBI__BYTECAST(raw[k] + prior[k]); // prior[k] == stbi__paeth(0,prior[k],0) +         for (k = filter_bytes; k < nk; ++k) +            cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes], prior[k], prior[k-filter_bytes])); +         break; +      case STBI__F_avg_first: +         memcpy(cur, raw, filter_bytes); +         for (k = filter_bytes; k < nk; ++k) +            cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); +         break; +      } + +      raw += nk; + +      // expand decoded bits in cur to dest, also adding an extra alpha channel if desired +      if (depth < 8) { +         stbi_uc scale = (color == 0) ? 
stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range +         stbi_uc *in = cur; +         stbi_uc *out = dest; +         stbi_uc inb = 0; +         stbi__uint32 nsmp = x*img_n; + +         // expand bits to bytes first +         if (depth == 4) { +            for (i=0; i < nsmp; ++i) { +               if ((i & 1) == 0) inb = *in++; +               *out++ = scale * (inb >> 4); +               inb <<= 4; +            } +         } else if (depth == 2) { +            for (i=0; i < nsmp; ++i) { +               if ((i & 3) == 0) inb = *in++; +               *out++ = scale * (inb >> 6); +               inb <<= 2; +            } +         } else { +            STBI_ASSERT(depth == 1); +            for (i=0; i < nsmp; ++i) { +               if ((i & 7) == 0) inb = *in++; +               *out++ = scale * (inb >> 7); +               inb <<= 1; +            } +         } + +         // insert alpha=255 values if desired +         if (img_n != out_n) +            stbi__create_png_alpha_expand8(dest, dest, x, img_n); +      } else if (depth == 8) { +         if (img_n == out_n) +            memcpy(dest, cur, x*img_n); +         else +            stbi__create_png_alpha_expand8(dest, cur, x, img_n); +      } else if (depth == 16) { +         // convert the image data from big-endian to platform-native +         stbi__uint16 *dest16 = (stbi__uint16*)dest; +         stbi__uint32 nsmp = x*img_n; + +         if (img_n == out_n) { +            for (i = 0; i < nsmp; ++i, ++dest16, cur += 2) +               *dest16 = (cur[0] << 8) | cur[1]; +         } else { +            STBI_ASSERT(img_n+1 == out_n); +            if (img_n == 1) { +               for (i = 0; i < x; ++i, dest16 += 2, cur += 2) { +                  dest16[0] = (cur[0] << 8) | cur[1]; +                  dest16[1] = 0xffff; +               } +            } else { +               STBI_ASSERT(img_n == 3); +               for (i = 0; i < x; ++i, dest16 += 4, cur += 6) { +                  dest16[0] = (cur[0] << 8) | cur[1]; +                  dest16[1] = (cur[2] << 8) | cur[3]; +                  dest16[2] = (cur[4] << 8) | cur[5]; +                  dest16[3] = 0xffff; +               } +            } +         } +      } +   } + +   STBI_FREE(filter_buf); +   if (!all_ok) return 0; + +   return 1; +} + +static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced) +{ +   int bytes = (depth == 16 ? 
2 : 1); +   int out_bytes = out_n * bytes; +   stbi_uc *final; +   int p; +   if (!interlaced) +      return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color); + +   // de-interlacing +   final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0); +   if (!final) return stbi__err("outofmem", "Out of memory"); +   for (p=0; p < 7; ++p) { +      int xorig[] = { 0,4,0,2,0,1,0 }; +      int yorig[] = { 0,0,4,0,2,0,1 }; +      int xspc[]  = { 8,8,4,4,2,2,1 }; +      int yspc[]  = { 8,8,8,4,4,2,2 }; +      int i,j,x,y; +      // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1 +      x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p]; +      y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p]; +      if (x && y) { +         stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y; +         if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) { +            STBI_FREE(final); +            return 0; +         } +         for (j=0; j < y; ++j) { +            for (i=0; i < x; ++i) { +               int out_y = j*yspc[p]+yorig[p]; +               int out_x = i*xspc[p]+xorig[p]; +               memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes, +                      a->out + (j*x+i)*out_bytes, out_bytes); +            } +         } +         STBI_FREE(a->out); +         image_data += img_len; +         image_data_len -= img_len; +      } +   } +   a->out = final; + +   return 1; +} + +static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n) +{ +   stbi__context *s = z->s; +   stbi__uint32 i, pixel_count = s->img_x * s->img_y; +   stbi_uc *p = z->out; + +   // compute color-based transparency, assuming we've +   // already got 255 as the alpha value in the output +   STBI_ASSERT(out_n == 2 || out_n == 4); + +   if (out_n == 2) { +      for (i=0; i < pixel_count; ++i) { +         p[1] = (p[0] == tc[0] ? 0 : 255); +         p += 2; +      } +   } else { +      for (i=0; i < pixel_count; ++i) { +         if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) +            p[3] = 0; +         p += 4; +      } +   } +   return 1; +} + +static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n) +{ +   stbi__context *s = z->s; +   stbi__uint32 i, pixel_count = s->img_x * s->img_y; +   stbi__uint16 *p = (stbi__uint16*) z->out; + +   // compute color-based transparency, assuming we've +   // already got 65535 as the alpha value in the output +   STBI_ASSERT(out_n == 2 || out_n == 4); + +   if (out_n == 2) { +      for (i = 0; i < pixel_count; ++i) { +         p[1] = (p[0] == tc[0] ? 
0 : 65535); +         p += 2; +      } +   } else { +      for (i = 0; i < pixel_count; ++i) { +         if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) +            p[3] = 0; +         p += 4; +      } +   } +   return 1; +} + +static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n) +{ +   stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y; +   stbi_uc *p, *temp_out, *orig = a->out; + +   p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0); +   if (p == NULL) return stbi__err("outofmem", "Out of memory"); + +   // between here and free(out) below, exitting would leak +   temp_out = p; + +   if (pal_img_n == 3) { +      for (i=0; i < pixel_count; ++i) { +         int n = orig[i]*4; +         p[0] = palette[n  ]; +         p[1] = palette[n+1]; +         p[2] = palette[n+2]; +         p += 3; +      } +   } else { +      for (i=0; i < pixel_count; ++i) { +         int n = orig[i]*4; +         p[0] = palette[n  ]; +         p[1] = palette[n+1]; +         p[2] = palette[n+2]; +         p[3] = palette[n+3]; +         p += 4; +      } +   } +   STBI_FREE(a->out); +   a->out = temp_out; + +   STBI_NOTUSED(len); + +   return 1; +} + +static int stbi__unpremultiply_on_load_global = 0; +static int stbi__de_iphone_flag_global = 0; + +STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply) +{ +   stbi__unpremultiply_on_load_global = flag_true_if_should_unpremultiply; +} + +STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert) +{ +   stbi__de_iphone_flag_global = flag_true_if_should_convert; +} + +#ifndef STBI_THREAD_LOCAL +#define stbi__unpremultiply_on_load  stbi__unpremultiply_on_load_global +#define stbi__de_iphone_flag  stbi__de_iphone_flag_global +#else +static STBI_THREAD_LOCAL int stbi__unpremultiply_on_load_local, stbi__unpremultiply_on_load_set; +static STBI_THREAD_LOCAL int stbi__de_iphone_flag_local, stbi__de_iphone_flag_set; + +STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply) +{ +   stbi__unpremultiply_on_load_local = flag_true_if_should_unpremultiply; +   stbi__unpremultiply_on_load_set = 1; +} + +STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert) +{ +   stbi__de_iphone_flag_local = flag_true_if_should_convert; +   stbi__de_iphone_flag_set = 1; +} + +#define stbi__unpremultiply_on_load  (stbi__unpremultiply_on_load_set           \ +                                       ? stbi__unpremultiply_on_load_local      \ +                                       : stbi__unpremultiply_on_load_global) +#define stbi__de_iphone_flag  (stbi__de_iphone_flag_set                         \ +                                ? 
stbi__de_iphone_flag_local                    \ +                                : stbi__de_iphone_flag_global) +#endif // STBI_THREAD_LOCAL + +static void stbi__de_iphone(stbi__png *z) +{ +   stbi__context *s = z->s; +   stbi__uint32 i, pixel_count = s->img_x * s->img_y; +   stbi_uc *p = z->out; + +   if (s->img_out_n == 3) {  // convert bgr to rgb +      for (i=0; i < pixel_count; ++i) { +         stbi_uc t = p[0]; +         p[0] = p[2]; +         p[2] = t; +         p += 3; +      } +   } else { +      STBI_ASSERT(s->img_out_n == 4); +      if (stbi__unpremultiply_on_load) { +         // convert bgr to rgb and unpremultiply +         for (i=0; i < pixel_count; ++i) { +            stbi_uc a = p[3]; +            stbi_uc t = p[0]; +            if (a) { +               stbi_uc half = a / 2; +               p[0] = (p[2] * 255 + half) / a; +               p[1] = (p[1] * 255 + half) / a; +               p[2] = ( t   * 255 + half) / a; +            } else { +               p[0] = p[2]; +               p[2] = t; +            } +            p += 4; +         } +      } else { +         // convert bgr to rgb +         for (i=0; i < pixel_count; ++i) { +            stbi_uc t = p[0]; +            p[0] = p[2]; +            p[2] = t; +            p += 4; +         } +      } +   } +} + +#define STBI__PNG_TYPE(a,b,c,d)  (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d)) + +static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) +{ +   stbi_uc palette[1024], pal_img_n=0; +   stbi_uc has_trans=0, tc[3]={0}; +   stbi__uint16 tc16[3]; +   stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0; +   int first=1,k,interlace=0, color=0, is_iphone=0; +   stbi__context *s = z->s; + +   z->expanded = NULL; +   z->idata = NULL; +   z->out = NULL; + +   if (!stbi__check_png_header(s)) return 0; + +   if (scan == STBI__SCAN_type) return 1; + +   for (;;) { +      stbi__pngchunk c = stbi__get_chunk_header(s); +      switch (c.type) { +         case STBI__PNG_TYPE('C','g','B','I'): +            is_iphone = 1; +            stbi__skip(s, c.length); +            break; +         case STBI__PNG_TYPE('I','H','D','R'): { +            int comp,filter; +            if (!first) return stbi__err("multiple IHDR","Corrupt PNG"); +            first = 0; +            if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG"); +            s->img_x = stbi__get32be(s); +            s->img_y = stbi__get32be(s); +            if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); +            if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); +            z->depth = stbi__get8(s);  if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16)  return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only"); +            color = stbi__get8(s);  if (color > 6)         return stbi__err("bad ctype","Corrupt PNG"); +            if (color == 3 && z->depth == 16)                  return stbi__err("bad ctype","Corrupt PNG"); +            if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG"); +            comp  = stbi__get8(s);  if (comp) return stbi__err("bad comp method","Corrupt PNG"); +            filter= stbi__get8(s);  if (filter) return stbi__err("bad filter method","Corrupt PNG"); +            interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG"); +            if 
(!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG"); +            if (!pal_img_n) { +               s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); +               if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode"); +            } else { +               // if paletted, then pal_n is our final components, and +               // img_n is # components to decompress/filter. +               s->img_n = 1; +               if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG"); +            } +            // even with SCAN_header, have to scan to see if we have a tRNS +            break; +         } + +         case STBI__PNG_TYPE('P','L','T','E'):  { +            if (first) return stbi__err("first not IHDR", "Corrupt PNG"); +            if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG"); +            pal_len = c.length / 3; +            if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG"); +            for (i=0; i < pal_len; ++i) { +               palette[i*4+0] = stbi__get8(s); +               palette[i*4+1] = stbi__get8(s); +               palette[i*4+2] = stbi__get8(s); +               palette[i*4+3] = 255; +            } +            break; +         } + +         case STBI__PNG_TYPE('t','R','N','S'): { +            if (first) return stbi__err("first not IHDR", "Corrupt PNG"); +            if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG"); +            if (pal_img_n) { +               if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; } +               if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG"); +               if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG"); +               pal_img_n = 4; +               for (i=0; i < c.length; ++i) +                  palette[i*4+3] = stbi__get8(s); +            } else { +               if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG"); +               if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG"); +               has_trans = 1; +               // non-paletted with tRNS = constant alpha. if header-scanning, we can stop now. 
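The chunk dispatch in this parser compares the 32-bit type read by stbi__get32be(s) against STBI__PNG_TYPE(...) constants, which simply pack the four ASCII type bytes most-significant-first into one integer. A standalone check (the macro is restated locally so the snippet compiles on its own):

#include <stdio.h>

#define PNG_TYPE(a,b,c,d) (((unsigned)(a) << 24) + ((unsigned)(b) << 16) + ((unsigned)(c) << 8) + (unsigned)(d))

int main(void)
{
   // 't'=0x74, 'R'=0x52, 'N'=0x4E, 'S'=0x53, so a tRNS chunk header reads back as 0x74524E53
   printf("tRNS = 0x%08X\n", PNG_TYPE('t','R','N','S'));
   return 0;
}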
+               if (scan == STBI__SCAN_header) { ++s->img_n; return 1; } +               if (z->depth == 16) { +                  for (k = 0; k < s->img_n && k < 3; ++k) // extra loop test to suppress false GCC warning +                     tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is +               } else { +                  for (k = 0; k < s->img_n && k < 3; ++k) +                     tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger +               } +            } +            break; +         } + +         case STBI__PNG_TYPE('I','D','A','T'): { +            if (first) return stbi__err("first not IHDR", "Corrupt PNG"); +            if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG"); +            if (scan == STBI__SCAN_header) { +               // header scan definitely stops at first IDAT +               if (pal_img_n) +                  s->img_n = pal_img_n; +               return 1; +            } +            if (c.length > (1u << 30)) return stbi__err("IDAT size limit", "IDAT section larger than 2^30 bytes"); +            if ((int)(ioff + c.length) < (int)ioff) return 0; +            if (ioff + c.length > idata_limit) { +               stbi__uint32 idata_limit_old = idata_limit; +               stbi_uc *p; +               if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096; +               while (ioff + c.length > idata_limit) +                  idata_limit *= 2; +               STBI_NOTUSED(idata_limit_old); +               p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory"); +               z->idata = p; +            } +            if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG"); +            ioff += c.length; +            break; +         } + +         case STBI__PNG_TYPE('I','E','N','D'): { +            stbi__uint32 raw_len, bpl; +            if (first) return stbi__err("first not IHDR", "Corrupt PNG"); +            if (scan != STBI__SCAN_load) return 1; +            if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG"); +            // initial guess for decoded data size to avoid unnecessary reallocs +            bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component +            raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */; +            z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone); +            if (z->expanded == NULL) return 0; // zlib should set error +            STBI_FREE(z->idata); z->idata = NULL; +            if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans) +               s->img_out_n = s->img_n+1; +            else +               s->img_out_n = s->img_n; +            if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0; +            if (has_trans) { +               if (z->depth == 16) { +                  if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0; +               } else { +                  if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0; +               } +            } +            if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2) +               stbi__de_iphone(z); +            if (pal_img_n) { +               // pal_img_n == 3 or 4 +        
       s->img_n = pal_img_n; // record the actual colors we had +               s->img_out_n = pal_img_n; +               if (req_comp >= 3) s->img_out_n = req_comp; +               if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n)) +                  return 0; +            } else if (has_trans) { +               // non-paletted image with tRNS -> source image has (constant) alpha +               ++s->img_n; +            } +            STBI_FREE(z->expanded); z->expanded = NULL; +            // end of PNG chunk, read and skip CRC +            stbi__get32be(s); +            return 1; +         } + +         default: +            // if critical, fail +            if (first) return stbi__err("first not IHDR", "Corrupt PNG"); +            if ((c.type & (1 << 29)) == 0) { +               #ifndef STBI_NO_FAILURE_STRINGS +               // not threadsafe +               static char invalid_chunk[] = "XXXX PNG chunk not known"; +               invalid_chunk[0] = STBI__BYTECAST(c.type >> 24); +               invalid_chunk[1] = STBI__BYTECAST(c.type >> 16); +               invalid_chunk[2] = STBI__BYTECAST(c.type >>  8); +               invalid_chunk[3] = STBI__BYTECAST(c.type >>  0); +               #endif +               return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type"); +            } +            stbi__skip(s, c.length); +            break; +      } +      // end of PNG chunk, read and skip CRC +      stbi__get32be(s); +   } +} + +static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri) +{ +   void *result=NULL; +   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); +   if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) { +      if (p->depth <= 8) +         ri->bits_per_channel = 8; +      else if (p->depth == 16) +         ri->bits_per_channel = 16; +      else +         return stbi__errpuc("bad bits_per_channel", "PNG not supported: unsupported color depth"); +      result = p->out; +      p->out = NULL; +      if (req_comp && req_comp != p->s->img_out_n) { +         if (ri->bits_per_channel == 8) +            result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); +         else +            result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); +         p->s->img_out_n = req_comp; +         if (result == NULL) return result; +      } +      *x = p->s->img_x; +      *y = p->s->img_y; +      if (n) *n = p->s->img_n; +   } +   STBI_FREE(p->out);      p->out      = NULL; +   STBI_FREE(p->expanded); p->expanded = NULL; +   STBI_FREE(p->idata);    p->idata    = NULL; + +   return result; +} + +static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ +   stbi__png p; +   p.s = s; +   return stbi__do_png(&p, x,y,comp,req_comp, ri); +} + +static int stbi__png_test(stbi__context *s) +{ +   int r; +   r = stbi__check_png_header(s); +   stbi__rewind(s); +   return r; +} + +static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp) +{ +   if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) { +      stbi__rewind( p->s ); +      return 0; +   } +   if (x) *x = p->s->img_x; +   if (y) *y = p->s->img_y; +   if (comp) *comp = p->s->img_n; +   return 1; +} + +static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp) +{ +   stbi__png p; +   p.s = s; +   return stbi__png_info_raw(&p, x, y, comp); 
+} + +static int stbi__png_is16(stbi__context *s) +{ +   stbi__png p; +   p.s = s; +   if (!stbi__png_info_raw(&p, NULL, NULL, NULL)) +	   return 0; +   if (p.depth != 16) { +      stbi__rewind(p.s); +      return 0; +   } +   return 1; +} +#endif + +// Microsoft/Windows BMP image + +#ifndef STBI_NO_BMP +static int stbi__bmp_test_raw(stbi__context *s) +{ +   int r; +   int sz; +   if (stbi__get8(s) != 'B') return 0; +   if (stbi__get8(s) != 'M') return 0; +   stbi__get32le(s); // discard filesize +   stbi__get16le(s); // discard reserved +   stbi__get16le(s); // discard reserved +   stbi__get32le(s); // discard data offset +   sz = stbi__get32le(s); +   r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124); +   return r; +} + +static int stbi__bmp_test(stbi__context *s) +{ +   int r = stbi__bmp_test_raw(s); +   stbi__rewind(s); +   return r; +} + + +// returns 0..31 for the highest set bit +static int stbi__high_bit(unsigned int z) +{ +   int n=0; +   if (z == 0) return -1; +   if (z >= 0x10000) { n += 16; z >>= 16; } +   if (z >= 0x00100) { n +=  8; z >>=  8; } +   if (z >= 0x00010) { n +=  4; z >>=  4; } +   if (z >= 0x00004) { n +=  2; z >>=  2; } +   if (z >= 0x00002) { n +=  1;/* >>=  1;*/ } +   return n; +} + +static int stbi__bitcount(unsigned int a) +{ +   a = (a & 0x55555555) + ((a >>  1) & 0x55555555); // max 2 +   a = (a & 0x33333333) + ((a >>  2) & 0x33333333); // max 4 +   a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits +   a = (a + (a >> 8)); // max 16 per 8 bits +   a = (a + (a >> 16)); // max 32 per 8 bits +   return a & 0xff; +} + +// extract an arbitrarily-aligned N-bit value (N=bits) +// from v, and then make it 8-bits long and fractionally +// extend it to full full range. +static int stbi__shiftsigned(unsigned int v, int shift, int bits) +{ +   static unsigned int mul_table[9] = { +      0, +      0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/, +      0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/, +   }; +   static unsigned int shift_table[9] = { +      0, 0,0,1,0,2,4,6,0, +   }; +   if (shift < 0) +      v <<= -shift; +   else +      v >>= shift; +   STBI_ASSERT(v < 256); +   v >>= (8-bits); +   STBI_ASSERT(bits >= 0 && bits <= 8); +   return (int) ((unsigned) v * mul_table[bits]) >> shift_table[bits]; +} + +typedef struct +{ +   int bpp, offset, hsz; +   unsigned int mr,mg,mb,ma, all_a; +   int extra_read; +} stbi__bmp_data; + +static int stbi__bmp_set_mask_defaults(stbi__bmp_data *info, int compress) +{ +   // BI_BITFIELDS specifies masks explicitly, don't override +   if (compress == 3) +      return 1; + +   if (compress == 0) { +      if (info->bpp == 16) { +         info->mr = 31u << 10; +         info->mg = 31u <<  5; +         info->mb = 31u <<  0; +      } else if (info->bpp == 32) { +         info->mr = 0xffu << 16; +         info->mg = 0xffu <<  8; +         info->mb = 0xffu <<  0; +         info->ma = 0xffu << 24; +         info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0 +      } else { +         // otherwise, use defaults, which is all-0 +         info->mr = info->mg = info->mb = info->ma = 0; +      } +      return 1; +   } +   return 0; // error +} + +static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info) +{ +   int hsz; +   if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP"); +   stbi__get32le(s); // discard filesize +   stbi__get16le(s); // discard reserved +   
stbi__get16le(s); // discard reserved +   info->offset = stbi__get32le(s); +   info->hsz = hsz = stbi__get32le(s); +   info->mr = info->mg = info->mb = info->ma = 0; +   info->extra_read = 14; + +   if (info->offset < 0) return stbi__errpuc("bad BMP", "bad BMP"); + +   if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown"); +   if (hsz == 12) { +      s->img_x = stbi__get16le(s); +      s->img_y = stbi__get16le(s); +   } else { +      s->img_x = stbi__get32le(s); +      s->img_y = stbi__get32le(s); +   } +   if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP"); +   info->bpp = stbi__get16le(s); +   if (hsz != 12) { +      int compress = stbi__get32le(s); +      if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE"); +      if (compress >= 4) return stbi__errpuc("BMP JPEG/PNG", "BMP type not supported: unsupported compression"); // this includes PNG/JPEG modes +      if (compress == 3 && info->bpp != 16 && info->bpp != 32) return stbi__errpuc("bad BMP", "bad BMP"); // bitfields requires 16 or 32 bits/pixel +      stbi__get32le(s); // discard sizeof +      stbi__get32le(s); // discard hres +      stbi__get32le(s); // discard vres +      stbi__get32le(s); // discard colorsused +      stbi__get32le(s); // discard max important +      if (hsz == 40 || hsz == 56) { +         if (hsz == 56) { +            stbi__get32le(s); +            stbi__get32le(s); +            stbi__get32le(s); +            stbi__get32le(s); +         } +         if (info->bpp == 16 || info->bpp == 32) { +            if (compress == 0) { +               stbi__bmp_set_mask_defaults(info, compress); +            } else if (compress == 3) { +               info->mr = stbi__get32le(s); +               info->mg = stbi__get32le(s); +               info->mb = stbi__get32le(s); +               info->extra_read += 12; +               // not documented, but generated by photoshop and handled by mspaint +               if (info->mr == info->mg && info->mg == info->mb) { +                  // ?!?!? 
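+                  // identical red/green/blue masks cannot describe separate channels,
+                  // so the file is rejected as corrupt rather than guessed at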
+                  return stbi__errpuc("bad BMP", "bad BMP"); +               } +            } else +               return stbi__errpuc("bad BMP", "bad BMP"); +         } +      } else { +         // V4/V5 header +         int i; +         if (hsz != 108 && hsz != 124) +            return stbi__errpuc("bad BMP", "bad BMP"); +         info->mr = stbi__get32le(s); +         info->mg = stbi__get32le(s); +         info->mb = stbi__get32le(s); +         info->ma = stbi__get32le(s); +         if (compress != 3) // override mr/mg/mb unless in BI_BITFIELDS mode, as per docs +            stbi__bmp_set_mask_defaults(info, compress); +         stbi__get32le(s); // discard color space +         for (i=0; i < 12; ++i) +            stbi__get32le(s); // discard color space parameters +         if (hsz == 124) { +            stbi__get32le(s); // discard rendering intent +            stbi__get32le(s); // discard offset of profile data +            stbi__get32le(s); // discard size of profile data +            stbi__get32le(s); // discard reserved +         } +      } +   } +   return (void *) 1; +} + + +static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ +   stbi_uc *out; +   unsigned int mr=0,mg=0,mb=0,ma=0, all_a; +   stbi_uc pal[256][4]; +   int psize=0,i,j,width; +   int flip_vertically, pad, target; +   stbi__bmp_data info; +   STBI_NOTUSED(ri); + +   info.all_a = 255; +   if (stbi__bmp_parse_header(s, &info) == NULL) +      return NULL; // error code already set + +   flip_vertically = ((int) s->img_y) > 0; +   s->img_y = abs((int) s->img_y); + +   if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); +   if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + +   mr = info.mr; +   mg = info.mg; +   mb = info.mb; +   ma = info.ma; +   all_a = info.all_a; + +   if (info.hsz == 12) { +      if (info.bpp < 24) +         psize = (info.offset - info.extra_read - 24) / 3; +   } else { +      if (info.bpp < 16) +         psize = (info.offset - info.extra_read - info.hsz) >> 2; +   } +   if (psize == 0) { +      // accept some number of extra bytes after the header, but if the offset points either to before +      // the header ends or implies a large amount of extra data, reject the file as malformed +      int bytes_read_so_far = s->callback_already_read + (int)(s->img_buffer - s->img_buffer_original); +      int header_limit = 1024; // max we actually read is below 256 bytes currently. +      int extra_data_limit = 256*4; // what ordinarily goes here is a palette; 256 entries*4 bytes is its max size. +      if (bytes_read_so_far <= 0 || bytes_read_so_far > header_limit) { +         return stbi__errpuc("bad header", "Corrupt BMP"); +      } +      // we established that bytes_read_so_far is positive and sensible. +      // the first half of this test rejects offsets that are either too small positives, or +      // negative, and guarantees that info.offset >= bytes_read_so_far > 0. this in turn +      // ensures the number computed in the second half of the test can't overflow. +      if (info.offset < bytes_read_so_far || info.offset - bytes_read_so_far > extra_data_limit) { +         return stbi__errpuc("bad offset", "Corrupt BMP"); +      } else { +         stbi__skip(s, info.offset - bytes_read_so_far); +      } +   } + +   if (info.bpp == 24 && ma == 0xff000000) +      s->img_n = 3; +   else +      s->img_n = ma ? 
4 : 3; +   if (req_comp && req_comp >= 3) // we can directly decode 3 or 4 +      target = req_comp; +   else +      target = s->img_n; // if they want monochrome, we'll post-convert + +   // sanity-check size +   if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0)) +      return stbi__errpuc("too large", "Corrupt BMP"); + +   out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0); +   if (!out) return stbi__errpuc("outofmem", "Out of memory"); +   if (info.bpp < 16) { +      int z=0; +      if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); } +      for (i=0; i < psize; ++i) { +         pal[i][2] = stbi__get8(s); +         pal[i][1] = stbi__get8(s); +         pal[i][0] = stbi__get8(s); +         if (info.hsz != 12) stbi__get8(s); +         pal[i][3] = 255; +      } +      stbi__skip(s, info.offset - info.extra_read - info.hsz - psize * (info.hsz == 12 ? 3 : 4)); +      if (info.bpp == 1) width = (s->img_x + 7) >> 3; +      else if (info.bpp == 4) width = (s->img_x + 1) >> 1; +      else if (info.bpp == 8) width = s->img_x; +      else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); } +      pad = (-width)&3; +      if (info.bpp == 1) { +         for (j=0; j < (int) s->img_y; ++j) { +            int bit_offset = 7, v = stbi__get8(s); +            for (i=0; i < (int) s->img_x; ++i) { +               int color = (v>>bit_offset)&0x1; +               out[z++] = pal[color][0]; +               out[z++] = pal[color][1]; +               out[z++] = pal[color][2]; +               if (target == 4) out[z++] = 255; +               if (i+1 == (int) s->img_x) break; +               if((--bit_offset) < 0) { +                  bit_offset = 7; +                  v = stbi__get8(s); +               } +            } +            stbi__skip(s, pad); +         } +      } else { +         for (j=0; j < (int) s->img_y; ++j) { +            for (i=0; i < (int) s->img_x; i += 2) { +               int v=stbi__get8(s),v2=0; +               if (info.bpp == 4) { +                  v2 = v & 15; +                  v >>= 4; +               } +               out[z++] = pal[v][0]; +               out[z++] = pal[v][1]; +               out[z++] = pal[v][2]; +               if (target == 4) out[z++] = 255; +               if (i+1 == (int) s->img_x) break; +               v = (info.bpp == 8) ? 
stbi__get8(s) : v2; +               out[z++] = pal[v][0]; +               out[z++] = pal[v][1]; +               out[z++] = pal[v][2]; +               if (target == 4) out[z++] = 255; +            } +            stbi__skip(s, pad); +         } +      } +   } else { +      int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0; +      int z = 0; +      int easy=0; +      stbi__skip(s, info.offset - info.extra_read - info.hsz); +      if (info.bpp == 24) width = 3 * s->img_x; +      else if (info.bpp == 16) width = 2*s->img_x; +      else /* bpp = 32 and pad = 0 */ width=0; +      pad = (-width) & 3; +      if (info.bpp == 24) { +         easy = 1; +      } else if (info.bpp == 32) { +         if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000) +            easy = 2; +      } +      if (!easy) { +         if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); } +         // right shift amt to put high bit in position #7 +         rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr); +         gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg); +         bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb); +         ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma); +         if (rcount > 8 || gcount > 8 || bcount > 8 || acount > 8) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); } +      } +      for (j=0; j < (int) s->img_y; ++j) { +         if (easy) { +            for (i=0; i < (int) s->img_x; ++i) { +               unsigned char a; +               out[z+2] = stbi__get8(s); +               out[z+1] = stbi__get8(s); +               out[z+0] = stbi__get8(s); +               z += 3; +               a = (easy == 2 ? stbi__get8(s) : 255); +               all_a |= a; +               if (target == 4) out[z++] = a; +            } +         } else { +            int bpp = info.bpp; +            for (i=0; i < (int) s->img_x; ++i) { +               stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s)); +               unsigned int a; +               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount)); +               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount)); +               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount)); +               a = (ma ? 
stbi__shiftsigned(v & ma, ashift, acount) : 255); +               all_a |= a; +               if (target == 4) out[z++] = STBI__BYTECAST(a); +            } +         } +         stbi__skip(s, pad); +      } +   } + +   // if alpha channel is all 0s, replace with all 255s +   if (target == 4 && all_a == 0) +      for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4) +         out[i] = 255; + +   if (flip_vertically) { +      stbi_uc t; +      for (j=0; j < (int) s->img_y>>1; ++j) { +         stbi_uc *p1 = out +      j     *s->img_x*target; +         stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target; +         for (i=0; i < (int) s->img_x*target; ++i) { +            t = p1[i]; p1[i] = p2[i]; p2[i] = t; +         } +      } +   } + +   if (req_comp && req_comp != target) { +      out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y); +      if (out == NULL) return out; // stbi__convert_format frees input on failure +   } + +   *x = s->img_x; +   *y = s->img_y; +   if (comp) *comp = s->img_n; +   return out; +} +#endif + +// Targa Truevision - TGA +// by Jonathan Dummer +#ifndef STBI_NO_TGA +// returns STBI_rgb or whatever, 0 on error +static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16) +{ +   // only RGB or RGBA (incl. 16bit) or grey allowed +   if (is_rgb16) *is_rgb16 = 0; +   switch(bits_per_pixel) { +      case 8:  return STBI_grey; +      case 16: if(is_grey) return STBI_grey_alpha; +               // fallthrough +      case 15: if(is_rgb16) *is_rgb16 = 1; +               return STBI_rgb; +      case 24: // fallthrough +      case 32: return bits_per_pixel/8; +      default: return 0; +   } +} + +static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp) +{ +    int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp; +    int sz, tga_colormap_type; +    stbi__get8(s);                   // discard Offset +    tga_colormap_type = stbi__get8(s); // colormap type +    if( tga_colormap_type > 1 ) { +        stbi__rewind(s); +        return 0;      // only RGB or indexed allowed +    } +    tga_image_type = stbi__get8(s); // image type +    if ( tga_colormap_type == 1 ) { // colormapped (paletted) image +        if (tga_image_type != 1 && tga_image_type != 9) { +            stbi__rewind(s); +            return 0; +        } +        stbi__skip(s,4);       // skip index of first colormap entry and number of entries +        sz = stbi__get8(s);    //   check bits per palette color entry +        if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) { +            stbi__rewind(s); +            return 0; +        } +        stbi__skip(s,4);       // skip image x and y origin +        tga_colormap_bpp = sz; +    } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE +        if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) { +            stbi__rewind(s); +            return 0; // only RGB or grey allowed, +/- RLE +        } +        stbi__skip(s,9); // skip colormap specification and image x/y origin +        tga_colormap_bpp = 0; +    } +    tga_w = stbi__get16le(s); +    if( tga_w < 1 ) { +        stbi__rewind(s); +        return 0;   // test width +    } +    tga_h = stbi__get16le(s); +    if( tga_h < 1 ) { +        stbi__rewind(s); +        return 0;   // test height +    } +    tga_bits_per_pixel = stbi__get8(s); // bits per pixel +    stbi__get8(s); // ignore alpha bits +    if (tga_colormap_bpp != 0) { +        
if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) { +            // when using a colormap, tga_bits_per_pixel is the size of the indexes +            // I don't think anything but 8 or 16bit indexes makes sense +            stbi__rewind(s); +            return 0; +        } +        tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL); +    } else { +        tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL); +    } +    if(!tga_comp) { +      stbi__rewind(s); +      return 0; +    } +    if (x) *x = tga_w; +    if (y) *y = tga_h; +    if (comp) *comp = tga_comp; +    return 1;                   // seems to have passed everything +} + +static int stbi__tga_test(stbi__context *s) +{ +   int res = 0; +   int sz, tga_color_type; +   stbi__get8(s);      //   discard Offset +   tga_color_type = stbi__get8(s);   //   color type +   if ( tga_color_type > 1 ) goto errorEnd;   //   only RGB or indexed allowed +   sz = stbi__get8(s);   //   image type +   if ( tga_color_type == 1 ) { // colormapped (paletted) image +      if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9 +      stbi__skip(s,4);       // skip index of first colormap entry and number of entries +      sz = stbi__get8(s);    //   check bits per palette color entry +      if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; +      stbi__skip(s,4);       // skip image x and y origin +   } else { // "normal" image w/o colormap +      if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE +      stbi__skip(s,9); // skip colormap specification and image x/y origin +   } +   if ( stbi__get16le(s) < 1 ) goto errorEnd;      //   test width +   if ( stbi__get16le(s) < 1 ) goto errorEnd;      //   test height +   sz = stbi__get8(s);   //   bits per pixel +   if ( (tga_color_type == 1) && (sz != 8) && (sz != 16) ) goto errorEnd; // for colormapped images, bpp is size of an index +   if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; + +   res = 1; // if we got this far, everything's good and we can return 1 instead of 0 + +errorEnd: +   stbi__rewind(s); +   return res; +} + +// read 16bit value and convert to 24bit RGB +static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out) +{ +   stbi__uint16 px = (stbi__uint16)stbi__get16le(s); +   stbi__uint16 fiveBitMask = 31; +   // we have 3 channels with 5bits each +   int r = (px >> 10) & fiveBitMask; +   int g = (px >> 5) & fiveBitMask; +   int b = px & fiveBitMask; +   // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later +   out[0] = (stbi_uc)((r * 255)/31); +   out[1] = (stbi_uc)((g * 255)/31); +   out[2] = (stbi_uc)((b * 255)/31); + +   // some people claim that the most significant bit might be used for alpha +   // (possibly if an alpha-bit is set in the "image descriptor byte") +   // but that only made 16bit test images completely translucent.. +   // so let's treat all 15 and 16bit TGAs as RGB with no alpha. 
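+   // (for reference: each 5-bit channel is rescaled above,
+   //   e.g. a raw value of 16 becomes (16 * 255) / 31 = 131)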
+} + +static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ +   //   read in the TGA header stuff +   int tga_offset = stbi__get8(s); +   int tga_indexed = stbi__get8(s); +   int tga_image_type = stbi__get8(s); +   int tga_is_RLE = 0; +   int tga_palette_start = stbi__get16le(s); +   int tga_palette_len = stbi__get16le(s); +   int tga_palette_bits = stbi__get8(s); +   int tga_x_origin = stbi__get16le(s); +   int tga_y_origin = stbi__get16le(s); +   int tga_width = stbi__get16le(s); +   int tga_height = stbi__get16le(s); +   int tga_bits_per_pixel = stbi__get8(s); +   int tga_comp, tga_rgb16=0; +   int tga_inverted = stbi__get8(s); +   // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?) +   //   image data +   unsigned char *tga_data; +   unsigned char *tga_palette = NULL; +   int i, j; +   unsigned char raw_data[4] = {0}; +   int RLE_count = 0; +   int RLE_repeating = 0; +   int read_next_pixel = 1; +   STBI_NOTUSED(ri); +   STBI_NOTUSED(tga_x_origin); // @TODO +   STBI_NOTUSED(tga_y_origin); // @TODO + +   if (tga_height > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); +   if (tga_width > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + +   //   do a tiny bit of precessing +   if ( tga_image_type >= 8 ) +   { +      tga_image_type -= 8; +      tga_is_RLE = 1; +   } +   tga_inverted = 1 - ((tga_inverted >> 5) & 1); + +   //   If I'm paletted, then I'll use the number of bits from the palette +   if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16); +   else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16); + +   if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency +      return stbi__errpuc("bad format", "Can't find out TGA pixelformat"); + +   //   tga info +   *x = tga_width; +   *y = tga_height; +   if (comp) *comp = tga_comp; + +   if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0)) +      return stbi__errpuc("too large", "Corrupt TGA"); + +   tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0); +   if (!tga_data) return stbi__errpuc("outofmem", "Out of memory"); + +   // skip to the data's starting position (offset usually = 0) +   stbi__skip(s, tga_offset ); + +   if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) { +      for (i=0; i < tga_height; ++i) { +         int row = tga_inverted ? tga_height -i - 1 : i; +         stbi_uc *tga_row = tga_data + row*tga_width*tga_comp; +         stbi__getn(s, tga_row, tga_width * tga_comp); +      } +   } else  { +      //   do I need to load a palette? +      if ( tga_indexed) +      { +         if (tga_palette_len == 0) {  /* you have to have at least one entry! */ +            STBI_FREE(tga_data); +            return stbi__errpuc("bad palette", "Corrupt TGA"); +         } + +         //   any data to skip? 
(offset usually = 0) +         stbi__skip(s, tga_palette_start ); +         //   load the palette +         tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0); +         if (!tga_palette) { +            STBI_FREE(tga_data); +            return stbi__errpuc("outofmem", "Out of memory"); +         } +         if (tga_rgb16) { +            stbi_uc *pal_entry = tga_palette; +            STBI_ASSERT(tga_comp == STBI_rgb); +            for (i=0; i < tga_palette_len; ++i) { +               stbi__tga_read_rgb16(s, pal_entry); +               pal_entry += tga_comp; +            } +         } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) { +               STBI_FREE(tga_data); +               STBI_FREE(tga_palette); +               return stbi__errpuc("bad palette", "Corrupt TGA"); +         } +      } +      //   load the data +      for (i=0; i < tga_width * tga_height; ++i) +      { +         //   if I'm in RLE mode, do I need to get a RLE stbi__pngchunk? +         if ( tga_is_RLE ) +         { +            if ( RLE_count == 0 ) +            { +               //   yep, get the next byte as a RLE command +               int RLE_cmd = stbi__get8(s); +               RLE_count = 1 + (RLE_cmd & 127); +               RLE_repeating = RLE_cmd >> 7; +               read_next_pixel = 1; +            } else if ( !RLE_repeating ) +            { +               read_next_pixel = 1; +            } +         } else +         { +            read_next_pixel = 1; +         } +         //   OK, if I need to read a pixel, do it now +         if ( read_next_pixel ) +         { +            //   load however much data we did have +            if ( tga_indexed ) +            { +               // read in index, then perform the lookup +               int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s); +               if ( pal_idx >= tga_palette_len ) { +                  // invalid index +                  pal_idx = 0; +               } +               pal_idx *= tga_comp; +               for (j = 0; j < tga_comp; ++j) { +                  raw_data[j] = tga_palette[pal_idx+j]; +               } +            } else if(tga_rgb16) { +               STBI_ASSERT(tga_comp == STBI_rgb); +               stbi__tga_read_rgb16(s, raw_data); +            } else { +               //   read in the data raw +               for (j = 0; j < tga_comp; ++j) { +                  raw_data[j] = stbi__get8(s); +               } +            } +            //   clear the reading flag for the next pixel +            read_next_pixel = 0; +         } // end of reading a pixel + +         // copy data +         for (j = 0; j < tga_comp; ++j) +           tga_data[i*tga_comp+j] = raw_data[j]; + +         //   in case we're in RLE mode, keep counting down +         --RLE_count; +      } +      //   do I need to invert the image? 
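+      //   (bit 5 of the image descriptor selects a top-left origin; when it is clear,
+      //   the rows were stored bottom-up, so swap them here for top-to-bottom output)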
+      if ( tga_inverted ) +      { +         for (j = 0; j*2 < tga_height; ++j) +         { +            int index1 = j * tga_width * tga_comp; +            int index2 = (tga_height - 1 - j) * tga_width * tga_comp; +            for (i = tga_width * tga_comp; i > 0; --i) +            { +               unsigned char temp = tga_data[index1]; +               tga_data[index1] = tga_data[index2]; +               tga_data[index2] = temp; +               ++index1; +               ++index2; +            } +         } +      } +      //   clear my palette, if I had one +      if ( tga_palette != NULL ) +      { +         STBI_FREE( tga_palette ); +      } +   } + +   // swap RGB - if the source data was RGB16, it already is in the right order +   if (tga_comp >= 3 && !tga_rgb16) +   { +      unsigned char* tga_pixel = tga_data; +      for (i=0; i < tga_width * tga_height; ++i) +      { +         unsigned char temp = tga_pixel[0]; +         tga_pixel[0] = tga_pixel[2]; +         tga_pixel[2] = temp; +         tga_pixel += tga_comp; +      } +   } + +   // convert to target component count +   if (req_comp && req_comp != tga_comp) +      tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height); + +   //   the things I do to get rid of an error message, and yet keep +   //   Microsoft's C compilers happy... [8^( +   tga_palette_start = tga_palette_len = tga_palette_bits = +         tga_x_origin = tga_y_origin = 0; +   STBI_NOTUSED(tga_palette_start); +   //   OK, done +   return tga_data; +} +#endif + +// ************************************************************************************************* +// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB + +#ifndef STBI_NO_PSD +static int stbi__psd_test(stbi__context *s) +{ +   int r = (stbi__get32be(s) == 0x38425053); +   stbi__rewind(s); +   return r; +} + +static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount) +{ +   int count, nleft, len; + +   count = 0; +   while ((nleft = pixelCount - count) > 0) { +      len = stbi__get8(s); +      if (len == 128) { +         // No-op. +      } else if (len < 128) { +         // Copy next len+1 bytes literally. +         len++; +         if (len > nleft) return 0; // corrupt data +         count += len; +         while (len) { +            *p = stbi__get8(s); +            p += 4; +            len--; +         } +      } else if (len > 128) { +         stbi_uc   val; +         // Next -len+1 bytes in the dest are replicated from next source byte. +         // (Interpret len as a negative 8-bit int.) +         len = 257 - len; +         if (len > nleft) return 0; // corrupt data +         val = stbi__get8(s); +         count += len; +         while (len) { +            *p = val; +            p += 4; +            len--; +         } +      } +   } + +   return 1; +} + +static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) +{ +   int pixelCount; +   int channelCount, compression; +   int channel, i; +   int bitdepth; +   int w,h; +   stbi_uc *out; +   STBI_NOTUSED(ri); + +   // Check identifier +   if (stbi__get32be(s) != 0x38425053)   // "8BPS" +      return stbi__errpuc("not PSD", "Corrupt PSD image"); + +   // Check file type version. +   if (stbi__get16be(s) != 1) +      return stbi__errpuc("wrong version", "Unsupported version of PSD image"); + +   // Skip 6 reserved bytes. +   stbi__skip(s, 6 ); + +   // Read the number of channels (R, G, B, A, etc). 
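+   // (only the first four channels are consumed below; any extra channels in the file are ignored)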
+   channelCount = stbi__get16be(s); +   if (channelCount < 0 || channelCount > 16) +      return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image"); + +   // Read the rows and columns of the image. +   h = stbi__get32be(s); +   w = stbi__get32be(s); + +   if (h > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); +   if (w > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + +   // Make sure the depth is 8 bits. +   bitdepth = stbi__get16be(s); +   if (bitdepth != 8 && bitdepth != 16) +      return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit"); + +   // Make sure the color mode is RGB. +   // Valid options are: +   //   0: Bitmap +   //   1: Grayscale +   //   2: Indexed color +   //   3: RGB color +   //   4: CMYK color +   //   7: Multichannel +   //   8: Duotone +   //   9: Lab color +   if (stbi__get16be(s) != 3) +      return stbi__errpuc("wrong color format", "PSD is not in RGB color format"); + +   // Skip the Mode Data.  (It's the palette for indexed color; other info for other modes.) +   stbi__skip(s,stbi__get32be(s) ); + +   // Skip the image resources.  (resolution, pen tool paths, etc) +   stbi__skip(s, stbi__get32be(s) ); + +   // Skip the reserved data. +   stbi__skip(s, stbi__get32be(s) ); + +   // Find out if the data is compressed. +   // Known values: +   //   0: no compression +   //   1: RLE compressed +   compression = stbi__get16be(s); +   if (compression > 1) +      return stbi__errpuc("bad compression", "PSD has an unknown compression format"); + +   // Check size +   if (!stbi__mad3sizes_valid(4, w, h, 0)) +      return stbi__errpuc("too large", "Corrupt PSD"); + +   // Create the destination image. + +   if (!compression && bitdepth == 16 && bpc == 16) { +      out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0); +      ri->bits_per_channel = 16; +   } else +      out = (stbi_uc *) stbi__malloc(4 * w*h); + +   if (!out) return stbi__errpuc("outofmem", "Out of memory"); +   pixelCount = w*h; + +   // Initialize the data to zero. +   //memset( out, 0, pixelCount * 4 ); + +   // Finally, the image data. +   if (compression) { +      // RLE as used by .PSD and .TIFF +      // Loop until you get the number of unpacked bytes you are expecting: +      //     Read the next source byte into n. +      //     If n is between 0 and 127 inclusive, copy the next n+1 bytes literally. +      //     Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times. +      //     Else if n is 128, noop. +      // Endloop + +      // The RLE-compressed data is preceded by a 2-byte data count for each row in the data, +      // which we're going to just skip. +      stbi__skip(s, h * channelCount * 2 ); + +      // Read the RLE data by channel. +      for (channel = 0; channel < 4; channel++) { +         stbi_uc *p; + +         p = out+channel; +         if (channel >= channelCount) { +            // Fill this channel with default data. +            for (i = 0; i < pixelCount; i++, p += 4) +               *p = (channel == 3 ? 255 : 0); +         } else { +            // Read the RLE data. +            if (!stbi__psd_decode_rle(s, p, pixelCount)) { +               STBI_FREE(out); +               return stbi__errpuc("corrupt", "bad RLE data"); +            } +         } +      } + +   } else { +      // We're at the raw image data.  It's each channel in order (Red, Green, Blue, Alpha, ...) 
+      // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image. + +      // Read the data by channel. +      for (channel = 0; channel < 4; channel++) { +         if (channel >= channelCount) { +            // Fill this channel with default data. +            if (bitdepth == 16 && bpc == 16) { +               stbi__uint16 *q = ((stbi__uint16 *) out) + channel; +               stbi__uint16 val = channel == 3 ? 65535 : 0; +               for (i = 0; i < pixelCount; i++, q += 4) +                  *q = val; +            } else { +               stbi_uc *p = out+channel; +               stbi_uc val = channel == 3 ? 255 : 0; +               for (i = 0; i < pixelCount; i++, p += 4) +                  *p = val; +            } +         } else { +            if (ri->bits_per_channel == 16) {    // output bpc +               stbi__uint16 *q = ((stbi__uint16 *) out) + channel; +               for (i = 0; i < pixelCount; i++, q += 4) +                  *q = (stbi__uint16) stbi__get16be(s); +            } else { +               stbi_uc *p = out+channel; +               if (bitdepth == 16) {  // input bpc +                  for (i = 0; i < pixelCount; i++, p += 4) +                     *p = (stbi_uc) (stbi__get16be(s) >> 8); +               } else { +                  for (i = 0; i < pixelCount; i++, p += 4) +                     *p = stbi__get8(s); +               } +            } +         } +      } +   } + +   // remove weird white matte from PSD +   if (channelCount >= 4) { +      if (ri->bits_per_channel == 16) { +         for (i=0; i < w*h; ++i) { +            stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i; +            if (pixel[3] != 0 && pixel[3] != 65535) { +               float a = pixel[3] / 65535.0f; +               float ra = 1.0f / a; +               float inv_a = 65535.0f * (1 - ra); +               pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a); +               pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a); +               pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a); +            } +         } +      } else { +         for (i=0; i < w*h; ++i) { +            unsigned char *pixel = out + 4*i; +            if (pixel[3] != 0 && pixel[3] != 255) { +               float a = pixel[3] / 255.0f; +               float ra = 1.0f / a; +               float inv_a = 255.0f * (1 - ra); +               pixel[0] = (unsigned char) (pixel[0]*ra + inv_a); +               pixel[1] = (unsigned char) (pixel[1]*ra + inv_a); +               pixel[2] = (unsigned char) (pixel[2]*ra + inv_a); +            } +         } +      } +   } + +   // convert to desired output format +   if (req_comp && req_comp != 4) { +      if (ri->bits_per_channel == 16) +         out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h); +      else +         out = stbi__convert_format(out, 4, req_comp, w, h); +      if (out == NULL) return out; // stbi__convert_format frees input on failure +   } + +   if (comp) *comp = 4; +   *y = h; +   *x = w; + +   return out; +} +#endif + +// ************************************************************************************************* +// Softimage PIC loader +// by Tom Seddon +// +// See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format +// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/ + +#ifndef STBI_NO_PIC +static int stbi__pic_is4(stbi__context *s,const char *str) +{ +   int i; +   for (i=0; i<4; ++i) +      if (stbi__get8(s) != (stbi_uc)str[i]) +         return 0; + +   return 1; 
+} + +static int stbi__pic_test_core(stbi__context *s) +{ +   int i; + +   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) +      return 0; + +   for(i=0;i<84;++i) +      stbi__get8(s); + +   if (!stbi__pic_is4(s,"PICT")) +      return 0; + +   return 1; +} + +typedef struct +{ +   stbi_uc size,type,channel; +} stbi__pic_packet; + +static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest) +{ +   int mask=0x80, i; + +   for (i=0; i<4; ++i, mask>>=1) { +      if (channel & mask) { +         if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short"); +         dest[i]=stbi__get8(s); +      } +   } + +   return dest; +} + +static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src) +{ +   int mask=0x80,i; + +   for (i=0;i<4; ++i, mask>>=1) +      if (channel&mask) +         dest[i]=src[i]; +} + +static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result) +{ +   int act_comp=0,num_packets=0,y,chained; +   stbi__pic_packet packets[10]; + +   // this will (should...) cater for even some bizarre stuff like having data +    // for the same channel in multiple packets. +   do { +      stbi__pic_packet *packet; + +      if (num_packets==sizeof(packets)/sizeof(packets[0])) +         return stbi__errpuc("bad format","too many packets"); + +      packet = &packets[num_packets++]; + +      chained = stbi__get8(s); +      packet->size    = stbi__get8(s); +      packet->type    = stbi__get8(s); +      packet->channel = stbi__get8(s); + +      act_comp |= packet->channel; + +      if (stbi__at_eof(s))          return stbi__errpuc("bad file","file too short (reading packets)"); +      if (packet->size != 8)  return stbi__errpuc("bad format","packet isn't 8bpp"); +   } while (chained); + +   *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel? 
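+   // the per-scanline loop below decodes each packet by type:
+   //   0 = uncompressed, 1 = pure RLE, 2 = mixed RLE/raw runs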
+ +   for(y=0; y<height; ++y) { +      int packet_idx; + +      for(packet_idx=0; packet_idx < num_packets; ++packet_idx) { +         stbi__pic_packet *packet = &packets[packet_idx]; +         stbi_uc *dest = result+y*width*4; + +         switch (packet->type) { +            default: +               return stbi__errpuc("bad format","packet has bad compression type"); + +            case 0: {//uncompressed +               int x; + +               for(x=0;x<width;++x, dest+=4) +                  if (!stbi__readval(s,packet->channel,dest)) +                     return 0; +               break; +            } + +            case 1://Pure RLE +               { +                  int left=width, i; + +                  while (left>0) { +                     stbi_uc count,value[4]; + +                     count=stbi__get8(s); +                     if (stbi__at_eof(s))   return stbi__errpuc("bad file","file too short (pure read count)"); + +                     if (count > left) +                        count = (stbi_uc) left; + +                     if (!stbi__readval(s,packet->channel,value))  return 0; + +                     for(i=0; i<count; ++i,dest+=4) +                        stbi__copyval(packet->channel,dest,value); +                     left -= count; +                  } +               } +               break; + +            case 2: {//Mixed RLE +               int left=width; +               while (left>0) { +                  int count = stbi__get8(s), i; +                  if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (mixed read count)"); + +                  if (count >= 128) { // Repeated +                     stbi_uc value[4]; + +                     if (count==128) +                        count = stbi__get16be(s); +                     else +                        count -= 127; +                     if (count > left) +                        return stbi__errpuc("bad file","scanline overrun"); + +                     if (!stbi__readval(s,packet->channel,value)) +                        return 0; + +                     for(i=0;i<count;++i, dest += 4) +                        stbi__copyval(packet->channel,dest,value); +                  } else { // Raw +                     ++count; +                     if (count>left) return stbi__errpuc("bad file","scanline overrun"); + +                     for(i=0;i<count;++i, dest+=4) +                        if (!stbi__readval(s,packet->channel,dest)) +                           return 0; +                  } +                  left-=count; +               } +               break; +            } +         } +      } +   } + +   return result; +} + +static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri) +{ +   stbi_uc *result; +   int i, x,y, internal_comp; +   STBI_NOTUSED(ri); + +   if (!comp) comp = &internal_comp; + +   for (i=0; i<92; ++i) +      stbi__get8(s); + +   x = stbi__get16be(s); +   y = stbi__get16be(s); + +   if (y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); +   if (x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + +   if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (pic header)"); +   if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode"); + +   stbi__get32be(s); //skip `ratio' +   stbi__get16be(s); //skip `fields' +   stbi__get16be(s); //skip `pad' + +   // intermediate buffer is RGBA +   
result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0); +   if (!result) return stbi__errpuc("outofmem", "Out of memory"); +   memset(result, 0xff, x*y*4); + +   if (!stbi__pic_load_core(s,x,y,comp, result)) { +      STBI_FREE(result); +      result=0; +   } +   *px = x; +   *py = y; +   if (req_comp == 0) req_comp = *comp; +   result=stbi__convert_format(result,4,req_comp,x,y); + +   return result; +} + +static int stbi__pic_test(stbi__context *s) +{ +   int r = stbi__pic_test_core(s); +   stbi__rewind(s); +   return r; +} +#endif + +// ************************************************************************************************* +// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb + +#ifndef STBI_NO_GIF +typedef struct +{ +   stbi__int16 prefix; +   stbi_uc first; +   stbi_uc suffix; +} stbi__gif_lzw; + +typedef struct +{ +   int w,h; +   stbi_uc *out;                 // output buffer (always 4 components) +   stbi_uc *background;          // The current "background" as far as a gif is concerned +   stbi_uc *history; +   int flags, bgindex, ratio, transparent, eflags; +   stbi_uc  pal[256][4]; +   stbi_uc lpal[256][4]; +   stbi__gif_lzw codes[8192]; +   stbi_uc *color_table; +   int parse, step; +   int lflags; +   int start_x, start_y; +   int max_x, max_y; +   int cur_x, cur_y; +   int line_size; +   int delay; +} stbi__gif; + +static int stbi__gif_test_raw(stbi__context *s) +{ +   int sz; +   if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0; +   sz = stbi__get8(s); +   if (sz != '9' && sz != '7') return 0; +   if (stbi__get8(s) != 'a') return 0; +   return 1; +} + +static int stbi__gif_test(stbi__context *s) +{ +   int r = stbi__gif_test_raw(s); +   stbi__rewind(s); +   return r; +} + +static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp) +{ +   int i; +   for (i=0; i < num_entries; ++i) { +      pal[i][2] = stbi__get8(s); +      pal[i][1] = stbi__get8(s); +      pal[i][0] = stbi__get8(s); +      pal[i][3] = transp == i ? 
0 : 255; +   } +} + +static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info) +{ +   stbi_uc version; +   if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') +      return stbi__err("not GIF", "Corrupt GIF"); + +   version = stbi__get8(s); +   if (version != '7' && version != '9')    return stbi__err("not GIF", "Corrupt GIF"); +   if (stbi__get8(s) != 'a')                return stbi__err("not GIF", "Corrupt GIF"); + +   stbi__g_failure_reason = ""; +   g->w = stbi__get16le(s); +   g->h = stbi__get16le(s); +   g->flags = stbi__get8(s); +   g->bgindex = stbi__get8(s); +   g->ratio = stbi__get8(s); +   g->transparent = -1; + +   if (g->w > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); +   if (g->h > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + +   if (comp != 0) *comp = 4;  // can't actually tell whether it's 3 or 4 until we parse the comments + +   if (is_info) return 1; + +   if (g->flags & 0x80) +      stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1); + +   return 1; +} + +static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp) +{ +   stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif)); +   if (!g) return stbi__err("outofmem", "Out of memory"); +   if (!stbi__gif_header(s, g, comp, 1)) { +      STBI_FREE(g); +      stbi__rewind( s ); +      return 0; +   } +   if (x) *x = g->w; +   if (y) *y = g->h; +   STBI_FREE(g); +   return 1; +} + +static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code) +{ +   stbi_uc *p, *c; +   int idx; + +   // recurse to decode the prefixes, since the linked-list is backwards, +   // and working backwards through an interleaved image would be nasty +   if (g->codes[code].prefix >= 0) +      stbi__out_gif_code(g, g->codes[code].prefix); + +   if (g->cur_y >= g->max_y) return; + +   idx = g->cur_x + g->cur_y; +   p = &g->out[idx]; +   g->history[idx / 4] = 1; + +   c = &g->color_table[g->codes[code].suffix * 4]; +   if (c[3] > 128) { // don't render transparent pixels; +      p[0] = c[2]; +      p[1] = c[1]; +      p[2] = c[0]; +      p[3] = c[3]; +   } +   g->cur_x += 4; + +   if (g->cur_x >= g->max_x) { +      g->cur_x = g->start_x; +      g->cur_y += g->step; + +      while (g->cur_y >= g->max_y && g->parse > 0) { +         g->step = (1 << g->parse) * g->line_size; +         g->cur_y = g->start_y + (g->step >> 1); +         --g->parse; +      } +   } +} + +static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g) +{ +   stbi_uc lzw_cs; +   stbi__int32 len, init_code; +   stbi__uint32 first; +   stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear; +   stbi__gif_lzw *p; + +   lzw_cs = stbi__get8(s); +   if (lzw_cs > 12) return NULL; +   clear = 1 << lzw_cs; +   first = 1; +   codesize = lzw_cs + 1; +   codemask = (1 << codesize) - 1; +   bits = 0; +   valid_bits = 0; +   for (init_code = 0; init_code < clear; init_code++) { +      g->codes[init_code].prefix = -1; +      g->codes[init_code].first = (stbi_uc) init_code; +      g->codes[init_code].suffix = (stbi_uc) init_code; +   } + +   // support no starting clear code +   avail = clear+2; +   oldcode = -1; + +   len = 0; +   for(;;) { +      if (valid_bits < codesize) { +         if (len == 0) { +            len = stbi__get8(s); // start new block +            if (len == 0) +               return g->out; +         } +         --len; +         bits |= (stbi__int32) stbi__get8(s) << 
valid_bits; +         valid_bits += 8; +      } else { +         stbi__int32 code = bits & codemask; +         bits >>= codesize; +         valid_bits -= codesize; +         // @OPTIMIZE: is there some way we can accelerate the non-clear path? +         if (code == clear) {  // clear code +            codesize = lzw_cs + 1; +            codemask = (1 << codesize) - 1; +            avail = clear + 2; +            oldcode = -1; +            first = 0; +         } else if (code == clear + 1) { // end of stream code +            stbi__skip(s, len); +            while ((len = stbi__get8(s)) > 0) +               stbi__skip(s,len); +            return g->out; +         } else if (code <= avail) { +            if (first) { +               return stbi__errpuc("no clear code", "Corrupt GIF"); +            } + +            if (oldcode >= 0) { +               p = &g->codes[avail++]; +               if (avail > 8192) { +                  return stbi__errpuc("too many codes", "Corrupt GIF"); +               } + +               p->prefix = (stbi__int16) oldcode; +               p->first = g->codes[oldcode].first; +               p->suffix = (code == avail) ? p->first : g->codes[code].first; +            } else if (code == avail) +               return stbi__errpuc("illegal code in raster", "Corrupt GIF"); + +            stbi__out_gif_code(g, (stbi__uint16) code); + +            if ((avail & codemask) == 0 && avail <= 0x0FFF) { +               codesize++; +               codemask = (1 << codesize) - 1; +            } + +            oldcode = code; +         } else { +            return stbi__errpuc("illegal code in raster", "Corrupt GIF"); +         } +      } +   } +} + +// this function is designed to support animated gifs, although stb_image doesn't support it +// two back is the image from two frames ago, used for a very specific disposal format +static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back) +{ +   int dispose; +   int first_frame; +   int pi; +   int pcount; +   STBI_NOTUSED(req_comp); + +   // on first frame, any non-written pixels get the background colour (non-transparent) +   first_frame = 0; +   if (g->out == 0) { +      if (!stbi__gif_header(s, g, comp,0)) return 0; // stbi__g_failure_reason set by stbi__gif_header +      if (!stbi__mad3sizes_valid(4, g->w, g->h, 0)) +         return stbi__errpuc("too large", "GIF image is too large"); +      pcount = g->w * g->h; +      g->out = (stbi_uc *) stbi__malloc(4 * pcount); +      g->background = (stbi_uc *) stbi__malloc(4 * pcount); +      g->history = (stbi_uc *) stbi__malloc(pcount); +      if (!g->out || !g->background || !g->history) +         return stbi__errpuc("outofmem", "Out of memory"); + +      // image is treated as "transparent" at the start - ie, nothing overwrites the current background; +      // background colour is only used for pixels that are not rendered first frame, after that "background" +      // color refers to the color that was there the previous frame. +      memset(g->out, 0x00, 4 * pcount); +      memset(g->background, 0x00, 4 * pcount); // state of the background (starts transparent) +      memset(g->history, 0x00, pcount);        // pixels that were affected previous frame +      first_frame = 1; +   } else { +      // second frame - how do we dispose of the previous one? 
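+      // (disposal method from the Graphic Control Extension: 0 = unspecified,
+      //  1 = keep in place, 2 = restore to background, 3 = restore to previous frame)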
+      dispose = (g->eflags & 0x1C) >> 2; +      pcount = g->w * g->h; + +      if ((dispose == 3) && (two_back == 0)) { +         dispose = 2; // if I don't have an image to revert back to, default to the old background +      } + +      if (dispose == 3) { // use previous graphic +         for (pi = 0; pi < pcount; ++pi) { +            if (g->history[pi]) { +               memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 ); +            } +         } +      } else if (dispose == 2) { +         // restore what was changed last frame to background before that frame; +         for (pi = 0; pi < pcount; ++pi) { +            if (g->history[pi]) { +               memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 ); +            } +         } +      } else { +         // This is a non-disposal case eithe way, so just +         // leave the pixels as is, and they will become the new background +         // 1: do not dispose +         // 0:  not specified. +      } + +      // background is what out is after the undoing of the previou frame; +      memcpy( g->background, g->out, 4 * g->w * g->h ); +   } + +   // clear my history; +   memset( g->history, 0x00, g->w * g->h );        // pixels that were affected previous frame + +   for (;;) { +      int tag = stbi__get8(s); +      switch (tag) { +         case 0x2C: /* Image Descriptor */ +         { +            stbi__int32 x, y, w, h; +            stbi_uc *o; + +            x = stbi__get16le(s); +            y = stbi__get16le(s); +            w = stbi__get16le(s); +            h = stbi__get16le(s); +            if (((x + w) > (g->w)) || ((y + h) > (g->h))) +               return stbi__errpuc("bad Image Descriptor", "Corrupt GIF"); + +            g->line_size = g->w * 4; +            g->start_x = x * 4; +            g->start_y = y * g->line_size; +            g->max_x   = g->start_x + w * 4; +            g->max_y   = g->start_y + h * g->line_size; +            g->cur_x   = g->start_x; +            g->cur_y   = g->start_y; + +            // if the width of the specified rectangle is 0, that means +            // we may not see *any* pixels or the image is malformed; +            // to make sure this is caught, move the current y down to +            // max_y (which is what out_gif_code checks). +            if (w == 0) +               g->cur_y = g->max_y; + +            g->lflags = stbi__get8(s); + +            if (g->lflags & 0x40) { +               g->step = 8 * g->line_size; // first interlaced spacing +               g->parse = 3; +            } else { +               g->step = g->line_size; +               g->parse = 0; +            } + +            if (g->lflags & 0x80) { +               stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? 
g->transparent : -1); +               g->color_table = (stbi_uc *) g->lpal; +            } else if (g->flags & 0x80) { +               g->color_table = (stbi_uc *) g->pal; +            } else +               return stbi__errpuc("missing color table", "Corrupt GIF"); + +            o = stbi__process_gif_raster(s, g); +            if (!o) return NULL; + +            // if this was the first frame, +            pcount = g->w * g->h; +            if (first_frame && (g->bgindex > 0)) { +               // if first frame, any pixel not drawn to gets the background color +               for (pi = 0; pi < pcount; ++pi) { +                  if (g->history[pi] == 0) { +                     g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be; +                     memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 ); +                  } +               } +            } + +            return o; +         } + +         case 0x21: // Comment Extension. +         { +            int len; +            int ext = stbi__get8(s); +            if (ext == 0xF9) { // Graphic Control Extension. +               len = stbi__get8(s); +               if (len == 4) { +                  g->eflags = stbi__get8(s); +                  g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths. + +                  // unset old transparent +                  if (g->transparent >= 0) { +                     g->pal[g->transparent][3] = 255; +                  } +                  if (g->eflags & 0x01) { +                     g->transparent = stbi__get8(s); +                     if (g->transparent >= 0) { +                        g->pal[g->transparent][3] = 0; +                     } +                  } else { +                     // don't need transparent +                     stbi__skip(s, 1); +                     g->transparent = -1; +                  } +               } else { +                  stbi__skip(s, len); +                  break; +               } +            } +            while ((len = stbi__get8(s)) != 0) { +               stbi__skip(s, len); +            } +            break; +         } + +         case 0x3B: // gif stream termination code +            return (stbi_uc *) s; // using '1' causes warning on some compilers + +         default: +            return stbi__errpuc("unknown code", "Corrupt GIF"); +      } +   } +} + +static void *stbi__load_gif_main_outofmem(stbi__gif *g, stbi_uc *out, int **delays) +{ +   STBI_FREE(g->out); +   STBI_FREE(g->history); +   STBI_FREE(g->background); + +   if (out) STBI_FREE(out); +   if (delays && *delays) STBI_FREE(*delays); +   return stbi__errpuc("outofmem", "Out of memory"); +} + +static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp) +{ +   if (stbi__gif_test(s)) { +      int layers = 0; +      stbi_uc *u = 0; +      stbi_uc *out = 0; +      stbi_uc *two_back = 0; +      stbi__gif g; +      int stride; +      int out_size = 0; +      int delays_size = 0; + +      STBI_NOTUSED(out_size); +      STBI_NOTUSED(delays_size); + +      memset(&g, 0, sizeof(g)); +      if (delays) { +         *delays = 0; +      } + +      do { +         u = stbi__gif_load_next(s, &g, comp, req_comp, two_back); +         if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker + +         if (u) { +            *x = g.w; +            *y = g.h; +            ++layers; +            stride = g.w * g.h * 4; + +            if (out) { +   
            void *tmp = (stbi_uc*) STBI_REALLOC_SIZED( out, out_size, layers * stride ); +               if (!tmp) +                  return stbi__load_gif_main_outofmem(&g, out, delays); +               else { +                   out = (stbi_uc*) tmp; +                   out_size = layers * stride; +               } + +               if (delays) { +                  int *new_delays = (int*) STBI_REALLOC_SIZED( *delays, delays_size, sizeof(int) * layers ); +                  if (!new_delays) +                     return stbi__load_gif_main_outofmem(&g, out, delays); +                  *delays = new_delays; +                  delays_size = layers * sizeof(int); +               } +            } else { +               out = (stbi_uc*)stbi__malloc( layers * stride ); +               if (!out) +                  return stbi__load_gif_main_outofmem(&g, out, delays); +               out_size = layers * stride; +               if (delays) { +                  *delays = (int*) stbi__malloc( layers * sizeof(int) ); +                  if (!*delays) +                     return stbi__load_gif_main_outofmem(&g, out, delays); +                  delays_size = layers * sizeof(int); +               } +            } +            memcpy( out + ((layers - 1) * stride), u, stride ); +            if (layers >= 2) { +               two_back = out - 2 * stride; +            } + +            if (delays) { +               (*delays)[layers - 1U] = g.delay; +            } +         } +      } while (u != 0); + +      // free temp buffer; +      STBI_FREE(g.out); +      STBI_FREE(g.history); +      STBI_FREE(g.background); + +      // do the final conversion after loading everything; +      if (req_comp && req_comp != 4) +         out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h); + +      *z = layers; +      return out; +   } else { +      return stbi__errpuc("not GIF", "Image was not as a gif type."); +   } +} + +static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ +   stbi_uc *u = 0; +   stbi__gif g; +   memset(&g, 0, sizeof(g)); +   STBI_NOTUSED(ri); + +   u = stbi__gif_load_next(s, &g, comp, req_comp, 0); +   if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker +   if (u) { +      *x = g.w; +      *y = g.h; + +      // moved conversion to after successful load so that the same +      // can be done for multiple frames. +      if (req_comp && req_comp != 4) +         u = stbi__convert_format(u, 4, req_comp, g.w, g.h); +   } else if (g.out) { +      // if there was an error and we allocated an image buffer, free it! 
+      STBI_FREE(g.out); +   } + +   // free buffers needed for multiple frame loading; +   STBI_FREE(g.history); +   STBI_FREE(g.background); + +   return u; +} + +static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp) +{ +   return stbi__gif_info_raw(s,x,y,comp); +} +#endif + +// ************************************************************************************************* +// Radiance RGBE HDR loader +// originally by Nicolas Schulz +#ifndef STBI_NO_HDR +static int stbi__hdr_test_core(stbi__context *s, const char *signature) +{ +   int i; +   for (i=0; signature[i]; ++i) +      if (stbi__get8(s) != signature[i]) +          return 0; +   stbi__rewind(s); +   return 1; +} + +static int stbi__hdr_test(stbi__context* s) +{ +   int r = stbi__hdr_test_core(s, "#?RADIANCE\n"); +   stbi__rewind(s); +   if(!r) { +       r = stbi__hdr_test_core(s, "#?RGBE\n"); +       stbi__rewind(s); +   } +   return r; +} + +#define STBI__HDR_BUFLEN  1024 +static char *stbi__hdr_gettoken(stbi__context *z, char *buffer) +{ +   int len=0; +   char c = '\0'; + +   c = (char) stbi__get8(z); + +   while (!stbi__at_eof(z) && c != '\n') { +      buffer[len++] = c; +      if (len == STBI__HDR_BUFLEN-1) { +         // flush to end of line +         while (!stbi__at_eof(z) && stbi__get8(z) != '\n') +            ; +         break; +      } +      c = (char) stbi__get8(z); +   } + +   buffer[len] = 0; +   return buffer; +} + +static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp) +{ +   if ( input[3] != 0 ) { +      float f1; +      // Exponent +      f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8)); +      if (req_comp <= 2) +         output[0] = (input[0] + input[1] + input[2]) * f1 / 3; +      else { +         output[0] = input[0] * f1; +         output[1] = input[1] * f1; +         output[2] = input[2] * f1; +      } +      if (req_comp == 2) output[1] = 1; +      if (req_comp == 4) output[3] = 1; +   } else { +      switch (req_comp) { +         case 4: output[3] = 1; /* fallthrough */ +         case 3: output[0] = output[1] = output[2] = 0; +                 break; +         case 2: output[1] = 1; /* fallthrough */ +         case 1: output[0] = 0; +                 break; +      } +   } +} + +static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ +   char buffer[STBI__HDR_BUFLEN]; +   char *token; +   int valid = 0; +   int width, height; +   stbi_uc *scanline; +   float *hdr_data; +   int len; +   unsigned char count, value; +   int i, j, k, c1,c2, z; +   const char *headerToken; +   STBI_NOTUSED(ri); + +   // Check identifier +   headerToken = stbi__hdr_gettoken(s,buffer); +   if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0) +      return stbi__errpf("not HDR", "Corrupt HDR image"); + +   // Parse header +   for(;;) { +      token = stbi__hdr_gettoken(s,buffer); +      if (token[0] == 0) break; +      if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; +   } + +   if (!valid)    return stbi__errpf("unsupported format", "Unsupported HDR format"); + +   // Parse width and height +   // can't use sscanf() if we're not using stdio! 
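+   // For illustration (an assumed example, not taken from any particular file):
+   // a header in the one supported layout looks like
+   //   #?RADIANCE
+   //   FORMAT=32-bit_rle_rgbe
+   //   (empty line terminating the header)
+   //   -Y 480 +X 640
+   // i.e. the resolution line gives height first, then width; any other
+   // -Y/+X arrangement is rejected below as "Unsupported HDR format".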
+   token = stbi__hdr_gettoken(s,buffer); +   if (strncmp(token, "-Y ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format"); +   token += 3; +   height = (int) strtol(token, &token, 10); +   while (*token == ' ') ++token; +   if (strncmp(token, "+X ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format"); +   token += 3; +   width = (int) strtol(token, NULL, 10); + +   if (height > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)"); +   if (width > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)"); + +   *x = width; +   *y = height; + +   if (comp) *comp = 3; +   if (req_comp == 0) req_comp = 3; + +   if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0)) +      return stbi__errpf("too large", "HDR image is too large"); + +   // Read data +   hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0); +   if (!hdr_data) +      return stbi__errpf("outofmem", "Out of memory"); + +   // Load image data +   // image data is stored as some number of sca +   if ( width < 8 || width >= 32768) { +      // Read flat data +      for (j=0; j < height; ++j) { +         for (i=0; i < width; ++i) { +            stbi_uc rgbe[4]; +           main_decode_loop: +            stbi__getn(s, rgbe, 4); +            stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp); +         } +      } +   } else { +      // Read RLE-encoded data +      scanline = NULL; + +      for (j = 0; j < height; ++j) { +         c1 = stbi__get8(s); +         c2 = stbi__get8(s); +         len = stbi__get8(s); +         if (c1 != 2 || c2 != 2 || (len & 0x80)) { +            // not run-length encoded, so we have to actually use THIS data as a decoded +            // pixel (note this can't be a valid pixel--one of RGB must be >= 128) +            stbi_uc rgbe[4]; +            rgbe[0] = (stbi_uc) c1; +            rgbe[1] = (stbi_uc) c2; +            rgbe[2] = (stbi_uc) len; +            rgbe[3] = (stbi_uc) stbi__get8(s); +            stbi__hdr_convert(hdr_data, rgbe, req_comp); +            i = 1; +            j = 0; +            STBI_FREE(scanline); +            goto main_decode_loop; // yes, this makes no sense +         } +         len <<= 8; +         len |= stbi__get8(s); +         if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); } +         if (scanline == NULL) { +            scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0); +            if (!scanline) { +               STBI_FREE(hdr_data); +               return stbi__errpf("outofmem", "Out of memory"); +            } +         } + +         for (k = 0; k < 4; ++k) { +            int nleft; +            i = 0; +            while ((nleft = width - i) > 0) { +               count = stbi__get8(s); +               if (count > 128) { +                  // Run +                  value = stbi__get8(s); +                  count -= 128; +                  if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } +                  for (z = 0; z < count; ++z) +                     scanline[i++ * 4 + k] = value; +               } else { +                  // Dump +                  if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } +                  for (z = 0; z < 
count; ++z) +                     scanline[i++ * 4 + k] = stbi__get8(s); +               } +            } +         } +         for (i=0; i < width; ++i) +            stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp); +      } +      if (scanline) +         STBI_FREE(scanline); +   } + +   return hdr_data; +} + +static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp) +{ +   char buffer[STBI__HDR_BUFLEN]; +   char *token; +   int valid = 0; +   int dummy; + +   if (!x) x = &dummy; +   if (!y) y = &dummy; +   if (!comp) comp = &dummy; + +   if (stbi__hdr_test(s) == 0) { +       stbi__rewind( s ); +       return 0; +   } + +   for(;;) { +      token = stbi__hdr_gettoken(s,buffer); +      if (token[0] == 0) break; +      if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; +   } + +   if (!valid) { +       stbi__rewind( s ); +       return 0; +   } +   token = stbi__hdr_gettoken(s,buffer); +   if (strncmp(token, "-Y ", 3)) { +       stbi__rewind( s ); +       return 0; +   } +   token += 3; +   *y = (int) strtol(token, &token, 10); +   while (*token == ' ') ++token; +   if (strncmp(token, "+X ", 3)) { +       stbi__rewind( s ); +       return 0; +   } +   token += 3; +   *x = (int) strtol(token, NULL, 10); +   *comp = 3; +   return 1; +} +#endif // STBI_NO_HDR + +#ifndef STBI_NO_BMP +static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp) +{ +   void *p; +   stbi__bmp_data info; + +   info.all_a = 255; +   p = stbi__bmp_parse_header(s, &info); +   if (p == NULL) { +      stbi__rewind( s ); +      return 0; +   } +   if (x) *x = s->img_x; +   if (y) *y = s->img_y; +   if (comp) { +      if (info.bpp == 24 && info.ma == 0xff000000) +         *comp = 3; +      else +         *comp = info.ma ? 
4 : 3; +   } +   return 1; +} +#endif + +#ifndef STBI_NO_PSD +static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp) +{ +   int channelCount, dummy, depth; +   if (!x) x = &dummy; +   if (!y) y = &dummy; +   if (!comp) comp = &dummy; +   if (stbi__get32be(s) != 0x38425053) { +       stbi__rewind( s ); +       return 0; +   } +   if (stbi__get16be(s) != 1) { +       stbi__rewind( s ); +       return 0; +   } +   stbi__skip(s, 6); +   channelCount = stbi__get16be(s); +   if (channelCount < 0 || channelCount > 16) { +       stbi__rewind( s ); +       return 0; +   } +   *y = stbi__get32be(s); +   *x = stbi__get32be(s); +   depth = stbi__get16be(s); +   if (depth != 8 && depth != 16) { +       stbi__rewind( s ); +       return 0; +   } +   if (stbi__get16be(s) != 3) { +       stbi__rewind( s ); +       return 0; +   } +   *comp = 4; +   return 1; +} + +static int stbi__psd_is16(stbi__context *s) +{ +   int channelCount, depth; +   if (stbi__get32be(s) != 0x38425053) { +       stbi__rewind( s ); +       return 0; +   } +   if (stbi__get16be(s) != 1) { +       stbi__rewind( s ); +       return 0; +   } +   stbi__skip(s, 6); +   channelCount = stbi__get16be(s); +   if (channelCount < 0 || channelCount > 16) { +       stbi__rewind( s ); +       return 0; +   } +   STBI_NOTUSED(stbi__get32be(s)); +   STBI_NOTUSED(stbi__get32be(s)); +   depth = stbi__get16be(s); +   if (depth != 16) { +       stbi__rewind( s ); +       return 0; +   } +   return 1; +} +#endif + +#ifndef STBI_NO_PIC +static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp) +{ +   int act_comp=0,num_packets=0,chained,dummy; +   stbi__pic_packet packets[10]; + +   if (!x) x = &dummy; +   if (!y) y = &dummy; +   if (!comp) comp = &dummy; + +   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) { +      stbi__rewind(s); +      return 0; +   } + +   stbi__skip(s, 88); + +   *x = stbi__get16be(s); +   *y = stbi__get16be(s); +   if (stbi__at_eof(s)) { +      stbi__rewind( s); +      return 0; +   } +   if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) { +      stbi__rewind( s ); +      return 0; +   } + +   stbi__skip(s, 8); + +   do { +      stbi__pic_packet *packet; + +      if (num_packets==sizeof(packets)/sizeof(packets[0])) +         return 0; + +      packet = &packets[num_packets++]; +      chained = stbi__get8(s); +      packet->size    = stbi__get8(s); +      packet->type    = stbi__get8(s); +      packet->channel = stbi__get8(s); +      act_comp |= packet->channel; + +      if (stbi__at_eof(s)) { +          stbi__rewind( s ); +          return 0; +      } +      if (packet->size != 8) { +          stbi__rewind( s ); +          return 0; +      } +   } while (chained); + +   *comp = (act_comp & 0x10 ? 
4 : 3); + +   return 1; +} +#endif + +// ************************************************************************************************* +// Portable Gray Map and Portable Pixel Map loader +// by Ken Miller +// +// PGM: http://netpbm.sourceforge.net/doc/pgm.html +// PPM: http://netpbm.sourceforge.net/doc/ppm.html +// +// Known limitations: +//    Does not support comments in the header section +//    Does not support ASCII image data (formats P2 and P3) + +#ifndef STBI_NO_PNM + +static int      stbi__pnm_test(stbi__context *s) +{ +   char p, t; +   p = (char) stbi__get8(s); +   t = (char) stbi__get8(s); +   if (p != 'P' || (t != '5' && t != '6')) { +       stbi__rewind( s ); +       return 0; +   } +   return 1; +} + +static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ +   stbi_uc *out; +   STBI_NOTUSED(ri); + +   ri->bits_per_channel = stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n); +   if (ri->bits_per_channel == 0) +      return 0; + +   if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); +   if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + +   *x = s->img_x; +   *y = s->img_y; +   if (comp) *comp = s->img_n; + +   if (!stbi__mad4sizes_valid(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0)) +      return stbi__errpuc("too large", "PNM too large"); + +   out = (stbi_uc *) stbi__malloc_mad4(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0); +   if (!out) return stbi__errpuc("outofmem", "Out of memory"); +   if (!stbi__getn(s, out, s->img_n * s->img_x * s->img_y * (ri->bits_per_channel / 8))) { +      STBI_FREE(out); +      return stbi__errpuc("bad PNM", "PNM file truncated"); +   } + +   if (req_comp && req_comp != s->img_n) { +      if (ri->bits_per_channel == 16) { +         out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, s->img_n, req_comp, s->img_x, s->img_y); +      } else { +         out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y); +      } +      if (out == NULL) return out; // stbi__convert_format frees input on failure +   } +   return out; +} + +static int      stbi__pnm_isspace(char c) +{ +   return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'; +} + +static void     stbi__pnm_skip_whitespace(stbi__context *s, char *c) +{ +   for (;;) { +      while (!stbi__at_eof(s) && stbi__pnm_isspace(*c)) +         *c = (char) stbi__get8(s); + +      if (stbi__at_eof(s) || *c != '#') +         break; + +      while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' ) +         *c = (char) stbi__get8(s); +   } +} + +static int      stbi__pnm_isdigit(char c) +{ +   return c >= '0' && c <= '9'; +} + +static int      stbi__pnm_getinteger(stbi__context *s, char *c) +{ +   int value = 0; + +   while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) { +      value = value*10 + (*c - '0'); +      *c = (char) stbi__get8(s); +      if((value > 214748364) || (value == 214748364 && *c > '7')) +          return stbi__err("integer parse overflow", "Parsing an integer in the PPM header overflowed a 32-bit int"); +   } + +   return value; +} + +static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp) +{ +   int maxv, dummy; +   char c, p, t; + +   if (!x) x = &dummy; +   if (!y) y = &dummy; +   if (!comp) comp = &dummy; + +   stbi__rewind(s); + +   // Get identifier +   p = (char) stbi__get8(s); +   t = (char) stbi__get8(s); +   
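+   // For illustration, a minimal accepted header is e.g. "P6\n640 480\n255\n"
+   // (magic, width, height, maxval) followed directly by binary sample data;
+   // below, maxval <= 255 selects 8-bit output and 256..65535 selects 16-bit.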
if (p != 'P' || (t != '5' && t != '6')) { +       stbi__rewind(s); +       return 0; +   } + +   *comp = (t == '6') ? 3 : 1;  // '5' is 1-component .pgm; '6' is 3-component .ppm + +   c = (char) stbi__get8(s); +   stbi__pnm_skip_whitespace(s, &c); + +   *x = stbi__pnm_getinteger(s, &c); // read width +   if(*x == 0) +       return stbi__err("invalid width", "PPM image header had zero or overflowing width"); +   stbi__pnm_skip_whitespace(s, &c); + +   *y = stbi__pnm_getinteger(s, &c); // read height +   if (*y == 0) +       return stbi__err("invalid width", "PPM image header had zero or overflowing width"); +   stbi__pnm_skip_whitespace(s, &c); + +   maxv = stbi__pnm_getinteger(s, &c);  // read max value +   if (maxv > 65535) +      return stbi__err("max value > 65535", "PPM image supports only 8-bit and 16-bit images"); +   else if (maxv > 255) +      return 16; +   else +      return 8; +} + +static int stbi__pnm_is16(stbi__context *s) +{ +   if (stbi__pnm_info(s, NULL, NULL, NULL) == 16) +	   return 1; +   return 0; +} +#endif + +static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp) +{ +   #ifndef STBI_NO_JPEG +   if (stbi__jpeg_info(s, x, y, comp)) return 1; +   #endif + +   #ifndef STBI_NO_PNG +   if (stbi__png_info(s, x, y, comp))  return 1; +   #endif + +   #ifndef STBI_NO_GIF +   if (stbi__gif_info(s, x, y, comp))  return 1; +   #endif + +   #ifndef STBI_NO_BMP +   if (stbi__bmp_info(s, x, y, comp))  return 1; +   #endif + +   #ifndef STBI_NO_PSD +   if (stbi__psd_info(s, x, y, comp))  return 1; +   #endif + +   #ifndef STBI_NO_PIC +   if (stbi__pic_info(s, x, y, comp))  return 1; +   #endif + +   #ifndef STBI_NO_PNM +   if (stbi__pnm_info(s, x, y, comp))  return 1; +   #endif + +   #ifndef STBI_NO_HDR +   if (stbi__hdr_info(s, x, y, comp))  return 1; +   #endif + +   // test tga last because it's a crappy test! 
+   #ifndef STBI_NO_TGA +   if (stbi__tga_info(s, x, y, comp)) +       return 1; +   #endif +   return stbi__err("unknown image type", "Image not of any known type, or corrupt"); +} + +static int stbi__is_16_main(stbi__context *s) +{ +   #ifndef STBI_NO_PNG +   if (stbi__png_is16(s))  return 1; +   #endif + +   #ifndef STBI_NO_PSD +   if (stbi__psd_is16(s))  return 1; +   #endif + +   #ifndef STBI_NO_PNM +   if (stbi__pnm_is16(s))  return 1; +   #endif +   return 0; +} + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp) +{ +    FILE *f = stbi__fopen(filename, "rb"); +    int result; +    if (!f) return stbi__err("can't fopen", "Unable to open file"); +    result = stbi_info_from_file(f, x, y, comp); +    fclose(f); +    return result; +} + +STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp) +{ +   int r; +   stbi__context s; +   long pos = ftell(f); +   stbi__start_file(&s, f); +   r = stbi__info_main(&s,x,y,comp); +   fseek(f,pos,SEEK_SET); +   return r; +} + +STBIDEF int stbi_is_16_bit(char const *filename) +{ +    FILE *f = stbi__fopen(filename, "rb"); +    int result; +    if (!f) return stbi__err("can't fopen", "Unable to open file"); +    result = stbi_is_16_bit_from_file(f); +    fclose(f); +    return result; +} + +STBIDEF int stbi_is_16_bit_from_file(FILE *f) +{ +   int r; +   stbi__context s; +   long pos = ftell(f); +   stbi__start_file(&s, f); +   r = stbi__is_16_main(&s); +   fseek(f,pos,SEEK_SET); +   return r; +} +#endif // !STBI_NO_STDIO + +STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp) +{ +   stbi__context s; +   stbi__start_mem(&s,buffer,len); +   return stbi__info_main(&s,x,y,comp); +} + +STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp) +{ +   stbi__context s; +   stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); +   return stbi__info_main(&s,x,y,comp); +} + +STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len) +{ +   stbi__context s; +   stbi__start_mem(&s,buffer,len); +   return stbi__is_16_main(&s); +} + +STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user) +{ +   stbi__context s; +   stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); +   return stbi__is_16_main(&s); +} + +#endif // STB_IMAGE_IMPLEMENTATION + +/* +   revision history: +      2.20  (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs +      2.19  (2018-02-11) fix warning +      2.18  (2018-01-30) fix warnings +      2.17  (2018-01-29) change sbti__shiftsigned to avoid clang -O2 bug +                         1-bit BMP +                         *_is_16_bit api +                         avoid warnings +      2.16  (2017-07-23) all functions have 16-bit variants; +                         STBI_NO_STDIO works again; +                         compilation fixes; +                         fix rounding in unpremultiply; +                         optimize vertical flip; +                         disable raw_len validation; +                         documentation fixes +      2.15  (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode; +                         warning fixes; disable run-time SSE detection on gcc; +                         uniform handling of optional "return" values; +                         thread-safe initialization of zlib tables +      2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs +      2.13  (2016-11-29) 
add 16-bit API, only supported for PNG right now +      2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes +      2.11  (2016-04-02) allocate large structures on the stack +                         remove white matting for transparent PSD +                         fix reported channel count for PNG & BMP +                         re-enable SSE2 in non-gcc 64-bit +                         support RGB-formatted JPEG +                         read 16-bit PNGs (only as 8-bit) +      2.10  (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED +      2.09  (2016-01-16) allow comments in PNM files +                         16-bit-per-pixel TGA (not bit-per-component) +                         info() for TGA could break due to .hdr handling +                         info() for BMP to shares code instead of sloppy parse +                         can use STBI_REALLOC_SIZED if allocator doesn't support realloc +                         code cleanup +      2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA +      2.07  (2015-09-13) fix compiler warnings +                         partial animated GIF support +                         limited 16-bpc PSD support +                         #ifdef unused functions +                         bug with < 92 byte PIC,PNM,HDR,TGA +      2.06  (2015-04-19) fix bug where PSD returns wrong '*comp' value +      2.05  (2015-04-19) fix bug in progressive JPEG handling, fix warning +      2.04  (2015-04-15) try to re-enable SIMD on MinGW 64-bit +      2.03  (2015-04-12) extra corruption checking (mmozeiko) +                         stbi_set_flip_vertically_on_load (nguillemot) +                         fix NEON support; fix mingw support +      2.02  (2015-01-19) fix incorrect assert, fix warning +      2.01  (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2 +      2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG +      2.00  (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg) +                         progressive JPEG (stb) +                         PGM/PPM support (Ken Miller) +                         STBI_MALLOC,STBI_REALLOC,STBI_FREE +                         GIF bugfix -- seemingly never worked +                         STBI_NO_*, STBI_ONLY_* +      1.48  (2014-12-14) fix incorrectly-named assert() +      1.47  (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb) +                         optimize PNG (ryg) +                         fix bug in interlaced PNG with user-specified channel count (stb) +      1.46  (2014-08-26) +              fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG +      1.45  (2014-08-16) +              fix MSVC-ARM internal compiler error by wrapping malloc +      1.44  (2014-08-07) +              various warning fixes from Ronny Chevalier +      1.43  (2014-07-15) +              fix MSVC-only compiler problem in code changed in 1.42 +      1.42  (2014-07-09) +              don't define _CRT_SECURE_NO_WARNINGS (affects user code) +              fixes to stbi__cleanup_jpeg path +              added STBI_ASSERT to avoid requiring assert.h +      1.41  (2014-06-25) +              fix search&replace from 1.36 that messed up comments/error messages +      1.40  (2014-06-22) +              fix gcc struct-initialization warning +      1.39  (2014-06-15) +              fix to TGA optimization when req_comp != number of components in TGA; +              fix to GIF loading because BMP wasn't rewinding (whoops, no 
GIFs in my test suite) +              add support for BMP version 5 (more ignored fields) +      1.38  (2014-06-06) +              suppress MSVC warnings on integer casts truncating values +              fix accidental rename of 'skip' field of I/O +      1.37  (2014-06-04) +              remove duplicate typedef +      1.36  (2014-06-03) +              convert to header file single-file library +              if de-iphone isn't set, load iphone images color-swapped instead of returning NULL +      1.35  (2014-05-27) +              various warnings +              fix broken STBI_SIMD path +              fix bug where stbi_load_from_file no longer left file pointer in correct place +              fix broken non-easy path for 32-bit BMP (possibly never used) +              TGA optimization by Arseny Kapoulkine +      1.34  (unknown) +              use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case +      1.33  (2011-07-14) +              make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements +      1.32  (2011-07-13) +              support for "info" function for all supported filetypes (SpartanJ) +      1.31  (2011-06-20) +              a few more leak fixes, bug in PNG handling (SpartanJ) +      1.30  (2011-06-11) +              added ability to load files via callbacks to accomidate custom input streams (Ben Wenger) +              removed deprecated format-specific test/load functions +              removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway +              error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha) +              fix inefficiency in decoding 32-bit BMP (David Woo) +      1.29  (2010-08-16) +              various warning fixes from Aurelien Pocheville +      1.28  (2010-08-01) +              fix bug in GIF palette transparency (SpartanJ) +      1.27  (2010-08-01) +              cast-to-stbi_uc to fix warnings +      1.26  (2010-07-24) +              fix bug in file buffering for PNG reported by SpartanJ +      1.25  (2010-07-17) +              refix trans_data warning (Won Chun) +      1.24  (2010-07-12) +              perf improvements reading from files on platforms with lock-heavy fgetc() +              minor perf improvements for jpeg +              deprecated type-specific functions so we'll get feedback if they're needed +              attempt to fix trans_data warning (Won Chun) +      1.23    fixed bug in iPhone support +      1.22  (2010-07-10) +              removed image *writing* support +              stbi_info support from Jetro Lauha +              GIF support from Jean-Marc Lienher +              iPhone PNG-extensions from James Brown +              warning-fixes from Nicolas Schulz and Janez Zemva (i.stbi__err. 
Janez Žemva)
+      1.21    fix use of 'stbi_uc' in header (reported by jon blow)
+      1.20    added support for Softimage PIC, by Tom Seddon
+      1.19    bug in interlaced PNG corruption check (found by ryg)
+      1.18  (2008-08-02)
+              fix a threading bug (local mutable static)
+      1.17    support interlaced PNG
+      1.16    major bugfix - stbi__convert_format converted one too many pixels
+      1.15    initialize some fields for thread safety
+      1.14    fix threadsafe conversion bug
+              header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
+      1.13    threadsafe
+      1.12    const qualifiers in the API
+      1.11    Support installable IDCT, colorspace conversion routines
+      1.10    Fixes for 64-bit (don't use "unsigned long")
+              optimized upsampling by Fabian "ryg" Giesen
+      1.09    Fix format-conversion for PSD code (bad global variables!)
+      1.08    Thatcher Ulrich's PSD code integrated by Nicolas Schulz
+      1.07    attempt to fix C++ warning/errors again
+      1.06    attempt to fix C++ warning/errors again
+      1.05    fix TGA loading to return correct *comp and use good luminance calc
+      1.04    default float alpha is 1, not 255; use 'void *' for stbi_image_free
+      1.03    bugfixes to STBI_NO_STDIO, STBI_NO_HDR
+      1.02    support for (subset of) HDR files, float interface for preferred access to them
+      1.01    fix bug: possible bug in handling right-side up bmps... not sure
+              fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
+      1.00    interface to zlib that skips zlib header
+      0.99    correct handling of alpha in palette
+      0.98    TGA loader by lonesock; dynamically add loaders (untested)
+      0.97    jpeg errors on too large a file; also catch another malloc failure
+      0.96    fix detection of invalid v value - particleman@mollyrocket forum
+      0.95    during header scan, seek to markers in case of padding
+      0.94    STBI_NO_STDIO to disable stdio usage; rename all #defines the same
+      0.93    handle jpegtran output; verbose errors
+      0.92    read 4,8,16,24,32-bit BMP files of several formats
+      0.91    output 24-bit Windows 3.0 BMP files
+      0.90    fix a few more warnings; bump version number to approach 1.0
+      0.61    bugfixes due to Marc LeBlanc, Christopher Lloyd
+      0.60    fix compiling as c++
+      0.59    fix warnings: merge Dave Moore's -Wall fixes
+      0.58    fix bug: zlib uncompressed mode len/nlen was wrong endian
+      0.57    fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
+      0.56    fix bug: zlib uncompressed mode len vs. nlen
+      0.55    fix bug: restart_interval not initialized to 0
+      0.54    allow NULL for 'int *comp'
+      0.53    fix bug in png 3->4; speedup png decoding
+      0.52    png handles req_comp=3,4 directly; minor cleanup; jpeg comments
+      0.51    obey req_comp requests, 1-component jpegs return as 1-component,
+              on 'test' only check type, not whether we support this variant
+      0.50  (2006-11-19)
+              first released version
+*/
+
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/vendor/tiny_obj_loader_c.h b/vendor/tiny_obj_loader_c.h new file mode 100644 index 0000000..09087fe --- /dev/null +++ b/vendor/tiny_obj_loader_c.h @@ -0,0 +1,1793 @@ +/* +   The MIT License (MIT) + +   Copyright (c) 2016 - 2019 Syoyo Fujita and many contributors. + +   Permission is hereby granted, free of charge, to any person obtaining a copy +   of this software and associated documentation files (the "Software"), to deal +   in the Software without restriction, including without limitation the rights +   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +   copies of the Software, and to permit persons to whom the Software is +   furnished to do so, subject to the following conditions: + +   The above copyright notice and this permission notice shall be included in +   all copies or substantial portions of the Software. 
+ +   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +   THE SOFTWARE. +   */ +#ifndef TINOBJ_LOADER_C_H_ +#define TINOBJ_LOADER_C_H_ + +/* @todo { Remove stddef dependency. size_t? } */ +#include <stddef.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { +  char *name; + +  float ambient[3]; +  float diffuse[3]; +  float specular[3]; +  float transmittance[3]; +  float emission[3]; +  float shininess; +  float ior;      /* index of refraction */ +  float dissolve; /* 1 == opaque; 0 == fully transparent */ +  /* illumination model (see http://www.fileformat.info/format/material/) */ +  int illum; + +  int pad0; + +  char *ambient_texname;            /* map_Ka */ +  char *diffuse_texname;            /* map_Kd */ +  char *specular_texname;           /* map_Ks */ +  char *specular_highlight_texname; /* map_Ns */ +  char *bump_texname;               /* map_bump, bump */ +  char *displacement_texname;       /* disp */ +  char *alpha_texname;              /* map_d */ +} tinyobj_material_t; + +typedef struct { +  char *name; /* group name or object name. */ +  unsigned int face_offset; +  unsigned int length; +} tinyobj_shape_t; + +typedef struct { +  int v_idx, vt_idx, vn_idx; +} tinyobj_vertex_index_t; + +typedef struct { +  unsigned int num_vertices; +  unsigned int num_normals; +  unsigned int num_texcoords; +  unsigned int num_faces; +  unsigned int num_face_num_verts; + +  int pad0; + +  float *vertices; +  float *normals; +  float *texcoords; +  tinyobj_vertex_index_t *faces; +  int *face_num_verts; +  int *material_ids; +} tinyobj_attrib_t; + +#define TINYOBJ_FLAG_TRIANGULATE (1 << 0) + +#define TINYOBJ_INVALID_INDEX (0x80000000) + +#define TINYOBJ_SUCCESS (0) +#define TINYOBJ_ERROR_EMPTY (-1) +#define TINYOBJ_ERROR_INVALID_PARAMETER (-2) +#define TINYOBJ_ERROR_FILE_OPERATION (-3) + +/* Provide a callback that can read text file without any parsing or + * modification. The obj and mtl parser is going to read all the necessary data: + * tinyobj_parse_obj + * tinyobj_parse_mtl_file + * + * @param[in] ctx User provided context. + * @param[in] filename Filename to be loaded. + * @param[in] is_mtl 1 when the callback is invoked for loading .mtl. 0 for .obj + * @param[in] obj_filename .obj filename. Useful when you load .mtl from same + * location of .obj. When the callback is called to load .obj, `filename` and + * `obj_filename` are same. + * @param[out] buf Content of loaded file + * @param[out] len Size of content(file) + */ +typedef void (*file_reader_callback)(void *ctx, const char *filename, +                                     int is_mtl, const char *obj_filename, +                                     char **buf, size_t *len); + +/* Parse wavefront .obj + * @param[out] attrib Attibutes + * @param[out] shapes Array of parsed shapes + * @param[out] num_shapes Array length of `shapes` + * @param[out] materials Array of parsed materials + * @param[out] num_materials Array length of `materials` + * @param[in] file_name File name of .obj + * @param[in] file_reader File reader callback function(to read .obj and .mtl). 
+ * @param[in] ctx Context pointer passed to the file_reader_callback. + * @param[in] flags combination of TINYOBJ_FLAG_*** + * + * Returns TINYOBJ_SUCCESS if things goes well. + * Returns TINYOBJ_ERROR_*** when there is an error. + */ +extern int tinyobj_parse_obj(tinyobj_attrib_t *attrib, tinyobj_shape_t **shapes, +                             size_t *num_shapes, tinyobj_material_t **materials, +                             size_t *num_materials, const char *file_name, +                             file_reader_callback file_reader, void *ctx, +                             unsigned int flags); + +/* Parse wavefront .mtl + * + * @param[out] materials_out + * @param[out] num_materials_out + * @param[in] filename .mtl filename + * @param[in] filename of .obj filename. could be NULL if you just want to parse + .mtl file. + * @param[in] file_reader File reader callback + * @param[in[ ctx Context pointer passed to the file_reader callack. + + * Returns TINYOBJ_SUCCESS if things goes well. + * Returns TINYOBJ_ERROR_*** when there is an error. + */ +extern int tinyobj_parse_mtl_file(tinyobj_material_t **materials_out, +                                  size_t *num_materials_out, +                                  const char *filename, +                                  const char *obj_filename, +                                  file_reader_callback file_reader, void *ctx); + +extern void tinyobj_attrib_init(tinyobj_attrib_t *attrib); +extern void tinyobj_attrib_free(tinyobj_attrib_t *attrib); +extern void tinyobj_shapes_free(tinyobj_shape_t *shapes, size_t num_shapes); +extern void tinyobj_materials_free(tinyobj_material_t *materials, +                                   size_t num_materials); + +#ifdef __cplusplus +} +#endif +#endif /* TINOBJ_LOADER_C_H_ */ + +#ifdef TINYOBJ_LOADER_C_IMPLEMENTATION +#include <assert.h> +#include <errno.h> +#include <stdio.h> +#include <string.h> + +#if defined(TINYOBJ_MALLOC) && defined(TINYOBJ_CALLOC) &&                      \ +    defined(TINYOBJ_FREE) &&                                                   \ +    (defined(TINYOBJ_REALLOC) || defined(TINYOBJ_REALLOC_SIZED)) +/* ok */ +#elif !defined(TINYOBJ_MALLOC) && !defined(TINYOBJ_CALLOC) &&                  \ +    !defined(TINYOBJ_FREE) && !defined(TINYOBJ_REALLOC) &&                     \ +    !defined(TINYOBJ_REALLOC_SIZED) +/* ok */ +#else +#error                                                                         \ +    "Must define all or none of TINYOBJ_MALLOC, TINYOBJ_CALLOC, TINYOBJ_FREE, and TINYOBJ_REALLOC (or TINYOBJ_REALLOC_SIZED)." 
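+/* Illustrative (hypothetical) override: a user supplying custom allocators
+   would define all of these before including this file with
+   TINYOBJ_LOADER_C_IMPLEMENTATION, for example:
+     #define TINYOBJ_MALLOC  my_malloc
+     #define TINYOBJ_CALLOC  my_calloc
+     #define TINYOBJ_REALLOC my_realloc
+     #define TINYOBJ_FREE    my_free
+   (the my_* names are placeholders); otherwise the <stdlib.h> defaults below
+   are used. */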
+#endif + +#ifndef TINYOBJ_MALLOC +#include <stdlib.h> +#define TINYOBJ_MALLOC malloc +#define TINYOBJ_REALLOC realloc +#define TINYOBJ_CALLOC calloc +#define TINYOBJ_FREE free +#endif + +#ifndef TINYOBJ_REALLOC_SIZED +#define TINYOBJ_REALLOC_SIZED(p, oldsz, newsz) TINYOBJ_REALLOC(p, newsz) +#endif + +#define TINYOBJ_MAX_FACES_PER_F_LINE (16) +#define TINYOBJ_MAX_FILEPATH (8192) + +#define IS_SPACE(x) (((x) == ' ') || ((x) == '\t')) +#define IS_DIGIT(x) ((unsigned int)((x) - '0') < (unsigned int)(10)) +#define IS_NEW_LINE(x) (((x) == '\r') || ((x) == '\n') || ((x) == '\0')) + +static void skip_space(const char **token) { +  while ((*token)[0] == ' ' || (*token)[0] == '\t') { +    (*token)++; +  } +} + +static void skip_space_and_cr(const char **token) { +  while ((*token)[0] == ' ' || (*token)[0] == '\t' || (*token)[0] == '\r') { +    (*token)++; +  } +} + +static int until_space(const char *token) { +  const char *p = token; +  while (p[0] != '\0' && p[0] != ' ' && p[0] != '\t' && p[0] != '\r') { +    p++; +  } + +  return (int)(p - token); +} + +static size_t length_until_newline(const char *token, size_t n) { +  size_t len = 0; + +  /* Assume token[n-1] = '\0' */ +  for (len = 0; len < n - 1; len++) { +    if (token[len] == '\n') { +      break; +    } +    if ((token[len] == '\r') && ((len < (n - 2)) && (token[len + 1] != '\n'))) { +      break; +    } +  } + +  return len; +} + +static size_t length_until_line_feed(const char *token, size_t n) { +  size_t len = 0; + +  /* Assume token[n-1] = '\0' */ +  for (len = 0; len < n; len++) { +    if ((token[len] == '\n') || (token[len] == '\r')) { +      break; +    } +  } + +  return len; +} + +/* http://stackoverflow.com/questions/5710091/how-does-atoi-function-in-c-work + */ +static int my_atoi(const char *c) { +  int value = 0; +  int sign = 1; +  if (*c == '+' || *c == '-') { +    if (*c == '-') +      sign = -1; +    c++; +  } +  while (((*c) >= '0') && ((*c) <= '9')) { /* isdigit(*c) */ +    value *= 10; +    value += (int)(*c - '0'); +    c++; +  } +  return value * sign; +} + +/* Make index zero-base, and also support relative index. 
*/ +static int fixIndex(int idx, size_t n) { +  if (idx > 0) +    return idx - 1; +  if (idx == 0) +    return 0; +  return (int)n + idx; /* negative value = relative */ +} + +/* Parse raw triples: i, i/j/k, i//k, i/j */ +static tinyobj_vertex_index_t parseRawTriple(const char **token) { +  tinyobj_vertex_index_t vi; +  /* 0x80000000 = -2147483648 = invalid */ +  vi.v_idx = (int)(0x80000000); +  vi.vn_idx = (int)(0x80000000); +  vi.vt_idx = (int)(0x80000000); + +  vi.v_idx = my_atoi((*token)); +  while ((*token)[0] != '\0' && (*token)[0] != '/' && (*token)[0] != ' ' && +         (*token)[0] != '\t' && (*token)[0] != '\r') { +    (*token)++; +  } +  if ((*token)[0] != '/') { +    return vi; +  } +  (*token)++; + +  /* i//k */ +  if ((*token)[0] == '/') { +    (*token)++; +    vi.vn_idx = my_atoi((*token)); +    while ((*token)[0] != '\0' && (*token)[0] != '/' && (*token)[0] != ' ' && +           (*token)[0] != '\t' && (*token)[0] != '\r') { +      (*token)++; +    } +    return vi; +  } + +  /* i/j/k or i/j */ +  vi.vt_idx = my_atoi((*token)); +  while ((*token)[0] != '\0' && (*token)[0] != '/' && (*token)[0] != ' ' && +         (*token)[0] != '\t' && (*token)[0] != '\r') { +    (*token)++; +  } +  if ((*token)[0] != '/') { +    return vi; +  } + +  /* i/j/k */ +  (*token)++; /* skip '/' */ +  vi.vn_idx = my_atoi((*token)); +  while ((*token)[0] != '\0' && (*token)[0] != '/' && (*token)[0] != ' ' && +         (*token)[0] != '\t' && (*token)[0] != '\r') { +    (*token)++; +  } +  return vi; +} + +static int parseInt(const char **token) { +  int i = 0; +  skip_space(token); +  i = my_atoi((*token)); +  (*token) += until_space((*token)); +  return i; +} + +/* + * Tries to parse a floating point number located at s. + * + * s_end should be a location in the string where reading should absolutely + * stop. For example at the end of the string, to prevent buffer overflows. + * + * Parses the following EBNF grammar: + *   sign    = "+" | "-" ; + *   END     = ? anything not in digit ? + *   digit   = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; + *   integer = [sign] , digit , {digit} ; + *   decimal = integer , ["." , integer] ; + *   float   = ( decimal , END ) | ( decimal , ("E" | "e") , integer , END ) ; + * + *  Valid strings are for example: + *   -0  +3.1417e+2  -0.0E-3  1.0324  -1.41   11e2 + * + * If the parsing is a success, result is set to the parsed value and true + * is returned. + * + * The function is greedy and will parse until any of the following happens: + *  - a non-conforming character is encountered. + *  - s_end is reached. + * + * The following situations triggers a failure: + *  - s >= s_end. + *  - parse failure. + */ +static int tryParseDouble(const char *s, const char *s_end, double *result) { +  double mantissa = 0.0; +  /* This exponent is base 2 rather than 10. +   * However the exponent we parse is supposed to be one of ten, +   * thus we must take care to convert the exponent/and or the +   * mantissa to a * 2^E, where a is the mantissa and E is the +   * exponent. +   * To get the final double we will use ldexp, it requires the +   * exponent to be in base 2. +   */ +  int exponent = 0; + +  /* NOTE: THESE MUST BE DECLARED HERE SINCE WE ARE NOT ALLOWED +   * TO JUMP OVER DEFINITIONS. +   */ +  char sign = '+'; +  char exp_sign = '+'; +  char const *curr = s; + +  /* How many characters were read in a loop. */ +  int read = 0; +  /* Tells whether a loop terminated due to reaching s_end. */ +  int end_not_reached = 0; + +  /* +     BEGIN PARSING. 
+     */ + +  if (s >= s_end) { +    return 0; /* fail */ +  } + +  /* Find out what sign we've got. */ +  if (*curr == '+' || *curr == '-') { +    sign = *curr; +    curr++; +  } else if (IS_DIGIT(*curr)) { /* Pass through. */ +  } else { +    goto fail; +  } + +  /* Read the integer part. */ +  end_not_reached = (curr != s_end); +  while (end_not_reached && IS_DIGIT(*curr)) { +    mantissa *= 10; +    mantissa += (int)(*curr - 0x30); +    curr++; +    read++; +    end_not_reached = (curr != s_end); +  } + +  /* We must make sure we actually got something. */ +  if (read == 0) +    goto fail; +  /* We allow numbers of form "#", "###" etc. */ +  if (!end_not_reached) +    goto assemble; + +  /* Read the decimal part. */ +  if (*curr == '.') { +    curr++; +    read = 1; +    end_not_reached = (curr != s_end); +    while (end_not_reached && IS_DIGIT(*curr)) { +      /* pow(10.0, -read) */ +      double frac_value = 1.0; +      int f; +      for (f = 0; f < read; f++) { +        frac_value *= 0.1; +      } +      mantissa += (int)(*curr - 0x30) * frac_value; +      read++; +      curr++; +      end_not_reached = (curr != s_end); +    } +  } else if (*curr == 'e' || *curr == 'E') { +  } else { +    goto assemble; +  } + +  if (!end_not_reached) +    goto assemble; + +  /* Read the exponent part. */ +  if (*curr == 'e' || *curr == 'E') { +    curr++; +    /* Figure out if a sign is present and if it is. */ +    end_not_reached = (curr != s_end); +    if (end_not_reached && (*curr == '+' || *curr == '-')) { +      exp_sign = *curr; +      curr++; +    } else if (IS_DIGIT(*curr)) { /* Pass through. */ +    } else { +      /* Empty E is not allowed. */ +      goto fail; +    } + +    read = 0; +    end_not_reached = (curr != s_end); +    while (end_not_reached && IS_DIGIT(*curr)) { +      exponent *= 10; +      exponent += (int)(*curr - 0x30); +      curr++; +      read++; +      end_not_reached = (curr != s_end); +    } +    if (read == 0) +      goto fail; +  } + +assemble: + +{ +  double a = 1.0; /* = pow(5.0, exponent); */ +  double b = 1.0; /* = 2.0^exponent */ +  int i; +  for (i = 0; i < exponent; i++) { +    a = a * 5.0; +  } + +  for (i = 0; i < exponent; i++) { +    b = b * 2.0; +  } + +  if (exp_sign == '-') { +    a = 1.0 / a; +    b = 1.0 / b; +  } + +  *result = +      /* (sign == '+' ? 1 : -1) * ldexp(mantissa * pow(5.0, exponent), +         exponent); */ +      (sign == '+' ? 1 : -1) * (mantissa * a * b); +} + +  return 1; +fail: +  return 0; +} + +static float parseFloat(const char **token) { +  const char *end; +  double val = 0.0; +  float f = 0.0f; +  skip_space(token); +  end = (*token) + until_space((*token)); +  val = 0.0; +  tryParseDouble((*token), end, &val); +  f = (float)(val); +  (*token) = end; +  return f; +} + +static void parseFloat2(float *x, float *y, const char **token) { +  (*x) = parseFloat(token); +  (*y) = parseFloat(token); +} + +static void parseFloat3(float *x, float *y, float *z, const char **token) { +  (*x) = parseFloat(token); +  (*y) = parseFloat(token); +  (*z) = parseFloat(token); +} + +static size_t my_strnlen(const char *s, size_t n) { +  const char *p = (char *)memchr(s, 0, n); +  return p ? 
(size_t)(p - s) : n; +} + +static char *my_strdup(const char *s, size_t max_length) { +  char *d; +  size_t len; + +  if (s == NULL) +    return NULL; + +  /* Do not consider CRLF line ending(#19) */ +  len = length_until_line_feed(s, max_length); +  /* len = strlen(s); */ + +  /* trim line ending and append '\0' */ +  d = (char *)TINYOBJ_MALLOC(len + 1); /* + '\0' */ +  memcpy(d, s, (size_t)(len)); +  d[len] = '\0'; + +  return d; +} + +static char *my_strndup(const char *s, size_t len) { +  char *d; +  size_t slen; + +  if (s == NULL) +    return NULL; +  if (len == 0) +    return NULL; + +  slen = my_strnlen(s, len); +  d = (char *)TINYOBJ_MALLOC(slen + 1); /* + '\0' */ +  if (!d) { +    return NULL; +  } +  memcpy(d, s, slen); +  d[slen] = '\0'; + +  return d; +} + +char *dynamic_fgets(char **buf, size_t *size, FILE *file) { +  char *offset; +  char *ret; +  size_t old_size; + +  if (!(ret = fgets(*buf, (int)*size, file))) { +    return ret; +  } + +  if (NULL != strchr(*buf, '\n')) { +    return ret; +  } + +  do { +    old_size = *size; +    *size *= 2; +    *buf = (char *)TINYOBJ_REALLOC_SIZED(*buf, old_size, *size); +    offset = &((*buf)[old_size - 1]); + +    ret = fgets(offset, (int)(old_size + 1), file); +  } while (ret && (NULL == strchr(*buf, '\n'))); + +  return ret; +} + +static void initMaterial(tinyobj_material_t *material) { +  int i; +  material->name = NULL; +  material->ambient_texname = NULL; +  material->diffuse_texname = NULL; +  material->specular_texname = NULL; +  material->specular_highlight_texname = NULL; +  material->bump_texname = NULL; +  material->displacement_texname = NULL; +  material->alpha_texname = NULL; +  for (i = 0; i < 3; i++) { +    material->ambient[i] = 0.f; +    material->diffuse[i] = 0.f; +    material->specular[i] = 0.f; +    material->transmittance[i] = 0.f; +    material->emission[i] = 0.f; +  } +  material->illum = 0; +  material->dissolve = 1.f; +  material->shininess = 1.f; +  material->ior = 1.f; +} + +/* Implementation of string to int hashtable */ + +#define HASH_TABLE_ERROR 1 +#define HASH_TABLE_SUCCESS 0 + +#define HASH_TABLE_DEFAULT_SIZE 10 + +typedef struct hash_table_entry_t { +  unsigned long hash; +  int filled; +  int pad0; +  long value; + +  struct hash_table_entry_t *next; +} hash_table_entry_t; + +typedef struct { +  unsigned long *hashes; +  hash_table_entry_t *entries; +  size_t capacity; +  size_t n; +} hash_table_t; + +static unsigned long hash_djb2(const unsigned char *str) { +  unsigned long hash = 5381; +  int c; + +  while ((c = *str++)) { +    hash = ((hash << 5) + hash) + (unsigned long)(c); +  } + +  return hash; +} + +static void create_hash_table(size_t start_capacity, hash_table_t *hash_table) { +  if (start_capacity < 1) +    start_capacity = HASH_TABLE_DEFAULT_SIZE; +  hash_table->hashes = +      (unsigned long *)TINYOBJ_MALLOC(start_capacity * sizeof(unsigned long)); +  hash_table->entries = (hash_table_entry_t *)TINYOBJ_CALLOC( +      start_capacity, sizeof(hash_table_entry_t)); +  hash_table->capacity = start_capacity; +  hash_table->n = 0; +} + +static void destroy_hash_table(hash_table_t *hash_table) { +  TINYOBJ_FREE(hash_table->entries); +  TINYOBJ_FREE(hash_table->hashes); +} + +/* Insert with quadratic probing */ +static int hash_table_insert_value(unsigned long hash, long value, +                                   hash_table_t *hash_table) { +  /* Insert value */ +  size_t start_index = hash % hash_table->capacity; +  size_t index = start_index; +  hash_table_entry_t *start_entry = 
hash_table->entries + start_index; +  size_t i; +  hash_table_entry_t *entry; + +  for (i = 1; hash_table->entries[index].filled; i++) { +    if (i >= hash_table->capacity) +      return HASH_TABLE_ERROR; +    index = (start_index + (i * i)) % hash_table->capacity; +  } + +  entry = hash_table->entries + index; +  entry->hash = hash; +  entry->filled = 1; +  entry->value = value; + +  if (index != start_index) { +    /* This is a new entry, but not the start entry, hence we need to add a next +     * pointer to our entry */ +    entry->next = start_entry->next; +    start_entry->next = entry; +  } + +  return HASH_TABLE_SUCCESS; +} + +static int hash_table_insert(unsigned long hash, long value, +                             hash_table_t *hash_table) { +  int ret = hash_table_insert_value(hash, value, hash_table); +  if (ret == HASH_TABLE_SUCCESS) { +    hash_table->hashes[hash_table->n] = hash; +    hash_table->n++; +  } +  return ret; +} + +static hash_table_entry_t *hash_table_find(unsigned long hash, +                                           hash_table_t *hash_table) { +  hash_table_entry_t *entry = +      hash_table->entries + (hash % hash_table->capacity); +  while (entry) { +    if (entry->hash == hash && entry->filled) { +      return entry; +    } +    entry = entry->next; +  } +  return NULL; +} + +static void hash_table_grow(hash_table_t *hash_table) { +  size_t new_capacity; +  hash_table_t new_hash_table; +  size_t i; + +  new_capacity = 2 * hash_table->capacity; +  /* Create a new hash table. We're not calling create_hash_table because we +   * want to realloc the hash array */ +  new_hash_table.hashes = hash_table->hashes = +      (unsigned long *)TINYOBJ_REALLOC_SIZED( +          (void *)hash_table->hashes, +          sizeof(unsigned long) * hash_table->capacity, +          sizeof(unsigned long) * new_capacity); +  new_hash_table.entries = (hash_table_entry_t *)TINYOBJ_CALLOC( +      new_capacity, sizeof(hash_table_entry_t)); +  new_hash_table.capacity = new_capacity; +  new_hash_table.n = hash_table->n; + +  /* Rehash */ +  for (i = 0; i < hash_table->capacity; i++) { +    hash_table_entry_t *entry = &hash_table->entries[i]; +    if (entry->filled) { +      hash_table_insert_value(entry->hash, entry->value, &new_hash_table); +    } +  } + +  TINYOBJ_FREE(hash_table->entries); +  (*hash_table) = new_hash_table; +} + +static int hash_table_exists(const char *name, hash_table_t *hash_table) { +  return hash_table_find(hash_djb2((const unsigned char *)name), hash_table) != +         NULL; +} + +static void hash_table_set(const char *name, size_t val, +                           hash_table_t *hash_table) { +  /* Hash name */ +  unsigned long hash = hash_djb2((const unsigned char *)name); + +  hash_table_entry_t *entry = hash_table_find(hash, hash_table); +  if (entry) { +    entry->value = (long)val; +    return; +  } + +  /* Expand if necessary +   * Grow until the element has been added +   */ +  while (hash_table_insert(hash, (long)val, hash_table) != HASH_TABLE_SUCCESS) { +    hash_table_grow(hash_table); +  } +} + +static long hash_table_get(const char *name, hash_table_t *hash_table) { +  hash_table_entry_t *ret = +      hash_table_find(hash_djb2((const unsigned char *)(name)), hash_table); +  return ret->value; +} + +static tinyobj_material_t *tinyobj_material_add(tinyobj_material_t *prev, +                                                size_t num_materials, +                                                tinyobj_material_t *new_mat) { +  tinyobj_material_t *dst; +  
size_t num_bytes = sizeof(tinyobj_material_t) * num_materials; +  dst = (tinyobj_material_t *)TINYOBJ_REALLOC_SIZED( +      prev, num_bytes, num_bytes + sizeof(tinyobj_material_t)); + +  dst[num_materials] = (*new_mat); /* Just copy pointer for char* members */ +  return dst; +} + +static int is_line_ending(const char *p, size_t i, size_t end_i) { +  if (p[i] == '\0') +    return 1; +  if (p[i] == '\n') +    return 1; /* this includes \r\n */ +  if (p[i] == '\r') { +    if (((i + 1) < end_i) && (p[i + 1] != '\n')) { /* detect only \r case */ +      return 1; +    } +  } +  return 0; +} + +typedef struct { +  size_t pos; +  size_t len; +} LineInfo; + +/* Find '\n' and create line data. */ +static int get_line_infos(const char *buf, size_t buf_len, +                          LineInfo **line_infos, size_t *num_lines) { +  size_t i = 0; +  size_t end_idx = buf_len; +  size_t prev_pos = 0; +  size_t line_no = 0; +  size_t last_line_ending = 0; + +  /* Count # of lines. */ +  for (i = 0; i < end_idx; i++) { +    if (is_line_ending(buf, i, end_idx)) { +      (*num_lines)++; +      last_line_ending = i; +    } +  } +  /* The last char from the input may not be a line +   * ending character so add an extra line if there +   * are more characters after the last line ending +   * that was found. */ +  if (end_idx - last_line_ending > 1) { +    (*num_lines)++; +  } + +  if (*num_lines == 0) +    return TINYOBJ_ERROR_EMPTY; + +  *line_infos = (LineInfo *)TINYOBJ_MALLOC(sizeof(LineInfo) * (*num_lines)); + +  /* Fill line infos. */ +  for (i = 0; i < end_idx; i++) { +    if (is_line_ending(buf, i, end_idx)) { +      (*line_infos)[line_no].pos = prev_pos; +      (*line_infos)[line_no].len = i - prev_pos; +      prev_pos = i + 1; +      line_no++; +    } +  } +  if (end_idx - last_line_ending > 1) { +    (*line_infos)[line_no].pos = prev_pos; +    (*line_infos)[line_no].len = end_idx - 1 - last_line_ending; +  } + +  return 0; +} + +static int tinyobj_parse_and_index_mtl_file( +    tinyobj_material_t **materials_out, size_t *num_materials_out, +    const char *mtl_filename, const char *obj_filename, +    file_reader_callback file_reader, void *ctx, hash_table_t *material_table) { +  tinyobj_material_t material; +  size_t num_materials = 0; +  tinyobj_material_t *materials = NULL; +  int has_previous_material = 0; +  const char *line_end = NULL; +  size_t num_lines = 0; +  LineInfo *line_infos = NULL; +  size_t i = 0; +  char *buf = NULL; +  size_t len = 0; + +  if (materials_out == NULL) { +    return TINYOBJ_ERROR_INVALID_PARAMETER; +  } + +  if (num_materials_out == NULL) { +    return TINYOBJ_ERROR_INVALID_PARAMETER; +  } + +  (*materials_out) = NULL; +  (*num_materials_out) = 0; + +  file_reader(ctx, mtl_filename, 1, obj_filename, &buf, &len); +  if (len < 1) +    return TINYOBJ_ERROR_INVALID_PARAMETER; +  if (buf == NULL) +    return TINYOBJ_ERROR_INVALID_PARAMETER; + +  if (get_line_infos(buf, len, &line_infos, &num_lines) != 0) { +    TINYOBJ_FREE(line_infos); +    return TINYOBJ_ERROR_EMPTY; +  } + +  /* Create a default material */ +  initMaterial(&material); + +  for (i = 0; i < num_lines; i++) { +    const char *p = &buf[line_infos[i].pos]; +    size_t p_len = line_infos[i].len; + +    char linebuf[4096]; +    const char *token; +    assert(p_len < 4095); + +    memcpy(linebuf, p, p_len); +    linebuf[p_len] = '\0'; + +    token = linebuf; +    line_end = token + p_len; + +    /* Skip leading space. 
*/ +    token += strspn(token, " \t"); + +    assert(token); +    if (token[0] == '\0') +      continue; /* empty line */ + +    if (token[0] == '#') +      continue; /* comment line */ + +    /* new mtl */ +    if ((0 == strncmp(token, "newmtl", 6)) && IS_SPACE((token[6]))) { +      char namebuf[4096]; + +      /* flush previous material. */ +      if (has_previous_material) { +        materials = tinyobj_material_add(materials, num_materials, &material); +        num_materials++; +      } else { +        has_previous_material = 1; +      } + +      /* initial temporary material */ +      initMaterial(&material); + +      /* set new mtl name */ +      token += 7; +#ifdef _MSC_VER +      sscanf_s(token, "%s", namebuf, (unsigned)_countof(namebuf)); +#else +      sscanf(token, "%s", namebuf); +#endif +      material.name = my_strdup(namebuf, (size_t)(line_end - token)); + +      /* Add material to material table */ +      if (material_table) +        hash_table_set(material.name, num_materials, material_table); + +      continue; +    } + +    /* ambient */ +    if (token[0] == 'K' && token[1] == 'a' && IS_SPACE((token[2]))) { +      float r, g, b; +      token += 2; +      parseFloat3(&r, &g, &b, &token); +      material.ambient[0] = r; +      material.ambient[1] = g; +      material.ambient[2] = b; +      continue; +    } + +    /* diffuse */ +    if (token[0] == 'K' && token[1] == 'd' && IS_SPACE((token[2]))) { +      float r, g, b; +      token += 2; +      parseFloat3(&r, &g, &b, &token); +      material.diffuse[0] = r; +      material.diffuse[1] = g; +      material.diffuse[2] = b; +      continue; +    } + +    /* specular */ +    if (token[0] == 'K' && token[1] == 's' && IS_SPACE((token[2]))) { +      float r, g, b; +      token += 2; +      parseFloat3(&r, &g, &b, &token); +      material.specular[0] = r; +      material.specular[1] = g; +      material.specular[2] = b; +      continue; +    } + +    /* transmittance */ +    if (token[0] == 'K' && token[1] == 't' && IS_SPACE((token[2]))) { +      float r, g, b; +      token += 2; +      parseFloat3(&r, &g, &b, &token); +      material.transmittance[0] = r; +      material.transmittance[1] = g; +      material.transmittance[2] = b; +      continue; +    } + +    /* ior(index of refraction) */ +    if (token[0] == 'N' && token[1] == 'i' && IS_SPACE((token[2]))) { +      token += 2; +      material.ior = parseFloat(&token); +      continue; +    } + +    /* emission */ +    if (token[0] == 'K' && token[1] == 'e' && IS_SPACE(token[2])) { +      float r, g, b; +      token += 2; +      parseFloat3(&r, &g, &b, &token); +      material.emission[0] = r; +      material.emission[1] = g; +      material.emission[2] = b; +      continue; +    } + +    /* shininess */ +    if (token[0] == 'N' && token[1] == 's' && IS_SPACE(token[2])) { +      token += 2; +      material.shininess = parseFloat(&token); +      continue; +    } + +    /* illum model */ +    if (0 == strncmp(token, "illum", 5) && IS_SPACE(token[5])) { +      token += 6; +      material.illum = parseInt(&token); +      continue; +    } + +    /* dissolve */ +    if ((token[0] == 'd' && IS_SPACE(token[1]))) { +      token += 1; +      material.dissolve = parseFloat(&token); +      continue; +    } +    if (token[0] == 'T' && token[1] == 'r' && IS_SPACE(token[2])) { +      token += 2; +      /* Invert value of Tr(assume Tr is in range [0, 1]) */ +      material.dissolve = 1.0f - parseFloat(&token); +      continue; +    } + +    /* ambient texture */ +    if ((0 == strncmp(token, 
"map_Ka", 6)) && IS_SPACE(token[6])) { +      token += 7; +      material.ambient_texname = my_strdup(token, (size_t)(line_end - token)); +      continue; +    } + +    /* diffuse texture */ +    if ((0 == strncmp(token, "map_Kd", 6)) && IS_SPACE(token[6])) { +      token += 7; +      material.diffuse_texname = my_strdup(token, (size_t)(line_end - token)); +      continue; +    } + +    /* specular texture */ +    if ((0 == strncmp(token, "map_Ks", 6)) && IS_SPACE(token[6])) { +      token += 7; +      material.specular_texname = my_strdup(token, (size_t)(line_end - token)); +      continue; +    } + +    /* specular highlight texture */ +    if ((0 == strncmp(token, "map_Ns", 6)) && IS_SPACE(token[6])) { +      token += 7; +      material.specular_highlight_texname = +          my_strdup(token, (size_t)(line_end - token)); +      continue; +    } + +    /* bump texture */ +    if ((0 == strncmp(token, "map_bump", 8)) && IS_SPACE(token[8])) { +      token += 9; +      material.bump_texname = my_strdup(token, (size_t)(line_end - token)); +      continue; +    } + +    /* alpha texture */ +    if ((0 == strncmp(token, "map_d", 5)) && IS_SPACE(token[5])) { +      token += 6; +      material.alpha_texname = my_strdup(token, (size_t)(line_end - token)); +      continue; +    } + +    /* bump texture */ +    if ((0 == strncmp(token, "bump", 4)) && IS_SPACE(token[4])) { +      token += 5; +      material.bump_texname = my_strdup(token, (size_t)(line_end - token)); +      continue; +    } + +    /* displacement texture */ +    if ((0 == strncmp(token, "disp", 4)) && IS_SPACE(token[4])) { +      token += 5; +      material.displacement_texname = +          my_strdup(token, (size_t)(line_end - token)); +      continue; +    } + +    /* @todo { unknown parameter } */ +  } + +  TINYOBJ_FREE(line_infos); + +  if (material.name) { +    /* Flush last material element */ +    materials = tinyobj_material_add(materials, num_materials, &material); +    num_materials++; +  } + +  (*num_materials_out) = num_materials; +  (*materials_out) = materials; + +  return TINYOBJ_SUCCESS; +} + +int tinyobj_parse_mtl_file(tinyobj_material_t **materials_out, +                           size_t *num_materials_out, const char *mtl_filename, +                           const char *obj_filename, +                           file_reader_callback file_reader, void *ctx) { +  return tinyobj_parse_and_index_mtl_file(materials_out, num_materials_out, +                                          mtl_filename, obj_filename, +                                          file_reader, ctx, NULL); +} + +typedef enum { +  COMMAND_EMPTY, +  COMMAND_V, +  COMMAND_VN, +  COMMAND_VT, +  COMMAND_F, +  COMMAND_G, +  COMMAND_O, +  COMMAND_USEMTL, +  COMMAND_MTLLIB + +} CommandType; + +typedef struct { +  float vx, vy, vz; +  float nx, ny, nz; +  float tx, ty; + +  /* @todo { Use dynamic array } */ +  tinyobj_vertex_index_t f[TINYOBJ_MAX_FACES_PER_F_LINE]; +  size_t num_f; + +  int f_num_verts[TINYOBJ_MAX_FACES_PER_F_LINE]; +  size_t num_f_num_verts; + +  const char *group_name; +  unsigned int group_name_len; +  int pad0; + +  const char *object_name; +  unsigned int object_name_len; +  int pad1; + +  const char *material_name; +  unsigned int material_name_len; +  int pad2; + +  const char *mtllib_name; +  unsigned int mtllib_name_len; + +  CommandType type; +} Command; + +static int parseLine(Command *command, const char *p, size_t p_len, +                     int triangulate) { +  char linebuf[4096]; +  const char *token; +  assert(p_len < 4095); 
+ +  memcpy(linebuf, p, p_len); +  linebuf[p_len] = '\0'; + +  token = linebuf; + +  command->type = COMMAND_EMPTY; + +  /* Skip leading space. */ +  skip_space(&token); + +  assert(token); +  if (token[0] == '\0') { /* empty line */ +    return 0; +  } + +  if (token[0] == '#') { /* comment line */ +    return 0; +  } + +  /* vertex */ +  if (token[0] == 'v' && IS_SPACE((token[1]))) { +    float x, y, z; +    token += 2; +    parseFloat3(&x, &y, &z, &token); +    command->vx = x; +    command->vy = y; +    command->vz = z; +    command->type = COMMAND_V; +    return 1; +  } + +  /* normal */ +  if (token[0] == 'v' && token[1] == 'n' && IS_SPACE((token[2]))) { +    float x, y, z; +    token += 3; +    parseFloat3(&x, &y, &z, &token); +    command->nx = x; +    command->ny = y; +    command->nz = z; +    command->type = COMMAND_VN; +    return 1; +  } + +  /* texcoord */ +  if (token[0] == 'v' && token[1] == 't' && IS_SPACE((token[2]))) { +    float x, y; +    token += 3; +    parseFloat2(&x, &y, &token); +    command->tx = x; +    command->ty = y; +    command->type = COMMAND_VT; +    return 1; +  } + +  /* face */ +  if (token[0] == 'f' && IS_SPACE((token[1]))) { +    size_t num_f = 0; + +    tinyobj_vertex_index_t f[TINYOBJ_MAX_FACES_PER_F_LINE]; +    token += 2; +    skip_space(&token); + +    while (!IS_NEW_LINE(token[0])) { +      tinyobj_vertex_index_t vi = parseRawTriple(&token); +      skip_space_and_cr(&token); + +      f[num_f] = vi; +      num_f++; +    } + +    command->type = COMMAND_F; + +    if (triangulate) { +      size_t k; +      size_t n = 0; + +      tinyobj_vertex_index_t i0 = f[0]; +      tinyobj_vertex_index_t i1; +      tinyobj_vertex_index_t i2 = f[1]; + +      assert(3 * num_f < TINYOBJ_MAX_FACES_PER_F_LINE); + +      for (k = 2; k < num_f; k++) { +        i1 = i2; +        i2 = f[k]; +        command->f[3 * n + 0] = i0; +        command->f[3 * n + 1] = i1; +        command->f[3 * n + 2] = i2; + +        command->f_num_verts[n] = 3; +        n++; +      } +      command->num_f = 3 * n; +      command->num_f_num_verts = n; + +    } else { +      size_t k = 0; +      assert(num_f < TINYOBJ_MAX_FACES_PER_F_LINE); +      for (k = 0; k < num_f; k++) { +        command->f[k] = f[k]; +      } + +      command->num_f = num_f; +      command->f_num_verts[0] = (int)num_f; +      command->num_f_num_verts = 1; +    } + +    return 1; +  } + +  /* use mtl */ +  if ((0 == strncmp(token, "usemtl", 6)) && IS_SPACE((token[6]))) { +    token += 7; + +    skip_space(&token); +    command->material_name = p + (token - linebuf); +    command->material_name_len = (unsigned int)length_until_newline( +        token, (p_len - (size_t)(token - linebuf)) + 1); +    command->type = COMMAND_USEMTL; + +    return 1; +  } + +  /* load mtl */ +  if ((0 == strncmp(token, "mtllib", 6)) && IS_SPACE((token[6]))) { +    /* By specification, `mtllib` should be appear only once in .obj */ +    token += 7; + +    skip_space(&token); +    command->mtllib_name = p + (token - linebuf); +    command->mtllib_name_len = (unsigned int)length_until_newline( +                                   token, p_len - (size_t)(token - linebuf)) + +                               1; +    command->type = COMMAND_MTLLIB; + +    return 1; +  } + +  /* group name */ +  if (token[0] == 'g' && IS_SPACE((token[1]))) { +    /* @todo { multiple group name. 
} */ +    token += 2; + +    command->group_name = p + (token - linebuf); +    command->group_name_len = (unsigned int)length_until_newline( +                                  token, p_len - (size_t)(token - linebuf)) + +                              1; +    command->type = COMMAND_G; + +    return 1; +  } + +  /* object name */ +  if (token[0] == 'o' && IS_SPACE((token[1]))) { +    /* @todo { multiple object name? } */ +    token += 2; + +    command->object_name = p + (token - linebuf); +    command->object_name_len = (unsigned int)length_until_newline( +                                   token, p_len - (size_t)(token - linebuf)) + +                               1; +    command->type = COMMAND_O; + +    return 1; +  } + +  return 0; +} + +static size_t basename_len(const char *filename, size_t filename_length) { +  /* Count includes NUL terminator. */ +  const char *p = &filename[filename_length - 1]; +  size_t count = 1; + +/* On Windows, the directory delimiter is '\' and both it and '/' is + * reserved by the filesystem. On *nix platforms, only the '/' character + * is reserved, so account for the two cases separately. */ +#if _WIN32 +  while (p[-1] != '/' && p[-1] != '\\') { +    if (p == filename) { +      count = filename_length; +      return count; +    } +    count++; +    p--; +  } +  p++; +  return count; +#else +  while (*(--p) != '/') { +    if (p == filename) { +      count = filename_length; +      return count; +    } +    count++; +  } +  return count; +#endif +} + +static char *generate_mtl_filename(const char *obj_filename, +                                   size_t obj_filename_length, +                                   const char *mtllib_name, +                                   size_t mtllib_name_length) { +  /* Create a dynamically-allocated material filename. This allows the material +   * and obj files to be separated, however the mtllib name in the OBJ file +   * must be a relative path to the material file from the OBJ's directory. +   * This does not support the matllib name as an absolute address. 
*/ +  char *mtl_filename; +  char *p; +  size_t mtl_filename_length; +  size_t obj_basename_length; + +  /* Calculate required size of mtl_filename and allocate */ +  obj_basename_length = basename_len(obj_filename, obj_filename_length); +  mtl_filename_length = +      (obj_filename_length - obj_basename_length) + mtllib_name_length; +  mtl_filename = (char *)TINYOBJ_MALLOC(mtl_filename_length); + +  /* Copy over the obj's path */ +  memcpy(mtl_filename, obj_filename, +         (obj_filename_length - obj_basename_length)); + +  /* Overwrite the obj basename with the mtllib name, filling the string */ +  p = &mtl_filename[mtl_filename_length - mtllib_name_length]; +  strcpy(p, mtllib_name); +  return mtl_filename; +} + +int tinyobj_parse_obj(tinyobj_attrib_t *attrib, tinyobj_shape_t **shapes, +                      size_t *num_shapes, tinyobj_material_t **materials_out, +                      size_t *num_materials_out, const char *obj_filename, +                      file_reader_callback file_reader, void *ctx, +                      unsigned int flags) { +  LineInfo *line_infos = NULL; +  Command *commands = NULL; +  size_t num_lines = 0; + +  size_t num_v = 0; +  size_t num_vn = 0; +  size_t num_vt = 0; +  size_t num_f = 0; +  size_t num_faces = 0; + +  int mtllib_line_index = -1; + +  tinyobj_material_t *materials = NULL; +  size_t num_materials = 0; + +  hash_table_t material_table; + +  char *buf = NULL; +  size_t len = 0; +  file_reader(ctx, obj_filename, /* is_mtl */ 0, obj_filename, &buf, &len); + +  if (len < 1) +    return TINYOBJ_ERROR_INVALID_PARAMETER; +  if (attrib == NULL) +    return TINYOBJ_ERROR_INVALID_PARAMETER; +  if (shapes == NULL) +    return TINYOBJ_ERROR_INVALID_PARAMETER; +  if (num_shapes == NULL) +    return TINYOBJ_ERROR_INVALID_PARAMETER; +  if (buf == NULL) +    return TINYOBJ_ERROR_INVALID_PARAMETER; +  if (materials_out == NULL) +    return TINYOBJ_ERROR_INVALID_PARAMETER; +  if (num_materials_out == NULL) +    return TINYOBJ_ERROR_INVALID_PARAMETER; + +  tinyobj_attrib_init(attrib); + +  /* 1. create line data */ +  if (get_line_infos(buf, len, &line_infos, &num_lines) != 0) { +    return TINYOBJ_ERROR_EMPTY; +  } + +  commands = (Command *)TINYOBJ_MALLOC(sizeof(Command) * num_lines); + +  create_hash_table(HASH_TABLE_DEFAULT_SIZE, &material_table); + +  /* 2. parse each line */ +  { +    size_t i = 0; +    for (i = 0; i < num_lines; i++) { +      int ret = parseLine(&commands[i], &buf[line_infos[i].pos], +                          line_infos[i].len, flags & TINYOBJ_FLAG_TRIANGULATE); +      if (ret) { +        if (commands[i].type == COMMAND_V) { +          num_v++; +        } else if (commands[i].type == COMMAND_VN) { +          num_vn++; +        } else if (commands[i].type == COMMAND_VT) { +          num_vt++; +        } else if (commands[i].type == COMMAND_F) { +          num_f += commands[i].num_f; +          num_faces += commands[i].num_f_num_verts; +        } + +        if (commands[i].type == COMMAND_MTLLIB) { +          mtllib_line_index = (int)i; +        } +      } +    } +  } + +  /* line_infos are not used anymore. Release memory. 
*/ +  if (line_infos) { +    TINYOBJ_FREE(line_infos); +  } + +  /* Load material (if it exists) */ +  if (mtllib_line_index >= 0 && commands[mtllib_line_index].mtllib_name && +      commands[mtllib_line_index].mtllib_name_len > 0) { +    /* Maximum length allowed by Linux - higher than Windows and macOS */ +    size_t obj_filename_len = my_strnlen(obj_filename, 4096 + 255) + 1; +    char *mtl_filename; +    char *mtllib_name; +    size_t mtllib_name_len = 0; +    int ret; + +    mtllib_name_len = +        length_until_line_feed(commands[mtllib_line_index].mtllib_name, +                               commands[mtllib_line_index].mtllib_name_len); + +    mtllib_name = +        my_strndup(commands[mtllib_line_index].mtllib_name, mtllib_name_len); + +    /* allow for NUL terminator */ +    mtllib_name_len++; +    mtl_filename = generate_mtl_filename(obj_filename, obj_filename_len, +                                         mtllib_name, mtllib_name_len); + +    ret = tinyobj_parse_and_index_mtl_file(&materials, &num_materials, +                                           mtl_filename, obj_filename, +                                           file_reader, ctx, &material_table); + +    if (ret != TINYOBJ_SUCCESS) { +      /* warning. */ +      fprintf(stderr, "TINYOBJ: Failed to parse material file '%s': %d\n", +              mtl_filename, ret); +    } +    TINYOBJ_FREE(mtl_filename); +    TINYOBJ_FREE(mtllib_name); +  } + +  /* Construct attributes */ + +  { +    size_t v_count = 0; +    size_t n_count = 0; +    size_t t_count = 0; +    size_t f_count = 0; +    size_t face_count = 0; +    int material_id = -1; /* -1 = default unknown material. */ +    size_t i = 0; + +    attrib->vertices = (float *)TINYOBJ_MALLOC(sizeof(float) * num_v * 3); +    attrib->num_vertices = (unsigned int)num_v; +    attrib->normals = (float *)TINYOBJ_MALLOC(sizeof(float) * num_vn * 3); +    attrib->num_normals = (unsigned int)num_vn; +    attrib->texcoords = (float *)TINYOBJ_MALLOC(sizeof(float) * num_vt * 2); +    attrib->num_texcoords = (unsigned int)num_vt; +    attrib->faces = (tinyobj_vertex_index_t *)TINYOBJ_MALLOC( +        sizeof(tinyobj_vertex_index_t) * num_f); +    attrib->num_faces = (unsigned int)num_f; +    attrib->face_num_verts = (int *)TINYOBJ_MALLOC(sizeof(int) * num_faces); +    attrib->material_ids = (int *)TINYOBJ_MALLOC(sizeof(int) * num_faces); +    attrib->num_face_num_verts = (unsigned int)num_faces; + +    for (i = 0; i < num_lines; i++) { +      if (commands[i].type == COMMAND_EMPTY) { +        continue; +      } else if (commands[i].type == COMMAND_USEMTL) { +        /* @todo +           if (commands[t][i].material_name && +           commands[t][i].material_name_len > 0) { +           std::string material_name(commands[t][i].material_name, +           commands[t][i].material_name_len); + +           if (material_map.find(material_name) != material_map.end()) { +           material_id = material_map[material_name]; +           } else { +        // Assign invalid material ID +        material_id = -1; +        } +        } +        */ +        if (commands[i].material_name && commands[i].material_name_len > 0) { +          /* Create a null terminated string */ +          char *material_name_null_term = +              (char *)TINYOBJ_MALLOC(commands[i].material_name_len + 1); +          memcpy((void *)material_name_null_term, +                 (const void *)commands[i].material_name, +                 commands[i].material_name_len); +          
material_name_null_term[commands[i].material_name_len] = 0; + +          if (hash_table_exists(material_name_null_term, &material_table)) +            material_id = +                (int)hash_table_get(material_name_null_term, &material_table); +          else +            material_id = -1; + +          TINYOBJ_FREE(material_name_null_term); +        } +      } else if (commands[i].type == COMMAND_V) { +        attrib->vertices[3 * v_count + 0] = commands[i].vx; +        attrib->vertices[3 * v_count + 1] = commands[i].vy; +        attrib->vertices[3 * v_count + 2] = commands[i].vz; +        v_count++; +      } else if (commands[i].type == COMMAND_VN) { +        attrib->normals[3 * n_count + 0] = commands[i].nx; +        attrib->normals[3 * n_count + 1] = commands[i].ny; +        attrib->normals[3 * n_count + 2] = commands[i].nz; +        n_count++; +      } else if (commands[i].type == COMMAND_VT) { +        attrib->texcoords[2 * t_count + 0] = commands[i].tx; +        attrib->texcoords[2 * t_count + 1] = commands[i].ty; +        t_count++; +      } else if (commands[i].type == COMMAND_F) { +        size_t k = 0; +        for (k = 0; k < commands[i].num_f; k++) { +          tinyobj_vertex_index_t vi = commands[i].f[k]; +          int v_idx = fixIndex(vi.v_idx, v_count); +          int vn_idx = fixIndex(vi.vn_idx, n_count); +          int vt_idx = fixIndex(vi.vt_idx, t_count); +          attrib->faces[f_count + k].v_idx = v_idx; +          attrib->faces[f_count + k].vn_idx = vn_idx; +          attrib->faces[f_count + k].vt_idx = vt_idx; +        } + +        for (k = 0; k < commands[i].num_f_num_verts; k++) { +          attrib->material_ids[face_count + k] = material_id; +          attrib->face_num_verts[face_count + k] = commands[i].f_num_verts[k]; +        } + +        f_count += commands[i].num_f; +        face_count += commands[i].num_f_num_verts; +      } +    } +  } + +  /* 5. Construct shape information. */ +  { +    unsigned int face_count = 0; +    size_t i = 0; +    size_t n = 0; +    size_t shape_idx = 0; + +    const char *shape_name = NULL; +    unsigned int shape_name_len = 0; +    const char *prev_shape_name = NULL; +    unsigned int prev_shape_name_len = 0; +    unsigned int prev_shape_face_offset = 0; +    unsigned int prev_face_offset = 0; +    tinyobj_shape_t prev_shape = {NULL, 0, 0}; + +    /* Find the number of shapes in .obj */ +    for (i = 0; i < num_lines; i++) { +      if (commands[i].type == COMMAND_O || commands[i].type == COMMAND_G) { +        n++; +      } +    } + +    /* Allocate array of shapes with maximum possible size(+1 for unnamed +     * group/object). 
+     * Actual # of shapes found in .obj is determined in the later */ +    (*shapes) = +        (tinyobj_shape_t *)TINYOBJ_MALLOC(sizeof(tinyobj_shape_t) * (n + 1)); + +    for (i = 0; i < num_lines; i++) { +      if (commands[i].type == COMMAND_O || commands[i].type == COMMAND_G) { +        if (commands[i].type == COMMAND_O) { +          shape_name = commands[i].object_name; +          shape_name_len = commands[i].object_name_len; +        } else { +          shape_name = commands[i].group_name; +          shape_name_len = commands[i].group_name_len; +        } + +        if (face_count == 0) { +          /* 'o' or 'g' appears before any 'f' */ +          prev_shape_name = shape_name; +          prev_shape_name_len = shape_name_len; +          prev_shape_face_offset = face_count; +          prev_face_offset = face_count; +        } else { +          if (shape_idx == 0) { +            /* 'o' or 'g' after some 'v' lines. */ +            (*shapes)[shape_idx].name = my_strndup( +                prev_shape_name, prev_shape_name_len); /* may be NULL */ +            (*shapes)[shape_idx].face_offset = prev_shape.face_offset; +            (*shapes)[shape_idx].length = face_count - prev_face_offset; +            shape_idx++; + +            prev_face_offset = face_count; + +          } else { +            if ((face_count - prev_face_offset) > 0) { +              (*shapes)[shape_idx].name = +                  my_strndup(prev_shape_name, prev_shape_name_len); +              (*shapes)[shape_idx].face_offset = prev_face_offset; +              (*shapes)[shape_idx].length = face_count - prev_face_offset; +              shape_idx++; +              prev_face_offset = face_count; +            } +          } + +          /* Record shape info for succeeding 'o' or 'g' command. */ +          prev_shape_name = shape_name; +          prev_shape_name_len = shape_name_len; +          prev_shape_face_offset = face_count; +        } +      } +      if (commands[i].type == COMMAND_F) { +        face_count++; +      } +    } + +    if ((face_count - prev_face_offset) > 0) { +      size_t length = face_count - prev_shape_face_offset; +      if (length > 0) { +        (*shapes)[shape_idx].name = +            my_strndup(prev_shape_name, prev_shape_name_len); +        (*shapes)[shape_idx].face_offset = prev_face_offset; +        (*shapes)[shape_idx].length = face_count - prev_face_offset; +        shape_idx++; +      } +    } else { +      /* Guess no 'v' line occurrence after 'o' or 'g', so discards current +       * shape information. 
*/ +    } + +    (*num_shapes) = shape_idx; +  } + +  if (commands) { +    TINYOBJ_FREE(commands); +  } + +  destroy_hash_table(&material_table); + +  (*materials_out) = materials; +  (*num_materials_out) = num_materials; + +  return TINYOBJ_SUCCESS; +} + +void tinyobj_attrib_init(tinyobj_attrib_t *attrib) { +  attrib->vertices = NULL; +  attrib->num_vertices = 0; +  attrib->normals = NULL; +  attrib->num_normals = 0; +  attrib->texcoords = NULL; +  attrib->num_texcoords = 0; +  attrib->faces = NULL; +  attrib->num_faces = 0; +  attrib->face_num_verts = NULL; +  attrib->num_face_num_verts = 0; +  attrib->material_ids = NULL; +} + +void tinyobj_attrib_free(tinyobj_attrib_t *attrib) { +  if (attrib->vertices) +    TINYOBJ_FREE(attrib->vertices); +  if (attrib->normals) +    TINYOBJ_FREE(attrib->normals); +  if (attrib->texcoords) +    TINYOBJ_FREE(attrib->texcoords); +  if (attrib->faces) +    TINYOBJ_FREE(attrib->faces); +  if (attrib->face_num_verts) +    TINYOBJ_FREE(attrib->face_num_verts); +  if (attrib->material_ids) +    TINYOBJ_FREE(attrib->material_ids); +} + +void tinyobj_shapes_free(tinyobj_shape_t *shapes, size_t num_shapes) { +  size_t i; +  if (shapes == NULL) +    return; + +  for (i = 0; i < num_shapes; i++) { +    if (shapes[i].name) +      TINYOBJ_FREE(shapes[i].name); +  } + +  TINYOBJ_FREE(shapes); +} + +void tinyobj_materials_free(tinyobj_material_t *materials, +                            size_t num_materials) { +  size_t i; +  if (materials == NULL) +    return; + +  for (i = 0; i < num_materials; i++) { +    if (materials[i].name) +      TINYOBJ_FREE(materials[i].name); +    if (materials[i].ambient_texname) +      TINYOBJ_FREE(materials[i].ambient_texname); +    if (materials[i].diffuse_texname) +      TINYOBJ_FREE(materials[i].diffuse_texname); +    if (materials[i].specular_texname) +      TINYOBJ_FREE(materials[i].specular_texname); +    if (materials[i].specular_highlight_texname) +      TINYOBJ_FREE(materials[i].specular_highlight_texname); +    if (materials[i].bump_texname) +      TINYOBJ_FREE(materials[i].bump_texname); +    if (materials[i].displacement_texname) +      TINYOBJ_FREE(materials[i].displacement_texname); +    if (materials[i].alpha_texname) +      TINYOBJ_FREE(materials[i].alpha_texname); +  } + +  TINYOBJ_FREE(materials); +} +#endif /* TINYOBJ_LOADER_C_IMPLEMENTATION */ diff --git a/vendor/vk_mem_alloc.h b/vendor/vk_mem_alloc.h new file mode 100644 index 0000000..6f71d5b --- /dev/null +++ b/vendor/vk_mem_alloc.h @@ -0,0 +1,19111 @@ +// +// Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// + +#ifndef AMD_VULKAN_MEMORY_ALLOCATOR_H +#define AMD_VULKAN_MEMORY_ALLOCATOR_H + +/** \mainpage Vulkan Memory Allocator + +<b>Version 3.2.1</b> + +Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. \n +License: MIT \n +See also: [product page on GPUOpen](https://gpuopen.com/gaming-product/vulkan-memory-allocator/), +[repository on GitHub](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator) + + +<b>API documentation divided into groups:</b> [Topics](topics.html) + +<b>General documentation chapters:</b> + +- <b>User guide</b> +  - \subpage quick_start +    - [Project setup](@ref quick_start_project_setup) +    - [Initialization](@ref quick_start_initialization) +    - [Resource allocation](@ref quick_start_resource_allocation) +  - \subpage choosing_memory_type +    - [Usage](@ref choosing_memory_type_usage) +    - [Required and preferred flags](@ref choosing_memory_type_required_preferred_flags) +    - [Explicit memory types](@ref choosing_memory_type_explicit_memory_types) +    - [Custom memory pools](@ref choosing_memory_type_custom_memory_pools) +    - [Dedicated allocations](@ref choosing_memory_type_dedicated_allocations) +  - \subpage memory_mapping +    - [Copy functions](@ref memory_mapping_copy_functions) +    - [Mapping functions](@ref memory_mapping_mapping_functions) +    - [Persistently mapped memory](@ref memory_mapping_persistently_mapped_memory) +    - [Cache flush and invalidate](@ref memory_mapping_cache_control) +  - \subpage staying_within_budget +    - [Querying for budget](@ref staying_within_budget_querying_for_budget) +    - [Controlling memory usage](@ref staying_within_budget_controlling_memory_usage) +  - \subpage resource_aliasing +  - \subpage custom_memory_pools +    - [Choosing memory type index](@ref custom_memory_pools_MemTypeIndex) +    - [When not to use custom pools](@ref custom_memory_pools_when_not_use) +    - [Linear allocation algorithm](@ref linear_algorithm) +      - [Free-at-once](@ref linear_algorithm_free_at_once) +      - [Stack](@ref linear_algorithm_stack) +      - [Double stack](@ref linear_algorithm_double_stack) +      - [Ring buffer](@ref linear_algorithm_ring_buffer) +  - \subpage defragmentation +  - \subpage statistics +    - [Numeric statistics](@ref statistics_numeric_statistics) +    - [JSON dump](@ref statistics_json_dump) +  - \subpage allocation_annotation +    - [Allocation user data](@ref allocation_user_data) +    - [Allocation names](@ref allocation_names) +  - \subpage virtual_allocator +  - \subpage debugging_memory_usage +    - [Memory initialization](@ref debugging_memory_usage_initialization) +    - [Margins](@ref debugging_memory_usage_margins) +    - [Corruption detection](@ref debugging_memory_usage_corruption_detection) +    - [Leak detection features](@ref debugging_memory_usage_leak_detection) +  - \subpage other_api_interop +- \subpage usage_patterns +    - [GPU-only resource](@ref usage_patterns_gpu_only) +    - [Staging copy for upload](@ref usage_patterns_staging_copy_upload) +    - [Readback](@ref usage_patterns_readback) +    - [Advanced data uploading](@ref usage_patterns_advanced_data_uploading) +    - [Other use cases](@ref usage_patterns_other_use_cases) +- \subpage configuration +  - 
[Pointers to Vulkan functions](@ref config_Vulkan_functions) +  - [Custom host memory allocator](@ref custom_memory_allocator) +  - [Device memory allocation callbacks](@ref allocation_callbacks) +  - [Device heap memory limit](@ref heap_memory_limit) +- <b>Extension support</b> +    - \subpage vk_khr_dedicated_allocation +    - \subpage enabling_buffer_device_address +    - \subpage vk_ext_memory_priority +    - \subpage vk_amd_device_coherent_memory +    - \subpage vk_khr_external_memory_win32 +- \subpage general_considerations +  - [Thread safety](@ref general_considerations_thread_safety) +  - [Versioning and compatibility](@ref general_considerations_versioning_and_compatibility) +  - [Validation layer warnings](@ref general_considerations_validation_layer_warnings) +  - [Allocation algorithm](@ref general_considerations_allocation_algorithm) +  - [Features not supported](@ref general_considerations_features_not_supported) + +\defgroup group_init Library initialization + +\brief API elements related to the initialization and management of the entire library, especially #VmaAllocator object. + +\defgroup group_alloc Memory allocation + +\brief API elements related to the allocation, deallocation, and management of Vulkan memory, buffers, images. +Most basic ones being: vmaCreateBuffer(), vmaCreateImage(). + +\defgroup group_virtual Virtual allocator + +\brief API elements related to the mechanism of \ref virtual_allocator - using the core allocation algorithm +for user-defined purpose without allocating any real GPU memory. + +\defgroup group_stats Statistics + +\brief API elements that query current status of the allocator, from memory usage, budget, to full dump of the internal state in JSON format. +See documentation chapter: \ref statistics. 
+*/ + + +#ifdef __cplusplus +extern "C" { +#endif + +#if !defined(VULKAN_H_) +#include <vulkan/vulkan.h> +#endif + +#if !defined(VMA_VULKAN_VERSION) +    #if defined(VK_VERSION_1_4) +        #define VMA_VULKAN_VERSION 1004000 +    #elif defined(VK_VERSION_1_3) +        #define VMA_VULKAN_VERSION 1003000 +    #elif defined(VK_VERSION_1_2) +        #define VMA_VULKAN_VERSION 1002000 +    #elif defined(VK_VERSION_1_1) +        #define VMA_VULKAN_VERSION 1001000 +    #else +        #define VMA_VULKAN_VERSION 1000000 +    #endif +#endif + +#if defined(__ANDROID__) && defined(VK_NO_PROTOTYPES) && VMA_STATIC_VULKAN_FUNCTIONS +    extern PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr; +    extern PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr; +    extern PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties; +    extern PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties; +    extern PFN_vkAllocateMemory vkAllocateMemory; +    extern PFN_vkFreeMemory vkFreeMemory; +    extern PFN_vkMapMemory vkMapMemory; +    extern PFN_vkUnmapMemory vkUnmapMemory; +    extern PFN_vkFlushMappedMemoryRanges vkFlushMappedMemoryRanges; +    extern PFN_vkInvalidateMappedMemoryRanges vkInvalidateMappedMemoryRanges; +    extern PFN_vkBindBufferMemory vkBindBufferMemory; +    extern PFN_vkBindImageMemory vkBindImageMemory; +    extern PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements; +    extern PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements; +    extern PFN_vkCreateBuffer vkCreateBuffer; +    extern PFN_vkDestroyBuffer vkDestroyBuffer; +    extern PFN_vkCreateImage vkCreateImage; +    extern PFN_vkDestroyImage vkDestroyImage; +    extern PFN_vkCmdCopyBuffer vkCmdCopyBuffer; +    #if VMA_VULKAN_VERSION >= 1001000 +        extern PFN_vkGetBufferMemoryRequirements2 vkGetBufferMemoryRequirements2; +        extern PFN_vkGetImageMemoryRequirements2 vkGetImageMemoryRequirements2; +        extern PFN_vkBindBufferMemory2 vkBindBufferMemory2; +        extern PFN_vkBindImageMemory2 vkBindImageMemory2; +        extern PFN_vkGetPhysicalDeviceMemoryProperties2 vkGetPhysicalDeviceMemoryProperties2; +    #endif // #if VMA_VULKAN_VERSION >= 1001000 +#endif // #if defined(__ANDROID__) && VMA_STATIC_VULKAN_FUNCTIONS && VK_NO_PROTOTYPES + +#if !defined(VMA_DEDICATED_ALLOCATION) +    #if VK_KHR_get_memory_requirements2 && VK_KHR_dedicated_allocation +        #define VMA_DEDICATED_ALLOCATION 1 +    #else +        #define VMA_DEDICATED_ALLOCATION 0 +    #endif +#endif + +#if !defined(VMA_BIND_MEMORY2) +    #if VK_KHR_bind_memory2 +        #define VMA_BIND_MEMORY2 1 +    #else +        #define VMA_BIND_MEMORY2 0 +    #endif +#endif + +#if !defined(VMA_MEMORY_BUDGET) +    #if VK_EXT_memory_budget && (VK_KHR_get_physical_device_properties2 || VMA_VULKAN_VERSION >= 1001000) +        #define VMA_MEMORY_BUDGET 1 +    #else +        #define VMA_MEMORY_BUDGET 0 +    #endif +#endif + +// Defined to 1 when VK_KHR_buffer_device_address device extension or equivalent core Vulkan 1.2 feature is defined in its headers. +#if !defined(VMA_BUFFER_DEVICE_ADDRESS) +    #if VK_KHR_buffer_device_address || VMA_VULKAN_VERSION >= 1002000 +        #define VMA_BUFFER_DEVICE_ADDRESS 1 +    #else +        #define VMA_BUFFER_DEVICE_ADDRESS 0 +    #endif +#endif + +// Defined to 1 when VK_EXT_memory_priority device extension is defined in Vulkan headers. 
+#if !defined(VMA_MEMORY_PRIORITY) +    #if VK_EXT_memory_priority +        #define VMA_MEMORY_PRIORITY 1 +    #else +        #define VMA_MEMORY_PRIORITY 0 +    #endif +#endif + +// Defined to 1 when VK_KHR_maintenance4 device extension is defined in Vulkan headers. +#if !defined(VMA_KHR_MAINTENANCE4) +    #if VK_KHR_maintenance4 +        #define VMA_KHR_MAINTENANCE4 1 +    #else +        #define VMA_KHR_MAINTENANCE4 0 +    #endif +#endif + +// Defined to 1 when VK_KHR_maintenance5 device extension is defined in Vulkan headers. +#if !defined(VMA_KHR_MAINTENANCE5) +    #if VK_KHR_maintenance5 +        #define VMA_KHR_MAINTENANCE5 1 +    #else +        #define VMA_KHR_MAINTENANCE5 0 +    #endif +#endif + + +// Defined to 1 when VK_KHR_external_memory device extension is defined in Vulkan headers. +#if !defined(VMA_EXTERNAL_MEMORY) +    #if VK_KHR_external_memory +        #define VMA_EXTERNAL_MEMORY 1 +    #else +        #define VMA_EXTERNAL_MEMORY 0 +    #endif +#endif + +// Defined to 1 when VK_KHR_external_memory_win32 device extension is defined in Vulkan headers. +#if !defined(VMA_EXTERNAL_MEMORY_WIN32) +    #if VK_KHR_external_memory_win32 +        #define VMA_EXTERNAL_MEMORY_WIN32 1 +    #else +        #define VMA_EXTERNAL_MEMORY_WIN32 0 +    #endif +#endif + +// Define these macros to decorate all public functions with additional code, +// before and after returned type, appropriately. This may be useful for +// exporting the functions when compiling VMA as a separate library. Example: +// #define VMA_CALL_PRE  __declspec(dllexport) +// #define VMA_CALL_POST __cdecl +#ifndef VMA_CALL_PRE +    #define VMA_CALL_PRE +#endif +#ifndef VMA_CALL_POST +    #define VMA_CALL_POST +#endif + +// Define this macro to decorate pNext pointers with an attribute specifying the Vulkan +// structure that will be extended via the pNext chain. +#ifndef VMA_EXTENDS_VK_STRUCT +    #define VMA_EXTENDS_VK_STRUCT(vkStruct) +#endif + +// Define this macro to decorate pointers with an attribute specifying the +// length of the array they point to if they are not null. +// +// The length may be one of +// - The name of another parameter in the argument list where the pointer is declared +// - The name of another member in the struct where the pointer is declared +// - The name of a member of a struct type, meaning the value of that member in +//   the context of the call. For example +//   VMA_LEN_IF_NOT_NULL("VkPhysicalDeviceMemoryProperties::memoryHeapCount"), +//   this means the number of memory heaps available in the device associated +//   with the VmaAllocator being dealt with. +#ifndef VMA_LEN_IF_NOT_NULL +    #define VMA_LEN_IF_NOT_NULL(len) +#endif + +// The VMA_NULLABLE macro is defined to be _Nullable when compiling with Clang. +// see: https://clang.llvm.org/docs/AttributeReference.html#nullable +#ifndef VMA_NULLABLE +    #ifdef __clang__ +        #define VMA_NULLABLE _Nullable +    #else +        #define VMA_NULLABLE +    #endif +#endif + +// The VMA_NOT_NULL macro is defined to be _Nonnull when compiling with Clang. 
+// see: https://clang.llvm.org/docs/AttributeReference.html#nonnull +#ifndef VMA_NOT_NULL +    #ifdef __clang__ +        #define VMA_NOT_NULL _Nonnull +    #else +        #define VMA_NOT_NULL +    #endif +#endif + +// If non-dispatchable handles are represented as pointers then we can give +// then nullability annotations +#ifndef VMA_NOT_NULL_NON_DISPATCHABLE +    #if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__) ) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(__powerpc64__) +        #define VMA_NOT_NULL_NON_DISPATCHABLE VMA_NOT_NULL +    #else +        #define VMA_NOT_NULL_NON_DISPATCHABLE +    #endif +#endif + +#ifndef VMA_NULLABLE_NON_DISPATCHABLE +    #if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__) ) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(__powerpc64__) +        #define VMA_NULLABLE_NON_DISPATCHABLE VMA_NULLABLE +    #else +        #define VMA_NULLABLE_NON_DISPATCHABLE +    #endif +#endif + +#ifndef VMA_STATS_STRING_ENABLED +    #define VMA_STATS_STRING_ENABLED 1 +#endif + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +// +//    INTERFACE +// +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// + +// Sections for managing code placement in file, only for development purposes e.g. for convenient folding inside an IDE. +#ifndef _VMA_ENUM_DECLARATIONS + +/** +\addtogroup group_init +@{ +*/ + +/// Flags for created #VmaAllocator. +typedef enum VmaAllocatorCreateFlagBits +{ +    /** \brief Allocator and all objects created from it will not be synchronized internally, so you must guarantee they are used from only one thread at a time or synchronized externally by you. + +    Using this flag may increase performance because internal mutexes are not used. +    */ +    VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT = 0x00000001, +    /** \brief Enables usage of VK_KHR_dedicated_allocation extension. + +    The flag works only if VmaAllocatorCreateInfo::vulkanApiVersion `== VK_API_VERSION_1_0`. +    When it is `VK_API_VERSION_1_1`, the flag is ignored because the extension has been promoted to Vulkan 1.1. + +    Using this extension will automatically allocate dedicated blocks of memory for +    some buffers and images instead of suballocating place for them out of bigger +    memory blocks (as if you explicitly used #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT +    flag) when it is recommended by the driver. It may improve performance on some +    GPUs. + +    You may set this flag only if you found out that following device extensions are +    supported, you enabled them while creating Vulkan device passed as +    VmaAllocatorCreateInfo::device, and you want them to be used internally by this +    library: + +    - VK_KHR_get_memory_requirements2 (device extension) +    - VK_KHR_dedicated_allocation (device extension) + +    When this flag is set, you can experience following warnings reported by Vulkan +    validation layer. You can ignore them. + +    > vkBindBufferMemory(): Binding memory to buffer 0x2d but vkGetBufferMemoryRequirements() has not been called on that buffer. 
+    */ +    VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT = 0x00000002, +    /** +    Enables usage of VK_KHR_bind_memory2 extension. + +    The flag works only if VmaAllocatorCreateInfo::vulkanApiVersion `== VK_API_VERSION_1_0`. +    When it is `VK_API_VERSION_1_1`, the flag is ignored because the extension has been promoted to Vulkan 1.1. + +    You may set this flag only if you found out that this device extension is supported, +    you enabled it while creating Vulkan device passed as VmaAllocatorCreateInfo::device, +    and you want it to be used internally by this library. + +    The extension provides functions `vkBindBufferMemory2KHR` and `vkBindImageMemory2KHR`, +    which allow to pass a chain of `pNext` structures while binding. +    This flag is required if you use `pNext` parameter in vmaBindBufferMemory2() or vmaBindImageMemory2(). +    */ +    VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT = 0x00000004, +    /** +    Enables usage of VK_EXT_memory_budget extension. + +    You may set this flag only if you found out that this device extension is supported, +    you enabled it while creating Vulkan device passed as VmaAllocatorCreateInfo::device, +    and you want it to be used internally by this library, along with another instance extension +    VK_KHR_get_physical_device_properties2, which is required by it (or Vulkan 1.1, where this extension is promoted). + +    The extension provides query for current memory usage and budget, which will probably +    be more accurate than an estimation used by the library otherwise. +    */ +    VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT = 0x00000008, +    /** +    Enables usage of VK_AMD_device_coherent_memory extension. + +    You may set this flag only if you: + +    - found out that this device extension is supported and enabled it while creating Vulkan device passed as VmaAllocatorCreateInfo::device, +    - checked that `VkPhysicalDeviceCoherentMemoryFeaturesAMD::deviceCoherentMemory` is true and set it while creating the Vulkan device, +    - want it to be used internally by this library. + +    The extension and accompanying device feature provide access to memory types with +    `VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD` and `VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD` flags. +    They are useful mostly for writing breadcrumb markers - a common method for debugging GPU crash/hang/TDR. + +    When the extension is not enabled, such memory types are still enumerated, but their usage is illegal. +    To protect from this error, if you don't create the allocator with this flag, it will refuse to allocate any memory or create a custom pool in such memory type, +    returning `VK_ERROR_FEATURE_NOT_PRESENT`. +    */ +    VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT = 0x00000010, +    /** +    Enables usage of "buffer device address" feature, which allows you to use function +    `vkGetBufferDeviceAddress*` to get raw GPU pointer to a buffer and pass it for usage inside a shader. + +    You may set this flag only if you: + +    1. (For Vulkan version < 1.2) Found as available and enabled device extension +    VK_KHR_buffer_device_address. +    This extension is promoted to core Vulkan 1.2. +    2. Found as available and enabled device feature `VkPhysicalDeviceBufferDeviceAddressFeatures::bufferDeviceAddress`. + +    When this flag is set, you can create buffers with `VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT` using VMA. 
+    The library automatically adds `VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT` to +    allocated memory blocks wherever it might be needed. + +    For more information, see documentation chapter \ref enabling_buffer_device_address. +    */ +    VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT = 0x00000020, +    /** +    Enables usage of VK_EXT_memory_priority extension in the library. + +    You may set this flag only if you found available and enabled this device extension, +    along with `VkPhysicalDeviceMemoryPriorityFeaturesEXT::memoryPriority == VK_TRUE`, +    while creating Vulkan device passed as VmaAllocatorCreateInfo::device. + +    When this flag is used, VmaAllocationCreateInfo::priority and VmaPoolCreateInfo::priority +    are used to set priorities of allocated Vulkan memory. Without it, these variables are ignored. + +    A priority must be a floating-point value between 0 and 1, indicating the priority of the allocation relative to other memory allocations. +    Larger values are higher priority. The granularity of the priorities is implementation-dependent. +    It is automatically passed to every call to `vkAllocateMemory` done by the library using structure `VkMemoryPriorityAllocateInfoEXT`. +    The value to be used for default priority is 0.5. +    For more details, see the documentation of the VK_EXT_memory_priority extension. +    */ +    VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT = 0x00000040, +    /** +    Enables usage of VK_KHR_maintenance4 extension in the library. + +    You may set this flag only if you found available and enabled this device extension, +    while creating Vulkan device passed as VmaAllocatorCreateInfo::device. +    */ +    VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT = 0x00000080, +    /** +    Enables usage of VK_KHR_maintenance5 extension in the library. + +    You should set this flag if you found available and enabled this device extension, +    while creating Vulkan device passed as VmaAllocatorCreateInfo::device. +    */ +    VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT = 0x00000100, + +    /** +    Enables usage of VK_KHR_external_memory_win32 extension in the library. + +    You should set this flag if you found available and enabled this device extension, +    while creating Vulkan device passed as VmaAllocatorCreateInfo::device. +    For more information, see \ref vk_khr_external_memory_win32. +    */ +    VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT = 0x00000200, + +    VMA_ALLOCATOR_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VmaAllocatorCreateFlagBits; +/// See #VmaAllocatorCreateFlagBits. +typedef VkFlags VmaAllocatorCreateFlags; + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/// \brief Intended usage of the allocated memory. +typedef enum VmaMemoryUsage +{ +    /** No intended memory usage specified. +    Use other members of VmaAllocationCreateInfo to specify your requirements. +    */ +    VMA_MEMORY_USAGE_UNKNOWN = 0, +    /** +    \deprecated Obsolete, preserved for backward compatibility. +    Prefers `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. +    */ +    VMA_MEMORY_USAGE_GPU_ONLY = 1, +    /** +    \deprecated Obsolete, preserved for backward compatibility. +    Guarantees `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` and `VK_MEMORY_PROPERTY_HOST_COHERENT_BIT`. +    */ +    VMA_MEMORY_USAGE_CPU_ONLY = 2, +    /** +    \deprecated Obsolete, preserved for backward compatibility. +    Guarantees `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`, prefers `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. 
+    */ +    VMA_MEMORY_USAGE_CPU_TO_GPU = 3, +    /** +    \deprecated Obsolete, preserved for backward compatibility. +    Guarantees `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`, prefers `VK_MEMORY_PROPERTY_HOST_CACHED_BIT`. +    */ +    VMA_MEMORY_USAGE_GPU_TO_CPU = 4, +    /** +    \deprecated Obsolete, preserved for backward compatibility. +    Prefers not `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. +    */ +    VMA_MEMORY_USAGE_CPU_COPY = 5, +    /** +    Lazily allocated GPU memory having `VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT`. +    Exists mostly on mobile platforms. Using it on desktop PC or other GPUs with no such memory type present will fail the allocation. + +    Usage: Memory for transient attachment images (color attachments, depth attachments etc.), created with `VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT`. + +    Allocations with this usage are always created as dedicated - it implies #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. +    */ +    VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED = 6, +    /** +    Selects best memory type automatically. +    This flag is recommended for most common use cases. + +    When using this flag, if you want to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT), +    you must pass one of the flags: #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT +    in VmaAllocationCreateInfo::flags. + +    It can be used only with functions that let the library know `VkBufferCreateInfo` or `VkImageCreateInfo`, e.g. +    vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo() +    and not with generic memory allocation functions. +    */ +    VMA_MEMORY_USAGE_AUTO = 7, +    /** +    Selects best memory type automatically with preference for GPU (device) memory. + +    When using this flag, if you want to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT), +    you must pass one of the flags: #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT +    in VmaAllocationCreateInfo::flags. + +    It can be used only with functions that let the library know `VkBufferCreateInfo` or `VkImageCreateInfo`, e.g. +    vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo() +    and not with generic memory allocation functions. +    */ +    VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE = 8, +    /** +    Selects best memory type automatically with preference for CPU (host) memory. + +    When using this flag, if you want to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT), +    you must pass one of the flags: #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT +    in VmaAllocationCreateInfo::flags. + +    It can be used only with functions that let the library know `VkBufferCreateInfo` or `VkImageCreateInfo`, e.g. +    vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo() +    and not with generic memory allocation functions. +    */ +    VMA_MEMORY_USAGE_AUTO_PREFER_HOST = 9, + +    VMA_MEMORY_USAGE_MAX_ENUM = 0x7FFFFFFF +} VmaMemoryUsage; + +/// Flags to be passed as VmaAllocationCreateInfo::flags. +typedef enum VmaAllocationCreateFlagBits +{ +    /** \brief Set this flag if the allocation should have its own memory block. 
+ +    Use it for special, big resources, like fullscreen images used as attachments. + +    If you use this flag while creating a buffer or an image, `VkMemoryDedicatedAllocateInfo` +    structure is applied if possible. +    */ +    VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT = 0x00000001, + +    /** \brief Set this flag to only try to allocate from existing `VkDeviceMemory` blocks and never create new such block. + +    If new allocation cannot be placed in any of the existing blocks, allocation +    fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY` error. + +    You should not use #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT and +    #VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT at the same time. It makes no sense. +    */ +    VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT = 0x00000002, +    /** \brief Set this flag to use a memory that will be persistently mapped and retrieve pointer to it. + +    Pointer to mapped memory will be returned through VmaAllocationInfo::pMappedData. + +    It is valid to use this flag for allocation made from memory type that is not +    `HOST_VISIBLE`. This flag is then ignored and memory is not mapped. This is +    useful if you need an allocation that is efficient to use on GPU +    (`DEVICE_LOCAL`) and still want to map it directly if possible on platforms that +    support it (e.g. Intel GPU). +    */ +    VMA_ALLOCATION_CREATE_MAPPED_BIT = 0x00000004, +    /** \deprecated Preserved for backward compatibility. Consider using vmaSetAllocationName() instead. + +    Set this flag to treat VmaAllocationCreateInfo::pUserData as pointer to a +    null-terminated string. Instead of copying pointer value, a local copy of the +    string is made and stored in allocation's `pName`. The string is automatically +    freed together with the allocation. It is also used in vmaBuildStatsString(). +    */ +    VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT = 0x00000020, +    /** Allocation will be created from upper stack in a double stack pool. + +    This flag is only allowed for custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT flag. +    */ +    VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT = 0x00000040, +    /** Create both buffer/image and allocation, but don't bind them together. +    It is useful when you want to bind yourself to do some more advanced binding, e.g. using some extensions. +    The flag is meaningful only with functions that bind by default: vmaCreateBuffer(), vmaCreateImage(). +    Otherwise it is ignored. + +    If you want to make sure the new buffer/image is not tied to the new memory allocation +    through `VkMemoryDedicatedAllocateInfoKHR` structure in case the allocation ends up in its own memory block, +    use also flag #VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT. +    */ +    VMA_ALLOCATION_CREATE_DONT_BIND_BIT = 0x00000080, +    /** Create allocation only if additional device memory required for it, if any, won't exceed +    memory budget. Otherwise return `VK_ERROR_OUT_OF_DEVICE_MEMORY`. +    */ +    VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT = 0x00000100, +    /** \brief Set this flag if the allocated memory will have aliasing resources. + +    Usage of this flag prevents supplying `VkMemoryDedicatedAllocateInfoKHR` when #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT is specified. +    Otherwise created dedicated memory will not be suitable for aliasing resources, resulting in Vulkan Validation Layer errors. 
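The dedicated-memory flag above is typically reserved for large resources such as render-target images. A minimal sketch using vmaCreateImage(), assuming `allocator`, `width`, and `height` are supplied by the application:

\code
// Sketch: give a full-screen color attachment its own VkDeviceMemory block.
// `allocator`, `width`, and `height` are assumed to exist in the application.
VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
imgCreateInfo.imageType = VK_IMAGE_TYPE_2D;
imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
imgCreateInfo.extent = (VkExtent3D){ width, height, 1 };
imgCreateInfo.mipLevels = 1;
imgCreateInfo.arrayLayers = 1;
imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
imgCreateInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;

VmaAllocationCreateInfo allocCreateInfo = {0};
allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;

VkImage image;
VmaAllocation imageAlloc;
vmaCreateImage(allocator, &imgCreateInfo, &allocCreateInfo, &image, &imageAlloc, NULL);
\endcode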
+    */ +    VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT = 0x00000200, +    /** +    Requests possibility to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT). + +    - If you use #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` value, +      you must use this flag to be able to map the allocation. Otherwise, mapping is incorrect. +    - If you use other value of #VmaMemoryUsage, this flag is ignored and mapping is always possible in memory types that are `HOST_VISIBLE`. +      This includes allocations created in \ref custom_memory_pools. + +    Declares that mapped memory will only be written sequentially, e.g. using `memcpy()` or a loop writing number-by-number, +    never read or accessed randomly, so a memory type can be selected that is uncached and write-combined. + +    \warning Violating this declaration may work correctly, but will likely be very slow. +    Watch out for implicit reads introduced by doing e.g. `pMappedData[i] += x;` +    Better prepare your data in a local variable and `memcpy()` it to the mapped pointer all at once. +    */ +    VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT = 0x00000400, +    /** +    Requests possibility to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT). + +    - If you use #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` value, +      you must use this flag to be able to map the allocation. Otherwise, mapping is incorrect. +    - If you use other value of #VmaMemoryUsage, this flag is ignored and mapping is always possible in memory types that are `HOST_VISIBLE`. +      This includes allocations created in \ref custom_memory_pools. + +    Declares that mapped memory can be read, written, and accessed in random order, +    so a `HOST_CACHED` memory type is preferred. +    */ +    VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT = 0x00000800, +    /** +    Together with #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT, +    it says that despite request for host access, a not-`HOST_VISIBLE` memory type can be selected +    if it may improve performance. + +    By using this flag, you declare that you will check if the allocation ended up in a `HOST_VISIBLE` memory type +    (e.g. using vmaGetAllocationMemoryProperties()) and if not, you will create some "staging" buffer and +    issue an explicit transfer to write/read your data. +    To prepare for this possibility, don't forget to add appropriate flags like +    `VK_BUFFER_USAGE_TRANSFER_DST_BIT`, `VK_BUFFER_USAGE_TRANSFER_SRC_BIT` to the parameters of created buffer or image. +    */ +    VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT = 0x00001000, +    /** Allocation strategy that chooses smallest possible free range for the allocation +    to minimize memory usage and fragmentation, possibly at the expense of allocation time. +    */ +    VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT = 0x00010000, +    /** Allocation strategy that chooses first suitable free range for the allocation - +    not necessarily in terms of the smallest offset but the one that is easiest and fastest to find +    to minimize allocation time, possibly at the expense of allocation quality. +    */ +    VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT = 0x00020000, +    /** Allocation strategy that chooses always the lowest offset in available space. +    This is not the most efficient strategy but achieves highly packed data. 
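The host-access flags above are what staging uploads usually combine with #VMA_ALLOCATION_CREATE_MAPPED_BIT. A minimal sketch, assuming `allocator`, `srcData`, and `dataSize` are provided by the caller:

\code
// Sketch: persistently mapped staging buffer for a one-shot upload.
// `allocator`, `srcData`, and `dataSize` are assumed to exist in the application.
VkBufferCreateInfo stagingCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
stagingCreateInfo.size = dataSize;
stagingCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

VmaAllocationCreateInfo stagingAllocCreateInfo = {0};
stagingAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
stagingAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
                               VMA_ALLOCATION_CREATE_MAPPED_BIT;

VkBuffer stagingBuffer;
VmaAllocation stagingAllocation;
VmaAllocationInfo stagingAllocInfo;
vmaCreateBuffer(allocator, &stagingCreateInfo, &stagingAllocCreateInfo,
                &stagingBuffer, &stagingAllocation, &stagingAllocInfo);

// Sequential writes only - never read back through the mapped pointer.
// memcpy() is from <string.h>.
memcpy(stagingAllocInfo.pMappedData, srcData, dataSize);
\endcode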
+    Used internally by defragmentation, not recommended in typical usage. +    */ +    VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT  = 0x00040000, +    /** Alias to #VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT. +    */ +    VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT, +    /** Alias to #VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT. +    */ +    VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT, +    /** A bit mask to extract only `STRATEGY` bits from entire set of flags. +    */ +    VMA_ALLOCATION_CREATE_STRATEGY_MASK = +        VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT | +        VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT | +        VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, + +    VMA_ALLOCATION_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VmaAllocationCreateFlagBits; +/// See #VmaAllocationCreateFlagBits. +typedef VkFlags VmaAllocationCreateFlags; + +/// Flags to be passed as VmaPoolCreateInfo::flags. +typedef enum VmaPoolCreateFlagBits +{ +    /** \brief Use this flag if you always allocate only buffers and linear images or only optimal images out of this pool and so Buffer-Image Granularity can be ignored. + +    This is an optional optimization flag. + +    If you always allocate using vmaCreateBuffer(), vmaCreateImage(), +    vmaAllocateMemoryForBuffer(), then you don't need to use it because allocator +    knows exact type of your allocations so it can handle Buffer-Image Granularity +    in the optimal way. + +    If you also allocate using vmaAllocateMemoryForImage() or vmaAllocateMemory(), +    exact type of such allocations is not known, so allocator must be conservative +    in handling Buffer-Image Granularity, which can lead to suboptimal allocation +    (wasted memory). In that case, if you can make sure you always allocate only +    buffers and linear images or only optimal images out of this pool, use this flag +    to make allocator disregard Buffer-Image Granularity and so make allocations +    faster and more optimal. +    */ +    VMA_POOL_CREATE_IGNORE_BUFFER_IMAGE_GRANULARITY_BIT = 0x00000002, + +    /** \brief Enables alternative, linear allocation algorithm in this pool. + +    Specify this flag to enable linear allocation algorithm, which always creates +    new allocations after last one and doesn't reuse space from allocations freed in +    between. It trades memory consumption for simplified algorithm and data +    structure, which has better performance and uses less memory for metadata. + +    By using this flag, you can achieve behavior of free-at-once, stack, +    ring buffer, and double stack. +    For details, see documentation chapter \ref linear_algorithm. +    */ +    VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT = 0x00000004, + +    /** Bit mask to extract only `ALGORITHM` bits from entire set of flags. +    */ +    VMA_POOL_CREATE_ALGORITHM_MASK = +        VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT, + +    VMA_POOL_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VmaPoolCreateFlagBits; +/// Flags to be passed as VmaPoolCreateInfo::flags. See #VmaPoolCreateFlagBits. +typedef VkFlags VmaPoolCreateFlags; + +/// Flags to be passed as VmaDefragmentationInfo::flags. +typedef enum VmaDefragmentationFlagBits +{ +    /* \brief Use simple but fast algorithm for defragmentation. +    May not achieve best results but will require least time to compute and least allocations to copy. 
+    */
+    VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT = 0x1,
+    /* \brief Default defragmentation algorithm, applied also when no `ALGORITHM` flag is specified.
+    Offers a balance between defragmentation quality and the amount of allocations and bytes that need to be moved.
+    */
+    VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT = 0x2,
+    /* \brief Perform full defragmentation of memory.
+    Can result in notably more time to compute and allocations to copy, but will achieve best memory packing.
+    */
+    VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT = 0x4,
+    /** \brief Use the most robust algorithm at the cost of time to compute and number of copies to make.
+    Only available when bufferImageGranularity is greater than 1, since it aims to reduce
+    alignment issues between different types of resources.
+    Otherwise falls back to same behavior as #VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT.
+    */
+    VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT = 0x8,
+
+    /// A bit mask to extract only `ALGORITHM` bits from entire set of flags.
+    VMA_DEFRAGMENTATION_FLAG_ALGORITHM_MASK =
+        VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT |
+        VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT |
+        VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT |
+        VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT,
+
+    VMA_DEFRAGMENTATION_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VmaDefragmentationFlagBits;
+/// See #VmaDefragmentationFlagBits.
+typedef VkFlags VmaDefragmentationFlags;
+
+/// Operation performed on single defragmentation move. See structure #VmaDefragmentationMove.
+typedef enum VmaDefragmentationMoveOperation
+{
+    /// Buffer/image has been recreated at `dstTmpAllocation`, data has been copied, old buffer/image has been destroyed. `srcAllocation` should be changed to point to the new place. This is the default value set by vmaBeginDefragmentationPass().
+    VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY = 0,
+    /// Set this value if you cannot move the allocation. New place reserved at `dstTmpAllocation` will be freed. `srcAllocation` will remain unchanged.
+    VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE = 1,
+    /// Set this value if you decide to abandon the allocation and you destroyed the buffer/image. New place reserved at `dstTmpAllocation` will be freed, along with `srcAllocation`, which will be destroyed.
+    VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY = 2,
+} VmaDefragmentationMoveOperation;
+
+/** @} */
+
+/**
+\addtogroup group_virtual
+@{
+*/
+
+/// Flags to be passed as VmaVirtualBlockCreateInfo::flags.
+typedef enum VmaVirtualBlockCreateFlagBits
+{
+    /** \brief Enables alternative, linear allocation algorithm in this virtual block.
+
+    Specify this flag to enable linear allocation algorithm, which always creates
+    new allocations after last one and doesn't reuse space from allocations freed in
+    between. It trades memory consumption for simplified algorithm and data
+    structure, which has better performance and uses less memory for metadata.
+
+    By using this flag, you can achieve behavior of free-at-once, stack,
+    ring buffer, and double stack.
+    For details, see documentation chapter \ref linear_algorithm.
+    */
+    VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT = 0x00000001,
+
+    /** \brief Bit mask to extract only `ALGORITHM` bits from entire set of flags.
+    */ +    VMA_VIRTUAL_BLOCK_CREATE_ALGORITHM_MASK = +        VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT, + +    VMA_VIRTUAL_BLOCK_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VmaVirtualBlockCreateFlagBits; +/// Flags to be passed as VmaVirtualBlockCreateInfo::flags. See #VmaVirtualBlockCreateFlagBits. +typedef VkFlags VmaVirtualBlockCreateFlags; + +/// Flags to be passed as VmaVirtualAllocationCreateInfo::flags. +typedef enum VmaVirtualAllocationCreateFlagBits +{ +    /** \brief Allocation will be created from upper stack in a double stack pool. + +    This flag is only allowed for virtual blocks created with #VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT flag. +    */ +    VMA_VIRTUAL_ALLOCATION_CREATE_UPPER_ADDRESS_BIT = VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT, +    /** \brief Allocation strategy that tries to minimize memory usage. +    */ +    VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT, +    /** \brief Allocation strategy that tries to minimize allocation time. +    */ +    VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT, +    /** Allocation strategy that chooses always the lowest offset in available space. +    This is not the most efficient strategy but achieves highly packed data. +    */ +    VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, +    /** \brief A bit mask to extract only `STRATEGY` bits from entire set of flags. + +    These strategy flags are binary compatible with equivalent flags in #VmaAllocationCreateFlagBits. +    */ +    VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MASK = VMA_ALLOCATION_CREATE_STRATEGY_MASK, + +    VMA_VIRTUAL_ALLOCATION_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VmaVirtualAllocationCreateFlagBits; +/// Flags to be passed as VmaVirtualAllocationCreateInfo::flags. See #VmaVirtualAllocationCreateFlagBits. +typedef VkFlags VmaVirtualAllocationCreateFlags; + +/** @} */ + +#endif // _VMA_ENUM_DECLARATIONS + +#ifndef _VMA_DATA_TYPES_DECLARATIONS + +/** +\addtogroup group_init +@{ */ + +/** \struct VmaAllocator +\brief Represents main object of this library initialized. + +Fill structure #VmaAllocatorCreateInfo and call function vmaCreateAllocator() to create it. +Call function vmaDestroyAllocator() to destroy it. + +It is recommended to create just one object of this type per `VkDevice` object, +right after Vulkan is initialized and keep it alive until before Vulkan device is destroyed. +*/ +VK_DEFINE_HANDLE(VmaAllocator) + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/** \struct VmaPool +\brief Represents custom memory pool + +Fill structure VmaPoolCreateInfo and call function vmaCreatePool() to create it. +Call function vmaDestroyPool() to destroy it. + +For more information see [Custom memory pools](@ref choosing_memory_type_custom_memory_pools). +*/ +VK_DEFINE_HANDLE(VmaPool) + +/** \struct VmaAllocation +\brief Represents single memory allocation. + +It may be either dedicated block of `VkDeviceMemory` or a specific region of a bigger block of this type +plus unique offset. + +There are multiple ways to create such object. +You need to fill structure VmaAllocationCreateInfo. +For more information see [Choosing memory type](@ref choosing_memory_type). + +Although the library provides convenience functions that create Vulkan buffer or image, +allocate memory for it and bind them together, +binding of the allocation to a buffer or an image is out of scope of the allocation itself. 
+Allocation object can exist without buffer/image bound, +binding can be done manually by the user, and destruction of it can be done +independently of destruction of the allocation. + +The object also remembers its size and some other information. +To retrieve this information, use function vmaGetAllocationInfo() and inspect +returned structure VmaAllocationInfo. +*/ +VK_DEFINE_HANDLE(VmaAllocation) + +/** \struct VmaDefragmentationContext +\brief An opaque object that represents started defragmentation process. + +Fill structure #VmaDefragmentationInfo and call function vmaBeginDefragmentation() to create it. +Call function vmaEndDefragmentation() to destroy it. +*/ +VK_DEFINE_HANDLE(VmaDefragmentationContext) + +/** @} */ + +/** +\addtogroup group_virtual +@{ +*/ + +/** \struct VmaVirtualAllocation +\brief Represents single memory allocation done inside VmaVirtualBlock. + +Use it as a unique identifier to virtual allocation within the single block. + +Use value `VK_NULL_HANDLE` to represent a null/invalid allocation. +*/ +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VmaVirtualAllocation) + +/** @} */ + +/** +\addtogroup group_virtual +@{ +*/ + +/** \struct VmaVirtualBlock +\brief Handle to a virtual block object that allows to use core allocation algorithm without allocating any real GPU memory. + +Fill in #VmaVirtualBlockCreateInfo structure and use vmaCreateVirtualBlock() to create it. Use vmaDestroyVirtualBlock() to destroy it. +For more information, see documentation chapter \ref virtual_allocator. + +This object is not thread-safe - should not be used from multiple threads simultaneously, must be synchronized externally. +*/ +VK_DEFINE_HANDLE(VmaVirtualBlock) + +/** @} */ + +/** +\addtogroup group_init +@{ +*/ + +/// Callback function called after successful vkAllocateMemory. +typedef void (VKAPI_PTR* PFN_vmaAllocateDeviceMemoryFunction)( +    VmaAllocator VMA_NOT_NULL                    allocator, +    uint32_t                                     memoryType, +    VkDeviceMemory VMA_NOT_NULL_NON_DISPATCHABLE memory, +    VkDeviceSize                                 size, +    void* VMA_NULLABLE                           pUserData); + +/// Callback function called before vkFreeMemory. +typedef void (VKAPI_PTR* PFN_vmaFreeDeviceMemoryFunction)( +    VmaAllocator VMA_NOT_NULL                    allocator, +    uint32_t                                     memoryType, +    VkDeviceMemory VMA_NOT_NULL_NON_DISPATCHABLE memory, +    VkDeviceSize                                 size, +    void* VMA_NULLABLE                           pUserData); + +/** \brief Set of callbacks that the library will call for `vkAllocateMemory` and `vkFreeMemory`. + +Provided for informative purpose, e.g. to gather statistics about number of +allocations or total amount of memory allocated in Vulkan. + +Used in VmaAllocatorCreateInfo::pDeviceMemoryCallbacks. +*/ +typedef struct VmaDeviceMemoryCallbacks +{ +    /// Optional, can be null. +    PFN_vmaAllocateDeviceMemoryFunction VMA_NULLABLE pfnAllocate; +    /// Optional, can be null. +    PFN_vmaFreeDeviceMemoryFunction VMA_NULLABLE pfnFree; +    /// Optional, can be null. +    void* VMA_NULLABLE pUserData; +} VmaDeviceMemoryCallbacks; + +/** \brief Pointers to some Vulkan functions - a subset used by the library. + +Used in VmaAllocatorCreateInfo::pVulkanFunctions. +*/ +typedef struct VmaVulkanFunctions +{ +    /// Required when using VMA_DYNAMIC_VULKAN_FUNCTIONS. 
+    PFN_vkGetInstanceProcAddr VMA_NULLABLE vkGetInstanceProcAddr; +    /// Required when using VMA_DYNAMIC_VULKAN_FUNCTIONS. +    PFN_vkGetDeviceProcAddr VMA_NULLABLE vkGetDeviceProcAddr; +    PFN_vkGetPhysicalDeviceProperties VMA_NULLABLE vkGetPhysicalDeviceProperties; +    PFN_vkGetPhysicalDeviceMemoryProperties VMA_NULLABLE vkGetPhysicalDeviceMemoryProperties; +    PFN_vkAllocateMemory VMA_NULLABLE vkAllocateMemory; +    PFN_vkFreeMemory VMA_NULLABLE vkFreeMemory; +    PFN_vkMapMemory VMA_NULLABLE vkMapMemory; +    PFN_vkUnmapMemory VMA_NULLABLE vkUnmapMemory; +    PFN_vkFlushMappedMemoryRanges VMA_NULLABLE vkFlushMappedMemoryRanges; +    PFN_vkInvalidateMappedMemoryRanges VMA_NULLABLE vkInvalidateMappedMemoryRanges; +    PFN_vkBindBufferMemory VMA_NULLABLE vkBindBufferMemory; +    PFN_vkBindImageMemory VMA_NULLABLE vkBindImageMemory; +    PFN_vkGetBufferMemoryRequirements VMA_NULLABLE vkGetBufferMemoryRequirements; +    PFN_vkGetImageMemoryRequirements VMA_NULLABLE vkGetImageMemoryRequirements; +    PFN_vkCreateBuffer VMA_NULLABLE vkCreateBuffer; +    PFN_vkDestroyBuffer VMA_NULLABLE vkDestroyBuffer; +    PFN_vkCreateImage VMA_NULLABLE vkCreateImage; +    PFN_vkDestroyImage VMA_NULLABLE vkDestroyImage; +    PFN_vkCmdCopyBuffer VMA_NULLABLE vkCmdCopyBuffer; +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 +    /// Fetch "vkGetBufferMemoryRequirements2" on Vulkan >= 1.1, fetch "vkGetBufferMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension. +    PFN_vkGetBufferMemoryRequirements2KHR VMA_NULLABLE vkGetBufferMemoryRequirements2KHR; +    /// Fetch "vkGetImageMemoryRequirements2" on Vulkan >= 1.1, fetch "vkGetImageMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension. +    PFN_vkGetImageMemoryRequirements2KHR VMA_NULLABLE vkGetImageMemoryRequirements2KHR; +#endif +#if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000 +    /// Fetch "vkBindBufferMemory2" on Vulkan >= 1.1, fetch "vkBindBufferMemory2KHR" when using VK_KHR_bind_memory2 extension. +    PFN_vkBindBufferMemory2KHR VMA_NULLABLE vkBindBufferMemory2KHR; +    /// Fetch "vkBindImageMemory2" on Vulkan >= 1.1, fetch "vkBindImageMemory2KHR" when using VK_KHR_bind_memory2 extension. +    PFN_vkBindImageMemory2KHR VMA_NULLABLE vkBindImageMemory2KHR; +#endif +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 +    /// Fetch from "vkGetPhysicalDeviceMemoryProperties2" on Vulkan >= 1.1, but you can also fetch it from "vkGetPhysicalDeviceMemoryProperties2KHR" if you enabled extension VK_KHR_get_physical_device_properties2. +    PFN_vkGetPhysicalDeviceMemoryProperties2KHR VMA_NULLABLE vkGetPhysicalDeviceMemoryProperties2KHR; +#endif +#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 +    /// Fetch from "vkGetDeviceBufferMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceBufferMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4. +    PFN_vkGetDeviceBufferMemoryRequirementsKHR VMA_NULLABLE vkGetDeviceBufferMemoryRequirements; +    /// Fetch from "vkGetDeviceImageMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceImageMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4. 
+    PFN_vkGetDeviceImageMemoryRequirementsKHR VMA_NULLABLE vkGetDeviceImageMemoryRequirements;
+#endif
+#if VMA_EXTERNAL_MEMORY_WIN32
+    PFN_vkGetMemoryWin32HandleKHR VMA_NULLABLE vkGetMemoryWin32HandleKHR;
+#else
+    void* VMA_NULLABLE vkGetMemoryWin32HandleKHR;
+#endif
+} VmaVulkanFunctions;
+
+/// Description of an Allocator to be created.
+typedef struct VmaAllocatorCreateInfo
+{
+    /// Flags for created allocator. Use #VmaAllocatorCreateFlagBits enum.
+    VmaAllocatorCreateFlags flags;
+    /// Vulkan physical device.
+    /** It must be valid throughout whole lifetime of created allocator. */
+    VkPhysicalDevice VMA_NOT_NULL physicalDevice;
+    /// Vulkan device.
+    /** It must be valid throughout whole lifetime of created allocator. */
+    VkDevice VMA_NOT_NULL device;
+    /// Preferred size of a single `VkDeviceMemory` block to be allocated from large heaps > 1 GiB. Optional.
+    /** Set to 0 to use default, which is currently 256 MiB. */
+    VkDeviceSize preferredLargeHeapBlockSize;
+    /// Custom CPU memory allocation callbacks. Optional.
+    /** Optional, can be null. When specified, will also be used for all CPU-side memory allocations. */
+    const VkAllocationCallbacks* VMA_NULLABLE pAllocationCallbacks;
+    /// Informative callbacks for `vkAllocateMemory`, `vkFreeMemory`. Optional.
+    /** Optional, can be null. */
+    const VmaDeviceMemoryCallbacks* VMA_NULLABLE pDeviceMemoryCallbacks;
+    /** \brief Either null or a pointer to an array of limits on maximum number of bytes that can be allocated out of particular Vulkan memory heap.
+
+    If not NULL, it must be a pointer to an array of
+    `VkPhysicalDeviceMemoryProperties::memoryHeapCount` elements, defining limit on
+    maximum number of bytes that can be allocated out of particular Vulkan memory
+    heap.
+
+    Any of the elements may be equal to `VK_WHOLE_SIZE`, which means no limit on that
+    heap. This is also the default in case of `pHeapSizeLimit` = NULL.
+
+    If there is a limit defined for a heap:
+
+    - If user tries to allocate more memory from that heap using this allocator,
+      the allocation fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY`.
+    - If the limit is smaller than heap size reported in `VkMemoryHeap::size`, the
+      value of this limit will be reported instead when using vmaGetMemoryProperties().
+
+    Warning! Using this feature may not be equivalent to installing a GPU with
+    smaller amount of memory, because graphics driver doesn't necessarily fail new
+    allocations with `VK_ERROR_OUT_OF_DEVICE_MEMORY` result when memory capacity is
+    exceeded. It may return success and just silently migrate some device memory
+    blocks to system RAM. This driver behavior can also be controlled using
+    VK_AMD_memory_overallocation_behavior extension.
+    */
+    const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL("VkPhysicalDeviceMemoryProperties::memoryHeapCount") pHeapSizeLimit;
+
+    /** \brief Pointers to Vulkan functions. Can be null.
+
+    For details see [Pointers to Vulkan functions](@ref config_Vulkan_functions).
+    */
+    const VmaVulkanFunctions* VMA_NULLABLE pVulkanFunctions;
+    /** \brief Handle to Vulkan instance object.
+
+    Starting from version 3.0.0 this member is no longer optional, it must be set!
+    */
+    VkInstance VMA_NOT_NULL instance;
+    /** \brief Optional. Vulkan version that the application uses.
+ +    It must be a value in the format as created by macro `VK_MAKE_VERSION` or a constant like: `VK_API_VERSION_1_1`, `VK_API_VERSION_1_0`. +    The patch version number specified is ignored. Only the major and minor versions are considered. +    Only versions 1.0...1.4 are supported by the current implementation. +    Leaving it initialized to zero is equivalent to `VK_API_VERSION_1_0`. +    It must match the Vulkan version used by the application and supported on the selected physical device, +    so it must be no higher than `VkApplicationInfo::apiVersion` passed to `vkCreateInstance` +    and no higher than `VkPhysicalDeviceProperties::apiVersion` found on the physical device used. +    */ +    uint32_t vulkanApiVersion; +#if VMA_EXTERNAL_MEMORY +    /** \brief Either null or a pointer to an array of external memory handle types for each Vulkan memory type. + +    If not NULL, it must be a pointer to an array of `VkPhysicalDeviceMemoryProperties::memoryTypeCount` +    elements, defining external memory handle types of particular Vulkan memory type, +    to be passed using `VkExportMemoryAllocateInfoKHR`. + +    Any of the elements may be equal to 0, which means not to use `VkExportMemoryAllocateInfoKHR` on this memory type. +    This is also the default in case of `pTypeExternalMemoryHandleTypes` = NULL. +    */ +    const VkExternalMemoryHandleTypeFlagsKHR* VMA_NULLABLE VMA_LEN_IF_NOT_NULL("VkPhysicalDeviceMemoryProperties::memoryTypeCount") pTypeExternalMemoryHandleTypes; +#endif // #if VMA_EXTERNAL_MEMORY +} VmaAllocatorCreateInfo; + +/// Information about existing #VmaAllocator object. +typedef struct VmaAllocatorInfo +{ +    /** \brief Handle to Vulkan instance object. + +    This is the same value as has been passed through VmaAllocatorCreateInfo::instance. +    */ +    VkInstance VMA_NOT_NULL instance; +    /** \brief Handle to Vulkan physical device object. + +    This is the same value as has been passed through VmaAllocatorCreateInfo::physicalDevice. +    */ +    VkPhysicalDevice VMA_NOT_NULL physicalDevice; +    /** \brief Handle to Vulkan device object. + +    This is the same value as has been passed through VmaAllocatorCreateInfo::device. +    */ +    VkDevice VMA_NOT_NULL device; +} VmaAllocatorInfo; + +/** @} */ + +/** +\addtogroup group_stats +@{ +*/ + +/** \brief Calculated statistics of memory usage e.g. in a specific memory type, heap, custom pool, or total. + +These are fast to calculate. +See functions: vmaGetHeapBudgets(), vmaGetPoolStatistics(). +*/ +typedef struct VmaStatistics +{ +    /** \brief Number of `VkDeviceMemory` objects - Vulkan memory blocks allocated. +    */ +    uint32_t blockCount; +    /** \brief Number of #VmaAllocation objects allocated. + +    Dedicated allocations have their own blocks, so each one adds 1 to `allocationCount` as well as `blockCount`. +    */ +    uint32_t allocationCount; +    /** \brief Number of bytes allocated in `VkDeviceMemory` blocks. + +    \note To avoid confusion, please be aware that what Vulkan calls an "allocation" - a whole `VkDeviceMemory` object +    (e.g. as in `VkPhysicalDeviceLimits::maxMemoryAllocationCount`) is called a "block" in VMA, while VMA calls +    "allocation" a #VmaAllocation object that represents a memory region sub-allocated from such block, usually for a single buffer or image. +    */ +    VkDeviceSize blockBytes; +    /** \brief Total number of bytes occupied by all #VmaAllocation objects. + +    Always less or equal than `blockBytes`. 
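Taking VmaVulkanFunctions and VmaAllocatorCreateInfo together, allocator creation reduces to a few assignments. A minimal sketch, assuming `instance`, `physicalDevice`, and `device` are valid handles and that the engine targets Vulkan 1.2 (an assumption, not something this header prescribes):

\code
// Sketch: create the VmaAllocator once, right after the VkDevice is created.
// `instance`, `physicalDevice`, and `device` are assumed to be valid handles.
VmaVulkanFunctions vulkanFunctions = {0};
vulkanFunctions.vkGetInstanceProcAddr = vkGetInstanceProcAddr; // required with VMA_DYNAMIC_VULKAN_FUNCTIONS
vulkanFunctions.vkGetDeviceProcAddr = vkGetDeviceProcAddr;

VmaAllocatorCreateInfo allocatorCreateInfo = {0};
allocatorCreateInfo.vulkanApiVersion = VK_API_VERSION_1_2; // assumption: engine targets 1.2
allocatorCreateInfo.instance = instance;
allocatorCreateInfo.physicalDevice = physicalDevice;
allocatorCreateInfo.device = device;
allocatorCreateInfo.pVulkanFunctions = &vulkanFunctions;

VmaAllocator allocator;
VkResult result = vmaCreateAllocator(&allocatorCreateInfo, &allocator);

// ... use the allocator for the lifetime of the device ...

vmaDestroyAllocator(allocator);
\endcode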
+    Difference `(blockBytes - allocationBytes)` is the amount of memory allocated from Vulkan +    but unused by any #VmaAllocation. +    */ +    VkDeviceSize allocationBytes; +} VmaStatistics; + +/** \brief More detailed statistics than #VmaStatistics. + +These are slower to calculate. Use for debugging purposes. +See functions: vmaCalculateStatistics(), vmaCalculatePoolStatistics(). + +Previous version of the statistics API provided averages, but they have been removed +because they can be easily calculated as: + +\code +VkDeviceSize allocationSizeAvg = detailedStats.statistics.allocationBytes / detailedStats.statistics.allocationCount; +VkDeviceSize unusedBytes = detailedStats.statistics.blockBytes - detailedStats.statistics.allocationBytes; +VkDeviceSize unusedRangeSizeAvg = unusedBytes / detailedStats.unusedRangeCount; +\endcode +*/ +typedef struct VmaDetailedStatistics +{ +    /// Basic statistics. +    VmaStatistics statistics; +    /// Number of free ranges of memory between allocations. +    uint32_t unusedRangeCount; +    /// Smallest allocation size. `VK_WHOLE_SIZE` if there are 0 allocations. +    VkDeviceSize allocationSizeMin; +    /// Largest allocation size. 0 if there are 0 allocations. +    VkDeviceSize allocationSizeMax; +    /// Smallest empty range size. `VK_WHOLE_SIZE` if there are 0 empty ranges. +    VkDeviceSize unusedRangeSizeMin; +    /// Largest empty range size. 0 if there are 0 empty ranges. +    VkDeviceSize unusedRangeSizeMax; +} VmaDetailedStatistics; + +/** \brief  General statistics from current state of the Allocator - +total memory usage across all memory heaps and types. + +These are slower to calculate. Use for debugging purposes. +See function vmaCalculateStatistics(). +*/ +typedef struct VmaTotalStatistics +{ +    VmaDetailedStatistics memoryType[VK_MAX_MEMORY_TYPES]; +    VmaDetailedStatistics memoryHeap[VK_MAX_MEMORY_HEAPS]; +    VmaDetailedStatistics total; +} VmaTotalStatistics; + +/** \brief Statistics of current memory usage and available budget for a specific memory heap. + +These are fast to calculate. +See function vmaGetHeapBudgets(). +*/ +typedef struct VmaBudget +{ +    /** \brief Statistics fetched from the library. +    */ +    VmaStatistics statistics; +    /** \brief Estimated current memory usage of the program, in bytes. + +    Fetched from system using VK_EXT_memory_budget extension if enabled. + +    It might be different than `statistics.blockBytes` (usually higher) due to additional implicit objects +    also occupying the memory, like swapchain, pipelines, descriptor heaps, command buffers, or +    `VkDeviceMemory` blocks allocated outside of this library, if any. +    */ +    VkDeviceSize usage; +    /** \brief Estimated amount of memory available to the program, in bytes. + +    Fetched from system using VK_EXT_memory_budget extension if enabled. + +    It might be different (most probably smaller) than `VkMemoryHeap::size[heapIndex]` due to factors +    external to the program, decided by the operating system. +    Difference `budget - usage` is the amount of additional memory that can probably +    be allocated without problems. Exceeding the budget may result in various problems. +    */ +    VkDeviceSize budget; +} VmaBudget; + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/** \brief Parameters of new #VmaAllocation. + +To be used with functions like vmaCreateBuffer(), vmaCreateImage(), and many others. +*/ +typedef struct VmaAllocationCreateInfo +{ +    /// Use #VmaAllocationCreateFlagBits enum. 
+    VmaAllocationCreateFlags flags; +    /** \brief Intended usage of memory. + +    You can leave #VMA_MEMORY_USAGE_UNKNOWN if you specify memory requirements in other way. \n +    If `pool` is not null, this member is ignored. +    */ +    VmaMemoryUsage usage; +    /** \brief Flags that must be set in a Memory Type chosen for an allocation. + +    Leave 0 if you specify memory requirements in other way. \n +    If `pool` is not null, this member is ignored.*/ +    VkMemoryPropertyFlags requiredFlags; +    /** \brief Flags that preferably should be set in a memory type chosen for an allocation. + +    Set to 0 if no additional flags are preferred. \n +    If `pool` is not null, this member is ignored. */ +    VkMemoryPropertyFlags preferredFlags; +    /** \brief Bitmask containing one bit set for every memory type acceptable for this allocation. + +    Value 0 is equivalent to `UINT32_MAX` - it means any memory type is accepted if +    it meets other requirements specified by this structure, with no further +    restrictions on memory type index. \n +    If `pool` is not null, this member is ignored. +    */ +    uint32_t memoryTypeBits; +    /** \brief Pool that this allocation should be created in. + +    Leave `VK_NULL_HANDLE` to allocate from default pool. If not null, members: +    `usage`, `requiredFlags`, `preferredFlags`, `memoryTypeBits` are ignored. +    */ +    VmaPool VMA_NULLABLE pool; +    /** \brief Custom general-purpose pointer that will be stored in #VmaAllocation, can be read as VmaAllocationInfo::pUserData and changed using vmaSetAllocationUserData(). + +    If #VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT is used, it must be either +    null or pointer to a null-terminated string. The string will be then copied to +    internal buffer, so it doesn't need to be valid after allocation call. +    */ +    void* VMA_NULLABLE pUserData; +    /** \brief A floating-point value between 0 and 1, indicating the priority of the allocation relative to other memory allocations. + +    It is used only when #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT flag was used during creation of the #VmaAllocator object +    and this allocation ends up as dedicated or is explicitly forced as dedicated using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. +    Otherwise, it has the priority of a memory block where it is placed and this variable is ignored. +    */ +    float priority; +} VmaAllocationCreateInfo; + +/// Describes parameter of created #VmaPool. +typedef struct VmaPoolCreateInfo +{ +    /** \brief Vulkan memory type index to allocate this pool from. +    */ +    uint32_t memoryTypeIndex; +    /** \brief Use combination of #VmaPoolCreateFlagBits. +    */ +    VmaPoolCreateFlags flags; +    /** \brief Size of a single `VkDeviceMemory` block to be allocated as part of this pool, in bytes. Optional. + +    Specify nonzero to set explicit, constant size of memory blocks used by this +    pool. + +    Leave 0 to use default and let the library manage block sizes automatically. +    Sizes of particular blocks may vary. +    In this case, the pool will also support dedicated allocations. +    */ +    VkDeviceSize blockSize; +    /** \brief Minimum number of blocks to be always allocated in this pool, even if they stay empty. + +    Set to 0 to have no preallocated blocks and allow the pool be completely empty. +    */ +    size_t minBlockCount; +    /** \brief Maximum number of blocks that can be allocated in this pool. Optional. 
+ +    Set to 0 to use default, which is `SIZE_MAX`, which means no limit. + +    Set to same value as VmaPoolCreateInfo::minBlockCount to have fixed amount of memory allocated +    throughout whole lifetime of this pool. +    */ +    size_t maxBlockCount; +    /** \brief A floating-point value between 0 and 1, indicating the priority of the allocations in this pool relative to other memory allocations. + +    It is used only when #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT flag was used during creation of the #VmaAllocator object. +    Otherwise, this variable is ignored. +    */ +    float priority; +    /** \brief Additional minimum alignment to be used for all allocations created from this pool. Can be 0. + +    Leave 0 (default) not to impose any additional alignment. If not 0, it must be a power of two. +    It can be useful in cases where alignment returned by Vulkan by functions like `vkGetBufferMemoryRequirements` is not enough, +    e.g. when doing interop with OpenGL. +    */ +    VkDeviceSize minAllocationAlignment; +    /** \brief Additional `pNext` chain to be attached to `VkMemoryAllocateInfo` used for every allocation made by this pool. Optional. + +    Optional, can be null. If not null, it must point to a `pNext` chain of structures that can be attached to `VkMemoryAllocateInfo`. +    It can be useful for special needs such as adding `VkExportMemoryAllocateInfoKHR`. +    Structures pointed by this member must remain alive and unchanged for the whole lifetime of the custom pool. + +    Please note that some structures, e.g. `VkMemoryPriorityAllocateInfoEXT`, `VkMemoryDedicatedAllocateInfoKHR`, +    can be attached automatically by this library when using other, more convenient of its features. +    */ +    void* VMA_NULLABLE VMA_EXTENDS_VK_STRUCT(VkMemoryAllocateInfo) pMemoryAllocateNext; +} VmaPoolCreateInfo; + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/** +Parameters of #VmaAllocation objects, that can be retrieved using function vmaGetAllocationInfo(). + +There is also an extended version of this structure that carries additional parameters: #VmaAllocationInfo2. +*/ +typedef struct VmaAllocationInfo +{ +    /** \brief Memory type index that this allocation was allocated from. + +    It never changes. +    */ +    uint32_t memoryType; +    /** \brief Handle to Vulkan memory object. + +    Same memory object can be shared by multiple allocations. + +    It can change after the allocation is moved during \ref defragmentation. +    */ +    VkDeviceMemory VMA_NULLABLE_NON_DISPATCHABLE deviceMemory; +    /** \brief Offset in `VkDeviceMemory` object to the beginning of this allocation, in bytes. `(deviceMemory, offset)` pair is unique to this allocation. + +    You usually don't need to use this offset. If you create a buffer or an image together with the allocation using e.g. function +    vmaCreateBuffer(), vmaCreateImage(), functions that operate on these resources refer to the beginning of the buffer or image, +    not entire device memory block. Functions like vmaMapMemory(), vmaBindBufferMemory() also refer to the beginning of the allocation +    and apply this offset automatically. + +    It can change after the allocation is moved during \ref defragmentation. +    */ +    VkDeviceSize offset; +    /** \brief Size of this allocation, in bytes. + +    It never changes. + +    \note Allocation size returned in this variable may be greater than the size +    requested for the resource e.g. as `VkBufferCreateInfo::size`. 
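A custom pool parameterized by VmaPoolCreateInfo is usually set up by first asking the library for a suitable memoryTypeIndex via vmaFindMemoryTypeIndexForBufferInfo(), declared further down in this header. A minimal sketch; the sample buffer parameters and the 16 MiB block size are placeholders:

\code
// Sketch: create a custom pool for small, CPU-written uniform buffers.
// The sample buffer below is only used to pick a memory type; it is never created.
VkBufferCreateInfo sampleBufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
sampleBufInfo.size = 1024;
sampleBufInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;

VmaAllocationCreateInfo sampleAllocInfo = {0};
sampleAllocInfo.usage = VMA_MEMORY_USAGE_AUTO;
sampleAllocInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; // uniform data written from the CPU

uint32_t memTypeIndex;
vmaFindMemoryTypeIndexForBufferInfo(allocator, &sampleBufInfo, &sampleAllocInfo, &memTypeIndex);

VmaPoolCreateInfo poolCreateInfo = {0};
poolCreateInfo.memoryTypeIndex = memTypeIndex;
poolCreateInfo.blockSize = 16ull * 1024 * 1024; // assumption: fixed 16 MiB blocks
poolCreateInfo.minBlockCount = 1;

VmaPool pool;
vmaCreatePool(allocator, &poolCreateInfo, &pool);
\endcode

Allocations are then routed into the pool by setting VmaAllocationCreateInfo::pool, at which point the usage/flags members of that structure are ignored, as described above.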
Whole size of the +    allocation is accessible for operations on memory e.g. using a pointer after +    mapping with vmaMapMemory(), but operations on the resource e.g. using +    `vkCmdCopyBuffer` must be limited to the size of the resource. +    */ +    VkDeviceSize size; +    /** \brief Pointer to the beginning of this allocation as mapped data. + +    If the allocation hasn't been mapped using vmaMapMemory() and hasn't been +    created with #VMA_ALLOCATION_CREATE_MAPPED_BIT flag, this value is null. + +    It can change after call to vmaMapMemory(), vmaUnmapMemory(). +    It can also change after the allocation is moved during \ref defragmentation. +    */ +    void* VMA_NULLABLE pMappedData; +    /** \brief Custom general-purpose pointer that was passed as VmaAllocationCreateInfo::pUserData or set using vmaSetAllocationUserData(). + +    It can change after call to vmaSetAllocationUserData() for this allocation. +    */ +    void* VMA_NULLABLE pUserData; +    /** \brief Custom allocation name that was set with vmaSetAllocationName(). + +    It can change after call to vmaSetAllocationName() for this allocation. + +    Another way to set custom name is to pass it in VmaAllocationCreateInfo::pUserData with +    additional flag #VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT set [DEPRECATED]. +    */ +    const char* VMA_NULLABLE pName; +} VmaAllocationInfo; + +/// Extended parameters of a #VmaAllocation object that can be retrieved using function vmaGetAllocationInfo2(). +typedef struct VmaAllocationInfo2 +{ +    /** \brief Basic parameters of the allocation. +     +    If you need only these, you can use function vmaGetAllocationInfo() and structure #VmaAllocationInfo instead. +    */ +    VmaAllocationInfo allocationInfo; +    /** \brief Size of the `VkDeviceMemory` block that the allocation belongs to. +     +    In case of an allocation with dedicated memory, it will be equal to `allocationInfo.size`. +    */ +    VkDeviceSize blockSize; +    /** \brief `VK_TRUE` if the allocation has dedicated memory, `VK_FALSE` if it was placed as part of a larger memory block. +     +    When `VK_TRUE`, it also means `VkMemoryDedicatedAllocateInfo` was used when creating the allocation +    (if VK_KHR_dedicated_allocation extension or Vulkan version >= 1.1 is enabled). +    */ +    VkBool32 dedicatedMemory; +} VmaAllocationInfo2; + +/** Callback function called during vmaBeginDefragmentation() to check custom criterion about ending current defragmentation pass. + +Should return true if the defragmentation needs to stop current pass. +*/ +typedef VkBool32 (VKAPI_PTR* PFN_vmaCheckDefragmentationBreakFunction)(void* VMA_NULLABLE pUserData); + +/** \brief Parameters for defragmentation. + +To be used with function vmaBeginDefragmentation(). +*/ +typedef struct VmaDefragmentationInfo +{ +    /// \brief Use combination of #VmaDefragmentationFlagBits. +    VmaDefragmentationFlags flags; +    /** \brief Custom pool to be defragmented. + +    If null then default pools will undergo defragmentation process. +    */ +    VmaPool VMA_NULLABLE pool; +    /** \brief Maximum numbers of bytes that can be copied during single pass, while moving allocations to different places. + +    `0` means no limit. +    */ +    VkDeviceSize maxBytesPerPass; +    /** \brief Maximum number of allocations that can be moved during single pass to a different place. + +    `0` means no limit. +    */ +    uint32_t maxAllocationsPerPass; +    /** \brief Optional custom callback for stopping vmaBeginDefragmentation(). 
+ +    Have to return true for breaking current defragmentation pass. +    */ +    PFN_vmaCheckDefragmentationBreakFunction VMA_NULLABLE pfnBreakCallback; +    /// \brief Optional data to pass to custom callback for stopping pass of defragmentation. +    void* VMA_NULLABLE pBreakCallbackUserData; +} VmaDefragmentationInfo; + +/// Single move of an allocation to be done for defragmentation. +typedef struct VmaDefragmentationMove +{ +    /// Operation to be performed on the allocation by vmaEndDefragmentationPass(). Default value is #VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY. You can modify it. +    VmaDefragmentationMoveOperation operation; +    /// Allocation that should be moved. +    VmaAllocation VMA_NOT_NULL srcAllocation; +    /** \brief Temporary allocation pointing to destination memory that will replace `srcAllocation`. + +    \warning Do not store this allocation in your data structures! It exists only temporarily, for the duration of the defragmentation pass, +    to be used for binding new buffer/image to the destination memory using e.g. vmaBindBufferMemory(). +    vmaEndDefragmentationPass() will destroy it and make `srcAllocation` point to this memory. +    */ +    VmaAllocation VMA_NOT_NULL dstTmpAllocation; +} VmaDefragmentationMove; + +/** \brief Parameters for incremental defragmentation steps. + +To be used with function vmaBeginDefragmentationPass(). +*/ +typedef struct VmaDefragmentationPassMoveInfo +{ +    /// Number of elements in the `pMoves` array. +    uint32_t moveCount; +    /** \brief Array of moves to be performed by the user in the current defragmentation pass. + +    Pointer to an array of `moveCount` elements, owned by VMA, created in vmaBeginDefragmentationPass(), destroyed in vmaEndDefragmentationPass(). + +    For each element, you should: + +    1. Create a new buffer/image in the place pointed by VmaDefragmentationMove::dstMemory + VmaDefragmentationMove::dstOffset. +    2. Copy data from the VmaDefragmentationMove::srcAllocation e.g. using `vkCmdCopyBuffer`, `vkCmdCopyImage`. +    3. Make sure these commands finished executing on the GPU. +    4. Destroy the old buffer/image. + +    Only then you can finish defragmentation pass by calling vmaEndDefragmentationPass(). +    After this call, the allocation will point to the new place in memory. + +    Alternatively, if you cannot move specific allocation, you can set VmaDefragmentationMove::operation to #VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE. + +    Alternatively, if you decide you want to completely remove the allocation: + +    1. Destroy its buffer/image. +    2. Set VmaDefragmentationMove::operation to #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY. + +    Then, after vmaEndDefragmentationPass() the allocation will be freed. +    */ +    VmaDefragmentationMove* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(moveCount) pMoves; +} VmaDefragmentationPassMoveInfo; + +/// Statistics returned for defragmentation process in function vmaEndDefragmentation(). +typedef struct VmaDefragmentationStats +{ +    /// Total number of bytes that have been copied while moving allocations to different places. +    VkDeviceSize bytesMoved; +    /// Total number of bytes that have been released to the system by freeing empty `VkDeviceMemory` objects. +    VkDeviceSize bytesFreed; +    /// Number of allocations that have been moved to different places. +    uint32_t allocationsMoved; +    /// Number of empty `VkDeviceMemory` objects that have been released to the system. 
+    uint32_t deviceMemoryBlocksFreed; +} VmaDefragmentationStats; + +/** @} */ + +/** +\addtogroup group_virtual +@{ +*/ + +/// Parameters of created #VmaVirtualBlock object to be passed to vmaCreateVirtualBlock(). +typedef struct VmaVirtualBlockCreateInfo +{ +    /** \brief Total size of the virtual block. + +    Sizes can be expressed in bytes or any units you want as long as you are consistent in using them. +    For example, if you allocate from some array of structures, 1 can mean single instance of entire structure. +    */ +    VkDeviceSize size; + +    /** \brief Use combination of #VmaVirtualBlockCreateFlagBits. +    */ +    VmaVirtualBlockCreateFlags flags; + +    /** \brief Custom CPU memory allocation callbacks. Optional. + +    Optional, can be null. When specified, they will be used for all CPU-side memory allocations. +    */ +    const VkAllocationCallbacks* VMA_NULLABLE pAllocationCallbacks; +} VmaVirtualBlockCreateInfo; + +/// Parameters of created virtual allocation to be passed to vmaVirtualAllocate(). +typedef struct VmaVirtualAllocationCreateInfo +{ +    /** \brief Size of the allocation. + +    Cannot be zero. +    */ +    VkDeviceSize size; +    /** \brief Required alignment of the allocation. Optional. + +    Must be power of two. Special value 0 has the same meaning as 1 - means no special alignment is required, so allocation can start at any offset. +    */ +    VkDeviceSize alignment; +    /** \brief Use combination of #VmaVirtualAllocationCreateFlagBits. +    */ +    VmaVirtualAllocationCreateFlags flags; +    /** \brief Custom pointer to be associated with the allocation. Optional. + +    It can be any value and can be used for user-defined purposes. It can be fetched or changed later. +    */ +    void* VMA_NULLABLE pUserData; +} VmaVirtualAllocationCreateInfo; + +/// Parameters of an existing virtual allocation, returned by vmaGetVirtualAllocationInfo(). +typedef struct VmaVirtualAllocationInfo +{ +    /** \brief Offset of the allocation. + +    Offset at which the allocation was made. +    */ +    VkDeviceSize offset; +    /** \brief Size of the allocation. + +    Same value as passed in VmaVirtualAllocationCreateInfo::size. +    */ +    VkDeviceSize size; +    /** \brief Custom pointer associated with the allocation. + +    Same value as passed in VmaVirtualAllocationCreateInfo::pUserData or to vmaSetVirtualAllocationUserData(). +    */ +    void* VMA_NULLABLE pUserData; +} VmaVirtualAllocationInfo; + +/** @} */ + +#endif // _VMA_DATA_TYPES_DECLARATIONS + +#ifndef _VMA_FUNCTION_HEADERS + +/** +\addtogroup group_init +@{ +*/ + +/// Creates #VmaAllocator object. +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAllocator( +    const VmaAllocatorCreateInfo* VMA_NOT_NULL pCreateInfo, +    VmaAllocator VMA_NULLABLE* VMA_NOT_NULL pAllocator); + +/// Destroys allocator object. +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyAllocator( +    VmaAllocator VMA_NULLABLE allocator); + +/** \brief Returns information about existing #VmaAllocator object - handle to Vulkan device etc. + +It might be useful if you want to keep just the #VmaAllocator handle and fetch other required handles to +`VkPhysicalDevice`, `VkDevice` etc. every time using this function. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocatorInfo( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocatorInfo* VMA_NOT_NULL pAllocatorInfo); + +/** +PhysicalDeviceProperties are fetched from physicalDevice by the allocator. +You can access it here, without fetching it again on your own. 
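The virtual-allocator structures above need no Vulkan objects at all; the block only hands out offsets. A minimal sketch of one allocate/free cycle (the sizes are arbitrary):

\code
// Sketch: use VmaVirtualBlock to sub-allocate offsets inside a 1 MiB region.
VmaVirtualBlockCreateInfo blockCreateInfo = {0};
blockCreateInfo.size = 1048576; // units are up to the caller

VmaVirtualBlock block;
VkResult res = vmaCreateVirtualBlock(&blockCreateInfo, &block);

VmaVirtualAllocationCreateInfo allocCreateInfo = {0};
allocCreateInfo.size = 4096;
allocCreateInfo.alignment = 256;

VmaVirtualAllocation alloc;
VkDeviceSize offset;
res = vmaVirtualAllocate(block, &allocCreateInfo, &alloc, &offset);

// ... use [offset, offset + 4096) inside whatever resource this block mirrors ...

vmaVirtualFree(block, alloc);
vmaDestroyVirtualBlock(block);
\endcode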
+*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetPhysicalDeviceProperties( +    VmaAllocator VMA_NOT_NULL allocator, +    const VkPhysicalDeviceProperties* VMA_NULLABLE* VMA_NOT_NULL ppPhysicalDeviceProperties); + +/** +PhysicalDeviceMemoryProperties are fetched from physicalDevice by the allocator. +You can access it here, without fetching it again on your own. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryProperties( +    VmaAllocator VMA_NOT_NULL allocator, +    const VkPhysicalDeviceMemoryProperties* VMA_NULLABLE* VMA_NOT_NULL ppPhysicalDeviceMemoryProperties); + +/** +\brief Given Memory Type Index, returns Property Flags of this memory type. + +This is just a convenience function. Same information can be obtained using +vmaGetMemoryProperties(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryTypeProperties( +    VmaAllocator VMA_NOT_NULL allocator, +    uint32_t memoryTypeIndex, +    VkMemoryPropertyFlags* VMA_NOT_NULL pFlags); + +/** \brief Sets index of the current frame. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaSetCurrentFrameIndex( +    VmaAllocator VMA_NOT_NULL allocator, +    uint32_t frameIndex); + +/** @} */ + +/** +\addtogroup group_stats +@{ +*/ + +/** \brief Retrieves statistics from current state of the Allocator. + +This function is called "calculate" not "get" because it has to traverse all +internal data structures, so it may be quite slow. Use it for debugging purposes. +For faster but more brief statistics suitable to be called every frame or every allocation, +use vmaGetHeapBudgets(). + +Note that when using allocator from multiple threads, returned information may immediately +become outdated. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateStatistics( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaTotalStatistics* VMA_NOT_NULL pStats); + +/** \brief Retrieves information about current memory usage and budget for all memory heaps. + +\param allocator +\param[out] pBudgets Must point to array with number of elements at least equal to number of memory heaps in physical device used. + +This function is called "get" not "calculate" because it is very fast, suitable to be called +every frame or every allocation. For more detailed statistics use vmaCalculateStatistics(). + +Note that when using allocator from multiple threads, returned information may immediately +become outdated. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetHeapBudgets( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaBudget* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL("VkPhysicalDeviceMemoryProperties::memoryHeapCount") pBudgets); + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/** +\brief Helps to find memoryTypeIndex, given memoryTypeBits and VmaAllocationCreateInfo. + +This algorithm tries to find a memory type that: + +- Is allowed by memoryTypeBits. +- Contains all the flags from pAllocationCreateInfo->requiredFlags. +- Matches intended usage. +- Has as many flags from pAllocationCreateInfo->preferredFlags as possible. + +\return Returns VK_ERROR_FEATURE_NOT_PRESENT if not found. Receiving such result +from this function or any other allocating function probably means that your +device doesn't support any memory type with requested features for the specific +type of resource you want to use it for. Please check parameters of your +resource, like image layout (OPTIMAL versus LINEAR) or mip level count. 
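vmaGetHeapBudgets(), documented above, pairs naturally with vmaGetMemoryProperties() for a per-frame memory report. A minimal sketch, assuming a valid `allocator` and using plain printf() as a stand-in for the engine's logger:

\code
// Sketch: report per-heap usage vs. budget.
// `allocator` is assumed to be valid; printf() is from <stdio.h>.
const VkPhysicalDeviceMemoryProperties* memProps;
vmaGetMemoryProperties(allocator, &memProps);

VmaBudget budgets[VK_MAX_MEMORY_HEAPS];
vmaGetHeapBudgets(allocator, budgets);

for (uint32_t heap = 0; heap < memProps->memoryHeapCount; ++heap)
{
    printf("heap %u: %llu / %llu bytes\n", heap,
           (unsigned long long)budgets[heap].usage,
           (unsigned long long)budgets[heap].budget);
}
\endcode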
+*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndex( +    VmaAllocator VMA_NOT_NULL allocator, +    uint32_t memoryTypeBits, +    const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, +    uint32_t* VMA_NOT_NULL pMemoryTypeIndex); + +/** +\brief Helps to find memoryTypeIndex, given VkBufferCreateInfo and VmaAllocationCreateInfo. + +It can be useful e.g. to determine value to be used as VmaPoolCreateInfo::memoryTypeIndex. +It internally creates a temporary, dummy buffer that never has memory bound. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo( +    VmaAllocator VMA_NOT_NULL allocator, +    const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, +    const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, +    uint32_t* VMA_NOT_NULL pMemoryTypeIndex); + +/** +\brief Helps to find memoryTypeIndex, given VkImageCreateInfo and VmaAllocationCreateInfo. + +It can be useful e.g. to determine value to be used as VmaPoolCreateInfo::memoryTypeIndex. +It internally creates a temporary, dummy image that never has memory bound. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForImageInfo( +    VmaAllocator VMA_NOT_NULL allocator, +    const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, +    const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, +    uint32_t* VMA_NOT_NULL pMemoryTypeIndex); + +/** \brief Allocates Vulkan device memory and creates #VmaPool object. + +\param allocator Allocator object. +\param pCreateInfo Parameters of pool to create. +\param[out] pPool Handle to created pool. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreatePool( +    VmaAllocator VMA_NOT_NULL allocator, +    const VmaPoolCreateInfo* VMA_NOT_NULL pCreateInfo, +    VmaPool VMA_NULLABLE* VMA_NOT_NULL pPool); + +/** \brief Destroys #VmaPool object and frees Vulkan device memory. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyPool( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaPool VMA_NULLABLE pool); + +/** @} */ + +/** +\addtogroup group_stats +@{ +*/ + +/** \brief Retrieves statistics of existing #VmaPool object. + +\param allocator Allocator object. +\param pool Pool object. +\param[out] pPoolStats Statistics of specified pool. + +Note that when using the pool from multiple threads, returned information may immediately +become outdated. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolStatistics( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaPool VMA_NOT_NULL pool, +    VmaStatistics* VMA_NOT_NULL pPoolStats); + +/** \brief Retrieves detailed statistics of existing #VmaPool object. + +\param allocator Allocator object. +\param pool Pool object. +\param[out] pPoolStats Statistics of specified pool. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaCalculatePoolStatistics( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaPool VMA_NOT_NULL pool, +    VmaDetailedStatistics* VMA_NOT_NULL pPoolStats); + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/** \brief Checks magic number in margins around all allocations in given memory pool in search for corruptions. + +Corruption detection is enabled only when `VMA_DEBUG_DETECT_CORRUPTION` macro is defined to nonzero, +`VMA_DEBUG_MARGIN` is defined to nonzero and the pool is created in memory type that is +`HOST_VISIBLE` and `HOST_COHERENT`. For more information, see [Corruption detection](@ref debugging_memory_usage_corruption_detection). + +Possible return values: + +- `VK_ERROR_FEATURE_NOT_PRESENT` - corruption detection is not enabled for specified pool. 
+- `VK_SUCCESS` - corruption detection has been performed and succeeded. +- `VK_ERROR_UNKNOWN` - corruption detection has been performed and found memory corruptions around one of the allocations. +  `VMA_ASSERT` is also fired in that case. +- Other value: Error returned by Vulkan, e.g. memory mapping failure. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckPoolCorruption( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaPool VMA_NOT_NULL pool); + +/** \brief Retrieves name of a custom pool. + +After the call `ppName` is either null or points to an internally-owned null-terminated string +containing name of the pool that was previously set. The pointer becomes invalid when the pool is +destroyed or its name is changed using vmaSetPoolName(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolName( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaPool VMA_NOT_NULL pool, +    const char* VMA_NULLABLE* VMA_NOT_NULL ppName); + +/** \brief Sets name of a custom pool. + +`pName` can be either null or pointer to a null-terminated string with new name for the pool. +Function makes internal copy of the string, so it can be changed or freed immediately after this call. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaSetPoolName( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaPool VMA_NOT_NULL pool, +    const char* VMA_NULLABLE pName); + +/** \brief General purpose memory allocation. + +\param allocator +\param pVkMemoryRequirements +\param pCreateInfo +\param[out] pAllocation Handle to allocated memory. +\param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo(). + +You should free the memory using vmaFreeMemory() or vmaFreeMemoryPages(). + +It is recommended to use vmaAllocateMemoryForBuffer(), vmaAllocateMemoryForImage(), +vmaCreateBuffer(), vmaCreateImage() instead whenever possible. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemory( +    VmaAllocator VMA_NOT_NULL allocator, +    const VkMemoryRequirements* VMA_NOT_NULL pVkMemoryRequirements, +    const VmaAllocationCreateInfo* VMA_NOT_NULL pCreateInfo, +    VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, +    VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); + +/** \brief General purpose memory allocation for multiple allocation objects at once. + +\param allocator Allocator object. +\param pVkMemoryRequirements Memory requirements for each allocation. +\param pCreateInfo Creation parameters for each allocation. +\param allocationCount Number of allocations to make. +\param[out] pAllocations Pointer to array that will be filled with handles to created allocations. +\param[out] pAllocationInfo Optional. Pointer to array that will be filled with parameters of created allocations. + +You should free the memory using vmaFreeMemory() or vmaFreeMemoryPages(). + +Word "pages" is just a suggestion to use this function to allocate pieces of memory needed for sparse binding. +It is just a general purpose allocation function able to make multiple allocations at once. +It may be internally optimized to be more efficient than calling vmaAllocateMemory() `allocationCount` times. + +All allocations are made using same parameters. All of them are created out of the same memory pool and type. +If any allocation fails, all allocations already made within this function call are also freed, so that when +returned result is not `VK_SUCCESS`, `pAllocation` array is always entirely filled with `VK_NULL_HANDLE`. 
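An editorial sketch of the batched path described above (assuming `allocator`, a `memReq` filled by `vkGetBufferMemoryRequirements()`, and a `createInfo` prepared as for vmaAllocateMemory()):

\code
VkMemoryRequirements reqs[3];
VmaAllocationCreateInfo infos[3];
for(size_t i = 0; i < 3; ++i) { reqs[i] = memReq; infos[i] = createInfo; }

VmaAllocation pages[3];
VkResult res = vmaAllocateMemoryPages(allocator, reqs, infos, 3, pages, NULL);
if(res == VK_SUCCESS)
{
    // ... bind the allocations (e.g. for sparse binding), use them ...
    vmaFreeMemoryPages(allocator, 3, pages);
}
\endcode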
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryPages(
+    VmaAllocator VMA_NOT_NULL allocator,
+    const VkMemoryRequirements* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pVkMemoryRequirements,
+    const VmaAllocationCreateInfo* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pCreateInfo,
+    size_t allocationCount,
+    VmaAllocation VMA_NULLABLE* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pAllocations,
+    VmaAllocationInfo* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) pAllocationInfo);
+
+/** \brief Allocates memory suitable for given `VkBuffer`.
+
+\param allocator
+\param buffer
+\param pCreateInfo
+\param[out] pAllocation Handle to allocated memory.
+\param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo().
+
+It only creates #VmaAllocation. To bind the memory to the buffer, use vmaBindBufferMemory().
+
+This is a special-purpose function. In most cases you should use vmaCreateBuffer().
+
+You must free the allocation using vmaFreeMemory() when no longer needed.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForBuffer(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VkBuffer VMA_NOT_NULL_NON_DISPATCHABLE buffer,
+    const VmaAllocationCreateInfo* VMA_NOT_NULL pCreateInfo,
+    VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation,
+    VmaAllocationInfo* VMA_NULLABLE pAllocationInfo);
+
+/** \brief Allocates memory suitable for given `VkImage`.
+
+\param allocator
+\param image
+\param pCreateInfo
+\param[out] pAllocation Handle to allocated memory.
+\param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo().
+
+It only creates #VmaAllocation. To bind the memory to the image, use vmaBindImageMemory().
+
+This is a special-purpose function. In most cases you should use vmaCreateImage().
+
+You must free the allocation using vmaFreeMemory() when no longer needed.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForImage(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VkImage VMA_NOT_NULL_NON_DISPATCHABLE image,
+    const VmaAllocationCreateInfo* VMA_NOT_NULL pCreateInfo,
+    VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation,
+    VmaAllocationInfo* VMA_NULLABLE pAllocationInfo);
+
+/** \brief Frees memory previously allocated using vmaAllocateMemory(), vmaAllocateMemoryForBuffer(), or vmaAllocateMemoryForImage().
+
+Passing `VK_NULL_HANDLE` as `allocation` is valid. Such a function call is just skipped.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemory(
+    VmaAllocator VMA_NOT_NULL allocator,
+    const VmaAllocation VMA_NULLABLE allocation);
+
+/** \brief Frees memory and destroys multiple allocations.
+
+Word "pages" is just a suggestion to use this function to free pieces of memory used for sparse binding.
+It is just a general purpose function to free memory and destroy allocations made using e.g. vmaAllocateMemory(),
+vmaAllocateMemoryPages() and other functions.
+It may be internally optimized to be more efficient than calling vmaFreeMemory() `allocationCount` times.
+
+Allocations in `pAllocations` array can come from any memory pools and types.
+Passing `VK_NULL_HANDLE` as elements of `pAllocations` array is valid. Such entries are just skipped.
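An editorial sketch tying the allocate/bind/free trio above together; `allocator` and a `VkBuffer` named `buffer` created with `vkCreateBuffer()` are assumed:

\code
VmaAllocationCreateInfo createInfo = {0};
createInfo.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;

VmaAllocation allocation;
VkResult res = vmaAllocateMemoryForBuffer(allocator, buffer, &createInfo, &allocation, NULL);
if(res == VK_SUCCESS)
{
    vmaBindBufferMemory(allocator, allocation, buffer);
    // ... use the buffer ...
    vmaFreeMemory(allocator, allocation);   // destroying the VkBuffer itself remains the caller's job
}
\endcode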
+*/ +VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemoryPages( +    VmaAllocator VMA_NOT_NULL allocator, +    size_t allocationCount, +    const VmaAllocation VMA_NULLABLE* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pAllocations); + +/** \brief Returns current information about specified allocation. + +Current parameters of given allocation are returned in `pAllocationInfo`. + +Although this function doesn't lock any mutex, so it should be quite efficient, +you should avoid calling it too often. +You can retrieve same VmaAllocationInfo structure while creating your resource, from function +vmaCreateBuffer(), vmaCreateImage(). You can remember it if you are sure parameters don't change +(e.g. due to defragmentation). + +There is also a new function vmaGetAllocationInfo2() that offers extended information +about the allocation, returned using new structure #VmaAllocationInfo2. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    VmaAllocationInfo* VMA_NOT_NULL pAllocationInfo); + +/** \brief Returns extended information about specified allocation. + +Current parameters of given allocation are returned in `pAllocationInfo`. +Extended parameters in structure #VmaAllocationInfo2 include memory block size +and a flag telling whether the allocation has dedicated memory. +It can be useful e.g. for interop with OpenGL. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo2( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    VmaAllocationInfo2* VMA_NOT_NULL pAllocationInfo); + +/** \brief Sets pUserData in given allocation to new value. + +The value of pointer `pUserData` is copied to allocation's `pUserData`. +It is opaque, so you can use it however you want - e.g. +as a pointer, ordinal number or some handle to you own data. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationUserData( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    void* VMA_NULLABLE pUserData); + +/** \brief Sets pName in given allocation to new value. + +`pName` must be either null, or pointer to a null-terminated string. The function +makes local copy of the string and sets it as allocation's `pName`. String +passed as pName doesn't need to be valid for whole lifetime of the allocation - +you can free it after this call. String previously pointed by allocation's +`pName` is freed from memory. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationName( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    const char* VMA_NULLABLE pName); + +/** +\brief Given an allocation, returns Property Flags of its memory type. + +This is just a convenience function. Same information can be obtained using +vmaGetAllocationInfo() + vmaGetMemoryProperties(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationMemoryProperties( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    VkMemoryPropertyFlags* VMA_NOT_NULL pFlags); + + +#if VMA_EXTERNAL_MEMORY_WIN32 +/** +\brief Given an allocation, returns Win32 handle that may be imported by other processes or APIs. + +\param hTargetProcess Must be a valid handle to target process or null. If it's null, the function returns +    handle for the current process. +\param[out] pHandle Output parameter that returns the handle. + +The function fills `pHandle` with handle that can be used in target process. 
+The handle is fetched using function `vkGetMemoryWin32HandleKHR`. +When no longer needed, you must close it using: + +\code +CloseHandle(handle); +\endcode + +You can close it any time, before or after destroying the allocation object. +It is reference-counted internally by Windows. + +Note the handle is returned for the entire `VkDeviceMemory` block that the allocation belongs to. +If the allocation is sub-allocated from a larger block, you may need to consider the offset of the allocation +(VmaAllocationInfo::offset). + +If the function fails with `VK_ERROR_FEATURE_NOT_PRESENT` error code, please double-check +that VmaVulkanFunctions::vkGetMemoryWin32HandleKHR function pointer is set, e.g. either by using `VMA_DYNAMIC_VULKAN_FUNCTIONS` +or by manually passing it through VmaAllocatorCreateInfo::pVulkanFunctions. + +For more information, see chapter \ref vk_khr_external_memory_win32. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaGetMemoryWin32Handle(VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, HANDLE hTargetProcess, HANDLE* VMA_NOT_NULL pHandle); +#endif // VMA_EXTERNAL_MEMORY_WIN32 + +/** \brief Maps memory represented by given allocation and returns pointer to it. + +Maps memory represented by given allocation to make it accessible to CPU code. +When succeeded, `*ppData` contains pointer to first byte of this memory. + +\warning +If the allocation is part of a bigger `VkDeviceMemory` block, returned pointer is +correctly offsetted to the beginning of region assigned to this particular allocation. +Unlike the result of `vkMapMemory`, it points to the allocation, not to the beginning of the whole block. +You should not add VmaAllocationInfo::offset to it! + +Mapping is internally reference-counted and synchronized, so despite raw Vulkan +function `vkMapMemory()` cannot be used to map same block of `VkDeviceMemory` +multiple times simultaneously, it is safe to call this function on allocations +assigned to the same memory block. Actual Vulkan memory will be mapped on first +mapping and unmapped on last unmapping. + +If the function succeeded, you must call vmaUnmapMemory() to unmap the +allocation when mapping is no longer needed or before freeing the allocation, at +the latest. + +It also safe to call this function multiple times on the same allocation. You +must call vmaUnmapMemory() same number of times as you called vmaMapMemory(). + +It is also safe to call this function on allocation created with +#VMA_ALLOCATION_CREATE_MAPPED_BIT flag. Its memory stays mapped all the time. +You must still call vmaUnmapMemory() same number of times as you called +vmaMapMemory(). You must not call vmaUnmapMemory() additional time to free the +"0-th" mapping made automatically due to #VMA_ALLOCATION_CREATE_MAPPED_BIT flag. + +This function fails when used on allocation made in memory type that is not +`HOST_VISIBLE`. + +This function doesn't automatically flush or invalidate caches. +If the allocation is made from a memory types that is not `HOST_COHERENT`, +you also need to use vmaInvalidateAllocation() / vmaFlushAllocation(), as required by Vulkan specification. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaMapMemory( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    void* VMA_NULLABLE* VMA_NOT_NULL ppData); + +/** \brief Unmaps memory represented by given allocation, mapped previously using vmaMapMemory(). + +For details, see description of vmaMapMemory(). + +This function doesn't automatically flush or invalidate caches. 
+If the allocation is made from a memory type that is not `HOST_COHERENT`,
+you also need to use vmaInvalidateAllocation() / vmaFlushAllocation(), as required by Vulkan specification.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaUnmapMemory(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation);
+
+/** \brief Flushes memory of given allocation.
+
+Calls `vkFlushMappedMemoryRanges()` for memory associated with given range of given allocation.
+It needs to be called after writing to a mapped memory for memory types that are not `HOST_COHERENT`.
+Unmap operation doesn't do that automatically.
+
+- `offset` must be relative to the beginning of allocation.
+- `size` can be `VK_WHOLE_SIZE`. It means all memory from `offset` to the end of given allocation.
+- `offset` and `size` don't have to be aligned.
+  They are internally rounded down/up to a multiple of `nonCoherentAtomSize`.
+- If `size` is 0, this call is ignored.
+- If memory type that the `allocation` belongs to is not `HOST_VISIBLE` or it is `HOST_COHERENT`,
+  this call is ignored.
+
+Warning! `offset` and `size` are relative to the contents of given `allocation`.
+If you mean whole allocation, you can pass 0 and `VK_WHOLE_SIZE`, respectively.
+Do not pass allocation's offset as `offset`!!!
+
+This function returns the `VkResult` from `vkFlushMappedMemoryRanges` if it is
+called, otherwise `VK_SUCCESS`.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocation(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    VkDeviceSize offset,
+    VkDeviceSize size);
+
+/** \brief Invalidates memory of given allocation.
+
+Calls `vkInvalidateMappedMemoryRanges()` for memory associated with given range of given allocation.
+It needs to be called before reading from a mapped memory for memory types that are not `HOST_COHERENT`.
+Map operation doesn't do that automatically.
+
+- `offset` must be relative to the beginning of allocation.
+- `size` can be `VK_WHOLE_SIZE`. It means all memory from `offset` to the end of given allocation.
+- `offset` and `size` don't have to be aligned.
+  They are internally rounded down/up to a multiple of `nonCoherentAtomSize`.
+- If `size` is 0, this call is ignored.
+- If memory type that the `allocation` belongs to is not `HOST_VISIBLE` or it is `HOST_COHERENT`,
+  this call is ignored.
+
+Warning! `offset` and `size` are relative to the contents of given `allocation`.
+If you mean whole allocation, you can pass 0 and `VK_WHOLE_SIZE`, respectively.
+Do not pass allocation's offset as `offset`!!!
+
+This function returns the `VkResult` from `vkInvalidateMappedMemoryRanges` if
+it is called, otherwise `VK_SUCCESS`.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocation(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    VkDeviceSize offset,
+    VkDeviceSize size);
+
+/** \brief Flushes memory of given set of allocations.
+
+Calls `vkFlushMappedMemoryRanges()` for memory associated with given ranges of given allocations.
+For more information, see documentation of vmaFlushAllocation().
+
+\param allocator
+\param allocationCount
+\param allocations
+\param offsets If not null, it must point to an array of offsets of regions to flush, relative to the beginning of respective allocations. Null means all offsets are zero.
+\param sizes If not null, it must point to an array of sizes of regions to flush in respective allocations. Null means `VK_WHOLE_SIZE` for all allocations.
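The typical write path that the flush rules above describe, as an editorial sketch (a `HOST_VISIBLE`, possibly non-coherent `allocation` plus a host array `vertices` of `vertexBytes` bytes are assumptions of this snippet):

\code
void* mapped = NULL;
if(vmaMapMemory(allocator, allocation, &mapped) == VK_SUCCESS)
{
    memcpy(mapped, vertices, vertexBytes);
    vmaFlushAllocation(allocator, allocation, 0, VK_WHOLE_SIZE);   // ignored for HOST_COHERENT memory
    vmaUnmapMemory(allocator, allocation);
}
// vmaCopyMemoryToAllocation(), documented below, wraps this same sequence in one call.
\endcode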
+ +This function returns the `VkResult` from `vkFlushMappedMemoryRanges` if it is +called, otherwise `VK_SUCCESS`. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocations( +    VmaAllocator VMA_NOT_NULL allocator, +    uint32_t allocationCount, +    const VmaAllocation VMA_NOT_NULL* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) allocations, +    const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) offsets, +    const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) sizes); + +/** \brief Invalidates memory of given set of allocations. + +Calls `vkInvalidateMappedMemoryRanges()` for memory associated with given ranges of given allocations. +For more information, see documentation of vmaInvalidateAllocation(). + +\param allocator +\param allocationCount +\param allocations +\param offsets If not null, it must point to an array of offsets of regions to flush, relative to the beginning of respective allocations. Null means all offsets are zero. +\param sizes If not null, it must point to an array of sizes of regions to flush in respective allocations. Null means `VK_WHOLE_SIZE` for all allocations. + +This function returns the `VkResult` from `vkInvalidateMappedMemoryRanges` if it is +called, otherwise `VK_SUCCESS`. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocations( +    VmaAllocator VMA_NOT_NULL allocator, +    uint32_t allocationCount, +    const VmaAllocation VMA_NOT_NULL* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) allocations, +    const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) offsets, +    const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) sizes); + +/** \brief Maps the allocation temporarily if needed, copies data from specified host pointer to it, and flushes the memory from the host caches if needed. + +\param allocator +\param pSrcHostPointer Pointer to the host data that become source of the copy. +\param dstAllocation   Handle to the allocation that becomes destination of the copy. +\param dstAllocationLocalOffset  Offset within `dstAllocation` where to write copied data, in bytes. +\param size            Number of bytes to copy. + +This is a convenience function that allows to copy data from a host pointer to an allocation easily. +Same behavior can be achieved by calling vmaMapMemory(), `memcpy()`, vmaUnmapMemory(), vmaFlushAllocation(). + +This function can be called only for allocations created in a memory type that has `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` flag. +It can be ensured e.g. by using #VMA_MEMORY_USAGE_AUTO and #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or +#VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT. +Otherwise, the function will fail and generate a Validation Layers error. + +`dstAllocationLocalOffset` is relative to the contents of given `dstAllocation`. +If you mean whole allocation, you should pass 0. +Do not pass allocation's offset within device memory block this parameter! +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyMemoryToAllocation( +    VmaAllocator VMA_NOT_NULL allocator, +    const void* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(size) pSrcHostPointer, +    VmaAllocation VMA_NOT_NULL dstAllocation, +    VkDeviceSize dstAllocationLocalOffset, +    VkDeviceSize size); + +/** \brief Invalidates memory in the host caches if needed, maps the allocation temporarily if needed, and copies data from it to a specified host pointer. + +\param allocator +\param srcAllocation   Handle to the allocation that becomes source of the copy. 
+\param srcAllocationLocalOffset  Offset within `srcAllocation` where to read copied data, in bytes. +\param pDstHostPointer Pointer to the host memory that become destination of the copy. +\param size            Number of bytes to copy. + +This is a convenience function that allows to copy data from an allocation to a host pointer easily. +Same behavior can be achieved by calling vmaInvalidateAllocation(), vmaMapMemory(), `memcpy()`, vmaUnmapMemory(). + +This function should be called only for allocations created in a memory type that has `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` +and `VK_MEMORY_PROPERTY_HOST_CACHED_BIT` flag. +It can be ensured e.g. by using #VMA_MEMORY_USAGE_AUTO and #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT. +Otherwise, the function may fail and generate a Validation Layers error. +It may also work very slowly when reading from an uncached memory. + +`srcAllocationLocalOffset` is relative to the contents of given `srcAllocation`. +If you mean whole allocation, you should pass 0. +Do not pass allocation's offset within device memory block as this parameter! +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyAllocationToMemory( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL srcAllocation, +    VkDeviceSize srcAllocationLocalOffset, +    void* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(size) pDstHostPointer, +    VkDeviceSize size); + +/** \brief Checks magic number in margins around all allocations in given memory types (in both default and custom pools) in search for corruptions. + +\param allocator +\param memoryTypeBits Bit mask, where each bit set means that a memory type with that index should be checked. + +Corruption detection is enabled only when `VMA_DEBUG_DETECT_CORRUPTION` macro is defined to nonzero, +`VMA_DEBUG_MARGIN` is defined to nonzero and only for memory types that are +`HOST_VISIBLE` and `HOST_COHERENT`. For more information, see [Corruption detection](@ref debugging_memory_usage_corruption_detection). + +Possible return values: + +- `VK_ERROR_FEATURE_NOT_PRESENT` - corruption detection is not enabled for any of specified memory types. +- `VK_SUCCESS` - corruption detection has been performed and succeeded. +- `VK_ERROR_UNKNOWN` - corruption detection has been performed and found memory corruptions around one of the allocations. +  `VMA_ASSERT` is also fired in that case. +- Other value: Error returned by Vulkan, e.g. memory mapping failure. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckCorruption( +    VmaAllocator VMA_NOT_NULL allocator, +    uint32_t memoryTypeBits); + +/** \brief Begins defragmentation process. + +\param allocator Allocator object. +\param pInfo Structure filled with parameters of defragmentation. +\param[out] pContext Context object that must be passed to vmaEndDefragmentation() to finish defragmentation. +\returns +- `VK_SUCCESS` if defragmentation can begin. +- `VK_ERROR_FEATURE_NOT_PRESENT` if defragmentation is not supported. + +For more information about defragmentation, see documentation chapter: +[Defragmentation](@ref defragmentation). +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentation( +    VmaAllocator VMA_NOT_NULL allocator, +    const VmaDefragmentationInfo* VMA_NOT_NULL pInfo, +    VmaDefragmentationContext VMA_NULLABLE* VMA_NOT_NULL pContext); + +/** \brief Ends defragmentation process. + +\param allocator Allocator object. +\param context Context object that has been created by vmaBeginDefragmentation(). +\param[out] pStats Optional stats for the defragmentation. Can be null. 
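An editorial sketch of the overall defragmentation flow these functions describe (only the VMA calls are shown; the GPU copies and the recreation of buffers and images between passes are application-specific):

\code
VmaDefragmentationInfo defragInfo = {0};        // default algorithm, no custom pool

VmaDefragmentationContext ctx;
if(vmaBeginDefragmentation(allocator, &defragInfo, &ctx) == VK_SUCCESS)
{
    for(;;)
    {
        VmaDefragmentationPassMoveInfo pass = {0};
        if(vmaBeginDefragmentationPass(allocator, ctx, &pass) == VK_SUCCESS)
            break;                               // no more moves are possible
        // ... perform the copies described by pass.pMoves[0..pass.moveCount) and rebind resources ...
        if(vmaEndDefragmentationPass(allocator, ctx, &pass) == VK_SUCCESS)
            break;
    }
    vmaEndDefragmentation(allocator, ctx, NULL);
}
\endcode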
+ +Use this function to finish defragmentation started by vmaBeginDefragmentation(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaEndDefragmentation( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaDefragmentationContext VMA_NOT_NULL context, +    VmaDefragmentationStats* VMA_NULLABLE pStats); + +/** \brief Starts single defragmentation pass. + +\param allocator Allocator object. +\param context Context object that has been created by vmaBeginDefragmentation(). +\param[out] pPassInfo Computed information for current pass. +\returns +- `VK_SUCCESS` if no more moves are possible. Then you can omit call to vmaEndDefragmentationPass() and simply end whole defragmentation. +- `VK_INCOMPLETE` if there are pending moves returned in `pPassInfo`. You need to perform them, call vmaEndDefragmentationPass(), +  and then preferably try another pass with vmaBeginDefragmentationPass(). +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentationPass( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaDefragmentationContext VMA_NOT_NULL context, +    VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo); + +/** \brief Ends single defragmentation pass. + +\param allocator Allocator object. +\param context Context object that has been created by vmaBeginDefragmentation(). +\param pPassInfo Computed information for current pass filled by vmaBeginDefragmentationPass() and possibly modified by you. + +Returns `VK_SUCCESS` if no more moves are possible or `VK_INCOMPLETE` if more defragmentations are possible. + +Ends incremental defragmentation pass and commits all defragmentation moves from `pPassInfo`. +After this call: + +- Allocations at `pPassInfo[i].srcAllocation` that had `pPassInfo[i].operation ==` #VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY +  (which is the default) will be pointing to the new destination place. +- Allocation at `pPassInfo[i].srcAllocation` that had `pPassInfo[i].operation ==` #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY +  will be freed. + +If no more moves are possible you can end whole defragmentation. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaEndDefragmentationPass( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaDefragmentationContext VMA_NOT_NULL context, +    VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo); + +/** \brief Binds buffer to allocation. + +Binds specified buffer to region of memory represented by specified allocation. +Gets `VkDeviceMemory` handle and offset from the allocation. +If you want to create a buffer, allocate memory for it and bind them together separately, +you should use this function for binding instead of standard `vkBindBufferMemory()`, +because it ensures proper synchronization so that when a `VkDeviceMemory` object is used by multiple +allocations, calls to `vkBind*Memory()` or `vkMapMemory()` won't happen from multiple threads simultaneously +(which is illegal in Vulkan). + +It is recommended to use function vmaCreateBuffer() instead of this one. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    VkBuffer VMA_NOT_NULL_NON_DISPATCHABLE buffer); + +/** \brief Binds buffer to allocation with additional parameters. + +\param allocator +\param allocation +\param allocationLocalOffset Additional offset to be added while binding, relative to the beginning of the `allocation`. Normally it should be 0. +\param buffer +\param pNext A chain of structures to be attached to `VkBindBufferMemoryInfoKHR` structure used internally. 
Normally it should be null. + +This function is similar to vmaBindBufferMemory(), but it provides additional parameters. + +If `pNext` is not null, #VmaAllocator object must have been created with #VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT flag +or with VmaAllocatorCreateInfo::vulkanApiVersion `>= VK_API_VERSION_1_1`. Otherwise the call fails. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory2( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    VkDeviceSize allocationLocalOffset, +    VkBuffer VMA_NOT_NULL_NON_DISPATCHABLE buffer, +    const void* VMA_NULLABLE VMA_EXTENDS_VK_STRUCT(VkBindBufferMemoryInfoKHR) pNext); + +/** \brief Binds image to allocation. + +Binds specified image to region of memory represented by specified allocation. +Gets `VkDeviceMemory` handle and offset from the allocation. +If you want to create an image, allocate memory for it and bind them together separately, +you should use this function for binding instead of standard `vkBindImageMemory()`, +because it ensures proper synchronization so that when a `VkDeviceMemory` object is used by multiple +allocations, calls to `vkBind*Memory()` or `vkMapMemory()` won't happen from multiple threads simultaneously +(which is illegal in Vulkan). + +It is recommended to use function vmaCreateImage() instead of this one. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    VkImage VMA_NOT_NULL_NON_DISPATCHABLE image); + +/** \brief Binds image to allocation with additional parameters. + +\param allocator +\param allocation +\param allocationLocalOffset Additional offset to be added while binding, relative to the beginning of the `allocation`. Normally it should be 0. +\param image +\param pNext A chain of structures to be attached to `VkBindImageMemoryInfoKHR` structure used internally. Normally it should be null. + +This function is similar to vmaBindImageMemory(), but it provides additional parameters. + +If `pNext` is not null, #VmaAllocator object must have been created with #VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT flag +or with VmaAllocatorCreateInfo::vulkanApiVersion `>= VK_API_VERSION_1_1`. Otherwise the call fails. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory2( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    VkDeviceSize allocationLocalOffset, +    VkImage VMA_NOT_NULL_NON_DISPATCHABLE image, +    const void* VMA_NULLABLE VMA_EXTENDS_VK_STRUCT(VkBindImageMemoryInfoKHR) pNext); + +/** \brief Creates a new `VkBuffer`, allocates and binds memory for it. + +\param allocator +\param pBufferCreateInfo +\param pAllocationCreateInfo +\param[out] pBuffer Buffer that was created. +\param[out] pAllocation Allocation that was created. +\param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo(). + +This function automatically: + +-# Creates buffer. +-# Allocates appropriate memory for it. +-# Binds the buffer with the memory. + +If any of these operations fail, buffer and allocation are not created, +returned value is negative error code, `*pBuffer` and `*pAllocation` are null. + +If the function succeeded, you must destroy both buffer and allocation when you +no longer need them using either convenience function vmaDestroyBuffer() or +separately, using `vkDestroyBuffer()` and vmaFreeMemory(). 
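The one-call path described above, as an editorial sketch close to the library's quick-start example (`allocator` assumed, sizes arbitrary):

\code
VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
bufferInfo.size  = 65536;
bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;

VmaAllocationCreateInfo allocInfo = {0};
allocInfo.usage = VMA_MEMORY_USAGE_AUTO;

VkBuffer buffer;
VmaAllocation allocation;
if(vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, NULL) == VK_SUCCESS)
{
    // ... record commands that use `buffer` ...
    vmaDestroyBuffer(allocator, buffer, allocation);
}
\endcode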
+ +If #VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT flag was used, +VK_KHR_dedicated_allocation extension is used internally to query driver whether +it requires or prefers the new buffer to have dedicated allocation. If yes, +and if dedicated allocation is possible +(#VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT is not used), it creates dedicated +allocation for this buffer, just like when using +#VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. + +\note This function creates a new `VkBuffer`. Sub-allocation of parts of one large buffer, +although recommended as a good practice, is out of scope of this library and could be implemented +by the user as a higher-level logic on top of VMA. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBuffer( +    VmaAllocator VMA_NOT_NULL allocator, +    const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, +    const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, +    VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer, +    VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, +    VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); + +/** \brief Creates a buffer with additional minimum alignment. + +Similar to vmaCreateBuffer() but provides additional parameter `minAlignment` which allows to specify custom, +minimum alignment to be used when placing the buffer inside a larger memory block, which may be needed e.g. +for interop with OpenGL. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBufferWithAlignment( +    VmaAllocator VMA_NOT_NULL allocator, +    const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, +    const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, +    VkDeviceSize minAlignment, +    VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer, +    VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, +    VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); + +/** \brief Creates a new `VkBuffer`, binds already created memory for it. + +\param allocator +\param allocation Allocation that provides memory to be used for binding new buffer to it. +\param pBufferCreateInfo +\param[out] pBuffer Buffer that was created. + +This function automatically: + +-# Creates buffer. +-# Binds the buffer with the supplied memory. + +If any of these operations fail, buffer is not created, +returned value is negative error code and `*pBuffer` is null. + +If the function succeeded, you must destroy the buffer when you +no longer need it using `vkDestroyBuffer()`. If you want to also destroy the corresponding +allocation you can use convenience function vmaDestroyBuffer(). + +\note There is a new version of this function augmented with parameter `allocationLocalOffset` - see vmaCreateAliasingBuffer2(). +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, +    VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer); + +/** \brief Creates a new `VkBuffer`, binds already created memory for it. + +\param allocator +\param allocation Allocation that provides memory to be used for binding new buffer to it. +\param allocationLocalOffset Additional offset to be added while binding, relative to the beginning of the allocation. Normally it should be 0. +\param pBufferCreateInfo  +\param[out] pBuffer Buffer that was created. + +This function automatically: + +-# Creates buffer. +-# Binds the buffer with the supplied memory. 
+ +If any of these operations fail, buffer is not created, +returned value is negative error code and `*pBuffer` is null. + +If the function succeeded, you must destroy the buffer when you +no longer need it using `vkDestroyBuffer()`. If you want to also destroy the corresponding +allocation you can use convenience function vmaDestroyBuffer(). + +\note This is a new version of the function augmented with parameter `allocationLocalOffset`. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer2( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    VkDeviceSize allocationLocalOffset, +    const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, +    VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer); + +/** \brief Destroys Vulkan buffer and frees allocated memory. + +This is just a convenience function equivalent to: + +\code +vkDestroyBuffer(device, buffer, allocationCallbacks); +vmaFreeMemory(allocator, allocation); +\endcode + +It is safe to pass null as buffer and/or allocation. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyBuffer( +    VmaAllocator VMA_NOT_NULL allocator, +    VkBuffer VMA_NULLABLE_NON_DISPATCHABLE buffer, +    VmaAllocation VMA_NULLABLE allocation); + +/// Function similar to vmaCreateBuffer(). +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage( +    VmaAllocator VMA_NOT_NULL allocator, +    const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, +    const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, +    VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage, +    VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, +    VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); + +/// Function similar to vmaCreateAliasingBuffer() but for images. +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, +    VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage); + +/// Function similar to vmaCreateAliasingBuffer2() but for images. +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage2( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    VkDeviceSize allocationLocalOffset, +    const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, +    VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage); + +/** \brief Destroys Vulkan image and frees allocated memory. + +This is just a convenience function equivalent to: + +\code +vkDestroyImage(device, image, allocationCallbacks); +vmaFreeMemory(allocator, allocation); +\endcode + +It is safe to pass null as image and/or allocation. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyImage( +    VmaAllocator VMA_NOT_NULL allocator, +    VkImage VMA_NULLABLE_NON_DISPATCHABLE image, +    VmaAllocation VMA_NULLABLE allocation); + +/** @} */ + +/** +\addtogroup group_virtual +@{ +*/ + +/** \brief Creates new #VmaVirtualBlock object. + +\param pCreateInfo Parameters for creation. +\param[out] pVirtualBlock Returned virtual block object or `VMA_NULL` if creation failed. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateVirtualBlock( +    const VmaVirtualBlockCreateInfo* VMA_NOT_NULL pCreateInfo, +    VmaVirtualBlock VMA_NULLABLE* VMA_NOT_NULL pVirtualBlock); + +/** \brief Destroys #VmaVirtualBlock object. + +Please note that you should consciously handle virtual allocations that could remain unfreed in the block. 
+You should either free them individually using vmaVirtualFree() or call vmaClearVirtualBlock()
+if you are sure this is what you want. If you do neither, an assert is called.
+
+If you keep pointers to some additional metadata associated with your virtual allocations in their `pUserData`,
+don't forget to free them.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaDestroyVirtualBlock(
+    VmaVirtualBlock VMA_NULLABLE virtualBlock);
+
+/** \brief Returns true if the #VmaVirtualBlock is empty - contains 0 virtual allocations and has all its space available for new allocations.
+*/
+VMA_CALL_PRE VkBool32 VMA_CALL_POST vmaIsVirtualBlockEmpty(
+    VmaVirtualBlock VMA_NOT_NULL virtualBlock);
+
+/** \brief Returns information about a specific virtual allocation within a virtual block, like its size and `pUserData` pointer.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualAllocationInfo(
+    VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+    VmaVirtualAllocation VMA_NOT_NULL_NON_DISPATCHABLE allocation, VmaVirtualAllocationInfo* VMA_NOT_NULL pVirtualAllocInfo);
+
+/** \brief Allocates new virtual allocation inside given #VmaVirtualBlock.
+
+If the allocation fails due to not enough free space available, `VK_ERROR_OUT_OF_DEVICE_MEMORY` is returned
+(even though the function doesn't ever allocate actual GPU memory).
+`pAllocation` is then set to `VK_NULL_HANDLE` and `pOffset`, if not null, is set to `UINT64_MAX`.
+
+\param virtualBlock Virtual block
+\param pCreateInfo Parameters for the allocation
+\param[out] pAllocation Returned handle of the new allocation
+\param[out] pOffset Returned offset of the new allocation. Optional, can be null.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaVirtualAllocate(
+    VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+    const VmaVirtualAllocationCreateInfo* VMA_NOT_NULL pCreateInfo,
+    VmaVirtualAllocation VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pAllocation,
+    VkDeviceSize* VMA_NULLABLE pOffset);
+
+/** \brief Frees virtual allocation inside given #VmaVirtualBlock.
+
+It is correct to call this function with `allocation == VK_NULL_HANDLE` - it does nothing.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaVirtualFree(
+    VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+    VmaVirtualAllocation VMA_NULLABLE_NON_DISPATCHABLE allocation);
+
+/** \brief Frees all virtual allocations inside given #VmaVirtualBlock.
+
+You must either call this function or free each virtual allocation individually with vmaVirtualFree()
+before destroying a virtual block. Otherwise, an assert is called.
+
+If you keep a pointer to some additional metadata associated with your virtual allocation in its `pUserData`,
+don't forget to free it as well.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaClearVirtualBlock(
+    VmaVirtualBlock VMA_NOT_NULL virtualBlock);
+
+/** \brief Changes custom pointer associated with given virtual allocation.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaSetVirtualAllocationUserData(
+    VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+    VmaVirtualAllocation VMA_NOT_NULL_NON_DISPATCHABLE allocation,
+    void* VMA_NULLABLE pUserData);
+
+/** \brief Calculates and returns statistics about virtual allocations and memory usage in given #VmaVirtualBlock.
+
+This function is fast to call. For more detailed statistics, see vmaCalculateVirtualBlockStatistics().
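An editorial end-to-end sketch of the virtual allocation API declared in this group (sizes are arbitrary; no Vulkan objects are involved at all):

\code
VmaVirtualBlockCreateInfo blockInfo = {0};
blockInfo.size = 1048576;                      // 1 MiB of purely virtual space

VmaVirtualBlock block;
if(vmaCreateVirtualBlock(&blockInfo, &block) == VK_SUCCESS)
{
    VmaVirtualAllocationCreateInfo allocInfo = {0};
    allocInfo.size      = 4096;
    allocInfo.alignment = 256;

    VmaVirtualAllocation alloc;
    VkDeviceSize offset;
    if(vmaVirtualAllocate(block, &allocInfo, &alloc, &offset) == VK_SUCCESS)
        vmaVirtualFree(block, alloc);          // or vmaClearVirtualBlock(block) to drop everything

    vmaDestroyVirtualBlock(block);
}
\endcode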
+*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualBlockStatistics( +    VmaVirtualBlock VMA_NOT_NULL virtualBlock, +    VmaStatistics* VMA_NOT_NULL pStats); + +/** \brief Calculates and returns detailed statistics about virtual allocations and memory usage in given #VmaVirtualBlock. + +This function is slow to call. Use for debugging purposes. +For less detailed statistics, see vmaGetVirtualBlockStatistics(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateVirtualBlockStatistics( +    VmaVirtualBlock VMA_NOT_NULL virtualBlock, +    VmaDetailedStatistics* VMA_NOT_NULL pStats); + +/** @} */ + +#if VMA_STATS_STRING_ENABLED +/** +\addtogroup group_stats +@{ +*/ + +/** \brief Builds and returns a null-terminated string in JSON format with information about given #VmaVirtualBlock. +\param virtualBlock Virtual block. +\param[out] ppStatsString Returned string. +\param detailedMap Pass `VK_FALSE` to only obtain statistics as returned by vmaCalculateVirtualBlockStatistics(). Pass `VK_TRUE` to also obtain full list of allocations and free spaces. + +Returned string must be freed using vmaFreeVirtualBlockStatsString(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaBuildVirtualBlockStatsString( +    VmaVirtualBlock VMA_NOT_NULL virtualBlock, +    char* VMA_NULLABLE* VMA_NOT_NULL ppStatsString, +    VkBool32 detailedMap); + +/// Frees a string returned by vmaBuildVirtualBlockStatsString(). +VMA_CALL_PRE void VMA_CALL_POST vmaFreeVirtualBlockStatsString( +    VmaVirtualBlock VMA_NOT_NULL virtualBlock, +    char* VMA_NULLABLE pStatsString); + +/** \brief Builds and returns statistics as a null-terminated string in JSON format. +\param allocator +\param[out] ppStatsString Must be freed using vmaFreeStatsString() function. +\param detailedMap +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaBuildStatsString( +    VmaAllocator VMA_NOT_NULL allocator, +    char* VMA_NULLABLE* VMA_NOT_NULL ppStatsString, +    VkBool32 detailedMap); + +VMA_CALL_PRE void VMA_CALL_POST vmaFreeStatsString( +    VmaAllocator VMA_NOT_NULL allocator, +    char* VMA_NULLABLE pStatsString); + +/** @} */ + +#endif // VMA_STATS_STRING_ENABLED + +#endif // _VMA_FUNCTION_HEADERS + +#ifdef __cplusplus +} +#endif + +#endif // AMD_VULKAN_MEMORY_ALLOCATOR_H + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +// +//    IMPLEMENTATION +// +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// + +// For Visual Studio IntelliSense. +#if defined(__cplusplus) && defined(__INTELLISENSE__) +#define VMA_IMPLEMENTATION +#endif + +#ifdef VMA_IMPLEMENTATION +#undef VMA_IMPLEMENTATION + +#include <cstdint> +#include <cstdlib> +#include <cstring> +#include <cinttypes> +#include <utility> +#include <type_traits> + +#if !defined(VMA_CPP20) +    #if __cplusplus >= 202002L || _MSVC_LANG >= 202002L // C++20 +        #define VMA_CPP20 1 +    #else +        #define VMA_CPP20 0 +    #endif +#endif + +#ifdef _MSC_VER +    #include <intrin.h> // For functions like __popcnt, _BitScanForward etc. 
+#endif +#if VMA_CPP20 +    #include <bit> +#endif + +#if VMA_STATS_STRING_ENABLED +    #include <cstdio> // For snprintf +#endif + +/******************************************************************************* +CONFIGURATION SECTION + +Define some of these macros before each #include of this header or change them +here if you need other then default behavior depending on your environment. +*/ +#ifndef _VMA_CONFIGURATION + +/* +Define this macro to 1 to make the library fetch pointers to Vulkan functions +internally, like: + +    vulkanFunctions.vkAllocateMemory = &vkAllocateMemory; +*/ +#if !defined(VMA_STATIC_VULKAN_FUNCTIONS) && !defined(VK_NO_PROTOTYPES) +    #define VMA_STATIC_VULKAN_FUNCTIONS 1 +#endif + +/* +Define this macro to 1 to make the library fetch pointers to Vulkan functions +internally, like: + +    vulkanFunctions.vkAllocateMemory = (PFN_vkAllocateMemory)vkGetDeviceProcAddr(device, "vkAllocateMemory"); + +To use this feature in new versions of VMA you now have to pass +VmaVulkanFunctions::vkGetInstanceProcAddr and vkGetDeviceProcAddr as +VmaAllocatorCreateInfo::pVulkanFunctions. Other members can be null. +*/ +#if !defined(VMA_DYNAMIC_VULKAN_FUNCTIONS) +    #define VMA_DYNAMIC_VULKAN_FUNCTIONS 1 +#endif + +#ifndef VMA_USE_STL_SHARED_MUTEX +    #if __cplusplus >= 201703L || _MSVC_LANG >= 201703L // C++17 +        #define VMA_USE_STL_SHARED_MUTEX 1 +    // Visual studio defines __cplusplus properly only when passed additional parameter: /Zc:__cplusplus +    // Otherwise it is always 199711L, despite shared_mutex works since Visual Studio 2015 Update 2. +    #elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 190023918 && __cplusplus == 199711L && _MSVC_LANG >= 201703L +        #define VMA_USE_STL_SHARED_MUTEX 1 +    #else +        #define VMA_USE_STL_SHARED_MUTEX 0 +    #endif +#endif + +/* +Define this macro to include custom header files without having to edit this file directly, e.g.: + +    // Inside of "my_vma_configuration_user_includes.h": + +    #include "my_custom_assert.h" // for MY_CUSTOM_ASSERT +    #include "my_custom_min.h" // for my_custom_min +    #include <algorithm> +    #include <mutex> + +    // Inside a different file, which includes "vk_mem_alloc.h": + +    #define VMA_CONFIGURATION_USER_INCLUDES_H "my_vma_configuration_user_includes.h" +    #define VMA_ASSERT(expr) MY_CUSTOM_ASSERT(expr) +    #define VMA_MIN(v1, v2)  (my_custom_min(v1, v2)) +    #include "vk_mem_alloc.h" +    ... + +The following headers are used in this CONFIGURATION section only, so feel free to +remove them if not needed. +*/ +#if !defined(VMA_CONFIGURATION_USER_INCLUDES_H) +    #include <cassert> // for assert +    #include <algorithm> // for min, max, swap +    #include <mutex> +#else +    #include VMA_CONFIGURATION_USER_INCLUDES_H +#endif + +#ifndef VMA_NULL +   // Value used as null pointer. Define it to e.g.: nullptr, NULL, 0, (void*)0. +   #define VMA_NULL   nullptr +#endif + +#ifndef VMA_FALLTHROUGH +    #if __cplusplus >= 201703L || _MSVC_LANG >= 201703L // C++17 +        #define VMA_FALLTHROUGH [[fallthrough]] +    #else +        #define VMA_FALLTHROUGH +    #endif +#endif + +// Normal assert to check for programmer's errors, especially in Debug configuration. +#ifndef VMA_ASSERT +   #ifdef NDEBUG +       #define VMA_ASSERT(expr) +   #else +       #define VMA_ASSERT(expr)         assert(expr) +   #endif +#endif + +// Assert that will be called very often, like inside data structures e.g. operator[]. +// Making it non-empty can make program slow. 
+#ifndef VMA_HEAVY_ASSERT +   #ifdef NDEBUG +       #define VMA_HEAVY_ASSERT(expr) +   #else +       #define VMA_HEAVY_ASSERT(expr)   //VMA_ASSERT(expr) +   #endif +#endif + +// Assert used for reporting memory leaks - unfreed allocations. +#ifndef VMA_ASSERT_LEAK +    #define VMA_ASSERT_LEAK(expr)   VMA_ASSERT(expr) +#endif + +// If your compiler is not compatible with C++17 and definition of +// aligned_alloc() function is missing, uncommenting following line may help: + +//#include <malloc.h> + +#if defined(__ANDROID_API__) && (__ANDROID_API__ < 16) +#include <cstdlib> +static void* vma_aligned_alloc(size_t alignment, size_t size) +{ +    // alignment must be >= sizeof(void*) +    if(alignment < sizeof(void*)) +    { +        alignment = sizeof(void*); +    } + +    return memalign(alignment, size); +} +#elif defined(__APPLE__) || defined(__ANDROID__) || (defined(__linux__) && defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC)) +#include <cstdlib> + +#if defined(__APPLE__) +#include <AvailabilityMacros.h> +#endif + +static void* vma_aligned_alloc(size_t alignment, size_t size) +{ +    // Unfortunately, aligned_alloc causes VMA to crash due to it returning null pointers. (At least under 11.4) +    // Therefore, for now disable this specific exception until a proper solution is found. +    //#if defined(__APPLE__) && (defined(MAC_OS_X_VERSION_10_16) || defined(__IPHONE_14_0)) +    //#if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_16 || __IPHONE_OS_VERSION_MAX_ALLOWED >= __IPHONE_14_0 +    //    // For C++14, usr/include/malloc/_malloc.h declares aligned_alloc()) only +    //    // with the MacOSX11.0 SDK in Xcode 12 (which is what adds +    //    // MAC_OS_X_VERSION_10_16), even though the function is marked +    //    // available for 10.15. That is why the preprocessor checks for 10.16 but +    //    // the __builtin_available checks for 10.15. +    //    // People who use C++17 could call aligned_alloc with the 10.15 SDK already. +    //    if (__builtin_available(macOS 10.15, iOS 13, *)) +    //        return aligned_alloc(alignment, size); +    //#endif +    //#endif + +    // alignment must be >= sizeof(void*) +    if(alignment < sizeof(void*)) +    { +        alignment = sizeof(void*); +    } + +    void *pointer; +    if(posix_memalign(&pointer, alignment, size) == 0) +        return pointer; +    return VMA_NULL; +} +#elif defined(_WIN32) +static void* vma_aligned_alloc(size_t alignment, size_t size) +{ +    return _aligned_malloc(size, alignment); +} +#elif __cplusplus >= 201703L || _MSVC_LANG >= 201703L // C++17 +static void* vma_aligned_alloc(size_t alignment, size_t size) +{ +    return aligned_alloc(alignment, size); +} +#else +static void* vma_aligned_alloc(size_t alignment, size_t size) +{ +    VMA_ASSERT(0 && "Could not implement aligned_alloc automatically. 
Please enable C++17 or later in your compiler or provide custom implementation of macro VMA_SYSTEM_ALIGNED_MALLOC (and VMA_SYSTEM_ALIGNED_FREE if needed) using the API of your system."); +    return VMA_NULL; +} +#endif + +#if defined(_WIN32) +static void vma_aligned_free(void* ptr) +{ +    _aligned_free(ptr); +} +#else +static void vma_aligned_free(void* VMA_NULLABLE ptr) +{ +    free(ptr); +} +#endif + +#ifndef VMA_ALIGN_OF +   #define VMA_ALIGN_OF(type)       (alignof(type)) +#endif + +#ifndef VMA_SYSTEM_ALIGNED_MALLOC +   #define VMA_SYSTEM_ALIGNED_MALLOC(size, alignment) vma_aligned_alloc((alignment), (size)) +#endif + +#ifndef VMA_SYSTEM_ALIGNED_FREE +   // VMA_SYSTEM_FREE is the old name, but might have been defined by the user +   #if defined(VMA_SYSTEM_FREE) +      #define VMA_SYSTEM_ALIGNED_FREE(ptr)     VMA_SYSTEM_FREE(ptr) +   #else +      #define VMA_SYSTEM_ALIGNED_FREE(ptr)     vma_aligned_free(ptr) +    #endif +#endif + +#ifndef VMA_COUNT_BITS_SET +    // Returns number of bits set to 1 in (v) +    #define VMA_COUNT_BITS_SET(v) VmaCountBitsSet(v) +#endif + +#ifndef VMA_BITSCAN_LSB +    // Scans integer for index of first nonzero value from the Least Significant Bit (LSB). If mask is 0 then returns UINT8_MAX +    #define VMA_BITSCAN_LSB(mask) VmaBitScanLSB(mask) +#endif + +#ifndef VMA_BITSCAN_MSB +    // Scans integer for index of first nonzero value from the Most Significant Bit (MSB). If mask is 0 then returns UINT8_MAX +    #define VMA_BITSCAN_MSB(mask) VmaBitScanMSB(mask) +#endif + +#ifndef VMA_MIN +   #define VMA_MIN(v1, v2)    ((std::min)((v1), (v2))) +#endif + +#ifndef VMA_MAX +   #define VMA_MAX(v1, v2)    ((std::max)((v1), (v2))) +#endif + +#ifndef VMA_SORT +   #define VMA_SORT(beg, end, cmp)  std::sort(beg, end, cmp) +#endif + +#ifndef VMA_DEBUG_LOG_FORMAT +   #define VMA_DEBUG_LOG_FORMAT(format, ...) +   /* +   #define VMA_DEBUG_LOG_FORMAT(format, ...) do { \ +       printf((format), __VA_ARGS__); \ +       printf("\n"); \ +   } while(false) +   */ +#endif + +#ifndef VMA_DEBUG_LOG +    #define VMA_DEBUG_LOG(str)   VMA_DEBUG_LOG_FORMAT("%s", (str)) +#endif + +#ifndef VMA_LEAK_LOG_FORMAT +    #define VMA_LEAK_LOG_FORMAT(format, ...)   VMA_DEBUG_LOG_FORMAT(format, __VA_ARGS__) +#endif + +#ifndef VMA_CLASS_NO_COPY +    #define VMA_CLASS_NO_COPY(className) \ +        private: \ +            className(const className&) = delete; \ +            className& operator=(const className&) = delete; +#endif +#ifndef VMA_CLASS_NO_COPY_NO_MOVE +    #define VMA_CLASS_NO_COPY_NO_MOVE(className) \ +        private: \ +            className(const className&) = delete; \ +            className(className&&) = delete; \ +            className& operator=(const className&) = delete; \ +            className& operator=(className&&) = delete; +#endif + +// Define this macro to 1 to enable functions: vmaBuildStatsString, vmaFreeStatsString. 
+#if VMA_STATS_STRING_ENABLED +    static inline void VmaUint32ToStr(char* VMA_NOT_NULL outStr, size_t strLen, uint32_t num) +    { +        snprintf(outStr, strLen, "%" PRIu32, num); +    } +    static inline void VmaUint64ToStr(char* VMA_NOT_NULL outStr, size_t strLen, uint64_t num) +    { +        snprintf(outStr, strLen, "%" PRIu64, num); +    } +    static inline void VmaPtrToStr(char* VMA_NOT_NULL outStr, size_t strLen, const void* ptr) +    { +        snprintf(outStr, strLen, "%p", ptr); +    } +#endif + +#ifndef VMA_MUTEX +    class VmaMutex +    { +    VMA_CLASS_NO_COPY_NO_MOVE(VmaMutex) +    public: +        VmaMutex() { } +        void Lock() { m_Mutex.lock(); } +        void Unlock() { m_Mutex.unlock(); } +        bool TryLock() { return m_Mutex.try_lock(); } +    private: +        std::mutex m_Mutex; +    }; +    #define VMA_MUTEX VmaMutex +#endif + +// Read-write mutex, where "read" is shared access, "write" is exclusive access. +#ifndef VMA_RW_MUTEX +    #if VMA_USE_STL_SHARED_MUTEX +        // Use std::shared_mutex from C++17. +        #include <shared_mutex> +        class VmaRWMutex +        { +        public: +            void LockRead() { m_Mutex.lock_shared(); } +            void UnlockRead() { m_Mutex.unlock_shared(); } +            bool TryLockRead() { return m_Mutex.try_lock_shared(); } +            void LockWrite() { m_Mutex.lock(); } +            void UnlockWrite() { m_Mutex.unlock(); } +            bool TryLockWrite() { return m_Mutex.try_lock(); } +        private: +            std::shared_mutex m_Mutex; +        }; +        #define VMA_RW_MUTEX VmaRWMutex +    #elif defined(_WIN32) && defined(WINVER) && defined(SRWLOCK_INIT) && WINVER >= 0x0600 +        // Use SRWLOCK from WinAPI. +        // Minimum supported client = Windows Vista, server = Windows Server 2008. +        class VmaRWMutex +        { +        public: +            VmaRWMutex() { InitializeSRWLock(&m_Lock); } +            void LockRead() { AcquireSRWLockShared(&m_Lock); } +            void UnlockRead() { ReleaseSRWLockShared(&m_Lock); } +            bool TryLockRead() { return TryAcquireSRWLockShared(&m_Lock) != FALSE; } +            void LockWrite() { AcquireSRWLockExclusive(&m_Lock); } +            void UnlockWrite() { ReleaseSRWLockExclusive(&m_Lock); } +            bool TryLockWrite() { return TryAcquireSRWLockExclusive(&m_Lock) != FALSE; } +        private: +            SRWLOCK m_Lock; +        }; +        #define VMA_RW_MUTEX VmaRWMutex +    #else +        // Less efficient fallback: Use normal mutex. +        class VmaRWMutex +        { +        public: +            void LockRead() { m_Mutex.Lock(); } +            void UnlockRead() { m_Mutex.Unlock(); } +            bool TryLockRead() { return m_Mutex.TryLock(); } +            void LockWrite() { m_Mutex.Lock(); } +            void UnlockWrite() { m_Mutex.Unlock(); } +            bool TryLockWrite() { return m_Mutex.TryLock(); } +        private: +            VMA_MUTEX m_Mutex; +        }; +        #define VMA_RW_MUTEX VmaRWMutex +    #endif // #if VMA_USE_STL_SHARED_MUTEX +#endif // #ifndef VMA_RW_MUTEX + +/* +If providing your own implementation, you need to implement a subset of std::atomic. +*/ +#ifndef VMA_ATOMIC_UINT32 +    #include <atomic> +    #define VMA_ATOMIC_UINT32 std::atomic<uint32_t> +#endif + +#ifndef VMA_ATOMIC_UINT64 +    #include <atomic> +    #define VMA_ATOMIC_UINT64 std::atomic<uint64_t> +#endif + +#ifndef VMA_DEBUG_ALWAYS_DEDICATED_MEMORY +    /** +    Every allocation will have its own memory block. 
+    Define to 1 for debugging purposes only. +    */ +    #define VMA_DEBUG_ALWAYS_DEDICATED_MEMORY (0) +#endif + +#ifndef VMA_MIN_ALIGNMENT +    /** +    Minimum alignment of all allocations, in bytes. +    Set to more than 1 for debugging purposes. Must be power of two. +    */ +    #ifdef VMA_DEBUG_ALIGNMENT // Old name +        #define VMA_MIN_ALIGNMENT VMA_DEBUG_ALIGNMENT +    #else +        #define VMA_MIN_ALIGNMENT (1) +    #endif +#endif + +#ifndef VMA_DEBUG_MARGIN +    /** +    Minimum margin after every allocation, in bytes. +    Set nonzero for debugging purposes only. +    */ +    #define VMA_DEBUG_MARGIN (0) +#endif + +#ifndef VMA_DEBUG_INITIALIZE_ALLOCATIONS +    /** +    Define this macro to 1 to automatically fill new allocations and destroyed +    allocations with some bit pattern. +    */ +    #define VMA_DEBUG_INITIALIZE_ALLOCATIONS (0) +#endif + +#ifndef VMA_DEBUG_DETECT_CORRUPTION +    /** +    Define this macro to 1 together with non-zero value of VMA_DEBUG_MARGIN to +    enable writing magic value to the margin after every allocation and +    validating it, so that memory corruptions (out-of-bounds writes) are detected. +    */ +    #define VMA_DEBUG_DETECT_CORRUPTION (0) +#endif + +#ifndef VMA_DEBUG_GLOBAL_MUTEX +    /** +    Set this to 1 for debugging purposes only, to enable single mutex protecting all +    entry calls to the library. Can be useful for debugging multithreading issues. +    */ +    #define VMA_DEBUG_GLOBAL_MUTEX (0) +#endif + +#ifndef VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY +    /** +    Minimum value for VkPhysicalDeviceLimits::bufferImageGranularity. +    Set to more than 1 for debugging purposes only. Must be power of two. +    */ +    #define VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY (1) +#endif + +#ifndef VMA_DEBUG_DONT_EXCEED_MAX_MEMORY_ALLOCATION_COUNT +    /* +    Set this to 1 to make VMA never exceed VkPhysicalDeviceLimits::maxMemoryAllocationCount +    and return error instead of leaving up to Vulkan implementation what to do in such cases. +    */ +    #define VMA_DEBUG_DONT_EXCEED_MAX_MEMORY_ALLOCATION_COUNT (0) +#endif + +#ifndef VMA_SMALL_HEAP_MAX_SIZE +   /// Maximum size of a memory heap in Vulkan to consider it "small". +   #define VMA_SMALL_HEAP_MAX_SIZE (1024ull * 1024 * 1024) +#endif + +#ifndef VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE +   /// Default size of a block allocated as single VkDeviceMemory from a "large" heap. +   #define VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE (256ull * 1024 * 1024) +#endif + +/* +Mapping hysteresis is a logic that launches when vmaMapMemory/vmaUnmapMemory is called +or a persistently mapped allocation is created and destroyed several times in a row. +It keeps additional +1 mapping of a device memory block to prevent calling actual +vkMapMemory/vkUnmapMemory too many times, which may improve performance and help +tools like RenderDoc. +*/ +#ifndef VMA_MAPPING_HYSTERESIS_ENABLED +    #define VMA_MAPPING_HYSTERESIS_ENABLED 1 +#endif + +#define VMA_VALIDATE(cond) do { if(!(cond)) { \ +        VMA_ASSERT(0 && "Validation failed: " #cond); \ +        return false; \ +    } } while(false) + +/******************************************************************************* +END OF CONFIGURATION +*/ +#endif // _VMA_CONFIGURATION + + +static const uint8_t VMA_ALLOCATION_FILL_PATTERN_CREATED = 0xDC; +static const uint8_t VMA_ALLOCATION_FILL_PATTERN_DESTROYED = 0xEF; +// Decimal 2139416166, float NaN, little-endian binary 66 E6 84 7F. 
+static const uint32_t VMA_CORRUPTION_DETECTION_MAGIC_VALUE = 0x7F84E666; + +// Copy of some Vulkan definitions so we don't need to check their existence just to handle few constants. +static const uint32_t VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY = 0x00000040; +static const uint32_t VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY = 0x00000080; +static const uint32_t VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY = 0x00020000; +static const uint32_t VK_IMAGE_CREATE_DISJOINT_BIT_COPY = 0x00000200; +static const int32_t VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT_COPY = 1000158000; +static const uint32_t VMA_ALLOCATION_INTERNAL_STRATEGY_MIN_OFFSET = 0x10000000u; +static const uint32_t VMA_ALLOCATION_TRY_COUNT = 32; +static const uint32_t VMA_VENDOR_ID_AMD = 4098; + +// This one is tricky. Vulkan specification defines this code as available since +// Vulkan 1.0, but doesn't actually define it in Vulkan SDK earlier than 1.2.131. +// See pull request #207. +#define VK_ERROR_UNKNOWN_COPY ((VkResult)-13) + + +#if VMA_STATS_STRING_ENABLED +// Correspond to values of enum VmaSuballocationType. +static const char* VMA_SUBALLOCATION_TYPE_NAMES[] = +{ +    "FREE", +    "UNKNOWN", +    "BUFFER", +    "IMAGE_UNKNOWN", +    "IMAGE_LINEAR", +    "IMAGE_OPTIMAL", +}; +#endif + +static VkAllocationCallbacks VmaEmptyAllocationCallbacks = +    { VMA_NULL, VMA_NULL, VMA_NULL, VMA_NULL, VMA_NULL, VMA_NULL }; + + +#ifndef _VMA_ENUM_DECLARATIONS + +enum VmaSuballocationType +{ +    VMA_SUBALLOCATION_TYPE_FREE = 0, +    VMA_SUBALLOCATION_TYPE_UNKNOWN = 1, +    VMA_SUBALLOCATION_TYPE_BUFFER = 2, +    VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN = 3, +    VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR = 4, +    VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL = 5, +    VMA_SUBALLOCATION_TYPE_MAX_ENUM = 0x7FFFFFFF +}; + +enum VMA_CACHE_OPERATION +{ +    VMA_CACHE_FLUSH, +    VMA_CACHE_INVALIDATE +}; + +enum class VmaAllocationRequestType +{ +    Normal, +    TLSF, +    // Used by "Linear" algorithm. +    UpperAddress, +    EndOf1st, +    EndOf2nd, +}; + +#endif // _VMA_ENUM_DECLARATIONS + +#ifndef _VMA_FORWARD_DECLARATIONS +// Opaque handle used by allocation algorithms to identify single allocation in any conforming way. 
+VK_DEFINE_NON_DISPATCHABLE_HANDLE(VmaAllocHandle); + +struct VmaMutexLock; +struct VmaMutexLockRead; +struct VmaMutexLockWrite; + +template<typename T> +struct AtomicTransactionalIncrement; + +template<typename T> +struct VmaStlAllocator; + +template<typename T, typename AllocatorT> +class VmaVector; + +template<typename T, typename AllocatorT, size_t N> +class VmaSmallVector; + +template<typename T> +class VmaPoolAllocator; + +template<typename T> +struct VmaListItem; + +template<typename T> +class VmaRawList; + +template<typename T, typename AllocatorT> +class VmaList; + +template<typename ItemTypeTraits> +class VmaIntrusiveLinkedList; + +#if VMA_STATS_STRING_ENABLED +class VmaStringBuilder; +class VmaJsonWriter; +#endif + +class VmaDeviceMemoryBlock; + +struct VmaDedicatedAllocationListItemTraits; +class VmaDedicatedAllocationList; + +struct VmaSuballocation; +struct VmaSuballocationOffsetLess; +struct VmaSuballocationOffsetGreater; +struct VmaSuballocationItemSizeLess; + +typedef VmaList<VmaSuballocation, VmaStlAllocator<VmaSuballocation>> VmaSuballocationList; + +struct VmaAllocationRequest; + +class VmaBlockMetadata; +class VmaBlockMetadata_Linear; +class VmaBlockMetadata_TLSF; + +class VmaBlockVector; + +struct VmaPoolListItemTraits; + +struct VmaCurrentBudgetData; + +class VmaAllocationObjectAllocator; + +#endif // _VMA_FORWARD_DECLARATIONS + + +#ifndef _VMA_FUNCTIONS + +/* +Returns number of bits set to 1 in (v). + +On specific platforms and compilers you can use intrinsics like: + +Visual Studio: +    return __popcnt(v); +GCC, Clang: +    return static_cast<uint32_t>(__builtin_popcount(v)); + +Define macro VMA_COUNT_BITS_SET to provide your optimized implementation. +But you need to check in runtime whether user's CPU supports these, as some old processors don't. 
+*/ +static inline uint32_t VmaCountBitsSet(uint32_t v) +{ +#if VMA_CPP20 +    return std::popcount(v); +#else +    uint32_t c = v - ((v >> 1) & 0x55555555); +    c = ((c >> 2) & 0x33333333) + (c & 0x33333333); +    c = ((c >> 4) + c) & 0x0F0F0F0F; +    c = ((c >> 8) + c) & 0x00FF00FF; +    c = ((c >> 16) + c) & 0x0000FFFF; +    return c; +#endif +} + +static inline uint8_t VmaBitScanLSB(uint64_t mask) +{ +#if defined(_MSC_VER) && defined(_WIN64) +    unsigned long pos; +    if (_BitScanForward64(&pos, mask)) +        return static_cast<uint8_t>(pos); +    return UINT8_MAX; +#elif VMA_CPP20 +    if(mask) +        return static_cast<uint8_t>(std::countr_zero(mask)); +    return UINT8_MAX; +#elif defined __GNUC__ || defined __clang__ +    return static_cast<uint8_t>(__builtin_ffsll(mask)) - 1U; +#else +    uint8_t pos = 0; +    uint64_t bit = 1; +    do +    { +        if (mask & bit) +            return pos; +        bit <<= 1; +    } while (pos++ < 63); +    return UINT8_MAX; +#endif +} + +static inline uint8_t VmaBitScanLSB(uint32_t mask) +{ +#ifdef _MSC_VER +    unsigned long pos; +    if (_BitScanForward(&pos, mask)) +        return static_cast<uint8_t>(pos); +    return UINT8_MAX; +#elif VMA_CPP20 +    if(mask) +        return static_cast<uint8_t>(std::countr_zero(mask)); +    return UINT8_MAX; +#elif defined __GNUC__ || defined __clang__ +    return static_cast<uint8_t>(__builtin_ffs(mask)) - 1U; +#else +    uint8_t pos = 0; +    uint32_t bit = 1; +    do +    { +        if (mask & bit) +            return pos; +        bit <<= 1; +    } while (pos++ < 31); +    return UINT8_MAX; +#endif +} + +static inline uint8_t VmaBitScanMSB(uint64_t mask) +{ +#if defined(_MSC_VER) && defined(_WIN64) +    unsigned long pos; +    if (_BitScanReverse64(&pos, mask)) +        return static_cast<uint8_t>(pos); +#elif VMA_CPP20 +    if(mask) +        return 63 - static_cast<uint8_t>(std::countl_zero(mask)); +#elif defined __GNUC__ || defined __clang__ +    if (mask) +        return 63 - static_cast<uint8_t>(__builtin_clzll(mask)); +#else +    uint8_t pos = 63; +    uint64_t bit = 1ULL << 63; +    do +    { +        if (mask & bit) +            return pos; +        bit >>= 1; +    } while (pos-- > 0); +#endif +    return UINT8_MAX; +} + +static inline uint8_t VmaBitScanMSB(uint32_t mask) +{ +#ifdef _MSC_VER +    unsigned long pos; +    if (_BitScanReverse(&pos, mask)) +        return static_cast<uint8_t>(pos); +#elif VMA_CPP20 +    if(mask) +        return 31 - static_cast<uint8_t>(std::countl_zero(mask)); +#elif defined __GNUC__ || defined __clang__ +    if (mask) +        return 31 - static_cast<uint8_t>(__builtin_clz(mask)); +#else +    uint8_t pos = 31; +    uint32_t bit = 1UL << 31; +    do +    { +        if (mask & bit) +            return pos; +        bit >>= 1; +    } while (pos-- > 0); +#endif +    return UINT8_MAX; +} + +/* +Returns true if given number is a power of two. +T must be unsigned integer number or signed integer but always nonnegative. +For 0 returns true. +*/ +template <typename T> +inline bool VmaIsPow2(T x) +{ +    return (x & (x - 1)) == 0; +} + +// Aligns given value up to nearest multiply of align value. For example: VmaAlignUp(11, 8) = 16. +// Use types like uint32_t, uint64_t as T. +template <typename T> +static inline T VmaAlignUp(T val, T alignment) +{ +    VMA_HEAVY_ASSERT(VmaIsPow2(alignment)); +    return (val + alignment - 1) & ~(alignment - 1); +} + +// Aligns given value down to nearest multiply of align value. For example: VmaAlignDown(11, 8) = 8. 
+// Use types like uint32_t, uint64_t as T. +template <typename T> +static inline T VmaAlignDown(T val, T alignment) +{ +    VMA_HEAVY_ASSERT(VmaIsPow2(alignment)); +    return val & ~(alignment - 1); +} + +// Division with mathematical rounding to nearest number. +template <typename T> +static inline T VmaRoundDiv(T x, T y) +{ +    return (x + (y / (T)2)) / y; +} + +// Divide by 'y' and round up to nearest integer. +template <typename T> +static inline T VmaDivideRoundingUp(T x, T y) +{ +    return (x + y - (T)1) / y; +} + +// Returns smallest power of 2 greater or equal to v. +static inline uint32_t VmaNextPow2(uint32_t v) +{ +    v--; +    v |= v >> 1; +    v |= v >> 2; +    v |= v >> 4; +    v |= v >> 8; +    v |= v >> 16; +    v++; +    return v; +} + +static inline uint64_t VmaNextPow2(uint64_t v) +{ +    v--; +    v |= v >> 1; +    v |= v >> 2; +    v |= v >> 4; +    v |= v >> 8; +    v |= v >> 16; +    v |= v >> 32; +    v++; +    return v; +} + +// Returns largest power of 2 less or equal to v. +static inline uint32_t VmaPrevPow2(uint32_t v) +{ +    v |= v >> 1; +    v |= v >> 2; +    v |= v >> 4; +    v |= v >> 8; +    v |= v >> 16; +    v = v ^ (v >> 1); +    return v; +} + +static inline uint64_t VmaPrevPow2(uint64_t v) +{ +    v |= v >> 1; +    v |= v >> 2; +    v |= v >> 4; +    v |= v >> 8; +    v |= v >> 16; +    v |= v >> 32; +    v = v ^ (v >> 1); +    return v; +} + +static inline bool VmaStrIsEmpty(const char* pStr) +{ +    return pStr == VMA_NULL || *pStr == '\0'; +} + +/* +Returns true if two memory blocks occupy overlapping pages. +ResourceA must be in less memory offset than ResourceB. + +Algorithm is based on "Vulkan 1.0.39 - A Specification (with all registered Vulkan extensions)" +chapter 11.6 "Resource Memory Association", paragraph "Buffer-Image Granularity". +*/ +static inline bool VmaBlocksOnSamePage( +    VkDeviceSize resourceAOffset, +    VkDeviceSize resourceASize, +    VkDeviceSize resourceBOffset, +    VkDeviceSize pageSize) +{ +    VMA_ASSERT(resourceAOffset + resourceASize <= resourceBOffset && resourceASize > 0 && pageSize > 0); +    VkDeviceSize resourceAEnd = resourceAOffset + resourceASize - 1; +    VkDeviceSize resourceAEndPage = resourceAEnd & ~(pageSize - 1); +    VkDeviceSize resourceBStart = resourceBOffset; +    VkDeviceSize resourceBStartPage = resourceBStart & ~(pageSize - 1); +    return resourceAEndPage == resourceBStartPage; +} + +/* +Returns true if given suballocation types could conflict and must respect +VkPhysicalDeviceLimits::bufferImageGranularity. They conflict if one is buffer +or linear image and another one is optimal image. If type is unknown, behave +conservatively. 
+*/ +static inline bool VmaIsBufferImageGranularityConflict( +    VmaSuballocationType suballocType1, +    VmaSuballocationType suballocType2) +{ +    if (suballocType1 > suballocType2) +    { +        std::swap(suballocType1, suballocType2); +    } + +    switch (suballocType1) +    { +    case VMA_SUBALLOCATION_TYPE_FREE: +        return false; +    case VMA_SUBALLOCATION_TYPE_UNKNOWN: +        return true; +    case VMA_SUBALLOCATION_TYPE_BUFFER: +        return +            suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN || +            suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL; +    case VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN: +        return +            suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN || +            suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR || +            suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL; +    case VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR: +        return +            suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL; +    case VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL: +        return false; +    default: +        VMA_ASSERT(0); +        return true; +    } +} + +static void VmaWriteMagicValue(void* pData, VkDeviceSize offset) +{ +#if VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_DETECT_CORRUPTION +    uint32_t* pDst = (uint32_t*)((char*)pData + offset); +    const size_t numberCount = VMA_DEBUG_MARGIN / sizeof(uint32_t); +    for (size_t i = 0; i < numberCount; ++i, ++pDst) +    { +        *pDst = VMA_CORRUPTION_DETECTION_MAGIC_VALUE; +    } +#else +    // no-op +#endif +} + +static bool VmaValidateMagicValue(const void* pData, VkDeviceSize offset) +{ +#if VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_DETECT_CORRUPTION +    const uint32_t* pSrc = (const uint32_t*)((const char*)pData + offset); +    const size_t numberCount = VMA_DEBUG_MARGIN / sizeof(uint32_t); +    for (size_t i = 0; i < numberCount; ++i, ++pSrc) +    { +        if (*pSrc != VMA_CORRUPTION_DETECTION_MAGIC_VALUE) +        { +            return false; +        } +    } +#endif +    return true; +} + +/* +Fills structure with parameters of an example buffer to be used for transfers +during GPU memory defragmentation. +*/ +static void VmaFillGpuDefragmentationBufferCreateInfo(VkBufferCreateInfo& outBufCreateInfo) +{ +    memset(&outBufCreateInfo, 0, sizeof(outBufCreateInfo)); +    outBufCreateInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; +    outBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; +    outBufCreateInfo.size = (VkDeviceSize)VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE; // Example size. +} + + +/* +Performs binary search and returns iterator to first element that is greater or +equal to (key), according to comparison (cmp). + +Cmp should return true if first argument is less than second argument. + +Returned value is the found element, if present in the collection or place where +new element with value (key) should be inserted. 
+*/ +template <typename CmpLess, typename IterT, typename KeyT> +static IterT VmaBinaryFindFirstNotLess(IterT beg, IterT end, const KeyT& key, const CmpLess& cmp) +{ +    size_t down = 0, up = size_t(end - beg); +    while (down < up) +    { +        const size_t mid = down + (up - down) / 2;  // Overflow-safe midpoint calculation +        if (cmp(*(beg + mid), key)) +        { +            down = mid + 1; +        } +        else +        { +            up = mid; +        } +    } +    return beg + down; +} + +template<typename CmpLess, typename IterT, typename KeyT> +IterT VmaBinaryFindSorted(const IterT& beg, const IterT& end, const KeyT& value, const CmpLess& cmp) +{ +    IterT it = VmaBinaryFindFirstNotLess<CmpLess, IterT, KeyT>( +        beg, end, value, cmp); +    if (it == end || +        (!cmp(*it, value) && !cmp(value, *it))) +    { +        return it; +    } +    return end; +} + +/* +Returns true if all pointers in the array are not-null and unique. +Warning! O(n^2) complexity. Use only inside VMA_HEAVY_ASSERT. +T must be pointer type, e.g. VmaAllocation, VmaPool. +*/ +template<typename T> +static bool VmaValidatePointerArray(uint32_t count, const T* arr) +{ +    for (uint32_t i = 0; i < count; ++i) +    { +        const T iPtr = arr[i]; +        if (iPtr == VMA_NULL) +        { +            return false; +        } +        for (uint32_t j = i + 1; j < count; ++j) +        { +            if (iPtr == arr[j]) +            { +                return false; +            } +        } +    } +    return true; +} + +template<typename MainT, typename NewT> +static inline void VmaPnextChainPushFront(MainT* mainStruct, NewT* newStruct) +{ +    newStruct->pNext = mainStruct->pNext; +    mainStruct->pNext = newStruct; +} +// Finds structure with s->sType == sType in mainStruct->pNext chain. +// Returns pointer to it. If not found, returns null. +template<typename FindT, typename MainT> +static inline const FindT* VmaPnextChainFind(const MainT* mainStruct, VkStructureType sType) +{ +    for(const VkBaseInStructure* s = (const VkBaseInStructure*)mainStruct->pNext; +        s != VMA_NULL; s = s->pNext) +    { +        if(s->sType == sType) +        { +            return (const FindT*)s; +        } +    } +    return VMA_NULL; +} + +// An abstraction over buffer or image `usage` flags, depending on available extensions. +struct VmaBufferImageUsage +{ +#if VMA_KHR_MAINTENANCE5 +    typedef uint64_t BaseType; // VkFlags64 +#else +    typedef uint32_t BaseType; // VkFlags32 +#endif + +    static const VmaBufferImageUsage UNKNOWN; + +    BaseType Value; + +    VmaBufferImageUsage() { *this = UNKNOWN; } +    explicit VmaBufferImageUsage(BaseType usage) : Value(usage) { } +    VmaBufferImageUsage(const VkBufferCreateInfo &createInfo, bool useKhrMaintenance5); +    explicit VmaBufferImageUsage(const VkImageCreateInfo &createInfo); + +    bool operator==(const VmaBufferImageUsage& rhs) const { return Value == rhs.Value; } +    bool operator!=(const VmaBufferImageUsage& rhs) const { return Value != rhs.Value; } + +    bool Contains(BaseType flag) const { return (Value & flag) != 0; } +    bool ContainsDeviceAccess() const +    { +        // This relies on values of VK_IMAGE_USAGE_TRANSFER* being the same as VK_BUFFER_IMAGE_TRANSFER*. 
+        return (Value & ~BaseType(VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT)) != 0; +    } +}; + +const VmaBufferImageUsage VmaBufferImageUsage::UNKNOWN = VmaBufferImageUsage(0); + +VmaBufferImageUsage::VmaBufferImageUsage(const VkBufferCreateInfo &createInfo, +    bool useKhrMaintenance5) +{ +#if VMA_KHR_MAINTENANCE5 +    if(useKhrMaintenance5) +    { +        // If VkBufferCreateInfo::pNext chain contains VkBufferUsageFlags2CreateInfoKHR, +        // take usage from it and ignore VkBufferCreateInfo::usage, per specification +        // of the VK_KHR_maintenance5 extension. +        const VkBufferUsageFlags2CreateInfoKHR* const usageFlags2 = +            VmaPnextChainFind<VkBufferUsageFlags2CreateInfoKHR>(&createInfo, VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR); +        if(usageFlags2) +        { +            this->Value = usageFlags2->usage; +            return; +        } +    } +#endif + +    this->Value = (BaseType)createInfo.usage; +} + +VmaBufferImageUsage::VmaBufferImageUsage(const VkImageCreateInfo &createInfo) +{ +    // Maybe in the future there will be VK_KHR_maintenanceN extension with structure +    // VkImageUsageFlags2CreateInfoKHR, like the one for buffers... + +    this->Value = (BaseType)createInfo.usage; +} + +// This is the main algorithm that guides the selection of a memory type best for an allocation - +// converts usage to required/preferred/not preferred flags. +static bool FindMemoryPreferences( +    bool isIntegratedGPU, +    const VmaAllocationCreateInfo& allocCreateInfo, +    VmaBufferImageUsage bufImgUsage, +    VkMemoryPropertyFlags& outRequiredFlags, +    VkMemoryPropertyFlags& outPreferredFlags, +    VkMemoryPropertyFlags& outNotPreferredFlags) +{ +    outRequiredFlags = allocCreateInfo.requiredFlags; +    outPreferredFlags = allocCreateInfo.preferredFlags; +    outNotPreferredFlags = 0; + +    switch(allocCreateInfo.usage) +    { +    case VMA_MEMORY_USAGE_UNKNOWN: +        break; +    case VMA_MEMORY_USAGE_GPU_ONLY: +        if(!isIntegratedGPU || (outPreferredFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) +        { +            outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; +        } +        break; +    case VMA_MEMORY_USAGE_CPU_ONLY: +        outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; +        break; +    case VMA_MEMORY_USAGE_CPU_TO_GPU: +        outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; +        if(!isIntegratedGPU || (outPreferredFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) +        { +            outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; +        } +        break; +    case VMA_MEMORY_USAGE_GPU_TO_CPU: +        outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; +        outPreferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; +        break; +    case VMA_MEMORY_USAGE_CPU_COPY: +        outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; +        break; +    case VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED: +        outRequiredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT; +        break; +    case VMA_MEMORY_USAGE_AUTO: +    case VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE: +    case VMA_MEMORY_USAGE_AUTO_PREFER_HOST: +    { +        if(bufImgUsage == VmaBufferImageUsage::UNKNOWN) +        { +            VMA_ASSERT(0 && "VMA_MEMORY_USAGE_AUTO* values can only be used with functions like vmaCreateBuffer, vmaCreateImage so that the details of the created resource are known." 
+                " Maybe you use VkBufferUsageFlags2CreateInfoKHR but forgot to use VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT?" ); +            return false; +        } + +        const bool deviceAccess = bufImgUsage.ContainsDeviceAccess(); +        const bool hostAccessSequentialWrite = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT) != 0; +        const bool hostAccessRandom = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT) != 0; +        const bool hostAccessAllowTransferInstead = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT) != 0; +        const bool preferDevice = allocCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; +        const bool preferHost = allocCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_HOST; + +        // CPU random access - e.g. a buffer written to or transferred from GPU to read back on CPU. +        if(hostAccessRandom) +        { +            // Prefer cached. Cannot require it, because some platforms don't have it (e.g. Raspberry Pi - see #362)! +            outPreferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + +            if (!isIntegratedGPU && deviceAccess && hostAccessAllowTransferInstead && !preferHost) +            { +                // Nice if it will end up in HOST_VISIBLE, but more importantly prefer DEVICE_LOCAL. +                // Omitting HOST_VISIBLE here is intentional. +                // In case there is DEVICE_LOCAL | HOST_VISIBLE | HOST_CACHED, it will pick that one. +                // Otherwise, this will give same weight to DEVICE_LOCAL as HOST_VISIBLE | HOST_CACHED and select the former if occurs first on the list. +                outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; +            } +            else +            { +                // Always CPU memory. +                outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; +            } +        } +        // CPU sequential write - may be CPU or host-visible GPU memory, uncached and write-combined. +        else if(hostAccessSequentialWrite) +        { +            // Want uncached and write-combined. +            outNotPreferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + +            if(!isIntegratedGPU && deviceAccess && hostAccessAllowTransferInstead && !preferHost) +            { +                outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; +            } +            else +            { +                outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; +                // Direct GPU access, CPU sequential write (e.g. a dynamic uniform buffer updated every frame) +                if(deviceAccess) +                { +                    // Could go to CPU memory or GPU BAR/unified. Up to the user to decide. If no preference, choose GPU memory. +                    if(preferHost) +                        outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; +                    else +                        outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; +                } +                // GPU no direct access, CPU sequential write (e.g. an upload buffer to be transferred to the GPU) +                else +                { +                    // Could go to CPU memory or GPU BAR/unified. Up to the user to decide. If no preference, choose CPU memory. 
+                    if(preferDevice) +                        outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; +                    else +                        outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; +                } +            } +        } +        // No CPU access +        else +        { +            // if(deviceAccess) +            // +            // GPU access, no CPU access (e.g. a color attachment image) - prefer GPU memory, +            // unless there is a clear preference from the user not to do so. +            // +            // else: +            // +            // No direct GPU access, no CPU access, just transfers. +            // It may be staging copy intended for e.g. preserving image for next frame (then better GPU memory) or +            // a "swap file" copy to free some GPU memory (then better CPU memory). +            // Up to the user to decide. If no preferece, assume the former and choose GPU memory. + +            if(preferHost) +                outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; +            else +                outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; +        } +        break; +    } +    default: +        VMA_ASSERT(0); +    } + +    // Avoid DEVICE_COHERENT unless explicitly requested. +    if(((allocCreateInfo.requiredFlags | allocCreateInfo.preferredFlags) & +        (VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY | VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY)) == 0) +    { +        outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY; +    } + +    return true; +} + +//////////////////////////////////////////////////////////////////////////////// +// Memory allocation + +static void* VmaMalloc(const VkAllocationCallbacks* pAllocationCallbacks, size_t size, size_t alignment) +{ +    void* result = VMA_NULL; +    if ((pAllocationCallbacks != VMA_NULL) && +        (pAllocationCallbacks->pfnAllocation != VMA_NULL)) +    { +        result = (*pAllocationCallbacks->pfnAllocation)( +            pAllocationCallbacks->pUserData, +            size, +            alignment, +            VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); +    } +    else +    { +        result = VMA_SYSTEM_ALIGNED_MALLOC(size, alignment); +    } +    VMA_ASSERT(result != VMA_NULL && "CPU memory allocation failed."); +    return result; +} + +static void VmaFree(const VkAllocationCallbacks* pAllocationCallbacks, void* ptr) +{ +    if ((pAllocationCallbacks != VMA_NULL) && +        (pAllocationCallbacks->pfnFree != VMA_NULL)) +    { +        (*pAllocationCallbacks->pfnFree)(pAllocationCallbacks->pUserData, ptr); +    } +    else +    { +        VMA_SYSTEM_ALIGNED_FREE(ptr); +    } +} + +template<typename T> +static T* VmaAllocate(const VkAllocationCallbacks* pAllocationCallbacks) +{ +    return (T*)VmaMalloc(pAllocationCallbacks, sizeof(T), VMA_ALIGN_OF(T)); +} + +template<typename T> +static T* VmaAllocateArray(const VkAllocationCallbacks* pAllocationCallbacks, size_t count) +{ +    return (T*)VmaMalloc(pAllocationCallbacks, sizeof(T) * count, VMA_ALIGN_OF(T)); +} + +#define vma_new(allocator, type)   new(VmaAllocate<type>(allocator))(type) + +#define vma_new_array(allocator, type, count)   new(VmaAllocateArray<type>((allocator), (count)))(type) + +template<typename T> +static void vma_delete(const VkAllocationCallbacks* pAllocationCallbacks, T* ptr) +{ +    ptr->~T(); +    VmaFree(pAllocationCallbacks, ptr); +} + +template<typename T> +static void vma_delete_array(const VkAllocationCallbacks* 
pAllocationCallbacks, T* ptr, size_t count) +{ +    if (ptr != VMA_NULL) +    { +        for (size_t i = count; i--; ) +        { +            ptr[i].~T(); +        } +        VmaFree(pAllocationCallbacks, ptr); +    } +} + +static char* VmaCreateStringCopy(const VkAllocationCallbacks* allocs, const char* srcStr) +{ +    if (srcStr != VMA_NULL) +    { +        const size_t len = strlen(srcStr); +        char* const result = vma_new_array(allocs, char, len + 1); +        memcpy(result, srcStr, len + 1); +        return result; +    } +    return VMA_NULL; +} + +#if VMA_STATS_STRING_ENABLED +static char* VmaCreateStringCopy(const VkAllocationCallbacks* allocs, const char* srcStr, size_t strLen) +{ +    if (srcStr != VMA_NULL) +    { +        char* const result = vma_new_array(allocs, char, strLen + 1); +        memcpy(result, srcStr, strLen); +        result[strLen] = '\0'; +        return result; +    } +    return VMA_NULL; +} +#endif // VMA_STATS_STRING_ENABLED + +static void VmaFreeString(const VkAllocationCallbacks* allocs, char* str) +{ +    if (str != VMA_NULL) +    { +        const size_t len = strlen(str); +        vma_delete_array(allocs, str, len + 1); +    } +} + +template<typename CmpLess, typename VectorT> +size_t VmaVectorInsertSorted(VectorT& vector, const typename VectorT::value_type& value) +{ +    const size_t indexToInsert = VmaBinaryFindFirstNotLess( +        vector.data(), +        vector.data() + vector.size(), +        value, +        CmpLess()) - vector.data(); +    VmaVectorInsert(vector, indexToInsert, value); +    return indexToInsert; +} + +template<typename CmpLess, typename VectorT> +bool VmaVectorRemoveSorted(VectorT& vector, const typename VectorT::value_type& value) +{ +    CmpLess comparator; +    typename VectorT::iterator it = VmaBinaryFindFirstNotLess( +        vector.begin(), +        vector.end(), +        value, +        comparator); +    if ((it != vector.end()) && !comparator(*it, value) && !comparator(value, *it)) +    { +        size_t indexToRemove = it - vector.begin(); +        VmaVectorRemove(vector, indexToRemove); +        return true; +    } +    return false; +} +#endif // _VMA_FUNCTIONS + +#ifndef _VMA_STATISTICS_FUNCTIONS + +static void VmaClearStatistics(VmaStatistics& outStats) +{ +    outStats.blockCount = 0; +    outStats.allocationCount = 0; +    outStats.blockBytes = 0; +    outStats.allocationBytes = 0; +} + +static void VmaAddStatistics(VmaStatistics& inoutStats, const VmaStatistics& src) +{ +    inoutStats.blockCount += src.blockCount; +    inoutStats.allocationCount += src.allocationCount; +    inoutStats.blockBytes += src.blockBytes; +    inoutStats.allocationBytes += src.allocationBytes; +} + +static void VmaClearDetailedStatistics(VmaDetailedStatistics& outStats) +{ +    VmaClearStatistics(outStats.statistics); +    outStats.unusedRangeCount = 0; +    outStats.allocationSizeMin = VK_WHOLE_SIZE; +    outStats.allocationSizeMax = 0; +    outStats.unusedRangeSizeMin = VK_WHOLE_SIZE; +    outStats.unusedRangeSizeMax = 0; +} + +static void VmaAddDetailedStatisticsAllocation(VmaDetailedStatistics& inoutStats, VkDeviceSize size) +{ +    inoutStats.statistics.allocationCount++; +    inoutStats.statistics.allocationBytes += size; +    inoutStats.allocationSizeMin = VMA_MIN(inoutStats.allocationSizeMin, size); +    inoutStats.allocationSizeMax = VMA_MAX(inoutStats.allocationSizeMax, size); +} + +static void VmaAddDetailedStatisticsUnusedRange(VmaDetailedStatistics& inoutStats, VkDeviceSize size) +{ +    inoutStats.unusedRangeCount++; 
+    inoutStats.unusedRangeSizeMin = VMA_MIN(inoutStats.unusedRangeSizeMin, size); +    inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, size); +} + +static void VmaAddDetailedStatistics(VmaDetailedStatistics& inoutStats, const VmaDetailedStatistics& src) +{ +    VmaAddStatistics(inoutStats.statistics, src.statistics); +    inoutStats.unusedRangeCount += src.unusedRangeCount; +    inoutStats.allocationSizeMin = VMA_MIN(inoutStats.allocationSizeMin, src.allocationSizeMin); +    inoutStats.allocationSizeMax = VMA_MAX(inoutStats.allocationSizeMax, src.allocationSizeMax); +    inoutStats.unusedRangeSizeMin = VMA_MIN(inoutStats.unusedRangeSizeMin, src.unusedRangeSizeMin); +    inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, src.unusedRangeSizeMax); +} + +#endif // _VMA_STATISTICS_FUNCTIONS + +#ifndef _VMA_MUTEX_LOCK +// Helper RAII class to lock a mutex in constructor and unlock it in destructor (at the end of scope). +struct VmaMutexLock +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaMutexLock) +public: +    VmaMutexLock(VMA_MUTEX& mutex, bool useMutex = true) : +        m_pMutex(useMutex ? &mutex : VMA_NULL) +    { +        if (m_pMutex) { m_pMutex->Lock(); } +    } +    ~VmaMutexLock() {  if (m_pMutex) { m_pMutex->Unlock(); } } + +private: +    VMA_MUTEX* m_pMutex; +}; + +// Helper RAII class to lock a RW mutex in constructor and unlock it in destructor (at the end of scope), for reading. +struct VmaMutexLockRead +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaMutexLockRead) +public: +    VmaMutexLockRead(VMA_RW_MUTEX& mutex, bool useMutex) : +        m_pMutex(useMutex ? &mutex : VMA_NULL) +    { +        if (m_pMutex) { m_pMutex->LockRead(); } +    } +    ~VmaMutexLockRead() { if (m_pMutex) { m_pMutex->UnlockRead(); } } + +private: +    VMA_RW_MUTEX* m_pMutex; +}; + +// Helper RAII class to lock a RW mutex in constructor and unlock it in destructor (at the end of scope), for writing. +struct VmaMutexLockWrite +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaMutexLockWrite) +public: +    VmaMutexLockWrite(VMA_RW_MUTEX& mutex, bool useMutex) +        : m_pMutex(useMutex ? &mutex : VMA_NULL) +    { +        if (m_pMutex) { m_pMutex->LockWrite(); } +    } +    ~VmaMutexLockWrite() { if (m_pMutex) { m_pMutex->UnlockWrite(); } } + +private: +    VMA_RW_MUTEX* m_pMutex; +}; + +#if VMA_DEBUG_GLOBAL_MUTEX +    static VMA_MUTEX gDebugGlobalMutex; +    #define VMA_DEBUG_GLOBAL_MUTEX_LOCK VmaMutexLock debugGlobalMutexLock(gDebugGlobalMutex, true); +#else +    #define VMA_DEBUG_GLOBAL_MUTEX_LOCK +#endif +#endif // _VMA_MUTEX_LOCK + +#ifndef _VMA_ATOMIC_TRANSACTIONAL_INCREMENT +// An object that increments given atomic but decrements it back in the destructor unless Commit() is called. +template<typename AtomicT> +struct AtomicTransactionalIncrement +{ +public: +    using T = decltype(AtomicT().load()); + +    ~AtomicTransactionalIncrement() +    { +        if(m_Atomic) +            --(*m_Atomic); +    } + +    void Commit() { m_Atomic = VMA_NULL; } +    T Increment(AtomicT* atomic) +    { +        m_Atomic = atomic; +        return m_Atomic->fetch_add(1); +    } + +private: +    AtomicT* m_Atomic = VMA_NULL; +}; +#endif // _VMA_ATOMIC_TRANSACTIONAL_INCREMENT + +#ifndef _VMA_STL_ALLOCATOR +// STL-compatible allocator. 
+template<typename T> +struct VmaStlAllocator +{ +    const VkAllocationCallbacks* const m_pCallbacks; +    typedef T value_type; + +    VmaStlAllocator(const VkAllocationCallbacks* pCallbacks) : m_pCallbacks(pCallbacks) {} +    template<typename U> +    VmaStlAllocator(const VmaStlAllocator<U>& src) : m_pCallbacks(src.m_pCallbacks) {} +    VmaStlAllocator(const VmaStlAllocator&) = default; +    VmaStlAllocator& operator=(const VmaStlAllocator&) = delete; + +    T* allocate(size_t n) { return VmaAllocateArray<T>(m_pCallbacks, n); } +    void deallocate(T* p, size_t n) { VmaFree(m_pCallbacks, p); } + +    template<typename U> +    bool operator==(const VmaStlAllocator<U>& rhs) const +    { +        return m_pCallbacks == rhs.m_pCallbacks; +    } +    template<typename U> +    bool operator!=(const VmaStlAllocator<U>& rhs) const +    { +        return m_pCallbacks != rhs.m_pCallbacks; +    } +}; +#endif // _VMA_STL_ALLOCATOR + +#ifndef _VMA_VECTOR +/* Class with interface compatible with subset of std::vector. +T must be POD because constructors and destructors are not called and memcpy is +used for these objects. */ +template<typename T, typename AllocatorT> +class VmaVector +{ +public: +    typedef T value_type; +    typedef T* iterator; +    typedef const T* const_iterator; + +    VmaVector(const AllocatorT& allocator); +    VmaVector(size_t count, const AllocatorT& allocator); +    // This version of the constructor is here for compatibility with pre-C++14 std::vector. +    // value is unused. +    VmaVector(size_t count, const T& value, const AllocatorT& allocator) : VmaVector(count, allocator) {} +    VmaVector(const VmaVector<T, AllocatorT>& src); +    VmaVector& operator=(const VmaVector& rhs); +    ~VmaVector() { VmaFree(m_Allocator.m_pCallbacks, m_pArray); } + +    bool empty() const { return m_Count == 0; } +    size_t size() const { return m_Count; } +    T* data() { return m_pArray; } +    T& front() { VMA_HEAVY_ASSERT(m_Count > 0); return m_pArray[0]; } +    T& back() { VMA_HEAVY_ASSERT(m_Count > 0); return m_pArray[m_Count - 1]; } +    const T* data() const { return m_pArray; } +    const T& front() const { VMA_HEAVY_ASSERT(m_Count > 0); return m_pArray[0]; } +    const T& back() const { VMA_HEAVY_ASSERT(m_Count > 0); return m_pArray[m_Count - 1]; } + +    iterator begin() { return m_pArray; } +    iterator end() { return m_pArray + m_Count; } +    const_iterator cbegin() const { return m_pArray; } +    const_iterator cend() const { return m_pArray + m_Count; } +    const_iterator begin() const { return cbegin(); } +    const_iterator end() const { return cend(); } + +    void pop_front() { VMA_HEAVY_ASSERT(m_Count > 0); remove(0); } +    void pop_back() { VMA_HEAVY_ASSERT(m_Count > 0); resize(size() - 1); } +    void push_front(const T& src) { insert(0, src); } + +    void push_back(const T& src); +    void reserve(size_t newCapacity, bool freeMemory = false); +    void resize(size_t newCount); +    void clear() { resize(0); } +    void shrink_to_fit(); +    void insert(size_t index, const T& src); +    void remove(size_t index); + +    T& operator[](size_t index) { VMA_HEAVY_ASSERT(index < m_Count); return m_pArray[index]; } +    const T& operator[](size_t index) const { VMA_HEAVY_ASSERT(index < m_Count); return m_pArray[index]; } + +private: +    AllocatorT m_Allocator; +    T* m_pArray; +    size_t m_Count; +    size_t m_Capacity; +}; + +#ifndef _VMA_VECTOR_FUNCTIONS +template<typename T, typename AllocatorT> +VmaVector<T, AllocatorT>::VmaVector(const AllocatorT& 
allocator) +    : m_Allocator(allocator), +    m_pArray(VMA_NULL), +    m_Count(0), +    m_Capacity(0) {} + +template<typename T, typename AllocatorT> +VmaVector<T, AllocatorT>::VmaVector(size_t count, const AllocatorT& allocator) +    : m_Allocator(allocator), +    m_pArray(count ? (T*)VmaAllocateArray<T>(allocator.m_pCallbacks, count) : VMA_NULL), +    m_Count(count), +    m_Capacity(count) {} + +template<typename T, typename AllocatorT> +VmaVector<T, AllocatorT>::VmaVector(const VmaVector& src) +    : m_Allocator(src.m_Allocator), +    m_pArray(src.m_Count ? (T*)VmaAllocateArray<T>(src.m_Allocator.m_pCallbacks, src.m_Count) : VMA_NULL), +    m_Count(src.m_Count), +    m_Capacity(src.m_Count) +{ +    if (m_Count != 0) +    { +        memcpy(m_pArray, src.m_pArray, m_Count * sizeof(T)); +    } +} + +template<typename T, typename AllocatorT> +VmaVector<T, AllocatorT>& VmaVector<T, AllocatorT>::operator=(const VmaVector& rhs) +{ +    if (&rhs != this) +    { +        resize(rhs.m_Count); +        if (m_Count != 0) +        { +            memcpy(m_pArray, rhs.m_pArray, m_Count * sizeof(T)); +        } +    } +    return *this; +} + +template<typename T, typename AllocatorT> +void VmaVector<T, AllocatorT>::push_back(const T& src) +{ +    const size_t newIndex = size(); +    resize(newIndex + 1); +    m_pArray[newIndex] = src; +} + +template<typename T, typename AllocatorT> +void VmaVector<T, AllocatorT>::reserve(size_t newCapacity, bool freeMemory) +{ +    newCapacity = VMA_MAX(newCapacity, m_Count); + +    if ((newCapacity < m_Capacity) && !freeMemory) +    { +        newCapacity = m_Capacity; +    } + +    if (newCapacity != m_Capacity) +    { +        T* const newArray = newCapacity ? VmaAllocateArray<T>(m_Allocator, newCapacity) : VMA_NULL; +        if (m_Count != 0) +        { +            memcpy(newArray, m_pArray, m_Count * sizeof(T)); +        } +        VmaFree(m_Allocator.m_pCallbacks, m_pArray); +        m_Capacity = newCapacity; +        m_pArray = newArray; +    } +} + +template<typename T, typename AllocatorT> +void VmaVector<T, AllocatorT>::resize(size_t newCount) +{ +    size_t newCapacity = m_Capacity; +    if (newCount > m_Capacity) +    { +        newCapacity = VMA_MAX(newCount, VMA_MAX(m_Capacity * 3 / 2, (size_t)8)); +    } + +    if (newCapacity != m_Capacity) +    { +        T* const newArray = newCapacity ? 
VmaAllocateArray<T>(m_Allocator.m_pCallbacks, newCapacity) : VMA_NULL; +        const size_t elementsToCopy = VMA_MIN(m_Count, newCount); +        if (elementsToCopy != 0) +        { +            memcpy(newArray, m_pArray, elementsToCopy * sizeof(T)); +        } +        VmaFree(m_Allocator.m_pCallbacks, m_pArray); +        m_Capacity = newCapacity; +        m_pArray = newArray; +    } + +    m_Count = newCount; +} + +template<typename T, typename AllocatorT> +void VmaVector<T, AllocatorT>::shrink_to_fit() +{ +    if (m_Capacity > m_Count) +    { +        T* newArray = VMA_NULL; +        if (m_Count > 0) +        { +            newArray = VmaAllocateArray<T>(m_Allocator.m_pCallbacks, m_Count); +            memcpy(newArray, m_pArray, m_Count * sizeof(T)); +        } +        VmaFree(m_Allocator.m_pCallbacks, m_pArray); +        m_Capacity = m_Count; +        m_pArray = newArray; +    } +} + +template<typename T, typename AllocatorT> +void VmaVector<T, AllocatorT>::insert(size_t index, const T& src) +{ +    VMA_HEAVY_ASSERT(index <= m_Count); +    const size_t oldCount = size(); +    resize(oldCount + 1); +    if (index < oldCount) +    { +        memmove(m_pArray + (index + 1), m_pArray + index, (oldCount - index) * sizeof(T)); +    } +    m_pArray[index] = src; +} + +template<typename T, typename AllocatorT> +void VmaVector<T, AllocatorT>::remove(size_t index) +{ +    VMA_HEAVY_ASSERT(index < m_Count); +    const size_t oldCount = size(); +    if (index < oldCount - 1) +    { +        memmove(m_pArray + index, m_pArray + (index + 1), (oldCount - index - 1) * sizeof(T)); +    } +    resize(oldCount - 1); +} +#endif // _VMA_VECTOR_FUNCTIONS + +template<typename T, typename allocatorT> +static void VmaVectorInsert(VmaVector<T, allocatorT>& vec, size_t index, const T& item) +{ +    vec.insert(index, item); +} + +template<typename T, typename allocatorT> +static void VmaVectorRemove(VmaVector<T, allocatorT>& vec, size_t index) +{ +    vec.remove(index); +} +#endif // _VMA_VECTOR + +#ifndef _VMA_SMALL_VECTOR +/* +This is a vector (a variable-sized array), optimized for the case when the array is small. + +It contains some number of elements in-place, which allows it to avoid heap allocation +when the actual number of elements is below that threshold. This allows normal "small" +cases to be fast without losing generality for large inputs. +*/ +template<typename T, typename AllocatorT, size_t N> +class VmaSmallVector +{ +public: +    typedef T value_type; +    typedef T* iterator; + +    VmaSmallVector(const AllocatorT& allocator); +    VmaSmallVector(size_t count, const AllocatorT& allocator); +    template<typename SrcT, typename SrcAllocatorT, size_t SrcN> +    VmaSmallVector(const VmaSmallVector<SrcT, SrcAllocatorT, SrcN>&) = delete; +    template<typename SrcT, typename SrcAllocatorT, size_t SrcN> +    VmaSmallVector<T, AllocatorT, N>& operator=(const VmaSmallVector<SrcT, SrcAllocatorT, SrcN>&) = delete; +    ~VmaSmallVector() = default; + +    bool empty() const { return m_Count == 0; } +    size_t size() const { return m_Count; } +    T* data() { return m_Count > N ? m_DynamicArray.data() : m_StaticArray; } +    T& front() { VMA_HEAVY_ASSERT(m_Count > 0); return data()[0]; } +    T& back() { VMA_HEAVY_ASSERT(m_Count > 0); return data()[m_Count - 1]; } +    const T* data() const { return m_Count > N ? 
m_DynamicArray.data() : m_StaticArray; } +    const T& front() const { VMA_HEAVY_ASSERT(m_Count > 0); return data()[0]; } +    const T& back() const { VMA_HEAVY_ASSERT(m_Count > 0); return data()[m_Count - 1]; } + +    iterator begin() { return data(); } +    iterator end() { return data() + m_Count; } + +    void pop_front() { VMA_HEAVY_ASSERT(m_Count > 0); remove(0); } +    void pop_back() { VMA_HEAVY_ASSERT(m_Count > 0); resize(size() - 1); } +    void push_front(const T& src) { insert(0, src); } + +    void push_back(const T& src); +    void resize(size_t newCount, bool freeMemory = false); +    void clear(bool freeMemory = false); +    void insert(size_t index, const T& src); +    void remove(size_t index); + +    T& operator[](size_t index) { VMA_HEAVY_ASSERT(index < m_Count); return data()[index]; } +    const T& operator[](size_t index) const { VMA_HEAVY_ASSERT(index < m_Count); return data()[index]; } + +private: +    size_t m_Count; +    T m_StaticArray[N]; // Used when m_Size <= N +    VmaVector<T, AllocatorT> m_DynamicArray; // Used when m_Size > N +}; + +#ifndef _VMA_SMALL_VECTOR_FUNCTIONS +template<typename T, typename AllocatorT, size_t N> +VmaSmallVector<T, AllocatorT, N>::VmaSmallVector(const AllocatorT& allocator) +    : m_Count(0), +    m_DynamicArray(allocator) {} + +template<typename T, typename AllocatorT, size_t N> +VmaSmallVector<T, AllocatorT, N>::VmaSmallVector(size_t count, const AllocatorT& allocator) +    : m_Count(count), +    m_DynamicArray(count > N ? count : 0, allocator) {} + +template<typename T, typename AllocatorT, size_t N> +void VmaSmallVector<T, AllocatorT, N>::push_back(const T& src) +{ +    const size_t newIndex = size(); +    resize(newIndex + 1); +    data()[newIndex] = src; +} + +template<typename T, typename AllocatorT, size_t N> +void VmaSmallVector<T, AllocatorT, N>::resize(size_t newCount, bool freeMemory) +{ +    if (newCount > N && m_Count > N) +    { +        // Any direction, staying in m_DynamicArray +        m_DynamicArray.resize(newCount); +        if (freeMemory) +        { +            m_DynamicArray.shrink_to_fit(); +        } +    } +    else if (newCount > N && m_Count <= N) +    { +        // Growing, moving from m_StaticArray to m_DynamicArray +        m_DynamicArray.resize(newCount); +        if (m_Count > 0) +        { +            memcpy(m_DynamicArray.data(), m_StaticArray, m_Count * sizeof(T)); +        } +    } +    else if (newCount <= N && m_Count > N) +    { +        // Shrinking, moving from m_DynamicArray to m_StaticArray +        if (newCount > 0) +        { +            memcpy(m_StaticArray, m_DynamicArray.data(), newCount * sizeof(T)); +        } +        m_DynamicArray.resize(0); +        if (freeMemory) +        { +            m_DynamicArray.shrink_to_fit(); +        } +    } +    else +    { +        // Any direction, staying in m_StaticArray - nothing to do here +    } +    m_Count = newCount; +} + +template<typename T, typename AllocatorT, size_t N> +void VmaSmallVector<T, AllocatorT, N>::clear(bool freeMemory) +{ +    m_DynamicArray.clear(); +    if (freeMemory) +    { +        m_DynamicArray.shrink_to_fit(); +    } +    m_Count = 0; +} + +template<typename T, typename AllocatorT, size_t N> +void VmaSmallVector<T, AllocatorT, N>::insert(size_t index, const T& src) +{ +    VMA_HEAVY_ASSERT(index <= m_Count); +    const size_t oldCount = size(); +    resize(oldCount + 1); +    T* const dataPtr = data(); +    if (index < oldCount) +    { +        //  I know, this could be more optimal for case where memmove can 
be memcpy directly from m_StaticArray to m_DynamicArray. +        memmove(dataPtr + (index + 1), dataPtr + index, (oldCount - index) * sizeof(T)); +    } +    dataPtr[index] = src; +} + +template<typename T, typename AllocatorT, size_t N> +void VmaSmallVector<T, AllocatorT, N>::remove(size_t index) +{ +    VMA_HEAVY_ASSERT(index < m_Count); +    const size_t oldCount = size(); +    if (index < oldCount - 1) +    { +        //  I know, this could be more optimal for case where memmove can be memcpy directly from m_DynamicArray to m_StaticArray. +        T* const dataPtr = data(); +        memmove(dataPtr + index, dataPtr + (index + 1), (oldCount - index - 1) * sizeof(T)); +    } +    resize(oldCount - 1); +} +#endif // _VMA_SMALL_VECTOR_FUNCTIONS +#endif // _VMA_SMALL_VECTOR + +#ifndef _VMA_POOL_ALLOCATOR +/* +Allocator for objects of type T using a list of arrays (pools) to speed up +allocation. Number of elements that can be allocated is not bounded because +allocator can create multiple blocks. +*/ +template<typename T> +class VmaPoolAllocator +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaPoolAllocator) +public: +    VmaPoolAllocator(const VkAllocationCallbacks* pAllocationCallbacks, uint32_t firstBlockCapacity); +    ~VmaPoolAllocator(); +    template<typename... Types> T* Alloc(Types&&... args); +    void Free(T* ptr); + +private: +    union Item +    { +        uint32_t NextFreeIndex; +        alignas(T) char Value[sizeof(T)]; +    }; +    struct ItemBlock +    { +        Item* pItems; +        uint32_t Capacity; +        uint32_t FirstFreeIndex; +    }; + +    const VkAllocationCallbacks* m_pAllocationCallbacks; +    const uint32_t m_FirstBlockCapacity; +    VmaVector<ItemBlock, VmaStlAllocator<ItemBlock>> m_ItemBlocks; + +    ItemBlock& CreateNewBlock(); +}; + +#ifndef _VMA_POOL_ALLOCATOR_FUNCTIONS +template<typename T> +VmaPoolAllocator<T>::VmaPoolAllocator(const VkAllocationCallbacks* pAllocationCallbacks, uint32_t firstBlockCapacity) +    : m_pAllocationCallbacks(pAllocationCallbacks), +    m_FirstBlockCapacity(firstBlockCapacity), +    m_ItemBlocks(VmaStlAllocator<ItemBlock>(pAllocationCallbacks)) +{ +    VMA_ASSERT(m_FirstBlockCapacity > 1); +} + +template<typename T> +VmaPoolAllocator<T>::~VmaPoolAllocator() +{ +    for (size_t i = m_ItemBlocks.size(); i--;) +        vma_delete_array(m_pAllocationCallbacks, m_ItemBlocks[i].pItems, m_ItemBlocks[i].Capacity); +    m_ItemBlocks.clear(); +} + +template<typename T> +template<typename... Types> T* VmaPoolAllocator<T>::Alloc(Types&&... args) +{ +    for (size_t i = m_ItemBlocks.size(); i--; ) +    { +        ItemBlock& block = m_ItemBlocks[i]; +        // This block has some free items: Use first one. +        if (block.FirstFreeIndex != UINT32_MAX) +        { +            Item* const pItem = &block.pItems[block.FirstFreeIndex]; +            block.FirstFreeIndex = pItem->NextFreeIndex; +            T* result = (T*)&pItem->Value; +            new(result)T(std::forward<Types>(args)...); // Explicit constructor call. +            return result; +        } +    } + +    // No block has free item: Create new one and use it. +    ItemBlock& newBlock = CreateNewBlock(); +    Item* const pItem = &newBlock.pItems[0]; +    newBlock.FirstFreeIndex = pItem->NextFreeIndex; +    T* result = (T*)&pItem->Value; +    new(result) T(std::forward<Types>(args)...); // Explicit constructor call. +    return result; +} + +template<typename T> +void VmaPoolAllocator<T>::Free(T* ptr) +{ +    // Search all memory blocks to find ptr. 
+    for (size_t i = m_ItemBlocks.size(); i--; ) +    { +        ItemBlock& block = m_ItemBlocks[i]; + +        // Casting to union. +        Item* pItemPtr; +        memcpy(&pItemPtr, &ptr, sizeof(pItemPtr)); + +        // Check if pItemPtr is in address range of this block. +        if ((pItemPtr >= block.pItems) && (pItemPtr < block.pItems + block.Capacity)) +        { +            ptr->~T(); // Explicit destructor call. +            const uint32_t index = static_cast<uint32_t>(pItemPtr - block.pItems); +            pItemPtr->NextFreeIndex = block.FirstFreeIndex; +            block.FirstFreeIndex = index; +            return; +        } +    } +    VMA_ASSERT(0 && "Pointer doesn't belong to this memory pool."); +} + +template<typename T> +typename VmaPoolAllocator<T>::ItemBlock& VmaPoolAllocator<T>::CreateNewBlock() +{ +    const uint32_t newBlockCapacity = m_ItemBlocks.empty() ? +        m_FirstBlockCapacity : m_ItemBlocks.back().Capacity * 3 / 2; + +    const ItemBlock newBlock = +    { +        vma_new_array(m_pAllocationCallbacks, Item, newBlockCapacity), +        newBlockCapacity, +        0 +    }; + +    m_ItemBlocks.push_back(newBlock); + +    // Setup singly-linked list of all free items in this block. +    for (uint32_t i = 0; i < newBlockCapacity - 1; ++i) +        newBlock.pItems[i].NextFreeIndex = i + 1; +    newBlock.pItems[newBlockCapacity - 1].NextFreeIndex = UINT32_MAX; +    return m_ItemBlocks.back(); +} +#endif // _VMA_POOL_ALLOCATOR_FUNCTIONS +#endif // _VMA_POOL_ALLOCATOR + +#ifndef _VMA_RAW_LIST +template<typename T> +struct VmaListItem +{ +    VmaListItem* pPrev; +    VmaListItem* pNext; +    T Value; +}; + +// Doubly linked list. +template<typename T> +class VmaRawList +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaRawList) +public: +    typedef VmaListItem<T> ItemType; + +    VmaRawList(const VkAllocationCallbacks* pAllocationCallbacks); +    // Intentionally not calling Clear, because that would be unnecessary +    // computations to return all items to m_ItemAllocator as free. +    ~VmaRawList() = default; + +    size_t GetCount() const { return m_Count; } +    bool IsEmpty() const { return m_Count == 0; } + +    ItemType* Front() { return m_pFront; } +    ItemType* Back() { return m_pBack; } +    const ItemType* Front() const { return m_pFront; } +    const ItemType* Back() const { return m_pBack; } + +    ItemType* PushFront(); +    ItemType* PushBack(); +    ItemType* PushFront(const T& value); +    ItemType* PushBack(const T& value); +    void PopFront(); +    void PopBack(); + +    // Item can be null - it means PushBack. +    ItemType* InsertBefore(ItemType* pItem); +    // Item can be null - it means PushFront. 
+    ItemType* InsertAfter(ItemType* pItem); +    ItemType* InsertBefore(ItemType* pItem, const T& value); +    ItemType* InsertAfter(ItemType* pItem, const T& value); + +    void Clear(); +    void Remove(ItemType* pItem); + +private: +    const VkAllocationCallbacks* const m_pAllocationCallbacks; +    VmaPoolAllocator<ItemType> m_ItemAllocator; +    ItemType* m_pFront; +    ItemType* m_pBack; +    size_t m_Count; +}; + +#ifndef _VMA_RAW_LIST_FUNCTIONS +template<typename T> +VmaRawList<T>::VmaRawList(const VkAllocationCallbacks* pAllocationCallbacks) +    : m_pAllocationCallbacks(pAllocationCallbacks), +    m_ItemAllocator(pAllocationCallbacks, 128), +    m_pFront(VMA_NULL), +    m_pBack(VMA_NULL), +    m_Count(0) {} + +template<typename T> +VmaListItem<T>* VmaRawList<T>::PushFront() +{ +    ItemType* const pNewItem = m_ItemAllocator.Alloc(); +    pNewItem->pPrev = VMA_NULL; +    if (IsEmpty()) +    { +        pNewItem->pNext = VMA_NULL; +        m_pFront = pNewItem; +        m_pBack = pNewItem; +        m_Count = 1; +    } +    else +    { +        pNewItem->pNext = m_pFront; +        m_pFront->pPrev = pNewItem; +        m_pFront = pNewItem; +        ++m_Count; +    } +    return pNewItem; +} + +template<typename T> +VmaListItem<T>* VmaRawList<T>::PushBack() +{ +    ItemType* const pNewItem = m_ItemAllocator.Alloc(); +    pNewItem->pNext = VMA_NULL; +    if(IsEmpty()) +    { +        pNewItem->pPrev = VMA_NULL; +        m_pFront = pNewItem; +        m_pBack = pNewItem; +        m_Count = 1; +    } +    else +    { +        pNewItem->pPrev = m_pBack; +        m_pBack->pNext = pNewItem; +        m_pBack = pNewItem; +        ++m_Count; +    } +    return pNewItem; +} + +template<typename T> +VmaListItem<T>* VmaRawList<T>::PushFront(const T& value) +{ +    ItemType* const pNewItem = PushFront(); +    pNewItem->Value = value; +    return pNewItem; +} + +template<typename T> +VmaListItem<T>* VmaRawList<T>::PushBack(const T& value) +{ +    ItemType* const pNewItem = PushBack(); +    pNewItem->Value = value; +    return pNewItem; +} + +template<typename T> +void VmaRawList<T>::PopFront() +{ +    VMA_HEAVY_ASSERT(m_Count > 0); +    ItemType* const pFrontItem = m_pFront; +    ItemType* const pNextItem = pFrontItem->pNext; +    if (pNextItem != VMA_NULL) +    { +        pNextItem->pPrev = VMA_NULL; +    } +    m_pFront = pNextItem; +    m_ItemAllocator.Free(pFrontItem); +    --m_Count; +} + +template<typename T> +void VmaRawList<T>::PopBack() +{ +    VMA_HEAVY_ASSERT(m_Count > 0); +    ItemType* const pBackItem = m_pBack; +    ItemType* const pPrevItem = pBackItem->pPrev; +    if(pPrevItem != VMA_NULL) +    { +        pPrevItem->pNext = VMA_NULL; +    } +    m_pBack = pPrevItem; +    m_ItemAllocator.Free(pBackItem); +    --m_Count; +} + +template<typename T> +void VmaRawList<T>::Clear() +{ +    if (IsEmpty() == false) +    { +        ItemType* pItem = m_pBack; +        while (pItem != VMA_NULL) +        { +            ItemType* const pPrevItem = pItem->pPrev; +            m_ItemAllocator.Free(pItem); +            pItem = pPrevItem; +        } +        m_pFront = VMA_NULL; +        m_pBack = VMA_NULL; +        m_Count = 0; +    } +} + +template<typename T> +void VmaRawList<T>::Remove(ItemType* pItem) +{ +    VMA_HEAVY_ASSERT(pItem != VMA_NULL); +    VMA_HEAVY_ASSERT(m_Count > 0); + +    if(pItem->pPrev != VMA_NULL) +    { +        pItem->pPrev->pNext = pItem->pNext; +    } +    else +    { +        VMA_HEAVY_ASSERT(m_pFront == pItem); +        m_pFront = pItem->pNext; +    } + +    if(pItem->pNext 
!= VMA_NULL) +    { +        pItem->pNext->pPrev = pItem->pPrev; +    } +    else +    { +        VMA_HEAVY_ASSERT(m_pBack == pItem); +        m_pBack = pItem->pPrev; +    } + +    m_ItemAllocator.Free(pItem); +    --m_Count; +} + +template<typename T> +VmaListItem<T>* VmaRawList<T>::InsertBefore(ItemType* pItem) +{ +    if(pItem != VMA_NULL) +    { +        ItemType* const prevItem = pItem->pPrev; +        ItemType* const newItem = m_ItemAllocator.Alloc(); +        newItem->pPrev = prevItem; +        newItem->pNext = pItem; +        pItem->pPrev = newItem; +        if(prevItem != VMA_NULL) +        { +            prevItem->pNext = newItem; +        } +        else +        { +            VMA_HEAVY_ASSERT(m_pFront == pItem); +            m_pFront = newItem; +        } +        ++m_Count; +        return newItem; +    } +    else +        return PushBack(); +} + +template<typename T> +VmaListItem<T>* VmaRawList<T>::InsertAfter(ItemType* pItem) +{ +    if(pItem != VMA_NULL) +    { +        ItemType* const nextItem = pItem->pNext; +        ItemType* const newItem = m_ItemAllocator.Alloc(); +        newItem->pNext = nextItem; +        newItem->pPrev = pItem; +        pItem->pNext = newItem; +        if(nextItem != VMA_NULL) +        { +            nextItem->pPrev = newItem; +        } +        else +        { +            VMA_HEAVY_ASSERT(m_pBack == pItem); +            m_pBack = newItem; +        } +        ++m_Count; +        return newItem; +    } +    else +        return PushFront(); +} + +template<typename T> +VmaListItem<T>* VmaRawList<T>::InsertBefore(ItemType* pItem, const T& value) +{ +    ItemType* const newItem = InsertBefore(pItem); +    newItem->Value = value; +    return newItem; +} + +template<typename T> +VmaListItem<T>* VmaRawList<T>::InsertAfter(ItemType* pItem, const T& value) +{ +    ItemType* const newItem = InsertAfter(pItem); +    newItem->Value = value; +    return newItem; +} +#endif // _VMA_RAW_LIST_FUNCTIONS +#endif // _VMA_RAW_LIST + +#ifndef _VMA_LIST +template<typename T, typename AllocatorT> +class VmaList +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaList) +public: +    class reverse_iterator; +    class const_iterator; +    class const_reverse_iterator; + +    class iterator +    { +        friend class const_iterator; +        friend class VmaList<T, AllocatorT>; +    public: +        iterator() :  m_pList(VMA_NULL), m_pItem(VMA_NULL) {} +        iterator(const reverse_iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + +        T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; } +        T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; } + +        bool operator==(const iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem == rhs.m_pItem; } +        bool operator!=(const iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; } + +        iterator operator++(int) { iterator result = *this; ++*this; return result; } +        iterator operator--(int) { iterator result = *this; --*this; return result; } + +        iterator& operator++() { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); m_pItem = m_pItem->pNext; return *this; } +        iterator& operator--(); + +    private: +        VmaRawList<T>* m_pList; +        VmaListItem<T>* m_pItem; + +        iterator(VmaRawList<T>* pList, VmaListItem<T>* pItem) : m_pList(pList),  m_pItem(pItem) {} +    }; +    class reverse_iterator +    { +        friend class const_reverse_iterator; + 
       friend class VmaList<T, AllocatorT>; +    public: +        reverse_iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {} +        reverse_iterator(const iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + +        T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; } +        T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; } + +        bool operator==(const reverse_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem == rhs.m_pItem; } +        bool operator!=(const reverse_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; } + +        reverse_iterator operator++(int) { reverse_iterator result = *this; ++* this; return result; } +        reverse_iterator operator--(int) { reverse_iterator result = *this; --* this; return result; } + +        reverse_iterator& operator++() { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); m_pItem = m_pItem->pPrev; return *this; } +        reverse_iterator& operator--(); + +    private: +        VmaRawList<T>* m_pList; +        VmaListItem<T>* m_pItem; + +        reverse_iterator(VmaRawList<T>* pList, VmaListItem<T>* pItem) : m_pList(pList),  m_pItem(pItem) {} +    }; +    class const_iterator +    { +        friend class VmaList<T, AllocatorT>; +    public: +        const_iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {} +        const_iterator(const iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} +        const_iterator(const reverse_iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + +        iterator drop_const() { return { const_cast<VmaRawList<T>*>(m_pList), const_cast<VmaListItem<T>*>(m_pItem) }; } + +        const T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; } +        const T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; } + +        bool operator==(const const_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem == rhs.m_pItem; } +        bool operator!=(const const_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; } + +        const_iterator operator++(int) { const_iterator result = *this; ++* this; return result; } +        const_iterator operator--(int) { const_iterator result = *this; --* this; return result; } + +        const_iterator& operator++() { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); m_pItem = m_pItem->pNext; return *this; } +        const_iterator& operator--(); + +    private: +        const VmaRawList<T>* m_pList; +        const VmaListItem<T>* m_pItem; + +        const_iterator(const VmaRawList<T>* pList, const VmaListItem<T>* pItem) : m_pList(pList), m_pItem(pItem) {} +    }; +    class const_reverse_iterator +    { +        friend class VmaList<T, AllocatorT>; +    public: +        const_reverse_iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {} +        const_reverse_iterator(const reverse_iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} +        const_reverse_iterator(const iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + +        reverse_iterator drop_const() { return { const_cast<VmaRawList<T>*>(m_pList), const_cast<VmaListItem<T>*>(m_pItem) }; } + +        const T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; } +        const T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; } + +        bool 
operator==(const const_reverse_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem == rhs.m_pItem; } +        bool operator!=(const const_reverse_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; } + +        const_reverse_iterator operator++(int) { const_reverse_iterator result = *this; ++* this; return result; } +        const_reverse_iterator operator--(int) { const_reverse_iterator result = *this; --* this; return result; } + +        const_reverse_iterator& operator++() { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); m_pItem = m_pItem->pPrev; return *this; } +        const_reverse_iterator& operator--(); + +    private: +        const VmaRawList<T>* m_pList; +        const VmaListItem<T>* m_pItem; + +        const_reverse_iterator(const VmaRawList<T>* pList, const VmaListItem<T>* pItem) : m_pList(pList), m_pItem(pItem) {} +    }; + +    VmaList(const AllocatorT& allocator) : m_RawList(allocator.m_pCallbacks) {} + +    bool empty() const { return m_RawList.IsEmpty(); } +    size_t size() const { return m_RawList.GetCount(); } + +    iterator begin() { return iterator(&m_RawList, m_RawList.Front()); } +    iterator end() { return iterator(&m_RawList, VMA_NULL); } + +    const_iterator cbegin() const { return const_iterator(&m_RawList, m_RawList.Front()); } +    const_iterator cend() const { return const_iterator(&m_RawList, VMA_NULL); } + +    const_iterator begin() const { return cbegin(); } +    const_iterator end() const { return cend(); } + +    reverse_iterator rbegin() { return reverse_iterator(&m_RawList, m_RawList.Back()); } +    reverse_iterator rend() { return reverse_iterator(&m_RawList, VMA_NULL); } + +    const_reverse_iterator crbegin() const { return const_reverse_iterator(&m_RawList, m_RawList.Back()); } +    const_reverse_iterator crend() const { return const_reverse_iterator(&m_RawList, VMA_NULL); } + +    const_reverse_iterator rbegin() const { return crbegin(); } +    const_reverse_iterator rend() const { return crend(); } + +    void push_back(const T& value) { m_RawList.PushBack(value); } +    iterator insert(iterator it, const T& value) { return iterator(&m_RawList, m_RawList.InsertBefore(it.m_pItem, value)); } + +    void clear() { m_RawList.Clear(); } +    void erase(iterator it) { m_RawList.Remove(it.m_pItem); } + +private: +    VmaRawList<T> m_RawList; +}; + +#ifndef _VMA_LIST_FUNCTIONS +template<typename T, typename AllocatorT> +typename VmaList<T, AllocatorT>::iterator& VmaList<T, AllocatorT>::iterator::operator--() +{ +    if (m_pItem != VMA_NULL) +    { +        m_pItem = m_pItem->pPrev; +    } +    else +    { +        VMA_HEAVY_ASSERT(!m_pList->IsEmpty()); +        m_pItem = m_pList->Back(); +    } +    return *this; +} + +template<typename T, typename AllocatorT> +typename VmaList<T, AllocatorT>::reverse_iterator& VmaList<T, AllocatorT>::reverse_iterator::operator--() +{ +    if (m_pItem != VMA_NULL) +    { +        m_pItem = m_pItem->pNext; +    } +    else +    { +        VMA_HEAVY_ASSERT(!m_pList->IsEmpty()); +        m_pItem = m_pList->Front(); +    } +    return *this; +} + +template<typename T, typename AllocatorT> +typename VmaList<T, AllocatorT>::const_iterator& VmaList<T, AllocatorT>::const_iterator::operator--() +{ +    if (m_pItem != VMA_NULL) +    { +        m_pItem = m_pItem->pPrev; +    } +    else +    { +        VMA_HEAVY_ASSERT(!m_pList->IsEmpty()); +        m_pItem = m_pList->Back(); +    } +    return *this; +} + +template<typename T, typename AllocatorT> +typename 
VmaList<T, AllocatorT>::const_reverse_iterator& VmaList<T, AllocatorT>::const_reverse_iterator::operator--() +{ +    if (m_pItem != VMA_NULL) +    { +        m_pItem = m_pItem->pNext; +    } +    else +    { +        VMA_HEAVY_ASSERT(!m_pList->IsEmpty()); +        m_pItem = m_pList->Back(); +    } +    return *this; +} +#endif // _VMA_LIST_FUNCTIONS +#endif // _VMA_LIST + +#ifndef _VMA_INTRUSIVE_LINKED_LIST +/* +Expected interface of ItemTypeTraits: +struct MyItemTypeTraits +{ +    typedef MyItem ItemType; +    static ItemType* GetPrev(const ItemType* item) { return item->myPrevPtr; } +    static ItemType* GetNext(const ItemType* item) { return item->myNextPtr; } +    static ItemType*& AccessPrev(ItemType* item) { return item->myPrevPtr; } +    static ItemType*& AccessNext(ItemType* item) { return item->myNextPtr; } +}; +*/ +template<typename ItemTypeTraits> +class VmaIntrusiveLinkedList +{ +public: +    typedef typename ItemTypeTraits::ItemType ItemType; +    static ItemType* GetPrev(const ItemType* item) { return ItemTypeTraits::GetPrev(item); } +    static ItemType* GetNext(const ItemType* item) { return ItemTypeTraits::GetNext(item); } + +    // Movable, not copyable. +    VmaIntrusiveLinkedList() = default; +    VmaIntrusiveLinkedList(VmaIntrusiveLinkedList && src); +    VmaIntrusiveLinkedList(const VmaIntrusiveLinkedList&) = delete; +    VmaIntrusiveLinkedList& operator=(VmaIntrusiveLinkedList&& src); +    VmaIntrusiveLinkedList& operator=(const VmaIntrusiveLinkedList&) = delete; +    ~VmaIntrusiveLinkedList() { VMA_HEAVY_ASSERT(IsEmpty()); } + +    size_t GetCount() const { return m_Count; } +    bool IsEmpty() const { return m_Count == 0; } +    ItemType* Front() { return m_Front; } +    ItemType* Back() { return m_Back; } +    const ItemType* Front() const { return m_Front; } +    const ItemType* Back() const { return m_Back; } + +    void PushBack(ItemType* item); +    void PushFront(ItemType* item); +    ItemType* PopBack(); +    ItemType* PopFront(); + +    // MyItem can be null - it means PushBack. +    void InsertBefore(ItemType* existingItem, ItemType* newItem); +    // MyItem can be null - it means PushFront. 
+    void InsertAfter(ItemType* existingItem, ItemType* newItem); +    void Remove(ItemType* item); +    void RemoveAll(); + +private: +    ItemType* m_Front = VMA_NULL; +    ItemType* m_Back = VMA_NULL; +    size_t m_Count = 0; +}; + +#ifndef _VMA_INTRUSIVE_LINKED_LIST_FUNCTIONS +template<typename ItemTypeTraits> +VmaIntrusiveLinkedList<ItemTypeTraits>::VmaIntrusiveLinkedList(VmaIntrusiveLinkedList&& src) +    : m_Front(src.m_Front), m_Back(src.m_Back), m_Count(src.m_Count) +{ +    src.m_Front = src.m_Back = VMA_NULL; +    src.m_Count = 0; +} + +template<typename ItemTypeTraits> +VmaIntrusiveLinkedList<ItemTypeTraits>& VmaIntrusiveLinkedList<ItemTypeTraits>::operator=(VmaIntrusiveLinkedList&& src) +{ +    if (&src != this) +    { +        VMA_HEAVY_ASSERT(IsEmpty()); +        m_Front = src.m_Front; +        m_Back = src.m_Back; +        m_Count = src.m_Count; +        src.m_Front = src.m_Back = VMA_NULL; +        src.m_Count = 0; +    } +    return *this; +} + +template<typename ItemTypeTraits> +void VmaIntrusiveLinkedList<ItemTypeTraits>::PushBack(ItemType* item) +{ +    VMA_HEAVY_ASSERT(ItemTypeTraits::GetPrev(item) == VMA_NULL && ItemTypeTraits::GetNext(item) == VMA_NULL); +    if (IsEmpty()) +    { +        m_Front = item; +        m_Back = item; +        m_Count = 1; +    } +    else +    { +        ItemTypeTraits::AccessPrev(item) = m_Back; +        ItemTypeTraits::AccessNext(m_Back) = item; +        m_Back = item; +        ++m_Count; +    } +} + +template<typename ItemTypeTraits> +void VmaIntrusiveLinkedList<ItemTypeTraits>::PushFront(ItemType* item) +{ +    VMA_HEAVY_ASSERT(ItemTypeTraits::GetPrev(item) == VMA_NULL && ItemTypeTraits::GetNext(item) == VMA_NULL); +    if (IsEmpty()) +    { +        m_Front = item; +        m_Back = item; +        m_Count = 1; +    } +    else +    { +        ItemTypeTraits::AccessNext(item) = m_Front; +        ItemTypeTraits::AccessPrev(m_Front) = item; +        m_Front = item; +        ++m_Count; +    } +} + +template<typename ItemTypeTraits> +typename VmaIntrusiveLinkedList<ItemTypeTraits>::ItemType* VmaIntrusiveLinkedList<ItemTypeTraits>::PopBack() +{ +    VMA_HEAVY_ASSERT(m_Count > 0); +    ItemType* const backItem = m_Back; +    ItemType* const prevItem = ItemTypeTraits::GetPrev(backItem); +    if (prevItem != VMA_NULL) +    { +        ItemTypeTraits::AccessNext(prevItem) = VMA_NULL; +    } +    m_Back = prevItem; +    --m_Count; +    ItemTypeTraits::AccessPrev(backItem) = VMA_NULL; +    ItemTypeTraits::AccessNext(backItem) = VMA_NULL; +    return backItem; +} + +template<typename ItemTypeTraits> +typename VmaIntrusiveLinkedList<ItemTypeTraits>::ItemType* VmaIntrusiveLinkedList<ItemTypeTraits>::PopFront() +{ +    VMA_HEAVY_ASSERT(m_Count > 0); +    ItemType* const frontItem = m_Front; +    ItemType* const nextItem = ItemTypeTraits::GetNext(frontItem); +    if (nextItem != VMA_NULL) +    { +        ItemTypeTraits::AccessPrev(nextItem) = VMA_NULL; +    } +    m_Front = nextItem; +    --m_Count; +    ItemTypeTraits::AccessPrev(frontItem) = VMA_NULL; +    ItemTypeTraits::AccessNext(frontItem) = VMA_NULL; +    return frontItem; +} + +template<typename ItemTypeTraits> +void VmaIntrusiveLinkedList<ItemTypeTraits>::InsertBefore(ItemType* existingItem, ItemType* newItem) +{ +    VMA_HEAVY_ASSERT(newItem != VMA_NULL && ItemTypeTraits::GetPrev(newItem) == VMA_NULL && ItemTypeTraits::GetNext(newItem) == VMA_NULL); +    if (existingItem != VMA_NULL) +    { +        ItemType* const prevItem = ItemTypeTraits::GetPrev(existingItem); +        
ItemTypeTraits::AccessPrev(newItem) = prevItem; +        ItemTypeTraits::AccessNext(newItem) = existingItem; +        ItemTypeTraits::AccessPrev(existingItem) = newItem; +        if (prevItem != VMA_NULL) +        { +            ItemTypeTraits::AccessNext(prevItem) = newItem; +        } +        else +        { +            VMA_HEAVY_ASSERT(m_Front == existingItem); +            m_Front = newItem; +        } +        ++m_Count; +    } +    else +        PushBack(newItem); +} + +template<typename ItemTypeTraits> +void VmaIntrusiveLinkedList<ItemTypeTraits>::InsertAfter(ItemType* existingItem, ItemType* newItem) +{ +    VMA_HEAVY_ASSERT(newItem != VMA_NULL && ItemTypeTraits::GetPrev(newItem) == VMA_NULL && ItemTypeTraits::GetNext(newItem) == VMA_NULL); +    if (existingItem != VMA_NULL) +    { +        ItemType* const nextItem = ItemTypeTraits::GetNext(existingItem); +        ItemTypeTraits::AccessNext(newItem) = nextItem; +        ItemTypeTraits::AccessPrev(newItem) = existingItem; +        ItemTypeTraits::AccessNext(existingItem) = newItem; +        if (nextItem != VMA_NULL) +        { +            ItemTypeTraits::AccessPrev(nextItem) = newItem; +        } +        else +        { +            VMA_HEAVY_ASSERT(m_Back == existingItem); +            m_Back = newItem; +        } +        ++m_Count; +    } +    else +        return PushFront(newItem); +} + +template<typename ItemTypeTraits> +void VmaIntrusiveLinkedList<ItemTypeTraits>::Remove(ItemType* item) +{ +    VMA_HEAVY_ASSERT(item != VMA_NULL && m_Count > 0); +    if (ItemTypeTraits::GetPrev(item) != VMA_NULL) +    { +        ItemTypeTraits::AccessNext(ItemTypeTraits::AccessPrev(item)) = ItemTypeTraits::GetNext(item); +    } +    else +    { +        VMA_HEAVY_ASSERT(m_Front == item); +        m_Front = ItemTypeTraits::GetNext(item); +    } + +    if (ItemTypeTraits::GetNext(item) != VMA_NULL) +    { +        ItemTypeTraits::AccessPrev(ItemTypeTraits::AccessNext(item)) = ItemTypeTraits::GetPrev(item); +    } +    else +    { +        VMA_HEAVY_ASSERT(m_Back == item); +        m_Back = ItemTypeTraits::GetPrev(item); +    } +    ItemTypeTraits::AccessPrev(item) = VMA_NULL; +    ItemTypeTraits::AccessNext(item) = VMA_NULL; +    --m_Count; +} + +template<typename ItemTypeTraits> +void VmaIntrusiveLinkedList<ItemTypeTraits>::RemoveAll() +{ +    if (!IsEmpty()) +    { +        ItemType* item = m_Back; +        while (item != VMA_NULL) +        { +            ItemType* const prevItem = ItemTypeTraits::AccessPrev(item); +            ItemTypeTraits::AccessPrev(item) = VMA_NULL; +            ItemTypeTraits::AccessNext(item) = VMA_NULL; +            item = prevItem; +        } +        m_Front = VMA_NULL; +        m_Back = VMA_NULL; +        m_Count = 0; +    } +} +#endif // _VMA_INTRUSIVE_LINKED_LIST_FUNCTIONS +#endif // _VMA_INTRUSIVE_LINKED_LIST + +#if !defined(_VMA_STRING_BUILDER) && VMA_STATS_STRING_ENABLED +class VmaStringBuilder +{ +public: +    VmaStringBuilder(const VkAllocationCallbacks* allocationCallbacks) : m_Data(VmaStlAllocator<char>(allocationCallbacks)) {} +    ~VmaStringBuilder() = default; + +    size_t GetLength() const { return m_Data.size(); } +    const char* GetData() const { return m_Data.data(); } +    void AddNewLine() { Add('\n'); } +    void Add(char ch) { m_Data.push_back(ch); } + +    void Add(const char* pStr); +    void AddNumber(uint32_t num); +    void AddNumber(uint64_t num); +    void AddPointer(const void* ptr); + +private: +    VmaVector<char, VmaStlAllocator<char>> m_Data; +}; + +#ifndef 
_VMA_STRING_BUILDER_FUNCTIONS +void VmaStringBuilder::Add(const char* pStr) +{ +    const size_t strLen = strlen(pStr); +    if (strLen > 0) +    { +        const size_t oldCount = m_Data.size(); +        m_Data.resize(oldCount + strLen); +        memcpy(m_Data.data() + oldCount, pStr, strLen); +    } +} + +void VmaStringBuilder::AddNumber(uint32_t num) +{ +    char buf[11]; +    buf[10] = '\0'; +    char* p = &buf[10]; +    do +    { +        *--p = '0' + (char)(num % 10); +        num /= 10; +    } while (num); +    Add(p); +} + +void VmaStringBuilder::AddNumber(uint64_t num) +{ +    char buf[21]; +    buf[20] = '\0'; +    char* p = &buf[20]; +    do +    { +        *--p = '0' + (char)(num % 10); +        num /= 10; +    } while (num); +    Add(p); +} + +void VmaStringBuilder::AddPointer(const void* ptr) +{ +    char buf[21]; +    VmaPtrToStr(buf, sizeof(buf), ptr); +    Add(buf); +} +#endif //_VMA_STRING_BUILDER_FUNCTIONS +#endif // _VMA_STRING_BUILDER + +#if !defined(_VMA_JSON_WRITER) && VMA_STATS_STRING_ENABLED +/* +Allows to conveniently build a correct JSON document to be written to the +VmaStringBuilder passed to the constructor. +*/ +class VmaJsonWriter +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaJsonWriter) +public: +    // sb - string builder to write the document to. Must remain alive for the whole lifetime of this object. +    VmaJsonWriter(const VkAllocationCallbacks* pAllocationCallbacks, VmaStringBuilder& sb); +    ~VmaJsonWriter(); + +    // Begins object by writing "{". +    // Inside an object, you must call pairs of WriteString and a value, e.g.: +    // j.BeginObject(true); j.WriteString("A"); j.WriteNumber(1); j.WriteString("B"); j.WriteNumber(2); j.EndObject(); +    // Will write: { "A": 1, "B": 2 } +    void BeginObject(bool singleLine = false); +    // Ends object by writing "}". +    void EndObject(); + +    // Begins array by writing "[". +    // Inside an array, you can write a sequence of any values. +    void BeginArray(bool singleLine = false); +    // Ends array by writing "[". +    void EndArray(); + +    // Writes a string value inside "". +    // pStr can contain any ANSI characters, including '"', new line etc. - they will be properly escaped. +    void WriteString(const char* pStr); + +    // Begins writing a string value. +    // Call BeginString, ContinueString, ContinueString, ..., EndString instead of +    // WriteString to conveniently build the string content incrementally, made of +    // parts including numbers. +    void BeginString(const char* pStr = VMA_NULL); +    // Posts next part of an open string. +    void ContinueString(const char* pStr); +    // Posts next part of an open string. The number is converted to decimal characters. +    void ContinueString(uint32_t n); +    void ContinueString(uint64_t n); +    // Posts next part of an open string. Pointer value is converted to characters +    // using "%p" formatting - shown as hexadecimal number, e.g.: 000000081276Ad00 +    void ContinueString_Pointer(const void* ptr); +    // Ends writing a string value by writing '"'. +    void EndString(const char* pStr = VMA_NULL); + +    // Writes a number value. +    void WriteNumber(uint32_t n); +    void WriteNumber(uint64_t n); +    // Writes a boolean value - false or true. +    void WriteBool(bool b); +    // Writes a null value. 
+    void WriteNull(); + +private: +    enum COLLECTION_TYPE +    { +        COLLECTION_TYPE_OBJECT, +        COLLECTION_TYPE_ARRAY, +    }; +    struct StackItem +    { +        COLLECTION_TYPE type; +        uint32_t valueCount; +        bool singleLineMode; +    }; + +    static const char* const INDENT; + +    VmaStringBuilder& m_SB; +    VmaVector< StackItem, VmaStlAllocator<StackItem> > m_Stack; +    bool m_InsideString; + +    void BeginValue(bool isString); +    void WriteIndent(bool oneLess = false); +}; +const char* const VmaJsonWriter::INDENT = "  "; + +#ifndef _VMA_JSON_WRITER_FUNCTIONS +VmaJsonWriter::VmaJsonWriter(const VkAllocationCallbacks* pAllocationCallbacks, VmaStringBuilder& sb) +    : m_SB(sb), +    m_Stack(VmaStlAllocator<StackItem>(pAllocationCallbacks)), +    m_InsideString(false) {} + +VmaJsonWriter::~VmaJsonWriter() +{ +    VMA_ASSERT(!m_InsideString); +    VMA_ASSERT(m_Stack.empty()); +} + +void VmaJsonWriter::BeginObject(bool singleLine) +{ +    VMA_ASSERT(!m_InsideString); + +    BeginValue(false); +    m_SB.Add('{'); + +    StackItem item; +    item.type = COLLECTION_TYPE_OBJECT; +    item.valueCount = 0; +    item.singleLineMode = singleLine; +    m_Stack.push_back(item); +} + +void VmaJsonWriter::EndObject() +{ +    VMA_ASSERT(!m_InsideString); + +    WriteIndent(true); +    m_SB.Add('}'); + +    VMA_ASSERT(!m_Stack.empty() && m_Stack.back().type == COLLECTION_TYPE_OBJECT); +    m_Stack.pop_back(); +} + +void VmaJsonWriter::BeginArray(bool singleLine) +{ +    VMA_ASSERT(!m_InsideString); + +    BeginValue(false); +    m_SB.Add('['); + +    StackItem item; +    item.type = COLLECTION_TYPE_ARRAY; +    item.valueCount = 0; +    item.singleLineMode = singleLine; +    m_Stack.push_back(item); +} + +void VmaJsonWriter::EndArray() +{ +    VMA_ASSERT(!m_InsideString); + +    WriteIndent(true); +    m_SB.Add(']'); + +    VMA_ASSERT(!m_Stack.empty() && m_Stack.back().type == COLLECTION_TYPE_ARRAY); +    m_Stack.pop_back(); +} + +void VmaJsonWriter::WriteString(const char* pStr) +{ +    BeginString(pStr); +    EndString(); +} + +void VmaJsonWriter::BeginString(const char* pStr) +{ +    VMA_ASSERT(!m_InsideString); + +    BeginValue(true); +    m_SB.Add('"'); +    m_InsideString = true; +    if (pStr != VMA_NULL && pStr[0] != '\0') +    { +        ContinueString(pStr); +    } +} + +void VmaJsonWriter::ContinueString(const char* pStr) +{ +    VMA_ASSERT(m_InsideString); + +    const size_t strLen = strlen(pStr); +    for (size_t i = 0; i < strLen; ++i) +    { +        char ch = pStr[i]; +        if (ch == '\\') +        { +            m_SB.Add("\\\\"); +        } +        else if (ch == '"') +        { +            m_SB.Add("\\\""); +        } +        else if ((uint8_t)ch >= 32) +        { +            m_SB.Add(ch); +        } +        else switch (ch) +        { +        case '\b': +            m_SB.Add("\\b"); +            break; +        case '\f': +            m_SB.Add("\\f"); +            break; +        case '\n': +            m_SB.Add("\\n"); +            break; +        case '\r': +            m_SB.Add("\\r"); +            break; +        case '\t': +            m_SB.Add("\\t"); +            break; +        default: +            VMA_ASSERT(0 && "Character not currently supported."); +        } +    } +} + +void VmaJsonWriter::ContinueString(uint32_t n) +{ +    VMA_ASSERT(m_InsideString); +    m_SB.AddNumber(n); +} + +void VmaJsonWriter::ContinueString(uint64_t n) +{ +    VMA_ASSERT(m_InsideString); +    m_SB.AddNumber(n); +} + +void 
VmaJsonWriter::ContinueString_Pointer(const void* ptr) +{ +    VMA_ASSERT(m_InsideString); +    m_SB.AddPointer(ptr); +} + +void VmaJsonWriter::EndString(const char* pStr) +{ +    VMA_ASSERT(m_InsideString); +    if (pStr != VMA_NULL && pStr[0] != '\0') +    { +        ContinueString(pStr); +    } +    m_SB.Add('"'); +    m_InsideString = false; +} + +void VmaJsonWriter::WriteNumber(uint32_t n) +{ +    VMA_ASSERT(!m_InsideString); +    BeginValue(false); +    m_SB.AddNumber(n); +} + +void VmaJsonWriter::WriteNumber(uint64_t n) +{ +    VMA_ASSERT(!m_InsideString); +    BeginValue(false); +    m_SB.AddNumber(n); +} + +void VmaJsonWriter::WriteBool(bool b) +{ +    VMA_ASSERT(!m_InsideString); +    BeginValue(false); +    m_SB.Add(b ? "true" : "false"); +} + +void VmaJsonWriter::WriteNull() +{ +    VMA_ASSERT(!m_InsideString); +    BeginValue(false); +    m_SB.Add("null"); +} + +void VmaJsonWriter::BeginValue(bool isString) +{ +    if (!m_Stack.empty()) +    { +        StackItem& currItem = m_Stack.back(); +        if (currItem.type == COLLECTION_TYPE_OBJECT && +            currItem.valueCount % 2 == 0) +        { +            VMA_ASSERT(isString); +        } + +        if (currItem.type == COLLECTION_TYPE_OBJECT && +            currItem.valueCount % 2 != 0) +        { +            m_SB.Add(": "); +        } +        else if (currItem.valueCount > 0) +        { +            m_SB.Add(", "); +            WriteIndent(); +        } +        else +        { +            WriteIndent(); +        } +        ++currItem.valueCount; +    } +} + +void VmaJsonWriter::WriteIndent(bool oneLess) +{ +    if (!m_Stack.empty() && !m_Stack.back().singleLineMode) +    { +        m_SB.AddNewLine(); + +        size_t count = m_Stack.size(); +        if (count > 0 && oneLess) +        { +            --count; +        } +        for (size_t i = 0; i < count; ++i) +        { +            m_SB.Add(INDENT); +        } +    } +} +#endif // _VMA_JSON_WRITER_FUNCTIONS + +static void VmaPrintDetailedStatistics(VmaJsonWriter& json, const VmaDetailedStatistics& stat) +{ +    json.BeginObject(); + +    json.WriteString("BlockCount"); +    json.WriteNumber(stat.statistics.blockCount); +    json.WriteString("BlockBytes"); +    json.WriteNumber(stat.statistics.blockBytes); +    json.WriteString("AllocationCount"); +    json.WriteNumber(stat.statistics.allocationCount); +    json.WriteString("AllocationBytes"); +    json.WriteNumber(stat.statistics.allocationBytes); +    json.WriteString("UnusedRangeCount"); +    json.WriteNumber(stat.unusedRangeCount); + +    if (stat.statistics.allocationCount > 1) +    { +        json.WriteString("AllocationSizeMin"); +        json.WriteNumber(stat.allocationSizeMin); +        json.WriteString("AllocationSizeMax"); +        json.WriteNumber(stat.allocationSizeMax); +    } +    if (stat.unusedRangeCount > 1) +    { +        json.WriteString("UnusedRangeSizeMin"); +        json.WriteNumber(stat.unusedRangeSizeMin); +        json.WriteString("UnusedRangeSizeMax"); +        json.WriteNumber(stat.unusedRangeSizeMax); +    } +    json.EndObject(); +} +#endif // _VMA_JSON_WRITER + +#ifndef _VMA_MAPPING_HYSTERESIS + +class VmaMappingHysteresis +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaMappingHysteresis) +public: +    VmaMappingHysteresis() = default; + +    uint32_t GetExtraMapping() const { return m_ExtraMapping; } + +    // Call when Map was called. +    // Returns true if switched to extra +1 mapping reference count. 
+    bool PostMap() +    { +#if VMA_MAPPING_HYSTERESIS_ENABLED +        if(m_ExtraMapping == 0) +        { +            ++m_MajorCounter; +            if(m_MajorCounter >= COUNTER_MIN_EXTRA_MAPPING) +            { +                m_ExtraMapping = 1; +                m_MajorCounter = 0; +                m_MinorCounter = 0; +                return true; +            } +        } +        else // m_ExtraMapping == 1 +            PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED +        return false; +    } + +    // Call when Unmap was called. +    void PostUnmap() +    { +#if VMA_MAPPING_HYSTERESIS_ENABLED +        if(m_ExtraMapping == 0) +            ++m_MajorCounter; +        else // m_ExtraMapping == 1 +            PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED +    } + +    // Call when allocation was made from the memory block. +    void PostAlloc() +    { +#if VMA_MAPPING_HYSTERESIS_ENABLED +        if(m_ExtraMapping == 1) +            ++m_MajorCounter; +        else // m_ExtraMapping == 0 +            PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED +    } + +    // Call when allocation was freed from the memory block. +    // Returns true if switched to extra -1 mapping reference count. +    bool PostFree() +    { +#if VMA_MAPPING_HYSTERESIS_ENABLED +        if(m_ExtraMapping == 1) +        { +            ++m_MajorCounter; +            if(m_MajorCounter >= COUNTER_MIN_EXTRA_MAPPING && +                m_MajorCounter > m_MinorCounter + 1) +            { +                m_ExtraMapping = 0; +                m_MajorCounter = 0; +                m_MinorCounter = 0; +                return true; +            } +        } +        else // m_ExtraMapping == 0 +            PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED +        return false; +    } + +private: +    static const int32_t COUNTER_MIN_EXTRA_MAPPING = 7; + +    uint32_t m_MinorCounter = 0; +    uint32_t m_MajorCounter = 0; +    uint32_t m_ExtraMapping = 0; // 0 or 1. + +    void PostMinorCounter() +    { +        if(m_MinorCounter < m_MajorCounter) +        { +            ++m_MinorCounter; +        } +        else if(m_MajorCounter > 0) +        { +            --m_MajorCounter; +            --m_MinorCounter; +        } +    } +}; + +#endif // _VMA_MAPPING_HYSTERESIS + +#if VMA_EXTERNAL_MEMORY_WIN32 +class VmaWin32Handle +{ +public: +    VmaWin32Handle() noexcept : m_hHandle(VMA_NULL) { } +    explicit VmaWin32Handle(HANDLE hHandle) noexcept : m_hHandle(hHandle) { } +    ~VmaWin32Handle() noexcept { if (m_hHandle != VMA_NULL) { ::CloseHandle(m_hHandle); } } +    VMA_CLASS_NO_COPY_NO_MOVE(VmaWin32Handle) + +public: +    // Strengthened +    VkResult GetHandle(VkDevice device, VkDeviceMemory memory, PFN_vkGetMemoryWin32HandleKHR pvkGetMemoryWin32HandleKHR, HANDLE hTargetProcess, bool useMutex, HANDLE* pHandle) noexcept +    { +        *pHandle = VMA_NULL; +        // Try to get handle first. +        if (m_hHandle != VMA_NULL) +        { +            *pHandle = Duplicate(hTargetProcess); +            return VK_SUCCESS; +        } + +        VkResult res = VK_SUCCESS; +        // If failed, try to create it. 
+        { +            VmaMutexLockWrite lock(m_Mutex, useMutex); +            if (m_hHandle == VMA_NULL) +            { +                res = Create(device, memory, pvkGetMemoryWin32HandleKHR, &m_hHandle); +            } +        } + +        *pHandle = Duplicate(hTargetProcess); +        return res; +    } + +    operator bool() const noexcept { return m_hHandle != VMA_NULL; } +private: +    // Not atomic +    static VkResult Create(VkDevice device, VkDeviceMemory memory, PFN_vkGetMemoryWin32HandleKHR pvkGetMemoryWin32HandleKHR, HANDLE* pHandle) noexcept +    { +        VkResult res = VK_ERROR_FEATURE_NOT_PRESENT; +        if (pvkGetMemoryWin32HandleKHR != VMA_NULL) +        { +            VkMemoryGetWin32HandleInfoKHR handleInfo{ }; +            handleInfo.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR; +            handleInfo.memory = memory; +            handleInfo.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR; +            res = pvkGetMemoryWin32HandleKHR(device, &handleInfo, pHandle); +        } +        return res; +    } +    HANDLE Duplicate(HANDLE hTargetProcess = VMA_NULL) const noexcept +    { +        if (!m_hHandle) +            return m_hHandle; + +        HANDLE hCurrentProcess = ::GetCurrentProcess(); +        HANDLE hDupHandle = VMA_NULL; +        if (!::DuplicateHandle(hCurrentProcess, m_hHandle, hTargetProcess ? hTargetProcess : hCurrentProcess, &hDupHandle, 0, FALSE, DUPLICATE_SAME_ACCESS)) +        { +            VMA_ASSERT(0 && "Failed to duplicate handle."); +        } +        return hDupHandle; +    } +private: +    HANDLE m_hHandle; +    VMA_RW_MUTEX m_Mutex; // Protects access m_Handle +}; +#else  +class VmaWin32Handle +{ +    // ABI compatibility +    void* placeholder = VMA_NULL; +    VMA_RW_MUTEX placeholder2; +}; +#endif // VMA_EXTERNAL_MEMORY_WIN32 + + +#ifndef _VMA_DEVICE_MEMORY_BLOCK +/* +Represents a single block of device memory (`VkDeviceMemory`) with all the +data about its regions (aka suballocations, #VmaAllocation), assigned and free. + +Thread-safety: +- Access to m_pMetadata must be externally synchronized. +- Map, Unmap, Bind* are synchronized internally. +*/ +class VmaDeviceMemoryBlock +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaDeviceMemoryBlock) +public: +    VmaBlockMetadata* m_pMetadata; + +    VmaDeviceMemoryBlock(VmaAllocator hAllocator); +    ~VmaDeviceMemoryBlock(); + +    // Always call after construction. +    void Init( +        VmaAllocator hAllocator, +        VmaPool hParentPool, +        uint32_t newMemoryTypeIndex, +        VkDeviceMemory newMemory, +        VkDeviceSize newSize, +        uint32_t id, +        uint32_t algorithm, +        VkDeviceSize bufferImageGranularity); +    // Always call before destruction. +    void Destroy(VmaAllocator allocator); + +    VmaPool GetParentPool() const { return m_hParentPool; } +    VkDeviceMemory GetDeviceMemory() const { return m_hMemory; } +    uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; } +    uint32_t GetId() const { return m_Id; } +    void* GetMappedData() const { return m_pMappedData; } +    uint32_t GetMapRefCount() const { return m_MapCount; } + +    // Call when allocation/free was made from m_pMetadata. +    // Used for m_MappingHysteresis. +    void PostAlloc(VmaAllocator hAllocator); +    void PostFree(VmaAllocator hAllocator); + +    // Validates all data structures inside this object. If not valid, returns false. +    bool Validate() const; +    VkResult CheckCorruption(VmaAllocator hAllocator); + +    // ppData can be null. 
+    VkResult Map(VmaAllocator hAllocator, uint32_t count, void** ppData); +    void Unmap(VmaAllocator hAllocator, uint32_t count); + +    VkResult WriteMagicValueAfterAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize); +    VkResult ValidateMagicValueAfterAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize); + +    VkResult BindBufferMemory( +        const VmaAllocator hAllocator, +        const VmaAllocation hAllocation, +        VkDeviceSize allocationLocalOffset, +        VkBuffer hBuffer, +        const void* pNext); +    VkResult BindImageMemory( +        const VmaAllocator hAllocator, +        const VmaAllocation hAllocation, +        VkDeviceSize allocationLocalOffset, +        VkImage hImage, +        const void* pNext); +#if VMA_EXTERNAL_MEMORY_WIN32 +    VkResult CreateWin32Handle( +        const VmaAllocator hAllocator, +        PFN_vkGetMemoryWin32HandleKHR pvkGetMemoryWin32HandleKHR, +        HANDLE hTargetProcess, +        HANDLE* pHandle)noexcept; +#endif // VMA_EXTERNAL_MEMORY_WIN32 +private: +    VmaPool m_hParentPool; // VK_NULL_HANDLE if not belongs to custom pool. +    uint32_t m_MemoryTypeIndex; +    uint32_t m_Id; +    VkDeviceMemory m_hMemory; + +    /* +    Protects access to m_hMemory so it is not used by multiple threads simultaneously, e.g. vkMapMemory, vkBindBufferMemory. +    Also protects m_MapCount, m_pMappedData. +    Allocations, deallocations, any change in m_pMetadata is protected by parent's VmaBlockVector::m_Mutex. +    */ +    VMA_MUTEX m_MapAndBindMutex; +    VmaMappingHysteresis m_MappingHysteresis; +    uint32_t m_MapCount; +    void* m_pMappedData; + +    VmaWin32Handle m_Handle; +}; +#endif // _VMA_DEVICE_MEMORY_BLOCK + +#ifndef _VMA_ALLOCATION_T +struct VmaAllocationExtraData +{ +    void* m_pMappedData = VMA_NULL; // Not null means memory is mapped. +    VmaWin32Handle m_Handle; +}; + +struct VmaAllocation_T +{ +    friend struct VmaDedicatedAllocationListItemTraits; + +    enum FLAGS +    { +        FLAG_PERSISTENT_MAP   = 0x01, +        FLAG_MAPPING_ALLOWED  = 0x02, +    }; + +public: +    enum ALLOCATION_TYPE +    { +        ALLOCATION_TYPE_NONE, +        ALLOCATION_TYPE_BLOCK, +        ALLOCATION_TYPE_DEDICATED, +    }; + +    // This struct is allocated using VmaPoolAllocator. +    VmaAllocation_T(bool mappingAllowed); +    ~VmaAllocation_T(); + +    void InitBlockAllocation( +        VmaDeviceMemoryBlock* block, +        VmaAllocHandle allocHandle, +        VkDeviceSize alignment, +        VkDeviceSize size, +        uint32_t memoryTypeIndex, +        VmaSuballocationType suballocationType, +        bool mapped); +    // pMappedData not null means allocation is created with MAPPED flag. 
+    void InitDedicatedAllocation( +        VmaAllocator allocator, +        VmaPool hParentPool, +        uint32_t memoryTypeIndex, +        VkDeviceMemory hMemory, +        VmaSuballocationType suballocationType, +        void* pMappedData, +        VkDeviceSize size); +    void Destroy(VmaAllocator allocator); + +    ALLOCATION_TYPE GetType() const { return (ALLOCATION_TYPE)m_Type; } +    VkDeviceSize GetAlignment() const { return m_Alignment; } +    VkDeviceSize GetSize() const { return m_Size; } +    void* GetUserData() const { return m_pUserData; } +    const char* GetName() const { return m_pName; } +    VmaSuballocationType GetSuballocationType() const { return (VmaSuballocationType)m_SuballocationType; } + +    VmaDeviceMemoryBlock* GetBlock() const { VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK); return m_BlockAllocation.m_Block; } +    uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; } +    bool IsPersistentMap() const { return (m_Flags & FLAG_PERSISTENT_MAP) != 0; } +    bool IsMappingAllowed() const { return (m_Flags & FLAG_MAPPING_ALLOWED) != 0; } + +    void SetUserData(VmaAllocator hAllocator, void* pUserData) { m_pUserData = pUserData; } +    void SetName(VmaAllocator hAllocator, const char* pName); +    void FreeName(VmaAllocator hAllocator); +    uint8_t SwapBlockAllocation(VmaAllocator hAllocator, VmaAllocation allocation); +    VmaAllocHandle GetAllocHandle() const; +    VkDeviceSize GetOffset() const; +    VmaPool GetParentPool() const; +    VkDeviceMemory GetMemory() const; +    void* GetMappedData() const; + +    void BlockAllocMap(); +    void BlockAllocUnmap(); +    VkResult DedicatedAllocMap(VmaAllocator hAllocator, void** ppData); +    void DedicatedAllocUnmap(VmaAllocator hAllocator); + +#if VMA_STATS_STRING_ENABLED +    VmaBufferImageUsage GetBufferImageUsage() const { return m_BufferImageUsage; } +    void InitBufferUsage(const VkBufferCreateInfo &createInfo, bool useKhrMaintenance5) +    { +        VMA_ASSERT(m_BufferImageUsage == VmaBufferImageUsage::UNKNOWN); +        m_BufferImageUsage = VmaBufferImageUsage(createInfo, useKhrMaintenance5); +    } +    void InitImageUsage(const VkImageCreateInfo &createInfo) +    { +        VMA_ASSERT(m_BufferImageUsage == VmaBufferImageUsage::UNKNOWN); +        m_BufferImageUsage = VmaBufferImageUsage(createInfo); +    } +    void PrintParameters(class VmaJsonWriter& json) const; +#endif + +#if VMA_EXTERNAL_MEMORY_WIN32 +    VkResult GetWin32Handle(VmaAllocator hAllocator, HANDLE hTargetProcess, HANDLE* hHandle) noexcept; +#endif // VMA_EXTERNAL_MEMORY_WIN32 + +private: +    // Allocation out of VmaDeviceMemoryBlock. +    struct BlockAllocation +    { +        VmaDeviceMemoryBlock* m_Block; +        VmaAllocHandle m_AllocHandle; +    }; +    // Allocation for an object that has its own private VkDeviceMemory. +    struct DedicatedAllocation +    { +        VmaPool m_hParentPool; // VK_NULL_HANDLE if not belongs to custom pool. +        VkDeviceMemory m_hMemory; +        VmaAllocationExtraData* m_ExtraData; +        VmaAllocation_T* m_Prev; +        VmaAllocation_T* m_Next; +    }; +    union +    { +        // Allocation out of VmaDeviceMemoryBlock. +        BlockAllocation m_BlockAllocation; +        // Allocation for an object that has its own private VkDeviceMemory. 
+        DedicatedAllocation m_DedicatedAllocation; +    }; + +    VkDeviceSize m_Alignment; +    VkDeviceSize m_Size; +    void* m_pUserData; +    char* m_pName; +    uint32_t m_MemoryTypeIndex; +    uint8_t m_Type; // ALLOCATION_TYPE +    uint8_t m_SuballocationType; // VmaSuballocationType +    // Reference counter for vmaMapMemory()/vmaUnmapMemory(). +    uint8_t m_MapCount; +    uint8_t m_Flags; // enum FLAGS +#if VMA_STATS_STRING_ENABLED +    VmaBufferImageUsage m_BufferImageUsage; // 0 if unknown. +#endif + +    void EnsureExtraData(VmaAllocator hAllocator); +}; +#endif // _VMA_ALLOCATION_T + +#ifndef _VMA_DEDICATED_ALLOCATION_LIST_ITEM_TRAITS +struct VmaDedicatedAllocationListItemTraits +{ +    typedef VmaAllocation_T ItemType; + +    static ItemType* GetPrev(const ItemType* item) +    { +        VMA_HEAVY_ASSERT(item->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); +        return item->m_DedicatedAllocation.m_Prev; +    } +    static ItemType* GetNext(const ItemType* item) +    { +        VMA_HEAVY_ASSERT(item->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); +        return item->m_DedicatedAllocation.m_Next; +    } +    static ItemType*& AccessPrev(ItemType* item) +    { +        VMA_HEAVY_ASSERT(item->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); +        return item->m_DedicatedAllocation.m_Prev; +    } +    static ItemType*& AccessNext(ItemType* item) +    { +        VMA_HEAVY_ASSERT(item->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); +        return item->m_DedicatedAllocation.m_Next; +    } +}; +#endif // _VMA_DEDICATED_ALLOCATION_LIST_ITEM_TRAITS + +#ifndef _VMA_DEDICATED_ALLOCATION_LIST +/* +Stores linked list of VmaAllocation_T objects. +Thread-safe, synchronized internally. +*/ +class VmaDedicatedAllocationList +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaDedicatedAllocationList) +public: +    VmaDedicatedAllocationList() {} +    ~VmaDedicatedAllocationList(); + +    void Init(bool useMutex) { m_UseMutex = useMutex; } +    bool Validate(); + +    void AddDetailedStatistics(VmaDetailedStatistics& inoutStats); +    void AddStatistics(VmaStatistics& inoutStats); +#if VMA_STATS_STRING_ENABLED +    // Writes JSON array with the list of allocations. 
+    void BuildStatsString(VmaJsonWriter& json); +#endif + +    bool IsEmpty(); +    void Register(VmaAllocation alloc); +    void Unregister(VmaAllocation alloc); + +private: +    typedef VmaIntrusiveLinkedList<VmaDedicatedAllocationListItemTraits> DedicatedAllocationLinkedList; + +    bool m_UseMutex = true; +    VMA_RW_MUTEX m_Mutex; +    DedicatedAllocationLinkedList m_AllocationList; +}; + +#ifndef _VMA_DEDICATED_ALLOCATION_LIST_FUNCTIONS + +VmaDedicatedAllocationList::~VmaDedicatedAllocationList() +{ +    VMA_HEAVY_ASSERT(Validate()); + +    if (!m_AllocationList.IsEmpty()) +    { +        VMA_ASSERT_LEAK(false && "Unfreed dedicated allocations found!"); +    } +} + +bool VmaDedicatedAllocationList::Validate() +{ +    const size_t declaredCount = m_AllocationList.GetCount(); +    size_t actualCount = 0; +    VmaMutexLockRead lock(m_Mutex, m_UseMutex); +    for (VmaAllocation alloc = m_AllocationList.Front(); +        alloc != VMA_NULL; alloc = m_AllocationList.GetNext(alloc)) +    { +        ++actualCount; +    } +    VMA_VALIDATE(actualCount == declaredCount); + +    return true; +} + +void VmaDedicatedAllocationList::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) +{ +    for(auto* item = m_AllocationList.Front(); item != VMA_NULL; item = DedicatedAllocationLinkedList::GetNext(item)) +    { +        const VkDeviceSize size = item->GetSize(); +        inoutStats.statistics.blockCount++; +        inoutStats.statistics.blockBytes += size; +        VmaAddDetailedStatisticsAllocation(inoutStats, item->GetSize()); +    } +} + +void VmaDedicatedAllocationList::AddStatistics(VmaStatistics& inoutStats) +{ +    VmaMutexLockRead lock(m_Mutex, m_UseMutex); + +    const uint32_t allocCount = (uint32_t)m_AllocationList.GetCount(); +    inoutStats.blockCount += allocCount; +    inoutStats.allocationCount += allocCount; + +    for(auto* item = m_AllocationList.Front(); item != VMA_NULL; item = DedicatedAllocationLinkedList::GetNext(item)) +    { +        const VkDeviceSize size = item->GetSize(); +        inoutStats.blockBytes += size; +        inoutStats.allocationBytes += size; +    } +} + +#if VMA_STATS_STRING_ENABLED +void VmaDedicatedAllocationList::BuildStatsString(VmaJsonWriter& json) +{ +    VmaMutexLockRead lock(m_Mutex, m_UseMutex); +    json.BeginArray(); +    for (VmaAllocation alloc = m_AllocationList.Front(); +        alloc != VMA_NULL; alloc = m_AllocationList.GetNext(alloc)) +    { +        json.BeginObject(true); +        alloc->PrintParameters(json); +        json.EndObject(); +    } +    json.EndArray(); +} +#endif // VMA_STATS_STRING_ENABLED + +bool VmaDedicatedAllocationList::IsEmpty() +{ +    VmaMutexLockRead lock(m_Mutex, m_UseMutex); +    return m_AllocationList.IsEmpty(); +} + +void VmaDedicatedAllocationList::Register(VmaAllocation alloc) +{ +    VmaMutexLockWrite lock(m_Mutex, m_UseMutex); +    m_AllocationList.PushBack(alloc); +} + +void VmaDedicatedAllocationList::Unregister(VmaAllocation alloc) +{ +    VmaMutexLockWrite lock(m_Mutex, m_UseMutex); +    m_AllocationList.Remove(alloc); +} +#endif // _VMA_DEDICATED_ALLOCATION_LIST_FUNCTIONS +#endif // _VMA_DEDICATED_ALLOCATION_LIST + +#ifndef _VMA_SUBALLOCATION +/* +Represents a region of VmaDeviceMemoryBlock that is either assigned and returned as +allocated memory block or free. +*/ +struct VmaSuballocation +{ +    VkDeviceSize offset; +    VkDeviceSize size; +    void* userData; +    VmaSuballocationType type; +}; + +// Comparator for offsets. 
+struct VmaSuballocationOffsetLess +{ +    bool operator()(const VmaSuballocation& lhs, const VmaSuballocation& rhs) const +    { +        return lhs.offset < rhs.offset; +    } +}; + +struct VmaSuballocationOffsetGreater +{ +    bool operator()(const VmaSuballocation& lhs, const VmaSuballocation& rhs) const +    { +        return lhs.offset > rhs.offset; +    } +}; + +struct VmaSuballocationItemSizeLess +{ +    bool operator()(const VmaSuballocationList::iterator lhs, +        const VmaSuballocationList::iterator rhs) const +    { +        return lhs->size < rhs->size; +    } + +    bool operator()(const VmaSuballocationList::iterator lhs, +        VkDeviceSize rhsSize) const +    { +        return lhs->size < rhsSize; +    } +}; +#endif // _VMA_SUBALLOCATION + +#ifndef _VMA_ALLOCATION_REQUEST +/* +Parameters of planned allocation inside a VmaDeviceMemoryBlock. +item points to a FREE suballocation. +*/ +struct VmaAllocationRequest +{ +    VmaAllocHandle allocHandle; +    VkDeviceSize size; +    VmaSuballocationList::iterator item; +    void* customData; +    uint64_t algorithmData; +    VmaAllocationRequestType type; +}; +#endif // _VMA_ALLOCATION_REQUEST + +#ifndef _VMA_BLOCK_METADATA +/* +Data structure used for bookkeeping of allocations and unused ranges of memory +in a single VkDeviceMemory block. +*/ +class VmaBlockMetadata +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockMetadata) +public: +    // pAllocationCallbacks, if not null, must be owned externally - alive and unchanged for the whole lifetime of this object. +    VmaBlockMetadata(const VkAllocationCallbacks* pAllocationCallbacks, +        VkDeviceSize bufferImageGranularity, bool isVirtual); +    virtual ~VmaBlockMetadata() = default; + +    virtual void Init(VkDeviceSize size) { m_Size = size; } +    bool IsVirtual() const { return m_IsVirtual; } +    VkDeviceSize GetSize() const { return m_Size; } + +    // Validates all data structures inside this object. If not valid, returns false. +    virtual bool Validate() const = 0; +    virtual size_t GetAllocationCount() const = 0; +    virtual size_t GetFreeRegionsCount() const = 0; +    virtual VkDeviceSize GetSumFreeSize() const = 0; +    // Returns true if this block is empty - contains only single free suballocation. +    virtual bool IsEmpty() const = 0; +    virtual void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) = 0; +    virtual VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const = 0; +    virtual void* GetAllocationUserData(VmaAllocHandle allocHandle) const = 0; + +    virtual VmaAllocHandle GetAllocationListBegin() const = 0; +    virtual VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const = 0; +    virtual VkDeviceSize GetNextFreeRegionSize(VmaAllocHandle alloc) const = 0; + +    // Shouldn't modify blockCount. +    virtual void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const = 0; +    virtual void AddStatistics(VmaStatistics& inoutStats) const = 0; + +#if VMA_STATS_STRING_ENABLED +    virtual void PrintDetailedMap(class VmaJsonWriter& json) const = 0; +#endif + +    // Tries to find a place for suballocation with given parameters inside this block. +    // If succeeded, fills pAllocationRequest and returns true. +    // If failed, returns false. 
+    virtual bool CreateAllocationRequest( +        VkDeviceSize allocSize, +        VkDeviceSize allocAlignment, +        bool upperAddress, +        VmaSuballocationType allocType, +        // Always one of VMA_ALLOCATION_CREATE_STRATEGY_* or VMA_ALLOCATION_INTERNAL_STRATEGY_* flags. +        uint32_t strategy, +        VmaAllocationRequest* pAllocationRequest) = 0; + +    virtual VkResult CheckCorruption(const void* pBlockData) = 0; + +    // Makes actual allocation based on request. Request must already be checked and valid. +    virtual void Alloc( +        const VmaAllocationRequest& request, +        VmaSuballocationType type, +        void* userData) = 0; + +    // Frees suballocation assigned to given memory region. +    virtual void Free(VmaAllocHandle allocHandle) = 0; + +    // Frees all allocations. +    // Careful! Don't call it if there are VmaAllocation objects owned by userData of cleared allocations! +    virtual void Clear() = 0; + +    virtual void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) = 0; +    virtual void DebugLogAllAllocations() const = 0; + +protected: +    const VkAllocationCallbacks* GetAllocationCallbacks() const { return m_pAllocationCallbacks; } +    VkDeviceSize GetBufferImageGranularity() const { return m_BufferImageGranularity; } +    VkDeviceSize GetDebugMargin() const { return VkDeviceSize(IsVirtual() ? 0 : VMA_DEBUG_MARGIN); } + +    void DebugLogAllocation(VkDeviceSize offset, VkDeviceSize size, void* userData) const; +#if VMA_STATS_STRING_ENABLED +    // mapRefCount == UINT32_MAX means unspecified. +    void PrintDetailedMap_Begin(class VmaJsonWriter& json, +        VkDeviceSize unusedBytes, +        size_t allocationCount, +        size_t unusedRangeCount) const; +    void PrintDetailedMap_Allocation(class VmaJsonWriter& json, +        VkDeviceSize offset, VkDeviceSize size, void* userData) const; +    void PrintDetailedMap_UnusedRange(class VmaJsonWriter& json, +        VkDeviceSize offset, +        VkDeviceSize size) const; +    void PrintDetailedMap_End(class VmaJsonWriter& json) const; +#endif + +private: +    VkDeviceSize m_Size; +    const VkAllocationCallbacks* m_pAllocationCallbacks; +    const VkDeviceSize m_BufferImageGranularity; +    const bool m_IsVirtual; +}; + +#ifndef _VMA_BLOCK_METADATA_FUNCTIONS +VmaBlockMetadata::VmaBlockMetadata(const VkAllocationCallbacks* pAllocationCallbacks, +    VkDeviceSize bufferImageGranularity, bool isVirtual) +    : m_Size(0), +    m_pAllocationCallbacks(pAllocationCallbacks), +    m_BufferImageGranularity(bufferImageGranularity), +    m_IsVirtual(isVirtual) {} + +void VmaBlockMetadata::DebugLogAllocation(VkDeviceSize offset, VkDeviceSize size, void* userData) const +{ +    if (IsVirtual()) +    { +        VMA_LEAK_LOG_FORMAT("UNFREED VIRTUAL ALLOCATION; Offset: %" PRIu64 "; Size: %" PRIu64 "; UserData: %p", offset, size, userData); +    } +    else +    { +        VMA_ASSERT(userData != VMA_NULL); +        VmaAllocation allocation = reinterpret_cast<VmaAllocation>(userData); + +        userData = allocation->GetUserData(); +        const char* name = allocation->GetName(); + +#if VMA_STATS_STRING_ENABLED +        VMA_LEAK_LOG_FORMAT("UNFREED ALLOCATION; Offset: %" PRIu64 "; Size: %" PRIu64 "; UserData: %p; Name: %s; Type: %s; Usage: %" PRIu64, +            offset, size, userData, name ? 
name : "vma_empty", +            VMA_SUBALLOCATION_TYPE_NAMES[allocation->GetSuballocationType()], +            (uint64_t)allocation->GetBufferImageUsage().Value); +#else +        VMA_LEAK_LOG_FORMAT("UNFREED ALLOCATION; Offset: %" PRIu64 "; Size: %" PRIu64 "; UserData: %p; Name: %s; Type: %u", +            offset, size, userData, name ? name : "vma_empty", +            (unsigned)allocation->GetSuballocationType()); +#endif // VMA_STATS_STRING_ENABLED +    } + +} + +#if VMA_STATS_STRING_ENABLED +void VmaBlockMetadata::PrintDetailedMap_Begin(class VmaJsonWriter& json, +    VkDeviceSize unusedBytes, size_t allocationCount, size_t unusedRangeCount) const +{ +    json.WriteString("TotalBytes"); +    json.WriteNumber(GetSize()); + +    json.WriteString("UnusedBytes"); +    json.WriteNumber(unusedBytes); + +    json.WriteString("Allocations"); +    json.WriteNumber((uint64_t)allocationCount); + +    json.WriteString("UnusedRanges"); +    json.WriteNumber((uint64_t)unusedRangeCount); + +    json.WriteString("Suballocations"); +    json.BeginArray(); +} + +void VmaBlockMetadata::PrintDetailedMap_Allocation(class VmaJsonWriter& json, +    VkDeviceSize offset, VkDeviceSize size, void* userData) const +{ +    json.BeginObject(true); + +    json.WriteString("Offset"); +    json.WriteNumber(offset); + +    if (IsVirtual()) +    { +        json.WriteString("Size"); +        json.WriteNumber(size); +        if (userData) +        { +            json.WriteString("CustomData"); +            json.BeginString(); +            json.ContinueString_Pointer(userData); +            json.EndString(); +        } +    } +    else +    { +        ((VmaAllocation)userData)->PrintParameters(json); +    } + +    json.EndObject(); +} + +void VmaBlockMetadata::PrintDetailedMap_UnusedRange(class VmaJsonWriter& json, +    VkDeviceSize offset, VkDeviceSize size) const +{ +    json.BeginObject(true); + +    json.WriteString("Offset"); +    json.WriteNumber(offset); + +    json.WriteString("Type"); +    json.WriteString(VMA_SUBALLOCATION_TYPE_NAMES[VMA_SUBALLOCATION_TYPE_FREE]); + +    json.WriteString("Size"); +    json.WriteNumber(size); + +    json.EndObject(); +} + +void VmaBlockMetadata::PrintDetailedMap_End(class VmaJsonWriter& json) const +{ +    json.EndArray(); +} +#endif // VMA_STATS_STRING_ENABLED +#endif // _VMA_BLOCK_METADATA_FUNCTIONS +#endif // _VMA_BLOCK_METADATA + +#ifndef _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY +// Before deleting object of this class remember to call 'Destroy()' +class VmaBlockBufferImageGranularity final +{ +public: +    struct ValidationContext +    { +        const VkAllocationCallbacks* allocCallbacks; +        uint16_t* pageAllocs; +    }; + +    VmaBlockBufferImageGranularity(VkDeviceSize bufferImageGranularity); +    ~VmaBlockBufferImageGranularity(); + +    bool IsEnabled() const { return m_BufferImageGranularity > MAX_LOW_BUFFER_IMAGE_GRANULARITY; } + +    void Init(const VkAllocationCallbacks* pAllocationCallbacks, VkDeviceSize size); +    // Before destroying object you must call free it's memory +    void Destroy(const VkAllocationCallbacks* pAllocationCallbacks); + +    void RoundupAllocRequest(VmaSuballocationType allocType, +        VkDeviceSize& inOutAllocSize, +        VkDeviceSize& inOutAllocAlignment) const; + +    bool CheckConflictAndAlignUp(VkDeviceSize& inOutAllocOffset, +        VkDeviceSize allocSize, +        VkDeviceSize blockOffset, +        VkDeviceSize blockSize, +        VmaSuballocationType allocType) const; + +    void AllocPages(uint8_t allocType, VkDeviceSize 
offset, VkDeviceSize size); +    void FreePages(VkDeviceSize offset, VkDeviceSize size); +    void Clear(); + +    ValidationContext StartValidation(const VkAllocationCallbacks* pAllocationCallbacks, +        bool isVirutal) const; +    bool Validate(ValidationContext& ctx, VkDeviceSize offset, VkDeviceSize size) const; +    bool FinishValidation(ValidationContext& ctx) const; + +private: +    static const uint16_t MAX_LOW_BUFFER_IMAGE_GRANULARITY = 256; + +    struct RegionInfo +    { +        uint8_t allocType; +        uint16_t allocCount; +    }; + +    VkDeviceSize m_BufferImageGranularity; +    uint32_t m_RegionCount; +    RegionInfo* m_RegionInfo; + +    uint32_t GetStartPage(VkDeviceSize offset) const { return OffsetToPageIndex(offset & ~(m_BufferImageGranularity - 1)); } +    uint32_t GetEndPage(VkDeviceSize offset, VkDeviceSize size) const { return OffsetToPageIndex((offset + size - 1) & ~(m_BufferImageGranularity - 1)); } + +    uint32_t OffsetToPageIndex(VkDeviceSize offset) const; +    void AllocPage(RegionInfo& page, uint8_t allocType); +}; + +#ifndef _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY_FUNCTIONS +VmaBlockBufferImageGranularity::VmaBlockBufferImageGranularity(VkDeviceSize bufferImageGranularity) +    : m_BufferImageGranularity(bufferImageGranularity), +    m_RegionCount(0), +    m_RegionInfo(VMA_NULL) {} + +VmaBlockBufferImageGranularity::~VmaBlockBufferImageGranularity() +{ +    VMA_ASSERT(m_RegionInfo == VMA_NULL && "Free not called before destroying object!"); +} + +void VmaBlockBufferImageGranularity::Init(const VkAllocationCallbacks* pAllocationCallbacks, VkDeviceSize size) +{ +    if (IsEnabled()) +    { +        m_RegionCount = static_cast<uint32_t>(VmaDivideRoundingUp(size, m_BufferImageGranularity)); +        m_RegionInfo = vma_new_array(pAllocationCallbacks, RegionInfo, m_RegionCount); +        memset(m_RegionInfo, 0, m_RegionCount * sizeof(RegionInfo)); +    } +} + +void VmaBlockBufferImageGranularity::Destroy(const VkAllocationCallbacks* pAllocationCallbacks) +{ +    if (m_RegionInfo) +    { +        vma_delete_array(pAllocationCallbacks, m_RegionInfo, m_RegionCount); +        m_RegionInfo = VMA_NULL; +    } +} + +void VmaBlockBufferImageGranularity::RoundupAllocRequest(VmaSuballocationType allocType, +    VkDeviceSize& inOutAllocSize, +    VkDeviceSize& inOutAllocAlignment) const +{ +    if (m_BufferImageGranularity > 1 && +        m_BufferImageGranularity <= MAX_LOW_BUFFER_IMAGE_GRANULARITY) +    { +        if (allocType == VMA_SUBALLOCATION_TYPE_UNKNOWN || +            allocType == VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN || +            allocType == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL) +        { +            inOutAllocAlignment = VMA_MAX(inOutAllocAlignment, m_BufferImageGranularity); +            inOutAllocSize = VmaAlignUp(inOutAllocSize, m_BufferImageGranularity); +        } +    } +} + +bool VmaBlockBufferImageGranularity::CheckConflictAndAlignUp(VkDeviceSize& inOutAllocOffset, +    VkDeviceSize allocSize, +    VkDeviceSize blockOffset, +    VkDeviceSize blockSize, +    VmaSuballocationType allocType) const +{ +    if (IsEnabled()) +    { +        uint32_t startPage = GetStartPage(inOutAllocOffset); +        if (m_RegionInfo[startPage].allocCount > 0 && +            VmaIsBufferImageGranularityConflict(static_cast<VmaSuballocationType>(m_RegionInfo[startPage].allocType), allocType)) +        { +            inOutAllocOffset = VmaAlignUp(inOutAllocOffset, m_BufferImageGranularity); +            if (blockSize < allocSize + inOutAllocOffset - blockOffset) +       
         return true; +            ++startPage; +        } +        uint32_t endPage = GetEndPage(inOutAllocOffset, allocSize); +        if (endPage != startPage && +            m_RegionInfo[endPage].allocCount > 0 && +            VmaIsBufferImageGranularityConflict(static_cast<VmaSuballocationType>(m_RegionInfo[endPage].allocType), allocType)) +        { +            return true; +        } +    } +    return false; +} + +void VmaBlockBufferImageGranularity::AllocPages(uint8_t allocType, VkDeviceSize offset, VkDeviceSize size) +{ +    if (IsEnabled()) +    { +        uint32_t startPage = GetStartPage(offset); +        AllocPage(m_RegionInfo[startPage], allocType); + +        uint32_t endPage = GetEndPage(offset, size); +        if (startPage != endPage) +            AllocPage(m_RegionInfo[endPage], allocType); +    } +} + +void VmaBlockBufferImageGranularity::FreePages(VkDeviceSize offset, VkDeviceSize size) +{ +    if (IsEnabled()) +    { +        uint32_t startPage = GetStartPage(offset); +        --m_RegionInfo[startPage].allocCount; +        if (m_RegionInfo[startPage].allocCount == 0) +            m_RegionInfo[startPage].allocType = VMA_SUBALLOCATION_TYPE_FREE; +        uint32_t endPage = GetEndPage(offset, size); +        if (startPage != endPage) +        { +            --m_RegionInfo[endPage].allocCount; +            if (m_RegionInfo[endPage].allocCount == 0) +                m_RegionInfo[endPage].allocType = VMA_SUBALLOCATION_TYPE_FREE; +        } +    } +} + +void VmaBlockBufferImageGranularity::Clear() +{ +    if (m_RegionInfo) +        memset(m_RegionInfo, 0, m_RegionCount * sizeof(RegionInfo)); +} + +VmaBlockBufferImageGranularity::ValidationContext VmaBlockBufferImageGranularity::StartValidation( +    const VkAllocationCallbacks* pAllocationCallbacks, bool isVirutal) const +{ +    ValidationContext ctx{ pAllocationCallbacks, VMA_NULL }; +    if (!isVirutal && IsEnabled()) +    { +        ctx.pageAllocs = vma_new_array(pAllocationCallbacks, uint16_t, m_RegionCount); +        memset(ctx.pageAllocs, 0, m_RegionCount * sizeof(uint16_t)); +    } +    return ctx; +} + +bool VmaBlockBufferImageGranularity::Validate(ValidationContext& ctx, +    VkDeviceSize offset, VkDeviceSize size) const +{ +    if (IsEnabled()) +    { +        uint32_t start = GetStartPage(offset); +        ++ctx.pageAllocs[start]; +        VMA_VALIDATE(m_RegionInfo[start].allocCount > 0); + +        uint32_t end = GetEndPage(offset, size); +        if (start != end) +        { +            ++ctx.pageAllocs[end]; +            VMA_VALIDATE(m_RegionInfo[end].allocCount > 0); +        } +    } +    return true; +} + +bool VmaBlockBufferImageGranularity::FinishValidation(ValidationContext& ctx) const +{ +    // Check proper page structure +    if (IsEnabled()) +    { +        VMA_ASSERT(ctx.pageAllocs != VMA_NULL && "Validation context not initialized!"); + +        for (uint32_t page = 0; page < m_RegionCount; ++page) +        { +            VMA_VALIDATE(ctx.pageAllocs[page] == m_RegionInfo[page].allocCount); +        } +        vma_delete_array(ctx.allocCallbacks, ctx.pageAllocs, m_RegionCount); +        ctx.pageAllocs = VMA_NULL; +    } +    return true; +} + +uint32_t VmaBlockBufferImageGranularity::OffsetToPageIndex(VkDeviceSize offset) const +{ +    return static_cast<uint32_t>(offset >> VMA_BITSCAN_MSB(m_BufferImageGranularity)); +} + +void VmaBlockBufferImageGranularity::AllocPage(RegionInfo& page, uint8_t allocType) +{ +    // When current alloc type is free then it can be overridden by new type +    if 
(page.allocCount == 0 || (page.allocCount > 0 && page.allocType == VMA_SUBALLOCATION_TYPE_FREE)) +        page.allocType = allocType; + +    ++page.allocCount; +} +#endif // _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY_FUNCTIONS +#endif // _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY + +#ifndef _VMA_BLOCK_METADATA_LINEAR +/* +Allocations and their references in internal data structure look like this: + +if(m_2ndVectorMode == SECOND_VECTOR_EMPTY): + +        0 +-------+ +          |       | +          |       | +          |       | +          +-------+ +          | Alloc |  1st[m_1stNullItemsBeginCount] +          +-------+ +          | Alloc |  1st[m_1stNullItemsBeginCount + 1] +          +-------+ +          |  ...  | +          +-------+ +          | Alloc |  1st[1st.size() - 1] +          +-------+ +          |       | +          |       | +          |       | +GetSize() +-------+ + +if(m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER): + +        0 +-------+ +          | Alloc |  2nd[0] +          +-------+ +          | Alloc |  2nd[1] +          +-------+ +          |  ...  | +          +-------+ +          | Alloc |  2nd[2nd.size() - 1] +          +-------+ +          |       | +          |       | +          |       | +          +-------+ +          | Alloc |  1st[m_1stNullItemsBeginCount] +          +-------+ +          | Alloc |  1st[m_1stNullItemsBeginCount + 1] +          +-------+ +          |  ...  | +          +-------+ +          | Alloc |  1st[1st.size() - 1] +          +-------+ +          |       | +GetSize() +-------+ + +if(m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK): + +        0 +-------+ +          |       | +          |       | +          |       | +          +-------+ +          | Alloc |  1st[m_1stNullItemsBeginCount] +          +-------+ +          | Alloc |  1st[m_1stNullItemsBeginCount + 1] +          +-------+ +          |  ...  | +          +-------+ +          | Alloc |  1st[1st.size() - 1] +          +-------+ +          |       | +          |       | +          |       | +          +-------+ +          | Alloc |  2nd[2nd.size() - 1] +          +-------+ +          |  ...  
| +          +-------+ +          | Alloc |  2nd[1] +          +-------+ +          | Alloc |  2nd[0] +GetSize() +-------+ + +*/ +class VmaBlockMetadata_Linear : public VmaBlockMetadata +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockMetadata_Linear) +public: +    VmaBlockMetadata_Linear(const VkAllocationCallbacks* pAllocationCallbacks, +        VkDeviceSize bufferImageGranularity, bool isVirtual); +    virtual ~VmaBlockMetadata_Linear() = default; + +    VkDeviceSize GetSumFreeSize() const override { return m_SumFreeSize; } +    bool IsEmpty() const override { return GetAllocationCount() == 0; } +    VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const override { return (VkDeviceSize)allocHandle - 1; } + +    void Init(VkDeviceSize size) override; +    bool Validate() const override; +    size_t GetAllocationCount() const override; +    size_t GetFreeRegionsCount() const override; + +    void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const override; +    void AddStatistics(VmaStatistics& inoutStats) const override; + +#if VMA_STATS_STRING_ENABLED +    void PrintDetailedMap(class VmaJsonWriter& json) const override; +#endif + +    bool CreateAllocationRequest( +        VkDeviceSize allocSize, +        VkDeviceSize allocAlignment, +        bool upperAddress, +        VmaSuballocationType allocType, +        uint32_t strategy, +        VmaAllocationRequest* pAllocationRequest) override; + +    VkResult CheckCorruption(const void* pBlockData) override; + +    void Alloc( +        const VmaAllocationRequest& request, +        VmaSuballocationType type, +        void* userData) override; + +    void Free(VmaAllocHandle allocHandle) override; +    void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) override; +    void* GetAllocationUserData(VmaAllocHandle allocHandle) const override; +    VmaAllocHandle GetAllocationListBegin() const override; +    VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const override; +    VkDeviceSize GetNextFreeRegionSize(VmaAllocHandle alloc) const override; +    void Clear() override; +    void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) override; +    void DebugLogAllAllocations() const override; + +private: +    /* +    There are two suballocation vectors, used in ping-pong way. +    The one with index m_1stVectorIndex is called 1st. +    The one with index (m_1stVectorIndex ^ 1) is called 2nd. +    2nd can be non-empty only when 1st is not empty. +    When 2nd is not empty, m_2ndVectorMode indicates its mode of operation. +    */ +    typedef VmaVector<VmaSuballocation, VmaStlAllocator<VmaSuballocation>> SuballocationVectorType; + +    enum SECOND_VECTOR_MODE +    { +        SECOND_VECTOR_EMPTY, +        /* +        Suballocations in 2nd vector are created later than the ones in 1st, but they +        all have smaller offset. +        */ +        SECOND_VECTOR_RING_BUFFER, +        /* +        Suballocations in 2nd vector are upper side of double stack. +        They all have offsets higher than those in 1st vector. +        Top of this stack means smaller offsets, but higher indices in this vector. +        */ +        SECOND_VECTOR_DOUBLE_STACK, +    }; + +    VkDeviceSize m_SumFreeSize; +    SuballocationVectorType m_Suballocations0, m_Suballocations1; +    uint32_t m_1stVectorIndex; +    SECOND_VECTOR_MODE m_2ndVectorMode; +    // Number of items in 1st vector with hAllocation = null at the beginning. 
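+    // (Freed items at the front are not erased right away: Free() only advances this count,
+    // and CleanupAfterFree() compacts the 1st vector once ShouldCompact1st() deems it worthwhile.)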
+    size_t m_1stNullItemsBeginCount; +    // Number of other items in 1st vector with hAllocation = null somewhere in the middle. +    size_t m_1stNullItemsMiddleCount; +    // Number of items in 2nd vector with hAllocation = null. +    size_t m_2ndNullItemsCount; + +    SuballocationVectorType& AccessSuballocations1st() { return m_1stVectorIndex ? m_Suballocations1 : m_Suballocations0; } +    SuballocationVectorType& AccessSuballocations2nd() { return m_1stVectorIndex ? m_Suballocations0 : m_Suballocations1; } +    const SuballocationVectorType& AccessSuballocations1st() const { return m_1stVectorIndex ? m_Suballocations1 : m_Suballocations0; } +    const SuballocationVectorType& AccessSuballocations2nd() const { return m_1stVectorIndex ? m_Suballocations0 : m_Suballocations1; } + +    VmaSuballocation& FindSuballocation(VkDeviceSize offset) const; +    bool ShouldCompact1st() const; +    void CleanupAfterFree(); + +    bool CreateAllocationRequest_LowerAddress( +        VkDeviceSize allocSize, +        VkDeviceSize allocAlignment, +        VmaSuballocationType allocType, +        uint32_t strategy, +        VmaAllocationRequest* pAllocationRequest); +    bool CreateAllocationRequest_UpperAddress( +        VkDeviceSize allocSize, +        VkDeviceSize allocAlignment, +        VmaSuballocationType allocType, +        uint32_t strategy, +        VmaAllocationRequest* pAllocationRequest); +}; + +#ifndef _VMA_BLOCK_METADATA_LINEAR_FUNCTIONS +VmaBlockMetadata_Linear::VmaBlockMetadata_Linear(const VkAllocationCallbacks* pAllocationCallbacks, +    VkDeviceSize bufferImageGranularity, bool isVirtual) +    : VmaBlockMetadata(pAllocationCallbacks, bufferImageGranularity, isVirtual), +    m_SumFreeSize(0), +    m_Suballocations0(VmaStlAllocator<VmaSuballocation>(pAllocationCallbacks)), +    m_Suballocations1(VmaStlAllocator<VmaSuballocation>(pAllocationCallbacks)), +    m_1stVectorIndex(0), +    m_2ndVectorMode(SECOND_VECTOR_EMPTY), +    m_1stNullItemsBeginCount(0), +    m_1stNullItemsMiddleCount(0), +    m_2ndNullItemsCount(0) {} + +void VmaBlockMetadata_Linear::Init(VkDeviceSize size) +{ +    VmaBlockMetadata::Init(size); +    m_SumFreeSize = size; +} + +bool VmaBlockMetadata_Linear::Validate() const +{ +    const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); +    const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + +    VMA_VALIDATE(suballocations2nd.empty() == (m_2ndVectorMode == SECOND_VECTOR_EMPTY)); +    VMA_VALIDATE(!suballocations1st.empty() || +        suballocations2nd.empty() || +        m_2ndVectorMode != SECOND_VECTOR_RING_BUFFER); + +    if (!suballocations1st.empty()) +    { +        // Null item at the beginning should be accounted into m_1stNullItemsBeginCount. +        VMA_VALIDATE(suballocations1st[m_1stNullItemsBeginCount].type != VMA_SUBALLOCATION_TYPE_FREE); +        // Null item at the end should be just pop_back(). +        VMA_VALIDATE(suballocations1st.back().type != VMA_SUBALLOCATION_TYPE_FREE); +    } +    if (!suballocations2nd.empty()) +    { +        // Null item at the end should be just pop_back(). 
+        VMA_VALIDATE(suballocations2nd.back().type != VMA_SUBALLOCATION_TYPE_FREE); +    } + +    VMA_VALIDATE(m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount <= suballocations1st.size()); +    VMA_VALIDATE(m_2ndNullItemsCount <= suballocations2nd.size()); + +    VkDeviceSize sumUsedSize = 0; +    const size_t suballoc1stCount = suballocations1st.size(); +    const VkDeviceSize debugMargin = GetDebugMargin(); +    VkDeviceSize offset = 0; + +    if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) +    { +        const size_t suballoc2ndCount = suballocations2nd.size(); +        size_t nullItem2ndCount = 0; +        for (size_t i = 0; i < suballoc2ndCount; ++i) +        { +            const VmaSuballocation& suballoc = suballocations2nd[i]; +            const bool currFree = (suballoc.type == VMA_SUBALLOCATION_TYPE_FREE); + +            VmaAllocation const alloc = (VmaAllocation)suballoc.userData; +            if (!IsVirtual()) +            { +                VMA_VALIDATE(currFree == (alloc == VK_NULL_HANDLE)); +            } +            VMA_VALIDATE(suballoc.offset >= offset); + +            if (!currFree) +            { +                if (!IsVirtual()) +                { +                    VMA_VALIDATE((VkDeviceSize)alloc->GetAllocHandle() == suballoc.offset + 1); +                    VMA_VALIDATE(alloc->GetSize() == suballoc.size); +                } +                sumUsedSize += suballoc.size; +            } +            else +            { +                ++nullItem2ndCount; +            } + +            offset = suballoc.offset + suballoc.size + debugMargin; +        } + +        VMA_VALIDATE(nullItem2ndCount == m_2ndNullItemsCount); +    } + +    for (size_t i = 0; i < m_1stNullItemsBeginCount; ++i) +    { +        const VmaSuballocation& suballoc = suballocations1st[i]; +        VMA_VALIDATE(suballoc.type == VMA_SUBALLOCATION_TYPE_FREE && +            suballoc.userData == VMA_NULL); +    } + +    size_t nullItem1stCount = m_1stNullItemsBeginCount; + +    for (size_t i = m_1stNullItemsBeginCount; i < suballoc1stCount; ++i) +    { +        const VmaSuballocation& suballoc = suballocations1st[i]; +        const bool currFree = (suballoc.type == VMA_SUBALLOCATION_TYPE_FREE); + +        VmaAllocation const alloc = (VmaAllocation)suballoc.userData; +        if (!IsVirtual()) +        { +            VMA_VALIDATE(currFree == (alloc == VK_NULL_HANDLE)); +        } +        VMA_VALIDATE(suballoc.offset >= offset); +        VMA_VALIDATE(i >= m_1stNullItemsBeginCount || currFree); + +        if (!currFree) +        { +            if (!IsVirtual()) +            { +                VMA_VALIDATE((VkDeviceSize)alloc->GetAllocHandle() == suballoc.offset + 1); +                VMA_VALIDATE(alloc->GetSize() == suballoc.size); +            } +            sumUsedSize += suballoc.size; +        } +        else +        { +            ++nullItem1stCount; +        } + +        offset = suballoc.offset + suballoc.size + debugMargin; +    } +    VMA_VALIDATE(nullItem1stCount == m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount); + +    if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) +    { +        const size_t suballoc2ndCount = suballocations2nd.size(); +        size_t nullItem2ndCount = 0; +        for (size_t i = suballoc2ndCount; i--; ) +        { +            const VmaSuballocation& suballoc = suballocations2nd[i]; +            const bool currFree = (suballoc.type == VMA_SUBALLOCATION_TYPE_FREE); + +            VmaAllocation const alloc = (VmaAllocation)suballoc.userData; +            
if (!IsVirtual()) +            { +                VMA_VALIDATE(currFree == (alloc == VK_NULL_HANDLE)); +            } +            VMA_VALIDATE(suballoc.offset >= offset); + +            if (!currFree) +            { +                if (!IsVirtual()) +                { +                    VMA_VALIDATE((VkDeviceSize)alloc->GetAllocHandle() == suballoc.offset + 1); +                    VMA_VALIDATE(alloc->GetSize() == suballoc.size); +                } +                sumUsedSize += suballoc.size; +            } +            else +            { +                ++nullItem2ndCount; +            } + +            offset = suballoc.offset + suballoc.size + debugMargin; +        } + +        VMA_VALIDATE(nullItem2ndCount == m_2ndNullItemsCount); +    } + +    VMA_VALIDATE(offset <= GetSize()); +    VMA_VALIDATE(m_SumFreeSize == GetSize() - sumUsedSize); + +    return true; +} + +size_t VmaBlockMetadata_Linear::GetAllocationCount() const +{ +    return AccessSuballocations1st().size() - m_1stNullItemsBeginCount - m_1stNullItemsMiddleCount + +        AccessSuballocations2nd().size() - m_2ndNullItemsCount; +} + +size_t VmaBlockMetadata_Linear::GetFreeRegionsCount() const +{ +    // Function only used for defragmentation, which is disabled for this algorithm +    VMA_ASSERT(0); +    return SIZE_MAX; +} + +void VmaBlockMetadata_Linear::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const +{ +    const VkDeviceSize size = GetSize(); +    const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); +    const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); +    const size_t suballoc1stCount = suballocations1st.size(); +    const size_t suballoc2ndCount = suballocations2nd.size(); + +    inoutStats.statistics.blockCount++; +    inoutStats.statistics.blockBytes += size; + +    VkDeviceSize lastOffset = 0; + +    if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) +    { +        const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; +        size_t nextAlloc2ndIndex = 0; +        while (lastOffset < freeSpace2ndTo1stEnd) +        { +            // Find next non-null allocation or move nextAllocIndex to the end. +            while (nextAlloc2ndIndex < suballoc2ndCount && +                suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) +            { +                ++nextAlloc2ndIndex; +            } + +            // Found non-null allocation. +            if (nextAlloc2ndIndex < suballoc2ndCount) +            { +                const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + +                // 1. Process free space before this allocation. +                if (lastOffset < suballoc.offset) +                { +                    // There is free space from lastOffset to suballoc.offset. +                    const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; +                    VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); +                } + +                // 2. Process this allocation. +                // There is allocation with suballoc.offset, suballoc.size. +                VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size); + +                // 3. Prepare for next iteration. +                lastOffset = suballoc.offset + suballoc.size; +                ++nextAlloc2ndIndex; +            } +            // We are at the end. 
+            else +            { +                // There is free space from lastOffset to freeSpace2ndTo1stEnd. +                if (lastOffset < freeSpace2ndTo1stEnd) +                { +                    const VkDeviceSize unusedRangeSize = freeSpace2ndTo1stEnd - lastOffset; +                    VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); +                } + +                // End of loop. +                lastOffset = freeSpace2ndTo1stEnd; +            } +        } +    } + +    size_t nextAlloc1stIndex = m_1stNullItemsBeginCount; +    const VkDeviceSize freeSpace1stTo2ndEnd = +        m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? suballocations2nd.back().offset : size; +    while (lastOffset < freeSpace1stTo2ndEnd) +    { +        // Find next non-null allocation or move nextAllocIndex to the end. +        while (nextAlloc1stIndex < suballoc1stCount && +            suballocations1st[nextAlloc1stIndex].userData == VMA_NULL) +        { +            ++nextAlloc1stIndex; +        } + +        // Found non-null allocation. +        if (nextAlloc1stIndex < suballoc1stCount) +        { +            const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + +            // 1. Process free space before this allocation. +            if (lastOffset < suballoc.offset) +            { +                // There is free space from lastOffset to suballoc.offset. +                const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; +                VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); +            } + +            // 2. Process this allocation. +            // There is allocation with suballoc.offset, suballoc.size. +            VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size); + +            // 3. Prepare for next iteration. +            lastOffset = suballoc.offset + suballoc.size; +            ++nextAlloc1stIndex; +        } +        // We are at the end. +        else +        { +            // There is free space from lastOffset to freeSpace1stTo2ndEnd. +            if (lastOffset < freeSpace1stTo2ndEnd) +            { +                const VkDeviceSize unusedRangeSize = freeSpace1stTo2ndEnd - lastOffset; +                VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); +            } + +            // End of loop. +            lastOffset = freeSpace1stTo2ndEnd; +        } +    } + +    if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) +    { +        size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; +        while (lastOffset < size) +        { +            // Find next non-null allocation or move nextAllocIndex to the end. +            while (nextAlloc2ndIndex != SIZE_MAX && +                suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) +            { +                --nextAlloc2ndIndex; +            } + +            // Found non-null allocation. +            if (nextAlloc2ndIndex != SIZE_MAX) +            { +                const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + +                // 1. Process free space before this allocation. +                if (lastOffset < suballoc.offset) +                { +                    // There is free space from lastOffset to suballoc.offset. +                    const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; +                    VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); +                } + +                // 2. Process this allocation. 
+                // There is allocation with suballoc.offset, suballoc.size. +                VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size); + +                // 3. Prepare for next iteration. +                lastOffset = suballoc.offset + suballoc.size; +                --nextAlloc2ndIndex; +            } +            // We are at the end. +            else +            { +                // There is free space from lastOffset to size. +                if (lastOffset < size) +                { +                    const VkDeviceSize unusedRangeSize = size - lastOffset; +                    VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); +                } + +                // End of loop. +                lastOffset = size; +            } +        } +    } +} + +void VmaBlockMetadata_Linear::AddStatistics(VmaStatistics& inoutStats) const +{ +    const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); +    const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); +    const VkDeviceSize size = GetSize(); +    const size_t suballoc1stCount = suballocations1st.size(); +    const size_t suballoc2ndCount = suballocations2nd.size(); + +    inoutStats.blockCount++; +    inoutStats.blockBytes += size; +    inoutStats.allocationBytes += size - m_SumFreeSize; + +    VkDeviceSize lastOffset = 0; + +    if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) +    { +        const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; +        size_t nextAlloc2ndIndex = m_1stNullItemsBeginCount; +        while (lastOffset < freeSpace2ndTo1stEnd) +        { +            // Find next non-null allocation or move nextAlloc2ndIndex to the end. +            while (nextAlloc2ndIndex < suballoc2ndCount && +                suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) +            { +                ++nextAlloc2ndIndex; +            } + +            // Found non-null allocation. +            if (nextAlloc2ndIndex < suballoc2ndCount) +            { +                const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + +                // Process this allocation. +                // There is allocation with suballoc.offset, suballoc.size. +                ++inoutStats.allocationCount; + +                // Prepare for next iteration. +                lastOffset = suballoc.offset + suballoc.size; +                ++nextAlloc2ndIndex; +            } +            // We are at the end. +            else +            { +                // End of loop. +                lastOffset = freeSpace2ndTo1stEnd; +            } +        } +    } + +    size_t nextAlloc1stIndex = m_1stNullItemsBeginCount; +    const VkDeviceSize freeSpace1stTo2ndEnd = +        m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? suballocations2nd.back().offset : size; +    while (lastOffset < freeSpace1stTo2ndEnd) +    { +        // Find next non-null allocation or move nextAllocIndex to the end. +        while (nextAlloc1stIndex < suballoc1stCount && +            suballocations1st[nextAlloc1stIndex].userData == VMA_NULL) +        { +            ++nextAlloc1stIndex; +        } + +        // Found non-null allocation. +        if (nextAlloc1stIndex < suballoc1stCount) +        { +            const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + +            // Process this allocation. +            // There is allocation with suballoc.offset, suballoc.size. 
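+            // (Unlike AddDetailedStatistics above, this pass only needs allocation counts;
+            // the byte totals were already computed from GetSize() and m_SumFreeSize at the
+            // top of the function.)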
+            ++inoutStats.allocationCount; + +            // Prepare for next iteration. +            lastOffset = suballoc.offset + suballoc.size; +            ++nextAlloc1stIndex; +        } +        // We are at the end. +        else +        { +            // End of loop. +            lastOffset = freeSpace1stTo2ndEnd; +        } +    } + +    if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) +    { +        size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; +        while (lastOffset < size) +        { +            // Find next non-null allocation or move nextAlloc2ndIndex to the end. +            while (nextAlloc2ndIndex != SIZE_MAX && +                suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) +            { +                --nextAlloc2ndIndex; +            } + +            // Found non-null allocation. +            if (nextAlloc2ndIndex != SIZE_MAX) +            { +                const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + +                // Process this allocation. +                // There is allocation with suballoc.offset, suballoc.size. +                ++inoutStats.allocationCount; + +                // Prepare for next iteration. +                lastOffset = suballoc.offset + suballoc.size; +                --nextAlloc2ndIndex; +            } +            // We are at the end. +            else +            { +                // End of loop. +                lastOffset = size; +            } +        } +    } +} + +#if VMA_STATS_STRING_ENABLED +void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json) const +{ +    const VkDeviceSize size = GetSize(); +    const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); +    const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); +    const size_t suballoc1stCount = suballocations1st.size(); +    const size_t suballoc2ndCount = suballocations2nd.size(); + +    // FIRST PASS + +    size_t unusedRangeCount = 0; +    VkDeviceSize usedBytes = 0; + +    VkDeviceSize lastOffset = 0; + +    size_t alloc2ndCount = 0; +    if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) +    { +        const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; +        size_t nextAlloc2ndIndex = 0; +        while (lastOffset < freeSpace2ndTo1stEnd) +        { +            // Find next non-null allocation or move nextAlloc2ndIndex to the end. +            while (nextAlloc2ndIndex < suballoc2ndCount && +                suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) +            { +                ++nextAlloc2ndIndex; +            } + +            // Found non-null allocation. +            if (nextAlloc2ndIndex < suballoc2ndCount) +            { +                const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + +                // 1. Process free space before this allocation. +                if (lastOffset < suballoc.offset) +                { +                    // There is free space from lastOffset to suballoc.offset. +                    ++unusedRangeCount; +                } + +                // 2. Process this allocation. +                // There is allocation with suballoc.offset, suballoc.size. +                ++alloc2ndCount; +                usedBytes += suballoc.size; + +                // 3. Prepare for next iteration. 
+                lastOffset = suballoc.offset + suballoc.size; +                ++nextAlloc2ndIndex; +            } +            // We are at the end. +            else +            { +                if (lastOffset < freeSpace2ndTo1stEnd) +                { +                    // There is free space from lastOffset to freeSpace2ndTo1stEnd. +                    ++unusedRangeCount; +                } + +                // End of loop. +                lastOffset = freeSpace2ndTo1stEnd; +            } +        } +    } + +    size_t nextAlloc1stIndex = m_1stNullItemsBeginCount; +    size_t alloc1stCount = 0; +    const VkDeviceSize freeSpace1stTo2ndEnd = +        m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? suballocations2nd.back().offset : size; +    while (lastOffset < freeSpace1stTo2ndEnd) +    { +        // Find next non-null allocation or move nextAllocIndex to the end. +        while (nextAlloc1stIndex < suballoc1stCount && +            suballocations1st[nextAlloc1stIndex].userData == VMA_NULL) +        { +            ++nextAlloc1stIndex; +        } + +        // Found non-null allocation. +        if (nextAlloc1stIndex < suballoc1stCount) +        { +            const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + +            // 1. Process free space before this allocation. +            if (lastOffset < suballoc.offset) +            { +                // There is free space from lastOffset to suballoc.offset. +                ++unusedRangeCount; +            } + +            // 2. Process this allocation. +            // There is allocation with suballoc.offset, suballoc.size. +            ++alloc1stCount; +            usedBytes += suballoc.size; + +            // 3. Prepare for next iteration. +            lastOffset = suballoc.offset + suballoc.size; +            ++nextAlloc1stIndex; +        } +        // We are at the end. +        else +        { +            if (lastOffset < freeSpace1stTo2ndEnd) +            { +                // There is free space from lastOffset to freeSpace1stTo2ndEnd. +                ++unusedRangeCount; +            } + +            // End of loop. +            lastOffset = freeSpace1stTo2ndEnd; +        } +    } + +    if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) +    { +        size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; +        while (lastOffset < size) +        { +            // Find next non-null allocation or move nextAlloc2ndIndex to the end. +            while (nextAlloc2ndIndex != SIZE_MAX && +                suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) +            { +                --nextAlloc2ndIndex; +            } + +            // Found non-null allocation. +            if (nextAlloc2ndIndex != SIZE_MAX) +            { +                const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + +                // 1. Process free space before this allocation. +                if (lastOffset < suballoc.offset) +                { +                    // There is free space from lastOffset to suballoc.offset. +                    ++unusedRangeCount; +                } + +                // 2. Process this allocation. +                // There is allocation with suballoc.offset, suballoc.size. +                ++alloc2ndCount; +                usedBytes += suballoc.size; + +                // 3. Prepare for next iteration. 
+                lastOffset = suballoc.offset + suballoc.size; +                --nextAlloc2ndIndex; +            } +            // We are at the end. +            else +            { +                if (lastOffset < size) +                { +                    // There is free space from lastOffset to size. +                    ++unusedRangeCount; +                } + +                // End of loop. +                lastOffset = size; +            } +        } +    } + +    const VkDeviceSize unusedBytes = size - usedBytes; +    PrintDetailedMap_Begin(json, unusedBytes, alloc1stCount + alloc2ndCount, unusedRangeCount); + +    // SECOND PASS +    lastOffset = 0; + +    if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) +    { +        const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; +        size_t nextAlloc2ndIndex = 0; +        while (lastOffset < freeSpace2ndTo1stEnd) +        { +            // Find next non-null allocation or move nextAlloc2ndIndex to the end. +            while (nextAlloc2ndIndex < suballoc2ndCount && +                suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) +            { +                ++nextAlloc2ndIndex; +            } + +            // Found non-null allocation. +            if (nextAlloc2ndIndex < suballoc2ndCount) +            { +                const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + +                // 1. Process free space before this allocation. +                if (lastOffset < suballoc.offset) +                { +                    // There is free space from lastOffset to suballoc.offset. +                    const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; +                    PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); +                } + +                // 2. Process this allocation. +                // There is allocation with suballoc.offset, suballoc.size. +                PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.userData); + +                // 3. Prepare for next iteration. +                lastOffset = suballoc.offset + suballoc.size; +                ++nextAlloc2ndIndex; +            } +            // We are at the end. +            else +            { +                if (lastOffset < freeSpace2ndTo1stEnd) +                { +                    // There is free space from lastOffset to freeSpace2ndTo1stEnd. +                    const VkDeviceSize unusedRangeSize = freeSpace2ndTo1stEnd - lastOffset; +                    PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); +                } + +                // End of loop. +                lastOffset = freeSpace2ndTo1stEnd; +            } +        } +    } + +    nextAlloc1stIndex = m_1stNullItemsBeginCount; +    while (lastOffset < freeSpace1stTo2ndEnd) +    { +        // Find next non-null allocation or move nextAllocIndex to the end. +        while (nextAlloc1stIndex < suballoc1stCount && +            suballocations1st[nextAlloc1stIndex].userData == VMA_NULL) +        { +            ++nextAlloc1stIndex; +        } + +        // Found non-null allocation. +        if (nextAlloc1stIndex < suballoc1stCount) +        { +            const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + +            // 1. Process free space before this allocation. +            if (lastOffset < suballoc.offset) +            { +                // There is free space from lastOffset to suballoc.offset. 
+                const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; +                PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); +            } + +            // 2. Process this allocation. +            // There is allocation with suballoc.offset, suballoc.size. +            PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.userData); + +            // 3. Prepare for next iteration. +            lastOffset = suballoc.offset + suballoc.size; +            ++nextAlloc1stIndex; +        } +        // We are at the end. +        else +        { +            if (lastOffset < freeSpace1stTo2ndEnd) +            { +                // There is free space from lastOffset to freeSpace1stTo2ndEnd. +                const VkDeviceSize unusedRangeSize = freeSpace1stTo2ndEnd - lastOffset; +                PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); +            } + +            // End of loop. +            lastOffset = freeSpace1stTo2ndEnd; +        } +    } + +    if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) +    { +        size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; +        while (lastOffset < size) +        { +            // Find next non-null allocation or move nextAlloc2ndIndex to the end. +            while (nextAlloc2ndIndex != SIZE_MAX && +                suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) +            { +                --nextAlloc2ndIndex; +            } + +            // Found non-null allocation. +            if (nextAlloc2ndIndex != SIZE_MAX) +            { +                const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + +                // 1. Process free space before this allocation. +                if (lastOffset < suballoc.offset) +                { +                    // There is free space from lastOffset to suballoc.offset. +                    const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; +                    PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); +                } + +                // 2. Process this allocation. +                // There is allocation with suballoc.offset, suballoc.size. +                PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.userData); + +                // 3. Prepare for next iteration. +                lastOffset = suballoc.offset + suballoc.size; +                --nextAlloc2ndIndex; +            } +            // We are at the end. +            else +            { +                if (lastOffset < size) +                { +                    // There is free space from lastOffset to size. +                    const VkDeviceSize unusedRangeSize = size - lastOffset; +                    PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); +                } + +                // End of loop. 
+                lastOffset = size; +            } +        } +    } + +    PrintDetailedMap_End(json); +} +#endif // VMA_STATS_STRING_ENABLED + +bool VmaBlockMetadata_Linear::CreateAllocationRequest( +    VkDeviceSize allocSize, +    VkDeviceSize allocAlignment, +    bool upperAddress, +    VmaSuballocationType allocType, +    uint32_t strategy, +    VmaAllocationRequest* pAllocationRequest) +{ +    VMA_ASSERT(allocSize > 0); +    VMA_ASSERT(allocType != VMA_SUBALLOCATION_TYPE_FREE); +    VMA_ASSERT(pAllocationRequest != VMA_NULL); +    VMA_HEAVY_ASSERT(Validate()); + +    if(allocSize > GetSize()) +        return false; + +    pAllocationRequest->size = allocSize; +    return upperAddress ? +        CreateAllocationRequest_UpperAddress( +            allocSize, allocAlignment, allocType, strategy, pAllocationRequest) : +        CreateAllocationRequest_LowerAddress( +            allocSize, allocAlignment, allocType, strategy, pAllocationRequest); +} + +VkResult VmaBlockMetadata_Linear::CheckCorruption(const void* pBlockData) +{ +    VMA_ASSERT(!IsVirtual()); +    SuballocationVectorType& suballocations1st = AccessSuballocations1st(); +    for (size_t i = m_1stNullItemsBeginCount, count = suballocations1st.size(); i < count; ++i) +    { +        const VmaSuballocation& suballoc = suballocations1st[i]; +        if (suballoc.type != VMA_SUBALLOCATION_TYPE_FREE) +        { +            if (!VmaValidateMagicValue(pBlockData, suballoc.offset + suballoc.size)) +            { +                VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!"); +                return VK_ERROR_UNKNOWN_COPY; +            } +        } +    } + +    SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); +    for (size_t i = 0, count = suballocations2nd.size(); i < count; ++i) +    { +        const VmaSuballocation& suballoc = suballocations2nd[i]; +        if (suballoc.type != VMA_SUBALLOCATION_TYPE_FREE) +        { +            if (!VmaValidateMagicValue(pBlockData, suballoc.offset + suballoc.size)) +            { +                VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!"); +                return VK_ERROR_UNKNOWN_COPY; +            } +        } +    } + +    return VK_SUCCESS; +} + +void VmaBlockMetadata_Linear::Alloc( +    const VmaAllocationRequest& request, +    VmaSuballocationType type, +    void* userData) +{ +    const VkDeviceSize offset = (VkDeviceSize)request.allocHandle - 1; +    const VmaSuballocation newSuballoc = { offset, request.size, userData, type }; + +    switch (request.type) +    { +    case VmaAllocationRequestType::UpperAddress: +    { +        VMA_ASSERT(m_2ndVectorMode != SECOND_VECTOR_RING_BUFFER && +            "CRITICAL ERROR: Trying to use linear allocator as double stack while it was already used as ring buffer."); +        SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); +        suballocations2nd.push_back(newSuballoc); +        m_2ndVectorMode = SECOND_VECTOR_DOUBLE_STACK; +    } +    break; +    case VmaAllocationRequestType::EndOf1st: +    { +        SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + +        VMA_ASSERT(suballocations1st.empty() || +            offset >= suballocations1st.back().offset + suballocations1st.back().size); +        // Check if it fits before the end of the block. 
+        VMA_ASSERT(offset + request.size <= GetSize()); + +        suballocations1st.push_back(newSuballoc); +    } +    break; +    case VmaAllocationRequestType::EndOf2nd: +    { +        SuballocationVectorType& suballocations1st = AccessSuballocations1st(); +        // New allocation at the end of 2-part ring buffer, so before first allocation from 1st vector. +        VMA_ASSERT(!suballocations1st.empty() && +            offset + request.size <= suballocations1st[m_1stNullItemsBeginCount].offset); +        SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + +        switch (m_2ndVectorMode) +        { +        case SECOND_VECTOR_EMPTY: +            // First allocation from second part ring buffer. +            VMA_ASSERT(suballocations2nd.empty()); +            m_2ndVectorMode = SECOND_VECTOR_RING_BUFFER; +            break; +        case SECOND_VECTOR_RING_BUFFER: +            // 2-part ring buffer is already started. +            VMA_ASSERT(!suballocations2nd.empty()); +            break; +        case SECOND_VECTOR_DOUBLE_STACK: +            VMA_ASSERT(0 && "CRITICAL ERROR: Trying to use linear allocator as ring buffer while it was already used as double stack."); +            break; +        default: +            VMA_ASSERT(0); +        } + +        suballocations2nd.push_back(newSuballoc); +    } +    break; +    default: +        VMA_ASSERT(0 && "CRITICAL INTERNAL ERROR."); +    } + +    m_SumFreeSize -= newSuballoc.size; +} + +void VmaBlockMetadata_Linear::Free(VmaAllocHandle allocHandle) +{ +    SuballocationVectorType& suballocations1st = AccessSuballocations1st(); +    SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); +    VkDeviceSize offset = (VkDeviceSize)allocHandle - 1; + +    if (!suballocations1st.empty()) +    { +        // First allocation: Mark it as next empty at the beginning. +        VmaSuballocation& firstSuballoc = suballocations1st[m_1stNullItemsBeginCount]; +        if (firstSuballoc.offset == offset) +        { +            firstSuballoc.type = VMA_SUBALLOCATION_TYPE_FREE; +            firstSuballoc.userData = VMA_NULL; +            m_SumFreeSize += firstSuballoc.size; +            ++m_1stNullItemsBeginCount; +            CleanupAfterFree(); +            return; +        } +    } + +    // Last allocation in 2-part ring buffer or top of upper stack (same logic). +    if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER || +        m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) +    { +        VmaSuballocation& lastSuballoc = suballocations2nd.back(); +        if (lastSuballoc.offset == offset) +        { +            m_SumFreeSize += lastSuballoc.size; +            suballocations2nd.pop_back(); +            CleanupAfterFree(); +            return; +        } +    } +    // Last allocation in 1st vector. +    else if (m_2ndVectorMode == SECOND_VECTOR_EMPTY) +    { +        VmaSuballocation& lastSuballoc = suballocations1st.back(); +        if (lastSuballoc.offset == offset) +        { +            m_SumFreeSize += lastSuballoc.size; +            suballocations1st.pop_back(); +            CleanupAfterFree(); +            return; +        } +    } + +    VmaSuballocation refSuballoc; +    refSuballoc.offset = offset; +    // Rest of members stays uninitialized intentionally for better performance. + +    // Item from the middle of 1st vector. 
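+    // (A middle item is only marked free and counted here; it is physically removed later
+    // by CleanupAfterFree(), which trims and, if needed, compacts the vectors.)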
+    { +        const SuballocationVectorType::iterator it = VmaBinaryFindSorted( +            suballocations1st.begin() + m_1stNullItemsBeginCount, +            suballocations1st.end(), +            refSuballoc, +            VmaSuballocationOffsetLess()); +        if (it != suballocations1st.end()) +        { +            it->type = VMA_SUBALLOCATION_TYPE_FREE; +            it->userData = VMA_NULL; +            ++m_1stNullItemsMiddleCount; +            m_SumFreeSize += it->size; +            CleanupAfterFree(); +            return; +        } +    } + +    if (m_2ndVectorMode != SECOND_VECTOR_EMPTY) +    { +        // Item from the middle of 2nd vector. +        const SuballocationVectorType::iterator it = m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER ? +            VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetLess()) : +            VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetGreater()); +        if (it != suballocations2nd.end()) +        { +            it->type = VMA_SUBALLOCATION_TYPE_FREE; +            it->userData = VMA_NULL; +            ++m_2ndNullItemsCount; +            m_SumFreeSize += it->size; +            CleanupAfterFree(); +            return; +        } +    } + +    VMA_ASSERT(0 && "Allocation to free not found in linear allocator!"); +} + +void VmaBlockMetadata_Linear::GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) +{ +    outInfo.offset = (VkDeviceSize)allocHandle - 1; +    VmaSuballocation& suballoc = FindSuballocation(outInfo.offset); +    outInfo.size = suballoc.size; +    outInfo.pUserData = suballoc.userData; +} + +void* VmaBlockMetadata_Linear::GetAllocationUserData(VmaAllocHandle allocHandle) const +{ +    return FindSuballocation((VkDeviceSize)allocHandle - 1).userData; +} + +VmaAllocHandle VmaBlockMetadata_Linear::GetAllocationListBegin() const +{ +    // Function only used for defragmentation, which is disabled for this algorithm +    VMA_ASSERT(0); +    return VK_NULL_HANDLE; +} + +VmaAllocHandle VmaBlockMetadata_Linear::GetNextAllocation(VmaAllocHandle prevAlloc) const +{ +    // Function only used for defragmentation, which is disabled for this algorithm +    VMA_ASSERT(0); +    return VK_NULL_HANDLE; +} + +VkDeviceSize VmaBlockMetadata_Linear::GetNextFreeRegionSize(VmaAllocHandle alloc) const +{ +    // Function only used for defragmentation, which is disabled for this algorithm +    VMA_ASSERT(0); +    return 0; +} + +void VmaBlockMetadata_Linear::Clear() +{ +    m_SumFreeSize = GetSize(); +    m_Suballocations0.clear(); +    m_Suballocations1.clear(); +    // Leaving m_1stVectorIndex unchanged - it doesn't matter. 
+    m_2ndVectorMode = SECOND_VECTOR_EMPTY; +    m_1stNullItemsBeginCount = 0; +    m_1stNullItemsMiddleCount = 0; +    m_2ndNullItemsCount = 0; +} + +void VmaBlockMetadata_Linear::SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) +{ +    VmaSuballocation& suballoc = FindSuballocation((VkDeviceSize)allocHandle - 1); +    suballoc.userData = userData; +} + +void VmaBlockMetadata_Linear::DebugLogAllAllocations() const +{ +    const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); +    for (auto it = suballocations1st.begin() + m_1stNullItemsBeginCount; it != suballocations1st.end(); ++it) +        if (it->type != VMA_SUBALLOCATION_TYPE_FREE) +            DebugLogAllocation(it->offset, it->size, it->userData); + +    const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); +    for (auto it = suballocations2nd.begin(); it != suballocations2nd.end(); ++it) +        if (it->type != VMA_SUBALLOCATION_TYPE_FREE) +            DebugLogAllocation(it->offset, it->size, it->userData); +} + +VmaSuballocation& VmaBlockMetadata_Linear::FindSuballocation(VkDeviceSize offset) const +{ +    const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); +    const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + +    VmaSuballocation refSuballoc; +    refSuballoc.offset = offset; +    // Rest of members stays uninitialized intentionally for better performance. + +    // Item from the 1st vector. +    { +        SuballocationVectorType::const_iterator it = VmaBinaryFindSorted( +            suballocations1st.begin() + m_1stNullItemsBeginCount, +            suballocations1st.end(), +            refSuballoc, +            VmaSuballocationOffsetLess()); +        if (it != suballocations1st.end()) +        { +            return const_cast<VmaSuballocation&>(*it); +        } +    } + +    if (m_2ndVectorMode != SECOND_VECTOR_EMPTY) +    { +        // Rest of members stays uninitialized intentionally for better performance. +        SuballocationVectorType::const_iterator it = m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER ? +            VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetLess()) : +            VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetGreater()); +        if (it != suballocations2nd.end()) +        { +            return const_cast<VmaSuballocation&>(*it); +        } +    } + +    VMA_ASSERT(0 && "Allocation not found in linear allocator!"); +    return const_cast<VmaSuballocation&>(suballocations1st.back()); // Should never occur. 
+} + +bool VmaBlockMetadata_Linear::ShouldCompact1st() const +{ +    const size_t nullItemCount = m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount; +    const size_t suballocCount = AccessSuballocations1st().size(); +    return suballocCount > 32 && nullItemCount * 2 >= (suballocCount - nullItemCount) * 3; +} + +void VmaBlockMetadata_Linear::CleanupAfterFree() +{ +    SuballocationVectorType& suballocations1st = AccessSuballocations1st(); +    SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + +    if (IsEmpty()) +    { +        suballocations1st.clear(); +        suballocations2nd.clear(); +        m_1stNullItemsBeginCount = 0; +        m_1stNullItemsMiddleCount = 0; +        m_2ndNullItemsCount = 0; +        m_2ndVectorMode = SECOND_VECTOR_EMPTY; +    } +    else +    { +        const size_t suballoc1stCount = suballocations1st.size(); +        const size_t nullItem1stCount = m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount; +        VMA_ASSERT(nullItem1stCount <= suballoc1stCount); + +        // Find more null items at the beginning of 1st vector. +        while (m_1stNullItemsBeginCount < suballoc1stCount && +            suballocations1st[m_1stNullItemsBeginCount].type == VMA_SUBALLOCATION_TYPE_FREE) +        { +            ++m_1stNullItemsBeginCount; +            --m_1stNullItemsMiddleCount; +        } + +        // Find more null items at the end of 1st vector. +        while (m_1stNullItemsMiddleCount > 0 && +            suballocations1st.back().type == VMA_SUBALLOCATION_TYPE_FREE) +        { +            --m_1stNullItemsMiddleCount; +            suballocations1st.pop_back(); +        } + +        // Find more null items at the end of 2nd vector. +        while (m_2ndNullItemsCount > 0 && +            suballocations2nd.back().type == VMA_SUBALLOCATION_TYPE_FREE) +        { +            --m_2ndNullItemsCount; +            suballocations2nd.pop_back(); +        } + +        // Find more null items at the beginning of 2nd vector. +        while (m_2ndNullItemsCount > 0 && +            suballocations2nd[0].type == VMA_SUBALLOCATION_TYPE_FREE) +        { +            --m_2ndNullItemsCount; +            VmaVectorRemove(suballocations2nd, 0); +        } + +        if (ShouldCompact1st()) +        { +            const size_t nonNullItemCount = suballoc1stCount - nullItem1stCount; +            size_t srcIndex = m_1stNullItemsBeginCount; +            for (size_t dstIndex = 0; dstIndex < nonNullItemCount; ++dstIndex) +            { +                while (suballocations1st[srcIndex].type == VMA_SUBALLOCATION_TYPE_FREE) +                { +                    ++srcIndex; +                } +                if (dstIndex != srcIndex) +                { +                    suballocations1st[dstIndex] = suballocations1st[srcIndex]; +                } +                ++srcIndex; +            } +            suballocations1st.resize(nonNullItemCount); +            m_1stNullItemsBeginCount = 0; +            m_1stNullItemsMiddleCount = 0; +        } + +        // 2nd vector became empty. +        if (suballocations2nd.empty()) +        { +            m_2ndVectorMode = SECOND_VECTOR_EMPTY; +        } + +        // 1st vector became empty. +        if (suballocations1st.size() - m_1stNullItemsBeginCount == 0) +        { +            suballocations1st.clear(); +            m_1stNullItemsBeginCount = 0; + +            if (!suballocations2nd.empty() && m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) +            { +                // Swap 1st with 2nd. Now 2nd is empty. 
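+                // (The actual swap happens by flipping m_1stVectorIndex at the end of this branch,
+                // after the null-item counters have been migrated from 2nd to 1st.)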
+                m_2ndVectorMode = SECOND_VECTOR_EMPTY; +                m_1stNullItemsMiddleCount = m_2ndNullItemsCount; +                while (m_1stNullItemsBeginCount < suballocations2nd.size() && +                    suballocations2nd[m_1stNullItemsBeginCount].type == VMA_SUBALLOCATION_TYPE_FREE) +                { +                    ++m_1stNullItemsBeginCount; +                    --m_1stNullItemsMiddleCount; +                } +                m_2ndNullItemsCount = 0; +                m_1stVectorIndex ^= 1; +            } +        } +    } + +    VMA_HEAVY_ASSERT(Validate()); +} + +bool VmaBlockMetadata_Linear::CreateAllocationRequest_LowerAddress( +    VkDeviceSize allocSize, +    VkDeviceSize allocAlignment, +    VmaSuballocationType allocType, +    uint32_t strategy, +    VmaAllocationRequest* pAllocationRequest) +{ +    const VkDeviceSize blockSize = GetSize(); +    const VkDeviceSize debugMargin = GetDebugMargin(); +    const VkDeviceSize bufferImageGranularity = GetBufferImageGranularity(); +    SuballocationVectorType& suballocations1st = AccessSuballocations1st(); +    SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + +    if (m_2ndVectorMode == SECOND_VECTOR_EMPTY || m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) +    { +        // Try to allocate at the end of 1st vector. + +        VkDeviceSize resultBaseOffset = 0; +        if (!suballocations1st.empty()) +        { +            const VmaSuballocation& lastSuballoc = suballocations1st.back(); +            resultBaseOffset = lastSuballoc.offset + lastSuballoc.size + debugMargin; +        } + +        // Start from offset equal to beginning of free space. +        VkDeviceSize resultOffset = resultBaseOffset; + +        // Apply alignment. +        resultOffset = VmaAlignUp(resultOffset, allocAlignment); + +        // Check previous suballocations for BufferImageGranularity conflicts. +        // Make bigger alignment if necessary. +        if (bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment && !suballocations1st.empty()) +        { +            bool bufferImageGranularityConflict = false; +            for (size_t prevSuballocIndex = suballocations1st.size(); prevSuballocIndex--; ) +            { +                const VmaSuballocation& prevSuballoc = suballocations1st[prevSuballocIndex]; +                if (VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity)) +                { +                    if (VmaIsBufferImageGranularityConflict(prevSuballoc.type, allocType)) +                    { +                        bufferImageGranularityConflict = true; +                        break; +                    } +                } +                else +                    // Already on previous page. +                    break; +            } +            if (bufferImageGranularityConflict) +            { +                resultOffset = VmaAlignUp(resultOffset, bufferImageGranularity); +            } +        } + +        const VkDeviceSize freeSpaceEnd = m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? +            suballocations2nd.back().offset : blockSize; + +        // There is enough free space at the end after alignment. +        if (resultOffset + allocSize + debugMargin <= freeSpaceEnd) +        { +            // Check next suballocations for BufferImageGranularity conflicts. +            // If conflict exists, allocation cannot be made here. 
+            if ((allocSize % bufferImageGranularity || resultOffset % bufferImageGranularity) && m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) +            { +                for (size_t nextSuballocIndex = suballocations2nd.size(); nextSuballocIndex--; ) +                { +                    const VmaSuballocation& nextSuballoc = suballocations2nd[nextSuballocIndex]; +                    if (VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity)) +                    { +                        if (VmaIsBufferImageGranularityConflict(allocType, nextSuballoc.type)) +                        { +                            return false; +                        } +                    } +                    else +                    { +                        // Already on previous page. +                        break; +                    } +                } +            } + +            // All tests passed: Success. +            pAllocationRequest->allocHandle = (VmaAllocHandle)(resultOffset + 1); +            // pAllocationRequest->item, customData unused. +            pAllocationRequest->type = VmaAllocationRequestType::EndOf1st; +            return true; +        } +    } + +    // Wrap-around to end of 2nd vector. Try to allocate there, watching for the +    // beginning of 1st vector as the end of free space. +    if (m_2ndVectorMode == SECOND_VECTOR_EMPTY || m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) +    { +        VMA_ASSERT(!suballocations1st.empty()); + +        VkDeviceSize resultBaseOffset = 0; +        if (!suballocations2nd.empty()) +        { +            const VmaSuballocation& lastSuballoc = suballocations2nd.back(); +            resultBaseOffset = lastSuballoc.offset + lastSuballoc.size + debugMargin; +        } + +        // Start from offset equal to beginning of free space. +        VkDeviceSize resultOffset = resultBaseOffset; + +        // Apply alignment. +        resultOffset = VmaAlignUp(resultOffset, allocAlignment); + +        // Check previous suballocations for BufferImageGranularity conflicts. +        // Make bigger alignment if necessary. +        if (bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment && !suballocations2nd.empty()) +        { +            bool bufferImageGranularityConflict = false; +            for (size_t prevSuballocIndex = suballocations2nd.size(); prevSuballocIndex--; ) +            { +                const VmaSuballocation& prevSuballoc = suballocations2nd[prevSuballocIndex]; +                if (VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity)) +                { +                    if (VmaIsBufferImageGranularityConflict(prevSuballoc.type, allocType)) +                    { +                        bufferImageGranularityConflict = true; +                        break; +                    } +                } +                else +                    // Already on previous page. +                    break; +            } +            if (bufferImageGranularityConflict) +            { +                resultOffset = VmaAlignUp(resultOffset, bufferImageGranularity); +            } +        } + +        size_t index1st = m_1stNullItemsBeginCount; + +        // There is enough free space at the end after alignment. 
+        if ((index1st == suballocations1st.size() && resultOffset + allocSize + debugMargin <= blockSize) || +            (index1st < suballocations1st.size() && resultOffset + allocSize + debugMargin <= suballocations1st[index1st].offset)) +        { +            // Check next suballocations for BufferImageGranularity conflicts. +            // If conflict exists, allocation cannot be made here. +            if (allocSize % bufferImageGranularity || resultOffset % bufferImageGranularity) +            { +                for (size_t nextSuballocIndex = index1st; +                    nextSuballocIndex < suballocations1st.size(); +                    nextSuballocIndex++) +                { +                    const VmaSuballocation& nextSuballoc = suballocations1st[nextSuballocIndex]; +                    if (VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity)) +                    { +                        if (VmaIsBufferImageGranularityConflict(allocType, nextSuballoc.type)) +                        { +                            return false; +                        } +                    } +                    else +                    { +                        // Already on next page. +                        break; +                    } +                } +            } + +            // All tests passed: Success. +            pAllocationRequest->allocHandle = (VmaAllocHandle)(resultOffset + 1); +            pAllocationRequest->type = VmaAllocationRequestType::EndOf2nd; +            // pAllocationRequest->item, customData unused. +            return true; +        } +    } + +    return false; +} + +bool VmaBlockMetadata_Linear::CreateAllocationRequest_UpperAddress( +    VkDeviceSize allocSize, +    VkDeviceSize allocAlignment, +    VmaSuballocationType allocType, +    uint32_t strategy, +    VmaAllocationRequest* pAllocationRequest) +{ +    const VkDeviceSize blockSize = GetSize(); +    const VkDeviceSize bufferImageGranularity = GetBufferImageGranularity(); +    SuballocationVectorType& suballocations1st = AccessSuballocations1st(); +    SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + +    if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) +    { +        VMA_ASSERT(0 && "Trying to use pool with linear algorithm as double stack, while it is already being used as ring buffer."); +        return false; +    } + +    // Try to allocate before 2nd.back(), or end of block if 2nd.empty(). +    if (allocSize > blockSize) +    { +        return false; +    } +    VkDeviceSize resultBaseOffset = blockSize - allocSize; +    if (!suballocations2nd.empty()) +    { +        const VmaSuballocation& lastSuballoc = suballocations2nd.back(); +        resultBaseOffset = lastSuballoc.offset - allocSize; +        if (allocSize > lastSuballoc.offset) +        { +            return false; +        } +    } + +    // Start from offset equal to end of free space. +    VkDeviceSize resultOffset = resultBaseOffset; + +    const VkDeviceSize debugMargin = GetDebugMargin(); + +    // Apply debugMargin at the end. +    if (debugMargin > 0) +    { +        if (resultOffset < debugMargin) +        { +            return false; +        } +        resultOffset -= debugMargin; +    } + +    // Apply alignment. +    resultOffset = VmaAlignDown(resultOffset, allocAlignment); + +    // Check next suballocations from 2nd for BufferImageGranularity conflicts. +    // Make bigger alignment if necessary. 
+    if (bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment && !suballocations2nd.empty()) +    { +        bool bufferImageGranularityConflict = false; +        for (size_t nextSuballocIndex = suballocations2nd.size(); nextSuballocIndex--; ) +        { +            const VmaSuballocation& nextSuballoc = suballocations2nd[nextSuballocIndex]; +            if (VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity)) +            { +                if (VmaIsBufferImageGranularityConflict(nextSuballoc.type, allocType)) +                { +                    bufferImageGranularityConflict = true; +                    break; +                } +            } +            else +                // Already on previous page. +                break; +        } +        if (bufferImageGranularityConflict) +        { +            resultOffset = VmaAlignDown(resultOffset, bufferImageGranularity); +        } +    } + +    // There is enough free space. +    const VkDeviceSize endOf1st = !suballocations1st.empty() ? +        suballocations1st.back().offset + suballocations1st.back().size : +        0; +    if (endOf1st + debugMargin <= resultOffset) +    { +        // Check previous suballocations for BufferImageGranularity conflicts. +        // If conflict exists, allocation cannot be made here. +        if (bufferImageGranularity > 1) +        { +            for (size_t prevSuballocIndex = suballocations1st.size(); prevSuballocIndex--; ) +            { +                const VmaSuballocation& prevSuballoc = suballocations1st[prevSuballocIndex]; +                if (VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity)) +                { +                    if (VmaIsBufferImageGranularityConflict(allocType, prevSuballoc.type)) +                    { +                        return false; +                    } +                } +                else +                { +                    // Already on next page. +                    break; +                } +            } +        } + +        // All tests passed: Success. +        pAllocationRequest->allocHandle = (VmaAllocHandle)(resultOffset + 1); +        // pAllocationRequest->item unused. +        pAllocationRequest->type = VmaAllocationRequestType::UpperAddress; +        return true; +    } + +    return false; +} +#endif // _VMA_BLOCK_METADATA_LINEAR_FUNCTIONS +#endif // _VMA_BLOCK_METADATA_LINEAR + +#ifndef _VMA_BLOCK_METADATA_TLSF +// To not search current larger region if first allocation won't succeed and skip to smaller range +// use with VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT as strategy in CreateAllocationRequest(). +// When fragmentation and reusal of previous blocks doesn't matter then use with +// VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT for fastest alloc time possible. 
+class VmaBlockMetadata_TLSF : public VmaBlockMetadata +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockMetadata_TLSF) +public: +    VmaBlockMetadata_TLSF(const VkAllocationCallbacks* pAllocationCallbacks, +        VkDeviceSize bufferImageGranularity, bool isVirtual); +    virtual ~VmaBlockMetadata_TLSF(); + +    size_t GetAllocationCount() const override { return m_AllocCount; } +    size_t GetFreeRegionsCount() const override { return m_BlocksFreeCount + 1; } +    VkDeviceSize GetSumFreeSize() const override { return m_BlocksFreeSize + m_NullBlock->size; } +    bool IsEmpty() const override { return m_NullBlock->offset == 0; } +    VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const override { return ((Block*)allocHandle)->offset; } + +    void Init(VkDeviceSize size) override; +    bool Validate() const override; + +    void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const override; +    void AddStatistics(VmaStatistics& inoutStats) const override; + +#if VMA_STATS_STRING_ENABLED +    void PrintDetailedMap(class VmaJsonWriter& json) const override; +#endif + +    bool CreateAllocationRequest( +        VkDeviceSize allocSize, +        VkDeviceSize allocAlignment, +        bool upperAddress, +        VmaSuballocationType allocType, +        uint32_t strategy, +        VmaAllocationRequest* pAllocationRequest) override; + +    VkResult CheckCorruption(const void* pBlockData) override; +    void Alloc( +        const VmaAllocationRequest& request, +        VmaSuballocationType type, +        void* userData) override; + +    void Free(VmaAllocHandle allocHandle) override; +    void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) override; +    void* GetAllocationUserData(VmaAllocHandle allocHandle) const override; +    VmaAllocHandle GetAllocationListBegin() const override; +    VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const override; +    VkDeviceSize GetNextFreeRegionSize(VmaAllocHandle alloc) const override; +    void Clear() override; +    void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) override; +    void DebugLogAllAllocations() const override; + +private: +    // According to original paper it should be preferable 4 or 5: +    // M. Masmano, I. Ripoll, A. Crespo, and J. 
Real "TLSF: a New Dynamic Memory Allocator for Real-Time Systems" +    // http://www.gii.upv.es/tlsf/files/ecrts04_tlsf.pdf +    static const uint8_t SECOND_LEVEL_INDEX = 5; +    static const uint16_t SMALL_BUFFER_SIZE = 256; +    static const uint32_t INITIAL_BLOCK_ALLOC_COUNT = 16; +    static const uint8_t MEMORY_CLASS_SHIFT = 7; +    static const uint8_t MAX_MEMORY_CLASSES = 65 - MEMORY_CLASS_SHIFT; + +    class Block +    { +    public: +        VkDeviceSize offset; +        VkDeviceSize size; +        Block* prevPhysical; +        Block* nextPhysical; + +        void MarkFree() { prevFree = VMA_NULL; } +        void MarkTaken() { prevFree = this; } +        bool IsFree() const { return prevFree != this; } +        void*& UserData() { VMA_HEAVY_ASSERT(!IsFree()); return userData; } +        Block*& PrevFree() { return prevFree; } +        Block*& NextFree() { VMA_HEAVY_ASSERT(IsFree()); return nextFree; } + +    private: +        Block* prevFree; // Address of the same block here indicates that block is taken +        union +        { +            Block* nextFree; +            void* userData; +        }; +    }; + +    size_t m_AllocCount; +    // Total number of free blocks besides null block +    size_t m_BlocksFreeCount; +    // Total size of free blocks excluding null block +    VkDeviceSize m_BlocksFreeSize; +    uint32_t m_IsFreeBitmap; +    uint8_t m_MemoryClasses; +    uint32_t m_InnerIsFreeBitmap[MAX_MEMORY_CLASSES]; +    uint32_t m_ListsCount; +    /* +    * 0: 0-3 lists for small buffers +    * 1+: 0-(2^SLI-1) lists for normal buffers +    */ +    Block** m_FreeList; +    VmaPoolAllocator<Block> m_BlockAllocator; +    Block* m_NullBlock; +    VmaBlockBufferImageGranularity m_GranularityHandler; + +    uint8_t SizeToMemoryClass(VkDeviceSize size) const; +    uint16_t SizeToSecondIndex(VkDeviceSize size, uint8_t memoryClass) const; +    uint32_t GetListIndex(uint8_t memoryClass, uint16_t secondIndex) const; +    uint32_t GetListIndex(VkDeviceSize size) const; + +    void RemoveFreeBlock(Block* block); +    void InsertFreeBlock(Block* block); +    void MergeBlock(Block* block, Block* prev); + +    Block* FindFreeBlock(VkDeviceSize size, uint32_t& listIndex) const; +    bool CheckBlock( +        Block& block, +        uint32_t listIndex, +        VkDeviceSize allocSize, +        VkDeviceSize allocAlignment, +        VmaSuballocationType allocType, +        VmaAllocationRequest* pAllocationRequest); +}; + +#ifndef _VMA_BLOCK_METADATA_TLSF_FUNCTIONS +VmaBlockMetadata_TLSF::VmaBlockMetadata_TLSF(const VkAllocationCallbacks* pAllocationCallbacks, +    VkDeviceSize bufferImageGranularity, bool isVirtual) +    : VmaBlockMetadata(pAllocationCallbacks, bufferImageGranularity, isVirtual), +    m_AllocCount(0), +    m_BlocksFreeCount(0), +    m_BlocksFreeSize(0), +    m_IsFreeBitmap(0), +    m_MemoryClasses(0), +    m_ListsCount(0), +    m_FreeList(VMA_NULL), +    m_BlockAllocator(pAllocationCallbacks, INITIAL_BLOCK_ALLOC_COUNT), +    m_NullBlock(VMA_NULL), +    m_GranularityHandler(bufferImageGranularity) {} + +VmaBlockMetadata_TLSF::~VmaBlockMetadata_TLSF() +{ +    if (m_FreeList) +        vma_delete_array(GetAllocationCallbacks(), m_FreeList, m_ListsCount); +    m_GranularityHandler.Destroy(GetAllocationCallbacks()); +} + +void VmaBlockMetadata_TLSF::Init(VkDeviceSize size) +{ +    VmaBlockMetadata::Init(size); + +    if (!IsVirtual()) +        m_GranularityHandler.Init(GetAllocationCallbacks(), size); + +    m_NullBlock = m_BlockAllocator.Alloc(); +    m_NullBlock->size = size; +    
m_NullBlock->offset = 0; +    m_NullBlock->prevPhysical = VMA_NULL; +    m_NullBlock->nextPhysical = VMA_NULL; +    m_NullBlock->MarkFree(); +    m_NullBlock->NextFree() = VMA_NULL; +    m_NullBlock->PrevFree() = VMA_NULL; +    uint8_t memoryClass = SizeToMemoryClass(size); +    uint16_t sli = SizeToSecondIndex(size, memoryClass); +    m_ListsCount = (memoryClass == 0 ? 0 : (memoryClass - 1) * (1UL << SECOND_LEVEL_INDEX) + sli) + 1; +    if (IsVirtual()) +        m_ListsCount += 1UL << SECOND_LEVEL_INDEX; +    else +        m_ListsCount += 4; + +    m_MemoryClasses = memoryClass + uint8_t(2); +    memset(m_InnerIsFreeBitmap, 0, MAX_MEMORY_CLASSES * sizeof(uint32_t)); + +    m_FreeList = vma_new_array(GetAllocationCallbacks(), Block*, m_ListsCount); +    memset(m_FreeList, 0, m_ListsCount * sizeof(Block*)); +} + +bool VmaBlockMetadata_TLSF::Validate() const +{ +    VMA_VALIDATE(GetSumFreeSize() <= GetSize()); + +    VkDeviceSize calculatedSize = m_NullBlock->size; +    VkDeviceSize calculatedFreeSize = m_NullBlock->size; +    size_t allocCount = 0; +    size_t freeCount = 0; + +    // Check integrity of free lists +    for (uint32_t list = 0; list < m_ListsCount; ++list) +    { +        Block* block = m_FreeList[list]; +        if (block != VMA_NULL) +        { +            VMA_VALIDATE(block->IsFree()); +            VMA_VALIDATE(block->PrevFree() == VMA_NULL); +            while (block->NextFree()) +            { +                VMA_VALIDATE(block->NextFree()->IsFree()); +                VMA_VALIDATE(block->NextFree()->PrevFree() == block); +                block = block->NextFree(); +            } +        } +    } + +    VkDeviceSize nextOffset = m_NullBlock->offset; +    auto validateCtx = m_GranularityHandler.StartValidation(GetAllocationCallbacks(), IsVirtual()); + +    VMA_VALIDATE(m_NullBlock->nextPhysical == VMA_NULL); +    if (m_NullBlock->prevPhysical) +    { +        VMA_VALIDATE(m_NullBlock->prevPhysical->nextPhysical == m_NullBlock); +    } +    // Check all blocks +    for (Block* prev = m_NullBlock->prevPhysical; prev != VMA_NULL; prev = prev->prevPhysical) +    { +        VMA_VALIDATE(prev->offset + prev->size == nextOffset); +        nextOffset = prev->offset; +        calculatedSize += prev->size; + +        uint32_t listIndex = GetListIndex(prev->size); +        if (prev->IsFree()) +        { +            ++freeCount; +            // Check if free block belongs to free list +            Block* freeBlock = m_FreeList[listIndex]; +            VMA_VALIDATE(freeBlock != VMA_NULL); + +            bool found = false; +            do +            { +                if (freeBlock == prev) +                    found = true; + +                freeBlock = freeBlock->NextFree(); +            } while (!found && freeBlock != VMA_NULL); + +            VMA_VALIDATE(found); +            calculatedFreeSize += prev->size; +        } +        else +        { +            ++allocCount; +            // Check if taken block is not on a free list +            Block* freeBlock = m_FreeList[listIndex]; +            while (freeBlock) +            { +                VMA_VALIDATE(freeBlock != prev); +                freeBlock = freeBlock->NextFree(); +            } + +            if (!IsVirtual()) +            { +                VMA_VALIDATE(m_GranularityHandler.Validate(validateCtx, prev->offset, prev->size)); +            } +        } + +        if (prev->prevPhysical) +        { +            VMA_VALIDATE(prev->prevPhysical->nextPhysical == prev); +        } +    } + +    if (!IsVirtual()) +    
{ +        VMA_VALIDATE(m_GranularityHandler.FinishValidation(validateCtx)); +    } + +    VMA_VALIDATE(nextOffset == 0); +    VMA_VALIDATE(calculatedSize == GetSize()); +    VMA_VALIDATE(calculatedFreeSize == GetSumFreeSize()); +    VMA_VALIDATE(allocCount == m_AllocCount); +    VMA_VALIDATE(freeCount == m_BlocksFreeCount); + +    return true; +} + +void VmaBlockMetadata_TLSF::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const +{ +    inoutStats.statistics.blockCount++; +    inoutStats.statistics.blockBytes += GetSize(); +    if (m_NullBlock->size > 0) +        VmaAddDetailedStatisticsUnusedRange(inoutStats, m_NullBlock->size); + +    for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical) +    { +        if (block->IsFree()) +            VmaAddDetailedStatisticsUnusedRange(inoutStats, block->size); +        else +            VmaAddDetailedStatisticsAllocation(inoutStats, block->size); +    } +} + +void VmaBlockMetadata_TLSF::AddStatistics(VmaStatistics& inoutStats) const +{ +    inoutStats.blockCount++; +    inoutStats.allocationCount += (uint32_t)m_AllocCount; +    inoutStats.blockBytes += GetSize(); +    inoutStats.allocationBytes += GetSize() - GetSumFreeSize(); +} + +#if VMA_STATS_STRING_ENABLED +void VmaBlockMetadata_TLSF::PrintDetailedMap(class VmaJsonWriter& json) const +{ +    size_t blockCount = m_AllocCount + m_BlocksFreeCount; +    VmaStlAllocator<Block*> allocator(GetAllocationCallbacks()); +    VmaVector<Block*, VmaStlAllocator<Block*>> blockList(blockCount, allocator); + +    size_t i = blockCount; +    for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical) +    { +        blockList[--i] = block; +    } +    VMA_ASSERT(i == 0); + +    VmaDetailedStatistics stats; +    VmaClearDetailedStatistics(stats); +    AddDetailedStatistics(stats); + +    PrintDetailedMap_Begin(json, +        stats.statistics.blockBytes - stats.statistics.allocationBytes, +        stats.statistics.allocationCount, +        stats.unusedRangeCount); + +    for (; i < blockCount; ++i) +    { +        Block* block = blockList[i]; +        if (block->IsFree()) +            PrintDetailedMap_UnusedRange(json, block->offset, block->size); +        else +            PrintDetailedMap_Allocation(json, block->offset, block->size, block->UserData()); +    } +    if (m_NullBlock->size > 0) +        PrintDetailedMap_UnusedRange(json, m_NullBlock->offset, m_NullBlock->size); + +    PrintDetailedMap_End(json); +} +#endif + +bool VmaBlockMetadata_TLSF::CreateAllocationRequest( +    VkDeviceSize allocSize, +    VkDeviceSize allocAlignment, +    bool upperAddress, +    VmaSuballocationType allocType, +    uint32_t strategy, +    VmaAllocationRequest* pAllocationRequest) +{ +    VMA_ASSERT(allocSize > 0 && "Cannot allocate empty block!"); +    VMA_ASSERT(!upperAddress && "VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT can be used only with linear algorithm."); + +    // For small granularity round up +    if (!IsVirtual()) +        m_GranularityHandler.RoundupAllocRequest(allocType, allocSize, allocAlignment); + +    allocSize += GetDebugMargin(); +    // Quick check for too small pool +    if (allocSize > GetSumFreeSize()) +        return false; + +    // If no free blocks in pool then check only null block +    if (m_BlocksFreeCount == 0) +        return CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest); + +    // Round up to the next block +    VkDeviceSize sizeForNextList = allocSize; +    
VkDeviceSize smallSizeStep = VkDeviceSize(SMALL_BUFFER_SIZE / (IsVirtual() ? 1 << SECOND_LEVEL_INDEX : 4)); +    if (allocSize > SMALL_BUFFER_SIZE) +    { +        sizeForNextList += (1ULL << (VMA_BITSCAN_MSB(allocSize) - SECOND_LEVEL_INDEX)); +    } +    else if (allocSize > SMALL_BUFFER_SIZE - smallSizeStep) +        sizeForNextList = SMALL_BUFFER_SIZE + 1; +    else +        sizeForNextList += smallSizeStep; + +    uint32_t nextListIndex = m_ListsCount; +    uint32_t prevListIndex = m_ListsCount; +    Block* nextListBlock = VMA_NULL; +    Block* prevListBlock = VMA_NULL; + +    // Check blocks according to strategies +    if (strategy & VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT) +    { +        // Quick check for larger block first +        nextListBlock = FindFreeBlock(sizeForNextList, nextListIndex); +        if (nextListBlock != VMA_NULL && CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) +            return true; + +        // If not fitted then null block +        if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest)) +            return true; + +        // Null block failed, search larger bucket +        while (nextListBlock) +        { +            if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) +                return true; +            nextListBlock = nextListBlock->NextFree(); +        } + +        // Failed again, check best fit bucket +        prevListBlock = FindFreeBlock(allocSize, prevListIndex); +        while (prevListBlock) +        { +            if (CheckBlock(*prevListBlock, prevListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) +                return true; +            prevListBlock = prevListBlock->NextFree(); +        } +    } +    else if (strategy & VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT) +    { +        // Check best fit bucket +        prevListBlock = FindFreeBlock(allocSize, prevListIndex); +        while (prevListBlock) +        { +            if (CheckBlock(*prevListBlock, prevListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) +                return true; +            prevListBlock = prevListBlock->NextFree(); +        } + +        // If failed check null block +        if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest)) +            return true; + +        // Check larger bucket +        nextListBlock = FindFreeBlock(sizeForNextList, nextListIndex); +        while (nextListBlock) +        { +            if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) +                return true; +            nextListBlock = nextListBlock->NextFree(); +        } +    } +    else if (strategy & VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT ) +    { +        // Perform search from the start +        VmaStlAllocator<Block*> allocator(GetAllocationCallbacks()); +        VmaVector<Block*, VmaStlAllocator<Block*>> blockList(m_BlocksFreeCount, allocator); + +        size_t i = m_BlocksFreeCount; +        for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical) +        { +            if (block->IsFree() && block->size >= allocSize) +                blockList[--i] = block; +        } + +        for (; i < m_BlocksFreeCount; ++i) +        { +            Block& block = *blockList[i]; +            if (CheckBlock(block, 
GetListIndex(block.size), allocSize, allocAlignment, allocType, pAllocationRequest)) +                return true; +        } + +        // If failed check null block +        if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest)) +            return true; + +        // Whole range searched, no more memory +        return false; +    } +    else +    { +        // Check larger bucket +        nextListBlock = FindFreeBlock(sizeForNextList, nextListIndex); +        while (nextListBlock) +        { +            if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) +                return true; +            nextListBlock = nextListBlock->NextFree(); +        } + +        // If failed check null block +        if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest)) +            return true; + +        // Check best fit bucket +        prevListBlock = FindFreeBlock(allocSize, prevListIndex); +        while (prevListBlock) +        { +            if (CheckBlock(*prevListBlock, prevListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) +                return true; +            prevListBlock = prevListBlock->NextFree(); +        } +    } + +    // Worst case, full search has to be done +    while (++nextListIndex < m_ListsCount) +    { +        nextListBlock = m_FreeList[nextListIndex]; +        while (nextListBlock) +        { +            if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) +                return true; +            nextListBlock = nextListBlock->NextFree(); +        } +    } + +    // No more memory sadly +    return false; +} + +VkResult VmaBlockMetadata_TLSF::CheckCorruption(const void* pBlockData) +{ +    for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical) +    { +        if (!block->IsFree()) +        { +            if (!VmaValidateMagicValue(pBlockData, block->offset + block->size)) +            { +                VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!"); +                return VK_ERROR_UNKNOWN_COPY; +            } +        } +    } + +    return VK_SUCCESS; +} + +void VmaBlockMetadata_TLSF::Alloc( +    const VmaAllocationRequest& request, +    VmaSuballocationType type, +    void* userData) +{ +    VMA_ASSERT(request.type == VmaAllocationRequestType::TLSF); + +    // Get block and pop it from the free list +    Block* currentBlock = (Block*)request.allocHandle; +    VkDeviceSize offset = request.algorithmData; +    VMA_ASSERT(currentBlock != VMA_NULL); +    VMA_ASSERT(currentBlock->offset <= offset); + +    if (currentBlock != m_NullBlock) +        RemoveFreeBlock(currentBlock); + +    VkDeviceSize debugMargin = GetDebugMargin(); +    VkDeviceSize missingAlignment = offset - currentBlock->offset; + +    // Append missing alignment to prev block or create new one +    if (missingAlignment) +    { +        Block* prevBlock = currentBlock->prevPhysical; +        VMA_ASSERT(prevBlock != VMA_NULL && "There should be no missing alignment at offset 0!"); + +        if (prevBlock->IsFree() && prevBlock->size != debugMargin) +        { +            uint32_t oldList = GetListIndex(prevBlock->size); +            prevBlock->size += missingAlignment; +            // Check if new size crosses list bucket +            if (oldList != GetListIndex(prevBlock->size)) +            { +                
prevBlock->size -= missingAlignment; +                RemoveFreeBlock(prevBlock); +                prevBlock->size += missingAlignment; +                InsertFreeBlock(prevBlock); +            } +            else +                m_BlocksFreeSize += missingAlignment; +        } +        else +        { +            Block* newBlock = m_BlockAllocator.Alloc(); +            currentBlock->prevPhysical = newBlock; +            prevBlock->nextPhysical = newBlock; +            newBlock->prevPhysical = prevBlock; +            newBlock->nextPhysical = currentBlock; +            newBlock->size = missingAlignment; +            newBlock->offset = currentBlock->offset; +            newBlock->MarkTaken(); + +            InsertFreeBlock(newBlock); +        } + +        currentBlock->size -= missingAlignment; +        currentBlock->offset += missingAlignment; +    } + +    VkDeviceSize size = request.size + debugMargin; +    if (currentBlock->size == size) +    { +        if (currentBlock == m_NullBlock) +        { +            // Setup new null block +            m_NullBlock = m_BlockAllocator.Alloc(); +            m_NullBlock->size = 0; +            m_NullBlock->offset = currentBlock->offset + size; +            m_NullBlock->prevPhysical = currentBlock; +            m_NullBlock->nextPhysical = VMA_NULL; +            m_NullBlock->MarkFree(); +            m_NullBlock->PrevFree() = VMA_NULL; +            m_NullBlock->NextFree() = VMA_NULL; +            currentBlock->nextPhysical = m_NullBlock; +            currentBlock->MarkTaken(); +        } +    } +    else +    { +        VMA_ASSERT(currentBlock->size > size && "Proper block already found, shouldn't find smaller one!"); + +        // Create new free block +        Block* newBlock = m_BlockAllocator.Alloc(); +        newBlock->size = currentBlock->size - size; +        newBlock->offset = currentBlock->offset + size; +        newBlock->prevPhysical = currentBlock; +        newBlock->nextPhysical = currentBlock->nextPhysical; +        currentBlock->nextPhysical = newBlock; +        currentBlock->size = size; + +        if (currentBlock == m_NullBlock) +        { +            m_NullBlock = newBlock; +            m_NullBlock->MarkFree(); +            m_NullBlock->NextFree() = VMA_NULL; +            m_NullBlock->PrevFree() = VMA_NULL; +            currentBlock->MarkTaken(); +        } +        else +        { +            newBlock->nextPhysical->prevPhysical = newBlock; +            newBlock->MarkTaken(); +            InsertFreeBlock(newBlock); +        } +    } +    currentBlock->UserData() = userData; + +    if (debugMargin > 0) +    { +        currentBlock->size -= debugMargin; +        Block* newBlock = m_BlockAllocator.Alloc(); +        newBlock->size = debugMargin; +        newBlock->offset = currentBlock->offset + currentBlock->size; +        newBlock->prevPhysical = currentBlock; +        newBlock->nextPhysical = currentBlock->nextPhysical; +        newBlock->MarkTaken(); +        currentBlock->nextPhysical->prevPhysical = newBlock; +        currentBlock->nextPhysical = newBlock; +        InsertFreeBlock(newBlock); +    } + +    if (!IsVirtual()) +        m_GranularityHandler.AllocPages((uint8_t)(uintptr_t)request.customData, +            currentBlock->offset, currentBlock->size); +    ++m_AllocCount; +} + +void VmaBlockMetadata_TLSF::Free(VmaAllocHandle allocHandle) +{ +    Block* block = (Block*)allocHandle; +    Block* next = block->nextPhysical; +    VMA_ASSERT(!block->IsFree() && "Block is already free!"); + +    if (!IsVirtual()) +        
m_GranularityHandler.FreePages(block->offset, block->size); +    --m_AllocCount; + +    VkDeviceSize debugMargin = GetDebugMargin(); +    if (debugMargin > 0) +    { +        RemoveFreeBlock(next); +        MergeBlock(next, block); +        block = next; +        next = next->nextPhysical; +    } + +    // Try merging +    Block* prev = block->prevPhysical; +    if (prev != VMA_NULL && prev->IsFree() && prev->size != debugMargin) +    { +        RemoveFreeBlock(prev); +        MergeBlock(block, prev); +    } + +    if (!next->IsFree()) +        InsertFreeBlock(block); +    else if (next == m_NullBlock) +        MergeBlock(m_NullBlock, block); +    else +    { +        RemoveFreeBlock(next); +        MergeBlock(next, block); +        InsertFreeBlock(next); +    } +} + +void VmaBlockMetadata_TLSF::GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) +{ +    Block* block = (Block*)allocHandle; +    VMA_ASSERT(!block->IsFree() && "Cannot get allocation info for free block!"); +    outInfo.offset = block->offset; +    outInfo.size = block->size; +    outInfo.pUserData = block->UserData(); +} + +void* VmaBlockMetadata_TLSF::GetAllocationUserData(VmaAllocHandle allocHandle) const +{ +    Block* block = (Block*)allocHandle; +    VMA_ASSERT(!block->IsFree() && "Cannot get user data for free block!"); +    return block->UserData(); +} + +VmaAllocHandle VmaBlockMetadata_TLSF::GetAllocationListBegin() const +{ +    if (m_AllocCount == 0) +        return VK_NULL_HANDLE; + +    for (Block* block = m_NullBlock->prevPhysical; block; block = block->prevPhysical) +    { +        if (!block->IsFree()) +            return (VmaAllocHandle)block; +    } +    VMA_ASSERT(false && "If m_AllocCount > 0 then should find any allocation!"); +    return VK_NULL_HANDLE; +} + +VmaAllocHandle VmaBlockMetadata_TLSF::GetNextAllocation(VmaAllocHandle prevAlloc) const +{ +    Block* startBlock = (Block*)prevAlloc; +    VMA_ASSERT(!startBlock->IsFree() && "Incorrect block!"); + +    for (Block* block = startBlock->prevPhysical; block; block = block->prevPhysical) +    { +        if (!block->IsFree()) +            return (VmaAllocHandle)block; +    } +    return VK_NULL_HANDLE; +} + +VkDeviceSize VmaBlockMetadata_TLSF::GetNextFreeRegionSize(VmaAllocHandle alloc) const +{ +    Block* block = (Block*)alloc; +    VMA_ASSERT(!block->IsFree() && "Incorrect block!"); + +    if (block->prevPhysical) +        return block->prevPhysical->IsFree() ? 
block->prevPhysical->size : 0; +    return 0; +} + +void VmaBlockMetadata_TLSF::Clear() +{ +    m_AllocCount = 0; +    m_BlocksFreeCount = 0; +    m_BlocksFreeSize = 0; +    m_IsFreeBitmap = 0; +    m_NullBlock->offset = 0; +    m_NullBlock->size = GetSize(); +    Block* block = m_NullBlock->prevPhysical; +    m_NullBlock->prevPhysical = VMA_NULL; +    while (block) +    { +        Block* prev = block->prevPhysical; +        m_BlockAllocator.Free(block); +        block = prev; +    } +    memset(m_FreeList, 0, m_ListsCount * sizeof(Block*)); +    memset(m_InnerIsFreeBitmap, 0, m_MemoryClasses * sizeof(uint32_t)); +    m_GranularityHandler.Clear(); +} + +void VmaBlockMetadata_TLSF::SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) +{ +    Block* block = (Block*)allocHandle; +    VMA_ASSERT(!block->IsFree() && "Trying to set user data for not allocated block!"); +    block->UserData() = userData; +} + +void VmaBlockMetadata_TLSF::DebugLogAllAllocations() const +{ +    for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical) +        if (!block->IsFree()) +            DebugLogAllocation(block->offset, block->size, block->UserData()); +} + +uint8_t VmaBlockMetadata_TLSF::SizeToMemoryClass(VkDeviceSize size) const +{ +    if (size > SMALL_BUFFER_SIZE) +        return uint8_t(VMA_BITSCAN_MSB(size) - MEMORY_CLASS_SHIFT); +    return 0; +} + +uint16_t VmaBlockMetadata_TLSF::SizeToSecondIndex(VkDeviceSize size, uint8_t memoryClass) const +{ +    if (memoryClass == 0) +    { +        if (IsVirtual()) +            return static_cast<uint16_t>((size - 1) / 8); +        else +            return static_cast<uint16_t>((size - 1) / 64); +    } +    return static_cast<uint16_t>((size >> (memoryClass + MEMORY_CLASS_SHIFT - SECOND_LEVEL_INDEX)) ^ (1U << SECOND_LEVEL_INDEX)); +} + +uint32_t VmaBlockMetadata_TLSF::GetListIndex(uint8_t memoryClass, uint16_t secondIndex) const +{ +    if (memoryClass == 0) +        return secondIndex; + +    const uint32_t index = static_cast<uint32_t>(memoryClass - 1) * (1 << SECOND_LEVEL_INDEX) + secondIndex; +    if (IsVirtual()) +        return index + (1 << SECOND_LEVEL_INDEX); +    else +        return index + 4; +} + +uint32_t VmaBlockMetadata_TLSF::GetListIndex(VkDeviceSize size) const +{ +    uint8_t memoryClass = SizeToMemoryClass(size); +    return GetListIndex(memoryClass, SizeToSecondIndex(size, memoryClass)); +} + +void VmaBlockMetadata_TLSF::RemoveFreeBlock(Block* block) +{ +    VMA_ASSERT(block != m_NullBlock); +    VMA_ASSERT(block->IsFree()); + +    if (block->NextFree() != VMA_NULL) +        block->NextFree()->PrevFree() = block->PrevFree(); +    if (block->PrevFree() != VMA_NULL) +        block->PrevFree()->NextFree() = block->NextFree(); +    else +    { +        uint8_t memClass = SizeToMemoryClass(block->size); +        uint16_t secondIndex = SizeToSecondIndex(block->size, memClass); +        uint32_t index = GetListIndex(memClass, secondIndex); +        VMA_ASSERT(m_FreeList[index] == block); +        m_FreeList[index] = block->NextFree(); +        if (block->NextFree() == VMA_NULL) +        { +            m_InnerIsFreeBitmap[memClass] &= ~(1U << secondIndex); +            if (m_InnerIsFreeBitmap[memClass] == 0) +                m_IsFreeBitmap &= ~(1UL << memClass); +        } +    } +    block->MarkTaken(); +    block->UserData() = VMA_NULL; +    --m_BlocksFreeCount; +    m_BlocksFreeSize -= block->size; +} + +void VmaBlockMetadata_TLSF::InsertFreeBlock(Block* block) +{ +    VMA_ASSERT(block != 
m_NullBlock);
+    VMA_ASSERT(!block->IsFree() && "Cannot insert block twice!");
+
+    uint8_t memClass = SizeToMemoryClass(block->size);
+    uint16_t secondIndex = SizeToSecondIndex(block->size, memClass);
+    uint32_t index = GetListIndex(memClass, secondIndex);
+    VMA_ASSERT(index < m_ListsCount);
+    block->PrevFree() = VMA_NULL;
+    block->NextFree() = m_FreeList[index];
+    m_FreeList[index] = block;
+    if (block->NextFree() != VMA_NULL)
+        block->NextFree()->PrevFree() = block;
+    else
+    {
+        m_InnerIsFreeBitmap[memClass] |= 1U << secondIndex;
+        m_IsFreeBitmap |= 1UL << memClass;
+    }
+    ++m_BlocksFreeCount;
+    m_BlocksFreeSize += block->size;
+}
+
+void VmaBlockMetadata_TLSF::MergeBlock(Block* block, Block* prev)
+{
+    VMA_ASSERT(block->prevPhysical == prev && "Cannot merge separate physical regions!");
+    VMA_ASSERT(!prev->IsFree() && "Cannot merge block that belongs to free list!");
+
+    block->offset = prev->offset;
+    block->size += prev->size;
+    block->prevPhysical = prev->prevPhysical;
+    if (block->prevPhysical)
+        block->prevPhysical->nextPhysical = block;
+    m_BlockAllocator.Free(prev);
+}
+
+VmaBlockMetadata_TLSF::Block* VmaBlockMetadata_TLSF::FindFreeBlock(VkDeviceSize size, uint32_t& listIndex) const
+{
+    uint8_t memoryClass = SizeToMemoryClass(size);
+    uint32_t innerFreeMap = m_InnerIsFreeBitmap[memoryClass] & (~0U << SizeToSecondIndex(size, memoryClass));
+    if (!innerFreeMap)
+    {
+        // Check higher levels for available blocks
+        uint32_t freeMap = m_IsFreeBitmap & (~0UL << (memoryClass + 1));
+        if (!freeMap)
+            return VMA_NULL; // No more memory available
+
+        // Find lowest free region
+        memoryClass = VMA_BITSCAN_LSB(freeMap);
+        innerFreeMap = m_InnerIsFreeBitmap[memoryClass];
+        VMA_ASSERT(innerFreeMap != 0);
+    }
+    // Find lowest free subregion
+    listIndex = GetListIndex(memoryClass, VMA_BITSCAN_LSB(innerFreeMap));
+    VMA_ASSERT(m_FreeList[listIndex]);
+    return m_FreeList[listIndex];
+}
+
+bool VmaBlockMetadata_TLSF::CheckBlock(
+    Block& block,
+    uint32_t listIndex,
+    VkDeviceSize allocSize,
+    VkDeviceSize allocAlignment,
+    VmaSuballocationType allocType,
+    VmaAllocationRequest* pAllocationRequest)
+{
+    VMA_ASSERT(block.IsFree() && "Block is already taken!");
+
+    VkDeviceSize alignedOffset = VmaAlignUp(block.offset, allocAlignment);
+    if (block.size < allocSize + alignedOffset - block.offset)
+        return false;
+
+    // Check for granularity conflicts
+    if (!IsVirtual() &&
+        m_GranularityHandler.CheckConflictAndAlignUp(alignedOffset, allocSize, block.offset, block.size, allocType))
+        return false;
+
+    // Alloc successful
+    pAllocationRequest->type = VmaAllocationRequestType::TLSF;
+    pAllocationRequest->allocHandle = (VmaAllocHandle)&block;
+    pAllocationRequest->size = allocSize - GetDebugMargin();
+    pAllocationRequest->customData = (void*)allocType;
+    pAllocationRequest->algorithmData = alignedOffset;
+
+    // Place block at the start of list if it's normal block
+    if (listIndex != m_ListsCount && block.PrevFree())
+    {
+        block.PrevFree()->NextFree() = block.NextFree();
+        if (block.NextFree())
+            block.NextFree()->PrevFree() = block.PrevFree();
+        block.PrevFree() = VMA_NULL;
+        block.NextFree() = m_FreeList[listIndex];
+        m_FreeList[listIndex] = &block;
+        if (block.NextFree())
+            block.NextFree()->PrevFree() 
= &block;
+    }
+
+    return true;
+}
+#endif // _VMA_BLOCK_METADATA_TLSF_FUNCTIONS
+#endif // _VMA_BLOCK_METADATA_TLSF
+
+#ifndef _VMA_BLOCK_VECTOR
+/*
+Sequence of VmaDeviceMemoryBlock. Represents memory blocks allocated for a specific
+Vulkan memory type.
+
+Synchronized internally with a mutex.
+*/
+class VmaBlockVector
+{
+    friend struct VmaDefragmentationContext_T;
+    VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockVector)
+public:
+    VmaBlockVector(
+        VmaAllocator hAllocator,
+        VmaPool hParentPool,
+        uint32_t memoryTypeIndex,
+        VkDeviceSize preferredBlockSize,
+        size_t minBlockCount,
+        size_t maxBlockCount,
+        VkDeviceSize bufferImageGranularity,
+        bool explicitBlockSize,
+        uint32_t algorithm,
+        float priority,
+        VkDeviceSize minAllocationAlignment,
+        void* pMemoryAllocateNext);
+    ~VmaBlockVector();
+
+    VmaAllocator GetAllocator() const { return m_hAllocator; }
+    VmaPool GetParentPool() const { return m_hParentPool; }
+    bool IsCustomPool() const { return m_hParentPool != VMA_NULL; }
+    uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; }
+    VkDeviceSize GetPreferredBlockSize() const { return m_PreferredBlockSize; }
+    VkDeviceSize GetBufferImageGranularity() const { return m_BufferImageGranularity; }
+    uint32_t GetAlgorithm() const { return m_Algorithm; }
+    bool HasExplicitBlockSize() const { return m_ExplicitBlockSize; }
+    float GetPriority() const { return m_Priority; }
+    const void* GetAllocationNextPtr() const { return m_pMemoryAllocateNext; }
+    // To be used only while the m_Mutex is locked. Used during defragmentation.
+    size_t GetBlockCount() const { return m_Blocks.size(); }
+    // To be used only while the m_Mutex is locked. Used during defragmentation.
+    VmaDeviceMemoryBlock* GetBlock(size_t index) const { return m_Blocks[index]; }
+    VMA_RW_MUTEX &GetMutex() { return m_Mutex; }
+
+    VkResult CreateMinBlocks();
+    void AddStatistics(VmaStatistics& inoutStats);
+    void AddDetailedStatistics(VmaDetailedStatistics& inoutStats);
+    bool IsEmpty();
+    bool IsCorruptionDetectionEnabled() const;
+
+    VkResult Allocate(
+        VkDeviceSize size,
+        VkDeviceSize alignment,
+        const VmaAllocationCreateInfo& createInfo,
+        VmaSuballocationType suballocType,
+        size_t allocationCount,
+        VmaAllocation* pAllocations);
+
+    void Free(const VmaAllocation hAllocation);
+
+#if VMA_STATS_STRING_ENABLED
+    void PrintDetailedMap(class VmaJsonWriter& json);
+#endif
+
+    VkResult CheckCorruption();
+
+private:
+    const VmaAllocator m_hAllocator;
+    const VmaPool m_hParentPool;
+    const uint32_t m_MemoryTypeIndex;
+    const VkDeviceSize m_PreferredBlockSize;
+    const size_t m_MinBlockCount;
+    const size_t m_MaxBlockCount;
+    const VkDeviceSize m_BufferImageGranularity;
+    const bool m_ExplicitBlockSize;
+    const uint32_t m_Algorithm;
+    const float m_Priority;
+    const VkDeviceSize m_MinAllocationAlignment;
+
+    void* const m_pMemoryAllocateNext;
+    VMA_RW_MUTEX m_Mutex;
+    // Incrementally sorted by sumFreeSize, ascending.
+    VmaVector<VmaDeviceMemoryBlock*, VmaStlAllocator<VmaDeviceMemoryBlock*>> m_Blocks;
+    uint32_t m_NextBlockId;
+    bool m_IncrementalSort = true;
+
+    void SetIncrementalSort(bool val) { m_IncrementalSort = val; }
+
+    VkDeviceSize CalcMaxBlockSize() const;
+    // Finds and removes given block from vector. 
+    void Remove(VmaDeviceMemoryBlock* pBlock); +    // Performs single step in sorting m_Blocks. They may not be fully sorted +    // after this call. +    void IncrementallySortBlocks(); +    void SortByFreeSize(); + +    VkResult AllocatePage( +        VkDeviceSize size, +        VkDeviceSize alignment, +        const VmaAllocationCreateInfo& createInfo, +        VmaSuballocationType suballocType, +        VmaAllocation* pAllocation); + +    VkResult AllocateFromBlock( +        VmaDeviceMemoryBlock* pBlock, +        VkDeviceSize size, +        VkDeviceSize alignment, +        VmaAllocationCreateFlags allocFlags, +        void* pUserData, +        VmaSuballocationType suballocType, +        uint32_t strategy, +        VmaAllocation* pAllocation); + +    VkResult CommitAllocationRequest( +        VmaAllocationRequest& allocRequest, +        VmaDeviceMemoryBlock* pBlock, +        VkDeviceSize alignment, +        VmaAllocationCreateFlags allocFlags, +        void* pUserData, +        VmaSuballocationType suballocType, +        VmaAllocation* pAllocation); + +    VkResult CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIndex); +    bool HasEmptyBlock(); +}; +#endif // _VMA_BLOCK_VECTOR + +#ifndef _VMA_DEFRAGMENTATION_CONTEXT +struct VmaDefragmentationContext_T +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaDefragmentationContext_T) +public: +    VmaDefragmentationContext_T( +        VmaAllocator hAllocator, +        const VmaDefragmentationInfo& info); +    ~VmaDefragmentationContext_T(); + +    void GetStats(VmaDefragmentationStats& outStats) { outStats = m_GlobalStats; } + +    VkResult DefragmentPassBegin(VmaDefragmentationPassMoveInfo& moveInfo); +    VkResult DefragmentPassEnd(VmaDefragmentationPassMoveInfo& moveInfo); + +private: +    // Max number of allocations to ignore due to size constraints before ending single pass +    static const uint8_t MAX_ALLOCS_TO_IGNORE = 16; +    enum class CounterStatus { Pass, Ignore, End }; + +    struct FragmentedBlock +    { +        uint32_t data; +        VmaDeviceMemoryBlock* block; +    }; +    struct StateBalanced +    { +        VkDeviceSize avgFreeSize = 0; +        VkDeviceSize avgAllocSize = UINT64_MAX; +    }; +    struct StateExtensive +    { +        enum class Operation : uint8_t +        { +            FindFreeBlockBuffer, FindFreeBlockTexture, FindFreeBlockAll, +            MoveBuffers, MoveTextures, MoveAll, +            Cleanup, Done +        }; + +        Operation operation = Operation::FindFreeBlockTexture; +        size_t firstFreeBlock = SIZE_MAX; +    }; +    struct MoveAllocationData +    { +        VkDeviceSize size; +        VkDeviceSize alignment; +        VmaSuballocationType type; +        VmaAllocationCreateFlags flags; +        VmaDefragmentationMove move = {}; +    }; + +    const VkDeviceSize m_MaxPassBytes; +    const uint32_t m_MaxPassAllocations; +    const PFN_vmaCheckDefragmentationBreakFunction m_BreakCallback; +    void* m_BreakCallbackUserData; + +    VmaStlAllocator<VmaDefragmentationMove> m_MoveAllocator; +    VmaVector<VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove>> m_Moves; + +    uint8_t m_IgnoredAllocs = 0; +    uint32_t m_Algorithm; +    uint32_t m_BlockVectorCount; +    VmaBlockVector* m_PoolBlockVector; +    VmaBlockVector** m_pBlockVectors; +    size_t m_ImmovableBlockCount = 0; +    VmaDefragmentationStats m_GlobalStats = { 0 }; +    VmaDefragmentationStats m_PassStats = { 0 }; +    void* m_AlgorithmState = VMA_NULL; + +    static MoveAllocationData GetMoveData(VmaAllocHandle handle, 
VmaBlockMetadata* metadata); +    CounterStatus CheckCounters(VkDeviceSize bytes); +    bool IncrementCounters(VkDeviceSize bytes); +    bool ReallocWithinBlock(VmaBlockVector& vector, VmaDeviceMemoryBlock* block); +    bool AllocInOtherBlock(size_t start, size_t end, MoveAllocationData& data, VmaBlockVector& vector); + +    bool ComputeDefragmentation(VmaBlockVector& vector, size_t index); +    bool ComputeDefragmentation_Fast(VmaBlockVector& vector); +    bool ComputeDefragmentation_Balanced(VmaBlockVector& vector, size_t index, bool update); +    bool ComputeDefragmentation_Full(VmaBlockVector& vector); +    bool ComputeDefragmentation_Extensive(VmaBlockVector& vector, size_t index); + +    void UpdateVectorStatistics(VmaBlockVector& vector, StateBalanced& state); +    bool MoveDataToFreeBlocks(VmaSuballocationType currentType, +        VmaBlockVector& vector, size_t firstFreeBlock, +        bool& texturePresent, bool& bufferPresent, bool& otherPresent); +}; +#endif // _VMA_DEFRAGMENTATION_CONTEXT + +#ifndef _VMA_POOL_T +struct VmaPool_T +{ +    friend struct VmaPoolListItemTraits; +    VMA_CLASS_NO_COPY_NO_MOVE(VmaPool_T) +public: +    VmaBlockVector m_BlockVector; +    VmaDedicatedAllocationList m_DedicatedAllocations; + +    VmaPool_T( +        VmaAllocator hAllocator, +        const VmaPoolCreateInfo& createInfo, +        VkDeviceSize preferredBlockSize); +    ~VmaPool_T(); + +    uint32_t GetId() const { return m_Id; } +    void SetId(uint32_t id) { VMA_ASSERT(m_Id == 0); m_Id = id; } + +    const char* GetName() const { return m_Name; } +    void SetName(const char* pName); + +#if VMA_STATS_STRING_ENABLED +    //void PrintDetailedMap(class VmaStringBuilder& sb); +#endif + +private: +    uint32_t m_Id; +    char* m_Name; +    VmaPool_T* m_PrevPool = VMA_NULL; +    VmaPool_T* m_NextPool = VMA_NULL; +}; + +struct VmaPoolListItemTraits +{ +    typedef VmaPool_T ItemType; + +    static ItemType* GetPrev(const ItemType* item) { return item->m_PrevPool; } +    static ItemType* GetNext(const ItemType* item) { return item->m_NextPool; } +    static ItemType*& AccessPrev(ItemType* item) { return item->m_PrevPool; } +    static ItemType*& AccessNext(ItemType* item) { return item->m_NextPool; } +}; +#endif // _VMA_POOL_T + +#ifndef _VMA_CURRENT_BUDGET_DATA +struct VmaCurrentBudgetData +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaCurrentBudgetData) +public: + +    VMA_ATOMIC_UINT32 m_BlockCount[VK_MAX_MEMORY_HEAPS]; +    VMA_ATOMIC_UINT32 m_AllocationCount[VK_MAX_MEMORY_HEAPS]; +    VMA_ATOMIC_UINT64 m_BlockBytes[VK_MAX_MEMORY_HEAPS]; +    VMA_ATOMIC_UINT64 m_AllocationBytes[VK_MAX_MEMORY_HEAPS]; + +#if VMA_MEMORY_BUDGET +    VMA_ATOMIC_UINT32 m_OperationsSinceBudgetFetch; +    VMA_RW_MUTEX m_BudgetMutex; +    uint64_t m_VulkanUsage[VK_MAX_MEMORY_HEAPS]; +    uint64_t m_VulkanBudget[VK_MAX_MEMORY_HEAPS]; +    uint64_t m_BlockBytesAtBudgetFetch[VK_MAX_MEMORY_HEAPS]; +#endif // VMA_MEMORY_BUDGET + +    VmaCurrentBudgetData(); + +    void AddAllocation(uint32_t heapIndex, VkDeviceSize allocationSize); +    void RemoveAllocation(uint32_t heapIndex, VkDeviceSize allocationSize); +}; + +#ifndef _VMA_CURRENT_BUDGET_DATA_FUNCTIONS +VmaCurrentBudgetData::VmaCurrentBudgetData() +{ +    for (uint32_t heapIndex = 0; heapIndex < VK_MAX_MEMORY_HEAPS; ++heapIndex) +    { +        m_BlockCount[heapIndex] = 0; +        m_AllocationCount[heapIndex] = 0; +        m_BlockBytes[heapIndex] = 0; +        m_AllocationBytes[heapIndex] = 0; +#if VMA_MEMORY_BUDGET +        m_VulkanUsage[heapIndex] = 0; +        
m_VulkanBudget[heapIndex] = 0; +        m_BlockBytesAtBudgetFetch[heapIndex] = 0; +#endif +    } + +#if VMA_MEMORY_BUDGET +    m_OperationsSinceBudgetFetch = 0; +#endif +} + +void VmaCurrentBudgetData::AddAllocation(uint32_t heapIndex, VkDeviceSize allocationSize) +{ +    m_AllocationBytes[heapIndex] += allocationSize; +    ++m_AllocationCount[heapIndex]; +#if VMA_MEMORY_BUDGET +    ++m_OperationsSinceBudgetFetch; +#endif +} + +void VmaCurrentBudgetData::RemoveAllocation(uint32_t heapIndex, VkDeviceSize allocationSize) +{ +    VMA_ASSERT(m_AllocationBytes[heapIndex] >= allocationSize); +    m_AllocationBytes[heapIndex] -= allocationSize; +    VMA_ASSERT(m_AllocationCount[heapIndex] > 0); +    --m_AllocationCount[heapIndex]; +#if VMA_MEMORY_BUDGET +    ++m_OperationsSinceBudgetFetch; +#endif +} +#endif // _VMA_CURRENT_BUDGET_DATA_FUNCTIONS +#endif // _VMA_CURRENT_BUDGET_DATA + +#ifndef _VMA_ALLOCATION_OBJECT_ALLOCATOR +/* +Thread-safe wrapper over VmaPoolAllocator free list, for allocation of VmaAllocation_T objects. +*/ +class VmaAllocationObjectAllocator +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaAllocationObjectAllocator) +public: +    VmaAllocationObjectAllocator(const VkAllocationCallbacks* pAllocationCallbacks) +        : m_Allocator(pAllocationCallbacks, 1024) {} + +    template<typename... Types> VmaAllocation Allocate(Types&&... args); +    void Free(VmaAllocation hAlloc); + +private: +    VMA_MUTEX m_Mutex; +    VmaPoolAllocator<VmaAllocation_T> m_Allocator; +}; + +template<typename... Types> +VmaAllocation VmaAllocationObjectAllocator::Allocate(Types&&... args) +{ +    VmaMutexLock mutexLock(m_Mutex); +    return m_Allocator.Alloc<Types...>(std::forward<Types>(args)...); +} + +void VmaAllocationObjectAllocator::Free(VmaAllocation hAlloc) +{ +    VmaMutexLock mutexLock(m_Mutex); +    m_Allocator.Free(hAlloc); +} +#endif // _VMA_ALLOCATION_OBJECT_ALLOCATOR + +#ifndef _VMA_VIRTUAL_BLOCK_T +struct VmaVirtualBlock_T +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaVirtualBlock_T) +public: +    const bool m_AllocationCallbacksSpecified; +    const VkAllocationCallbacks m_AllocationCallbacks; + +    VmaVirtualBlock_T(const VmaVirtualBlockCreateInfo& createInfo); +    ~VmaVirtualBlock_T(); + +    VkResult Init() { return VK_SUCCESS; } +    bool IsEmpty() const { return m_Metadata->IsEmpty(); } +    void Free(VmaVirtualAllocation allocation) { m_Metadata->Free((VmaAllocHandle)allocation); } +    void SetAllocationUserData(VmaVirtualAllocation allocation, void* userData) { m_Metadata->SetAllocationUserData((VmaAllocHandle)allocation, userData); } +    void Clear() { m_Metadata->Clear(); } + +    const VkAllocationCallbacks* GetAllocationCallbacks() const; +    void GetAllocationInfo(VmaVirtualAllocation allocation, VmaVirtualAllocationInfo& outInfo); +    VkResult Allocate(const VmaVirtualAllocationCreateInfo& createInfo, VmaVirtualAllocation& outAllocation, +        VkDeviceSize* outOffset); +    void GetStatistics(VmaStatistics& outStats) const; +    void CalculateDetailedStatistics(VmaDetailedStatistics& outStats) const; +#if VMA_STATS_STRING_ENABLED +    void BuildStatsString(bool detailedMap, VmaStringBuilder& sb) const; +#endif + +private: +    VmaBlockMetadata* m_Metadata; +}; + +#ifndef _VMA_VIRTUAL_BLOCK_T_FUNCTIONS +VmaVirtualBlock_T::VmaVirtualBlock_T(const VmaVirtualBlockCreateInfo& createInfo) +    : m_AllocationCallbacksSpecified(createInfo.pAllocationCallbacks != VMA_NULL), +    m_AllocationCallbacks(createInfo.pAllocationCallbacks != VMA_NULL ? 
*createInfo.pAllocationCallbacks : VmaEmptyAllocationCallbacks) +{ +    const uint32_t algorithm = createInfo.flags & VMA_VIRTUAL_BLOCK_CREATE_ALGORITHM_MASK; +    switch (algorithm) +    { +    case 0: +        m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_TLSF)(VK_NULL_HANDLE, 1, true); +        break; +    case VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT: +        m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_Linear)(VK_NULL_HANDLE, 1, true); +        break; +    default: +        VMA_ASSERT(0); +        m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_TLSF)(VK_NULL_HANDLE, 1, true); +    } + +    m_Metadata->Init(createInfo.size); +} + +VmaVirtualBlock_T::~VmaVirtualBlock_T() +{ +    // Define macro VMA_DEBUG_LOG_FORMAT or more specialized VMA_LEAK_LOG_FORMAT +    // to receive the list of the unfreed allocations. +    if (!m_Metadata->IsEmpty()) +        m_Metadata->DebugLogAllAllocations(); +    // This is the most important assert in the entire library. +    // Hitting it means you have some memory leak - unreleased virtual allocations. +    VMA_ASSERT_LEAK(m_Metadata->IsEmpty() && "Some virtual allocations were not freed before destruction of this virtual block!"); + +    vma_delete(GetAllocationCallbacks(), m_Metadata); +} + +const VkAllocationCallbacks* VmaVirtualBlock_T::GetAllocationCallbacks() const +{ +    return m_AllocationCallbacksSpecified ? &m_AllocationCallbacks : VMA_NULL; +} + +void VmaVirtualBlock_T::GetAllocationInfo(VmaVirtualAllocation allocation, VmaVirtualAllocationInfo& outInfo) +{ +    m_Metadata->GetAllocationInfo((VmaAllocHandle)allocation, outInfo); +} + +VkResult VmaVirtualBlock_T::Allocate(const VmaVirtualAllocationCreateInfo& createInfo, VmaVirtualAllocation& outAllocation, +    VkDeviceSize* outOffset) +{ +    VmaAllocationRequest request = {}; +    if (m_Metadata->CreateAllocationRequest( +        createInfo.size, // allocSize +        VMA_MAX(createInfo.alignment, (VkDeviceSize)1), // allocAlignment +        (createInfo.flags & VMA_VIRTUAL_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0, // upperAddress +        VMA_SUBALLOCATION_TYPE_UNKNOWN, // allocType - unimportant +        createInfo.flags & VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MASK, // strategy +        &request)) +    { +        m_Metadata->Alloc(request, +            VMA_SUBALLOCATION_TYPE_UNKNOWN, // type - unimportant +            createInfo.pUserData); +        outAllocation = (VmaVirtualAllocation)request.allocHandle; +        if(outOffset) +            *outOffset = m_Metadata->GetAllocationOffset(request.allocHandle); +        return VK_SUCCESS; +    } +    outAllocation = (VmaVirtualAllocation)VK_NULL_HANDLE; +    if (outOffset) +        *outOffset = UINT64_MAX; +    return VK_ERROR_OUT_OF_DEVICE_MEMORY; +} + +void VmaVirtualBlock_T::GetStatistics(VmaStatistics& outStats) const +{ +    VmaClearStatistics(outStats); +    m_Metadata->AddStatistics(outStats); +} + +void VmaVirtualBlock_T::CalculateDetailedStatistics(VmaDetailedStatistics& outStats) const +{ +    VmaClearDetailedStatistics(outStats); +    m_Metadata->AddDetailedStatistics(outStats); +} + +#if VMA_STATS_STRING_ENABLED +void VmaVirtualBlock_T::BuildStatsString(bool detailedMap, VmaStringBuilder& sb) const +{ +    VmaJsonWriter json(GetAllocationCallbacks(), sb); +    json.BeginObject(); + +    VmaDetailedStatistics stats; +    CalculateDetailedStatistics(stats); + +    json.WriteString("Stats"); +    VmaPrintDetailedStatistics(json, stats); + +    if (detailedMap) +    { +      
  json.WriteString("Details"); +        json.BeginObject(); +        m_Metadata->PrintDetailedMap(json); +        json.EndObject(); +    } + +    json.EndObject(); +} +#endif // VMA_STATS_STRING_ENABLED +#endif // _VMA_VIRTUAL_BLOCK_T_FUNCTIONS +#endif // _VMA_VIRTUAL_BLOCK_T + + +// Main allocator object. +struct VmaAllocator_T +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaAllocator_T) +public: +    const bool m_UseMutex; +    const uint32_t m_VulkanApiVersion; +    bool m_UseKhrDedicatedAllocation; // Can be set only if m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0). +    bool m_UseKhrBindMemory2; // Can be set only if m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0). +    bool m_UseExtMemoryBudget; +    bool m_UseAmdDeviceCoherentMemory; +    bool m_UseKhrBufferDeviceAddress; +    bool m_UseExtMemoryPriority; +    bool m_UseKhrMaintenance4; +    bool m_UseKhrMaintenance5; +    bool m_UseKhrExternalMemoryWin32; +    const VkDevice m_hDevice; +    const VkInstance m_hInstance; +    const bool m_AllocationCallbacksSpecified; +    const VkAllocationCallbacks m_AllocationCallbacks; +    VmaDeviceMemoryCallbacks m_DeviceMemoryCallbacks; +    VmaAllocationObjectAllocator m_AllocationObjectAllocator; + +    // Each bit (1 << i) is set if HeapSizeLimit is enabled for that heap, so cannot allocate more than the heap size. +    uint32_t m_HeapSizeLimitMask; + +    VkPhysicalDeviceProperties m_PhysicalDeviceProperties; +    VkPhysicalDeviceMemoryProperties m_MemProps; + +    // Default pools. +    VmaBlockVector* m_pBlockVectors[VK_MAX_MEMORY_TYPES]; +    VmaDedicatedAllocationList m_DedicatedAllocations[VK_MAX_MEMORY_TYPES]; + +    VmaCurrentBudgetData m_Budget; +    VMA_ATOMIC_UINT32 m_DeviceMemoryCount; // Total number of VkDeviceMemory objects. + +    VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo); +    VkResult Init(const VmaAllocatorCreateInfo* pCreateInfo); +    ~VmaAllocator_T(); + +    const VkAllocationCallbacks* GetAllocationCallbacks() const +    { +        return m_AllocationCallbacksSpecified ? &m_AllocationCallbacks : VMA_NULL; +    } +    const VmaVulkanFunctions& GetVulkanFunctions() const +    { +        return m_VulkanFunctions; +    } + +    VkPhysicalDevice GetPhysicalDevice() const { return m_PhysicalDevice; } + +    VkDeviceSize GetBufferImageGranularity() const +    { +        return VMA_MAX( +            static_cast<VkDeviceSize>(VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY), +            m_PhysicalDeviceProperties.limits.bufferImageGranularity); +    } + +    uint32_t GetMemoryHeapCount() const { return m_MemProps.memoryHeapCount; } +    uint32_t GetMemoryTypeCount() const { return m_MemProps.memoryTypeCount; } + +    uint32_t MemoryTypeIndexToHeapIndex(uint32_t memTypeIndex) const +    { +        VMA_ASSERT(memTypeIndex < m_MemProps.memoryTypeCount); +        return m_MemProps.memoryTypes[memTypeIndex].heapIndex; +    } +    // True when specific memory type is HOST_VISIBLE but not HOST_COHERENT. +    bool IsMemoryTypeNonCoherent(uint32_t memTypeIndex) const +    { +        return (m_MemProps.memoryTypes[memTypeIndex].propertyFlags & (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) == +            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; +    } +    // Minimum alignment for all allocations in specific memory type. +    VkDeviceSize GetMemoryTypeMinAlignment(uint32_t memTypeIndex) const +    { +        return IsMemoryTypeNonCoherent(memTypeIndex) ? 
+            VMA_MAX((VkDeviceSize)VMA_MIN_ALIGNMENT, m_PhysicalDeviceProperties.limits.nonCoherentAtomSize) : +            (VkDeviceSize)VMA_MIN_ALIGNMENT; +    } + +    bool IsIntegratedGpu() const +    { +        return m_PhysicalDeviceProperties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU; +    } + +    uint32_t GetGlobalMemoryTypeBits() const { return m_GlobalMemoryTypeBits; } + +    void GetBufferMemoryRequirements( +        VkBuffer hBuffer, +        VkMemoryRequirements& memReq, +        bool& requiresDedicatedAllocation, +        bool& prefersDedicatedAllocation) const; +    void GetImageMemoryRequirements( +        VkImage hImage, +        VkMemoryRequirements& memReq, +        bool& requiresDedicatedAllocation, +        bool& prefersDedicatedAllocation) const; +    VkResult FindMemoryTypeIndex( +        uint32_t memoryTypeBits, +        const VmaAllocationCreateInfo* pAllocationCreateInfo, +        VmaBufferImageUsage bufImgUsage, +        uint32_t* pMemoryTypeIndex) const; + +    // Main allocation function. +    VkResult AllocateMemory( +        const VkMemoryRequirements& vkMemReq, +        bool requiresDedicatedAllocation, +        bool prefersDedicatedAllocation, +        VkBuffer dedicatedBuffer, +        VkImage dedicatedImage, +        VmaBufferImageUsage dedicatedBufferImageUsage, +        const VmaAllocationCreateInfo& createInfo, +        VmaSuballocationType suballocType, +        size_t allocationCount, +        VmaAllocation* pAllocations); + +    // Main deallocation function. +    void FreeMemory( +        size_t allocationCount, +        const VmaAllocation* pAllocations); + +    void CalculateStatistics(VmaTotalStatistics* pStats); + +    void GetHeapBudgets( +        VmaBudget* outBudgets, uint32_t firstHeap, uint32_t heapCount); + +#if VMA_STATS_STRING_ENABLED +    void PrintDetailedMap(class VmaJsonWriter& json); +#endif + +    void GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationInfo* pAllocationInfo); +    void GetAllocationInfo2(VmaAllocation hAllocation, VmaAllocationInfo2* pAllocationInfo); + +    VkResult CreatePool(const VmaPoolCreateInfo* pCreateInfo, VmaPool* pPool); +    void DestroyPool(VmaPool pool); +    void GetPoolStatistics(VmaPool pool, VmaStatistics* pPoolStats); +    void CalculatePoolStatistics(VmaPool pool, VmaDetailedStatistics* pPoolStats); + +    void SetCurrentFrameIndex(uint32_t frameIndex); +    uint32_t GetCurrentFrameIndex() const { return m_CurrentFrameIndex.load(); } + +    VkResult CheckPoolCorruption(VmaPool hPool); +    VkResult CheckCorruption(uint32_t memoryTypeBits); + +    // Call to Vulkan function vkAllocateMemory with accompanying bookkeeping. +    VkResult AllocateVulkanMemory(const VkMemoryAllocateInfo* pAllocateInfo, VkDeviceMemory* pMemory); +    // Call to Vulkan function vkFreeMemory with accompanying bookkeeping. +    void FreeVulkanMemory(uint32_t memoryType, VkDeviceSize size, VkDeviceMemory hMemory); +    // Call to Vulkan function vkBindBufferMemory or vkBindBufferMemory2KHR. +    VkResult BindVulkanBuffer( +        VkDeviceMemory memory, +        VkDeviceSize memoryOffset, +        VkBuffer buffer, +        const void* pNext); +    // Call to Vulkan function vkBindImageMemory or vkBindImageMemory2KHR. 
+    VkResult BindVulkanImage( +        VkDeviceMemory memory, +        VkDeviceSize memoryOffset, +        VkImage image, +        const void* pNext); + +    VkResult Map(VmaAllocation hAllocation, void** ppData); +    void Unmap(VmaAllocation hAllocation); + +    VkResult BindBufferMemory( +        VmaAllocation hAllocation, +        VkDeviceSize allocationLocalOffset, +        VkBuffer hBuffer, +        const void* pNext); +    VkResult BindImageMemory( +        VmaAllocation hAllocation, +        VkDeviceSize allocationLocalOffset, +        VkImage hImage, +        const void* pNext); + +    VkResult FlushOrInvalidateAllocation( +        VmaAllocation hAllocation, +        VkDeviceSize offset, VkDeviceSize size, +        VMA_CACHE_OPERATION op); +    VkResult FlushOrInvalidateAllocations( +        uint32_t allocationCount, +        const VmaAllocation* allocations, +        const VkDeviceSize* offsets, const VkDeviceSize* sizes, +        VMA_CACHE_OPERATION op); + +    VkResult CopyMemoryToAllocation( +        const void* pSrcHostPointer, +        VmaAllocation dstAllocation, +        VkDeviceSize dstAllocationLocalOffset, +        VkDeviceSize size); +    VkResult CopyAllocationToMemory( +        VmaAllocation srcAllocation, +        VkDeviceSize srcAllocationLocalOffset, +        void* pDstHostPointer, +        VkDeviceSize size); + +    void FillAllocation(const VmaAllocation hAllocation, uint8_t pattern); + +    /* +    Returns bit mask of memory types that can support defragmentation on GPU as +    they support creation of required buffer for copy operations. +    */ +    uint32_t GetGpuDefragmentationMemoryTypeBits(); + +#if VMA_EXTERNAL_MEMORY +    VkExternalMemoryHandleTypeFlagsKHR GetExternalMemoryHandleTypeFlags(uint32_t memTypeIndex) const +    { +        return m_TypeExternalMemoryHandleTypes[memTypeIndex]; +    } +#endif // #if VMA_EXTERNAL_MEMORY + +private: +    VkDeviceSize m_PreferredLargeHeapBlockSize; + +    VkPhysicalDevice m_PhysicalDevice; +    VMA_ATOMIC_UINT32 m_CurrentFrameIndex; +    VMA_ATOMIC_UINT32 m_GpuDefragmentationMemoryTypeBits; // UINT32_MAX means uninitialized. +#if VMA_EXTERNAL_MEMORY +    VkExternalMemoryHandleTypeFlagsKHR m_TypeExternalMemoryHandleTypes[VK_MAX_MEMORY_TYPES]; +#endif // #if VMA_EXTERNAL_MEMORY + +    VMA_RW_MUTEX m_PoolsMutex; +    typedef VmaIntrusiveLinkedList<VmaPoolListItemTraits> PoolList; +    // Protected by m_PoolsMutex. +    PoolList m_Pools; +    uint32_t m_NextPoolId; + +    VmaVulkanFunctions m_VulkanFunctions; + +    // Global bit mask AND-ed with any memoryTypeBits to disallow certain memory types. 
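+    // (A note on intent, not from upstream: by default this mask only excludes memory types the allocator
+    // must not touch, e.g. DEVICE_COHERENT_AMD types when that feature was not enabled at allocator creation.)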
+    uint32_t m_GlobalMemoryTypeBits; + +    void ImportVulkanFunctions(const VmaVulkanFunctions* pVulkanFunctions); + +#if VMA_STATIC_VULKAN_FUNCTIONS == 1 +    void ImportVulkanFunctions_Static(); +#endif + +    void ImportVulkanFunctions_Custom(const VmaVulkanFunctions* pVulkanFunctions); + +#if VMA_DYNAMIC_VULKAN_FUNCTIONS == 1 +    void ImportVulkanFunctions_Dynamic(); +#endif + +    void ValidateVulkanFunctions(); + +    VkDeviceSize CalcPreferredBlockSize(uint32_t memTypeIndex); + +    VkResult AllocateMemoryOfType( +        VmaPool pool, +        VkDeviceSize size, +        VkDeviceSize alignment, +        bool dedicatedPreferred, +        VkBuffer dedicatedBuffer, +        VkImage dedicatedImage, +        VmaBufferImageUsage dedicatedBufferImageUsage, +        const VmaAllocationCreateInfo& createInfo, +        uint32_t memTypeIndex, +        VmaSuballocationType suballocType, +        VmaDedicatedAllocationList& dedicatedAllocations, +        VmaBlockVector& blockVector, +        size_t allocationCount, +        VmaAllocation* pAllocations); + +    // Helper function only to be used inside AllocateDedicatedMemory. +    VkResult AllocateDedicatedMemoryPage( +        VmaPool pool, +        VkDeviceSize size, +        VmaSuballocationType suballocType, +        uint32_t memTypeIndex, +        const VkMemoryAllocateInfo& allocInfo, +        bool map, +        bool isUserDataString, +        bool isMappingAllowed, +        void* pUserData, +        VmaAllocation* pAllocation); + +    // Allocates and registers new VkDeviceMemory specifically for dedicated allocations. +    VkResult AllocateDedicatedMemory( +        VmaPool pool, +        VkDeviceSize size, +        VmaSuballocationType suballocType, +        VmaDedicatedAllocationList& dedicatedAllocations, +        uint32_t memTypeIndex, +        bool map, +        bool isUserDataString, +        bool isMappingAllowed, +        bool canAliasMemory, +        void* pUserData, +        float priority, +        VkBuffer dedicatedBuffer, +        VkImage dedicatedImage, +        VmaBufferImageUsage dedicatedBufferImageUsage, +        size_t allocationCount, +        VmaAllocation* pAllocations, +        const void* pNextChain = VMA_NULL); + +    void FreeDedicatedMemory(const VmaAllocation allocation); + +    VkResult CalcMemTypeParams( +        VmaAllocationCreateInfo& outCreateInfo, +        uint32_t memTypeIndex, +        VkDeviceSize size, +        size_t allocationCount); +    VkResult CalcAllocationParams( +        VmaAllocationCreateInfo& outCreateInfo, +        bool dedicatedRequired, +        bool dedicatedPreferred); + +    /* +    Calculates and returns bit mask of memory types that can support defragmentation +    on GPU as they support creation of required buffer for copy operations. 
+    */ +    uint32_t CalculateGpuDefragmentationMemoryTypeBits() const; +    uint32_t CalculateGlobalMemoryTypeBits() const; + +    bool GetFlushOrInvalidateRange( +        VmaAllocation allocation, +        VkDeviceSize offset, VkDeviceSize size, +        VkMappedMemoryRange& outRange) const; + +#if VMA_MEMORY_BUDGET +    void UpdateVulkanBudget(); +#endif // #if VMA_MEMORY_BUDGET +}; + + +#ifndef _VMA_MEMORY_FUNCTIONS +static void* VmaMalloc(VmaAllocator hAllocator, size_t size, size_t alignment) +{ +    return VmaMalloc(&hAllocator->m_AllocationCallbacks, size, alignment); +} + +static void VmaFree(VmaAllocator hAllocator, void* ptr) +{ +    VmaFree(&hAllocator->m_AllocationCallbacks, ptr); +} + +template<typename T> +static T* VmaAllocate(VmaAllocator hAllocator) +{ +    return (T*)VmaMalloc(hAllocator, sizeof(T), VMA_ALIGN_OF(T)); +} + +template<typename T> +static T* VmaAllocateArray(VmaAllocator hAllocator, size_t count) +{ +    return (T*)VmaMalloc(hAllocator, sizeof(T) * count, VMA_ALIGN_OF(T)); +} + +template<typename T> +static void vma_delete(VmaAllocator hAllocator, T* ptr) +{ +    if(ptr != VMA_NULL) +    { +        ptr->~T(); +        VmaFree(hAllocator, ptr); +    } +} + +template<typename T> +static void vma_delete_array(VmaAllocator hAllocator, T* ptr, size_t count) +{ +    if(ptr != VMA_NULL) +    { +        for(size_t i = count; i--; ) +            ptr[i].~T(); +        VmaFree(hAllocator, ptr); +    } +} +#endif // _VMA_MEMORY_FUNCTIONS + +#ifndef _VMA_DEVICE_MEMORY_BLOCK_FUNCTIONS +VmaDeviceMemoryBlock::VmaDeviceMemoryBlock(VmaAllocator hAllocator) +    : m_pMetadata(VMA_NULL), +    m_MemoryTypeIndex(UINT32_MAX), +    m_Id(0), +    m_hMemory(VK_NULL_HANDLE), +    m_MapCount(0), +    m_pMappedData(VMA_NULL){} + +VmaDeviceMemoryBlock::~VmaDeviceMemoryBlock() +{ +    VMA_ASSERT_LEAK(m_MapCount == 0 && "VkDeviceMemory block is being destroyed while it is still mapped."); +    VMA_ASSERT_LEAK(m_hMemory == VK_NULL_HANDLE); +} + +void VmaDeviceMemoryBlock::Init( +    VmaAllocator hAllocator, +    VmaPool hParentPool, +    uint32_t newMemoryTypeIndex, +    VkDeviceMemory newMemory, +    VkDeviceSize newSize, +    uint32_t id, +    uint32_t algorithm, +    VkDeviceSize bufferImageGranularity) +{ +    VMA_ASSERT(m_hMemory == VK_NULL_HANDLE); + +    m_hParentPool = hParentPool; +    m_MemoryTypeIndex = newMemoryTypeIndex; +    m_Id = id; +    m_hMemory = newMemory; + +    switch (algorithm) +    { +    case 0: +        m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_TLSF)(hAllocator->GetAllocationCallbacks(), +            bufferImageGranularity, false); // isVirtual +        break; +    case VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT: +        m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_Linear)(hAllocator->GetAllocationCallbacks(), +            bufferImageGranularity, false); // isVirtual +        break; +    default: +        VMA_ASSERT(0); +        m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_TLSF)(hAllocator->GetAllocationCallbacks(), +            bufferImageGranularity, false); // isVirtual +    } +    m_pMetadata->Init(newSize); +} + +void VmaDeviceMemoryBlock::Destroy(VmaAllocator allocator) +{ +    // Define macro VMA_DEBUG_LOG_FORMAT or more specialized VMA_LEAK_LOG_FORMAT +    // to receive the list of the unfreed allocations. +    if (!m_pMetadata->IsEmpty()) +        m_pMetadata->DebugLogAllAllocations(); +    // This is the most important assert in the entire library. +    // Hitting it means you have some memory leak - unreleased VmaAllocation objects. 
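+    // VMA_ASSERT_LEAK is a configuration macro (it defaults to VMA_ASSERT) and can be overridden to change how leaks are reported.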
+    VMA_ASSERT_LEAK(m_pMetadata->IsEmpty() && "Some allocations were not freed before destruction of this memory block!"); + +    VMA_ASSERT_LEAK(m_hMemory != VK_NULL_HANDLE); +    allocator->FreeVulkanMemory(m_MemoryTypeIndex, m_pMetadata->GetSize(), m_hMemory); +    m_hMemory = VK_NULL_HANDLE; + +    vma_delete(allocator, m_pMetadata); +    m_pMetadata = VMA_NULL; +} + +void VmaDeviceMemoryBlock::PostAlloc(VmaAllocator hAllocator) +{ +    VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); +    m_MappingHysteresis.PostAlloc(); +} + +void VmaDeviceMemoryBlock::PostFree(VmaAllocator hAllocator) +{ +    VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); +    if(m_MappingHysteresis.PostFree()) +    { +        VMA_ASSERT(m_MappingHysteresis.GetExtraMapping() == 0); +        if (m_MapCount == 0) +        { +            m_pMappedData = VMA_NULL; +            (*hAllocator->GetVulkanFunctions().vkUnmapMemory)(hAllocator->m_hDevice, m_hMemory); +        } +    } +} + +bool VmaDeviceMemoryBlock::Validate() const +{ +    VMA_VALIDATE((m_hMemory != VK_NULL_HANDLE) && +        (m_pMetadata->GetSize() != 0)); + +    return m_pMetadata->Validate(); +} + +VkResult VmaDeviceMemoryBlock::CheckCorruption(VmaAllocator hAllocator) +{ +    void* pData = VMA_NULL; +    VkResult res = Map(hAllocator, 1, &pData); +    if (res != VK_SUCCESS) +    { +        return res; +    } + +    res = m_pMetadata->CheckCorruption(pData); + +    Unmap(hAllocator, 1); + +    return res; +} + +VkResult VmaDeviceMemoryBlock::Map(VmaAllocator hAllocator, uint32_t count, void** ppData) +{ +    if (count == 0) +    { +        return VK_SUCCESS; +    } + +    VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); +    const uint32_t oldTotalMapCount = m_MapCount + m_MappingHysteresis.GetExtraMapping(); +    if (oldTotalMapCount != 0) +    { +        VMA_ASSERT(m_pMappedData != VMA_NULL); +        m_MappingHysteresis.PostMap(); +        m_MapCount += count; +        if (ppData != VMA_NULL) +        { +            *ppData = m_pMappedData; +        } +        return VK_SUCCESS; +    } +    else +    { +        VkResult result = (*hAllocator->GetVulkanFunctions().vkMapMemory)( +            hAllocator->m_hDevice, +            m_hMemory, +            0, // offset +            VK_WHOLE_SIZE, +            0, // flags +            &m_pMappedData); +        if (result == VK_SUCCESS) +        { +            VMA_ASSERT(m_pMappedData != VMA_NULL); +            m_MappingHysteresis.PostMap(); +            m_MapCount = count; +            if (ppData != VMA_NULL) +            { +                *ppData = m_pMappedData; +            } +        } +        return result; +    } +} + +void VmaDeviceMemoryBlock::Unmap(VmaAllocator hAllocator, uint32_t count) +{ +    if (count == 0) +    { +        return; +    } + +    VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); +    if (m_MapCount >= count) +    { +        m_MapCount -= count; +        const uint32_t totalMapCount = m_MapCount + m_MappingHysteresis.GetExtraMapping(); +        if (totalMapCount == 0) +        { +            m_pMappedData = VMA_NULL; +            (*hAllocator->GetVulkanFunctions().vkUnmapMemory)(hAllocator->m_hDevice, m_hMemory); +        } +        m_MappingHysteresis.PostUnmap(); +    } +    else +    { +        VMA_ASSERT(0 && "VkDeviceMemory block is being unmapped while it was not previously mapped."); +    } +} + +VkResult VmaDeviceMemoryBlock::WriteMagicValueAfterAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize 
allocSize) +{ +    VMA_ASSERT(VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_MARGIN % 4 == 0 && VMA_DEBUG_DETECT_CORRUPTION); + +    void* pData; +    VkResult res = Map(hAllocator, 1, &pData); +    if (res != VK_SUCCESS) +    { +        return res; +    } + +    VmaWriteMagicValue(pData, allocOffset + allocSize); + +    Unmap(hAllocator, 1); +    return VK_SUCCESS; +} + +VkResult VmaDeviceMemoryBlock::ValidateMagicValueAfterAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize) +{ +    VMA_ASSERT(VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_MARGIN % 4 == 0 && VMA_DEBUG_DETECT_CORRUPTION); + +    void* pData; +    VkResult res = Map(hAllocator, 1, &pData); +    if (res != VK_SUCCESS) +    { +        return res; +    } + +    if (!VmaValidateMagicValue(pData, allocOffset + allocSize)) +    { +        VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER FREED ALLOCATION!"); +    } + +    Unmap(hAllocator, 1); +    return VK_SUCCESS; +} + +VkResult VmaDeviceMemoryBlock::BindBufferMemory( +    const VmaAllocator hAllocator, +    const VmaAllocation hAllocation, +    VkDeviceSize allocationLocalOffset, +    VkBuffer hBuffer, +    const void* pNext) +{ +    VMA_ASSERT(hAllocation->GetType() == VmaAllocation_T::ALLOCATION_TYPE_BLOCK && +        hAllocation->GetBlock() == this); +    VMA_ASSERT(allocationLocalOffset < hAllocation->GetSize() && +        "Invalid allocationLocalOffset. Did you forget that this offset is relative to the beginning of the allocation, not the whole memory block?"); +    const VkDeviceSize memoryOffset = hAllocation->GetOffset() + allocationLocalOffset; +    // This lock is important so that we don't call vkBind... and/or vkMap... simultaneously on the same VkDeviceMemory from multiple threads. +    VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); +    return hAllocator->BindVulkanBuffer(m_hMemory, memoryOffset, hBuffer, pNext); +} + +VkResult VmaDeviceMemoryBlock::BindImageMemory( +    const VmaAllocator hAllocator, +    const VmaAllocation hAllocation, +    VkDeviceSize allocationLocalOffset, +    VkImage hImage, +    const void* pNext) +{ +    VMA_ASSERT(hAllocation->GetType() == VmaAllocation_T::ALLOCATION_TYPE_BLOCK && +        hAllocation->GetBlock() == this); +    VMA_ASSERT(allocationLocalOffset < hAllocation->GetSize() && +        "Invalid allocationLocalOffset. Did you forget that this offset is relative to the beginning of the allocation, not the whole memory block?"); +    const VkDeviceSize memoryOffset = hAllocation->GetOffset() + allocationLocalOffset; +    // This lock is important so that we don't call vkBind... and/or vkMap... simultaneously on the same VkDeviceMemory from multiple threads. 
+    VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); +    return hAllocator->BindVulkanImage(m_hMemory, memoryOffset, hImage, pNext); +} + +#if VMA_EXTERNAL_MEMORY_WIN32 +VkResult VmaDeviceMemoryBlock::CreateWin32Handle(const VmaAllocator hAllocator, PFN_vkGetMemoryWin32HandleKHR pvkGetMemoryWin32HandleKHR, HANDLE hTargetProcess, HANDLE* pHandle) noexcept +{ +    VMA_ASSERT(pHandle); +    return m_Handle.GetHandle(hAllocator->m_hDevice, m_hMemory, pvkGetMemoryWin32HandleKHR, hTargetProcess, hAllocator->m_UseMutex, pHandle); +} +#endif // VMA_EXTERNAL_MEMORY_WIN32 +#endif // _VMA_DEVICE_MEMORY_BLOCK_FUNCTIONS + +#ifndef _VMA_ALLOCATION_T_FUNCTIONS +VmaAllocation_T::VmaAllocation_T(bool mappingAllowed) +    : m_Alignment{ 1 }, +    m_Size{ 0 }, +    m_pUserData{ VMA_NULL }, +    m_pName{ VMA_NULL }, +    m_MemoryTypeIndex{ 0 }, +    m_Type{ (uint8_t)ALLOCATION_TYPE_NONE }, +    m_SuballocationType{ (uint8_t)VMA_SUBALLOCATION_TYPE_UNKNOWN }, +    m_MapCount{ 0 }, +    m_Flags{ 0 } +{ +    if(mappingAllowed) +        m_Flags |= (uint8_t)FLAG_MAPPING_ALLOWED; +} + +VmaAllocation_T::~VmaAllocation_T() +{ +    VMA_ASSERT_LEAK(m_MapCount == 0 && "Allocation was not unmapped before destruction."); + +    // Check if owned string was freed. +    VMA_ASSERT(m_pName == VMA_NULL); +} + +void VmaAllocation_T::InitBlockAllocation( +    VmaDeviceMemoryBlock* block, +    VmaAllocHandle allocHandle, +    VkDeviceSize alignment, +    VkDeviceSize size, +    uint32_t memoryTypeIndex, +    VmaSuballocationType suballocationType, +    bool mapped) +{ +    VMA_ASSERT(m_Type == ALLOCATION_TYPE_NONE); +    VMA_ASSERT(block != VMA_NULL); +    m_Type = (uint8_t)ALLOCATION_TYPE_BLOCK; +    m_Alignment = alignment; +    m_Size = size; +    m_MemoryTypeIndex = memoryTypeIndex; +    if(mapped) +    { +        VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); +        m_Flags |= (uint8_t)FLAG_PERSISTENT_MAP; +    } +    m_SuballocationType = (uint8_t)suballocationType; +    m_BlockAllocation.m_Block = block; +    m_BlockAllocation.m_AllocHandle = allocHandle; +} + +void VmaAllocation_T::InitDedicatedAllocation( +    VmaAllocator allocator, +    VmaPool hParentPool, +    uint32_t memoryTypeIndex, +    VkDeviceMemory hMemory, +    VmaSuballocationType suballocationType, +    void* pMappedData, +    VkDeviceSize size) +{ +    VMA_ASSERT(m_Type == ALLOCATION_TYPE_NONE); +    VMA_ASSERT(hMemory != VK_NULL_HANDLE); +    m_Type = (uint8_t)ALLOCATION_TYPE_DEDICATED; +    m_Alignment = 0; +    m_Size = size; +    m_MemoryTypeIndex = memoryTypeIndex; +    m_SuballocationType = (uint8_t)suballocationType; +    m_DedicatedAllocation.m_ExtraData = VMA_NULL; +    m_DedicatedAllocation.m_hParentPool = hParentPool; +    m_DedicatedAllocation.m_hMemory = hMemory; +    m_DedicatedAllocation.m_Prev = VMA_NULL; +    m_DedicatedAllocation.m_Next = VMA_NULL; + +    if (pMappedData != VMA_NULL) +    { +        VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! 
Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); +        m_Flags |= (uint8_t)FLAG_PERSISTENT_MAP; +        EnsureExtraData(allocator); +        m_DedicatedAllocation.m_ExtraData->m_pMappedData = pMappedData; +    } +} + +void VmaAllocation_T::Destroy(VmaAllocator allocator) +{ +    FreeName(allocator); + +    if (GetType() == ALLOCATION_TYPE_DEDICATED) +    { +        vma_delete(allocator, m_DedicatedAllocation.m_ExtraData); +    } +} + +void VmaAllocation_T::SetName(VmaAllocator hAllocator, const char* pName) +{ +    VMA_ASSERT(pName == VMA_NULL || pName != m_pName); + +    FreeName(hAllocator); + +    if (pName != VMA_NULL) +        m_pName = VmaCreateStringCopy(hAllocator->GetAllocationCallbacks(), pName); +} + +uint8_t VmaAllocation_T::SwapBlockAllocation(VmaAllocator hAllocator, VmaAllocation allocation) +{ +    VMA_ASSERT(allocation != VMA_NULL); +    VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK); +    VMA_ASSERT(allocation->m_Type == ALLOCATION_TYPE_BLOCK); + +    if (m_MapCount != 0) +        m_BlockAllocation.m_Block->Unmap(hAllocator, m_MapCount); + +    m_BlockAllocation.m_Block->m_pMetadata->SetAllocationUserData(m_BlockAllocation.m_AllocHandle, allocation); +    std::swap(m_BlockAllocation, allocation->m_BlockAllocation); +    m_BlockAllocation.m_Block->m_pMetadata->SetAllocationUserData(m_BlockAllocation.m_AllocHandle, this); + +#if VMA_STATS_STRING_ENABLED +    std::swap(m_BufferImageUsage, allocation->m_BufferImageUsage); +#endif +    return m_MapCount; +} + +VmaAllocHandle VmaAllocation_T::GetAllocHandle() const +{ +    switch (m_Type) +    { +    case ALLOCATION_TYPE_BLOCK: +        return m_BlockAllocation.m_AllocHandle; +    case ALLOCATION_TYPE_DEDICATED: +        return VK_NULL_HANDLE; +    default: +        VMA_ASSERT(0); +        return VK_NULL_HANDLE; +    } +} + +VkDeviceSize VmaAllocation_T::GetOffset() const +{ +    switch (m_Type) +    { +    case ALLOCATION_TYPE_BLOCK: +        return m_BlockAllocation.m_Block->m_pMetadata->GetAllocationOffset(m_BlockAllocation.m_AllocHandle); +    case ALLOCATION_TYPE_DEDICATED: +        return 0; +    default: +        VMA_ASSERT(0); +        return 0; +    } +} + +VmaPool VmaAllocation_T::GetParentPool() const +{ +    switch (m_Type) +    { +    case ALLOCATION_TYPE_BLOCK: +        return m_BlockAllocation.m_Block->GetParentPool(); +    case ALLOCATION_TYPE_DEDICATED: +        return m_DedicatedAllocation.m_hParentPool; +    default: +        VMA_ASSERT(0); +        return VK_NULL_HANDLE; +    } +} + +VkDeviceMemory VmaAllocation_T::GetMemory() const +{ +    switch (m_Type) +    { +    case ALLOCATION_TYPE_BLOCK: +        return m_BlockAllocation.m_Block->GetDeviceMemory(); +    case ALLOCATION_TYPE_DEDICATED: +        return m_DedicatedAllocation.m_hMemory; +    default: +        VMA_ASSERT(0); +        return VK_NULL_HANDLE; +    } +} + +void* VmaAllocation_T::GetMappedData() const +{ +    switch (m_Type) +    { +    case ALLOCATION_TYPE_BLOCK: +        if (m_MapCount != 0 || IsPersistentMap()) +        { +            void* pBlockData = m_BlockAllocation.m_Block->GetMappedData(); +            VMA_ASSERT(pBlockData != VMA_NULL); +            return (char*)pBlockData + GetOffset(); +        } +        else +        { +            return VMA_NULL; +        } +        break; +    case ALLOCATION_TYPE_DEDICATED: +        VMA_ASSERT((m_DedicatedAllocation.m_ExtraData != VMA_NULL && m_DedicatedAllocation.m_ExtraData->m_pMappedData != VMA_NULL) == +            (m_MapCount != 0 || 
IsPersistentMap())); +        return m_DedicatedAllocation.m_ExtraData != VMA_NULL ? m_DedicatedAllocation.m_ExtraData->m_pMappedData : VMA_NULL; +    default: +        VMA_ASSERT(0); +        return VMA_NULL; +    } +} + +void VmaAllocation_T::BlockAllocMap() +{ +    VMA_ASSERT(GetType() == ALLOCATION_TYPE_BLOCK); +    VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); + +    if (m_MapCount < 0xFF) +    { +        ++m_MapCount; +    } +    else +    { +        VMA_ASSERT(0 && "Allocation mapped too many times simultaneously."); +    } +} + +void VmaAllocation_T::BlockAllocUnmap() +{ +    VMA_ASSERT(GetType() == ALLOCATION_TYPE_BLOCK); + +    if (m_MapCount > 0) +    { +        --m_MapCount; +    } +    else +    { +        VMA_ASSERT(0 && "Unmapping allocation not previously mapped."); +    } +} + +VkResult VmaAllocation_T::DedicatedAllocMap(VmaAllocator hAllocator, void** ppData) +{ +    VMA_ASSERT(GetType() == ALLOCATION_TYPE_DEDICATED); +    VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); + +    EnsureExtraData(hAllocator); + +    if (m_MapCount != 0 || IsPersistentMap()) +    { +        if (m_MapCount < 0xFF) +        { +            VMA_ASSERT(m_DedicatedAllocation.m_ExtraData->m_pMappedData != VMA_NULL); +            *ppData = m_DedicatedAllocation.m_ExtraData->m_pMappedData; +            ++m_MapCount; +            return VK_SUCCESS; +        } +        else +        { +            VMA_ASSERT(0 && "Dedicated allocation mapped too many times simultaneously."); +            return VK_ERROR_MEMORY_MAP_FAILED; +        } +    } +    else +    { +        VkResult result = (*hAllocator->GetVulkanFunctions().vkMapMemory)( +            hAllocator->m_hDevice, +            m_DedicatedAllocation.m_hMemory, +            0, // offset +            VK_WHOLE_SIZE, +            0, // flags +            ppData); +        if (result == VK_SUCCESS) +        { +            m_DedicatedAllocation.m_ExtraData->m_pMappedData = *ppData; +            m_MapCount = 1; +        } +        return result; +    } +} + +void VmaAllocation_T::DedicatedAllocUnmap(VmaAllocator hAllocator) +{ +    VMA_ASSERT(GetType() == ALLOCATION_TYPE_DEDICATED); + +    if (m_MapCount > 0) +    { +        --m_MapCount; +        if (m_MapCount == 0 && !IsPersistentMap()) +        { +            VMA_ASSERT(m_DedicatedAllocation.m_ExtraData != VMA_NULL); +            m_DedicatedAllocation.m_ExtraData->m_pMappedData = VMA_NULL; +            (*hAllocator->GetVulkanFunctions().vkUnmapMemory)( +                hAllocator->m_hDevice, +                m_DedicatedAllocation.m_hMemory); +        } +    } +    else +    { +        VMA_ASSERT(0 && "Unmapping dedicated allocation not previously mapped."); +    } +} + +#if VMA_STATS_STRING_ENABLED +void VmaAllocation_T::PrintParameters(class VmaJsonWriter& json) const +{ +    json.WriteString("Type"); +    json.WriteString(VMA_SUBALLOCATION_TYPE_NAMES[m_SuballocationType]); + +    json.WriteString("Size"); +    json.WriteNumber(m_Size); +    json.WriteString("Usage"); +    json.WriteNumber(m_BufferImageUsage.Value); // It may be uint32_t or uint64_t. 
+ +    if (m_pUserData != VMA_NULL) +    { +        json.WriteString("CustomData"); +        json.BeginString(); +        json.ContinueString_Pointer(m_pUserData); +        json.EndString(); +    } +    if (m_pName != VMA_NULL) +    { +        json.WriteString("Name"); +        json.WriteString(m_pName); +    } +} +#if VMA_EXTERNAL_MEMORY_WIN32 +VkResult VmaAllocation_T::GetWin32Handle(VmaAllocator hAllocator, HANDLE hTargetProcess, HANDLE* pHandle) noexcept +{ +    auto pvkGetMemoryWin32HandleKHR = hAllocator->GetVulkanFunctions().vkGetMemoryWin32HandleKHR; +    switch (m_Type) +    { +    case ALLOCATION_TYPE_BLOCK: +        return m_BlockAllocation.m_Block->CreateWin32Handle(hAllocator, pvkGetMemoryWin32HandleKHR, hTargetProcess, pHandle); +    case ALLOCATION_TYPE_DEDICATED: +        EnsureExtraData(hAllocator); +        return m_DedicatedAllocation.m_ExtraData->m_Handle.GetHandle(hAllocator->m_hDevice, m_DedicatedAllocation.m_hMemory, pvkGetMemoryWin32HandleKHR, hTargetProcess, hAllocator->m_UseMutex, pHandle); +    default: +        VMA_ASSERT(0); +        return VK_ERROR_FEATURE_NOT_PRESENT; +    } +} +#endif // VMA_EXTERNAL_MEMORY_WIN32 +#endif // VMA_STATS_STRING_ENABLED + +void VmaAllocation_T::EnsureExtraData(VmaAllocator hAllocator) +{ +    if (m_DedicatedAllocation.m_ExtraData == VMA_NULL) +    { +        m_DedicatedAllocation.m_ExtraData = vma_new(hAllocator, VmaAllocationExtraData)(); +    } +} + +void VmaAllocation_T::FreeName(VmaAllocator hAllocator) +{ +    if(m_pName) +    { +        VmaFreeString(hAllocator->GetAllocationCallbacks(), m_pName); +        m_pName = VMA_NULL; +    } +} +#endif // _VMA_ALLOCATION_T_FUNCTIONS + +#ifndef _VMA_BLOCK_VECTOR_FUNCTIONS +VmaBlockVector::VmaBlockVector( +    VmaAllocator hAllocator, +    VmaPool hParentPool, +    uint32_t memoryTypeIndex, +    VkDeviceSize preferredBlockSize, +    size_t minBlockCount, +    size_t maxBlockCount, +    VkDeviceSize bufferImageGranularity, +    bool explicitBlockSize, +    uint32_t algorithm, +    float priority, +    VkDeviceSize minAllocationAlignment, +    void* pMemoryAllocateNext) +    : m_hAllocator(hAllocator), +    m_hParentPool(hParentPool), +    m_MemoryTypeIndex(memoryTypeIndex), +    m_PreferredBlockSize(preferredBlockSize), +    m_MinBlockCount(minBlockCount), +    m_MaxBlockCount(maxBlockCount), +    m_BufferImageGranularity(bufferImageGranularity), +    m_ExplicitBlockSize(explicitBlockSize), +    m_Algorithm(algorithm), +    m_Priority(priority), +    m_MinAllocationAlignment(minAllocationAlignment), +    m_pMemoryAllocateNext(pMemoryAllocateNext), +    m_Blocks(VmaStlAllocator<VmaDeviceMemoryBlock*>(hAllocator->GetAllocationCallbacks())), +    m_NextBlockId(0) {} + +VmaBlockVector::~VmaBlockVector() +{ +    for (size_t i = m_Blocks.size(); i--; ) +    { +        m_Blocks[i]->Destroy(m_hAllocator); +        vma_delete(m_hAllocator, m_Blocks[i]); +    } +} + +VkResult VmaBlockVector::CreateMinBlocks() +{ +    for (size_t i = 0; i < m_MinBlockCount; ++i) +    { +        VkResult res = CreateBlock(m_PreferredBlockSize, VMA_NULL); +        if (res != VK_SUCCESS) +        { +            return res; +        } +    } +    return VK_SUCCESS; +} + +void VmaBlockVector::AddStatistics(VmaStatistics& inoutStats) +{ +    VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); + +    const size_t blockCount = m_Blocks.size(); +    for (uint32_t blockIndex = 0; blockIndex < blockCount; ++blockIndex) +    { +        const VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex]; +        
VMA_ASSERT(pBlock); +        VMA_HEAVY_ASSERT(pBlock->Validate()); +        pBlock->m_pMetadata->AddStatistics(inoutStats); +    } +} + +void VmaBlockVector::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) +{ +    VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); + +    const size_t blockCount = m_Blocks.size(); +    for (uint32_t blockIndex = 0; blockIndex < blockCount; ++blockIndex) +    { +        const VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex]; +        VMA_ASSERT(pBlock); +        VMA_HEAVY_ASSERT(pBlock->Validate()); +        pBlock->m_pMetadata->AddDetailedStatistics(inoutStats); +    } +} + +bool VmaBlockVector::IsEmpty() +{ +    VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); +    return m_Blocks.empty(); +} + +bool VmaBlockVector::IsCorruptionDetectionEnabled() const +{ +    const uint32_t requiredMemFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; +    return (VMA_DEBUG_DETECT_CORRUPTION != 0) && +        (VMA_DEBUG_MARGIN > 0) && +        (m_Algorithm == 0 || m_Algorithm == VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT) && +        (m_hAllocator->m_MemProps.memoryTypes[m_MemoryTypeIndex].propertyFlags & requiredMemFlags) == requiredMemFlags; +} + +VkResult VmaBlockVector::Allocate( +    VkDeviceSize size, +    VkDeviceSize alignment, +    const VmaAllocationCreateInfo& createInfo, +    VmaSuballocationType suballocType, +    size_t allocationCount, +    VmaAllocation* pAllocations) +{ +    size_t allocIndex; +    VkResult res = VK_SUCCESS; + +    alignment = VMA_MAX(alignment, m_MinAllocationAlignment); + +    if (IsCorruptionDetectionEnabled()) +    { +        size = VmaAlignUp<VkDeviceSize>(size, sizeof(VMA_CORRUPTION_DETECTION_MAGIC_VALUE)); +        alignment = VmaAlignUp<VkDeviceSize>(alignment, sizeof(VMA_CORRUPTION_DETECTION_MAGIC_VALUE)); +    } + +    { +        VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex); +        for (allocIndex = 0; allocIndex < allocationCount; ++allocIndex) +        { +            res = AllocatePage( +                size, +                alignment, +                createInfo, +                suballocType, +                pAllocations + allocIndex); +            if (res != VK_SUCCESS) +            { +                break; +            } +        } +    } + +    if (res != VK_SUCCESS) +    { +        // Free all already created allocations. +        while (allocIndex--) +            Free(pAllocations[allocIndex]); +        memset(pAllocations, 0, sizeof(VmaAllocation) * allocationCount); +    } + +    return res; +} + +VkResult VmaBlockVector::AllocatePage( +    VkDeviceSize size, +    VkDeviceSize alignment, +    const VmaAllocationCreateInfo& createInfo, +    VmaSuballocationType suballocType, +    VmaAllocation* pAllocation) +{ +    const bool isUpperAddress = (createInfo.flags & VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0; + +    VkDeviceSize freeMemory; +    { +        const uint32_t heapIndex = m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex); +        VmaBudget heapBudget = {}; +        m_hAllocator->GetHeapBudgets(&heapBudget, heapIndex, 1); +        freeMemory = (heapBudget.usage < heapBudget.budget) ? 
(heapBudget.budget - heapBudget.usage) : 0; +    } + +    const bool canFallbackToDedicated = !HasExplicitBlockSize() && +        (createInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0; +    const bool canCreateNewBlock = +        ((createInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0) && +        (m_Blocks.size() < m_MaxBlockCount) && +        (freeMemory >= size || !canFallbackToDedicated); +    uint32_t strategy = createInfo.flags & VMA_ALLOCATION_CREATE_STRATEGY_MASK; + +    // Upper address can only be used with linear allocator and within single memory block. +    if (isUpperAddress && +        (m_Algorithm != VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT || m_MaxBlockCount > 1)) +    { +        return VK_ERROR_FEATURE_NOT_PRESENT; +    } + +    // Early reject: requested allocation size is larger that maximum block size for this block vector. +    if (size + VMA_DEBUG_MARGIN > m_PreferredBlockSize) +    { +        return VK_ERROR_OUT_OF_DEVICE_MEMORY; +    } + +    // 1. Search existing allocations. Try to allocate. +    if (m_Algorithm == VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT) +    { +        // Use only last block. +        if (!m_Blocks.empty()) +        { +            VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks.back(); +            VMA_ASSERT(pCurrBlock); +            VkResult res = AllocateFromBlock( +                pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); +            if (res == VK_SUCCESS) +            { +                VMA_DEBUG_LOG_FORMAT("    Returned from last block #%" PRIu32, pCurrBlock->GetId()); +                IncrementallySortBlocks(); +                return VK_SUCCESS; +            } +        } +    } +    else +    { +        if (strategy != VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT) // MIN_MEMORY or default +        { +            const bool isHostVisible = +                (m_hAllocator->m_MemProps.memoryTypes[m_MemoryTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0; +            if(isHostVisible) +            { +                const bool isMappingAllowed = (createInfo.flags & +                    (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0; +                /* +                For non-mappable allocations, check blocks that are not mapped first. +                For mappable allocations, check blocks that are already mapped first. +                This way, having many blocks, we will separate mappable and non-mappable allocations, +                hopefully limiting the number of blocks that are mapped, which will help tools like RenderDoc. +                */ +                for(size_t mappingI = 0; mappingI < 2; ++mappingI) +                { +                    // Forward order in m_Blocks - prefer blocks with smallest amount of free space. 
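+                    // m_Blocks is kept roughly sorted by ascending free size (see IncrementallySortBlocks below),
+                    // so the earliest blocks are the most occupied ones and get filled first.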
+                    for (size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) +                    { +                        VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; +                        VMA_ASSERT(pCurrBlock); +                        const bool isBlockMapped = pCurrBlock->GetMappedData() != VMA_NULL; +                        if((mappingI == 0) == (isMappingAllowed == isBlockMapped)) +                        { +                            VkResult res = AllocateFromBlock( +                                pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); +                            if (res == VK_SUCCESS) +                            { +                                VMA_DEBUG_LOG_FORMAT("    Returned from existing block #%" PRIu32, pCurrBlock->GetId()); +                                IncrementallySortBlocks(); +                                return VK_SUCCESS; +                            } +                        } +                    } +                } +            } +            else +            { +                // Forward order in m_Blocks - prefer blocks with smallest amount of free space. +                for (size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) +                { +                    VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; +                    VMA_ASSERT(pCurrBlock); +                    VkResult res = AllocateFromBlock( +                        pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); +                    if (res == VK_SUCCESS) +                    { +                        VMA_DEBUG_LOG_FORMAT("    Returned from existing block #%" PRIu32, pCurrBlock->GetId()); +                        IncrementallySortBlocks(); +                        return VK_SUCCESS; +                    } +                } +            } +        } +        else // VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT +        { +            // Backward order in m_Blocks - prefer blocks with largest amount of free space. +            for (size_t blockIndex = m_Blocks.size(); blockIndex--; ) +            { +                VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; +                VMA_ASSERT(pCurrBlock); +                VkResult res = AllocateFromBlock(pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); +                if (res == VK_SUCCESS) +                { +                    VMA_DEBUG_LOG_FORMAT("    Returned from existing block #%" PRIu32, pCurrBlock->GetId()); +                    IncrementallySortBlocks(); +                    return VK_SUCCESS; +                } +            } +        } +    } + +    // 2. Try to create new block. +    if (canCreateNewBlock) +    { +        // Calculate optimal size for new block. +        VkDeviceSize newBlockSize = m_PreferredBlockSize; +        uint32_t newBlockSizeShift = 0; +        const uint32_t NEW_BLOCK_SIZE_SHIFT_MAX = 3; + +        if (!m_ExplicitBlockSize) +        { +            // Allocate 1/8, 1/4, 1/2 as first blocks. 
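+            // This only applies when the block size was not set explicitly: early blocks start at a fraction of
+            // m_PreferredBlockSize and later ones grow, so a vector that stays small never commits a full block up front.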
+            const VkDeviceSize maxExistingBlockSize = CalcMaxBlockSize(); +            for (uint32_t i = 0; i < NEW_BLOCK_SIZE_SHIFT_MAX; ++i) +            { +                const VkDeviceSize smallerNewBlockSize = newBlockSize / 2; +                if (smallerNewBlockSize > maxExistingBlockSize && smallerNewBlockSize >= size * 2) +                { +                    newBlockSize = smallerNewBlockSize; +                    ++newBlockSizeShift; +                } +                else +                { +                    break; +                } +            } +        } + +        size_t newBlockIndex = 0; +        VkResult res = (newBlockSize <= freeMemory || !canFallbackToDedicated) ? +            CreateBlock(newBlockSize, &newBlockIndex) : VK_ERROR_OUT_OF_DEVICE_MEMORY; +        // Allocation of this size failed? Try 1/2, 1/4, 1/8 of m_PreferredBlockSize. +        if (!m_ExplicitBlockSize) +        { +            while (res < 0 && newBlockSizeShift < NEW_BLOCK_SIZE_SHIFT_MAX) +            { +                const VkDeviceSize smallerNewBlockSize = newBlockSize / 2; +                if (smallerNewBlockSize >= size) +                { +                    newBlockSize = smallerNewBlockSize; +                    ++newBlockSizeShift; +                    res = (newBlockSize <= freeMemory || !canFallbackToDedicated) ? +                        CreateBlock(newBlockSize, &newBlockIndex) : VK_ERROR_OUT_OF_DEVICE_MEMORY; +                } +                else +                { +                    break; +                } +            } +        } + +        if (res == VK_SUCCESS) +        { +            VmaDeviceMemoryBlock* const pBlock = m_Blocks[newBlockIndex]; +            VMA_ASSERT(pBlock->m_pMetadata->GetSize() >= size); + +            res = AllocateFromBlock( +                pBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); +            if (res == VK_SUCCESS) +            { +                VMA_DEBUG_LOG_FORMAT("    Created new block #%" PRIu32 " Size=%" PRIu64, pBlock->GetId(), newBlockSize); +                IncrementallySortBlocks(); +                return VK_SUCCESS; +            } +            else +            { +                // Allocation from new block failed, possibly due to VMA_DEBUG_MARGIN or alignment. +                return VK_ERROR_OUT_OF_DEVICE_MEMORY; +            } +        } +    } + +    return VK_ERROR_OUT_OF_DEVICE_MEMORY; +} + +void VmaBlockVector::Free(const VmaAllocation hAllocation) +{ +    VmaDeviceMemoryBlock* pBlockToDelete = VMA_NULL; + +    bool budgetExceeded = false; +    { +        const uint32_t heapIndex = m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex); +        VmaBudget heapBudget = {}; +        m_hAllocator->GetHeapBudgets(&heapBudget, heapIndex, 1); +        budgetExceeded = heapBudget.usage >= heapBudget.budget; +    } + +    // Scope for lock. 
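+    // All bookkeeping below runs under m_Mutex; a block picked for deletion is destroyed only after the lock is released.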
+    { +        VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex); + +        VmaDeviceMemoryBlock* pBlock = hAllocation->GetBlock(); + +        if (IsCorruptionDetectionEnabled()) +        { +            VkResult res = pBlock->ValidateMagicValueAfterAllocation(m_hAllocator, hAllocation->GetOffset(), hAllocation->GetSize()); +            VMA_ASSERT(res == VK_SUCCESS && "Couldn't map block memory to validate magic value."); +        } + +        if (hAllocation->IsPersistentMap()) +        { +            pBlock->Unmap(m_hAllocator, 1); +        } + +        const bool hadEmptyBlockBeforeFree = HasEmptyBlock(); +        pBlock->m_pMetadata->Free(hAllocation->GetAllocHandle()); +        pBlock->PostFree(m_hAllocator); +        VMA_HEAVY_ASSERT(pBlock->Validate()); + +        VMA_DEBUG_LOG_FORMAT("  Freed from MemoryTypeIndex=%" PRIu32, m_MemoryTypeIndex); + +        const bool canDeleteBlock = m_Blocks.size() > m_MinBlockCount; +        // pBlock became empty after this deallocation. +        if (pBlock->m_pMetadata->IsEmpty()) +        { +            // Already had empty block. We don't want to have two, so delete this one. +            if ((hadEmptyBlockBeforeFree || budgetExceeded) && canDeleteBlock) +            { +                pBlockToDelete = pBlock; +                Remove(pBlock); +            } +            // else: We now have one empty block - leave it. A hysteresis to avoid allocating whole block back and forth. +        } +        // pBlock didn't become empty, but we have another empty block - find and free that one. +        // (This is optional, heuristics.) +        else if (hadEmptyBlockBeforeFree && canDeleteBlock) +        { +            VmaDeviceMemoryBlock* pLastBlock = m_Blocks.back(); +            if (pLastBlock->m_pMetadata->IsEmpty()) +            { +                pBlockToDelete = pLastBlock; +                m_Blocks.pop_back(); +            } +        } + +        IncrementallySortBlocks(); + +        m_hAllocator->m_Budget.RemoveAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), hAllocation->GetSize()); +        hAllocation->Destroy(m_hAllocator); +        m_hAllocator->m_AllocationObjectAllocator.Free(hAllocation); +    } + +    // Destruction of a free block. Deferred until this point, outside of mutex +    // lock, for performance reason. +    if (pBlockToDelete != VMA_NULL) +    { +        VMA_DEBUG_LOG_FORMAT("    Deleted empty block #%" PRIu32, pBlockToDelete->GetId()); +        pBlockToDelete->Destroy(m_hAllocator); +        vma_delete(m_hAllocator, pBlockToDelete); +    } +} + +VkDeviceSize VmaBlockVector::CalcMaxBlockSize() const +{ +    VkDeviceSize result = 0; +    for (size_t i = m_Blocks.size(); i--; ) +    { +        result = VMA_MAX(result, m_Blocks[i]->m_pMetadata->GetSize()); +        if (result >= m_PreferredBlockSize) +        { +            break; +        } +    } +    return result; +} + +void VmaBlockVector::Remove(VmaDeviceMemoryBlock* pBlock) +{ +    for (uint32_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) +    { +        if (m_Blocks[blockIndex] == pBlock) +        { +            VmaVectorRemove(m_Blocks, blockIndex); +            return; +        } +    } +    VMA_ASSERT(0); +} + +void VmaBlockVector::IncrementallySortBlocks() +{ +    if (!m_IncrementalSort) +        return; +    if (m_Algorithm != VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT) +    { +        // Bubble sort only until first swap. 
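+        // A single swap per call keeps the cost low; because this runs after every allocation and free,
+        // m_Blocks gradually converges on ascending free-size order, which AllocatePage relies on.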
+        for (size_t i = 1; i < m_Blocks.size(); ++i) +        { +            if (m_Blocks[i - 1]->m_pMetadata->GetSumFreeSize() > m_Blocks[i]->m_pMetadata->GetSumFreeSize()) +            { +                std::swap(m_Blocks[i - 1], m_Blocks[i]); +                return; +            } +        } +    } +} + +void VmaBlockVector::SortByFreeSize() +{ +    VMA_SORT(m_Blocks.begin(), m_Blocks.end(), +        [](VmaDeviceMemoryBlock* b1, VmaDeviceMemoryBlock* b2) -> bool +        { +            return b1->m_pMetadata->GetSumFreeSize() < b2->m_pMetadata->GetSumFreeSize(); +        }); +} + +VkResult VmaBlockVector::AllocateFromBlock( +    VmaDeviceMemoryBlock* pBlock, +    VkDeviceSize size, +    VkDeviceSize alignment, +    VmaAllocationCreateFlags allocFlags, +    void* pUserData, +    VmaSuballocationType suballocType, +    uint32_t strategy, +    VmaAllocation* pAllocation) +{ +    const bool isUpperAddress = (allocFlags & VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0; + +    VmaAllocationRequest currRequest = {}; +    if (pBlock->m_pMetadata->CreateAllocationRequest( +        size, +        alignment, +        isUpperAddress, +        suballocType, +        strategy, +        &currRequest)) +    { +        return CommitAllocationRequest(currRequest, pBlock, alignment, allocFlags, pUserData, suballocType, pAllocation); +    } +    return VK_ERROR_OUT_OF_DEVICE_MEMORY; +} + +VkResult VmaBlockVector::CommitAllocationRequest( +    VmaAllocationRequest& allocRequest, +    VmaDeviceMemoryBlock* pBlock, +    VkDeviceSize alignment, +    VmaAllocationCreateFlags allocFlags, +    void* pUserData, +    VmaSuballocationType suballocType, +    VmaAllocation* pAllocation) +{ +    const bool mapped = (allocFlags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0; +    const bool isUserDataString = (allocFlags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0; +    const bool isMappingAllowed = (allocFlags & +        (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0; + +    pBlock->PostAlloc(m_hAllocator); +    // Allocate from pCurrBlock. +    if (mapped) +    { +        VkResult res = pBlock->Map(m_hAllocator, 1, VMA_NULL); +        if (res != VK_SUCCESS) +        { +            return res; +        } +    } + +    *pAllocation = m_hAllocator->m_AllocationObjectAllocator.Allocate(isMappingAllowed); +    pBlock->m_pMetadata->Alloc(allocRequest, suballocType, *pAllocation); +    (*pAllocation)->InitBlockAllocation( +        pBlock, +        allocRequest.allocHandle, +        alignment, +        allocRequest.size, // Not size, as actual allocation size may be larger than requested! 
+        m_MemoryTypeIndex, +        suballocType, +        mapped); +    VMA_HEAVY_ASSERT(pBlock->Validate()); +    if (isUserDataString) +        (*pAllocation)->SetName(m_hAllocator, (const char*)pUserData); +    else +        (*pAllocation)->SetUserData(m_hAllocator, pUserData); +    m_hAllocator->m_Budget.AddAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), allocRequest.size); +    if (VMA_DEBUG_INITIALIZE_ALLOCATIONS) +    { +        m_hAllocator->FillAllocation(*pAllocation, VMA_ALLOCATION_FILL_PATTERN_CREATED); +    } +    if (IsCorruptionDetectionEnabled()) +    { +        VkResult res = pBlock->WriteMagicValueAfterAllocation(m_hAllocator, (*pAllocation)->GetOffset(), allocRequest.size); +        VMA_ASSERT(res == VK_SUCCESS && "Couldn't map block memory to write magic value."); +    } +    return VK_SUCCESS; +} + +VkResult VmaBlockVector::CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIndex) +{ +    VkMemoryAllocateInfo allocInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; +    allocInfo.pNext = m_pMemoryAllocateNext; +    allocInfo.memoryTypeIndex = m_MemoryTypeIndex; +    allocInfo.allocationSize = blockSize; + +#if VMA_BUFFER_DEVICE_ADDRESS +    // Every standalone block can potentially contain a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT - always enable the feature. +    VkMemoryAllocateFlagsInfoKHR allocFlagsInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO_KHR }; +    if (m_hAllocator->m_UseKhrBufferDeviceAddress) +    { +        allocFlagsInfo.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR; +        VmaPnextChainPushFront(&allocInfo, &allocFlagsInfo); +    } +#endif // VMA_BUFFER_DEVICE_ADDRESS + +#if VMA_MEMORY_PRIORITY +    VkMemoryPriorityAllocateInfoEXT priorityInfo = { VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT }; +    if (m_hAllocator->m_UseExtMemoryPriority) +    { +        VMA_ASSERT(m_Priority >= 0.f && m_Priority <= 1.f); +        priorityInfo.priority = m_Priority; +        VmaPnextChainPushFront(&allocInfo, &priorityInfo); +    } +#endif // VMA_MEMORY_PRIORITY + +#if VMA_EXTERNAL_MEMORY +    // Attach VkExportMemoryAllocateInfoKHR if necessary. +    VkExportMemoryAllocateInfoKHR exportMemoryAllocInfo = { VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR }; +    exportMemoryAllocInfo.handleTypes = m_hAllocator->GetExternalMemoryHandleTypeFlags(m_MemoryTypeIndex); +    if (exportMemoryAllocInfo.handleTypes != 0) +    { +        VmaPnextChainPushFront(&allocInfo, &exportMemoryAllocInfo); +    } +#endif // VMA_EXTERNAL_MEMORY + +    VkDeviceMemory mem = VK_NULL_HANDLE; +    VkResult res = m_hAllocator->AllocateVulkanMemory(&allocInfo, &mem); +    if (res < 0) +    { +        return res; +    } + +    // New VkDeviceMemory successfully created. + +    // Create new Allocation for it. 
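Note: CreateBlock above funnels every optional feature into the pNext chain of a single VkMemoryAllocateInfo (device-address flags, memory priority, export-memory info) before calling vkAllocateMemory once. The device-address branch corresponds to the following plain-Vulkan sketch (Vulkan 1.2 names; the KHR-suffixed equivalents apply on older headers, and the handles here are placeholders):

    #include <vulkan/vulkan.h>

    // Plain-Vulkan equivalent of the VMA_BUFFER_DEVICE_ADDRESS branch above.
    VkResult AllocateWithDeviceAddress(VkDevice device, uint32_t memoryTypeIndex,
                                       VkDeviceSize size, VkDeviceMemory* outMemory)
    {
        VkMemoryAllocateFlagsInfo flagsInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO };
        flagsInfo.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT;

        VkMemoryAllocateInfo allocInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO };
        allocInfo.pNext = &flagsInfo; // extension struct pushed to the front of the chain
        allocInfo.allocationSize = size;
        allocInfo.memoryTypeIndex = memoryTypeIndex;

        return vkAllocateMemory(device, &allocInfo, nullptr, outMemory);
    }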
+    VmaDeviceMemoryBlock* const pBlock = vma_new(m_hAllocator, VmaDeviceMemoryBlock)(m_hAllocator); +    pBlock->Init( +        m_hAllocator, +        m_hParentPool, +        m_MemoryTypeIndex, +        mem, +        allocInfo.allocationSize, +        m_NextBlockId++, +        m_Algorithm, +        m_BufferImageGranularity); + +    m_Blocks.push_back(pBlock); +    if (pNewBlockIndex != VMA_NULL) +    { +        *pNewBlockIndex = m_Blocks.size() - 1; +    } + +    return VK_SUCCESS; +} + +bool VmaBlockVector::HasEmptyBlock() +{ +    for (size_t index = 0, count = m_Blocks.size(); index < count; ++index) +    { +        VmaDeviceMemoryBlock* const pBlock = m_Blocks[index]; +        if (pBlock->m_pMetadata->IsEmpty()) +        { +            return true; +        } +    } +    return false; +} + +#if VMA_STATS_STRING_ENABLED +void VmaBlockVector::PrintDetailedMap(class VmaJsonWriter& json) +{ +    VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); + + +    json.BeginObject(); +    for (size_t i = 0; i < m_Blocks.size(); ++i) +    { +        json.BeginString(); +        json.ContinueString(m_Blocks[i]->GetId()); +        json.EndString(); + +        json.BeginObject(); +        json.WriteString("MapRefCount"); +        json.WriteNumber(m_Blocks[i]->GetMapRefCount()); + +        m_Blocks[i]->m_pMetadata->PrintDetailedMap(json); +        json.EndObject(); +    } +    json.EndObject(); +} +#endif // VMA_STATS_STRING_ENABLED + +VkResult VmaBlockVector::CheckCorruption() +{ +    if (!IsCorruptionDetectionEnabled()) +    { +        return VK_ERROR_FEATURE_NOT_PRESENT; +    } + +    VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); +    for (uint32_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) +    { +        VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex]; +        VMA_ASSERT(pBlock); +        VkResult res = pBlock->CheckCorruption(m_hAllocator); +        if (res != VK_SUCCESS) +        { +            return res; +        } +    } +    return VK_SUCCESS; +} + +#endif // _VMA_BLOCK_VECTOR_FUNCTIONS + +#ifndef _VMA_DEFRAGMENTATION_CONTEXT_FUNCTIONS +VmaDefragmentationContext_T::VmaDefragmentationContext_T( +    VmaAllocator hAllocator, +    const VmaDefragmentationInfo& info) +    : m_MaxPassBytes(info.maxBytesPerPass == 0 ? VK_WHOLE_SIZE : info.maxBytesPerPass), +    m_MaxPassAllocations(info.maxAllocationsPerPass == 0 ? 
UINT32_MAX : info.maxAllocationsPerPass), +    m_BreakCallback(info.pfnBreakCallback), +    m_BreakCallbackUserData(info.pBreakCallbackUserData), +    m_MoveAllocator(hAllocator->GetAllocationCallbacks()), +    m_Moves(m_MoveAllocator) +{ +    m_Algorithm = info.flags & VMA_DEFRAGMENTATION_FLAG_ALGORITHM_MASK; + +    if (info.pool != VMA_NULL) +    { +        m_BlockVectorCount = 1; +        m_PoolBlockVector = &info.pool->m_BlockVector; +        m_pBlockVectors = &m_PoolBlockVector; +        m_PoolBlockVector->SetIncrementalSort(false); +        m_PoolBlockVector->SortByFreeSize(); +    } +    else +    { +        m_BlockVectorCount = hAllocator->GetMemoryTypeCount(); +        m_PoolBlockVector = VMA_NULL; +        m_pBlockVectors = hAllocator->m_pBlockVectors; +        for (uint32_t i = 0; i < m_BlockVectorCount; ++i) +        { +            VmaBlockVector* vector = m_pBlockVectors[i]; +            if (vector != VMA_NULL) +            { +                vector->SetIncrementalSort(false); +                vector->SortByFreeSize(); +            } +        } +    } + +    switch (m_Algorithm) +    { +    case 0: // Default algorithm +        m_Algorithm = VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT; +        m_AlgorithmState = vma_new_array(hAllocator, StateBalanced, m_BlockVectorCount); +        break; +    case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT: +        m_AlgorithmState = vma_new_array(hAllocator, StateBalanced, m_BlockVectorCount); +        break; +    case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: +        if (hAllocator->GetBufferImageGranularity() > 1) +        { +            m_AlgorithmState = vma_new_array(hAllocator, StateExtensive, m_BlockVectorCount); +        } +        break; +    } +} + +VmaDefragmentationContext_T::~VmaDefragmentationContext_T() +{ +    if (m_PoolBlockVector != VMA_NULL) +    { +        m_PoolBlockVector->SetIncrementalSort(true); +    } +    else +    { +        for (uint32_t i = 0; i < m_BlockVectorCount; ++i) +        { +            VmaBlockVector* vector = m_pBlockVectors[i]; +            if (vector != VMA_NULL) +                vector->SetIncrementalSort(true); +        } +    } + +    if (m_AlgorithmState) +    { +        switch (m_Algorithm) +        { +        case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT: +            vma_delete_array(m_MoveAllocator.m_pCallbacks, reinterpret_cast<StateBalanced*>(m_AlgorithmState), m_BlockVectorCount); +            break; +        case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: +            vma_delete_array(m_MoveAllocator.m_pCallbacks, reinterpret_cast<StateExtensive*>(m_AlgorithmState), m_BlockVectorCount); +            break; +        default: +            VMA_ASSERT(0); +        } +    } +} + +VkResult VmaDefragmentationContext_T::DefragmentPassBegin(VmaDefragmentationPassMoveInfo& moveInfo) +{ +    if (m_PoolBlockVector != VMA_NULL) +    { +        VmaMutexLockWrite lock(m_PoolBlockVector->GetMutex(), m_PoolBlockVector->GetAllocator()->m_UseMutex); + +        if (m_PoolBlockVector->GetBlockCount() > 1) +            ComputeDefragmentation(*m_PoolBlockVector, 0); +        else if (m_PoolBlockVector->GetBlockCount() == 1) +            ReallocWithinBlock(*m_PoolBlockVector, m_PoolBlockVector->GetBlock(0)); +    } +    else +    { +        for (uint32_t i = 0; i < m_BlockVectorCount; ++i) +        { +            if (m_pBlockVectors[i] != VMA_NULL) +            { +                VmaMutexLockWrite lock(m_pBlockVectors[i]->GetMutex(), 
m_pBlockVectors[i]->GetAllocator()->m_UseMutex); + +                if (m_pBlockVectors[i]->GetBlockCount() > 1) +                { +                    if (ComputeDefragmentation(*m_pBlockVectors[i], i)) +                        break; +                } +                else if (m_pBlockVectors[i]->GetBlockCount() == 1) +                { +                    if (ReallocWithinBlock(*m_pBlockVectors[i], m_pBlockVectors[i]->GetBlock(0))) +                        break; +                } +            } +        } +    } + +    moveInfo.moveCount = static_cast<uint32_t>(m_Moves.size()); +    if (moveInfo.moveCount > 0) +    { +        moveInfo.pMoves = m_Moves.data(); +        return VK_INCOMPLETE; +    } + +    moveInfo.pMoves = VMA_NULL; +    return VK_SUCCESS; +} + +VkResult VmaDefragmentationContext_T::DefragmentPassEnd(VmaDefragmentationPassMoveInfo& moveInfo) +{ +    VMA_ASSERT(moveInfo.moveCount > 0 ? moveInfo.pMoves != VMA_NULL : true); + +    VkResult result = VK_SUCCESS; +    VmaStlAllocator<FragmentedBlock> blockAllocator(m_MoveAllocator.m_pCallbacks); +    VmaVector<FragmentedBlock, VmaStlAllocator<FragmentedBlock>> immovableBlocks(blockAllocator); +    VmaVector<FragmentedBlock, VmaStlAllocator<FragmentedBlock>> mappedBlocks(blockAllocator); + +    VmaAllocator allocator = VMA_NULL; +    for (uint32_t i = 0; i < moveInfo.moveCount; ++i) +    { +        VmaDefragmentationMove& move = moveInfo.pMoves[i]; +        size_t prevCount = 0, currentCount = 0; +        VkDeviceSize freedBlockSize = 0; + +        uint32_t vectorIndex; +        VmaBlockVector* vector; +        if (m_PoolBlockVector != VMA_NULL) +        { +            vectorIndex = 0; +            vector = m_PoolBlockVector; +        } +        else +        { +            vectorIndex = move.srcAllocation->GetMemoryTypeIndex(); +            vector = m_pBlockVectors[vectorIndex]; +            VMA_ASSERT(vector != VMA_NULL); +        } + +        switch (move.operation) +        { +        case VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY: +        { +            uint8_t mapCount = move.srcAllocation->SwapBlockAllocation(vector->m_hAllocator, move.dstTmpAllocation); +            if (mapCount > 0) +            { +                allocator = vector->m_hAllocator; +                VmaDeviceMemoryBlock* newMapBlock = move.srcAllocation->GetBlock(); +                bool notPresent = true; +                for (FragmentedBlock& block : mappedBlocks) +                { +                    if (block.block == newMapBlock) +                    { +                        notPresent = false; +                        block.data += mapCount; +                        break; +                    } +                } +                if (notPresent) +                    mappedBlocks.push_back({ mapCount, newMapBlock }); +            } + +            // Scope for locks, Free have it's own lock +            { +                VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); +                prevCount = vector->GetBlockCount(); +                freedBlockSize = move.dstTmpAllocation->GetBlock()->m_pMetadata->GetSize(); +            } +            vector->Free(move.dstTmpAllocation); +            { +                VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); +                currentCount = vector->GetBlockCount(); +            } + +            result = VK_INCOMPLETE; +            break; +        } +        case VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE: +        { +            
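Note: DefragmentPassBegin above only plans moves and hands them to the caller (returning VK_INCOMPLETE while work remains, VK_SUCCESS once nothing is left), while DefragmentPassEnd, continuing below, commits or rolls back each move according to the operation the caller chose (COPY, IGNORE or DESTROY). From the application side this machinery is driven through the public VMA 3.x entry points roughly as follows; this is a minimal sketch and the GPU copy of each move is elided as a comment:

    #include "vk_mem_alloc.h"

    // Caller-side defragmentation loop over the pass machinery implemented here.
    void DefragmentAll(VmaAllocator allocator)
    {
        VmaDefragmentationInfo defragInfo = {};
        defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT;

        VmaDefragmentationContext ctx = VK_NULL_HANDLE;
        if (vmaBeginDefragmentation(allocator, &defragInfo, &ctx) != VK_SUCCESS)
            return;

        for (;;)
        {
            VmaDefragmentationPassMoveInfo pass = {};
            if (vmaBeginDefragmentationPass(allocator, ctx, &pass) == VK_SUCCESS)
                break; // nothing left to move

            // For each pass.pMoves[i]: copy srcAllocation's contents into
            // dstTmpAllocation (vkCmdCopyBuffer / vkCmdCopyImage), or set
            // operation to IGNORE / DESTROY instead of performing the copy.

            if (vmaEndDefragmentationPass(allocator, ctx, &pass) == VK_SUCCESS)
                break; // VK_INCOMPLETE would mean more passes are needed
        }

        VmaDefragmentationStats stats = {};
        vmaEndDefragmentation(allocator, ctx, &stats);
    }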
m_PassStats.bytesMoved -= move.srcAllocation->GetSize(); +            --m_PassStats.allocationsMoved; +            vector->Free(move.dstTmpAllocation); + +            VmaDeviceMemoryBlock* newBlock = move.srcAllocation->GetBlock(); +            bool notPresent = true; +            for (const FragmentedBlock& block : immovableBlocks) +            { +                if (block.block == newBlock) +                { +                    notPresent = false; +                    break; +                } +            } +            if (notPresent) +                immovableBlocks.push_back({ vectorIndex, newBlock }); +            break; +        } +        case VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY: +        { +            m_PassStats.bytesMoved -= move.srcAllocation->GetSize(); +            --m_PassStats.allocationsMoved; +            // Scope for locks, Free have it's own lock +            { +                VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); +                prevCount = vector->GetBlockCount(); +                freedBlockSize = move.srcAllocation->GetBlock()->m_pMetadata->GetSize(); +            } +            vector->Free(move.srcAllocation); +            { +                VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); +                currentCount = vector->GetBlockCount(); +            } +            freedBlockSize *= prevCount - currentCount; + +            VkDeviceSize dstBlockSize; +            { +                VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); +                dstBlockSize = move.dstTmpAllocation->GetBlock()->m_pMetadata->GetSize(); +            } +            vector->Free(move.dstTmpAllocation); +            { +                VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); +                freedBlockSize += dstBlockSize * (currentCount - vector->GetBlockCount()); +                currentCount = vector->GetBlockCount(); +            } + +            result = VK_INCOMPLETE; +            break; +        } +        default: +            VMA_ASSERT(0); +        } + +        if (prevCount > currentCount) +        { +            size_t freedBlocks = prevCount - currentCount; +            m_PassStats.deviceMemoryBlocksFreed += static_cast<uint32_t>(freedBlocks); +            m_PassStats.bytesFreed += freedBlockSize; +        } + +        if(m_Algorithm == VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT && +            m_AlgorithmState != VMA_NULL) +        { +            // Avoid unnecessary tries to allocate when new free block is available +            StateExtensive& state = reinterpret_cast<StateExtensive*>(m_AlgorithmState)[vectorIndex]; +            if (state.firstFreeBlock != SIZE_MAX) +            { +                const size_t diff = prevCount - currentCount; +                if (state.firstFreeBlock >= diff) +                { +                    state.firstFreeBlock -= diff; +                    if (state.firstFreeBlock != 0) +                        state.firstFreeBlock -= vector->GetBlock(state.firstFreeBlock - 1)->m_pMetadata->IsEmpty(); +                } +                else +                    state.firstFreeBlock = 0; +            } +        } +    } +    moveInfo.moveCount = 0; +    moveInfo.pMoves = VMA_NULL; +    m_Moves.clear(); + +    // Update stats +    m_GlobalStats.allocationsMoved += m_PassStats.allocationsMoved; +    m_GlobalStats.bytesFreed += m_PassStats.bytesFreed; +    m_GlobalStats.bytesMoved += 
m_PassStats.bytesMoved; +    m_GlobalStats.deviceMemoryBlocksFreed += m_PassStats.deviceMemoryBlocksFreed; +    m_PassStats = { 0 }; + +    // Move blocks with immovable allocations according to algorithm +    if (immovableBlocks.size() > 0) +    { +        do +        { +            if(m_Algorithm == VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT) +            { +                if (m_AlgorithmState != VMA_NULL) +                { +                    bool swapped = false; +                    // Move to the start of free blocks range +                    for (const FragmentedBlock& block : immovableBlocks) +                    { +                        StateExtensive& state = reinterpret_cast<StateExtensive*>(m_AlgorithmState)[block.data]; +                        if (state.operation != StateExtensive::Operation::Cleanup) +                        { +                            VmaBlockVector* vector = m_pBlockVectors[block.data]; +                            VmaMutexLockWrite lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + +                            for (size_t i = 0, count = vector->GetBlockCount() - m_ImmovableBlockCount; i < count; ++i) +                            { +                                if (vector->GetBlock(i) == block.block) +                                { +                                    std::swap(vector->m_Blocks[i], vector->m_Blocks[vector->GetBlockCount() - ++m_ImmovableBlockCount]); +                                    if (state.firstFreeBlock != SIZE_MAX) +                                    { +                                        if (i + 1 < state.firstFreeBlock) +                                        { +                                            if (state.firstFreeBlock > 1) +                                                std::swap(vector->m_Blocks[i], vector->m_Blocks[--state.firstFreeBlock]); +                                            else +                                                --state.firstFreeBlock; +                                        } +                                    } +                                    swapped = true; +                                    break; +                                } +                            } +                        } +                    } +                    if (swapped) +                        result = VK_INCOMPLETE; +                    break; +                } +            } + +            // Move to the beginning +            for (const FragmentedBlock& block : immovableBlocks) +            { +                VmaBlockVector* vector = m_pBlockVectors[block.data]; +                VmaMutexLockWrite lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + +                for (size_t i = m_ImmovableBlockCount; i < vector->GetBlockCount(); ++i) +                { +                    if (vector->GetBlock(i) == block.block) +                    { +                        std::swap(vector->m_Blocks[i], vector->m_Blocks[m_ImmovableBlockCount++]); +                        break; +                    } +                } +            } +        } while (false); +    } + +    // Bulk-map destination blocks +    for (const FragmentedBlock& block : mappedBlocks) +    { +        VkResult res = block.block->Map(allocator, block.data, VMA_NULL); +        VMA_ASSERT(res == VK_SUCCESS); +    } +    return result; +} + +bool VmaDefragmentationContext_T::ComputeDefragmentation(VmaBlockVector& vector, size_t index) +{ +    switch (m_Algorithm) +    { +    case 
VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT: +        return ComputeDefragmentation_Fast(vector); +    case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT: +        return ComputeDefragmentation_Balanced(vector, index, true); +    case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT: +        return ComputeDefragmentation_Full(vector); +    case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: +        return ComputeDefragmentation_Extensive(vector, index); +    default: +        VMA_ASSERT(0); +        return ComputeDefragmentation_Balanced(vector, index, true); +    } +} + +VmaDefragmentationContext_T::MoveAllocationData VmaDefragmentationContext_T::GetMoveData( +    VmaAllocHandle handle, VmaBlockMetadata* metadata) +{ +    MoveAllocationData moveData; +    moveData.move.srcAllocation = (VmaAllocation)metadata->GetAllocationUserData(handle); +    moveData.size = moveData.move.srcAllocation->GetSize(); +    moveData.alignment = moveData.move.srcAllocation->GetAlignment(); +    moveData.type = moveData.move.srcAllocation->GetSuballocationType(); +    moveData.flags = 0; + +    if (moveData.move.srcAllocation->IsPersistentMap()) +        moveData.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT; +    if (moveData.move.srcAllocation->IsMappingAllowed()) +        moveData.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; + +    return moveData; +} + +VmaDefragmentationContext_T::CounterStatus VmaDefragmentationContext_T::CheckCounters(VkDeviceSize bytes) +{ +    // Check custom criteria if exists +    if (m_BreakCallback && m_BreakCallback(m_BreakCallbackUserData)) +        return CounterStatus::End; + +    // Ignore allocation if will exceed max size for copy +    if (m_PassStats.bytesMoved + bytes > m_MaxPassBytes) +    { +        if (++m_IgnoredAllocs < MAX_ALLOCS_TO_IGNORE) +            return CounterStatus::Ignore; +        else +            return CounterStatus::End; +    } +    else +        m_IgnoredAllocs = 0; +    return CounterStatus::Pass; +} + +bool VmaDefragmentationContext_T::IncrementCounters(VkDeviceSize bytes) +{ +    m_PassStats.bytesMoved += bytes; +    // Early return when max found +    if (++m_PassStats.allocationsMoved >= m_MaxPassAllocations || m_PassStats.bytesMoved >= m_MaxPassBytes) +    { +        VMA_ASSERT((m_PassStats.allocationsMoved == m_MaxPassAllocations || +            m_PassStats.bytesMoved == m_MaxPassBytes) && "Exceeded maximal pass threshold!"); +        return true; +    } +    return false; +} + +bool VmaDefragmentationContext_T::ReallocWithinBlock(VmaBlockVector& vector, VmaDeviceMemoryBlock* block) +{ +    VmaBlockMetadata* metadata = block->m_pMetadata; + +    for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); +        handle != VK_NULL_HANDLE; +        handle = metadata->GetNextAllocation(handle)) +    { +        MoveAllocationData moveData = GetMoveData(handle, metadata); +        // Ignore newly created allocations by defragmentation algorithm +        if (moveData.move.srcAllocation->GetUserData() == this) +            continue; +        switch (CheckCounters(moveData.move.srcAllocation->GetSize())) +        { +        case CounterStatus::Ignore: +            continue; +        case CounterStatus::End: +            return true; +        case CounterStatus::Pass: +            break; +        default: +            VMA_ASSERT(0); +        } + +        VkDeviceSize offset = moveData.move.srcAllocation->GetOffset(); +        if (offset != 0 && metadata->GetSumFreeSize() >= 
moveData.size) +        { +            VmaAllocationRequest request = {}; +            if (metadata->CreateAllocationRequest( +                moveData.size, +                moveData.alignment, +                false, +                moveData.type, +                VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, +                &request)) +            { +                if (metadata->GetAllocationOffset(request.allocHandle) < offset) +                { +                    if (vector.CommitAllocationRequest( +                        request, +                        block, +                        moveData.alignment, +                        moveData.flags, +                        this, +                        moveData.type, +                        &moveData.move.dstTmpAllocation) == VK_SUCCESS) +                    { +                        m_Moves.push_back(moveData.move); +                        if (IncrementCounters(moveData.size)) +                            return true; +                    } +                } +            } +        } +    } +    return false; +} + +bool VmaDefragmentationContext_T::AllocInOtherBlock(size_t start, size_t end, MoveAllocationData& data, VmaBlockVector& vector) +{ +    for (; start < end; ++start) +    { +        VmaDeviceMemoryBlock* dstBlock = vector.GetBlock(start); +        if (dstBlock->m_pMetadata->GetSumFreeSize() >= data.size) +        { +            if (vector.AllocateFromBlock(dstBlock, +                data.size, +                data.alignment, +                data.flags, +                this, +                data.type, +                0, +                &data.move.dstTmpAllocation) == VK_SUCCESS) +            { +                m_Moves.push_back(data.move); +                if (IncrementCounters(data.size)) +                    return true; +                break; +            } +        } +    } +    return false; +} + +bool VmaDefragmentationContext_T::ComputeDefragmentation_Fast(VmaBlockVector& vector) +{ +    // Move only between blocks + +    // Go through allocations in last blocks and try to fit them inside first ones +    for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i) +    { +        VmaBlockMetadata* metadata = vector.GetBlock(i)->m_pMetadata; + +        for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); +            handle != VK_NULL_HANDLE; +            handle = metadata->GetNextAllocation(handle)) +        { +            MoveAllocationData moveData = GetMoveData(handle, metadata); +            // Ignore newly created allocations by defragmentation algorithm +            if (moveData.move.srcAllocation->GetUserData() == this) +                continue; +            switch (CheckCounters(moveData.move.srcAllocation->GetSize())) +            { +            case CounterStatus::Ignore: +                continue; +            case CounterStatus::End: +                return true; +            case CounterStatus::Pass: +                break; +            default: +                VMA_ASSERT(0); +            } + +            // Check all previous blocks for free space +            if (AllocInOtherBlock(0, i, moveData, vector)) +                return true; +        } +    } +    return false; +} + +bool VmaDefragmentationContext_T::ComputeDefragmentation_Balanced(VmaBlockVector& vector, size_t index, bool update) +{ +    // Go over every allocation and try to fit it in previous blocks at lowest offsets, +    // if not possible: realloc within single block to minimize offset 
(exclude offset == 0), +    // but only if there are noticeable gaps between them (some heuristic, ex. average size of allocation in block) +    VMA_ASSERT(m_AlgorithmState != VMA_NULL); + +    StateBalanced& vectorState = reinterpret_cast<StateBalanced*>(m_AlgorithmState)[index]; +    if (update && vectorState.avgAllocSize == UINT64_MAX) +        UpdateVectorStatistics(vector, vectorState); + +    const size_t startMoveCount = m_Moves.size(); +    VkDeviceSize minimalFreeRegion = vectorState.avgFreeSize / 2; +    for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i) +    { +        VmaDeviceMemoryBlock* block = vector.GetBlock(i); +        VmaBlockMetadata* metadata = block->m_pMetadata; +        VkDeviceSize prevFreeRegionSize = 0; + +        for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); +            handle != VK_NULL_HANDLE; +            handle = metadata->GetNextAllocation(handle)) +        { +            MoveAllocationData moveData = GetMoveData(handle, metadata); +            // Ignore newly created allocations by defragmentation algorithm +            if (moveData.move.srcAllocation->GetUserData() == this) +                continue; +            switch (CheckCounters(moveData.move.srcAllocation->GetSize())) +            { +            case CounterStatus::Ignore: +                continue; +            case CounterStatus::End: +                return true; +            case CounterStatus::Pass: +                break; +            default: +                VMA_ASSERT(0); +            } + +            // Check all previous blocks for free space +            const size_t prevMoveCount = m_Moves.size(); +            if (AllocInOtherBlock(0, i, moveData, vector)) +                return true; + +            VkDeviceSize nextFreeRegionSize = metadata->GetNextFreeRegionSize(handle); +            // If no room found then realloc within block for lower offset +            VkDeviceSize offset = moveData.move.srcAllocation->GetOffset(); +            if (prevMoveCount == m_Moves.size() && offset != 0 && metadata->GetSumFreeSize() >= moveData.size) +            { +                // Check if realloc will make sense +                if (prevFreeRegionSize >= minimalFreeRegion || +                    nextFreeRegionSize >= minimalFreeRegion || +                    moveData.size <= vectorState.avgFreeSize || +                    moveData.size <= vectorState.avgAllocSize) +                { +                    VmaAllocationRequest request = {}; +                    if (metadata->CreateAllocationRequest( +                        moveData.size, +                        moveData.alignment, +                        false, +                        moveData.type, +                        VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, +                        &request)) +                    { +                        if (metadata->GetAllocationOffset(request.allocHandle) < offset) +                        { +                            if (vector.CommitAllocationRequest( +                                request, +                                block, +                                moveData.alignment, +                                moveData.flags, +                                this, +                                moveData.type, +                                &moveData.move.dstTmpAllocation) == VK_SUCCESS) +                            { +                                m_Moves.push_back(moveData.move); +                                if 
(IncrementCounters(moveData.size)) +                                    return true; +                            } +                        } +                    } +                } +            } +            prevFreeRegionSize = nextFreeRegionSize; +        } +    } + +    // No moves performed, update statistics to current vector state +    if (startMoveCount == m_Moves.size() && !update) +    { +        vectorState.avgAllocSize = UINT64_MAX; +        return ComputeDefragmentation_Balanced(vector, index, false); +    } +    return false; +} + +bool VmaDefragmentationContext_T::ComputeDefragmentation_Full(VmaBlockVector& vector) +{ +    // Go over every allocation and try to fit it in previous blocks at lowest offsets, +    // if not possible: realloc within single block to minimize offset (exclude offset == 0) + +    for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i) +    { +        VmaDeviceMemoryBlock* block = vector.GetBlock(i); +        VmaBlockMetadata* metadata = block->m_pMetadata; + +        for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); +            handle != VK_NULL_HANDLE; +            handle = metadata->GetNextAllocation(handle)) +        { +            MoveAllocationData moveData = GetMoveData(handle, metadata); +            // Ignore newly created allocations by defragmentation algorithm +            if (moveData.move.srcAllocation->GetUserData() == this) +                continue; +            switch (CheckCounters(moveData.move.srcAllocation->GetSize())) +            { +            case CounterStatus::Ignore: +                continue; +            case CounterStatus::End: +                return true; +            case CounterStatus::Pass: +                break; +            default: +                VMA_ASSERT(0); +            } + +            // Check all previous blocks for free space +            const size_t prevMoveCount = m_Moves.size(); +            if (AllocInOtherBlock(0, i, moveData, vector)) +                return true; + +            // If no room found then realloc within block for lower offset +            VkDeviceSize offset = moveData.move.srcAllocation->GetOffset(); +            if (prevMoveCount == m_Moves.size() && offset != 0 && metadata->GetSumFreeSize() >= moveData.size) +            { +                VmaAllocationRequest request = {}; +                if (metadata->CreateAllocationRequest( +                    moveData.size, +                    moveData.alignment, +                    false, +                    moveData.type, +                    VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, +                    &request)) +                { +                    if (metadata->GetAllocationOffset(request.allocHandle) < offset) +                    { +                        if (vector.CommitAllocationRequest( +                            request, +                            block, +                            moveData.alignment, +                            moveData.flags, +                            this, +                            moveData.type, +                            &moveData.move.dstTmpAllocation) == VK_SUCCESS) +                        { +                            m_Moves.push_back(moveData.move); +                            if (IncrementCounters(moveData.size)) +                                return true; +                        } +                    } +                } +            } +        } +    } +    return false; +} + +bool 
VmaDefragmentationContext_T::ComputeDefragmentation_Extensive(VmaBlockVector& vector, size_t index) +{ +    // First free single block, then populate it to the brim, then free another block, and so on + +    // Fallback to previous algorithm since without granularity conflicts it can achieve max packing +    if (vector.m_BufferImageGranularity == 1) +        return ComputeDefragmentation_Full(vector); + +    VMA_ASSERT(m_AlgorithmState != VMA_NULL); + +    StateExtensive& vectorState = reinterpret_cast<StateExtensive*>(m_AlgorithmState)[index]; + +    bool texturePresent = false, bufferPresent = false, otherPresent = false; +    switch (vectorState.operation) +    { +    case StateExtensive::Operation::Done: // Vector defragmented +        return false; +    case StateExtensive::Operation::FindFreeBlockBuffer: +    case StateExtensive::Operation::FindFreeBlockTexture: +    case StateExtensive::Operation::FindFreeBlockAll: +    { +        // No more blocks to free, just perform fast realloc and move to cleanup +        if (vectorState.firstFreeBlock == 0) +        { +            vectorState.operation = StateExtensive::Operation::Cleanup; +            return ComputeDefragmentation_Fast(vector); +        } + +        // No free blocks, have to clear last one +        size_t last = (vectorState.firstFreeBlock == SIZE_MAX ? vector.GetBlockCount() : vectorState.firstFreeBlock) - 1; +        VmaBlockMetadata* freeMetadata = vector.GetBlock(last)->m_pMetadata; + +        const size_t prevMoveCount = m_Moves.size(); +        for (VmaAllocHandle handle = freeMetadata->GetAllocationListBegin(); +            handle != VK_NULL_HANDLE; +            handle = freeMetadata->GetNextAllocation(handle)) +        { +            MoveAllocationData moveData = GetMoveData(handle, freeMetadata); +            switch (CheckCounters(moveData.move.srcAllocation->GetSize())) +            { +            case CounterStatus::Ignore: +                continue; +            case CounterStatus::End: +                return true; +            case CounterStatus::Pass: +                break; +            default: +                VMA_ASSERT(0); +            } + +            // Check all previous blocks for free space +            if (AllocInOtherBlock(0, last, moveData, vector)) +            { +                // Full clear performed already +                if (prevMoveCount != m_Moves.size() && freeMetadata->GetNextAllocation(handle) == VK_NULL_HANDLE) +                    vectorState.firstFreeBlock = last; +                return true; +            } +        } + +        if (prevMoveCount == m_Moves.size()) +        { +            // Cannot perform full clear, have to move data in other blocks around +            if (last != 0) +            { +                for (size_t i = last - 1; i; --i) +                { +                    if (ReallocWithinBlock(vector, vector.GetBlock(i))) +                        return true; +                } +            } + +            if (prevMoveCount == m_Moves.size()) +            { +                // No possible reallocs within blocks, try to move them around fast +                return ComputeDefragmentation_Fast(vector); +            } +        } +        else +        { +            switch (vectorState.operation) +            { +            case StateExtensive::Operation::FindFreeBlockBuffer: +                vectorState.operation = StateExtensive::Operation::MoveBuffers; +                break; +            case StateExtensive::Operation::FindFreeBlockTexture: +                
vectorState.operation = StateExtensive::Operation::MoveTextures; +                break; +            case StateExtensive::Operation::FindFreeBlockAll: +                vectorState.operation = StateExtensive::Operation::MoveAll; +                break; +            default: +                VMA_ASSERT(0); +                vectorState.operation = StateExtensive::Operation::MoveTextures; +            } +            vectorState.firstFreeBlock = last; +            // Nothing done, block found without reallocations, can perform another reallocs in same pass +            return ComputeDefragmentation_Extensive(vector, index); +        } +        break; +    } +    case StateExtensive::Operation::MoveTextures: +    { +        if (MoveDataToFreeBlocks(VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL, vector, +            vectorState.firstFreeBlock, texturePresent, bufferPresent, otherPresent)) +        { +            if (texturePresent) +            { +                vectorState.operation = StateExtensive::Operation::FindFreeBlockTexture; +                return ComputeDefragmentation_Extensive(vector, index); +            } + +            if (!bufferPresent && !otherPresent) +            { +                vectorState.operation = StateExtensive::Operation::Cleanup; +                break; +            } + +            // No more textures to move, check buffers +            vectorState.operation = StateExtensive::Operation::MoveBuffers; +            bufferPresent = false; +            otherPresent = false; +        } +        else +            break; +        VMA_FALLTHROUGH; // Fallthrough +    } +    case StateExtensive::Operation::MoveBuffers: +    { +        if (MoveDataToFreeBlocks(VMA_SUBALLOCATION_TYPE_BUFFER, vector, +            vectorState.firstFreeBlock, texturePresent, bufferPresent, otherPresent)) +        { +            if (bufferPresent) +            { +                vectorState.operation = StateExtensive::Operation::FindFreeBlockBuffer; +                return ComputeDefragmentation_Extensive(vector, index); +            } + +            if (!otherPresent) +            { +                vectorState.operation = StateExtensive::Operation::Cleanup; +                break; +            } + +            // No more buffers to move, check all others +            vectorState.operation = StateExtensive::Operation::MoveAll; +            otherPresent = false; +        } +        else +            break; +        VMA_FALLTHROUGH; // Fallthrough +    } +    case StateExtensive::Operation::MoveAll: +    { +        if (MoveDataToFreeBlocks(VMA_SUBALLOCATION_TYPE_FREE, vector, +            vectorState.firstFreeBlock, texturePresent, bufferPresent, otherPresent)) +        { +            if (otherPresent) +            { +                vectorState.operation = StateExtensive::Operation::FindFreeBlockBuffer; +                return ComputeDefragmentation_Extensive(vector, index); +            } +            // Everything moved +            vectorState.operation = StateExtensive::Operation::Cleanup; +        } +        break; +    } +    case StateExtensive::Operation::Cleanup: +        // Cleanup is handled below so that other operations may reuse the cleanup code. This case is here to prevent the unhandled enum value warning (C4062). 
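Note: the extensive algorithm above is a small state machine: empty one block (the FindFreeBlock* states), refill the freed space one resource class at a time (MoveTextures, then MoveBuffers, then MoveAll) so optimal-tiling images and buffers stop sharing bufferImageGranularity pages, and finish with an in-block compaction (Cleanup) before reporting Done. The sketch below is a deliberately condensed view of that ordering; the real machine loops back to a FindFreeBlock* state between move phases, and the enum is illustrative rather than the library's type:

    // Condensed phase ordering of the 'extensive' defragmentation strategy.
    enum class ExtensivePhase { FindFreeBlock, MoveTextures, MoveBuffers, MoveAll, Cleanup, Done };

    ExtensivePhase NextPhase(ExtensivePhase p)
    {
        switch (p)
        {
        case ExtensivePhase::FindFreeBlock: return ExtensivePhase::MoveTextures;
        case ExtensivePhase::MoveTextures:  return ExtensivePhase::MoveBuffers;
        case ExtensivePhase::MoveBuffers:   return ExtensivePhase::MoveAll;
        case ExtensivePhase::MoveAll:       return ExtensivePhase::Cleanup;
        default:                            return ExtensivePhase::Done;
        }
    }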
+        break; +    } + +    if (vectorState.operation == StateExtensive::Operation::Cleanup) +    { +        // All other work done, pack data in blocks even tighter if possible +        const size_t prevMoveCount = m_Moves.size(); +        for (size_t i = 0; i < vector.GetBlockCount(); ++i) +        { +            if (ReallocWithinBlock(vector, vector.GetBlock(i))) +                return true; +        } + +        if (prevMoveCount == m_Moves.size()) +            vectorState.operation = StateExtensive::Operation::Done; +    } +    return false; +} + +void VmaDefragmentationContext_T::UpdateVectorStatistics(VmaBlockVector& vector, StateBalanced& state) +{ +    size_t allocCount = 0; +    size_t freeCount = 0; +    state.avgFreeSize = 0; +    state.avgAllocSize = 0; + +    for (size_t i = 0; i < vector.GetBlockCount(); ++i) +    { +        VmaBlockMetadata* metadata = vector.GetBlock(i)->m_pMetadata; + +        allocCount += metadata->GetAllocationCount(); +        freeCount += metadata->GetFreeRegionsCount(); +        state.avgFreeSize += metadata->GetSumFreeSize(); +        state.avgAllocSize += metadata->GetSize(); +    } + +    state.avgAllocSize = (state.avgAllocSize - state.avgFreeSize) / allocCount; +    state.avgFreeSize /= freeCount; +} + +bool VmaDefragmentationContext_T::MoveDataToFreeBlocks(VmaSuballocationType currentType, +    VmaBlockVector& vector, size_t firstFreeBlock, +    bool& texturePresent, bool& bufferPresent, bool& otherPresent) +{ +    const size_t prevMoveCount = m_Moves.size(); +    for (size_t i = firstFreeBlock ; i;) +    { +        VmaDeviceMemoryBlock* block = vector.GetBlock(--i); +        VmaBlockMetadata* metadata = block->m_pMetadata; + +        for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); +            handle != VK_NULL_HANDLE; +            handle = metadata->GetNextAllocation(handle)) +        { +            MoveAllocationData moveData = GetMoveData(handle, metadata); +            // Ignore newly created allocations by defragmentation algorithm +            if (moveData.move.srcAllocation->GetUserData() == this) +                continue; +            switch (CheckCounters(moveData.move.srcAllocation->GetSize())) +            { +            case CounterStatus::Ignore: +                continue; +            case CounterStatus::End: +                return true; +            case CounterStatus::Pass: +                break; +            default: +                VMA_ASSERT(0); +            } + +            // Move only single type of resources at once +            if (!VmaIsBufferImageGranularityConflict(moveData.type, currentType)) +            { +                // Try to fit allocation into free blocks +                if (AllocInOtherBlock(firstFreeBlock, vector.GetBlockCount(), moveData, vector)) +                    return false; +            } + +            if (!VmaIsBufferImageGranularityConflict(moveData.type, VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL)) +                texturePresent = true; +            else if (!VmaIsBufferImageGranularityConflict(moveData.type, VMA_SUBALLOCATION_TYPE_BUFFER)) +                bufferPresent = true; +            else +                otherPresent = true; +        } +    } +    return prevMoveCount == m_Moves.size(); +} +#endif // _VMA_DEFRAGMENTATION_CONTEXT_FUNCTIONS + +#ifndef _VMA_POOL_T_FUNCTIONS +VmaPool_T::VmaPool_T( +    VmaAllocator hAllocator, +    const VmaPoolCreateInfo& createInfo, +    VkDeviceSize preferredBlockSize) +    : m_BlockVector( +        hAllocator, +        this, // 
hParentPool +        createInfo.memoryTypeIndex, +        createInfo.blockSize != 0 ? createInfo.blockSize : preferredBlockSize, +        createInfo.minBlockCount, +        createInfo.maxBlockCount, +        (createInfo.flags& VMA_POOL_CREATE_IGNORE_BUFFER_IMAGE_GRANULARITY_BIT) != 0 ? 1 : hAllocator->GetBufferImageGranularity(), +        createInfo.blockSize != 0, // explicitBlockSize +        createInfo.flags & VMA_POOL_CREATE_ALGORITHM_MASK, // algorithm +        createInfo.priority, +        VMA_MAX(hAllocator->GetMemoryTypeMinAlignment(createInfo.memoryTypeIndex), createInfo.minAllocationAlignment), +        createInfo.pMemoryAllocateNext), +    m_Id(0), +    m_Name(VMA_NULL) {} + +VmaPool_T::~VmaPool_T() +{ +    VMA_ASSERT(m_PrevPool == VMA_NULL && m_NextPool == VMA_NULL); + +    const VkAllocationCallbacks* allocs = m_BlockVector.GetAllocator()->GetAllocationCallbacks(); +    VmaFreeString(allocs, m_Name); +} + +void VmaPool_T::SetName(const char* pName) +{ +    const VkAllocationCallbacks* allocs = m_BlockVector.GetAllocator()->GetAllocationCallbacks(); +    VmaFreeString(allocs, m_Name); + +    if (pName != VMA_NULL) +    { +        m_Name = VmaCreateStringCopy(allocs, pName); +    } +    else +    { +        m_Name = VMA_NULL; +    } +} +#endif // _VMA_POOL_T_FUNCTIONS + +#ifndef _VMA_ALLOCATOR_T_FUNCTIONS +VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) : +    m_UseMutex((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT) == 0), +    m_VulkanApiVersion(pCreateInfo->vulkanApiVersion != 0 ? pCreateInfo->vulkanApiVersion : VK_API_VERSION_1_0), +    m_UseKhrDedicatedAllocation((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT) != 0), +    m_UseKhrBindMemory2((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT) != 0), +    m_UseExtMemoryBudget((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT) != 0), +    m_UseAmdDeviceCoherentMemory((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT) != 0), +    m_UseKhrBufferDeviceAddress((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT) != 0), +    m_UseExtMemoryPriority((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT) != 0), +    m_UseKhrMaintenance4((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT) != 0), +    m_UseKhrMaintenance5((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT) != 0), +    m_UseKhrExternalMemoryWin32((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT) != 0), +    m_hDevice(pCreateInfo->device), +    m_hInstance(pCreateInfo->instance), +    m_AllocationCallbacksSpecified(pCreateInfo->pAllocationCallbacks != VMA_NULL), +    m_AllocationCallbacks(pCreateInfo->pAllocationCallbacks ? +        *pCreateInfo->pAllocationCallbacks : VmaEmptyAllocationCallbacks), +    m_AllocationObjectAllocator(&m_AllocationCallbacks), +    m_HeapSizeLimitMask(0), +    m_DeviceMemoryCount(0), +    m_PreferredLargeHeapBlockSize(0), +    m_PhysicalDevice(pCreateInfo->physicalDevice), +    m_GpuDefragmentationMemoryTypeBits(UINT32_MAX), +    m_NextPoolId(0), +    m_GlobalMemoryTypeBits(UINT32_MAX) +{ +    if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) +    { +        m_UseKhrDedicatedAllocation = false; +        m_UseKhrBindMemory2 = false; +    } + +    if(VMA_DEBUG_DETECT_CORRUPTION) +    { +        // Needs to be multiply of uint32_t size because we are going to write VMA_CORRUPTION_DETECTION_MAGIC_VALUE to it. 
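Note: VmaPool_T just above is the object behind the public custom-pool API; each field of VmaPoolCreateInfo maps directly onto a VmaBlockVector parameter (a nonzero blockSize makes the size explicit, the flags carry the algorithm, and priority, minAllocationAlignment and pMemoryAllocateNext are forwarded). A minimal caller-side sketch, assuming the memory type index has already been chosen (for example with vmaFindMemoryTypeIndex); the sizes and counts are arbitrary examples:

    #include "vk_mem_alloc.h"

    // Minimal custom-pool creation mirroring the constructor arguments above.
    VkResult CreateLinearPool(VmaAllocator allocator, uint32_t memoryTypeIndex, VmaPool* outPool)
    {
        VmaPoolCreateInfo poolInfo = {};
        poolInfo.memoryTypeIndex = memoryTypeIndex;
        poolInfo.flags = VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT; // becomes the 'algorithm' parameter
        poolInfo.blockSize = 64ull * 1024 * 1024;              // nonzero => explicitBlockSize
        poolInfo.minBlockCount = 1;                            // keep one block resident
        poolInfo.maxBlockCount = 8;

        return vmaCreatePool(allocator, &poolInfo, outPool);
    }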
+        VMA_ASSERT(VMA_DEBUG_MARGIN % sizeof(uint32_t) == 0); +    } + +    VMA_ASSERT(pCreateInfo->physicalDevice && pCreateInfo->device && pCreateInfo->instance); + +    if(m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0)) +    { +#if !(VMA_DEDICATED_ALLOCATION) +        if((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT) != 0) +        { +            VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT set but required extensions are disabled by preprocessor macros."); +        } +#endif +#if !(VMA_BIND_MEMORY2) +        if((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT) != 0) +        { +            VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT set but required extension is disabled by preprocessor macros."); +        } +#endif +    } +#if !(VMA_MEMORY_BUDGET) +    if((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT) != 0) +    { +        VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT set but required extension is disabled by preprocessor macros."); +    } +#endif +#if !(VMA_BUFFER_DEVICE_ADDRESS) +    if(m_UseKhrBufferDeviceAddress) +    { +        VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT is set but required extension or Vulkan 1.2 is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); +    } +#endif +#if VMA_VULKAN_VERSION < 1004000 +    VMA_ASSERT(m_VulkanApiVersion < VK_MAKE_VERSION(1, 4, 0) && "vulkanApiVersion >= VK_API_VERSION_1_4 but required Vulkan version is disabled by preprocessor macros."); +#endif +#if VMA_VULKAN_VERSION < 1003000 +    VMA_ASSERT(m_VulkanApiVersion < VK_MAKE_VERSION(1, 3, 0) && "vulkanApiVersion >= VK_API_VERSION_1_3 but required Vulkan version is disabled by preprocessor macros."); +#endif +#if VMA_VULKAN_VERSION < 1002000 +    VMA_ASSERT(m_VulkanApiVersion < VK_MAKE_VERSION(1, 2, 0) && "vulkanApiVersion >= VK_API_VERSION_1_2 but required Vulkan version is disabled by preprocessor macros."); +#endif +#if VMA_VULKAN_VERSION < 1001000 +    VMA_ASSERT(m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0) && "vulkanApiVersion >= VK_API_VERSION_1_1 but required Vulkan version is disabled by preprocessor macros."); +#endif +#if !(VMA_MEMORY_PRIORITY) +    if(m_UseExtMemoryPriority) +    { +        VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); +    } +#endif +#if !(VMA_KHR_MAINTENANCE4) +    if(m_UseKhrMaintenance4) +    { +        VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); +    } +#endif +#if !(VMA_KHR_MAINTENANCE5) +    if(m_UseKhrMaintenance5) +    { +        VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); +    } +#endif +#if !(VMA_KHR_MAINTENANCE5) +    if(m_UseKhrMaintenance5) +    { +        VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); +    } +#endif + +#if !(VMA_EXTERNAL_MEMORY_WIN32) +    if(m_UseKhrExternalMemoryWin32) +    { +        VMA_ASSERT(0 && 
"VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); +    } +#endif + +    memset(&m_DeviceMemoryCallbacks, 0 ,sizeof(m_DeviceMemoryCallbacks)); +    memset(&m_PhysicalDeviceProperties, 0, sizeof(m_PhysicalDeviceProperties)); +    memset(&m_MemProps, 0, sizeof(m_MemProps)); + +    memset(&m_pBlockVectors, 0, sizeof(m_pBlockVectors)); +    memset(&m_VulkanFunctions, 0, sizeof(m_VulkanFunctions)); + +#if VMA_EXTERNAL_MEMORY +    memset(&m_TypeExternalMemoryHandleTypes, 0, sizeof(m_TypeExternalMemoryHandleTypes)); +#endif // #if VMA_EXTERNAL_MEMORY + +    if(pCreateInfo->pDeviceMemoryCallbacks != VMA_NULL) +    { +        m_DeviceMemoryCallbacks.pUserData = pCreateInfo->pDeviceMemoryCallbacks->pUserData; +        m_DeviceMemoryCallbacks.pfnAllocate = pCreateInfo->pDeviceMemoryCallbacks->pfnAllocate; +        m_DeviceMemoryCallbacks.pfnFree = pCreateInfo->pDeviceMemoryCallbacks->pfnFree; +    } + +    ImportVulkanFunctions(pCreateInfo->pVulkanFunctions); + +    (*m_VulkanFunctions.vkGetPhysicalDeviceProperties)(m_PhysicalDevice, &m_PhysicalDeviceProperties); +    (*m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties)(m_PhysicalDevice, &m_MemProps); + +    VMA_ASSERT(VmaIsPow2(VMA_MIN_ALIGNMENT)); +    VMA_ASSERT(VmaIsPow2(VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY)); +    VMA_ASSERT(VmaIsPow2(m_PhysicalDeviceProperties.limits.bufferImageGranularity)); +    VMA_ASSERT(VmaIsPow2(m_PhysicalDeviceProperties.limits.nonCoherentAtomSize)); + +    m_PreferredLargeHeapBlockSize = (pCreateInfo->preferredLargeHeapBlockSize != 0) ? +        pCreateInfo->preferredLargeHeapBlockSize : static_cast<VkDeviceSize>(VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE); + +    m_GlobalMemoryTypeBits = CalculateGlobalMemoryTypeBits(); + +#if VMA_EXTERNAL_MEMORY +    if(pCreateInfo->pTypeExternalMemoryHandleTypes != VMA_NULL) +    { +        memcpy(m_TypeExternalMemoryHandleTypes, pCreateInfo->pTypeExternalMemoryHandleTypes, +            sizeof(VkExternalMemoryHandleTypeFlagsKHR) * GetMemoryTypeCount()); +    } +#endif // #if VMA_EXTERNAL_MEMORY + +    if(pCreateInfo->pHeapSizeLimit != VMA_NULL) +    { +        for(uint32_t heapIndex = 0; heapIndex < GetMemoryHeapCount(); ++heapIndex) +        { +            const VkDeviceSize limit = pCreateInfo->pHeapSizeLimit[heapIndex]; +            if(limit != VK_WHOLE_SIZE) +            { +                m_HeapSizeLimitMask |= 1u << heapIndex; +                if(limit < m_MemProps.memoryHeaps[heapIndex].size) +                { +                    m_MemProps.memoryHeaps[heapIndex].size = limit; +                } +            } +        } +    } + +    for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) +    { +        // Create only supported types +        if((m_GlobalMemoryTypeBits & (1u << memTypeIndex)) != 0) +        { +            const VkDeviceSize preferredBlockSize = CalcPreferredBlockSize(memTypeIndex); +            m_pBlockVectors[memTypeIndex] = vma_new(this, VmaBlockVector)( +                this, +                VK_NULL_HANDLE, // hParentPool +                memTypeIndex, +                preferredBlockSize, +                0, +                SIZE_MAX, +                GetBufferImageGranularity(), +                false, // explicitBlockSize +                0, // algorithm +                0.5f, // priority (0.5 is the default per Vulkan spec) +                GetMemoryTypeMinAlignment(memTypeIndex), // 
minAllocationAlignment +                VMA_NULL); // // pMemoryAllocateNext +            // No need to call m_pBlockVectors[memTypeIndex][blockVectorTypeIndex]->CreateMinBlocks here, +            // because minBlockCount is 0. +        } +    } +} + +VkResult VmaAllocator_T::Init(const VmaAllocatorCreateInfo* pCreateInfo) +{ +    VkResult res = VK_SUCCESS; + +#if VMA_MEMORY_BUDGET +    if(m_UseExtMemoryBudget) +    { +        UpdateVulkanBudget(); +    } +#endif // #if VMA_MEMORY_BUDGET + +    return res; +} + +VmaAllocator_T::~VmaAllocator_T() +{ +    VMA_ASSERT(m_Pools.IsEmpty()); + +    for(size_t memTypeIndex = GetMemoryTypeCount(); memTypeIndex--; ) +    { +        vma_delete(this, m_pBlockVectors[memTypeIndex]); +    } +} + +void VmaAllocator_T::ImportVulkanFunctions(const VmaVulkanFunctions* pVulkanFunctions) +{ +#if VMA_STATIC_VULKAN_FUNCTIONS == 1 +    ImportVulkanFunctions_Static(); +#endif + +    if(pVulkanFunctions != VMA_NULL) +    { +        ImportVulkanFunctions_Custom(pVulkanFunctions); +    } + +#if VMA_DYNAMIC_VULKAN_FUNCTIONS == 1 +    ImportVulkanFunctions_Dynamic(); +#endif + +    ValidateVulkanFunctions(); +} + +#if VMA_STATIC_VULKAN_FUNCTIONS == 1 + +void VmaAllocator_T::ImportVulkanFunctions_Static() +{ +    // Vulkan 1.0 +    m_VulkanFunctions.vkGetInstanceProcAddr = (PFN_vkGetInstanceProcAddr)vkGetInstanceProcAddr; +    m_VulkanFunctions.vkGetDeviceProcAddr = (PFN_vkGetDeviceProcAddr)vkGetDeviceProcAddr; +    m_VulkanFunctions.vkGetPhysicalDeviceProperties = (PFN_vkGetPhysicalDeviceProperties)vkGetPhysicalDeviceProperties; +    m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties = (PFN_vkGetPhysicalDeviceMemoryProperties)vkGetPhysicalDeviceMemoryProperties; +    m_VulkanFunctions.vkAllocateMemory = (PFN_vkAllocateMemory)vkAllocateMemory; +    m_VulkanFunctions.vkFreeMemory = (PFN_vkFreeMemory)vkFreeMemory; +    m_VulkanFunctions.vkMapMemory = (PFN_vkMapMemory)vkMapMemory; +    m_VulkanFunctions.vkUnmapMemory = (PFN_vkUnmapMemory)vkUnmapMemory; +    m_VulkanFunctions.vkFlushMappedMemoryRanges = (PFN_vkFlushMappedMemoryRanges)vkFlushMappedMemoryRanges; +    m_VulkanFunctions.vkInvalidateMappedMemoryRanges = (PFN_vkInvalidateMappedMemoryRanges)vkInvalidateMappedMemoryRanges; +    m_VulkanFunctions.vkBindBufferMemory = (PFN_vkBindBufferMemory)vkBindBufferMemory; +    m_VulkanFunctions.vkBindImageMemory = (PFN_vkBindImageMemory)vkBindImageMemory; +    m_VulkanFunctions.vkGetBufferMemoryRequirements = (PFN_vkGetBufferMemoryRequirements)vkGetBufferMemoryRequirements; +    m_VulkanFunctions.vkGetImageMemoryRequirements = (PFN_vkGetImageMemoryRequirements)vkGetImageMemoryRequirements; +    m_VulkanFunctions.vkCreateBuffer = (PFN_vkCreateBuffer)vkCreateBuffer; +    m_VulkanFunctions.vkDestroyBuffer = (PFN_vkDestroyBuffer)vkDestroyBuffer; +    m_VulkanFunctions.vkCreateImage = (PFN_vkCreateImage)vkCreateImage; +    m_VulkanFunctions.vkDestroyImage = (PFN_vkDestroyImage)vkDestroyImage; +    m_VulkanFunctions.vkCmdCopyBuffer = (PFN_vkCmdCopyBuffer)vkCmdCopyBuffer; + +    // Vulkan 1.1 +#if VMA_VULKAN_VERSION >= 1001000 +    if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) +    { +        m_VulkanFunctions.vkGetBufferMemoryRequirements2KHR = (PFN_vkGetBufferMemoryRequirements2)vkGetBufferMemoryRequirements2; +        m_VulkanFunctions.vkGetImageMemoryRequirements2KHR = (PFN_vkGetImageMemoryRequirements2)vkGetImageMemoryRequirements2; +        m_VulkanFunctions.vkBindBufferMemory2KHR = (PFN_vkBindBufferMemory2)vkBindBufferMemory2; +        
m_VulkanFunctions.vkBindImageMemory2KHR = (PFN_vkBindImageMemory2)vkBindImageMemory2; +    } +#endif + +#if VMA_VULKAN_VERSION >= 1001000 +    if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) +    { +        m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties2KHR = (PFN_vkGetPhysicalDeviceMemoryProperties2)vkGetPhysicalDeviceMemoryProperties2; +    } +#endif + +#if VMA_VULKAN_VERSION >= 1003000 +    if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 3, 0)) +    { +        m_VulkanFunctions.vkGetDeviceBufferMemoryRequirements = (PFN_vkGetDeviceBufferMemoryRequirements)vkGetDeviceBufferMemoryRequirements; +        m_VulkanFunctions.vkGetDeviceImageMemoryRequirements = (PFN_vkGetDeviceImageMemoryRequirements)vkGetDeviceImageMemoryRequirements; +    } +#endif +} + +#endif // VMA_STATIC_VULKAN_FUNCTIONS == 1 + +void VmaAllocator_T::ImportVulkanFunctions_Custom(const VmaVulkanFunctions* pVulkanFunctions) +{ +    VMA_ASSERT(pVulkanFunctions != VMA_NULL); + +#define VMA_COPY_IF_NOT_NULL(funcName) \ +    if(pVulkanFunctions->funcName != VMA_NULL) m_VulkanFunctions.funcName = pVulkanFunctions->funcName; + +    VMA_COPY_IF_NOT_NULL(vkGetInstanceProcAddr); +    VMA_COPY_IF_NOT_NULL(vkGetDeviceProcAddr); +    VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceProperties); +    VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceMemoryProperties); +    VMA_COPY_IF_NOT_NULL(vkAllocateMemory); +    VMA_COPY_IF_NOT_NULL(vkFreeMemory); +    VMA_COPY_IF_NOT_NULL(vkMapMemory); +    VMA_COPY_IF_NOT_NULL(vkUnmapMemory); +    VMA_COPY_IF_NOT_NULL(vkFlushMappedMemoryRanges); +    VMA_COPY_IF_NOT_NULL(vkInvalidateMappedMemoryRanges); +    VMA_COPY_IF_NOT_NULL(vkBindBufferMemory); +    VMA_COPY_IF_NOT_NULL(vkBindImageMemory); +    VMA_COPY_IF_NOT_NULL(vkGetBufferMemoryRequirements); +    VMA_COPY_IF_NOT_NULL(vkGetImageMemoryRequirements); +    VMA_COPY_IF_NOT_NULL(vkCreateBuffer); +    VMA_COPY_IF_NOT_NULL(vkDestroyBuffer); +    VMA_COPY_IF_NOT_NULL(vkCreateImage); +    VMA_COPY_IF_NOT_NULL(vkDestroyImage); +    VMA_COPY_IF_NOT_NULL(vkCmdCopyBuffer); + +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 +    VMA_COPY_IF_NOT_NULL(vkGetBufferMemoryRequirements2KHR); +    VMA_COPY_IF_NOT_NULL(vkGetImageMemoryRequirements2KHR); +#endif + +#if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000 +    VMA_COPY_IF_NOT_NULL(vkBindBufferMemory2KHR); +    VMA_COPY_IF_NOT_NULL(vkBindImageMemory2KHR); +#endif + +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 +    VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceMemoryProperties2KHR); +#endif + +#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 +    VMA_COPY_IF_NOT_NULL(vkGetDeviceBufferMemoryRequirements); +    VMA_COPY_IF_NOT_NULL(vkGetDeviceImageMemoryRequirements); +#endif +#if VMA_EXTERNAL_MEMORY_WIN32 +    VMA_COPY_IF_NOT_NULL(vkGetMemoryWin32HandleKHR); +#endif +#undef VMA_COPY_IF_NOT_NULL +} + +#if VMA_DYNAMIC_VULKAN_FUNCTIONS == 1 + +void VmaAllocator_T::ImportVulkanFunctions_Dynamic() +{ +    VMA_ASSERT(m_VulkanFunctions.vkGetInstanceProcAddr && m_VulkanFunctions.vkGetDeviceProcAddr && +        "To use VMA_DYNAMIC_VULKAN_FUNCTIONS in new versions of VMA you now have to pass " +        "VmaVulkanFunctions::vkGetInstanceProcAddr and vkGetDeviceProcAddr as VmaAllocatorCreateInfo::pVulkanFunctions. 
" +        "Other members can be null."); + +#define VMA_FETCH_INSTANCE_FUNC(memberName, functionPointerType, functionNameString) \ +    if(m_VulkanFunctions.memberName == VMA_NULL) \ +        m_VulkanFunctions.memberName = \ +            (functionPointerType)m_VulkanFunctions.vkGetInstanceProcAddr(m_hInstance, functionNameString); +#define VMA_FETCH_DEVICE_FUNC(memberName, functionPointerType, functionNameString) \ +    if(m_VulkanFunctions.memberName == VMA_NULL) \ +        m_VulkanFunctions.memberName = \ +            (functionPointerType)m_VulkanFunctions.vkGetDeviceProcAddr(m_hDevice, functionNameString); + +    VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceProperties, PFN_vkGetPhysicalDeviceProperties, "vkGetPhysicalDeviceProperties"); +    VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties, PFN_vkGetPhysicalDeviceMemoryProperties, "vkGetPhysicalDeviceMemoryProperties"); +    VMA_FETCH_DEVICE_FUNC(vkAllocateMemory, PFN_vkAllocateMemory, "vkAllocateMemory"); +    VMA_FETCH_DEVICE_FUNC(vkFreeMemory, PFN_vkFreeMemory, "vkFreeMemory"); +    VMA_FETCH_DEVICE_FUNC(vkMapMemory, PFN_vkMapMemory, "vkMapMemory"); +    VMA_FETCH_DEVICE_FUNC(vkUnmapMemory, PFN_vkUnmapMemory, "vkUnmapMemory"); +    VMA_FETCH_DEVICE_FUNC(vkFlushMappedMemoryRanges, PFN_vkFlushMappedMemoryRanges, "vkFlushMappedMemoryRanges"); +    VMA_FETCH_DEVICE_FUNC(vkInvalidateMappedMemoryRanges, PFN_vkInvalidateMappedMemoryRanges, "vkInvalidateMappedMemoryRanges"); +    VMA_FETCH_DEVICE_FUNC(vkBindBufferMemory, PFN_vkBindBufferMemory, "vkBindBufferMemory"); +    VMA_FETCH_DEVICE_FUNC(vkBindImageMemory, PFN_vkBindImageMemory, "vkBindImageMemory"); +    VMA_FETCH_DEVICE_FUNC(vkGetBufferMemoryRequirements, PFN_vkGetBufferMemoryRequirements, "vkGetBufferMemoryRequirements"); +    VMA_FETCH_DEVICE_FUNC(vkGetImageMemoryRequirements, PFN_vkGetImageMemoryRequirements, "vkGetImageMemoryRequirements"); +    VMA_FETCH_DEVICE_FUNC(vkCreateBuffer, PFN_vkCreateBuffer, "vkCreateBuffer"); +    VMA_FETCH_DEVICE_FUNC(vkDestroyBuffer, PFN_vkDestroyBuffer, "vkDestroyBuffer"); +    VMA_FETCH_DEVICE_FUNC(vkCreateImage, PFN_vkCreateImage, "vkCreateImage"); +    VMA_FETCH_DEVICE_FUNC(vkDestroyImage, PFN_vkDestroyImage, "vkDestroyImage"); +    VMA_FETCH_DEVICE_FUNC(vkCmdCopyBuffer, PFN_vkCmdCopyBuffer, "vkCmdCopyBuffer"); + +#if VMA_VULKAN_VERSION >= 1001000 +    if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) +    { +        VMA_FETCH_DEVICE_FUNC(vkGetBufferMemoryRequirements2KHR, PFN_vkGetBufferMemoryRequirements2, "vkGetBufferMemoryRequirements2"); +        VMA_FETCH_DEVICE_FUNC(vkGetImageMemoryRequirements2KHR, PFN_vkGetImageMemoryRequirements2, "vkGetImageMemoryRequirements2"); +        VMA_FETCH_DEVICE_FUNC(vkBindBufferMemory2KHR, PFN_vkBindBufferMemory2, "vkBindBufferMemory2"); +        VMA_FETCH_DEVICE_FUNC(vkBindImageMemory2KHR, PFN_vkBindImageMemory2, "vkBindImageMemory2"); +    } +#endif + +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 +    if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) +    { +        VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2"); +        // Try to fetch the pointer from the other name, based on suspected driver bug - see issue #410. 
+        VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2KHR"); +    } +    else if(m_UseExtMemoryBudget) +    { +        VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2KHR"); +        // Try to fetch the pointer from the other name, based on suspected driver bug - see issue #410. +        VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2"); +    } +#endif + +#if VMA_DEDICATED_ALLOCATION +    if(m_UseKhrDedicatedAllocation) +    { +        VMA_FETCH_DEVICE_FUNC(vkGetBufferMemoryRequirements2KHR, PFN_vkGetBufferMemoryRequirements2KHR, "vkGetBufferMemoryRequirements2KHR"); +        VMA_FETCH_DEVICE_FUNC(vkGetImageMemoryRequirements2KHR, PFN_vkGetImageMemoryRequirements2KHR, "vkGetImageMemoryRequirements2KHR"); +    } +#endif + +#if VMA_BIND_MEMORY2 +    if(m_UseKhrBindMemory2) +    { +        VMA_FETCH_DEVICE_FUNC(vkBindBufferMemory2KHR, PFN_vkBindBufferMemory2KHR, "vkBindBufferMemory2KHR"); +        VMA_FETCH_DEVICE_FUNC(vkBindImageMemory2KHR, PFN_vkBindImageMemory2KHR, "vkBindImageMemory2KHR"); +    } +#endif // #if VMA_BIND_MEMORY2 + +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 +    if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) +    { +        VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2"); +    } +    else if(m_UseExtMemoryBudget) +    { +        VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2KHR"); +    } +#endif // #if VMA_MEMORY_BUDGET + +#if VMA_VULKAN_VERSION >= 1003000 +    if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 3, 0)) +    { +        VMA_FETCH_DEVICE_FUNC(vkGetDeviceBufferMemoryRequirements, PFN_vkGetDeviceBufferMemoryRequirements, "vkGetDeviceBufferMemoryRequirements"); +        VMA_FETCH_DEVICE_FUNC(vkGetDeviceImageMemoryRequirements, PFN_vkGetDeviceImageMemoryRequirements, "vkGetDeviceImageMemoryRequirements"); +    } +#endif +#if VMA_KHR_MAINTENANCE4 +    if(m_UseKhrMaintenance4) +    { +        VMA_FETCH_DEVICE_FUNC(vkGetDeviceBufferMemoryRequirements, PFN_vkGetDeviceBufferMemoryRequirementsKHR, "vkGetDeviceBufferMemoryRequirementsKHR"); +        VMA_FETCH_DEVICE_FUNC(vkGetDeviceImageMemoryRequirements, PFN_vkGetDeviceImageMemoryRequirementsKHR, "vkGetDeviceImageMemoryRequirementsKHR"); +    } +#endif +#if VMA_EXTERNAL_MEMORY_WIN32 +    if (m_UseKhrExternalMemoryWin32) +    { +        VMA_FETCH_DEVICE_FUNC(vkGetMemoryWin32HandleKHR, PFN_vkGetMemoryWin32HandleKHR, "vkGetMemoryWin32HandleKHR"); +    } +#endif +#undef VMA_FETCH_DEVICE_FUNC +#undef VMA_FETCH_INSTANCE_FUNC +} + +#endif // VMA_DYNAMIC_VULKAN_FUNCTIONS == 1 + +void VmaAllocator_T::ValidateVulkanFunctions() +{ +    VMA_ASSERT(m_VulkanFunctions.vkGetPhysicalDeviceProperties != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkAllocateMemory != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkFreeMemory != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkMapMemory != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkUnmapMemory != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkFlushMappedMemoryRanges != VMA_NULL); +    
VMA_ASSERT(m_VulkanFunctions.vkInvalidateMappedMemoryRanges != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkBindBufferMemory != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkBindImageMemory != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkGetBufferMemoryRequirements != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkGetImageMemoryRequirements != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkCreateBuffer != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkDestroyBuffer != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkCreateImage != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkDestroyImage != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkCmdCopyBuffer != VMA_NULL); + +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 +    if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0) || m_UseKhrDedicatedAllocation) +    { +        VMA_ASSERT(m_VulkanFunctions.vkGetBufferMemoryRequirements2KHR != VMA_NULL); +        VMA_ASSERT(m_VulkanFunctions.vkGetImageMemoryRequirements2KHR != VMA_NULL); +    } +#endif + +#if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000 +    if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0) || m_UseKhrBindMemory2) +    { +        VMA_ASSERT(m_VulkanFunctions.vkBindBufferMemory2KHR != VMA_NULL); +        VMA_ASSERT(m_VulkanFunctions.vkBindImageMemory2KHR != VMA_NULL); +    } +#endif + +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 +    if(m_UseExtMemoryBudget || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) +    { +        VMA_ASSERT(m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties2KHR != VMA_NULL); +    } +#endif +#if VMA_EXTERNAL_MEMORY_WIN32 +    if (m_UseKhrExternalMemoryWin32) +    { +        VMA_ASSERT(m_VulkanFunctions.vkGetMemoryWin32HandleKHR != VMA_NULL); +    } +#endif + +    // Not validating these due to suspected driver bugs with these function +    // pointers being null despite correct extension or Vulkan version is enabled. +    // See issue #397. Their usage in VMA is optional anyway. +    // +    // VMA_ASSERT(m_VulkanFunctions.vkGetDeviceBufferMemoryRequirements != VMA_NULL); +    // VMA_ASSERT(m_VulkanFunctions.vkGetDeviceImageMemoryRequirements != VMA_NULL); +} + +VkDeviceSize VmaAllocator_T::CalcPreferredBlockSize(uint32_t memTypeIndex) +{ +    const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex); +    const VkDeviceSize heapSize = m_MemProps.memoryHeaps[heapIndex].size; +    const bool isSmallHeap = heapSize <= VMA_SMALL_HEAP_MAX_SIZE; +    return VmaAlignUp(isSmallHeap ? 
(heapSize / 8) : m_PreferredLargeHeapBlockSize, (VkDeviceSize)32); +} + +VkResult VmaAllocator_T::AllocateMemoryOfType( +    VmaPool pool, +    VkDeviceSize size, +    VkDeviceSize alignment, +    bool dedicatedPreferred, +    VkBuffer dedicatedBuffer, +    VkImage dedicatedImage, +    VmaBufferImageUsage dedicatedBufferImageUsage, +    const VmaAllocationCreateInfo& createInfo, +    uint32_t memTypeIndex, +    VmaSuballocationType suballocType, +    VmaDedicatedAllocationList& dedicatedAllocations, +    VmaBlockVector& blockVector, +    size_t allocationCount, +    VmaAllocation* pAllocations) +{ +    VMA_ASSERT(pAllocations != VMA_NULL); +    VMA_DEBUG_LOG_FORMAT("  AllocateMemory: MemoryTypeIndex=%" PRIu32 ", AllocationCount=%zu, Size=%" PRIu64, memTypeIndex, allocationCount, size); + +    VmaAllocationCreateInfo finalCreateInfo = createInfo; +    VkResult res = CalcMemTypeParams( +        finalCreateInfo, +        memTypeIndex, +        size, +        allocationCount); +    if(res != VK_SUCCESS) +        return res; + +    if((finalCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0) +    { +        return AllocateDedicatedMemory( +            pool, +            size, +            suballocType, +            dedicatedAllocations, +            memTypeIndex, +            (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0, +            (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0, +            (finalCreateInfo.flags & +                (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0, +            (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT) != 0, +            finalCreateInfo.pUserData, +            finalCreateInfo.priority, +            dedicatedBuffer, +            dedicatedImage, +            dedicatedBufferImageUsage, +            allocationCount, +            pAllocations, +            blockVector.GetAllocationNextPtr()); +    } +    else +    { +        const bool canAllocateDedicated = +            (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0 && +            (pool == VK_NULL_HANDLE || !blockVector.HasExplicitBlockSize()); + +        if(canAllocateDedicated) +        { +            // Heuristics: Allocate dedicated memory if requested size if greater than half of preferred block size. +            if(size > blockVector.GetPreferredBlockSize() / 2) +            { +                dedicatedPreferred = true; +            } +            // Protection against creating each allocation as dedicated when we reach or exceed heap size/budget, +            // which can quickly deplete maxMemoryAllocationCount: Don't prefer dedicated allocations when above +            // 3/4 of the maximum allocation count. 
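// Not part of vk_mem_alloc.h: with VMA's default 256 MiB preferred block size for large heaps,
// the heuristic above means any single request larger than ~128 MiB tends to get its own
// VkDeviceMemory. A caller can also decide explicitly instead of relying on the heuristic;
// a minimal sketch, assuming `allocator` and a filled-in VkImageCreateInfo `imageInfo`
// already exist in the renderer (both names are assumptions, not from this commit):

VmaAllocationCreateInfo allocCreateInfo = {};
allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; // always give this image its own VkDeviceMemory
// ...or VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT to fail rather than allocate a new block.

VkImage image = VK_NULL_HANDLE;
VmaAllocation allocation = VK_NULL_HANDLE;
VkResult result = vmaCreateImage(allocator, &imageInfo, &allocCreateInfo,
                                 &image, &allocation, NULL);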
+            if(m_PhysicalDeviceProperties.limits.maxMemoryAllocationCount < UINT32_MAX / 4 && +                m_DeviceMemoryCount.load() > m_PhysicalDeviceProperties.limits.maxMemoryAllocationCount * 3 / 4) +            { +                dedicatedPreferred = false; +            } + +            if(dedicatedPreferred) +            { +                res = AllocateDedicatedMemory( +                    pool, +                    size, +                    suballocType, +                    dedicatedAllocations, +                    memTypeIndex, +                    (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0, +                    (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0, +                    (finalCreateInfo.flags & +                        (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0, +                    (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT) != 0, +                    finalCreateInfo.pUserData, +                    finalCreateInfo.priority, +                    dedicatedBuffer, +                    dedicatedImage, +                    dedicatedBufferImageUsage, +                    allocationCount, +                    pAllocations, +                    blockVector.GetAllocationNextPtr()); +                if(res == VK_SUCCESS) +                { +                    // Succeeded: AllocateDedicatedMemory function already filled pMemory, nothing more to do here. +                    VMA_DEBUG_LOG("    Allocated as DedicatedMemory"); +                    return VK_SUCCESS; +                } +            } +        } + +        res = blockVector.Allocate( +            size, +            alignment, +            finalCreateInfo, +            suballocType, +            allocationCount, +            pAllocations); +        if(res == VK_SUCCESS) +            return VK_SUCCESS; + +        // Try dedicated memory. +        if(canAllocateDedicated && !dedicatedPreferred) +        { +            res = AllocateDedicatedMemory( +                pool, +                size, +                suballocType, +                dedicatedAllocations, +                memTypeIndex, +                (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0, +                (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0, +                (finalCreateInfo.flags & +                    (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0, +                (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT) != 0, +                finalCreateInfo.pUserData, +                finalCreateInfo.priority, +                dedicatedBuffer, +                dedicatedImage, +                dedicatedBufferImageUsage, +                allocationCount, +                pAllocations, +                blockVector.GetAllocationNextPtr()); +            if(res == VK_SUCCESS) +            { +                // Succeeded: AllocateDedicatedMemory function already filled pMemory, nothing more to do here. +                VMA_DEBUG_LOG("    Allocated as DedicatedMemory"); +                return VK_SUCCESS; +            } +        } +        // Everything failed: Return error code. 
+        VMA_DEBUG_LOG("    vkAllocateMemory FAILED"); +        return res; +    } +} + +VkResult VmaAllocator_T::AllocateDedicatedMemory( +    VmaPool pool, +    VkDeviceSize size, +    VmaSuballocationType suballocType, +    VmaDedicatedAllocationList& dedicatedAllocations, +    uint32_t memTypeIndex, +    bool map, +    bool isUserDataString, +    bool isMappingAllowed, +    bool canAliasMemory, +    void* pUserData, +    float priority, +    VkBuffer dedicatedBuffer, +    VkImage dedicatedImage, +    VmaBufferImageUsage dedicatedBufferImageUsage, +    size_t allocationCount, +    VmaAllocation* pAllocations, +    const void* pNextChain) +{ +    VMA_ASSERT(allocationCount > 0 && pAllocations); + +    VkMemoryAllocateInfo allocInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; +    allocInfo.memoryTypeIndex = memTypeIndex; +    allocInfo.allocationSize = size; +    allocInfo.pNext = pNextChain; + +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 +    VkMemoryDedicatedAllocateInfoKHR dedicatedAllocInfo = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR }; +    if(!canAliasMemory) +    { +        if(m_UseKhrDedicatedAllocation || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) +        { +            if(dedicatedBuffer != VK_NULL_HANDLE) +            { +                VMA_ASSERT(dedicatedImage == VK_NULL_HANDLE); +                dedicatedAllocInfo.buffer = dedicatedBuffer; +                VmaPnextChainPushFront(&allocInfo, &dedicatedAllocInfo); +            } +            else if(dedicatedImage != VK_NULL_HANDLE) +            { +                dedicatedAllocInfo.image = dedicatedImage; +                VmaPnextChainPushFront(&allocInfo, &dedicatedAllocInfo); +            } +        } +    } +#endif // #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + +#if VMA_BUFFER_DEVICE_ADDRESS +    VkMemoryAllocateFlagsInfoKHR allocFlagsInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO_KHR }; +    if(m_UseKhrBufferDeviceAddress) +    { +        bool canContainBufferWithDeviceAddress = true; +        if(dedicatedBuffer != VK_NULL_HANDLE) +        { +            canContainBufferWithDeviceAddress = dedicatedBufferImageUsage == VmaBufferImageUsage::UNKNOWN || +                dedicatedBufferImageUsage.Contains(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT); +        } +        else if(dedicatedImage != VK_NULL_HANDLE) +        { +            canContainBufferWithDeviceAddress = false; +        } +        if(canContainBufferWithDeviceAddress) +        { +            allocFlagsInfo.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR; +            VmaPnextChainPushFront(&allocInfo, &allocFlagsInfo); +        } +    } +#endif // #if VMA_BUFFER_DEVICE_ADDRESS + +#if VMA_MEMORY_PRIORITY +    VkMemoryPriorityAllocateInfoEXT priorityInfo = { VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT }; +    if(m_UseExtMemoryPriority) +    { +        VMA_ASSERT(priority >= 0.f && priority <= 1.f); +        priorityInfo.priority = priority; +        VmaPnextChainPushFront(&allocInfo, &priorityInfo); +    } +#endif // #if VMA_MEMORY_PRIORITY + +#if VMA_EXTERNAL_MEMORY +    // Attach VkExportMemoryAllocateInfoKHR if necessary. 
+    VkExportMemoryAllocateInfoKHR exportMemoryAllocInfo = { VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR }; +    exportMemoryAllocInfo.handleTypes = GetExternalMemoryHandleTypeFlags(memTypeIndex); +    if(exportMemoryAllocInfo.handleTypes != 0) +    { +        VmaPnextChainPushFront(&allocInfo, &exportMemoryAllocInfo); +    } +#endif // #if VMA_EXTERNAL_MEMORY + +    size_t allocIndex; +    VkResult res = VK_SUCCESS; +    for(allocIndex = 0; allocIndex < allocationCount; ++allocIndex) +    { +        res = AllocateDedicatedMemoryPage( +            pool, +            size, +            suballocType, +            memTypeIndex, +            allocInfo, +            map, +            isUserDataString, +            isMappingAllowed, +            pUserData, +            pAllocations + allocIndex); +        if(res != VK_SUCCESS) +        { +            break; +        } +    } + +    if(res == VK_SUCCESS) +    { +        for (allocIndex = 0; allocIndex < allocationCount; ++allocIndex) +        { +            dedicatedAllocations.Register(pAllocations[allocIndex]); +        } +        VMA_DEBUG_LOG_FORMAT("    Allocated DedicatedMemory Count=%zu, MemoryTypeIndex=#%" PRIu32, allocationCount, memTypeIndex); +    } +    else +    { +        // Free all already created allocations. +        while(allocIndex--) +        { +            VmaAllocation currAlloc = pAllocations[allocIndex]; +            VkDeviceMemory hMemory = currAlloc->GetMemory(); + +            /* +            There is no need to call this, because Vulkan spec allows to skip vkUnmapMemory +            before vkFreeMemory. + +            if(currAlloc->GetMappedData() != VMA_NULL) +            { +                (*m_VulkanFunctions.vkUnmapMemory)(m_hDevice, hMemory); +            } +            */ + +            FreeVulkanMemory(memTypeIndex, currAlloc->GetSize(), hMemory); +            m_Budget.RemoveAllocation(MemoryTypeIndexToHeapIndex(memTypeIndex), currAlloc->GetSize()); +            m_AllocationObjectAllocator.Free(currAlloc); +        } + +        memset(pAllocations, 0, sizeof(VmaAllocation) * allocationCount); +    } + +    return res; +} + +VkResult VmaAllocator_T::AllocateDedicatedMemoryPage( +    VmaPool pool, +    VkDeviceSize size, +    VmaSuballocationType suballocType, +    uint32_t memTypeIndex, +    const VkMemoryAllocateInfo& allocInfo, +    bool map, +    bool isUserDataString, +    bool isMappingAllowed, +    void* pUserData, +    VmaAllocation* pAllocation) +{ +    VkDeviceMemory hMemory = VK_NULL_HANDLE; +    VkResult res = AllocateVulkanMemory(&allocInfo, &hMemory); +    if(res < 0) +    { +        VMA_DEBUG_LOG("    vkAllocateMemory FAILED"); +        return res; +    } + +    void* pMappedData = VMA_NULL; +    if(map) +    { +        res = (*m_VulkanFunctions.vkMapMemory)( +            m_hDevice, +            hMemory, +            0, +            VK_WHOLE_SIZE, +            0, +            &pMappedData); +        if(res < 0) +        { +            VMA_DEBUG_LOG("    vkMapMemory FAILED"); +            FreeVulkanMemory(memTypeIndex, size, hMemory); +            return res; +        } +    } + +    *pAllocation = m_AllocationObjectAllocator.Allocate(isMappingAllowed); +    (*pAllocation)->InitDedicatedAllocation(this, pool, memTypeIndex, hMemory, suballocType, pMappedData, size); +    if (isUserDataString) +        (*pAllocation)->SetName(this, (const char*)pUserData); +    else +        (*pAllocation)->SetUserData(this, pUserData); +    m_Budget.AddAllocation(MemoryTypeIndexToHeapIndex(memTypeIndex), size); 
+    if(VMA_DEBUG_INITIALIZE_ALLOCATIONS) +    { +        FillAllocation(*pAllocation, VMA_ALLOCATION_FILL_PATTERN_CREATED); +    } + +    return VK_SUCCESS; +} + +void VmaAllocator_T::GetBufferMemoryRequirements( +    VkBuffer hBuffer, +    VkMemoryRequirements& memReq, +    bool& requiresDedicatedAllocation, +    bool& prefersDedicatedAllocation) const +{ +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 +    if(m_UseKhrDedicatedAllocation || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) +    { +        VkBufferMemoryRequirementsInfo2KHR memReqInfo = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2_KHR }; +        memReqInfo.buffer = hBuffer; + +        VkMemoryDedicatedRequirementsKHR memDedicatedReq = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR }; + +        VkMemoryRequirements2KHR memReq2 = { VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR }; +        VmaPnextChainPushFront(&memReq2, &memDedicatedReq); + +        (*m_VulkanFunctions.vkGetBufferMemoryRequirements2KHR)(m_hDevice, &memReqInfo, &memReq2); + +        memReq = memReq2.memoryRequirements; +        requiresDedicatedAllocation = (memDedicatedReq.requiresDedicatedAllocation != VK_FALSE); +        prefersDedicatedAllocation  = (memDedicatedReq.prefersDedicatedAllocation  != VK_FALSE); +    } +    else +#endif // #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 +    { +        (*m_VulkanFunctions.vkGetBufferMemoryRequirements)(m_hDevice, hBuffer, &memReq); +        requiresDedicatedAllocation = false; +        prefersDedicatedAllocation  = false; +    } +} + +void VmaAllocator_T::GetImageMemoryRequirements( +    VkImage hImage, +    VkMemoryRequirements& memReq, +    bool& requiresDedicatedAllocation, +    bool& prefersDedicatedAllocation) const +{ +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 +    if(m_UseKhrDedicatedAllocation || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) +    { +        VkImageMemoryRequirementsInfo2KHR memReqInfo = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2_KHR }; +        memReqInfo.image = hImage; + +        VkMemoryDedicatedRequirementsKHR memDedicatedReq = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR }; + +        VkMemoryRequirements2KHR memReq2 = { VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR }; +        VmaPnextChainPushFront(&memReq2, &memDedicatedReq); + +        (*m_VulkanFunctions.vkGetImageMemoryRequirements2KHR)(m_hDevice, &memReqInfo, &memReq2); + +        memReq = memReq2.memoryRequirements; +        requiresDedicatedAllocation = (memDedicatedReq.requiresDedicatedAllocation != VK_FALSE); +        prefersDedicatedAllocation  = (memDedicatedReq.prefersDedicatedAllocation  != VK_FALSE); +    } +    else +#endif // #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 +    { +        (*m_VulkanFunctions.vkGetImageMemoryRequirements)(m_hDevice, hImage, &memReq); +        requiresDedicatedAllocation = false; +        prefersDedicatedAllocation  = false; +    } +} + +VkResult VmaAllocator_T::FindMemoryTypeIndex( +    uint32_t memoryTypeBits, +    const VmaAllocationCreateInfo* pAllocationCreateInfo, +    VmaBufferImageUsage bufImgUsage, +    uint32_t* pMemoryTypeIndex) const +{ +    memoryTypeBits &= GetGlobalMemoryTypeBits(); + +    if(pAllocationCreateInfo->memoryTypeBits != 0) +    { +        memoryTypeBits &= pAllocationCreateInfo->memoryTypeBits; +    } + +    VkMemoryPropertyFlags requiredFlags = 0, preferredFlags = 0, notPreferredFlags = 0; +    if(!FindMemoryPreferences( +        IsIntegratedGpu(), +        
*pAllocationCreateInfo, +        bufImgUsage, +        requiredFlags, preferredFlags, notPreferredFlags)) +    { +        return VK_ERROR_FEATURE_NOT_PRESENT; +    } + +    *pMemoryTypeIndex = UINT32_MAX; +    uint32_t minCost = UINT32_MAX; +    for(uint32_t memTypeIndex = 0, memTypeBit = 1; +        memTypeIndex < GetMemoryTypeCount(); +        ++memTypeIndex, memTypeBit <<= 1) +    { +        // This memory type is acceptable according to memoryTypeBits bitmask. +        if((memTypeBit & memoryTypeBits) != 0) +        { +            const VkMemoryPropertyFlags currFlags = +                m_MemProps.memoryTypes[memTypeIndex].propertyFlags; +            // This memory type contains requiredFlags. +            if((requiredFlags & ~currFlags) == 0) +            { +                // Calculate cost as number of bits from preferredFlags not present in this memory type. +                uint32_t currCost = VMA_COUNT_BITS_SET(preferredFlags & ~currFlags) + +                    VMA_COUNT_BITS_SET(currFlags & notPreferredFlags); +                // Remember memory type with lowest cost. +                if(currCost < minCost) +                { +                    *pMemoryTypeIndex = memTypeIndex; +                    if(currCost == 0) +                    { +                        return VK_SUCCESS; +                    } +                    minCost = currCost; +                } +            } +        } +    } +    return (*pMemoryTypeIndex != UINT32_MAX) ? VK_SUCCESS : VK_ERROR_FEATURE_NOT_PRESENT; +} + +VkResult VmaAllocator_T::CalcMemTypeParams( +    VmaAllocationCreateInfo& inoutCreateInfo, +    uint32_t memTypeIndex, +    VkDeviceSize size, +    size_t allocationCount) +{ +    // If memory type is not HOST_VISIBLE, disable MAPPED. +    if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0 && +        (m_MemProps.memoryTypes[memTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) +    { +        inoutCreateInfo.flags &= ~VMA_ALLOCATION_CREATE_MAPPED_BIT; +    } + +    if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0 && +        (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT) != 0) +    { +        const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex); +        VmaBudget heapBudget = {}; +        GetHeapBudgets(&heapBudget, heapIndex, 1); +        if(heapBudget.usage + size * allocationCount > heapBudget.budget) +        { +            return VK_ERROR_OUT_OF_DEVICE_MEMORY; +        } +    } +    return VK_SUCCESS; +} + +VkResult VmaAllocator_T::CalcAllocationParams( +    VmaAllocationCreateInfo& inoutCreateInfo, +    bool dedicatedRequired, +    bool dedicatedPreferred) +{ +    VMA_ASSERT((inoutCreateInfo.flags & +        (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != +        (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT) && +        "Specifying both flags VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT and VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT is incorrect."); +    VMA_ASSERT((((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT) == 0 || +        (inoutCreateInfo.flags & (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0)) && +        "Specifying VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT requires also VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or 
VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT."); +    if(inoutCreateInfo.usage == VMA_MEMORY_USAGE_AUTO || inoutCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE || inoutCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_HOST) +    { +        if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0) +        { +            VMA_ASSERT((inoutCreateInfo.flags & (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0 && +                "When using VMA_ALLOCATION_CREATE_MAPPED_BIT and usage = VMA_MEMORY_USAGE_AUTO*, you must also specify VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT."); +        } +    } + +    // If memory is lazily allocated, it should be always dedicated. +    if(dedicatedRequired || +        inoutCreateInfo.usage == VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED) +    { +        inoutCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; +    } + +    if(inoutCreateInfo.pool != VK_NULL_HANDLE) +    { +        if(inoutCreateInfo.pool->m_BlockVector.HasExplicitBlockSize() && +            (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0) +        { +            VMA_ASSERT(0 && "Specifying VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT while current custom pool doesn't support dedicated allocations."); +            return VK_ERROR_FEATURE_NOT_PRESENT; +        } +        inoutCreateInfo.priority = inoutCreateInfo.pool->m_BlockVector.GetPriority(); +    } + +    if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0 && +        (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) != 0) +    { +        VMA_ASSERT(0 && "Specifying VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT together with VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT makes no sense."); +        return VK_ERROR_FEATURE_NOT_PRESENT; +    } + +    if(VMA_DEBUG_ALWAYS_DEDICATED_MEMORY && +        (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) != 0) +    { +        inoutCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; +    } + +    // Non-auto USAGE values imply HOST_ACCESS flags. +    // And so does VMA_MEMORY_USAGE_UNKNOWN because it is used with custom pools. +    // Which specific flag is used doesn't matter. They change things only when used with VMA_MEMORY_USAGE_AUTO*. +    // Otherwise they just protect from assert on mapping. 
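// Not part of vk_mem_alloc.h: the assert above is what a caller hits when it asks for
// VMA_ALLOCATION_CREATE_MAPPED_BIT with VMA_MEMORY_USAGE_AUTO but forgets a HOST_ACCESS flag.
// A combination that satisfies the rule, e.g. a persistently mapped staging buffer; a sketch
// assuming `allocator`, `srcData` and `dataSize` exist in the caller:

VkBufferCreateInfo bufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
bufInfo.size = dataSize;
bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

VmaAllocationCreateInfo allocCreateInfo = {};
allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
                        VMA_ALLOCATION_CREATE_MAPPED_BIT;

VkBuffer stagingBuffer = VK_NULL_HANDLE;
VmaAllocation stagingAlloc = VK_NULL_HANDLE;
VmaAllocationInfo stagingInfo = {};
if (vmaCreateBuffer(allocator, &bufInfo, &allocCreateInfo,
                    &stagingBuffer, &stagingAlloc, &stagingInfo) == VK_SUCCESS)
{
    // Persistently mapped: write the upload data straight into the mapping.
    memcpy(stagingInfo.pMappedData, srcData, dataSize);
}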
+    if(inoutCreateInfo.usage != VMA_MEMORY_USAGE_AUTO && +        inoutCreateInfo.usage != VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE && +        inoutCreateInfo.usage != VMA_MEMORY_USAGE_AUTO_PREFER_HOST) +    { +        if((inoutCreateInfo.flags & (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) == 0) +        { +            inoutCreateInfo.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; +        } +    } + +    return VK_SUCCESS; +} + +VkResult VmaAllocator_T::AllocateMemory( +    const VkMemoryRequirements& vkMemReq, +    bool requiresDedicatedAllocation, +    bool prefersDedicatedAllocation, +    VkBuffer dedicatedBuffer, +    VkImage dedicatedImage, +    VmaBufferImageUsage dedicatedBufferImageUsage, +    const VmaAllocationCreateInfo& createInfo, +    VmaSuballocationType suballocType, +    size_t allocationCount, +    VmaAllocation* pAllocations) +{ +    memset(pAllocations, 0, sizeof(VmaAllocation) * allocationCount); + +    VMA_ASSERT(VmaIsPow2(vkMemReq.alignment)); + +    if(vkMemReq.size == 0) +    { +        return VK_ERROR_INITIALIZATION_FAILED; +    } + +    VmaAllocationCreateInfo createInfoFinal = createInfo; +    VkResult res = CalcAllocationParams(createInfoFinal, requiresDedicatedAllocation, prefersDedicatedAllocation); +    if(res != VK_SUCCESS) +        return res; + +    if(createInfoFinal.pool != VK_NULL_HANDLE) +    { +        VmaBlockVector& blockVector = createInfoFinal.pool->m_BlockVector; +        return AllocateMemoryOfType( +            createInfoFinal.pool, +            vkMemReq.size, +            vkMemReq.alignment, +            prefersDedicatedAllocation, +            dedicatedBuffer, +            dedicatedImage, +            dedicatedBufferImageUsage, +            createInfoFinal, +            blockVector.GetMemoryTypeIndex(), +            suballocType, +            createInfoFinal.pool->m_DedicatedAllocations, +            blockVector, +            allocationCount, +            pAllocations); +    } +    else +    { +        // Bit mask of memory Vulkan types acceptable for this allocation. +        uint32_t memoryTypeBits = vkMemReq.memoryTypeBits; +        uint32_t memTypeIndex = UINT32_MAX; +        res = FindMemoryTypeIndex(memoryTypeBits, &createInfoFinal, dedicatedBufferImageUsage, &memTypeIndex); +        // Can't find any single memory type matching requirements. res is VK_ERROR_FEATURE_NOT_PRESENT. +        if(res != VK_SUCCESS) +            return res; +        do +        { +            VmaBlockVector* blockVector = m_pBlockVectors[memTypeIndex]; +            VMA_ASSERT(blockVector && "Trying to use unsupported memory type!"); +            res = AllocateMemoryOfType( +                VK_NULL_HANDLE, +                vkMemReq.size, +                vkMemReq.alignment, +                requiresDedicatedAllocation || prefersDedicatedAllocation, +                dedicatedBuffer, +                dedicatedImage, +                dedicatedBufferImageUsage, +                createInfoFinal, +                memTypeIndex, +                suballocType, +                m_DedicatedAllocations[memTypeIndex], +                *blockVector, +                allocationCount, +                pAllocations); +            // Allocation succeeded +            if(res == VK_SUCCESS) +                return VK_SUCCESS; + +            // Remove old memTypeIndex from list of possibilities. +            memoryTypeBits &= ~(1u << memTypeIndex); +            // Find alternative memTypeIndex. 
+            res = FindMemoryTypeIndex(memoryTypeBits, &createInfoFinal, dedicatedBufferImageUsage, &memTypeIndex); +        } while(res == VK_SUCCESS); + +        // No other matching memory type index could be found. +        // Not returning res, which is VK_ERROR_FEATURE_NOT_PRESENT, because we already failed to allocate once. +        return VK_ERROR_OUT_OF_DEVICE_MEMORY; +    } +} + +void VmaAllocator_T::FreeMemory( +    size_t allocationCount, +    const VmaAllocation* pAllocations) +{ +    VMA_ASSERT(pAllocations); + +    for(size_t allocIndex = allocationCount; allocIndex--; ) +    { +        VmaAllocation allocation = pAllocations[allocIndex]; + +        if(allocation != VK_NULL_HANDLE) +        { +            if(VMA_DEBUG_INITIALIZE_ALLOCATIONS) +            { +                FillAllocation(allocation, VMA_ALLOCATION_FILL_PATTERN_DESTROYED); +            } + +            switch(allocation->GetType()) +            { +            case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: +                { +                    VmaBlockVector* pBlockVector = VMA_NULL; +                    VmaPool hPool = allocation->GetParentPool(); +                    if(hPool != VK_NULL_HANDLE) +                    { +                        pBlockVector = &hPool->m_BlockVector; +                    } +                    else +                    { +                        const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex(); +                        pBlockVector = m_pBlockVectors[memTypeIndex]; +                        VMA_ASSERT(pBlockVector && "Trying to free memory of unsupported type!"); +                    } +                    pBlockVector->Free(allocation); +                } +                break; +            case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: +                FreeDedicatedMemory(allocation); +                break; +            default: +                VMA_ASSERT(0); +            } +        } +    } +} + +void VmaAllocator_T::CalculateStatistics(VmaTotalStatistics* pStats) +{ +    // Initialize. +    VmaClearDetailedStatistics(pStats->total); +    for(uint32_t i = 0; i < VK_MAX_MEMORY_TYPES; ++i) +        VmaClearDetailedStatistics(pStats->memoryType[i]); +    for(uint32_t i = 0; i < VK_MAX_MEMORY_HEAPS; ++i) +        VmaClearDetailedStatistics(pStats->memoryHeap[i]); + +    // Process default pools. +    for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) +    { +        VmaBlockVector* const pBlockVector = m_pBlockVectors[memTypeIndex]; +        if (pBlockVector != VMA_NULL) +            pBlockVector->AddDetailedStatistics(pStats->memoryType[memTypeIndex]); +    } + +    // Process custom pools. +    { +        VmaMutexLockRead lock(m_PoolsMutex, m_UseMutex); +        for(VmaPool pool = m_Pools.Front(); pool != VMA_NULL; pool = m_Pools.GetNext(pool)) +        { +            VmaBlockVector& blockVector = pool->m_BlockVector; +            const uint32_t memTypeIndex = blockVector.GetMemoryTypeIndex(); +            blockVector.AddDetailedStatistics(pStats->memoryType[memTypeIndex]); +            pool->m_DedicatedAllocations.AddDetailedStatistics(pStats->memoryType[memTypeIndex]); +        } +    } + +    // Process dedicated allocations. +    for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) +    { +        m_DedicatedAllocations[memTypeIndex].AddDetailedStatistics(pStats->memoryType[memTypeIndex]); +    } + +    // Sum from memory types to memory heaps. 
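// Not part of vk_mem_alloc.h: CalculateStatistics() below is the exact-but-slower path behind
// the public vmaCalculateStatistics(). A sketch of dumping the totals, assuming `allocator`
// exists in the caller:

VmaTotalStatistics stats;
vmaCalculateStatistics(allocator, &stats);
printf("allocations: %u, allocated: %llu bytes, unused in blocks: %llu bytes\n",
       stats.total.statistics.allocationCount,
       (unsigned long long)stats.total.statistics.allocationBytes,
       (unsigned long long)(stats.total.statistics.blockBytes -
                            stats.total.statistics.allocationBytes)); // block bytes not covered by allocations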
+    for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) +    { +        const uint32_t memHeapIndex = m_MemProps.memoryTypes[memTypeIndex].heapIndex; +        VmaAddDetailedStatistics(pStats->memoryHeap[memHeapIndex], pStats->memoryType[memTypeIndex]); +    } + +    // Sum from memory heaps to total. +    for(uint32_t memHeapIndex = 0; memHeapIndex < GetMemoryHeapCount(); ++memHeapIndex) +        VmaAddDetailedStatistics(pStats->total, pStats->memoryHeap[memHeapIndex]); + +    VMA_ASSERT(pStats->total.statistics.allocationCount == 0 || +        pStats->total.allocationSizeMax >= pStats->total.allocationSizeMin); +    VMA_ASSERT(pStats->total.unusedRangeCount == 0 || +        pStats->total.unusedRangeSizeMax >= pStats->total.unusedRangeSizeMin); +} + +void VmaAllocator_T::GetHeapBudgets(VmaBudget* outBudgets, uint32_t firstHeap, uint32_t heapCount) +{ +#if VMA_MEMORY_BUDGET +    if(m_UseExtMemoryBudget) +    { +        if(m_Budget.m_OperationsSinceBudgetFetch < 30) +        { +            VmaMutexLockRead lockRead(m_Budget.m_BudgetMutex, m_UseMutex); +            for(uint32_t i = 0; i < heapCount; ++i, ++outBudgets) +            { +                const uint32_t heapIndex = firstHeap + i; + +                outBudgets->statistics.blockCount = m_Budget.m_BlockCount[heapIndex]; +                outBudgets->statistics.allocationCount = m_Budget.m_AllocationCount[heapIndex]; +                outBudgets->statistics.blockBytes = m_Budget.m_BlockBytes[heapIndex]; +                outBudgets->statistics.allocationBytes = m_Budget.m_AllocationBytes[heapIndex]; + +                if(m_Budget.m_VulkanUsage[heapIndex] + outBudgets->statistics.blockBytes > m_Budget.m_BlockBytesAtBudgetFetch[heapIndex]) +                { +                    outBudgets->usage = m_Budget.m_VulkanUsage[heapIndex] + +                        outBudgets->statistics.blockBytes - m_Budget.m_BlockBytesAtBudgetFetch[heapIndex]; +                } +                else +                { +                    outBudgets->usage = 0; +                } + +                // Have to take MIN with heap size because explicit HeapSizeLimit is included in it. +                outBudgets->budget = VMA_MIN( +                    m_Budget.m_VulkanBudget[heapIndex], m_MemProps.memoryHeaps[heapIndex].size); +            } +        } +        else +        { +            UpdateVulkanBudget(); // Outside of mutex lock +            GetHeapBudgets(outBudgets, firstHeap, heapCount); // Recursion +        } +    } +    else +#endif +    { +        for(uint32_t i = 0; i < heapCount; ++i, ++outBudgets) +        { +            const uint32_t heapIndex = firstHeap + i; + +            outBudgets->statistics.blockCount = m_Budget.m_BlockCount[heapIndex]; +            outBudgets->statistics.allocationCount = m_Budget.m_AllocationCount[heapIndex]; +            outBudgets->statistics.blockBytes = m_Budget.m_BlockBytes[heapIndex]; +            outBudgets->statistics.allocationBytes = m_Budget.m_AllocationBytes[heapIndex]; + +            outBudgets->usage = outBudgets->statistics.blockBytes; +            outBudgets->budget = m_MemProps.memoryHeaps[heapIndex].size * 8 / 10; // 80% heuristics. 
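// Not part of vk_mem_alloc.h: without VK_EXT_memory_budget the "budget" reported here is only
// the 80%-of-heap-size guess above; with the extension it is re-fetched after ~30 allocator
// operations or on vmaSetCurrentFrameIndex(). A sketch of the per-frame pattern, assuming
// `allocator` and `frameIndex` exist in the render loop:

vmaSetCurrentFrameIndex(allocator, frameIndex); // also refreshes the budget when the extension is enabled

const VkPhysicalDeviceMemoryProperties* memProps = NULL;
vmaGetMemoryProperties(allocator, &memProps);

VmaBudget budgets[VK_MAX_MEMORY_HEAPS];
vmaGetHeapBudgets(allocator, budgets);           // one entry per memory heap
for (uint32_t heap = 0; heap < memProps->memoryHeapCount; ++heap)
{
    if (budgets[heap].usage > budgets[heap].budget)
    {
        // Over budget: a caller would typically stop streaming new resources here.
    }
}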
+        } +    } +} + +void VmaAllocator_T::GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationInfo* pAllocationInfo) +{ +    pAllocationInfo->memoryType = hAllocation->GetMemoryTypeIndex(); +    pAllocationInfo->deviceMemory = hAllocation->GetMemory(); +    pAllocationInfo->offset = hAllocation->GetOffset(); +    pAllocationInfo->size = hAllocation->GetSize(); +    pAllocationInfo->pMappedData = hAllocation->GetMappedData(); +    pAllocationInfo->pUserData = hAllocation->GetUserData(); +    pAllocationInfo->pName = hAllocation->GetName(); +} + +void VmaAllocator_T::GetAllocationInfo2(VmaAllocation hAllocation, VmaAllocationInfo2* pAllocationInfo) +{ +    GetAllocationInfo(hAllocation, &pAllocationInfo->allocationInfo); + +    switch (hAllocation->GetType()) +    { +    case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: +        pAllocationInfo->blockSize = hAllocation->GetBlock()->m_pMetadata->GetSize(); +        pAllocationInfo->dedicatedMemory = VK_FALSE; +        break; +    case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: +        pAllocationInfo->blockSize = pAllocationInfo->allocationInfo.size; +        pAllocationInfo->dedicatedMemory = VK_TRUE; +        break; +    default: +        VMA_ASSERT(0); +    } +} + +VkResult VmaAllocator_T::CreatePool(const VmaPoolCreateInfo* pCreateInfo, VmaPool* pPool) +{ +    VMA_DEBUG_LOG_FORMAT("  CreatePool: MemoryTypeIndex=%" PRIu32 ", flags=%" PRIu32, pCreateInfo->memoryTypeIndex, pCreateInfo->flags); + +    VmaPoolCreateInfo newCreateInfo = *pCreateInfo; + +    // Protection against uninitialized new structure member. If garbage data are left there, this pointer dereference would crash. +    if(pCreateInfo->pMemoryAllocateNext) +    { +        VMA_ASSERT(((const VkBaseInStructure*)pCreateInfo->pMemoryAllocateNext)->sType != 0); +    } + +    if(newCreateInfo.maxBlockCount == 0) +    { +        newCreateInfo.maxBlockCount = SIZE_MAX; +    } +    if(newCreateInfo.minBlockCount > newCreateInfo.maxBlockCount) +    { +        return VK_ERROR_INITIALIZATION_FAILED; +    } +    // Memory type index out of range or forbidden. +    if(pCreateInfo->memoryTypeIndex >= GetMemoryTypeCount() || +        ((1u << pCreateInfo->memoryTypeIndex) & m_GlobalMemoryTypeBits) == 0) +    { +        return VK_ERROR_FEATURE_NOT_PRESENT; +    } +    if(newCreateInfo.minAllocationAlignment > 0) +    { +        VMA_ASSERT(VmaIsPow2(newCreateInfo.minAllocationAlignment)); +    } + +    const VkDeviceSize preferredBlockSize = CalcPreferredBlockSize(newCreateInfo.memoryTypeIndex); + +    *pPool = vma_new(this, VmaPool_T)(this, newCreateInfo, preferredBlockSize); + +    VkResult res = (*pPool)->m_BlockVector.CreateMinBlocks(); +    if(res != VK_SUCCESS) +    { +        vma_delete(this, *pPool); +        *pPool = VMA_NULL; +        return res; +    } + +    // Add to m_Pools. +    { +        VmaMutexLockWrite lock(m_PoolsMutex, m_UseMutex); +        (*pPool)->SetId(m_NextPoolId++); +        m_Pools.PushBack(*pPool); +    } + +    return VK_SUCCESS; +} + +void VmaAllocator_T::DestroyPool(VmaPool pool) +{ +    // Remove from m_Pools. 
+    { +        VmaMutexLockWrite lock(m_PoolsMutex, m_UseMutex); +        m_Pools.Remove(pool); +    } + +    vma_delete(this, pool); +} + +void VmaAllocator_T::GetPoolStatistics(VmaPool pool, VmaStatistics* pPoolStats) +{ +    VmaClearStatistics(*pPoolStats); +    pool->m_BlockVector.AddStatistics(*pPoolStats); +    pool->m_DedicatedAllocations.AddStatistics(*pPoolStats); +} + +void VmaAllocator_T::CalculatePoolStatistics(VmaPool pool, VmaDetailedStatistics* pPoolStats) +{ +    VmaClearDetailedStatistics(*pPoolStats); +    pool->m_BlockVector.AddDetailedStatistics(*pPoolStats); +    pool->m_DedicatedAllocations.AddDetailedStatistics(*pPoolStats); +} + +void VmaAllocator_T::SetCurrentFrameIndex(uint32_t frameIndex) +{ +    m_CurrentFrameIndex.store(frameIndex); + +#if VMA_MEMORY_BUDGET +    if(m_UseExtMemoryBudget) +    { +        UpdateVulkanBudget(); +    } +#endif // #if VMA_MEMORY_BUDGET +} + +VkResult VmaAllocator_T::CheckPoolCorruption(VmaPool hPool) +{ +    return hPool->m_BlockVector.CheckCorruption(); +} + +VkResult VmaAllocator_T::CheckCorruption(uint32_t memoryTypeBits) +{ +    VkResult finalRes = VK_ERROR_FEATURE_NOT_PRESENT; + +    // Process default pools. +    for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) +    { +        VmaBlockVector* const pBlockVector = m_pBlockVectors[memTypeIndex]; +        if(pBlockVector != VMA_NULL) +        { +            VkResult localRes = pBlockVector->CheckCorruption(); +            switch(localRes) +            { +            case VK_ERROR_FEATURE_NOT_PRESENT: +                break; +            case VK_SUCCESS: +                finalRes = VK_SUCCESS; +                break; +            default: +                return localRes; +            } +        } +    } + +    // Process custom pools. +    { +        VmaMutexLockRead lock(m_PoolsMutex, m_UseMutex); +        for(VmaPool pool = m_Pools.Front(); pool != VMA_NULL; pool = m_Pools.GetNext(pool)) +        { +            if(((1u << pool->m_BlockVector.GetMemoryTypeIndex()) & memoryTypeBits) != 0) +            { +                VkResult localRes = pool->m_BlockVector.CheckCorruption(); +                switch(localRes) +                { +                case VK_ERROR_FEATURE_NOT_PRESENT: +                    break; +                case VK_SUCCESS: +                    finalRes = VK_SUCCESS; +                    break; +                default: +                    return localRes; +                } +            } +        } +    } + +    return finalRes; +} + +VkResult VmaAllocator_T::AllocateVulkanMemory(const VkMemoryAllocateInfo* pAllocateInfo, VkDeviceMemory* pMemory) +{ +    AtomicTransactionalIncrement<VMA_ATOMIC_UINT32> deviceMemoryCountIncrement; +    const uint64_t prevDeviceMemoryCount = deviceMemoryCountIncrement.Increment(&m_DeviceMemoryCount); +#if VMA_DEBUG_DONT_EXCEED_MAX_MEMORY_ALLOCATION_COUNT +    if(prevDeviceMemoryCount >= m_PhysicalDeviceProperties.limits.maxMemoryAllocationCount) +    { +        return VK_ERROR_TOO_MANY_OBJECTS; +    } +#endif + +    const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(pAllocateInfo->memoryTypeIndex); + +    // HeapSizeLimit is in effect for this heap. 
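// Not part of vk_mem_alloc.h: the heap limit enforced below comes from
// VmaAllocatorCreateInfo::pHeapSizeLimit, an array with one VkDeviceSize per memory heap where
// VK_WHOLE_SIZE means "no limit". A sketch that caps heap 0 at 1 GiB, assuming the handles
// (`physicalDevice`, `device`, `instance`) are created elsewhere in the renderer:

VkDeviceSize heapLimits[VK_MAX_MEMORY_HEAPS];
for (uint32_t i = 0; i < VK_MAX_MEMORY_HEAPS; ++i)
    heapLimits[i] = VK_WHOLE_SIZE;               // no limit by default
heapLimits[0] = 1024ull * 1024 * 1024;           // treat heap 0 as if it only had 1 GiB

VmaAllocatorCreateInfo allocatorInfo = {};
allocatorInfo.physicalDevice = physicalDevice;
allocatorInfo.device = device;
allocatorInfo.instance = instance;
allocatorInfo.pHeapSizeLimit = heapLimits;

VmaAllocator limitedAllocator = VK_NULL_HANDLE;
VkResult res = vmaCreateAllocator(&allocatorInfo, &limitedAllocator);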
+    if((m_HeapSizeLimitMask & (1u << heapIndex)) != 0) +    { +        const VkDeviceSize heapSize = m_MemProps.memoryHeaps[heapIndex].size; +        VkDeviceSize blockBytes = m_Budget.m_BlockBytes[heapIndex]; +        for(;;) +        { +            const VkDeviceSize blockBytesAfterAllocation = blockBytes + pAllocateInfo->allocationSize; +            if(blockBytesAfterAllocation > heapSize) +            { +                return VK_ERROR_OUT_OF_DEVICE_MEMORY; +            } +            if(m_Budget.m_BlockBytes[heapIndex].compare_exchange_strong(blockBytes, blockBytesAfterAllocation)) +            { +                break; +            } +        } +    } +    else +    { +        m_Budget.m_BlockBytes[heapIndex] += pAllocateInfo->allocationSize; +    } +    ++m_Budget.m_BlockCount[heapIndex]; + +    // VULKAN CALL vkAllocateMemory. +    VkResult res = (*m_VulkanFunctions.vkAllocateMemory)(m_hDevice, pAllocateInfo, GetAllocationCallbacks(), pMemory); + +    if(res == VK_SUCCESS) +    { +#if VMA_MEMORY_BUDGET +        ++m_Budget.m_OperationsSinceBudgetFetch; +#endif + +        // Informative callback. +        if(m_DeviceMemoryCallbacks.pfnAllocate != VMA_NULL) +        { +            (*m_DeviceMemoryCallbacks.pfnAllocate)(this, pAllocateInfo->memoryTypeIndex, *pMemory, pAllocateInfo->allocationSize, m_DeviceMemoryCallbacks.pUserData); +        } + +        deviceMemoryCountIncrement.Commit(); +    } +    else +    { +        --m_Budget.m_BlockCount[heapIndex]; +        m_Budget.m_BlockBytes[heapIndex] -= pAllocateInfo->allocationSize; +    } + +    return res; +} + +void VmaAllocator_T::FreeVulkanMemory(uint32_t memoryType, VkDeviceSize size, VkDeviceMemory hMemory) +{ +    // Informative callback. +    if(m_DeviceMemoryCallbacks.pfnFree != VMA_NULL) +    { +        (*m_DeviceMemoryCallbacks.pfnFree)(this, memoryType, hMemory, size, m_DeviceMemoryCallbacks.pUserData); +    } + +    // VULKAN CALL vkFreeMemory. 
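// Not part of vk_mem_alloc.h: the pfnAllocate/pfnFree calls in AllocateVulkanMemory and just
// above are purely informative hooks. A sketch of wiring them up for logging; the callback
// names are chosen here and are not part of this commit:

static void onDeviceMemAlloc(VmaAllocator allocator, uint32_t memoryType,
                             VkDeviceMemory memory, VkDeviceSize size, void* pUserData)
{
    (void)allocator; (void)memory; (void)pUserData;
    printf("vkAllocateMemory: type %u, %llu bytes\n", memoryType, (unsigned long long)size);
}

static void onDeviceMemFree(VmaAllocator allocator, uint32_t memoryType,
                            VkDeviceMemory memory, VkDeviceSize size, void* pUserData)
{
    (void)allocator; (void)memory; (void)pUserData;
    printf("vkFreeMemory: type %u, %llu bytes\n", memoryType, (unsigned long long)size);
}

// ...then, in the VmaAllocatorCreateInfo passed to vmaCreateAllocator():
VmaDeviceMemoryCallbacks memCallbacks = {};
memCallbacks.pfnAllocate = onDeviceMemAlloc;
memCallbacks.pfnFree = onDeviceMemFree;
allocatorInfo.pDeviceMemoryCallbacks = &memCallbacks;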
+    (*m_VulkanFunctions.vkFreeMemory)(m_hDevice, hMemory, GetAllocationCallbacks()); + +    const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memoryType); +    --m_Budget.m_BlockCount[heapIndex]; +    m_Budget.m_BlockBytes[heapIndex] -= size; + +    --m_DeviceMemoryCount; +} + +VkResult VmaAllocator_T::BindVulkanBuffer( +    VkDeviceMemory memory, +    VkDeviceSize memoryOffset, +    VkBuffer buffer, +    const void* pNext) +{ +    if(pNext != VMA_NULL) +    { +#if VMA_VULKAN_VERSION >= 1001000 || VMA_BIND_MEMORY2 +        if((m_UseKhrBindMemory2 || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) && +            m_VulkanFunctions.vkBindBufferMemory2KHR != VMA_NULL) +        { +            VkBindBufferMemoryInfoKHR bindBufferMemoryInfo = { VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR }; +            bindBufferMemoryInfo.pNext = pNext; +            bindBufferMemoryInfo.buffer = buffer; +            bindBufferMemoryInfo.memory = memory; +            bindBufferMemoryInfo.memoryOffset = memoryOffset; +            return (*m_VulkanFunctions.vkBindBufferMemory2KHR)(m_hDevice, 1, &bindBufferMemoryInfo); +        } +        else +#endif // #if VMA_VULKAN_VERSION >= 1001000 || VMA_BIND_MEMORY2 +        { +            return VK_ERROR_EXTENSION_NOT_PRESENT; +        } +    } +    else +    { +        return (*m_VulkanFunctions.vkBindBufferMemory)(m_hDevice, buffer, memory, memoryOffset); +    } +} + +VkResult VmaAllocator_T::BindVulkanImage( +    VkDeviceMemory memory, +    VkDeviceSize memoryOffset, +    VkImage image, +    const void* pNext) +{ +    if(pNext != VMA_NULL) +    { +#if VMA_VULKAN_VERSION >= 1001000 || VMA_BIND_MEMORY2 +        if((m_UseKhrBindMemory2 || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) && +            m_VulkanFunctions.vkBindImageMemory2KHR != VMA_NULL) +        { +            VkBindImageMemoryInfoKHR bindBufferMemoryInfo = { VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO_KHR }; +            bindBufferMemoryInfo.pNext = pNext; +            bindBufferMemoryInfo.image = image; +            bindBufferMemoryInfo.memory = memory; +            bindBufferMemoryInfo.memoryOffset = memoryOffset; +            return (*m_VulkanFunctions.vkBindImageMemory2KHR)(m_hDevice, 1, &bindBufferMemoryInfo); +        } +        else +#endif // #if VMA_BIND_MEMORY2 +        { +            return VK_ERROR_EXTENSION_NOT_PRESENT; +        } +    } +    else +    { +        return (*m_VulkanFunctions.vkBindImageMemory)(m_hDevice, image, memory, memoryOffset); +    } +} + +VkResult VmaAllocator_T::Map(VmaAllocation hAllocation, void** ppData) +{ +    switch(hAllocation->GetType()) +    { +    case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: +        { +            VmaDeviceMemoryBlock* const pBlock = hAllocation->GetBlock(); +            char *pBytes = VMA_NULL; +            VkResult res = pBlock->Map(this, 1, (void**)&pBytes); +            if(res == VK_SUCCESS) +            { +                *ppData = pBytes + (ptrdiff_t)hAllocation->GetOffset(); +                hAllocation->BlockAllocMap(); +            } +            return res; +        } +    case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: +        return hAllocation->DedicatedAllocMap(this, ppData); +    default: +        VMA_ASSERT(0); +        return VK_ERROR_MEMORY_MAP_FAILED; +    } +} + +void VmaAllocator_T::Unmap(VmaAllocation hAllocation) +{ +    switch(hAllocation->GetType()) +    { +    case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: +        { +            VmaDeviceMemoryBlock* const pBlock = hAllocation->GetBlock(); +            
hAllocation->BlockAllocUnmap(); +            pBlock->Unmap(this, 1); +        } +        break; +    case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: +        hAllocation->DedicatedAllocUnmap(this); +        break; +    default: +        VMA_ASSERT(0); +    } +} + +VkResult VmaAllocator_T::BindBufferMemory( +    VmaAllocation hAllocation, +    VkDeviceSize allocationLocalOffset, +    VkBuffer hBuffer, +    const void* pNext) +{ +    VkResult res = VK_ERROR_UNKNOWN_COPY; +    switch(hAllocation->GetType()) +    { +    case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: +        res = BindVulkanBuffer(hAllocation->GetMemory(), allocationLocalOffset, hBuffer, pNext); +        break; +    case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: +    { +        VmaDeviceMemoryBlock* const pBlock = hAllocation->GetBlock(); +        VMA_ASSERT(pBlock && "Binding buffer to allocation that doesn't belong to any block."); +        res = pBlock->BindBufferMemory(this, hAllocation, allocationLocalOffset, hBuffer, pNext); +        break; +    } +    default: +        VMA_ASSERT(0); +    } +    return res; +} + +VkResult VmaAllocator_T::BindImageMemory( +    VmaAllocation hAllocation, +    VkDeviceSize allocationLocalOffset, +    VkImage hImage, +    const void* pNext) +{ +    VkResult res = VK_ERROR_UNKNOWN_COPY; +    switch(hAllocation->GetType()) +    { +    case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: +        res = BindVulkanImage(hAllocation->GetMemory(), allocationLocalOffset, hImage, pNext); +        break; +    case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: +    { +        VmaDeviceMemoryBlock* pBlock = hAllocation->GetBlock(); +        VMA_ASSERT(pBlock && "Binding image to allocation that doesn't belong to any block."); +        res = pBlock->BindImageMemory(this, hAllocation, allocationLocalOffset, hImage, pNext); +        break; +    } +    default: +        VMA_ASSERT(0); +    } +    return res; +} + +VkResult VmaAllocator_T::FlushOrInvalidateAllocation( +    VmaAllocation hAllocation, +    VkDeviceSize offset, VkDeviceSize size, +    VMA_CACHE_OPERATION op) +{ +    VkResult res = VK_SUCCESS; + +    VkMappedMemoryRange memRange = {}; +    if(GetFlushOrInvalidateRange(hAllocation, offset, size, memRange)) +    { +        switch(op) +        { +        case VMA_CACHE_FLUSH: +            res = (*GetVulkanFunctions().vkFlushMappedMemoryRanges)(m_hDevice, 1, &memRange); +            break; +        case VMA_CACHE_INVALIDATE: +            res = (*GetVulkanFunctions().vkInvalidateMappedMemoryRanges)(m_hDevice, 1, &memRange); +            break; +        default: +            VMA_ASSERT(0); +        } +    } +    // else: Just ignore this call. +    return res; +} + +VkResult VmaAllocator_T::FlushOrInvalidateAllocations( +    uint32_t allocationCount, +    const VmaAllocation* allocations, +    const VkDeviceSize* offsets, const VkDeviceSize* sizes, +    VMA_CACHE_OPERATION op) +{ +    typedef VmaStlAllocator<VkMappedMemoryRange> RangeAllocator; +    typedef VmaSmallVector<VkMappedMemoryRange, RangeAllocator, 16> RangeVector; +    RangeVector ranges = RangeVector(RangeAllocator(GetAllocationCallbacks())); + +    for(uint32_t allocIndex = 0; allocIndex < allocationCount; ++allocIndex) +    { +        const VmaAllocation alloc = allocations[allocIndex]; +        const VkDeviceSize offset = offsets != VMA_NULL ? offsets[allocIndex] : 0; +        const VkDeviceSize size = sizes != VMA_NULL ? 
sizes[allocIndex] : VK_WHOLE_SIZE; +        VkMappedMemoryRange newRange; +        if(GetFlushOrInvalidateRange(alloc, offset, size, newRange)) +        { +            ranges.push_back(newRange); +        } +    } + +    VkResult res = VK_SUCCESS; +    if(!ranges.empty()) +    { +        switch(op) +        { +        case VMA_CACHE_FLUSH: +            res = (*GetVulkanFunctions().vkFlushMappedMemoryRanges)(m_hDevice, (uint32_t)ranges.size(), ranges.data()); +            break; +        case VMA_CACHE_INVALIDATE: +            res = (*GetVulkanFunctions().vkInvalidateMappedMemoryRanges)(m_hDevice, (uint32_t)ranges.size(), ranges.data()); +            break; +        default: +            VMA_ASSERT(0); +        } +    } +    // else: Just ignore this call. +    return res; +} + +VkResult VmaAllocator_T::CopyMemoryToAllocation( +    const void* pSrcHostPointer, +    VmaAllocation dstAllocation, +    VkDeviceSize dstAllocationLocalOffset, +    VkDeviceSize size) +{ +    void* dstMappedData = VMA_NULL; +    VkResult res = Map(dstAllocation, &dstMappedData); +    if(res == VK_SUCCESS) +    { +        memcpy((char*)dstMappedData + dstAllocationLocalOffset, pSrcHostPointer, (size_t)size); +        Unmap(dstAllocation); +        res = FlushOrInvalidateAllocation(dstAllocation, dstAllocationLocalOffset, size, VMA_CACHE_FLUSH); +    } +    return res; +} + +VkResult VmaAllocator_T::CopyAllocationToMemory( +    VmaAllocation srcAllocation, +    VkDeviceSize srcAllocationLocalOffset, +    void* pDstHostPointer, +    VkDeviceSize size) +{ +    void* srcMappedData = VMA_NULL; +    VkResult res = Map(srcAllocation, &srcMappedData); +    if(res == VK_SUCCESS) +    { +        res = FlushOrInvalidateAllocation(srcAllocation, srcAllocationLocalOffset, size, VMA_CACHE_INVALIDATE); +        if(res == VK_SUCCESS) +        { +            memcpy(pDstHostPointer, (const char*)srcMappedData + srcAllocationLocalOffset, (size_t)size); +            Unmap(srcAllocation); +        } +    } +    return res; +} + +void VmaAllocator_T::FreeDedicatedMemory(const VmaAllocation allocation) +{ +    VMA_ASSERT(allocation && allocation->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); + +    const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex(); +    VmaPool parentPool = allocation->GetParentPool(); +    if(parentPool == VK_NULL_HANDLE) +    { +        // Default pool +        m_DedicatedAllocations[memTypeIndex].Unregister(allocation); +    } +    else +    { +        // Custom pool +        parentPool->m_DedicatedAllocations.Unregister(allocation); +    } + +    VkDeviceMemory hMemory = allocation->GetMemory(); + +    /* +    There is no need to call this, because Vulkan spec allows to skip vkUnmapMemory +    before vkFreeMemory. + +    if(allocation->GetMappedData() != VMA_NULL) +    { +        (*m_VulkanFunctions.vkUnmapMemory)(m_hDevice, hMemory); +    } +    */ + +    FreeVulkanMemory(memTypeIndex, allocation->GetSize(), hMemory); + +    m_Budget.RemoveAllocation(MemoryTypeIndexToHeapIndex(allocation->GetMemoryTypeIndex()), allocation->GetSize()); +    allocation->Destroy(this); +    m_AllocationObjectAllocator.Free(allocation); + +    VMA_DEBUG_LOG_FORMAT("    Freed DedicatedMemory MemoryTypeIndex=%" PRIu32, memTypeIndex); +} + +uint32_t VmaAllocator_T::CalculateGpuDefragmentationMemoryTypeBits() const +{ +    VkBufferCreateInfo dummyBufCreateInfo; +    VmaFillGpuDefragmentationBufferCreateInfo(dummyBufCreateInfo); + +    uint32_t memoryTypeBits = 0; + +    // Create buffer. 
+    VkBuffer buf = VK_NULL_HANDLE; +    VkResult res = (*GetVulkanFunctions().vkCreateBuffer)( +        m_hDevice, &dummyBufCreateInfo, GetAllocationCallbacks(), &buf); +    if(res == VK_SUCCESS) +    { +        // Query for supported memory types. +        VkMemoryRequirements memReq; +        (*GetVulkanFunctions().vkGetBufferMemoryRequirements)(m_hDevice, buf, &memReq); +        memoryTypeBits = memReq.memoryTypeBits; + +        // Destroy buffer. +        (*GetVulkanFunctions().vkDestroyBuffer)(m_hDevice, buf, GetAllocationCallbacks()); +    } + +    return memoryTypeBits; +} + +uint32_t VmaAllocator_T::CalculateGlobalMemoryTypeBits() const +{ +    // Make sure memory information is already fetched. +    VMA_ASSERT(GetMemoryTypeCount() > 0); + +    uint32_t memoryTypeBits = UINT32_MAX; + +    if(!m_UseAmdDeviceCoherentMemory) +    { +        // Exclude memory types that have VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD. +        for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) +        { +            if((m_MemProps.memoryTypes[memTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY) != 0) +            { +                memoryTypeBits &= ~(1u << memTypeIndex); +            } +        } +    } + +    return memoryTypeBits; +} + +bool VmaAllocator_T::GetFlushOrInvalidateRange( +    VmaAllocation allocation, +    VkDeviceSize offset, VkDeviceSize size, +    VkMappedMemoryRange& outRange) const +{ +    const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex(); +    if(size > 0 && IsMemoryTypeNonCoherent(memTypeIndex)) +    { +        const VkDeviceSize nonCoherentAtomSize = m_PhysicalDeviceProperties.limits.nonCoherentAtomSize; +        const VkDeviceSize allocationSize = allocation->GetSize(); +        VMA_ASSERT(offset <= allocationSize); + +        outRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; +        outRange.pNext = VMA_NULL; +        outRange.memory = allocation->GetMemory(); + +        switch(allocation->GetType()) +        { +        case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: +            outRange.offset = VmaAlignDown(offset, nonCoherentAtomSize); +            if(size == VK_WHOLE_SIZE) +            { +                outRange.size = allocationSize - outRange.offset; +            } +            else +            { +                VMA_ASSERT(offset + size <= allocationSize); +                outRange.size = VMA_MIN( +                    VmaAlignUp(size + (offset - outRange.offset), nonCoherentAtomSize), +                    allocationSize - outRange.offset); +            } +            break; +        case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: +        { +            // 1. Still within this allocation. +            outRange.offset = VmaAlignDown(offset, nonCoherentAtomSize); +            if(size == VK_WHOLE_SIZE) +            { +                size = allocationSize - offset; +            } +            else +            { +                VMA_ASSERT(offset + size <= allocationSize); +            } +            outRange.size = VmaAlignUp(size + (offset - outRange.offset), nonCoherentAtomSize); + +            // 2. Adjust to whole block. 
+            const VkDeviceSize allocationOffset = allocation->GetOffset(); +            VMA_ASSERT(allocationOffset % nonCoherentAtomSize == 0); +            const VkDeviceSize blockSize = allocation->GetBlock()->m_pMetadata->GetSize(); +            outRange.offset += allocationOffset; +            outRange.size = VMA_MIN(outRange.size, blockSize - outRange.offset); + +            break; +        } +        default: +            VMA_ASSERT(0); +        } +        return true; +    } +    return false; +} + +#if VMA_MEMORY_BUDGET +void VmaAllocator_T::UpdateVulkanBudget() +{ +    VMA_ASSERT(m_UseExtMemoryBudget); + +    VkPhysicalDeviceMemoryProperties2KHR memProps = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2_KHR }; + +    VkPhysicalDeviceMemoryBudgetPropertiesEXT budgetProps = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT }; +    VmaPnextChainPushFront(&memProps, &budgetProps); + +    GetVulkanFunctions().vkGetPhysicalDeviceMemoryProperties2KHR(m_PhysicalDevice, &memProps); + +    { +        VmaMutexLockWrite lockWrite(m_Budget.m_BudgetMutex, m_UseMutex); + +        for(uint32_t heapIndex = 0; heapIndex < GetMemoryHeapCount(); ++heapIndex) +        { +            m_Budget.m_VulkanUsage[heapIndex] = budgetProps.heapUsage[heapIndex]; +            m_Budget.m_VulkanBudget[heapIndex] = budgetProps.heapBudget[heapIndex]; +            m_Budget.m_BlockBytesAtBudgetFetch[heapIndex] = m_Budget.m_BlockBytes[heapIndex].load(); + +            // Some bugged drivers return the budget incorrectly, e.g. 0 or much bigger than heap size. +            if(m_Budget.m_VulkanBudget[heapIndex] == 0) +            { +                m_Budget.m_VulkanBudget[heapIndex] = m_MemProps.memoryHeaps[heapIndex].size * 8 / 10; // 80% heuristics. +            } +            else if(m_Budget.m_VulkanBudget[heapIndex] > m_MemProps.memoryHeaps[heapIndex].size) +            { +                m_Budget.m_VulkanBudget[heapIndex] = m_MemProps.memoryHeaps[heapIndex].size; +            } +            if(m_Budget.m_VulkanUsage[heapIndex] == 0 && m_Budget.m_BlockBytesAtBudgetFetch[heapIndex] > 0) +            { +                m_Budget.m_VulkanUsage[heapIndex] = m_Budget.m_BlockBytesAtBudgetFetch[heapIndex]; +            } +        } +        m_Budget.m_OperationsSinceBudgetFetch = 0; +    } +} +#endif // VMA_MEMORY_BUDGET + +void VmaAllocator_T::FillAllocation(const VmaAllocation hAllocation, uint8_t pattern) +{ +    if(VMA_DEBUG_INITIALIZE_ALLOCATIONS && +        hAllocation->IsMappingAllowed() && +        (m_MemProps.memoryTypes[hAllocation->GetMemoryTypeIndex()].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0) +    { +        void* pData = VMA_NULL; +        VkResult res = Map(hAllocation, &pData); +        if(res == VK_SUCCESS) +        { +            memset(pData, (int)pattern, (size_t)hAllocation->GetSize()); +            FlushOrInvalidateAllocation(hAllocation, 0, VK_WHOLE_SIZE, VMA_CACHE_FLUSH); +            Unmap(hAllocation); +        } +        else +        { +            VMA_ASSERT(0 && "VMA_DEBUG_INITIALIZE_ALLOCATIONS is enabled, but couldn't map memory to fill allocation."); +        } +    } +} + +uint32_t VmaAllocator_T::GetGpuDefragmentationMemoryTypeBits() +{ +    uint32_t memoryTypeBits = m_GpuDefragmentationMemoryTypeBits.load(); +    if(memoryTypeBits == UINT32_MAX) +    { +        memoryTypeBits = CalculateGpuDefragmentationMemoryTypeBits(); +        m_GpuDefragmentationMemoryTypeBits.store(memoryTypeBits); +    } +    return memoryTypeBits; +} + +#if 
VMA_STATS_STRING_ENABLED +void VmaAllocator_T::PrintDetailedMap(VmaJsonWriter& json) +{ +    json.WriteString("DefaultPools"); +    json.BeginObject(); +    { +        for (uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) +        { +            VmaBlockVector* pBlockVector = m_pBlockVectors[memTypeIndex]; +            VmaDedicatedAllocationList& dedicatedAllocList = m_DedicatedAllocations[memTypeIndex]; +            if (pBlockVector != VMA_NULL) +            { +                json.BeginString("Type "); +                json.ContinueString(memTypeIndex); +                json.EndString(); +                json.BeginObject(); +                { +                    json.WriteString("PreferredBlockSize"); +                    json.WriteNumber(pBlockVector->GetPreferredBlockSize()); + +                    json.WriteString("Blocks"); +                    pBlockVector->PrintDetailedMap(json); + +                    json.WriteString("DedicatedAllocations"); +                    dedicatedAllocList.BuildStatsString(json); +                } +                json.EndObject(); +            } +        } +    } +    json.EndObject(); + +    json.WriteString("CustomPools"); +    json.BeginObject(); +    { +        VmaMutexLockRead lock(m_PoolsMutex, m_UseMutex); +        if (!m_Pools.IsEmpty()) +        { +            for (uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) +            { +                bool displayType = true; +                size_t index = 0; +                for (VmaPool pool = m_Pools.Front(); pool != VMA_NULL; pool = m_Pools.GetNext(pool)) +                { +                    VmaBlockVector& blockVector = pool->m_BlockVector; +                    if (blockVector.GetMemoryTypeIndex() == memTypeIndex) +                    { +                        if (displayType) +                        { +                            json.BeginString("Type "); +                            json.ContinueString(memTypeIndex); +                            json.EndString(); +                            json.BeginArray(); +                            displayType = false; +                        } + +                        json.BeginObject(); +                        { +                            json.WriteString("Name"); +                            json.BeginString(); +                            json.ContinueString((uint64_t)index++); +                            if (pool->GetName()) +                            { +                                json.ContinueString(" - "); +                                json.ContinueString(pool->GetName()); +                            } +                            json.EndString(); + +                            json.WriteString("PreferredBlockSize"); +                            json.WriteNumber(blockVector.GetPreferredBlockSize()); + +                            json.WriteString("Blocks"); +                            blockVector.PrintDetailedMap(json); + +                            json.WriteString("DedicatedAllocations"); +                            pool->m_DedicatedAllocations.BuildStatsString(json); +                        } +                        json.EndObject(); +                    } +                } + +                if (!displayType) +                    json.EndArray(); +            } +        } +    } +    json.EndObject(); +} +#endif // VMA_STATS_STRING_ENABLED +#endif // _VMA_ALLOCATOR_T_FUNCTIONS + + +#ifndef _VMA_PUBLIC_INTERFACE +VMA_CALL_PRE VkResult VMA_CALL_POST 
vmaCreateAllocator( +    const VmaAllocatorCreateInfo* pCreateInfo, +    VmaAllocator* pAllocator) +{ +    VMA_ASSERT(pCreateInfo && pAllocator); +    VMA_ASSERT(pCreateInfo->vulkanApiVersion == 0 || +        (VK_VERSION_MAJOR(pCreateInfo->vulkanApiVersion) == 1 && VK_VERSION_MINOR(pCreateInfo->vulkanApiVersion) <= 4)); +    VMA_DEBUG_LOG("vmaCreateAllocator"); +    *pAllocator = vma_new(pCreateInfo->pAllocationCallbacks, VmaAllocator_T)(pCreateInfo); +    VkResult result = (*pAllocator)->Init(pCreateInfo); +    if(result < 0) +    { +        vma_delete(pCreateInfo->pAllocationCallbacks, *pAllocator); +        *pAllocator = VK_NULL_HANDLE; +    } +    return result; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyAllocator( +    VmaAllocator allocator) +{ +    if(allocator != VK_NULL_HANDLE) +    { +        VMA_DEBUG_LOG("vmaDestroyAllocator"); +        VkAllocationCallbacks allocationCallbacks = allocator->m_AllocationCallbacks; // Have to copy the callbacks when destroying. +        vma_delete(&allocationCallbacks, allocator); +    } +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocatorInfo(VmaAllocator allocator, VmaAllocatorInfo* pAllocatorInfo) +{ +    VMA_ASSERT(allocator && pAllocatorInfo); +    pAllocatorInfo->instance = allocator->m_hInstance; +    pAllocatorInfo->physicalDevice = allocator->GetPhysicalDevice(); +    pAllocatorInfo->device = allocator->m_hDevice; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetPhysicalDeviceProperties( +    VmaAllocator allocator, +    const VkPhysicalDeviceProperties **ppPhysicalDeviceProperties) +{ +    VMA_ASSERT(allocator && ppPhysicalDeviceProperties); +    *ppPhysicalDeviceProperties = &allocator->m_PhysicalDeviceProperties; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryProperties( +    VmaAllocator allocator, +    const VkPhysicalDeviceMemoryProperties** ppPhysicalDeviceMemoryProperties) +{ +    VMA_ASSERT(allocator && ppPhysicalDeviceMemoryProperties); +    *ppPhysicalDeviceMemoryProperties = &allocator->m_MemProps; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryTypeProperties( +    VmaAllocator allocator, +    uint32_t memoryTypeIndex, +    VkMemoryPropertyFlags* pFlags) +{ +    VMA_ASSERT(allocator && pFlags); +    VMA_ASSERT(memoryTypeIndex < allocator->GetMemoryTypeCount()); +    *pFlags = allocator->m_MemProps.memoryTypes[memoryTypeIndex].propertyFlags; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaSetCurrentFrameIndex( +    VmaAllocator allocator, +    uint32_t frameIndex) +{ +    VMA_ASSERT(allocator); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    allocator->SetCurrentFrameIndex(frameIndex); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateStatistics( +    VmaAllocator allocator, +    VmaTotalStatistics* pStats) +{ +    VMA_ASSERT(allocator && pStats); +    VMA_DEBUG_GLOBAL_MUTEX_LOCK +    allocator->CalculateStatistics(pStats); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetHeapBudgets( +    VmaAllocator allocator, +    VmaBudget* pBudgets) +{ +    VMA_ASSERT(allocator && pBudgets); +    VMA_DEBUG_GLOBAL_MUTEX_LOCK +    allocator->GetHeapBudgets(pBudgets, 0, allocator->GetMemoryHeapCount()); +} + +#if VMA_STATS_STRING_ENABLED + +VMA_CALL_PRE void VMA_CALL_POST vmaBuildStatsString( +    VmaAllocator allocator, +    char** ppStatsString, +    VkBool32 detailedMap) +{ +    VMA_ASSERT(allocator && ppStatsString); +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    VmaStringBuilder sb(allocator->GetAllocationCallbacks()); +    { +        VmaBudget budgets[VK_MAX_MEMORY_HEAPS]; +        allocator->GetHeapBudgets(budgets, 0, allocator->GetMemoryHeapCount()); 
+ +        VmaTotalStatistics stats; +        allocator->CalculateStatistics(&stats); + +        VmaJsonWriter json(allocator->GetAllocationCallbacks(), sb); +        json.BeginObject(); +        { +            json.WriteString("General"); +            json.BeginObject(); +            { +                const VkPhysicalDeviceProperties& deviceProperties = allocator->m_PhysicalDeviceProperties; +                const VkPhysicalDeviceMemoryProperties& memoryProperties = allocator->m_MemProps; + +                json.WriteString("API"); +                json.WriteString("Vulkan"); + +                json.WriteString("apiVersion"); +                json.BeginString(); +                json.ContinueString(VK_VERSION_MAJOR(deviceProperties.apiVersion)); +                json.ContinueString("."); +                json.ContinueString(VK_VERSION_MINOR(deviceProperties.apiVersion)); +                json.ContinueString("."); +                json.ContinueString(VK_VERSION_PATCH(deviceProperties.apiVersion)); +                json.EndString(); + +                json.WriteString("GPU"); +                json.WriteString(deviceProperties.deviceName); +                json.WriteString("deviceType"); +                json.WriteNumber(static_cast<uint32_t>(deviceProperties.deviceType)); + +                json.WriteString("maxMemoryAllocationCount"); +                json.WriteNumber(deviceProperties.limits.maxMemoryAllocationCount); +                json.WriteString("bufferImageGranularity"); +                json.WriteNumber(deviceProperties.limits.bufferImageGranularity); +                json.WriteString("nonCoherentAtomSize"); +                json.WriteNumber(deviceProperties.limits.nonCoherentAtomSize); + +                json.WriteString("memoryHeapCount"); +                json.WriteNumber(memoryProperties.memoryHeapCount); +                json.WriteString("memoryTypeCount"); +                json.WriteNumber(memoryProperties.memoryTypeCount); +            } +            json.EndObject(); +        } +        { +            json.WriteString("Total"); +            VmaPrintDetailedStatistics(json, stats.total); +        } +        { +            json.WriteString("MemoryInfo"); +            json.BeginObject(); +            { +                for (uint32_t heapIndex = 0; heapIndex < allocator->GetMemoryHeapCount(); ++heapIndex) +                { +                    json.BeginString("Heap "); +                    json.ContinueString(heapIndex); +                    json.EndString(); +                    json.BeginObject(); +                    { +                        const VkMemoryHeap& heapInfo = allocator->m_MemProps.memoryHeaps[heapIndex]; +                        json.WriteString("Flags"); +                        json.BeginArray(true); +                        { +                            if (heapInfo.flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) +                                json.WriteString("DEVICE_LOCAL"); +                        #if VMA_VULKAN_VERSION >= 1001000 +                            if (heapInfo.flags & VK_MEMORY_HEAP_MULTI_INSTANCE_BIT) +                                json.WriteString("MULTI_INSTANCE"); +                        #endif + +                            VkMemoryHeapFlags flags = heapInfo.flags & +                                ~(VK_MEMORY_HEAP_DEVICE_LOCAL_BIT +                        #if VMA_VULKAN_VERSION >= 1001000 +                                    | VK_MEMORY_HEAP_MULTI_INSTANCE_BIT +                        #endif +                                    ); +  
                          if (flags != 0) +                                json.WriteNumber(flags); +                        } +                        json.EndArray(); + +                        json.WriteString("Size"); +                        json.WriteNumber(heapInfo.size); + +                        json.WriteString("Budget"); +                        json.BeginObject(); +                        { +                            json.WriteString("BudgetBytes"); +                            json.WriteNumber(budgets[heapIndex].budget); +                            json.WriteString("UsageBytes"); +                            json.WriteNumber(budgets[heapIndex].usage); +                        } +                        json.EndObject(); + +                        json.WriteString("Stats"); +                        VmaPrintDetailedStatistics(json, stats.memoryHeap[heapIndex]); + +                        json.WriteString("MemoryPools"); +                        json.BeginObject(); +                        { +                            for (uint32_t typeIndex = 0; typeIndex < allocator->GetMemoryTypeCount(); ++typeIndex) +                            { +                                if (allocator->MemoryTypeIndexToHeapIndex(typeIndex) == heapIndex) +                                { +                                    json.BeginString("Type "); +                                    json.ContinueString(typeIndex); +                                    json.EndString(); +                                    json.BeginObject(); +                                    { +                                        json.WriteString("Flags"); +                                        json.BeginArray(true); +                                        { +                                            VkMemoryPropertyFlags flags = allocator->m_MemProps.memoryTypes[typeIndex].propertyFlags; +                                            if (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) +                                                json.WriteString("DEVICE_LOCAL"); +                                            if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) +                                                json.WriteString("HOST_VISIBLE"); +                                            if (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) +                                                json.WriteString("HOST_COHERENT"); +                                            if (flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) +                                                json.WriteString("HOST_CACHED"); +                                            if (flags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) +                                                json.WriteString("LAZILY_ALLOCATED"); +                                        #if VMA_VULKAN_VERSION >= 1001000 +                                            if (flags & VK_MEMORY_PROPERTY_PROTECTED_BIT) +                                                json.WriteString("PROTECTED"); +                                        #endif +                                        #if VK_AMD_device_coherent_memory +                                            if (flags & VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY) +                                                json.WriteString("DEVICE_COHERENT_AMD"); +                                            if (flags & VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY) +                                                json.WriteString("DEVICE_UNCACHED_AMD"); +   
                                     #endif + +                                            flags &= ~(VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT +                                        #if VMA_VULKAN_VERSION >= 1001000 +                                                | VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT +                                        #endif +                                        #if VK_AMD_device_coherent_memory +                                                | VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY +                                                | VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY +                                        #endif +                                                | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT +                                                | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT +                                                | VK_MEMORY_PROPERTY_HOST_CACHED_BIT); +                                            if (flags != 0) +                                                json.WriteNumber(flags); +                                        } +                                        json.EndArray(); + +                                        json.WriteString("Stats"); +                                        VmaPrintDetailedStatistics(json, stats.memoryType[typeIndex]); +                                    } +                                    json.EndObject(); +                                } +                            } + +                        } +                        json.EndObject(); +                    } +                    json.EndObject(); +                } +            } +            json.EndObject(); +        } + +        if (detailedMap == VK_TRUE) +            allocator->PrintDetailedMap(json); + +        json.EndObject(); +    } + +    *ppStatsString = VmaCreateStringCopy(allocator->GetAllocationCallbacks(), sb.GetData(), sb.GetLength()); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaFreeStatsString( +    VmaAllocator allocator, +    char* pStatsString) +{ +    if(pStatsString != VMA_NULL) +    { +        VMA_ASSERT(allocator); +        VmaFreeString(allocator->GetAllocationCallbacks(), pStatsString); +    } +} + +#endif // VMA_STATS_STRING_ENABLED + +/* +This function is not protected by any mutex because it just reads immutable data. 
+*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndex( +    VmaAllocator allocator, +    uint32_t memoryTypeBits, +    const VmaAllocationCreateInfo* pAllocationCreateInfo, +    uint32_t* pMemoryTypeIndex) +{ +    VMA_ASSERT(allocator != VK_NULL_HANDLE); +    VMA_ASSERT(pAllocationCreateInfo != VMA_NULL); +    VMA_ASSERT(pMemoryTypeIndex != VMA_NULL); + +    return allocator->FindMemoryTypeIndex(memoryTypeBits, pAllocationCreateInfo, VmaBufferImageUsage::UNKNOWN, pMemoryTypeIndex); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo( +    VmaAllocator allocator, +    const VkBufferCreateInfo* pBufferCreateInfo, +    const VmaAllocationCreateInfo* pAllocationCreateInfo, +    uint32_t* pMemoryTypeIndex) +{ +    VMA_ASSERT(allocator != VK_NULL_HANDLE); +    VMA_ASSERT(pBufferCreateInfo != VMA_NULL); +    VMA_ASSERT(pAllocationCreateInfo != VMA_NULL); +    VMA_ASSERT(pMemoryTypeIndex != VMA_NULL); + +    const VkDevice hDev = allocator->m_hDevice; +    const VmaVulkanFunctions* funcs = &allocator->GetVulkanFunctions(); +    VkResult res; + +#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 +    if(funcs->vkGetDeviceBufferMemoryRequirements) +    { +        // Can query straight from VkBufferCreateInfo :) +        VkDeviceBufferMemoryRequirementsKHR devBufMemReq = {VK_STRUCTURE_TYPE_DEVICE_BUFFER_MEMORY_REQUIREMENTS_KHR}; +        devBufMemReq.pCreateInfo = pBufferCreateInfo; + +        VkMemoryRequirements2 memReq = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2}; +        (*funcs->vkGetDeviceBufferMemoryRequirements)(hDev, &devBufMemReq, &memReq); + +        res = allocator->FindMemoryTypeIndex( +            memReq.memoryRequirements.memoryTypeBits, pAllocationCreateInfo, +            VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), pMemoryTypeIndex); +    } +    else +#endif // VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 +    { +        // Must create a dummy buffer to query :( +        VkBuffer hBuffer = VK_NULL_HANDLE; +        res = funcs->vkCreateBuffer( +            hDev, pBufferCreateInfo, allocator->GetAllocationCallbacks(), &hBuffer); +        if(res == VK_SUCCESS) +        { +            VkMemoryRequirements memReq = {}; +            funcs->vkGetBufferMemoryRequirements(hDev, hBuffer, &memReq); + +            res = allocator->FindMemoryTypeIndex( +                memReq.memoryTypeBits, pAllocationCreateInfo, +                VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), pMemoryTypeIndex); + +            funcs->vkDestroyBuffer( +                hDev, hBuffer, allocator->GetAllocationCallbacks()); +        } +    } +    return res; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForImageInfo( +    VmaAllocator allocator, +    const VkImageCreateInfo* pImageCreateInfo, +    const VmaAllocationCreateInfo* pAllocationCreateInfo, +    uint32_t* pMemoryTypeIndex) +{ +    VMA_ASSERT(allocator != VK_NULL_HANDLE); +    VMA_ASSERT(pImageCreateInfo != VMA_NULL); +    VMA_ASSERT(pAllocationCreateInfo != VMA_NULL); +    VMA_ASSERT(pMemoryTypeIndex != VMA_NULL); + +    const VkDevice hDev = allocator->m_hDevice; +    const VmaVulkanFunctions* funcs = &allocator->GetVulkanFunctions(); +    VkResult res; + +#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 +    if(funcs->vkGetDeviceImageMemoryRequirements) +    { +        // Can query straight from VkImageCreateInfo :) +        VkDeviceImageMemoryRequirementsKHR devImgMemReq = 
{VK_STRUCTURE_TYPE_DEVICE_IMAGE_MEMORY_REQUIREMENTS_KHR}; +        devImgMemReq.pCreateInfo = pImageCreateInfo; +        VMA_ASSERT(pImageCreateInfo->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT_COPY && (pImageCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT_COPY) == 0 && +            "Cannot use this VkImageCreateInfo with vmaFindMemoryTypeIndexForImageInfo as I don't know what to pass as VkDeviceImageMemoryRequirements::planeAspect."); + +        VkMemoryRequirements2 memReq = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2}; +        (*funcs->vkGetDeviceImageMemoryRequirements)(hDev, &devImgMemReq, &memReq); + +        res = allocator->FindMemoryTypeIndex( +            memReq.memoryRequirements.memoryTypeBits, pAllocationCreateInfo, +            VmaBufferImageUsage(*pImageCreateInfo), pMemoryTypeIndex); +    } +    else +#endif // VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 +    { +        // Must create a dummy image to query :( +        VkImage hImage = VK_NULL_HANDLE; +        res = funcs->vkCreateImage( +            hDev, pImageCreateInfo, allocator->GetAllocationCallbacks(), &hImage); +        if(res == VK_SUCCESS) +        { +            VkMemoryRequirements memReq = {}; +            funcs->vkGetImageMemoryRequirements(hDev, hImage, &memReq); + +            res = allocator->FindMemoryTypeIndex( +                memReq.memoryTypeBits, pAllocationCreateInfo, +                VmaBufferImageUsage(*pImageCreateInfo), pMemoryTypeIndex); + +            funcs->vkDestroyImage( +                hDev, hImage, allocator->GetAllocationCallbacks()); +        } +    } +    return res; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreatePool( +    VmaAllocator allocator, +    const VmaPoolCreateInfo* pCreateInfo, +    VmaPool* pPool) +{ +    VMA_ASSERT(allocator && pCreateInfo && pPool); + +    VMA_DEBUG_LOG("vmaCreatePool"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    return allocator->CreatePool(pCreateInfo, pPool); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyPool( +    VmaAllocator allocator, +    VmaPool pool) +{ +    VMA_ASSERT(allocator); + +    if(pool == VK_NULL_HANDLE) +    { +        return; +    } + +    VMA_DEBUG_LOG("vmaDestroyPool"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    allocator->DestroyPool(pool); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolStatistics( +    VmaAllocator allocator, +    VmaPool pool, +    VmaStatistics* pPoolStats) +{ +    VMA_ASSERT(allocator && pool && pPoolStats); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    allocator->GetPoolStatistics(pool, pPoolStats); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaCalculatePoolStatistics( +    VmaAllocator allocator, +    VmaPool pool, +    VmaDetailedStatistics* pPoolStats) +{ +    VMA_ASSERT(allocator && pool && pPoolStats); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    allocator->CalculatePoolStatistics(pool, pPoolStats); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckPoolCorruption(VmaAllocator allocator, VmaPool pool) +{ +    VMA_ASSERT(allocator && pool); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    VMA_DEBUG_LOG("vmaCheckPoolCorruption"); + +    return allocator->CheckPoolCorruption(pool); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolName( +    VmaAllocator allocator, +    VmaPool pool, +    const char** ppName) +{ +    VMA_ASSERT(allocator && pool && ppName); + +    VMA_DEBUG_LOG("vmaGetPoolName"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    *ppName = pool->GetName(); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaSetPoolName( +    VmaAllocator allocator, +    VmaPool pool, +    const char* pName) +{ +    
VMA_ASSERT(allocator && pool); + +    VMA_DEBUG_LOG("vmaSetPoolName"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    pool->SetName(pName); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemory( +    VmaAllocator allocator, +    const VkMemoryRequirements* pVkMemoryRequirements, +    const VmaAllocationCreateInfo* pCreateInfo, +    VmaAllocation* pAllocation, +    VmaAllocationInfo* pAllocationInfo) +{ +    VMA_ASSERT(allocator && pVkMemoryRequirements && pCreateInfo && pAllocation); + +    VMA_DEBUG_LOG("vmaAllocateMemory"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    VkResult result = allocator->AllocateMemory( +        *pVkMemoryRequirements, +        false, // requiresDedicatedAllocation +        false, // prefersDedicatedAllocation +        VK_NULL_HANDLE, // dedicatedBuffer +        VK_NULL_HANDLE, // dedicatedImage +        VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage +        *pCreateInfo, +        VMA_SUBALLOCATION_TYPE_UNKNOWN, +        1, // allocationCount +        pAllocation); + +    if(pAllocationInfo != VMA_NULL && result == VK_SUCCESS) +    { +        allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); +    } + +    return result; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryPages( +    VmaAllocator allocator, +    const VkMemoryRequirements* pVkMemoryRequirements, +    const VmaAllocationCreateInfo* pCreateInfo, +    size_t allocationCount, +    VmaAllocation* pAllocations, +    VmaAllocationInfo* pAllocationInfo) +{ +    if(allocationCount == 0) +    { +        return VK_SUCCESS; +    } + +    VMA_ASSERT(allocator && pVkMemoryRequirements && pCreateInfo && pAllocations); + +    VMA_DEBUG_LOG("vmaAllocateMemoryPages"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    VkResult result = allocator->AllocateMemory( +        *pVkMemoryRequirements, +        false, // requiresDedicatedAllocation +        false, // prefersDedicatedAllocation +        VK_NULL_HANDLE, // dedicatedBuffer +        VK_NULL_HANDLE, // dedicatedImage +        VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage +        *pCreateInfo, +        VMA_SUBALLOCATION_TYPE_UNKNOWN, +        allocationCount, +        pAllocations); + +    if(pAllocationInfo != VMA_NULL && result == VK_SUCCESS) +    { +        for(size_t i = 0; i < allocationCount; ++i) +        { +            allocator->GetAllocationInfo(pAllocations[i], pAllocationInfo + i); +        } +    } + +    return result; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForBuffer( +    VmaAllocator allocator, +    VkBuffer buffer, +    const VmaAllocationCreateInfo* pCreateInfo, +    VmaAllocation* pAllocation, +    VmaAllocationInfo* pAllocationInfo) +{ +    VMA_ASSERT(allocator && buffer != VK_NULL_HANDLE && pCreateInfo && pAllocation); + +    VMA_DEBUG_LOG("vmaAllocateMemoryForBuffer"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    VkMemoryRequirements vkMemReq = {}; +    bool requiresDedicatedAllocation = false; +    bool prefersDedicatedAllocation = false; +    allocator->GetBufferMemoryRequirements(buffer, vkMemReq, +        requiresDedicatedAllocation, +        prefersDedicatedAllocation); + +    VkResult result = allocator->AllocateMemory( +        vkMemReq, +        requiresDedicatedAllocation, +        prefersDedicatedAllocation, +        buffer, // dedicatedBuffer +        VK_NULL_HANDLE, // dedicatedImage +        VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage +        *pCreateInfo, +        VMA_SUBALLOCATION_TYPE_BUFFER, +        1, // allocationCount +        pAllocation); + +    
if(pAllocationInfo && result == VK_SUCCESS) +    { +        allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); +    } + +    return result; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForImage( +    VmaAllocator allocator, +    VkImage image, +    const VmaAllocationCreateInfo* pCreateInfo, +    VmaAllocation* pAllocation, +    VmaAllocationInfo* pAllocationInfo) +{ +    VMA_ASSERT(allocator && image != VK_NULL_HANDLE && pCreateInfo && pAllocation); + +    VMA_DEBUG_LOG("vmaAllocateMemoryForImage"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    VkMemoryRequirements vkMemReq = {}; +    bool requiresDedicatedAllocation = false; +    bool prefersDedicatedAllocation  = false; +    allocator->GetImageMemoryRequirements(image, vkMemReq, +        requiresDedicatedAllocation, prefersDedicatedAllocation); + +    VkResult result = allocator->AllocateMemory( +        vkMemReq, +        requiresDedicatedAllocation, +        prefersDedicatedAllocation, +        VK_NULL_HANDLE, // dedicatedBuffer +        image, // dedicatedImage +        VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage +        *pCreateInfo, +        VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN, +        1, // allocationCount +        pAllocation); + +    if(pAllocationInfo && result == VK_SUCCESS) +    { +        allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); +    } + +    return result; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemory( +    VmaAllocator allocator, +    VmaAllocation allocation) +{ +    VMA_ASSERT(allocator); + +    if(allocation == VK_NULL_HANDLE) +    { +        return; +    } + +    VMA_DEBUG_LOG("vmaFreeMemory"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    allocator->FreeMemory( +        1, // allocationCount +        &allocation); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemoryPages( +    VmaAllocator allocator, +    size_t allocationCount, +    const VmaAllocation* pAllocations) +{ +    if(allocationCount == 0) +    { +        return; +    } + +    VMA_ASSERT(allocator); + +    VMA_DEBUG_LOG("vmaFreeMemoryPages"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    allocator->FreeMemory(allocationCount, pAllocations); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo( +    VmaAllocator allocator, +    VmaAllocation allocation, +    VmaAllocationInfo* pAllocationInfo) +{ +    VMA_ASSERT(allocator && allocation && pAllocationInfo); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    allocator->GetAllocationInfo(allocation, pAllocationInfo); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo2( +    VmaAllocator allocator, +    VmaAllocation allocation, +    VmaAllocationInfo2* pAllocationInfo) +{ +    VMA_ASSERT(allocator && allocation && pAllocationInfo); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    allocator->GetAllocationInfo2(allocation, pAllocationInfo); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationUserData( +    VmaAllocator allocator, +    VmaAllocation allocation, +    void* pUserData) +{ +    VMA_ASSERT(allocator && allocation); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    allocation->SetUserData(allocator, pUserData); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationName( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    const char* VMA_NULLABLE pName) +{ +    allocation->SetName(allocator, pName); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationMemoryProperties( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    VkMemoryPropertyFlags* VMA_NOT_NULL pFlags) +{ +    
VMA_ASSERT(allocator && allocation && pFlags); +    const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex(); +    *pFlags = allocator->m_MemProps.memoryTypes[memTypeIndex].propertyFlags; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaMapMemory( +    VmaAllocator allocator, +    VmaAllocation allocation, +    void** ppData) +{ +    VMA_ASSERT(allocator && allocation && ppData); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    return allocator->Map(allocation, ppData); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaUnmapMemory( +    VmaAllocator allocator, +    VmaAllocation allocation) +{ +    VMA_ASSERT(allocator && allocation); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    allocator->Unmap(allocation); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocation( +    VmaAllocator allocator, +    VmaAllocation allocation, +    VkDeviceSize offset, +    VkDeviceSize size) +{ +    VMA_ASSERT(allocator && allocation); + +    VMA_DEBUG_LOG("vmaFlushAllocation"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    return allocator->FlushOrInvalidateAllocation(allocation, offset, size, VMA_CACHE_FLUSH); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocation( +    VmaAllocator allocator, +    VmaAllocation allocation, +    VkDeviceSize offset, +    VkDeviceSize size) +{ +    VMA_ASSERT(allocator && allocation); + +    VMA_DEBUG_LOG("vmaInvalidateAllocation"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    return allocator->FlushOrInvalidateAllocation(allocation, offset, size, VMA_CACHE_INVALIDATE); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocations( +    VmaAllocator allocator, +    uint32_t allocationCount, +    const VmaAllocation* allocations, +    const VkDeviceSize* offsets, +    const VkDeviceSize* sizes) +{ +    VMA_ASSERT(allocator); + +    if(allocationCount == 0) +    { +        return VK_SUCCESS; +    } + +    VMA_ASSERT(allocations); + +    VMA_DEBUG_LOG("vmaFlushAllocations"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    return allocator->FlushOrInvalidateAllocations(allocationCount, allocations, offsets, sizes, VMA_CACHE_FLUSH); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocations( +    VmaAllocator allocator, +    uint32_t allocationCount, +    const VmaAllocation* allocations, +    const VkDeviceSize* offsets, +    const VkDeviceSize* sizes) +{ +    VMA_ASSERT(allocator); + +    if(allocationCount == 0) +    { +        return VK_SUCCESS; +    } + +    VMA_ASSERT(allocations); + +    VMA_DEBUG_LOG("vmaInvalidateAllocations"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    return allocator->FlushOrInvalidateAllocations(allocationCount, allocations, offsets, sizes, VMA_CACHE_INVALIDATE); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyMemoryToAllocation( +    VmaAllocator allocator, +    const void* pSrcHostPointer, +    VmaAllocation dstAllocation, +    VkDeviceSize dstAllocationLocalOffset, +    VkDeviceSize size) +{ +    VMA_ASSERT(allocator && pSrcHostPointer && dstAllocation); + +    if(size == 0) +    { +        return VK_SUCCESS; +    } + +    VMA_DEBUG_LOG("vmaCopyMemoryToAllocation"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    return allocator->CopyMemoryToAllocation(pSrcHostPointer, dstAllocation, dstAllocationLocalOffset, size); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyAllocationToMemory( +    VmaAllocator allocator, +    VmaAllocation srcAllocation, +    VkDeviceSize srcAllocationLocalOffset, +    void* pDstHostPointer, +    VkDeviceSize size) +{ +    VMA_ASSERT(allocator && srcAllocation && pDstHostPointer); + +    if(size == 0) +    { +        return 
VK_SUCCESS; +    } + +    VMA_DEBUG_LOG("vmaCopyAllocationToMemory"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    return allocator->CopyAllocationToMemory(srcAllocation, srcAllocationLocalOffset, pDstHostPointer, size); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckCorruption( +    VmaAllocator allocator, +    uint32_t memoryTypeBits) +{ +    VMA_ASSERT(allocator); + +    VMA_DEBUG_LOG("vmaCheckCorruption"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    return allocator->CheckCorruption(memoryTypeBits); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentation( +    VmaAllocator allocator, +    const VmaDefragmentationInfo* pInfo, +    VmaDefragmentationContext* pContext) +{ +    VMA_ASSERT(allocator && pInfo && pContext); + +    VMA_DEBUG_LOG("vmaBeginDefragmentation"); + +    if (pInfo->pool != VMA_NULL) +    { +        // Check if run on supported algorithms +        if (pInfo->pool->m_BlockVector.GetAlgorithm() & VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT) +            return VK_ERROR_FEATURE_NOT_PRESENT; +    } + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    *pContext = vma_new(allocator, VmaDefragmentationContext_T)(allocator, *pInfo); +    return VK_SUCCESS; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaEndDefragmentation( +    VmaAllocator allocator, +    VmaDefragmentationContext context, +    VmaDefragmentationStats* pStats) +{ +    VMA_ASSERT(allocator && context); + +    VMA_DEBUG_LOG("vmaEndDefragmentation"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    if (pStats) +        context->GetStats(*pStats); +    vma_delete(allocator, context); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentationPass( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaDefragmentationContext VMA_NOT_NULL context, +    VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo) +{ +    VMA_ASSERT(context && pPassInfo); + +    VMA_DEBUG_LOG("vmaBeginDefragmentationPass"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    return context->DefragmentPassBegin(*pPassInfo); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaEndDefragmentationPass( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaDefragmentationContext VMA_NOT_NULL context, +    VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo) +{ +    VMA_ASSERT(context && pPassInfo); + +    VMA_DEBUG_LOG("vmaEndDefragmentationPass"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    return context->DefragmentPassEnd(*pPassInfo); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory( +    VmaAllocator allocator, +    VmaAllocation allocation, +    VkBuffer buffer) +{ +    VMA_ASSERT(allocator && allocation && buffer); + +    VMA_DEBUG_LOG("vmaBindBufferMemory"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    return allocator->BindBufferMemory(allocation, 0, buffer, VMA_NULL); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory2( +    VmaAllocator allocator, +    VmaAllocation allocation, +    VkDeviceSize allocationLocalOffset, +    VkBuffer buffer, +    const void* pNext) +{ +    VMA_ASSERT(allocator && allocation && buffer); + +    VMA_DEBUG_LOG("vmaBindBufferMemory2"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    return allocator->BindBufferMemory(allocation, allocationLocalOffset, buffer, pNext); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory( +    VmaAllocator allocator, +    VmaAllocation allocation, +    VkImage image) +{ +    VMA_ASSERT(allocator && allocation && image); + +    VMA_DEBUG_LOG("vmaBindImageMemory"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    return allocator->BindImageMemory(allocation, 0, image, VMA_NULL); +} + +VMA_CALL_PRE 
VkResult VMA_CALL_POST vmaBindImageMemory2( +    VmaAllocator allocator, +    VmaAllocation allocation, +    VkDeviceSize allocationLocalOffset, +    VkImage image, +    const void* pNext) +{ +    VMA_ASSERT(allocator && allocation && image); + +    VMA_DEBUG_LOG("vmaBindImageMemory2"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +        return allocator->BindImageMemory(allocation, allocationLocalOffset, image, pNext); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBuffer( +    VmaAllocator allocator, +    const VkBufferCreateInfo* pBufferCreateInfo, +    const VmaAllocationCreateInfo* pAllocationCreateInfo, +    VkBuffer* pBuffer, +    VmaAllocation* pAllocation, +    VmaAllocationInfo* pAllocationInfo) +{ +    VMA_ASSERT(allocator && pBufferCreateInfo && pAllocationCreateInfo && pBuffer && pAllocation); + +    if(pBufferCreateInfo->size == 0) +    { +        return VK_ERROR_INITIALIZATION_FAILED; +    } +    if((pBufferCreateInfo->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY) != 0 && +        !allocator->m_UseKhrBufferDeviceAddress) +    { +        VMA_ASSERT(0 && "Creating a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT is not valid if VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT was not used."); +        return VK_ERROR_INITIALIZATION_FAILED; +    } + +    VMA_DEBUG_LOG("vmaCreateBuffer"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    *pBuffer = VK_NULL_HANDLE; +    *pAllocation = VK_NULL_HANDLE; + +    // 1. Create VkBuffer. +    VkResult res = (*allocator->GetVulkanFunctions().vkCreateBuffer)( +        allocator->m_hDevice, +        pBufferCreateInfo, +        allocator->GetAllocationCallbacks(), +        pBuffer); +    if(res >= 0) +    { +        // 2. vkGetBufferMemoryRequirements. +        VkMemoryRequirements vkMemReq = {}; +        bool requiresDedicatedAllocation = false; +        bool prefersDedicatedAllocation  = false; +        allocator->GetBufferMemoryRequirements(*pBuffer, vkMemReq, +            requiresDedicatedAllocation, prefersDedicatedAllocation); + +        // 3. Allocate memory using allocator. +        res = allocator->AllocateMemory( +            vkMemReq, +            requiresDedicatedAllocation, +            prefersDedicatedAllocation, +            *pBuffer, // dedicatedBuffer +            VK_NULL_HANDLE, // dedicatedImage +            VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), // dedicatedBufferImageUsage +            *pAllocationCreateInfo, +            VMA_SUBALLOCATION_TYPE_BUFFER, +            1, // allocationCount +            pAllocation); + +        if(res >= 0) +        { +            // 3. Bind buffer with memory. +            if((pAllocationCreateInfo->flags & VMA_ALLOCATION_CREATE_DONT_BIND_BIT) == 0) +            { +                res = allocator->BindBufferMemory(*pAllocation, 0, *pBuffer, VMA_NULL); +            } +            if(res >= 0) +            { +                // All steps succeeded. 
+                #if VMA_STATS_STRING_ENABLED +                    (*pAllocation)->InitBufferUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5); +                #endif +                if(pAllocationInfo != VMA_NULL) +                { +                    allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); +                } + +                return VK_SUCCESS; +            } +            allocator->FreeMemory( +                1, // allocationCount +                pAllocation); +            *pAllocation = VK_NULL_HANDLE; +            (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); +            *pBuffer = VK_NULL_HANDLE; +            return res; +        } +        (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); +        *pBuffer = VK_NULL_HANDLE; +        return res; +    } +    return res; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBufferWithAlignment( +    VmaAllocator allocator, +    const VkBufferCreateInfo* pBufferCreateInfo, +    const VmaAllocationCreateInfo* pAllocationCreateInfo, +    VkDeviceSize minAlignment, +    VkBuffer* pBuffer, +    VmaAllocation* pAllocation, +    VmaAllocationInfo* pAllocationInfo) +{ +    VMA_ASSERT(allocator && pBufferCreateInfo && pAllocationCreateInfo && VmaIsPow2(minAlignment) && pBuffer && pAllocation); + +    if(pBufferCreateInfo->size == 0) +    { +        return VK_ERROR_INITIALIZATION_FAILED; +    } +    if((pBufferCreateInfo->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY) != 0 && +        !allocator->m_UseKhrBufferDeviceAddress) +    { +        VMA_ASSERT(0 && "Creating a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT is not valid if VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT was not used."); +        return VK_ERROR_INITIALIZATION_FAILED; +    } + +    VMA_DEBUG_LOG("vmaCreateBufferWithAlignment"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    *pBuffer = VK_NULL_HANDLE; +    *pAllocation = VK_NULL_HANDLE; + +    // 1. Create VkBuffer. +    VkResult res = (*allocator->GetVulkanFunctions().vkCreateBuffer)( +        allocator->m_hDevice, +        pBufferCreateInfo, +        allocator->GetAllocationCallbacks(), +        pBuffer); +    if(res >= 0) +    { +        // 2. vkGetBufferMemoryRequirements. +        VkMemoryRequirements vkMemReq = {}; +        bool requiresDedicatedAllocation = false; +        bool prefersDedicatedAllocation  = false; +        allocator->GetBufferMemoryRequirements(*pBuffer, vkMemReq, +            requiresDedicatedAllocation, prefersDedicatedAllocation); + +        // 2a. Include minAlignment +        vkMemReq.alignment = VMA_MAX(vkMemReq.alignment, minAlignment); + +        // 3. Allocate memory using allocator. +        res = allocator->AllocateMemory( +            vkMemReq, +            requiresDedicatedAllocation, +            prefersDedicatedAllocation, +            *pBuffer, // dedicatedBuffer +            VK_NULL_HANDLE, // dedicatedImage +            VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), // dedicatedBufferImageUsage +            *pAllocationCreateInfo, +            VMA_SUBALLOCATION_TYPE_BUFFER, +            1, // allocationCount +            pAllocation); + +        if(res >= 0) +        { +            // 3. Bind buffer with memory. 
+            if((pAllocationCreateInfo->flags & VMA_ALLOCATION_CREATE_DONT_BIND_BIT) == 0) +            { +                res = allocator->BindBufferMemory(*pAllocation, 0, *pBuffer, VMA_NULL); +            } +            if(res >= 0) +            { +                // All steps succeeded. +                #if VMA_STATS_STRING_ENABLED +                    (*pAllocation)->InitBufferUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5); +                #endif +                if(pAllocationInfo != VMA_NULL) +                { +                    allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); +                } + +                return VK_SUCCESS; +            } +            allocator->FreeMemory( +                1, // allocationCount +                pAllocation); +            *pAllocation = VK_NULL_HANDLE; +            (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); +            *pBuffer = VK_NULL_HANDLE; +            return res; +        } +        (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); +        *pBuffer = VK_NULL_HANDLE; +        return res; +    } +    return res; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, +    VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer) +{ +    return vmaCreateAliasingBuffer2(allocator, allocation, 0, pBufferCreateInfo, pBuffer); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer2( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    VkDeviceSize allocationLocalOffset, +    const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, +    VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer) +{ +    VMA_ASSERT(allocator && pBufferCreateInfo && pBuffer && allocation); +    VMA_ASSERT(allocationLocalOffset + pBufferCreateInfo->size <= allocation->GetSize()); + +    VMA_DEBUG_LOG("vmaCreateAliasingBuffer2"); + +    *pBuffer = VK_NULL_HANDLE; + +    if (pBufferCreateInfo->size == 0) +    { +        return VK_ERROR_INITIALIZATION_FAILED; +    } +    if ((pBufferCreateInfo->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY) != 0 && +        !allocator->m_UseKhrBufferDeviceAddress) +    { +        VMA_ASSERT(0 && "Creating a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT is not valid if VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT was not used."); +        return VK_ERROR_INITIALIZATION_FAILED; +    } + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    // 1. Create VkBuffer. +    VkResult res = (*allocator->GetVulkanFunctions().vkCreateBuffer)( +        allocator->m_hDevice, +        pBufferCreateInfo, +        allocator->GetAllocationCallbacks(), +        pBuffer); +    if (res >= 0) +    { +        // 2. Bind buffer with memory. 
+        res = allocator->BindBufferMemory(allocation, allocationLocalOffset, *pBuffer, VMA_NULL); +        if (res >= 0) +        { +            return VK_SUCCESS; +        } +        (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); +    } +    return res; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyBuffer( +    VmaAllocator allocator, +    VkBuffer buffer, +    VmaAllocation allocation) +{ +    VMA_ASSERT(allocator); + +    if(buffer == VK_NULL_HANDLE && allocation == VK_NULL_HANDLE) +    { +        return; +    } + +    VMA_DEBUG_LOG("vmaDestroyBuffer"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    if(buffer != VK_NULL_HANDLE) +    { +        (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, buffer, allocator->GetAllocationCallbacks()); +    } + +    if(allocation != VK_NULL_HANDLE) +    { +        allocator->FreeMemory( +            1, // allocationCount +            &allocation); +    } +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage( +    VmaAllocator allocator, +    const VkImageCreateInfo* pImageCreateInfo, +    const VmaAllocationCreateInfo* pAllocationCreateInfo, +    VkImage* pImage, +    VmaAllocation* pAllocation, +    VmaAllocationInfo* pAllocationInfo) +{ +    VMA_ASSERT(allocator && pImageCreateInfo && pAllocationCreateInfo && pImage && pAllocation); + +    if(pImageCreateInfo->extent.width == 0 || +        pImageCreateInfo->extent.height == 0 || +        pImageCreateInfo->extent.depth == 0 || +        pImageCreateInfo->mipLevels == 0 || +        pImageCreateInfo->arrayLayers == 0) +    { +        return VK_ERROR_INITIALIZATION_FAILED; +    } + +    VMA_DEBUG_LOG("vmaCreateImage"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    *pImage = VK_NULL_HANDLE; +    *pAllocation = VK_NULL_HANDLE; + +    // 1. Create VkImage. +    VkResult res = (*allocator->GetVulkanFunctions().vkCreateImage)( +        allocator->m_hDevice, +        pImageCreateInfo, +        allocator->GetAllocationCallbacks(), +        pImage); +    if(res == VK_SUCCESS) +    { +        VmaSuballocationType suballocType = pImageCreateInfo->tiling == VK_IMAGE_TILING_OPTIMAL ? +            VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL : +            VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR; + +        // 2. Allocate memory using allocator. +        VkMemoryRequirements vkMemReq = {}; +        bool requiresDedicatedAllocation = false; +        bool prefersDedicatedAllocation  = false; +        allocator->GetImageMemoryRequirements(*pImage, vkMemReq, +            requiresDedicatedAllocation, prefersDedicatedAllocation); + +        res = allocator->AllocateMemory( +            vkMemReq, +            requiresDedicatedAllocation, +            prefersDedicatedAllocation, +            VK_NULL_HANDLE, // dedicatedBuffer +            *pImage, // dedicatedImage +            VmaBufferImageUsage(*pImageCreateInfo), // dedicatedBufferImageUsage +            *pAllocationCreateInfo, +            suballocType, +            1, // allocationCount +            pAllocation); + +        if(res == VK_SUCCESS) +        { +            // 3. Bind image with memory. +            if((pAllocationCreateInfo->flags & VMA_ALLOCATION_CREATE_DONT_BIND_BIT) == 0) +            { +                res = allocator->BindImageMemory(*pAllocation, 0, *pImage, VMA_NULL); +            } +            if(res == VK_SUCCESS) +            { +                // All steps succeeded. 
+                #if VMA_STATS_STRING_ENABLED +                    (*pAllocation)->InitImageUsage(*pImageCreateInfo); +                #endif +                if(pAllocationInfo != VMA_NULL) +                { +                    allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); +                } + +                return VK_SUCCESS; +            } +            allocator->FreeMemory( +                1, // allocationCount +                pAllocation); +            *pAllocation = VK_NULL_HANDLE; +            (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, *pImage, allocator->GetAllocationCallbacks()); +            *pImage = VK_NULL_HANDLE; +            return res; +        } +        (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, *pImage, allocator->GetAllocationCallbacks()); +        *pImage = VK_NULL_HANDLE; +        return res; +    } +    return res; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, +    VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage) +{ +    return vmaCreateAliasingImage2(allocator, allocation, 0, pImageCreateInfo, pImage); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage2( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    VkDeviceSize allocationLocalOffset, +    const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, +    VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage) +{ +    VMA_ASSERT(allocator && pImageCreateInfo && pImage && allocation); + +    *pImage = VK_NULL_HANDLE; + +    VMA_DEBUG_LOG("vmaCreateImage2"); + +    if (pImageCreateInfo->extent.width == 0 || +        pImageCreateInfo->extent.height == 0 || +        pImageCreateInfo->extent.depth == 0 || +        pImageCreateInfo->mipLevels == 0 || +        pImageCreateInfo->arrayLayers == 0) +    { +        return VK_ERROR_INITIALIZATION_FAILED; +    } + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    // 1. Create VkImage. +    VkResult res = (*allocator->GetVulkanFunctions().vkCreateImage)( +        allocator->m_hDevice, +        pImageCreateInfo, +        allocator->GetAllocationCallbacks(), +        pImage); +    if (res >= 0) +    { +        // 2. Bind image with memory. 
+        res = allocator->BindImageMemory(allocation, allocationLocalOffset, *pImage, VMA_NULL); +        if (res >= 0) +        { +            return VK_SUCCESS; +        } +        (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, *pImage, allocator->GetAllocationCallbacks()); +    } +    return res; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyImage( +    VmaAllocator VMA_NOT_NULL allocator, +    VkImage VMA_NULLABLE_NON_DISPATCHABLE image, +    VmaAllocation VMA_NULLABLE allocation) +{ +    VMA_ASSERT(allocator); + +    if(image == VK_NULL_HANDLE && allocation == VK_NULL_HANDLE) +    { +        return; +    } + +    VMA_DEBUG_LOG("vmaDestroyImage"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    if(image != VK_NULL_HANDLE) +    { +        (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, image, allocator->GetAllocationCallbacks()); +    } +    if(allocation != VK_NULL_HANDLE) +    { +        allocator->FreeMemory( +            1, // allocationCount +            &allocation); +    } +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateVirtualBlock( +    const VmaVirtualBlockCreateInfo* VMA_NOT_NULL pCreateInfo, +    VmaVirtualBlock VMA_NULLABLE * VMA_NOT_NULL pVirtualBlock) +{ +    VMA_ASSERT(pCreateInfo && pVirtualBlock); +    VMA_ASSERT(pCreateInfo->size > 0); +    VMA_DEBUG_LOG("vmaCreateVirtualBlock"); +    VMA_DEBUG_GLOBAL_MUTEX_LOCK; +    *pVirtualBlock = vma_new(pCreateInfo->pAllocationCallbacks, VmaVirtualBlock_T)(*pCreateInfo); +    VkResult res = (*pVirtualBlock)->Init(); +    if(res < 0) +    { +        vma_delete(pCreateInfo->pAllocationCallbacks, *pVirtualBlock); +        *pVirtualBlock = VK_NULL_HANDLE; +    } +    return res; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyVirtualBlock(VmaVirtualBlock VMA_NULLABLE virtualBlock) +{ +    if(virtualBlock != VK_NULL_HANDLE) +    { +        VMA_DEBUG_LOG("vmaDestroyVirtualBlock"); +        VMA_DEBUG_GLOBAL_MUTEX_LOCK; +        VkAllocationCallbacks allocationCallbacks = virtualBlock->m_AllocationCallbacks; // Have to copy the callbacks when destroying. +        vma_delete(&allocationCallbacks, virtualBlock); +    } +} + +VMA_CALL_PRE VkBool32 VMA_CALL_POST vmaIsVirtualBlockEmpty(VmaVirtualBlock VMA_NOT_NULL virtualBlock) +{ +    VMA_ASSERT(virtualBlock != VK_NULL_HANDLE); +    VMA_DEBUG_LOG("vmaIsVirtualBlockEmpty"); +    VMA_DEBUG_GLOBAL_MUTEX_LOCK; +    return virtualBlock->IsEmpty() ? 
VK_TRUE : VK_FALSE; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualAllocationInfo(VmaVirtualBlock VMA_NOT_NULL virtualBlock, +    VmaVirtualAllocation VMA_NOT_NULL_NON_DISPATCHABLE allocation, VmaVirtualAllocationInfo* VMA_NOT_NULL pVirtualAllocInfo) +{ +    VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pVirtualAllocInfo != VMA_NULL); +    VMA_DEBUG_LOG("vmaGetVirtualAllocationInfo"); +    VMA_DEBUG_GLOBAL_MUTEX_LOCK; +    virtualBlock->GetAllocationInfo(allocation, *pVirtualAllocInfo); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaVirtualAllocate(VmaVirtualBlock VMA_NOT_NULL virtualBlock, +    const VmaVirtualAllocationCreateInfo* VMA_NOT_NULL pCreateInfo, VmaVirtualAllocation VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pAllocation, +    VkDeviceSize* VMA_NULLABLE pOffset) +{ +    VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pCreateInfo != VMA_NULL && pAllocation != VMA_NULL); +    VMA_DEBUG_LOG("vmaVirtualAllocate"); +    VMA_DEBUG_GLOBAL_MUTEX_LOCK; +    return virtualBlock->Allocate(*pCreateInfo, *pAllocation, pOffset); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaVirtualFree(VmaVirtualBlock VMA_NOT_NULL virtualBlock, VmaVirtualAllocation VMA_NULLABLE_NON_DISPATCHABLE allocation) +{ +    if(allocation != VK_NULL_HANDLE) +    { +        VMA_ASSERT(virtualBlock != VK_NULL_HANDLE); +        VMA_DEBUG_LOG("vmaVirtualFree"); +        VMA_DEBUG_GLOBAL_MUTEX_LOCK; +        virtualBlock->Free(allocation); +    } +} + +VMA_CALL_PRE void VMA_CALL_POST vmaClearVirtualBlock(VmaVirtualBlock VMA_NOT_NULL virtualBlock) +{ +    VMA_ASSERT(virtualBlock != VK_NULL_HANDLE); +    VMA_DEBUG_LOG("vmaClearVirtualBlock"); +    VMA_DEBUG_GLOBAL_MUTEX_LOCK; +    virtualBlock->Clear(); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaSetVirtualAllocationUserData(VmaVirtualBlock VMA_NOT_NULL virtualBlock, +    VmaVirtualAllocation VMA_NOT_NULL_NON_DISPATCHABLE allocation, void* VMA_NULLABLE pUserData) +{ +    VMA_ASSERT(virtualBlock != VK_NULL_HANDLE); +    VMA_DEBUG_LOG("vmaSetVirtualAllocationUserData"); +    VMA_DEBUG_GLOBAL_MUTEX_LOCK; +    virtualBlock->SetAllocationUserData(allocation, pUserData); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualBlockStatistics(VmaVirtualBlock VMA_NOT_NULL virtualBlock, +    VmaStatistics* VMA_NOT_NULL pStats) +{ +    VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pStats != VMA_NULL); +    VMA_DEBUG_LOG("vmaGetVirtualBlockStatistics"); +    VMA_DEBUG_GLOBAL_MUTEX_LOCK; +    virtualBlock->GetStatistics(*pStats); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateVirtualBlockStatistics(VmaVirtualBlock VMA_NOT_NULL virtualBlock, +    VmaDetailedStatistics* VMA_NOT_NULL pStats) +{ +    VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pStats != VMA_NULL); +    VMA_DEBUG_LOG("vmaCalculateVirtualBlockStatistics"); +    VMA_DEBUG_GLOBAL_MUTEX_LOCK; +    virtualBlock->CalculateDetailedStatistics(*pStats); +} + +#if VMA_STATS_STRING_ENABLED + +VMA_CALL_PRE void VMA_CALL_POST vmaBuildVirtualBlockStatsString(VmaVirtualBlock VMA_NOT_NULL virtualBlock, +    char* VMA_NULLABLE * VMA_NOT_NULL ppStatsString, VkBool32 detailedMap) +{ +    VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && ppStatsString != VMA_NULL); +    VMA_DEBUG_GLOBAL_MUTEX_LOCK; +    const VkAllocationCallbacks* allocationCallbacks = virtualBlock->GetAllocationCallbacks(); +    VmaStringBuilder sb(allocationCallbacks); +    virtualBlock->BuildStatsString(detailedMap != VK_FALSE, sb); +    *ppStatsString = VmaCreateStringCopy(allocationCallbacks, sb.GetData(), sb.GetLength()); +} + +VMA_CALL_PRE void VMA_CALL_POST 
vmaFreeVirtualBlockStatsString(VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+    char* VMA_NULLABLE pStatsString)
+{
+    if(pStatsString != VMA_NULL)
+    {
+        VMA_ASSERT(virtualBlock != VK_NULL_HANDLE);
+        VMA_DEBUG_GLOBAL_MUTEX_LOCK;
+        VmaFreeString(virtualBlock->GetAllocationCallbacks(), pStatsString);
+    }
+}
+#if VMA_EXTERNAL_MEMORY_WIN32
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaGetMemoryWin32Handle(VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation, HANDLE hTargetProcess, HANDLE* VMA_NOT_NULL pHandle)
+{
+    VMA_ASSERT(allocator && allocation && pHandle);
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK;
+    return allocation->GetWin32Handle(allocator, hTargetProcess, pHandle);
+}
+#endif // VMA_EXTERNAL_MEMORY_WIN32
+#endif // VMA_STATS_STRING_ENABLED
+#endif // _VMA_PUBLIC_INTERFACE
+#endif // VMA_IMPLEMENTATION
+
+/**
+\page quick_start Quick start
+
+\section quick_start_project_setup Project setup
+
+Vulkan Memory Allocator comes in the form of a "stb-style" single header file.
+While you can pull the entire repository e.g. as a Git submodule and a CMake script is also provided,
+you don't need to build it as a separate library project.
+You can add the file "vk_mem_alloc.h" directly to your project and commit it to your code repository next to your other source files.
+
+"Single header" doesn't mean that everything is contained in C/C++ declarations,
+as tends to be the case with inline functions or C++ templates.
+It means that the implementation is bundled with the interface in a single file and needs to be extracted using a preprocessor macro.
+If you don't do it properly, it will result in linker errors.
+
+To do it properly:
+
+-# Include the "vk_mem_alloc.h" file in each CPP file where you want to use the library.
+   This includes the declarations of all members of the library.
+-# In exactly one CPP file, define the following macro before this include.
+   It also enables the internal definitions.
+
+\code
+#define VMA_IMPLEMENTATION
+#include "vk_mem_alloc.h"
+\endcode
+
+It may be a good idea to create a dedicated CPP file just for this purpose, e.g. "VmaUsage.cpp".
+
+This library includes header `<vulkan/vulkan.h>`, which in turn
+includes `<windows.h>` on Windows. If you need some specific macros defined
+before including these headers (like `WIN32_LEAN_AND_MEAN` or
+`WINVER` for Windows, `VK_USE_PLATFORM_WIN32_KHR` for Vulkan), you must define
+them before every `#include` of this library.
+It may be a good idea to create a dedicated header file for this purpose, e.g. "VmaUsage.h",
+that will be included in other source files instead of the VMA header directly.
+
+This library is written in C++, but has a C-compatible interface.
+Thus, you can include and use "vk_mem_alloc.h" in C or C++ code, but the full
+implementation with the `VMA_IMPLEMENTATION` macro must be compiled as C++, NOT as C.
+Some features of C++14 are used and required. Features of C++20 are used optionally when available.
+Some headers of the standard C and C++ library are used, but STL containers, RTTI, and C++ exceptions are not used.
+
+
+\section quick_start_initialization Initialization
+
+VMA offers a library interface in a style similar to Vulkan, with object handles like #VmaAllocation,
+structures describing parameters of objects to be created like #VmaAllocationCreateInfo,
+and error codes returned from functions using the `VkResult` type.
+
+The first and the main object that needs to be created is #VmaAllocator.
+It represents the initialization of the entire library.
+Only one such object should be created per `VkDevice`.
+You should create it at program startup, after `VkDevice` has been created, and before any device memory allocation is made.
+It must be destroyed before `VkDevice` is destroyed.
+
+At program startup:
+
+-# Initialize Vulkan to have `VkInstance`, `VkPhysicalDevice`, and `VkDevice` objects.
+-# Fill the VmaAllocatorCreateInfo structure and call vmaCreateAllocator() to create the #VmaAllocator object.
+
+Only the members `physicalDevice`, `device`, and `instance` are required.
+However, you should inform the library which Vulkan version you use by setting
+VmaAllocatorCreateInfo::vulkanApiVersion and which extensions you enabled
+by setting VmaAllocatorCreateInfo::flags.
+Otherwise, VMA would use only features of Vulkan 1.0 core with no extensions.
+See below for details.
+
+\subsection quick_start_initialization_selecting_vulkan_version Selecting Vulkan version
+
+VMA supports Vulkan versions down to 1.0, for backward compatibility.
+If you want to use a higher version, you need to inform the library about it.
+This is a two-step process.
+
+<b>Step 1: Compile time.</b> By default, VMA compiles with code supporting the highest
+Vulkan version found in the included `<vulkan/vulkan.h>` that is also supported by the library.
+If this is OK, you don't need to do anything.
+However, if you want to compile VMA as if only some lower Vulkan version was available,
+define the macro `VMA_VULKAN_VERSION` before every `#include "vk_mem_alloc.h"`.
+It should have a decimal numeric value in the form ABBBCCC, where A = major, BBB = minor, CCC = patch Vulkan version.
+For example, to compile against Vulkan 1.2:
+
+\code
+#define VMA_VULKAN_VERSION 1002000 // Vulkan 1.2
+#include "vk_mem_alloc.h"
+\endcode
+
+<b>Step 2: Runtime.</b> Even when compiled with a higher Vulkan version available,
+VMA can use only features of a lower version, which is configurable during creation of the #VmaAllocator object.
+By default, only Vulkan 1.0 is used.
+To initialize the allocator with support for a higher Vulkan version, you need to set the member
+VmaAllocatorCreateInfo::vulkanApiVersion to an appropriate value, e.g. using constants like `VK_API_VERSION_1_2`.
+See the code sample below.
+
+\subsection quick_start_initialization_importing_vulkan_functions Importing Vulkan functions
+
+You may need to configure how Vulkan functions are imported. There are 3 ways to do this:
+
+-# **If you link with the Vulkan static library** (e.g. "vulkan-1.lib" on Windows):
+   - You don't need to do anything.
+   - VMA will use these, as the macro `VMA_STATIC_VULKAN_FUNCTIONS` is defined to 1 by default.
+-# **If you want VMA to fetch pointers to Vulkan functions dynamically** using `vkGetInstanceProcAddr`,
+   `vkGetDeviceProcAddr` (this is the option presented in the example below):
+   - Define `VMA_STATIC_VULKAN_FUNCTIONS` to 0 and `VMA_DYNAMIC_VULKAN_FUNCTIONS` to 1.
+   - Provide pointers to these two functions via VmaVulkanFunctions::vkGetInstanceProcAddr,
+     VmaVulkanFunctions::vkGetDeviceProcAddr.
+   - The library will fetch pointers to all other functions it needs internally.
+-# **If you fetch pointers to all Vulkan functions in a custom way**, e.g. using some loader like
+   [Volk](https://github.com/zeux/volk):
+   - Define `VMA_STATIC_VULKAN_FUNCTIONS` and `VMA_DYNAMIC_VULKAN_FUNCTIONS` to 0.
+   - Pass these pointers via the structure #VmaVulkanFunctions.
+
+\subsection quick_start_initialization_enabling_extensions Enabling extensions
+
+VMA can automatically use the following Vulkan extensions.
+If you found them available on the selected physical device and you enabled them +while creating `VkInstance` / `VkDevice` object, inform VMA about their availability +by setting appropriate flags in VmaAllocatorCreateInfo::flags. + +Vulkan extension              | VMA flag +------------------------------|----------------------------------------------------- +VK_KHR_dedicated_allocation   | #VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT +VK_KHR_bind_memory2           | #VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT +VK_KHR_maintenance4           | #VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT +VK_KHR_maintenance5           | #VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT +VK_EXT_memory_budget          | #VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT +VK_KHR_buffer_device_address  | #VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT +VK_EXT_memory_priority        | #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT +VK_AMD_device_coherent_memory | #VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT +VK_KHR_external_memory_win32  | #VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT + +Example with fetching pointers to Vulkan functions dynamically: + +\code +#define VMA_STATIC_VULKAN_FUNCTIONS 0 +#define VMA_DYNAMIC_VULKAN_FUNCTIONS 1 +#include "vk_mem_alloc.h" + +... + +VmaVulkanFunctions vulkanFunctions = {}; +vulkanFunctions.vkGetInstanceProcAddr = &vkGetInstanceProcAddr; +vulkanFunctions.vkGetDeviceProcAddr = &vkGetDeviceProcAddr; + +VmaAllocatorCreateInfo allocatorCreateInfo = {}; +allocatorCreateInfo.flags = VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT; +allocatorCreateInfo.vulkanApiVersion = VK_API_VERSION_1_2; +allocatorCreateInfo.physicalDevice = physicalDevice; +allocatorCreateInfo.device = device; +allocatorCreateInfo.instance = instance; +allocatorCreateInfo.pVulkanFunctions = &vulkanFunctions; + +VmaAllocator allocator; +vmaCreateAllocator(&allocatorCreateInfo, &allocator); + +// Entire program... + +// At the end, don't forget to: +vmaDestroyAllocator(allocator); +\endcode + + +\subsection quick_start_initialization_other_config Other configuration options + +There are additional configuration options available through preprocessor macros that you can define +before including VMA header and through parameters passed in #VmaAllocatorCreateInfo. +They include a possibility to use your own callbacks for host memory allocations (`VkAllocationCallbacks`), +callbacks for device memory allocations (instead of `vkAllocateMemory`, `vkFreeMemory`), +or your custom `VMA_ASSERT` macro, among others. +For more information, see: @ref configuration. + + +\section quick_start_resource_allocation Resource allocation + +When you want to create a buffer or image: + +-# Fill `VkBufferCreateInfo` / `VkImageCreateInfo` structure. +-# Fill VmaAllocationCreateInfo structure. +-# Call vmaCreateBuffer() / vmaCreateImage() to get `VkBuffer`/`VkImage` with memory +   already allocated and bound to it, plus #VmaAllocation objects that represents its underlying memory. 
+ +\code +VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufferInfo.size = 65536; +bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo allocInfo = {}; +allocInfo.usage = VMA_MEMORY_USAGE_AUTO; + +VkBuffer buffer; +VmaAllocation allocation; +vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); +\endcode + +Don't forget to destroy your buffer and allocation objects when no longer needed: + +\code +vmaDestroyBuffer(allocator, buffer, allocation); +\endcode + +If you need to map the buffer, you must set flag +#VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT +in VmaAllocationCreateInfo::flags. +There are many additional parameters that can control the choice of memory type to be used for the allocation +and other features. +For more information, see documentation chapters: @ref choosing_memory_type, @ref memory_mapping. + + +\page choosing_memory_type Choosing memory type + +Physical devices in Vulkan support various combinations of memory heaps and +types. Help with choosing correct and optimal memory type for your specific +resource is one of the key features of this library. You can use it by filling +appropriate members of VmaAllocationCreateInfo structure, as described below. +You can also combine multiple methods. + +-# If you just want to find memory type index that meets your requirements, you +   can use function: vmaFindMemoryTypeIndexForBufferInfo(), +   vmaFindMemoryTypeIndexForImageInfo(), vmaFindMemoryTypeIndex(). +-# If you want to allocate a region of device memory without association with any +   specific image or buffer, you can use function vmaAllocateMemory(). Usage of +   this function is not recommended and usually not needed. +   vmaAllocateMemoryPages() function is also provided for creating multiple allocations at once, +   which may be useful for sparse binding. +-# If you already have a buffer or an image created, you want to allocate memory +   for it and then you will bind it yourself, you can use function +   vmaAllocateMemoryForBuffer(), vmaAllocateMemoryForImage(). +   For binding you should use functions: vmaBindBufferMemory(), vmaBindImageMemory() +   or their extended versions: vmaBindBufferMemory2(), vmaBindImageMemory2(). +-# If you want to create a buffer or an image, allocate memory for it, and bind +   them together, all in one call, you can use function vmaCreateBuffer(), +   vmaCreateImage(). +   <b>This is the easiest and recommended way to use this library!</b> + +When using 3. or 4., the library internally queries Vulkan for memory types +supported for that buffer or image (function `vkGetBufferMemoryRequirements()`) +and uses only one of these types. + +If no memory type can be found that meets all the requirements, these functions +return `VK_ERROR_FEATURE_NOT_PRESENT`. + +You can leave VmaAllocationCreateInfo structure completely filled with zeros. +It means no requirements are specified for memory type. +It is valid, although not very useful. + +\section choosing_memory_type_usage Usage + +The easiest way to specify memory requirements is to fill member +VmaAllocationCreateInfo::usage using one of the values of enum #VmaMemoryUsage. +It defines high level, common usage types. +Since version 3 of the library, it is recommended to use #VMA_MEMORY_USAGE_AUTO to let it select best memory type for your resource automatically. 
+ +For example, if you want to create a uniform buffer that will be filled using +transfer only once or infrequently and then used for rendering every frame as a uniform buffer, you can +do it using following code. The buffer will most likely end up in a memory type with +`VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT` to be fast to access by the GPU device. + +\code +VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufferInfo.size = 65536; +bufferInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo allocInfo = {}; +allocInfo.usage = VMA_MEMORY_USAGE_AUTO; + +VkBuffer buffer; +VmaAllocation allocation; +vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); +\endcode + +If you have a preference for putting the resource in GPU (device) memory or CPU (host) memory +on systems with discrete graphics card that have the memories separate, you can use +#VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE or #VMA_MEMORY_USAGE_AUTO_PREFER_HOST. + +When using `VMA_MEMORY_USAGE_AUTO*` while you want to map the allocated memory, +you also need to specify one of the host access flags: +#VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT. +This will help the library decide about preferred memory type to ensure it has `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` +so you can map it. + +For example, a staging buffer that will be filled via mapped pointer and then +used as a source of transfer to the buffer described previously can be created like this. +It will likely end up in a memory type that is `HOST_VISIBLE` and `HOST_COHERENT` +but not `HOST_CACHED` (meaning uncached, write-combined) and not `DEVICE_LOCAL` (meaning system RAM). + +\code +VkBufferCreateInfo stagingBufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +stagingBufferInfo.size = 65536; +stagingBufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + +VmaAllocationCreateInfo stagingAllocInfo = {}; +stagingAllocInfo.usage = VMA_MEMORY_USAGE_AUTO; +stagingAllocInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; + +VkBuffer stagingBuffer; +VmaAllocation stagingAllocation; +vmaCreateBuffer(allocator, &stagingBufferInfo, &stagingAllocInfo, &stagingBuffer, &stagingAllocation, nullptr); +\endcode + +For more examples of creating different kinds of resources, see chapter \ref usage_patterns. +See also: @ref memory_mapping. + +Usage values `VMA_MEMORY_USAGE_AUTO*` are legal to use only when the library knows +about the resource being created by having `VkBufferCreateInfo` / `VkImageCreateInfo` passed, +so they work with functions like: vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo() etc. +If you allocate raw memory using function vmaAllocateMemory(), you have to use other means of selecting +memory type, as described below. + +\note +Old usage values (`VMA_MEMORY_USAGE_GPU_ONLY`, `VMA_MEMORY_USAGE_CPU_ONLY`, +`VMA_MEMORY_USAGE_CPU_TO_GPU`, `VMA_MEMORY_USAGE_GPU_TO_CPU`, `VMA_MEMORY_USAGE_CPU_COPY`) +are still available and work same way as in previous versions of the library +for backward compatibility, but they are deprecated. + +\section choosing_memory_type_required_preferred_flags Required and preferred flags + +You can specify more detailed requirements by filling members +VmaAllocationCreateInfo::requiredFlags and VmaAllocationCreateInfo::preferredFlags +with a combination of bits from enum `VkMemoryPropertyFlags`. 
For example, +if you want to create a buffer that will be persistently mapped on host (so it +must be `HOST_VISIBLE`) and preferably will also be `HOST_COHERENT` and `HOST_CACHED`, +use following code: + +\code +VmaAllocationCreateInfo allocInfo = {}; +allocInfo.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; +allocInfo.preferredFlags = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; +allocInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT; + +VkBuffer buffer; +VmaAllocation allocation; +vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); +\endcode + +A memory type is chosen that has all the required flags and as many preferred +flags set as possible. + +Value passed in VmaAllocationCreateInfo::usage is internally converted to a set of required and preferred flags, +plus some extra "magic" (heuristics). + +\section choosing_memory_type_explicit_memory_types Explicit memory types + +If you inspected memory types available on the physical device and <b>you have +a preference for memory types that you want to use</b>, you can fill member +VmaAllocationCreateInfo::memoryTypeBits. It is a bit mask, where each bit set +means that a memory type with that index is allowed to be used for the +allocation. Special value 0, just like `UINT32_MAX`, means there are no +restrictions to memory type index. + +Please note that this member is NOT just a memory type index. +Still you can use it to choose just one, specific memory type. +For example, if you already determined that your buffer should be created in +memory type 2, use following code: + +\code +uint32_t memoryTypeIndex = 2; + +VmaAllocationCreateInfo allocInfo = {}; +allocInfo.memoryTypeBits = 1u << memoryTypeIndex; + +VkBuffer buffer; +VmaAllocation allocation; +vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); +\endcode + +You can also use this parameter to <b>exclude some memory types</b>. +If you inspect memory heaps and types available on the current physical device and +you determine that for some reason you don't want to use a specific memory type for the allocation, +you can enable automatic memory type selection but exclude certain memory type or types +by setting all bits of `memoryTypeBits` to 1 except the ones you choose. + +\code +// ... +uint32_t excludedMemoryTypeIndex = 2; +VmaAllocationCreateInfo allocInfo = {}; +allocInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocInfo.memoryTypeBits = ~(1u << excludedMemoryTypeIndex); +// ... +\endcode + + +\section choosing_memory_type_custom_memory_pools Custom memory pools + +If you allocate from custom memory pool, all the ways of specifying memory +requirements described above are not applicable and the aforementioned members +of VmaAllocationCreateInfo structure are ignored. Memory type is selected +explicitly when creating the pool and then used to make all the allocations from +that pool. For further details, see \ref custom_memory_pools. + +\section choosing_memory_type_dedicated_allocations Dedicated allocations + +Memory for allocations is reserved out of larger block of `VkDeviceMemory` +allocated from Vulkan internally. That is the main feature of this whole library. +You can still request a separate memory block to be created for an allocation, +just like you would do in a trivial solution without using any allocator. +In that case, a buffer or image is always bound to that memory at offset 0. +This is called a "dedicated allocation". 
+You can explicitly request it by using flag #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. +The library can also internally decide to use dedicated allocation in some cases, e.g.: + +- When the size of the allocation is large. +- When [VK_KHR_dedicated_allocation](@ref vk_khr_dedicated_allocation) extension is enabled +  and it reports that dedicated allocation is required or recommended for the resource. +- When allocation of next big memory block fails due to not enough device memory, +  but allocation with the exact requested size succeeds. + + +\page memory_mapping Memory mapping + +To "map memory" in Vulkan means to obtain a CPU pointer to `VkDeviceMemory`, +to be able to read from it or write to it in CPU code. +Mapping is possible only of memory allocated from a memory type that has +`VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` flag. +Functions `vkMapMemory()`, `vkUnmapMemory()` are designed for this purpose. +You can use them directly with memory allocated by this library, +but it is not recommended because of following issue: +Mapping the same `VkDeviceMemory` block multiple times is illegal - only one mapping at a time is allowed. +This includes mapping disjoint regions. Mapping is not reference-counted internally by Vulkan. +It is also not thread-safe. +Because of this, Vulkan Memory Allocator provides following facilities: + +\note If you want to be able to map an allocation, you need to specify one of the flags +#VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT +in VmaAllocationCreateInfo::flags. These flags are required for an allocation to be mappable +when using #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` enum values. +For other usage values they are ignored and every such allocation made in `HOST_VISIBLE` memory type is mappable, +but these flags can still be used for consistency. + +\section memory_mapping_copy_functions Copy functions + +The easiest way to copy data from a host pointer to an allocation is to use convenience function vmaCopyMemoryToAllocation(). +It automatically maps the Vulkan memory temporarily (if not already mapped), performs `memcpy`, +and calls `vkFlushMappedMemoryRanges` (if required - if memory type is not `HOST_COHERENT`). + +It is also the safest one, because using `memcpy` avoids a risk of accidentally introducing memory reads +(e.g. by doing `pMappedVectors[i] += v`), which may be very slow on memory types that are not `HOST_CACHED`. + +\code +struct ConstantBuffer +{ +    ... +}; +ConstantBuffer constantBufferData = ... + +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = sizeof(ConstantBuffer); +bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; + +VkBuffer buf; +VmaAllocation alloc; +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr); + +vmaCopyMemoryToAllocation(allocator, &constantBufferData, alloc, 0, sizeof(ConstantBuffer)); +\endcode + +Copy in the other direction - from an allocation to a host pointer can be performed the same way using function vmaCopyAllocationToMemory(). + +\section memory_mapping_mapping_functions Mapping functions + +The library provides following functions for mapping of a specific allocation: vmaMapMemory(), vmaUnmapMemory(). 
+They are safer and more convenient to use than standard Vulkan functions. +You can map an allocation multiple times simultaneously - mapping is reference-counted internally. +You can also map different allocations simultaneously regardless of whether they use the same `VkDeviceMemory` block. +The way it is implemented is that the library always maps entire memory block, not just region of the allocation. +For further details, see description of vmaMapMemory() function. +Example: + +\code +// Having these objects initialized: +struct ConstantBuffer +{ +    ... +}; +ConstantBuffer constantBufferData = ... + +VmaAllocator allocator = ... +VkBuffer constantBuffer = ... +VmaAllocation constantBufferAllocation = ... + +// You can map and fill your buffer using following code: + +void* mappedData; +vmaMapMemory(allocator, constantBufferAllocation, &mappedData); +memcpy(mappedData, &constantBufferData, sizeof(constantBufferData)); +vmaUnmapMemory(allocator, constantBufferAllocation); +\endcode + +When mapping, you may see a warning from Vulkan validation layer similar to this one: + +<i>Mapping an image with layout VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL can result in undefined behavior if this memory is used by the device. Only GENERAL or PREINITIALIZED should be used.</i> + +It happens because the library maps entire `VkDeviceMemory` block, where different +types of images and buffers may end up together, especially on GPUs with unified memory like Intel. +You can safely ignore it if you are sure you access only memory of the intended +object that you wanted to map. + + +\section memory_mapping_persistently_mapped_memory Persistently mapped memory + +Keeping your memory persistently mapped is generally OK in Vulkan. +You don't need to unmap it before using its data on the GPU. +The library provides a special feature designed for that: +Allocations made with #VMA_ALLOCATION_CREATE_MAPPED_BIT flag set in +VmaAllocationCreateInfo::flags stay mapped all the time, +so you can just access CPU pointer to it any time +without a need to call any "map" or "unmap" function. +Example: + +\code +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = sizeof(ConstantBuffer); +bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | +    VMA_ALLOCATION_CREATE_MAPPED_BIT; + +VkBuffer buf; +VmaAllocation alloc; +VmaAllocationInfo allocInfo; +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); + +// Buffer is already mapped. You can access its memory. +memcpy(allocInfo.pMappedData, &constantBufferData, sizeof(constantBufferData)); +\endcode + +\note #VMA_ALLOCATION_CREATE_MAPPED_BIT by itself doesn't guarantee that the allocation will end up +in a mappable memory type. +For this, you need to also specify #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or +#VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT. +#VMA_ALLOCATION_CREATE_MAPPED_BIT only guarantees that if the memory is `HOST_VISIBLE`, the allocation will be mapped on creation. +For an example of how to make use of this fact, see section \ref usage_patterns_advanced_data_uploading. 
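+
+For example, a minimal sketch of such a check, assuming the buffer was created as above but with the
+additional flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT, so that a non-mappable
+memory type may also be chosen, and reusing `alloc`, `allocInfo`, and `constantBufferData` from the example above:
+
+\code
+VkMemoryPropertyFlags memPropFlags;
+vmaGetAllocationMemoryProperties(allocator, alloc, &memPropFlags);
+
+if((memPropFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0)
+{
+    // The memory is HOST_VISIBLE, so VMA_ALLOCATION_CREATE_MAPPED_BIT took effect
+    // and the persistently mapped pointer can be written directly.
+    memcpy(allocInfo.pMappedData, &constantBufferData, sizeof(constantBufferData));
+}
+else
+{
+    // The allocation did not end up in HOST_VISIBLE memory and was not mapped -
+    // the data has to be uploaded through a staging buffer and a transfer command instead.
+}
+\endcode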
+
+\section memory_mapping_cache_control Cache flush and invalidate
+
+Memory in Vulkan doesn't need to be unmapped before using it on the GPU,
+but unless a memory type has the `VK_MEMORY_PROPERTY_HOST_COHERENT_BIT` flag set,
+you need to manually **invalidate** the cache before reading from a mapped pointer
+and **flush** the cache after writing to a mapped pointer.
+Map/unmap operations don't do that automatically.
+Vulkan provides the following functions for this purpose: `vkFlushMappedMemoryRanges()`,
+`vkInvalidateMappedMemoryRanges()`, but this library provides more convenient
+functions that refer to a given allocation object: vmaFlushAllocation(),
+vmaInvalidateAllocation(),
+or multiple objects at once: vmaFlushAllocations(), vmaInvalidateAllocations().
+
+Regions of memory specified for flush/invalidate must be aligned to
+`VkPhysicalDeviceLimits::nonCoherentAtomSize`. This is automatically ensured by the library.
+In any memory type that is `HOST_VISIBLE` but not `HOST_COHERENT`, all allocations
+within blocks are aligned to this value, so their offsets are always multiples of
+`nonCoherentAtomSize` and two different allocations never share the same "line" of this size.
+
+Also, Windows drivers from all 3 PC GPU vendors (AMD, Intel, NVIDIA)
+currently provide the `HOST_COHERENT` flag on all memory types that are
+`HOST_VISIBLE`, so on PC you may not need to bother.
+
+
+\page staying_within_budget Staying within budget
+
+When developing a graphics-intensive game or program, it is important to avoid allocating
+more GPU memory than is physically available. When the memory is over-committed,
+various bad things can happen, depending on the specific GPU, graphics driver, and
+operating system:
+
+- It may just work without any problems.
+- The application may slow down because some memory blocks are moved to system RAM
+  and the GPU has to access them through the PCI Express bus.
+- A new allocation may take a very long time to complete, even a few seconds, and possibly
+  freeze the entire system.
+- The new allocation may fail with `VK_ERROR_OUT_OF_DEVICE_MEMORY`.
+- It may even result in a GPU crash (TDR), observed as `VK_ERROR_DEVICE_LOST`
+  returned somewhere later.
+
+\section staying_within_budget_querying_for_budget Querying for budget
+
+To query for current memory usage and available budget, use the function vmaGetHeapBudgets().
+The returned structure #VmaBudget contains quantities expressed in bytes, per Vulkan memory heap.
+
+Please note that this function returns different information and works faster than
+vmaCalculateStatistics(). vmaGetHeapBudgets() can be called every frame or even before every
+allocation, while vmaCalculateStatistics() is intended to be used rarely,
+only to obtain statistical information, e.g. for debugging purposes.
+
+It is recommended to use the <b>VK_EXT_memory_budget</b> device extension to obtain information
+about the budget from the Vulkan device. VMA is able to use this extension automatically.
+When not enabled, the allocator behaves the same way, but then it estimates current usage
+and available budget based on its internal information and Vulkan memory heap sizes,
+which may be less precise. In order to use this extension:
+
+1. Make sure the extensions VK_EXT_memory_budget and VK_KHR_get_physical_device_properties2
+   required by it are available and enable them. Please note that the first is a device
+   extension and the second is an instance extension!
+2. Use the flag #VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT when creating the #VmaAllocator object.
+3. Make sure to call vmaSetCurrentFrameIndex() every frame. Budget is queried from
+   Vulkan inside of it to avoid the overhead of querying it with every allocation.
+
+\section staying_within_budget_controlling_memory_usage Controlling memory usage
+
+There are many ways in which you can try to stay within the budget.
+
+First, when making a new allocation requires allocating a new memory block, the library
+tries not to exceed the budget automatically. If a block with the default recommended size
+(e.g. 256 MB) would go over budget, a smaller block is allocated, possibly even
+dedicated memory for just this resource.
+
+If the size of the requested resource plus current memory usage is more than the
+budget, by default the library still tries to create it, leaving it to the Vulkan
+implementation whether the allocation succeeds or fails. You can change this behavior
+by using the #VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT flag. With it, the allocation is
+not made if it would exceed the budget or if the budget is already exceeded.
+VMA then tries to make the allocation from the next eligible Vulkan memory type.
+If all of them fail, the call then fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY`.
+An example usage pattern may be to pass the #VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT flag
+when creating resources that are not essential for the application (e.g. the texture
+of a specific object) and not to pass it when creating critically important resources
+(e.g. render targets).
+
+On AMD graphics cards there is a custom vendor extension available: <b>VK_AMD_memory_overallocation_behavior</b>
+that allows controlling the behavior of the Vulkan implementation in out-of-memory cases -
+whether it should fail with an error code or still allow the allocation.
+Usage of this extension involves only passing an extra structure on Vulkan device creation,
+so it is out of the scope of this library.
+
+Finally, you can also use the #VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT flag to make sure
+a new allocation is created only when it fits inside one of the existing memory blocks.
+If it would require allocating a new block, it fails instead with `VK_ERROR_OUT_OF_DEVICE_MEMORY`.
+This also ensures that the function call is very fast because it never goes to Vulkan
+to obtain a new block.
+
+\note Creating \ref custom_memory_pools with VmaPoolCreateInfo::minBlockCount
+set to more than 0 will currently try to allocate memory blocks without checking whether they
+fit within the budget.
+
+
+\page resource_aliasing Resource aliasing (overlap)
+
+New explicit graphics APIs (Vulkan and Direct3D 12), thanks to manual memory
+management, give an opportunity to alias (overlap) multiple resources in the
+same region of memory - a feature not available in the old APIs (Direct3D 11, OpenGL).
+It can be useful to save video memory, but it must be used with caution.
+
+For example, if you know the flow of your whole render frame in advance, you
+are going to use some intermediate textures or buffers only during a small range of render passes,
+and you know these ranges don't overlap in time, you can bind these resources to
+the same place in memory, even if they have completely different parameters (width, height, format etc.).
+
+
+
+Such a scenario is possible using VMA, but you need to create your images manually.
+Then you need to calculate the parameters of the allocation to be made using this formula:
+
+- allocation size = max(size of each image)
+- allocation alignment = max(alignment of each image)
+- allocation memoryTypeBits = bitwise AND(memoryTypeBits of each image)
+
+The following example shows two different images bound to the same place in memory,
+allocated to fit the largest of them.
+
+\code
+// A 512x512 texture to be sampled.
+VkImageCreateInfo img1CreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
+img1CreateInfo.imageType = VK_IMAGE_TYPE_2D;
+img1CreateInfo.extent.width = 512;
+img1CreateInfo.extent.height = 512;
+img1CreateInfo.extent.depth = 1;
+img1CreateInfo.mipLevels = 10;
+img1CreateInfo.arrayLayers = 1;
+img1CreateInfo.format = VK_FORMAT_R8G8B8A8_SRGB;
+img1CreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
+img1CreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+img1CreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
+img1CreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
+
+// A full screen texture to be used as a color attachment.
+VkImageCreateInfo img2CreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
+img2CreateInfo.imageType = VK_IMAGE_TYPE_2D;
+img2CreateInfo.extent.width = 1920;
+img2CreateInfo.extent.height = 1080;
+img2CreateInfo.extent.depth = 1;
+img2CreateInfo.mipLevels = 1;
+img2CreateInfo.arrayLayers = 1;
+img2CreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
+img2CreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
+img2CreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+img2CreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+img2CreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
+
+VkImage img1;
+VkResult res = vkCreateImage(device, &img1CreateInfo, nullptr, &img1);
+VkImage img2;
+res = vkCreateImage(device, &img2CreateInfo, nullptr, &img2);
+
+VkMemoryRequirements img1MemReq;
+vkGetImageMemoryRequirements(device, img1, &img1MemReq);
+VkMemoryRequirements img2MemReq;
+vkGetImageMemoryRequirements(device, img2, &img2MemReq);
+
+VkMemoryRequirements finalMemReq = {};
+finalMemReq.size = std::max(img1MemReq.size, img2MemReq.size);
+finalMemReq.alignment = std::max(img1MemReq.alignment, img2MemReq.alignment);
+finalMemReq.memoryTypeBits = img1MemReq.memoryTypeBits & img2MemReq.memoryTypeBits;
+// Validate that finalMemReq.memoryTypeBits != 0 - otherwise no memory type supports both images.
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+
+VmaAllocation alloc;
+res = vmaAllocateMemory(allocator, &finalMemReq, &allocCreateInfo, &alloc, nullptr);
+
+res = vmaBindImageMemory(allocator, alloc, img1);
+res = vmaBindImageMemory(allocator, alloc, img2);
+
+// You can use img1, img2 here, but not at the same time!
+
+vmaFreeMemory(allocator, alloc);
+vkDestroyImage(device, img2, nullptr);
+vkDestroyImage(device, img1, nullptr);
+\endcode
+
+VMA also provides convenience functions that create a buffer or image and bind it to memory
+represented by an existing #VmaAllocation:
+vmaCreateAliasingBuffer(), vmaCreateAliasingBuffer2(),
+vmaCreateAliasingImage(), vmaCreateAliasingImage2().
+Versions with "2" offer the additional parameter `allocationLocalOffset`.
+
+Remember that using resources that alias in memory requires proper synchronization.
+You need to issue a memory barrier to make sure commands that use `img1` and `img2`
+don't overlap on the GPU timeline.
+You also need to treat a resource after aliasing as uninitialized - containing garbage data.
+For example, if you use `img1` and then want to use `img2`, you need to issue +an image memory barrier for `img2` with `oldLayout` = `VK_IMAGE_LAYOUT_UNDEFINED`. + +Additional considerations: + +- Vulkan also allows to interpret contents of memory between aliasing resources consistently in some cases. +See chapter 11.8. "Memory Aliasing" of Vulkan specification or `VK_IMAGE_CREATE_ALIAS_BIT` flag. +- You can create more complex layout where different images and buffers are bound +at different offsets inside one large allocation. For example, one can imagine +a big texture used in some render passes, aliasing with a set of many small buffers +used between in some further passes. To bind a resource at non-zero offset in an allocation, +use vmaBindBufferMemory2() / vmaBindImageMemory2(). +- Before allocating memory for the resources you want to alias, check `memoryTypeBits` +returned in memory requirements of each resource to make sure the bits overlap. +Some GPUs may expose multiple memory types suitable e.g. only for buffers or +images with `COLOR_ATTACHMENT` usage, so the sets of memory types supported by your +resources may be disjoint. Aliasing them is not possible in that case. + + +\page custom_memory_pools Custom memory pools + +A memory pool contains a number of `VkDeviceMemory` blocks. +The library automatically creates and manages default pool for each memory type available on the device. +Default memory pool automatically grows in size. +Size of allocated blocks is also variable and managed automatically. +You are using default pools whenever you leave VmaAllocationCreateInfo::pool = null. + +You can create custom pool and allocate memory out of it. +It can be useful if you want to: + +- Keep certain kind of allocations separate from others. +- Enforce particular, fixed size of Vulkan memory blocks. +- Limit maximum amount of Vulkan memory allocated for that pool. +- Reserve minimum or fixed amount of Vulkan memory always preallocated for that pool. +- Use extra parameters for a set of your allocations that are available in #VmaPoolCreateInfo but not in +  #VmaAllocationCreateInfo - e.g., custom minimum alignment, custom `pNext` chain. +- Perform defragmentation on a specific subset of your allocations. + +To use custom memory pools: + +-# Fill VmaPoolCreateInfo structure. +-# Call vmaCreatePool() to obtain #VmaPool handle. +-# When making an allocation, set VmaAllocationCreateInfo::pool to this handle. +   You don't need to specify any other parameters of this structure, like `usage`. + +Example: + +\code +// Find memoryTypeIndex for the pool. +VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +sampleBufCreateInfo.size = 0x10000; // Doesn't matter. +sampleBufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo sampleAllocCreateInfo = {}; +sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + +uint32_t memTypeIndex; +VkResult res = vmaFindMemoryTypeIndexForBufferInfo(allocator, +    &sampleBufCreateInfo, &sampleAllocCreateInfo, &memTypeIndex); +// Check res... + +// Create a pool that can have at most 2 blocks, 128 MiB each. +VmaPoolCreateInfo poolCreateInfo = {}; +poolCreateInfo.memoryTypeIndex = memTypeIndex; +poolCreateInfo.blockSize = 128ull * 1024 * 1024; +poolCreateInfo.maxBlockCount = 2; + +VmaPool pool; +res = vmaCreatePool(allocator, &poolCreateInfo, &pool); +// Check res... + +// Allocate a buffer out of it. 
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = 1024; +bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.pool = pool; + +VkBuffer buf; +VmaAllocation alloc; +res = vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr); +// Check res... +\endcode + +You have to free all allocations made from this pool before destroying it. + +\code +vmaDestroyBuffer(allocator, buf, alloc); +vmaDestroyPool(allocator, pool); +\endcode + +New versions of this library support creating dedicated allocations in custom pools. +It is supported only when VmaPoolCreateInfo::blockSize = 0. +To use this feature, set VmaAllocationCreateInfo::pool to the pointer to your custom pool and +VmaAllocationCreateInfo::flags to #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. + + +\section custom_memory_pools_MemTypeIndex Choosing memory type index + +When creating a pool, you must explicitly specify memory type index. +To find the one suitable for your buffers or images, you can use helper functions +vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo(). +You need to provide structures with example parameters of buffers or images +that you are going to create in that pool. + +\code +VkBufferCreateInfo exampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +exampleBufCreateInfo.size = 1024; // Doesn't matter +exampleBufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + +uint32_t memTypeIndex; +vmaFindMemoryTypeIndexForBufferInfo(allocator, &exampleBufCreateInfo, &allocCreateInfo, &memTypeIndex); + +VmaPoolCreateInfo poolCreateInfo = {}; +poolCreateInfo.memoryTypeIndex = memTypeIndex; +// ... +\endcode + +When creating buffers/images allocated in that pool, provide following parameters: + +- `VkBufferCreateInfo`: Prefer to pass same parameters as above. +  Otherwise you risk creating resources in a memory type that is not suitable for them, which may result in undefined behavior. +  Using different `VK_BUFFER_USAGE_` flags may work, but you shouldn't create images in a pool intended for buffers +  or the other way around. +- VmaAllocationCreateInfo: You don't need to pass same parameters. Fill only `pool` member. +  Other members are ignored anyway. + + +\section custom_memory_pools_when_not_use When not to use custom pools + +Custom pools are commonly overused by VMA users. +While it may feel natural to keep some logical groups of resources separate in memory, +in most cases it does more harm than good. +Using custom pool shouldn't be your first choice. +Instead, please make all allocations from default pools first and only use custom pools +if you can prove and measure that it is beneficial in some way, +e.g. it results in lower memory usage, better performance, etc. + +Using custom pools has disadvantages: + +- Each pool has its own collection of `VkDeviceMemory` blocks. +  Some of them may be partially or even completely empty. +  Spreading allocations across multiple pools increases the amount of wasted (allocated but unbound) memory. +- You must manually choose specific memory type to be used by a custom pool (set as VmaPoolCreateInfo::memoryTypeIndex). 
+  When using default pools, the best memory type for each of your allocations can be selected automatically
+  using a carefully designed algorithm that works across all kinds of GPUs.
+- If an allocation from a custom pool at a specific memory type fails, the entire allocation operation returns failure.
+  When using default pools, VMA tries another compatible memory type.
+- If you set VmaPoolCreateInfo::blockSize != 0, each memory block has the same size,
+  while default pools start from small blocks and allocate each next block larger and larger,
+  up to the preferred block size.
+
+Many of the common concerns can be addressed in a different way than using custom pools:
+
+- If you want to keep your allocations of certain size (small versus large) or certain lifetime (transient versus long lived)
+  separate, you likely don't need to.
+  VMA uses a high quality allocation algorithm that manages memory well in various cases.
+  Please measure and check whether using custom pools provides a benefit.
+- If you want to keep your images and buffers separate, you don't need to.
+  VMA respects the `bufferImageGranularity` limit automatically.
+- If you want to keep your mapped and not mapped allocations separate, you don't need to.
+  VMA respects the `nonCoherentAtomSize` limit automatically.
+  It also maps only those `VkDeviceMemory` blocks that need to map any allocation.
+  It even tries to keep mappable and non-mappable allocations in separate blocks to minimize the amount of mapped memory.
+- If you want to choose a custom size for the default memory block, you can set it globally instead
+  using VmaAllocatorCreateInfo::preferredLargeHeapBlockSize.
+- If you want to select a specific memory type for your allocation,
+  you can set VmaAllocationCreateInfo::memoryTypeBits to `(1u << myMemoryTypeIndex)` instead.
+- If you need to create a buffer with a certain minimum alignment, you can still do it
+  using default pools with the dedicated function vmaCreateBufferWithAlignment().
+
+
+\section linear_algorithm Linear allocation algorithm
+
+Each Vulkan memory block managed by this library has accompanying metadata that
+keeps track of used and unused regions. By default, the metadata structure and
+algorithm try to find the best place for new allocations among free regions to
+optimize memory usage. This way you can allocate and free objects in any order.
+
+
+
+Sometimes there is a need for a simpler, linear allocation algorithm. You can
+create a custom pool that uses this algorithm by adding the flag
+#VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT to VmaPoolCreateInfo::flags while creating the
+#VmaPool object. An alternative metadata management is then used. It always
+creates new allocations after the last one and doesn't reuse free regions left by
+allocations freed in the middle. It results in better allocation performance and
+less memory consumed by metadata.
+
+
+
+With this one flag, you can create a custom pool that can be used in many ways:
+free-at-once, stack, double stack, and ring buffer. See below for details.
+You don't need to specify explicitly which of these options you are going to use - it is detected automatically.
+
+\subsection linear_algorithm_free_at_once Free-at-once
+
+In a pool that uses the linear algorithm, you still need to free all the allocations
+individually, e.g. by using vmaFreeMemory() or vmaDestroyBuffer(). You can free
+them in any order. New allocations are always made after the last one - free space
+in the middle is not reused. 
However, when you release all the allocation and +the pool becomes empty, allocation starts from the beginning again. This way you +can use linear algorithm to speed up creation of allocations that you are going +to release all at once. + + + +This mode is also available for pools created with VmaPoolCreateInfo::maxBlockCount +value that allows multiple memory blocks. + +\subsection linear_algorithm_stack Stack + +When you free an allocation that was created last, its space can be reused. +Thanks to this, if you always release allocations in the order opposite to their +creation (LIFO - Last In First Out), you can achieve behavior of a stack. + + + +This mode is also available for pools created with VmaPoolCreateInfo::maxBlockCount +value that allows multiple memory blocks. + +\subsection linear_algorithm_double_stack Double stack + +The space reserved by a custom pool with linear algorithm may be used by two +stacks: + +- First, default one, growing up from offset 0. +- Second, "upper" one, growing down from the end towards lower offsets. + +To make allocation from the upper stack, add flag #VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT +to VmaAllocationCreateInfo::flags. + + + +Double stack is available only in pools with one memory block - +VmaPoolCreateInfo::maxBlockCount must be 1. Otherwise behavior is undefined. + +When the two stacks' ends meet so there is not enough space between them for a +new allocation, such allocation fails with usual +`VK_ERROR_OUT_OF_DEVICE_MEMORY` error. + +\subsection linear_algorithm_ring_buffer Ring buffer + +When you free some allocations from the beginning and there is not enough free space +for a new one at the end of a pool, allocator's "cursor" wraps around to the +beginning and starts allocation there. Thanks to this, if you always release +allocations in the same order as you created them (FIFO - First In First Out), +you can achieve behavior of a ring buffer / queue. + + + +Ring buffer is available only in pools with one memory block - +VmaPoolCreateInfo::maxBlockCount must be 1. Otherwise behavior is undefined. + +\note \ref defragmentation is not supported in custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT. + + +\page defragmentation Defragmentation + +Interleaved allocations and deallocations of many objects of varying size can +cause fragmentation over time, which can lead to a situation where the library is unable +to find a continuous range of free memory for a new allocation despite there is +enough free space, just scattered across many small free ranges between existing +allocations. + +To mitigate this problem, you can use defragmentation feature. +It doesn't happen automatically though and needs your cooperation, +because VMA is a low level library that only allocates memory. +It cannot recreate buffers and images in a new place as it doesn't remember the contents of `VkBufferCreateInfo` / `VkImageCreateInfo` structures. +It cannot copy their contents as it doesn't record any commands to a command buffer. + +Example: + +\code +VmaDefragmentationInfo defragInfo = {}; +defragInfo.pool = myPool; +defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT; + +VmaDefragmentationContext defragCtx; +VkResult res = vmaBeginDefragmentation(allocator, &defragInfo, &defragCtx); +// Check res... + +for(;;) +{ +    VmaDefragmentationPassMoveInfo pass; +    res = vmaBeginDefragmentationPass(allocator, defragCtx, &pass); +    if(res == VK_SUCCESS) +        break; +    else if(res != VK_INCOMPLETE) +        // Handle error... 
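+
+    // Reaching this point means res == VK_INCOMPLETE: the pass returned a list of
+    // allocations to move in pass.pMoves[], which is processed below.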
+
+    for(uint32_t i = 0; i < pass.moveCount; ++i)
+    {
+        // Inspect pass.pMoves[i].srcAllocation, identify what buffer/image it represents.
+        VmaAllocationInfo allocInfo;
+        vmaGetAllocationInfo(allocator, pass.pMoves[i].srcAllocation, &allocInfo);
+        MyEngineResourceData* resData = (MyEngineResourceData*)allocInfo.pUserData;
+
+        // Recreate and bind this buffer/image at: pass.pMoves[i].dstMemory, pass.pMoves[i].dstOffset.
+        VkImageCreateInfo imgCreateInfo = ...
+        VkImage newImg;
+        res = vkCreateImage(device, &imgCreateInfo, nullptr, &newImg);
+        // Check res...
+        res = vmaBindImageMemory(allocator, pass.pMoves[i].dstTmpAllocation, newImg);
+        // Check res...
+
+        // Issue a vkCmdCopyBuffer/vkCmdCopyImage to copy its content to the new place.
+        vkCmdCopyImage(cmdBuf, resData->img, ..., newImg, ...);
+    }
+
+    // Make sure the copy commands finished executing.
+    vkWaitForFences(...);
+
+    // Destroy old buffers/images bound with pass.pMoves[i].srcAllocation.
+    for(uint32_t i = 0; i < pass.moveCount; ++i)
+    {
+        // ...
+        vkDestroyImage(device, resData->img, nullptr);
+    }
+
+    // Update appropriate descriptors to point to the new places...
+
+    res = vmaEndDefragmentationPass(allocator, defragCtx, &pass);
+    if(res == VK_SUCCESS)
+        break;
+    else if(res != VK_INCOMPLETE)
+        // Handle error...
+}
+
+vmaEndDefragmentation(allocator, defragCtx, nullptr);
+\endcode
+
+Although functions like vmaCreateBuffer(), vmaCreateImage(), vmaDestroyBuffer(), vmaDestroyImage()
+create/destroy an allocation and a buffer/image at once, these are just a shortcut for
+creating the resource, allocating memory, and binding them together.
+Defragmentation works on memory allocations only. You must handle the rest manually.
+Defragmentation is an iterative process that should repeat "passes" as long as the related functions
+return `VK_INCOMPLETE` rather than `VK_SUCCESS`.
+In each pass:
+
+1. vmaBeginDefragmentationPass() function call:
+   - Calculates and returns the list of allocations to be moved in this pass.
+     Note this can be a time-consuming process.
+   - Reserves destination memory for them by creating temporary destination allocations
+     that you can query for their `VkDeviceMemory` + offset using vmaGetAllocationInfo().
+2. Inside the pass, **you should**:
+   - Inspect the returned list of allocations to be moved.
+   - Create new buffers/images and bind them at the returned destination temporary allocations.
+   - Copy data from source to destination resources if necessary.
+   - Destroy the source buffers/images, but NOT their allocations.
+3. vmaEndDefragmentationPass() function call:
+   - Frees the source memory reserved for the allocations that are moved.
+   - Modifies source #VmaAllocation objects that are moved to point to the destination reserved memory.
+   - Frees `VkDeviceMemory` blocks that became empty.
+
+Unlike in previous iterations of the defragmentation API, there is no list of "movable" allocations passed as a parameter.
+The defragmentation algorithm tries to move all suitable allocations.
+You can, however, refuse to move some of them inside a defragmentation pass by setting
+`pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE.
+This is not recommended and may result in suboptimal packing of the allocations after defragmentation. 
+If you cannot ensure any allocation can be moved, it is better to keep movable allocations separate in a custom pool. + +Inside a pass, for each allocation that should be moved: + +- You should copy its data from the source to the destination place by calling e.g. `vkCmdCopyBuffer()`, `vkCmdCopyImage()`. +  - You need to make sure these commands finished executing before destroying the source buffers/images and before calling vmaEndDefragmentationPass(). +- If a resource doesn't contain any meaningful data, e.g. it is a transient color attachment image to be cleared, +  filled, and used temporarily in each rendering frame, you can just recreate this image +  without copying its data. +- If the resource is in `HOST_VISIBLE` and `HOST_CACHED` memory, you can copy its data on the CPU +  using `memcpy()`. +- If you cannot move the allocation, you can set `pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE. +  This will cancel the move. +  - vmaEndDefragmentationPass() will then free the destination memory +    not the source memory of the allocation, leaving it unchanged. +- If you decide the allocation is unimportant and can be destroyed instead of moved (e.g. it wasn't used for long time), +  you can set `pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY. +  - vmaEndDefragmentationPass() will then free both source and destination memory, and will destroy the source #VmaAllocation object. + +You can defragment a specific custom pool by setting VmaDefragmentationInfo::pool +(like in the example above) or all the default pools by setting this member to null. + +Defragmentation is always performed in each pool separately. +Allocations are never moved between different Vulkan memory types. +The size of the destination memory reserved for a moved allocation is the same as the original one. +Alignment of an allocation as it was determined using `vkGetBufferMemoryRequirements()` etc. is also respected after defragmentation. +Buffers/images should be recreated with the same `VkBufferCreateInfo` / `VkImageCreateInfo` parameters as the original ones. + +You can perform the defragmentation incrementally to limit the number of allocations and bytes to be moved +in each pass, e.g. to call it in sync with render frames and not to experience too big hitches. +See members: VmaDefragmentationInfo::maxBytesPerPass, VmaDefragmentationInfo::maxAllocationsPerPass. + +It is also safe to perform the defragmentation asynchronously to render frames and other Vulkan and VMA +usage, possibly from multiple threads, with the exception that allocations +returned in VmaDefragmentationPassMoveInfo::pMoves shouldn't be destroyed until the defragmentation pass is ended. + +<b>Mapping</b> is preserved on allocations that are moved during defragmentation. +Whether through #VMA_ALLOCATION_CREATE_MAPPED_BIT or vmaMapMemory(), the allocations +are mapped at their new place. Of course, pointer to the mapped data changes, so it needs to be queried +using VmaAllocationInfo::pMappedData. + +\note Defragmentation is not supported in custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT. + + +\page statistics Statistics + +This library contains several functions that return information about its internal state, +especially the amount of memory allocated from Vulkan. 
+ +\section statistics_numeric_statistics Numeric statistics + +If you need to obtain basic statistics about memory usage per heap, together with current budget, +you can call function vmaGetHeapBudgets() and inspect structure #VmaBudget. +This is useful to keep track of memory usage and stay within budget +(see also \ref staying_within_budget). +Example: + +\code +uint32_t heapIndex = ... + +VmaBudget budgets[VK_MAX_MEMORY_HEAPS]; +vmaGetHeapBudgets(allocator, budgets); + +printf("My heap currently has %u allocations taking %llu B,\n", +    budgets[heapIndex].statistics.allocationCount, +    budgets[heapIndex].statistics.allocationBytes); +printf("allocated out of %u Vulkan device memory blocks taking %llu B,\n", +    budgets[heapIndex].statistics.blockCount, +    budgets[heapIndex].statistics.blockBytes); +printf("Vulkan reports total usage %llu B with budget %llu B.\n", +    budgets[heapIndex].usage, +    budgets[heapIndex].budget); +\endcode + +You can query for more detailed statistics per memory heap, type, and totals, +including minimum and maximum allocation size and unused range size, +by calling function vmaCalculateStatistics() and inspecting structure #VmaTotalStatistics. +This function is slower though, as it has to traverse all the internal data structures, +so it should be used only for debugging purposes. + +You can query for statistics of a custom pool using function vmaGetPoolStatistics() +or vmaCalculatePoolStatistics(). + +You can query for information about a specific allocation using function vmaGetAllocationInfo(). +It fill structure #VmaAllocationInfo. + +\section statistics_json_dump JSON dump + +You can dump internal state of the allocator to a string in JSON format using function vmaBuildStatsString(). +The result is guaranteed to be correct JSON. +It uses ANSI encoding. +Any strings provided by user (see [Allocation names](@ref allocation_names)) +are copied as-is and properly escaped for JSON, so if they use UTF-8, ISO-8859-2 or any other encoding, +this JSON string can be treated as using this encoding. +It must be freed using function vmaFreeStatsString(). + +The format of this JSON string is not part of official documentation of the library, +but it will not change in backward-incompatible way without increasing library major version number +and appropriate mention in changelog. + +The JSON string contains all the data that can be obtained using vmaCalculateStatistics(). +It can also contain detailed map of allocated memory blocks and their regions - +free and occupied by allocations. +This allows e.g. to visualize the memory or assess fragmentation. + + +\page allocation_annotation Allocation names and user data + +\section allocation_user_data Allocation user data + +You can annotate allocations with your own information, e.g. for debugging purposes. +To do that, fill VmaAllocationCreateInfo::pUserData field when creating +an allocation. It is an opaque `void*` pointer. You can use it e.g. as a pointer, +some handle, index, key, ordinal number or any other value that would associate +the allocation with your custom metadata. +It is useful to identify appropriate data structures in your engine given #VmaAllocation, +e.g. when doing \ref defragmentation. + +\code +VkBufferCreateInfo bufCreateInfo = ... 
+ +MyBufferMetadata* pMetadata = CreateBufferMetadata(); + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.pUserData = pMetadata; + +VkBuffer buffer; +VmaAllocation allocation; +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buffer, &allocation, nullptr); +\endcode + +The pointer may be later retrieved as VmaAllocationInfo::pUserData: + +\code +VmaAllocationInfo allocInfo; +vmaGetAllocationInfo(allocator, allocation, &allocInfo); +MyBufferMetadata* pMetadata = (MyBufferMetadata*)allocInfo.pUserData; +\endcode + +It can also be changed using function vmaSetAllocationUserData(). + +Values of (non-zero) allocations' `pUserData` are printed in JSON report created by +vmaBuildStatsString() in hexadecimal form. + +\section allocation_names Allocation names + +An allocation can also carry a null-terminated string, giving a name to the allocation. +To set it, call vmaSetAllocationName(). +The library creates internal copy of the string, so the pointer you pass doesn't need +to be valid for whole lifetime of the allocation. You can free it after the call. + +\code +std::string imageName = "Texture: "; +imageName += fileName; +vmaSetAllocationName(allocator, allocation, imageName.c_str()); +\endcode + +The string can be later retrieved by inspecting VmaAllocationInfo::pName. +It is also printed in JSON report created by vmaBuildStatsString(). + +\note Setting string name to VMA allocation doesn't automatically set it to the Vulkan buffer or image created with it. +You must do it manually using an extension like VK_EXT_debug_utils, which is independent of this library. + + +\page virtual_allocator Virtual allocator + +As an extra feature, the core allocation algorithm of the library is exposed through a simple and convenient API of "virtual allocator". +It doesn't allocate any real GPU memory. It just keeps track of used and free regions of a "virtual block". +You can use it to allocate your own memory or other objects, even completely unrelated to Vulkan. +A common use case is sub-allocation of pieces of one large GPU buffer. + +\section virtual_allocator_creating_virtual_block Creating virtual block + +To use this functionality, there is no main "allocator" object. +You don't need to have #VmaAllocator object created. +All you need to do is to create a separate #VmaVirtualBlock object for each block of memory you want to be managed by the allocator: + +-# Fill in #VmaVirtualBlockCreateInfo structure. +-# Call vmaCreateVirtualBlock(). Get new #VmaVirtualBlock object. + +Example: + +\code +VmaVirtualBlockCreateInfo blockCreateInfo = {}; +blockCreateInfo.size = 1048576; // 1 MB + +VmaVirtualBlock block; +VkResult res = vmaCreateVirtualBlock(&blockCreateInfo, &block); +\endcode + +\section virtual_allocator_making_virtual_allocations Making virtual allocations + +#VmaVirtualBlock object contains internal data structure that keeps track of free and occupied regions +using the same code as the main Vulkan memory allocator. +Similarly to #VmaAllocation for standard GPU allocations, there is #VmaVirtualAllocation type +that represents an opaque handle to an allocation within the virtual block. + +In order to make such allocation: + +-# Fill in #VmaVirtualAllocationCreateInfo structure. +-# Call vmaVirtualAllocate(). Get new #VmaVirtualAllocation object that represents the allocation. +   You can also receive `VkDeviceSize offset` that was assigned to the allocation. 
+ +Example: + +\code +VmaVirtualAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.size = 4096; // 4 KB + +VmaVirtualAllocation alloc; +VkDeviceSize offset; +res = vmaVirtualAllocate(block, &allocCreateInfo, &alloc, &offset); +if(res == VK_SUCCESS) +{ +    // Use the 4 KB of your memory starting at offset. +} +else +{ +    // Allocation failed - no space for it could be found. Handle this error! +} +\endcode + +\section virtual_allocator_deallocation Deallocation + +When no longer needed, an allocation can be freed by calling vmaVirtualFree(). +You can only pass to this function an allocation that was previously returned by vmaVirtualAllocate() +called for the same #VmaVirtualBlock. + +When whole block is no longer needed, the block object can be released by calling vmaDestroyVirtualBlock(). +All allocations must be freed before the block is destroyed, which is checked internally by an assert. +However, if you don't want to call vmaVirtualFree() for each allocation, you can use vmaClearVirtualBlock() to free them all at once - +a feature not available in normal Vulkan memory allocator. Example: + +\code +vmaVirtualFree(block, alloc); +vmaDestroyVirtualBlock(block); +\endcode + +\section virtual_allocator_allocation_parameters Allocation parameters + +You can attach a custom pointer to each allocation by using vmaSetVirtualAllocationUserData(). +Its default value is null. +It can be used to store any data that needs to be associated with that allocation - e.g. an index, a handle, or a pointer to some +larger data structure containing more information. Example: + +\code +struct CustomAllocData +{ +    std::string m_AllocName; +}; +CustomAllocData* allocData = new CustomAllocData(); +allocData->m_AllocName = "My allocation 1"; +vmaSetVirtualAllocationUserData(block, alloc, allocData); +\endcode + +The pointer can later be fetched, along with allocation offset and size, by passing the allocation handle to function +vmaGetVirtualAllocationInfo() and inspecting returned structure #VmaVirtualAllocationInfo. +If you allocated a new object to be used as the custom pointer, don't forget to delete that object before freeing the allocation! +Example: + +\code +VmaVirtualAllocationInfo allocInfo; +vmaGetVirtualAllocationInfo(block, alloc, &allocInfo); +delete (CustomAllocData*)allocInfo.pUserData; + +vmaVirtualFree(block, alloc); +\endcode + +\section virtual_allocator_alignment_and_units Alignment and units + +It feels natural to express sizes and offsets in bytes. +If an offset of an allocation needs to be aligned to a multiply of some number (e.g. 4 bytes), you can fill optional member +VmaVirtualAllocationCreateInfo::alignment to request it. Example: + +\code +VmaVirtualAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.size = 4096; // 4 KB +allocCreateInfo.alignment = 4; // Returned offset must be a multiply of 4 B + +VmaVirtualAllocation alloc; +res = vmaVirtualAllocate(block, &allocCreateInfo, &alloc, nullptr); +\endcode + +Alignments of different allocations made from one block may vary. +However, if all alignments and sizes are always multiply of some size e.g. 4 B or `sizeof(MyDataStruct)`, +you can express all sizes, alignments, and offsets in multiples of that size instead of individual bytes. 
+It might be more convenient, but you need to make sure to use this new unit consistently in all the places: + +- VmaVirtualBlockCreateInfo::size +- VmaVirtualAllocationCreateInfo::size and VmaVirtualAllocationCreateInfo::alignment +- Using offset returned by vmaVirtualAllocate() or in VmaVirtualAllocationInfo::offset + +\section virtual_allocator_statistics Statistics + +You can obtain statistics of a virtual block using vmaGetVirtualBlockStatistics() +(to get brief statistics that are fast to calculate) +or vmaCalculateVirtualBlockStatistics() (to get more detailed statistics, slower to calculate). +The functions fill structures #VmaStatistics, #VmaDetailedStatistics respectively - same as used by the normal Vulkan memory allocator. +Example: + +\code +VmaStatistics stats; +vmaGetVirtualBlockStatistics(block, &stats); +printf("My virtual block has %llu bytes used by %u virtual allocations\n", +    stats.allocationBytes, stats.allocationCount); +\endcode + +You can also request a full list of allocations and free regions as a string in JSON format by calling +vmaBuildVirtualBlockStatsString(). +Returned string must be later freed using vmaFreeVirtualBlockStatsString(). +The format of this string differs from the one returned by the main Vulkan allocator, but it is similar. + +\section virtual_allocator_additional_considerations Additional considerations + +The "virtual allocator" functionality is implemented on a level of individual memory blocks. +Keeping track of a whole collection of blocks, allocating new ones when out of free space, +deleting empty ones, and deciding which one to try first for a new allocation must be implemented by the user. + +Alternative allocation algorithms are supported, just like in custom pools of the real GPU memory. +See enum #VmaVirtualBlockCreateFlagBits to learn how to specify them (e.g. #VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT). +You can find their description in chapter \ref custom_memory_pools. +Allocation strategies are also supported. +See enum #VmaVirtualAllocationCreateFlagBits to learn how to specify them (e.g. #VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT). + +Following features are supported only by the allocator of the real GPU memory and not by virtual allocations: +buffer-image granularity, `VMA_DEBUG_MARGIN`, `VMA_MIN_ALIGNMENT`. + + +\page debugging_memory_usage Debugging incorrect memory usage + +If you suspect a bug with memory usage, like usage of uninitialized memory or +memory being overwritten out of bounds of an allocation, +you can use debug features of this library to verify this. + +\section debugging_memory_usage_initialization Memory initialization + +If you experience a bug with incorrect and nondeterministic data in your program and you suspect uninitialized memory to be used, +you can enable automatic memory initialization to verify this. +To do it, define macro `VMA_DEBUG_INITIALIZE_ALLOCATIONS` to 1. + +\code +#define VMA_DEBUG_INITIALIZE_ALLOCATIONS 1 +#include "vk_mem_alloc.h" +\endcode + +It makes memory of new allocations initialized to bit pattern `0xDCDCDCDC`. +Before an allocation is destroyed, its memory is filled with bit pattern `0xEFEFEFEF`. +Memory is automatically mapped and unmapped if necessary. + +If you find these values while debugging your program, good chances are that you incorrectly +read Vulkan memory that is allocated but not initialized, or already freed, respectively. + +Memory initialization works only with memory types that are `HOST_VISIBLE` and with allocations that can be mapped. 
+It works also with dedicated allocations. + +\section debugging_memory_usage_margins Margins + +By default, allocations are laid out in memory blocks next to each other if possible +(considering required alignment, `bufferImageGranularity`, and `nonCoherentAtomSize`). + + + +Define macro `VMA_DEBUG_MARGIN` to some non-zero value (e.g. 16) to enforce specified +number of bytes as a margin after every allocation. + +\code +#define VMA_DEBUG_MARGIN 16 +#include "vk_mem_alloc.h" +\endcode + + + +If your bug goes away after enabling margins, it means it may be caused by memory +being overwritten outside of allocation boundaries. It is not 100% certain though. +Change in application behavior may also be caused by different order and distribution +of allocations across memory blocks after margins are applied. + +Margins work with all types of memory. + +Margin is applied only to allocations made out of memory blocks and not to dedicated +allocations, which have their own memory block of specific size. +It is thus not applied to allocations made using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT flag +or those automatically decided to put into dedicated allocations, e.g. due to its +large size or recommended by VK_KHR_dedicated_allocation extension. + +Margins appear in [JSON dump](@ref statistics_json_dump) as part of free space. + +Note that enabling margins increases memory usage and fragmentation. + +Margins do not apply to \ref virtual_allocator. + +\section debugging_memory_usage_corruption_detection Corruption detection + +You can additionally define macro `VMA_DEBUG_DETECT_CORRUPTION` to 1 to enable validation +of contents of the margins. + +\code +#define VMA_DEBUG_MARGIN 16 +#define VMA_DEBUG_DETECT_CORRUPTION 1 +#include "vk_mem_alloc.h" +\endcode + +When this feature is enabled, number of bytes specified as `VMA_DEBUG_MARGIN` +(it must be multiply of 4) after every allocation is filled with a magic number. +This idea is also know as "canary". +Memory is automatically mapped and unmapped if necessary. + +This number is validated automatically when the allocation is destroyed. +If it is not equal to the expected value, `VMA_ASSERT()` is executed. +It clearly means that either CPU or GPU overwritten the memory outside of boundaries of the allocation, +which indicates a serious bug. + +You can also explicitly request checking margins of all allocations in all memory blocks +that belong to specified memory types by using function vmaCheckCorruption(), +or in memory blocks that belong to specified custom pool, by using function +vmaCheckPoolCorruption(). + +Margin validation (corruption detection) works only for memory types that are +`HOST_VISIBLE` and `HOST_COHERENT`. + + +\section debugging_memory_usage_leak_detection Leak detection features + +At allocation and allocator destruction time VMA checks for unfreed and unmapped blocks using +`VMA_ASSERT_LEAK()`. This macro defaults to an assertion, triggering a typically fatal error in Debug +builds, and doing nothing in Release builds. You can provide your own definition of `VMA_ASSERT_LEAK()` +to change this behavior. + +At memory block destruction time VMA lists out all unfreed allocations using the `VMA_LEAK_LOG_FORMAT()` +macro, which defaults to `VMA_DEBUG_LOG_FORMAT`, which in turn defaults to a no-op. 
+If you're having trouble with leaks - for example, the aforementioned assertion triggers but you don't
+quite know \em why - overriding this macro to print out the leaking blocks, combined with assigning
+individual names to allocations using vmaSetAllocationName(), can greatly aid in fixing them.
+
+\page other_api_interop Interop with other graphics APIs
+
+VMA provides some features that help with interoperability with other graphics APIs, e.g. OpenGL.
+
+\section opengl_interop_exporting_memory Exporting memory
+
+If you want to attach `VkExportMemoryAllocateInfoKHR` or another structure to the `pNext` chain of memory allocations made by the library:
+
+You can create \ref custom_memory_pools for such allocations.
+Define and fill in your `VkExportMemoryAllocateInfoKHR` structure and attach it to VmaPoolCreateInfo::pMemoryAllocateNext
+while creating the custom pool.
+Please note that the structure must remain alive and unchanged for the whole lifetime of the #VmaPool,
+not only while creating it, as no copy of the structure is made -
+its original pointer is used for each allocation instead.
+
+If you want to export all memory allocated by VMA from certain memory types,
+including dedicated allocations and other allocations made from default pools,
+an alternative solution is to fill in VmaAllocatorCreateInfo::pTypeExternalMemoryHandleTypes.
+It should point to an array with `VkExternalMemoryHandleTypeFlagsKHR` to be automatically passed by the library
+through `VkExportMemoryAllocateInfoKHR` on each allocation made from a specific memory type.
+Please note that new versions of the library also support dedicated allocations created in custom pools.
+
+You should not mix these two methods in a way that applies both to the same memory type.
+Otherwise, the `VkExportMemoryAllocateInfoKHR` structure would be attached twice to the `pNext` chain of `VkMemoryAllocateInfo`.
+
+
+\section opengl_interop_custom_alignment Custom alignment
+
+Buffers or images exported to a different API like OpenGL may require a different alignment,
+higher than the one used by the library automatically, queried from functions like `vkGetBufferMemoryRequirements`.
+To impose such alignment:
+
+You can create \ref custom_memory_pools for such allocations.
+Set the VmaPoolCreateInfo::minAllocationAlignment member to the minimum alignment required for each allocation
+made out of this pool.
+The alignment actually used will be the maximum of this member and the alignment returned for the specific buffer or image
+from a function like `vkGetBufferMemoryRequirements`, which is called by VMA automatically.
+
+If you want to create a buffer with a specific minimum alignment out of default pools,
+use the special function vmaCreateBufferWithAlignment(), which takes an additional parameter `minAlignment`.
+
+Note that the alignment problem affects only resources placed inside bigger `VkDeviceMemory` blocks and not dedicated
+allocations, as these are, by definition, bound at offset 0 of their own dedicated block, so any alignment requirement is trivially satisfied.
+You can ensure that an allocation is created as dedicated by using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT.
+Contrary to Direct3D 12, Vulkan doesn't have a concept of alignment of the entire memory block passed on its allocation.
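+
+As a rough illustration of the vmaCreateBufferWithAlignment() path described above
+(the buffer size, usage flags, and the 4096-byte minimum alignment are made-up example values):
+
+\code
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = 65536;
+bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+VkBuffer buf;
+VmaAllocation alloc;
+// Works like vmaCreateBuffer(), but additionally guarantees that the allocation offset
+// is a multiple of the `minAlignment` argument (4096 here).
+VkResult res = vmaCreateBufferWithAlignment(allocator, &bufCreateInfo, &allocCreateInfo,
+    4096, &buf, &alloc, nullptr);
+// Check res...
+\endcode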
+ +\section opengl_interop_extended_allocation_information Extended allocation information + +If you want to rely on VMA to allocate your buffers and images inside larger memory blocks, +but you need to know the size of the entire block and whether the allocation was made +with its own dedicated memory, use function vmaGetAllocationInfo2() to retrieve +extended allocation information in structure #VmaAllocationInfo2. + + + +\page usage_patterns Recommended usage patterns + +Vulkan gives great flexibility in memory allocation. +This chapter shows the most common patterns. + +See also slides from talk: +[Sawicki, Adam. Advanced Graphics Techniques Tutorial: Memory management in Vulkan and DX12. Game Developers Conference, 2018](https://www.gdcvault.com/play/1025458/Advanced-Graphics-Techniques-Tutorial-New) + + +\section usage_patterns_gpu_only GPU-only resource + +<b>When:</b> +Any resources that you frequently write and read on GPU, +e.g. images used as color attachments (aka "render targets"), depth-stencil attachments, +images/buffers used as storage image/buffer (aka "Unordered Access View (UAV)"). + +<b>What to do:</b> +Let the library select the optimal memory type, which will likely have `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. + +\code +VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; +imgCreateInfo.imageType = VK_IMAGE_TYPE_2D; +imgCreateInfo.extent.width = 3840; +imgCreateInfo.extent.height = 2160; +imgCreateInfo.extent.depth = 1; +imgCreateInfo.mipLevels = 1; +imgCreateInfo.arrayLayers = 1; +imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM; +imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; +imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; +imgCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; +imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; +allocCreateInfo.priority = 1.0f; + +VkImage img; +VmaAllocation alloc; +vmaCreateImage(allocator, &imgCreateInfo, &allocCreateInfo, &img, &alloc, nullptr); +\endcode + +<b>Also consider:</b> +Consider creating them as dedicated allocations using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT, +especially if they are large or if you plan to destroy and recreate them with different sizes +e.g. when display resolution changes. +Prefer to create such resources first and all other GPU resources (like textures and vertex buffers) later. +When VK_EXT_memory_priority extension is enabled, it is also worth setting high priority to such allocation +to decrease chances to be evicted to system memory by the operating system. + +\section usage_patterns_staging_copy_upload Staging copy for upload + +<b>When:</b> +A "staging" buffer than you want to map and fill from CPU code, then use as a source of transfer +to some GPU resource. + +<b>What to do:</b> +Use flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT. +Let the library select the optimal memory type, which will always have `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`. 
+ +\code +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = 65536; +bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | +    VMA_ALLOCATION_CREATE_MAPPED_BIT; + +VkBuffer buf; +VmaAllocation alloc; +VmaAllocationInfo allocInfo; +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); + +... + +memcpy(allocInfo.pMappedData, myData, myDataSize); +\endcode + +<b>Also consider:</b> +You can map the allocation using vmaMapMemory() or you can create it as persistenly mapped +using #VMA_ALLOCATION_CREATE_MAPPED_BIT, as in the example above. + + +\section usage_patterns_readback Readback + +<b>When:</b> +Buffers for data written by or transferred from the GPU that you want to read back on the CPU, +e.g. results of some computations. + +<b>What to do:</b> +Use flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT. +Let the library select the optimal memory type, which will always have `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` +and `VK_MEMORY_PROPERTY_HOST_CACHED_BIT`. + +\code +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = 65536; +bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT | +    VMA_ALLOCATION_CREATE_MAPPED_BIT; + +VkBuffer buf; +VmaAllocation alloc; +VmaAllocationInfo allocInfo; +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); + +... + +const float* downloadedData = (const float*)allocInfo.pMappedData; +\endcode + + +\section usage_patterns_advanced_data_uploading Advanced data uploading + +For resources that you frequently write on CPU via mapped pointer and +frequently read on GPU e.g. as a uniform buffer (also called "dynamic"), multiple options are possible: + +-# Easiest solution is to have one copy of the resource in `HOST_VISIBLE` memory, +   even if it means system RAM (not `DEVICE_LOCAL`) on systems with a discrete graphics card, +   and make the device reach out to that resource directly. +   - Reads performed by the device will then go through PCI Express bus. +     The performance of this access may be limited, but it may be fine depending on the size +     of this resource (whether it is small enough to quickly end up in GPU cache) and the sparsity +     of access. +-# On systems with unified memory (e.g. AMD APU or Intel integrated graphics, mobile chips), +   a memory type may be available that is both `HOST_VISIBLE` (available for mapping) and `DEVICE_LOCAL` +   (fast to access from the GPU). Then, it is likely the best choice for such type of resource. +-# Systems with a discrete graphics card and separate video memory may or may not expose +   a memory type that is both `HOST_VISIBLE` and `DEVICE_LOCAL`, also known as Base Address Register (BAR). +   If they do, it represents a piece of VRAM (or entire VRAM, if ReBAR is enabled in the motherboard BIOS) +   that is available to CPU for mapping. +   - Writes performed by the host to that memory go through PCI Express bus. 
+     The performance of these writes may be limited, but it may be fine, especially on PCIe 4.0, +     as long as rules of using uncached and write-combined memory are followed - only sequential writes and no reads. +-# Finally, you may need or prefer to create a separate copy of the resource in `DEVICE_LOCAL` memory, +   a separate "staging" copy in `HOST_VISIBLE` memory and perform an explicit transfer command between them. + +Thankfully, VMA offers an aid to create and use such resources in the the way optimal +for the current Vulkan device. To help the library make the best choice, +use flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT together with +#VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT. +It will then prefer a memory type that is both `DEVICE_LOCAL` and `HOST_VISIBLE` (integrated memory or BAR), +but if no such memory type is available or allocation from it fails +(PC graphics cards have only 256 MB of BAR by default, unless ReBAR is supported and enabled in BIOS), +it will fall back to `DEVICE_LOCAL` memory for fast GPU access. +It is then up to you to detect that the allocation ended up in a memory type that is not `HOST_VISIBLE`, +so you need to create another "staging" allocation and perform explicit transfers. + +\code +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = 65536; +bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | +    VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT | +    VMA_ALLOCATION_CREATE_MAPPED_BIT; + +VkBuffer buf; +VmaAllocation alloc; +VmaAllocationInfo allocInfo; +VkResult result = vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); +// Check result... + +VkMemoryPropertyFlags memPropFlags; +vmaGetAllocationMemoryProperties(allocator, alloc, &memPropFlags); + +if(memPropFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) +{ +    // Allocation ended up in a mappable memory and is already mapped - write to it directly. + +    // [Executed in runtime]: +    memcpy(allocInfo.pMappedData, myData, myDataSize); +    result = vmaFlushAllocation(allocator, alloc, 0, VK_WHOLE_SIZE); +    // Check result... + +    VkBufferMemoryBarrier bufMemBarrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER }; +    bufMemBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; +    bufMemBarrier.dstAccessMask = VK_ACCESS_UNIFORM_READ_BIT; +    bufMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; +    bufMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; +    bufMemBarrier.buffer = buf; +    bufMemBarrier.offset = 0; +    bufMemBarrier.size = VK_WHOLE_SIZE; + +    vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, +        0, 0, nullptr, 1, &bufMemBarrier, 0, nullptr); +} +else +{ +    // Allocation ended up in a non-mappable memory - a transfer using a staging buffer is required. 
+    VkBufferCreateInfo stagingBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +    stagingBufCreateInfo.size = 65536; +    stagingBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + +    VmaAllocationCreateInfo stagingAllocCreateInfo = {}; +    stagingAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +    stagingAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | +        VMA_ALLOCATION_CREATE_MAPPED_BIT; + +    VkBuffer stagingBuf; +    VmaAllocation stagingAlloc; +    VmaAllocationInfo stagingAllocInfo; +    result = vmaCreateBuffer(allocator, &stagingBufCreateInfo, &stagingAllocCreateInfo, +        &stagingBuf, &stagingAlloc, &stagingAllocInfo); +    // Check result... + +    // [Executed in runtime]: +    memcpy(stagingAllocInfo.pMappedData, myData, myDataSize); +    result = vmaFlushAllocation(allocator, stagingAlloc, 0, VK_WHOLE_SIZE); +    // Check result... + +    VkBufferMemoryBarrier bufMemBarrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER }; +    bufMemBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; +    bufMemBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; +    bufMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; +    bufMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; +    bufMemBarrier.buffer = stagingBuf; +    bufMemBarrier.offset = 0; +    bufMemBarrier.size = VK_WHOLE_SIZE; + +    vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, +        0, 0, nullptr, 1, &bufMemBarrier, 0, nullptr); + +    VkBufferCopy bufCopy = { +        0, // srcOffset +        0, // dstOffset, +        myDataSize, // size +    }; + +    vkCmdCopyBuffer(cmdBuf, stagingBuf, buf, 1, &bufCopy); + +    VkBufferMemoryBarrier bufMemBarrier2 = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER }; +    bufMemBarrier2.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; +    bufMemBarrier2.dstAccessMask = VK_ACCESS_UNIFORM_READ_BIT; // We created a uniform buffer +    bufMemBarrier2.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; +    bufMemBarrier2.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; +    bufMemBarrier2.buffer = buf; +    bufMemBarrier2.offset = 0; +    bufMemBarrier2.size = VK_WHOLE_SIZE; + +    vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, +        0, 0, nullptr, 1, &bufMemBarrier2, 0, nullptr); +} +\endcode + +\section usage_patterns_other_use_cases Other use cases + +Here are some other, less obvious use cases and their recommended settings: + +- An image that is used only as transfer source and destination, but it should stay on the device, +  as it is used to temporarily store a copy of some texture, e.g. from the current to the next frame, +  for temporal antialiasing or other temporal effects. +  - Use `VkImageCreateInfo::usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT` +  - Use VmaAllocationCreateInfo::usage = #VMA_MEMORY_USAGE_AUTO +- An image that is used only as transfer source and destination, but it should be placed +  in the system RAM despite it doesn't need to be mapped, because it serves as a "swap" copy to evict +  least recently used textures from VRAM. +  - Use `VkImageCreateInfo::usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT` +  - Use VmaAllocationCreateInfo::usage = #VMA_MEMORY_USAGE_AUTO_PREFER_HOST, +    as VMA needs a hint here to differentiate from the previous case. +- A buffer that you want to map and write from the CPU, directly read from the GPU +  (e.g. 
as a uniform or vertex buffer), but you have a clear preference to place it in device or +  host memory due to its large size. +  - Use `VkBufferCreateInfo::usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT` +  - Use VmaAllocationCreateInfo::usage = #VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE or #VMA_MEMORY_USAGE_AUTO_PREFER_HOST +  - Use VmaAllocationCreateInfo::flags = #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT + + +\page configuration Configuration + +Please check "CONFIGURATION SECTION" in the code to find macros that you can define +before each include of this file or change directly in this file to provide +your own implementation of basic facilities like assert, `min()` and `max()` functions, +mutex, atomic etc. + +For example, define `VMA_ASSERT(expr)` before including the library to provide +custom implementation of the assertion, compatible with your project. +By default it is defined to standard C `assert(expr)` in `_DEBUG` configuration +and empty otherwise. + +Similarly, you can define `VMA_LEAK_LOG_FORMAT` macro to enable printing of leaked (unfreed) allocations, +including their names and other parameters. Example: + +\code +#define VMA_LEAK_LOG_FORMAT(format, ...) do { \ +        printf((format), __VA_ARGS__); \ +        printf("\n"); \ +    } while(false) +\endcode + +\section config_Vulkan_functions Pointers to Vulkan functions + +There are multiple ways to import pointers to Vulkan functions in the library. +In the simplest case you don't need to do anything. +If the compilation or linking of your program or the initialization of the #VmaAllocator +doesn't work for you, you can try to reconfigure it. + +First, the allocator tries to fetch pointers to Vulkan functions linked statically, +like this: + +\code +m_VulkanFunctions.vkAllocateMemory = (PFN_vkAllocateMemory)vkAllocateMemory; +\endcode + +If you want to disable this feature, set configuration macro: `#define VMA_STATIC_VULKAN_FUNCTIONS 0`. + +Second, you can provide the pointers yourself by setting member VmaAllocatorCreateInfo::pVulkanFunctions. +You can fetch them e.g. using functions `vkGetInstanceProcAddr` and `vkGetDeviceProcAddr` or +by using a helper library like [volk](https://github.com/zeux/volk). + +Third, VMA tries to fetch remaining pointers that are still null by calling +`vkGetInstanceProcAddr` and `vkGetDeviceProcAddr` on its own. +You need to only fill in VmaVulkanFunctions::vkGetInstanceProcAddr and VmaVulkanFunctions::vkGetDeviceProcAddr. +Other pointers will be fetched automatically. +If you want to disable this feature, set configuration macro: `#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0`. + +Finally, all the function pointers required by the library (considering selected +Vulkan version and enabled extensions) are checked with `VMA_ASSERT` if they are not null. + + +\section custom_memory_allocator Custom host memory allocator + +If you use custom allocator for CPU memory rather than default operator `new` +and `delete` from C++, you can make this library using your allocator as well +by filling optional member VmaAllocatorCreateInfo::pAllocationCallbacks. These +functions will be passed to Vulkan, as well as used by the library itself to +make any CPU-side allocations. + +\section allocation_callbacks Device memory allocation callbacks + +The library makes calls to `vkAllocateMemory()` and `vkFreeMemory()` internally. +You can setup callbacks to be informed about these calls, e.g. for the purpose +of gathering some statistics. 
To do it, fill optional member
+VmaAllocatorCreateInfo::pDeviceMemoryCallbacks.
+
+\section heap_memory_limit Device heap memory limit
+
+When device memory of a certain heap runs out of free space, new allocations may
+fail (returning an error code) or they may succeed, silently pushing some existing
+memory blocks from GPU VRAM to system RAM (which degrades performance). This
+behavior is implementation-dependent - it depends on the GPU vendor and graphics
+driver.
+
+On AMD cards it can be controlled while creating the Vulkan device object by using
+the VK_AMD_memory_overallocation_behavior extension, if available.
+
+Alternatively, if you want to test how your program behaves with a limited amount of Vulkan device
+memory available without switching your graphics card to one that really has
+smaller VRAM, you can use a feature of this library intended for this purpose.
+To do it, fill optional member VmaAllocatorCreateInfo::pHeapSizeLimit.
+
+
+
+\page vk_khr_dedicated_allocation VK_KHR_dedicated_allocation
+
+VK_KHR_dedicated_allocation is a Vulkan extension which can be used to improve
+performance on some GPUs. It augments the Vulkan API with the possibility to query
+the driver whether it prefers a particular buffer or image to have its own, dedicated
+allocation (separate `VkDeviceMemory` block) for better efficiency - to be able
+to do some internal optimizations. The extension is supported by this library.
+It will be used automatically when enabled.
+
+It has been promoted to core Vulkan 1.1, so if you use an eligible Vulkan version
+and inform VMA about it by setting VmaAllocatorCreateInfo::vulkanApiVersion,
+you are all set.
+
+Otherwise, if you want to use it as an extension:
+
+1 . When creating the Vulkan device, check if the following 2 device extensions are
+supported (call `vkEnumerateDeviceExtensionProperties()`).
+If yes, enable them (fill `VkDeviceCreateInfo::ppEnabledExtensionNames`).
+
+- VK_KHR_get_memory_requirements2
+- VK_KHR_dedicated_allocation
+
+If you enabled these extensions:
+
+2 . Use the #VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT flag when creating
+your #VmaAllocator to inform the library that you enabled the required extensions
+and you want the library to use them.
+
+\code
+allocatorInfo.flags |= VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT;
+
+vmaCreateAllocator(&allocatorInfo, &allocator);
+\endcode
+
+That is all. The extension will be automatically used whenever you create a
+buffer using vmaCreateBuffer() or an image using vmaCreateImage().
+
+When using the extension together with the Vulkan Validation Layer, you will receive
+warnings like this:
+
+_vkBindBufferMemory(): Binding memory to buffer 0x33 but vkGetBufferMemoryRequirements() has not been called on that buffer._
+
+It is OK, you should just ignore it. It happens because you use function
+`vkGetBufferMemoryRequirements2KHR()` instead of standard
+`vkGetBufferMemoryRequirements()`, while the validation layer seems to be
+unaware of it.
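+
+If you target Vulkan 1.1 or later, a minimal sketch of the core-version path mentioned above could
+look as follows (the rest of `allocatorInfo` is assumed to be filled in as usual; no extension flag is needed):
+
+\code
+allocatorInfo.vulkanApiVersion = VK_API_VERSION_1_1;
+
+vmaCreateAllocator(&allocatorInfo, &allocator);
+\endcode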
+
+To learn more about this extension, see:
+
+- [VK_KHR_dedicated_allocation in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap50.html#VK_KHR_dedicated_allocation)
+- [VK_KHR_dedicated_allocation unofficial manual](http://asawicki.info/articles/VK_KHR_dedicated_allocation.php5)
+
+
+
+\page vk_ext_memory_priority VK_EXT_memory_priority
+
+VK_EXT_memory_priority is a device extension that allows to pass additional "priority"
+value to Vulkan memory allocations that the implementation may use to prefer certain
+buffers and images that are critical for performance to stay in device-local memory
+in cases when the memory is over-subscribed, while some others may be moved to the system memory.
+
+VMA offers convenient usage of this extension.
+If you enable it, you can pass "priority" parameter when creating allocations or custom pools
+and the library automatically passes the value to Vulkan using this extension.
+
+If you want to use this extension in connection with VMA, follow these steps:
+
+\section vk_ext_memory_priority_initialization Initialization
+
+1) Call `vkEnumerateDeviceExtensionProperties` for the physical device.
+Check if the extension is supported - if returned array of `VkExtensionProperties` contains "VK_EXT_memory_priority".
+
+2) Call `vkGetPhysicalDeviceFeatures2` for the physical device instead of old `vkGetPhysicalDeviceFeatures`.
+Attach additional structure `VkPhysicalDeviceMemoryPriorityFeaturesEXT` to `VkPhysicalDeviceFeatures2::pNext` to be returned.
+Check if the device feature is really supported - check if `VkPhysicalDeviceMemoryPriorityFeaturesEXT::memoryPriority` is true.
+
+3) While creating device with `vkCreateDevice`, enable this extension - add "VK_EXT_memory_priority"
+to the list passed as `VkDeviceCreateInfo::ppEnabledExtensionNames`.
+
+4) While creating the device, also don't set `VkDeviceCreateInfo::pEnabledFeatures`.
+Fill in `VkPhysicalDeviceFeatures2` structure instead and pass it as `VkDeviceCreateInfo::pNext`.
+Enable this device feature - attach additional structure `VkPhysicalDeviceMemoryPriorityFeaturesEXT` to
+`VkPhysicalDeviceFeatures2::pNext` chain and set its member `memoryPriority` to `VK_TRUE`.
+
+5) While creating #VmaAllocator with vmaCreateAllocator() inform VMA that you
+have enabled this extension and feature - add #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT
+to VmaAllocatorCreateInfo::flags.
+
+\section vk_ext_memory_priority_usage Usage
+
+When using this extension, you should initialize the following members:
+
+- VmaAllocationCreateInfo::priority when creating a dedicated allocation with #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT.
+- VmaPoolCreateInfo::priority when creating a custom pool.
+
+It should be a floating-point value between `0.0f` and `1.0f`, where the recommended default is `0.5f`.
+Memory allocated with higher value can be treated by the Vulkan implementation as higher priority
+and so it can have lower chances of being pushed out to system memory, experiencing degraded performance.
+
+It might be a good idea to create performance-critical resources like color-attachment or depth-stencil images
+as dedicated and set high priority to them.
For example: + +\code +VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; +imgCreateInfo.imageType = VK_IMAGE_TYPE_2D; +imgCreateInfo.extent.width = 3840; +imgCreateInfo.extent.height = 2160; +imgCreateInfo.extent.depth = 1; +imgCreateInfo.mipLevels = 1; +imgCreateInfo.arrayLayers = 1; +imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM; +imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; +imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; +imgCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; +imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; +allocCreateInfo.priority = 1.0f; + +VkImage img; +VmaAllocation alloc; +vmaCreateImage(allocator, &imgCreateInfo, &allocCreateInfo, &img, &alloc, nullptr); +\endcode + +`priority` member is ignored in the following situations: + +- Allocations created in custom pools: They inherit the priority, along with all other allocation parameters +  from the parameters passed in #VmaPoolCreateInfo when the pool was created. +- Allocations created in default pools: They inherit the priority from the parameters +  VMA used when creating default pools, which means `priority == 0.5f`. + + +\page vk_amd_device_coherent_memory VK_AMD_device_coherent_memory + +VK_AMD_device_coherent_memory is a device extension that enables access to +additional memory types with `VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD` and +`VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD` flag. It is useful mostly for +allocation of buffers intended for writing "breadcrumb markers" in between passes +or draw calls, which in turn are useful for debugging GPU crash/hang/TDR cases. + +When the extension is available but has not been enabled, Vulkan physical device +still exposes those memory types, but their usage is forbidden. VMA automatically +takes care of that - it returns `VK_ERROR_FEATURE_NOT_PRESENT` when an attempt +to allocate memory of such type is made. + +If you want to use this extension in connection with VMA, follow these steps: + +\section vk_amd_device_coherent_memory_initialization Initialization + +1) Call `vkEnumerateDeviceExtensionProperties` for the physical device. +Check if the extension is supported - if returned array of `VkExtensionProperties` contains "VK_AMD_device_coherent_memory". + +2) Call `vkGetPhysicalDeviceFeatures2` for the physical device instead of old `vkGetPhysicalDeviceFeatures`. +Attach additional structure `VkPhysicalDeviceCoherentMemoryFeaturesAMD` to `VkPhysicalDeviceFeatures2::pNext` to be returned. +Check if the device feature is really supported - check if `VkPhysicalDeviceCoherentMemoryFeaturesAMD::deviceCoherentMemory` is true. + +3) While creating device with `vkCreateDevice`, enable this extension - add "VK_AMD_device_coherent_memory" +to the list passed as `VkDeviceCreateInfo::ppEnabledExtensionNames`. + +4) While creating the device, also don't set `VkDeviceCreateInfo::pEnabledFeatures`. +Fill in `VkPhysicalDeviceFeatures2` structure instead and pass it as `VkDeviceCreateInfo::pNext`. +Enable this device feature - attach additional structure `VkPhysicalDeviceCoherentMemoryFeaturesAMD` to +`VkPhysicalDeviceFeatures2::pNext` and set its member `deviceCoherentMemory` to `VK_TRUE`. 
+
+5) While creating #VmaAllocator with vmaCreateAllocator() inform VMA that you
+have enabled this extension and feature - add #VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT
+to VmaAllocatorCreateInfo::flags.
+
+\section vk_amd_device_coherent_memory_usage Usage
+
+After following steps described above, you can create VMA allocations and custom pools
+out of the special `DEVICE_COHERENT` and `DEVICE_UNCACHED` memory types on eligible
+devices. There are multiple ways to do it, for example:
+
+- You can request or prefer to allocate out of such memory types by adding
+  `VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD` to VmaAllocationCreateInfo::requiredFlags
+  or VmaAllocationCreateInfo::preferredFlags. Those flags can be freely mixed with
+  other ways of \ref choosing_memory_type, like setting VmaAllocationCreateInfo::usage.
+- If you manually found memory type index to use for this purpose, force allocation
+  from this specific index by setting VmaAllocationCreateInfo::memoryTypeBits `= 1u << index`.
+
+\section vk_amd_device_coherent_memory_more_information More information
+
+To learn more about this extension, see [VK_AMD_device_coherent_memory in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VK_AMD_device_coherent_memory.html)
+
+Example use of this extension can be found in the code of the sample and test suite
+accompanying this library.
+
+
+\page vk_khr_external_memory_win32 VK_KHR_external_memory_win32
+
+On Windows, the VK_KHR_external_memory_win32 device extension allows exporting a Win32 `HANDLE`
+of a `VkDeviceMemory` block, to be able to reference the memory on other Vulkan logical devices or instances,
+in multiple processes, and/or in multiple APIs.
+VMA offers support for it.
+
+\section vk_khr_external_memory_win32_initialization Initialization
+
+1) Make sure the extension is defined in the code by including following header before including VMA:
+
+\code
+#include <vulkan/vulkan_win32.h>
+\endcode
+
+2) Check if "VK_KHR_external_memory_win32" is available among device extensions.
+Enable it when creating the `VkDevice` object.
+
+3) Enable the usage of this extension in VMA by setting flag #VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT
+when calling vmaCreateAllocator().
+
+4) Make sure that VMA has access to the `vkGetMemoryWin32HandleKHR` function by either enabling `VMA_DYNAMIC_VULKAN_FUNCTIONS` macro
+or setting VmaVulkanFunctions::vkGetMemoryWin32HandleKHR explicitly.
+For more information, see \ref quick_start_initialization_importing_vulkan_functions.
+
+\section vk_khr_external_memory_win32_preparations Preparations
+
+You can find example usage among tests, in file "Tests.cpp", function `TestWin32Handles()`.
+
+To use the extension, buffers need to be created with `VkExternalMemoryBufferCreateInfoKHR` attached to their `pNext` chain,
+and memory allocations need to be made with `VkExportMemoryAllocateInfoKHR` attached to their `pNext` chain.
+To make use of them, you need to use \ref custom_memory_pools. Example:
+
+\code
+// Define an example buffer and allocation parameters.
+VkExternalMemoryBufferCreateInfoKHR externalMemBufCreateInfo = {
+    VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR,
+    nullptr,
+    VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
+};
+VkBufferCreateInfo exampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+exampleBufCreateInfo.size = 0x10000; // Doesn't matter here.
+exampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+exampleBufCreateInfo.pNext = &externalMemBufCreateInfo;
+
+VmaAllocationCreateInfo exampleAllocCreateInfo = {};
+exampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+// Find memory type index to use for the custom pool.
+uint32_t memTypeIndex;
+VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_Allocator,
+    &exampleBufCreateInfo, &exampleAllocCreateInfo, &memTypeIndex);
+// Check res...
+
+// Create a custom pool.
+constexpr static VkExportMemoryAllocateInfoKHR exportMemAllocInfo = {
+    VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR,
+    nullptr,
+    VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
+};
+VmaPoolCreateInfo poolCreateInfo = {};
+poolCreateInfo.memoryTypeIndex = memTypeIndex;
+poolCreateInfo.pMemoryAllocateNext = (void*)&exportMemAllocInfo;
+
+VmaPool pool;
+res = vmaCreatePool(g_Allocator, &poolCreateInfo, &pool);
+// Check res...
+
+// YOUR OTHER CODE COMES HERE....
+
+// At the end, don't forget to destroy it!
+vmaDestroyPool(g_Allocator, pool);
+\endcode
+
+Note that the structure passed as VmaPoolCreateInfo::pMemoryAllocateNext must remain alive and unchanged
+for the whole lifetime of the custom pool, because it will be used when the pool allocates a new device memory block.
+No copy is made internally. This is why variable `exportMemAllocInfo` is defined as `static`.
+
+\section vk_khr_external_memory_win32_memory_allocation Memory allocation
+
+Finally, you can create a buffer with an allocation out of the custom pool.
+The buffer should use the same flags as the sample buffer used to find the memory type.
+It should also specify `VkExternalMemoryBufferCreateInfoKHR` in its `pNext` chain.
+
+\code
+VkExternalMemoryBufferCreateInfoKHR externalMemBufCreateInfo = {
+    VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR,
+    nullptr,
+    VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
+};
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = // Your desired buffer size.
+bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+bufCreateInfo.pNext = &externalMemBufCreateInfo;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.pool = pool;  // It is enough to set this one member.
+
+VkBuffer buf;
+VmaAllocation alloc;
+res = vmaCreateBuffer(g_Allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr);
+// Check res...
+
+// YOUR OTHER CODE COMES HERE....
+
+// At the end, don't forget to destroy it!
+vmaDestroyBuffer(g_Allocator, buf, alloc);
+\endcode
+
+If you need each allocation to have its own device memory block and start at offset 0, you can still do it
+by using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT flag. It also works with custom pools.
+
+\section vk_khr_external_memory_win32_exporting_win32_handle Exporting Win32 handle
+
+After the allocation is created, you can acquire a Win32 `HANDLE` to the `VkDeviceMemory` block it belongs to.
+VMA function vmaGetMemoryWin32Handle() is a replacement for the Vulkan function `vkGetMemoryWin32HandleKHR`.
+
+\code
+HANDLE handle;
+res = vmaGetMemoryWin32Handle(g_Allocator, alloc, nullptr, &handle);
+// Check res...
+
+// YOUR OTHER CODE COMES HERE....
+
+// At the end, you must close the handle.
+CloseHandle(handle);
+\endcode
+
+Documentation of the VK_KHR_external_memory_win32 extension states that:
+
+> If handleType is defined as an NT handle, vkGetMemoryWin32HandleKHR must be called no more than once for each valid unique combination of memory and handleType.
+
+This is ensured automatically inside VMA.
+The library fetches the handle on first use, remembers it internally, and closes it when the memory block or dedicated allocation is destroyed.
+Every time you call vmaGetMemoryWin32Handle(), VMA calls `DuplicateHandle` and returns a new handle that you need to close.
+
+For further information, please check documentation of the vmaGetMemoryWin32Handle() function.
+
+
+\page enabling_buffer_device_address Enabling buffer device address
+
+Device extension VK_KHR_buffer_device_address
+allows fetching a raw GPU pointer to a buffer and passing it for use in shader code.
+It has been promoted to core Vulkan 1.2.
+
+If you want to use this feature in connection with VMA, follow these steps:
+
+\section enabling_buffer_device_address_initialization Initialization
+
+1) (For Vulkan version < 1.2) Call `vkEnumerateDeviceExtensionProperties` for the physical device.
+Check if the extension is supported - if returned array of `VkExtensionProperties` contains
+"VK_KHR_buffer_device_address".
+
+2) Call `vkGetPhysicalDeviceFeatures2` for the physical device instead of old `vkGetPhysicalDeviceFeatures`.
+Attach additional structure `VkPhysicalDeviceBufferDeviceAddressFeatures*` to `VkPhysicalDeviceFeatures2::pNext` to be returned.
+Check if the device feature is really supported - check if `VkPhysicalDeviceBufferDeviceAddressFeatures::bufferDeviceAddress` is true.
+
+3) (For Vulkan version < 1.2) While creating device with `vkCreateDevice`, enable this extension - add
+"VK_KHR_buffer_device_address" to the list passed as `VkDeviceCreateInfo::ppEnabledExtensionNames`.
+
+4) While creating the device, also don't set `VkDeviceCreateInfo::pEnabledFeatures`.
+Fill in `VkPhysicalDeviceFeatures2` structure instead and pass it as `VkDeviceCreateInfo::pNext`.
+Enable this device feature - attach additional structure `VkPhysicalDeviceBufferDeviceAddressFeatures*` to
+`VkPhysicalDeviceFeatures2::pNext` and set its member `bufferDeviceAddress` to `VK_TRUE`.
+
+5) While creating #VmaAllocator with vmaCreateAllocator() inform VMA that you
+have enabled this feature - add #VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT
+to VmaAllocatorCreateInfo::flags.
+
+\section enabling_buffer_device_address_usage Usage
+
+After following steps described above, you can create buffers with `VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT*` using VMA.
+The library automatically adds `VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT*` to
+allocated memory blocks wherever it might be needed.
+
+Please note that the library supports only `VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT*`.
+The second part of this functionality related to "capture and replay" is not supported,
+as it is intended for usage in debugging tools like RenderDoc, not in everyday Vulkan usage.
+
+\section enabling_buffer_device_address_more_information More information
+
+To learn more about this extension, see [VK_KHR_buffer_device_address in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap46.html#VK_KHR_buffer_device_address)
+
+Example use of this extension can be found in the code of the sample and test suite
+accompanying this library.
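+
+In addition, for quick reference, here is a minimal sketch of the usage described above. It is
+not taken from the library's samples; it assumes `allocator` and `device` are already created and
+that the feature was enabled during device creation as described in the Initialization section:
+
+\code
+// Create a buffer that can be used with buffer device address.
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = 65536;
+bufCreateInfo.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+    VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+VkBuffer buf;
+VmaAllocation alloc;
+VkResult res = vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr);
+// Check res...
+
+// Query the raw GPU address of the buffer (core Vulkan 1.2).
+VkBufferDeviceAddressInfo addressInfo = { VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO };
+addressInfo.buffer = buf;
+VkDeviceAddress gpuAddress = vkGetBufferDeviceAddress(device, &addressInfo);
+// Pass gpuAddress to a shader, e.g. through a push constant or another buffer.
+\endcode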
+ +\page general_considerations General considerations + +\section general_considerations_thread_safety Thread safety + +- The library has no global state, so separate #VmaAllocator objects can be used +  independently. +  There should be no need to create multiple such objects though - one per `VkDevice` is enough. +- By default, all calls to functions that take #VmaAllocator as first parameter +  are safe to call from multiple threads simultaneously because they are +  synchronized internally when needed. +  This includes allocation and deallocation from default memory pool, as well as custom #VmaPool. +- When the allocator is created with #VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT +  flag, calls to functions that take such #VmaAllocator object must be +  synchronized externally. +- Access to a #VmaAllocation object must be externally synchronized. For example, +  you must not call vmaGetAllocationInfo() and vmaMapMemory() from different +  threads at the same time if you pass the same #VmaAllocation object to these +  functions. +- #VmaVirtualBlock is not safe to be used from multiple threads simultaneously. + +\section general_considerations_versioning_and_compatibility Versioning and compatibility + +The library uses [**Semantic Versioning**](https://semver.org/), +which means version numbers follow convention: Major.Minor.Patch (e.g. 2.3.0), where: + +- Incremented Patch version means a release is backward- and forward-compatible, +  introducing only some internal improvements, bug fixes, optimizations etc. +  or changes that are out of scope of the official API described in this documentation. +- Incremented Minor version means a release is backward-compatible, +  so existing code that uses the library should continue to work, while some new +  symbols could have been added: new structures, functions, new values in existing +  enums and bit flags, new structure members, but not new function parameters. +- Incrementing Major version means a release could break some backward compatibility. + +All changes between official releases are documented in file "CHANGELOG.md". + +\warning Backward compatibility is considered on the level of C++ source code, not binary linkage. +Adding new members to existing structures is treated as backward compatible if initializing +the new members to binary zero results in the old behavior. +You should always fully initialize all library structures to zeros and not rely on their +exact binary size. + +\section general_considerations_validation_layer_warnings Validation layer warnings + +When using this library, you can meet following types of warnings issued by +Vulkan validation layer. They don't necessarily indicate a bug, so you may need +to just ignore them. + +- *vkBindBufferMemory(): Binding memory to buffer 0xeb8e4 but vkGetBufferMemoryRequirements() has not been called on that buffer.* +  - It happens when VK_KHR_dedicated_allocation extension is enabled. +    `vkGetBufferMemoryRequirements2KHR` function is used instead, while validation layer seems to be unaware of it. +- *Mapping an image with layout VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL can result in undefined behavior if this memory is used by the device. Only GENERAL or PREINITIALIZED should be used.* +  - It happens when you map a buffer or image, because the library maps entire +    `VkDeviceMemory` block, where different types of images and buffers may end +    up together, especially on GPUs with unified memory like Intel. 
+- *Non-linear image 0xebc91 is aliased with linear buffer 0xeb8e4 which may indicate a bug.* +  - It may happen when you use [defragmentation](@ref defragmentation). + +\section general_considerations_allocation_algorithm Allocation algorithm + +The library uses following algorithm for allocation, in order: + +-# Try to find free range of memory in existing blocks. +-# If failed, try to create a new block of `VkDeviceMemory`, with preferred block size. +-# If failed, try to create such block with size / 2, size / 4, size / 8. +-# If failed, try to allocate separate `VkDeviceMemory` for this allocation, +   just like when you use #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. +-# If failed, choose other memory type that meets the requirements specified in +   VmaAllocationCreateInfo and go to point 1. +-# If failed, return `VK_ERROR_OUT_OF_DEVICE_MEMORY`. + +\section general_considerations_features_not_supported Features not supported + +Features deliberately excluded from the scope of this library: + +-# **Data transfer.** Uploading (streaming) and downloading data of buffers and images +   between CPU and GPU memory and related synchronization is responsibility of the user. +   Defining some "texture" object that would automatically stream its data from a +   staging copy in CPU memory to GPU memory would rather be a feature of another, +   higher-level library implemented on top of VMA. +   VMA doesn't record any commands to a `VkCommandBuffer`. It just allocates memory. +-# **Recreation of buffers and images.** Although the library has functions for +   buffer and image creation: vmaCreateBuffer(), vmaCreateImage(), you need to +   recreate these objects yourself after defragmentation. That is because the big +   structures `VkBufferCreateInfo`, `VkImageCreateInfo` are not stored in +   #VmaAllocation object. +-# **Handling CPU memory allocation failures.** When dynamically creating small C++ +   objects in CPU memory (not Vulkan memory), allocation failures are not checked +   and handled gracefully, because that would complicate code significantly and +   is usually not needed in desktop PC applications anyway. +   Success of an allocation is just checked with an assert. +-# **Code free of any compiler warnings.** Maintaining the library to compile and +   work correctly on so many different platforms is hard enough. Being free of +   any warnings, on any version of any compiler, is simply not feasible. +   There are many preprocessor macros that make some variables unused, function parameters unreferenced, +   or conditional expressions constant in some configurations. +   The code of this library should not be bigger or more complicated just to silence these warnings. +   It is recommended to disable such warnings instead. +-# This is a C++ library with C interface. **Bindings or ports to any other programming languages** are welcome as external projects but +   are not going to be included into this repository. +*/  | 
