-rw-r--r--  assets/model.obj            |    98
-rw-r--r--  assets/texture.png          |   bin 0 -> 5056 bytes
-rw-r--r--  meson.build                 |    48
-rw-r--r--  shaders/triangle.frag       |     5
-rw-r--r--  shaders/triangle.vert       |    26
-rw-r--r--  src/alloc.c                 |   171
-rw-r--r--  src/alloc.h                 |    40
-rw-r--r--  src/engine.c                |    17
-rw-r--r--  src/engine.h                |    14
-rw-r--r--  src/hash.c                  |    15
-rw-r--r--  src/hash.h                  |     9
-rw-r--r--  src/image.c                 |    20
-rw-r--r--  src/image.h                 |    21
-rw-r--r--  src/log.c                   |    13
-rw-r--r--  src/log.h                   |    21
-rw-r--r--  src/main.c                  |    28
-rw-r--r--  src/maths.c                 |   103
-rw-r--r--  src/maths.h                 |    49
-rw-r--r--  src/platform.c              |     5
-rw-r--r--  src/platform.h              |    98
-rw-r--r--  src/platform_sdl.c          |   124
-rw-r--r--  src/renderer.c              |  1470
-rw-r--r--  src/renderer.h              |    45
-rw-r--r--  src/renderer/renderer.c     |  2559
-rw-r--r--  src/renderer/renderer.h     |   126
-rw-r--r--  src/renderer/vma_usage.cpp  |     4
-rw-r--r--  src/renderer/vma_usage.h    |     6
-rw-r--r--  src/str.c                   |   181
-rw-r--r--  src/str.h                   |    62
-rw-r--r--  thirdpartylicenses.md       |    46
-rw-r--r--  vendor/stb_image.h          |  7988
-rw-r--r--  vendor/tiny_obj_loader_c.h  |  1793
-rw-r--r--  vendor/vk_mem_alloc.h       | 19111
33 files changed, 32614 insertions(+), 1702 deletions(-)
diff --git a/assets/model.obj b/assets/model.obj
new file mode 100644
index 0000000..ab344bb
--- /dev/null
+++ b/assets/model.obj
@@ -0,0 +1,98 @@
+# Blender 4.4.1
+# www.blender.org
+mtllib model.mtl
+o Cube
+v 1.000000 1.000000 -1.000000
+v 1.000000 -1.000000 -1.000000
+v 1.000000 1.000000 1.000000
+v 1.000000 -1.000000 1.000000
+v -1.000000 1.000000 -1.000000
+v -1.000000 -1.000000 -1.000000
+v -1.000000 1.000000 1.000000
+v -1.000000 -1.000000 1.000000
+v -1.637802 -1.000000 0.000000
+v 1.389749 1.000000 0.000000
+v -1.637802 1.000000 0.000000
+v 1.389749 -1.000000 0.000000
+v 0.000000 -1.000000 -1.000000
+v 0.000000 1.000000 1.000000
+v 0.000000 -1.000000 1.000000
+v 0.000000 1.000000 -1.000000
+v 0.000000 -1.000000 0.000000
+v 0.000000 1.905488 0.000000
+vn -0.3792 0.6859 0.6211
+vn -0.0000 -0.0000 1.0000
+vn -0.8431 -0.0000 -0.5377
+vn -0.0000 -1.0000 -0.0000
+vn 0.9317 -0.0000 0.3631
+vn -0.0000 -0.0000 -1.0000
+vn 0.9317 -0.0000 -0.3631
+vn -0.8431 -0.0000 0.5377
+vn -0.3792 0.6859 -0.6211
+vn -0.0000 1.0000 -0.0000
+vn 0.4349 0.6675 -0.6044
+vn 0.4349 0.6675 0.6044
+vt 0.875000 0.625000
+vt 0.750000 0.750000
+vt 0.750000 0.625000
+vt 0.625000 0.875000
+vt 0.375000 1.000000
+vt 0.375000 0.875000
+vt 0.625000 0.125000
+vt 0.375000 0.250000
+vt 0.375000 0.125000
+vt 0.375000 0.625000
+vt 0.250000 0.750000
+vt 0.250000 0.625000
+vt 0.625000 0.625000
+vt 0.375000 0.750000
+vt 0.625000 0.375000
+vt 0.375000 0.500000
+vt 0.375000 0.375000
+vt 0.625000 0.500000
+vt 0.250000 0.500000
+vt 0.625000 0.000000
+vt 0.375000 0.000000
+vt 0.750000 0.500000
+vt 0.125000 0.625000
+vt 0.125000 0.500000
+vt 0.625000 0.250000
+vt 0.125000 0.750000
+vt 0.625000 0.750000
+vt 0.875000 0.750000
+vt 0.625000 1.000000
+vt 0.875000 0.500000
+s 0
+usemtl Material
+f 11/1/1 14/2/1 18/3/1
+f 14/4/2 8/5/2 15/6/2
+f 11/7/3 6/8/3 9/9/3
+f 12/10/4 15/11/4 17/12/4
+f 10/13/5 4/14/5 12/10/5
+f 16/15/6 2/16/6 13/17/6
+f 1/18/7 12/10/7 2/16/7
+f 2/16/4 17/12/4 13/19/4
+f 7/20/8 9/9/8 8/21/8
+f 16/22/9 11/1/9 18/3/9
+f 16/22/10 10/13/10 1/18/10
+f 13/19/4 9/23/4 6/24/4
+f 5/25/6 13/17/6 6/8/6
+f 17/12/4 8/26/4 9/23/4
+f 3/27/2 15/6/2 4/14/2
+f 10/13/10 14/2/10 3/27/10
+f 11/1/10 7/28/10 14/2/10
+f 14/4/2 7/29/2 8/5/2
+f 11/7/3 5/25/3 6/8/3
+f 12/10/4 4/14/4 15/11/4
+f 10/13/5 3/27/5 4/14/5
+f 16/15/6 1/18/6 2/16/6
+f 1/18/7 10/13/7 12/10/7
+f 2/16/4 12/10/4 17/12/4
+f 7/20/8 11/7/8 9/9/8
+f 16/22/10 5/30/10 11/1/10
+f 16/22/11 18/3/11 10/13/11
+f 13/19/4 17/12/4 9/23/4
+f 5/25/6 16/15/6 13/17/6
+f 17/12/4 15/11/4 8/26/4
+f 3/27/2 14/4/2 15/6/2
+f 10/13/12 18/3/12 14/2/12
diff --git a/assets/texture.png b/assets/texture.png
Binary files differ
new file mode 100644
index 0000000..4065f75
--- /dev/null
+++ b/assets/texture.png
diff --git a/meson.build b/meson.build
index 6aa0ceb..8ea3009 100644
--- a/meson.build
+++ b/meson.build
@@ -1,8 +1,11 @@
-project('visible-gltf', 'c', default_options: ['warning_level=3', 'c_std=c23'])
+project('visiblegltf', [ 'c', 'cpp' ], default_options: ['c_std=c2x', 'cpp_std=c++20', 'warning_level=3'])
 
 build_type = get_option('buildtype')
 
 sdl3_dep = dependency('sdl3')
+
+vendor_incdir = include_directories('vendor', is_system: true)
+
 if host_machine.system() == 'darwin'
 moltenvk_library_path = '/Users/clements/dev/VulkanSDK/1.4.309.0/macOS/lib'
 moltenvk_include_path = '/Users/clements/dev/VulkanSDK/1.4.309.0/macOS/include'
@@ -10,7 +13,7 @@ vulkan_dep = declare_dependency(
   link_args: ['-L' + moltenvk_library_path, '-lvulkan'],
   include_directories:
include_directories(moltenvk_include_path)
 )
-else 
+else
 vulkan_dep = dependency('vulkan')
 endif
@@ -19,14 +22,39 @@ if build_type == 'debug'
   vgltf_c_args += '-DVGLTF_DEBUG'
 endif
 
-executable(
+if host_machine.system() == 'darwin'
+  vgltf_c_args += '-DVGLTF_PLATFORM_MACOS'
+elif host_machine.system() == 'linux'
+  vgltf_c_args += '-DVGLTF_PLATFORM_LINUX'
+elif host_machine.system() == 'windows'
+  vgltf_c_args += '-DVGLTF_PLATFORM_WINDOWS'
+endif
+
+vgltf_deps = [
+  sdl3_dep,
+  vulkan_dep,
+]
+
+vgltf_srcs = [
+  'src/main.c',
+  'src/log.c',
+  'src/maths.c',
+  'src/alloc.c',
+  'src/hash.c',
+  'src/str.c',
+  'src/platform.c',
+  'src/platform_sdl.c',
+  'src/image.c',
+  'src/renderer/renderer.c',
+  'src/renderer/vma_usage.cpp',
+  'src/engine.c',
+]
+
+vgltf_exe = executable(
   'vgltf',
-  [
-    'src/main.c',
-    'src/log.c',
-    'src/platform_sdl.c',
-    'src/renderer.c',
-  ],
+  vgltf_srcs,
   c_args: vgltf_c_args,
-  dependencies: [sdl3_dep, vulkan_dep],
+  dependencies: vgltf_deps,
+  link_language: 'cpp',
+  include_directories: [vendor_incdir]
 )
diff --git a/shaders/triangle.frag b/shaders/triangle.frag
index 7c5b0e7..c7d99f3 100644
--- a/shaders/triangle.frag
+++ b/shaders/triangle.frag
@@ -1,9 +1,12 @@
 #version 450
 
 layout(location = 0) in vec3 fragColor;
+layout(location = 1) in vec2 fragTextureCoordinates;
 
 layout(location = 0) out vec4 outColor;
 
+layout(binding = 1) uniform sampler2D textureSampler;
+
 void main() {
-    outColor = vec4(fragColor, 1.0);
+    outColor = vec4(fragColor * texture(textureSampler, fragTextureCoordinates).rgb, 1.0);
 }
diff --git a/shaders/triangle.vert b/shaders/triangle.vert
index f5b2f8d..bf93f44 100644
--- a/shaders/triangle.vert
+++ b/shaders/triangle.vert
@@ -1,20 +1,20 @@
 #version 450
 
-layout(location = 0) out vec3 fragColor;
+layout(location = 0) in vec3 inPosition;
+layout(location = 1) in vec3 inColor;
+layout(location = 2) in vec2 inTextureCoordinates;
 
-vec2 positions[3] = vec2[](
-    vec2(0.0, -0.5),
-    vec2(0.5, 0.5),
-    vec2(-0.5, 0.5)
-);
+layout(location = 0) out vec3 fragColor;
+layout(location = 1) out vec2 fragTextureCoordinates;
 
-vec3 colors[3] = vec3[](
-    vec3(1.0, 0.0, 0.0),
-    vec3(0.0, 1.0, 0.0),
-    vec3(0.0, 0.0, 1.0)
-);
+layout(set = 0, binding = 0) uniform UniformBufferObject {
+    mat4 model;
+    mat4 view;
+    mat4 projection;
+} ubo;
 
 void main() {
-    gl_Position = vec4(positions[gl_VertexIndex], 0.0, 1.0);
-    fragColor = colors[gl_VertexIndex];
+    gl_Position = ubo.projection * ubo.view * ubo.model * vec4(inPosition, 1.0);
+    fragColor = inColor;
+    fragTextureCoordinates = inTextureCoordinates;
 }
diff --git a/src/alloc.c b/src/alloc.c
new file mode 100644
index 0000000..2fb7a78
--- /dev/null
+++ b/src/alloc.c
@@ -0,0 +1,171 @@
+#include "alloc.h"
+#include "maths.h"
+#include "platform.h"
+#include <assert.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+void *vgltf_allocator_allocate(struct vgltf_allocator *allocator, size_t size) {
+  assert(allocator);
+  return allocator->allocate(size, allocator->ctx);
+}
+void *vgltf_allocator_allocate_aligned(struct vgltf_allocator *allocator,
+                                     size_t alignment, size_t size) {
+  assert(allocator);
+  return allocator->allocate_aligned(alignment, size, allocator->ctx);
+}
+void *vgltf_allocator_allocate_array(struct vgltf_allocator *allocator,
+                                   size_t count, size_t item_size) {
+  assert(allocator);
+  return
allocator->allocate_array(count, item_size, allocator->ctx); +} +void *vgltf_allocator_reallocate(struct vgltf_allocator *allocator, void *ptr, +                               size_t old_size, size_t new_size) { +  assert(allocator); +  return allocator->reallocate(ptr, old_size, new_size, allocator->ctx); +} +void vgltf_allocator_free(struct vgltf_allocator *allocator, void *ptr) { +  assert(allocator); +  allocator->free(ptr, allocator->ctx); +} + +static void *memory_allocate(size_t size, void *ctx) { +  (void)ctx; +  void *ptr = malloc(size); +  if (!ptr) { +    VGLTF_PANIC("Couldn't allocate memory (out of mem?)"); +  } +  return ptr; +} + +static void *memory_allocate_aligned(size_t alignment, size_t size, void *ctx) { +  (void)ctx; +#ifdef VGLTF_PLATFORM_WINDOWS +  void *ptr = _aligned_malloc(size, VGLTF_MAX(alignment, sizeof(void *))); +#else +  void *ptr = aligned_alloc(VGLTF_MAX(alignment, sizeof(void *)), size); +#endif +  if (!ptr) { +    VGLTF_PANIC("Couldn't allocate aligned memory (out of mem?)"); +  } +  return ptr; +} + +static void *memory_allocate_array(size_t count, size_t item_size, void *ctx) { +  (void)ctx; +  void *ptr = calloc(count, item_size); +  if (!ptr) { +    VGLTF_PANIC("Couldn't allocate memory (out of mem?)"); +  } +  return ptr; +} + +static void *memory_reallocate(void *ptr, size_t old_size, size_t new_size, +                               void *ctx) { +  (void)old_size; +  (void)ctx; +  ptr = realloc(ptr, new_size); +  if (!ptr) { +    VGLTF_PANIC("Couldn't allocate memory (out of mem?)"); +  } +  return ptr; +} + +static void memory_free(void *ptr, void *ctx) { +  (void)ctx; +  free(ptr); +} + +thread_local struct vgltf_allocator system_allocator = { +    .allocate = memory_allocate, +    .allocate_aligned = memory_allocate_aligned, +    .allocate_array = memory_allocate_array, +    .reallocate = memory_reallocate, +    .free = memory_free}; + +void vgltf_arena_init(struct vgltf_allocator *allocator, struct vgltf_arena *arena, +                    size_t size) { +  assert(allocator); +  assert(arena); +  arena->size = 0; +  arena->capacity = size; +  arena->data = vgltf_allocator_allocate(allocator, size); +} +void vgltf_arena_deinit(struct vgltf_allocator *allocator, +                      struct vgltf_arena *arena) { +  assert(allocator); +  assert(arena); +  vgltf_allocator_free(allocator, arena->data); +} +void *vgltf_arena_allocate(struct vgltf_arena *arena, size_t size) { +  assert(arena); +  assert(arena->size + size <= arena->capacity); +  void *ptr = arena->data + arena->size; +  arena->size += size; +  return ptr; +} + +void *vgltf_arena_allocate_array(struct vgltf_arena *arena, size_t count, +                               size_t item_size) { +  assert(arena); +  void *ptr = vgltf_arena_allocate(arena, count * item_size); +  memset(ptr, 0, count * item_size); +  return ptr; +} + +void vgltf_arena_reset(struct vgltf_arena *arena) { +  assert(arena); +  arena->size = 0; +} + +static void *arena_allocator_allocate(size_t size, void *ctx) { +  assert(ctx); +  return vgltf_arena_allocate(ctx, size); +} +static void *arena_allocator_allocate_aligned(size_t alignment, size_t size, +                                              void *ctx) { +  assert(ctx); +  if (alignment < sizeof(void *) || (alignment & (alignment - 1)) != 0) { +    return NULL; +  } + +  void *ptr = vgltf_arena_allocate(ctx, size + alignment - 1 + sizeof(void *)); +  if (!ptr) { +    return NULL; +  } + +  return (void *)(((uintptr_t)ptr + sizeof(void *) + alignment - 1) & +      
            ~(alignment - 1)); +} + +static void *arena_allocator_allocate_array(size_t count, size_t item_size, +                                            void *ctx) { +  assert(ctx); +  return vgltf_arena_allocate_array(ctx, count, item_size); +} + +static void *arena_allocator_reallocate(void *ptr, size_t old_size, +                                        size_t new_size, void *ctx) { +  assert(ptr); +  assert(ctx); + +  void *new_ptr = vgltf_arena_allocate(ctx, new_size); +  memcpy(new_ptr, ptr, old_size); +  return new_ptr; +} + +static void arena_allocator_free(void *ptr, void *ctx) { +  assert(ctx); +  (void)ptr; +} + +struct vgltf_allocator vgltf_arena_allocator(struct vgltf_arena *arena) { +  return (struct vgltf_allocator){ +      .ctx = arena, +      .allocate = arena_allocator_allocate, +      .allocate_aligned = arena_allocator_allocate_aligned, +      .allocate_array = arena_allocator_allocate_array, +      .reallocate = arena_allocator_reallocate, +      .free = arena_allocator_free}; +} diff --git a/src/alloc.h b/src/alloc.h new file mode 100644 index 0000000..bde1d55 --- /dev/null +++ b/src/alloc.h @@ -0,0 +1,40 @@ +#ifndef VGLTF_ALLOC_H +#define VGLTF_ALLOC_H + +#include <stddef.h> + +struct vgltf_allocator { +  void *(*allocate)(size_t size, void *ctx); +  void *(*allocate_aligned)(size_t alignment, size_t size, void *ctx); +  void *(*allocate_array)(size_t count, size_t item_size, void *ctx); +  void *(*reallocate)(void *ptr, size_t old_size, size_t new_size, void *ctx); +  void (*free)(void *ptr, void *ctx); +  void *ctx; +}; + +void *vgltf_allocator_allocate(struct vgltf_allocator *allocator, size_t size); +void *vgltf_allocator_allocate_aligned(struct vgltf_allocator *allocator, +                                     size_t alignment, size_t size); +void *vgltf_allocator_allocate_array(struct vgltf_allocator *allocator, +                                   size_t count, size_t item_size); +void *vgltf_allocator_reallocate(struct vgltf_allocator *allocator, void *ptr, +                               size_t old_size, size_t new_size); +void vgltf_allocator_free(struct vgltf_allocator *allocator, void *ptr); + +extern thread_local struct vgltf_allocator system_allocator; + +struct vgltf_arena { +  size_t capacity; +  size_t size; +  char *data; +}; +void vgltf_arena_init(struct vgltf_allocator *allocator, struct vgltf_arena *arena, +                    size_t size); +void vgltf_arena_deinit(struct vgltf_allocator *allocator, struct vgltf_arena *arena); +void *vgltf_arena_allocate(struct vgltf_arena *arena, size_t size); +void *vgltf_arena_allocate_array(struct vgltf_arena *arena, size_t count, +                               size_t item_size); +void vgltf_arena_reset(struct vgltf_arena *arena); +struct vgltf_allocator vgltf_arena_allocator(struct vgltf_arena *arena); + +#endif // VGLTF_ALLOC_H diff --git a/src/engine.c b/src/engine.c new file mode 100644 index 0000000..8904474 --- /dev/null +++ b/src/engine.c @@ -0,0 +1,17 @@ +#include "engine.h" + +bool vgltf_engine_init(struct vgltf_engine *engine, struct vgltf_platform *platform) { +  if (!vgltf_renderer_init(&engine->renderer, platform)) { +    goto err; +  } + +  return true; +err: +  return false; +} +void vgltf_engine_deinit(struct vgltf_engine *engine) { +  vgltf_renderer_deinit(&engine->renderer); +} +void vgltf_engine_run_frame(struct vgltf_engine *engine) { +  vgltf_renderer_render_frame(&engine->renderer); +} diff --git a/src/engine.h b/src/engine.h new file mode 100644 index 0000000..5a7bc2d --- /dev/null 
+++ b/src/engine.h @@ -0,0 +1,14 @@ +#ifndef VGLTF_ENGINE_H +#define VGLTF_ENGINE_H + +#include "renderer/renderer.h" + +struct vgltf_engine { +  struct vgltf_renderer renderer; +}; + +bool vgltf_engine_init(struct vgltf_engine *engine, struct vgltf_platform *platform); +void vgltf_engine_deinit(struct vgltf_engine *engine); +void vgltf_engine_run_frame(struct vgltf_engine *engine); + +#endif // VGLTF_ENGINE_H diff --git a/src/hash.c b/src/hash.c new file mode 100644 index 0000000..cfdafc3 --- /dev/null +++ b/src/hash.c @@ -0,0 +1,15 @@ +#include "hash.h" +#include <assert.h> + +uint64_t vgltf_hash_fnv_1a(const char *bytes, size_t nbytes) { +  assert(bytes); +  static const uint64_t FNV_OFFSET_BASIS = 14695981039346656037u; +  static const uint64_t FNV_PRIME = 1099511628211u; +  uint64_t hash = FNV_OFFSET_BASIS; +  for (size_t i = 0; i < nbytes; i++) { +    hash = hash ^ bytes[i]; +    hash = hash * FNV_PRIME; +  } + +  return hash; +} diff --git a/src/hash.h b/src/hash.h new file mode 100644 index 0000000..f4f8e76 --- /dev/null +++ b/src/hash.h @@ -0,0 +1,9 @@ +#ifndef VGLTF_HASH_H +#define VGLTF_HASH_H + +#include <stddef.h> +#include <stdint.h> + +uint64_t vgltf_hash_fnv_1a(const char *bytes, size_t nbytes); + +#endif // VGLTF_HASH_H diff --git a/src/image.c b/src/image.c new file mode 100644 index 0000000..a2d29c7 --- /dev/null +++ b/src/image.c @@ -0,0 +1,20 @@ +#include "image.h" + +#define STB_IMAGE_IMPLEMENTATION +#include <stb_image.h> + +bool vgltf_image_load_from_file(struct vgltf_image *image, +                              struct vgltf_string_view path) { +  int width; +  int height; +  int tex_channels; +  image->data = +      stbi_load(path.data, &width, &height, &tex_channels, STBI_rgb_alpha); +  image->width = width; +  image->height = height; +  image->format = VGLTF_IMAGE_FORMAT_R8G8B8A8; + +  return image->data != nullptr; +} + +void vgltf_image_deinit(struct vgltf_image *image) { stbi_image_free(image->data); } diff --git a/src/image.h b/src/image.h new file mode 100644 index 0000000..426d605 --- /dev/null +++ b/src/image.h @@ -0,0 +1,21 @@ +#ifndef VGLTF_IMAGE_H +#define VGLTF_IMAGE_H + +#include <stdint.h> +#include "str.h" + +enum vgltf_image_format { +  VGLTF_IMAGE_FORMAT_R8G8B8A8, +}; + +struct vgltf_image { +  unsigned char* data; +  uint32_t width; +  uint32_t height; +  enum vgltf_image_format format; +}; + +bool vgltf_image_load_from_file(struct vgltf_image* image, struct vgltf_string_view path); +void vgltf_image_deinit(struct vgltf_image* image); + +#endif // VGLTF_IMAGE_H @@ -1,12 +1,5 @@  #include "log.h" -const char *vgltf_log_level_to_str(enum vgltf_log_level level) { -  switch (level) { -  case VGLTF_LOG_ERROR: -    return "error"; -  case VGLTF_LOG_INFO: -    return "info"; -  case VGLTF_LOG_DEBUG: -    return "debug"; -  } -} +const char *vgltf_log_level_str[] = {[VGLTF_LOG_LEVEL_DBG] = "debug", +                                   [VGLTF_LOG_LEVEL_INFO] = "info", +                                   [VGLTF_LOG_LEVEL_ERR] = "error"}; @@ -1,25 +1,26 @@  #ifndef VGLTF_LOG_H  #define VGLTF_LOG_H -#include <stdio.h> +#include <stdio.h> // IWYU pragma: keep  enum vgltf_log_level { -  VGLTF_LOG_DEBUG, -  VGLTF_LOG_INFO, -  VGLTF_LOG_ERROR, +  VGLTF_LOG_LEVEL_DBG, +  VGLTF_LOG_LEVEL_INFO, +  VGLTF_LOG_LEVEL_ERR,  }; -const char *vgltf_log_level_to_str(enum vgltf_log_level level); -#define VGLTF_LOG(level, ...)                                                  \ +extern const char *vgltf_log_level_str[]; + +#define VGLTF_LOG(level, ...)                           
                         \    do {                                                                         \ -    fprintf(stderr, "[%s %s:%d] ", vgltf_log_level_to_str(level), __FILE__,    \ +    fprintf(stderr, "[%s %s:%d] ", vgltf_log_level_str[level], __FILE__,         \              __LINE__);                                                         \      fprintf(stderr, __VA_ARGS__);                                              \      fprintf(stderr, "\n");                                                     \    } while (0) -#define VGLTF_LOG_DBG(...) VGLTF_LOG(VGLTF_LOG_DEBUG, __VA_ARGS__) -#define VGLTF_LOG_INFO(...) VGLTF_LOG(VGLTF_LOG_INFO, __VA_ARGS__) -#define VGLTF_LOG_ERR(...) VGLTF_LOG(VGLTF_LOG_ERROR, __VA_ARGS__) +#define VGLTF_LOG_DBG(...) VGLTF_LOG(VGLTF_LOG_LEVEL_DBG, __VA_ARGS__) +#define VGLTF_LOG_INFO(...) VGLTF_LOG(VGLTF_LOG_LEVEL_INFO, __VA_ARGS__) +#define VGLTF_LOG_ERR(...) VGLTF_LOG(VGLTF_LOG_LEVEL_ERR, __VA_ARGS__)  #endif // VGLTF_LOG_H @@ -1,43 +1,39 @@ +#include "engine.h"  #include "log.h"  #include "platform.h" -#include "renderer.h"  int main(void) {    struct vgltf_platform platform = {};    if (!vgltf_platform_init(&platform)) { -    VGLTF_LOG_ERR("Couldn't initialize the platform layer"); +    VGLTF_LOG_ERR("Platform initialization failed");      goto err;    } -  struct vgltf_renderer renderer = {}; -  if (!vgltf_renderer_init(&renderer, &platform)) { -    VGLTF_LOG_ERR("Couldn't initialize the renderer"); +  struct vgltf_engine engine = {}; +  if (!vgltf_engine_init(&engine, &platform)) { +    VGLTF_LOG_ERR("Couldn't initialize the engine");      goto deinit_platform;    } +  VGLTF_LOG_INFO("Starting main loop");    while (true) {      struct vgltf_event event;      while (vgltf_platform_poll_event(&platform, &event)) { -      if (event.type == VGLTF_EVENT_QUIT || -          (event.type == VGLTF_EVENT_KEY_DOWN && -           event.key.key == VGLTF_KEY_ESCAPE)) { +      if (event.type == VGLTF_EVENT_QUIT || (event.type == VGLTF_EVENT_KEY_DOWN && +                                           event.key.key == VGLTF_KEY_ESCAPE)) {          goto out_main_loop; -      } else if (event.type == VGLTF_EVENT_WINDOW_RESIZED) { -        vgltf_renderer_on_window_resized( -            &renderer, -            (struct vgltf_window_size){.width = event.window_resized.width, -                                       .height = event.window_resized.height});        }      } -    vgltf_renderer_triangle_pass(&renderer); +    vgltf_engine_run_frame(&engine);    }  out_main_loop: -  vgltf_renderer_deinit(&renderer); +  VGLTF_LOG_INFO("Exiting main loop"); +  vgltf_engine_deinit(&engine);    vgltf_platform_deinit(&platform);    return 0;  deinit_platform:    vgltf_platform_deinit(&platform);  err: -  return 1; +  return -1;  } diff --git a/src/maths.c b/src/maths.c new file mode 100644 index 0000000..a79c68f --- /dev/null +++ b/src/maths.c @@ -0,0 +1,103 @@ +#include "maths.h" +#include <math.h> +#include <string.h> + +vgltf_vec3 vgltf_vec3_sub(vgltf_vec3 lhs, vgltf_vec3 rhs) { +  return (vgltf_vec3){.x = lhs.x - rhs.x, .y = lhs.y - rhs.y, .z = lhs.z - rhs.z}; +} +vgltf_vec3 vgltf_vec3_cross(vgltf_vec3 lhs, vgltf_vec3 rhs) { +  return (vgltf_vec3){.x = lhs.y * rhs.z - lhs.z * rhs.y, +                    .y = lhs.z * rhs.x - lhs.x * rhs.z, +                    .z = lhs.x * rhs.y - lhs.y * rhs.x}; +} +vgltf_vec_value_type vgltf_vec3_dot(vgltf_vec3 lhs, vgltf_vec3 rhs) { +  return lhs.x * rhs.x + lhs.y * rhs.y + lhs.z * rhs.z; +} +vgltf_vec_value_type vgltf_vec3_length(vgltf_vec3 
vec) { +  return sqrtf(vec.x * vec.x + vec.y * vec.y + vec.z * vec.z); +} +vgltf_vec3 vgltf_vec3_normalized(vgltf_vec3 vec) { +  vgltf_vec_value_type length = vgltf_vec3_length(vec); +  return (vgltf_vec3){ +      .x = vec.x / length, .y = vec.y / length, .z = vec.z / length}; +} +void vgltf_mat4_multiply(vgltf_mat4 out, vgltf_mat4 lhs, vgltf_mat4 rhs) { +  for (int i = 0; i < 4; ++i) { +    for (int j = 0; j < 4; ++j) { +      out[i * 4 + j] = +          lhs[i * 4 + 0] * rhs[0 * 4 + j] + lhs[i * 4 + 1] * rhs[1 * 4 + j] + +          lhs[i * 4 + 2] * rhs[2 * 4 + j] + lhs[i * 4 + 3] * rhs[3 * 4 + j]; +    } +  } +} +void vgltf_mat4_rotate(vgltf_mat4 out, vgltf_mat4 matrix, +                     vgltf_mat_value_type angle_radians, vgltf_vec3 axis) { +  vgltf_vec3 a = vgltf_vec3_normalized(axis); +  vgltf_vec_value_type c = cosf(angle_radians); +  vgltf_vec_value_type s = sinf(angle_radians); +  vgltf_vec_value_type t = 1.f - c; + +  vgltf_mat4 rotation_matrix = {t * a.x * a.x + c, +                              t * a.x * a.y - s * a.z, +                              t * a.x * a.z + s * a.y, +                              0.f, +                              t * a.x * a.y + s * a.z, +                              t * a.y * a.y + c, +                              t * a.y * a.z - s * a.x, +                              0.f, +                              t * a.x * a.z - s * a.y, +                              t * a.y * a.z + s * a.x, +                              t * a.z * a.z + c, +                              0.f, +                              0.f, +                              0.f, +                              0.f, +                              1.f}; + +  vgltf_mat4_multiply(out, matrix, rotation_matrix); +} +void vgltf_mat4_look_at(vgltf_mat4 out, vgltf_vec3 eye_position, +                      vgltf_vec3 target_position, vgltf_vec3 up_axis) { +  vgltf_vec3 forward = +      vgltf_vec3_normalized(vgltf_vec3_sub(target_position, eye_position)); +  vgltf_vec3 right = vgltf_vec3_normalized(vgltf_vec3_cross(forward, up_axis)); +  vgltf_vec3 camera_up = vgltf_vec3_cross(right, forward); + +  memcpy(out, (const vgltf_mat4)VGLTF_MAT4_IDENTITY, sizeof(vgltf_mat4)); +  out[0 * 4 + 0] = right.x; +  out[1 * 4 + 0] = right.y; +  out[2 * 4 + 0] = right.z; +  out[0 * 4 + 1] = camera_up.x; +  out[1 * 4 + 1] = camera_up.y; +  out[2 * 4 + 1] = camera_up.z; +  out[0 * 4 + 2] = -forward.x; +  out[1 * 4 + 2] = -forward.y; +  out[2 * 4 + 2] = -forward.z; +  out[3 * 4 + 0] = -vgltf_vec3_dot(right, eye_position); +  out[3 * 4 + 1] = -vgltf_vec3_dot(camera_up, eye_position); +  out[3 * 4 + 2] = vgltf_vec3_dot(forward, eye_position); +} +void vgltf_mat4_perspective(vgltf_mat4 out, vgltf_mat_value_type fov_radians, +                          vgltf_mat_value_type aspect_ratio, +                          vgltf_mat_value_type near, vgltf_mat_value_type far) { +  float tan_half_fovy = tanf(fov_radians / 2.0f); +  out[0] = 1.f / (aspect_ratio * tan_half_fovy); +  out[1] = 0.0f; +  out[2] = 0.0f; +  out[3] = 0.0f; + +  out[4] = 0.0f; +  out[5] = 1.f / tan_half_fovy; +  out[6] = 0.0f; +  out[7] = 0.0f; + +  out[8] = 0.0f; +  out[9] = 0.0f; +  out[10] = -(far + near) / (far - near); +  out[11] = -1.0f; + +  out[12] = 0.0f; +  out[13] = 0.0f; +  out[14] = -(2.0f * far * near) / (far - near); +  out[15] = 0.0f; +} diff --git a/src/maths.h b/src/maths.h new file mode 100644 index 0000000..d50f285 --- /dev/null +++ b/src/maths.h @@ -0,0 +1,49 @@ +#ifndef VGLTF_MATHS_H +#define VGLTF_MATHS_H + +typedef float 
vgltf_vec_value_type; + +constexpr double VGLTF_MATHS_PI = 3.14159265358979323846; +#define VGLTF_MATHS_DEG_TO_RAD(deg) (deg * VGLTF_MATHS_PI / 180.0) +#define VGLTF_MAX(x, y) ((x) > (y) ? (x) : (y)) + +typedef struct { +  vgltf_vec_value_type x; +  vgltf_vec_value_type y; +} vgltf_vec2; + +typedef struct { +  vgltf_vec_value_type x; +  vgltf_vec_value_type y; +  vgltf_vec_value_type z; +} vgltf_vec3; +vgltf_vec3 vgltf_vec3_sub(vgltf_vec3 lhs, vgltf_vec3 rhs); +vgltf_vec3 vgltf_vec3_cross(vgltf_vec3 lhs, vgltf_vec3 rhs); +vgltf_vec_value_type vgltf_vec3_dot(vgltf_vec3 lhs, vgltf_vec3 rhs); + +vgltf_vec_value_type vgltf_vec3_length(vgltf_vec3 vec); +vgltf_vec3 vgltf_vec3_normalized(vgltf_vec3 vec); + +typedef vgltf_vec_value_type vgltf_mat_value_type; + +// row major +typedef vgltf_mat_value_type vgltf_mat4[16]; +void vgltf_mat4_multiply(vgltf_mat4 out, vgltf_mat4 lhs, vgltf_mat4 rhs); +void vgltf_mat4_rotate(vgltf_mat4 out, vgltf_mat4 matrix, +                     vgltf_mat_value_type angle_radians, vgltf_vec3 axis); +void vgltf_mat4_look_at(vgltf_mat4 out, vgltf_vec3 eye_position, +                      vgltf_vec3 target_position, vgltf_vec3 up_axis); +void vgltf_mat4_perspective(vgltf_mat4 out, vgltf_mat_value_type fov, +                          vgltf_mat_value_type aspect_ratio, +                          vgltf_mat_value_type near, vgltf_mat_value_type far); + +// clang-format off +#define VGLTF_MAT4_IDENTITY { \ +  1, 0, 0, 0, \ +  0, 1, 0, 0, \ +  0, 0, 1, 0, \ +  0, 0, 0, 1, \ +} +// clang-format on + +#endif // VGLTF_MATHS_H diff --git a/src/platform.c b/src/platform.c new file mode 100644 index 0000000..da4d7d4 --- /dev/null +++ b/src/platform.c @@ -0,0 +1,5 @@ +#include "platform.h" + +#define VGLTF_GENERATE_KEY_STRING(KEY) #KEY, +const char *vgltf_key_str[] = {VGLTF_FOREACH_KEY(VGLTF_GENERATE_KEY_STRING)}; +#undef VGLTF_GENERATE_KEY_STRING diff --git a/src/platform.h b/src/platform.h index fe719d3..aff673f 100644 --- a/src/platform.h +++ b/src/platform.h @@ -3,66 +3,61 @@  #include "log.h"  #include <stdint.h> -#include <stdlib.h> +#include <stdlib.h> // IWYU pragma: keep -#define VGLTF_PANIC(...)                                                       \ +#define VGLTF_PANIC(...)                                                         
\    do {                                                                         \ -    VGLTF_LOG_ERR("panic: " __VA_ARGS__);                                      \ +    VGLTF_LOG_ERR("PANIC " __VA_ARGS__);                                         \      exit(1);                                                                   \    } while (0) -enum vgltf_event_type { -  VGLTF_EVENT_QUIT, -  VGLTF_EVENT_KEY_DOWN, -  VGLTF_EVENT_WINDOW_RESIZED, -  VGLTF_EVENT_UNKNOWN, -}; +#define VGLTF_FOREACH_KEY(_M)                                                    \ +  _M(A)                                                                        \ +  _M(B)                                                                        \ +  _M(C)                                                                        \ +  _M(D)                                                                        \ +  _M(E)                                                                        \ +  _M(F)                                                                        \ +  _M(G)                                                                        \ +  _M(H)                                                                        \ +  _M(I)                                                                        \ +  _M(J)                                                                        \ +  _M(K)                                                                        \ +  _M(L)                                                                        \ +  _M(M)                                                                        \ +  _M(N)                                                                        \ +  _M(O)                                                                        \ +  _M(P)                                                                        \ +  _M(Q)                                                                        \ +  _M(R)                                                                        \ +  _M(S)                                                                        \ +  _M(T)                                                                        \ +  _M(U)                                                                        \ +  _M(V)                                                                        \ +  _M(W)                                                                        \ +  _M(X)                                                                        \ +  _M(Y)                                                                        \ +  _M(Z)                                                                        \ +  _M(ESCAPE) +#define VGLTF_GENERATE_KEY_ENUM(KEY) VGLTF_KEY_##KEY,  enum vgltf_key { -  VGLTF_KEY_A, -  VGLTF_KEY_B, -  VGLTF_KEY_C, -  VGLTF_KEY_D, -  VGLTF_KEY_E, -  VGLTF_KEY_F, -  VGLTF_KEY_G, -  VGLTF_KEY_H, -  VGLTF_KEY_I, -  VGLTF_KEY_J, -  VGLTF_KEY_K, -  VGLTF_KEY_L, -  VGLTF_KEY_M, -  VGLTF_KEY_N, -  VGLTF_KEY_O, -  VGLTF_KEY_P, -  VGLTF_KEY_Q, -  VGLTF_KEY_R, -  VGLTF_KEY_S, -  VGLTF_KEY_T, -  VGLTF_KEY_U, -  VGLTF_KEY_V, -  VGLTF_KEY_W, -  VGLTF_KEY_X, -  VGLTF_KEY_Y, -  VGLTF_KEY_Z, -  VGLTF_KEY_ESCAPE, +  VGLTF_FOREACH_KEY(VGLTF_GENERATE_KEY_ENUM) VGLTF_KEY_COUNT,    VGLTF_KEY_UNKNOWN  }; +#undef VGLTF_GENERATE_KEY_ENUM +extern const char *vgltf_key_str[]; + +enum vgltf_event_type { VGLTF_EVENT_QUIT, VGLTF_EVENT_KEY_DOWN, VGLTF_EVENT_UNKNOWN };  struct vgltf_key_event {    enum vgltf_key key;  }; -struct vgltf_window_resized_event { 
-  int32_t width; -  int32_t height; -}; -  struct vgltf_event {    enum vgltf_event_type type;    union {      struct vgltf_key_event key; -    struct vgltf_window_resized_event window_resized;    };  }; @@ -75,18 +70,19 @@ struct vgltf_platform;  bool vgltf_platform_init(struct vgltf_platform *platform);  void vgltf_platform_deinit(struct vgltf_platform *platform);  bool vgltf_platform_poll_event(struct vgltf_platform *platform, -                               struct vgltf_event *event); +                             struct vgltf_event *event);  bool vgltf_platform_get_window_size(struct vgltf_platform *platform, -                                    struct vgltf_window_size *window_size); - -// Vulkan specifics -#include "vulkan/vulkan_core.h" -char const *const * +                                  struct vgltf_window_size *window_size); +bool vgltf_platform_get_current_time_nanoseconds(long *time); +char *vgltf_platform_read_file_to_string(const char *filepath, size_t *out_size); +const char *const *  vgltf_platform_get_vulkan_instance_extensions(struct vgltf_platform *platform, -                                              uint32_t *count); +                                            uint32_t *count); + +#include <vulkan/vulkan.h>  bool vgltf_platform_create_vulkan_surface(struct vgltf_platform *platform, -                                          VkInstance instance, -                                          VkSurfaceKHR *surface); +                                        VkInstance instance, +                                        VkSurfaceKHR *surface);  #include "platform_sdl.h" diff --git a/src/platform_sdl.c b/src/platform_sdl.c index 5cc6032..6593b9e 100644 --- a/src/platform_sdl.c +++ b/src/platform_sdl.c @@ -1,29 +1,25 @@ +#include "platform_sdl.h"  #include "log.h"  #include "platform.h" -#include "platform_sdl.h" -#include <SDL3/SDL_vulkan.h>  bool vgltf_platform_init(struct vgltf_platform *platform) { +  VGLTF_LOG_INFO("Initializing SDL platform..."); +    if (!SDL_Init(SDL_INIT_VIDEO)) {      VGLTF_LOG_ERR("SDL initialization failed: %s", SDL_GetError());      goto err;    } -  constexpr char WINDOW_TITLE[] = "VisibleGLTF"; -  constexpr int WINDOW_WIDTH = 800; -  constexpr int WINDOW_HEIGHT = 600; -  SDL_Window *window = -      SDL_CreateWindow(WINDOW_TITLE, WINDOW_WIDTH, WINDOW_HEIGHT, -                       SDL_WINDOW_VULKAN | SDL_WINDOW_RESIZABLE); -  if (!window) { -    VGLTF_LOG_ERR("SDL window creation failed: %s", SDL_GetError()); -    goto quit_sdl; +  platform->window = SDL_CreateWindow("vgltf", 800, 600, SDL_WINDOW_VULKAN); +  if (!platform->window) { +    VGLTF_LOG_ERR("Window creation failed: %s", SDL_GetError()); +    goto deinit_sdl;    } -  platform->window = window; - +  VGLTF_LOG_INFO("SDL platform initialized");    return true; -quit_sdl: + +deinit_sdl:    SDL_Quit();  err:    return false; @@ -31,67 +27,23 @@ err:  void vgltf_platform_deinit(struct vgltf_platform *platform) {    SDL_DestroyWindow(platform->window);    SDL_Quit(); +  VGLTF_LOG_INFO("SDL platform deinitialized");  } -static enum vgltf_key vgltf_key_from_sdl_keycode(SDL_Keycode keycode) { -  switch (keycode) { -  case SDLK_A: -    return VGLTF_KEY_A; -  case SDLK_B: -    return VGLTF_KEY_B; -  case SDLK_C: -    return VGLTF_KEY_C; -  case SDLK_D: -    return VGLTF_KEY_D; -  case SDLK_E: -    return VGLTF_KEY_E; -  case SDLK_F: -    return VGLTF_KEY_F; -  case SDLK_G: -    return VGLTF_KEY_G; -  case SDLK_H: -    return VGLTF_KEY_H; -  case SDLK_I: -    return VGLTF_KEY_I; -  case 
SDLK_J: -    return VGLTF_KEY_J; -  case SDLK_K: -    return VGLTF_KEY_K; -  case SDLK_L: -    return VGLTF_KEY_L; -  case SDLK_M: -    return VGLTF_KEY_M; -  case SDLK_N: -    return VGLTF_KEY_N; -  case SDLK_O: -    return VGLTF_KEY_O; -  case SDLK_P: -    return VGLTF_KEY_P; -  case SDLK_Q: -    return VGLTF_KEY_Q; -  case SDLK_R: -    return VGLTF_KEY_R; -  case SDLK_S: -    return VGLTF_KEY_S; -  case SDLK_T: -    return VGLTF_KEY_T; -  case SDLK_U: -    return VGLTF_KEY_U; -  case SDLK_V: -    return VGLTF_KEY_V; -  case SDLK_W: -    return VGLTF_KEY_W; -  case SDLK_X: -    return VGLTF_KEY_X; -  case SDLK_Y: -    return VGLTF_KEY_Y; -  case SDLK_Z: -    return VGLTF_KEY_Z; -  case SDLK_ESCAPE: -    return VGLTF_KEY_ESCAPE; + +#define VGLTF_GENERATE_SDL_KEYCODE_MAPPING(KEY)                                \ +  case SDLK_##KEY:                                                             \ +    return VGLTF_KEY_##KEY; + +static enum vgltf_key vgltf_key_from_sdl_keycode(SDL_Keycode key_code) { +  switch (key_code) { +    VGLTF_FOREACH_KEY(VGLTF_GENERATE_SDL_KEYCODE_MAPPING)    default:      return VGLTF_KEY_UNKNOWN;    }  } + +#undef VGLTF_GENERATE_SDL_KEYCODE_MAPPING +  bool vgltf_platform_poll_event(struct vgltf_platform *platform,                                 struct vgltf_event *event) {    (void)platform; @@ -106,16 +58,12 @@ bool vgltf_platform_poll_event(struct vgltf_platform *platform,        event->type = VGLTF_EVENT_KEY_DOWN;        event->key.key = vgltf_key_from_sdl_keycode(sdl_event.key.key);        break; -    case SDL_EVENT_WINDOW_RESIZED: -      event->type = VGLTF_EVENT_WINDOW_RESIZED; -      event->window_resized.width = sdl_event.display.data1; -      event->window_resized.height = sdl_event.display.data2; -      break;      default:        event->type = VGLTF_EVENT_UNKNOWN;        break;      }    } +    return pending_events;  }  bool vgltf_platform_get_window_size(struct vgltf_platform *platform, @@ -123,7 +71,31 @@ bool vgltf_platform_get_window_size(struct vgltf_platform *platform,    return SDL_GetWindowSize(platform->window, &window_size->width,                             &window_size->height);  } -char const *const * +bool vgltf_platform_get_current_time_nanoseconds(long *time) { +  if (!SDL_GetCurrentTime(time)) { +    VGLTF_LOG_ERR("'SDL_GetCurrentTime failed: %s", SDL_GetError()); +    goto err; +  } + +  return true; +err: +  return false; +} + +char *vgltf_platform_read_file_to_string(const char *filepath, +                                         size_t *out_size) { +  char *file_data = SDL_LoadFile(filepath, out_size); +  if (!file_data) { +    VGLTF_LOG_ERR("Couldn't load file: %s", SDL_GetError()); +    return NULL; +  } + +  return file_data; +} + +#include <SDL3/SDL_vulkan.h> + +const char *const *  vgltf_platform_get_vulkan_instance_extensions(struct vgltf_platform *platform,                                                uint32_t *count) {    (void)platform; diff --git a/src/renderer.c b/src/renderer.c deleted file mode 100644 index 7022af6..0000000 --- a/src/renderer.c +++ /dev/null @@ -1,1470 +0,0 @@ -#include "log.h" -#include "renderer.h" -#include "src/platform.h" -#include "vulkan/vulkan_core.h" -#include <assert.h> - -static const char *VALIDATION_LAYERS[] = {"VK_LAYER_KHRONOS_validation"}; -static constexpr int VALIDATION_LAYER_COUNT = -    sizeof(VALIDATION_LAYERS) / sizeof(VALIDATION_LAYERS[0]); - -#ifdef VGLTF_DEBUG -static constexpr bool enable_validation_layers = true; -#else -static constexpr bool enable_validation_layers = false; 
-#endif - -static VKAPI_ATTR VkBool32 VKAPI_CALL -debug_callback(VkDebugUtilsMessageSeverityFlagBitsEXT message_severity, -               VkDebugUtilsMessageTypeFlagBitsEXT message_type, -               const VkDebugUtilsMessengerCallbackDataEXT *callback_data, -               void *user_data) { -  (void)message_severity; -  (void)message_type; -  (void)user_data; -  VGLTF_LOG_DBG("validation layer: %s", callback_data->pMessage); -  return VK_FALSE; -} - -static constexpr int REQUIRED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY = 10; -struct required_instance_extensions { -  const char *extensions[REQUIRED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY]; -  uint32_t count; -}; -void required_instance_extensions_push( -    struct required_instance_extensions *required_instance_extensions, -    const char *required_instance_extension) { -  if (required_instance_extensions->count == -      REQUIRED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY) { -    VGLTF_PANIC("required instance extensions array is full"); -  } -  required_instance_extensions -      ->extensions[required_instance_extensions->count++] = -      required_instance_extension; -} - -static constexpr int SUPPORTED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY = 128; -struct supported_instance_extensions { -  VkExtensionProperties -      properties[SUPPORTED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY]; -  uint32_t count; -}; -bool supported_instance_extensions_init( -    struct supported_instance_extensions *supported_instance_extensions) { -  if (vkEnumerateInstanceExtensionProperties( -          nullptr, &supported_instance_extensions->count, nullptr) != -      VK_SUCCESS) { -    goto err; -  } - -  if (supported_instance_extensions->count > -      SUPPORTED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY) { -    VGLTF_LOG_ERR("supported instance extensions array cannot fit all the " -                  "VkExtensionProperties"); -    goto err; -  } - -  if (vkEnumerateInstanceExtensionProperties( -          nullptr, &supported_instance_extensions->count, -          supported_instance_extensions->properties) != VK_SUCCESS) { -    goto err; -  } -  return true; -err: -  return false; -} -void supported_instance_extensions_debug_print( -    const struct supported_instance_extensions *supported_instance_extensions) { -  VGLTF_LOG_DBG("Supported instance extensions:"); -  for (uint32_t i = 0; i < supported_instance_extensions->count; i++) { -    VGLTF_LOG_DBG("\t- %s", -                  supported_instance_extensions->properties[i].extensionName); -  } -} -bool supported_instance_extensions_includes( -    const struct supported_instance_extensions *supported_instance_extensions, -    const char *extension_name) { -  for (uint32_t supported_instance_extension_index = 0; -       supported_instance_extension_index < -       supported_instance_extensions->count; -       supported_instance_extension_index++) { -    const VkExtensionProperties *extension_properties = -        &supported_instance_extensions -             ->properties[supported_instance_extension_index]; -    if (strcmp(extension_properties->extensionName, extension_name) == 0) { -      return true; -    } -  } - -  return false; -} - -static constexpr uint32_t SUPPORTED_VALIDATION_LAYERS_ARRAY_CAPACITY = 64; -struct supported_validation_layers { -  VkLayerProperties properties[SUPPORTED_VALIDATION_LAYERS_ARRAY_CAPACITY]; -  uint32_t count; -}; -bool supported_validation_layers_init( -    struct supported_validation_layers *supported_validation_layers) { -  if (vkEnumerateInstanceLayerProperties(&supported_validation_layers->count, -      
                                   nullptr) != VK_SUCCESS) { -    goto err; -  } - -  if (supported_validation_layers->count > -      SUPPORTED_VALIDATION_LAYERS_ARRAY_CAPACITY) { -    VGLTF_LOG_ERR("supported validation layers array cannot fit all the " -                  "VkLayerProperties"); -    goto err; -  } - -  if (vkEnumerateInstanceLayerProperties( -          &supported_validation_layers->count, -          supported_validation_layers->properties) != VK_SUCCESS) { -    goto err; -  } - -  return true; -err: -  return false; -} - -static bool are_validation_layer_supported() { -  struct supported_validation_layers supported_layers = {}; -  if (!supported_validation_layers_init(&supported_layers)) { -    goto err; -  } - -  for (int requested_layer_index = 0; -       requested_layer_index < VALIDATION_LAYER_COUNT; -       requested_layer_index++) { -    const char *requested_layer_name = VALIDATION_LAYERS[requested_layer_index]; -    bool requested_layer_found = false; -    for (uint32_t supported_layer_index = 0; -         supported_layer_index < supported_layers.count; -         supported_layer_index++) { -      VkLayerProperties *supported_layer = -          &supported_layers.properties[supported_layer_index]; -      if (strcmp(requested_layer_name, supported_layer->layerName) == 0) { -        requested_layer_found = true; -        break; -      } -    } - -    if (!requested_layer_found) { -      goto err; -    } -  } - -  return true; -err: -  return false; -} - -static bool fetch_required_instance_extensions( -    struct required_instance_extensions *required_extensions, -    struct vgltf_platform *platform) { -  struct supported_instance_extensions supported_extensions = {}; -  if (!supported_instance_extensions_init(&supported_extensions)) { -    VGLTF_LOG_ERR( -        "Couldn't fetch supported instance extensions details (OOM?)"); -    goto err; -  } -  supported_instance_extensions_debug_print(&supported_extensions); - -  uint32_t platform_required_extension_count = 0; -  const char *const *platform_required_extensions = -      vgltf_platform_get_vulkan_instance_extensions( -          platform, &platform_required_extension_count); -  for (uint32_t platform_required_extension_index = 0; -       platform_required_extension_index < platform_required_extension_count; -       platform_required_extension_index++) { -    required_instance_extensions_push( -        required_extensions, -        platform_required_extensions[platform_required_extension_index]); -  } -  required_instance_extensions_push( -      required_extensions, VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME); - -  if (enable_validation_layers) { -    required_instance_extensions_push(required_extensions, -                                      VK_EXT_DEBUG_UTILS_EXTENSION_NAME); -  } - -  bool all_extensions_supported = true; -  for (uint32_t required_extension_index = 0; -       required_extension_index < required_extensions->count; -       required_extension_index++) { -    const char *required_extension_name = -        required_extensions->extensions[required_extension_index]; -    if (!supported_instance_extensions_includes(&supported_extensions, -                                                required_extension_name)) { -      VGLTF_LOG_ERR("Unsupported instance extension: %s", -                    required_extension_name); -      all_extensions_supported = false; -    } -  } - -  if (!all_extensions_supported) { -    VGLTF_LOG_ERR("Some required extensions are unsupported."); -    goto err; -  } - -  return 
true; -err: -  return false; -} - -static void populate_debug_messenger_create_info( -    VkDebugUtilsMessengerCreateInfoEXT *create_info) { -  *create_info = (VkDebugUtilsMessengerCreateInfoEXT){}; -  create_info->sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; -  create_info->messageSeverity = -      VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | -      VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | -      VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; -  create_info->messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | -                             VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | -                             VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; -  create_info->pfnUserCallback = debug_callback; -} - -static bool vgltf_renderer_create_instance(struct vgltf_renderer *renderer, -                                           struct vgltf_platform *platform) { -  VGLTF_LOG_INFO("Creating vulkan instance..."); -  if (enable_validation_layers && !are_validation_layer_supported()) { -    VGLTF_LOG_ERR("Requested validation layers aren't supported"); -    goto err; -  } - -  VkApplicationInfo application_info = { -      .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, -      .pApplicationName = "Visible GLTF", -      .applicationVersion = VK_MAKE_VERSION(0, 1, 0), -      .pEngineName = "No Engine", -      .engineVersion = VK_MAKE_VERSION(1, 0, 0), -      .apiVersion = VK_API_VERSION_1_2}; - -  struct required_instance_extensions required_extensions = {}; -  fetch_required_instance_extensions(&required_extensions, platform); - -  VkInstanceCreateInfo create_info = { -      .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, -      .pApplicationInfo = &application_info, -      .enabledExtensionCount = required_extensions.count, -      .ppEnabledExtensionNames = required_extensions.extensions, -      .flags = VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR}; - -  VkDebugUtilsMessengerCreateInfoEXT debug_create_info; -  if (enable_validation_layers) { -    create_info.enabledLayerCount = VALIDATION_LAYER_COUNT; -    create_info.ppEnabledLayerNames = VALIDATION_LAYERS; -    populate_debug_messenger_create_info(&debug_create_info); -    create_info.pNext = &debug_create_info; -  } - -  if (vkCreateInstance(&create_info, nullptr, &renderer->instance) != -      VK_SUCCESS) { -    VGLTF_LOG_ERR("Failed to create VkInstance"); -    goto err; -  } - -  return true; -err: -  return false; -} - -static VkResult create_debug_utils_messenger_ext( -    VkInstance instance, const VkDebugUtilsMessengerCreateInfoEXT *create_info, -    const VkAllocationCallbacks *allocator, -    VkDebugUtilsMessengerEXT *debug_messenger) { -  auto func = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr( -      instance, "vkCreateDebugUtilsMessengerEXT"); -  if (func != nullptr) { -    return func(instance, create_info, allocator, debug_messenger); -  } - -  return VK_ERROR_EXTENSION_NOT_PRESENT; -} - -static void -destroy_debug_utils_messenger_ext(VkInstance instance, -                                  VkDebugUtilsMessengerEXT debug_messenger, -                                  const VkAllocationCallbacks *allocator) { -  auto func = (PFN_vkDestroyDebugUtilsMessengerEXT)vkGetInstanceProcAddr( -      instance, "vkDestroyDebugUtilsMessengerEXT"); -  if (func != nullptr) { -    func(instance, debug_messenger, allocator); -  } -} - -static void -vgltf_renderer_setup_debug_messenger(struct vgltf_renderer *renderer) { -  if (!enable_validation_layers) -    return; -  
VkDebugUtilsMessengerCreateInfoEXT create_info; -  populate_debug_messenger_create_info(&create_info); -  create_debug_utils_messenger_ext(renderer->instance, &create_info, nullptr, -                                   &renderer->debug_messenger); -} - -static constexpr int AVAILABLE_PHYSICAL_DEVICE_ARRAY_CAPACITY = 128; -struct available_physical_devices { -  VkPhysicalDevice devices[AVAILABLE_PHYSICAL_DEVICE_ARRAY_CAPACITY]; -  uint32_t count; -}; -static bool -available_physical_devices_init(VkInstance instance, -                                struct available_physical_devices *devices) { - -  if (vkEnumeratePhysicalDevices(instance, &devices->count, nullptr) != -      VK_SUCCESS) { -    VGLTF_LOG_ERR("Couldn't enumerate physical devices"); -    goto err; -  } - -  if (devices->count == 0) { -    VGLTF_LOG_ERR("Failed to find any GPU with Vulkan support"); -    goto err; -  } - -  if (devices->count > AVAILABLE_PHYSICAL_DEVICE_ARRAY_CAPACITY) { -    VGLTF_LOG_ERR("available physical devices array cannot fit all available " -                  "physical devices"); -    goto err; -  } - -  if (vkEnumeratePhysicalDevices(instance, &devices->count, devices->devices) != -      VK_SUCCESS) { -    VGLTF_LOG_ERR("Couldn't enumerate physical devices"); -    goto err; -  } - -  return true; -err: -  return false; -} - -struct queue_family_indices { -  uint32_t graphics_family; -  uint32_t present_family; -  bool has_graphics_family; -  bool has_present_family; -}; -bool queue_family_indices_is_complete( -    const struct queue_family_indices *indices) { -  return indices->has_graphics_family && indices->has_present_family; -} -bool queue_family_indices_for_device(struct queue_family_indices *indices, -                                     VkPhysicalDevice device, -                                     VkSurfaceKHR surface) { -  static constexpr uint32_t QUEUE_FAMILY_PROPERTIES_ARRAY_CAPACITY = 64; -  uint32_t queue_family_count = 0; -  vkGetPhysicalDeviceQueueFamilyProperties(device, &queue_family_count, -                                           nullptr); - -  if (queue_family_count > QUEUE_FAMILY_PROPERTIES_ARRAY_CAPACITY) { -    VGLTF_LOG_ERR( -        "Queue family properties array cannot fit all queue family properties"); -    goto err; -  } - -  VkQueueFamilyProperties -      queue_family_properties[QUEUE_FAMILY_PROPERTIES_ARRAY_CAPACITY] = {}; -  vkGetPhysicalDeviceQueueFamilyProperties(device, &queue_family_count, -                                           queue_family_properties); - -  for (uint32_t queue_family_index = 0; queue_family_index < queue_family_count; -       queue_family_index++) { -    VkQueueFamilyProperties *queue_family = -        &queue_family_properties[queue_family_index]; - -    VkBool32 present_support; -    vkGetPhysicalDeviceSurfaceSupportKHR(device, queue_family_index, surface, -                                         &present_support); - -    if (queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT) { -      indices->graphics_family = queue_family_index; -      indices->has_graphics_family = true; -    } - -    if (present_support) { -      indices->present_family = queue_family_index; -      indices->has_present_family = true; -    } - -    if (queue_family_indices_is_complete(indices)) { -      break; -    } -  } - -  return true; -err: -  return false; -} - -static bool is_in_array(uint32_t *array, int length, uint32_t value) { -  for (int i = 0; i < length; i++) { -    if (array[i] == value) { -      return true; -    } -  } - -  return false; -} - -static 
constexpr uint32_t SUPPORTED_EXTENSIONS_ARRAY_CAPACITY = 128; -struct supported_extensions { -  VkExtensionProperties properties[SUPPORTED_EXTENSIONS_ARRAY_CAPACITY]; -  uint32_t count; -}; -bool supported_extensions_init( -    struct supported_extensions *supported_extensions, -    VkPhysicalDevice device) { -  if (vkEnumerateDeviceExtensionProperties(device, nullptr, -                                           &supported_extensions->count, -                                           nullptr) != VK_SUCCESS) { -    goto err; -  } - -  if (supported_extensions->count > SUPPORTED_EXTENSIONS_ARRAY_CAPACITY) { -    VGLTF_LOG_ERR( -        "supported extensions aarray cannot fit all the VkExtensionProperties"); -    goto err; -  } - -  if (vkEnumerateDeviceExtensionProperties( -          device, nullptr, &supported_extensions->count, -          supported_extensions->properties) != VK_SUCCESS) { -    goto err; -  } - -  return true; -err: -  return false; -} - -static bool supported_extensions_includes_extension( -    struct supported_extensions *supported_extensions, -    const char *extension_name) { -  for (uint32_t supported_extension_index = 0; -       supported_extension_index < supported_extensions->count; -       supported_extension_index++) { -    if (strcmp(supported_extensions->properties[supported_extension_index] -                   .extensionName, -               extension_name) == 0) { -      return true; -    } -  } -  return false; -} - -static const char *DEVICE_EXTENSIONS[] = {VK_KHR_SWAPCHAIN_EXTENSION_NAME, -                                          "VK_KHR_portability_subset"}; -static constexpr int DEVICE_EXTENSION_COUNT = -    sizeof(DEVICE_EXTENSIONS) / sizeof(DEVICE_EXTENSIONS[0]); -static bool are_device_extensions_supported(VkPhysicalDevice device) { -  struct supported_extensions supported_extensions = {}; -  if (!supported_extensions_init(&supported_extensions, device)) { -    goto err; -  } - -  for (uint32_t required_extension_index = 0; -       required_extension_index < DEVICE_EXTENSION_COUNT; -       required_extension_index++) { -    if (!supported_extensions_includes_extension( -            &supported_extensions, -            DEVICE_EXTENSIONS[required_extension_index])) { -      VGLTF_LOG_DBG("Unsupported: %s", -                    DEVICE_EXTENSIONS[required_extension_index]); -      goto err; -    } -  } - -  return true; - -err: -  return false; -} - -static constexpr int SWAPCHAIN_SUPPORT_DETAILS_MAX_SURFACE_FORMAT_COUNT = 256; -static constexpr int SWAPCHAIN_SUPPORT_DETAILS_MAX_PRESENT_MODE_COUNT = 256; -struct swapchain_support_details { -  VkSurfaceCapabilitiesKHR capabilities; -  VkSurfaceFormatKHR -      formats[SWAPCHAIN_SUPPORT_DETAILS_MAX_SURFACE_FORMAT_COUNT]; -  VkPresentModeKHR -      present_modes[SWAPCHAIN_SUPPORT_DETAILS_MAX_PRESENT_MODE_COUNT]; -  uint32_t format_count; -  uint32_t present_mode_count; -}; -bool swapchain_support_details_query_from_device( -    struct swapchain_support_details *swapchain_support_details, -    VkPhysicalDevice device, VkSurfaceKHR surface) { -  if (vkGetPhysicalDeviceSurfaceCapabilitiesKHR( -          device, surface, &swapchain_support_details->capabilities) != -      VK_SUCCESS) { -    goto err; -  } - -  if (vkGetPhysicalDeviceSurfaceFormatsKHR( -          device, surface, &swapchain_support_details->format_count, nullptr) != -      VK_SUCCESS) { -    goto err; -  } - -  if (swapchain_support_details->format_count != 0 && -      swapchain_support_details->format_count < -          
SWAPCHAIN_SUPPORT_DETAILS_MAX_SURFACE_FORMAT_COUNT) { -    if (vkGetPhysicalDeviceSurfaceFormatsKHR( -            device, surface, &swapchain_support_details->format_count, -            swapchain_support_details->formats) != VK_SUCCESS) { -      goto err; -    } -  } - -  if (vkGetPhysicalDeviceSurfacePresentModesKHR( -          device, surface, &swapchain_support_details->present_mode_count, -          nullptr) != VK_SUCCESS) { -    goto err; -  } - -  if (swapchain_support_details->present_mode_count != 0 && -      swapchain_support_details->present_mode_count < -          SWAPCHAIN_SUPPORT_DETAILS_MAX_PRESENT_MODE_COUNT) { -    if (vkGetPhysicalDeviceSurfacePresentModesKHR( -            device, surface, &swapchain_support_details->present_mode_count, -            swapchain_support_details->present_modes) != VK_SUCCESS) { -      goto err; -    } -  } - -  return true; -err: -  return false; -} - -static bool is_physical_device_suitable(VkPhysicalDevice device, -                                        VkSurfaceKHR surface) { -  struct queue_family_indices indices = {}; -  queue_family_indices_for_device(&indices, device, surface); - -  VGLTF_LOG_DBG("Checking for physical device extension support"); -  bool extensions_supported = are_device_extensions_supported(device); -  VGLTF_LOG_DBG("Supported: %d", extensions_supported); - -  bool swapchain_adequate = false; -  if (extensions_supported) { - -    VGLTF_LOG_DBG("Checking for swapchain support details"); -    struct swapchain_support_details swapchain_support_details = {}; -    if (!swapchain_support_details_query_from_device(&swapchain_support_details, -                                                     device, surface)) { -      VGLTF_LOG_ERR("Couldn't query swapchain support details from device"); -      goto err; -    } - -    swapchain_adequate = swapchain_support_details.format_count > 0 && -                         swapchain_support_details.present_mode_count > 0; -  } - -  return queue_family_indices_is_complete(&indices) && extensions_supported && -         swapchain_adequate; -err: -  return false; -} - -static bool -vgltf_renderer_pick_physical_device(struct vgltf_renderer *renderer) { -  VkPhysicalDevice physical_device = VK_NULL_HANDLE; - -  struct available_physical_devices available_physical_devices = {}; -  if (!available_physical_devices_init(renderer->instance, -                                       &available_physical_devices)) { -    VGLTF_LOG_ERR("Couldn't fetch available physical devices"); -    goto err; -  } - -  for (uint32_t available_physical_device_index = 0; -       available_physical_device_index < available_physical_devices.count; -       available_physical_device_index++) { -    VkPhysicalDevice available_physical_device = -        available_physical_devices.devices[available_physical_device_index]; -    if (is_physical_device_suitable(available_physical_device, -                                    renderer->surface)) { -      physical_device = available_physical_device; -      break; -    } -  } - -  if (physical_device == VK_NULL_HANDLE) { -    VGLTF_LOG_ERR("Failed to find a suitable GPU"); -    goto err; -  } - -  renderer->physical_device = physical_device; - -  return true; -err: -  return false; -} - -static bool -vgltf_renderer_create_logical_device(struct vgltf_renderer *renderer) { -  struct queue_family_indices queue_family_indices = {}; -  queue_family_indices_for_device(&queue_family_indices, -                                  renderer->physical_device, renderer->surface); -  static 
constexpr int MAX_QUEUE_FAMILY_COUNT = 2; - -  uint32_t unique_queue_families[MAX_QUEUE_FAMILY_COUNT] = {}; -  int unique_queue_family_count = 0; - -  if (!is_in_array(unique_queue_families, unique_queue_family_count, -                   queue_family_indices.graphics_family)) { -    assert(unique_queue_family_count < MAX_QUEUE_FAMILY_COUNT); -    unique_queue_families[unique_queue_family_count++] = -        queue_family_indices.graphics_family; -  } -  if (!is_in_array(unique_queue_families, unique_queue_family_count, -                   queue_family_indices.present_family)) { -    assert(unique_queue_family_count < MAX_QUEUE_FAMILY_COUNT); -    unique_queue_families[unique_queue_family_count++] = -        queue_family_indices.present_family; -  } - -  float queue_priority = 1.f; -  VkDeviceQueueCreateInfo queue_create_infos[MAX_QUEUE_FAMILY_COUNT] = {}; -  int queue_create_info_count = 0; -  for (int unique_queue_family_index = 0; -       unique_queue_family_index < unique_queue_family_count; -       unique_queue_family_index++) { -    queue_create_infos[queue_create_info_count++] = (VkDeviceQueueCreateInfo){ -        .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, -        .queueFamilyIndex = unique_queue_families[unique_queue_family_index], -        .queueCount = 1, -        .pQueuePriorities = &queue_priority}; -  } - -  VkPhysicalDeviceFeatures device_features = {}; -  VkDeviceCreateInfo create_info = { -      .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, -      .pQueueCreateInfos = queue_create_infos, -      .queueCreateInfoCount = queue_create_info_count, -      .pEnabledFeatures = &device_features, -      .ppEnabledExtensionNames = DEVICE_EXTENSIONS, -      .enabledExtensionCount = DEVICE_EXTENSION_COUNT}; -  if (vkCreateDevice(renderer->physical_device, &create_info, nullptr, -                     &renderer->device) != VK_SUCCESS) { -    VGLTF_LOG_ERR("Failed to create logical device"); -    goto err; -  } - -  vkGetDeviceQueue(renderer->device, queue_family_indices.graphics_family, 0, -                   &renderer->graphics_queue); -  vkGetDeviceQueue(renderer->device, queue_family_indices.present_family, 0, -                   &renderer->present_queue); - -  return true; -err: -  return false; -} - -static bool vgltf_renderer_create_surface(struct vgltf_renderer *renderer, -                                          struct vgltf_platform *platform) { -  if (!vgltf_platform_create_vulkan_surface(platform, renderer->instance, -                                            &renderer->surface)) { -    VGLTF_LOG_ERR("Couldn't create surface"); -    goto err; -  } - -  return true; -err: -  return false; -} - -static VkSurfaceFormatKHR -choose_swapchain_surface_format(VkSurfaceFormatKHR *available_formats, -                                uint32_t available_format_count) { -  for (uint32_t available_format_index = 0; -       available_format_index < available_format_count; -       available_format_index++) { -    VkSurfaceFormatKHR *available_format = -        &available_formats[available_format_index]; -    if (available_format->format == VK_FORMAT_B8G8R8A8_SRGB && -        available_format->colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) { -      return *available_format; -    } -  } - -  return available_formats[0]; -} - -static VkPresentModeKHR -choose_swapchain_present_mode(VkPresentModeKHR *available_modes, -                              uint32_t available_mode_count) { -  for (uint32_t available_mode_index = 0; -       available_mode_index < available_mode_count; 
available_mode_index++) { -    VkPresentModeKHR available_mode = available_modes[available_mode_index]; -    if (available_mode == VK_PRESENT_MODE_MAILBOX_KHR) { -      return available_mode; -    } -  } - -  return VK_PRESENT_MODE_FIFO_KHR; -} - -static uint32_t clamp_uint32(uint32_t min, uint32_t max, uint32_t value) { -  return value < min ? min : value > max ? max : value; -} - -static VkExtent2D -choose_swapchain_extent(const VkSurfaceCapabilitiesKHR *capabilities, int width, -                        int height) { -  if (capabilities->currentExtent.width != UINT32_MAX) { -    return capabilities->currentExtent; -  } else { -    VkExtent2D actual_extent = {width, height}; -    actual_extent.width = -        clamp_uint32(capabilities->minImageExtent.width, -                     capabilities->maxImageExtent.width, actual_extent.width); -    actual_extent.height = -        clamp_uint32(capabilities->minImageExtent.height, -                     capabilities->maxImageExtent.height, actual_extent.height); -    return actual_extent; -  } -} - -static bool vgltf_renderer_create_swapchain(struct vgltf_renderer *renderer) { -  struct swapchain_support_details swapchain_support_details = {}; -  swapchain_support_details_query_from_device( -      &swapchain_support_details, renderer->physical_device, renderer->surface); - -  VkSurfaceFormatKHR surface_format = -      choose_swapchain_surface_format(swapchain_support_details.formats, -                                      swapchain_support_details.format_count); -  VkPresentModeKHR present_mode = choose_swapchain_present_mode( -      swapchain_support_details.present_modes, -      swapchain_support_details.present_mode_count); - -  VkExtent2D extent = choose_swapchain_extent( -      &swapchain_support_details.capabilities, renderer->window_size.width, -      renderer->window_size.height); -  uint32_t image_count = -      swapchain_support_details.capabilities.minImageCount + 1; -  if (swapchain_support_details.capabilities.maxImageCount > 0 && -      image_count > swapchain_support_details.capabilities.maxImageCount) { -    image_count = swapchain_support_details.capabilities.maxImageCount; -  } - -  VkSwapchainCreateInfoKHR create_info = { -      .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, -      .surface = renderer->surface, -      .minImageCount = image_count, -      .imageFormat = surface_format.format, -      .imageColorSpace = surface_format.colorSpace, -      .imageExtent = extent, -      .imageArrayLayers = 1, -      .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT}; -  struct queue_family_indices indices = {}; -  queue_family_indices_for_device(&indices, renderer->physical_device, -                                  renderer->surface); -  uint32_t queue_family_indices[] = {indices.graphics_family, -                                     indices.present_family}; -  if (indices.graphics_family != indices.present_family) { -    create_info.imageSharingMode = VK_SHARING_MODE_CONCURRENT; -    create_info.queueFamilyIndexCount = 2; -    create_info.pQueueFamilyIndices = queue_family_indices; -  } else { -    create_info.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; -  } - -  create_info.preTransform = -      swapchain_support_details.capabilities.currentTransform; -  create_info.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; -  create_info.presentMode = present_mode; -  create_info.clipped = VK_TRUE; -  create_info.oldSwapchain = VK_NULL_HANDLE; - -  if (vkCreateSwapchainKHR(renderer->device, &create_info, nullptr, -                 
          &renderer->swapchain) != VK_SUCCESS) { -    VGLTF_LOG_ERR("Swapchain creation failed!"); -    goto err; -  } - -  if (vkGetSwapchainImagesKHR(renderer->device, renderer->swapchain, -                              &renderer->swapchain_image_count, -                              nullptr) != VK_SUCCESS) { -    VGLTF_LOG_ERR("Couldn't get swapchain image count"); -    goto destroy_swapchain; -  } - -  if (renderer->swapchain_image_count > -      VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT) { -    VGLTF_LOG_ERR("Swapchain image array cannot fit all %d swapchain images", -                  renderer->swapchain_image_count); -    goto destroy_swapchain; -  } - -  if (vkGetSwapchainImagesKHR(renderer->device, renderer->swapchain, -                              &renderer->swapchain_image_count, -                              renderer->swapchain_images) != VK_SUCCESS) { -    VGLTF_LOG_ERR("Couldn't get swapchain images"); -    goto destroy_swapchain; -  } - -  renderer->swapchain_image_format = surface_format.format; -  renderer->swapchain_extent = extent; - -  return true; -destroy_swapchain: -  vkDestroySwapchainKHR(renderer->device, renderer->swapchain, nullptr); -err: -  return false; -} - -static bool vgltf_renderer_create_image_views(struct vgltf_renderer *renderer) { -  uint32_t swapchain_image_index; -  for (swapchain_image_index = 0; -       swapchain_image_index < renderer->swapchain_image_count; -       swapchain_image_index++) { -    VkImage swapchain_image = renderer->swapchain_images[swapchain_image_index]; - -    VkImageViewCreateInfo create_info = { -        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, -        .image = swapchain_image, -        .viewType = VK_IMAGE_VIEW_TYPE_2D, -        .format = renderer->swapchain_image_format, -        .components = {VK_COMPONENT_SWIZZLE_IDENTITY, -                       VK_COMPONENT_SWIZZLE_IDENTITY, -                       VK_COMPONENT_SWIZZLE_IDENTITY, -                       VK_COMPONENT_SWIZZLE_IDENTITY}, -        .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, -                             .levelCount = 1, -                             .layerCount = 1}}; - -    if (vkCreateImageView( -            renderer->device, &create_info, nullptr, -            &renderer->swapchain_image_views[swapchain_image_index]) != -        VK_SUCCESS) { -      goto err; -    } -  } -  return true; -err: -  for (uint32_t to_remove_index = 0; to_remove_index < swapchain_image_index; -       to_remove_index++) { -    vkDestroyImageView(renderer->device, -                       renderer->swapchain_image_views[to_remove_index], -                       nullptr); -  } -  return false; -} - -static bool create_shader_module(VkDevice device, const unsigned char *code, -                                 int size, VkShaderModule *out) { -  VkShaderModuleCreateInfo create_info = { -      .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, -      .codeSize = size, -      .pCode = (const uint32_t *)code, -  }; -  if (vkCreateShaderModule(device, &create_info, nullptr, out) != VK_SUCCESS) { -    VGLTF_LOG_ERR("Couldn't create shader module"); -    goto err; -  } -  return true; -err: -  return false; -} - -static bool vgltf_renderer_create_render_pass(struct vgltf_renderer *renderer) { -  VkAttachmentDescription color_attachment = { -      .format = renderer->swapchain_image_format, -      .samples = VK_SAMPLE_COUNT_1_BIT, -      .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, -      .storeOp = VK_ATTACHMENT_STORE_OP_STORE, -      .stencilLoadOp = 
VK_ATTACHMENT_LOAD_OP_DONT_CARE, -      .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, -      .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, -      .finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR}; -  VkAttachmentReference color_attachment_ref = { -      .attachment = 0, -      .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, -  }; -  VkSubpassDescription subpass = {.pipelineBindPoint = -                                      VK_PIPELINE_BIND_POINT_GRAPHICS, -                                  .pColorAttachments = &color_attachment_ref, -                                  .colorAttachmentCount = 1}; -  VkSubpassDependency dependency = { -      .srcSubpass = VK_SUBPASS_EXTERNAL, -      .dstSubpass = 0, -      .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, -      .srcAccessMask = 0, -      .dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, -      .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT}; - -  VkRenderPassCreateInfo render_pass_info = { -      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, -      .attachmentCount = 1, -      .pAttachments = &color_attachment, -      .subpassCount = 1, -      .pSubpasses = &subpass, -      .dependencyCount = 1, -      .pDependencies = &dependency}; - -  if (vkCreateRenderPass(renderer->device, &render_pass_info, nullptr, -                         &renderer->render_pass) != VK_SUCCESS) { -    VGLTF_LOG_ERR("Failed to create render pass"); -    goto err; -  } - -  return true; -err: -  return false; -} - -static bool -vgltf_renderer_create_graphics_pipeline(struct vgltf_renderer *renderer) { -  static constexpr unsigned char triangle_shader_vert_code[] = { -#embed "../compiled_shaders/triangle.vert.spv" -  }; -  static constexpr unsigned char triangle_shader_frag_code[] = { -#embed "../compiled_shaders/triangle.frag.spv" -  }; - -  VkShaderModule triangle_shader_vert_module; -  if (!create_shader_module(renderer->device, triangle_shader_vert_code, -                            sizeof(triangle_shader_vert_code), -                            &triangle_shader_vert_module)) { -    VGLTF_LOG_ERR("Couldn't create triangle vert shader module"); -    goto err; -  } - -  VkShaderModule triangle_shader_frag_module; -  if (!create_shader_module(renderer->device, triangle_shader_frag_code, -                            sizeof(triangle_shader_frag_code), -                            &triangle_shader_frag_module)) { -    VGLTF_LOG_ERR("Couldn't create triangle frag shader module"); -    goto destroy_vert_shader_module; -  } - -  VkPipelineShaderStageCreateInfo triangle_shader_vert_stage_create_info = { -      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, -      .stage = VK_SHADER_STAGE_VERTEX_BIT, -      .module = triangle_shader_vert_module, -      .pName = "main"}; -  VkPipelineShaderStageCreateInfo triangle_shader_frag_stage_create_info = { -      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, -      .stage = VK_SHADER_STAGE_FRAGMENT_BIT, -      .module = triangle_shader_frag_module, -      .pName = "main"}; -  VkPipelineShaderStageCreateInfo shader_stages[] = { -      triangle_shader_vert_stage_create_info, -      triangle_shader_frag_stage_create_info}; - -  VkDynamicState dynamic_states[] = { -      VK_DYNAMIC_STATE_VIEWPORT, -      VK_DYNAMIC_STATE_SCISSOR, -  }; - -  VkPipelineDynamicStateCreateInfo dynamic_state = { -      .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, -      .dynamicStateCount = sizeof(dynamic_states) / sizeof(dynamic_states[0]), -      .pDynamicStates = 
dynamic_states}; - -  VkPipelineVertexInputStateCreateInfo vertex_input_info = { -      .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, -      .vertexBindingDescriptionCount = 0, -      .vertexAttributeDescriptionCount = 0, -  }; - -  VkPipelineInputAssemblyStateCreateInfo input_assembly = { -      .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, -      .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, -      .primitiveRestartEnable = VK_FALSE, -  }; - -  VkPipelineViewportStateCreateInfo viewport_state = { -      .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, -      .viewportCount = 1, -      .scissorCount = 1}; - -  VkPipelineRasterizationStateCreateInfo rasterizer = { -      .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, -      .depthClampEnable = VK_FALSE, -      .rasterizerDiscardEnable = VK_FALSE, -      .polygonMode = VK_POLYGON_MODE_FILL, -      .lineWidth = 1.f, -      .cullMode = VK_CULL_MODE_BACK_BIT, -      .frontFace = VK_FRONT_FACE_CLOCKWISE, -      .depthBiasEnable = VK_FALSE}; - -  VkPipelineMultisampleStateCreateInfo multisampling = { -      .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, -      .sampleShadingEnable = VK_FALSE, -      .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, -  }; - -  VkPipelineColorBlendAttachmentState color_blend_attachment = { -      .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | -                        VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, -      .blendEnable = VK_FALSE, -  }; - -  VkPipelineColorBlendStateCreateInfo color_blending = { -      .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, -      .logicOpEnable = VK_FALSE, -      .attachmentCount = 1, -      .pAttachments = &color_blend_attachment}; - -  VkPipelineLayoutCreateInfo pipeline_layout_info = { -      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, -  }; - -  if (vkCreatePipelineLayout(renderer->device, &pipeline_layout_info, nullptr, -                             &renderer->pipeline_layout) != VK_SUCCESS) { -    VGLTF_LOG_ERR("Couldn't create pipeline layout"); -    goto destroy_frag_shader_module; -  } - -  VkGraphicsPipelineCreateInfo pipeline_info = { -      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, -      .stageCount = 2, -      .pStages = shader_stages, -      .pVertexInputState = &vertex_input_info, -      .pInputAssemblyState = &input_assembly, -      .pViewportState = &viewport_state, -      .pRasterizationState = &rasterizer, -      .pMultisampleState = &multisampling, -      .pColorBlendState = &color_blending, -      .pDynamicState = &dynamic_state, -      .layout = renderer->pipeline_layout, -      .renderPass = renderer->render_pass, -      .subpass = 0, -  }; - -  if (vkCreateGraphicsPipelines(renderer->device, VK_NULL_HANDLE, 1, -                                &pipeline_info, nullptr, -                                &renderer->graphics_pipeline) != VK_SUCCESS) { -    VGLTF_LOG_ERR("Couldn't create pipeline"); -    goto destroy_pipeline_layout; -  } - -  vkDestroyShaderModule(renderer->device, triangle_shader_frag_module, nullptr); -  vkDestroyShaderModule(renderer->device, triangle_shader_vert_module, nullptr); -  return true; -destroy_pipeline_layout: -  vkDestroyPipelineLayout(renderer->device, renderer->pipeline_layout, nullptr); -destroy_frag_shader_module: -  vkDestroyShaderModule(renderer->device, triangle_shader_frag_module, nullptr); -destroy_vert_shader_module: -  
vkDestroyShaderModule(renderer->device, triangle_shader_vert_module, nullptr); -err: -  return false; -} - -static bool -vgltf_renderer_create_framebuffers(struct vgltf_renderer *renderer) { -  for (uint32_t i = 0; i < renderer->swapchain_image_count; i++) { -    VkImageView attachments[] = {renderer->swapchain_image_views[i]}; - -    VkFramebufferCreateInfo framebuffer_info = { -        .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, -        .renderPass = renderer->render_pass, -        .attachmentCount = 1, -        .pAttachments = attachments, -        .width = renderer->swapchain_extent.width, -        .height = renderer->swapchain_extent.height, -        .layers = 1}; - -    if (vkCreateFramebuffer(renderer->device, &framebuffer_info, nullptr, -                            &renderer->swapchain_framebuffers[i]) != -        VK_SUCCESS) { -      VGLTF_LOG_ERR("Failed to create framebuffer"); -      goto err; -    } -  } - -  return true; -err: -  return false; -} - -static bool -vgltf_renderer_create_command_pool(struct vgltf_renderer *renderer) { -  struct queue_family_indices queue_family_indices = {}; -  if (!queue_family_indices_for_device(&queue_family_indices, -                                       renderer->physical_device, -                                       renderer->surface)) { -    VGLTF_LOG_ERR("Couldn't fetch queue family indices"); -    goto err; -  } - -  VkCommandPoolCreateInfo pool_info = { -      .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, -      .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, -      .queueFamilyIndex = queue_family_indices.graphics_family}; - -  if (vkCreateCommandPool(renderer->device, &pool_info, nullptr, -                          &renderer->command_pool) != VK_SUCCESS) { -    VGLTF_LOG_ERR("Couldn't create command pool"); -    goto err; -  } - -  return true; -err: -  return false; -} - -static bool -vgltf_renderer_create_command_buffer(struct vgltf_renderer *renderer) { -  VkCommandBufferAllocateInfo allocate_info = { -      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, -      .commandPool = renderer->command_pool, -      .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, -      .commandBufferCount = VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT}; - -  if (vkAllocateCommandBuffers(renderer->device, &allocate_info, -                               renderer->command_buffer) != VK_SUCCESS) { -    VGLTF_LOG_ERR("Couldn't allocate command buffers"); -    goto err; -  } - -  return true; -err: -  return false; -} - -static bool -vgltf_renderer_create_sync_objects(struct vgltf_renderer *renderer) { -  VkSemaphoreCreateInfo semaphore_info = { -      .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, -  }; - -  VkFenceCreateInfo fence_info = {.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, -                                  .flags = VK_FENCE_CREATE_SIGNALED_BIT}; - -  int frame_in_flight_index = 0; -  for (; frame_in_flight_index < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; -       frame_in_flight_index++) { -    if (vkCreateSemaphore( -            renderer->device, &semaphore_info, nullptr, -            &renderer->image_available_semaphores[frame_in_flight_index]) != -            VK_SUCCESS || -        vkCreateSemaphore( -            renderer->device, &semaphore_info, nullptr, -            &renderer->render_finished_semaphores[frame_in_flight_index]) != -            VK_SUCCESS || -        vkCreateFence(renderer->device, &fence_info, nullptr, -                      &renderer->in_flight_fences[frame_in_flight_index]) != -            
VK_SUCCESS) { -      VGLTF_LOG_ERR("Couldn't create sync objects"); -      goto err; -    } -  } - -  return true; -err: -  for (int frame_in_flight_to_delete_index = 0; -       frame_in_flight_to_delete_index < frame_in_flight_index; -       frame_in_flight_to_delete_index++) { -    vkDestroyFence(renderer->device, -                   renderer->in_flight_fences[frame_in_flight_index], nullptr); -    vkDestroySemaphore( -        renderer->device, -        renderer->render_finished_semaphores[frame_in_flight_index], nullptr); -    vkDestroySemaphore( -        renderer->device, -        renderer->image_available_semaphores[frame_in_flight_index], nullptr); -  } -  return false; -} - -static void vgltf_renderer_cleanup_swapchain(struct vgltf_renderer *renderer) { -  for (uint32_t framebuffer_index = 0; -       framebuffer_index < renderer->swapchain_image_count; -       framebuffer_index++) { -    vkDestroyFramebuffer(renderer->device, -                         renderer->swapchain_framebuffers[framebuffer_index], -                         nullptr); -  } - -  for (uint32_t image_view_index = 0; -       image_view_index < renderer->swapchain_image_count; image_view_index++) { -    vkDestroyImageView(renderer->device, -                       renderer->swapchain_image_views[image_view_index], -                       nullptr); -  } - -  vkDestroySwapchainKHR(renderer->device, renderer->swapchain, nullptr); -} - -static bool vgltf_renderer_recreate_swapchain(struct vgltf_renderer *renderer) { -  vkDeviceWaitIdle(renderer->device); -  vgltf_renderer_cleanup_swapchain(renderer); - -  // TODO add error handling -  vgltf_renderer_create_swapchain(renderer); -  vgltf_renderer_create_image_views(renderer); -  vgltf_renderer_create_framebuffers(renderer); -  return true; -} - -bool vgltf_renderer_triangle_pass(struct vgltf_renderer *renderer) { -  vkWaitForFences(renderer->device, 1, -                  &renderer->in_flight_fences[renderer->current_frame], VK_TRUE, -                  UINT64_MAX); - -  uint32_t image_index; -  VkResult acquire_swapchain_image_result = vkAcquireNextImageKHR( -      renderer->device, renderer->swapchain, UINT64_MAX, -      renderer->image_available_semaphores[renderer->current_frame], -      VK_NULL_HANDLE, &image_index); -  if (acquire_swapchain_image_result == VK_ERROR_OUT_OF_DATE_KHR || -      acquire_swapchain_image_result == VK_SUBOPTIMAL_KHR || -      renderer->framebuffer_resized) { -    renderer->framebuffer_resized = false; -    vgltf_renderer_recreate_swapchain(renderer); -    return true; -  } else if (acquire_swapchain_image_result != VK_SUCCESS) { -    VGLTF_LOG_ERR("Failed to acquire a swapchain image"); -    goto err; -  } - -  vkResetFences(renderer->device, 1, -                &renderer->in_flight_fences[renderer->current_frame]); - -  vkResetCommandBuffer(renderer->command_buffer[renderer->current_frame], 0); -  VkCommandBufferBeginInfo begin_info = { -      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, -  }; - -  if (vkBeginCommandBuffer(renderer->command_buffer[renderer->current_frame], -                           &begin_info) != VK_SUCCESS) { -    VGLTF_LOG_ERR("Failed to begin recording command buffer"); -    goto err; -  } - -  VkRenderPassBeginInfo render_pass_info = { -      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, -      .renderPass = renderer->render_pass, -      .framebuffer = renderer->swapchain_framebuffers[image_index], -      .renderArea = {.offset = {}, .extent = renderer->swapchain_extent}, -      .clearValueCount = 1, -     
 .pClearValues = -          &(const VkClearValue){.color = {.float32 = {0.f, 0.f, 0.f, 1.f}}}, - -  }; - -  vkCmdBeginRenderPass(renderer->command_buffer[renderer->current_frame], -                       &render_pass_info, VK_SUBPASS_CONTENTS_INLINE); -  vkCmdBindPipeline(renderer->command_buffer[renderer->current_frame], -                    VK_PIPELINE_BIND_POINT_GRAPHICS, -                    renderer->graphics_pipeline); -  VkViewport viewport = {.x = 0.f, -                         .y = 0.f, -                         .width = (float)renderer->swapchain_extent.width, -                         .height = (float)renderer->swapchain_extent.height, -                         .minDepth = 0.f, -                         .maxDepth = 1.f}; -  vkCmdSetViewport(renderer->command_buffer[renderer->current_frame], 0, 1, -                   &viewport); -  VkRect2D scissor = {.offset = {}, .extent = renderer->swapchain_extent}; -  vkCmdSetScissor(renderer->command_buffer[renderer->current_frame], 0, 1, -                  &scissor); - -  vkCmdDraw(renderer->command_buffer[renderer->current_frame], 3, 1, 0, 0); - -  vkCmdEndRenderPass(renderer->command_buffer[renderer->current_frame]); - -  if (vkEndCommandBuffer(renderer->command_buffer[renderer->current_frame]) != -      VK_SUCCESS) { -    VGLTF_LOG_ERR("Failed to record command buffer"); -    goto err; -  } - -  VkSubmitInfo submit_info = { -      .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, -  }; - -  VkSemaphore wait_semaphores[] = { -      renderer->image_available_semaphores[renderer->current_frame]}; -  VkPipelineStageFlags wait_stages[] = { -      VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT}; -  submit_info.waitSemaphoreCount = 1; -  submit_info.pWaitSemaphores = wait_semaphores; -  submit_info.pWaitDstStageMask = wait_stages; -  submit_info.commandBufferCount = 1; -  submit_info.pCommandBuffers = -      &renderer->command_buffer[renderer->current_frame]; - -  VkSemaphore signal_semaphores[] = { -      renderer->render_finished_semaphores[renderer->current_frame]}; -  submit_info.signalSemaphoreCount = 1; -  submit_info.pSignalSemaphores = signal_semaphores; -  if (vkQueueSubmit(renderer->graphics_queue, 1, &submit_info, -                    renderer->in_flight_fences[renderer->current_frame]) != -      VK_SUCCESS) { -    VGLTF_LOG_ERR("Failed to submit draw command buffer"); -    goto err; -  } - -  VkPresentInfoKHR present_info = {.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, -                                   .waitSemaphoreCount = 1, -                                   .pWaitSemaphores = signal_semaphores}; - -  VkSwapchainKHR swapchains[] = {renderer->swapchain}; -  present_info.swapchainCount = 1; -  present_info.pSwapchains = swapchains; -  present_info.pImageIndices = &image_index; -  VkResult result = vkQueuePresentKHR(renderer->present_queue, &present_info); -  if (result == VK_ERROR_OUT_OF_DATE_KHR || result == VK_SUBOPTIMAL_KHR) { -    vgltf_renderer_recreate_swapchain(renderer); -  } else if (acquire_swapchain_image_result != VK_SUCCESS) { -    VGLTF_LOG_ERR("Failed to acquire a swapchain image"); -    goto err; -  } -  renderer->current_frame = -      (renderer->current_frame + 1) % VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; -  return true; -err: -  return false; -} - -bool vgltf_renderer_init(struct vgltf_renderer *renderer, -                         struct vgltf_platform *platform) { -  if (!vgltf_renderer_create_instance(renderer, platform)) { -    VGLTF_LOG_ERR("instance creation failed"); -    goto err; -  } -  
vgltf_renderer_setup_debug_messenger(renderer); -  if (!vgltf_renderer_create_surface(renderer, platform)) { -    goto destroy_instance; -  } - -  if (!vgltf_renderer_pick_physical_device(renderer)) { -    VGLTF_LOG_ERR("Couldn't pick physical device"); -    goto destroy_surface; -  } -  if (!vgltf_renderer_create_logical_device(renderer)) { -    VGLTF_LOG_ERR("Couldn't create logical device"); -    goto destroy_device; -  } - -  struct vgltf_window_size window_size = {800, 600}; -  if (!vgltf_platform_get_window_size(platform, &window_size)) { -    VGLTF_LOG_ERR("Couldn't get window size"); -    goto destroy_device; -  } -  renderer->window_size = window_size; - -  if (!vgltf_renderer_create_swapchain(renderer)) { -    VGLTF_LOG_ERR("Couldn't create swapchain"); -    goto destroy_device; -  } - -  if (!vgltf_renderer_create_image_views(renderer)) { -    VGLTF_LOG_ERR("Couldn't create image views"); -    goto destroy_swapchain; -  } - -  if (!vgltf_renderer_create_render_pass(renderer)) { -    VGLTF_LOG_ERR("Couldn't create render pass"); -    goto destroy_image_views; -  } - -  if (!vgltf_renderer_create_graphics_pipeline(renderer)) { -    VGLTF_LOG_ERR("Couldn't create graphics pipeline"); -    goto destroy_render_pass; -  } - -  if (!vgltf_renderer_create_framebuffers(renderer)) { -    VGLTF_LOG_ERR("Couldn't create framebuffers"); -    goto destroy_graphics_pipeline; -  } - -  if (!vgltf_renderer_create_command_pool(renderer)) { -    VGLTF_LOG_ERR("Couldn't create command pool"); -    goto destroy_frame_buffers; -  } - -  if (!vgltf_renderer_create_command_buffer(renderer)) { -    VGLTF_LOG_ERR("Couldn't create command buffer"); -    goto destroy_command_pool; -  } - -  if (!vgltf_renderer_create_sync_objects(renderer)) { -    VGLTF_LOG_ERR("Couldn't create sync objects"); -    goto destroy_command_pool; -  } - -  return true; - -destroy_command_pool: -  vkDestroyCommandPool(renderer->device, renderer->command_pool, nullptr); -destroy_frame_buffers: -  for (uint32_t swapchain_framebuffer_index = 0; -       swapchain_framebuffer_index < renderer->swapchain_image_count; -       swapchain_framebuffer_index++) { -    vkDestroyFramebuffer( -        renderer->device, -        renderer->swapchain_framebuffers[swapchain_framebuffer_index], nullptr); -  } -destroy_graphics_pipeline: -  vkDestroyPipeline(renderer->device, renderer->graphics_pipeline, nullptr); -  vkDestroyPipelineLayout(renderer->device, renderer->pipeline_layout, nullptr); -destroy_render_pass: -  vkDestroyRenderPass(renderer->device, renderer->render_pass, nullptr); -destroy_image_views: -  for (uint32_t swapchain_image_view_index = 0; -       swapchain_image_view_index < renderer->swapchain_image_count; -       swapchain_image_view_index++) { -    vkDestroyImageView( -        renderer->device, -        renderer->swapchain_image_views[swapchain_image_view_index], nullptr); -  } -destroy_swapchain: -  vkDestroySwapchainKHR(renderer->device, renderer->swapchain, nullptr); -destroy_device: -  vkDestroyDevice(renderer->device, nullptr); -destroy_surface: -  vkDestroySurfaceKHR(renderer->instance, renderer->surface, nullptr); -destroy_instance: -  if (enable_validation_layers) { -    destroy_debug_utils_messenger_ext(renderer->instance, -                                      renderer->debug_messenger, nullptr); -  } -  vkDestroyInstance(renderer->instance, nullptr); -err: -  return false; -} -void vgltf_renderer_deinit(struct vgltf_renderer *renderer) { -  vkDeviceWaitIdle(renderer->device); -  
vgltf_renderer_cleanup_swapchain(renderer);
-  vkDestroyPipeline(renderer->device, renderer->graphics_pipeline, nullptr);
-  vkDestroyPipelineLayout(renderer->device, renderer->pipeline_layout, nullptr);
-  vkDestroyRenderPass(renderer->device, renderer->render_pass, nullptr);
-  for (int i = 0; i < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; i++) {
-    vkDestroySemaphore(renderer->device,
-                       renderer->image_available_semaphores[i], nullptr);
-    vkDestroySemaphore(renderer->device,
-                       renderer->render_finished_semaphores[i], nullptr);
-    vkDestroyFence(renderer->device, renderer->in_flight_fences[i], nullptr);
-  }
-  vkDestroyCommandPool(renderer->device, renderer->command_pool, nullptr);
-  vkDestroyDevice(renderer->device, nullptr);
-  if (enable_validation_layers) {
-    destroy_debug_utils_messenger_ext(renderer->instance,
-                                      renderer->debug_messenger, nullptr);
-  }
-  vkDestroySurfaceKHR(renderer->instance, renderer->surface, nullptr);
-  vkDestroyInstance(renderer->instance, nullptr);
-}
-void vgltf_renderer_on_window_resized(struct vgltf_renderer *renderer,
-                                      struct vgltf_window_size size) {
-  if (size.width > 0 && size.height > 0 &&
-      size.width != renderer->window_size.width &&
-      size.height != renderer->window_size.height) {
-    renderer->window_size = size;
-    renderer->framebuffer_resized = true;
-  }
-}
diff --git a/src/renderer.h b/src/renderer.h
deleted file mode 100644
index a0417aa..0000000
--- a/src/renderer.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef VGLTF_RENDERER_H
-#define VGLTF_RENDERER_H
-
-#include "platform.h"
-#include <vulkan/vulkan.h>
-
-constexpr int VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT = 2;
-constexpr int VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT = 32;
-struct vgltf_renderer {
-  VkInstance instance;
-  VkPhysicalDevice physical_device;
-  VkDevice device;
-  VkQueue graphics_queue;
-  VkQueue present_queue;
-  VkDebugUtilsMessengerEXT debug_messenger;
-  VkSurfaceKHR surface;
-  VkSwapchainKHR swapchain;
-  VkImage swapchain_images[VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT];
-  VkImageView swapchain_image_views[VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT];
-  VkFormat swapchain_image_format;
-  VkExtent2D swapchain_extent;
-  uint32_t swapchain_image_count;
-  VkRenderPass render_pass;
-  VkPipelineLayout pipeline_layout;
-  VkPipeline graphics_pipeline;
-  VkFramebuffer
-      swapchain_framebuffers[VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT];
-  VkCommandPool command_pool;
-  VkCommandBuffer command_buffer[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT];
-  VkSemaphore
-      image_available_semaphores[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT];
-  VkSemaphore
-      render_finished_semaphores[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT];
-  VkFence in_flight_fences[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT];
-  struct vgltf_window_size window_size;
-  uint32_t current_frame;
-  bool framebuffer_resized;
-};
-bool vgltf_renderer_init(struct vgltf_renderer *renderer,
-                         struct vgltf_platform *platform);
-void vgltf_renderer_deinit(struct vgltf_renderer *renderer);
-bool vgltf_renderer_triangle_pass(struct vgltf_renderer *renderer);
-void vgltf_renderer_on_window_resized(struct vgltf_renderer *renderer,
-                                      struct vgltf_window_size size);
-#endif // VGLTF_RENDERER_H
diff --git a/src/renderer/renderer.c b/src/renderer/renderer.c
new file mode 100644
index 0000000..d34ef73
--- /dev/null
+++ b/src/renderer/renderer.c
@@ -0,0 +1,2559 @@
+#include "renderer.h"
+#include "../image.h"
+#include "../log.h"
+#include "../maths.h"
+#include "../platform.h"
+#include "vma_usage.h"
+#include <math.h>
+
+#define TINYOBJ_LOADER_C_IMPLEMENTATION
+#include "vendor/tiny_obj_loader_c.h"
+
+#include <assert.h>
+#include <vulkan/vulkan_core.h>
+
+static const char MODEL_PATH[] = "assets/model.obj";
+static const char TEXTURE_PATH[] = "assets/texture.png";
+
+VkVertexInputBindingDescription vgltf_vertex_binding_description() {
+  return (VkVertexInputBindingDescription){
+      .binding = 0,
+      .stride = sizeof(struct vgltf_vertex),
+      .inputRate = VK_VERTEX_INPUT_RATE_VERTEX};
+}
+struct vgltf_vertex_input_attribute_descriptions
+vgltf_vertex_attribute_descriptions(void) {
+  return (struct vgltf_vertex_input_attribute_descriptions){
+      .descriptions = {(VkVertexInputAttributeDescription){
+                           .binding = 0,
+                           .location = 0,
+                           .format = VK_FORMAT_R32G32B32_SFLOAT,
+                           .offset = offsetof(struct vgltf_vertex, position)},
+                       (VkVertexInputAttributeDescription){
+                           .binding = 0,
+                           .location = 1,
+                           .format = VK_FORMAT_R32G32B32_SFLOAT,
+                           .offset = offsetof(struct vgltf_vertex, color)},
+                       (VkVertexInputAttributeDescription){
+                           .binding = 0,
+                           .location = 2,
+                           .format = VK_FORMAT_R32G32_SFLOAT,
+                           .offset = offsetof(struct vgltf_vertex,
+                                              texture_coordinates)}},
+      .count = 3};
+}
+
+static const char *VALIDATION_LAYERS[] = {"VK_LAYER_KHRONOS_validation"};
+static constexpr int VALIDATION_LAYER_COUNT =
+    sizeof(VALIDATION_LAYERS) / sizeof(VALIDATION_LAYERS[0]);
+
+#ifdef VGLTF_DEBUG
+static constexpr bool enable_validation_layers = true;
+#else
+static constexpr bool enable_validation_layers = false;
+#endif
+
+static VKAPI_ATTR VkBool32 VKAPI_CALL
+debug_callback(VkDebugUtilsMessageSeverityFlagBitsEXT message_severity,
+               VkDebugUtilsMessageTypeFlagBitsEXT message_type,
+               const VkDebugUtilsMessengerCallbackDataEXT *callback_data,
+               void *user_data) {
+  (void)message_severity;
+  (void)message_type;
+  (void)user_data;
+  VGLTF_LOG_DBG("validation layer: %s", callback_data->pMessage);
+  return VK_FALSE;
+}
+
+static constexpr int REQUIRED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY = 10;
+struct required_instance_extensions {
+  const char *extensions[REQUIRED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY];
+  uint32_t count;
+};
+void required_instance_extensions_push(
+    struct required_instance_extensions *required_instance_extensions,
+    const char *required_instance_extension) {
+  if (required_instance_extensions->count ==
+      REQUIRED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY) {
+    VGLTF_PANIC("required instance extensions array is full");
+  }
+  required_instance_extensions
+      ->extensions[required_instance_extensions->count++] =
+      required_instance_extension;
+}
+
+static constexpr int SUPPORTED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY = 128;
+struct supported_instance_extensions {
+  VkExtensionProperties
+      properties[SUPPORTED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY];
+  uint32_t count;
+};
+bool supported_instance_extensions_init(
+    struct 
supported_instance_extensions *supported_instance_extensions) { +  if (vkEnumerateInstanceExtensionProperties( +          nullptr, &supported_instance_extensions->count, nullptr) != +      VK_SUCCESS) { +    goto err; +  } + +  if (supported_instance_extensions->count > +      SUPPORTED_INSTANCE_EXTENSIONS_ARRAY_CAPACITY) { +    VGLTF_LOG_ERR("supported instance extensions array cannot fit all the " +                  "VkExtensionProperties"); +    goto err; +  } + +  if (vkEnumerateInstanceExtensionProperties( +          nullptr, &supported_instance_extensions->count, +          supported_instance_extensions->properties) != VK_SUCCESS) { +    goto err; +  } +  return true; +err: +  return false; +} +void supported_instance_extensions_debug_print( +    const struct supported_instance_extensions *supported_instance_extensions) { +  VGLTF_LOG_DBG("Supported instance extensions:"); +  for (uint32_t i = 0; i < supported_instance_extensions->count; i++) { +    VGLTF_LOG_DBG("\t- %s", +                  supported_instance_extensions->properties[i].extensionName); +  } +} +bool supported_instance_extensions_includes( +    const struct supported_instance_extensions *supported_instance_extensions, +    const char *extension_name) { +  for (uint32_t supported_instance_extension_index = 0; +       supported_instance_extension_index < +       supported_instance_extensions->count; +       supported_instance_extension_index++) { +    const VkExtensionProperties *extension_properties = +        &supported_instance_extensions +             ->properties[supported_instance_extension_index]; +    if (strcmp(extension_properties->extensionName, extension_name) == 0) { +      return true; +    } +  } + +  return false; +} + +static constexpr uint32_t SUPPORTED_VALIDATION_LAYERS_ARRAY_CAPACITY = 64; +struct supported_validation_layers { +  VkLayerProperties properties[SUPPORTED_VALIDATION_LAYERS_ARRAY_CAPACITY]; +  uint32_t count; +}; +bool supported_validation_layers_init( +    struct supported_validation_layers *supported_validation_layers) { +  if (vkEnumerateInstanceLayerProperties(&supported_validation_layers->count, +                                         nullptr) != VK_SUCCESS) { +    goto err; +  } + +  if (supported_validation_layers->count > +      SUPPORTED_VALIDATION_LAYERS_ARRAY_CAPACITY) { +    VGLTF_LOG_ERR("supported validation layers array cannot fit all the " +                  "VkLayerProperties"); +    goto err; +  } + +  if (vkEnumerateInstanceLayerProperties( +          &supported_validation_layers->count, +          supported_validation_layers->properties) != VK_SUCCESS) { +    goto err; +  } + +  return true; +err: +  return false; +} + +static bool are_validation_layer_supported() { +  struct supported_validation_layers supported_layers = {}; +  if (!supported_validation_layers_init(&supported_layers)) { +    goto err; +  } + +  for (int requested_layer_index = 0; +       requested_layer_index < VALIDATION_LAYER_COUNT; +       requested_layer_index++) { +    const char *requested_layer_name = VALIDATION_LAYERS[requested_layer_index]; +    bool requested_layer_found = false; +    for (uint32_t supported_layer_index = 0; +         supported_layer_index < supported_layers.count; +         supported_layer_index++) { +      VkLayerProperties *supported_layer = +          &supported_layers.properties[supported_layer_index]; +      if (strcmp(requested_layer_name, supported_layer->layerName) == 0) { +        requested_layer_found = true; +        break; +      } +    } + +    if 
(!requested_layer_found) { +      goto err; +    } +  } + +  return true; +err: +  return false; +} + +static bool fetch_required_instance_extensions( +    struct required_instance_extensions *required_extensions, +    struct vgltf_platform *platform) { +  struct supported_instance_extensions supported_extensions = {}; +  if (!supported_instance_extensions_init(&supported_extensions)) { +    VGLTF_LOG_ERR( +        "Couldn't fetch supported instance extensions details (OOM?)"); +    goto err; +  } +  supported_instance_extensions_debug_print(&supported_extensions); + +  uint32_t platform_required_extension_count = 0; +  const char *const *platform_required_extensions = +      vgltf_platform_get_vulkan_instance_extensions( +          platform, &platform_required_extension_count); +  for (uint32_t platform_required_extension_index = 0; +       platform_required_extension_index < platform_required_extension_count; +       platform_required_extension_index++) { +    required_instance_extensions_push( +        required_extensions, +        platform_required_extensions[platform_required_extension_index]); +  } +#ifdef VGLTF_PLATFORM_MACOS +  required_instance_extensions_push( +      required_extensions, VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME); +#endif // VGLTF_PLATFORM_MACOS + +  if (enable_validation_layers) { +    required_instance_extensions_push(required_extensions, +                                      VK_EXT_DEBUG_UTILS_EXTENSION_NAME); +  } + +  bool all_extensions_supported = true; +  for (uint32_t required_extension_index = 0; +       required_extension_index < required_extensions->count; +       required_extension_index++) { +    const char *required_extension_name = +        required_extensions->extensions[required_extension_index]; +    if (!supported_instance_extensions_includes(&supported_extensions, +                                                required_extension_name)) { +      VGLTF_LOG_ERR("Unsupported instance extension: %s", +                    required_extension_name); +      all_extensions_supported = false; +    } +  } + +  if (!all_extensions_supported) { +    VGLTF_LOG_ERR("Some required extensions are unsupported."); +    goto err; +  } + +  return true; +err: +  return false; +} + +static void populate_debug_messenger_create_info( +    VkDebugUtilsMessengerCreateInfoEXT *create_info) { +  *create_info = (VkDebugUtilsMessengerCreateInfoEXT){}; +  create_info->sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; +  create_info->messageSeverity = +      VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | +      VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | +      VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; +  create_info->messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | +                             VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | +                             VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; +  create_info->pfnUserCallback = debug_callback; +} + +static bool vgltf_vk_instance_init(struct vgltf_vk_instance *instance, +                                   struct vgltf_platform *platform) { +  VGLTF_LOG_INFO("Creating vulkan instance..."); +  if (enable_validation_layers && !are_validation_layer_supported()) { +    VGLTF_LOG_ERR("Requested validation layers aren't supported"); +    goto err; +  } + +  VkApplicationInfo application_info = { +      .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, +      .pApplicationName = "Visible GLTF", +      .applicationVersion = VK_MAKE_VERSION(0, 1, 0), +      
.pEngineName = "No Engine", +      .engineVersion = VK_MAKE_VERSION(1, 0, 0), +      .apiVersion = VK_API_VERSION_1_2}; + +  struct required_instance_extensions required_extensions = {}; +  fetch_required_instance_extensions(&required_extensions, platform); + +  VkInstanceCreateFlags flags = 0; +#ifdef VGLTF_PLATFORM_MACOS +  flags |= VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR; +#endif // VGLTF_PLATFORM_MACOS + +  VkInstanceCreateInfo create_info = { +      .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, +      .pApplicationInfo = &application_info, +      .enabledExtensionCount = required_extensions.count, +      .ppEnabledExtensionNames = required_extensions.extensions, +      .flags = flags}; + +  VkDebugUtilsMessengerCreateInfoEXT debug_create_info; +  if (enable_validation_layers) { +    create_info.enabledLayerCount = VALIDATION_LAYER_COUNT; +    create_info.ppEnabledLayerNames = VALIDATION_LAYERS; +    populate_debug_messenger_create_info(&debug_create_info); +    create_info.pNext = &debug_create_info; +  } + +  if (vkCreateInstance(&create_info, nullptr, &instance->instance) != +      VK_SUCCESS) { +    VGLTF_LOG_ERR("Failed to create VkInstance"); +    goto err; +  } + +  return true; +err: +  return false; +} +static void vgltf_vk_instance_deinit(struct vgltf_vk_instance *instance) { +  vkDestroyInstance(instance->instance, nullptr); +} + +static VkResult create_debug_utils_messenger_ext( +    VkInstance instance, const VkDebugUtilsMessengerCreateInfoEXT *create_info, +    const VkAllocationCallbacks *allocator, +    VkDebugUtilsMessengerEXT *debug_messenger) { +  auto func = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr( +      instance, "vkCreateDebugUtilsMessengerEXT"); +  if (func != nullptr) { +    return func(instance, create_info, allocator, debug_messenger); +  } + +  return VK_ERROR_EXTENSION_NOT_PRESENT; +} + +static void +destroy_debug_utils_messenger_ext(VkInstance instance, +                                  VkDebugUtilsMessengerEXT debug_messenger, +                                  const VkAllocationCallbacks *allocator) { +  auto func = (PFN_vkDestroyDebugUtilsMessengerEXT)vkGetInstanceProcAddr( +      instance, "vkDestroyDebugUtilsMessengerEXT"); +  if (func != nullptr) { +    func(instance, debug_messenger, allocator); +  } +} + +static void +vgltf_renderer_setup_debug_messenger(struct vgltf_renderer *renderer) { +  if (!enable_validation_layers) +    return; +  VkDebugUtilsMessengerCreateInfoEXT create_info; +  populate_debug_messenger_create_info(&create_info); +  create_debug_utils_messenger_ext(renderer->instance.instance, &create_info, +                                   nullptr, &renderer->debug_messenger); +} + +static constexpr int AVAILABLE_PHYSICAL_DEVICE_ARRAY_CAPACITY = 128; +struct available_physical_devices { +  VkPhysicalDevice devices[AVAILABLE_PHYSICAL_DEVICE_ARRAY_CAPACITY]; +  uint32_t count; +}; +static bool +available_physical_devices_init(VkInstance instance, +                                struct available_physical_devices *devices) { + +  if (vkEnumeratePhysicalDevices(instance, &devices->count, nullptr) != +      VK_SUCCESS) { +    VGLTF_LOG_ERR("Couldn't enumerate physical devices"); +    goto err; +  } + +  if (devices->count == 0) { +    VGLTF_LOG_ERR("Failed to find any GPU with Vulkan support"); +    goto err; +  } + +  if (devices->count > AVAILABLE_PHYSICAL_DEVICE_ARRAY_CAPACITY) { +    VGLTF_LOG_ERR("available physical devices array cannot fit all available " +                  "physical devices"); +    goto 
err; +  } + +  if (vkEnumeratePhysicalDevices(instance, &devices->count, devices->devices) != +      VK_SUCCESS) { +    VGLTF_LOG_ERR("Couldn't enumerate physical devices"); +    goto err; +  } + +  return true; +err: +  return false; +} + +struct queue_family_indices { +  uint32_t graphics_family; +  uint32_t present_family; +  bool has_graphics_family; +  bool has_present_family; +}; +bool queue_family_indices_is_complete( +    const struct queue_family_indices *indices) { +  return indices->has_graphics_family && indices->has_present_family; +} +bool queue_family_indices_for_device(struct queue_family_indices *indices, +                                     VkPhysicalDevice device, +                                     VkSurfaceKHR surface) { +  static constexpr uint32_t QUEUE_FAMILY_PROPERTIES_ARRAY_CAPACITY = 64; +  uint32_t queue_family_count = 0; +  vkGetPhysicalDeviceQueueFamilyProperties(device, &queue_family_count, +                                           nullptr); + +  if (queue_family_count > QUEUE_FAMILY_PROPERTIES_ARRAY_CAPACITY) { +    VGLTF_LOG_ERR( +        "Queue family properties array cannot fit all queue family properties"); +    goto err; +  } + +  VkQueueFamilyProperties +      queue_family_properties[QUEUE_FAMILY_PROPERTIES_ARRAY_CAPACITY] = {}; +  vkGetPhysicalDeviceQueueFamilyProperties(device, &queue_family_count, +                                           queue_family_properties); + +  for (uint32_t queue_family_index = 0; queue_family_index < queue_family_count; +       queue_family_index++) { +    VkQueueFamilyProperties *queue_family = +        &queue_family_properties[queue_family_index]; + +    VkBool32 present_support; +    vkGetPhysicalDeviceSurfaceSupportKHR(device, queue_family_index, surface, +                                         &present_support); + +    if (queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT) { +      indices->graphics_family = queue_family_index; +      indices->has_graphics_family = true; +    } + +    if (present_support) { +      indices->present_family = queue_family_index; +      indices->has_present_family = true; +    } + +    if (queue_family_indices_is_complete(indices)) { +      break; +    } +  } + +  return true; +err: +  return false; +} + +static bool is_in_array(uint32_t *array, int length, uint32_t value) { +  for (int i = 0; i < length; i++) { +    if (array[i] == value) { +      return true; +    } +  } + +  return false; +} + +static constexpr uint32_t SUPPORTED_EXTENSIONS_ARRAY_CAPACITY = 1024; +struct supported_extensions { +  VkExtensionProperties properties[SUPPORTED_EXTENSIONS_ARRAY_CAPACITY]; +  uint32_t count; +}; +bool supported_extensions_init( +    struct supported_extensions *supported_extensions, +    VkPhysicalDevice device) { +  if (vkEnumerateDeviceExtensionProperties(device, nullptr, +                                           &supported_extensions->count, +                                           nullptr) != VK_SUCCESS) { +    goto err; +  } + +  if (supported_extensions->count > SUPPORTED_EXTENSIONS_ARRAY_CAPACITY) { +    VGLTF_LOG_ERR("supported extensions array cannot fit all the supported " +                  "VkExtensionProperties (%u)", +                  supported_extensions->count); +    goto err; +  } + +  if (vkEnumerateDeviceExtensionProperties( +          device, nullptr, &supported_extensions->count, +          supported_extensions->properties) != VK_SUCCESS) { +    goto err; +  } + +  return true; +err: +  return false; +} + +static bool supported_extensions_includes_extension( 
+    struct supported_extensions *supported_extensions, +    const char *extension_name) { +  for (uint32_t supported_extension_index = 0; +       supported_extension_index < supported_extensions->count; +       supported_extension_index++) { +    if (strcmp(supported_extensions->properties[supported_extension_index] +                   .extensionName, +               extension_name) == 0) { +      return true; +    } +  } +  return false; +} + +static const char *DEVICE_EXTENSIONS[] = { +    VK_KHR_SWAPCHAIN_EXTENSION_NAME, +#ifdef VGLTF_PLATFORM_MACOS +    "VK_KHR_portability_subset", +#endif +}; +static constexpr int DEVICE_EXTENSION_COUNT = +    sizeof(DEVICE_EXTENSIONS) / sizeof(DEVICE_EXTENSIONS[0]); +static bool are_device_extensions_supported(VkPhysicalDevice device) { +  struct supported_extensions supported_extensions = {}; +  if (!supported_extensions_init(&supported_extensions, device)) { +    goto err; +  } + +  for (uint32_t required_extension_index = 0; +       required_extension_index < DEVICE_EXTENSION_COUNT; +       required_extension_index++) { +    if (!supported_extensions_includes_extension( +            &supported_extensions, +            DEVICE_EXTENSIONS[required_extension_index])) { +      VGLTF_LOG_DBG("Unsupported: %s", +                    DEVICE_EXTENSIONS[required_extension_index]); +      goto err; +    } +  } + +  return true; + +err: +  return false; +} + +static constexpr int SWAPCHAIN_SUPPORT_DETAILS_MAX_SURFACE_FORMAT_COUNT = 256; +static constexpr int SWAPCHAIN_SUPPORT_DETAILS_MAX_PRESENT_MODE_COUNT = 256; +struct swapchain_support_details { +  VkSurfaceCapabilitiesKHR capabilities; +  VkSurfaceFormatKHR +      formats[SWAPCHAIN_SUPPORT_DETAILS_MAX_SURFACE_FORMAT_COUNT]; +  VkPresentModeKHR +      present_modes[SWAPCHAIN_SUPPORT_DETAILS_MAX_PRESENT_MODE_COUNT]; +  uint32_t format_count; +  uint32_t present_mode_count; +}; +bool swapchain_support_details_query_from_device( +    struct swapchain_support_details *swapchain_support_details, +    VkPhysicalDevice device, VkSurfaceKHR surface) { +  if (vkGetPhysicalDeviceSurfaceCapabilitiesKHR( +          device, surface, &swapchain_support_details->capabilities) != +      VK_SUCCESS) { +    goto err; +  } + +  if (vkGetPhysicalDeviceSurfaceFormatsKHR( +          device, surface, &swapchain_support_details->format_count, nullptr) != +      VK_SUCCESS) { +    goto err; +  } + +  if (swapchain_support_details->format_count != 0 && +      swapchain_support_details->format_count < +          SWAPCHAIN_SUPPORT_DETAILS_MAX_SURFACE_FORMAT_COUNT) { +    if (vkGetPhysicalDeviceSurfaceFormatsKHR( +            device, surface, &swapchain_support_details->format_count, +            swapchain_support_details->formats) != VK_SUCCESS) { +      goto err; +    } +  } + +  if (vkGetPhysicalDeviceSurfacePresentModesKHR( +          device, surface, &swapchain_support_details->present_mode_count, +          nullptr) != VK_SUCCESS) { +    goto err; +  } + +  if (swapchain_support_details->present_mode_count != 0 && +      swapchain_support_details->present_mode_count < +          SWAPCHAIN_SUPPORT_DETAILS_MAX_PRESENT_MODE_COUNT) { +    if (vkGetPhysicalDeviceSurfacePresentModesKHR( +            device, surface, &swapchain_support_details->present_mode_count, +            swapchain_support_details->present_modes) != VK_SUCCESS) { +      goto err; +    } +  } + +  return true; +err: +  return false; +} + +static bool is_physical_device_suitable(VkPhysicalDevice device, +                                        VkSurfaceKHR surface) 
{ +  struct queue_family_indices indices = {}; +  queue_family_indices_for_device(&indices, device, surface); + +  VGLTF_LOG_DBG("Checking for physical device extension support"); +  bool extensions_supported = are_device_extensions_supported(device); +  VGLTF_LOG_DBG("Supported: %d", extensions_supported); + +  bool swapchain_adequate = false; +  if (extensions_supported) { + +    VGLTF_LOG_DBG("Checking for swapchain support details"); +    struct swapchain_support_details swapchain_support_details = {}; +    if (!swapchain_support_details_query_from_device(&swapchain_support_details, +                                                     device, surface)) { +      VGLTF_LOG_ERR("Couldn't query swapchain support details from device"); +      goto err; +    } + +    swapchain_adequate = swapchain_support_details.format_count > 0 && +                         swapchain_support_details.present_mode_count > 0; +  } + +  VkPhysicalDeviceFeatures supported_features; +  vkGetPhysicalDeviceFeatures(device, &supported_features); + +  return queue_family_indices_is_complete(&indices) && extensions_supported && +         swapchain_adequate && supported_features.samplerAnisotropy; +err: +  return false; +} + +static bool pick_physical_device(VkPhysicalDevice *physical_device, +                                 struct vgltf_vk_instance *instance, +                                 VkSurfaceKHR surface) { +  VkPhysicalDevice vk_physical_device = VK_NULL_HANDLE; +  struct available_physical_devices available_physical_devices = {}; +  if (!available_physical_devices_init(instance->instance, +                                       &available_physical_devices)) { +    VGLTF_LOG_ERR("Couldn't fetch available physical devices"); +    goto err; +  } + +  for (uint32_t available_physical_device_index = 0; +       available_physical_device_index < available_physical_devices.count; +       available_physical_device_index++) { +    VkPhysicalDevice available_physical_device = +        available_physical_devices.devices[available_physical_device_index]; +    if (is_physical_device_suitable(available_physical_device, surface)) { +      vk_physical_device = available_physical_device; +      break; +    } +  } + +  if (vk_physical_device == VK_NULL_HANDLE) { +    VGLTF_LOG_ERR("Failed to find a suitable GPU"); +    goto err; +  } + +  *physical_device = vk_physical_device; + +  return true; +err: +  return false; +} + +static bool create_logical_device(VkDevice *device, VkQueue *graphics_queue, +                                  VkQueue *present_queue, +                                  VkPhysicalDevice physical_device, +                                  VkSurfaceKHR surface) { +  struct queue_family_indices queue_family_indices = {}; +  queue_family_indices_for_device(&queue_family_indices, physical_device, +                                  surface); +  static constexpr int MAX_QUEUE_FAMILY_COUNT = 2; + +  uint32_t unique_queue_families[MAX_QUEUE_FAMILY_COUNT] = {}; +  int unique_queue_family_count = 0; + +  if (!is_in_array(unique_queue_families, unique_queue_family_count, +                   queue_family_indices.graphics_family)) { +    assert(unique_queue_family_count < MAX_QUEUE_FAMILY_COUNT); +    unique_queue_families[unique_queue_family_count++] = +        queue_family_indices.graphics_family; +  } +  if (!is_in_array(unique_queue_families, unique_queue_family_count, +                   queue_family_indices.present_family)) { +    assert(unique_queue_family_count < MAX_QUEUE_FAMILY_COUNT); +    
unique_queue_families[unique_queue_family_count++] = +        queue_family_indices.present_family; +  } + +  float queue_priority = 1.f; +  VkDeviceQueueCreateInfo queue_create_infos[MAX_QUEUE_FAMILY_COUNT] = {}; +  int queue_create_info_count = 0; +  for (int unique_queue_family_index = 0; +       unique_queue_family_index < unique_queue_family_count; +       unique_queue_family_index++) { +    queue_create_infos[queue_create_info_count++] = (VkDeviceQueueCreateInfo){ +        .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, +        .queueFamilyIndex = unique_queue_families[unique_queue_family_index], +        .queueCount = 1, +        .pQueuePriorities = &queue_priority}; +  } + +  VkPhysicalDeviceFeatures device_features = { +      .samplerAnisotropy = VK_TRUE, +  }; +  VkDeviceCreateInfo create_info = { +      .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, +      .pQueueCreateInfos = queue_create_infos, +      .queueCreateInfoCount = queue_create_info_count, +      .pEnabledFeatures = &device_features, +      .ppEnabledExtensionNames = DEVICE_EXTENSIONS, +      .enabledExtensionCount = DEVICE_EXTENSION_COUNT}; +  if (vkCreateDevice(physical_device, &create_info, nullptr, device) != +      VK_SUCCESS) { +    VGLTF_LOG_ERR("Failed to create logical device"); +    goto err; +  } + +  vkGetDeviceQueue(*device, queue_family_indices.graphics_family, 0, +                   graphics_queue); +  vkGetDeviceQueue(*device, queue_family_indices.present_family, 0, +                   present_queue); + +  return true; +err: +  return false; +} + +static bool create_allocator(VmaAllocator *allocator, +                             struct vgltf_vk_device *device, +                             struct vgltf_vk_instance *instance) { +  VmaAllocatorCreateInfo create_info = {.device = device->device, +                                        .instance = instance->instance, +                                        .physicalDevice = +                                            device->physical_device}; + +  if (vmaCreateAllocator(&create_info, allocator) != VK_SUCCESS) { +    VGLTF_LOG_ERR("Couldn't create VMA allocator"); +    goto err; +  } +  return true; +err: +  return false; +} + +static bool vgltf_vk_surface_init(struct vgltf_vk_surface *surface, +                                  struct vgltf_vk_instance *instance, +                                  struct vgltf_platform *platform) { +  if (!vgltf_platform_create_vulkan_surface(platform, instance->instance, +                                            &surface->surface)) { +    VGLTF_LOG_ERR("Couldn't create surface"); +    goto err; +  } + +  return true; +err: +  return false; +} + +static void vgltf_vk_surface_deinit(struct vgltf_vk_surface *surface, +                                    struct vgltf_vk_instance *instance) { +  vkDestroySurfaceKHR(instance->instance, surface->surface, nullptr); +} + +static VkSurfaceFormatKHR +choose_swapchain_surface_format(VkSurfaceFormatKHR *available_formats, +                                uint32_t available_format_count) { +  for (uint32_t available_format_index = 0; +       available_format_index < available_format_count; +       available_format_index++) { +    VkSurfaceFormatKHR *available_format = +        &available_formats[available_format_index]; +    if (available_format->format == VK_FORMAT_B8G8R8A8_SRGB && +        available_format->colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) { +      return *available_format; +    } +  } + +  return available_formats[0]; +} + +static VkPresentModeKHR 
+choose_swapchain_present_mode(VkPresentModeKHR *available_modes, +                              uint32_t available_mode_count) { +  for (uint32_t available_mode_index = 0; +       available_mode_index < available_mode_count; available_mode_index++) { +    VkPresentModeKHR available_mode = available_modes[available_mode_index]; +    if (available_mode == VK_PRESENT_MODE_MAILBOX_KHR) { +      return available_mode; +    } +  } + +  return VK_PRESENT_MODE_FIFO_KHR; +} + +static uint32_t clamp_uint32(uint32_t min, uint32_t max, uint32_t value) { +  return value < min ? min : value > max ? max : value; +} + +static VkExtent2D +choose_swapchain_extent(const VkSurfaceCapabilitiesKHR *capabilities, int width, +                        int height) { +  if (capabilities->currentExtent.width != UINT32_MAX) { +    return capabilities->currentExtent; +  } else { +    VkExtent2D actual_extent = {width, height}; +    actual_extent.width = +        clamp_uint32(capabilities->minImageExtent.width, +                     capabilities->maxImageExtent.width, actual_extent.width); +    actual_extent.height = +        clamp_uint32(capabilities->minImageExtent.height, +                     capabilities->maxImageExtent.height, actual_extent.height); +    return actual_extent; +  } +} + +static bool create_swapchain(struct vgltf_vk_swapchain *swapchain, +                             struct vgltf_vk_device *device, +                             struct vgltf_vk_surface *surface, +                             struct vgltf_window_size *window_size) { +  struct swapchain_support_details swapchain_support_details = {}; +  swapchain_support_details_query_from_device( +      &swapchain_support_details, device->physical_device, surface->surface); + +  VkSurfaceFormatKHR surface_format = +      choose_swapchain_surface_format(swapchain_support_details.formats, +                                      swapchain_support_details.format_count); +  VkPresentModeKHR present_mode = choose_swapchain_present_mode( +      swapchain_support_details.present_modes, +      swapchain_support_details.present_mode_count); + +  VkExtent2D extent = +      choose_swapchain_extent(&swapchain_support_details.capabilities, +                              window_size->width, window_size->height); +  uint32_t image_count = +      swapchain_support_details.capabilities.minImageCount + 1; +  if (swapchain_support_details.capabilities.maxImageCount > 0 && +      image_count > swapchain_support_details.capabilities.maxImageCount) { +    image_count = swapchain_support_details.capabilities.maxImageCount; +  } + +  VkSwapchainCreateInfoKHR create_info = { +      .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, +      .surface = surface->surface, +      .minImageCount = image_count, +      .imageFormat = surface_format.format, +      .imageColorSpace = surface_format.colorSpace, +      .imageExtent = extent, +      .imageArrayLayers = 1, +      .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT}; +  struct queue_family_indices indices = {}; +  queue_family_indices_for_device(&indices, device->physical_device, +                                  surface->surface); +  uint32_t queue_family_indices[] = {indices.graphics_family, +                                     indices.present_family}; +  if (indices.graphics_family != indices.present_family) { +    create_info.imageSharingMode = VK_SHARING_MODE_CONCURRENT; +    create_info.queueFamilyIndexCount = 2; +    create_info.pQueueFamilyIndices = queue_family_indices; +  } else { +    create_info.imageSharingMode 
= VK_SHARING_MODE_EXCLUSIVE; +  } + +  create_info.preTransform = +      swapchain_support_details.capabilities.currentTransform; +  create_info.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; +  create_info.presentMode = present_mode; +  create_info.clipped = VK_TRUE; +  create_info.oldSwapchain = VK_NULL_HANDLE; + +  if (vkCreateSwapchainKHR(device->device, &create_info, nullptr, +                           &swapchain->swapchain) != VK_SUCCESS) { +    VGLTF_LOG_ERR("Swapchain creation failed!"); +    goto err; +  } + +  if (vkGetSwapchainImagesKHR(device->device, swapchain->swapchain, +                              &swapchain->swapchain_image_count, +                              nullptr) != VK_SUCCESS) { +    VGLTF_LOG_ERR("Couldn't get swapchain image count"); +    goto destroy_swapchain; +  } + +  if (swapchain->swapchain_image_count > +      VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT) { +    VGLTF_LOG_ERR("Swapchain image array cannot fit all %d swapchain images", +                  swapchain->swapchain_image_count); +    goto destroy_swapchain; +  } + +  if (vkGetSwapchainImagesKHR(device->device, swapchain->swapchain, +                              &swapchain->swapchain_image_count, +                              swapchain->swapchain_images) != VK_SUCCESS) { +    VGLTF_LOG_ERR("Couldn't get swapchain images"); +    goto destroy_swapchain; +  } + +  swapchain->swapchain_image_format = surface_format.format; +  swapchain->swapchain_extent = extent; + +  return true; +destroy_swapchain: +  vkDestroySwapchainKHR(device->device, swapchain->swapchain, nullptr); +err: +  return false; +} + +static bool create_image_view(struct vgltf_vk_device *device, VkImage image, +                              VkFormat format, VkImageView *image_view, +                              VkImageAspectFlags aspect_flags, +                              uint32_t mip_level_count) { + +  VkImageViewCreateInfo create_info = { +      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, +      .image = image, +      .viewType = VK_IMAGE_VIEW_TYPE_2D, +      .format = format, +      .components = {VK_COMPONENT_SWIZZLE_IDENTITY, +                     VK_COMPONENT_SWIZZLE_IDENTITY, +                     VK_COMPONENT_SWIZZLE_IDENTITY, +                     VK_COMPONENT_SWIZZLE_IDENTITY}, +      .subresourceRange = {.aspectMask = aspect_flags, +                           .levelCount = mip_level_count, +                           .layerCount = 1}}; +  if (vkCreateImageView(device->device, &create_info, nullptr, image_view) != +      VK_SUCCESS) { +    return false; +  } + +  return true; +} + +static bool create_swapchain_image_views(struct vgltf_vk_swapchain *swapchain, +                                         struct vgltf_vk_device *device) { +  uint32_t swapchain_image_index; +  for (swapchain_image_index = 0; +       swapchain_image_index < swapchain->swapchain_image_count; +       swapchain_image_index++) { +    VkImage swapchain_image = +        swapchain->swapchain_images[swapchain_image_index]; + +    if (!create_image_view( +            device, swapchain_image, swapchain->swapchain_image_format, +            &swapchain->swapchain_image_views[swapchain_image_index], +            VK_IMAGE_ASPECT_COLOR_BIT, 1)) { +      goto err; +    } +  } +  return true; +err: +  for (uint32_t to_remove_index = 0; to_remove_index < swapchain_image_index; +       to_remove_index++) { +    vkDestroyImageView(device->device, +                       swapchain->swapchain_image_views[to_remove_index], +                       nullptr); 
+  } +  return false; +} + +static bool create_shader_module(VkDevice device, const unsigned char *code, +                                 int size, VkShaderModule *out) { +  VkShaderModuleCreateInfo create_info = { +      .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, +      .codeSize = size, +      .pCode = (const uint32_t *)code, +  }; +  if (vkCreateShaderModule(device, &create_info, nullptr, out) != VK_SUCCESS) { +    VGLTF_LOG_ERR("Couldn't create shader module"); +    goto err; +  } +  return true; +err: +  return false; +} + +static VkFormat find_supported_format(struct vgltf_renderer *renderer, +                                      const VkFormat *candidates, +                                      int candidate_count, VkImageTiling tiling, +                                      VkFormatFeatureFlags features) { +  for (int candidate_index = 0; candidate_index < candidate_count; +       candidate_index++) { +    VkFormat candidate = candidates[candidate_index]; +    VkFormatProperties properties; +    vkGetPhysicalDeviceFormatProperties(renderer->device.physical_device, +                                        candidate, &properties); +    if (tiling == VK_IMAGE_TILING_LINEAR && +        (properties.linearTilingFeatures & features) == features) { +      return candidate; +    } else if (tiling == VK_IMAGE_TILING_OPTIMAL && +               (properties.optimalTilingFeatures & features) == features) { +      return candidate; +    } +  } + +  return VK_FORMAT_UNDEFINED; +} + +static VkFormat find_depth_format(struct vgltf_renderer *renderer) { +  return find_supported_format(renderer, +                               (const VkFormat[]){VK_FORMAT_D32_SFLOAT, +                                                  VK_FORMAT_D32_SFLOAT_S8_UINT, +                                                  VK_FORMAT_D24_UNORM_S8_UINT}, +                               3, VK_IMAGE_TILING_OPTIMAL, +                               VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT); +} + +static bool vgltf_renderer_create_render_pass(struct vgltf_renderer *renderer) { +  VkAttachmentDescription color_attachment = { +      .format = renderer->swapchain.swapchain_image_format, +      .samples = VK_SAMPLE_COUNT_1_BIT, +      .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, +      .storeOp = VK_ATTACHMENT_STORE_OP_STORE, +      .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, +      .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, +      .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, +      .finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR}; +  VkAttachmentReference color_attachment_ref = { +      .attachment = 0, +      .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, +  }; +  VkAttachmentDescription depth_attachment = { +      .format = find_depth_format(renderer), +      .samples = VK_SAMPLE_COUNT_1_BIT, +      .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, +      .storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, +      .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, +      .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, +      .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, +      .finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL}; +  VkAttachmentReference depth_attachment_ref = { +      .attachment = 1, +      .layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, +  }; + +  VkSubpassDescription subpass = { +      .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, +      .pColorAttachments = &color_attachment_ref, +      .colorAttachmentCount = 1, +      .pDepthStencilAttachment = &depth_attachment_ref}; 
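/* Illustrative sketch, not part of this commit: find_supported_format() above
 * falls back to VK_FORMAT_UNDEFINED when no candidate matches, and the depth
 * attachment below feeds find_depth_format() straight into .format. A small
 * checked wrapper (reusing the existing logging macro) could look like: */
static VkFormat find_depth_format_or_log(struct vgltf_renderer *renderer) {
  VkFormat format = find_depth_format(renderer);
  if (format == VK_FORMAT_UNDEFINED) {
    VGLTF_LOG_ERR("No supported depth-stencil format on this device");
  }
  return format;
}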
+  VkSubpassDependency dependency = { +      .srcSubpass = VK_SUBPASS_EXTERNAL, +      .dstSubpass = 0, +      .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | +                      VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT, +      .srcAccessMask = 0, +      .dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | +                      VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT, +      .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | +                       VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT}; + +  VkAttachmentDescription attachments[] = {color_attachment, depth_attachment}; +  int attachment_count = sizeof(attachments) / sizeof(attachments[0]); +  VkRenderPassCreateInfo render_pass_info = { +      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, +      .attachmentCount = attachment_count, +      .pAttachments = attachments, +      .subpassCount = 1, +      .pSubpasses = &subpass, +      .dependencyCount = 1, +      .pDependencies = &dependency}; + +  if (vkCreateRenderPass(renderer->device.device, &render_pass_info, nullptr, +                         &renderer->render_pass) != VK_SUCCESS) { +    VGLTF_LOG_ERR("Failed to create render pass"); +    goto err; +  } + +  return true; +err: +  return false; +} + +static bool +vgltf_renderer_create_descriptor_set_layout(struct vgltf_renderer *renderer) { +  VkDescriptorSetLayoutBinding ubo_layout_binding = { +      .binding = 0, +      .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, +      .descriptorCount = 1, +      .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, +  }; +  VkDescriptorSetLayoutBinding sampler_layout_binding = { +      .binding = 1, +      .descriptorCount = 1, +      .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, +      .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, +  }; + +  VkDescriptorSetLayoutBinding bindings[] = {ubo_layout_binding, +                                             sampler_layout_binding}; +  int binding_count = sizeof(bindings) / sizeof(bindings[0]); + +  VkDescriptorSetLayoutCreateInfo layout_info = { +      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, +      .bindingCount = binding_count, +      .pBindings = bindings}; + +  if (vkCreateDescriptorSetLayout(renderer->device.device, &layout_info, +                                  nullptr, &renderer->descriptor_set_layout) != +      VK_SUCCESS) { +    VGLTF_LOG_ERR("Failed to create descriptor set layout"); +    goto err; +  } +  return true; +err: +  return false; +} + +static bool +vgltf_renderer_create_graphics_pipeline(struct vgltf_renderer *renderer) { +  static constexpr unsigned char triangle_shader_vert_code[] = { +#embed "../compiled_shaders/triangle.vert.spv" +  }; +  static constexpr unsigned char triangle_shader_frag_code[] = { +#embed "../compiled_shaders/triangle.frag.spv" +  }; + +  VkShaderModule triangle_shader_vert_module; +  if (!create_shader_module(renderer->device.device, triangle_shader_vert_code, +                            sizeof(triangle_shader_vert_code), +                            &triangle_shader_vert_module)) { +    VGLTF_LOG_ERR("Couldn't create triangle vert shader module"); +    goto err; +  } + +  VkShaderModule triangle_shader_frag_module; +  if (!create_shader_module(renderer->device.device, triangle_shader_frag_code, +                            sizeof(triangle_shader_frag_code), +                            &triangle_shader_frag_module)) { +    VGLTF_LOG_ERR("Couldn't create triangle frag shader module"); +    goto destroy_vert_shader_module; +  } 
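/* Illustrative sketch, not part of this commit: VkShaderModuleCreateInfo
 * requires codeSize to be a multiple of 4 and pCode to address 32-bit words,
 * while an unsigned char array filled by #embed only guarantees byte
 * alignment. Assuming a C23 compiler (alignas and static_assert are keywords
 * there, matching this project's c_std), the cast in create_shader_module()
 * can be made well defined like so; the array name here is hypothetical: */
static alignas(uint32_t) const unsigned char example_vert_spv[] = {
#embed "../compiled_shaders/triangle.vert.spv"
};
static_assert(sizeof(example_vert_spv) % 4 == 0,
              "a SPIR-V blob is a whole number of 32-bit words");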
+ +  VkPipelineShaderStageCreateInfo triangle_shader_vert_stage_create_info = { +      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, +      .stage = VK_SHADER_STAGE_VERTEX_BIT, +      .module = triangle_shader_vert_module, +      .pName = "main"}; +  VkPipelineShaderStageCreateInfo triangle_shader_frag_stage_create_info = { +      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, +      .stage = VK_SHADER_STAGE_FRAGMENT_BIT, +      .module = triangle_shader_frag_module, +      .pName = "main"}; +  VkPipelineShaderStageCreateInfo shader_stages[] = { +      triangle_shader_vert_stage_create_info, +      triangle_shader_frag_stage_create_info}; + +  VkDynamicState dynamic_states[] = { +      VK_DYNAMIC_STATE_VIEWPORT, +      VK_DYNAMIC_STATE_SCISSOR, +  }; + +  VkPipelineDynamicStateCreateInfo dynamic_state = { +      .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, +      .dynamicStateCount = sizeof(dynamic_states) / sizeof(dynamic_states[0]), +      .pDynamicStates = dynamic_states}; + +  VkVertexInputBindingDescription vertex_binding_description = +      vgltf_vertex_binding_description(); +  struct vgltf_vertex_input_attribute_descriptions +      vertex_attribute_descriptions = vgltf_vertex_attribute_descriptions(); + +  VkPipelineVertexInputStateCreateInfo vertex_input_info = { +      .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, +      .vertexBindingDescriptionCount = 1, +      .vertexAttributeDescriptionCount = vertex_attribute_descriptions.count, +      .pVertexBindingDescriptions = &vertex_binding_description, +      .pVertexAttributeDescriptions = +          vertex_attribute_descriptions.descriptions}; + +  VkPipelineInputAssemblyStateCreateInfo input_assembly = { +      .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, +      .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, +      .primitiveRestartEnable = VK_FALSE, +  }; + +  VkPipelineViewportStateCreateInfo viewport_state = { +      .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, +      .viewportCount = 1, +      .scissorCount = 1}; + +  VkPipelineRasterizationStateCreateInfo rasterizer = { +      .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, +      .depthClampEnable = VK_FALSE, +      .rasterizerDiscardEnable = VK_FALSE, +      .polygonMode = VK_POLYGON_MODE_FILL, +      .lineWidth = 1.f, +      .cullMode = VK_CULL_MODE_BACK_BIT, +      .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, +      .depthBiasEnable = VK_FALSE}; + +  VkPipelineMultisampleStateCreateInfo multisampling = { +      .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, +      .sampleShadingEnable = VK_FALSE, +      .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, +  }; + +  VkPipelineColorBlendAttachmentState color_blend_attachment = { +      .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | +                        VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, +      .blendEnable = VK_FALSE, +  }; + +  VkPipelineDepthStencilStateCreateInfo depth_stencil = { +      .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, +      .depthTestEnable = VK_TRUE, +      .depthWriteEnable = VK_TRUE, +      .depthCompareOp = VK_COMPARE_OP_LESS, +      .depthBoundsTestEnable = VK_FALSE, +      .stencilTestEnable = VK_FALSE, +  }; + +  VkPipelineColorBlendStateCreateInfo color_blending = { +      .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, +      .logicOpEnable = VK_FALSE, +    
  .attachmentCount = 1, +      .pAttachments = &color_blend_attachment}; + +  VkPipelineLayoutCreateInfo pipeline_layout_info = { +      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, +      .setLayoutCount = 1, +      .pSetLayouts = &renderer->descriptor_set_layout}; + +  if (vkCreatePipelineLayout(renderer->device.device, &pipeline_layout_info, +                             nullptr, +                             &renderer->pipeline_layout) != VK_SUCCESS) { +    VGLTF_LOG_ERR("Couldn't create pipeline layout"); +    goto destroy_frag_shader_module; +  } + +  VkGraphicsPipelineCreateInfo pipeline_info = { +      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, +      .stageCount = 2, +      .pStages = shader_stages, +      .pVertexInputState = &vertex_input_info, +      .pInputAssemblyState = &input_assembly, +      .pViewportState = &viewport_state, +      .pRasterizationState = &rasterizer, +      .pMultisampleState = &multisampling, +      .pColorBlendState = &color_blending, +      .pDepthStencilState = &depth_stencil, +      .pDynamicState = &dynamic_state, +      .layout = renderer->pipeline_layout, +      .renderPass = renderer->render_pass, +      .subpass = 0, +  }; + +  if (vkCreateGraphicsPipelines(renderer->device.device, VK_NULL_HANDLE, 1, +                                &pipeline_info, nullptr, +                                &renderer->graphics_pipeline) != VK_SUCCESS) { +    VGLTF_LOG_ERR("Couldn't create pipeline"); +    goto destroy_pipeline_layout; +  } + +  vkDestroyShaderModule(renderer->device.device, triangle_shader_frag_module, +                        nullptr); +  vkDestroyShaderModule(renderer->device.device, triangle_shader_vert_module, +                        nullptr); +  return true; +destroy_pipeline_layout: +  vkDestroyPipelineLayout(renderer->device.device, renderer->pipeline_layout, +                          nullptr); +destroy_frag_shader_module: +  vkDestroyShaderModule(renderer->device.device, triangle_shader_frag_module, +                        nullptr); +destroy_vert_shader_module: +  vkDestroyShaderModule(renderer->device.device, triangle_shader_vert_module, +                        nullptr); +err: +  return false; +} + +static bool +vgltf_renderer_create_framebuffers(struct vgltf_renderer *renderer) { +  for (uint32_t i = 0; i < renderer->swapchain.swapchain_image_count; i++) { +    VkImageView attachments[] = {renderer->swapchain.swapchain_image_views[i], +                                 renderer->depth_image_view}; +    int attachment_count = sizeof(attachments) / sizeof(attachments[0]); + +    VkFramebufferCreateInfo framebuffer_info = { +        .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, +        .renderPass = renderer->render_pass, +        .attachmentCount = attachment_count, +        .pAttachments = attachments, +        .width = renderer->swapchain.swapchain_extent.width, +        .height = renderer->swapchain.swapchain_extent.height, +        .layers = 1}; + +    if (vkCreateFramebuffer(renderer->device.device, &framebuffer_info, nullptr, +                            &renderer->swapchain_framebuffers[i]) != +        VK_SUCCESS) { +      VGLTF_LOG_ERR("Failed to create framebuffer"); +      goto err; +    } +  } + +  return true; +err: +  return false; +} + +static bool +vgltf_renderer_create_command_pool(struct vgltf_renderer *renderer) { +  struct queue_family_indices queue_family_indices = {}; +  if (!queue_family_indices_for_device(&queue_family_indices, +                                       
renderer->device.physical_device,
+                                       renderer->surface.surface)) {
+    VGLTF_LOG_ERR("Couldn't fetch queue family indices");
+    goto err;
+  }
+
+  VkCommandPoolCreateInfo pool_info = {
+      .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+      .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+      .queueFamilyIndex = queue_family_indices.graphics_family};
+
+  if (vkCreateCommandPool(renderer->device.device, &pool_info, nullptr,
+                          &renderer->command_pool) != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Couldn't create command pool");
+    goto err;
+  }
+
+  return true;
+err:
+  return false;
+}
+
+static VkCommandBuffer
+begin_single_time_commands(struct vgltf_renderer *renderer) {
+  VkCommandBufferAllocateInfo allocate_info = {
+      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+      .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+      .commandPool = renderer->command_pool,
+      .commandBufferCount = 1};
+
+  VkCommandBuffer command_buffer;
+  vkAllocateCommandBuffers(renderer->device.device, &allocate_info,
+                           &command_buffer);
+
+  VkCommandBufferBeginInfo begin_info = {
+      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+      .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};
+
+  vkBeginCommandBuffer(command_buffer, &begin_info);
+
+  return command_buffer;
+}
+
+static void end_single_time_commands(struct vgltf_renderer *renderer,
+                                     VkCommandBuffer command_buffer) {
+  vkEndCommandBuffer(command_buffer);
+  VkSubmitInfo submit_info = {.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+                              .commandBufferCount = 1,
+                              .pCommandBuffers = &command_buffer};
+
+  vkQueueSubmit(renderer->device.graphics_queue, 1, &submit_info,
+                VK_NULL_HANDLE);
+  vkQueueWaitIdle(renderer->device.graphics_queue);
+  vkFreeCommandBuffers(renderer->device.device, renderer->command_pool, 1,
+                       &command_buffer);
+}
+
+static bool vgltf_renderer_copy_buffer(struct vgltf_renderer *renderer,
+                                       VkBuffer src_buffer, VkBuffer dst_buffer,
+                                       VkDeviceSize size) {
+  VkCommandBuffer command_buffer = begin_single_time_commands(renderer);
+  VkBufferCopy copy_region = {.size = size};
+  vkCmdCopyBuffer(command_buffer, src_buffer, dst_buffer, 1, &copy_region);
+  end_single_time_commands(renderer, command_buffer);
+  return true;
+}
+
+static void vgltf_renderer_create_image(
+    struct vgltf_renderer *renderer, uint32_t width, uint32_t height,
+    uint32_t mip_level_count, VkFormat format, VkImageTiling tiling,
+    VkImageUsageFlags usage, VkMemoryPropertyFlags properties,
+    struct vgltf_renderer_allocated_image *image) {
+
+  vmaCreateImage(
+      renderer->device.allocator,
+      &(const VkImageCreateInfo){
+          .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+          .imageType = VK_IMAGE_TYPE_2D,
+          .extent = {width, height, 1},
+          .mipLevels = mip_level_count,
+          .arrayLayers = 1,
+          .format = format,
+          .tiling = tiling,
+          .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+          .usage = usage,
+          .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+          .samples = VK_SAMPLE_COUNT_1_BIT,
+      },
+      &(const VmaAllocationCreateInfo){.usage = VMA_MEMORY_USAGE_GPU_ONLY,
+                                       .requiredFlags = properties},
+  
    &image->image, &image->allocation, &image->info); +} + +static bool has_stencil_component(VkFormat format) { +  return format == VK_FORMAT_D32_SFLOAT_S8_UINT || +         format == VK_FORMAT_D24_UNORM_S8_UINT; +} + +static bool transition_image_layout(struct vgltf_renderer *renderer, +                                    VkImage image, VkFormat format, +                                    VkImageLayout old_layout, +                                    VkImageLayout new_layout, +                                    uint32_t mip_level_count) { +  (void)format; +  VkCommandBuffer command_buffer = begin_single_time_commands(renderer); +  VkImageMemoryBarrier barrier = { +      .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, +      .oldLayout = old_layout, +      .newLayout = new_layout, +      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +      .image = image, +      .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, +                           .baseMipLevel = 0, +                           .levelCount = mip_level_count, +                           .baseArrayLayer = 0, +                           .layerCount = 1}, +      .srcAccessMask = 0, +      .dstAccessMask = 0}; + +  if (new_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) { +    barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; + +    if (has_stencil_component(format)) { +      barrier.subresourceRange.aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT; +    } +  } else { +    barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; +  } + +  VkPipelineStageFlags source_stage; +  VkPipelineStageFlags destination_stage; +  if (old_layout == VK_IMAGE_LAYOUT_UNDEFINED && +      new_layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { +    barrier.srcAccessMask = 0; +    barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; +    source_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; +    destination_stage = VK_PIPELINE_STAGE_TRANSFER_BIT; +  } else if (old_layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && +             new_layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) { +    barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; +    barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; +    source_stage = VK_PIPELINE_STAGE_TRANSFER_BIT; +    destination_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; +  } else if (old_layout == VK_IMAGE_LAYOUT_UNDEFINED && +             new_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) { +    barrier.srcAccessMask = 0; +    barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | +                            VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; +    source_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; +    destination_stage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; +  } else { +    goto err; +  } + +  vkCmdPipelineBarrier(command_buffer, source_stage, destination_stage, 0, 0, +                       nullptr, 0, nullptr, 1, &barrier); + +  end_single_time_commands(renderer, command_buffer); +  return true; +err: +  return false; +} + +void copy_buffer_to_image(struct vgltf_renderer *renderer, VkBuffer buffer, +                          VkImage image, uint32_t width, uint32_t height) { +  VkCommandBuffer command_buffer = begin_single_time_commands(renderer); +  VkBufferImageCopy region = { +      .bufferOffset = 0, +      .bufferRowLength = 0, +      .bufferImageHeight = 0, +      .imageSubresource = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, +                           .mipLevel = 0, +  
                         .baseArrayLayer = 0,
+                           .layerCount = 1},
+      .imageOffset = {0, 0, 0},
+      .imageExtent = {width, height, 1}};
+
+  vkCmdCopyBufferToImage(command_buffer, buffer, image,
+                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region);
+
+  end_single_time_commands(renderer, command_buffer);
+}
+
+static bool
+vgltf_renderer_create_depth_resources(struct vgltf_renderer *renderer) {
+  VkFormat depth_format = find_depth_format(renderer);
+  vgltf_renderer_create_image(
+      renderer, renderer->swapchain.swapchain_extent.width,
+      renderer->swapchain.swapchain_extent.height, 1, depth_format,
+      VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &renderer->depth_image);
+  create_image_view(&renderer->device, renderer->depth_image.image,
+                    depth_format, &renderer->depth_image_view,
+                    VK_IMAGE_ASPECT_DEPTH_BIT, 1);
+
+  transition_image_layout(renderer, renderer->depth_image.image, depth_format,
+                          VK_IMAGE_LAYOUT_UNDEFINED,
+                          VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, 1);
+  return true;
+}
+
+static bool
+vgltf_renderer_create_buffer(struct vgltf_renderer *renderer, VkDeviceSize size,
+                             VkBufferUsageFlags usage,
+                             VkMemoryPropertyFlags properties,
+                             struct vgltf_renderer_allocated_buffer *buffer) {
+  VkBufferCreateInfo buffer_info = {.sType =
+                                        VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+                                    .size = size,
+                                    .usage = usage,
+                                    .sharingMode = VK_SHARING_MODE_EXCLUSIVE};
+  VmaAllocationCreateInfo alloc_info = {
+      .usage = VMA_MEMORY_USAGE_AUTO,
+      .flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT |
+               VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT,
+      .preferredFlags = properties};
+
+  if (vmaCreateBuffer(renderer->device.allocator, &buffer_info, &alloc_info,
+                      &buffer->buffer, &buffer->allocation,
+                      &buffer->info) != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Failed to create buffer");
+    goto err;
+  }
+
+  return true;
+err:
+  return false;
+}
+
+static void generate_mipmaps(struct vgltf_renderer *renderer, VkImage image,
+                             VkFormat image_format, int32_t texture_width,
+                             int32_t texture_height, uint32_t mip_levels) {
+  VkFormatProperties format_properties;
+  vkGetPhysicalDeviceFormatProperties(renderer->device.physical_device,
+                                      image_format, &format_properties);
+  if (!(format_properties.optimalTilingFeatures &
+        VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT)) {
+    VGLTF_PANIC("Texture image format does not support linear blitting!");
+  }
+
+  VkCommandBuffer command_buffer = begin_single_time_commands(renderer);
+  VkImageMemoryBarrier barrier = {
+      .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+      .image = image,
+      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+      .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                           .baseArrayLayer = 0,
+                           .layerCount = 1,
+                           .levelCount = 1}};
+
+  
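/* Illustrative sketch, not part of this commit: vgltf_renderer_create_buffer()
 * above already requests HOST_ACCESS_SEQUENTIAL_WRITE for staging uploads; an
 * alternative is to let VMA keep the allocation persistently mapped, so the
 * explicit vmaMapMemory/vmaUnmapMemory pairs used later can be dropped.
 * Assumes the allocation ends up in host-visible memory: */
static bool create_mapped_staging_buffer(VmaAllocator allocator,
                                         VkDeviceSize size, VkBuffer *buffer,
                                         VmaAllocation *allocation,
                                         VmaAllocationInfo *info) {
  VkBufferCreateInfo buffer_info = {.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
                                    .size = size,
                                    .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
                                    .sharingMode = VK_SHARING_MODE_EXCLUSIVE};
  VmaAllocationCreateInfo alloc_info = {
      .usage = VMA_MEMORY_USAGE_AUTO,
      .flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
               VMA_ALLOCATION_CREATE_MAPPED_BIT};
  /* On success, info->pMappedData points at the mapped bytes for the whole
   * lifetime of the allocation, so the caller can memcpy into it directly. */
  return vmaCreateBuffer(allocator, &buffer_info, &alloc_info, buffer,
                         allocation, info) == VK_SUCCESS;
}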
int32_t mip_width = texture_width; +  int32_t mip_height = texture_height; + +  for (uint32_t i = 1; i < mip_levels; i++) { +    barrier.subresourceRange.baseMipLevel = i - 1; +    barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; +    barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; +    barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; +    barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + +    vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, +                         VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, +                         nullptr, 1, &barrier); +    VkImageBlit blit = { +        .srcOffsets = {{0, 0, 0}, {mip_width, mip_height, 1}}, +        .srcSubresource = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, +                           .mipLevel = i - 1, +                           .baseArrayLayer = 0, +                           .layerCount = 1}, +        .dstOffsets = {{0, 0, 0}, +                       {mip_width > 1 ? mip_width / 2 : 1, +                        mip_height > 1 ? mip_height / 2 : 1, 1}}, +        .dstSubresource = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, +                           .mipLevel = i, +                           .baseArrayLayer = 0, +                           .layerCount = 1}, +    }; +    vkCmdBlitImage(command_buffer, image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, +                   image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &blit, +                   VK_FILTER_LINEAR); +    barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; +    barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; +    barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; +    barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + +    vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, +                         VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, +                         0, nullptr, 1, &barrier); +    if (mip_width > 1) +      mip_width /= 2; +    if (mip_height > 1) +      mip_height /= 2; +  } +  barrier.subresourceRange.baseMipLevel = mip_levels - 1; +  barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; +  barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; +  barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; +  barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; +  vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, +                       VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, 0, +                       nullptr, 1, &barrier); + +  end_single_time_commands(renderer, command_buffer); +} + +static bool +vgltf_renderer_create_texture_image(struct vgltf_renderer *renderer) { +  struct vgltf_image image; +  if (!vgltf_image_load_from_file(&image, SV(TEXTURE_PATH))) { +    VGLTF_LOG_ERR("Couldn't load image from file"); +    goto err; +  } +  renderer->mip_level_count = +      floor(log2(VGLTF_MAX(image.width, image.height))) + 1; + +  VkDeviceSize image_size = image.width * image.height * 4; +  struct vgltf_renderer_allocated_buffer staging_buffer = {}; +  if (!vgltf_renderer_create_buffer(renderer, image_size, +                                    VK_BUFFER_USAGE_TRANSFER_SRC_BIT, +                                    VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | +                                        VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, +                                    &staging_buffer)) { +    VGLTF_LOG_ERR("Couldn't create staging buffer"); +    goto deinit_image; +  } + +  void *data; +  
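/* Illustrative sketch, not part of this commit: the mip_level_count
 * assignment below implements floor(log2(max(width, height))) + 1. The same
 * count can be computed without floating point: */
static uint32_t mip_level_count_for(uint32_t width, uint32_t height) {
  uint32_t largest = width > height ? width : height;
  uint32_t levels = 1;
  while (largest > 1) { /* one extra level per halving, down to 1x1 */
    largest /= 2;
    levels++;
  }
  return levels;
}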
vmaMapMemory(renderer->device.allocator, staging_buffer.allocation, &data); +  memcpy(data, image.data, image_size); +  vmaUnmapMemory(renderer->device.allocator, staging_buffer.allocation); + +  vgltf_renderer_create_image( +      renderer, image.width, image.height, renderer->mip_level_count, +      VK_FORMAT_R8G8B8A8_SRGB, VK_IMAGE_TILING_OPTIMAL, +      VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | +          VK_IMAGE_USAGE_SAMPLED_BIT, +      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &renderer->texture_image); + +  transition_image_layout(renderer, renderer->texture_image.image, +                          VK_FORMAT_R8G8B8A8_SRGB, VK_IMAGE_LAYOUT_UNDEFINED, +                          VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, +                          renderer->mip_level_count); +  copy_buffer_to_image(renderer, staging_buffer.buffer, +                       renderer->texture_image.image, image.width, +                       image.height); + +  generate_mipmaps(renderer, renderer->texture_image.image, +                   VK_FORMAT_R8G8B8A8_SRGB, image.width, image.height, +                   renderer->mip_level_count); + +  vmaDestroyBuffer(renderer->device.allocator, staging_buffer.buffer, +                   staging_buffer.allocation); +  vgltf_image_deinit(&image); +  return true; +deinit_image: +  vgltf_image_deinit(&image); +err: +  return false; +} + +static bool +vgltf_renderer_create_texture_image_view(struct vgltf_renderer *renderer) { +  return create_image_view( +      &renderer->device, renderer->texture_image.image, VK_FORMAT_R8G8B8A8_SRGB, +      &renderer->texture_image_view, VK_IMAGE_ASPECT_COLOR_BIT, +      renderer->mip_level_count); +} + +static bool +vgltf_renderer_create_texture_sampler(struct vgltf_renderer *renderer) { +  VkPhysicalDeviceProperties properties = {}; +  vkGetPhysicalDeviceProperties(renderer->device.physical_device, &properties); + +  VkSamplerCreateInfo sampler_info = { +      .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, +      .magFilter = VK_FILTER_LINEAR, +      .minFilter = VK_FILTER_LINEAR, +      .addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT, +      .addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT, +      .addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT, +      .anisotropyEnable = VK_TRUE, +      .maxAnisotropy = properties.limits.maxSamplerAnisotropy, +      .borderColor = VK_BORDER_COLOR_INT_OPAQUE_BLACK, +      .unnormalizedCoordinates = VK_FALSE, +      .compareEnable = VK_FALSE, +      .compareOp = VK_COMPARE_OP_ALWAYS, +      .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR, +      .mipLodBias = 0.f, +      .minLod = 0.f, +      .maxLod = renderer->mip_level_count}; + +  if (vkCreateSampler(renderer->device.device, &sampler_info, nullptr, +                      &renderer->texture_sampler) != VK_SUCCESS) { +    goto err; +  } + +  return true; +err: +  return false; +} + +static void get_file_data(void *ctx, const char *filename, const int is_mtl, +                          const char *obj_filename, char **data, size_t *len) { +  (void)ctx; +  (void)is_mtl; + +  if (!filename) { +    VGLTF_LOG_ERR("Null filename"); +    *data = NULL; +    *len = 0; +    return; +  } +  *data = vgltf_platform_read_file_to_string(obj_filename, len); +} + +static bool load_model(struct vgltf_renderer *renderer) { +  tinyobj_attrib_t attrib; +  tinyobj_shape_t *shapes = nullptr; +  size_t shape_count; +  tinyobj_material_t *materials = nullptr; +  size_t material_count; + +  if ((tinyobj_parse_obj(&attrib, &shapes, &shape_count, &materials, +        
                 &material_count, MODEL_PATH, get_file_data, nullptr, +                         TINYOBJ_FLAG_TRIANGULATE)) != TINYOBJ_SUCCESS) { +    VGLTF_LOG_ERR("Couldn't load obj"); +    return false; +  } + +  for (size_t shape_index = 0; shape_index < shape_count; shape_index++) { +    tinyobj_shape_t *shape = &shapes[shape_index]; +    unsigned int face_offset = shape->face_offset; +    for (size_t face_index = face_offset; +         face_index < face_offset + shape->length; face_index++) { +      float v[3][3]; +      float t[3][2]; + +      tinyobj_vertex_index_t idx0 = attrib.faces[face_index * 3 + 0]; +      tinyobj_vertex_index_t idx1 = attrib.faces[face_index * 3 + 1]; +      tinyobj_vertex_index_t idx2 = attrib.faces[face_index * 3 + 2]; + +      for (int k = 0; k < 3; k++) { +        int f0 = idx0.v_idx; +        int f1 = idx1.v_idx; +        int f2 = idx2.v_idx; + +        v[0][k] = attrib.vertices[3 * (size_t)f0 + k]; +        v[1][k] = attrib.vertices[3 * (size_t)f1 + k]; +        v[2][k] = attrib.vertices[3 * (size_t)f2 + k]; +      } + +      for (int k = 0; k < 2; k++) { +        int t0 = idx0.vt_idx; +        int t1 = idx1.vt_idx; +        int t2 = idx2.vt_idx; + +        t[0][k] = attrib.texcoords[2 * (size_t)t0 + k]; +        t[1][k] = attrib.texcoords[2 * (size_t)t1 + k]; +        t[2][k] = attrib.texcoords[2 * (size_t)t2 + k]; +      } + +      for (int k = 0; k < 3; k++) { +        renderer->vertices[renderer->vertex_count++] = (struct vgltf_vertex){ +            .position = {v[k][0], v[k][1], v[k][2]}, +            .texture_coordinates = {t[k][0], 1.f - t[k][1]}, +            .color = {1.f, 1.f, 1.f}}; +        renderer->indices[renderer->index_count++] = renderer->index_count; +      } +    } +    tinyobj_attrib_free(&attrib); +    tinyobj_shapes_free(shapes, shape_count); +    tinyobj_materials_free(materials, material_count); +  } +  return true; +} + +static bool +vgltf_renderer_create_vertex_buffer(struct vgltf_renderer *renderer) { +  VkDeviceSize buffer_size = +      renderer->vertex_count * sizeof(struct vgltf_vertex); + +  struct vgltf_renderer_allocated_buffer staging_buffer = {}; +  if (!vgltf_renderer_create_buffer(renderer, buffer_size, +                                    VK_BUFFER_USAGE_TRANSFER_SRC_BIT, +                                    VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | +                                        VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, +                                    &staging_buffer)) { +    VGLTF_LOG_ERR("Failed to create transfer buffer"); +    goto err; +  } + +  void *data; +  vmaMapMemory(renderer->device.allocator, staging_buffer.allocation, &data); +  memcpy(data, renderer->vertices, +         renderer->vertex_count * sizeof(struct vgltf_vertex)); +  vmaUnmapMemory(renderer->device.allocator, staging_buffer.allocation); + +  if (!vgltf_renderer_create_buffer( +          renderer, buffer_size, +          VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, +          VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &renderer->vertex_buffer)) { +    VGLTF_LOG_ERR("Failed to create vertex buffer"); +    goto destroy_staging_buffer; +  } + +  vgltf_renderer_copy_buffer(renderer, staging_buffer.buffer, +                             renderer->vertex_buffer.buffer, buffer_size); +  vmaDestroyBuffer(renderer->device.allocator, staging_buffer.buffer, +                   staging_buffer.allocation); +  return true; +destroy_staging_buffer: +  vmaDestroyBuffer(renderer->device.allocator, staging_buffer.buffer, +            
       staging_buffer.allocation); +err: +  return false; +} + +static bool +vgltf_renderer_create_index_buffer(struct vgltf_renderer *renderer) { +  VkDeviceSize buffer_size = renderer->index_count * sizeof(uint16_t); +  struct vgltf_renderer_allocated_buffer staging_buffer = {}; +  if (!vgltf_renderer_create_buffer(renderer, buffer_size, +                                    VK_BUFFER_USAGE_TRANSFER_SRC_BIT, +                                    VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | +                                        VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, +                                    &staging_buffer)) { +    VGLTF_LOG_ERR("Failed to create transfer buffer"); +    goto err; +  } + +  void *data; +  vmaMapMemory(renderer->device.allocator, staging_buffer.allocation, &data); +  memcpy(data, renderer->indices, renderer->index_count * sizeof(uint16_t)); +  vmaUnmapMemory(renderer->device.allocator, staging_buffer.allocation); + +  if (!vgltf_renderer_create_buffer( +          renderer, buffer_size, +          VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT, +          VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &renderer->index_buffer)) { +    VGLTF_LOG_ERR("Failed to create index buffer"); +    goto destroy_staging_buffer; +  } +  vgltf_renderer_copy_buffer(renderer, staging_buffer.buffer, +                             renderer->index_buffer.buffer, buffer_size); +  vmaDestroyBuffer(renderer->device.allocator, staging_buffer.buffer, +                   staging_buffer.allocation); +  return true; + +destroy_staging_buffer: +  vmaDestroyBuffer(renderer->device.allocator, staging_buffer.buffer, +                   staging_buffer.allocation); +err: +  return false; +} + +static bool +vgltf_renderer_create_command_buffer(struct vgltf_renderer *renderer) { +  VkCommandBufferAllocateInfo allocate_info = { +      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, +      .commandPool = renderer->command_pool, +      .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, +      .commandBufferCount = VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT}; + +  if (vkAllocateCommandBuffers(renderer->device.device, &allocate_info, +                               renderer->command_buffer) != VK_SUCCESS) { +    VGLTF_LOG_ERR("Couldn't allocate command buffers"); +    goto err; +  } + +  return true; +err: +  return false; +} + +static bool +vgltf_renderer_create_sync_objects(struct vgltf_renderer *renderer) { +  VkSemaphoreCreateInfo semaphore_info = { +      .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, +  }; + +  VkFenceCreateInfo fence_info = {.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, +                                  .flags = VK_FENCE_CREATE_SIGNALED_BIT}; + +  int frame_in_flight_index = 0; +  for (; frame_in_flight_index < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; +       frame_in_flight_index++) { +    if (vkCreateSemaphore( +            renderer->device.device, &semaphore_info, nullptr, +            &renderer->image_available_semaphores[frame_in_flight_index]) != +            VK_SUCCESS || +        vkCreateSemaphore( +            renderer->device.device, &semaphore_info, nullptr, +            &renderer->render_finished_semaphores[frame_in_flight_index]) != +            VK_SUCCESS || +        vkCreateFence(renderer->device.device, &fence_info, nullptr, +                      &renderer->in_flight_fences[frame_in_flight_index]) != +            VK_SUCCESS) { +      VGLTF_LOG_ERR("Couldn't create sync objects"); +      goto err; +    } +  } + +  return true; +err: +  for (int 
frame_in_flight_to_delete_index = 0;
+       frame_in_flight_to_delete_index < frame_in_flight_index;
+       frame_in_flight_to_delete_index++) {
+    vkDestroyFence(renderer->device.device,
+                   renderer->in_flight_fences[frame_in_flight_to_delete_index],
+                   nullptr);
+    vkDestroySemaphore(
+        renderer->device.device,
+        renderer->render_finished_semaphores[frame_in_flight_to_delete_index],
+        nullptr);
+    vkDestroySemaphore(
+        renderer->device.device,
+        renderer->image_available_semaphores[frame_in_flight_to_delete_index],
+        nullptr);
+  }
+  return false;
+}
+
+static bool vgltf_vk_swapchain_init(struct vgltf_vk_swapchain *swapchain,
+                                    struct vgltf_vk_device *device,
+                                    struct vgltf_vk_surface *surface,
+                                    struct vgltf_window_size *window_size) {
+  if (!create_swapchain(swapchain, device, surface, window_size)) {
+    VGLTF_LOG_ERR("Couldn't create swapchain");
+    goto err;
+  }
+
+  if (!create_swapchain_image_views(swapchain, device)) {
+    VGLTF_LOG_ERR("Couldn't create image views");
+    goto destroy_swapchain;
+  }
+
+  return true;
+destroy_swapchain:
+  vkDestroySwapchainKHR(device->device, swapchain->swapchain, nullptr);
+err:
+  return false;
+}
+
+static void vgltf_vk_swapchain_deinit(struct vgltf_vk_swapchain *swapchain,
+                                      struct vgltf_vk_device *device) {
+  for (uint32_t swapchain_image_view_index = 0;
+       swapchain_image_view_index < swapchain->swapchain_image_count;
+       swapchain_image_view_index++) {
+    vkDestroyImageView(
+        device->device,
+        swapchain->swapchain_image_views[swapchain_image_view_index], nullptr);
+  }
+  vkDestroySwapchainKHR(device->device, swapchain->swapchain, nullptr);
+}
+
+static void vgltf_renderer_cleanup_swapchain(struct vgltf_renderer *renderer) {
+  vkDestroyImageView(renderer->device.device, renderer->depth_image_view,
+                     nullptr);
+  vmaDestroyImage(renderer->device.allocator, renderer->depth_image.image,
+                  renderer->depth_image.allocation);
+
+  for (uint32_t framebuffer_index = 0;
+       framebuffer_index < renderer->swapchain.swapchain_image_count;
+       framebuffer_index++) {
+    vkDestroyFramebuffer(renderer->device.device,
+                         renderer->swapchain_framebuffers[framebuffer_index],
+                         nullptr);
+  }
+
+  vgltf_vk_swapchain_deinit(&renderer->swapchain, &renderer->device);
+}
+
+static bool vgltf_renderer_recreate_swapchain(struct vgltf_renderer *renderer) {
+  vkDeviceWaitIdle(renderer->device.device);
+  vgltf_renderer_cleanup_swapchain(renderer);
+
+  // TODO add error handling
+  create_swapchain(&renderer->swapchain, &renderer->device, &renderer->surface,
+                   &renderer->window_size);
+  create_swapchain_image_views(&renderer->swapchain, &renderer->device);
+  vgltf_renderer_create_depth_resources(renderer);
+  vgltf_renderer_create_framebuffers(renderer);
+  return true;
+}
+
+static void vgltf_renderer_triangle_pass(struct vgltf_renderer *renderer,
+                                         uint32_t swapchain_image_index) {
+  VkRenderPassBeginInfo render_pass_info = {
+      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+      .renderPass = renderer->render_pass,
+      .framebuffer = renderer->swapchain_framebuffers[swapchain_image_index],
+      .renderArea = {.offset = {},
+                     .extent = renderer->swapchain.swapchain_extent},
+      
.clearValueCount = 2,
+      .pClearValues =
+          (const VkClearValue[]){{.color = {.float32 = {0.f, 0.f, 0.f, 1.f}}},
+                                 {.depthStencil = {1.0f, 0}}},
+
+  };
+
+  vkCmdBeginRenderPass(renderer->command_buffer[renderer->current_frame],
+                       &render_pass_info, VK_SUBPASS_CONTENTS_INLINE);
+  vkCmdBindPipeline(renderer->command_buffer[renderer->current_frame],
+                    VK_PIPELINE_BIND_POINT_GRAPHICS,
+                    renderer->graphics_pipeline);
+  VkViewport viewport = {
+      .x = 0.f,
+      .y = 0.f,
+      .width = (float)renderer->swapchain.swapchain_extent.width,
+      .height = (float)renderer->swapchain.swapchain_extent.height,
+      .minDepth = 0.f,
+      .maxDepth = 1.f};
+  vkCmdSetViewport(renderer->command_buffer[renderer->current_frame], 0, 1,
+                   &viewport);
+  VkRect2D scissor = {.offset = {},
+                      .extent = renderer->swapchain.swapchain_extent};
+  vkCmdSetScissor(renderer->command_buffer[renderer->current_frame], 0, 1,
+                  &scissor);
+
+  VkBuffer vertex_buffers[] = {renderer->vertex_buffer.buffer};
+  VkDeviceSize offsets[] = {0};
+  vkCmdBindVertexBuffers(renderer->command_buffer[renderer->current_frame], 0,
+                         1, vertex_buffers, offsets);
+  vkCmdBindIndexBuffer(renderer->command_buffer[renderer->current_frame],
+                       renderer->index_buffer.buffer, 0, VK_INDEX_TYPE_UINT16);
+
+  vkCmdBindDescriptorSets(
+      renderer->command_buffer[renderer->current_frame],
+      VK_PIPELINE_BIND_POINT_GRAPHICS, renderer->pipeline_layout, 0, 1,
+      &renderer->descriptor_sets[renderer->current_frame], 0, nullptr);
+  vkCmdDrawIndexed(renderer->command_buffer[renderer->current_frame],
+                   renderer->index_count, 1, 0, 0, 0);
+
+  vkCmdEndRenderPass(renderer->command_buffer[renderer->current_frame]);
+}
+
+static void update_uniform_buffer(struct vgltf_renderer *renderer,
+                                  uint32_t current_frame) {
+  static long long start_time_nanoseconds = 0;
+  if (start_time_nanoseconds == 0) {
+    if (!vgltf_platform_get_current_time_nanoseconds(&start_time_nanoseconds)) {
+      VGLTF_LOG_ERR("Couldn't get current time");
+    }
+  }
+
+  long long current_time_nanoseconds = 0;
+  if (!vgltf_platform_get_current_time_nanoseconds(&current_time_nanoseconds)) {
+    VGLTF_LOG_ERR("Couldn't get current time");
+  }
+
+  long long elapsed_time_nanoseconds =
+      current_time_nanoseconds - start_time_nanoseconds;
+  float elapsed_time_seconds = elapsed_time_nanoseconds / 1e9f;
+  VGLTF_LOG_INFO("Elapsed time: %f", elapsed_time_seconds);
+
+  vgltf_mat4 model_matrix;
+  vgltf_mat4_rotate(model_matrix, (vgltf_mat4)VGLTF_MAT4_IDENTITY,
+                    elapsed_time_seconds * VGLTF_MATHS_DEG_TO_RAD(90.0f),
+                    (vgltf_vec3){0.f, 0.f, 1.f});
+
+  vgltf_mat4 view_matrix;
+  vgltf_mat4_look_at(view_matrix, (vgltf_vec3){2.f, 2.f, 2.f},
+                     (vgltf_vec3){0.f, 0.f, 0.f}, (vgltf_vec3){0.f, 0.f, 1.f});
+
+  vgltf_mat4 projection_matrix;
+  vgltf_mat4_perspective(projection_matrix, VGLTF_MATHS_DEG_TO_RAD(45.f),
+                         (float)renderer->swapchain.swapchain_extent.width /
+                             (float)renderer->swapchain.swapchain_extent.height,
+                         0.1f, 10.f);
+  projection_matrix[1 * 4 + 1] *= -1;
+
+  struct vgltf_renderer_uniform_buffer_object ubo = {};
+  memcpy(ubo.model, model_matrix, sizeof(vgltf_mat4));
+  
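/* Illustrative sketch, not part of this commit: the three mat4 members copied
 * into the UBO here line up with GLSL std140 layout without padding (a mat4
 * occupies 64 bytes at 16-byte alignment). If a vec3 were ever added to the
 * block, the C side would need explicit 16-byte alignment; the struct name
 * and extra member below are hypothetical: */
struct example_ubo_with_light {
  vgltf_mat4 model;
  vgltf_mat4 view;
  vgltf_mat4 projection;
  alignas(16) float light_direction[3]; /* std140 rounds a vec3 up to 16 bytes */
};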
memcpy(ubo.view, view_matrix, sizeof(vgltf_mat4)); +  memcpy(ubo.projection, projection_matrix, sizeof(vgltf_mat4)); +  memcpy(renderer->mapped_uniform_buffers[current_frame], &ubo, sizeof(ubo)); +} + +bool vgltf_renderer_render_frame(struct vgltf_renderer *renderer) { +  vkWaitForFences(renderer->device.device, 1, +                  &renderer->in_flight_fences[renderer->current_frame], VK_TRUE, +                  UINT64_MAX); + +  uint32_t image_index; +  VkResult acquire_swapchain_image_result = vkAcquireNextImageKHR( +      renderer->device.device, renderer->swapchain.swapchain, UINT64_MAX, +      renderer->image_available_semaphores[renderer->current_frame], +      VK_NULL_HANDLE, &image_index); +  if (acquire_swapchain_image_result == VK_ERROR_OUT_OF_DATE_KHR || +      acquire_swapchain_image_result == VK_SUBOPTIMAL_KHR || +      renderer->framebuffer_resized) { +    renderer->framebuffer_resized = false; +    vgltf_renderer_recreate_swapchain(renderer); +    return true; +  } else if (acquire_swapchain_image_result != VK_SUCCESS) { +    VGLTF_LOG_ERR("Failed to acquire a swapchain image"); +    goto err; +  } + +  vkResetFences(renderer->device.device, 1, +                &renderer->in_flight_fences[renderer->current_frame]); + +  vkResetCommandBuffer(renderer->command_buffer[renderer->current_frame], 0); +  VkCommandBufferBeginInfo begin_info = { +      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, +  }; + +  if (vkBeginCommandBuffer(renderer->command_buffer[renderer->current_frame], +                           &begin_info) != VK_SUCCESS) { +    VGLTF_LOG_ERR("Failed to begin recording command buffer"); +    goto err; +  } + +  vgltf_renderer_triangle_pass(renderer, image_index); + +  if (vkEndCommandBuffer(renderer->command_buffer[renderer->current_frame]) != +      VK_SUCCESS) { +    VGLTF_LOG_ERR("Failed to record command buffer"); +    goto err; +  } + +  update_uniform_buffer(renderer, renderer->current_frame); + +  VkSubmitInfo submit_info = { +      .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, +  }; + +  VkSemaphore wait_semaphores[] = { +      renderer->image_available_semaphores[renderer->current_frame]}; +  VkPipelineStageFlags wait_stages[] = { +      VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT}; +  submit_info.waitSemaphoreCount = 1; +  submit_info.pWaitSemaphores = wait_semaphores; +  submit_info.pWaitDstStageMask = wait_stages; +  submit_info.commandBufferCount = 1; +  submit_info.pCommandBuffers = +      &renderer->command_buffer[renderer->current_frame]; + +  VkSemaphore signal_semaphores[] = { +      renderer->render_finished_semaphores[renderer->current_frame]}; +  submit_info.signalSemaphoreCount = 1; +  submit_info.pSignalSemaphores = signal_semaphores; +  if (vkQueueSubmit(renderer->device.graphics_queue, 1, &submit_info, +                    renderer->in_flight_fences[renderer->current_frame]) != +      VK_SUCCESS) { +    VGLTF_LOG_ERR("Failed to submit draw command buffer"); +    goto err; +  } + +  VkPresentInfoKHR present_info = {.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, +                                   .waitSemaphoreCount = 1, +                                   .pWaitSemaphores = signal_semaphores}; + +  VkSwapchainKHR swapchains[] = {renderer->swapchain.swapchain}; +  present_info.swapchainCount = 1; +  present_info.pSwapchains = swapchains; +  present_info.pImageIndices = &image_index; +  VkResult result = +      vkQueuePresentKHR(renderer->device.present_queue, &present_info); +  if (result == VK_ERROR_OUT_OF_DATE_KHR || result == 
VK_SUBOPTIMAL_KHR) {
+    vgltf_renderer_recreate_swapchain(renderer);
+  } else if (result != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Failed to present swapchain image");
+    goto err;
+  }
+  renderer->current_frame =
+      (renderer->current_frame + 1) % VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT;
+  return true;
+err:
+  return false;
+}
+static bool
+vgltf_renderer_create_uniform_buffers(struct vgltf_renderer *renderer) {
+  VkDeviceSize buffer_size =
+      sizeof(struct vgltf_renderer_uniform_buffer_object);
+
+  for (int i = 0; i < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; i++) {
+    vgltf_renderer_create_buffer(renderer, buffer_size,
+                                 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+                                 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                                     VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+                                 &renderer->uniform_buffers[i]);
+    vmaMapMemory(renderer->device.allocator,
+                 renderer->uniform_buffers[i].allocation,
+                 &renderer->mapped_uniform_buffers[i]);
+  }
+
+  return true;
+}
+
+static bool
+vgltf_renderer_create_descriptor_pool(struct vgltf_renderer *renderer) {
+  VkDescriptorPoolSize pool_sizes[] = {
+      (VkDescriptorPoolSize){.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+                             .descriptorCount =
+                                 VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT},
+      (VkDescriptorPoolSize){.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+                             .descriptorCount =
+                                 VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT}};
+  int pool_size_count = sizeof(pool_sizes) / sizeof(pool_sizes[0]);
+
+  VkDescriptorPoolCreateInfo pool_info = {
+      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+      .poolSizeCount = pool_size_count,
+      .pPoolSizes = pool_sizes,
+      .maxSets = VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT};
+
+  if (vkCreateDescriptorPool(renderer->device.device, &pool_info, nullptr,
+                             &renderer->descriptor_pool) != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Couldn't create uniform descriptor pool");
+    goto err;
+  }
+
+  return true;
+err:
+  return false;
+}
+static bool
+vgltf_renderer_create_descriptor_sets(struct vgltf_renderer *renderer) {
+  VkDescriptorSetLayout layouts[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT] = {};
+  for (int layout_index = 0;
+       layout_index < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT;
+       layout_index++) {
+    layouts[layout_index] = renderer->descriptor_set_layout;
+  }
+
+  VkDescriptorSetAllocateInfo alloc_info = {
+      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+      .descriptorPool = renderer->descriptor_pool,
+      .descriptorSetCount = VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT,
+      .pSetLayouts = layouts};
+
+  if (vkAllocateDescriptorSets(renderer->device.device, &alloc_info,
+                               renderer->descriptor_sets) != VK_SUCCESS) {
+    VGLTF_LOG_ERR("Couldn't create descriptor sets");
+    goto err;
+  }
+
+  for (int set_index = 0; set_index < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT;
+       set_index++) {
+    VkDescriptorBufferInfo buffer_info = {
+        .buffer = renderer->uniform_buffers[set_index].buffer,
+        .offset = 0,
+        .range = sizeof(struct vgltf_renderer_uniform_buffer_object)};
+
+    VkDescriptorImageInfo image_info = {
+        .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
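+        // Binding 1 is a combined image sampler: the descriptor references the
+        // texture's image view and sampler created earlier during init.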
.imageView = renderer->texture_image_view, +        .sampler = renderer->texture_sampler, +    }; + +    VkWriteDescriptorSet descriptor_writes[] = { +        (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, +                               .dstSet = renderer->descriptor_sets[set_index], +                               .dstBinding = 0, +                               .dstArrayElement = 0, +                               .descriptorType = +                                   VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, +                               .descriptorCount = 1, +                               .pBufferInfo = &buffer_info}, + +        (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, +                               .dstSet = renderer->descriptor_sets[set_index], +                               .dstBinding = 1, +                               .dstArrayElement = 0, +                               .descriptorType = +                                   VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, +                               .descriptorCount = 1, +                               .pImageInfo = &image_info}}; +    int descriptor_write_count = +        sizeof(descriptor_writes) / sizeof(descriptor_writes[0]); + +    vkUpdateDescriptorSets(renderer->device.device, descriptor_write_count, +                           descriptor_writes, 0, nullptr); +  } + +  return true; +err: +  return false; +} + +static bool vgltf_vk_device_init(struct vgltf_vk_device *device, +                                 struct vgltf_vk_instance *instance, +                                 struct vgltf_vk_surface *surface) { +  if (!pick_physical_device(&device->physical_device, instance, +                            surface->surface)) { +    VGLTF_LOG_ERR("Couldn't pick physical device"); +    goto err; +  } + +  if (!create_logical_device(&device->device, &device->graphics_queue, +                             &device->present_queue, device->physical_device, +                             surface->surface)) { +    VGLTF_LOG_ERR("Couldn't pick logical device"); +    goto err; +  } + +  if (!create_allocator(&device->allocator, device, instance)) { +    VGLTF_LOG_ERR("Couldn't create allocator"); +    goto destroy_logical_device; +  } + +  return true; +destroy_logical_device: +  vkDestroyDevice(device->device, nullptr); +err: +  return false; +} + +static void vgltf_vk_device_deinit(struct vgltf_vk_device *device) { +  vmaDestroyAllocator(device->allocator); +  vkDestroyDevice(device->device, nullptr); +} + +bool vgltf_renderer_init(struct vgltf_renderer *renderer, +                         struct vgltf_platform *platform) { +  if (!vgltf_vk_instance_init(&renderer->instance, platform)) { +    VGLTF_LOG_ERR("instance creation failed"); +    goto err; +  } +  vgltf_renderer_setup_debug_messenger(renderer); +  if (!vgltf_vk_surface_init(&renderer->surface, &renderer->instance, +                             platform)) { +    goto destroy_instance; +  } + +  if (!vgltf_vk_device_init(&renderer->device, &renderer->instance, +                            &renderer->surface)) { +    VGLTF_LOG_ERR("Device creation failed"); +    goto destroy_surface; +  } + +  struct vgltf_window_size window_size = {800, 600}; +  if (!vgltf_platform_get_window_size(platform, &window_size)) { +    VGLTF_LOG_ERR("Couldn't get window size"); +    goto destroy_device; +  } +  renderer->window_size = window_size; + +  if (!vgltf_vk_swapchain_init(&renderer->swapchain, &renderer->device, +                             
  &renderer->surface, &renderer->window_size)) { +    VGLTF_LOG_ERR("Couldn't create swapchain"); +    goto destroy_device; +  } + +  if (!vgltf_renderer_create_render_pass(renderer)) { +    VGLTF_LOG_ERR("Couldn't create render pass"); +    goto destroy_swapchain; +  } + +  if (!vgltf_renderer_create_descriptor_set_layout(renderer)) { +    VGLTF_LOG_ERR("Couldn't create descriptor set layout"); +    goto destroy_render_pass; +  } + +  if (!vgltf_renderer_create_graphics_pipeline(renderer)) { +    VGLTF_LOG_ERR("Couldn't create graphics pipeline"); +    goto destroy_descriptor_set_layout; +  } + +  if (!vgltf_renderer_create_command_pool(renderer)) { +    VGLTF_LOG_ERR("Couldn't create command pool"); +    goto destroy_graphics_pipeline; +  } + +  if (!vgltf_renderer_create_depth_resources(renderer)) { +    VGLTF_LOG_ERR("Couldn't create depth resources"); +    goto destroy_command_pool; +  } + +  if (!vgltf_renderer_create_framebuffers(renderer)) { +    VGLTF_LOG_ERR("Couldn't create framebuffers"); +    goto destroy_depth_resources; +  } + +  if (!vgltf_renderer_create_texture_image(renderer)) { +    VGLTF_LOG_ERR("Couldn't create texture image"); +    goto destroy_frame_buffers; +  } + +  if (!vgltf_renderer_create_texture_image_view(renderer)) { +    VGLTF_LOG_ERR("Couldn't create texture image view"); +    goto destroy_texture_image; +  } + +  if (!vgltf_renderer_create_texture_sampler(renderer)) { +    VGLTF_LOG_ERR("Couldn't create texture sampler"); +    goto destroy_texture_image_view; +  } + +  if (!load_model(renderer)) { +    VGLTF_LOG_ERR("Couldn't load model"); +    goto destroy_texture_sampler; +  } + +  if (!vgltf_renderer_create_vertex_buffer(renderer)) { +    VGLTF_LOG_ERR("Couldn't create vertex buffer"); +    goto destroy_model; +  } + +  if (!vgltf_renderer_create_index_buffer(renderer)) { +    VGLTF_LOG_ERR("Couldn't create index buffer"); +    goto destroy_vertex_buffer; +  } + +  if (!vgltf_renderer_create_uniform_buffers(renderer)) { +    VGLTF_LOG_ERR("Couldn't create uniform buffers"); +    goto destroy_index_buffer; +  } + +  if (!vgltf_renderer_create_descriptor_pool(renderer)) { +    VGLTF_LOG_ERR("Couldn't create descriptor pool"); +    goto destroy_uniform_buffers; +  } + +  if (!vgltf_renderer_create_descriptor_sets(renderer)) { +    VGLTF_LOG_ERR("Couldn't create descriptor sets"); +    goto destroy_descriptor_pool; +  } + +  if (!vgltf_renderer_create_command_buffer(renderer)) { +    VGLTF_LOG_ERR("Couldn't create command buffer"); +    goto destroy_descriptor_pool; +  } + +  if (!vgltf_renderer_create_sync_objects(renderer)) { +    VGLTF_LOG_ERR("Couldn't create sync objects"); +    goto destroy_descriptor_pool; +  } + +  return true; + +destroy_descriptor_pool: +  vkDestroyDescriptorPool(renderer->device.device, renderer->descriptor_pool, +                          nullptr); +destroy_uniform_buffers: +  for (int i = 0; i < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; i++) { +    vmaDestroyBuffer(renderer->device.allocator, +                     renderer->uniform_buffers[i].buffer, +                     renderer->uniform_buffers[i].allocation); +  } +destroy_index_buffer: +  vmaDestroyBuffer(renderer->device.allocator, renderer->index_buffer.buffer, +                   renderer->index_buffer.allocation); +destroy_vertex_buffer: +  vmaDestroyBuffer(renderer->device.allocator, renderer->vertex_buffer.buffer, +                   renderer->vertex_buffer.allocation); +destroy_model: +  // TODO +destroy_texture_sampler: +  
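+  // Failure paths in init jump to the matching label and fall through the
+  // remaining cleanup labels below.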
vkDestroySampler(renderer->device.device, renderer->texture_sampler, nullptr); +destroy_texture_image_view: +  vkDestroyImageView(renderer->device.device, renderer->texture_image_view, +                     nullptr); +destroy_texture_image: +  vmaDestroyImage(renderer->device.allocator, renderer->texture_image.image, +                  renderer->texture_image.allocation); +destroy_depth_resources: +  vkDestroyImageView(renderer->device.device, renderer->depth_image_view, +                     nullptr); +  vmaDestroyImage(renderer->device.allocator, renderer->depth_image.image, +                  renderer->depth_image.allocation); +destroy_command_pool: +  vkDestroyCommandPool(renderer->device.device, renderer->command_pool, +                       nullptr); +destroy_frame_buffers: +  for (uint32_t swapchain_framebuffer_index = 0; +       swapchain_framebuffer_index < renderer->swapchain.swapchain_image_count; +       swapchain_framebuffer_index++) { +    vkDestroyFramebuffer( +        renderer->device.device, +        renderer->swapchain_framebuffers[swapchain_framebuffer_index], nullptr); +  } +destroy_graphics_pipeline: +  vkDestroyPipeline(renderer->device.device, renderer->graphics_pipeline, +                    nullptr); +  vkDestroyPipelineLayout(renderer->device.device, renderer->pipeline_layout, +                          nullptr); +destroy_descriptor_set_layout: +  vkDestroyDescriptorSetLayout(renderer->device.device, +                               renderer->descriptor_set_layout, nullptr); +destroy_render_pass: +  vkDestroyRenderPass(renderer->device.device, renderer->render_pass, nullptr); +destroy_swapchain: +  vgltf_vk_swapchain_deinit(&renderer->swapchain, &renderer->device); +destroy_device: +  vgltf_vk_device_deinit(&renderer->device); +destroy_surface: +  vgltf_vk_surface_deinit(&renderer->surface, &renderer->instance); +destroy_instance: +  if (enable_validation_layers) { +    destroy_debug_utils_messenger_ext(renderer->instance.instance, +                                      renderer->debug_messenger, nullptr); +  } +  vgltf_vk_instance_deinit(&renderer->instance); +err: +  return false; +} +void vgltf_renderer_deinit(struct vgltf_renderer *renderer) { +  vkDeviceWaitIdle(renderer->device.device); +  vgltf_renderer_cleanup_swapchain(renderer); +  for (int i = 0; i < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; i++) { +    vmaUnmapMemory(renderer->device.allocator, +                   renderer->uniform_buffers[i].allocation); +    vmaDestroyBuffer(renderer->device.allocator, +                     renderer->uniform_buffers[i].buffer, +                     renderer->uniform_buffers[i].allocation); +  } +  vmaDestroyBuffer(renderer->device.allocator, renderer->index_buffer.buffer, +                   renderer->index_buffer.allocation); +  vmaDestroyBuffer(renderer->device.allocator, renderer->vertex_buffer.buffer, +                   renderer->vertex_buffer.allocation); +  vkDestroySampler(renderer->device.device, renderer->texture_sampler, nullptr); +  vkDestroyImageView(renderer->device.device, renderer->texture_image_view, +                     nullptr); +  vmaDestroyImage(renderer->device.allocator, renderer->texture_image.image, +                  renderer->texture_image.allocation); +  vkDestroyPipeline(renderer->device.device, renderer->graphics_pipeline, +                    nullptr); +  vkDestroyPipelineLayout(renderer->device.device, renderer->pipeline_layout, +                          nullptr); +  vkDestroyDescriptorPool(renderer->device.device, 
renderer->descriptor_pool,
+                          nullptr);
+  vkDestroyDescriptorSetLayout(renderer->device.device,
+                               renderer->descriptor_set_layout, nullptr);
+  vkDestroyRenderPass(renderer->device.device, renderer->render_pass, nullptr);
+  for (int i = 0; i < VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT; i++) {
+    vkDestroySemaphore(renderer->device.device,
+                       renderer->image_available_semaphores[i], nullptr);
+    vkDestroySemaphore(renderer->device.device,
+                       renderer->render_finished_semaphores[i], nullptr);
+    vkDestroyFence(renderer->device.device, renderer->in_flight_fences[i],
+                   nullptr);
+  }
+  vkDestroyCommandPool(renderer->device.device, renderer->command_pool,
+                       nullptr);
+  vgltf_vk_device_deinit(&renderer->device);
+  vgltf_vk_surface_deinit(&renderer->surface, &renderer->instance);
+  if (enable_validation_layers) {
+    destroy_debug_utils_messenger_ext(renderer->instance.instance,
+                                      renderer->debug_messenger, nullptr);
+  }
+  vgltf_vk_instance_deinit(&renderer->instance);
+}
+void vgltf_renderer_on_window_resized(struct vgltf_renderer *renderer,
+                                      struct vgltf_window_size size) {
+  if (size.width > 0 && size.height > 0 &&
+      (size.width != renderer->window_size.width ||
+       size.height != renderer->window_size.height)) {
+    renderer->window_size = size;
+    renderer->framebuffer_resized = true;
+  }
+}
diff --git a/src/renderer/renderer.h b/src/renderer/renderer.h
new file mode 100644
index 0000000..79e1f3d
--- /dev/null
+++ b/src/renderer/renderer.h
@@ -0,0 +1,126 @@
+#ifndef VGLTF_RENDERER_H
+#define VGLTF_RENDERER_H
+
+#include "../maths.h"
+#include "../platform.h"
+#include "vma_usage.h"
+#include <vulkan/vulkan.h>
+
+struct vgltf_vertex {
+  vgltf_vec3 position;
+  vgltf_vec3 color;
+  vgltf_vec2 texture_coordinates;
+};
+VkVertexInputBindingDescription vgltf_vertex_binding_description(void);
+
+struct vgltf_vertex_input_attribute_descriptions {
+  VkVertexInputAttributeDescription descriptions[3];
+  uint32_t count;
+};
+struct vgltf_vertex_input_attribute_descriptions
+vgltf_vertex_attribute_descriptions(void);
+
+struct vgltf_renderer_uniform_buffer_object {
+  alignas(16) vgltf_mat4 model;
+  alignas(16) vgltf_mat4 view;
+  alignas(16) vgltf_mat4 projection;
+};
+
+struct vgltf_renderer_allocated_buffer {
+  VkBuffer buffer;
+  VmaAllocation allocation;
+  VmaAllocationInfo info;
+};
+
+struct vgltf_renderer_allocated_image {
+  VkImage image;
+  VmaAllocation allocation;
+  VmaAllocationInfo info;
+};
+
+struct vgltf_vk_instance {
+  VkInstance instance;
+};
+
+struct vgltf_vk_device {
+  VkPhysicalDevice physical_device;
+  VkDevice device;
+  VkQueue graphics_queue;
+  VkQueue present_queue;
+  VmaAllocator allocator;
+};
+
+struct vgltf_vk_surface {
+  VkSurfaceKHR surface;
+};
+
+constexpr int VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT = 32;
+struct vgltf_vk_swapchain {
+  VkSwapchainKHR swapchain;
+  VkFormat swapchain_image_format;
+  VkImage swapchain_images[VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT];
+  VkImageView swapchain_image_views[VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT];
+  VkExtent2D swapchain_extent;
+  uint32_t swapchain_image_count;
+};
+
+struct vgltf_vk_pipeline {
+  VkPipelineLayout layout;
+  VkPipeline pipeline;
+};
+
+constexpr int VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT = 2;
+struct vgltf_renderer {
+  struct vgltf_vk_instance 
instance; +  struct vgltf_vk_device device; +  VkDebugUtilsMessengerEXT debug_messenger; +  struct vgltf_vk_surface surface; +  struct vgltf_vk_swapchain swapchain; +  struct vgltf_renderer_allocated_image depth_image; +  VkImageView depth_image_view; + +  VkRenderPass render_pass; +  VkDescriptorSetLayout descriptor_set_layout; + +  VkDescriptorPool descriptor_pool; +  VkDescriptorSet descriptor_sets[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT]; +  VkPipelineLayout pipeline_layout; +  VkPipeline graphics_pipeline; + +  VkFramebuffer swapchain_framebuffers[VGLTF_RENDERER_MAX_SWAPCHAIN_IMAGE_COUNT]; + +  VkCommandPool command_pool; +  VkCommandBuffer command_buffer[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT]; +  VkSemaphore +      image_available_semaphores[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT]; +  VkSemaphore +      render_finished_semaphores[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT]; +  VkFence in_flight_fences[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT]; + +  struct vgltf_renderer_allocated_buffer +      uniform_buffers[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT]; +  void *mapped_uniform_buffers[VGLTF_RENDERER_MAX_FRAME_IN_FLIGHT_COUNT]; + +  uint32_t mip_level_count; +  struct vgltf_renderer_allocated_image texture_image; +  VkImageView texture_image_view; +  VkSampler texture_sampler; +  struct vgltf_vertex vertices[100000]; +  int vertex_count; +  uint16_t indices[100000]; +  int index_count; +  struct vgltf_renderer_allocated_buffer vertex_buffer; +  struct vgltf_renderer_allocated_buffer index_buffer; + +  struct vgltf_window_size window_size; +  uint32_t current_frame; +  bool framebuffer_resized; +}; +bool vgltf_renderer_init(struct vgltf_renderer *renderer, +                       struct vgltf_platform *platform); +void vgltf_renderer_deinit(struct vgltf_renderer *renderer); +bool vgltf_renderer_render_frame(struct vgltf_renderer *renderer); +void vgltf_renderer_on_window_resized(struct vgltf_renderer *renderer, +                                    struct vgltf_window_size size); + +#endif // VGLTF_RENDERER_H diff --git a/src/renderer/vma_usage.cpp b/src/renderer/vma_usage.cpp new file mode 100644 index 0000000..83006a1 --- /dev/null +++ b/src/renderer/vma_usage.cpp @@ -0,0 +1,4 @@ +#include "vma_usage.h" + +#define VMA_IMPLEMENTATION +#include <vk_mem_alloc.h> diff --git a/src/renderer/vma_usage.h b/src/renderer/vma_usage.h new file mode 100644 index 0000000..e9b5aa4 --- /dev/null +++ b/src/renderer/vma_usage.h @@ -0,0 +1,6 @@ +#ifndef VGLTF_VMA_USAGE_H +#define VGLTF_VMA_USAGE_H + +#include <vk_mem_alloc.h> + +#endif // VGLTF_VMA_USAGE_H diff --git a/src/str.c b/src/str.c new file mode 100644 index 0000000..9c68d43 --- /dev/null +++ b/src/str.c @@ -0,0 +1,181 @@ +#include "str.h" +#include "alloc.h" +#include "hash.h" +#include "platform.h" +#include <assert.h> +#include <stdarg.h> +#include <string.h> + +struct vgltf_string_view vgltf_string_view_from_literal(const char *str) { +  assert(str); +  size_t length = strlen(str); +  return (struct vgltf_string_view){.length = length, .data = str}; +} +struct vgltf_string_view vgltf_string_view_from_string(struct vgltf_string string) { +  return (struct vgltf_string_view){.length = string.length, .data = string.data}; +} +char vgltf_string_view_at(const struct vgltf_string_view *string_view, +                        size_t index) { +  assert(string_view); +  assert(index < string_view->length); +  return string_view->data[index]; +} +bool vgltf_string_view_eq(struct vgltf_string_view view, +                        struct 
vgltf_string_view other) { +  return view.length == other.length && +         (strncmp(view.data, other.data, view.length) == 0); +} +size_t vgltf_string_view_length(const struct vgltf_string_view *string_view) { +  assert(string_view); +  return string_view->length; +} + +uint64_t vgltf_string_view_hash(const struct vgltf_string_view view) { +  return vgltf_hash_fnv_1a(view.data, view.length); +} + +int vgltf_string_view_utf8_codepoint_at_offset(struct vgltf_string_view view, +                                             size_t offset, +                                             uint32_t *codepoint) { +  assert(codepoint); +  assert(offset < view.length); + +  const unsigned char *s = (unsigned char *)&view.data[offset]; + +  int size; +  if ((*s & 0x80) == 0) { +    *codepoint = *s; +    size = 1; +  } else if ((*s & 0xE0) == 0xC0) { +    *codepoint = *s & 0x1f; +    size = 2; +  } else if ((*s & 0xF0) == 0xE0) { +    *codepoint = *s & 0x0f; +    size = 3; +  } else if ((*s & 0xF8) == 0xF0) { +    *codepoint = *s & 0x07; +    size = 4; +  } else { +    VGLTF_LOG_ERR("Invalid UTF-8 sequence"); +    return 0; +  } + +  for (int i = 1; i < size; i++) { +    if ((s[i] & 0xC0) != 0x80) { +      VGLTF_LOG_ERR("Invalid UTF-8 continuation byte"); +      return 0; +    } + +    *codepoint = (*codepoint << 6) | (s[i] & 0x3F); +  } + +  return size; +} +int vgltf_string_utf8_encode_codepoint(uint32_t codepoint, +                                     char encoded_codepoint[4]) { +  assert(encoded_codepoint); +  if (codepoint > 0x10FFFF) { +    return -1; +  } + +  if (codepoint <= 0x7F) { +    encoded_codepoint[0] = (uint8_t)codepoint; +    return 1; +  } else if (codepoint <= 0x7FF) { +    encoded_codepoint[0] = 0xC0 | ((codepoint >> 6) & 0x1F); +    encoded_codepoint[1] = 0x80 | (codepoint & 0x3F); +    return 2; +  } else if (codepoint <= 0xFFFF) { +    encoded_codepoint[0] = 0xE0 | ((codepoint >> 12) & 0x0F); +    encoded_codepoint[1] = 0x80 | ((codepoint >> 6) & 0x3F); +    encoded_codepoint[2] = 0x80 | (codepoint & 0x3F); +    return 3; +  } else { +    encoded_codepoint[0] = 0xF0 | ((codepoint >> 18) & 0x07); +    encoded_codepoint[1] = 0x80 | ((codepoint >> 12) & 0x3F); +    encoded_codepoint[2] = 0x80 | ((codepoint >> 6) & 0x3F); +    encoded_codepoint[3] = 0x80 | (codepoint & 0x3F); +    return 4; +  } +} + +struct vgltf_string +vgltf_string_from_null_terminated(struct vgltf_allocator *allocator, +                                const char *str) { +  assert(allocator); +  assert(str); +  struct vgltf_string string; +  size_t length = strlen(str); +  char *data = vgltf_allocator_allocate(allocator, length + 1); +  if (!data) { +    VGLTF_PANIC("Couldn't allocate string"); +  } +  strncpy(data, str, length); +  string.length = length; +  string.data = data; +  return string; +} +struct vgltf_string vgltf_string_clone(struct vgltf_allocator *allocator, +                                   const struct vgltf_string string) { +  assert(allocator); + +  size_t length = string.length; +  char *data = vgltf_allocator_allocate(allocator, length + 1); +  memcpy(data, string.data, length); +  data[length] = '\0'; + +  return (struct vgltf_string){.data = data, .length = length}; +} +struct vgltf_string vgltf_string_concatenate(struct vgltf_allocator *allocator, +                                         struct vgltf_string_view head, +                                         struct vgltf_string_view tail) { +  assert(allocator); +  size_t length = head.length + tail.length; +  char *data = 
vgltf_allocator_allocate(allocator, length + 1); +  memcpy(data, head.data, head.length); +  memcpy(data + head.length, tail.data, tail.length); +  data[length] = '\0'; +  return (struct vgltf_string){.data = data, .length = length}; +} +struct vgltf_string vgltf_string_formatted(struct vgltf_allocator *allocator, +                                       struct vgltf_string_view fmt, ...) { +  va_list args; +  va_start(args, fmt); +  struct vgltf_string formatted_string = +      vgltf_string_vformatted(allocator, fmt, args); +  va_end(args); + +  return formatted_string; +} +struct vgltf_string vgltf_string_vformatted(struct vgltf_allocator *allocator, +                                        struct vgltf_string_view fmt, +                                        va_list args) { +  assert(allocator); +  char str[1024]; +  size_t length = vsnprintf(str, 1024, fmt.data, args); +  char *data = vgltf_allocator_allocate(allocator, length + 1); +  memcpy(data, str, length); +  data[length] = '\0'; +  return (struct vgltf_string){.data = data, .length = length}; +} +void vgltf_string_deinit(struct vgltf_allocator *allocator, +                       struct vgltf_string *string) { +  assert(allocator); +  assert(string); +  vgltf_allocator_free(allocator, string->data); +} +size_t vgltf_string_length(const struct vgltf_string *string) { +  return string->length; +} +bool vgltf_string_eq_view(const struct vgltf_string string, +                        const struct vgltf_string_view view) { +  return string.length == view.length && +         (strncmp(string.data, view.data, string.length) == 0); +} +uint64_t vgltf_string_hash(const struct vgltf_string string) { +  return vgltf_hash_fnv_1a(string.data, string.length); +} +bool vgltf_string_eq(struct vgltf_string string, struct vgltf_string other) { +  return string.length == other.length && +         (strncmp(string.data, other.data, string.length) == 0); +} diff --git a/src/str.h b/src/str.h new file mode 100644 index 0000000..c0e4e5c --- /dev/null +++ b/src/str.h @@ -0,0 +1,62 @@ +#ifndef VGLTF_STR_H +#define VGLTF_STR_H + +#include "alloc.h" +#include <stdarg.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> // IWYU pragma: keep + +#define SV(str)                                                                \ +  (struct vgltf_string_view) { .data = str, .length = strlen(str) } + +struct vgltf_string; +struct vgltf_string_view { +  const char *data; +  size_t length; +}; + +struct vgltf_string_view vgltf_string_view_from_literal(const char *str); +struct vgltf_string_view vgltf_string_view_from_string(struct vgltf_string string); +size_t vgltf_string_view_length(const struct vgltf_string_view *string_view); +char vgltf_string_view_at(const struct vgltf_string_view *string_view, +                        size_t index); +bool vgltf_string_view_eq(struct vgltf_string_view view, +                        struct vgltf_string_view other); +uint64_t vgltf_string_view_hash(const struct vgltf_string_view view); +// Fetches the next utf8 codepoint in the string at the given offset +// Returns the size of the codepoint in bytes, 0 in case of error +int vgltf_string_view_utf8_codepoint_at_offset(struct vgltf_string_view view, +                                             size_t offset, +                                             uint32_t *codepoint); +// codepoint has to be a char[4] +int vgltf_string_utf8_encode_codepoint(uint32_t codepoint, +                                     char encoded_codepoint[4]); + +struct 
vgltf_string { +  char *data; +  size_t length; +}; +struct vgltf_string +vgltf_string_from_null_terminated(struct vgltf_allocator *allocator, +                                const char *str); +struct vgltf_string vgltf_string_clone(struct vgltf_allocator *allocator, +                                   const struct vgltf_string string); +struct vgltf_string vgltf_string_concatenate(struct vgltf_allocator *allocator, +                                         struct vgltf_string_view head, +                                         struct vgltf_string_view tail); +struct vgltf_string vgltf_string_formatted(struct vgltf_allocator *allocator, +                                       struct vgltf_string_view fmt, ...); +struct vgltf_string vgltf_string_vformatted(struct vgltf_allocator *allocator, +                                        struct vgltf_string_view fmt, +                                        va_list args); +void vgltf_string_deinit(struct vgltf_allocator *allocator, +                       struct vgltf_string *string); +size_t vgltf_string_length(const struct vgltf_string *string); +bool vgltf_string_eq_view(const struct vgltf_string string, +                        const struct vgltf_string_view view); +uint64_t vgltf_string_hash(const struct vgltf_string string); +bool vgltf_string_eq(struct vgltf_string string, struct vgltf_string other); + +#endif // VGLTF_STR_H diff --git a/thirdpartylicenses.md b/thirdpartylicenses.md new file mode 100644 index 0000000..3d898b6 --- /dev/null +++ b/thirdpartylicenses.md @@ -0,0 +1,46 @@ +# stb_image: +Public Domain + +# vk_mem_alloc: +Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +# tinyobjloader_c: +The MIT License (MIT) + +Copyright (c) 2016 - 2019 Syoyo Fujita and many contributors. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/vendor/stb_image.h b/vendor/stb_image.h new file mode 100644 index 0000000..9eedabe --- /dev/null +++ b/vendor/stb_image.h @@ -0,0 +1,7988 @@ +/* stb_image - v2.30 - public domain image loader - http://nothings.org/stb +                                  no warranty implied; use at your own risk + +   Do this: +      #define STB_IMAGE_IMPLEMENTATION +   before you include this file in *one* C or C++ file to create the implementation. + +   // i.e. it should look like this: +   #include ... +   #include ... +   #include ... +   #define STB_IMAGE_IMPLEMENTATION +   #include "stb_image.h" + +   You can #define STBI_ASSERT(x) before the #include to avoid using assert.h. +   And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free + + +   QUICK NOTES: +      Primarily of interest to game developers and other people who can +          avoid problematic images and only need the trivial interface + +      JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib) +      PNG 1/2/4/8/16-bit-per-channel + +      TGA (not sure what subset, if a subset) +      BMP non-1bpp, non-RLE +      PSD (composited view only, no extra channels, 8/16 bit-per-channel) + +      GIF (*comp always reports as 4-channel) +      HDR (radiance rgbE format) +      PIC (Softimage PIC) +      PNM (PPM and PGM binary only) + +      Animated GIF still needs a proper API, but here's one way to do it: +          http://gist.github.com/urraka/685d9a6340b26b830d49 + +      - decode from memory or through FILE (define STBI_NO_STDIO to remove code) +      - decode from arbitrary I/O callbacks +      - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON) + +   Full documentation under "DOCUMENTATION" below. + + +LICENSE + +  See end of file for license information. 
+ +RECENT REVISION HISTORY: + +      2.30  (2024-05-31) avoid erroneous gcc warning +      2.29  (2023-05-xx) optimizations +      2.28  (2023-01-29) many error fixes, security errors, just tons of stuff +      2.27  (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes +      2.26  (2020-07-13) many minor fixes +      2.25  (2020-02-02) fix warnings +      2.24  (2020-02-02) fix warnings; thread-local failure_reason and flip_vertically +      2.23  (2019-08-11) fix clang static analysis warning +      2.22  (2019-03-04) gif fixes, fix warnings +      2.21  (2019-02-25) fix typo in comment +      2.20  (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs +      2.19  (2018-02-11) fix warning +      2.18  (2018-01-30) fix warnings +      2.17  (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings +      2.16  (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes +      2.15  (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC +      2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs +      2.13  (2016-12-04) experimental 16-bit API, only for PNG so far; fixes +      2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes +      2.11  (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64 +                         RGB-format JPEG; remove white matting in PSD; +                         allocate large structures on the stack; +                         correct channel count for PNG & BMP +      2.10  (2016-01-22) avoid warning introduced in 2.09 +      2.09  (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED + +   See end of file for full revision history. + + + ============================    Contributors    ========================= + + Image formats                          Extensions, features +    Sean Barrett (jpeg, png, bmp)          Jetro Lauha (stbi_info) +    Nicolas Schulz (hdr, psd)              Martin "SpartanJ" Golini (stbi_info) +    Jonathan Dummer (tga)                  James "moose2000" Brown (iPhone PNG) +    Jean-Marc Lienher (gif)                Ben "Disch" Wenger (io callbacks) +    Tom Seddon (pic)                       Omar Cornut (1/2/4-bit PNG) +    Thatcher Ulrich (psd)                  Nicolas Guillemot (vertical flip) +    Ken Miller (pgm, ppm)                  Richard Mitton (16-bit PSD) +    github:urraka (animated gif)           Junggon Kim (PNM comments) +    Christopher Forseth (animated gif)     Daniel Gibson (16-bit TGA) +                                           socks-the-fox (16-bit PNG) +                                           Jeremy Sawicki (handle all ImageNet JPGs) + Optimizations & bugfixes                  Mikhail Morozov (1-bit BMP) +    Fabian "ryg" Giesen                    Anael Seghezzi (is-16-bit query) +    Arseny Kapoulkine                      Simon Breuss (16-bit PNM) +    John-Mark Allen +    Carmelo J Fdez-Aguera + + Bug & warning fixes +    Marc LeBlanc            David Woo          Guillaume George     Martins Mozeiko +    Christpher Lloyd        Jerry Jansson      Joseph Thomson       Blazej Dariusz Roszkowski +    Phil Jordan                                Dave Moore           Roy Eltham +    Hayaki Saito            Nathan Reed        Won Chun +    Luke Graham             Johan Duparc       Nick Verigakis       the Horde3D community +    Thomas Ruf              Ronny Chevalier                         github:rlyeh +    Janez Zemva             John Bartholomew   Michal Cichon        
github:romigrou +    Jonathan Blow           Ken Hamada         Tero Hanninen        github:svdijk +    Eugene Golushkov        Laurent Gomila     Cort Stratton        github:snagar +    Aruelien Pocheville     Sergio Gonzalez    Thibault Reuille     github:Zelex +    Cass Everitt            Ryamond Barbiero                        github:grim210 +    Paul Du Bois            Engin Manap        Aldo Culquicondor    github:sammyhw +    Philipp Wiesemann       Dale Weiler        Oriol Ferrer Mesia   github:phprus +    Josh Tobin              Neil Bickford      Matthew Gregan       github:poppolopoppo +    Julian Raschke          Gregory Mullen     Christian Floisand   github:darealshinji +    Baldur Karlsson         Kevin Schmidt      JR Smith             github:Michaelangel007 +                            Brad Weinberger    Matvey Cherevko      github:mosra +    Luca Sas                Alexander Veselov  Zack Middleton       [reserved] +    Ryan C. Gordon          [reserved]                              [reserved] +                     DO NOT ADD YOUR NAME HERE + +                     Jacko Dirks + +  To add your name to the credits, pick a random blank space in the middle and fill it. +  80% of merge conflicts on stb PRs are due to people adding their name at the end +  of the credits. +*/ + +#ifndef STBI_INCLUDE_STB_IMAGE_H +#define STBI_INCLUDE_STB_IMAGE_H + +// DOCUMENTATION +// +// Limitations: +//    - no 12-bit-per-channel JPEG +//    - no JPEGs with arithmetic coding +//    - GIF always returns *comp=4 +// +// Basic usage (see HDR discussion below for HDR usage): +//    int x,y,n; +//    unsigned char *data = stbi_load(filename, &x, &y, &n, 0); +//    // ... process data if not NULL ... +//    // ... x = width, y = height, n = # 8-bit components per pixel ... +//    // ... replace '0' with '1'..'4' to force that many components per pixel +//    // ... but 'n' will always be the number that it would have been if you said 0 +//    stbi_image_free(data); +// +// Standard parameters: +//    int *x                 -- outputs image width in pixels +//    int *y                 -- outputs image height in pixels +//    int *channels_in_file  -- outputs # of image components in image file +//    int desired_channels   -- if non-zero, # of image components requested in result +// +// The return value from an image loader is an 'unsigned char *' which points +// to the pixel data, or NULL on an allocation failure or if the image is +// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels, +// with each pixel consisting of N interleaved 8-bit components; the first +// pixel pointed to is top-left-most in the image. There is no padding between +// image scanlines or between pixels, regardless of format. The number of +// components N is 'desired_channels' if desired_channels is non-zero, or +// *channels_in_file otherwise. If desired_channels is non-zero, +// *channels_in_file has the number of components that _would_ have been +// output otherwise. E.g. if you set desired_channels to 4, you will always +// get RGBA output, but you can check *channels_in_file to see if it's trivially +// opaque because e.g. there were only 3 channels in the source image. 
+// +// An output image with N components has the following components interleaved +// in this order in each pixel: +// +//     N=#comp     components +//       1           grey +//       2           grey, alpha +//       3           red, green, blue +//       4           red, green, blue, alpha +// +// If image loading fails for any reason, the return value will be NULL, +// and *x, *y, *channels_in_file will be unchanged. The function +// stbi_failure_reason() can be queried for an extremely brief, end-user +// unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS +// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly +// more user-friendly ones. +// +// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized. +// +// To query the width, height and component count of an image without having to +// decode the full file, you can use the stbi_info family of functions: +// +//   int x,y,n,ok; +//   ok = stbi_info(filename, &x, &y, &n); +//   // returns ok=1 and sets x, y, n if image is a supported format, +//   // 0 otherwise. +// +// Note that stb_image pervasively uses ints in its public API for sizes, +// including sizes of memory buffers. This is now part of the API and thus +// hard to change without causing breakage. As a result, the various image +// loaders all have certain limits on image size; these differ somewhat +// by format but generally boil down to either just under 2GB or just under +// 1GB. When the decoded image would be larger than this, stb_image decoding +// will fail. +// +// Additionally, stb_image will reject image files that have any of their +// dimensions set to a larger value than the configurable STBI_MAX_DIMENSIONS, +// which defaults to 2**24 = 16777216 pixels. Due to the above memory limit, +// the only way to have an image with such dimensions load correctly +// is for it to have a rather extreme aspect ratio. Either way, the +// assumption here is that such larger images are likely to be malformed +// or malicious. If you do need to load an image with individual dimensions +// larger than that, and it still fits in the overall size limit, you can +// #define STBI_MAX_DIMENSIONS on your own to be something larger. +// +// =========================================================================== +// +// UNICODE: +// +//   If compiling for Windows and you wish to use Unicode filenames, compile +//   with +//       #define STBI_WINDOWS_UTF8 +//   and pass utf8-encoded filenames. Call stbi_convert_wchar_to_utf8 to convert +//   Windows wchar_t filenames to utf8. +// +// =========================================================================== +// +// Philosophy +// +// stb libraries are designed with the following priorities: +// +//    1. easy to use +//    2. easy to maintain +//    3. good performance +// +// Sometimes I let "good performance" creep up in priority over "easy to maintain", +// and for best performance I may provide less-easy-to-use APIs that give higher +// performance, in addition to the easy-to-use ones. Nevertheless, it's important +// to keep in mind that from the standpoint of you, a client of this library, +// all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all. +// +// Some secondary priorities arise directly from the first two, some of which +// provide more explicit reasons why performance can't be emphasized. 
+// +//    - Portable ("ease of use") +//    - Small source code footprint ("easy to maintain") +//    - No dependencies ("ease of use") +// +// =========================================================================== +// +// I/O callbacks +// +// I/O callbacks allow you to read from arbitrary sources, like packaged +// files or some other source. Data read from callbacks are processed +// through a small internal buffer (currently 128 bytes) to try to reduce +// overhead. +// +// The three functions you must define are "read" (reads some bytes of data), +// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end). +// +// =========================================================================== +// +// SIMD support +// +// The JPEG decoder will try to automatically use SIMD kernels on x86 when +// supported by the compiler. For ARM Neon support, you must explicitly +// request it. +// +// (The old do-it-yourself SIMD API is no longer supported in the current +// code.) +// +// On x86, SSE2 will automatically be used when available based on a run-time +// test; if not, the generic C versions are used as a fall-back. On ARM targets, +// the typical path is to have separate builds for NEON and non-NEON devices +// (at least this is true for iOS and Android). Therefore, the NEON support is +// toggled by a build flag: define STBI_NEON to get NEON loops. +// +// If for some reason you do not want to use any of SIMD code, or if +// you have issues compiling it, you can disable it entirely by +// defining STBI_NO_SIMD. +// +// =========================================================================== +// +// HDR image support   (disable by defining STBI_NO_HDR) +// +// stb_image supports loading HDR images in general, and currently the Radiance +// .HDR file format specifically. You can still load any file through the existing +// interface; if you attempt to load an HDR file, it will be automatically remapped +// to LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1; +// both of these constants can be reconfigured through this interface: +// +//     stbi_hdr_to_ldr_gamma(2.2f); +//     stbi_hdr_to_ldr_scale(1.0f); +// +// (note, do not use _inverse_ constants; stbi_image will invert them +// appropriately). +// +// Additionally, there is a new, parallel interface for loading files as +// (linear) floats to preserve the full dynamic range: +// +//    float *data = stbi_loadf(filename, &x, &y, &n, 0); +// +// If you load LDR images through this interface, those images will +// be promoted to floating point values, run through the inverse of +// constants corresponding to the above: +// +//     stbi_ldr_to_hdr_scale(1.0f); +//     stbi_ldr_to_hdr_gamma(2.2f); +// +// Finally, given a filename (or an open file or memory block--see header +// file for details) containing image data, you can query for the "most +// appropriate" interface to use (that is, whether the image is HDR or +// not), using: +// +//     stbi_is_hdr(char *filename); +// +// =========================================================================== +// +// iPhone PNG support: +// +// We optionally support converting iPhone-formatted PNGs (which store +// premultiplied BGRA) back to RGB, even though they're internally encoded +// differently. To enable this conversion, call +// stbi_convert_iphone_png_to_rgb(1). 
+// +// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per +// pixel to remove any premultiplied alpha *only* if the image file explicitly +// says there's premultiplied data (currently only happens in iPhone images, +// and only if iPhone convert-to-rgb processing is on). +// +// =========================================================================== +// +// ADDITIONAL CONFIGURATION +// +//  - You can suppress implementation of any of the decoders to reduce +//    your code footprint by #defining one or more of the following +//    symbols before creating the implementation. +// +//        STBI_NO_JPEG +//        STBI_NO_PNG +//        STBI_NO_BMP +//        STBI_NO_PSD +//        STBI_NO_TGA +//        STBI_NO_GIF +//        STBI_NO_HDR +//        STBI_NO_PIC +//        STBI_NO_PNM   (.ppm and .pgm) +// +//  - You can request *only* certain decoders and suppress all other ones +//    (this will be more forward-compatible, as addition of new decoders +//    doesn't require you to disable them explicitly): +// +//        STBI_ONLY_JPEG +//        STBI_ONLY_PNG +//        STBI_ONLY_BMP +//        STBI_ONLY_PSD +//        STBI_ONLY_TGA +//        STBI_ONLY_GIF +//        STBI_ONLY_HDR +//        STBI_ONLY_PIC +//        STBI_ONLY_PNM   (.ppm and .pgm) +// +//   - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still +//     want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB +// +//  - If you define STBI_MAX_DIMENSIONS, stb_image will reject images greater +//    than that size (in either width or height) without further processing. +//    This is to let programs in the wild set an upper bound to prevent +//    denial-of-service attacks on untrusted data, as one could generate a +//    valid image of gigantic dimensions and force stb_image to allocate a +//    huge block of memory and spend disproportionate time decoding it. By +//    default this is set to (1 << 24), which is 16777216, but that's still +//    very big. + +#ifndef STBI_NO_STDIO +#include <stdio.h> +#endif // STBI_NO_STDIO + +#define STBI_VERSION 1 + +enum +{ +   STBI_default = 0, // only used for desired_channels + +   STBI_grey       = 1, +   STBI_grey_alpha = 2, +   STBI_rgb        = 3, +   STBI_rgb_alpha  = 4 +}; + +#include <stdlib.h> +typedef unsigned char stbi_uc; +typedef unsigned short stbi_us; + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef STBIDEF +#ifdef STB_IMAGE_STATIC +#define STBIDEF static +#else +#define STBIDEF extern +#endif +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// PRIMARY API - works on images of any type +// + +// +// load image by filename, open file, or memory buffer +// + +typedef struct +{ +   int      (*read)  (void *user,char *data,int size);   // fill 'data' with 'size' bytes.  
return number of bytes actually read +   void     (*skip)  (void *user,int n);                 // skip the next 'n' bytes, or 'unget' the last -n bytes if negative +   int      (*eof)   (void *user);                       // returns nonzero if we are at end of file/data +} stbi_io_callbacks; + +//////////////////////////////////// +// +// 8-bits-per-channel interface +// + +STBIDEF stbi_uc *stbi_load_from_memory   (stbi_uc           const *buffer, int len   , int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk  , void *user, int *x, int *y, int *channels_in_file, int desired_channels); + +#ifndef STBI_NO_STDIO +STBIDEF stbi_uc *stbi_load            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); +// for stbi_load_from_file, file pointer is left pointing immediately after image +#endif + +#ifndef STBI_NO_GIF +STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp); +#endif + +#ifdef STBI_WINDOWS_UTF8 +STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input); +#endif + +//////////////////////////////////// +// +// 16-bits-per-channel interface +// + +STBIDEF stbi_us *stbi_load_16_from_memory   (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); + +#ifndef STBI_NO_STDIO +STBIDEF stbi_us *stbi_load_16          (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); +#endif + +//////////////////////////////////// +// +// float-per-channel interface +// +#ifndef STBI_NO_LINEAR +   STBIDEF float *stbi_loadf_from_memory     (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); +   STBIDEF float *stbi_loadf_from_callbacks  (stbi_io_callbacks const *clbk, void *user, int *x, int *y,  int *channels_in_file, int desired_channels); + +   #ifndef STBI_NO_STDIO +   STBIDEF float *stbi_loadf            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +   STBIDEF float *stbi_loadf_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); +   #endif +#endif + +#ifndef STBI_NO_HDR +   STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma); +   STBIDEF void   stbi_hdr_to_ldr_scale(float scale); +#endif // STBI_NO_HDR + +#ifndef STBI_NO_LINEAR +   STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma); +   STBIDEF void   stbi_ldr_to_hdr_scale(float scale); +#endif // STBI_NO_LINEAR + +// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR +STBIDEF int    stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user); +STBIDEF int    stbi_is_hdr_from_memory(stbi_uc const *buffer, int len); +#ifndef STBI_NO_STDIO +STBIDEF int      stbi_is_hdr          (char const *filename); +STBIDEF int      stbi_is_hdr_from_file(FILE *f); +#endif // STBI_NO_STDIO + + +// get a VERY brief reason for failure +// on most compilers (and ALL modern mainstream compilers) this is threadsafe +STBIDEF const char *stbi_failure_reason  (void); + +// 
free the loaded image -- this is just free() +STBIDEF void     stbi_image_free      (void *retval_from_stbi_load); + +// get image dimensions & components without fully decoding +STBIDEF int      stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); +STBIDEF int      stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp); +STBIDEF int      stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len); +STBIDEF int      stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user); + +#ifndef STBI_NO_STDIO +STBIDEF int      stbi_info               (char const *filename,     int *x, int *y, int *comp); +STBIDEF int      stbi_info_from_file     (FILE *f,                  int *x, int *y, int *comp); +STBIDEF int      stbi_is_16_bit          (char const *filename); +STBIDEF int      stbi_is_16_bit_from_file(FILE *f); +#endif + + + +// for image formats that explicitly notate that they have premultiplied alpha, +// we just return the colors as stored in the file. set this flag to force +// unpremultiplication. results are undefined if the unpremultiply overflow. +STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply); + +// indicate whether we should process iphone images back to canonical format, +// or just pass them through "as-is" +STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert); + +// flip the image vertically, so the first pixel in the output array is the bottom left +STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip); + +// as above, but only applies to images loaded on the thread that calls the function +// this function is only available if your compiler supports thread-local variables; +// calling it will fail to link if your compiler doesn't +STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply); +STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert); +STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip); + +// ZLIB client - used by PNG, available for other purposes + +STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen); +STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header); +STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen); +STBIDEF int   stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + +STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen); +STBIDEF int   stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + + +#ifdef __cplusplus +} +#endif + +// +// +////   end header file   ///////////////////////////////////////////////////// +#endif // STBI_INCLUDE_STB_IMAGE_H + +#ifdef STB_IMAGE_IMPLEMENTATION + +#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \ +  || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \ +  || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \ +  || defined(STBI_ONLY_ZLIB) +   #ifndef STBI_ONLY_JPEG +   #define STBI_NO_JPEG +   #endif +   #ifndef STBI_ONLY_PNG +   #define STBI_NO_PNG +   #endif +   #ifndef STBI_ONLY_BMP +   #define STBI_NO_BMP +   #endif +   #ifndef STBI_ONLY_PSD +   #define STBI_NO_PSD +   #endif +   #ifndef STBI_ONLY_TGA +   #define 
STBI_NO_TGA +   #endif +   #ifndef STBI_ONLY_GIF +   #define STBI_NO_GIF +   #endif +   #ifndef STBI_ONLY_HDR +   #define STBI_NO_HDR +   #endif +   #ifndef STBI_ONLY_PIC +   #define STBI_NO_PIC +   #endif +   #ifndef STBI_ONLY_PNM +   #define STBI_NO_PNM +   #endif +#endif + +#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB) +#define STBI_NO_ZLIB +#endif + + +#include <stdarg.h> +#include <stddef.h> // ptrdiff_t on osx +#include <stdlib.h> +#include <string.h> +#include <limits.h> + +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) +#include <math.h>  // ldexp, pow +#endif + +#ifndef STBI_NO_STDIO +#include <stdio.h> +#endif + +#ifndef STBI_ASSERT +#include <assert.h> +#define STBI_ASSERT(x) assert(x) +#endif + +#ifdef __cplusplus +#define STBI_EXTERN extern "C" +#else +#define STBI_EXTERN extern +#endif + + +#ifndef _MSC_VER +   #ifdef __cplusplus +   #define stbi_inline inline +   #else +   #define stbi_inline +   #endif +#else +   #define stbi_inline __forceinline +#endif + +#ifndef STBI_NO_THREAD_LOCALS +   #if defined(__cplusplus) &&  __cplusplus >= 201103L +      #define STBI_THREAD_LOCAL       thread_local +   #elif defined(__GNUC__) && __GNUC__ < 5 +      #define STBI_THREAD_LOCAL       __thread +   #elif defined(_MSC_VER) +      #define STBI_THREAD_LOCAL       __declspec(thread) +   #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_THREADS__) +      #define STBI_THREAD_LOCAL       _Thread_local +   #endif + +   #ifndef STBI_THREAD_LOCAL +      #if defined(__GNUC__) +        #define STBI_THREAD_LOCAL       __thread +      #endif +   #endif +#endif + +#if defined(_MSC_VER) || defined(__SYMBIAN32__) +typedef unsigned short stbi__uint16; +typedef   signed short stbi__int16; +typedef unsigned int   stbi__uint32; +typedef   signed int   stbi__int32; +#else +#include <stdint.h> +typedef uint16_t stbi__uint16; +typedef int16_t  stbi__int16; +typedef uint32_t stbi__uint32; +typedef int32_t  stbi__int32; +#endif + +// should produce compiler error if size is wrong +typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1]; + +#ifdef _MSC_VER +#define STBI_NOTUSED(v)  (void)(v) +#else +#define STBI_NOTUSED(v)  (void)sizeof(v) +#endif + +#ifdef _MSC_VER +#define STBI_HAS_LROTL +#endif + +#ifdef STBI_HAS_LROTL +   #define stbi_lrot(x,y)  _lrotl(x,y) +#else +   #define stbi_lrot(x,y)  (((x) << (y)) | ((x) >> (-(y) & 31))) +#endif + +#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED)) +// ok +#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED) +// ok +#else +#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)." +#endif + +#ifndef STBI_MALLOC +#define STBI_MALLOC(sz)           malloc(sz) +#define STBI_REALLOC(p,newsz)     realloc(p,newsz) +#define STBI_FREE(p)              free(p) +#endif + +#ifndef STBI_REALLOC_SIZED +#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz) +#endif + +// x86/x64 detection +#if defined(__x86_64__) || defined(_M_X64) +#define STBI__X64_TARGET +#elif defined(__i386) || defined(_M_IX86) +#define STBI__X86_TARGET +#endif + +#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD) +// gcc doesn't support sse2 intrinsics unless you compile with -msse2, +// which in turn means it gets to use SSE2 everywhere. 
This is unfortunate, +// but previous attempts to provide the SSE2 functions with runtime +// detection caused numerous issues. The way architecture extensions are +// exposed in GCC/Clang is, sadly, not really suited for one-file libs. +// New behavior: if compiled with -msse2, we use SSE2 without any +// detection; if not, we don't use it at all. +#define STBI_NO_SIMD +#endif + +#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD) +// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET +// +// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the +// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant. +// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not +// simultaneously enabling "-mstackrealign". +// +// See https://github.com/nothings/stb/issues/81 for more information. +// +// So default to no SSE2 on 32-bit MinGW. If you've read this far and added +// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2. +#define STBI_NO_SIMD +#endif + +#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET)) +#define STBI_SSE2 +#include <emmintrin.h> + +#ifdef _MSC_VER + +#if _MSC_VER >= 1400  // not VC6 +#include <intrin.h> // __cpuid +static int stbi__cpuid3(void) +{ +   int info[4]; +   __cpuid(info,1); +   return info[3]; +} +#else +static int stbi__cpuid3(void) +{ +   int res; +   __asm { +      mov  eax,1 +      cpuid +      mov  res,edx +   } +   return res; +} +#endif + +#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name + +#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2) +static int stbi__sse2_available(void) +{ +   int info3 = stbi__cpuid3(); +   return ((info3 >> 26) & 1) != 0; +} +#endif + +#else // assume GCC-style if not VC++ +#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) + +#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2) +static int stbi__sse2_available(void) +{ +   // If we're even attempting to compile this on GCC/Clang, that means +   // -msse2 is on, which means the compiler is allowed to use SSE2 +   // instructions at will, and so are we. 
+   return 1; +} +#endif + +#endif +#endif + +// ARM NEON +#if defined(STBI_NO_SIMD) && defined(STBI_NEON) +#undef STBI_NEON +#endif + +#ifdef STBI_NEON +#include <arm_neon.h> +#ifdef _MSC_VER +#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name +#else +#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) +#endif +#endif + +#ifndef STBI_SIMD_ALIGN +#define STBI_SIMD_ALIGN(type, name) type name +#endif + +#ifndef STBI_MAX_DIMENSIONS +#define STBI_MAX_DIMENSIONS (1 << 24) +#endif + +/////////////////////////////////////////////// +// +//  stbi__context struct and start_xxx functions + +// stbi__context structure is our basic context used by all images, so it +// contains all the IO context, plus some basic image information +typedef struct +{ +   stbi__uint32 img_x, img_y; +   int img_n, img_out_n; + +   stbi_io_callbacks io; +   void *io_user_data; + +   int read_from_callbacks; +   int buflen; +   stbi_uc buffer_start[128]; +   int callback_already_read; + +   stbi_uc *img_buffer, *img_buffer_end; +   stbi_uc *img_buffer_original, *img_buffer_original_end; +} stbi__context; + + +static void stbi__refill_buffer(stbi__context *s); + +// initialize a memory-decode context +static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len) +{ +   s->io.read = NULL; +   s->read_from_callbacks = 0; +   s->callback_already_read = 0; +   s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer; +   s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len; +} + +// initialize a callback-based context +static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user) +{ +   s->io = *c; +   s->io_user_data = user; +   s->buflen = sizeof(s->buffer_start); +   s->read_from_callbacks = 1; +   s->callback_already_read = 0; +   s->img_buffer = s->img_buffer_original = s->buffer_start; +   stbi__refill_buffer(s); +   s->img_buffer_original_end = s->img_buffer_end; +} + +#ifndef STBI_NO_STDIO + +static int stbi__stdio_read(void *user, char *data, int size) +{ +   return (int) fread(data,1,size,(FILE*) user); +} + +static void stbi__stdio_skip(void *user, int n) +{ +   int ch; +   fseek((FILE*) user, n, SEEK_CUR); +   ch = fgetc((FILE*) user);  /* have to read a byte to reset feof()'s flag */ +   if (ch != EOF) { +      ungetc(ch, (FILE *) user);  /* push byte back onto stream if valid. 
*/ +   } +} + +static int stbi__stdio_eof(void *user) +{ +   return feof((FILE*) user) || ferror((FILE *) user); +} + +static stbi_io_callbacks stbi__stdio_callbacks = +{ +   stbi__stdio_read, +   stbi__stdio_skip, +   stbi__stdio_eof, +}; + +static void stbi__start_file(stbi__context *s, FILE *f) +{ +   stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f); +} + +//static void stop_file(stbi__context *s) { } + +#endif // !STBI_NO_STDIO + +static void stbi__rewind(stbi__context *s) +{ +   // conceptually rewind SHOULD rewind to the beginning of the stream, +   // but we just rewind to the beginning of the initial buffer, because +   // we only use it after doing 'test', which only ever looks at at most 92 bytes +   s->img_buffer = s->img_buffer_original; +   s->img_buffer_end = s->img_buffer_original_end; +} + +enum +{ +   STBI_ORDER_RGB, +   STBI_ORDER_BGR +}; + +typedef struct +{ +   int bits_per_channel; +   int num_channels; +   int channel_order; +} stbi__result_info; + +#ifndef STBI_NO_JPEG +static int      stbi__jpeg_test(stbi__context *s); +static void    *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int      stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PNG +static int      stbi__png_test(stbi__context *s); +static void    *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int      stbi__png_info(stbi__context *s, int *x, int *y, int *comp); +static int      stbi__png_is16(stbi__context *s); +#endif + +#ifndef STBI_NO_BMP +static int      stbi__bmp_test(stbi__context *s); +static void    *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int      stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_TGA +static int      stbi__tga_test(stbi__context *s); +static void    *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int      stbi__tga_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PSD +static int      stbi__psd_test(stbi__context *s); +static void    *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc); +static int      stbi__psd_info(stbi__context *s, int *x, int *y, int *comp); +static int      stbi__psd_is16(stbi__context *s); +#endif + +#ifndef STBI_NO_HDR +static int      stbi__hdr_test(stbi__context *s); +static float   *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int      stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PIC +static int      stbi__pic_test(stbi__context *s); +static void    *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int      stbi__pic_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_GIF +static int      stbi__gif_test(stbi__context *s); +static void    *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static void    *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp); +static int      stbi__gif_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PNM +static int      stbi__pnm_test(stbi__context *s); +static void    *stbi__pnm_load(stbi__context *s, 
int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp); +static int      stbi__pnm_is16(stbi__context *s); +#endif + +static +#ifdef STBI_THREAD_LOCAL +STBI_THREAD_LOCAL +#endif +const char *stbi__g_failure_reason; + +STBIDEF const char *stbi_failure_reason(void) +{ +   return stbi__g_failure_reason; +} + +#ifndef STBI_NO_FAILURE_STRINGS +static int stbi__err(const char *str) +{ +   stbi__g_failure_reason = str; +   return 0; +} +#endif + +static void *stbi__malloc(size_t size) +{ +    return STBI_MALLOC(size); +} + +// stb_image uses ints pervasively, including for offset calculations. +// therefore the largest decoded image size we can support with the +// current code, even on 64-bit targets, is INT_MAX. this is not a +// significant limitation for the intended use case. +// +// we do, however, need to make sure our size calculations don't +// overflow. hence a few helper functions for size calculations that +// multiply integers together, making sure that they're non-negative +// and no overflow occurs. + +// return 1 if the sum is valid, 0 on overflow. +// negative terms are considered invalid. +static int stbi__addsizes_valid(int a, int b) +{ +   if (b < 0) return 0; +   // now 0 <= b <= INT_MAX, hence also +   // 0 <= INT_MAX - b <= INTMAX. +   // And "a + b <= INT_MAX" (which might overflow) is the +   // same as a <= INT_MAX - b (no overflow) +   return a <= INT_MAX - b; +} + +// returns 1 if the product is valid, 0 on overflow. +// negative factors are considered invalid. +static int stbi__mul2sizes_valid(int a, int b) +{ +   if (a < 0 || b < 0) return 0; +   if (b == 0) return 1; // mul-by-0 is always safe +   // portable way to check for no overflows in a*b +   return a <= INT_MAX/b; +} + +#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR) +// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow +static int stbi__mad2sizes_valid(int a, int b, int add) +{ +   return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a*b, add); +} +#endif + +// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow +static int stbi__mad3sizes_valid(int a, int b, int c, int add) +{ +   return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && +      stbi__addsizes_valid(a*b*c, add); +} + +// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM) +static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add) +{ +   return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && +      stbi__mul2sizes_valid(a*b*c, d) && stbi__addsizes_valid(a*b*c*d, add); +} +#endif + +#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR) +// mallocs with size overflow checking +static void *stbi__malloc_mad2(int a, int b, int add) +{ +   if (!stbi__mad2sizes_valid(a, b, add)) return NULL; +   return stbi__malloc(a*b + add); +} +#endif + +static void *stbi__malloc_mad3(int a, int b, int c, int add) +{ +   if (!stbi__mad3sizes_valid(a, b, c, add)) return NULL; +   return stbi__malloc(a*b*c + add); +} + +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM) +static void *stbi__malloc_mad4(int a, int b, int c, int d, int add) +{ +   if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL; +   return stbi__malloc(a*b*c*d + add); +} 
+#endif + +// returns 1 if the sum of two signed ints is valid (between -2^31 and 2^31-1 inclusive), 0 on overflow. +static int stbi__addints_valid(int a, int b) +{ +   if ((a >= 0) != (b >= 0)) return 1; // a and b have different signs, so no overflow +   if (a < 0 && b < 0) return a >= INT_MIN - b; // same as a + b >= INT_MIN; INT_MIN - b cannot overflow since b < 0. +   return a <= INT_MAX - b; +} + +// returns 1 if the product of two ints fits in a signed short, 0 on overflow. +static int stbi__mul2shorts_valid(int a, int b) +{ +   if (b == 0 || b == -1) return 1; // multiplication by 0 is always 0; check for -1 so SHRT_MIN/b doesn't overflow +   if ((a >= 0) == (b >= 0)) return a <= SHRT_MAX/b; // product is positive, so similar to mul2sizes_valid +   if (b < 0) return a <= SHRT_MIN / b; // same as a * b >= SHRT_MIN +   return a >= SHRT_MIN / b; +} + +// stbi__err - error +// stbi__errpf - error returning pointer to float +// stbi__errpuc - error returning pointer to unsigned char + +#ifdef STBI_NO_FAILURE_STRINGS +   #define stbi__err(x,y)  0 +#elif defined(STBI_FAILURE_USERMSG) +   #define stbi__err(x,y)  stbi__err(y) +#else +   #define stbi__err(x,y)  stbi__err(x) +#endif + +#define stbi__errpf(x,y)   ((float *)(size_t) (stbi__err(x,y)?NULL:NULL)) +#define stbi__errpuc(x,y)  ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL)) + +STBIDEF void stbi_image_free(void *retval_from_stbi_load) +{ +   STBI_FREE(retval_from_stbi_load); +} + +#ifndef STBI_NO_LINEAR +static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp); +#endif + +#ifndef STBI_NO_HDR +static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp); +#endif + +static int stbi__vertically_flip_on_load_global = 0; + +STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip) +{ +   stbi__vertically_flip_on_load_global = flag_true_if_should_flip; +} + +#ifndef STBI_THREAD_LOCAL +#define stbi__vertically_flip_on_load  stbi__vertically_flip_on_load_global +#else +static STBI_THREAD_LOCAL int stbi__vertically_flip_on_load_local, stbi__vertically_flip_on_load_set; + +STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip) +{ +   stbi__vertically_flip_on_load_local = flag_true_if_should_flip; +   stbi__vertically_flip_on_load_set = 1; +} + +#define stbi__vertically_flip_on_load  (stbi__vertically_flip_on_load_set       \ +                                         ? 
stbi__vertically_flip_on_load_local  \ +                                         : stbi__vertically_flip_on_load_global) +#endif // STBI_THREAD_LOCAL + +static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) +{ +   memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields +   ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed +   ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order +   ri->num_channels = 0; + +   // test the formats with a very explicit header first (at least a FOURCC +   // or distinctive magic number first) +   #ifndef STBI_NO_PNG +   if (stbi__png_test(s))  return stbi__png_load(s,x,y,comp,req_comp, ri); +   #endif +   #ifndef STBI_NO_BMP +   if (stbi__bmp_test(s))  return stbi__bmp_load(s,x,y,comp,req_comp, ri); +   #endif +   #ifndef STBI_NO_GIF +   if (stbi__gif_test(s))  return stbi__gif_load(s,x,y,comp,req_comp, ri); +   #endif +   #ifndef STBI_NO_PSD +   if (stbi__psd_test(s))  return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc); +   #else +   STBI_NOTUSED(bpc); +   #endif +   #ifndef STBI_NO_PIC +   if (stbi__pic_test(s))  return stbi__pic_load(s,x,y,comp,req_comp, ri); +   #endif + +   // then the formats that can end up attempting to load with just 1 or 2 +   // bytes matching expectations; these are prone to false positives, so +   // try them later +   #ifndef STBI_NO_JPEG +   if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri); +   #endif +   #ifndef STBI_NO_PNM +   if (stbi__pnm_test(s))  return stbi__pnm_load(s,x,y,comp,req_comp, ri); +   #endif + +   #ifndef STBI_NO_HDR +   if (stbi__hdr_test(s)) { +      float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri); +      return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); +   } +   #endif + +   #ifndef STBI_NO_TGA +   // test tga last because it's a crappy test! 
+   if (stbi__tga_test(s)) +      return stbi__tga_load(s,x,y,comp,req_comp, ri); +   #endif + +   return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt"); +} + +static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels) +{ +   int i; +   int img_len = w * h * channels; +   stbi_uc *reduced; + +   reduced = (stbi_uc *) stbi__malloc(img_len); +   if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory"); + +   for (i = 0; i < img_len; ++i) +      reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling + +   STBI_FREE(orig); +   return reduced; +} + +static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels) +{ +   int i; +   int img_len = w * h * channels; +   stbi__uint16 *enlarged; + +   enlarged = (stbi__uint16 *) stbi__malloc(img_len*2); +   if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); + +   for (i = 0; i < img_len; ++i) +      enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff + +   STBI_FREE(orig); +   return enlarged; +} + +static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel) +{ +   int row; +   size_t bytes_per_row = (size_t)w * bytes_per_pixel; +   stbi_uc temp[2048]; +   stbi_uc *bytes = (stbi_uc *)image; + +   for (row = 0; row < (h>>1); row++) { +      stbi_uc *row0 = bytes + row*bytes_per_row; +      stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row; +      // swap row0 with row1 +      size_t bytes_left = bytes_per_row; +      while (bytes_left) { +         size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp); +         memcpy(temp, row0, bytes_copy); +         memcpy(row0, row1, bytes_copy); +         memcpy(row1, temp, bytes_copy); +         row0 += bytes_copy; +         row1 += bytes_copy; +         bytes_left -= bytes_copy; +      } +   } +} + +#ifndef STBI_NO_GIF +static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel) +{ +   int slice; +   int slice_size = w * h * bytes_per_pixel; + +   stbi_uc *bytes = (stbi_uc *)image; +   for (slice = 0; slice < z; ++slice) { +      stbi__vertical_flip(bytes, w, h, bytes_per_pixel); +      bytes += slice_size; +   } +} +#endif + +static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ +   stbi__result_info ri; +   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8); + +   if (result == NULL) +      return NULL; + +   // it is the responsibility of the loaders to make sure we get either 8 or 16 bit. +   STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16); + +   if (ri.bits_per_channel != 8) { +      result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp); +      ri.bits_per_channel = 8; +   } + +   // @TODO: move stbi__convert_format to here + +   if (stbi__vertically_flip_on_load) { +      int channels = req_comp ? 
req_comp : *comp; +      stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc)); +   } + +   return (unsigned char *) result; +} + +static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ +   stbi__result_info ri; +   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16); + +   if (result == NULL) +      return NULL; + +   // it is the responsibility of the loaders to make sure we get either 8 or 16 bit. +   STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16); + +   if (ri.bits_per_channel != 16) { +      result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp); +      ri.bits_per_channel = 16; +   } + +   // @TODO: move stbi__convert_format16 to here +   // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision + +   if (stbi__vertically_flip_on_load) { +      int channels = req_comp ? req_comp : *comp; +      stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16)); +   } + +   return (stbi__uint16 *) result; +} + +#if !defined(STBI_NO_HDR) && !defined(STBI_NO_LINEAR) +static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp) +{ +   if (stbi__vertically_flip_on_load && result != NULL) { +      int channels = req_comp ? req_comp : *comp; +      stbi__vertical_flip(result, *x, *y, channels * sizeof(float)); +   } +} +#endif + +#ifndef STBI_NO_STDIO + +#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8) +STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide); +STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default); +#endif + +#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8) +STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input) +{ +	return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL); +} +#endif + +static FILE *stbi__fopen(char const *filename, char const *mode) +{ +   FILE *f; +#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8) +   wchar_t wMode[64]; +   wchar_t wFilename[1024]; +	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename))) +      return 0; + +	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode))) +      return 0; + +#if defined(_MSC_VER) && _MSC_VER >= 1400 +	if (0 != _wfopen_s(&f, wFilename, wMode)) +		f = 0; +#else +   f = _wfopen(wFilename, wMode); +#endif + +#elif defined(_MSC_VER) && _MSC_VER >= 1400 +   if (0 != fopen_s(&f, filename, mode)) +      f=0; +#else +   f = fopen(filename, mode); +#endif +   return f; +} + + +STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ +   FILE *f = stbi__fopen(filename, "rb"); +   unsigned char *result; +   if (!f) return stbi__errpuc("can't fopen", "Unable to open file"); +   result = stbi_load_from_file(f,x,y,comp,req_comp); +   fclose(f); +   return result; +} + +STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ +   unsigned char *result; +   stbi__context s; +   stbi__start_file(&s,f); +   result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); +   if (result) { +      // need to 'unget' all the 
characters in the IO buffer +      fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); +   } +   return result; +} + +STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp) +{ +   stbi__uint16 *result; +   stbi__context s; +   stbi__start_file(&s,f); +   result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp); +   if (result) { +      // need to 'unget' all the characters in the IO buffer +      fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); +   } +   return result; +} + +STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp) +{ +   FILE *f = stbi__fopen(filename, "rb"); +   stbi__uint16 *result; +   if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file"); +   result = stbi_load_from_file_16(f,x,y,comp,req_comp); +   fclose(f); +   return result; +} + + +#endif //!STBI_NO_STDIO + +STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels) +{ +   stbi__context s; +   stbi__start_mem(&s,buffer,len); +   return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); +} + +STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels) +{ +   stbi__context s; +   stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user); +   return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); +} + +STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ +   stbi__context s; +   stbi__start_mem(&s,buffer,len); +   return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); +} + +STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) +{ +   stbi__context s; +   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); +   return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); +} + +#ifndef STBI_NO_GIF +STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp) +{ +   unsigned char *result; +   stbi__context s; +   stbi__start_mem(&s,buffer,len); + +   result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp); +   if (stbi__vertically_flip_on_load) { +      stbi__vertical_flip_slices( result, *x, *y, *z, *comp ); +   } + +   return result; +} +#endif + +#ifndef STBI_NO_LINEAR +static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ +   unsigned char *data; +   #ifndef STBI_NO_HDR +   if (stbi__hdr_test(s)) { +      stbi__result_info ri; +      float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri); +      if (hdr_data) +         stbi__float_postprocess(hdr_data,x,y,comp,req_comp); +      return hdr_data; +   } +   #endif +   data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp); +   if (data) +      return stbi__ldr_to_hdr(data, *x, *y, req_comp ? 
req_comp : *comp); +   return stbi__errpf("unknown image type", "Image not of any known type, or corrupt"); +} + +STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ +   stbi__context s; +   stbi__start_mem(&s,buffer,len); +   return stbi__loadf_main(&s,x,y,comp,req_comp); +} + +STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) +{ +   stbi__context s; +   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); +   return stbi__loadf_main(&s,x,y,comp,req_comp); +} + +#ifndef STBI_NO_STDIO +STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp) +{ +   float *result; +   FILE *f = stbi__fopen(filename, "rb"); +   if (!f) return stbi__errpf("can't fopen", "Unable to open file"); +   result = stbi_loadf_from_file(f,x,y,comp,req_comp); +   fclose(f); +   return result; +} + +STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ +   stbi__context s; +   stbi__start_file(&s,f); +   return stbi__loadf_main(&s,x,y,comp,req_comp); +} +#endif // !STBI_NO_STDIO + +#endif // !STBI_NO_LINEAR + +// these is-hdr-or-not is defined independent of whether STBI_NO_LINEAR is +// defined, for API simplicity; if STBI_NO_LINEAR is defined, it always +// reports false! + +STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len) +{ +   #ifndef STBI_NO_HDR +   stbi__context s; +   stbi__start_mem(&s,buffer,len); +   return stbi__hdr_test(&s); +   #else +   STBI_NOTUSED(buffer); +   STBI_NOTUSED(len); +   return 0; +   #endif +} + +#ifndef STBI_NO_STDIO +STBIDEF int      stbi_is_hdr          (char const *filename) +{ +   FILE *f = stbi__fopen(filename, "rb"); +   int result=0; +   if (f) { +      result = stbi_is_hdr_from_file(f); +      fclose(f); +   } +   return result; +} + +STBIDEF int stbi_is_hdr_from_file(FILE *f) +{ +   #ifndef STBI_NO_HDR +   long pos = ftell(f); +   int res; +   stbi__context s; +   stbi__start_file(&s,f); +   res = stbi__hdr_test(&s); +   fseek(f, pos, SEEK_SET); +   return res; +   #else +   STBI_NOTUSED(f); +   return 0; +   #endif +} +#endif // !STBI_NO_STDIO + +STBIDEF int      stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user) +{ +   #ifndef STBI_NO_HDR +   stbi__context s; +   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); +   return stbi__hdr_test(&s); +   #else +   STBI_NOTUSED(clbk); +   STBI_NOTUSED(user); +   return 0; +   #endif +} + +#ifndef STBI_NO_LINEAR +static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f; + +STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; } +STBIDEF void   stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; } +#endif + +static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f; + +STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; } +STBIDEF void   stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; } + + +////////////////////////////////////////////////////////////////////////////// +// +// Common code used by all image loaders +// + +enum +{ +   STBI__SCAN_load=0, +   STBI__SCAN_type, +   STBI__SCAN_header +}; + +static void stbi__refill_buffer(stbi__context *s) +{ +   int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen); +   s->callback_already_read += (int) (s->img_buffer - s->img_buffer_original); +   if (n == 0) { +      // at end of file, treat same as if from memory, but need to handle 
case +      // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file +      s->read_from_callbacks = 0; +      s->img_buffer = s->buffer_start; +      s->img_buffer_end = s->buffer_start+1; +      *s->img_buffer = 0; +   } else { +      s->img_buffer = s->buffer_start; +      s->img_buffer_end = s->buffer_start + n; +   } +} + +stbi_inline static stbi_uc stbi__get8(stbi__context *s) +{ +   if (s->img_buffer < s->img_buffer_end) +      return *s->img_buffer++; +   if (s->read_from_callbacks) { +      stbi__refill_buffer(s); +      return *s->img_buffer++; +   } +   return 0; +} + +#if defined(STBI_NO_JPEG) && defined(STBI_NO_HDR) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM) +// nothing +#else +stbi_inline static int stbi__at_eof(stbi__context *s) +{ +   if (s->io.read) { +      if (!(s->io.eof)(s->io_user_data)) return 0; +      // if feof() is true, check if buffer = end +      // special case: we've only got the special 0 character at the end +      if (s->read_from_callbacks == 0) return 1; +   } + +   return s->img_buffer >= s->img_buffer_end; +} +#endif + +#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) +// nothing +#else +static void stbi__skip(stbi__context *s, int n) +{ +   if (n == 0) return;  // already there! +   if (n < 0) { +      s->img_buffer = s->img_buffer_end; +      return; +   } +   if (s->io.read) { +      int blen = (int) (s->img_buffer_end - s->img_buffer); +      if (blen < n) { +         s->img_buffer = s->img_buffer_end; +         (s->io.skip)(s->io_user_data, n - blen); +         return; +      } +   } +   s->img_buffer += n; +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_TGA) && defined(STBI_NO_HDR) && defined(STBI_NO_PNM) +// nothing +#else +static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n) +{ +   if (s->io.read) { +      int blen = (int) (s->img_buffer_end - s->img_buffer); +      if (blen < n) { +         int res, count; + +         memcpy(buffer, s->img_buffer, blen); + +         count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen); +         res = (count == (n-blen)); +         s->img_buffer = s->img_buffer_end; +         return res; +      } +   } + +   if (s->img_buffer+n <= s->img_buffer_end) { +      memcpy(buffer, s->img_buffer, n); +      s->img_buffer += n; +      return 1; +   } else +      return 0; +} +#endif + +#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC) +// nothing +#else +static int stbi__get16be(stbi__context *s) +{ +   int z = stbi__get8(s); +   return (z << 8) + stbi__get8(s); +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC) +// nothing +#else +static stbi__uint32 stbi__get32be(stbi__context *s) +{ +   stbi__uint32 z = stbi__get16be(s); +   return (z << 16) + stbi__get16be(s); +} +#endif + +#if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) +// nothing +#else +static int stbi__get16le(stbi__context *s) +{ +   int z = stbi__get8(s); +   return z + (stbi__get8(s) << 8); +} +#endif + +#ifndef STBI_NO_BMP +static stbi__uint32 stbi__get32le(stbi__context *s) +{ +   stbi__uint32 z = stbi__get16le(s); +   z += (stbi__uint32)stbi__get16le(s) << 16; +   return z; +} +#endif + +#define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 255))  // truncate int to byte without warnings + +#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && 
defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM) +// nothing +#else +////////////////////////////////////////////////////////////////////////////// +// +//  generic converter from built-in img_n to req_comp +//    individual types do this automatically as much as possible (e.g. jpeg +//    does all cases internally since it needs to colorspace convert anyway, +//    and it never has alpha, so very few cases ). png can automatically +//    interleave an alpha=255 channel, but falls back to this for other cases +// +//  assume data buffer is malloced, so malloc a new one and free that one +//  only failure mode is malloc failing + +static stbi_uc stbi__compute_y(int r, int g, int b) +{ +   return (stbi_uc) (((r*77) + (g*150) +  (29*b)) >> 8); +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM) +// nothing +#else +static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y) +{ +   int i,j; +   unsigned char *good; + +   if (req_comp == img_n) return data; +   STBI_ASSERT(req_comp >= 1 && req_comp <= 4); + +   good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0); +   if (good == NULL) { +      STBI_FREE(data); +      return stbi__errpuc("outofmem", "Out of memory"); +   } + +   for (j=0; j < (int) y; ++j) { +      unsigned char *src  = data + j * x * img_n   ; +      unsigned char *dest = good + j * x * req_comp; + +      #define STBI__COMBO(a,b)  ((a)*8+(b)) +      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) +      // convert source image with img_n components to one with req_comp components; +      // avoid switch per pixel, so use switch per scanline and massive macros +      switch (STBI__COMBO(img_n, req_comp)) { +         STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=255;                                     } break; +         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break; +         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=255;                     } break; +         STBI__CASE(2,1) { dest[0]=src[0];                                                  } break; +         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break; +         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1];                  } break; +         STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=255;        } break; +         STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break; +         STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = 255;    } break; +         STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break; +         STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = src[3]; } break; +         STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];                    } break; +         default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return stbi__errpuc("unsupported", "Unsupported format conversion"); +      } +      #undef STBI__CASE +   } + +   STBI_FREE(data); +   return good; +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) +// nothing +#else +static stbi__uint16 stbi__compute_y_16(int r, int g, int b) +{ +   
return (stbi__uint16) (((r*77) + (g*150) +  (29*b)) >> 8); +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) +// nothing +#else +static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y) +{ +   int i,j; +   stbi__uint16 *good; + +   if (req_comp == img_n) return data; +   STBI_ASSERT(req_comp >= 1 && req_comp <= 4); + +   good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2); +   if (good == NULL) { +      STBI_FREE(data); +      return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); +   } + +   for (j=0; j < (int) y; ++j) { +      stbi__uint16 *src  = data + j * x * img_n   ; +      stbi__uint16 *dest = good + j * x * req_comp; + +      #define STBI__COMBO(a,b)  ((a)*8+(b)) +      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) +      // convert source image with img_n components to one with req_comp components; +      // avoid switch per pixel, so use switch per scanline and massive macros +      switch (STBI__COMBO(img_n, req_comp)) { +         STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=0xffff;                                     } break; +         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break; +         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=0xffff;                     } break; +         STBI__CASE(2,1) { dest[0]=src[0];                                                     } break; +         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break; +         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1];                     } break; +         STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=0xffff;        } break; +         STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break; +         STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = 0xffff; } break; +         STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break; +         STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = src[3]; } break; +         STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];                       } break; +         default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return (stbi__uint16*) stbi__errpuc("unsupported", "Unsupported format conversion"); +      } +      #undef STBI__CASE +   } + +   STBI_FREE(data); +   return good; +} +#endif + +#ifndef STBI_NO_LINEAR +static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp) +{ +   int i,k,n; +   float *output; +   if (!data) return NULL; +   output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0); +   if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); } +   // compute number of non-alpha components +   if (comp & 1) n = comp; else n = comp-1; +   for (i=0; i < x*y; ++i) { +      for (k=0; k < n; ++k) { +         output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale); +      } +   } +   if (n < comp) { +      for (i=0; i < x*y; ++i) { +         output[i*comp + n] = data[i*comp + n]/255.0f; +      } +   } +   STBI_FREE(data); +   return output; +} +#endif + +#ifndef STBI_NO_HDR +#define stbi__float2int(x)   ((int) (x)) +static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp) +{ +   int i,k,n; +   stbi_uc *output; +   
if (!data) return NULL; +   output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0); +   if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); } +   // compute number of non-alpha components +   if (comp & 1) n = comp; else n = comp-1; +   for (i=0; i < x*y; ++i) { +      for (k=0; k < n; ++k) { +         float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f; +         if (z < 0) z = 0; +         if (z > 255) z = 255; +         output[i*comp + k] = (stbi_uc) stbi__float2int(z); +      } +      if (k < comp) { +         float z = data[i*comp+k] * 255 + 0.5f; +         if (z < 0) z = 0; +         if (z > 255) z = 255; +         output[i*comp + k] = (stbi_uc) stbi__float2int(z); +      } +   } +   STBI_FREE(data); +   return output; +} +#endif + +////////////////////////////////////////////////////////////////////////////// +// +//  "baseline" JPEG/JFIF decoder +// +//    simple implementation +//      - doesn't support delayed output of y-dimension +//      - simple interface (only one output format: 8-bit interleaved RGB) +//      - doesn't try to recover corrupt jpegs +//      - doesn't allow partial loading, loading multiple at once +//      - still fast on x86 (copying globals into locals doesn't help x86) +//      - allocates lots of intermediate memory (full size of all components) +//        - non-interleaved case requires this anyway +//        - allows good upsampling (see next) +//    high-quality +//      - upsampled channels are bilinearly interpolated, even across blocks +//      - quality integer IDCT derived from IJG's 'slow' +//    performance +//      - fast huffman; reasonable integer IDCT +//      - some SIMD kernels for common paths on targets with SSE2/NEON +//      - uses a lot of intermediate memory, could cache poorly + +#ifndef STBI_NO_JPEG + +// huffman decoding acceleration +#define FAST_BITS   9  // larger handles more cases; smaller stomps less cache + +typedef struct +{ +   stbi_uc  fast[1 << FAST_BITS]; +   // weirdly, repacking this into AoS is a 10% speed loss, instead of a win +   stbi__uint16 code[256]; +   stbi_uc  values[256]; +   stbi_uc  size[257]; +   unsigned int maxcode[18]; +   int    delta[17];   // old 'firstsymbol' - old 'firstcode' +} stbi__huffman; + +typedef struct +{ +   stbi__context *s; +   stbi__huffman huff_dc[4]; +   stbi__huffman huff_ac[4]; +   stbi__uint16 dequant[4][64]; +   stbi__int16 fast_ac[4][1 << FAST_BITS]; + +// sizes for components, interleaved MCUs +   int img_h_max, img_v_max; +   int img_mcu_x, img_mcu_y; +   int img_mcu_w, img_mcu_h; + +// definition of jpeg image component +   struct +   { +      int id; +      int h,v; +      int tq; +      int hd,ha; +      int dc_pred; + +      int x,y,w2,h2; +      stbi_uc *data; +      void *raw_data, *raw_coeff; +      stbi_uc *linebuf; +      short   *coeff;   // progressive only +      int      coeff_w, coeff_h; // number of 8x8 coefficient blocks +   } img_comp[4]; + +   stbi__uint32   code_buffer; // jpeg entropy-coded buffer +   int            code_bits;   // number of valid bits +   unsigned char  marker;      // marker seen while filling entropy buffer +   int            nomore;      // flag if we saw a marker so must stop + +   int            progressive; +   int            spec_start; +   int            spec_end; +   int            succ_high; +   int            succ_low; +   int            eob_run; +   int            jfif; +   int            app14_color_transform; // Adobe APP14 tag +   int            rgb; 
+ +   int scan_n, order[4]; +   int restart_interval, todo; + +// kernels +   void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]); +   void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step); +   stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs); +} stbi__jpeg; + +static int stbi__build_huffman(stbi__huffman *h, int *count) +{ +   int i,j,k=0; +   unsigned int code; +   // build size list for each symbol (from JPEG spec) +   for (i=0; i < 16; ++i) { +      for (j=0; j < count[i]; ++j) { +         h->size[k++] = (stbi_uc) (i+1); +         if(k >= 257) return stbi__err("bad size list","Corrupt JPEG"); +      } +   } +   h->size[k] = 0; + +   // compute actual symbols (from jpeg spec) +   code = 0; +   k = 0; +   for(j=1; j <= 16; ++j) { +      // compute delta to add to code to compute symbol id +      h->delta[j] = k - code; +      if (h->size[k] == j) { +         while (h->size[k] == j) +            h->code[k++] = (stbi__uint16) (code++); +         if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG"); +      } +      // compute largest code + 1 for this size, preshifted as needed later +      h->maxcode[j] = code << (16-j); +      code <<= 1; +   } +   h->maxcode[j] = 0xffffffff; + +   // build non-spec acceleration table; 255 is flag for not-accelerated +   memset(h->fast, 255, 1 << FAST_BITS); +   for (i=0; i < k; ++i) { +      int s = h->size[i]; +      if (s <= FAST_BITS) { +         int c = h->code[i] << (FAST_BITS-s); +         int m = 1 << (FAST_BITS-s); +         for (j=0; j < m; ++j) { +            h->fast[c+j] = (stbi_uc) i; +         } +      } +   } +   return 1; +} + +// build a table that decodes both magnitude and value of small ACs in +// one go. +static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h) +{ +   int i; +   for (i=0; i < (1 << FAST_BITS); ++i) { +      stbi_uc fast = h->fast[i]; +      fast_ac[i] = 0; +      if (fast < 255) { +         int rs = h->values[fast]; +         int run = (rs >> 4) & 15; +         int magbits = rs & 15; +         int len = h->size[fast]; + +         if (magbits && len + magbits <= FAST_BITS) { +            // magnitude code followed by receive_extend code +            int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits); +            int m = 1 << (magbits - 1); +            if (k < m) k += (~0U << magbits) + 1; +            // if the result is small enough, we can fit it in fast_ac table +            if (k >= -128 && k <= 127) +               fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits)); +         } +      } +   } +} + +static void stbi__grow_buffer_unsafe(stbi__jpeg *j) +{ +   do { +      unsigned int b = j->nomore ? 
0 : stbi__get8(j->s); +      if (b == 0xff) { +         int c = stbi__get8(j->s); +         while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes +         if (c != 0) { +            j->marker = (unsigned char) c; +            j->nomore = 1; +            return; +         } +      } +      j->code_buffer |= b << (24 - j->code_bits); +      j->code_bits += 8; +   } while (j->code_bits <= 24); +} + +// (1 << n) - 1 +static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535}; + +// decode a jpeg huffman value from the bitstream +stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h) +{ +   unsigned int temp; +   int c,k; + +   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + +   // look at the top FAST_BITS and determine what symbol ID it is, +   // if the code is <= FAST_BITS +   c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); +   k = h->fast[c]; +   if (k < 255) { +      int s = h->size[k]; +      if (s > j->code_bits) +         return -1; +      j->code_buffer <<= s; +      j->code_bits -= s; +      return h->values[k]; +   } + +   // naive test is to shift the code_buffer down so k bits are +   // valid, then test against maxcode. To speed this up, we've +   // preshifted maxcode left so that it has (16-k) 0s at the +   // end; in other words, regardless of the number of bits, it +   // wants to be compared against something shifted to have 16; +   // that way we don't need to shift inside the loop. +   temp = j->code_buffer >> 16; +   for (k=FAST_BITS+1 ; ; ++k) +      if (temp < h->maxcode[k]) +         break; +   if (k == 17) { +      // error! code not found +      j->code_bits -= 16; +      return -1; +   } + +   if (k > j->code_bits) +      return -1; + +   // convert the huffman code to the symbol id +   c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k]; +   if(c < 0 || c >= 256) // symbol id out of bounds! +       return -1; +   STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]); + +   // convert the id to a symbol +   j->code_bits -= k; +   j->code_buffer <<= k; +   return h->values[c]; +} + +// bias[n] = (-1<<n) + 1 +static const int stbi__jbias[16] = {0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047,-4095,-8191,-16383,-32767}; + +// combined JPEG 'receive' and JPEG 'extend', since baseline +// always extends everything it receives. 
+stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n) +{ +   unsigned int k; +   int sgn; +   if (j->code_bits < n) stbi__grow_buffer_unsafe(j); +   if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s intead of continuing + +   sgn = j->code_buffer >> 31; // sign bit always in MSB; 0 if MSB clear (positive), 1 if MSB set (negative) +   k = stbi_lrot(j->code_buffer, n); +   j->code_buffer = k & ~stbi__bmask[n]; +   k &= stbi__bmask[n]; +   j->code_bits -= n; +   return k + (stbi__jbias[n] & (sgn - 1)); +} + +// get some unsigned bits +stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n) +{ +   unsigned int k; +   if (j->code_bits < n) stbi__grow_buffer_unsafe(j); +   if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s intead of continuing +   k = stbi_lrot(j->code_buffer, n); +   j->code_buffer = k & ~stbi__bmask[n]; +   k &= stbi__bmask[n]; +   j->code_bits -= n; +   return k; +} + +stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j) +{ +   unsigned int k; +   if (j->code_bits < 1) stbi__grow_buffer_unsafe(j); +   if (j->code_bits < 1) return 0; // ran out of bits from stream, return 0s intead of continuing +   k = j->code_buffer; +   j->code_buffer <<= 1; +   --j->code_bits; +   return k & 0x80000000; +} + +// given a value that's at position X in the zigzag stream, +// where does it appear in the 8x8 matrix coded as row-major? +static const stbi_uc stbi__jpeg_dezigzag[64+15] = +{ +    0,  1,  8, 16,  9,  2,  3, 10, +   17, 24, 32, 25, 18, 11,  4,  5, +   12, 19, 26, 33, 40, 48, 41, 34, +   27, 20, 13,  6,  7, 14, 21, 28, +   35, 42, 49, 56, 57, 50, 43, 36, +   29, 22, 15, 23, 30, 37, 44, 51, +   58, 59, 52, 45, 38, 31, 39, 46, +   53, 60, 61, 54, 47, 55, 62, 63, +   // let corrupt input sample past end +   63, 63, 63, 63, 63, 63, 63, 63, +   63, 63, 63, 63, 63, 63, 63 +}; + +// decode one 64-entry block-- +static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant) +{ +   int diff,dc,k; +   int t; + +   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); +   t = stbi__jpeg_huff_decode(j, hdc); +   if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG"); + +   // 0 all the ac values now so we can do it 32-bits at a time +   memset(data,0,64*sizeof(data[0])); + +   diff = t ? 
stbi__extend_receive(j, t) : 0; +   if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta","Corrupt JPEG"); +   dc = j->img_comp[b].dc_pred + diff; +   j->img_comp[b].dc_pred = dc; +   if (!stbi__mul2shorts_valid(dc, dequant[0])) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); +   data[0] = (short) (dc * dequant[0]); + +   // decode AC components, see JPEG spec +   k = 1; +   do { +      unsigned int zig; +      int c,r,s; +      if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); +      c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); +      r = fac[c]; +      if (r) { // fast-AC path +         k += (r >> 4) & 15; // run +         s = r & 15; // combined length +         if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available"); +         j->code_buffer <<= s; +         j->code_bits -= s; +         // decode into unzigzag'd location +         zig = stbi__jpeg_dezigzag[k++]; +         data[zig] = (short) ((r >> 8) * dequant[zig]); +      } else { +         int rs = stbi__jpeg_huff_decode(j, hac); +         if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); +         s = rs & 15; +         r = rs >> 4; +         if (s == 0) { +            if (rs != 0xf0) break; // end block +            k += 16; +         } else { +            k += r; +            // decode into unzigzag'd location +            zig = stbi__jpeg_dezigzag[k++]; +            data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]); +         } +      } +   } while (k < 64); +   return 1; +} + +static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b) +{ +   int diff,dc; +   int t; +   if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + +   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + +   if (j->succ_high == 0) { +      // first scan for DC coefficient, must be first +      memset(data,0,64*sizeof(data[0])); // 0 all the ac values now +      t = stbi__jpeg_huff_decode(j, hdc); +      if (t < 0 || t > 15) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); +      diff = t ? 
stbi__extend_receive(j, t) : 0; + +      if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta", "Corrupt JPEG"); +      dc = j->img_comp[b].dc_pred + diff; +      j->img_comp[b].dc_pred = dc; +      if (!stbi__mul2shorts_valid(dc, 1 << j->succ_low)) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); +      data[0] = (short) (dc * (1 << j->succ_low)); +   } else { +      // refinement scan for DC coefficient +      if (stbi__jpeg_get_bit(j)) +         data[0] += (short) (1 << j->succ_low); +   } +   return 1; +} + +// @OPTIMIZE: store non-zigzagged during the decode passes, +// and only de-zigzag when dequantizing +static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac) +{ +   int k; +   if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + +   if (j->succ_high == 0) { +      int shift = j->succ_low; + +      if (j->eob_run) { +         --j->eob_run; +         return 1; +      } + +      k = j->spec_start; +      do { +         unsigned int zig; +         int c,r,s; +         if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); +         c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); +         r = fac[c]; +         if (r) { // fast-AC path +            k += (r >> 4) & 15; // run +            s = r & 15; // combined length +            if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available"); +            j->code_buffer <<= s; +            j->code_bits -= s; +            zig = stbi__jpeg_dezigzag[k++]; +            data[zig] = (short) ((r >> 8) * (1 << shift)); +         } else { +            int rs = stbi__jpeg_huff_decode(j, hac); +            if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); +            s = rs & 15; +            r = rs >> 4; +            if (s == 0) { +               if (r < 15) { +                  j->eob_run = (1 << r); +                  if (r) +                     j->eob_run += stbi__jpeg_get_bits(j, r); +                  --j->eob_run; +                  break; +               } +               k += 16; +            } else { +               k += r; +               zig = stbi__jpeg_dezigzag[k++]; +               data[zig] = (short) (stbi__extend_receive(j,s) * (1 << shift)); +            } +         } +      } while (k <= j->spec_end); +   } else { +      // refinement scan for these AC coefficients + +      short bit = (short) (1 << j->succ_low); + +      if (j->eob_run) { +         --j->eob_run; +         for (k = j->spec_start; k <= j->spec_end; ++k) { +            short *p = &data[stbi__jpeg_dezigzag[k]]; +            if (*p != 0) +               if (stbi__jpeg_get_bit(j)) +                  if ((*p & bit)==0) { +                     if (*p > 0) +                        *p += bit; +                     else +                        *p -= bit; +                  } +         } +      } else { +         k = j->spec_start; +         do { +            int r,s; +            int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh +            if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); +            s = rs & 15; +            r = rs >> 4; +            if (s == 0) { +               if (r < 15) { +                  j->eob_run = (1 << r) - 1; +                  if (r) +                     j->eob_run += stbi__jpeg_get_bits(j, r); +                  r = 64; // force end of block +     
          } else { +                  // r=15 s=0 should write 16 0s, so we just do +                  // a run of 15 0s and then write s (which is 0), +                  // so we don't have to do anything special here +               } +            } else { +               if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG"); +               // sign bit +               if (stbi__jpeg_get_bit(j)) +                  s = bit; +               else +                  s = -bit; +            } + +            // advance by r +            while (k <= j->spec_end) { +               short *p = &data[stbi__jpeg_dezigzag[k++]]; +               if (*p != 0) { +                  if (stbi__jpeg_get_bit(j)) +                     if ((*p & bit)==0) { +                        if (*p > 0) +                           *p += bit; +                        else +                           *p -= bit; +                     } +               } else { +                  if (r == 0) { +                     *p = (short) s; +                     break; +                  } +                  --r; +               } +            } +         } while (k <= j->spec_end); +      } +   } +   return 1; +} + +// take a -128..127 value and stbi__clamp it and convert to 0..255 +stbi_inline static stbi_uc stbi__clamp(int x) +{ +   // trick to use a single test to catch both cases +   if ((unsigned int) x > 255) { +      if (x < 0) return 0; +      if (x > 255) return 255; +   } +   return (stbi_uc) x; +} + +#define stbi__f2f(x)  ((int) (((x) * 4096 + 0.5))) +#define stbi__fsh(x)  ((x) * 4096) + +// derived from jidctint -- DCT_ISLOW +#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \ +   int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \ +   p2 = s2;                                    \ +   p3 = s6;                                    \ +   p1 = (p2+p3) * stbi__f2f(0.5411961f);       \ +   t2 = p1 + p3*stbi__f2f(-1.847759065f);      \ +   t3 = p1 + p2*stbi__f2f( 0.765366865f);      \ +   p2 = s0;                                    \ +   p3 = s4;                                    \ +   t0 = stbi__fsh(p2+p3);                      \ +   t1 = stbi__fsh(p2-p3);                      \ +   x0 = t0+t3;                                 \ +   x3 = t0-t3;                                 \ +   x1 = t1+t2;                                 \ +   x2 = t1-t2;                                 \ +   t0 = s7;                                    \ +   t1 = s5;                                    \ +   t2 = s3;                                    \ +   t3 = s1;                                    \ +   p3 = t0+t2;                                 \ +   p4 = t1+t3;                                 \ +   p1 = t0+t3;                                 \ +   p2 = t1+t2;                                 \ +   p5 = (p3+p4)*stbi__f2f( 1.175875602f);      \ +   t0 = t0*stbi__f2f( 0.298631336f);           \ +   t1 = t1*stbi__f2f( 2.053119869f);           \ +   t2 = t2*stbi__f2f( 3.072711026f);           \ +   t3 = t3*stbi__f2f( 1.501321110f);           \ +   p1 = p5 + p1*stbi__f2f(-0.899976223f);      \ +   p2 = p5 + p2*stbi__f2f(-2.562915447f);      \ +   p3 = p3*stbi__f2f(-1.961570560f);           \ +   p4 = p4*stbi__f2f(-0.390180644f);           \ +   t3 += p1+p4;                                \ +   t2 += p2+p3;                                \ +   t1 += p2+p4;                                \ +   t0 += p1+p3; + +static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64]) +{ +   int i,val[64],*v=val; +   stbi_uc *o; +   short *d = data; + +   // 
columns +   for (i=0; i < 8; ++i,++d, ++v) { +      // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing +      if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 +           && d[40]==0 && d[48]==0 && d[56]==0) { +         //    no shortcut                 0     seconds +         //    (1|2|3|4|5|6|7)==0          0     seconds +         //    all separate               -0.047 seconds +         //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds +         int dcterm = d[0]*4; +         v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; +      } else { +         STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56]) +         // constants scaled things up by 1<<12; let's bring them back +         // down, but keep 2 extra bits of precision +         x0 += 512; x1 += 512; x2 += 512; x3 += 512; +         v[ 0] = (x0+t3) >> 10; +         v[56] = (x0-t3) >> 10; +         v[ 8] = (x1+t2) >> 10; +         v[48] = (x1-t2) >> 10; +         v[16] = (x2+t1) >> 10; +         v[40] = (x2-t1) >> 10; +         v[24] = (x3+t0) >> 10; +         v[32] = (x3-t0) >> 10; +      } +   } + +   for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) { +      // no fast case since the first 1D IDCT spread components out +      STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]) +      // constants scaled things up by 1<<12, plus we had 1<<2 from first +      // loop, plus horizontal and vertical each scale by sqrt(8) so together +      // we've got an extra 1<<3, so 1<<17 total we need to remove. +      // so we want to round that, which means adding 0.5 * 1<<17, +      // aka 65536. Also, we'll end up with -128 to 127 that we want +      // to encode as 0..255 by adding 128, so we'll add that before the shift +      x0 += 65536 + (128<<17); +      x1 += 65536 + (128<<17); +      x2 += 65536 + (128<<17); +      x3 += 65536 + (128<<17); +      // tried computing the shifts into temps, or'ing the temps to see +      // if any were out of range, but that was slower +      o[0] = stbi__clamp((x0+t3) >> 17); +      o[7] = stbi__clamp((x0-t3) >> 17); +      o[1] = stbi__clamp((x1+t2) >> 17); +      o[6] = stbi__clamp((x1-t2) >> 17); +      o[2] = stbi__clamp((x2+t1) >> 17); +      o[5] = stbi__clamp((x2-t1) >> 17); +      o[3] = stbi__clamp((x3+t0) >> 17); +      o[4] = stbi__clamp((x3-t0) >> 17); +   } +} + +#ifdef STBI_SSE2 +// sse2 integer IDCT. not the fastest possible implementation but it +// produces bit-identical results to the generic C version so it's +// fully "transparent". +static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) +{ +   // This is constructed to match our regular (generic) integer IDCT exactly. 
+   __m128i row0, row1, row2, row3, row4, row5, row6, row7; +   __m128i tmp; + +   // dot product constant: even elems=x, odd elems=y +   #define dct_const(x,y)  _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y)) + +   // out(0) = c0[even]*x + c0[odd]*y   (c0, x, y 16-bit, out 32-bit) +   // out(1) = c1[even]*x + c1[odd]*y +   #define dct_rot(out0,out1, x,y,c0,c1) \ +      __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \ +      __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \ +      __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \ +      __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \ +      __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \ +      __m128i out1##_h = _mm_madd_epi16(c0##hi, c1) + +   // out = in << 12  (in 16-bit, out 32-bit) +   #define dct_widen(out, in) \ +      __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \ +      __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4) + +   // wide add +   #define dct_wadd(out, a, b) \ +      __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \ +      __m128i out##_h = _mm_add_epi32(a##_h, b##_h) + +   // wide sub +   #define dct_wsub(out, a, b) \ +      __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \ +      __m128i out##_h = _mm_sub_epi32(a##_h, b##_h) + +   // butterfly a/b, add bias, then shift by "s" and pack +   #define dct_bfly32o(out0, out1, a,b,bias,s) \ +      { \ +         __m128i abiased_l = _mm_add_epi32(a##_l, bias); \ +         __m128i abiased_h = _mm_add_epi32(a##_h, bias); \ +         dct_wadd(sum, abiased, b); \ +         dct_wsub(dif, abiased, b); \ +         out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \ +         out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \ +      } + +   // 8-bit interleave step (for transposes) +   #define dct_interleave8(a, b) \ +      tmp = a; \ +      a = _mm_unpacklo_epi8(a, b); \ +      b = _mm_unpackhi_epi8(tmp, b) + +   // 16-bit interleave step (for transposes) +   #define dct_interleave16(a, b) \ +      tmp = a; \ +      a = _mm_unpacklo_epi16(a, b); \ +      b = _mm_unpackhi_epi16(tmp, b) + +   #define dct_pass(bias,shift) \ +      { \ +         /* even part */ \ +         dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \ +         __m128i sum04 = _mm_add_epi16(row0, row4); \ +         __m128i dif04 = _mm_sub_epi16(row0, row4); \ +         dct_widen(t0e, sum04); \ +         dct_widen(t1e, dif04); \ +         dct_wadd(x0, t0e, t3e); \ +         dct_wsub(x3, t0e, t3e); \ +         dct_wadd(x1, t1e, t2e); \ +         dct_wsub(x2, t1e, t2e); \ +         /* odd part */ \ +         dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \ +         dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \ +         __m128i sum17 = _mm_add_epi16(row1, row7); \ +         __m128i sum35 = _mm_add_epi16(row3, row5); \ +         dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \ +         dct_wadd(x4, y0o, y4o); \ +         dct_wadd(x5, y1o, y5o); \ +         dct_wadd(x6, y2o, y5o); \ +         dct_wadd(x7, y3o, y4o); \ +         dct_bfly32o(row0,row7, x0,x7,bias,shift); \ +         dct_bfly32o(row1,row6, x1,x6,bias,shift); \ +         dct_bfly32o(row2,row5, x2,x5,bias,shift); \ +         dct_bfly32o(row3,row4, x3,x4,bias,shift); \ +      } + +   __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f)); +   __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f)); +   __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + 
stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f)); +   __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f)); +   __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f)); +   __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f)); +   __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f)); +   __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f)); + +   // rounding biases in column/row passes, see stbi__idct_block for explanation. +   __m128i bias_0 = _mm_set1_epi32(512); +   __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17)); + +   // load +   row0 = _mm_load_si128((const __m128i *) (data + 0*8)); +   row1 = _mm_load_si128((const __m128i *) (data + 1*8)); +   row2 = _mm_load_si128((const __m128i *) (data + 2*8)); +   row3 = _mm_load_si128((const __m128i *) (data + 3*8)); +   row4 = _mm_load_si128((const __m128i *) (data + 4*8)); +   row5 = _mm_load_si128((const __m128i *) (data + 5*8)); +   row6 = _mm_load_si128((const __m128i *) (data + 6*8)); +   row7 = _mm_load_si128((const __m128i *) (data + 7*8)); + +   // column pass +   dct_pass(bias_0, 10); + +   { +      // 16bit 8x8 transpose pass 1 +      dct_interleave16(row0, row4); +      dct_interleave16(row1, row5); +      dct_interleave16(row2, row6); +      dct_interleave16(row3, row7); + +      // transpose pass 2 +      dct_interleave16(row0, row2); +      dct_interleave16(row1, row3); +      dct_interleave16(row4, row6); +      dct_interleave16(row5, row7); + +      // transpose pass 3 +      dct_interleave16(row0, row1); +      dct_interleave16(row2, row3); +      dct_interleave16(row4, row5); +      dct_interleave16(row6, row7); +   } + +   // row pass +   dct_pass(bias_1, 17); + +   { +      // pack +      __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7 +      __m128i p1 = _mm_packus_epi16(row2, row3); +      __m128i p2 = _mm_packus_epi16(row4, row5); +      __m128i p3 = _mm_packus_epi16(row6, row7); + +      // 8bit 8x8 transpose pass 1 +      dct_interleave8(p0, p2); // a0e0a1e1... +      dct_interleave8(p1, p3); // c0g0c1g1... + +      // transpose pass 2 +      dct_interleave8(p0, p1); // a0c0e0g0... +      dct_interleave8(p2, p3); // b0d0f0h0... + +      // transpose pass 3 +      dct_interleave8(p0, p2); // a0b0c0d0... +      dct_interleave8(p1, p3); // a4b4c4d4... + +      // store +      _mm_storel_epi64((__m128i *) out, p0); out += out_stride; +      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride; +      _mm_storel_epi64((__m128i *) out, p2); out += out_stride; +      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride; +      _mm_storel_epi64((__m128i *) out, p1); out += out_stride; +      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride; +      _mm_storel_epi64((__m128i *) out, p3); out += out_stride; +      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e)); +   } + +#undef dct_const +#undef dct_rot +#undef dct_widen +#undef dct_wadd +#undef dct_wsub +#undef dct_bfly32o +#undef dct_interleave8 +#undef dct_interleave16 +#undef dct_pass +} + +#endif // STBI_SSE2 + +#ifdef STBI_NEON + +// NEON integer IDCT. should produce bit-identical +// results to the generic C version. 
+static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) +{ +   int16x8_t row0, row1, row2, row3, row4, row5, row6, row7; + +   int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f)); +   int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f)); +   int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f)); +   int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f)); +   int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f)); +   int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f)); +   int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f)); +   int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f)); +   int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f)); +   int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f)); +   int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f)); +   int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f)); + +#define dct_long_mul(out, inq, coeff) \ +   int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \ +   int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff) + +#define dct_long_mac(out, acc, inq, coeff) \ +   int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \ +   int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff) + +#define dct_widen(out, inq) \ +   int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \ +   int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12) + +// wide add +#define dct_wadd(out, a, b) \ +   int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \ +   int32x4_t out##_h = vaddq_s32(a##_h, b##_h) + +// wide sub +#define dct_wsub(out, a, b) \ +   int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \ +   int32x4_t out##_h = vsubq_s32(a##_h, b##_h) + +// butterfly a/b, then shift using "shiftop" by "s" and pack +#define dct_bfly32o(out0,out1, a,b,shiftop,s) \ +   { \ +      dct_wadd(sum, a, b); \ +      dct_wsub(dif, a, b); \ +      out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \ +      out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \ +   } + +#define dct_pass(shiftop, shift) \ +   { \ +      /* even part */ \ +      int16x8_t sum26 = vaddq_s16(row2, row6); \ +      dct_long_mul(p1e, sum26, rot0_0); \ +      dct_long_mac(t2e, p1e, row6, rot0_1); \ +      dct_long_mac(t3e, p1e, row2, rot0_2); \ +      int16x8_t sum04 = vaddq_s16(row0, row4); \ +      int16x8_t dif04 = vsubq_s16(row0, row4); \ +      dct_widen(t0e, sum04); \ +      dct_widen(t1e, dif04); \ +      dct_wadd(x0, t0e, t3e); \ +      dct_wsub(x3, t0e, t3e); \ +      dct_wadd(x1, t1e, t2e); \ +      dct_wsub(x2, t1e, t2e); \ +      /* odd part */ \ +      int16x8_t sum15 = vaddq_s16(row1, row5); \ +      int16x8_t sum17 = vaddq_s16(row1, row7); \ +      int16x8_t sum35 = vaddq_s16(row3, row5); \ +      int16x8_t sum37 = vaddq_s16(row3, row7); \ +      int16x8_t sumodd = vaddq_s16(sum17, sum35); \ +      dct_long_mul(p5o, sumodd, rot1_0); \ +      dct_long_mac(p1o, p5o, sum17, rot1_1); \ +      dct_long_mac(p2o, p5o, sum35, rot1_2); \ +      dct_long_mul(p3o, sum37, rot2_0); \ +      dct_long_mul(p4o, sum15, rot2_1); \ +      dct_wadd(sump13o, p1o, p3o); \ +      dct_wadd(sump24o, p2o, p4o); \ +      dct_wadd(sump23o, p2o, p3o); \ +      dct_wadd(sump14o, p1o, p4o); \ +      dct_long_mac(x4, sump13o, row7, rot3_0); \ +      dct_long_mac(x5, sump24o, row5, rot3_1); \ +      dct_long_mac(x6, sump23o, row3, rot3_2); \ +      dct_long_mac(x7, sump14o, row1, rot3_3); \ +      dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \ +      dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \ +      dct_bfly32o(row2,row5, 
x2,x5,shiftop,shift); \ +      dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \ +   } + +   // load +   row0 = vld1q_s16(data + 0*8); +   row1 = vld1q_s16(data + 1*8); +   row2 = vld1q_s16(data + 2*8); +   row3 = vld1q_s16(data + 3*8); +   row4 = vld1q_s16(data + 4*8); +   row5 = vld1q_s16(data + 5*8); +   row6 = vld1q_s16(data + 6*8); +   row7 = vld1q_s16(data + 7*8); + +   // add DC bias +   row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0)); + +   // column pass +   dct_pass(vrshrn_n_s32, 10); + +   // 16bit 8x8 transpose +   { +// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively. +// whether compilers actually get this is another story, sadly. +#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; } +#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); } +#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); } + +      // pass 1 +      dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6 +      dct_trn16(row2, row3); +      dct_trn16(row4, row5); +      dct_trn16(row6, row7); + +      // pass 2 +      dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4 +      dct_trn32(row1, row3); +      dct_trn32(row4, row6); +      dct_trn32(row5, row7); + +      // pass 3 +      dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0 +      dct_trn64(row1, row5); +      dct_trn64(row2, row6); +      dct_trn64(row3, row7); + +#undef dct_trn16 +#undef dct_trn32 +#undef dct_trn64 +   } + +   // row pass +   // vrshrn_n_s32 only supports shifts up to 16, we need +   // 17. so do a non-rounding shift of 16 first then follow +   // up with a rounding shift by 1. +   dct_pass(vshrn_n_s32, 16); + +   { +      // pack and round +      uint8x8_t p0 = vqrshrun_n_s16(row0, 1); +      uint8x8_t p1 = vqrshrun_n_s16(row1, 1); +      uint8x8_t p2 = vqrshrun_n_s16(row2, 1); +      uint8x8_t p3 = vqrshrun_n_s16(row3, 1); +      uint8x8_t p4 = vqrshrun_n_s16(row4, 1); +      uint8x8_t p5 = vqrshrun_n_s16(row5, 1); +      uint8x8_t p6 = vqrshrun_n_s16(row6, 1); +      uint8x8_t p7 = vqrshrun_n_s16(row7, 1); + +      // again, these can translate into one instruction, but often don't. +#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; } +#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); } +#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); } + +      // sadly can't use interleaved stores here since we only write +      // 8 bytes to each scan line! 
+ +      // 8x8 8-bit transpose pass 1 +      dct_trn8_8(p0, p1); +      dct_trn8_8(p2, p3); +      dct_trn8_8(p4, p5); +      dct_trn8_8(p6, p7); + +      // pass 2 +      dct_trn8_16(p0, p2); +      dct_trn8_16(p1, p3); +      dct_trn8_16(p4, p6); +      dct_trn8_16(p5, p7); + +      // pass 3 +      dct_trn8_32(p0, p4); +      dct_trn8_32(p1, p5); +      dct_trn8_32(p2, p6); +      dct_trn8_32(p3, p7); + +      // store +      vst1_u8(out, p0); out += out_stride; +      vst1_u8(out, p1); out += out_stride; +      vst1_u8(out, p2); out += out_stride; +      vst1_u8(out, p3); out += out_stride; +      vst1_u8(out, p4); out += out_stride; +      vst1_u8(out, p5); out += out_stride; +      vst1_u8(out, p6); out += out_stride; +      vst1_u8(out, p7); + +#undef dct_trn8_8 +#undef dct_trn8_16 +#undef dct_trn8_32 +   } + +#undef dct_long_mul +#undef dct_long_mac +#undef dct_widen +#undef dct_wadd +#undef dct_wsub +#undef dct_bfly32o +#undef dct_pass +} + +#endif // STBI_NEON + +#define STBI__MARKER_none  0xff +// if there's a pending marker from the entropy stream, return that +// otherwise, fetch from the stream and get a marker. if there's no +// marker, return 0xff, which is never a valid marker value +static stbi_uc stbi__get_marker(stbi__jpeg *j) +{ +   stbi_uc x; +   if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; } +   x = stbi__get8(j->s); +   if (x != 0xff) return STBI__MARKER_none; +   while (x == 0xff) +      x = stbi__get8(j->s); // consume repeated 0xff fill bytes +   return x; +} + +// in each scan, we'll have scan_n components, and the order +// of the components is specified by order[] +#define STBI__RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7) + +// after a restart interval, stbi__jpeg_reset the entropy decoder and +// the dc prediction +static void stbi__jpeg_reset(stbi__jpeg *j) +{ +   j->code_bits = 0; +   j->code_buffer = 0; +   j->nomore = 0; +   j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0; +   j->marker = STBI__MARKER_none; +   j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff; +   j->eob_run = 0; +   // no more than 1<<31 MCUs if no restart_interal? 
that's plenty safe, +   // since we don't even allow 1<<30 pixels +} + +static int stbi__parse_entropy_coded_data(stbi__jpeg *z) +{ +   stbi__jpeg_reset(z); +   if (!z->progressive) { +      if (z->scan_n == 1) { +         int i,j; +         STBI_SIMD_ALIGN(short, data[64]); +         int n = z->order[0]; +         // non-interleaved data, we just need to process one block at a time, +         // in trivial scanline order +         // number of blocks to do just depends on how many actual "pixels" this +         // component has, independent of interleaved MCU blocking and such +         int w = (z->img_comp[n].x+7) >> 3; +         int h = (z->img_comp[n].y+7) >> 3; +         for (j=0; j < h; ++j) { +            for (i=0; i < w; ++i) { +               int ha = z->img_comp[n].ha; +               if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; +               z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); +               // every data block is an MCU, so countdown the restart interval +               if (--z->todo <= 0) { +                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); +                  // if it's NOT a restart, then just bail, so we get corrupt data +                  // rather than no data +                  if (!STBI__RESTART(z->marker)) return 1; +                  stbi__jpeg_reset(z); +               } +            } +         } +         return 1; +      } else { // interleaved +         int i,j,k,x,y; +         STBI_SIMD_ALIGN(short, data[64]); +         for (j=0; j < z->img_mcu_y; ++j) { +            for (i=0; i < z->img_mcu_x; ++i) { +               // scan an interleaved mcu... 
process scan_n components in order +               for (k=0; k < z->scan_n; ++k) { +                  int n = z->order[k]; +                  // scan out an mcu's worth of this component; that's just determined +                  // by the basic H and V specified for the component +                  for (y=0; y < z->img_comp[n].v; ++y) { +                     for (x=0; x < z->img_comp[n].h; ++x) { +                        int x2 = (i*z->img_comp[n].h + x)*8; +                        int y2 = (j*z->img_comp[n].v + y)*8; +                        int ha = z->img_comp[n].ha; +                        if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; +                        z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data); +                     } +                  } +               } +               // after all interleaved components, that's an interleaved MCU, +               // so now count down the restart interval +               if (--z->todo <= 0) { +                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); +                  if (!STBI__RESTART(z->marker)) return 1; +                  stbi__jpeg_reset(z); +               } +            } +         } +         return 1; +      } +   } else { +      if (z->scan_n == 1) { +         int i,j; +         int n = z->order[0]; +         // non-interleaved data, we just need to process one block at a time, +         // in trivial scanline order +         // number of blocks to do just depends on how many actual "pixels" this +         // component has, independent of interleaved MCU blocking and such +         int w = (z->img_comp[n].x+7) >> 3; +         int h = (z->img_comp[n].y+7) >> 3; +         for (j=0; j < h; ++j) { +            for (i=0; i < w; ++i) { +               short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); +               if (z->spec_start == 0) { +                  if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) +                     return 0; +               } else { +                  int ha = z->img_comp[n].ha; +                  if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha])) +                     return 0; +               } +               // every data block is an MCU, so countdown the restart interval +               if (--z->todo <= 0) { +                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); +                  if (!STBI__RESTART(z->marker)) return 1; +                  stbi__jpeg_reset(z); +               } +            } +         } +         return 1; +      } else { // interleaved +         int i,j,k,x,y; +         for (j=0; j < z->img_mcu_y; ++j) { +            for (i=0; i < z->img_mcu_x; ++i) { +               // scan an interleaved mcu... 
process scan_n components in order +               for (k=0; k < z->scan_n; ++k) { +                  int n = z->order[k]; +                  // scan out an mcu's worth of this component; that's just determined +                  // by the basic H and V specified for the component +                  for (y=0; y < z->img_comp[n].v; ++y) { +                     for (x=0; x < z->img_comp[n].h; ++x) { +                        int x2 = (i*z->img_comp[n].h + x); +                        int y2 = (j*z->img_comp[n].v + y); +                        short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w); +                        if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) +                           return 0; +                     } +                  } +               } +               // after all interleaved components, that's an interleaved MCU, +               // so now count down the restart interval +               if (--z->todo <= 0) { +                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); +                  if (!STBI__RESTART(z->marker)) return 1; +                  stbi__jpeg_reset(z); +               } +            } +         } +         return 1; +      } +   } +} + +static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant) +{ +   int i; +   for (i=0; i < 64; ++i) +      data[i] *= dequant[i]; +} + +static void stbi__jpeg_finish(stbi__jpeg *z) +{ +   if (z->progressive) { +      // dequantize and idct the data +      int i,j,n; +      for (n=0; n < z->s->img_n; ++n) { +         int w = (z->img_comp[n].x+7) >> 3; +         int h = (z->img_comp[n].y+7) >> 3; +         for (j=0; j < h; ++j) { +            for (i=0; i < w; ++i) { +               short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); +               stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]); +               z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); +            } +         } +      } +   } +} + +static int stbi__process_marker(stbi__jpeg *z, int m) +{ +   int L; +   switch (m) { +      case STBI__MARKER_none: // no marker found +         return stbi__err("expected marker","Corrupt JPEG"); + +      case 0xDD: // DRI - specify restart interval +         if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG"); +         z->restart_interval = stbi__get16be(z->s); +         return 1; + +      case 0xDB: // DQT - define quantization table +         L = stbi__get16be(z->s)-2; +         while (L > 0) { +            int q = stbi__get8(z->s); +            int p = q >> 4, sixteen = (p != 0); +            int t = q & 15,i; +            if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG"); +            if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG"); + +            for (i=0; i < 64; ++i) +               z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s)); +            L -= (sixteen ? 
129 : 65); +         } +         return L==0; + +      case 0xC4: // DHT - define huffman table +         L = stbi__get16be(z->s)-2; +         while (L > 0) { +            stbi_uc *v; +            int sizes[16],i,n=0; +            int q = stbi__get8(z->s); +            int tc = q >> 4; +            int th = q & 15; +            if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG"); +            for (i=0; i < 16; ++i) { +               sizes[i] = stbi__get8(z->s); +               n += sizes[i]; +            } +            if(n > 256) return stbi__err("bad DHT header","Corrupt JPEG"); // Loop over i < n would write past end of values! +            L -= 17; +            if (tc == 0) { +               if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0; +               v = z->huff_dc[th].values; +            } else { +               if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0; +               v = z->huff_ac[th].values; +            } +            for (i=0; i < n; ++i) +               v[i] = stbi__get8(z->s); +            if (tc != 0) +               stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th); +            L -= n; +         } +         return L==0; +   } + +   // check for comment block or APP blocks +   if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) { +      L = stbi__get16be(z->s); +      if (L < 2) { +         if (m == 0xFE) +            return stbi__err("bad COM len","Corrupt JPEG"); +         else +            return stbi__err("bad APP len","Corrupt JPEG"); +      } +      L -= 2; + +      if (m == 0xE0 && L >= 5) { // JFIF APP0 segment +         static const unsigned char tag[5] = {'J','F','I','F','\0'}; +         int ok = 1; +         int i; +         for (i=0; i < 5; ++i) +            if (stbi__get8(z->s) != tag[i]) +               ok = 0; +         L -= 5; +         if (ok) +            z->jfif = 1; +      } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment +         static const unsigned char tag[6] = {'A','d','o','b','e','\0'}; +         int ok = 1; +         int i; +         for (i=0; i < 6; ++i) +            if (stbi__get8(z->s) != tag[i]) +               ok = 0; +         L -= 6; +         if (ok) { +            stbi__get8(z->s); // version +            stbi__get16be(z->s); // flags0 +            stbi__get16be(z->s); // flags1 +            z->app14_color_transform = stbi__get8(z->s); // color transform +            L -= 6; +         } +      } + +      stbi__skip(z->s, L); +      return 1; +   } + +   return stbi__err("unknown marker","Corrupt JPEG"); +} + +// after we see SOS +static int stbi__process_scan_header(stbi__jpeg *z) +{ +   int i; +   int Ls = stbi__get16be(z->s); +   z->scan_n = stbi__get8(z->s); +   if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG"); +   if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG"); +   for (i=0; i < z->scan_n; ++i) { +      int id = stbi__get8(z->s), which; +      int q = stbi__get8(z->s); +      for (which = 0; which < z->s->img_n; ++which) +         if (z->img_comp[which].id == id) +            break; +      if (which == z->s->img_n) return 0; // no match +      z->img_comp[which].hd = q >> 4;   if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG"); +      z->img_comp[which].ha = q & 15;   if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG"); +      z->order[i] = which; +   } + +   { +      int aa; +      z->spec_start = stbi__get8(z->s); +      
z->spec_end   = stbi__get8(z->s); // should be 63, but might be 0 +      aa = stbi__get8(z->s); +      z->succ_high = (aa >> 4); +      z->succ_low  = (aa & 15); +      if (z->progressive) { +         if (z->spec_start > 63 || z->spec_end > 63  || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13) +            return stbi__err("bad SOS", "Corrupt JPEG"); +      } else { +         if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG"); +         if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG"); +         z->spec_end = 63; +      } +   } + +   return 1; +} + +static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why) +{ +   int i; +   for (i=0; i < ncomp; ++i) { +      if (z->img_comp[i].raw_data) { +         STBI_FREE(z->img_comp[i].raw_data); +         z->img_comp[i].raw_data = NULL; +         z->img_comp[i].data = NULL; +      } +      if (z->img_comp[i].raw_coeff) { +         STBI_FREE(z->img_comp[i].raw_coeff); +         z->img_comp[i].raw_coeff = 0; +         z->img_comp[i].coeff = 0; +      } +      if (z->img_comp[i].linebuf) { +         STBI_FREE(z->img_comp[i].linebuf); +         z->img_comp[i].linebuf = NULL; +      } +   } +   return why; +} + +static int stbi__process_frame_header(stbi__jpeg *z, int scan) +{ +   stbi__context *s = z->s; +   int Lf,p,i,q, h_max=1,v_max=1,c; +   Lf = stbi__get16be(s);         if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG +   p  = stbi__get8(s);            if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline +   s->img_y = stbi__get16be(s);   if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG +   s->img_x = stbi__get16be(s);   if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires +   if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); +   if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); +   c = stbi__get8(s); +   if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG"); +   s->img_n = c; +   for (i=0; i < c; ++i) { +      z->img_comp[i].data = NULL; +      z->img_comp[i].linebuf = NULL; +   } + +   if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG"); + +   z->rgb = 0; +   for (i=0; i < s->img_n; ++i) { +      static const unsigned char rgb[3] = { 'R', 'G', 'B' }; +      z->img_comp[i].id = stbi__get8(s); +      if (s->img_n == 3 && z->img_comp[i].id == rgb[i]) +         ++z->rgb; +      q = stbi__get8(s); +      z->img_comp[i].h = (q >> 4);  if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG"); +      z->img_comp[i].v = q & 15;    if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG"); +      z->img_comp[i].tq = stbi__get8(s);  if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG"); +   } + +   if (scan != STBI__SCAN_load) return 1; + +   if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode"); + +   for (i=0; i < s->img_n; ++i) { +      if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h; +      if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v; +   } + +   // check that plane subsampling factors are integer ratios; our resamplers can't deal with fractional ratios +   // and 
I've never seen a non-corrupted JPEG file actually use them +   for (i=0; i < s->img_n; ++i) { +      if (h_max % z->img_comp[i].h != 0) return stbi__err("bad H","Corrupt JPEG"); +      if (v_max % z->img_comp[i].v != 0) return stbi__err("bad V","Corrupt JPEG"); +   } + +   // compute interleaved mcu info +   z->img_h_max = h_max; +   z->img_v_max = v_max; +   z->img_mcu_w = h_max * 8; +   z->img_mcu_h = v_max * 8; +   // these sizes can't be more than 17 bits +   z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w; +   z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h; + +   for (i=0; i < s->img_n; ++i) { +      // number of effective pixels (e.g. for non-interleaved MCU) +      z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max; +      z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max; +      // to simplify generation, we'll allocate enough memory to decode +      // the bogus oversized data from using interleaved MCUs and their +      // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't +      // discard the extra data until colorspace conversion +      // +      // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier) +      // so these muls can't overflow with 32-bit ints (which we require) +      z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8; +      z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8; +      z->img_comp[i].coeff = 0; +      z->img_comp[i].raw_coeff = 0; +      z->img_comp[i].linebuf = NULL; +      z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15); +      if (z->img_comp[i].raw_data == NULL) +         return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); +      // align blocks for idct using mmx/sse +      z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15); +      if (z->progressive) { +         // w2, h2 are multiples of 8 (see above) +         z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8; +         z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8; +         z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15); +         if (z->img_comp[i].raw_coeff == NULL) +            return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); +         z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15); +      } +   } + +   return 1; +} + +// use comparisons since in some cases we handle more than one case (e.g. 
SOF) +#define stbi__DNL(x)         ((x) == 0xdc) +#define stbi__SOI(x)         ((x) == 0xd8) +#define stbi__EOI(x)         ((x) == 0xd9) +#define stbi__SOF(x)         ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2) +#define stbi__SOS(x)         ((x) == 0xda) + +#define stbi__SOF_progressive(x)   ((x) == 0xc2) + +static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan) +{ +   int m; +   z->jfif = 0; +   z->app14_color_transform = -1; // valid values are 0,1,2 +   z->marker = STBI__MARKER_none; // initialize cached marker to empty +   m = stbi__get_marker(z); +   if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG"); +   if (scan == STBI__SCAN_type) return 1; +   m = stbi__get_marker(z); +   while (!stbi__SOF(m)) { +      if (!stbi__process_marker(z,m)) return 0; +      m = stbi__get_marker(z); +      while (m == STBI__MARKER_none) { +         // some files have extra padding after their blocks, so ok, we'll scan +         if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG"); +         m = stbi__get_marker(z); +      } +   } +   z->progressive = stbi__SOF_progressive(m); +   if (!stbi__process_frame_header(z, scan)) return 0; +   return 1; +} + +static stbi_uc stbi__skip_jpeg_junk_at_end(stbi__jpeg *j) +{ +   // some JPEGs have junk at end, skip over it but if we find what looks +   // like a valid marker, resume there +   while (!stbi__at_eof(j->s)) { +      stbi_uc x = stbi__get8(j->s); +      while (x == 0xff) { // might be a marker +         if (stbi__at_eof(j->s)) return STBI__MARKER_none; +         x = stbi__get8(j->s); +         if (x != 0x00 && x != 0xff) { +            // not a stuffed zero or lead-in to another marker, looks +            // like an actual marker, return it +            return x; +         } +         // stuffed zero has x=0 now which ends the loop, meaning we go +         // back to regular scan loop. +         // repeated 0xff keeps trying to read the next byte of the marker. 
+      } +   } +   return STBI__MARKER_none; +} + +// decode image to YCbCr format +static int stbi__decode_jpeg_image(stbi__jpeg *j) +{ +   int m; +   for (m = 0; m < 4; m++) { +      j->img_comp[m].raw_data = NULL; +      j->img_comp[m].raw_coeff = NULL; +   } +   j->restart_interval = 0; +   if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0; +   m = stbi__get_marker(j); +   while (!stbi__EOI(m)) { +      if (stbi__SOS(m)) { +         if (!stbi__process_scan_header(j)) return 0; +         if (!stbi__parse_entropy_coded_data(j)) return 0; +         if (j->marker == STBI__MARKER_none ) { +         j->marker = stbi__skip_jpeg_junk_at_end(j); +            // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0 +         } +         m = stbi__get_marker(j); +         if (STBI__RESTART(m)) +            m = stbi__get_marker(j); +      } else if (stbi__DNL(m)) { +         int Ld = stbi__get16be(j->s); +         stbi__uint32 NL = stbi__get16be(j->s); +         if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG"); +         if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG"); +         m = stbi__get_marker(j); +      } else { +         if (!stbi__process_marker(j, m)) return 1; +         m = stbi__get_marker(j); +      } +   } +   if (j->progressive) +      stbi__jpeg_finish(j); +   return 1; +} + +// static jfif-centered resampling (across block boundaries) + +typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1, +                                    int w, int hs); + +#define stbi__div4(x) ((stbi_uc) ((x) >> 2)) + +static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ +   STBI_NOTUSED(out); +   STBI_NOTUSED(in_far); +   STBI_NOTUSED(w); +   STBI_NOTUSED(hs); +   return in_near; +} + +static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ +   // need to generate two samples vertically for every one in input +   int i; +   STBI_NOTUSED(hs); +   for (i=0; i < w; ++i) +      out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2); +   return out; +} + +static stbi_uc*  stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ +   // need to generate two samples horizontally for every one in input +   int i; +   stbi_uc *input = in_near; + +   if (w == 1) { +      // if only one sample, can't do any interpolation +      out[0] = out[1] = input[0]; +      return out; +   } + +   out[0] = input[0]; +   out[1] = stbi__div4(input[0]*3 + input[1] + 2); +   for (i=1; i < w-1; ++i) { +      int n = 3*input[i]+2; +      out[i*2+0] = stbi__div4(n+input[i-1]); +      out[i*2+1] = stbi__div4(n+input[i+1]); +   } +   out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2); +   out[i*2+1] = input[w-1]; + +   STBI_NOTUSED(in_far); +   STBI_NOTUSED(hs); + +   return out; +} + +#define stbi__div16(x) ((stbi_uc) ((x) >> 4)) + +static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ +   // need to generate 2x2 samples for every one in input +   int i,t0,t1; +   if (w == 1) { +      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); +      return out; +   } + +   t1 = 3*in_near[0] + in_far[0]; +   out[0] = stbi__div4(t1+2); +   for (i=1; i < w; ++i) { +      t0 = t1; +      t1 = 3*in_near[i]+in_far[i]; +      out[i*2-1] = stbi__div16(3*t0 + t1 + 8); +      out[i*2  ] = stbi__div16(3*t1 + t0 + 8); +   } +   out[w*2-1] = 
stbi__div4(t1+2); + +   STBI_NOTUSED(hs); + +   return out; +} + +#if defined(STBI_SSE2) || defined(STBI_NEON) +static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ +   // need to generate 2x2 samples for every one in input +   int i=0,t0,t1; + +   if (w == 1) { +      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); +      return out; +   } + +   t1 = 3*in_near[0] + in_far[0]; +   // process groups of 8 pixels for as long as we can. +   // note we can't handle the last pixel in a row in this loop +   // because we need to handle the filter boundary conditions. +   for (; i < ((w-1) & ~7); i += 8) { +#if defined(STBI_SSE2) +      // load and perform the vertical filtering pass +      // this uses 3*x + y = 4*x + (y - x) +      __m128i zero  = _mm_setzero_si128(); +      __m128i farb  = _mm_loadl_epi64((__m128i *) (in_far + i)); +      __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i)); +      __m128i farw  = _mm_unpacklo_epi8(farb, zero); +      __m128i nearw = _mm_unpacklo_epi8(nearb, zero); +      __m128i diff  = _mm_sub_epi16(farw, nearw); +      __m128i nears = _mm_slli_epi16(nearw, 2); +      __m128i curr  = _mm_add_epi16(nears, diff); // current row + +      // horizontal filter works the same based on shifted vers of current +      // row. "prev" is current row shifted right by 1 pixel; we need to +      // insert the previous pixel value (from t1). +      // "next" is current row shifted left by 1 pixel, with first pixel +      // of next block of 8 pixels added in. +      __m128i prv0 = _mm_slli_si128(curr, 2); +      __m128i nxt0 = _mm_srli_si128(curr, 2); +      __m128i prev = _mm_insert_epi16(prv0, t1, 0); +      __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7); + +      // horizontal filter, polyphase implementation since it's convenient: +      // even pixels = 3*cur + prev = cur*4 + (prev - cur) +      // odd  pixels = 3*cur + next = cur*4 + (next - cur) +      // note the shared term. +      __m128i bias  = _mm_set1_epi16(8); +      __m128i curs = _mm_slli_epi16(curr, 2); +      __m128i prvd = _mm_sub_epi16(prev, curr); +      __m128i nxtd = _mm_sub_epi16(next, curr); +      __m128i curb = _mm_add_epi16(curs, bias); +      __m128i even = _mm_add_epi16(prvd, curb); +      __m128i odd  = _mm_add_epi16(nxtd, curb); + +      // interleave even and odd pixels, then undo scaling. +      __m128i int0 = _mm_unpacklo_epi16(even, odd); +      __m128i int1 = _mm_unpackhi_epi16(even, odd); +      __m128i de0  = _mm_srli_epi16(int0, 4); +      __m128i de1  = _mm_srli_epi16(int1, 4); + +      // pack and write output +      __m128i outv = _mm_packus_epi16(de0, de1); +      _mm_storeu_si128((__m128i *) (out + i*2), outv); +#elif defined(STBI_NEON) +      // load and perform the vertical filtering pass +      // this uses 3*x + y = 4*x + (y - x) +      uint8x8_t farb  = vld1_u8(in_far + i); +      uint8x8_t nearb = vld1_u8(in_near + i); +      int16x8_t diff  = vreinterpretq_s16_u16(vsubl_u8(farb, nearb)); +      int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2)); +      int16x8_t curr  = vaddq_s16(nears, diff); // current row + +      // horizontal filter works the same based on shifted vers of current +      // row. "prev" is current row shifted right by 1 pixel; we need to +      // insert the previous pixel value (from t1). +      // "next" is current row shifted left by 1 pixel, with first pixel +      // of next block of 8 pixels added in. 
+      int16x8_t prv0 = vextq_s16(curr, curr, 7); +      int16x8_t nxt0 = vextq_s16(curr, curr, 1); +      int16x8_t prev = vsetq_lane_s16(t1, prv0, 0); +      int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7); + +      // horizontal filter, polyphase implementation since it's convenient: +      // even pixels = 3*cur + prev = cur*4 + (prev - cur) +      // odd  pixels = 3*cur + next = cur*4 + (next - cur) +      // note the shared term. +      int16x8_t curs = vshlq_n_s16(curr, 2); +      int16x8_t prvd = vsubq_s16(prev, curr); +      int16x8_t nxtd = vsubq_s16(next, curr); +      int16x8_t even = vaddq_s16(curs, prvd); +      int16x8_t odd  = vaddq_s16(curs, nxtd); + +      // undo scaling and round, then store with even/odd phases interleaved +      uint8x8x2_t o; +      o.val[0] = vqrshrun_n_s16(even, 4); +      o.val[1] = vqrshrun_n_s16(odd,  4); +      vst2_u8(out + i*2, o); +#endif + +      // "previous" value for next iter +      t1 = 3*in_near[i+7] + in_far[i+7]; +   } + +   t0 = t1; +   t1 = 3*in_near[i] + in_far[i]; +   out[i*2] = stbi__div16(3*t1 + t0 + 8); + +   for (++i; i < w; ++i) { +      t0 = t1; +      t1 = 3*in_near[i]+in_far[i]; +      out[i*2-1] = stbi__div16(3*t0 + t1 + 8); +      out[i*2  ] = stbi__div16(3*t1 + t0 + 8); +   } +   out[w*2-1] = stbi__div4(t1+2); + +   STBI_NOTUSED(hs); + +   return out; +} +#endif + +static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ +   // resample with nearest-neighbor +   int i,j; +   STBI_NOTUSED(in_far); +   for (i=0; i < w; ++i) +      for (j=0; j < hs; ++j) +         out[i*hs+j] = in_near[i]; +   return out; +} + +// this is a reduced-precision calculation of YCbCr-to-RGB introduced +// to make sure the code produces the same results in both SIMD and scalar +#define stbi__float2fixed(x)  (((int) ((x) * 4096.0f + 0.5f)) << 8) +static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step) +{ +   int i; +   for (i=0; i < count; ++i) { +      int y_fixed = (y[i] << 20) + (1<<19); // rounding +      int r,g,b; +      int cr = pcr[i] - 128; +      int cb = pcb[i] - 128; +      r = y_fixed +  cr* stbi__float2fixed(1.40200f); +      g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); +      b = y_fixed                                     +   cb* stbi__float2fixed(1.77200f); +      r >>= 20; +      g >>= 20; +      b >>= 20; +      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } +      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } +      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } +      out[0] = (stbi_uc)r; +      out[1] = (stbi_uc)g; +      out[2] = (stbi_uc)b; +      out[3] = 255; +      out += step; +   } +} + +#if defined(STBI_SSE2) || defined(STBI_NEON) +static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step) +{ +   int i = 0; + +#ifdef STBI_SSE2 +   // step == 3 is pretty ugly on the final interleave, and i'm not convinced +   // it's useful in practice (you wouldn't use it for textures, for example). +   // so just accelerate step == 4 case. +   if (step == 4) { +      // this is a fairly straightforward implementation and not super-optimized. 
+      __m128i signflip  = _mm_set1_epi8(-0x80); +      __m128i cr_const0 = _mm_set1_epi16(   (short) ( 1.40200f*4096.0f+0.5f)); +      __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f)); +      __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f)); +      __m128i cb_const1 = _mm_set1_epi16(   (short) ( 1.77200f*4096.0f+0.5f)); +      __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128); +      __m128i xw = _mm_set1_epi16(255); // alpha channel + +      for (; i+7 < count; i += 8) { +         // load +         __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i)); +         __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i)); +         __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i)); +         __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128 +         __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128 + +         // unpack to short (and left-shift cr, cb by 8) +         __m128i yw  = _mm_unpacklo_epi8(y_bias, y_bytes); +         __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased); +         __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased); + +         // color transform +         __m128i yws = _mm_srli_epi16(yw, 4); +         __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw); +         __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw); +         __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1); +         __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1); +         __m128i rws = _mm_add_epi16(cr0, yws); +         __m128i gwt = _mm_add_epi16(cb0, yws); +         __m128i bws = _mm_add_epi16(yws, cb1); +         __m128i gws = _mm_add_epi16(gwt, cr1); + +         // descale +         __m128i rw = _mm_srai_epi16(rws, 4); +         __m128i bw = _mm_srai_epi16(bws, 4); +         __m128i gw = _mm_srai_epi16(gws, 4); + +         // back to byte, set up for transpose +         __m128i brb = _mm_packus_epi16(rw, bw); +         __m128i gxb = _mm_packus_epi16(gw, xw); + +         // transpose to interleave channels +         __m128i t0 = _mm_unpacklo_epi8(brb, gxb); +         __m128i t1 = _mm_unpackhi_epi8(brb, gxb); +         __m128i o0 = _mm_unpacklo_epi16(t0, t1); +         __m128i o1 = _mm_unpackhi_epi16(t0, t1); + +         // store +         _mm_storeu_si128((__m128i *) (out + 0), o0); +         _mm_storeu_si128((__m128i *) (out + 16), o1); +         out += 32; +      } +   } +#endif + +#ifdef STBI_NEON +   // in this version, step=3 support would be easy to add. but is there demand? +   if (step == 4) { +      // this is a fairly straightforward implementation and not super-optimized. 
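In the SSE2 path above, the biased chroma bytes are widened into the high half of each 16-bit lane (roughly (cr - 128) << 8) and the constants are the same round(coef * 4096) values, so _mm_mulhi_epi16, which keeps the top 16 bits of the 32-bit product, leaves the result on the same 12.4 fixed-point scale as the luma term. A scalar sketch of a single lane (illustrative only; the Cr value is made up):

#include <stdio.h>

int main(void)
{
   short cr_const = (short)(1.40200f * 4096.0f + 0.5f);   // 5743, same as cr_const0 above
   short crw      = (short)((200 - 128) << 8);            // hypothetical Cr = 200, biased and shifted up
   int   lane     = ((int)crw * (int)cr_const) >> 16;     // what _mm_mulhi_epi16 keeps per lane
   printf("lane = %d, reference = %.1f\n", lane, (200 - 128) * 1.402f * 16.0f);
   return 0;
}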
+      uint8x8_t signflip = vdup_n_u8(0x80); +      int16x8_t cr_const0 = vdupq_n_s16(   (short) ( 1.40200f*4096.0f+0.5f)); +      int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f)); +      int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f)); +      int16x8_t cb_const1 = vdupq_n_s16(   (short) ( 1.77200f*4096.0f+0.5f)); + +      for (; i+7 < count; i += 8) { +         // load +         uint8x8_t y_bytes  = vld1_u8(y + i); +         uint8x8_t cr_bytes = vld1_u8(pcr + i); +         uint8x8_t cb_bytes = vld1_u8(pcb + i); +         int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip)); +         int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip)); + +         // expand to s16 +         int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4)); +         int16x8_t crw = vshll_n_s8(cr_biased, 7); +         int16x8_t cbw = vshll_n_s8(cb_biased, 7); + +         // color transform +         int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0); +         int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0); +         int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1); +         int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1); +         int16x8_t rws = vaddq_s16(yws, cr0); +         int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1); +         int16x8_t bws = vaddq_s16(yws, cb1); + +         // undo scaling, round, convert to byte +         uint8x8x4_t o; +         o.val[0] = vqrshrun_n_s16(rws, 4); +         o.val[1] = vqrshrun_n_s16(gws, 4); +         o.val[2] = vqrshrun_n_s16(bws, 4); +         o.val[3] = vdup_n_u8(255); + +         // store, interleaving r/g/b/a +         vst4_u8(out, o); +         out += 8*4; +      } +   } +#endif + +   for (; i < count; ++i) { +      int y_fixed = (y[i] << 20) + (1<<19); // rounding +      int r,g,b; +      int cr = pcr[i] - 128; +      int cb = pcb[i] - 128; +      r = y_fixed + cr* stbi__float2fixed(1.40200f); +      g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); +      b = y_fixed                                   +   cb* stbi__float2fixed(1.77200f); +      r >>= 20; +      g >>= 20; +      b >>= 20; +      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } +      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } +      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } +      out[0] = (stbi_uc)r; +      out[1] = (stbi_uc)g; +      out[2] = (stbi_uc)b; +      out[3] = 255; +      out += step; +   } +} +#endif + +// set up the kernels +static void stbi__setup_jpeg(stbi__jpeg *j) +{ +   j->idct_block_kernel = stbi__idct_block; +   j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row; +   j->resample_row_hv_2_kernel = stbi__resample_row_hv_2; + +#ifdef STBI_SSE2 +   if (stbi__sse2_available()) { +      j->idct_block_kernel = stbi__idct_simd; +      j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; +      j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; +   } +#endif + +#ifdef STBI_NEON +   j->idct_block_kernel = stbi__idct_simd; +   j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; +   j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; +#endif +} + +// clean up the temporary component buffers +static void stbi__cleanup_jpeg(stbi__jpeg *j) +{ +   stbi__free_jpeg_components(j, j->s->img_n, 0); +} + +typedef struct +{ +   resample_row_func resample; +   stbi_uc *line0,*line1; +   int hs,vs;   // expansion factor in each axis +   int w_lores; // horizontal pixels pre-expansion +   int ystep;   // how far through vertical 
expansion we are +   int ypos;    // which pre-expansion row we're on +} stbi__resample; + +// fast 0..255 * 0..255 => 0..255 rounded multiplication +static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y) +{ +   unsigned int t = x*y + 128; +   return (stbi_uc) ((t + (t >>8)) >> 8); +} + +static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp) +{ +   int n, decode_n, is_rgb; +   z->s->img_n = 0; // make stbi__cleanup_jpeg safe + +   // validate req_comp +   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); + +   // load a jpeg image from whichever source, but leave in YCbCr format +   if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; } + +   // determine actual number of components to generate +   n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1; + +   is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif)); + +   if (z->s->img_n == 3 && n < 3 && !is_rgb) +      decode_n = 1; +   else +      decode_n = z->s->img_n; + +   // nothing to do if no components requested; check this now to avoid +   // accessing uninitialized coutput[0] later +   if (decode_n <= 0) { stbi__cleanup_jpeg(z); return NULL; } + +   // resample and color-convert +   { +      int k; +      unsigned int i,j; +      stbi_uc *output; +      stbi_uc *coutput[4] = { NULL, NULL, NULL, NULL }; + +      stbi__resample res_comp[4]; + +      for (k=0; k < decode_n; ++k) { +         stbi__resample *r = &res_comp[k]; + +         // allocate line buffer big enough for upsampling off the edges +         // with upsample factor of 4 +         z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3); +         if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } + +         r->hs      = z->img_h_max / z->img_comp[k].h; +         r->vs      = z->img_v_max / z->img_comp[k].v; +         r->ystep   = r->vs >> 1; +         r->w_lores = (z->s->img_x + r->hs-1) / r->hs; +         r->ypos    = 0; +         r->line0   = r->line1 = z->img_comp[k].data; + +         if      (r->hs == 1 && r->vs == 1) r->resample = resample_row_1; +         else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2; +         else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2; +         else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel; +         else                               r->resample = stbi__resample_row_generic; +      } + +      // can't error after this so, this is safe +      output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1); +      if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } + +      // now go ahead and resample +      for (j=0; j < z->s->img_y; ++j) { +         stbi_uc *out = output + n * z->s->img_x * j; +         for (k=0; k < decode_n; ++k) { +            stbi__resample *r = &res_comp[k]; +            int y_bot = r->ystep >= (r->vs >> 1); +            coutput[k] = r->resample(z->img_comp[k].linebuf, +                                     y_bot ? r->line1 : r->line0, +                                     y_bot ? 
r->line0 : r->line1, +                                     r->w_lores, r->hs); +            if (++r->ystep >= r->vs) { +               r->ystep = 0; +               r->line0 = r->line1; +               if (++r->ypos < z->img_comp[k].y) +                  r->line1 += z->img_comp[k].w2; +            } +         } +         if (n >= 3) { +            stbi_uc *y = coutput[0]; +            if (z->s->img_n == 3) { +               if (is_rgb) { +                  for (i=0; i < z->s->img_x; ++i) { +                     out[0] = y[i]; +                     out[1] = coutput[1][i]; +                     out[2] = coutput[2][i]; +                     out[3] = 255; +                     out += n; +                  } +               } else { +                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); +               } +            } else if (z->s->img_n == 4) { +               if (z->app14_color_transform == 0) { // CMYK +                  for (i=0; i < z->s->img_x; ++i) { +                     stbi_uc m = coutput[3][i]; +                     out[0] = stbi__blinn_8x8(coutput[0][i], m); +                     out[1] = stbi__blinn_8x8(coutput[1][i], m); +                     out[2] = stbi__blinn_8x8(coutput[2][i], m); +                     out[3] = 255; +                     out += n; +                  } +               } else if (z->app14_color_transform == 2) { // YCCK +                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); +                  for (i=0; i < z->s->img_x; ++i) { +                     stbi_uc m = coutput[3][i]; +                     out[0] = stbi__blinn_8x8(255 - out[0], m); +                     out[1] = stbi__blinn_8x8(255 - out[1], m); +                     out[2] = stbi__blinn_8x8(255 - out[2], m); +                     out += n; +                  } +               } else { // YCbCr + alpha?  
Ignore the fourth channel for now +                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); +               } +            } else +               for (i=0; i < z->s->img_x; ++i) { +                  out[0] = out[1] = out[2] = y[i]; +                  out[3] = 255; // not used if n==3 +                  out += n; +               } +         } else { +            if (is_rgb) { +               if (n == 1) +                  for (i=0; i < z->s->img_x; ++i) +                     *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); +               else { +                  for (i=0; i < z->s->img_x; ++i, out += 2) { +                     out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); +                     out[1] = 255; +                  } +               } +            } else if (z->s->img_n == 4 && z->app14_color_transform == 0) { +               for (i=0; i < z->s->img_x; ++i) { +                  stbi_uc m = coutput[3][i]; +                  stbi_uc r = stbi__blinn_8x8(coutput[0][i], m); +                  stbi_uc g = stbi__blinn_8x8(coutput[1][i], m); +                  stbi_uc b = stbi__blinn_8x8(coutput[2][i], m); +                  out[0] = stbi__compute_y(r, g, b); +                  out[1] = 255; +                  out += n; +               } +            } else if (z->s->img_n == 4 && z->app14_color_transform == 2) { +               for (i=0; i < z->s->img_x; ++i) { +                  out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]); +                  out[1] = 255; +                  out += n; +               } +            } else { +               stbi_uc *y = coutput[0]; +               if (n == 1) +                  for (i=0; i < z->s->img_x; ++i) out[i] = y[i]; +               else +                  for (i=0; i < z->s->img_x; ++i) { *out++ = y[i]; *out++ = 255; } +            } +         } +      } +      stbi__cleanup_jpeg(z); +      *out_x = z->s->img_x; +      *out_y = z->s->img_y; +      if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output +      return output; +   } +} + +static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ +   unsigned char* result; +   stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg)); +   if (!j) return stbi__errpuc("outofmem", "Out of memory"); +   memset(j, 0, sizeof(stbi__jpeg)); +   STBI_NOTUSED(ri); +   j->s = s; +   stbi__setup_jpeg(j); +   result = load_jpeg_image(j, x,y,comp,req_comp); +   STBI_FREE(j); +   return result; +} + +static int stbi__jpeg_test(stbi__context *s) +{ +   int r; +   stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg)); +   if (!j) return stbi__err("outofmem", "Out of memory"); +   memset(j, 0, sizeof(stbi__jpeg)); +   j->s = s; +   stbi__setup_jpeg(j); +   r = stbi__decode_jpeg_header(j, STBI__SCAN_type); +   stbi__rewind(s); +   STBI_FREE(j); +   return r; +} + +static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp) +{ +   if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) { +      stbi__rewind( j->s ); +      return 0; +   } +   if (x) *x = j->s->img_x; +   if (y) *y = j->s->img_y; +   if (comp) *comp = j->s->img_n >= 3 ? 
3 : 1; +   return 1; +} + +static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp) +{ +   int result; +   stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg))); +   if (!j) return stbi__err("outofmem", "Out of memory"); +   memset(j, 0, sizeof(stbi__jpeg)); +   j->s = s; +   result = stbi__jpeg_info_raw(j, x, y, comp); +   STBI_FREE(j); +   return result; +} +#endif + +// public domain zlib decode    v0.2  Sean Barrett 2006-11-18 +//    simple implementation +//      - all input must be provided in an upfront buffer +//      - all output is written to a single output buffer (can malloc/realloc) +//    performance +//      - fast huffman + +#ifndef STBI_NO_ZLIB + +// fast-way is faster to check than jpeg huffman, but slow way is slower +#define STBI__ZFAST_BITS  9 // accelerate all cases in default tables +#define STBI__ZFAST_MASK  ((1 << STBI__ZFAST_BITS) - 1) +#define STBI__ZNSYMS 288 // number of symbols in literal/length alphabet + +// zlib-style huffman encoding +// (jpegs packs from left, zlib from right, so can't share code) +typedef struct +{ +   stbi__uint16 fast[1 << STBI__ZFAST_BITS]; +   stbi__uint16 firstcode[16]; +   int maxcode[17]; +   stbi__uint16 firstsymbol[16]; +   stbi_uc  size[STBI__ZNSYMS]; +   stbi__uint16 value[STBI__ZNSYMS]; +} stbi__zhuffman; + +stbi_inline static int stbi__bitreverse16(int n) +{ +  n = ((n & 0xAAAA) >>  1) | ((n & 0x5555) << 1); +  n = ((n & 0xCCCC) >>  2) | ((n & 0x3333) << 2); +  n = ((n & 0xF0F0) >>  4) | ((n & 0x0F0F) << 4); +  n = ((n & 0xFF00) >>  8) | ((n & 0x00FF) << 8); +  return n; +} + +stbi_inline static int stbi__bit_reverse(int v, int bits) +{ +   STBI_ASSERT(bits <= 16); +   // to bit reverse n bits, reverse 16 and shift +   // e.g. 11 bits, bit reverse and shift away 5 +   return stbi__bitreverse16(v) >> (16-bits); +} + +static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num) +{ +   int i,k=0; +   int code, next_code[16], sizes[17]; + +   // DEFLATE spec for generating codes +   memset(sizes, 0, sizeof(sizes)); +   memset(z->fast, 0, sizeof(z->fast)); +   for (i=0; i < num; ++i) +      ++sizes[sizelist[i]]; +   sizes[0] = 0; +   for (i=1; i < 16; ++i) +      if (sizes[i] > (1 << i)) +         return stbi__err("bad sizes", "Corrupt PNG"); +   code = 0; +   for (i=1; i < 16; ++i) { +      next_code[i] = code; +      z->firstcode[i] = (stbi__uint16) code; +      z->firstsymbol[i] = (stbi__uint16) k; +      code = (code + sizes[i]); +      if (sizes[i]) +         if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG"); +      z->maxcode[i] = code << (16-i); // preshift for inner loop +      code <<= 1; +      k += sizes[i]; +   } +   z->maxcode[16] = 0x10000; // sentinel +   for (i=0; i < num; ++i) { +      int s = sizelist[i]; +      if (s) { +         int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s]; +         stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i); +         z->size [c] = (stbi_uc     ) s; +         z->value[c] = (stbi__uint16) i; +         if (s <= STBI__ZFAST_BITS) { +            int j = stbi__bit_reverse(next_code[s],s); +            while (j < (1 << STBI__ZFAST_BITS)) { +               z->fast[j] = fastv; +               j += (1 << s); +            } +         } +         ++next_code[s]; +      } +   } +   return 1; +} + +// zlib-from-memory implementation for PNG reading +//    because PNG allows splitting the zlib stream arbitrarily, +//    and it's annoying structurally to have PNG call ZLIB call PNG, +//    we require PNG read 
all the IDATs and combine them into a single +//    memory buffer + +typedef struct +{ +   stbi_uc *zbuffer, *zbuffer_end; +   int num_bits; +   int hit_zeof_once; +   stbi__uint32 code_buffer; + +   char *zout; +   char *zout_start; +   char *zout_end; +   int   z_expandable; + +   stbi__zhuffman z_length, z_distance; +} stbi__zbuf; + +stbi_inline static int stbi__zeof(stbi__zbuf *z) +{ +   return (z->zbuffer >= z->zbuffer_end); +} + +stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z) +{ +   return stbi__zeof(z) ? 0 : *z->zbuffer++; +} + +static void stbi__fill_bits(stbi__zbuf *z) +{ +   do { +      if (z->code_buffer >= (1U << z->num_bits)) { +        z->zbuffer = z->zbuffer_end;  /* treat this as EOF so we fail. */ +        return; +      } +      z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits; +      z->num_bits += 8; +   } while (z->num_bits <= 24); +} + +stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n) +{ +   unsigned int k; +   if (z->num_bits < n) stbi__fill_bits(z); +   k = z->code_buffer & ((1 << n) - 1); +   z->code_buffer >>= n; +   z->num_bits -= n; +   return k; +} + +static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z) +{ +   int b,s,k; +   // not resolved by fast table, so compute it the slow way +   // use jpeg approach, which requires MSbits at top +   k = stbi__bit_reverse(a->code_buffer, 16); +   for (s=STBI__ZFAST_BITS+1; ; ++s) +      if (k < z->maxcode[s]) +         break; +   if (s >= 16) return -1; // invalid code! +   // code size is s, so: +   b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s]; +   if (b >= STBI__ZNSYMS) return -1; // some data was corrupt somewhere! +   if (z->size[b] != s) return -1;  // was originally an assert, but report failure instead. +   a->code_buffer >>= s; +   a->num_bits -= s; +   return z->value[b]; +} + +stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) +{ +   int b,s; +   if (a->num_bits < 16) { +      if (stbi__zeof(a)) { +         if (!a->hit_zeof_once) { +            // This is the first time we hit eof, insert 16 extra padding btis +            // to allow us to keep going; if we actually consume any of them +            // though, that is invalid data. This is caught later. +            a->hit_zeof_once = 1; +            a->num_bits += 16; // add 16 implicit zero bits +         } else { +            // We already inserted our extra 16 padding bits and are again +            // out, this stream is actually prematurely terminated. 
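The accelerated path above works because stbi__zbuild_huffman packs each short code's bit length and symbol into one 16-bit entry, (size << 9) | symbol, indexed by the next STBI__ZFAST_BITS bits of the stream; a single table lookup therefore yields both how many bits to consume and what was decoded. A tiny standalone illustration (the code length and symbol are hypothetical):

#include <stdio.h>

int main(void)
{
   unsigned short fastv = (unsigned short)((5 << 9) | 260);  // a 5-bit code mapping to symbol 260
   int size   = fastv >> 9;    // bits to drop from code_buffer
   int symbol = fastv & 511;   // decoded literal/length symbol
   printf("consume %d bits, symbol %d\n", size, symbol);
   return 0;
}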
+            return -1; +         } +      } else { +         stbi__fill_bits(a); +      } +   } +   b = z->fast[a->code_buffer & STBI__ZFAST_MASK]; +   if (b) { +      s = b >> 9; +      a->code_buffer >>= s; +      a->num_bits -= s; +      return b & 511; +   } +   return stbi__zhuffman_decode_slowpath(a, z); +} + +static int stbi__zexpand(stbi__zbuf *z, char *zout, int n)  // need to make room for n bytes +{ +   char *q; +   unsigned int cur, limit, old_limit; +   z->zout = zout; +   if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG"); +   cur   = (unsigned int) (z->zout - z->zout_start); +   limit = old_limit = (unsigned) (z->zout_end - z->zout_start); +   if (UINT_MAX - cur < (unsigned) n) return stbi__err("outofmem", "Out of memory"); +   while (cur + n > limit) { +      if(limit > UINT_MAX / 2) return stbi__err("outofmem", "Out of memory"); +      limit *= 2; +   } +   q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit); +   STBI_NOTUSED(old_limit); +   if (q == NULL) return stbi__err("outofmem", "Out of memory"); +   z->zout_start = q; +   z->zout       = q + cur; +   z->zout_end   = q + limit; +   return 1; +} + +static const int stbi__zlength_base[31] = { +   3,4,5,6,7,8,9,10,11,13, +   15,17,19,23,27,31,35,43,51,59, +   67,83,99,115,131,163,195,227,258,0,0 }; + +static const int stbi__zlength_extra[31]= +{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 }; + +static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, +257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0}; + +static const int stbi__zdist_extra[32] = +{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + +static int stbi__parse_huffman_block(stbi__zbuf *a) +{ +   char *zout = a->zout; +   for(;;) { +      int z = stbi__zhuffman_decode(a, &a->z_length); +      if (z < 256) { +         if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes +         if (zout >= a->zout_end) { +            if (!stbi__zexpand(a, zout, 1)) return 0; +            zout = a->zout; +         } +         *zout++ = (char) z; +      } else { +         stbi_uc *p; +         int len,dist; +         if (z == 256) { +            a->zout = zout; +            if (a->hit_zeof_once && a->num_bits < 16) { +               // The first time we hit zeof, we inserted 16 extra zero bits into our bit +               // buffer so the decoder can just do its speculative decoding. But if we +               // actually consumed any of those bits (which is the case when num_bits < 16), +               // the stream actually read past the end so it is malformed. 
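The base/extra tables above encode DEFLATE's match lengths and distances: a decoded length symbol in 257..285 selects a base length plus a count of extra bits read literally from the stream, and distances work the same way from their own alphabet. A short standalone example of the length side (the symbol and the extra bit read are made up):

#include <stdio.h>

static const int zlength_base[]  = { 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,
                                     67,83,99,115,131,163,195,227,258,0,0 };
static const int zlength_extra[] = { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };

int main(void)
{
   int symbol = 265;                    // hypothetical length symbol decoded from the stream
   int z      = symbol - 257;           // index into the tables, as in the block parser above
   int extra  = 1;                      // zlength_extra[z] == 1 bit; pretend we read a 1
   int len    = zlength_base[z] + extra;
   printf("match length = %d (base %d, %d extra bit)\n", len, zlength_base[z], zlength_extra[z]);
   return 0;
}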
+               return stbi__err("unexpected end","Corrupt PNG"); +            } +            return 1; +         } +         if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, length codes 286 and 287 must not appear in compressed data +         z -= 257; +         len = stbi__zlength_base[z]; +         if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]); +         z = stbi__zhuffman_decode(a, &a->z_distance); +         if (z < 0 || z >= 30) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, distance codes 30 and 31 must not appear in compressed data +         dist = stbi__zdist_base[z]; +         if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]); +         if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG"); +         if (len > a->zout_end - zout) { +            if (!stbi__zexpand(a, zout, len)) return 0; +            zout = a->zout; +         } +         p = (stbi_uc *) (zout - dist); +         if (dist == 1) { // run of one byte; common in images. +            stbi_uc v = *p; +            if (len) { do *zout++ = v; while (--len); } +         } else { +            if (len) { do *zout++ = *p++; while (--len); } +         } +      } +   } +} + +static int stbi__compute_huffman_codes(stbi__zbuf *a) +{ +   static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; +   stbi__zhuffman z_codelength; +   stbi_uc lencodes[286+32+137];//padding for maximum single op +   stbi_uc codelength_sizes[19]; +   int i,n; + +   int hlit  = stbi__zreceive(a,5) + 257; +   int hdist = stbi__zreceive(a,5) + 1; +   int hclen = stbi__zreceive(a,4) + 4; +   int ntot  = hlit + hdist; + +   memset(codelength_sizes, 0, sizeof(codelength_sizes)); +   for (i=0; i < hclen; ++i) { +      int s = stbi__zreceive(a,3); +      codelength_sizes[length_dezigzag[i]] = (stbi_uc) s; +   } +   if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0; + +   n = 0; +   while (n < ntot) { +      int c = stbi__zhuffman_decode(a, &z_codelength); +      if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG"); +      if (c < 16) +         lencodes[n++] = (stbi_uc) c; +      else { +         stbi_uc fill = 0; +         if (c == 16) { +            c = stbi__zreceive(a,2)+3; +            if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG"); +            fill = lencodes[n-1]; +         } else if (c == 17) { +            c = stbi__zreceive(a,3)+3; +         } else if (c == 18) { +            c = stbi__zreceive(a,7)+11; +         } else { +            return stbi__err("bad codelengths", "Corrupt PNG"); +         } +         if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG"); +         memset(lencodes+n, fill, c); +         n += c; +      } +   } +   if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG"); +   if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0; +   if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0; +   return 1; +} + +static int stbi__parse_uncompressed_block(stbi__zbuf *a) +{ +   stbi_uc header[4]; +   int len,nlen,k; +   if (a->num_bits & 7) +      stbi__zreceive(a, a->num_bits & 7); // discard +   // drain the bit-packed data into header +   k = 0; +   while (a->num_bits > 0) { +      header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check +      a->code_buffer >>= 8; +      a->num_bits -= 8; +   } +   if (a->num_bits < 0) return 
stbi__err("zlib corrupt","Corrupt PNG"); +   // now fill header the normal way +   while (k < 4) +      header[k++] = stbi__zget8(a); +   len  = header[1] * 256 + header[0]; +   nlen = header[3] * 256 + header[2]; +   if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG"); +   if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG"); +   if (a->zout + len > a->zout_end) +      if (!stbi__zexpand(a, a->zout, len)) return 0; +   memcpy(a->zout, a->zbuffer, len); +   a->zbuffer += len; +   a->zout += len; +   return 1; +} + +static int stbi__parse_zlib_header(stbi__zbuf *a) +{ +   int cmf   = stbi__zget8(a); +   int cm    = cmf & 15; +   /* int cinfo = cmf >> 4; */ +   int flg   = stbi__zget8(a); +   if (stbi__zeof(a)) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec +   if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec +   if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png +   if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png +   // window = 1 << (8 + cinfo)... but who cares, we fully buffer output +   return 1; +} + +static const stbi_uc stbi__zdefault_length[STBI__ZNSYMS] = +{ +   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, +   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, +   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, +   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, +   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, +   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, +   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, +   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, +   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8 +}; +static const stbi_uc stbi__zdefault_distance[32] = +{ +   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5 +}; +/* +Init algorithm: +{ +   int i;   // use <= to match clearly with spec +   for (i=0; i <= 143; ++i)     stbi__zdefault_length[i]   = 8; +   for (   ; i <= 255; ++i)     stbi__zdefault_length[i]   = 9; +   for (   ; i <= 279; ++i)     stbi__zdefault_length[i]   = 7; +   for (   ; i <= 287; ++i)     stbi__zdefault_length[i]   = 8; + +   for (i=0; i <=  31; ++i)     stbi__zdefault_distance[i] = 5; +} +*/ + +static int stbi__parse_zlib(stbi__zbuf *a, int parse_header) +{ +   int final, type; +   if (parse_header) +      if (!stbi__parse_zlib_header(a)) return 0; +   a->num_bits = 0; +   a->code_buffer = 0; +   a->hit_zeof_once = 0; +   do { +      final = stbi__zreceive(a,1); +      type = stbi__zreceive(a,2); +      if (type == 0) { +         if (!stbi__parse_uncompressed_block(a)) return 0; +      } else if (type == 3) { +         return 0; +      } else { +         if (type == 1) { +            // use fixed code lengths +            if (!stbi__zbuild_huffman(&a->z_length  , stbi__zdefault_length  , STBI__ZNSYMS)) return 0; +            if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance,  32)) return 0; +         } else { +            if (!stbi__compute_huffman_codes(a)) return 0; +         } +         if (!stbi__parse_huffman_block(a)) return 0; +      } +   } while (!final); +   return 1; +} + +static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header) +{ +   a->zout_start = obuf; +   a->zout       = obuf; +   a->zout_end  
 = obuf + olen; +   a->z_expandable = exp; + +   return stbi__parse_zlib(a, parse_header); +} + +STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen) +{ +   stbi__zbuf a; +   char *p = (char *) stbi__malloc(initial_size); +   if (p == NULL) return NULL; +   a.zbuffer = (stbi_uc *) buffer; +   a.zbuffer_end = (stbi_uc *) buffer + len; +   if (stbi__do_zlib(&a, p, initial_size, 1, 1)) { +      if (outlen) *outlen = (int) (a.zout - a.zout_start); +      return a.zout_start; +   } else { +      STBI_FREE(a.zout_start); +      return NULL; +   } +} + +STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen) +{ +   return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen); +} + +STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header) +{ +   stbi__zbuf a; +   char *p = (char *) stbi__malloc(initial_size); +   if (p == NULL) return NULL; +   a.zbuffer = (stbi_uc *) buffer; +   a.zbuffer_end = (stbi_uc *) buffer + len; +   if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) { +      if (outlen) *outlen = (int) (a.zout - a.zout_start); +      return a.zout_start; +   } else { +      STBI_FREE(a.zout_start); +      return NULL; +   } +} + +STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen) +{ +   stbi__zbuf a; +   a.zbuffer = (stbi_uc *) ibuffer; +   a.zbuffer_end = (stbi_uc *) ibuffer + ilen; +   if (stbi__do_zlib(&a, obuffer, olen, 0, 1)) +      return (int) (a.zout - a.zout_start); +   else +      return -1; +} + +STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen) +{ +   stbi__zbuf a; +   char *p = (char *) stbi__malloc(16384); +   if (p == NULL) return NULL; +   a.zbuffer = (stbi_uc *) buffer; +   a.zbuffer_end = (stbi_uc *) buffer+len; +   if (stbi__do_zlib(&a, p, 16384, 1, 0)) { +      if (outlen) *outlen = (int) (a.zout - a.zout_start); +      return a.zout_start; +   } else { +      STBI_FREE(a.zout_start); +      return NULL; +   } +} + +STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen) +{ +   stbi__zbuf a; +   a.zbuffer = (stbi_uc *) ibuffer; +   a.zbuffer_end = (stbi_uc *) ibuffer + ilen; +   if (stbi__do_zlib(&a, obuffer, olen, 0, 0)) +      return (int) (a.zout - a.zout_start); +   else +      return -1; +} +#endif + +// public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18 +//    simple implementation +//      - only 8-bit samples +//      - no CRC checking +//      - allocates lots of intermediate memory +//        - avoids problem of streaming data between subsystems +//        - avoids explicit window management +//    performance +//      - uses stb_zlib, a PD zlib implementation with fast huffman decoding + +#ifndef STBI_NO_PNG +typedef struct +{ +   stbi__uint32 length; +   stbi__uint32 type; +} stbi__pngchunk; + +static stbi__pngchunk stbi__get_chunk_header(stbi__context *s) +{ +   stbi__pngchunk c; +   c.length = stbi__get32be(s); +   c.type   = stbi__get32be(s); +   return c; +} + +static int stbi__check_png_header(stbi__context *s) +{ +   static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 }; +   int i; +   for (i=0; i < 8; ++i) +      if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG"); +   return 1; +} + +typedef struct +{ +   stbi__context *s; +   stbi_uc *idata, *expanded, *out; +   int depth; +} stbi__png; + + +enum { +   
STBI__F_none=0, +   STBI__F_sub=1, +   STBI__F_up=2, +   STBI__F_avg=3, +   STBI__F_paeth=4, +   // synthetic filter used for first scanline to avoid needing a dummy row of 0s +   STBI__F_avg_first +}; + +static stbi_uc first_row_filter[5] = +{ +   STBI__F_none, +   STBI__F_sub, +   STBI__F_none, +   STBI__F_avg_first, +   STBI__F_sub // Paeth with b=c=0 turns out to be equivalent to sub +}; + +static int stbi__paeth(int a, int b, int c) +{ +   // This formulation looks very different from the reference in the PNG spec, but is +   // actually equivalent and has favorable data dependencies and admits straightforward +   // generation of branch-free code, which helps performance significantly. +   int thresh = c*3 - (a + b); +   int lo = a < b ? a : b; +   int hi = a < b ? b : a; +   int t0 = (hi <= thresh) ? lo : c; +   int t1 = (thresh <= lo) ? hi : t0; +   return t1; +} + +static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 }; + +// adds an extra all-255 alpha channel +// dest == src is legal +// img_n must be 1 or 3 +static void stbi__create_png_alpha_expand8(stbi_uc *dest, stbi_uc *src, stbi__uint32 x, int img_n) +{ +   int i; +   // must process data backwards since we allow dest==src +   if (img_n == 1) { +      for (i=x-1; i >= 0; --i) { +         dest[i*2+1] = 255; +         dest[i*2+0] = src[i]; +      } +   } else { +      STBI_ASSERT(img_n == 3); +      for (i=x-1; i >= 0; --i) { +         dest[i*4+3] = 255; +         dest[i*4+2] = src[i*3+2]; +         dest[i*4+1] = src[i*3+1]; +         dest[i*4+0] = src[i*3+0]; +      } +   } +} + +// create the png data from post-deflated data +static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color) +{ +   int bytes = (depth == 16 ? 2 : 1); +   stbi__context *s = a->s; +   stbi__uint32 i,j,stride = x*out_n*bytes; +   stbi__uint32 img_len, img_width_bytes; +   stbi_uc *filter_buf; +   int all_ok = 1; +   int k; +   int img_n = s->img_n; // copy it into a local for later + +   int output_bytes = out_n*bytes; +   int filter_bytes = img_n*bytes; +   int width = x; + +   STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1); +   a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into +   if (!a->out) return stbi__err("outofmem", "Out of memory"); + +   // note: error exits here don't need to clean up a->out individually, +   // stbi__do_png always does on error. +   if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG"); +   img_width_bytes = (((img_n * x * depth) + 7) >> 3); +   if (!stbi__mad2sizes_valid(img_width_bytes, y, img_width_bytes)) return stbi__err("too large", "Corrupt PNG"); +   img_len = (img_width_bytes + 1) * y; + +   // we used to check for exact match between raw_len and img_len on non-interlaced PNGs, +   // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros), +   // so just check for raw_len < img_len always. +   if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG"); + +   // Allocate two scan lines worth of filter workspace buffer. 
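The comment above claims the formulation in stbi__paeth is equivalent to the PNG spec's Paeth predictor. The spec version below, together with a copy of the branch-free formulation, makes that easy to brute-force over all byte triples (a standalone reference sketch, not part of stb_image):

#include <assert.h>
#include <stdio.h>

// PNG-spec Paeth predictor: pick whichever of a (left), b (above), c (upper-left)
// is closest to p = a + b - c, preferring a, then b, then c on ties.
static int paeth_ref(int a, int b, int c)
{
   int p  = a + b - c;
   int pa = p > a ? p - a : a - p;
   int pb = p > b ? p - b : b - p;
   int pc = p > c ? p - c : c - p;
   if (pa <= pb && pa <= pc) return a;
   if (pb <= pc) return b;
   return c;
}

// Same formulation as stbi__paeth above, copied here so the check is self-contained.
static int paeth_fast(int a, int b, int c)
{
   int thresh = c*3 - (a + b);
   int lo = a < b ? a : b;
   int hi = a < b ? b : a;
   int t0 = (hi <= thresh) ? lo : c;
   int t1 = (thresh <= lo) ? hi : t0;
   return t1;
}

int main(void)
{
   for (int a = 0; a < 256; ++a)
      for (int b = 0; b < 256; ++b)
         for (int c = 0; c < 256; ++c)
            assert(paeth_ref(a, b, c) == paeth_fast(a, b, c));
   printf("both predictors agree on all byte triples\n");
   return 0;
}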
+   filter_buf = (stbi_uc *) stbi__malloc_mad2(img_width_bytes, 2, 0); +   if (!filter_buf) return stbi__err("outofmem", "Out of memory"); + +   // Filtering for low-bit-depth images +   if (depth < 8) { +      filter_bytes = 1; +      width = img_width_bytes; +   } + +   for (j=0; j < y; ++j) { +      // cur/prior filter buffers alternate +      stbi_uc *cur = filter_buf + (j & 1)*img_width_bytes; +      stbi_uc *prior = filter_buf + (~j & 1)*img_width_bytes; +      stbi_uc *dest = a->out + stride*j; +      int nk = width * filter_bytes; +      int filter = *raw++; + +      // check filter type +      if (filter > 4) { +         all_ok = stbi__err("invalid filter","Corrupt PNG"); +         break; +      } + +      // if first row, use special filter that doesn't sample previous row +      if (j == 0) filter = first_row_filter[filter]; + +      // perform actual filtering +      switch (filter) { +      case STBI__F_none: +         memcpy(cur, raw, nk); +         break; +      case STBI__F_sub: +         memcpy(cur, raw, filter_bytes); +         for (k = filter_bytes; k < nk; ++k) +            cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); +         break; +      case STBI__F_up: +         for (k = 0; k < nk; ++k) +            cur[k] = STBI__BYTECAST(raw[k] + prior[k]); +         break; +      case STBI__F_avg: +         for (k = 0; k < filter_bytes; ++k) +            cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); +         for (k = filter_bytes; k < nk; ++k) +            cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); +         break; +      case STBI__F_paeth: +         for (k = 0; k < filter_bytes; ++k) +            cur[k] = STBI__BYTECAST(raw[k] + prior[k]); // prior[k] == stbi__paeth(0,prior[k],0) +         for (k = filter_bytes; k < nk; ++k) +            cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes], prior[k], prior[k-filter_bytes])); +         break; +      case STBI__F_avg_first: +         memcpy(cur, raw, filter_bytes); +         for (k = filter_bytes; k < nk; ++k) +            cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); +         break; +      } + +      raw += nk; + +      // expand decoded bits in cur to dest, also adding an extra alpha channel if desired +      if (depth < 8) { +         stbi_uc scale = (color == 0) ? 
stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range +         stbi_uc *in = cur; +         stbi_uc *out = dest; +         stbi_uc inb = 0; +         stbi__uint32 nsmp = x*img_n; + +         // expand bits to bytes first +         if (depth == 4) { +            for (i=0; i < nsmp; ++i) { +               if ((i & 1) == 0) inb = *in++; +               *out++ = scale * (inb >> 4); +               inb <<= 4; +            } +         } else if (depth == 2) { +            for (i=0; i < nsmp; ++i) { +               if ((i & 3) == 0) inb = *in++; +               *out++ = scale * (inb >> 6); +               inb <<= 2; +            } +         } else { +            STBI_ASSERT(depth == 1); +            for (i=0; i < nsmp; ++i) { +               if ((i & 7) == 0) inb = *in++; +               *out++ = scale * (inb >> 7); +               inb <<= 1; +            } +         } + +         // insert alpha=255 values if desired +         if (img_n != out_n) +            stbi__create_png_alpha_expand8(dest, dest, x, img_n); +      } else if (depth == 8) { +         if (img_n == out_n) +            memcpy(dest, cur, x*img_n); +         else +            stbi__create_png_alpha_expand8(dest, cur, x, img_n); +      } else if (depth == 16) { +         // convert the image data from big-endian to platform-native +         stbi__uint16 *dest16 = (stbi__uint16*)dest; +         stbi__uint32 nsmp = x*img_n; + +         if (img_n == out_n) { +            for (i = 0; i < nsmp; ++i, ++dest16, cur += 2) +               *dest16 = (cur[0] << 8) | cur[1]; +         } else { +            STBI_ASSERT(img_n+1 == out_n); +            if (img_n == 1) { +               for (i = 0; i < x; ++i, dest16 += 2, cur += 2) { +                  dest16[0] = (cur[0] << 8) | cur[1]; +                  dest16[1] = 0xffff; +               } +            } else { +               STBI_ASSERT(img_n == 3); +               for (i = 0; i < x; ++i, dest16 += 4, cur += 6) { +                  dest16[0] = (cur[0] << 8) | cur[1]; +                  dest16[1] = (cur[2] << 8) | cur[3]; +                  dest16[2] = (cur[4] << 8) | cur[5]; +                  dest16[3] = 0xffff; +               } +            } +         } +      } +   } + +   STBI_FREE(filter_buf); +   if (!all_ok) return 0; + +   return 1; +} + +static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced) +{ +   int bytes = (depth == 16 ? 
2 : 1); +   int out_bytes = out_n * bytes; +   stbi_uc *final; +   int p; +   if (!interlaced) +      return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color); + +   // de-interlacing +   final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0); +   if (!final) return stbi__err("outofmem", "Out of memory"); +   for (p=0; p < 7; ++p) { +      int xorig[] = { 0,4,0,2,0,1,0 }; +      int yorig[] = { 0,0,4,0,2,0,1 }; +      int xspc[]  = { 8,8,4,4,2,2,1 }; +      int yspc[]  = { 8,8,8,4,4,2,2 }; +      int i,j,x,y; +      // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1 +      x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p]; +      y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p]; +      if (x && y) { +         stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y; +         if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) { +            STBI_FREE(final); +            return 0; +         } +         for (j=0; j < y; ++j) { +            for (i=0; i < x; ++i) { +               int out_y = j*yspc[p]+yorig[p]; +               int out_x = i*xspc[p]+xorig[p]; +               memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes, +                      a->out + (j*x+i)*out_bytes, out_bytes); +            } +         } +         STBI_FREE(a->out); +         image_data += img_len; +         image_data_len -= img_len; +      } +   } +   a->out = final; + +   return 1; +} + +static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n) +{ +   stbi__context *s = z->s; +   stbi__uint32 i, pixel_count = s->img_x * s->img_y; +   stbi_uc *p = z->out; + +   // compute color-based transparency, assuming we've +   // already got 255 as the alpha value in the output +   STBI_ASSERT(out_n == 2 || out_n == 4); + +   if (out_n == 2) { +      for (i=0; i < pixel_count; ++i) { +         p[1] = (p[0] == tc[0] ? 0 : 255); +         p += 2; +      } +   } else { +      for (i=0; i < pixel_count; ++i) { +         if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) +            p[3] = 0; +         p += 4; +      } +   } +   return 1; +} + +static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n) +{ +   stbi__context *s = z->s; +   stbi__uint32 i, pixel_count = s->img_x * s->img_y; +   stbi__uint16 *p = (stbi__uint16*) z->out; + +   // compute color-based transparency, assuming we've +   // already got 65535 as the alpha value in the output +   STBI_ASSERT(out_n == 2 || out_n == 4); + +   if (out_n == 2) { +      for (i = 0; i < pixel_count; ++i) { +         p[1] = (p[0] == tc[0] ? 
0 : 65535); +         p += 2; +      } +   } else { +      for (i = 0; i < pixel_count; ++i) { +         if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) +            p[3] = 0; +         p += 4; +      } +   } +   return 1; +} + +static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n) +{ +   stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y; +   stbi_uc *p, *temp_out, *orig = a->out; + +   p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0); +   if (p == NULL) return stbi__err("outofmem", "Out of memory"); + +   // between here and free(out) below, exitting would leak +   temp_out = p; + +   if (pal_img_n == 3) { +      for (i=0; i < pixel_count; ++i) { +         int n = orig[i]*4; +         p[0] = palette[n  ]; +         p[1] = palette[n+1]; +         p[2] = palette[n+2]; +         p += 3; +      } +   } else { +      for (i=0; i < pixel_count; ++i) { +         int n = orig[i]*4; +         p[0] = palette[n  ]; +         p[1] = palette[n+1]; +         p[2] = palette[n+2]; +         p[3] = palette[n+3]; +         p += 4; +      } +   } +   STBI_FREE(a->out); +   a->out = temp_out; + +   STBI_NOTUSED(len); + +   return 1; +} + +static int stbi__unpremultiply_on_load_global = 0; +static int stbi__de_iphone_flag_global = 0; + +STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply) +{ +   stbi__unpremultiply_on_load_global = flag_true_if_should_unpremultiply; +} + +STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert) +{ +   stbi__de_iphone_flag_global = flag_true_if_should_convert; +} + +#ifndef STBI_THREAD_LOCAL +#define stbi__unpremultiply_on_load  stbi__unpremultiply_on_load_global +#define stbi__de_iphone_flag  stbi__de_iphone_flag_global +#else +static STBI_THREAD_LOCAL int stbi__unpremultiply_on_load_local, stbi__unpremultiply_on_load_set; +static STBI_THREAD_LOCAL int stbi__de_iphone_flag_local, stbi__de_iphone_flag_set; + +STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply) +{ +   stbi__unpremultiply_on_load_local = flag_true_if_should_unpremultiply; +   stbi__unpremultiply_on_load_set = 1; +} + +STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert) +{ +   stbi__de_iphone_flag_local = flag_true_if_should_convert; +   stbi__de_iphone_flag_set = 1; +} + +#define stbi__unpremultiply_on_load  (stbi__unpremultiply_on_load_set           \ +                                       ? stbi__unpremultiply_on_load_local      \ +                                       : stbi__unpremultiply_on_load_global) +#define stbi__de_iphone_flag  (stbi__de_iphone_flag_set                         \ +                                ? 
stbi__de_iphone_flag_local                    \ +                                : stbi__de_iphone_flag_global) +#endif // STBI_THREAD_LOCAL + +static void stbi__de_iphone(stbi__png *z) +{ +   stbi__context *s = z->s; +   stbi__uint32 i, pixel_count = s->img_x * s->img_y; +   stbi_uc *p = z->out; + +   if (s->img_out_n == 3) {  // convert bgr to rgb +      for (i=0; i < pixel_count; ++i) { +         stbi_uc t = p[0]; +         p[0] = p[2]; +         p[2] = t; +         p += 3; +      } +   } else { +      STBI_ASSERT(s->img_out_n == 4); +      if (stbi__unpremultiply_on_load) { +         // convert bgr to rgb and unpremultiply +         for (i=0; i < pixel_count; ++i) { +            stbi_uc a = p[3]; +            stbi_uc t = p[0]; +            if (a) { +               stbi_uc half = a / 2; +               p[0] = (p[2] * 255 + half) / a; +               p[1] = (p[1] * 255 + half) / a; +               p[2] = ( t   * 255 + half) / a; +            } else { +               p[0] = p[2]; +               p[2] = t; +            } +            p += 4; +         } +      } else { +         // convert bgr to rgb +         for (i=0; i < pixel_count; ++i) { +            stbi_uc t = p[0]; +            p[0] = p[2]; +            p[2] = t; +            p += 4; +         } +      } +   } +} + +#define STBI__PNG_TYPE(a,b,c,d)  (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d)) + +static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) +{ +   stbi_uc palette[1024], pal_img_n=0; +   stbi_uc has_trans=0, tc[3]={0}; +   stbi__uint16 tc16[3]; +   stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0; +   int first=1,k,interlace=0, color=0, is_iphone=0; +   stbi__context *s = z->s; + +   z->expanded = NULL; +   z->idata = NULL; +   z->out = NULL; + +   if (!stbi__check_png_header(s)) return 0; + +   if (scan == STBI__SCAN_type) return 1; + +   for (;;) { +      stbi__pngchunk c = stbi__get_chunk_header(s); +      switch (c.type) { +         case STBI__PNG_TYPE('C','g','B','I'): +            is_iphone = 1; +            stbi__skip(s, c.length); +            break; +         case STBI__PNG_TYPE('I','H','D','R'): { +            int comp,filter; +            if (!first) return stbi__err("multiple IHDR","Corrupt PNG"); +            first = 0; +            if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG"); +            s->img_x = stbi__get32be(s); +            s->img_y = stbi__get32be(s); +            if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); +            if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); +            z->depth = stbi__get8(s);  if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16)  return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only"); +            color = stbi__get8(s);  if (color > 6)         return stbi__err("bad ctype","Corrupt PNG"); +            if (color == 3 && z->depth == 16)                  return stbi__err("bad ctype","Corrupt PNG"); +            if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG"); +            comp  = stbi__get8(s);  if (comp) return stbi__err("bad comp method","Corrupt PNG"); +            filter= stbi__get8(s);  if (filter) return stbi__err("bad filter method","Corrupt PNG"); +            interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG"); +            if 
(!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG"); +            if (!pal_img_n) { +               s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); +               if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode"); +            } else { +               // if paletted, then pal_n is our final components, and +               // img_n is # components to decompress/filter. +               s->img_n = 1; +               if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG"); +            } +            // even with SCAN_header, have to scan to see if we have a tRNS +            break; +         } + +         case STBI__PNG_TYPE('P','L','T','E'):  { +            if (first) return stbi__err("first not IHDR", "Corrupt PNG"); +            if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG"); +            pal_len = c.length / 3; +            if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG"); +            for (i=0; i < pal_len; ++i) { +               palette[i*4+0] = stbi__get8(s); +               palette[i*4+1] = stbi__get8(s); +               palette[i*4+2] = stbi__get8(s); +               palette[i*4+3] = 255; +            } +            break; +         } + +         case STBI__PNG_TYPE('t','R','N','S'): { +            if (first) return stbi__err("first not IHDR", "Corrupt PNG"); +            if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG"); +            if (pal_img_n) { +               if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; } +               if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG"); +               if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG"); +               pal_img_n = 4; +               for (i=0; i < c.length; ++i) +                  palette[i*4+3] = stbi__get8(s); +            } else { +               if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG"); +               if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG"); +               has_trans = 1; +               // non-paletted with tRNS = constant alpha. if header-scanning, we can stop now. 
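The chunk dispatch in this parser compares the 32-bit type read by stbi__get32be(s) against STBI__PNG_TYPE(...) constants, which simply pack the four ASCII type bytes most-significant-first into one integer. A standalone check (the macro is restated locally so the snippet compiles on its own):

#include <stdio.h>

#define PNG_TYPE(a,b,c,d) (((unsigned)(a) << 24) + ((unsigned)(b) << 16) + ((unsigned)(c) << 8) + (unsigned)(d))

int main(void)
{
   // 't'=0x74, 'R'=0x52, 'N'=0x4E, 'S'=0x53, so a tRNS chunk header reads back as 0x74524E53
   printf("tRNS = 0x%08X\n", PNG_TYPE('t','R','N','S'));
   return 0;
}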
+               if (scan == STBI__SCAN_header) { ++s->img_n; return 1; } +               if (z->depth == 16) { +                  for (k = 0; k < s->img_n && k < 3; ++k) // extra loop test to suppress false GCC warning +                     tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is +               } else { +                  for (k = 0; k < s->img_n && k < 3; ++k) +                     tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger +               } +            } +            break; +         } + +         case STBI__PNG_TYPE('I','D','A','T'): { +            if (first) return stbi__err("first not IHDR", "Corrupt PNG"); +            if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG"); +            if (scan == STBI__SCAN_header) { +               // header scan definitely stops at first IDAT +               if (pal_img_n) +                  s->img_n = pal_img_n; +               return 1; +            } +            if (c.length > (1u << 30)) return stbi__err("IDAT size limit", "IDAT section larger than 2^30 bytes"); +            if ((int)(ioff + c.length) < (int)ioff) return 0; +            if (ioff + c.length > idata_limit) { +               stbi__uint32 idata_limit_old = idata_limit; +               stbi_uc *p; +               if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096; +               while (ioff + c.length > idata_limit) +                  idata_limit *= 2; +               STBI_NOTUSED(idata_limit_old); +               p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory"); +               z->idata = p; +            } +            if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG"); +            ioff += c.length; +            break; +         } + +         case STBI__PNG_TYPE('I','E','N','D'): { +            stbi__uint32 raw_len, bpl; +            if (first) return stbi__err("first not IHDR", "Corrupt PNG"); +            if (scan != STBI__SCAN_load) return 1; +            if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG"); +            // initial guess for decoded data size to avoid unnecessary reallocs +            bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component +            raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */; +            z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone); +            if (z->expanded == NULL) return 0; // zlib should set error +            STBI_FREE(z->idata); z->idata = NULL; +            if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans) +               s->img_out_n = s->img_n+1; +            else +               s->img_out_n = s->img_n; +            if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0; +            if (has_trans) { +               if (z->depth == 16) { +                  if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0; +               } else { +                  if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0; +               } +            } +            if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2) +               stbi__de_iphone(z); +            if (pal_img_n) { +               // pal_img_n == 3 or 4 +        
       s->img_n = pal_img_n; // record the actual colors we had +               s->img_out_n = pal_img_n; +               if (req_comp >= 3) s->img_out_n = req_comp; +               if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n)) +                  return 0; +            } else if (has_trans) { +               // non-paletted image with tRNS -> source image has (constant) alpha +               ++s->img_n; +            } +            STBI_FREE(z->expanded); z->expanded = NULL; +            // end of PNG chunk, read and skip CRC +            stbi__get32be(s); +            return 1; +         } + +         default: +            // if critical, fail +            if (first) return stbi__err("first not IHDR", "Corrupt PNG"); +            if ((c.type & (1 << 29)) == 0) { +               #ifndef STBI_NO_FAILURE_STRINGS +               // not threadsafe +               static char invalid_chunk[] = "XXXX PNG chunk not known"; +               invalid_chunk[0] = STBI__BYTECAST(c.type >> 24); +               invalid_chunk[1] = STBI__BYTECAST(c.type >> 16); +               invalid_chunk[2] = STBI__BYTECAST(c.type >>  8); +               invalid_chunk[3] = STBI__BYTECAST(c.type >>  0); +               #endif +               return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type"); +            } +            stbi__skip(s, c.length); +            break; +      } +      // end of PNG chunk, read and skip CRC +      stbi__get32be(s); +   } +} + +static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri) +{ +   void *result=NULL; +   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); +   if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) { +      if (p->depth <= 8) +         ri->bits_per_channel = 8; +      else if (p->depth == 16) +         ri->bits_per_channel = 16; +      else +         return stbi__errpuc("bad bits_per_channel", "PNG not supported: unsupported color depth"); +      result = p->out; +      p->out = NULL; +      if (req_comp && req_comp != p->s->img_out_n) { +         if (ri->bits_per_channel == 8) +            result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); +         else +            result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); +         p->s->img_out_n = req_comp; +         if (result == NULL) return result; +      } +      *x = p->s->img_x; +      *y = p->s->img_y; +      if (n) *n = p->s->img_n; +   } +   STBI_FREE(p->out);      p->out      = NULL; +   STBI_FREE(p->expanded); p->expanded = NULL; +   STBI_FREE(p->idata);    p->idata    = NULL; + +   return result; +} + +static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ +   stbi__png p; +   p.s = s; +   return stbi__do_png(&p, x,y,comp,req_comp, ri); +} + +static int stbi__png_test(stbi__context *s) +{ +   int r; +   r = stbi__check_png_header(s); +   stbi__rewind(s); +   return r; +} + +static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp) +{ +   if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) { +      stbi__rewind( p->s ); +      return 0; +   } +   if (x) *x = p->s->img_x; +   if (y) *y = p->s->img_y; +   if (comp) *comp = p->s->img_n; +   return 1; +} + +static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp) +{ +   stbi__png p; +   p.s = s; +   return stbi__png_info_raw(&p, x, y, comp); 
+} + +static int stbi__png_is16(stbi__context *s) +{ +   stbi__png p; +   p.s = s; +   if (!stbi__png_info_raw(&p, NULL, NULL, NULL)) +	   return 0; +   if (p.depth != 16) { +      stbi__rewind(p.s); +      return 0; +   } +   return 1; +} +#endif + +// Microsoft/Windows BMP image + +#ifndef STBI_NO_BMP +static int stbi__bmp_test_raw(stbi__context *s) +{ +   int r; +   int sz; +   if (stbi__get8(s) != 'B') return 0; +   if (stbi__get8(s) != 'M') return 0; +   stbi__get32le(s); // discard filesize +   stbi__get16le(s); // discard reserved +   stbi__get16le(s); // discard reserved +   stbi__get32le(s); // discard data offset +   sz = stbi__get32le(s); +   r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124); +   return r; +} + +static int stbi__bmp_test(stbi__context *s) +{ +   int r = stbi__bmp_test_raw(s); +   stbi__rewind(s); +   return r; +} + + +// returns 0..31 for the highest set bit +static int stbi__high_bit(unsigned int z) +{ +   int n=0; +   if (z == 0) return -1; +   if (z >= 0x10000) { n += 16; z >>= 16; } +   if (z >= 0x00100) { n +=  8; z >>=  8; } +   if (z >= 0x00010) { n +=  4; z >>=  4; } +   if (z >= 0x00004) { n +=  2; z >>=  2; } +   if (z >= 0x00002) { n +=  1;/* >>=  1;*/ } +   return n; +} + +static int stbi__bitcount(unsigned int a) +{ +   a = (a & 0x55555555) + ((a >>  1) & 0x55555555); // max 2 +   a = (a & 0x33333333) + ((a >>  2) & 0x33333333); // max 4 +   a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits +   a = (a + (a >> 8)); // max 16 per 8 bits +   a = (a + (a >> 16)); // max 32 per 8 bits +   return a & 0xff; +} + +// extract an arbitrarily-aligned N-bit value (N=bits) +// from v, and then make it 8-bits long and fractionally +// extend it to full full range. +static int stbi__shiftsigned(unsigned int v, int shift, int bits) +{ +   static unsigned int mul_table[9] = { +      0, +      0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/, +      0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/, +   }; +   static unsigned int shift_table[9] = { +      0, 0,0,1,0,2,4,6,0, +   }; +   if (shift < 0) +      v <<= -shift; +   else +      v >>= shift; +   STBI_ASSERT(v < 256); +   v >>= (8-bits); +   STBI_ASSERT(bits >= 0 && bits <= 8); +   return (int) ((unsigned) v * mul_table[bits]) >> shift_table[bits]; +} + +typedef struct +{ +   int bpp, offset, hsz; +   unsigned int mr,mg,mb,ma, all_a; +   int extra_read; +} stbi__bmp_data; + +static int stbi__bmp_set_mask_defaults(stbi__bmp_data *info, int compress) +{ +   // BI_BITFIELDS specifies masks explicitly, don't override +   if (compress == 3) +      return 1; + +   if (compress == 0) { +      if (info->bpp == 16) { +         info->mr = 31u << 10; +         info->mg = 31u <<  5; +         info->mb = 31u <<  0; +      } else if (info->bpp == 32) { +         info->mr = 0xffu << 16; +         info->mg = 0xffu <<  8; +         info->mb = 0xffu <<  0; +         info->ma = 0xffu << 24; +         info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0 +      } else { +         // otherwise, use defaults, which is all-0 +         info->mr = info->mg = info->mb = info->ma = 0; +      } +      return 1; +   } +   return 0; // error +} + +static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info) +{ +   int hsz; +   if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP"); +   stbi__get32le(s); // discard filesize +   stbi__get16le(s); // discard reserved +   
stbi__get16le(s); // discard reserved +   info->offset = stbi__get32le(s); +   info->hsz = hsz = stbi__get32le(s); +   info->mr = info->mg = info->mb = info->ma = 0; +   info->extra_read = 14; + +   if (info->offset < 0) return stbi__errpuc("bad BMP", "bad BMP"); + +   if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown"); +   if (hsz == 12) { +      s->img_x = stbi__get16le(s); +      s->img_y = stbi__get16le(s); +   } else { +      s->img_x = stbi__get32le(s); +      s->img_y = stbi__get32le(s); +   } +   if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP"); +   info->bpp = stbi__get16le(s); +   if (hsz != 12) { +      int compress = stbi__get32le(s); +      if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE"); +      if (compress >= 4) return stbi__errpuc("BMP JPEG/PNG", "BMP type not supported: unsupported compression"); // this includes PNG/JPEG modes +      if (compress == 3 && info->bpp != 16 && info->bpp != 32) return stbi__errpuc("bad BMP", "bad BMP"); // bitfields requires 16 or 32 bits/pixel +      stbi__get32le(s); // discard sizeof +      stbi__get32le(s); // discard hres +      stbi__get32le(s); // discard vres +      stbi__get32le(s); // discard colorsused +      stbi__get32le(s); // discard max important +      if (hsz == 40 || hsz == 56) { +         if (hsz == 56) { +            stbi__get32le(s); +            stbi__get32le(s); +            stbi__get32le(s); +            stbi__get32le(s); +         } +         if (info->bpp == 16 || info->bpp == 32) { +            if (compress == 0) { +               stbi__bmp_set_mask_defaults(info, compress); +            } else if (compress == 3) { +               info->mr = stbi__get32le(s); +               info->mg = stbi__get32le(s); +               info->mb = stbi__get32le(s); +               info->extra_read += 12; +               // not documented, but generated by photoshop and handled by mspaint +               if (info->mr == info->mg && info->mg == info->mb) { +                  // ?!?!? 
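+                  // identical red/green/blue masks cannot describe separate channels,
+                  // so the file is rejected as corrupt rather than guessed at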
+                  return stbi__errpuc("bad BMP", "bad BMP"); +               } +            } else +               return stbi__errpuc("bad BMP", "bad BMP"); +         } +      } else { +         // V4/V5 header +         int i; +         if (hsz != 108 && hsz != 124) +            return stbi__errpuc("bad BMP", "bad BMP"); +         info->mr = stbi__get32le(s); +         info->mg = stbi__get32le(s); +         info->mb = stbi__get32le(s); +         info->ma = stbi__get32le(s); +         if (compress != 3) // override mr/mg/mb unless in BI_BITFIELDS mode, as per docs +            stbi__bmp_set_mask_defaults(info, compress); +         stbi__get32le(s); // discard color space +         for (i=0; i < 12; ++i) +            stbi__get32le(s); // discard color space parameters +         if (hsz == 124) { +            stbi__get32le(s); // discard rendering intent +            stbi__get32le(s); // discard offset of profile data +            stbi__get32le(s); // discard size of profile data +            stbi__get32le(s); // discard reserved +         } +      } +   } +   return (void *) 1; +} + + +static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ +   stbi_uc *out; +   unsigned int mr=0,mg=0,mb=0,ma=0, all_a; +   stbi_uc pal[256][4]; +   int psize=0,i,j,width; +   int flip_vertically, pad, target; +   stbi__bmp_data info; +   STBI_NOTUSED(ri); + +   info.all_a = 255; +   if (stbi__bmp_parse_header(s, &info) == NULL) +      return NULL; // error code already set + +   flip_vertically = ((int) s->img_y) > 0; +   s->img_y = abs((int) s->img_y); + +   if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); +   if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + +   mr = info.mr; +   mg = info.mg; +   mb = info.mb; +   ma = info.ma; +   all_a = info.all_a; + +   if (info.hsz == 12) { +      if (info.bpp < 24) +         psize = (info.offset - info.extra_read - 24) / 3; +   } else { +      if (info.bpp < 16) +         psize = (info.offset - info.extra_read - info.hsz) >> 2; +   } +   if (psize == 0) { +      // accept some number of extra bytes after the header, but if the offset points either to before +      // the header ends or implies a large amount of extra data, reject the file as malformed +      int bytes_read_so_far = s->callback_already_read + (int)(s->img_buffer - s->img_buffer_original); +      int header_limit = 1024; // max we actually read is below 256 bytes currently. +      int extra_data_limit = 256*4; // what ordinarily goes here is a palette; 256 entries*4 bytes is its max size. +      if (bytes_read_so_far <= 0 || bytes_read_so_far > header_limit) { +         return stbi__errpuc("bad header", "Corrupt BMP"); +      } +      // we established that bytes_read_so_far is positive and sensible. +      // the first half of this test rejects offsets that are either too small positives, or +      // negative, and guarantees that info.offset >= bytes_read_so_far > 0. this in turn +      // ensures the number computed in the second half of the test can't overflow. +      if (info.offset < bytes_read_so_far || info.offset - bytes_read_so_far > extra_data_limit) { +         return stbi__errpuc("bad offset", "Corrupt BMP"); +      } else { +         stbi__skip(s, info.offset - bytes_read_so_far); +      } +   } + +   if (info.bpp == 24 && ma == 0xff000000) +      s->img_n = 3; +   else +      s->img_n = ma ? 
4 : 3; +   if (req_comp && req_comp >= 3) // we can directly decode 3 or 4 +      target = req_comp; +   else +      target = s->img_n; // if they want monochrome, we'll post-convert + +   // sanity-check size +   if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0)) +      return stbi__errpuc("too large", "Corrupt BMP"); + +   out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0); +   if (!out) return stbi__errpuc("outofmem", "Out of memory"); +   if (info.bpp < 16) { +      int z=0; +      if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); } +      for (i=0; i < psize; ++i) { +         pal[i][2] = stbi__get8(s); +         pal[i][1] = stbi__get8(s); +         pal[i][0] = stbi__get8(s); +         if (info.hsz != 12) stbi__get8(s); +         pal[i][3] = 255; +      } +      stbi__skip(s, info.offset - info.extra_read - info.hsz - psize * (info.hsz == 12 ? 3 : 4)); +      if (info.bpp == 1) width = (s->img_x + 7) >> 3; +      else if (info.bpp == 4) width = (s->img_x + 1) >> 1; +      else if (info.bpp == 8) width = s->img_x; +      else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); } +      pad = (-width)&3; +      if (info.bpp == 1) { +         for (j=0; j < (int) s->img_y; ++j) { +            int bit_offset = 7, v = stbi__get8(s); +            for (i=0; i < (int) s->img_x; ++i) { +               int color = (v>>bit_offset)&0x1; +               out[z++] = pal[color][0]; +               out[z++] = pal[color][1]; +               out[z++] = pal[color][2]; +               if (target == 4) out[z++] = 255; +               if (i+1 == (int) s->img_x) break; +               if((--bit_offset) < 0) { +                  bit_offset = 7; +                  v = stbi__get8(s); +               } +            } +            stbi__skip(s, pad); +         } +      } else { +         for (j=0; j < (int) s->img_y; ++j) { +            for (i=0; i < (int) s->img_x; i += 2) { +               int v=stbi__get8(s),v2=0; +               if (info.bpp == 4) { +                  v2 = v & 15; +                  v >>= 4; +               } +               out[z++] = pal[v][0]; +               out[z++] = pal[v][1]; +               out[z++] = pal[v][2]; +               if (target == 4) out[z++] = 255; +               if (i+1 == (int) s->img_x) break; +               v = (info.bpp == 8) ? 
stbi__get8(s) : v2; +               out[z++] = pal[v][0]; +               out[z++] = pal[v][1]; +               out[z++] = pal[v][2]; +               if (target == 4) out[z++] = 255; +            } +            stbi__skip(s, pad); +         } +      } +   } else { +      int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0; +      int z = 0; +      int easy=0; +      stbi__skip(s, info.offset - info.extra_read - info.hsz); +      if (info.bpp == 24) width = 3 * s->img_x; +      else if (info.bpp == 16) width = 2*s->img_x; +      else /* bpp = 32 and pad = 0 */ width=0; +      pad = (-width) & 3; +      if (info.bpp == 24) { +         easy = 1; +      } else if (info.bpp == 32) { +         if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000) +            easy = 2; +      } +      if (!easy) { +         if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); } +         // right shift amt to put high bit in position #7 +         rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr); +         gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg); +         bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb); +         ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma); +         if (rcount > 8 || gcount > 8 || bcount > 8 || acount > 8) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); } +      } +      for (j=0; j < (int) s->img_y; ++j) { +         if (easy) { +            for (i=0; i < (int) s->img_x; ++i) { +               unsigned char a; +               out[z+2] = stbi__get8(s); +               out[z+1] = stbi__get8(s); +               out[z+0] = stbi__get8(s); +               z += 3; +               a = (easy == 2 ? stbi__get8(s) : 255); +               all_a |= a; +               if (target == 4) out[z++] = a; +            } +         } else { +            int bpp = info.bpp; +            for (i=0; i < (int) s->img_x; ++i) { +               stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s)); +               unsigned int a; +               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount)); +               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount)); +               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount)); +               a = (ma ? 
stbi__shiftsigned(v & ma, ashift, acount) : 255); +               all_a |= a; +               if (target == 4) out[z++] = STBI__BYTECAST(a); +            } +         } +         stbi__skip(s, pad); +      } +   } + +   // if alpha channel is all 0s, replace with all 255s +   if (target == 4 && all_a == 0) +      for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4) +         out[i] = 255; + +   if (flip_vertically) { +      stbi_uc t; +      for (j=0; j < (int) s->img_y>>1; ++j) { +         stbi_uc *p1 = out +      j     *s->img_x*target; +         stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target; +         for (i=0; i < (int) s->img_x*target; ++i) { +            t = p1[i]; p1[i] = p2[i]; p2[i] = t; +         } +      } +   } + +   if (req_comp && req_comp != target) { +      out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y); +      if (out == NULL) return out; // stbi__convert_format frees input on failure +   } + +   *x = s->img_x; +   *y = s->img_y; +   if (comp) *comp = s->img_n; +   return out; +} +#endif + +// Targa Truevision - TGA +// by Jonathan Dummer +#ifndef STBI_NO_TGA +// returns STBI_rgb or whatever, 0 on error +static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16) +{ +   // only RGB or RGBA (incl. 16bit) or grey allowed +   if (is_rgb16) *is_rgb16 = 0; +   switch(bits_per_pixel) { +      case 8:  return STBI_grey; +      case 16: if(is_grey) return STBI_grey_alpha; +               // fallthrough +      case 15: if(is_rgb16) *is_rgb16 = 1; +               return STBI_rgb; +      case 24: // fallthrough +      case 32: return bits_per_pixel/8; +      default: return 0; +   } +} + +static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp) +{ +    int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp; +    int sz, tga_colormap_type; +    stbi__get8(s);                   // discard Offset +    tga_colormap_type = stbi__get8(s); // colormap type +    if( tga_colormap_type > 1 ) { +        stbi__rewind(s); +        return 0;      // only RGB or indexed allowed +    } +    tga_image_type = stbi__get8(s); // image type +    if ( tga_colormap_type == 1 ) { // colormapped (paletted) image +        if (tga_image_type != 1 && tga_image_type != 9) { +            stbi__rewind(s); +            return 0; +        } +        stbi__skip(s,4);       // skip index of first colormap entry and number of entries +        sz = stbi__get8(s);    //   check bits per palette color entry +        if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) { +            stbi__rewind(s); +            return 0; +        } +        stbi__skip(s,4);       // skip image x and y origin +        tga_colormap_bpp = sz; +    } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE +        if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) { +            stbi__rewind(s); +            return 0; // only RGB or grey allowed, +/- RLE +        } +        stbi__skip(s,9); // skip colormap specification and image x/y origin +        tga_colormap_bpp = 0; +    } +    tga_w = stbi__get16le(s); +    if( tga_w < 1 ) { +        stbi__rewind(s); +        return 0;   // test width +    } +    tga_h = stbi__get16le(s); +    if( tga_h < 1 ) { +        stbi__rewind(s); +        return 0;   // test height +    } +    tga_bits_per_pixel = stbi__get8(s); // bits per pixel +    stbi__get8(s); // ignore alpha bits +    if (tga_colormap_bpp != 0) { +        
if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) { +            // when using a colormap, tga_bits_per_pixel is the size of the indexes +            // I don't think anything but 8 or 16bit indexes makes sense +            stbi__rewind(s); +            return 0; +        } +        tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL); +    } else { +        tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL); +    } +    if(!tga_comp) { +      stbi__rewind(s); +      return 0; +    } +    if (x) *x = tga_w; +    if (y) *y = tga_h; +    if (comp) *comp = tga_comp; +    return 1;                   // seems to have passed everything +} + +static int stbi__tga_test(stbi__context *s) +{ +   int res = 0; +   int sz, tga_color_type; +   stbi__get8(s);      //   discard Offset +   tga_color_type = stbi__get8(s);   //   color type +   if ( tga_color_type > 1 ) goto errorEnd;   //   only RGB or indexed allowed +   sz = stbi__get8(s);   //   image type +   if ( tga_color_type == 1 ) { // colormapped (paletted) image +      if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9 +      stbi__skip(s,4);       // skip index of first colormap entry and number of entries +      sz = stbi__get8(s);    //   check bits per palette color entry +      if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; +      stbi__skip(s,4);       // skip image x and y origin +   } else { // "normal" image w/o colormap +      if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE +      stbi__skip(s,9); // skip colormap specification and image x/y origin +   } +   if ( stbi__get16le(s) < 1 ) goto errorEnd;      //   test width +   if ( stbi__get16le(s) < 1 ) goto errorEnd;      //   test height +   sz = stbi__get8(s);   //   bits per pixel +   if ( (tga_color_type == 1) && (sz != 8) && (sz != 16) ) goto errorEnd; // for colormapped images, bpp is size of an index +   if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; + +   res = 1; // if we got this far, everything's good and we can return 1 instead of 0 + +errorEnd: +   stbi__rewind(s); +   return res; +} + +// read 16bit value and convert to 24bit RGB +static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out) +{ +   stbi__uint16 px = (stbi__uint16)stbi__get16le(s); +   stbi__uint16 fiveBitMask = 31; +   // we have 3 channels with 5bits each +   int r = (px >> 10) & fiveBitMask; +   int g = (px >> 5) & fiveBitMask; +   int b = px & fiveBitMask; +   // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later +   out[0] = (stbi_uc)((r * 255)/31); +   out[1] = (stbi_uc)((g * 255)/31); +   out[2] = (stbi_uc)((b * 255)/31); + +   // some people claim that the most significant bit might be used for alpha +   // (possibly if an alpha-bit is set in the "image descriptor byte") +   // but that only made 16bit test images completely translucent.. +   // so let's treat all 15 and 16bit TGAs as RGB with no alpha. 
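+   // (for reference: each 5-bit channel is rescaled above,
+   //   e.g. a raw value of 16 becomes (16 * 255) / 31 = 131)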
+} + +static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ +   //   read in the TGA header stuff +   int tga_offset = stbi__get8(s); +   int tga_indexed = stbi__get8(s); +   int tga_image_type = stbi__get8(s); +   int tga_is_RLE = 0; +   int tga_palette_start = stbi__get16le(s); +   int tga_palette_len = stbi__get16le(s); +   int tga_palette_bits = stbi__get8(s); +   int tga_x_origin = stbi__get16le(s); +   int tga_y_origin = stbi__get16le(s); +   int tga_width = stbi__get16le(s); +   int tga_height = stbi__get16le(s); +   int tga_bits_per_pixel = stbi__get8(s); +   int tga_comp, tga_rgb16=0; +   int tga_inverted = stbi__get8(s); +   // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?) +   //   image data +   unsigned char *tga_data; +   unsigned char *tga_palette = NULL; +   int i, j; +   unsigned char raw_data[4] = {0}; +   int RLE_count = 0; +   int RLE_repeating = 0; +   int read_next_pixel = 1; +   STBI_NOTUSED(ri); +   STBI_NOTUSED(tga_x_origin); // @TODO +   STBI_NOTUSED(tga_y_origin); // @TODO + +   if (tga_height > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); +   if (tga_width > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + +   //   do a tiny bit of precessing +   if ( tga_image_type >= 8 ) +   { +      tga_image_type -= 8; +      tga_is_RLE = 1; +   } +   tga_inverted = 1 - ((tga_inverted >> 5) & 1); + +   //   If I'm paletted, then I'll use the number of bits from the palette +   if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16); +   else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16); + +   if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency +      return stbi__errpuc("bad format", "Can't find out TGA pixelformat"); + +   //   tga info +   *x = tga_width; +   *y = tga_height; +   if (comp) *comp = tga_comp; + +   if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0)) +      return stbi__errpuc("too large", "Corrupt TGA"); + +   tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0); +   if (!tga_data) return stbi__errpuc("outofmem", "Out of memory"); + +   // skip to the data's starting position (offset usually = 0) +   stbi__skip(s, tga_offset ); + +   if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) { +      for (i=0; i < tga_height; ++i) { +         int row = tga_inverted ? tga_height -i - 1 : i; +         stbi_uc *tga_row = tga_data + row*tga_width*tga_comp; +         stbi__getn(s, tga_row, tga_width * tga_comp); +      } +   } else  { +      //   do I need to load a palette? +      if ( tga_indexed) +      { +         if (tga_palette_len == 0) {  /* you have to have at least one entry! */ +            STBI_FREE(tga_data); +            return stbi__errpuc("bad palette", "Corrupt TGA"); +         } + +         //   any data to skip? 
(offset usually = 0) +         stbi__skip(s, tga_palette_start ); +         //   load the palette +         tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0); +         if (!tga_palette) { +            STBI_FREE(tga_data); +            return stbi__errpuc("outofmem", "Out of memory"); +         } +         if (tga_rgb16) { +            stbi_uc *pal_entry = tga_palette; +            STBI_ASSERT(tga_comp == STBI_rgb); +            for (i=0; i < tga_palette_len; ++i) { +               stbi__tga_read_rgb16(s, pal_entry); +               pal_entry += tga_comp; +            } +         } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) { +               STBI_FREE(tga_data); +               STBI_FREE(tga_palette); +               return stbi__errpuc("bad palette", "Corrupt TGA"); +         } +      } +      //   load the data +      for (i=0; i < tga_width * tga_height; ++i) +      { +         //   if I'm in RLE mode, do I need to get a RLE stbi__pngchunk? +         if ( tga_is_RLE ) +         { +            if ( RLE_count == 0 ) +            { +               //   yep, get the next byte as a RLE command +               int RLE_cmd = stbi__get8(s); +               RLE_count = 1 + (RLE_cmd & 127); +               RLE_repeating = RLE_cmd >> 7; +               read_next_pixel = 1; +            } else if ( !RLE_repeating ) +            { +               read_next_pixel = 1; +            } +         } else +         { +            read_next_pixel = 1; +         } +         //   OK, if I need to read a pixel, do it now +         if ( read_next_pixel ) +         { +            //   load however much data we did have +            if ( tga_indexed ) +            { +               // read in index, then perform the lookup +               int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s); +               if ( pal_idx >= tga_palette_len ) { +                  // invalid index +                  pal_idx = 0; +               } +               pal_idx *= tga_comp; +               for (j = 0; j < tga_comp; ++j) { +                  raw_data[j] = tga_palette[pal_idx+j]; +               } +            } else if(tga_rgb16) { +               STBI_ASSERT(tga_comp == STBI_rgb); +               stbi__tga_read_rgb16(s, raw_data); +            } else { +               //   read in the data raw +               for (j = 0; j < tga_comp; ++j) { +                  raw_data[j] = stbi__get8(s); +               } +            } +            //   clear the reading flag for the next pixel +            read_next_pixel = 0; +         } // end of reading a pixel + +         // copy data +         for (j = 0; j < tga_comp; ++j) +           tga_data[i*tga_comp+j] = raw_data[j]; + +         //   in case we're in RLE mode, keep counting down +         --RLE_count; +      } +      //   do I need to invert the image? 
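+      //   (bit 5 of the image descriptor selects a top-left origin; when it is clear,
+      //   the rows were stored bottom-up, so swap them here for top-to-bottom output)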
+      if ( tga_inverted ) +      { +         for (j = 0; j*2 < tga_height; ++j) +         { +            int index1 = j * tga_width * tga_comp; +            int index2 = (tga_height - 1 - j) * tga_width * tga_comp; +            for (i = tga_width * tga_comp; i > 0; --i) +            { +               unsigned char temp = tga_data[index1]; +               tga_data[index1] = tga_data[index2]; +               tga_data[index2] = temp; +               ++index1; +               ++index2; +            } +         } +      } +      //   clear my palette, if I had one +      if ( tga_palette != NULL ) +      { +         STBI_FREE( tga_palette ); +      } +   } + +   // swap RGB - if the source data was RGB16, it already is in the right order +   if (tga_comp >= 3 && !tga_rgb16) +   { +      unsigned char* tga_pixel = tga_data; +      for (i=0; i < tga_width * tga_height; ++i) +      { +         unsigned char temp = tga_pixel[0]; +         tga_pixel[0] = tga_pixel[2]; +         tga_pixel[2] = temp; +         tga_pixel += tga_comp; +      } +   } + +   // convert to target component count +   if (req_comp && req_comp != tga_comp) +      tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height); + +   //   the things I do to get rid of an error message, and yet keep +   //   Microsoft's C compilers happy... [8^( +   tga_palette_start = tga_palette_len = tga_palette_bits = +         tga_x_origin = tga_y_origin = 0; +   STBI_NOTUSED(tga_palette_start); +   //   OK, done +   return tga_data; +} +#endif + +// ************************************************************************************************* +// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB + +#ifndef STBI_NO_PSD +static int stbi__psd_test(stbi__context *s) +{ +   int r = (stbi__get32be(s) == 0x38425053); +   stbi__rewind(s); +   return r; +} + +static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount) +{ +   int count, nleft, len; + +   count = 0; +   while ((nleft = pixelCount - count) > 0) { +      len = stbi__get8(s); +      if (len == 128) { +         // No-op. +      } else if (len < 128) { +         // Copy next len+1 bytes literally. +         len++; +         if (len > nleft) return 0; // corrupt data +         count += len; +         while (len) { +            *p = stbi__get8(s); +            p += 4; +            len--; +         } +      } else if (len > 128) { +         stbi_uc   val; +         // Next -len+1 bytes in the dest are replicated from next source byte. +         // (Interpret len as a negative 8-bit int.) +         len = 257 - len; +         if (len > nleft) return 0; // corrupt data +         val = stbi__get8(s); +         count += len; +         while (len) { +            *p = val; +            p += 4; +            len--; +         } +      } +   } + +   return 1; +} + +static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) +{ +   int pixelCount; +   int channelCount, compression; +   int channel, i; +   int bitdepth; +   int w,h; +   stbi_uc *out; +   STBI_NOTUSED(ri); + +   // Check identifier +   if (stbi__get32be(s) != 0x38425053)   // "8BPS" +      return stbi__errpuc("not PSD", "Corrupt PSD image"); + +   // Check file type version. +   if (stbi__get16be(s) != 1) +      return stbi__errpuc("wrong version", "Unsupported version of PSD image"); + +   // Skip 6 reserved bytes. +   stbi__skip(s, 6 ); + +   // Read the number of channels (R, G, B, A, etc). 
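+   // (only the first four channels are consumed below; any extra channels in the file are ignored)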
+   channelCount = stbi__get16be(s); +   if (channelCount < 0 || channelCount > 16) +      return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image"); + +   // Read the rows and columns of the image. +   h = stbi__get32be(s); +   w = stbi__get32be(s); + +   if (h > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); +   if (w > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + +   // Make sure the depth is 8 bits. +   bitdepth = stbi__get16be(s); +   if (bitdepth != 8 && bitdepth != 16) +      return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit"); + +   // Make sure the color mode is RGB. +   // Valid options are: +   //   0: Bitmap +   //   1: Grayscale +   //   2: Indexed color +   //   3: RGB color +   //   4: CMYK color +   //   7: Multichannel +   //   8: Duotone +   //   9: Lab color +   if (stbi__get16be(s) != 3) +      return stbi__errpuc("wrong color format", "PSD is not in RGB color format"); + +   // Skip the Mode Data.  (It's the palette for indexed color; other info for other modes.) +   stbi__skip(s,stbi__get32be(s) ); + +   // Skip the image resources.  (resolution, pen tool paths, etc) +   stbi__skip(s, stbi__get32be(s) ); + +   // Skip the reserved data. +   stbi__skip(s, stbi__get32be(s) ); + +   // Find out if the data is compressed. +   // Known values: +   //   0: no compression +   //   1: RLE compressed +   compression = stbi__get16be(s); +   if (compression > 1) +      return stbi__errpuc("bad compression", "PSD has an unknown compression format"); + +   // Check size +   if (!stbi__mad3sizes_valid(4, w, h, 0)) +      return stbi__errpuc("too large", "Corrupt PSD"); + +   // Create the destination image. + +   if (!compression && bitdepth == 16 && bpc == 16) { +      out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0); +      ri->bits_per_channel = 16; +   } else +      out = (stbi_uc *) stbi__malloc(4 * w*h); + +   if (!out) return stbi__errpuc("outofmem", "Out of memory"); +   pixelCount = w*h; + +   // Initialize the data to zero. +   //memset( out, 0, pixelCount * 4 ); + +   // Finally, the image data. +   if (compression) { +      // RLE as used by .PSD and .TIFF +      // Loop until you get the number of unpacked bytes you are expecting: +      //     Read the next source byte into n. +      //     If n is between 0 and 127 inclusive, copy the next n+1 bytes literally. +      //     Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times. +      //     Else if n is 128, noop. +      // Endloop + +      // The RLE-compressed data is preceded by a 2-byte data count for each row in the data, +      // which we're going to just skip. +      stbi__skip(s, h * channelCount * 2 ); + +      // Read the RLE data by channel. +      for (channel = 0; channel < 4; channel++) { +         stbi_uc *p; + +         p = out+channel; +         if (channel >= channelCount) { +            // Fill this channel with default data. +            for (i = 0; i < pixelCount; i++, p += 4) +               *p = (channel == 3 ? 255 : 0); +         } else { +            // Read the RLE data. +            if (!stbi__psd_decode_rle(s, p, pixelCount)) { +               STBI_FREE(out); +               return stbi__errpuc("corrupt", "bad RLE data"); +            } +         } +      } + +   } else { +      // We're at the raw image data.  It's each channel in order (Red, Green, Blue, Alpha, ...) 
+      // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image. + +      // Read the data by channel. +      for (channel = 0; channel < 4; channel++) { +         if (channel >= channelCount) { +            // Fill this channel with default data. +            if (bitdepth == 16 && bpc == 16) { +               stbi__uint16 *q = ((stbi__uint16 *) out) + channel; +               stbi__uint16 val = channel == 3 ? 65535 : 0; +               for (i = 0; i < pixelCount; i++, q += 4) +                  *q = val; +            } else { +               stbi_uc *p = out+channel; +               stbi_uc val = channel == 3 ? 255 : 0; +               for (i = 0; i < pixelCount; i++, p += 4) +                  *p = val; +            } +         } else { +            if (ri->bits_per_channel == 16) {    // output bpc +               stbi__uint16 *q = ((stbi__uint16 *) out) + channel; +               for (i = 0; i < pixelCount; i++, q += 4) +                  *q = (stbi__uint16) stbi__get16be(s); +            } else { +               stbi_uc *p = out+channel; +               if (bitdepth == 16) {  // input bpc +                  for (i = 0; i < pixelCount; i++, p += 4) +                     *p = (stbi_uc) (stbi__get16be(s) >> 8); +               } else { +                  for (i = 0; i < pixelCount; i++, p += 4) +                     *p = stbi__get8(s); +               } +            } +         } +      } +   } + +   // remove weird white matte from PSD +   if (channelCount >= 4) { +      if (ri->bits_per_channel == 16) { +         for (i=0; i < w*h; ++i) { +            stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i; +            if (pixel[3] != 0 && pixel[3] != 65535) { +               float a = pixel[3] / 65535.0f; +               float ra = 1.0f / a; +               float inv_a = 65535.0f * (1 - ra); +               pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a); +               pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a); +               pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a); +            } +         } +      } else { +         for (i=0; i < w*h; ++i) { +            unsigned char *pixel = out + 4*i; +            if (pixel[3] != 0 && pixel[3] != 255) { +               float a = pixel[3] / 255.0f; +               float ra = 1.0f / a; +               float inv_a = 255.0f * (1 - ra); +               pixel[0] = (unsigned char) (pixel[0]*ra + inv_a); +               pixel[1] = (unsigned char) (pixel[1]*ra + inv_a); +               pixel[2] = (unsigned char) (pixel[2]*ra + inv_a); +            } +         } +      } +   } + +   // convert to desired output format +   if (req_comp && req_comp != 4) { +      if (ri->bits_per_channel == 16) +         out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h); +      else +         out = stbi__convert_format(out, 4, req_comp, w, h); +      if (out == NULL) return out; // stbi__convert_format frees input on failure +   } + +   if (comp) *comp = 4; +   *y = h; +   *x = w; + +   return out; +} +#endif + +// ************************************************************************************************* +// Softimage PIC loader +// by Tom Seddon +// +// See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format +// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/ + +#ifndef STBI_NO_PIC +static int stbi__pic_is4(stbi__context *s,const char *str) +{ +   int i; +   for (i=0; i<4; ++i) +      if (stbi__get8(s) != (stbi_uc)str[i]) +         return 0; + +   return 1; 
+} + +static int stbi__pic_test_core(stbi__context *s) +{ +   int i; + +   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) +      return 0; + +   for(i=0;i<84;++i) +      stbi__get8(s); + +   if (!stbi__pic_is4(s,"PICT")) +      return 0; + +   return 1; +} + +typedef struct +{ +   stbi_uc size,type,channel; +} stbi__pic_packet; + +static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest) +{ +   int mask=0x80, i; + +   for (i=0; i<4; ++i, mask>>=1) { +      if (channel & mask) { +         if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short"); +         dest[i]=stbi__get8(s); +      } +   } + +   return dest; +} + +static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src) +{ +   int mask=0x80,i; + +   for (i=0;i<4; ++i, mask>>=1) +      if (channel&mask) +         dest[i]=src[i]; +} + +static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result) +{ +   int act_comp=0,num_packets=0,y,chained; +   stbi__pic_packet packets[10]; + +   // this will (should...) cater for even some bizarre stuff like having data +    // for the same channel in multiple packets. +   do { +      stbi__pic_packet *packet; + +      if (num_packets==sizeof(packets)/sizeof(packets[0])) +         return stbi__errpuc("bad format","too many packets"); + +      packet = &packets[num_packets++]; + +      chained = stbi__get8(s); +      packet->size    = stbi__get8(s); +      packet->type    = stbi__get8(s); +      packet->channel = stbi__get8(s); + +      act_comp |= packet->channel; + +      if (stbi__at_eof(s))          return stbi__errpuc("bad file","file too short (reading packets)"); +      if (packet->size != 8)  return stbi__errpuc("bad format","packet isn't 8bpp"); +   } while (chained); + +   *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel? 
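+   // the per-scanline loop below decodes each packet by type:
+   //   0 = uncompressed, 1 = pure RLE, 2 = mixed RLE/raw runs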
+ +   for(y=0; y<height; ++y) { +      int packet_idx; + +      for(packet_idx=0; packet_idx < num_packets; ++packet_idx) { +         stbi__pic_packet *packet = &packets[packet_idx]; +         stbi_uc *dest = result+y*width*4; + +         switch (packet->type) { +            default: +               return stbi__errpuc("bad format","packet has bad compression type"); + +            case 0: {//uncompressed +               int x; + +               for(x=0;x<width;++x, dest+=4) +                  if (!stbi__readval(s,packet->channel,dest)) +                     return 0; +               break; +            } + +            case 1://Pure RLE +               { +                  int left=width, i; + +                  while (left>0) { +                     stbi_uc count,value[4]; + +                     count=stbi__get8(s); +                     if (stbi__at_eof(s))   return stbi__errpuc("bad file","file too short (pure read count)"); + +                     if (count > left) +                        count = (stbi_uc) left; + +                     if (!stbi__readval(s,packet->channel,value))  return 0; + +                     for(i=0; i<count; ++i,dest+=4) +                        stbi__copyval(packet->channel,dest,value); +                     left -= count; +                  } +               } +               break; + +            case 2: {//Mixed RLE +               int left=width; +               while (left>0) { +                  int count = stbi__get8(s), i; +                  if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (mixed read count)"); + +                  if (count >= 128) { // Repeated +                     stbi_uc value[4]; + +                     if (count==128) +                        count = stbi__get16be(s); +                     else +                        count -= 127; +                     if (count > left) +                        return stbi__errpuc("bad file","scanline overrun"); + +                     if (!stbi__readval(s,packet->channel,value)) +                        return 0; + +                     for(i=0;i<count;++i, dest += 4) +                        stbi__copyval(packet->channel,dest,value); +                  } else { // Raw +                     ++count; +                     if (count>left) return stbi__errpuc("bad file","scanline overrun"); + +                     for(i=0;i<count;++i, dest+=4) +                        if (!stbi__readval(s,packet->channel,dest)) +                           return 0; +                  } +                  left-=count; +               } +               break; +            } +         } +      } +   } + +   return result; +} + +static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri) +{ +   stbi_uc *result; +   int i, x,y, internal_comp; +   STBI_NOTUSED(ri); + +   if (!comp) comp = &internal_comp; + +   for (i=0; i<92; ++i) +      stbi__get8(s); + +   x = stbi__get16be(s); +   y = stbi__get16be(s); + +   if (y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); +   if (x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + +   if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (pic header)"); +   if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode"); + +   stbi__get32be(s); //skip `ratio' +   stbi__get16be(s); //skip `fields' +   stbi__get16be(s); //skip `pad' + +   // intermediate buffer is RGBA +   
result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0); +   if (!result) return stbi__errpuc("outofmem", "Out of memory"); +   memset(result, 0xff, x*y*4); + +   if (!stbi__pic_load_core(s,x,y,comp, result)) { +      STBI_FREE(result); +      result=0; +   } +   *px = x; +   *py = y; +   if (req_comp == 0) req_comp = *comp; +   result=stbi__convert_format(result,4,req_comp,x,y); + +   return result; +} + +static int stbi__pic_test(stbi__context *s) +{ +   int r = stbi__pic_test_core(s); +   stbi__rewind(s); +   return r; +} +#endif + +// ************************************************************************************************* +// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb + +#ifndef STBI_NO_GIF +typedef struct +{ +   stbi__int16 prefix; +   stbi_uc first; +   stbi_uc suffix; +} stbi__gif_lzw; + +typedef struct +{ +   int w,h; +   stbi_uc *out;                 // output buffer (always 4 components) +   stbi_uc *background;          // The current "background" as far as a gif is concerned +   stbi_uc *history; +   int flags, bgindex, ratio, transparent, eflags; +   stbi_uc  pal[256][4]; +   stbi_uc lpal[256][4]; +   stbi__gif_lzw codes[8192]; +   stbi_uc *color_table; +   int parse, step; +   int lflags; +   int start_x, start_y; +   int max_x, max_y; +   int cur_x, cur_y; +   int line_size; +   int delay; +} stbi__gif; + +static int stbi__gif_test_raw(stbi__context *s) +{ +   int sz; +   if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0; +   sz = stbi__get8(s); +   if (sz != '9' && sz != '7') return 0; +   if (stbi__get8(s) != 'a') return 0; +   return 1; +} + +static int stbi__gif_test(stbi__context *s) +{ +   int r = stbi__gif_test_raw(s); +   stbi__rewind(s); +   return r; +} + +static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp) +{ +   int i; +   for (i=0; i < num_entries; ++i) { +      pal[i][2] = stbi__get8(s); +      pal[i][1] = stbi__get8(s); +      pal[i][0] = stbi__get8(s); +      pal[i][3] = transp == i ? 
0 : 255; +   } +} + +static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info) +{ +   stbi_uc version; +   if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') +      return stbi__err("not GIF", "Corrupt GIF"); + +   version = stbi__get8(s); +   if (version != '7' && version != '9')    return stbi__err("not GIF", "Corrupt GIF"); +   if (stbi__get8(s) != 'a')                return stbi__err("not GIF", "Corrupt GIF"); + +   stbi__g_failure_reason = ""; +   g->w = stbi__get16le(s); +   g->h = stbi__get16le(s); +   g->flags = stbi__get8(s); +   g->bgindex = stbi__get8(s); +   g->ratio = stbi__get8(s); +   g->transparent = -1; + +   if (g->w > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); +   if (g->h > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + +   if (comp != 0) *comp = 4;  // can't actually tell whether it's 3 or 4 until we parse the comments + +   if (is_info) return 1; + +   if (g->flags & 0x80) +      stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1); + +   return 1; +} + +static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp) +{ +   stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif)); +   if (!g) return stbi__err("outofmem", "Out of memory"); +   if (!stbi__gif_header(s, g, comp, 1)) { +      STBI_FREE(g); +      stbi__rewind( s ); +      return 0; +   } +   if (x) *x = g->w; +   if (y) *y = g->h; +   STBI_FREE(g); +   return 1; +} + +static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code) +{ +   stbi_uc *p, *c; +   int idx; + +   // recurse to decode the prefixes, since the linked-list is backwards, +   // and working backwards through an interleaved image would be nasty +   if (g->codes[code].prefix >= 0) +      stbi__out_gif_code(g, g->codes[code].prefix); + +   if (g->cur_y >= g->max_y) return; + +   idx = g->cur_x + g->cur_y; +   p = &g->out[idx]; +   g->history[idx / 4] = 1; + +   c = &g->color_table[g->codes[code].suffix * 4]; +   if (c[3] > 128) { // don't render transparent pixels; +      p[0] = c[2]; +      p[1] = c[1]; +      p[2] = c[0]; +      p[3] = c[3]; +   } +   g->cur_x += 4; + +   if (g->cur_x >= g->max_x) { +      g->cur_x = g->start_x; +      g->cur_y += g->step; + +      while (g->cur_y >= g->max_y && g->parse > 0) { +         g->step = (1 << g->parse) * g->line_size; +         g->cur_y = g->start_y + (g->step >> 1); +         --g->parse; +      } +   } +} + +static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g) +{ +   stbi_uc lzw_cs; +   stbi__int32 len, init_code; +   stbi__uint32 first; +   stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear; +   stbi__gif_lzw *p; + +   lzw_cs = stbi__get8(s); +   if (lzw_cs > 12) return NULL; +   clear = 1 << lzw_cs; +   first = 1; +   codesize = lzw_cs + 1; +   codemask = (1 << codesize) - 1; +   bits = 0; +   valid_bits = 0; +   for (init_code = 0; init_code < clear; init_code++) { +      g->codes[init_code].prefix = -1; +      g->codes[init_code].first = (stbi_uc) init_code; +      g->codes[init_code].suffix = (stbi_uc) init_code; +   } + +   // support no starting clear code +   avail = clear+2; +   oldcode = -1; + +   len = 0; +   for(;;) { +      if (valid_bits < codesize) { +         if (len == 0) { +            len = stbi__get8(s); // start new block +            if (len == 0) +               return g->out; +         } +         --len; +         bits |= (stbi__int32) stbi__get8(s) << 
valid_bits; +         valid_bits += 8; +      } else { +         stbi__int32 code = bits & codemask; +         bits >>= codesize; +         valid_bits -= codesize; +         // @OPTIMIZE: is there some way we can accelerate the non-clear path? +         if (code == clear) {  // clear code +            codesize = lzw_cs + 1; +            codemask = (1 << codesize) - 1; +            avail = clear + 2; +            oldcode = -1; +            first = 0; +         } else if (code == clear + 1) { // end of stream code +            stbi__skip(s, len); +            while ((len = stbi__get8(s)) > 0) +               stbi__skip(s,len); +            return g->out; +         } else if (code <= avail) { +            if (first) { +               return stbi__errpuc("no clear code", "Corrupt GIF"); +            } + +            if (oldcode >= 0) { +               p = &g->codes[avail++]; +               if (avail > 8192) { +                  return stbi__errpuc("too many codes", "Corrupt GIF"); +               } + +               p->prefix = (stbi__int16) oldcode; +               p->first = g->codes[oldcode].first; +               p->suffix = (code == avail) ? p->first : g->codes[code].first; +            } else if (code == avail) +               return stbi__errpuc("illegal code in raster", "Corrupt GIF"); + +            stbi__out_gif_code(g, (stbi__uint16) code); + +            if ((avail & codemask) == 0 && avail <= 0x0FFF) { +               codesize++; +               codemask = (1 << codesize) - 1; +            } + +            oldcode = code; +         } else { +            return stbi__errpuc("illegal code in raster", "Corrupt GIF"); +         } +      } +   } +} + +// this function is designed to support animated gifs, although stb_image doesn't support it +// two back is the image from two frames ago, used for a very specific disposal format +static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back) +{ +   int dispose; +   int first_frame; +   int pi; +   int pcount; +   STBI_NOTUSED(req_comp); + +   // on first frame, any non-written pixels get the background colour (non-transparent) +   first_frame = 0; +   if (g->out == 0) { +      if (!stbi__gif_header(s, g, comp,0)) return 0; // stbi__g_failure_reason set by stbi__gif_header +      if (!stbi__mad3sizes_valid(4, g->w, g->h, 0)) +         return stbi__errpuc("too large", "GIF image is too large"); +      pcount = g->w * g->h; +      g->out = (stbi_uc *) stbi__malloc(4 * pcount); +      g->background = (stbi_uc *) stbi__malloc(4 * pcount); +      g->history = (stbi_uc *) stbi__malloc(pcount); +      if (!g->out || !g->background || !g->history) +         return stbi__errpuc("outofmem", "Out of memory"); + +      // image is treated as "transparent" at the start - ie, nothing overwrites the current background; +      // background colour is only used for pixels that are not rendered first frame, after that "background" +      // color refers to the color that was there the previous frame. +      memset(g->out, 0x00, 4 * pcount); +      memset(g->background, 0x00, 4 * pcount); // state of the background (starts transparent) +      memset(g->history, 0x00, pcount);        // pixels that were affected previous frame +      first_frame = 1; +   } else { +      // second frame - how do we dispose of the previous one? 
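+      // (disposal method from the Graphic Control Extension: 0 = unspecified,
+      //  1 = keep in place, 2 = restore to background, 3 = restore to previous frame)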
+      dispose = (g->eflags & 0x1C) >> 2; +      pcount = g->w * g->h; + +      if ((dispose == 3) && (two_back == 0)) { +         dispose = 2; // if I don't have an image to revert back to, default to the old background +      } + +      if (dispose == 3) { // use previous graphic +         for (pi = 0; pi < pcount; ++pi) { +            if (g->history[pi]) { +               memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 ); +            } +         } +      } else if (dispose == 2) { +         // restore what was changed last frame to background before that frame; +         for (pi = 0; pi < pcount; ++pi) { +            if (g->history[pi]) { +               memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 ); +            } +         } +      } else { +         // This is a non-disposal case eithe way, so just +         // leave the pixels as is, and they will become the new background +         // 1: do not dispose +         // 0:  not specified. +      } + +      // background is what out is after the undoing of the previou frame; +      memcpy( g->background, g->out, 4 * g->w * g->h ); +   } + +   // clear my history; +   memset( g->history, 0x00, g->w * g->h );        // pixels that were affected previous frame + +   for (;;) { +      int tag = stbi__get8(s); +      switch (tag) { +         case 0x2C: /* Image Descriptor */ +         { +            stbi__int32 x, y, w, h; +            stbi_uc *o; + +            x = stbi__get16le(s); +            y = stbi__get16le(s); +            w = stbi__get16le(s); +            h = stbi__get16le(s); +            if (((x + w) > (g->w)) || ((y + h) > (g->h))) +               return stbi__errpuc("bad Image Descriptor", "Corrupt GIF"); + +            g->line_size = g->w * 4; +            g->start_x = x * 4; +            g->start_y = y * g->line_size; +            g->max_x   = g->start_x + w * 4; +            g->max_y   = g->start_y + h * g->line_size; +            g->cur_x   = g->start_x; +            g->cur_y   = g->start_y; + +            // if the width of the specified rectangle is 0, that means +            // we may not see *any* pixels or the image is malformed; +            // to make sure this is caught, move the current y down to +            // max_y (which is what out_gif_code checks). +            if (w == 0) +               g->cur_y = g->max_y; + +            g->lflags = stbi__get8(s); + +            if (g->lflags & 0x40) { +               g->step = 8 * g->line_size; // first interlaced spacing +               g->parse = 3; +            } else { +               g->step = g->line_size; +               g->parse = 0; +            } + +            if (g->lflags & 0x80) { +               stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? 
g->transparent : -1); +               g->color_table = (stbi_uc *) g->lpal; +            } else if (g->flags & 0x80) { +               g->color_table = (stbi_uc *) g->pal; +            } else +               return stbi__errpuc("missing color table", "Corrupt GIF"); + +            o = stbi__process_gif_raster(s, g); +            if (!o) return NULL; + +            // if this was the first frame, +            pcount = g->w * g->h; +            if (first_frame && (g->bgindex > 0)) { +               // if first frame, any pixel not drawn to gets the background color +               for (pi = 0; pi < pcount; ++pi) { +                  if (g->history[pi] == 0) { +                     g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be; +                     memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 ); +                  } +               } +            } + +            return o; +         } + +         case 0x21: // Comment Extension. +         { +            int len; +            int ext = stbi__get8(s); +            if (ext == 0xF9) { // Graphic Control Extension. +               len = stbi__get8(s); +               if (len == 4) { +                  g->eflags = stbi__get8(s); +                  g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths. + +                  // unset old transparent +                  if (g->transparent >= 0) { +                     g->pal[g->transparent][3] = 255; +                  } +                  if (g->eflags & 0x01) { +                     g->transparent = stbi__get8(s); +                     if (g->transparent >= 0) { +                        g->pal[g->transparent][3] = 0; +                     } +                  } else { +                     // don't need transparent +                     stbi__skip(s, 1); +                     g->transparent = -1; +                  } +               } else { +                  stbi__skip(s, len); +                  break; +               } +            } +            while ((len = stbi__get8(s)) != 0) { +               stbi__skip(s, len); +            } +            break; +         } + +         case 0x3B: // gif stream termination code +            return (stbi_uc *) s; // using '1' causes warning on some compilers + +         default: +            return stbi__errpuc("unknown code", "Corrupt GIF"); +      } +   } +} + +static void *stbi__load_gif_main_outofmem(stbi__gif *g, stbi_uc *out, int **delays) +{ +   STBI_FREE(g->out); +   STBI_FREE(g->history); +   STBI_FREE(g->background); + +   if (out) STBI_FREE(out); +   if (delays && *delays) STBI_FREE(*delays); +   return stbi__errpuc("outofmem", "Out of memory"); +} + +static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp) +{ +   if (stbi__gif_test(s)) { +      int layers = 0; +      stbi_uc *u = 0; +      stbi_uc *out = 0; +      stbi_uc *two_back = 0; +      stbi__gif g; +      int stride; +      int out_size = 0; +      int delays_size = 0; + +      STBI_NOTUSED(out_size); +      STBI_NOTUSED(delays_size); + +      memset(&g, 0, sizeof(g)); +      if (delays) { +         *delays = 0; +      } + +      do { +         u = stbi__gif_load_next(s, &g, comp, req_comp, two_back); +         if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker + +         if (u) { +            *x = g.w; +            *y = g.h; +            ++layers; +            stride = g.w * g.h * 4; + +            if (out) { +   
            void *tmp = (stbi_uc*) STBI_REALLOC_SIZED( out, out_size, layers * stride ); +               if (!tmp) +                  return stbi__load_gif_main_outofmem(&g, out, delays); +               else { +                   out = (stbi_uc*) tmp; +                   out_size = layers * stride; +               } + +               if (delays) { +                  int *new_delays = (int*) STBI_REALLOC_SIZED( *delays, delays_size, sizeof(int) * layers ); +                  if (!new_delays) +                     return stbi__load_gif_main_outofmem(&g, out, delays); +                  *delays = new_delays; +                  delays_size = layers * sizeof(int); +               } +            } else { +               out = (stbi_uc*)stbi__malloc( layers * stride ); +               if (!out) +                  return stbi__load_gif_main_outofmem(&g, out, delays); +               out_size = layers * stride; +               if (delays) { +                  *delays = (int*) stbi__malloc( layers * sizeof(int) ); +                  if (!*delays) +                     return stbi__load_gif_main_outofmem(&g, out, delays); +                  delays_size = layers * sizeof(int); +               } +            } +            memcpy( out + ((layers - 1) * stride), u, stride ); +            if (layers >= 2) { +               two_back = out - 2 * stride; +            } + +            if (delays) { +               (*delays)[layers - 1U] = g.delay; +            } +         } +      } while (u != 0); + +      // free temp buffer; +      STBI_FREE(g.out); +      STBI_FREE(g.history); +      STBI_FREE(g.background); + +      // do the final conversion after loading everything; +      if (req_comp && req_comp != 4) +         out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h); + +      *z = layers; +      return out; +   } else { +      return stbi__errpuc("not GIF", "Image was not as a gif type."); +   } +} + +static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ +   stbi_uc *u = 0; +   stbi__gif g; +   memset(&g, 0, sizeof(g)); +   STBI_NOTUSED(ri); + +   u = stbi__gif_load_next(s, &g, comp, req_comp, 0); +   if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker +   if (u) { +      *x = g.w; +      *y = g.h; + +      // moved conversion to after successful load so that the same +      // can be done for multiple frames. +      if (req_comp && req_comp != 4) +         u = stbi__convert_format(u, 4, req_comp, g.w, g.h); +   } else if (g.out) { +      // if there was an error and we allocated an image buffer, free it! 
+      STBI_FREE(g.out); +   } + +   // free buffers needed for multiple frame loading; +   STBI_FREE(g.history); +   STBI_FREE(g.background); + +   return u; +} + +static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp) +{ +   return stbi__gif_info_raw(s,x,y,comp); +} +#endif + +// ************************************************************************************************* +// Radiance RGBE HDR loader +// originally by Nicolas Schulz +#ifndef STBI_NO_HDR +static int stbi__hdr_test_core(stbi__context *s, const char *signature) +{ +   int i; +   for (i=0; signature[i]; ++i) +      if (stbi__get8(s) != signature[i]) +          return 0; +   stbi__rewind(s); +   return 1; +} + +static int stbi__hdr_test(stbi__context* s) +{ +   int r = stbi__hdr_test_core(s, "#?RADIANCE\n"); +   stbi__rewind(s); +   if(!r) { +       r = stbi__hdr_test_core(s, "#?RGBE\n"); +       stbi__rewind(s); +   } +   return r; +} + +#define STBI__HDR_BUFLEN  1024 +static char *stbi__hdr_gettoken(stbi__context *z, char *buffer) +{ +   int len=0; +   char c = '\0'; + +   c = (char) stbi__get8(z); + +   while (!stbi__at_eof(z) && c != '\n') { +      buffer[len++] = c; +      if (len == STBI__HDR_BUFLEN-1) { +         // flush to end of line +         while (!stbi__at_eof(z) && stbi__get8(z) != '\n') +            ; +         break; +      } +      c = (char) stbi__get8(z); +   } + +   buffer[len] = 0; +   return buffer; +} + +static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp) +{ +   if ( input[3] != 0 ) { +      float f1; +      // Exponent +      f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8)); +      if (req_comp <= 2) +         output[0] = (input[0] + input[1] + input[2]) * f1 / 3; +      else { +         output[0] = input[0] * f1; +         output[1] = input[1] * f1; +         output[2] = input[2] * f1; +      } +      if (req_comp == 2) output[1] = 1; +      if (req_comp == 4) output[3] = 1; +   } else { +      switch (req_comp) { +         case 4: output[3] = 1; /* fallthrough */ +         case 3: output[0] = output[1] = output[2] = 0; +                 break; +         case 2: output[1] = 1; /* fallthrough */ +         case 1: output[0] = 0; +                 break; +      } +   } +} + +static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ +   char buffer[STBI__HDR_BUFLEN]; +   char *token; +   int valid = 0; +   int width, height; +   stbi_uc *scanline; +   float *hdr_data; +   int len; +   unsigned char count, value; +   int i, j, k, c1,c2, z; +   const char *headerToken; +   STBI_NOTUSED(ri); + +   // Check identifier +   headerToken = stbi__hdr_gettoken(s,buffer); +   if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0) +      return stbi__errpf("not HDR", "Corrupt HDR image"); + +   // Parse header +   for(;;) { +      token = stbi__hdr_gettoken(s,buffer); +      if (token[0] == 0) break; +      if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; +   } + +   if (!valid)    return stbi__errpf("unsupported format", "Unsupported HDR format"); + +   // Parse width and height +   // can't use sscanf() if we're not using stdio! 
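+   // For illustration (an assumed example, not taken from any particular file):
+   // a header in the one supported layout looks like
+   //   #?RADIANCE
+   //   FORMAT=32-bit_rle_rgbe
+   //   (empty line terminating the header)
+   //   -Y 480 +X 640
+   // i.e. the resolution line gives height first, then width; any other
+   // -Y/+X arrangement is rejected below as "Unsupported HDR format".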
+   token = stbi__hdr_gettoken(s,buffer); +   if (strncmp(token, "-Y ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format"); +   token += 3; +   height = (int) strtol(token, &token, 10); +   while (*token == ' ') ++token; +   if (strncmp(token, "+X ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format"); +   token += 3; +   width = (int) strtol(token, NULL, 10); + +   if (height > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)"); +   if (width > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)"); + +   *x = width; +   *y = height; + +   if (comp) *comp = 3; +   if (req_comp == 0) req_comp = 3; + +   if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0)) +      return stbi__errpf("too large", "HDR image is too large"); + +   // Read data +   hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0); +   if (!hdr_data) +      return stbi__errpf("outofmem", "Out of memory"); + +   // Load image data +   // image data is stored as some number of sca +   if ( width < 8 || width >= 32768) { +      // Read flat data +      for (j=0; j < height; ++j) { +         for (i=0; i < width; ++i) { +            stbi_uc rgbe[4]; +           main_decode_loop: +            stbi__getn(s, rgbe, 4); +            stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp); +         } +      } +   } else { +      // Read RLE-encoded data +      scanline = NULL; + +      for (j = 0; j < height; ++j) { +         c1 = stbi__get8(s); +         c2 = stbi__get8(s); +         len = stbi__get8(s); +         if (c1 != 2 || c2 != 2 || (len & 0x80)) { +            // not run-length encoded, so we have to actually use THIS data as a decoded +            // pixel (note this can't be a valid pixel--one of RGB must be >= 128) +            stbi_uc rgbe[4]; +            rgbe[0] = (stbi_uc) c1; +            rgbe[1] = (stbi_uc) c2; +            rgbe[2] = (stbi_uc) len; +            rgbe[3] = (stbi_uc) stbi__get8(s); +            stbi__hdr_convert(hdr_data, rgbe, req_comp); +            i = 1; +            j = 0; +            STBI_FREE(scanline); +            goto main_decode_loop; // yes, this makes no sense +         } +         len <<= 8; +         len |= stbi__get8(s); +         if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); } +         if (scanline == NULL) { +            scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0); +            if (!scanline) { +               STBI_FREE(hdr_data); +               return stbi__errpf("outofmem", "Out of memory"); +            } +         } + +         for (k = 0; k < 4; ++k) { +            int nleft; +            i = 0; +            while ((nleft = width - i) > 0) { +               count = stbi__get8(s); +               if (count > 128) { +                  // Run +                  value = stbi__get8(s); +                  count -= 128; +                  if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } +                  for (z = 0; z < count; ++z) +                     scanline[i++ * 4 + k] = value; +               } else { +                  // Dump +                  if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } +                  for (z = 0; z < 
count; ++z) +                     scanline[i++ * 4 + k] = stbi__get8(s); +               } +            } +         } +         for (i=0; i < width; ++i) +            stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp); +      } +      if (scanline) +         STBI_FREE(scanline); +   } + +   return hdr_data; +} + +static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp) +{ +   char buffer[STBI__HDR_BUFLEN]; +   char *token; +   int valid = 0; +   int dummy; + +   if (!x) x = &dummy; +   if (!y) y = &dummy; +   if (!comp) comp = &dummy; + +   if (stbi__hdr_test(s) == 0) { +       stbi__rewind( s ); +       return 0; +   } + +   for(;;) { +      token = stbi__hdr_gettoken(s,buffer); +      if (token[0] == 0) break; +      if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; +   } + +   if (!valid) { +       stbi__rewind( s ); +       return 0; +   } +   token = stbi__hdr_gettoken(s,buffer); +   if (strncmp(token, "-Y ", 3)) { +       stbi__rewind( s ); +       return 0; +   } +   token += 3; +   *y = (int) strtol(token, &token, 10); +   while (*token == ' ') ++token; +   if (strncmp(token, "+X ", 3)) { +       stbi__rewind( s ); +       return 0; +   } +   token += 3; +   *x = (int) strtol(token, NULL, 10); +   *comp = 3; +   return 1; +} +#endif // STBI_NO_HDR + +#ifndef STBI_NO_BMP +static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp) +{ +   void *p; +   stbi__bmp_data info; + +   info.all_a = 255; +   p = stbi__bmp_parse_header(s, &info); +   if (p == NULL) { +      stbi__rewind( s ); +      return 0; +   } +   if (x) *x = s->img_x; +   if (y) *y = s->img_y; +   if (comp) { +      if (info.bpp == 24 && info.ma == 0xff000000) +         *comp = 3; +      else +         *comp = info.ma ? 
4 : 3; +   } +   return 1; +} +#endif + +#ifndef STBI_NO_PSD +static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp) +{ +   int channelCount, dummy, depth; +   if (!x) x = &dummy; +   if (!y) y = &dummy; +   if (!comp) comp = &dummy; +   if (stbi__get32be(s) != 0x38425053) { +       stbi__rewind( s ); +       return 0; +   } +   if (stbi__get16be(s) != 1) { +       stbi__rewind( s ); +       return 0; +   } +   stbi__skip(s, 6); +   channelCount = stbi__get16be(s); +   if (channelCount < 0 || channelCount > 16) { +       stbi__rewind( s ); +       return 0; +   } +   *y = stbi__get32be(s); +   *x = stbi__get32be(s); +   depth = stbi__get16be(s); +   if (depth != 8 && depth != 16) { +       stbi__rewind( s ); +       return 0; +   } +   if (stbi__get16be(s) != 3) { +       stbi__rewind( s ); +       return 0; +   } +   *comp = 4; +   return 1; +} + +static int stbi__psd_is16(stbi__context *s) +{ +   int channelCount, depth; +   if (stbi__get32be(s) != 0x38425053) { +       stbi__rewind( s ); +       return 0; +   } +   if (stbi__get16be(s) != 1) { +       stbi__rewind( s ); +       return 0; +   } +   stbi__skip(s, 6); +   channelCount = stbi__get16be(s); +   if (channelCount < 0 || channelCount > 16) { +       stbi__rewind( s ); +       return 0; +   } +   STBI_NOTUSED(stbi__get32be(s)); +   STBI_NOTUSED(stbi__get32be(s)); +   depth = stbi__get16be(s); +   if (depth != 16) { +       stbi__rewind( s ); +       return 0; +   } +   return 1; +} +#endif + +#ifndef STBI_NO_PIC +static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp) +{ +   int act_comp=0,num_packets=0,chained,dummy; +   stbi__pic_packet packets[10]; + +   if (!x) x = &dummy; +   if (!y) y = &dummy; +   if (!comp) comp = &dummy; + +   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) { +      stbi__rewind(s); +      return 0; +   } + +   stbi__skip(s, 88); + +   *x = stbi__get16be(s); +   *y = stbi__get16be(s); +   if (stbi__at_eof(s)) { +      stbi__rewind( s); +      return 0; +   } +   if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) { +      stbi__rewind( s ); +      return 0; +   } + +   stbi__skip(s, 8); + +   do { +      stbi__pic_packet *packet; + +      if (num_packets==sizeof(packets)/sizeof(packets[0])) +         return 0; + +      packet = &packets[num_packets++]; +      chained = stbi__get8(s); +      packet->size    = stbi__get8(s); +      packet->type    = stbi__get8(s); +      packet->channel = stbi__get8(s); +      act_comp |= packet->channel; + +      if (stbi__at_eof(s)) { +          stbi__rewind( s ); +          return 0; +      } +      if (packet->size != 8) { +          stbi__rewind( s ); +          return 0; +      } +   } while (chained); + +   *comp = (act_comp & 0x10 ? 
4 : 3); + +   return 1; +} +#endif + +// ************************************************************************************************* +// Portable Gray Map and Portable Pixel Map loader +// by Ken Miller +// +// PGM: http://netpbm.sourceforge.net/doc/pgm.html +// PPM: http://netpbm.sourceforge.net/doc/ppm.html +// +// Known limitations: +//    Does not support comments in the header section +//    Does not support ASCII image data (formats P2 and P3) + +#ifndef STBI_NO_PNM + +static int      stbi__pnm_test(stbi__context *s) +{ +   char p, t; +   p = (char) stbi__get8(s); +   t = (char) stbi__get8(s); +   if (p != 'P' || (t != '5' && t != '6')) { +       stbi__rewind( s ); +       return 0; +   } +   return 1; +} + +static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ +   stbi_uc *out; +   STBI_NOTUSED(ri); + +   ri->bits_per_channel = stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n); +   if (ri->bits_per_channel == 0) +      return 0; + +   if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); +   if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + +   *x = s->img_x; +   *y = s->img_y; +   if (comp) *comp = s->img_n; + +   if (!stbi__mad4sizes_valid(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0)) +      return stbi__errpuc("too large", "PNM too large"); + +   out = (stbi_uc *) stbi__malloc_mad4(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0); +   if (!out) return stbi__errpuc("outofmem", "Out of memory"); +   if (!stbi__getn(s, out, s->img_n * s->img_x * s->img_y * (ri->bits_per_channel / 8))) { +      STBI_FREE(out); +      return stbi__errpuc("bad PNM", "PNM file truncated"); +   } + +   if (req_comp && req_comp != s->img_n) { +      if (ri->bits_per_channel == 16) { +         out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, s->img_n, req_comp, s->img_x, s->img_y); +      } else { +         out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y); +      } +      if (out == NULL) return out; // stbi__convert_format frees input on failure +   } +   return out; +} + +static int      stbi__pnm_isspace(char c) +{ +   return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'; +} + +static void     stbi__pnm_skip_whitespace(stbi__context *s, char *c) +{ +   for (;;) { +      while (!stbi__at_eof(s) && stbi__pnm_isspace(*c)) +         *c = (char) stbi__get8(s); + +      if (stbi__at_eof(s) || *c != '#') +         break; + +      while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' ) +         *c = (char) stbi__get8(s); +   } +} + +static int      stbi__pnm_isdigit(char c) +{ +   return c >= '0' && c <= '9'; +} + +static int      stbi__pnm_getinteger(stbi__context *s, char *c) +{ +   int value = 0; + +   while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) { +      value = value*10 + (*c - '0'); +      *c = (char) stbi__get8(s); +      if((value > 214748364) || (value == 214748364 && *c > '7')) +          return stbi__err("integer parse overflow", "Parsing an integer in the PPM header overflowed a 32-bit int"); +   } + +   return value; +} + +static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp) +{ +   int maxv, dummy; +   char c, p, t; + +   if (!x) x = &dummy; +   if (!y) y = &dummy; +   if (!comp) comp = &dummy; + +   stbi__rewind(s); + +   // Get identifier +   p = (char) stbi__get8(s); +   t = (char) stbi__get8(s); +   
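+   // For illustration, a minimal accepted header is e.g. "P6\n640 480\n255\n"
+   // (magic, width, height, maxval) followed directly by binary sample data;
+   // below, maxval <= 255 selects 8-bit output and 256..65535 selects 16-bit.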
if (p != 'P' || (t != '5' && t != '6')) { +       stbi__rewind(s); +       return 0; +   } + +   *comp = (t == '6') ? 3 : 1;  // '5' is 1-component .pgm; '6' is 3-component .ppm + +   c = (char) stbi__get8(s); +   stbi__pnm_skip_whitespace(s, &c); + +   *x = stbi__pnm_getinteger(s, &c); // read width +   if(*x == 0) +       return stbi__err("invalid width", "PPM image header had zero or overflowing width"); +   stbi__pnm_skip_whitespace(s, &c); + +   *y = stbi__pnm_getinteger(s, &c); // read height +   if (*y == 0) +       return stbi__err("invalid width", "PPM image header had zero or overflowing width"); +   stbi__pnm_skip_whitespace(s, &c); + +   maxv = stbi__pnm_getinteger(s, &c);  // read max value +   if (maxv > 65535) +      return stbi__err("max value > 65535", "PPM image supports only 8-bit and 16-bit images"); +   else if (maxv > 255) +      return 16; +   else +      return 8; +} + +static int stbi__pnm_is16(stbi__context *s) +{ +   if (stbi__pnm_info(s, NULL, NULL, NULL) == 16) +	   return 1; +   return 0; +} +#endif + +static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp) +{ +   #ifndef STBI_NO_JPEG +   if (stbi__jpeg_info(s, x, y, comp)) return 1; +   #endif + +   #ifndef STBI_NO_PNG +   if (stbi__png_info(s, x, y, comp))  return 1; +   #endif + +   #ifndef STBI_NO_GIF +   if (stbi__gif_info(s, x, y, comp))  return 1; +   #endif + +   #ifndef STBI_NO_BMP +   if (stbi__bmp_info(s, x, y, comp))  return 1; +   #endif + +   #ifndef STBI_NO_PSD +   if (stbi__psd_info(s, x, y, comp))  return 1; +   #endif + +   #ifndef STBI_NO_PIC +   if (stbi__pic_info(s, x, y, comp))  return 1; +   #endif + +   #ifndef STBI_NO_PNM +   if (stbi__pnm_info(s, x, y, comp))  return 1; +   #endif + +   #ifndef STBI_NO_HDR +   if (stbi__hdr_info(s, x, y, comp))  return 1; +   #endif + +   // test tga last because it's a crappy test! 
+   #ifndef STBI_NO_TGA +   if (stbi__tga_info(s, x, y, comp)) +       return 1; +   #endif +   return stbi__err("unknown image type", "Image not of any known type, or corrupt"); +} + +static int stbi__is_16_main(stbi__context *s) +{ +   #ifndef STBI_NO_PNG +   if (stbi__png_is16(s))  return 1; +   #endif + +   #ifndef STBI_NO_PSD +   if (stbi__psd_is16(s))  return 1; +   #endif + +   #ifndef STBI_NO_PNM +   if (stbi__pnm_is16(s))  return 1; +   #endif +   return 0; +} + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp) +{ +    FILE *f = stbi__fopen(filename, "rb"); +    int result; +    if (!f) return stbi__err("can't fopen", "Unable to open file"); +    result = stbi_info_from_file(f, x, y, comp); +    fclose(f); +    return result; +} + +STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp) +{ +   int r; +   stbi__context s; +   long pos = ftell(f); +   stbi__start_file(&s, f); +   r = stbi__info_main(&s,x,y,comp); +   fseek(f,pos,SEEK_SET); +   return r; +} + +STBIDEF int stbi_is_16_bit(char const *filename) +{ +    FILE *f = stbi__fopen(filename, "rb"); +    int result; +    if (!f) return stbi__err("can't fopen", "Unable to open file"); +    result = stbi_is_16_bit_from_file(f); +    fclose(f); +    return result; +} + +STBIDEF int stbi_is_16_bit_from_file(FILE *f) +{ +   int r; +   stbi__context s; +   long pos = ftell(f); +   stbi__start_file(&s, f); +   r = stbi__is_16_main(&s); +   fseek(f,pos,SEEK_SET); +   return r; +} +#endif // !STBI_NO_STDIO + +STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp) +{ +   stbi__context s; +   stbi__start_mem(&s,buffer,len); +   return stbi__info_main(&s,x,y,comp); +} + +STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp) +{ +   stbi__context s; +   stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); +   return stbi__info_main(&s,x,y,comp); +} + +STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len) +{ +   stbi__context s; +   stbi__start_mem(&s,buffer,len); +   return stbi__is_16_main(&s); +} + +STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user) +{ +   stbi__context s; +   stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); +   return stbi__is_16_main(&s); +} + +#endif // STB_IMAGE_IMPLEMENTATION + +/* +   revision history: +      2.20  (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs +      2.19  (2018-02-11) fix warning +      2.18  (2018-01-30) fix warnings +      2.17  (2018-01-29) change sbti__shiftsigned to avoid clang -O2 bug +                         1-bit BMP +                         *_is_16_bit api +                         avoid warnings +      2.16  (2017-07-23) all functions have 16-bit variants; +                         STBI_NO_STDIO works again; +                         compilation fixes; +                         fix rounding in unpremultiply; +                         optimize vertical flip; +                         disable raw_len validation; +                         documentation fixes +      2.15  (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode; +                         warning fixes; disable run-time SSE detection on gcc; +                         uniform handling of optional "return" values; +                         thread-safe initialization of zlib tables +      2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs +      2.13  (2016-11-29) 
add 16-bit API, only supported for PNG right now +      2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes +      2.11  (2016-04-02) allocate large structures on the stack +                         remove white matting for transparent PSD +                         fix reported channel count for PNG & BMP +                         re-enable SSE2 in non-gcc 64-bit +                         support RGB-formatted JPEG +                         read 16-bit PNGs (only as 8-bit) +      2.10  (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED +      2.09  (2016-01-16) allow comments in PNM files +                         16-bit-per-pixel TGA (not bit-per-component) +                         info() for TGA could break due to .hdr handling +                         info() for BMP to shares code instead of sloppy parse +                         can use STBI_REALLOC_SIZED if allocator doesn't support realloc +                         code cleanup +      2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA +      2.07  (2015-09-13) fix compiler warnings +                         partial animated GIF support +                         limited 16-bpc PSD support +                         #ifdef unused functions +                         bug with < 92 byte PIC,PNM,HDR,TGA +      2.06  (2015-04-19) fix bug where PSD returns wrong '*comp' value +      2.05  (2015-04-19) fix bug in progressive JPEG handling, fix warning +      2.04  (2015-04-15) try to re-enable SIMD on MinGW 64-bit +      2.03  (2015-04-12) extra corruption checking (mmozeiko) +                         stbi_set_flip_vertically_on_load (nguillemot) +                         fix NEON support; fix mingw support +      2.02  (2015-01-19) fix incorrect assert, fix warning +      2.01  (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2 +      2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG +      2.00  (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg) +                         progressive JPEG (stb) +                         PGM/PPM support (Ken Miller) +                         STBI_MALLOC,STBI_REALLOC,STBI_FREE +                         GIF bugfix -- seemingly never worked +                         STBI_NO_*, STBI_ONLY_* +      1.48  (2014-12-14) fix incorrectly-named assert() +      1.47  (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb) +                         optimize PNG (ryg) +                         fix bug in interlaced PNG with user-specified channel count (stb) +      1.46  (2014-08-26) +              fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG +      1.45  (2014-08-16) +              fix MSVC-ARM internal compiler error by wrapping malloc +      1.44  (2014-08-07) +              various warning fixes from Ronny Chevalier +      1.43  (2014-07-15) +              fix MSVC-only compiler problem in code changed in 1.42 +      1.42  (2014-07-09) +              don't define _CRT_SECURE_NO_WARNINGS (affects user code) +              fixes to stbi__cleanup_jpeg path +              added STBI_ASSERT to avoid requiring assert.h +      1.41  (2014-06-25) +              fix search&replace from 1.36 that messed up comments/error messages +      1.40  (2014-06-22) +              fix gcc struct-initialization warning +      1.39  (2014-06-15) +              fix to TGA optimization when req_comp != number of components in TGA; +              fix to GIF loading because BMP wasn't rewinding (whoops, no 
GIFs in my test suite) +              add support for BMP version 5 (more ignored fields) +      1.38  (2014-06-06) +              suppress MSVC warnings on integer casts truncating values +              fix accidental rename of 'skip' field of I/O +      1.37  (2014-06-04) +              remove duplicate typedef +      1.36  (2014-06-03) +              convert to header file single-file library +              if de-iphone isn't set, load iphone images color-swapped instead of returning NULL +      1.35  (2014-05-27) +              various warnings +              fix broken STBI_SIMD path +              fix bug where stbi_load_from_file no longer left file pointer in correct place +              fix broken non-easy path for 32-bit BMP (possibly never used) +              TGA optimization by Arseny Kapoulkine +      1.34  (unknown) +              use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case +      1.33  (2011-07-14) +              make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements +      1.32  (2011-07-13) +              support for "info" function for all supported filetypes (SpartanJ) +      1.31  (2011-06-20) +              a few more leak fixes, bug in PNG handling (SpartanJ) +      1.30  (2011-06-11) +              added ability to load files via callbacks to accomidate custom input streams (Ben Wenger) +              removed deprecated format-specific test/load functions +              removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway +              error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha) +              fix inefficiency in decoding 32-bit BMP (David Woo) +      1.29  (2010-08-16) +              various warning fixes from Aurelien Pocheville +      1.28  (2010-08-01) +              fix bug in GIF palette transparency (SpartanJ) +      1.27  (2010-08-01) +              cast-to-stbi_uc to fix warnings +      1.26  (2010-07-24) +              fix bug in file buffering for PNG reported by SpartanJ +      1.25  (2010-07-17) +              refix trans_data warning (Won Chun) +      1.24  (2010-07-12) +              perf improvements reading from files on platforms with lock-heavy fgetc() +              minor perf improvements for jpeg +              deprecated type-specific functions so we'll get feedback if they're needed +              attempt to fix trans_data warning (Won Chun) +      1.23    fixed bug in iPhone support +      1.22  (2010-07-10) +              removed image *writing* support +              stbi_info support from Jetro Lauha +              GIF support from Jean-Marc Lienher +              iPhone PNG-extensions from James Brown +              warning-fixes from Nicolas Schulz and Janez Zemva (i.stbi__err. 
Janez Žemva)
+      1.21    fix use of 'stbi_uc' in header (reported by jon blow)
+      1.20    added support for Softimage PIC, by Tom Seddon
+      1.19    bug in interlaced PNG corruption check (found by ryg)
+      1.18  (2008-08-02)
+              fix a threading bug (local mutable static)
+      1.17    support interlaced PNG
+      1.16    major bugfix - stbi__convert_format converted one too many pixels
+      1.15    initialize some fields for thread safety
+      1.14    fix threadsafe conversion bug
+              header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
+      1.13    threadsafe
+      1.12    const qualifiers in the API
+      1.11    Support installable IDCT, colorspace conversion routines
+      1.10    Fixes for 64-bit (don't use "unsigned long")
+              optimized upsampling by Fabian "ryg" Giesen
+      1.09    Fix format-conversion for PSD code (bad global variables!)
+      1.08    Thatcher Ulrich's PSD code integrated by Nicolas Schulz
+      1.07    attempt to fix C++ warning/errors again
+      1.06    attempt to fix C++ warning/errors again
+      1.05    fix TGA loading to return correct *comp and use good luminance calc
+      1.04    default float alpha is 1, not 255; use 'void *' for stbi_image_free
+      1.03    bugfixes to STBI_NO_STDIO, STBI_NO_HDR
+      1.02    support for (subset of) HDR files, float interface for preferred access to them
+      1.01    fix bug: possible bug in handling right-side up bmps... not sure
+              fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
+      1.00    interface to zlib that skips zlib header
+      0.99    correct handling of alpha in palette
+      0.98    TGA loader by lonesock; dynamically add loaders (untested)
+      0.97    jpeg errors on too large a file; also catch another malloc failure
+      0.96    fix detection of invalid v value - particleman@mollyrocket forum
+      0.95    during header scan, seek to markers in case of padding
+      0.94    STBI_NO_STDIO to disable stdio usage; rename all #defines the same
+      0.93    handle jpegtran output; verbose errors
+      0.92    read 4,8,16,24,32-bit BMP files of several formats
+      0.91    output 24-bit Windows 3.0 BMP files
+      0.90    fix a few more warnings; bump version number to approach 1.0
+      0.61    bugfixes due to Marc LeBlanc, Christopher Lloyd
+      0.60    fix compiling as c++
+      0.59    fix warnings: merge Dave Moore's -Wall fixes
+      0.58    fix bug: zlib uncompressed mode len/nlen was wrong endian
+      0.57    fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
+      0.56    fix bug: zlib uncompressed mode len vs. nlen
+      0.55    fix bug: restart_interval not initialized to 0
+      0.54    allow NULL for 'int *comp'
+      0.53    fix bug in png 3->4; speedup png decoding
+      0.52    png handles req_comp=3,4 directly; minor cleanup; jpeg comments
+      0.51    obey req_comp requests, 1-component jpegs return as 1-component,
+              on 'test' only check type, not whether we support this variant
+      0.50  (2006-11-19)
+              first released version
+*/
+
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/vendor/tiny_obj_loader_c.h b/vendor/tiny_obj_loader_c.h new file mode 100644 index 0000000..09087fe --- /dev/null +++ b/vendor/tiny_obj_loader_c.h @@ -0,0 +1,1793 @@ +/* +   The MIT License (MIT) + +   Copyright (c) 2016 - 2019 Syoyo Fujita and many contributors. + +   Permission is hereby granted, free of charge, to any person obtaining a copy +   of this software and associated documentation files (the "Software"), to deal +   in the Software without restriction, including without limitation the rights +   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +   copies of the Software, and to permit persons to whom the Software is +   furnished to do so, subject to the following conditions: + +   The above copyright notice and this permission notice shall be included in +   all copies or substantial portions of the Software. 
+ +   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +   THE SOFTWARE. +   */ +#ifndef TINOBJ_LOADER_C_H_ +#define TINOBJ_LOADER_C_H_ + +/* @todo { Remove stddef dependency. size_t? } */ +#include <stddef.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { +  char *name; + +  float ambient[3]; +  float diffuse[3]; +  float specular[3]; +  float transmittance[3]; +  float emission[3]; +  float shininess; +  float ior;      /* index of refraction */ +  float dissolve; /* 1 == opaque; 0 == fully transparent */ +  /* illumination model (see http://www.fileformat.info/format/material/) */ +  int illum; + +  int pad0; + +  char *ambient_texname;            /* map_Ka */ +  char *diffuse_texname;            /* map_Kd */ +  char *specular_texname;           /* map_Ks */ +  char *specular_highlight_texname; /* map_Ns */ +  char *bump_texname;               /* map_bump, bump */ +  char *displacement_texname;       /* disp */ +  char *alpha_texname;              /* map_d */ +} tinyobj_material_t; + +typedef struct { +  char *name; /* group name or object name. */ +  unsigned int face_offset; +  unsigned int length; +} tinyobj_shape_t; + +typedef struct { +  int v_idx, vt_idx, vn_idx; +} tinyobj_vertex_index_t; + +typedef struct { +  unsigned int num_vertices; +  unsigned int num_normals; +  unsigned int num_texcoords; +  unsigned int num_faces; +  unsigned int num_face_num_verts; + +  int pad0; + +  float *vertices; +  float *normals; +  float *texcoords; +  tinyobj_vertex_index_t *faces; +  int *face_num_verts; +  int *material_ids; +} tinyobj_attrib_t; + +#define TINYOBJ_FLAG_TRIANGULATE (1 << 0) + +#define TINYOBJ_INVALID_INDEX (0x80000000) + +#define TINYOBJ_SUCCESS (0) +#define TINYOBJ_ERROR_EMPTY (-1) +#define TINYOBJ_ERROR_INVALID_PARAMETER (-2) +#define TINYOBJ_ERROR_FILE_OPERATION (-3) + +/* Provide a callback that can read text file without any parsing or + * modification. The obj and mtl parser is going to read all the necessary data: + * tinyobj_parse_obj + * tinyobj_parse_mtl_file + * + * @param[in] ctx User provided context. + * @param[in] filename Filename to be loaded. + * @param[in] is_mtl 1 when the callback is invoked for loading .mtl. 0 for .obj + * @param[in] obj_filename .obj filename. Useful when you load .mtl from same + * location of .obj. When the callback is called to load .obj, `filename` and + * `obj_filename` are same. + * @param[out] buf Content of loaded file + * @param[out] len Size of content(file) + */ +typedef void (*file_reader_callback)(void *ctx, const char *filename, +                                     int is_mtl, const char *obj_filename, +                                     char **buf, size_t *len); + +/* Parse wavefront .obj + * @param[out] attrib Attibutes + * @param[out] shapes Array of parsed shapes + * @param[out] num_shapes Array length of `shapes` + * @param[out] materials Array of parsed materials + * @param[out] num_materials Array length of `materials` + * @param[in] file_name File name of .obj + * @param[in] file_reader File reader callback function(to read .obj and .mtl). 
+ * @param[in] ctx Context pointer passed to the file_reader_callback. + * @param[in] flags combination of TINYOBJ_FLAG_*** + * + * Returns TINYOBJ_SUCCESS if things goes well. + * Returns TINYOBJ_ERROR_*** when there is an error. + */ +extern int tinyobj_parse_obj(tinyobj_attrib_t *attrib, tinyobj_shape_t **shapes, +                             size_t *num_shapes, tinyobj_material_t **materials, +                             size_t *num_materials, const char *file_name, +                             file_reader_callback file_reader, void *ctx, +                             unsigned int flags); + +/* Parse wavefront .mtl + * + * @param[out] materials_out + * @param[out] num_materials_out + * @param[in] filename .mtl filename + * @param[in] filename of .obj filename. could be NULL if you just want to parse + .mtl file. + * @param[in] file_reader File reader callback + * @param[in[ ctx Context pointer passed to the file_reader callack. + + * Returns TINYOBJ_SUCCESS if things goes well. + * Returns TINYOBJ_ERROR_*** when there is an error. + */ +extern int tinyobj_parse_mtl_file(tinyobj_material_t **materials_out, +                                  size_t *num_materials_out, +                                  const char *filename, +                                  const char *obj_filename, +                                  file_reader_callback file_reader, void *ctx); + +extern void tinyobj_attrib_init(tinyobj_attrib_t *attrib); +extern void tinyobj_attrib_free(tinyobj_attrib_t *attrib); +extern void tinyobj_shapes_free(tinyobj_shape_t *shapes, size_t num_shapes); +extern void tinyobj_materials_free(tinyobj_material_t *materials, +                                   size_t num_materials); + +#ifdef __cplusplus +} +#endif +#endif /* TINOBJ_LOADER_C_H_ */ + +#ifdef TINYOBJ_LOADER_C_IMPLEMENTATION +#include <assert.h> +#include <errno.h> +#include <stdio.h> +#include <string.h> + +#if defined(TINYOBJ_MALLOC) && defined(TINYOBJ_CALLOC) &&                      \ +    defined(TINYOBJ_FREE) &&                                                   \ +    (defined(TINYOBJ_REALLOC) || defined(TINYOBJ_REALLOC_SIZED)) +/* ok */ +#elif !defined(TINYOBJ_MALLOC) && !defined(TINYOBJ_CALLOC) &&                  \ +    !defined(TINYOBJ_FREE) && !defined(TINYOBJ_REALLOC) &&                     \ +    !defined(TINYOBJ_REALLOC_SIZED) +/* ok */ +#else +#error                                                                         \ +    "Must define all or none of TINYOBJ_MALLOC, TINYOBJ_CALLOC, TINYOBJ_FREE, and TINYOBJ_REALLOC (or TINYOBJ_REALLOC_SIZED)." 
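+/* Illustrative (hypothetical) override: a user supplying custom allocators
+   would define all of these before including this file with
+   TINYOBJ_LOADER_C_IMPLEMENTATION, for example:
+     #define TINYOBJ_MALLOC  my_malloc
+     #define TINYOBJ_CALLOC  my_calloc
+     #define TINYOBJ_REALLOC my_realloc
+     #define TINYOBJ_FREE    my_free
+   (the my_* names are placeholders); otherwise the <stdlib.h> defaults below
+   are used. */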
+#endif + +#ifndef TINYOBJ_MALLOC +#include <stdlib.h> +#define TINYOBJ_MALLOC malloc +#define TINYOBJ_REALLOC realloc +#define TINYOBJ_CALLOC calloc +#define TINYOBJ_FREE free +#endif + +#ifndef TINYOBJ_REALLOC_SIZED +#define TINYOBJ_REALLOC_SIZED(p, oldsz, newsz) TINYOBJ_REALLOC(p, newsz) +#endif + +#define TINYOBJ_MAX_FACES_PER_F_LINE (16) +#define TINYOBJ_MAX_FILEPATH (8192) + +#define IS_SPACE(x) (((x) == ' ') || ((x) == '\t')) +#define IS_DIGIT(x) ((unsigned int)((x) - '0') < (unsigned int)(10)) +#define IS_NEW_LINE(x) (((x) == '\r') || ((x) == '\n') || ((x) == '\0')) + +static void skip_space(const char **token) { +  while ((*token)[0] == ' ' || (*token)[0] == '\t') { +    (*token)++; +  } +} + +static void skip_space_and_cr(const char **token) { +  while ((*token)[0] == ' ' || (*token)[0] == '\t' || (*token)[0] == '\r') { +    (*token)++; +  } +} + +static int until_space(const char *token) { +  const char *p = token; +  while (p[0] != '\0' && p[0] != ' ' && p[0] != '\t' && p[0] != '\r') { +    p++; +  } + +  return (int)(p - token); +} + +static size_t length_until_newline(const char *token, size_t n) { +  size_t len = 0; + +  /* Assume token[n-1] = '\0' */ +  for (len = 0; len < n - 1; len++) { +    if (token[len] == '\n') { +      break; +    } +    if ((token[len] == '\r') && ((len < (n - 2)) && (token[len + 1] != '\n'))) { +      break; +    } +  } + +  return len; +} + +static size_t length_until_line_feed(const char *token, size_t n) { +  size_t len = 0; + +  /* Assume token[n-1] = '\0' */ +  for (len = 0; len < n; len++) { +    if ((token[len] == '\n') || (token[len] == '\r')) { +      break; +    } +  } + +  return len; +} + +/* http://stackoverflow.com/questions/5710091/how-does-atoi-function-in-c-work + */ +static int my_atoi(const char *c) { +  int value = 0; +  int sign = 1; +  if (*c == '+' || *c == '-') { +    if (*c == '-') +      sign = -1; +    c++; +  } +  while (((*c) >= '0') && ((*c) <= '9')) { /* isdigit(*c) */ +    value *= 10; +    value += (int)(*c - '0'); +    c++; +  } +  return value * sign; +} + +/* Make index zero-base, and also support relative index. 
*/ +static int fixIndex(int idx, size_t n) { +  if (idx > 0) +    return idx - 1; +  if (idx == 0) +    return 0; +  return (int)n + idx; /* negative value = relative */ +} + +/* Parse raw triples: i, i/j/k, i//k, i/j */ +static tinyobj_vertex_index_t parseRawTriple(const char **token) { +  tinyobj_vertex_index_t vi; +  /* 0x80000000 = -2147483648 = invalid */ +  vi.v_idx = (int)(0x80000000); +  vi.vn_idx = (int)(0x80000000); +  vi.vt_idx = (int)(0x80000000); + +  vi.v_idx = my_atoi((*token)); +  while ((*token)[0] != '\0' && (*token)[0] != '/' && (*token)[0] != ' ' && +         (*token)[0] != '\t' && (*token)[0] != '\r') { +    (*token)++; +  } +  if ((*token)[0] != '/') { +    return vi; +  } +  (*token)++; + +  /* i//k */ +  if ((*token)[0] == '/') { +    (*token)++; +    vi.vn_idx = my_atoi((*token)); +    while ((*token)[0] != '\0' && (*token)[0] != '/' && (*token)[0] != ' ' && +           (*token)[0] != '\t' && (*token)[0] != '\r') { +      (*token)++; +    } +    return vi; +  } + +  /* i/j/k or i/j */ +  vi.vt_idx = my_atoi((*token)); +  while ((*token)[0] != '\0' && (*token)[0] != '/' && (*token)[0] != ' ' && +         (*token)[0] != '\t' && (*token)[0] != '\r') { +    (*token)++; +  } +  if ((*token)[0] != '/') { +    return vi; +  } + +  /* i/j/k */ +  (*token)++; /* skip '/' */ +  vi.vn_idx = my_atoi((*token)); +  while ((*token)[0] != '\0' && (*token)[0] != '/' && (*token)[0] != ' ' && +         (*token)[0] != '\t' && (*token)[0] != '\r') { +    (*token)++; +  } +  return vi; +} + +static int parseInt(const char **token) { +  int i = 0; +  skip_space(token); +  i = my_atoi((*token)); +  (*token) += until_space((*token)); +  return i; +} + +/* + * Tries to parse a floating point number located at s. + * + * s_end should be a location in the string where reading should absolutely + * stop. For example at the end of the string, to prevent buffer overflows. + * + * Parses the following EBNF grammar: + *   sign    = "+" | "-" ; + *   END     = ? anything not in digit ? + *   digit   = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; + *   integer = [sign] , digit , {digit} ; + *   decimal = integer , ["." , integer] ; + *   float   = ( decimal , END ) | ( decimal , ("E" | "e") , integer , END ) ; + * + *  Valid strings are for example: + *   -0  +3.1417e+2  -0.0E-3  1.0324  -1.41   11e2 + * + * If the parsing is a success, result is set to the parsed value and true + * is returned. + * + * The function is greedy and will parse until any of the following happens: + *  - a non-conforming character is encountered. + *  - s_end is reached. + * + * The following situations triggers a failure: + *  - s >= s_end. + *  - parse failure. + */ +static int tryParseDouble(const char *s, const char *s_end, double *result) { +  double mantissa = 0.0; +  /* This exponent is base 2 rather than 10. +   * However the exponent we parse is supposed to be one of ten, +   * thus we must take care to convert the exponent/and or the +   * mantissa to a * 2^E, where a is the mantissa and E is the +   * exponent. +   * To get the final double we will use ldexp, it requires the +   * exponent to be in base 2. +   */ +  int exponent = 0; + +  /* NOTE: THESE MUST BE DECLARED HERE SINCE WE ARE NOT ALLOWED +   * TO JUMP OVER DEFINITIONS. +   */ +  char sign = '+'; +  char exp_sign = '+'; +  char const *curr = s; + +  /* How many characters were read in a loop. */ +  int read = 0; +  /* Tells whether a loop terminated due to reaching s_end. */ +  int end_not_reached = 0; + +  /* +     BEGIN PARSING. 
+     */ + +  if (s >= s_end) { +    return 0; /* fail */ +  } + +  /* Find out what sign we've got. */ +  if (*curr == '+' || *curr == '-') { +    sign = *curr; +    curr++; +  } else if (IS_DIGIT(*curr)) { /* Pass through. */ +  } else { +    goto fail; +  } + +  /* Read the integer part. */ +  end_not_reached = (curr != s_end); +  while (end_not_reached && IS_DIGIT(*curr)) { +    mantissa *= 10; +    mantissa += (int)(*curr - 0x30); +    curr++; +    read++; +    end_not_reached = (curr != s_end); +  } + +  /* We must make sure we actually got something. */ +  if (read == 0) +    goto fail; +  /* We allow numbers of form "#", "###" etc. */ +  if (!end_not_reached) +    goto assemble; + +  /* Read the decimal part. */ +  if (*curr == '.') { +    curr++; +    read = 1; +    end_not_reached = (curr != s_end); +    while (end_not_reached && IS_DIGIT(*curr)) { +      /* pow(10.0, -read) */ +      double frac_value = 1.0; +      int f; +      for (f = 0; f < read; f++) { +        frac_value *= 0.1; +      } +      mantissa += (int)(*curr - 0x30) * frac_value; +      read++; +      curr++; +      end_not_reached = (curr != s_end); +    } +  } else if (*curr == 'e' || *curr == 'E') { +  } else { +    goto assemble; +  } + +  if (!end_not_reached) +    goto assemble; + +  /* Read the exponent part. */ +  if (*curr == 'e' || *curr == 'E') { +    curr++; +    /* Figure out if a sign is present and if it is. */ +    end_not_reached = (curr != s_end); +    if (end_not_reached && (*curr == '+' || *curr == '-')) { +      exp_sign = *curr; +      curr++; +    } else if (IS_DIGIT(*curr)) { /* Pass through. */ +    } else { +      /* Empty E is not allowed. */ +      goto fail; +    } + +    read = 0; +    end_not_reached = (curr != s_end); +    while (end_not_reached && IS_DIGIT(*curr)) { +      exponent *= 10; +      exponent += (int)(*curr - 0x30); +      curr++; +      read++; +      end_not_reached = (curr != s_end); +    } +    if (read == 0) +      goto fail; +  } + +assemble: + +{ +  double a = 1.0; /* = pow(5.0, exponent); */ +  double b = 1.0; /* = 2.0^exponent */ +  int i; +  for (i = 0; i < exponent; i++) { +    a = a * 5.0; +  } + +  for (i = 0; i < exponent; i++) { +    b = b * 2.0; +  } + +  if (exp_sign == '-') { +    a = 1.0 / a; +    b = 1.0 / b; +  } + +  *result = +      /* (sign == '+' ? 1 : -1) * ldexp(mantissa * pow(5.0, exponent), +         exponent); */ +      (sign == '+' ? 1 : -1) * (mantissa * a * b); +} + +  return 1; +fail: +  return 0; +} + +static float parseFloat(const char **token) { +  const char *end; +  double val = 0.0; +  float f = 0.0f; +  skip_space(token); +  end = (*token) + until_space((*token)); +  val = 0.0; +  tryParseDouble((*token), end, &val); +  f = (float)(val); +  (*token) = end; +  return f; +} + +static void parseFloat2(float *x, float *y, const char **token) { +  (*x) = parseFloat(token); +  (*y) = parseFloat(token); +} + +static void parseFloat3(float *x, float *y, float *z, const char **token) { +  (*x) = parseFloat(token); +  (*y) = parseFloat(token); +  (*z) = parseFloat(token); +} + +static size_t my_strnlen(const char *s, size_t n) { +  const char *p = (char *)memchr(s, 0, n); +  return p ? 
(size_t)(p - s) : n; +} + +static char *my_strdup(const char *s, size_t max_length) { +  char *d; +  size_t len; + +  if (s == NULL) +    return NULL; + +  /* Do not consider CRLF line ending(#19) */ +  len = length_until_line_feed(s, max_length); +  /* len = strlen(s); */ + +  /* trim line ending and append '\0' */ +  d = (char *)TINYOBJ_MALLOC(len + 1); /* + '\0' */ +  memcpy(d, s, (size_t)(len)); +  d[len] = '\0'; + +  return d; +} + +static char *my_strndup(const char *s, size_t len) { +  char *d; +  size_t slen; + +  if (s == NULL) +    return NULL; +  if (len == 0) +    return NULL; + +  slen = my_strnlen(s, len); +  d = (char *)TINYOBJ_MALLOC(slen + 1); /* + '\0' */ +  if (!d) { +    return NULL; +  } +  memcpy(d, s, slen); +  d[slen] = '\0'; + +  return d; +} + +char *dynamic_fgets(char **buf, size_t *size, FILE *file) { +  char *offset; +  char *ret; +  size_t old_size; + +  if (!(ret = fgets(*buf, (int)*size, file))) { +    return ret; +  } + +  if (NULL != strchr(*buf, '\n')) { +    return ret; +  } + +  do { +    old_size = *size; +    *size *= 2; +    *buf = (char *)TINYOBJ_REALLOC_SIZED(*buf, old_size, *size); +    offset = &((*buf)[old_size - 1]); + +    ret = fgets(offset, (int)(old_size + 1), file); +  } while (ret && (NULL == strchr(*buf, '\n'))); + +  return ret; +} + +static void initMaterial(tinyobj_material_t *material) { +  int i; +  material->name = NULL; +  material->ambient_texname = NULL; +  material->diffuse_texname = NULL; +  material->specular_texname = NULL; +  material->specular_highlight_texname = NULL; +  material->bump_texname = NULL; +  material->displacement_texname = NULL; +  material->alpha_texname = NULL; +  for (i = 0; i < 3; i++) { +    material->ambient[i] = 0.f; +    material->diffuse[i] = 0.f; +    material->specular[i] = 0.f; +    material->transmittance[i] = 0.f; +    material->emission[i] = 0.f; +  } +  material->illum = 0; +  material->dissolve = 1.f; +  material->shininess = 1.f; +  material->ior = 1.f; +} + +/* Implementation of string to int hashtable */ + +#define HASH_TABLE_ERROR 1 +#define HASH_TABLE_SUCCESS 0 + +#define HASH_TABLE_DEFAULT_SIZE 10 + +typedef struct hash_table_entry_t { +  unsigned long hash; +  int filled; +  int pad0; +  long value; + +  struct hash_table_entry_t *next; +} hash_table_entry_t; + +typedef struct { +  unsigned long *hashes; +  hash_table_entry_t *entries; +  size_t capacity; +  size_t n; +} hash_table_t; + +static unsigned long hash_djb2(const unsigned char *str) { +  unsigned long hash = 5381; +  int c; + +  while ((c = *str++)) { +    hash = ((hash << 5) + hash) + (unsigned long)(c); +  } + +  return hash; +} + +static void create_hash_table(size_t start_capacity, hash_table_t *hash_table) { +  if (start_capacity < 1) +    start_capacity = HASH_TABLE_DEFAULT_SIZE; +  hash_table->hashes = +      (unsigned long *)TINYOBJ_MALLOC(start_capacity * sizeof(unsigned long)); +  hash_table->entries = (hash_table_entry_t *)TINYOBJ_CALLOC( +      start_capacity, sizeof(hash_table_entry_t)); +  hash_table->capacity = start_capacity; +  hash_table->n = 0; +} + +static void destroy_hash_table(hash_table_t *hash_table) { +  TINYOBJ_FREE(hash_table->entries); +  TINYOBJ_FREE(hash_table->hashes); +} + +/* Insert with quadratic probing */ +static int hash_table_insert_value(unsigned long hash, long value, +                                   hash_table_t *hash_table) { +  /* Insert value */ +  size_t start_index = hash % hash_table->capacity; +  size_t index = start_index; +  hash_table_entry_t *start_entry = 
hash_table->entries + start_index; +  size_t i; +  hash_table_entry_t *entry; + +  for (i = 1; hash_table->entries[index].filled; i++) { +    if (i >= hash_table->capacity) +      return HASH_TABLE_ERROR; +    index = (start_index + (i * i)) % hash_table->capacity; +  } + +  entry = hash_table->entries + index; +  entry->hash = hash; +  entry->filled = 1; +  entry->value = value; + +  if (index != start_index) { +    /* This is a new entry, but not the start entry, hence we need to add a next +     * pointer to our entry */ +    entry->next = start_entry->next; +    start_entry->next = entry; +  } + +  return HASH_TABLE_SUCCESS; +} + +static int hash_table_insert(unsigned long hash, long value, +                             hash_table_t *hash_table) { +  int ret = hash_table_insert_value(hash, value, hash_table); +  if (ret == HASH_TABLE_SUCCESS) { +    hash_table->hashes[hash_table->n] = hash; +    hash_table->n++; +  } +  return ret; +} + +static hash_table_entry_t *hash_table_find(unsigned long hash, +                                           hash_table_t *hash_table) { +  hash_table_entry_t *entry = +      hash_table->entries + (hash % hash_table->capacity); +  while (entry) { +    if (entry->hash == hash && entry->filled) { +      return entry; +    } +    entry = entry->next; +  } +  return NULL; +} + +static void hash_table_grow(hash_table_t *hash_table) { +  size_t new_capacity; +  hash_table_t new_hash_table; +  size_t i; + +  new_capacity = 2 * hash_table->capacity; +  /* Create a new hash table. We're not calling create_hash_table because we +   * want to realloc the hash array */ +  new_hash_table.hashes = hash_table->hashes = +      (unsigned long *)TINYOBJ_REALLOC_SIZED( +          (void *)hash_table->hashes, +          sizeof(unsigned long) * hash_table->capacity, +          sizeof(unsigned long) * new_capacity); +  new_hash_table.entries = (hash_table_entry_t *)TINYOBJ_CALLOC( +      new_capacity, sizeof(hash_table_entry_t)); +  new_hash_table.capacity = new_capacity; +  new_hash_table.n = hash_table->n; + +  /* Rehash */ +  for (i = 0; i < hash_table->capacity; i++) { +    hash_table_entry_t *entry = &hash_table->entries[i]; +    if (entry->filled) { +      hash_table_insert_value(entry->hash, entry->value, &new_hash_table); +    } +  } + +  TINYOBJ_FREE(hash_table->entries); +  (*hash_table) = new_hash_table; +} + +static int hash_table_exists(const char *name, hash_table_t *hash_table) { +  return hash_table_find(hash_djb2((const unsigned char *)name), hash_table) != +         NULL; +} + +static void hash_table_set(const char *name, size_t val, +                           hash_table_t *hash_table) { +  /* Hash name */ +  unsigned long hash = hash_djb2((const unsigned char *)name); + +  hash_table_entry_t *entry = hash_table_find(hash, hash_table); +  if (entry) { +    entry->value = (long)val; +    return; +  } + +  /* Expand if necessary +   * Grow until the element has been added +   */ +  while (hash_table_insert(hash, (long)val, hash_table) != HASH_TABLE_SUCCESS) { +    hash_table_grow(hash_table); +  } +} + +static long hash_table_get(const char *name, hash_table_t *hash_table) { +  hash_table_entry_t *ret = +      hash_table_find(hash_djb2((const unsigned char *)(name)), hash_table); +  return ret->value; +} + +static tinyobj_material_t *tinyobj_material_add(tinyobj_material_t *prev, +                                                size_t num_materials, +                                                tinyobj_material_t *new_mat) { +  tinyobj_material_t *dst; +  
size_t num_bytes = sizeof(tinyobj_material_t) * num_materials; +  dst = (tinyobj_material_t *)TINYOBJ_REALLOC_SIZED( +      prev, num_bytes, num_bytes + sizeof(tinyobj_material_t)); + +  dst[num_materials] = (*new_mat); /* Just copy pointer for char* members */ +  return dst; +} + +static int is_line_ending(const char *p, size_t i, size_t end_i) { +  if (p[i] == '\0') +    return 1; +  if (p[i] == '\n') +    return 1; /* this includes \r\n */ +  if (p[i] == '\r') { +    if (((i + 1) < end_i) && (p[i + 1] != '\n')) { /* detect only \r case */ +      return 1; +    } +  } +  return 0; +} + +typedef struct { +  size_t pos; +  size_t len; +} LineInfo; + +/* Find '\n' and create line data. */ +static int get_line_infos(const char *buf, size_t buf_len, +                          LineInfo **line_infos, size_t *num_lines) { +  size_t i = 0; +  size_t end_idx = buf_len; +  size_t prev_pos = 0; +  size_t line_no = 0; +  size_t last_line_ending = 0; + +  /* Count # of lines. */ +  for (i = 0; i < end_idx; i++) { +    if (is_line_ending(buf, i, end_idx)) { +      (*num_lines)++; +      last_line_ending = i; +    } +  } +  /* The last char from the input may not be a line +   * ending character so add an extra line if there +   * are more characters after the last line ending +   * that was found. */ +  if (end_idx - last_line_ending > 1) { +    (*num_lines)++; +  } + +  if (*num_lines == 0) +    return TINYOBJ_ERROR_EMPTY; + +  *line_infos = (LineInfo *)TINYOBJ_MALLOC(sizeof(LineInfo) * (*num_lines)); + +  /* Fill line infos. */ +  for (i = 0; i < end_idx; i++) { +    if (is_line_ending(buf, i, end_idx)) { +      (*line_infos)[line_no].pos = prev_pos; +      (*line_infos)[line_no].len = i - prev_pos; +      prev_pos = i + 1; +      line_no++; +    } +  } +  if (end_idx - last_line_ending > 1) { +    (*line_infos)[line_no].pos = prev_pos; +    (*line_infos)[line_no].len = end_idx - 1 - last_line_ending; +  } + +  return 0; +} + +static int tinyobj_parse_and_index_mtl_file( +    tinyobj_material_t **materials_out, size_t *num_materials_out, +    const char *mtl_filename, const char *obj_filename, +    file_reader_callback file_reader, void *ctx, hash_table_t *material_table) { +  tinyobj_material_t material; +  size_t num_materials = 0; +  tinyobj_material_t *materials = NULL; +  int has_previous_material = 0; +  const char *line_end = NULL; +  size_t num_lines = 0; +  LineInfo *line_infos = NULL; +  size_t i = 0; +  char *buf = NULL; +  size_t len = 0; + +  if (materials_out == NULL) { +    return TINYOBJ_ERROR_INVALID_PARAMETER; +  } + +  if (num_materials_out == NULL) { +    return TINYOBJ_ERROR_INVALID_PARAMETER; +  } + +  (*materials_out) = NULL; +  (*num_materials_out) = 0; + +  file_reader(ctx, mtl_filename, 1, obj_filename, &buf, &len); +  if (len < 1) +    return TINYOBJ_ERROR_INVALID_PARAMETER; +  if (buf == NULL) +    return TINYOBJ_ERROR_INVALID_PARAMETER; + +  if (get_line_infos(buf, len, &line_infos, &num_lines) != 0) { +    TINYOBJ_FREE(line_infos); +    return TINYOBJ_ERROR_EMPTY; +  } + +  /* Create a default material */ +  initMaterial(&material); + +  for (i = 0; i < num_lines; i++) { +    const char *p = &buf[line_infos[i].pos]; +    size_t p_len = line_infos[i].len; + +    char linebuf[4096]; +    const char *token; +    assert(p_len < 4095); + +    memcpy(linebuf, p, p_len); +    linebuf[p_len] = '\0'; + +    token = linebuf; +    line_end = token + p_len; + +    /* Skip leading space. 
*/ +    token += strspn(token, " \t"); + +    assert(token); +    if (token[0] == '\0') +      continue; /* empty line */ + +    if (token[0] == '#') +      continue; /* comment line */ + +    /* new mtl */ +    if ((0 == strncmp(token, "newmtl", 6)) && IS_SPACE((token[6]))) { +      char namebuf[4096]; + +      /* flush previous material. */ +      if (has_previous_material) { +        materials = tinyobj_material_add(materials, num_materials, &material); +        num_materials++; +      } else { +        has_previous_material = 1; +      } + +      /* initial temporary material */ +      initMaterial(&material); + +      /* set new mtl name */ +      token += 7; +#ifdef _MSC_VER +      sscanf_s(token, "%s", namebuf, (unsigned)_countof(namebuf)); +#else +      sscanf(token, "%s", namebuf); +#endif +      material.name = my_strdup(namebuf, (size_t)(line_end - token)); + +      /* Add material to material table */ +      if (material_table) +        hash_table_set(material.name, num_materials, material_table); + +      continue; +    } + +    /* ambient */ +    if (token[0] == 'K' && token[1] == 'a' && IS_SPACE((token[2]))) { +      float r, g, b; +      token += 2; +      parseFloat3(&r, &g, &b, &token); +      material.ambient[0] = r; +      material.ambient[1] = g; +      material.ambient[2] = b; +      continue; +    } + +    /* diffuse */ +    if (token[0] == 'K' && token[1] == 'd' && IS_SPACE((token[2]))) { +      float r, g, b; +      token += 2; +      parseFloat3(&r, &g, &b, &token); +      material.diffuse[0] = r; +      material.diffuse[1] = g; +      material.diffuse[2] = b; +      continue; +    } + +    /* specular */ +    if (token[0] == 'K' && token[1] == 's' && IS_SPACE((token[2]))) { +      float r, g, b; +      token += 2; +      parseFloat3(&r, &g, &b, &token); +      material.specular[0] = r; +      material.specular[1] = g; +      material.specular[2] = b; +      continue; +    } + +    /* transmittance */ +    if (token[0] == 'K' && token[1] == 't' && IS_SPACE((token[2]))) { +      float r, g, b; +      token += 2; +      parseFloat3(&r, &g, &b, &token); +      material.transmittance[0] = r; +      material.transmittance[1] = g; +      material.transmittance[2] = b; +      continue; +    } + +    /* ior(index of refraction) */ +    if (token[0] == 'N' && token[1] == 'i' && IS_SPACE((token[2]))) { +      token += 2; +      material.ior = parseFloat(&token); +      continue; +    } + +    /* emission */ +    if (token[0] == 'K' && token[1] == 'e' && IS_SPACE(token[2])) { +      float r, g, b; +      token += 2; +      parseFloat3(&r, &g, &b, &token); +      material.emission[0] = r; +      material.emission[1] = g; +      material.emission[2] = b; +      continue; +    } + +    /* shininess */ +    if (token[0] == 'N' && token[1] == 's' && IS_SPACE(token[2])) { +      token += 2; +      material.shininess = parseFloat(&token); +      continue; +    } + +    /* illum model */ +    if (0 == strncmp(token, "illum", 5) && IS_SPACE(token[5])) { +      token += 6; +      material.illum = parseInt(&token); +      continue; +    } + +    /* dissolve */ +    if ((token[0] == 'd' && IS_SPACE(token[1]))) { +      token += 1; +      material.dissolve = parseFloat(&token); +      continue; +    } +    if (token[0] == 'T' && token[1] == 'r' && IS_SPACE(token[2])) { +      token += 2; +      /* Invert value of Tr(assume Tr is in range [0, 1]) */ +      material.dissolve = 1.0f - parseFloat(&token); +      continue; +    } + +    /* ambient texture */ +    if ((0 == strncmp(token, 
"map_Ka", 6)) && IS_SPACE(token[6])) { +      token += 7; +      material.ambient_texname = my_strdup(token, (size_t)(line_end - token)); +      continue; +    } + +    /* diffuse texture */ +    if ((0 == strncmp(token, "map_Kd", 6)) && IS_SPACE(token[6])) { +      token += 7; +      material.diffuse_texname = my_strdup(token, (size_t)(line_end - token)); +      continue; +    } + +    /* specular texture */ +    if ((0 == strncmp(token, "map_Ks", 6)) && IS_SPACE(token[6])) { +      token += 7; +      material.specular_texname = my_strdup(token, (size_t)(line_end - token)); +      continue; +    } + +    /* specular highlight texture */ +    if ((0 == strncmp(token, "map_Ns", 6)) && IS_SPACE(token[6])) { +      token += 7; +      material.specular_highlight_texname = +          my_strdup(token, (size_t)(line_end - token)); +      continue; +    } + +    /* bump texture */ +    if ((0 == strncmp(token, "map_bump", 8)) && IS_SPACE(token[8])) { +      token += 9; +      material.bump_texname = my_strdup(token, (size_t)(line_end - token)); +      continue; +    } + +    /* alpha texture */ +    if ((0 == strncmp(token, "map_d", 5)) && IS_SPACE(token[5])) { +      token += 6; +      material.alpha_texname = my_strdup(token, (size_t)(line_end - token)); +      continue; +    } + +    /* bump texture */ +    if ((0 == strncmp(token, "bump", 4)) && IS_SPACE(token[4])) { +      token += 5; +      material.bump_texname = my_strdup(token, (size_t)(line_end - token)); +      continue; +    } + +    /* displacement texture */ +    if ((0 == strncmp(token, "disp", 4)) && IS_SPACE(token[4])) { +      token += 5; +      material.displacement_texname = +          my_strdup(token, (size_t)(line_end - token)); +      continue; +    } + +    /* @todo { unknown parameter } */ +  } + +  TINYOBJ_FREE(line_infos); + +  if (material.name) { +    /* Flush last material element */ +    materials = tinyobj_material_add(materials, num_materials, &material); +    num_materials++; +  } + +  (*num_materials_out) = num_materials; +  (*materials_out) = materials; + +  return TINYOBJ_SUCCESS; +} + +int tinyobj_parse_mtl_file(tinyobj_material_t **materials_out, +                           size_t *num_materials_out, const char *mtl_filename, +                           const char *obj_filename, +                           file_reader_callback file_reader, void *ctx) { +  return tinyobj_parse_and_index_mtl_file(materials_out, num_materials_out, +                                          mtl_filename, obj_filename, +                                          file_reader, ctx, NULL); +} + +typedef enum { +  COMMAND_EMPTY, +  COMMAND_V, +  COMMAND_VN, +  COMMAND_VT, +  COMMAND_F, +  COMMAND_G, +  COMMAND_O, +  COMMAND_USEMTL, +  COMMAND_MTLLIB + +} CommandType; + +typedef struct { +  float vx, vy, vz; +  float nx, ny, nz; +  float tx, ty; + +  /* @todo { Use dynamic array } */ +  tinyobj_vertex_index_t f[TINYOBJ_MAX_FACES_PER_F_LINE]; +  size_t num_f; + +  int f_num_verts[TINYOBJ_MAX_FACES_PER_F_LINE]; +  size_t num_f_num_verts; + +  const char *group_name; +  unsigned int group_name_len; +  int pad0; + +  const char *object_name; +  unsigned int object_name_len; +  int pad1; + +  const char *material_name; +  unsigned int material_name_len; +  int pad2; + +  const char *mtllib_name; +  unsigned int mtllib_name_len; + +  CommandType type; +} Command; + +static int parseLine(Command *command, const char *p, size_t p_len, +                     int triangulate) { +  char linebuf[4096]; +  const char *token; +  assert(p_len < 4095); 
+ +  memcpy(linebuf, p, p_len); +  linebuf[p_len] = '\0'; + +  token = linebuf; + +  command->type = COMMAND_EMPTY; + +  /* Skip leading space. */ +  skip_space(&token); + +  assert(token); +  if (token[0] == '\0') { /* empty line */ +    return 0; +  } + +  if (token[0] == '#') { /* comment line */ +    return 0; +  } + +  /* vertex */ +  if (token[0] == 'v' && IS_SPACE((token[1]))) { +    float x, y, z; +    token += 2; +    parseFloat3(&x, &y, &z, &token); +    command->vx = x; +    command->vy = y; +    command->vz = z; +    command->type = COMMAND_V; +    return 1; +  } + +  /* normal */ +  if (token[0] == 'v' && token[1] == 'n' && IS_SPACE((token[2]))) { +    float x, y, z; +    token += 3; +    parseFloat3(&x, &y, &z, &token); +    command->nx = x; +    command->ny = y; +    command->nz = z; +    command->type = COMMAND_VN; +    return 1; +  } + +  /* texcoord */ +  if (token[0] == 'v' && token[1] == 't' && IS_SPACE((token[2]))) { +    float x, y; +    token += 3; +    parseFloat2(&x, &y, &token); +    command->tx = x; +    command->ty = y; +    command->type = COMMAND_VT; +    return 1; +  } + +  /* face */ +  if (token[0] == 'f' && IS_SPACE((token[1]))) { +    size_t num_f = 0; + +    tinyobj_vertex_index_t f[TINYOBJ_MAX_FACES_PER_F_LINE]; +    token += 2; +    skip_space(&token); + +    while (!IS_NEW_LINE(token[0])) { +      tinyobj_vertex_index_t vi = parseRawTriple(&token); +      skip_space_and_cr(&token); + +      f[num_f] = vi; +      num_f++; +    } + +    command->type = COMMAND_F; + +    if (triangulate) { +      size_t k; +      size_t n = 0; + +      tinyobj_vertex_index_t i0 = f[0]; +      tinyobj_vertex_index_t i1; +      tinyobj_vertex_index_t i2 = f[1]; + +      assert(3 * num_f < TINYOBJ_MAX_FACES_PER_F_LINE); + +      for (k = 2; k < num_f; k++) { +        i1 = i2; +        i2 = f[k]; +        command->f[3 * n + 0] = i0; +        command->f[3 * n + 1] = i1; +        command->f[3 * n + 2] = i2; + +        command->f_num_verts[n] = 3; +        n++; +      } +      command->num_f = 3 * n; +      command->num_f_num_verts = n; + +    } else { +      size_t k = 0; +      assert(num_f < TINYOBJ_MAX_FACES_PER_F_LINE); +      for (k = 0; k < num_f; k++) { +        command->f[k] = f[k]; +      } + +      command->num_f = num_f; +      command->f_num_verts[0] = (int)num_f; +      command->num_f_num_verts = 1; +    } + +    return 1; +  } + +  /* use mtl */ +  if ((0 == strncmp(token, "usemtl", 6)) && IS_SPACE((token[6]))) { +    token += 7; + +    skip_space(&token); +    command->material_name = p + (token - linebuf); +    command->material_name_len = (unsigned int)length_until_newline( +        token, (p_len - (size_t)(token - linebuf)) + 1); +    command->type = COMMAND_USEMTL; + +    return 1; +  } + +  /* load mtl */ +  if ((0 == strncmp(token, "mtllib", 6)) && IS_SPACE((token[6]))) { +    /* By specification, `mtllib` should be appear only once in .obj */ +    token += 7; + +    skip_space(&token); +    command->mtllib_name = p + (token - linebuf); +    command->mtllib_name_len = (unsigned int)length_until_newline( +                                   token, p_len - (size_t)(token - linebuf)) + +                               1; +    command->type = COMMAND_MTLLIB; + +    return 1; +  } + +  /* group name */ +  if (token[0] == 'g' && IS_SPACE((token[1]))) { +    /* @todo { multiple group name. 
} */ +    token += 2; + +    command->group_name = p + (token - linebuf); +    command->group_name_len = (unsigned int)length_until_newline( +                                  token, p_len - (size_t)(token - linebuf)) + +                              1; +    command->type = COMMAND_G; + +    return 1; +  } + +  /* object name */ +  if (token[0] == 'o' && IS_SPACE((token[1]))) { +    /* @todo { multiple object name? } */ +    token += 2; + +    command->object_name = p + (token - linebuf); +    command->object_name_len = (unsigned int)length_until_newline( +                                   token, p_len - (size_t)(token - linebuf)) + +                               1; +    command->type = COMMAND_O; + +    return 1; +  } + +  return 0; +} + +static size_t basename_len(const char *filename, size_t filename_length) { +  /* Count includes NUL terminator. */ +  const char *p = &filename[filename_length - 1]; +  size_t count = 1; + +/* On Windows, the directory delimiter is '\' and both it and '/' is + * reserved by the filesystem. On *nix platforms, only the '/' character + * is reserved, so account for the two cases separately. */ +#if _WIN32 +  while (p[-1] != '/' && p[-1] != '\\') { +    if (p == filename) { +      count = filename_length; +      return count; +    } +    count++; +    p--; +  } +  p++; +  return count; +#else +  while (*(--p) != '/') { +    if (p == filename) { +      count = filename_length; +      return count; +    } +    count++; +  } +  return count; +#endif +} + +static char *generate_mtl_filename(const char *obj_filename, +                                   size_t obj_filename_length, +                                   const char *mtllib_name, +                                   size_t mtllib_name_length) { +  /* Create a dynamically-allocated material filename. This allows the material +   * and obj files to be separated, however the mtllib name in the OBJ file +   * must be a relative path to the material file from the OBJ's directory. +   * This does not support the matllib name as an absolute address. 
*/ +  char *mtl_filename; +  char *p; +  size_t mtl_filename_length; +  size_t obj_basename_length; + +  /* Calculate required size of mtl_filename and allocate */ +  obj_basename_length = basename_len(obj_filename, obj_filename_length); +  mtl_filename_length = +      (obj_filename_length - obj_basename_length) + mtllib_name_length; +  mtl_filename = (char *)TINYOBJ_MALLOC(mtl_filename_length); + +  /* Copy over the obj's path */ +  memcpy(mtl_filename, obj_filename, +         (obj_filename_length - obj_basename_length)); + +  /* Overwrite the obj basename with the mtllib name, filling the string */ +  p = &mtl_filename[mtl_filename_length - mtllib_name_length]; +  strcpy(p, mtllib_name); +  return mtl_filename; +} + +int tinyobj_parse_obj(tinyobj_attrib_t *attrib, tinyobj_shape_t **shapes, +                      size_t *num_shapes, tinyobj_material_t **materials_out, +                      size_t *num_materials_out, const char *obj_filename, +                      file_reader_callback file_reader, void *ctx, +                      unsigned int flags) { +  LineInfo *line_infos = NULL; +  Command *commands = NULL; +  size_t num_lines = 0; + +  size_t num_v = 0; +  size_t num_vn = 0; +  size_t num_vt = 0; +  size_t num_f = 0; +  size_t num_faces = 0; + +  int mtllib_line_index = -1; + +  tinyobj_material_t *materials = NULL; +  size_t num_materials = 0; + +  hash_table_t material_table; + +  char *buf = NULL; +  size_t len = 0; +  file_reader(ctx, obj_filename, /* is_mtl */ 0, obj_filename, &buf, &len); + +  if (len < 1) +    return TINYOBJ_ERROR_INVALID_PARAMETER; +  if (attrib == NULL) +    return TINYOBJ_ERROR_INVALID_PARAMETER; +  if (shapes == NULL) +    return TINYOBJ_ERROR_INVALID_PARAMETER; +  if (num_shapes == NULL) +    return TINYOBJ_ERROR_INVALID_PARAMETER; +  if (buf == NULL) +    return TINYOBJ_ERROR_INVALID_PARAMETER; +  if (materials_out == NULL) +    return TINYOBJ_ERROR_INVALID_PARAMETER; +  if (num_materials_out == NULL) +    return TINYOBJ_ERROR_INVALID_PARAMETER; + +  tinyobj_attrib_init(attrib); + +  /* 1. create line data */ +  if (get_line_infos(buf, len, &line_infos, &num_lines) != 0) { +    return TINYOBJ_ERROR_EMPTY; +  } + +  commands = (Command *)TINYOBJ_MALLOC(sizeof(Command) * num_lines); + +  create_hash_table(HASH_TABLE_DEFAULT_SIZE, &material_table); + +  /* 2. parse each line */ +  { +    size_t i = 0; +    for (i = 0; i < num_lines; i++) { +      int ret = parseLine(&commands[i], &buf[line_infos[i].pos], +                          line_infos[i].len, flags & TINYOBJ_FLAG_TRIANGULATE); +      if (ret) { +        if (commands[i].type == COMMAND_V) { +          num_v++; +        } else if (commands[i].type == COMMAND_VN) { +          num_vn++; +        } else if (commands[i].type == COMMAND_VT) { +          num_vt++; +        } else if (commands[i].type == COMMAND_F) { +          num_f += commands[i].num_f; +          num_faces += commands[i].num_f_num_verts; +        } + +        if (commands[i].type == COMMAND_MTLLIB) { +          mtllib_line_index = (int)i; +        } +      } +    } +  } + +  /* line_infos are not used anymore. Release memory. 
*/ +  if (line_infos) { +    TINYOBJ_FREE(line_infos); +  } + +  /* Load material (if it exists) */ +  if (mtllib_line_index >= 0 && commands[mtllib_line_index].mtllib_name && +      commands[mtllib_line_index].mtllib_name_len > 0) { +    /* Maximum length allowed by Linux - higher than Windows and macOS */ +    size_t obj_filename_len = my_strnlen(obj_filename, 4096 + 255) + 1; +    char *mtl_filename; +    char *mtllib_name; +    size_t mtllib_name_len = 0; +    int ret; + +    mtllib_name_len = +        length_until_line_feed(commands[mtllib_line_index].mtllib_name, +                               commands[mtllib_line_index].mtllib_name_len); + +    mtllib_name = +        my_strndup(commands[mtllib_line_index].mtllib_name, mtllib_name_len); + +    /* allow for NUL terminator */ +    mtllib_name_len++; +    mtl_filename = generate_mtl_filename(obj_filename, obj_filename_len, +                                         mtllib_name, mtllib_name_len); + +    ret = tinyobj_parse_and_index_mtl_file(&materials, &num_materials, +                                           mtl_filename, obj_filename, +                                           file_reader, ctx, &material_table); + +    if (ret != TINYOBJ_SUCCESS) { +      /* warning. */ +      fprintf(stderr, "TINYOBJ: Failed to parse material file '%s': %d\n", +              mtl_filename, ret); +    } +    TINYOBJ_FREE(mtl_filename); +    TINYOBJ_FREE(mtllib_name); +  } + +  /* Construct attributes */ + +  { +    size_t v_count = 0; +    size_t n_count = 0; +    size_t t_count = 0; +    size_t f_count = 0; +    size_t face_count = 0; +    int material_id = -1; /* -1 = default unknown material. */ +    size_t i = 0; + +    attrib->vertices = (float *)TINYOBJ_MALLOC(sizeof(float) * num_v * 3); +    attrib->num_vertices = (unsigned int)num_v; +    attrib->normals = (float *)TINYOBJ_MALLOC(sizeof(float) * num_vn * 3); +    attrib->num_normals = (unsigned int)num_vn; +    attrib->texcoords = (float *)TINYOBJ_MALLOC(sizeof(float) * num_vt * 2); +    attrib->num_texcoords = (unsigned int)num_vt; +    attrib->faces = (tinyobj_vertex_index_t *)TINYOBJ_MALLOC( +        sizeof(tinyobj_vertex_index_t) * num_f); +    attrib->num_faces = (unsigned int)num_f; +    attrib->face_num_verts = (int *)TINYOBJ_MALLOC(sizeof(int) * num_faces); +    attrib->material_ids = (int *)TINYOBJ_MALLOC(sizeof(int) * num_faces); +    attrib->num_face_num_verts = (unsigned int)num_faces; + +    for (i = 0; i < num_lines; i++) { +      if (commands[i].type == COMMAND_EMPTY) { +        continue; +      } else if (commands[i].type == COMMAND_USEMTL) { +        /* @todo +           if (commands[t][i].material_name && +           commands[t][i].material_name_len > 0) { +           std::string material_name(commands[t][i].material_name, +           commands[t][i].material_name_len); + +           if (material_map.find(material_name) != material_map.end()) { +           material_id = material_map[material_name]; +           } else { +        // Assign invalid material ID +        material_id = -1; +        } +        } +        */ +        if (commands[i].material_name && commands[i].material_name_len > 0) { +          /* Create a null terminated string */ +          char *material_name_null_term = +              (char *)TINYOBJ_MALLOC(commands[i].material_name_len + 1); +          memcpy((void *)material_name_null_term, +                 (const void *)commands[i].material_name, +                 commands[i].material_name_len); +          
material_name_null_term[commands[i].material_name_len] = 0; + +          if (hash_table_exists(material_name_null_term, &material_table)) +            material_id = +                (int)hash_table_get(material_name_null_term, &material_table); +          else +            material_id = -1; + +          TINYOBJ_FREE(material_name_null_term); +        } +      } else if (commands[i].type == COMMAND_V) { +        attrib->vertices[3 * v_count + 0] = commands[i].vx; +        attrib->vertices[3 * v_count + 1] = commands[i].vy; +        attrib->vertices[3 * v_count + 2] = commands[i].vz; +        v_count++; +      } else if (commands[i].type == COMMAND_VN) { +        attrib->normals[3 * n_count + 0] = commands[i].nx; +        attrib->normals[3 * n_count + 1] = commands[i].ny; +        attrib->normals[3 * n_count + 2] = commands[i].nz; +        n_count++; +      } else if (commands[i].type == COMMAND_VT) { +        attrib->texcoords[2 * t_count + 0] = commands[i].tx; +        attrib->texcoords[2 * t_count + 1] = commands[i].ty; +        t_count++; +      } else if (commands[i].type == COMMAND_F) { +        size_t k = 0; +        for (k = 0; k < commands[i].num_f; k++) { +          tinyobj_vertex_index_t vi = commands[i].f[k]; +          int v_idx = fixIndex(vi.v_idx, v_count); +          int vn_idx = fixIndex(vi.vn_idx, n_count); +          int vt_idx = fixIndex(vi.vt_idx, t_count); +          attrib->faces[f_count + k].v_idx = v_idx; +          attrib->faces[f_count + k].vn_idx = vn_idx; +          attrib->faces[f_count + k].vt_idx = vt_idx; +        } + +        for (k = 0; k < commands[i].num_f_num_verts; k++) { +          attrib->material_ids[face_count + k] = material_id; +          attrib->face_num_verts[face_count + k] = commands[i].f_num_verts[k]; +        } + +        f_count += commands[i].num_f; +        face_count += commands[i].num_f_num_verts; +      } +    } +  } + +  /* 5. Construct shape information. */ +  { +    unsigned int face_count = 0; +    size_t i = 0; +    size_t n = 0; +    size_t shape_idx = 0; + +    const char *shape_name = NULL; +    unsigned int shape_name_len = 0; +    const char *prev_shape_name = NULL; +    unsigned int prev_shape_name_len = 0; +    unsigned int prev_shape_face_offset = 0; +    unsigned int prev_face_offset = 0; +    tinyobj_shape_t prev_shape = {NULL, 0, 0}; + +    /* Find the number of shapes in .obj */ +    for (i = 0; i < num_lines; i++) { +      if (commands[i].type == COMMAND_O || commands[i].type == COMMAND_G) { +        n++; +      } +    } + +    /* Allocate array of shapes with maximum possible size(+1 for unnamed +     * group/object). 
+     * Actual # of shapes found in .obj is determined in the later */ +    (*shapes) = +        (tinyobj_shape_t *)TINYOBJ_MALLOC(sizeof(tinyobj_shape_t) * (n + 1)); + +    for (i = 0; i < num_lines; i++) { +      if (commands[i].type == COMMAND_O || commands[i].type == COMMAND_G) { +        if (commands[i].type == COMMAND_O) { +          shape_name = commands[i].object_name; +          shape_name_len = commands[i].object_name_len; +        } else { +          shape_name = commands[i].group_name; +          shape_name_len = commands[i].group_name_len; +        } + +        if (face_count == 0) { +          /* 'o' or 'g' appears before any 'f' */ +          prev_shape_name = shape_name; +          prev_shape_name_len = shape_name_len; +          prev_shape_face_offset = face_count; +          prev_face_offset = face_count; +        } else { +          if (shape_idx == 0) { +            /* 'o' or 'g' after some 'v' lines. */ +            (*shapes)[shape_idx].name = my_strndup( +                prev_shape_name, prev_shape_name_len); /* may be NULL */ +            (*shapes)[shape_idx].face_offset = prev_shape.face_offset; +            (*shapes)[shape_idx].length = face_count - prev_face_offset; +            shape_idx++; + +            prev_face_offset = face_count; + +          } else { +            if ((face_count - prev_face_offset) > 0) { +              (*shapes)[shape_idx].name = +                  my_strndup(prev_shape_name, prev_shape_name_len); +              (*shapes)[shape_idx].face_offset = prev_face_offset; +              (*shapes)[shape_idx].length = face_count - prev_face_offset; +              shape_idx++; +              prev_face_offset = face_count; +            } +          } + +          /* Record shape info for succeeding 'o' or 'g' command. */ +          prev_shape_name = shape_name; +          prev_shape_name_len = shape_name_len; +          prev_shape_face_offset = face_count; +        } +      } +      if (commands[i].type == COMMAND_F) { +        face_count++; +      } +    } + +    if ((face_count - prev_face_offset) > 0) { +      size_t length = face_count - prev_shape_face_offset; +      if (length > 0) { +        (*shapes)[shape_idx].name = +            my_strndup(prev_shape_name, prev_shape_name_len); +        (*shapes)[shape_idx].face_offset = prev_face_offset; +        (*shapes)[shape_idx].length = face_count - prev_face_offset; +        shape_idx++; +      } +    } else { +      /* Guess no 'v' line occurrence after 'o' or 'g', so discards current +       * shape information. 
*/ +    } + +    (*num_shapes) = shape_idx; +  } + +  if (commands) { +    TINYOBJ_FREE(commands); +  } + +  destroy_hash_table(&material_table); + +  (*materials_out) = materials; +  (*num_materials_out) = num_materials; + +  return TINYOBJ_SUCCESS; +} + +void tinyobj_attrib_init(tinyobj_attrib_t *attrib) { +  attrib->vertices = NULL; +  attrib->num_vertices = 0; +  attrib->normals = NULL; +  attrib->num_normals = 0; +  attrib->texcoords = NULL; +  attrib->num_texcoords = 0; +  attrib->faces = NULL; +  attrib->num_faces = 0; +  attrib->face_num_verts = NULL; +  attrib->num_face_num_verts = 0; +  attrib->material_ids = NULL; +} + +void tinyobj_attrib_free(tinyobj_attrib_t *attrib) { +  if (attrib->vertices) +    TINYOBJ_FREE(attrib->vertices); +  if (attrib->normals) +    TINYOBJ_FREE(attrib->normals); +  if (attrib->texcoords) +    TINYOBJ_FREE(attrib->texcoords); +  if (attrib->faces) +    TINYOBJ_FREE(attrib->faces); +  if (attrib->face_num_verts) +    TINYOBJ_FREE(attrib->face_num_verts); +  if (attrib->material_ids) +    TINYOBJ_FREE(attrib->material_ids); +} + +void tinyobj_shapes_free(tinyobj_shape_t *shapes, size_t num_shapes) { +  size_t i; +  if (shapes == NULL) +    return; + +  for (i = 0; i < num_shapes; i++) { +    if (shapes[i].name) +      TINYOBJ_FREE(shapes[i].name); +  } + +  TINYOBJ_FREE(shapes); +} + +void tinyobj_materials_free(tinyobj_material_t *materials, +                            size_t num_materials) { +  size_t i; +  if (materials == NULL) +    return; + +  for (i = 0; i < num_materials; i++) { +    if (materials[i].name) +      TINYOBJ_FREE(materials[i].name); +    if (materials[i].ambient_texname) +      TINYOBJ_FREE(materials[i].ambient_texname); +    if (materials[i].diffuse_texname) +      TINYOBJ_FREE(materials[i].diffuse_texname); +    if (materials[i].specular_texname) +      TINYOBJ_FREE(materials[i].specular_texname); +    if (materials[i].specular_highlight_texname) +      TINYOBJ_FREE(materials[i].specular_highlight_texname); +    if (materials[i].bump_texname) +      TINYOBJ_FREE(materials[i].bump_texname); +    if (materials[i].displacement_texname) +      TINYOBJ_FREE(materials[i].displacement_texname); +    if (materials[i].alpha_texname) +      TINYOBJ_FREE(materials[i].alpha_texname); +  } + +  TINYOBJ_FREE(materials); +} +#endif /* TINYOBJ_LOADER_C_IMPLEMENTATION */ diff --git a/vendor/vk_mem_alloc.h b/vendor/vk_mem_alloc.h new file mode 100644 index 0000000..6f71d5b --- /dev/null +++ b/vendor/vk_mem_alloc.h @@ -0,0 +1,19111 @@ +// +// Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// + +#ifndef AMD_VULKAN_MEMORY_ALLOCATOR_H +#define AMD_VULKAN_MEMORY_ALLOCATOR_H + +/** \mainpage Vulkan Memory Allocator + +<b>Version 3.2.1</b> + +Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. \n +License: MIT \n +See also: [product page on GPUOpen](https://gpuopen.com/gaming-product/vulkan-memory-allocator/), +[repository on GitHub](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator) + + +<b>API documentation divided into groups:</b> [Topics](topics.html) + +<b>General documentation chapters:</b> + +- <b>User guide</b> +  - \subpage quick_start +    - [Project setup](@ref quick_start_project_setup) +    - [Initialization](@ref quick_start_initialization) +    - [Resource allocation](@ref quick_start_resource_allocation) +  - \subpage choosing_memory_type +    - [Usage](@ref choosing_memory_type_usage) +    - [Required and preferred flags](@ref choosing_memory_type_required_preferred_flags) +    - [Explicit memory types](@ref choosing_memory_type_explicit_memory_types) +    - [Custom memory pools](@ref choosing_memory_type_custom_memory_pools) +    - [Dedicated allocations](@ref choosing_memory_type_dedicated_allocations) +  - \subpage memory_mapping +    - [Copy functions](@ref memory_mapping_copy_functions) +    - [Mapping functions](@ref memory_mapping_mapping_functions) +    - [Persistently mapped memory](@ref memory_mapping_persistently_mapped_memory) +    - [Cache flush and invalidate](@ref memory_mapping_cache_control) +  - \subpage staying_within_budget +    - [Querying for budget](@ref staying_within_budget_querying_for_budget) +    - [Controlling memory usage](@ref staying_within_budget_controlling_memory_usage) +  - \subpage resource_aliasing +  - \subpage custom_memory_pools +    - [Choosing memory type index](@ref custom_memory_pools_MemTypeIndex) +    - [When not to use custom pools](@ref custom_memory_pools_when_not_use) +    - [Linear allocation algorithm](@ref linear_algorithm) +      - [Free-at-once](@ref linear_algorithm_free_at_once) +      - [Stack](@ref linear_algorithm_stack) +      - [Double stack](@ref linear_algorithm_double_stack) +      - [Ring buffer](@ref linear_algorithm_ring_buffer) +  - \subpage defragmentation +  - \subpage statistics +    - [Numeric statistics](@ref statistics_numeric_statistics) +    - [JSON dump](@ref statistics_json_dump) +  - \subpage allocation_annotation +    - [Allocation user data](@ref allocation_user_data) +    - [Allocation names](@ref allocation_names) +  - \subpage virtual_allocator +  - \subpage debugging_memory_usage +    - [Memory initialization](@ref debugging_memory_usage_initialization) +    - [Margins](@ref debugging_memory_usage_margins) +    - [Corruption detection](@ref debugging_memory_usage_corruption_detection) +    - [Leak detection features](@ref debugging_memory_usage_leak_detection) +  - \subpage other_api_interop +- \subpage usage_patterns +    - [GPU-only resource](@ref usage_patterns_gpu_only) +    - [Staging copy for upload](@ref usage_patterns_staging_copy_upload) +    - [Readback](@ref usage_patterns_readback) +    - [Advanced data uploading](@ref usage_patterns_advanced_data_uploading) +    - [Other use cases](@ref usage_patterns_other_use_cases) +- \subpage configuration +  - 
[Pointers to Vulkan functions](@ref config_Vulkan_functions) +  - [Custom host memory allocator](@ref custom_memory_allocator) +  - [Device memory allocation callbacks](@ref allocation_callbacks) +  - [Device heap memory limit](@ref heap_memory_limit) +- <b>Extension support</b> +    - \subpage vk_khr_dedicated_allocation +    - \subpage enabling_buffer_device_address +    - \subpage vk_ext_memory_priority +    - \subpage vk_amd_device_coherent_memory +    - \subpage vk_khr_external_memory_win32 +- \subpage general_considerations +  - [Thread safety](@ref general_considerations_thread_safety) +  - [Versioning and compatibility](@ref general_considerations_versioning_and_compatibility) +  - [Validation layer warnings](@ref general_considerations_validation_layer_warnings) +  - [Allocation algorithm](@ref general_considerations_allocation_algorithm) +  - [Features not supported](@ref general_considerations_features_not_supported) + +\defgroup group_init Library initialization + +\brief API elements related to the initialization and management of the entire library, especially #VmaAllocator object. + +\defgroup group_alloc Memory allocation + +\brief API elements related to the allocation, deallocation, and management of Vulkan memory, buffers, images. +Most basic ones being: vmaCreateBuffer(), vmaCreateImage(). + +\defgroup group_virtual Virtual allocator + +\brief API elements related to the mechanism of \ref virtual_allocator - using the core allocation algorithm +for user-defined purpose without allocating any real GPU memory. + +\defgroup group_stats Statistics + +\brief API elements that query current status of the allocator, from memory usage, budget, to full dump of the internal state in JSON format. +See documentation chapter: \ref statistics. 
+*/ + + +#ifdef __cplusplus +extern "C" { +#endif + +#if !defined(VULKAN_H_) +#include <vulkan/vulkan.h> +#endif + +#if !defined(VMA_VULKAN_VERSION) +    #if defined(VK_VERSION_1_4) +        #define VMA_VULKAN_VERSION 1004000 +    #elif defined(VK_VERSION_1_3) +        #define VMA_VULKAN_VERSION 1003000 +    #elif defined(VK_VERSION_1_2) +        #define VMA_VULKAN_VERSION 1002000 +    #elif defined(VK_VERSION_1_1) +        #define VMA_VULKAN_VERSION 1001000 +    #else +        #define VMA_VULKAN_VERSION 1000000 +    #endif +#endif + +#if defined(__ANDROID__) && defined(VK_NO_PROTOTYPES) && VMA_STATIC_VULKAN_FUNCTIONS +    extern PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr; +    extern PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr; +    extern PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties; +    extern PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties; +    extern PFN_vkAllocateMemory vkAllocateMemory; +    extern PFN_vkFreeMemory vkFreeMemory; +    extern PFN_vkMapMemory vkMapMemory; +    extern PFN_vkUnmapMemory vkUnmapMemory; +    extern PFN_vkFlushMappedMemoryRanges vkFlushMappedMemoryRanges; +    extern PFN_vkInvalidateMappedMemoryRanges vkInvalidateMappedMemoryRanges; +    extern PFN_vkBindBufferMemory vkBindBufferMemory; +    extern PFN_vkBindImageMemory vkBindImageMemory; +    extern PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements; +    extern PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements; +    extern PFN_vkCreateBuffer vkCreateBuffer; +    extern PFN_vkDestroyBuffer vkDestroyBuffer; +    extern PFN_vkCreateImage vkCreateImage; +    extern PFN_vkDestroyImage vkDestroyImage; +    extern PFN_vkCmdCopyBuffer vkCmdCopyBuffer; +    #if VMA_VULKAN_VERSION >= 1001000 +        extern PFN_vkGetBufferMemoryRequirements2 vkGetBufferMemoryRequirements2; +        extern PFN_vkGetImageMemoryRequirements2 vkGetImageMemoryRequirements2; +        extern PFN_vkBindBufferMemory2 vkBindBufferMemory2; +        extern PFN_vkBindImageMemory2 vkBindImageMemory2; +        extern PFN_vkGetPhysicalDeviceMemoryProperties2 vkGetPhysicalDeviceMemoryProperties2; +    #endif // #if VMA_VULKAN_VERSION >= 1001000 +#endif // #if defined(__ANDROID__) && VMA_STATIC_VULKAN_FUNCTIONS && VK_NO_PROTOTYPES + +#if !defined(VMA_DEDICATED_ALLOCATION) +    #if VK_KHR_get_memory_requirements2 && VK_KHR_dedicated_allocation +        #define VMA_DEDICATED_ALLOCATION 1 +    #else +        #define VMA_DEDICATED_ALLOCATION 0 +    #endif +#endif + +#if !defined(VMA_BIND_MEMORY2) +    #if VK_KHR_bind_memory2 +        #define VMA_BIND_MEMORY2 1 +    #else +        #define VMA_BIND_MEMORY2 0 +    #endif +#endif + +#if !defined(VMA_MEMORY_BUDGET) +    #if VK_EXT_memory_budget && (VK_KHR_get_physical_device_properties2 || VMA_VULKAN_VERSION >= 1001000) +        #define VMA_MEMORY_BUDGET 1 +    #else +        #define VMA_MEMORY_BUDGET 0 +    #endif +#endif + +// Defined to 1 when VK_KHR_buffer_device_address device extension or equivalent core Vulkan 1.2 feature is defined in its headers. +#if !defined(VMA_BUFFER_DEVICE_ADDRESS) +    #if VK_KHR_buffer_device_address || VMA_VULKAN_VERSION >= 1002000 +        #define VMA_BUFFER_DEVICE_ADDRESS 1 +    #else +        #define VMA_BUFFER_DEVICE_ADDRESS 0 +    #endif +#endif + +// Defined to 1 when VK_EXT_memory_priority device extension is defined in Vulkan headers. 
+#if !defined(VMA_MEMORY_PRIORITY) +    #if VK_EXT_memory_priority +        #define VMA_MEMORY_PRIORITY 1 +    #else +        #define VMA_MEMORY_PRIORITY 0 +    #endif +#endif + +// Defined to 1 when VK_KHR_maintenance4 device extension is defined in Vulkan headers. +#if !defined(VMA_KHR_MAINTENANCE4) +    #if VK_KHR_maintenance4 +        #define VMA_KHR_MAINTENANCE4 1 +    #else +        #define VMA_KHR_MAINTENANCE4 0 +    #endif +#endif + +// Defined to 1 when VK_KHR_maintenance5 device extension is defined in Vulkan headers. +#if !defined(VMA_KHR_MAINTENANCE5) +    #if VK_KHR_maintenance5 +        #define VMA_KHR_MAINTENANCE5 1 +    #else +        #define VMA_KHR_MAINTENANCE5 0 +    #endif +#endif + + +// Defined to 1 when VK_KHR_external_memory device extension is defined in Vulkan headers. +#if !defined(VMA_EXTERNAL_MEMORY) +    #if VK_KHR_external_memory +        #define VMA_EXTERNAL_MEMORY 1 +    #else +        #define VMA_EXTERNAL_MEMORY 0 +    #endif +#endif + +// Defined to 1 when VK_KHR_external_memory_win32 device extension is defined in Vulkan headers. +#if !defined(VMA_EXTERNAL_MEMORY_WIN32) +    #if VK_KHR_external_memory_win32 +        #define VMA_EXTERNAL_MEMORY_WIN32 1 +    #else +        #define VMA_EXTERNAL_MEMORY_WIN32 0 +    #endif +#endif + +// Define these macros to decorate all public functions with additional code, +// before and after returned type, appropriately. This may be useful for +// exporting the functions when compiling VMA as a separate library. Example: +// #define VMA_CALL_PRE  __declspec(dllexport) +// #define VMA_CALL_POST __cdecl +#ifndef VMA_CALL_PRE +    #define VMA_CALL_PRE +#endif +#ifndef VMA_CALL_POST +    #define VMA_CALL_POST +#endif + +// Define this macro to decorate pNext pointers with an attribute specifying the Vulkan +// structure that will be extended via the pNext chain. +#ifndef VMA_EXTENDS_VK_STRUCT +    #define VMA_EXTENDS_VK_STRUCT(vkStruct) +#endif + +// Define this macro to decorate pointers with an attribute specifying the +// length of the array they point to if they are not null. +// +// The length may be one of +// - The name of another parameter in the argument list where the pointer is declared +// - The name of another member in the struct where the pointer is declared +// - The name of a member of a struct type, meaning the value of that member in +//   the context of the call. For example +//   VMA_LEN_IF_NOT_NULL("VkPhysicalDeviceMemoryProperties::memoryHeapCount"), +//   this means the number of memory heaps available in the device associated +//   with the VmaAllocator being dealt with. +#ifndef VMA_LEN_IF_NOT_NULL +    #define VMA_LEN_IF_NOT_NULL(len) +#endif + +// The VMA_NULLABLE macro is defined to be _Nullable when compiling with Clang. +// see: https://clang.llvm.org/docs/AttributeReference.html#nullable +#ifndef VMA_NULLABLE +    #ifdef __clang__ +        #define VMA_NULLABLE _Nullable +    #else +        #define VMA_NULLABLE +    #endif +#endif + +// The VMA_NOT_NULL macro is defined to be _Nonnull when compiling with Clang. 
+// see: https://clang.llvm.org/docs/AttributeReference.html#nonnull +#ifndef VMA_NOT_NULL +    #ifdef __clang__ +        #define VMA_NOT_NULL _Nonnull +    #else +        #define VMA_NOT_NULL +    #endif +#endif + +// If non-dispatchable handles are represented as pointers then we can give +// then nullability annotations +#ifndef VMA_NOT_NULL_NON_DISPATCHABLE +    #if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__) ) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(__powerpc64__) +        #define VMA_NOT_NULL_NON_DISPATCHABLE VMA_NOT_NULL +    #else +        #define VMA_NOT_NULL_NON_DISPATCHABLE +    #endif +#endif + +#ifndef VMA_NULLABLE_NON_DISPATCHABLE +    #if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__) ) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(__powerpc64__) +        #define VMA_NULLABLE_NON_DISPATCHABLE VMA_NULLABLE +    #else +        #define VMA_NULLABLE_NON_DISPATCHABLE +    #endif +#endif + +#ifndef VMA_STATS_STRING_ENABLED +    #define VMA_STATS_STRING_ENABLED 1 +#endif + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +// +//    INTERFACE +// +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// + +// Sections for managing code placement in file, only for development purposes e.g. for convenient folding inside an IDE. +#ifndef _VMA_ENUM_DECLARATIONS + +/** +\addtogroup group_init +@{ +*/ + +/// Flags for created #VmaAllocator. +typedef enum VmaAllocatorCreateFlagBits +{ +    /** \brief Allocator and all objects created from it will not be synchronized internally, so you must guarantee they are used from only one thread at a time or synchronized externally by you. + +    Using this flag may increase performance because internal mutexes are not used. +    */ +    VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT = 0x00000001, +    /** \brief Enables usage of VK_KHR_dedicated_allocation extension. + +    The flag works only if VmaAllocatorCreateInfo::vulkanApiVersion `== VK_API_VERSION_1_0`. +    When it is `VK_API_VERSION_1_1`, the flag is ignored because the extension has been promoted to Vulkan 1.1. + +    Using this extension will automatically allocate dedicated blocks of memory for +    some buffers and images instead of suballocating place for them out of bigger +    memory blocks (as if you explicitly used #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT +    flag) when it is recommended by the driver. It may improve performance on some +    GPUs. + +    You may set this flag only if you found out that following device extensions are +    supported, you enabled them while creating Vulkan device passed as +    VmaAllocatorCreateInfo::device, and you want them to be used internally by this +    library: + +    - VK_KHR_get_memory_requirements2 (device extension) +    - VK_KHR_dedicated_allocation (device extension) + +    When this flag is set, you can experience following warnings reported by Vulkan +    validation layer. You can ignore them. + +    > vkBindBufferMemory(): Binding memory to buffer 0x2d but vkGetBufferMemoryRequirements() has not been called on that buffer. 
+    */ +    VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT = 0x00000002, +    /** +    Enables usage of VK_KHR_bind_memory2 extension. + +    The flag works only if VmaAllocatorCreateInfo::vulkanApiVersion `== VK_API_VERSION_1_0`. +    When it is `VK_API_VERSION_1_1`, the flag is ignored because the extension has been promoted to Vulkan 1.1. + +    You may set this flag only if you found out that this device extension is supported, +    you enabled it while creating Vulkan device passed as VmaAllocatorCreateInfo::device, +    and you want it to be used internally by this library. + +    The extension provides functions `vkBindBufferMemory2KHR` and `vkBindImageMemory2KHR`, +    which allow to pass a chain of `pNext` structures while binding. +    This flag is required if you use `pNext` parameter in vmaBindBufferMemory2() or vmaBindImageMemory2(). +    */ +    VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT = 0x00000004, +    /** +    Enables usage of VK_EXT_memory_budget extension. + +    You may set this flag only if you found out that this device extension is supported, +    you enabled it while creating Vulkan device passed as VmaAllocatorCreateInfo::device, +    and you want it to be used internally by this library, along with another instance extension +    VK_KHR_get_physical_device_properties2, which is required by it (or Vulkan 1.1, where this extension is promoted). + +    The extension provides query for current memory usage and budget, which will probably +    be more accurate than an estimation used by the library otherwise. +    */ +    VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT = 0x00000008, +    /** +    Enables usage of VK_AMD_device_coherent_memory extension. + +    You may set this flag only if you: + +    - found out that this device extension is supported and enabled it while creating Vulkan device passed as VmaAllocatorCreateInfo::device, +    - checked that `VkPhysicalDeviceCoherentMemoryFeaturesAMD::deviceCoherentMemory` is true and set it while creating the Vulkan device, +    - want it to be used internally by this library. + +    The extension and accompanying device feature provide access to memory types with +    `VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD` and `VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD` flags. +    They are useful mostly for writing breadcrumb markers - a common method for debugging GPU crash/hang/TDR. + +    When the extension is not enabled, such memory types are still enumerated, but their usage is illegal. +    To protect from this error, if you don't create the allocator with this flag, it will refuse to allocate any memory or create a custom pool in such memory type, +    returning `VK_ERROR_FEATURE_NOT_PRESENT`. +    */ +    VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT = 0x00000010, +    /** +    Enables usage of "buffer device address" feature, which allows you to use function +    `vkGetBufferDeviceAddress*` to get raw GPU pointer to a buffer and pass it for usage inside a shader. + +    You may set this flag only if you: + +    1. (For Vulkan version < 1.2) Found as available and enabled device extension +    VK_KHR_buffer_device_address. +    This extension is promoted to core Vulkan 1.2. +    2. Found as available and enabled device feature `VkPhysicalDeviceBufferDeviceAddressFeatures::bufferDeviceAddress`. + +    When this flag is set, you can create buffers with `VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT` using VMA. 
+    The library automatically adds `VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT` to +    allocated memory blocks wherever it might be needed. + +    For more information, see documentation chapter \ref enabling_buffer_device_address. +    */ +    VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT = 0x00000020, +    /** +    Enables usage of VK_EXT_memory_priority extension in the library. + +    You may set this flag only if you found available and enabled this device extension, +    along with `VkPhysicalDeviceMemoryPriorityFeaturesEXT::memoryPriority == VK_TRUE`, +    while creating Vulkan device passed as VmaAllocatorCreateInfo::device. + +    When this flag is used, VmaAllocationCreateInfo::priority and VmaPoolCreateInfo::priority +    are used to set priorities of allocated Vulkan memory. Without it, these variables are ignored. + +    A priority must be a floating-point value between 0 and 1, indicating the priority of the allocation relative to other memory allocations. +    Larger values are higher priority. The granularity of the priorities is implementation-dependent. +    It is automatically passed to every call to `vkAllocateMemory` done by the library using structure `VkMemoryPriorityAllocateInfoEXT`. +    The value to be used for default priority is 0.5. +    For more details, see the documentation of the VK_EXT_memory_priority extension. +    */ +    VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT = 0x00000040, +    /** +    Enables usage of VK_KHR_maintenance4 extension in the library. + +    You may set this flag only if you found available and enabled this device extension, +    while creating Vulkan device passed as VmaAllocatorCreateInfo::device. +    */ +    VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT = 0x00000080, +    /** +    Enables usage of VK_KHR_maintenance5 extension in the library. + +    You should set this flag if you found available and enabled this device extension, +    while creating Vulkan device passed as VmaAllocatorCreateInfo::device. +    */ +    VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT = 0x00000100, + +    /** +    Enables usage of VK_KHR_external_memory_win32 extension in the library. + +    You should set this flag if you found available and enabled this device extension, +    while creating Vulkan device passed as VmaAllocatorCreateInfo::device. +    For more information, see \ref vk_khr_external_memory_win32. +    */ +    VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT = 0x00000200, + +    VMA_ALLOCATOR_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VmaAllocatorCreateFlagBits; +/// See #VmaAllocatorCreateFlagBits. +typedef VkFlags VmaAllocatorCreateFlags; + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/// \brief Intended usage of the allocated memory. +typedef enum VmaMemoryUsage +{ +    /** No intended memory usage specified. +    Use other members of VmaAllocationCreateInfo to specify your requirements. +    */ +    VMA_MEMORY_USAGE_UNKNOWN = 0, +    /** +    \deprecated Obsolete, preserved for backward compatibility. +    Prefers `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. +    */ +    VMA_MEMORY_USAGE_GPU_ONLY = 1, +    /** +    \deprecated Obsolete, preserved for backward compatibility. +    Guarantees `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` and `VK_MEMORY_PROPERTY_HOST_COHERENT_BIT`. +    */ +    VMA_MEMORY_USAGE_CPU_ONLY = 2, +    /** +    \deprecated Obsolete, preserved for backward compatibility. +    Guarantees `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`, prefers `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. 
+    */ +    VMA_MEMORY_USAGE_CPU_TO_GPU = 3, +    /** +    \deprecated Obsolete, preserved for backward compatibility. +    Guarantees `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`, prefers `VK_MEMORY_PROPERTY_HOST_CACHED_BIT`. +    */ +    VMA_MEMORY_USAGE_GPU_TO_CPU = 4, +    /** +    \deprecated Obsolete, preserved for backward compatibility. +    Prefers not `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. +    */ +    VMA_MEMORY_USAGE_CPU_COPY = 5, +    /** +    Lazily allocated GPU memory having `VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT`. +    Exists mostly on mobile platforms. Using it on desktop PC or other GPUs with no such memory type present will fail the allocation. + +    Usage: Memory for transient attachment images (color attachments, depth attachments etc.), created with `VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT`. + +    Allocations with this usage are always created as dedicated - it implies #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. +    */ +    VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED = 6, +    /** +    Selects best memory type automatically. +    This flag is recommended for most common use cases. + +    When using this flag, if you want to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT), +    you must pass one of the flags: #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT +    in VmaAllocationCreateInfo::flags. + +    It can be used only with functions that let the library know `VkBufferCreateInfo` or `VkImageCreateInfo`, e.g. +    vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo() +    and not with generic memory allocation functions. +    */ +    VMA_MEMORY_USAGE_AUTO = 7, +    /** +    Selects best memory type automatically with preference for GPU (device) memory. + +    When using this flag, if you want to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT), +    you must pass one of the flags: #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT +    in VmaAllocationCreateInfo::flags. + +    It can be used only with functions that let the library know `VkBufferCreateInfo` or `VkImageCreateInfo`, e.g. +    vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo() +    and not with generic memory allocation functions. +    */ +    VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE = 8, +    /** +    Selects best memory type automatically with preference for CPU (host) memory. + +    When using this flag, if you want to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT), +    you must pass one of the flags: #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT +    in VmaAllocationCreateInfo::flags. + +    It can be used only with functions that let the library know `VkBufferCreateInfo` or `VkImageCreateInfo`, e.g. +    vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo() +    and not with generic memory allocation functions. +    */ +    VMA_MEMORY_USAGE_AUTO_PREFER_HOST = 9, + +    VMA_MEMORY_USAGE_MAX_ENUM = 0x7FFFFFFF +} VmaMemoryUsage; + +/// Flags to be passed as VmaAllocationCreateInfo::flags. +typedef enum VmaAllocationCreateFlagBits +{ +    /** \brief Set this flag if the allocation should have its own memory block. 
+ +    Use it for special, big resources, like fullscreen images used as attachments. + +    If you use this flag while creating a buffer or an image, `VkMemoryDedicatedAllocateInfo` +    structure is applied if possible. +    */ +    VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT = 0x00000001, + +    /** \brief Set this flag to only try to allocate from existing `VkDeviceMemory` blocks and never create new such block. + +    If new allocation cannot be placed in any of the existing blocks, allocation +    fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY` error. + +    You should not use #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT and +    #VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT at the same time. It makes no sense. +    */ +    VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT = 0x00000002, +    /** \brief Set this flag to use a memory that will be persistently mapped and retrieve pointer to it. + +    Pointer to mapped memory will be returned through VmaAllocationInfo::pMappedData. + +    It is valid to use this flag for allocation made from memory type that is not +    `HOST_VISIBLE`. This flag is then ignored and memory is not mapped. This is +    useful if you need an allocation that is efficient to use on GPU +    (`DEVICE_LOCAL`) and still want to map it directly if possible on platforms that +    support it (e.g. Intel GPU). +    */ +    VMA_ALLOCATION_CREATE_MAPPED_BIT = 0x00000004, +    /** \deprecated Preserved for backward compatibility. Consider using vmaSetAllocationName() instead. + +    Set this flag to treat VmaAllocationCreateInfo::pUserData as pointer to a +    null-terminated string. Instead of copying pointer value, a local copy of the +    string is made and stored in allocation's `pName`. The string is automatically +    freed together with the allocation. It is also used in vmaBuildStatsString(). +    */ +    VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT = 0x00000020, +    /** Allocation will be created from upper stack in a double stack pool. + +    This flag is only allowed for custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT flag. +    */ +    VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT = 0x00000040, +    /** Create both buffer/image and allocation, but don't bind them together. +    It is useful when you want to bind yourself to do some more advanced binding, e.g. using some extensions. +    The flag is meaningful only with functions that bind by default: vmaCreateBuffer(), vmaCreateImage(). +    Otherwise it is ignored. + +    If you want to make sure the new buffer/image is not tied to the new memory allocation +    through `VkMemoryDedicatedAllocateInfoKHR` structure in case the allocation ends up in its own memory block, +    use also flag #VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT. +    */ +    VMA_ALLOCATION_CREATE_DONT_BIND_BIT = 0x00000080, +    /** Create allocation only if additional device memory required for it, if any, won't exceed +    memory budget. Otherwise return `VK_ERROR_OUT_OF_DEVICE_MEMORY`. +    */ +    VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT = 0x00000100, +    /** \brief Set this flag if the allocated memory will have aliasing resources. + +    Usage of this flag prevents supplying `VkMemoryDedicatedAllocateInfoKHR` when #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT is specified. +    Otherwise created dedicated memory will not be suitable for aliasing resources, resulting in Vulkan Validation Layer errors. 
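The dedicated-memory flag above is typically reserved for large resources such as render-target images. A minimal sketch using vmaCreateImage(), assuming `allocator`, `width`, and `height` are supplied by the application:

\code
// Sketch: give a full-screen color attachment its own VkDeviceMemory block.
// `allocator`, `width`, and `height` are assumed to exist in the application.
VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
imgCreateInfo.imageType = VK_IMAGE_TYPE_2D;
imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
imgCreateInfo.extent = (VkExtent3D){ width, height, 1 };
imgCreateInfo.mipLevels = 1;
imgCreateInfo.arrayLayers = 1;
imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
imgCreateInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;

VmaAllocationCreateInfo allocCreateInfo = {0};
allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;

VkImage image;
VmaAllocation imageAlloc;
vmaCreateImage(allocator, &imgCreateInfo, &allocCreateInfo, &image, &imageAlloc, NULL);
\endcode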
+    */ +    VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT = 0x00000200, +    /** +    Requests possibility to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT). + +    - If you use #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` value, +      you must use this flag to be able to map the allocation. Otherwise, mapping is incorrect. +    - If you use other value of #VmaMemoryUsage, this flag is ignored and mapping is always possible in memory types that are `HOST_VISIBLE`. +      This includes allocations created in \ref custom_memory_pools. + +    Declares that mapped memory will only be written sequentially, e.g. using `memcpy()` or a loop writing number-by-number, +    never read or accessed randomly, so a memory type can be selected that is uncached and write-combined. + +    \warning Violating this declaration may work correctly, but will likely be very slow. +    Watch out for implicit reads introduced by doing e.g. `pMappedData[i] += x;` +    Better prepare your data in a local variable and `memcpy()` it to the mapped pointer all at once. +    */ +    VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT = 0x00000400, +    /** +    Requests possibility to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT). + +    - If you use #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` value, +      you must use this flag to be able to map the allocation. Otherwise, mapping is incorrect. +    - If you use other value of #VmaMemoryUsage, this flag is ignored and mapping is always possible in memory types that are `HOST_VISIBLE`. +      This includes allocations created in \ref custom_memory_pools. + +    Declares that mapped memory can be read, written, and accessed in random order, +    so a `HOST_CACHED` memory type is preferred. +    */ +    VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT = 0x00000800, +    /** +    Together with #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT, +    it says that despite request for host access, a not-`HOST_VISIBLE` memory type can be selected +    if it may improve performance. + +    By using this flag, you declare that you will check if the allocation ended up in a `HOST_VISIBLE` memory type +    (e.g. using vmaGetAllocationMemoryProperties()) and if not, you will create some "staging" buffer and +    issue an explicit transfer to write/read your data. +    To prepare for this possibility, don't forget to add appropriate flags like +    `VK_BUFFER_USAGE_TRANSFER_DST_BIT`, `VK_BUFFER_USAGE_TRANSFER_SRC_BIT` to the parameters of created buffer or image. +    */ +    VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT = 0x00001000, +    /** Allocation strategy that chooses smallest possible free range for the allocation +    to minimize memory usage and fragmentation, possibly at the expense of allocation time. +    */ +    VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT = 0x00010000, +    /** Allocation strategy that chooses first suitable free range for the allocation - +    not necessarily in terms of the smallest offset but the one that is easiest and fastest to find +    to minimize allocation time, possibly at the expense of allocation quality. +    */ +    VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT = 0x00020000, +    /** Allocation strategy that chooses always the lowest offset in available space. +    This is not the most efficient strategy but achieves highly packed data. 
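The host-access flags above are what staging uploads usually combine with #VMA_ALLOCATION_CREATE_MAPPED_BIT. A minimal sketch, assuming `allocator`, `srcData`, and `dataSize` are provided by the caller:

\code
// Sketch: persistently mapped staging buffer for a one-shot upload.
// `allocator`, `srcData`, and `dataSize` are assumed to exist in the application.
VkBufferCreateInfo stagingCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
stagingCreateInfo.size = dataSize;
stagingCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

VmaAllocationCreateInfo stagingAllocCreateInfo = {0};
stagingAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
stagingAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
                               VMA_ALLOCATION_CREATE_MAPPED_BIT;

VkBuffer stagingBuffer;
VmaAllocation stagingAllocation;
VmaAllocationInfo stagingAllocInfo;
vmaCreateBuffer(allocator, &stagingCreateInfo, &stagingAllocCreateInfo,
                &stagingBuffer, &stagingAllocation, &stagingAllocInfo);

// Sequential writes only - never read back through the mapped pointer.
// memcpy() is from <string.h>.
memcpy(stagingAllocInfo.pMappedData, srcData, dataSize);
\endcode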
+    Used internally by defragmentation, not recommended in typical usage. +    */ +    VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT  = 0x00040000, +    /** Alias to #VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT. +    */ +    VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT, +    /** Alias to #VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT. +    */ +    VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT, +    /** A bit mask to extract only `STRATEGY` bits from entire set of flags. +    */ +    VMA_ALLOCATION_CREATE_STRATEGY_MASK = +        VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT | +        VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT | +        VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, + +    VMA_ALLOCATION_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VmaAllocationCreateFlagBits; +/// See #VmaAllocationCreateFlagBits. +typedef VkFlags VmaAllocationCreateFlags; + +/// Flags to be passed as VmaPoolCreateInfo::flags. +typedef enum VmaPoolCreateFlagBits +{ +    /** \brief Use this flag if you always allocate only buffers and linear images or only optimal images out of this pool and so Buffer-Image Granularity can be ignored. + +    This is an optional optimization flag. + +    If you always allocate using vmaCreateBuffer(), vmaCreateImage(), +    vmaAllocateMemoryForBuffer(), then you don't need to use it because allocator +    knows exact type of your allocations so it can handle Buffer-Image Granularity +    in the optimal way. + +    If you also allocate using vmaAllocateMemoryForImage() or vmaAllocateMemory(), +    exact type of such allocations is not known, so allocator must be conservative +    in handling Buffer-Image Granularity, which can lead to suboptimal allocation +    (wasted memory). In that case, if you can make sure you always allocate only +    buffers and linear images or only optimal images out of this pool, use this flag +    to make allocator disregard Buffer-Image Granularity and so make allocations +    faster and more optimal. +    */ +    VMA_POOL_CREATE_IGNORE_BUFFER_IMAGE_GRANULARITY_BIT = 0x00000002, + +    /** \brief Enables alternative, linear allocation algorithm in this pool. + +    Specify this flag to enable linear allocation algorithm, which always creates +    new allocations after last one and doesn't reuse space from allocations freed in +    between. It trades memory consumption for simplified algorithm and data +    structure, which has better performance and uses less memory for metadata. + +    By using this flag, you can achieve behavior of free-at-once, stack, +    ring buffer, and double stack. +    For details, see documentation chapter \ref linear_algorithm. +    */ +    VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT = 0x00000004, + +    /** Bit mask to extract only `ALGORITHM` bits from entire set of flags. +    */ +    VMA_POOL_CREATE_ALGORITHM_MASK = +        VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT, + +    VMA_POOL_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VmaPoolCreateFlagBits; +/// Flags to be passed as VmaPoolCreateInfo::flags. See #VmaPoolCreateFlagBits. +typedef VkFlags VmaPoolCreateFlags; + +/// Flags to be passed as VmaDefragmentationInfo::flags. +typedef enum VmaDefragmentationFlagBits +{ +    /* \brief Use simple but fast algorithm for defragmentation. +    May not achieve best results but will require least time to compute and least allocations to copy. 
+    */
+    VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT = 0x1,
+    /* \brief Default defragmentation algorithm, applied also when no `ALGORITHM` flag is specified.
+    Offers a balance between defragmentation quality and the amount of allocations and bytes that need to be moved.
+    */
+    VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT = 0x2,
+    /* \brief Perform full defragmentation of memory.
+    Can result in notably more time to compute and allocations to copy, but will achieve best memory packing.
+    */
+    VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT = 0x4,
+    /** \brief Use the most robust algorithm at the cost of time to compute and number of copies to make.
+    Only available when bufferImageGranularity is greater than 1, since it aims to reduce
+    alignment issues between different types of resources.
+    Otherwise falls back to same behavior as #VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT.
+    */
+    VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT = 0x8,
+
+    /// A bit mask to extract only `ALGORITHM` bits from entire set of flags.
+    VMA_DEFRAGMENTATION_FLAG_ALGORITHM_MASK =
+        VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT |
+        VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT |
+        VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT |
+        VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT,
+
+    VMA_DEFRAGMENTATION_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VmaDefragmentationFlagBits;
+/// See #VmaDefragmentationFlagBits.
+typedef VkFlags VmaDefragmentationFlags;
+
+/// Operation performed on single defragmentation move. See structure #VmaDefragmentationMove.
+typedef enum VmaDefragmentationMoveOperation
+{
+    /// Buffer/image has been recreated at `dstTmpAllocation`, data has been copied, old buffer/image has been destroyed. `srcAllocation` should be changed to point to the new place. This is the default value set by vmaBeginDefragmentationPass().
+    VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY = 0,
+    /// Set this value if you cannot move the allocation. New place reserved at `dstTmpAllocation` will be freed. `srcAllocation` will remain unchanged.
+    VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE = 1,
+    /// Set this value if you decide to abandon the allocation and you destroyed the buffer/image. New place reserved at `dstTmpAllocation` will be freed, along with `srcAllocation`, which will be destroyed.
+    VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY = 2,
+} VmaDefragmentationMoveOperation;
+
+/** @} */
+
+/**
+\addtogroup group_virtual
+@{
+*/
+
+/// Flags to be passed as VmaVirtualBlockCreateInfo::flags.
+typedef enum VmaVirtualBlockCreateFlagBits
+{
+    /** \brief Enables alternative, linear allocation algorithm in this virtual block.
+
+    Specify this flag to enable linear allocation algorithm, which always creates
+    new allocations after last one and doesn't reuse space from allocations freed in
+    between. It trades memory consumption for simplified algorithm and data
+    structure, which has better performance and uses less memory for metadata.
+
+    By using this flag, you can achieve behavior of free-at-once, stack,
+    ring buffer, and double stack.
+    For details, see documentation chapter \ref linear_algorithm.
+    */
+    VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT = 0x00000001,
+
+    /** \brief Bit mask to extract only `ALGORITHM` bits from entire set of flags.
+    */ +    VMA_VIRTUAL_BLOCK_CREATE_ALGORITHM_MASK = +        VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT, + +    VMA_VIRTUAL_BLOCK_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VmaVirtualBlockCreateFlagBits; +/// Flags to be passed as VmaVirtualBlockCreateInfo::flags. See #VmaVirtualBlockCreateFlagBits. +typedef VkFlags VmaVirtualBlockCreateFlags; + +/// Flags to be passed as VmaVirtualAllocationCreateInfo::flags. +typedef enum VmaVirtualAllocationCreateFlagBits +{ +    /** \brief Allocation will be created from upper stack in a double stack pool. + +    This flag is only allowed for virtual blocks created with #VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT flag. +    */ +    VMA_VIRTUAL_ALLOCATION_CREATE_UPPER_ADDRESS_BIT = VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT, +    /** \brief Allocation strategy that tries to minimize memory usage. +    */ +    VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT, +    /** \brief Allocation strategy that tries to minimize allocation time. +    */ +    VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT, +    /** Allocation strategy that chooses always the lowest offset in available space. +    This is not the most efficient strategy but achieves highly packed data. +    */ +    VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, +    /** \brief A bit mask to extract only `STRATEGY` bits from entire set of flags. + +    These strategy flags are binary compatible with equivalent flags in #VmaAllocationCreateFlagBits. +    */ +    VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MASK = VMA_ALLOCATION_CREATE_STRATEGY_MASK, + +    VMA_VIRTUAL_ALLOCATION_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VmaVirtualAllocationCreateFlagBits; +/// Flags to be passed as VmaVirtualAllocationCreateInfo::flags. See #VmaVirtualAllocationCreateFlagBits. +typedef VkFlags VmaVirtualAllocationCreateFlags; + +/** @} */ + +#endif // _VMA_ENUM_DECLARATIONS + +#ifndef _VMA_DATA_TYPES_DECLARATIONS + +/** +\addtogroup group_init +@{ */ + +/** \struct VmaAllocator +\brief Represents main object of this library initialized. + +Fill structure #VmaAllocatorCreateInfo and call function vmaCreateAllocator() to create it. +Call function vmaDestroyAllocator() to destroy it. + +It is recommended to create just one object of this type per `VkDevice` object, +right after Vulkan is initialized and keep it alive until before Vulkan device is destroyed. +*/ +VK_DEFINE_HANDLE(VmaAllocator) + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/** \struct VmaPool +\brief Represents custom memory pool + +Fill structure VmaPoolCreateInfo and call function vmaCreatePool() to create it. +Call function vmaDestroyPool() to destroy it. + +For more information see [Custom memory pools](@ref choosing_memory_type_custom_memory_pools). +*/ +VK_DEFINE_HANDLE(VmaPool) + +/** \struct VmaAllocation +\brief Represents single memory allocation. + +It may be either dedicated block of `VkDeviceMemory` or a specific region of a bigger block of this type +plus unique offset. + +There are multiple ways to create such object. +You need to fill structure VmaAllocationCreateInfo. +For more information see [Choosing memory type](@ref choosing_memory_type). + +Although the library provides convenience functions that create Vulkan buffer or image, +allocate memory for it and bind them together, +binding of the allocation to a buffer or an image is out of scope of the allocation itself. 
+Allocation object can exist without buffer/image bound, +binding can be done manually by the user, and destruction of it can be done +independently of destruction of the allocation. + +The object also remembers its size and some other information. +To retrieve this information, use function vmaGetAllocationInfo() and inspect +returned structure VmaAllocationInfo. +*/ +VK_DEFINE_HANDLE(VmaAllocation) + +/** \struct VmaDefragmentationContext +\brief An opaque object that represents started defragmentation process. + +Fill structure #VmaDefragmentationInfo and call function vmaBeginDefragmentation() to create it. +Call function vmaEndDefragmentation() to destroy it. +*/ +VK_DEFINE_HANDLE(VmaDefragmentationContext) + +/** @} */ + +/** +\addtogroup group_virtual +@{ +*/ + +/** \struct VmaVirtualAllocation +\brief Represents single memory allocation done inside VmaVirtualBlock. + +Use it as a unique identifier to virtual allocation within the single block. + +Use value `VK_NULL_HANDLE` to represent a null/invalid allocation. +*/ +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VmaVirtualAllocation) + +/** @} */ + +/** +\addtogroup group_virtual +@{ +*/ + +/** \struct VmaVirtualBlock +\brief Handle to a virtual block object that allows to use core allocation algorithm without allocating any real GPU memory. + +Fill in #VmaVirtualBlockCreateInfo structure and use vmaCreateVirtualBlock() to create it. Use vmaDestroyVirtualBlock() to destroy it. +For more information, see documentation chapter \ref virtual_allocator. + +This object is not thread-safe - should not be used from multiple threads simultaneously, must be synchronized externally. +*/ +VK_DEFINE_HANDLE(VmaVirtualBlock) + +/** @} */ + +/** +\addtogroup group_init +@{ +*/ + +/// Callback function called after successful vkAllocateMemory. +typedef void (VKAPI_PTR* PFN_vmaAllocateDeviceMemoryFunction)( +    VmaAllocator VMA_NOT_NULL                    allocator, +    uint32_t                                     memoryType, +    VkDeviceMemory VMA_NOT_NULL_NON_DISPATCHABLE memory, +    VkDeviceSize                                 size, +    void* VMA_NULLABLE                           pUserData); + +/// Callback function called before vkFreeMemory. +typedef void (VKAPI_PTR* PFN_vmaFreeDeviceMemoryFunction)( +    VmaAllocator VMA_NOT_NULL                    allocator, +    uint32_t                                     memoryType, +    VkDeviceMemory VMA_NOT_NULL_NON_DISPATCHABLE memory, +    VkDeviceSize                                 size, +    void* VMA_NULLABLE                           pUserData); + +/** \brief Set of callbacks that the library will call for `vkAllocateMemory` and `vkFreeMemory`. + +Provided for informative purpose, e.g. to gather statistics about number of +allocations or total amount of memory allocated in Vulkan. + +Used in VmaAllocatorCreateInfo::pDeviceMemoryCallbacks. +*/ +typedef struct VmaDeviceMemoryCallbacks +{ +    /// Optional, can be null. +    PFN_vmaAllocateDeviceMemoryFunction VMA_NULLABLE pfnAllocate; +    /// Optional, can be null. +    PFN_vmaFreeDeviceMemoryFunction VMA_NULLABLE pfnFree; +    /// Optional, can be null. +    void* VMA_NULLABLE pUserData; +} VmaDeviceMemoryCallbacks; + +/** \brief Pointers to some Vulkan functions - a subset used by the library. + +Used in VmaAllocatorCreateInfo::pVulkanFunctions. +*/ +typedef struct VmaVulkanFunctions +{ +    /// Required when using VMA_DYNAMIC_VULKAN_FUNCTIONS. 
+    PFN_vkGetInstanceProcAddr VMA_NULLABLE vkGetInstanceProcAddr; +    /// Required when using VMA_DYNAMIC_VULKAN_FUNCTIONS. +    PFN_vkGetDeviceProcAddr VMA_NULLABLE vkGetDeviceProcAddr; +    PFN_vkGetPhysicalDeviceProperties VMA_NULLABLE vkGetPhysicalDeviceProperties; +    PFN_vkGetPhysicalDeviceMemoryProperties VMA_NULLABLE vkGetPhysicalDeviceMemoryProperties; +    PFN_vkAllocateMemory VMA_NULLABLE vkAllocateMemory; +    PFN_vkFreeMemory VMA_NULLABLE vkFreeMemory; +    PFN_vkMapMemory VMA_NULLABLE vkMapMemory; +    PFN_vkUnmapMemory VMA_NULLABLE vkUnmapMemory; +    PFN_vkFlushMappedMemoryRanges VMA_NULLABLE vkFlushMappedMemoryRanges; +    PFN_vkInvalidateMappedMemoryRanges VMA_NULLABLE vkInvalidateMappedMemoryRanges; +    PFN_vkBindBufferMemory VMA_NULLABLE vkBindBufferMemory; +    PFN_vkBindImageMemory VMA_NULLABLE vkBindImageMemory; +    PFN_vkGetBufferMemoryRequirements VMA_NULLABLE vkGetBufferMemoryRequirements; +    PFN_vkGetImageMemoryRequirements VMA_NULLABLE vkGetImageMemoryRequirements; +    PFN_vkCreateBuffer VMA_NULLABLE vkCreateBuffer; +    PFN_vkDestroyBuffer VMA_NULLABLE vkDestroyBuffer; +    PFN_vkCreateImage VMA_NULLABLE vkCreateImage; +    PFN_vkDestroyImage VMA_NULLABLE vkDestroyImage; +    PFN_vkCmdCopyBuffer VMA_NULLABLE vkCmdCopyBuffer; +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 +    /// Fetch "vkGetBufferMemoryRequirements2" on Vulkan >= 1.1, fetch "vkGetBufferMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension. +    PFN_vkGetBufferMemoryRequirements2KHR VMA_NULLABLE vkGetBufferMemoryRequirements2KHR; +    /// Fetch "vkGetImageMemoryRequirements2" on Vulkan >= 1.1, fetch "vkGetImageMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension. +    PFN_vkGetImageMemoryRequirements2KHR VMA_NULLABLE vkGetImageMemoryRequirements2KHR; +#endif +#if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000 +    /// Fetch "vkBindBufferMemory2" on Vulkan >= 1.1, fetch "vkBindBufferMemory2KHR" when using VK_KHR_bind_memory2 extension. +    PFN_vkBindBufferMemory2KHR VMA_NULLABLE vkBindBufferMemory2KHR; +    /// Fetch "vkBindImageMemory2" on Vulkan >= 1.1, fetch "vkBindImageMemory2KHR" when using VK_KHR_bind_memory2 extension. +    PFN_vkBindImageMemory2KHR VMA_NULLABLE vkBindImageMemory2KHR; +#endif +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 +    /// Fetch from "vkGetPhysicalDeviceMemoryProperties2" on Vulkan >= 1.1, but you can also fetch it from "vkGetPhysicalDeviceMemoryProperties2KHR" if you enabled extension VK_KHR_get_physical_device_properties2. +    PFN_vkGetPhysicalDeviceMemoryProperties2KHR VMA_NULLABLE vkGetPhysicalDeviceMemoryProperties2KHR; +#endif +#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 +    /// Fetch from "vkGetDeviceBufferMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceBufferMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4. +    PFN_vkGetDeviceBufferMemoryRequirementsKHR VMA_NULLABLE vkGetDeviceBufferMemoryRequirements; +    /// Fetch from "vkGetDeviceImageMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceImageMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4. 
+    PFN_vkGetDeviceImageMemoryRequirementsKHR VMA_NULLABLE vkGetDeviceImageMemoryRequirements;
+#endif
+#if VMA_EXTERNAL_MEMORY_WIN32
+    PFN_vkGetMemoryWin32HandleKHR VMA_NULLABLE vkGetMemoryWin32HandleKHR;
+#else
+    void* VMA_NULLABLE vkGetMemoryWin32HandleKHR;
+#endif
+} VmaVulkanFunctions;
+
+/// Description of an Allocator to be created.
+typedef struct VmaAllocatorCreateInfo
+{
+    /// Flags for created allocator. Use #VmaAllocatorCreateFlagBits enum.
+    VmaAllocatorCreateFlags flags;
+    /// Vulkan physical device.
+    /** It must be valid throughout whole lifetime of created allocator. */
+    VkPhysicalDevice VMA_NOT_NULL physicalDevice;
+    /// Vulkan device.
+    /** It must be valid throughout whole lifetime of created allocator. */
+    VkDevice VMA_NOT_NULL device;
+    /// Preferred size of a single `VkDeviceMemory` block to be allocated from large heaps > 1 GiB. Optional.
+    /** Set to 0 to use default, which is currently 256 MiB. */
+    VkDeviceSize preferredLargeHeapBlockSize;
+    /// Custom CPU memory allocation callbacks. Optional.
+    /** Optional, can be null. When specified, will also be used for all CPU-side memory allocations. */
+    const VkAllocationCallbacks* VMA_NULLABLE pAllocationCallbacks;
+    /// Informative callbacks for `vkAllocateMemory`, `vkFreeMemory`. Optional.
+    /** Optional, can be null. */
+    const VmaDeviceMemoryCallbacks* VMA_NULLABLE pDeviceMemoryCallbacks;
+    /** \brief Either null or a pointer to an array of limits on maximum number of bytes that can be allocated out of particular Vulkan memory heap.
+
+    If not NULL, it must be a pointer to an array of
+    `VkPhysicalDeviceMemoryProperties::memoryHeapCount` elements, defining limit on
+    maximum number of bytes that can be allocated out of particular Vulkan memory
+    heap.
+
+    Any of the elements may be equal to `VK_WHOLE_SIZE`, which means no limit on that
+    heap. This is also the default in case of `pHeapSizeLimit` = NULL.
+
+    If there is a limit defined for a heap:
+
+    - If user tries to allocate more memory from that heap using this allocator,
+      the allocation fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY`.
+    - If the limit is smaller than heap size reported in `VkMemoryHeap::size`, the
+      value of this limit will be reported instead when using vmaGetMemoryProperties().
+
+    Warning! Using this feature may not be equivalent to installing a GPU with
+    smaller amount of memory, because graphics driver doesn't necessarily fail new
+    allocations with `VK_ERROR_OUT_OF_DEVICE_MEMORY` result when memory capacity is
+    exceeded. It may return success and just silently migrate some device memory
+    blocks to system RAM. This driver behavior can also be controlled using
+    VK_AMD_memory_overallocation_behavior extension.
+    */
+    const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL("VkPhysicalDeviceMemoryProperties::memoryHeapCount") pHeapSizeLimit;
+
+    /** \brief Pointers to Vulkan functions. Can be null.
+
+    For details see [Pointers to Vulkan functions](@ref config_Vulkan_functions).
+    */
+    const VmaVulkanFunctions* VMA_NULLABLE pVulkanFunctions;
+    /** \brief Handle to Vulkan instance object.
+
+    Starting from version 3.0.0 this member is no longer optional, it must be set!
+    */
+    VkInstance VMA_NOT_NULL instance;
+    /** \brief Optional. Vulkan version that the application uses.
+ +    It must be a value in the format as created by macro `VK_MAKE_VERSION` or a constant like: `VK_API_VERSION_1_1`, `VK_API_VERSION_1_0`. +    The patch version number specified is ignored. Only the major and minor versions are considered. +    Only versions 1.0...1.4 are supported by the current implementation. +    Leaving it initialized to zero is equivalent to `VK_API_VERSION_1_0`. +    It must match the Vulkan version used by the application and supported on the selected physical device, +    so it must be no higher than `VkApplicationInfo::apiVersion` passed to `vkCreateInstance` +    and no higher than `VkPhysicalDeviceProperties::apiVersion` found on the physical device used. +    */ +    uint32_t vulkanApiVersion; +#if VMA_EXTERNAL_MEMORY +    /** \brief Either null or a pointer to an array of external memory handle types for each Vulkan memory type. + +    If not NULL, it must be a pointer to an array of `VkPhysicalDeviceMemoryProperties::memoryTypeCount` +    elements, defining external memory handle types of particular Vulkan memory type, +    to be passed using `VkExportMemoryAllocateInfoKHR`. + +    Any of the elements may be equal to 0, which means not to use `VkExportMemoryAllocateInfoKHR` on this memory type. +    This is also the default in case of `pTypeExternalMemoryHandleTypes` = NULL. +    */ +    const VkExternalMemoryHandleTypeFlagsKHR* VMA_NULLABLE VMA_LEN_IF_NOT_NULL("VkPhysicalDeviceMemoryProperties::memoryTypeCount") pTypeExternalMemoryHandleTypes; +#endif // #if VMA_EXTERNAL_MEMORY +} VmaAllocatorCreateInfo; + +/// Information about existing #VmaAllocator object. +typedef struct VmaAllocatorInfo +{ +    /** \brief Handle to Vulkan instance object. + +    This is the same value as has been passed through VmaAllocatorCreateInfo::instance. +    */ +    VkInstance VMA_NOT_NULL instance; +    /** \brief Handle to Vulkan physical device object. + +    This is the same value as has been passed through VmaAllocatorCreateInfo::physicalDevice. +    */ +    VkPhysicalDevice VMA_NOT_NULL physicalDevice; +    /** \brief Handle to Vulkan device object. + +    This is the same value as has been passed through VmaAllocatorCreateInfo::device. +    */ +    VkDevice VMA_NOT_NULL device; +} VmaAllocatorInfo; + +/** @} */ + +/** +\addtogroup group_stats +@{ +*/ + +/** \brief Calculated statistics of memory usage e.g. in a specific memory type, heap, custom pool, or total. + +These are fast to calculate. +See functions: vmaGetHeapBudgets(), vmaGetPoolStatistics(). +*/ +typedef struct VmaStatistics +{ +    /** \brief Number of `VkDeviceMemory` objects - Vulkan memory blocks allocated. +    */ +    uint32_t blockCount; +    /** \brief Number of #VmaAllocation objects allocated. + +    Dedicated allocations have their own blocks, so each one adds 1 to `allocationCount` as well as `blockCount`. +    */ +    uint32_t allocationCount; +    /** \brief Number of bytes allocated in `VkDeviceMemory` blocks. + +    \note To avoid confusion, please be aware that what Vulkan calls an "allocation" - a whole `VkDeviceMemory` object +    (e.g. as in `VkPhysicalDeviceLimits::maxMemoryAllocationCount`) is called a "block" in VMA, while VMA calls +    "allocation" a #VmaAllocation object that represents a memory region sub-allocated from such block, usually for a single buffer or image. +    */ +    VkDeviceSize blockBytes; +    /** \brief Total number of bytes occupied by all #VmaAllocation objects. + +    Always less or equal than `blockBytes`. 
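Taking VmaVulkanFunctions and VmaAllocatorCreateInfo together, allocator creation reduces to a few assignments. A minimal sketch, assuming `instance`, `physicalDevice`, and `device` are valid handles and that the engine targets Vulkan 1.2 (an assumption, not something this header prescribes):

\code
// Sketch: create the VmaAllocator once, right after the VkDevice is created.
// `instance`, `physicalDevice`, and `device` are assumed to be valid handles.
VmaVulkanFunctions vulkanFunctions = {0};
vulkanFunctions.vkGetInstanceProcAddr = vkGetInstanceProcAddr; // required with VMA_DYNAMIC_VULKAN_FUNCTIONS
vulkanFunctions.vkGetDeviceProcAddr = vkGetDeviceProcAddr;

VmaAllocatorCreateInfo allocatorCreateInfo = {0};
allocatorCreateInfo.vulkanApiVersion = VK_API_VERSION_1_2; // assumption: engine targets 1.2
allocatorCreateInfo.instance = instance;
allocatorCreateInfo.physicalDevice = physicalDevice;
allocatorCreateInfo.device = device;
allocatorCreateInfo.pVulkanFunctions = &vulkanFunctions;

VmaAllocator allocator;
VkResult result = vmaCreateAllocator(&allocatorCreateInfo, &allocator);

// ... use the allocator for the lifetime of the device ...

vmaDestroyAllocator(allocator);
\endcode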
+    Difference `(blockBytes - allocationBytes)` is the amount of memory allocated from Vulkan +    but unused by any #VmaAllocation. +    */ +    VkDeviceSize allocationBytes; +} VmaStatistics; + +/** \brief More detailed statistics than #VmaStatistics. + +These are slower to calculate. Use for debugging purposes. +See functions: vmaCalculateStatistics(), vmaCalculatePoolStatistics(). + +Previous version of the statistics API provided averages, but they have been removed +because they can be easily calculated as: + +\code +VkDeviceSize allocationSizeAvg = detailedStats.statistics.allocationBytes / detailedStats.statistics.allocationCount; +VkDeviceSize unusedBytes = detailedStats.statistics.blockBytes - detailedStats.statistics.allocationBytes; +VkDeviceSize unusedRangeSizeAvg = unusedBytes / detailedStats.unusedRangeCount; +\endcode +*/ +typedef struct VmaDetailedStatistics +{ +    /// Basic statistics. +    VmaStatistics statistics; +    /// Number of free ranges of memory between allocations. +    uint32_t unusedRangeCount; +    /// Smallest allocation size. `VK_WHOLE_SIZE` if there are 0 allocations. +    VkDeviceSize allocationSizeMin; +    /// Largest allocation size. 0 if there are 0 allocations. +    VkDeviceSize allocationSizeMax; +    /// Smallest empty range size. `VK_WHOLE_SIZE` if there are 0 empty ranges. +    VkDeviceSize unusedRangeSizeMin; +    /// Largest empty range size. 0 if there are 0 empty ranges. +    VkDeviceSize unusedRangeSizeMax; +} VmaDetailedStatistics; + +/** \brief  General statistics from current state of the Allocator - +total memory usage across all memory heaps and types. + +These are slower to calculate. Use for debugging purposes. +See function vmaCalculateStatistics(). +*/ +typedef struct VmaTotalStatistics +{ +    VmaDetailedStatistics memoryType[VK_MAX_MEMORY_TYPES]; +    VmaDetailedStatistics memoryHeap[VK_MAX_MEMORY_HEAPS]; +    VmaDetailedStatistics total; +} VmaTotalStatistics; + +/** \brief Statistics of current memory usage and available budget for a specific memory heap. + +These are fast to calculate. +See function vmaGetHeapBudgets(). +*/ +typedef struct VmaBudget +{ +    /** \brief Statistics fetched from the library. +    */ +    VmaStatistics statistics; +    /** \brief Estimated current memory usage of the program, in bytes. + +    Fetched from system using VK_EXT_memory_budget extension if enabled. + +    It might be different than `statistics.blockBytes` (usually higher) due to additional implicit objects +    also occupying the memory, like swapchain, pipelines, descriptor heaps, command buffers, or +    `VkDeviceMemory` blocks allocated outside of this library, if any. +    */ +    VkDeviceSize usage; +    /** \brief Estimated amount of memory available to the program, in bytes. + +    Fetched from system using VK_EXT_memory_budget extension if enabled. + +    It might be different (most probably smaller) than `VkMemoryHeap::size[heapIndex]` due to factors +    external to the program, decided by the operating system. +    Difference `budget - usage` is the amount of additional memory that can probably +    be allocated without problems. Exceeding the budget may result in various problems. +    */ +    VkDeviceSize budget; +} VmaBudget; + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/** \brief Parameters of new #VmaAllocation. + +To be used with functions like vmaCreateBuffer(), vmaCreateImage(), and many others. +*/ +typedef struct VmaAllocationCreateInfo +{ +    /// Use #VmaAllocationCreateFlagBits enum. 
+    VmaAllocationCreateFlags flags; +    /** \brief Intended usage of memory. + +    You can leave #VMA_MEMORY_USAGE_UNKNOWN if you specify memory requirements in other way. \n +    If `pool` is not null, this member is ignored. +    */ +    VmaMemoryUsage usage; +    /** \brief Flags that must be set in a Memory Type chosen for an allocation. + +    Leave 0 if you specify memory requirements in other way. \n +    If `pool` is not null, this member is ignored.*/ +    VkMemoryPropertyFlags requiredFlags; +    /** \brief Flags that preferably should be set in a memory type chosen for an allocation. + +    Set to 0 if no additional flags are preferred. \n +    If `pool` is not null, this member is ignored. */ +    VkMemoryPropertyFlags preferredFlags; +    /** \brief Bitmask containing one bit set for every memory type acceptable for this allocation. + +    Value 0 is equivalent to `UINT32_MAX` - it means any memory type is accepted if +    it meets other requirements specified by this structure, with no further +    restrictions on memory type index. \n +    If `pool` is not null, this member is ignored. +    */ +    uint32_t memoryTypeBits; +    /** \brief Pool that this allocation should be created in. + +    Leave `VK_NULL_HANDLE` to allocate from default pool. If not null, members: +    `usage`, `requiredFlags`, `preferredFlags`, `memoryTypeBits` are ignored. +    */ +    VmaPool VMA_NULLABLE pool; +    /** \brief Custom general-purpose pointer that will be stored in #VmaAllocation, can be read as VmaAllocationInfo::pUserData and changed using vmaSetAllocationUserData(). + +    If #VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT is used, it must be either +    null or pointer to a null-terminated string. The string will be then copied to +    internal buffer, so it doesn't need to be valid after allocation call. +    */ +    void* VMA_NULLABLE pUserData; +    /** \brief A floating-point value between 0 and 1, indicating the priority of the allocation relative to other memory allocations. + +    It is used only when #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT flag was used during creation of the #VmaAllocator object +    and this allocation ends up as dedicated or is explicitly forced as dedicated using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. +    Otherwise, it has the priority of a memory block where it is placed and this variable is ignored. +    */ +    float priority; +} VmaAllocationCreateInfo; + +/// Describes parameter of created #VmaPool. +typedef struct VmaPoolCreateInfo +{ +    /** \brief Vulkan memory type index to allocate this pool from. +    */ +    uint32_t memoryTypeIndex; +    /** \brief Use combination of #VmaPoolCreateFlagBits. +    */ +    VmaPoolCreateFlags flags; +    /** \brief Size of a single `VkDeviceMemory` block to be allocated as part of this pool, in bytes. Optional. + +    Specify nonzero to set explicit, constant size of memory blocks used by this +    pool. + +    Leave 0 to use default and let the library manage block sizes automatically. +    Sizes of particular blocks may vary. +    In this case, the pool will also support dedicated allocations. +    */ +    VkDeviceSize blockSize; +    /** \brief Minimum number of blocks to be always allocated in this pool, even if they stay empty. + +    Set to 0 to have no preallocated blocks and allow the pool be completely empty. +    */ +    size_t minBlockCount; +    /** \brief Maximum number of blocks that can be allocated in this pool. Optional. 
+ +    Set to 0 to use default, which is `SIZE_MAX`, which means no limit. + +    Set to same value as VmaPoolCreateInfo::minBlockCount to have fixed amount of memory allocated +    throughout whole lifetime of this pool. +    */ +    size_t maxBlockCount; +    /** \brief A floating-point value between 0 and 1, indicating the priority of the allocations in this pool relative to other memory allocations. + +    It is used only when #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT flag was used during creation of the #VmaAllocator object. +    Otherwise, this variable is ignored. +    */ +    float priority; +    /** \brief Additional minimum alignment to be used for all allocations created from this pool. Can be 0. + +    Leave 0 (default) not to impose any additional alignment. If not 0, it must be a power of two. +    It can be useful in cases where alignment returned by Vulkan by functions like `vkGetBufferMemoryRequirements` is not enough, +    e.g. when doing interop with OpenGL. +    */ +    VkDeviceSize minAllocationAlignment; +    /** \brief Additional `pNext` chain to be attached to `VkMemoryAllocateInfo` used for every allocation made by this pool. Optional. + +    Optional, can be null. If not null, it must point to a `pNext` chain of structures that can be attached to `VkMemoryAllocateInfo`. +    It can be useful for special needs such as adding `VkExportMemoryAllocateInfoKHR`. +    Structures pointed by this member must remain alive and unchanged for the whole lifetime of the custom pool. + +    Please note that some structures, e.g. `VkMemoryPriorityAllocateInfoEXT`, `VkMemoryDedicatedAllocateInfoKHR`, +    can be attached automatically by this library when using other, more convenient of its features. +    */ +    void* VMA_NULLABLE VMA_EXTENDS_VK_STRUCT(VkMemoryAllocateInfo) pMemoryAllocateNext; +} VmaPoolCreateInfo; + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/** +Parameters of #VmaAllocation objects, that can be retrieved using function vmaGetAllocationInfo(). + +There is also an extended version of this structure that carries additional parameters: #VmaAllocationInfo2. +*/ +typedef struct VmaAllocationInfo +{ +    /** \brief Memory type index that this allocation was allocated from. + +    It never changes. +    */ +    uint32_t memoryType; +    /** \brief Handle to Vulkan memory object. + +    Same memory object can be shared by multiple allocations. + +    It can change after the allocation is moved during \ref defragmentation. +    */ +    VkDeviceMemory VMA_NULLABLE_NON_DISPATCHABLE deviceMemory; +    /** \brief Offset in `VkDeviceMemory` object to the beginning of this allocation, in bytes. `(deviceMemory, offset)` pair is unique to this allocation. + +    You usually don't need to use this offset. If you create a buffer or an image together with the allocation using e.g. function +    vmaCreateBuffer(), vmaCreateImage(), functions that operate on these resources refer to the beginning of the buffer or image, +    not entire device memory block. Functions like vmaMapMemory(), vmaBindBufferMemory() also refer to the beginning of the allocation +    and apply this offset automatically. + +    It can change after the allocation is moved during \ref defragmentation. +    */ +    VkDeviceSize offset; +    /** \brief Size of this allocation, in bytes. + +    It never changes. + +    \note Allocation size returned in this variable may be greater than the size +    requested for the resource e.g. as `VkBufferCreateInfo::size`. 
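A custom pool parameterized by VmaPoolCreateInfo is usually set up by first asking the library for a suitable memoryTypeIndex via vmaFindMemoryTypeIndexForBufferInfo(), declared further down in this header. A minimal sketch; the sample buffer parameters and the 16 MiB block size are placeholders:

\code
// Sketch: create a custom pool for small, CPU-written uniform buffers.
// The sample buffer below is only used to pick a memory type; it is never created.
VkBufferCreateInfo sampleBufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
sampleBufInfo.size = 1024;
sampleBufInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;

VmaAllocationCreateInfo sampleAllocInfo = {0};
sampleAllocInfo.usage = VMA_MEMORY_USAGE_AUTO;
sampleAllocInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; // uniform data written from the CPU

uint32_t memTypeIndex;
vmaFindMemoryTypeIndexForBufferInfo(allocator, &sampleBufInfo, &sampleAllocInfo, &memTypeIndex);

VmaPoolCreateInfo poolCreateInfo = {0};
poolCreateInfo.memoryTypeIndex = memTypeIndex;
poolCreateInfo.blockSize = 16ull * 1024 * 1024; // assumption: fixed 16 MiB blocks
poolCreateInfo.minBlockCount = 1;

VmaPool pool;
vmaCreatePool(allocator, &poolCreateInfo, &pool);
\endcode

Allocations are then routed into the pool by setting VmaAllocationCreateInfo::pool, at which point the usage/flags members of that structure are ignored, as described above.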
Whole size of the +    allocation is accessible for operations on memory e.g. using a pointer after +    mapping with vmaMapMemory(), but operations on the resource e.g. using +    `vkCmdCopyBuffer` must be limited to the size of the resource. +    */ +    VkDeviceSize size; +    /** \brief Pointer to the beginning of this allocation as mapped data. + +    If the allocation hasn't been mapped using vmaMapMemory() and hasn't been +    created with #VMA_ALLOCATION_CREATE_MAPPED_BIT flag, this value is null. + +    It can change after call to vmaMapMemory(), vmaUnmapMemory(). +    It can also change after the allocation is moved during \ref defragmentation. +    */ +    void* VMA_NULLABLE pMappedData; +    /** \brief Custom general-purpose pointer that was passed as VmaAllocationCreateInfo::pUserData or set using vmaSetAllocationUserData(). + +    It can change after call to vmaSetAllocationUserData() for this allocation. +    */ +    void* VMA_NULLABLE pUserData; +    /** \brief Custom allocation name that was set with vmaSetAllocationName(). + +    It can change after call to vmaSetAllocationName() for this allocation. + +    Another way to set custom name is to pass it in VmaAllocationCreateInfo::pUserData with +    additional flag #VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT set [DEPRECATED]. +    */ +    const char* VMA_NULLABLE pName; +} VmaAllocationInfo; + +/// Extended parameters of a #VmaAllocation object that can be retrieved using function vmaGetAllocationInfo2(). +typedef struct VmaAllocationInfo2 +{ +    /** \brief Basic parameters of the allocation. +     +    If you need only these, you can use function vmaGetAllocationInfo() and structure #VmaAllocationInfo instead. +    */ +    VmaAllocationInfo allocationInfo; +    /** \brief Size of the `VkDeviceMemory` block that the allocation belongs to. +     +    In case of an allocation with dedicated memory, it will be equal to `allocationInfo.size`. +    */ +    VkDeviceSize blockSize; +    /** \brief `VK_TRUE` if the allocation has dedicated memory, `VK_FALSE` if it was placed as part of a larger memory block. +     +    When `VK_TRUE`, it also means `VkMemoryDedicatedAllocateInfo` was used when creating the allocation +    (if VK_KHR_dedicated_allocation extension or Vulkan version >= 1.1 is enabled). +    */ +    VkBool32 dedicatedMemory; +} VmaAllocationInfo2; + +/** Callback function called during vmaBeginDefragmentation() to check custom criterion about ending current defragmentation pass. + +Should return true if the defragmentation needs to stop current pass. +*/ +typedef VkBool32 (VKAPI_PTR* PFN_vmaCheckDefragmentationBreakFunction)(void* VMA_NULLABLE pUserData); + +/** \brief Parameters for defragmentation. + +To be used with function vmaBeginDefragmentation(). +*/ +typedef struct VmaDefragmentationInfo +{ +    /// \brief Use combination of #VmaDefragmentationFlagBits. +    VmaDefragmentationFlags flags; +    /** \brief Custom pool to be defragmented. + +    If null then default pools will undergo defragmentation process. +    */ +    VmaPool VMA_NULLABLE pool; +    /** \brief Maximum numbers of bytes that can be copied during single pass, while moving allocations to different places. + +    `0` means no limit. +    */ +    VkDeviceSize maxBytesPerPass; +    /** \brief Maximum number of allocations that can be moved during single pass to a different place. + +    `0` means no limit. +    */ +    uint32_t maxAllocationsPerPass; +    /** \brief Optional custom callback for stopping vmaBeginDefragmentation(). 
+ +    Have to return true for breaking current defragmentation pass. +    */ +    PFN_vmaCheckDefragmentationBreakFunction VMA_NULLABLE pfnBreakCallback; +    /// \brief Optional data to pass to custom callback for stopping pass of defragmentation. +    void* VMA_NULLABLE pBreakCallbackUserData; +} VmaDefragmentationInfo; + +/// Single move of an allocation to be done for defragmentation. +typedef struct VmaDefragmentationMove +{ +    /// Operation to be performed on the allocation by vmaEndDefragmentationPass(). Default value is #VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY. You can modify it. +    VmaDefragmentationMoveOperation operation; +    /// Allocation that should be moved. +    VmaAllocation VMA_NOT_NULL srcAllocation; +    /** \brief Temporary allocation pointing to destination memory that will replace `srcAllocation`. + +    \warning Do not store this allocation in your data structures! It exists only temporarily, for the duration of the defragmentation pass, +    to be used for binding new buffer/image to the destination memory using e.g. vmaBindBufferMemory(). +    vmaEndDefragmentationPass() will destroy it and make `srcAllocation` point to this memory. +    */ +    VmaAllocation VMA_NOT_NULL dstTmpAllocation; +} VmaDefragmentationMove; + +/** \brief Parameters for incremental defragmentation steps. + +To be used with function vmaBeginDefragmentationPass(). +*/ +typedef struct VmaDefragmentationPassMoveInfo +{ +    /// Number of elements in the `pMoves` array. +    uint32_t moveCount; +    /** \brief Array of moves to be performed by the user in the current defragmentation pass. + +    Pointer to an array of `moveCount` elements, owned by VMA, created in vmaBeginDefragmentationPass(), destroyed in vmaEndDefragmentationPass(). + +    For each element, you should: + +    1. Create a new buffer/image in the place pointed by VmaDefragmentationMove::dstMemory + VmaDefragmentationMove::dstOffset. +    2. Copy data from the VmaDefragmentationMove::srcAllocation e.g. using `vkCmdCopyBuffer`, `vkCmdCopyImage`. +    3. Make sure these commands finished executing on the GPU. +    4. Destroy the old buffer/image. + +    Only then you can finish defragmentation pass by calling vmaEndDefragmentationPass(). +    After this call, the allocation will point to the new place in memory. + +    Alternatively, if you cannot move specific allocation, you can set VmaDefragmentationMove::operation to #VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE. + +    Alternatively, if you decide you want to completely remove the allocation: + +    1. Destroy its buffer/image. +    2. Set VmaDefragmentationMove::operation to #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY. + +    Then, after vmaEndDefragmentationPass() the allocation will be freed. +    */ +    VmaDefragmentationMove* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(moveCount) pMoves; +} VmaDefragmentationPassMoveInfo; + +/// Statistics returned for defragmentation process in function vmaEndDefragmentation(). +typedef struct VmaDefragmentationStats +{ +    /// Total number of bytes that have been copied while moving allocations to different places. +    VkDeviceSize bytesMoved; +    /// Total number of bytes that have been released to the system by freeing empty `VkDeviceMemory` objects. +    VkDeviceSize bytesFreed; +    /// Number of allocations that have been moved to different places. +    uint32_t allocationsMoved; +    /// Number of empty `VkDeviceMemory` objects that have been released to the system. 
+    uint32_t deviceMemoryBlocksFreed; +} VmaDefragmentationStats; + +/** @} */ + +/** +\addtogroup group_virtual +@{ +*/ + +/// Parameters of created #VmaVirtualBlock object to be passed to vmaCreateVirtualBlock(). +typedef struct VmaVirtualBlockCreateInfo +{ +    /** \brief Total size of the virtual block. + +    Sizes can be expressed in bytes or any units you want as long as you are consistent in using them. +    For example, if you allocate from some array of structures, 1 can mean single instance of entire structure. +    */ +    VkDeviceSize size; + +    /** \brief Use combination of #VmaVirtualBlockCreateFlagBits. +    */ +    VmaVirtualBlockCreateFlags flags; + +    /** \brief Custom CPU memory allocation callbacks. Optional. + +    Optional, can be null. When specified, they will be used for all CPU-side memory allocations. +    */ +    const VkAllocationCallbacks* VMA_NULLABLE pAllocationCallbacks; +} VmaVirtualBlockCreateInfo; + +/// Parameters of created virtual allocation to be passed to vmaVirtualAllocate(). +typedef struct VmaVirtualAllocationCreateInfo +{ +    /** \brief Size of the allocation. + +    Cannot be zero. +    */ +    VkDeviceSize size; +    /** \brief Required alignment of the allocation. Optional. + +    Must be power of two. Special value 0 has the same meaning as 1 - means no special alignment is required, so allocation can start at any offset. +    */ +    VkDeviceSize alignment; +    /** \brief Use combination of #VmaVirtualAllocationCreateFlagBits. +    */ +    VmaVirtualAllocationCreateFlags flags; +    /** \brief Custom pointer to be associated with the allocation. Optional. + +    It can be any value and can be used for user-defined purposes. It can be fetched or changed later. +    */ +    void* VMA_NULLABLE pUserData; +} VmaVirtualAllocationCreateInfo; + +/// Parameters of an existing virtual allocation, returned by vmaGetVirtualAllocationInfo(). +typedef struct VmaVirtualAllocationInfo +{ +    /** \brief Offset of the allocation. + +    Offset at which the allocation was made. +    */ +    VkDeviceSize offset; +    /** \brief Size of the allocation. + +    Same value as passed in VmaVirtualAllocationCreateInfo::size. +    */ +    VkDeviceSize size; +    /** \brief Custom pointer associated with the allocation. + +    Same value as passed in VmaVirtualAllocationCreateInfo::pUserData or to vmaSetVirtualAllocationUserData(). +    */ +    void* VMA_NULLABLE pUserData; +} VmaVirtualAllocationInfo; + +/** @} */ + +#endif // _VMA_DATA_TYPES_DECLARATIONS + +#ifndef _VMA_FUNCTION_HEADERS + +/** +\addtogroup group_init +@{ +*/ + +/// Creates #VmaAllocator object. +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAllocator( +    const VmaAllocatorCreateInfo* VMA_NOT_NULL pCreateInfo, +    VmaAllocator VMA_NULLABLE* VMA_NOT_NULL pAllocator); + +/// Destroys allocator object. +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyAllocator( +    VmaAllocator VMA_NULLABLE allocator); + +/** \brief Returns information about existing #VmaAllocator object - handle to Vulkan device etc. + +It might be useful if you want to keep just the #VmaAllocator handle and fetch other required handles to +`VkPhysicalDevice`, `VkDevice` etc. every time using this function. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocatorInfo( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocatorInfo* VMA_NOT_NULL pAllocatorInfo); + +/** +PhysicalDeviceProperties are fetched from physicalDevice by the allocator. +You can access it here, without fetching it again on your own. 
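The virtual-allocator structures above need no Vulkan objects at all; the block only hands out offsets. A minimal sketch of one allocate/free cycle (the sizes are arbitrary):

\code
// Sketch: use VmaVirtualBlock to sub-allocate offsets inside a 1 MiB region.
VmaVirtualBlockCreateInfo blockCreateInfo = {0};
blockCreateInfo.size = 1048576; // units are up to the caller

VmaVirtualBlock block;
VkResult res = vmaCreateVirtualBlock(&blockCreateInfo, &block);

VmaVirtualAllocationCreateInfo allocCreateInfo = {0};
allocCreateInfo.size = 4096;
allocCreateInfo.alignment = 256;

VmaVirtualAllocation alloc;
VkDeviceSize offset;
res = vmaVirtualAllocate(block, &allocCreateInfo, &alloc, &offset);

// ... use [offset, offset + 4096) inside whatever resource this block mirrors ...

vmaVirtualFree(block, alloc);
vmaDestroyVirtualBlock(block);
\endcode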
+*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetPhysicalDeviceProperties( +    VmaAllocator VMA_NOT_NULL allocator, +    const VkPhysicalDeviceProperties* VMA_NULLABLE* VMA_NOT_NULL ppPhysicalDeviceProperties); + +/** +PhysicalDeviceMemoryProperties are fetched from physicalDevice by the allocator. +You can access it here, without fetching it again on your own. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryProperties( +    VmaAllocator VMA_NOT_NULL allocator, +    const VkPhysicalDeviceMemoryProperties* VMA_NULLABLE* VMA_NOT_NULL ppPhysicalDeviceMemoryProperties); + +/** +\brief Given Memory Type Index, returns Property Flags of this memory type. + +This is just a convenience function. Same information can be obtained using +vmaGetMemoryProperties(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryTypeProperties( +    VmaAllocator VMA_NOT_NULL allocator, +    uint32_t memoryTypeIndex, +    VkMemoryPropertyFlags* VMA_NOT_NULL pFlags); + +/** \brief Sets index of the current frame. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaSetCurrentFrameIndex( +    VmaAllocator VMA_NOT_NULL allocator, +    uint32_t frameIndex); + +/** @} */ + +/** +\addtogroup group_stats +@{ +*/ + +/** \brief Retrieves statistics from current state of the Allocator. + +This function is called "calculate" not "get" because it has to traverse all +internal data structures, so it may be quite slow. Use it for debugging purposes. +For faster but more brief statistics suitable to be called every frame or every allocation, +use vmaGetHeapBudgets(). + +Note that when using allocator from multiple threads, returned information may immediately +become outdated. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateStatistics( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaTotalStatistics* VMA_NOT_NULL pStats); + +/** \brief Retrieves information about current memory usage and budget for all memory heaps. + +\param allocator +\param[out] pBudgets Must point to array with number of elements at least equal to number of memory heaps in physical device used. + +This function is called "get" not "calculate" because it is very fast, suitable to be called +every frame or every allocation. For more detailed statistics use vmaCalculateStatistics(). + +Note that when using allocator from multiple threads, returned information may immediately +become outdated. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetHeapBudgets( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaBudget* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL("VkPhysicalDeviceMemoryProperties::memoryHeapCount") pBudgets); + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/** +\brief Helps to find memoryTypeIndex, given memoryTypeBits and VmaAllocationCreateInfo. + +This algorithm tries to find a memory type that: + +- Is allowed by memoryTypeBits. +- Contains all the flags from pAllocationCreateInfo->requiredFlags. +- Matches intended usage. +- Has as many flags from pAllocationCreateInfo->preferredFlags as possible. + +\return Returns VK_ERROR_FEATURE_NOT_PRESENT if not found. Receiving such result +from this function or any other allocating function probably means that your +device doesn't support any memory type with requested features for the specific +type of resource you want to use it for. Please check parameters of your +resource, like image layout (OPTIMAL versus LINEAR) or mip level count. 
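vmaGetHeapBudgets(), documented above, pairs naturally with vmaGetMemoryProperties() for a per-frame memory report. A minimal sketch, assuming a valid `allocator` and using plain printf() as a stand-in for the engine's logger:

\code
// Sketch: report per-heap usage vs. budget.
// `allocator` is assumed to be valid; printf() is from <stdio.h>.
const VkPhysicalDeviceMemoryProperties* memProps;
vmaGetMemoryProperties(allocator, &memProps);

VmaBudget budgets[VK_MAX_MEMORY_HEAPS];
vmaGetHeapBudgets(allocator, budgets);

for (uint32_t heap = 0; heap < memProps->memoryHeapCount; ++heap)
{
    printf("heap %u: %llu / %llu bytes\n", heap,
           (unsigned long long)budgets[heap].usage,
           (unsigned long long)budgets[heap].budget);
}
\endcode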
+*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndex( +    VmaAllocator VMA_NOT_NULL allocator, +    uint32_t memoryTypeBits, +    const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, +    uint32_t* VMA_NOT_NULL pMemoryTypeIndex); + +/** +\brief Helps to find memoryTypeIndex, given VkBufferCreateInfo and VmaAllocationCreateInfo. + +It can be useful e.g. to determine value to be used as VmaPoolCreateInfo::memoryTypeIndex. +It internally creates a temporary, dummy buffer that never has memory bound. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo( +    VmaAllocator VMA_NOT_NULL allocator, +    const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, +    const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, +    uint32_t* VMA_NOT_NULL pMemoryTypeIndex); + +/** +\brief Helps to find memoryTypeIndex, given VkImageCreateInfo and VmaAllocationCreateInfo. + +It can be useful e.g. to determine value to be used as VmaPoolCreateInfo::memoryTypeIndex. +It internally creates a temporary, dummy image that never has memory bound. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForImageInfo( +    VmaAllocator VMA_NOT_NULL allocator, +    const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, +    const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, +    uint32_t* VMA_NOT_NULL pMemoryTypeIndex); + +/** \brief Allocates Vulkan device memory and creates #VmaPool object. + +\param allocator Allocator object. +\param pCreateInfo Parameters of pool to create. +\param[out] pPool Handle to created pool. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreatePool( +    VmaAllocator VMA_NOT_NULL allocator, +    const VmaPoolCreateInfo* VMA_NOT_NULL pCreateInfo, +    VmaPool VMA_NULLABLE* VMA_NOT_NULL pPool); + +/** \brief Destroys #VmaPool object and frees Vulkan device memory. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyPool( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaPool VMA_NULLABLE pool); + +/** @} */ + +/** +\addtogroup group_stats +@{ +*/ + +/** \brief Retrieves statistics of existing #VmaPool object. + +\param allocator Allocator object. +\param pool Pool object. +\param[out] pPoolStats Statistics of specified pool. + +Note that when using the pool from multiple threads, returned information may immediately +become outdated. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolStatistics( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaPool VMA_NOT_NULL pool, +    VmaStatistics* VMA_NOT_NULL pPoolStats); + +/** \brief Retrieves detailed statistics of existing #VmaPool object. + +\param allocator Allocator object. +\param pool Pool object. +\param[out] pPoolStats Statistics of specified pool. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaCalculatePoolStatistics( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaPool VMA_NOT_NULL pool, +    VmaDetailedStatistics* VMA_NOT_NULL pPoolStats); + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/** \brief Checks magic number in margins around all allocations in given memory pool in search for corruptions. + +Corruption detection is enabled only when `VMA_DEBUG_DETECT_CORRUPTION` macro is defined to nonzero, +`VMA_DEBUG_MARGIN` is defined to nonzero and the pool is created in memory type that is +`HOST_VISIBLE` and `HOST_COHERENT`. For more information, see [Corruption detection](@ref debugging_memory_usage_corruption_detection). + +Possible return values: + +- `VK_ERROR_FEATURE_NOT_PRESENT` - corruption detection is not enabled for specified pool. 
+- `VK_SUCCESS` - corruption detection has been performed and succeeded. +- `VK_ERROR_UNKNOWN` - corruption detection has been performed and found memory corruptions around one of the allocations. +  `VMA_ASSERT` is also fired in that case. +- Other value: Error returned by Vulkan, e.g. memory mapping failure. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckPoolCorruption( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaPool VMA_NOT_NULL pool); + +/** \brief Retrieves name of a custom pool. + +After the call `ppName` is either null or points to an internally-owned null-terminated string +containing name of the pool that was previously set. The pointer becomes invalid when the pool is +destroyed or its name is changed using vmaSetPoolName(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolName( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaPool VMA_NOT_NULL pool, +    const char* VMA_NULLABLE* VMA_NOT_NULL ppName); + +/** \brief Sets name of a custom pool. + +`pName` can be either null or pointer to a null-terminated string with new name for the pool. +Function makes internal copy of the string, so it can be changed or freed immediately after this call. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaSetPoolName( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaPool VMA_NOT_NULL pool, +    const char* VMA_NULLABLE pName); + +/** \brief General purpose memory allocation. + +\param allocator +\param pVkMemoryRequirements +\param pCreateInfo +\param[out] pAllocation Handle to allocated memory. +\param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo(). + +You should free the memory using vmaFreeMemory() or vmaFreeMemoryPages(). + +It is recommended to use vmaAllocateMemoryForBuffer(), vmaAllocateMemoryForImage(), +vmaCreateBuffer(), vmaCreateImage() instead whenever possible. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemory( +    VmaAllocator VMA_NOT_NULL allocator, +    const VkMemoryRequirements* VMA_NOT_NULL pVkMemoryRequirements, +    const VmaAllocationCreateInfo* VMA_NOT_NULL pCreateInfo, +    VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, +    VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); + +/** \brief General purpose memory allocation for multiple allocation objects at once. + +\param allocator Allocator object. +\param pVkMemoryRequirements Memory requirements for each allocation. +\param pCreateInfo Creation parameters for each allocation. +\param allocationCount Number of allocations to make. +\param[out] pAllocations Pointer to array that will be filled with handles to created allocations. +\param[out] pAllocationInfo Optional. Pointer to array that will be filled with parameters of created allocations. + +You should free the memory using vmaFreeMemory() or vmaFreeMemoryPages(). + +Word "pages" is just a suggestion to use this function to allocate pieces of memory needed for sparse binding. +It is just a general purpose allocation function able to make multiple allocations at once. +It may be internally optimized to be more efficient than calling vmaAllocateMemory() `allocationCount` times. + +All allocations are made using same parameters. All of them are created out of the same memory pool and type. +If any allocation fails, all allocations already made within this function call are also freed, so that when +returned result is not `VK_SUCCESS`, `pAllocation` array is always entirely filled with `VK_NULL_HANDLE`. 
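An editorial sketch of the batched path described above (assuming `allocator`, a `memReq` filled by `vkGetBufferMemoryRequirements()`, and a `createInfo` prepared as for vmaAllocateMemory()):

\code
VkMemoryRequirements reqs[3];
VmaAllocationCreateInfo infos[3];
for(size_t i = 0; i < 3; ++i) { reqs[i] = memReq; infos[i] = createInfo; }

VmaAllocation pages[3];
VkResult res = vmaAllocateMemoryPages(allocator, reqs, infos, 3, pages, NULL);
if(res == VK_SUCCESS)
{
    // ... bind the allocations (e.g. for sparse binding), use them ...
    vmaFreeMemoryPages(allocator, 3, pages);
}
\endcode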
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryPages(
+    VmaAllocator VMA_NOT_NULL allocator,
+    const VkMemoryRequirements* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pVkMemoryRequirements,
+    const VmaAllocationCreateInfo* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pCreateInfo,
+    size_t allocationCount,
+    VmaAllocation VMA_NULLABLE* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pAllocations,
+    VmaAllocationInfo* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) pAllocationInfo);
+
+/** \brief Allocates memory suitable for given `VkBuffer`.
+
+\param allocator
+\param buffer
+\param pCreateInfo
+\param[out] pAllocation Handle to allocated memory.
+\param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo().
+
+It only creates #VmaAllocation. To bind the memory to the buffer, use vmaBindBufferMemory().
+
+This is a special-purpose function. In most cases you should use vmaCreateBuffer().
+
+You must free the allocation using vmaFreeMemory() when no longer needed.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForBuffer(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VkBuffer VMA_NOT_NULL_NON_DISPATCHABLE buffer,
+    const VmaAllocationCreateInfo* VMA_NOT_NULL pCreateInfo,
+    VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation,
+    VmaAllocationInfo* VMA_NULLABLE pAllocationInfo);
+
+/** \brief Allocates memory suitable for given `VkImage`.
+
+\param allocator
+\param image
+\param pCreateInfo
+\param[out] pAllocation Handle to allocated memory.
+\param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo().
+
+It only creates #VmaAllocation. To bind the memory to the image, use vmaBindImageMemory().
+
+This is a special-purpose function. In most cases you should use vmaCreateImage().
+
+You must free the allocation using vmaFreeMemory() when no longer needed.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForImage(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VkImage VMA_NOT_NULL_NON_DISPATCHABLE image,
+    const VmaAllocationCreateInfo* VMA_NOT_NULL pCreateInfo,
+    VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation,
+    VmaAllocationInfo* VMA_NULLABLE pAllocationInfo);
+
+/** \brief Frees memory previously allocated using vmaAllocateMemory(), vmaAllocateMemoryForBuffer(), or vmaAllocateMemoryForImage().
+
+Passing `VK_NULL_HANDLE` as `allocation` is valid. Such a function call is just skipped.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemory(
+    VmaAllocator VMA_NOT_NULL allocator,
+    const VmaAllocation VMA_NULLABLE allocation);
+
+/** \brief Frees memory and destroys multiple allocations.
+
+Word "pages" is just a suggestion to use this function to free pieces of memory used for sparse binding.
+It is just a general purpose function to free memory and destroy allocations made using e.g. vmaAllocateMemory(),
+vmaAllocateMemoryPages() and other functions.
+It may be internally optimized to be more efficient than calling vmaFreeMemory() `allocationCount` times.
+
+Allocations in `pAllocations` array can come from any memory pools and types.
+Passing `VK_NULL_HANDLE` as elements of `pAllocations` array is valid. Such entries are just skipped.
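An editorial sketch tying the allocate/bind/free trio above together; `allocator` and a `VkBuffer` named `buffer` created with `vkCreateBuffer()` are assumed:

\code
VmaAllocationCreateInfo createInfo = {0};
createInfo.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;

VmaAllocation allocation;
VkResult res = vmaAllocateMemoryForBuffer(allocator, buffer, &createInfo, &allocation, NULL);
if(res == VK_SUCCESS)
{
    vmaBindBufferMemory(allocator, allocation, buffer);
    // ... use the buffer ...
    vmaFreeMemory(allocator, allocation);   // destroying the VkBuffer itself remains the caller's job
}
\endcode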
+*/ +VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemoryPages( +    VmaAllocator VMA_NOT_NULL allocator, +    size_t allocationCount, +    const VmaAllocation VMA_NULLABLE* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pAllocations); + +/** \brief Returns current information about specified allocation. + +Current parameters of given allocation are returned in `pAllocationInfo`. + +Although this function doesn't lock any mutex, so it should be quite efficient, +you should avoid calling it too often. +You can retrieve same VmaAllocationInfo structure while creating your resource, from function +vmaCreateBuffer(), vmaCreateImage(). You can remember it if you are sure parameters don't change +(e.g. due to defragmentation). + +There is also a new function vmaGetAllocationInfo2() that offers extended information +about the allocation, returned using new structure #VmaAllocationInfo2. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    VmaAllocationInfo* VMA_NOT_NULL pAllocationInfo); + +/** \brief Returns extended information about specified allocation. + +Current parameters of given allocation are returned in `pAllocationInfo`. +Extended parameters in structure #VmaAllocationInfo2 include memory block size +and a flag telling whether the allocation has dedicated memory. +It can be useful e.g. for interop with OpenGL. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo2( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    VmaAllocationInfo2* VMA_NOT_NULL pAllocationInfo); + +/** \brief Sets pUserData in given allocation to new value. + +The value of pointer `pUserData` is copied to allocation's `pUserData`. +It is opaque, so you can use it however you want - e.g. +as a pointer, ordinal number or some handle to you own data. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationUserData( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    void* VMA_NULLABLE pUserData); + +/** \brief Sets pName in given allocation to new value. + +`pName` must be either null, or pointer to a null-terminated string. The function +makes local copy of the string and sets it as allocation's `pName`. String +passed as pName doesn't need to be valid for whole lifetime of the allocation - +you can free it after this call. String previously pointed by allocation's +`pName` is freed from memory. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationName( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    const char* VMA_NULLABLE pName); + +/** +\brief Given an allocation, returns Property Flags of its memory type. + +This is just a convenience function. Same information can be obtained using +vmaGetAllocationInfo() + vmaGetMemoryProperties(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationMemoryProperties( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    VkMemoryPropertyFlags* VMA_NOT_NULL pFlags); + + +#if VMA_EXTERNAL_MEMORY_WIN32 +/** +\brief Given an allocation, returns Win32 handle that may be imported by other processes or APIs. + +\param hTargetProcess Must be a valid handle to target process or null. If it's null, the function returns +    handle for the current process. +\param[out] pHandle Output parameter that returns the handle. + +The function fills `pHandle` with handle that can be used in target process. 
+The handle is fetched using function `vkGetMemoryWin32HandleKHR`. +When no longer needed, you must close it using: + +\code +CloseHandle(handle); +\endcode + +You can close it any time, before or after destroying the allocation object. +It is reference-counted internally by Windows. + +Note the handle is returned for the entire `VkDeviceMemory` block that the allocation belongs to. +If the allocation is sub-allocated from a larger block, you may need to consider the offset of the allocation +(VmaAllocationInfo::offset). + +If the function fails with `VK_ERROR_FEATURE_NOT_PRESENT` error code, please double-check +that VmaVulkanFunctions::vkGetMemoryWin32HandleKHR function pointer is set, e.g. either by using `VMA_DYNAMIC_VULKAN_FUNCTIONS` +or by manually passing it through VmaAllocatorCreateInfo::pVulkanFunctions. + +For more information, see chapter \ref vk_khr_external_memory_win32. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaGetMemoryWin32Handle(VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, HANDLE hTargetProcess, HANDLE* VMA_NOT_NULL pHandle); +#endif // VMA_EXTERNAL_MEMORY_WIN32 + +/** \brief Maps memory represented by given allocation and returns pointer to it. + +Maps memory represented by given allocation to make it accessible to CPU code. +When succeeded, `*ppData` contains pointer to first byte of this memory. + +\warning +If the allocation is part of a bigger `VkDeviceMemory` block, returned pointer is +correctly offsetted to the beginning of region assigned to this particular allocation. +Unlike the result of `vkMapMemory`, it points to the allocation, not to the beginning of the whole block. +You should not add VmaAllocationInfo::offset to it! + +Mapping is internally reference-counted and synchronized, so despite raw Vulkan +function `vkMapMemory()` cannot be used to map same block of `VkDeviceMemory` +multiple times simultaneously, it is safe to call this function on allocations +assigned to the same memory block. Actual Vulkan memory will be mapped on first +mapping and unmapped on last unmapping. + +If the function succeeded, you must call vmaUnmapMemory() to unmap the +allocation when mapping is no longer needed or before freeing the allocation, at +the latest. + +It also safe to call this function multiple times on the same allocation. You +must call vmaUnmapMemory() same number of times as you called vmaMapMemory(). + +It is also safe to call this function on allocation created with +#VMA_ALLOCATION_CREATE_MAPPED_BIT flag. Its memory stays mapped all the time. +You must still call vmaUnmapMemory() same number of times as you called +vmaMapMemory(). You must not call vmaUnmapMemory() additional time to free the +"0-th" mapping made automatically due to #VMA_ALLOCATION_CREATE_MAPPED_BIT flag. + +This function fails when used on allocation made in memory type that is not +`HOST_VISIBLE`. + +This function doesn't automatically flush or invalidate caches. +If the allocation is made from a memory types that is not `HOST_COHERENT`, +you also need to use vmaInvalidateAllocation() / vmaFlushAllocation(), as required by Vulkan specification. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaMapMemory( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    void* VMA_NULLABLE* VMA_NOT_NULL ppData); + +/** \brief Unmaps memory represented by given allocation, mapped previously using vmaMapMemory(). + +For details, see description of vmaMapMemory(). + +This function doesn't automatically flush or invalidate caches. 
+If the allocation is made from a memory type that is not `HOST_COHERENT`,
+you also need to use vmaInvalidateAllocation() / vmaFlushAllocation(), as required by Vulkan specification.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaUnmapMemory(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation);
+
+/** \brief Flushes memory of given allocation.
+
+Calls `vkFlushMappedMemoryRanges()` for memory associated with given range of given allocation.
+It needs to be called after writing to a mapped memory for memory types that are not `HOST_COHERENT`.
+Unmap operation doesn't do that automatically.
+
+- `offset` must be relative to the beginning of allocation.
+- `size` can be `VK_WHOLE_SIZE`. It means all memory from `offset` to the end of given allocation.
+- `offset` and `size` don't have to be aligned.
+  They are internally rounded down/up to a multiple of `nonCoherentAtomSize`.
+- If `size` is 0, this call is ignored.
+- If memory type that the `allocation` belongs to is not `HOST_VISIBLE` or it is `HOST_COHERENT`,
+  this call is ignored.
+
+Warning! `offset` and `size` are relative to the contents of given `allocation`.
+If you mean whole allocation, you can pass 0 and `VK_WHOLE_SIZE`, respectively.
+Do not pass allocation's offset as `offset`!!!
+
+This function returns the `VkResult` from `vkFlushMappedMemoryRanges` if it is
+called, otherwise `VK_SUCCESS`.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocation(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    VkDeviceSize offset,
+    VkDeviceSize size);
+
+/** \brief Invalidates memory of given allocation.
+
+Calls `vkInvalidateMappedMemoryRanges()` for memory associated with given range of given allocation.
+It needs to be called before reading from a mapped memory for memory types that are not `HOST_COHERENT`.
+Map operation doesn't do that automatically.
+
+- `offset` must be relative to the beginning of allocation.
+- `size` can be `VK_WHOLE_SIZE`. It means all memory from `offset` to the end of given allocation.
+- `offset` and `size` don't have to be aligned.
+  They are internally rounded down/up to a multiple of `nonCoherentAtomSize`.
+- If `size` is 0, this call is ignored.
+- If memory type that the `allocation` belongs to is not `HOST_VISIBLE` or it is `HOST_COHERENT`,
+  this call is ignored.
+
+Warning! `offset` and `size` are relative to the contents of given `allocation`.
+If you mean whole allocation, you can pass 0 and `VK_WHOLE_SIZE`, respectively.
+Do not pass allocation's offset as `offset`!!!
+
+This function returns the `VkResult` from `vkInvalidateMappedMemoryRanges` if
+it is called, otherwise `VK_SUCCESS`.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocation(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    VkDeviceSize offset,
+    VkDeviceSize size);
+
+/** \brief Flushes memory of given set of allocations.
+
+Calls `vkFlushMappedMemoryRanges()` for memory associated with given ranges of given allocations.
+For more information, see documentation of vmaFlushAllocation().
+
+\param allocator
+\param allocationCount
+\param allocations
+\param offsets If not null, it must point to an array of offsets of regions to flush, relative to the beginning of respective allocations. Null means all offsets are zero.
+\param sizes If not null, it must point to an array of sizes of regions to flush in respective allocations. Null means `VK_WHOLE_SIZE` for all allocations.
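The typical write path that the flush rules above describe, as an editorial sketch (a `HOST_VISIBLE`, possibly non-coherent `allocation` plus a host array `vertices` of `vertexBytes` bytes are assumptions of this snippet):

\code
void* mapped = NULL;
if(vmaMapMemory(allocator, allocation, &mapped) == VK_SUCCESS)
{
    memcpy(mapped, vertices, vertexBytes);
    vmaFlushAllocation(allocator, allocation, 0, VK_WHOLE_SIZE);   // ignored for HOST_COHERENT memory
    vmaUnmapMemory(allocator, allocation);
}
// vmaCopyMemoryToAllocation(), documented below, wraps this same sequence in one call.
\endcode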
+ +This function returns the `VkResult` from `vkFlushMappedMemoryRanges` if it is +called, otherwise `VK_SUCCESS`. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocations( +    VmaAllocator VMA_NOT_NULL allocator, +    uint32_t allocationCount, +    const VmaAllocation VMA_NOT_NULL* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) allocations, +    const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) offsets, +    const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) sizes); + +/** \brief Invalidates memory of given set of allocations. + +Calls `vkInvalidateMappedMemoryRanges()` for memory associated with given ranges of given allocations. +For more information, see documentation of vmaInvalidateAllocation(). + +\param allocator +\param allocationCount +\param allocations +\param offsets If not null, it must point to an array of offsets of regions to flush, relative to the beginning of respective allocations. Null means all offsets are zero. +\param sizes If not null, it must point to an array of sizes of regions to flush in respective allocations. Null means `VK_WHOLE_SIZE` for all allocations. + +This function returns the `VkResult` from `vkInvalidateMappedMemoryRanges` if it is +called, otherwise `VK_SUCCESS`. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocations( +    VmaAllocator VMA_NOT_NULL allocator, +    uint32_t allocationCount, +    const VmaAllocation VMA_NOT_NULL* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) allocations, +    const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) offsets, +    const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) sizes); + +/** \brief Maps the allocation temporarily if needed, copies data from specified host pointer to it, and flushes the memory from the host caches if needed. + +\param allocator +\param pSrcHostPointer Pointer to the host data that become source of the copy. +\param dstAllocation   Handle to the allocation that becomes destination of the copy. +\param dstAllocationLocalOffset  Offset within `dstAllocation` where to write copied data, in bytes. +\param size            Number of bytes to copy. + +This is a convenience function that allows to copy data from a host pointer to an allocation easily. +Same behavior can be achieved by calling vmaMapMemory(), `memcpy()`, vmaUnmapMemory(), vmaFlushAllocation(). + +This function can be called only for allocations created in a memory type that has `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` flag. +It can be ensured e.g. by using #VMA_MEMORY_USAGE_AUTO and #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or +#VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT. +Otherwise, the function will fail and generate a Validation Layers error. + +`dstAllocationLocalOffset` is relative to the contents of given `dstAllocation`. +If you mean whole allocation, you should pass 0. +Do not pass allocation's offset within device memory block this parameter! +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyMemoryToAllocation( +    VmaAllocator VMA_NOT_NULL allocator, +    const void* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(size) pSrcHostPointer, +    VmaAllocation VMA_NOT_NULL dstAllocation, +    VkDeviceSize dstAllocationLocalOffset, +    VkDeviceSize size); + +/** \brief Invalidates memory in the host caches if needed, maps the allocation temporarily if needed, and copies data from it to a specified host pointer. + +\param allocator +\param srcAllocation   Handle to the allocation that becomes source of the copy. 
+\param srcAllocationLocalOffset  Offset within `srcAllocation` where to read copied data, in bytes. +\param pDstHostPointer Pointer to the host memory that become destination of the copy. +\param size            Number of bytes to copy. + +This is a convenience function that allows to copy data from an allocation to a host pointer easily. +Same behavior can be achieved by calling vmaInvalidateAllocation(), vmaMapMemory(), `memcpy()`, vmaUnmapMemory(). + +This function should be called only for allocations created in a memory type that has `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` +and `VK_MEMORY_PROPERTY_HOST_CACHED_BIT` flag. +It can be ensured e.g. by using #VMA_MEMORY_USAGE_AUTO and #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT. +Otherwise, the function may fail and generate a Validation Layers error. +It may also work very slowly when reading from an uncached memory. + +`srcAllocationLocalOffset` is relative to the contents of given `srcAllocation`. +If you mean whole allocation, you should pass 0. +Do not pass allocation's offset within device memory block as this parameter! +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyAllocationToMemory( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL srcAllocation, +    VkDeviceSize srcAllocationLocalOffset, +    void* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(size) pDstHostPointer, +    VkDeviceSize size); + +/** \brief Checks magic number in margins around all allocations in given memory types (in both default and custom pools) in search for corruptions. + +\param allocator +\param memoryTypeBits Bit mask, where each bit set means that a memory type with that index should be checked. + +Corruption detection is enabled only when `VMA_DEBUG_DETECT_CORRUPTION` macro is defined to nonzero, +`VMA_DEBUG_MARGIN` is defined to nonzero and only for memory types that are +`HOST_VISIBLE` and `HOST_COHERENT`. For more information, see [Corruption detection](@ref debugging_memory_usage_corruption_detection). + +Possible return values: + +- `VK_ERROR_FEATURE_NOT_PRESENT` - corruption detection is not enabled for any of specified memory types. +- `VK_SUCCESS` - corruption detection has been performed and succeeded. +- `VK_ERROR_UNKNOWN` - corruption detection has been performed and found memory corruptions around one of the allocations. +  `VMA_ASSERT` is also fired in that case. +- Other value: Error returned by Vulkan, e.g. memory mapping failure. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckCorruption( +    VmaAllocator VMA_NOT_NULL allocator, +    uint32_t memoryTypeBits); + +/** \brief Begins defragmentation process. + +\param allocator Allocator object. +\param pInfo Structure filled with parameters of defragmentation. +\param[out] pContext Context object that must be passed to vmaEndDefragmentation() to finish defragmentation. +\returns +- `VK_SUCCESS` if defragmentation can begin. +- `VK_ERROR_FEATURE_NOT_PRESENT` if defragmentation is not supported. + +For more information about defragmentation, see documentation chapter: +[Defragmentation](@ref defragmentation). +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentation( +    VmaAllocator VMA_NOT_NULL allocator, +    const VmaDefragmentationInfo* VMA_NOT_NULL pInfo, +    VmaDefragmentationContext VMA_NULLABLE* VMA_NOT_NULL pContext); + +/** \brief Ends defragmentation process. + +\param allocator Allocator object. +\param context Context object that has been created by vmaBeginDefragmentation(). +\param[out] pStats Optional stats for the defragmentation. Can be null. 
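An editorial sketch of the overall defragmentation flow these functions describe (only the VMA calls are shown; the GPU copies and the recreation of buffers and images between passes are application-specific):

\code
VmaDefragmentationInfo defragInfo = {0};        // default algorithm, no custom pool

VmaDefragmentationContext ctx;
if(vmaBeginDefragmentation(allocator, &defragInfo, &ctx) == VK_SUCCESS)
{
    for(;;)
    {
        VmaDefragmentationPassMoveInfo pass = {0};
        if(vmaBeginDefragmentationPass(allocator, ctx, &pass) == VK_SUCCESS)
            break;                               // no more moves are possible
        // ... perform the copies described by pass.pMoves[0..pass.moveCount) and rebind resources ...
        if(vmaEndDefragmentationPass(allocator, ctx, &pass) == VK_SUCCESS)
            break;
    }
    vmaEndDefragmentation(allocator, ctx, NULL);
}
\endcode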
+ +Use this function to finish defragmentation started by vmaBeginDefragmentation(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaEndDefragmentation( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaDefragmentationContext VMA_NOT_NULL context, +    VmaDefragmentationStats* VMA_NULLABLE pStats); + +/** \brief Starts single defragmentation pass. + +\param allocator Allocator object. +\param context Context object that has been created by vmaBeginDefragmentation(). +\param[out] pPassInfo Computed information for current pass. +\returns +- `VK_SUCCESS` if no more moves are possible. Then you can omit call to vmaEndDefragmentationPass() and simply end whole defragmentation. +- `VK_INCOMPLETE` if there are pending moves returned in `pPassInfo`. You need to perform them, call vmaEndDefragmentationPass(), +  and then preferably try another pass with vmaBeginDefragmentationPass(). +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentationPass( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaDefragmentationContext VMA_NOT_NULL context, +    VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo); + +/** \brief Ends single defragmentation pass. + +\param allocator Allocator object. +\param context Context object that has been created by vmaBeginDefragmentation(). +\param pPassInfo Computed information for current pass filled by vmaBeginDefragmentationPass() and possibly modified by you. + +Returns `VK_SUCCESS` if no more moves are possible or `VK_INCOMPLETE` if more defragmentations are possible. + +Ends incremental defragmentation pass and commits all defragmentation moves from `pPassInfo`. +After this call: + +- Allocations at `pPassInfo[i].srcAllocation` that had `pPassInfo[i].operation ==` #VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY +  (which is the default) will be pointing to the new destination place. +- Allocation at `pPassInfo[i].srcAllocation` that had `pPassInfo[i].operation ==` #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY +  will be freed. + +If no more moves are possible you can end whole defragmentation. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaEndDefragmentationPass( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaDefragmentationContext VMA_NOT_NULL context, +    VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo); + +/** \brief Binds buffer to allocation. + +Binds specified buffer to region of memory represented by specified allocation. +Gets `VkDeviceMemory` handle and offset from the allocation. +If you want to create a buffer, allocate memory for it and bind them together separately, +you should use this function for binding instead of standard `vkBindBufferMemory()`, +because it ensures proper synchronization so that when a `VkDeviceMemory` object is used by multiple +allocations, calls to `vkBind*Memory()` or `vkMapMemory()` won't happen from multiple threads simultaneously +(which is illegal in Vulkan). + +It is recommended to use function vmaCreateBuffer() instead of this one. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    VkBuffer VMA_NOT_NULL_NON_DISPATCHABLE buffer); + +/** \brief Binds buffer to allocation with additional parameters. + +\param allocator +\param allocation +\param allocationLocalOffset Additional offset to be added while binding, relative to the beginning of the `allocation`. Normally it should be 0. +\param buffer +\param pNext A chain of structures to be attached to `VkBindBufferMemoryInfoKHR` structure used internally. 
Normally it should be null. + +This function is similar to vmaBindBufferMemory(), but it provides additional parameters. + +If `pNext` is not null, #VmaAllocator object must have been created with #VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT flag +or with VmaAllocatorCreateInfo::vulkanApiVersion `>= VK_API_VERSION_1_1`. Otherwise the call fails. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory2( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    VkDeviceSize allocationLocalOffset, +    VkBuffer VMA_NOT_NULL_NON_DISPATCHABLE buffer, +    const void* VMA_NULLABLE VMA_EXTENDS_VK_STRUCT(VkBindBufferMemoryInfoKHR) pNext); + +/** \brief Binds image to allocation. + +Binds specified image to region of memory represented by specified allocation. +Gets `VkDeviceMemory` handle and offset from the allocation. +If you want to create an image, allocate memory for it and bind them together separately, +you should use this function for binding instead of standard `vkBindImageMemory()`, +because it ensures proper synchronization so that when a `VkDeviceMemory` object is used by multiple +allocations, calls to `vkBind*Memory()` or `vkMapMemory()` won't happen from multiple threads simultaneously +(which is illegal in Vulkan). + +It is recommended to use function vmaCreateImage() instead of this one. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    VkImage VMA_NOT_NULL_NON_DISPATCHABLE image); + +/** \brief Binds image to allocation with additional parameters. + +\param allocator +\param allocation +\param allocationLocalOffset Additional offset to be added while binding, relative to the beginning of the `allocation`. Normally it should be 0. +\param image +\param pNext A chain of structures to be attached to `VkBindImageMemoryInfoKHR` structure used internally. Normally it should be null. + +This function is similar to vmaBindImageMemory(), but it provides additional parameters. + +If `pNext` is not null, #VmaAllocator object must have been created with #VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT flag +or with VmaAllocatorCreateInfo::vulkanApiVersion `>= VK_API_VERSION_1_1`. Otherwise the call fails. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory2( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    VkDeviceSize allocationLocalOffset, +    VkImage VMA_NOT_NULL_NON_DISPATCHABLE image, +    const void* VMA_NULLABLE VMA_EXTENDS_VK_STRUCT(VkBindImageMemoryInfoKHR) pNext); + +/** \brief Creates a new `VkBuffer`, allocates and binds memory for it. + +\param allocator +\param pBufferCreateInfo +\param pAllocationCreateInfo +\param[out] pBuffer Buffer that was created. +\param[out] pAllocation Allocation that was created. +\param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo(). + +This function automatically: + +-# Creates buffer. +-# Allocates appropriate memory for it. +-# Binds the buffer with the memory. + +If any of these operations fail, buffer and allocation are not created, +returned value is negative error code, `*pBuffer` and `*pAllocation` are null. + +If the function succeeded, you must destroy both buffer and allocation when you +no longer need them using either convenience function vmaDestroyBuffer() or +separately, using `vkDestroyBuffer()` and vmaFreeMemory(). 
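The one-call path described above, as an editorial sketch close to the library's quick-start example (`allocator` assumed, sizes arbitrary):

\code
VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
bufferInfo.size  = 65536;
bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;

VmaAllocationCreateInfo allocInfo = {0};
allocInfo.usage = VMA_MEMORY_USAGE_AUTO;

VkBuffer buffer;
VmaAllocation allocation;
if(vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, NULL) == VK_SUCCESS)
{
    // ... record commands that use `buffer` ...
    vmaDestroyBuffer(allocator, buffer, allocation);
}
\endcode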
+ +If #VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT flag was used, +VK_KHR_dedicated_allocation extension is used internally to query driver whether +it requires or prefers the new buffer to have dedicated allocation. If yes, +and if dedicated allocation is possible +(#VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT is not used), it creates dedicated +allocation for this buffer, just like when using +#VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. + +\note This function creates a new `VkBuffer`. Sub-allocation of parts of one large buffer, +although recommended as a good practice, is out of scope of this library and could be implemented +by the user as a higher-level logic on top of VMA. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBuffer( +    VmaAllocator VMA_NOT_NULL allocator, +    const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, +    const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, +    VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer, +    VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, +    VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); + +/** \brief Creates a buffer with additional minimum alignment. + +Similar to vmaCreateBuffer() but provides additional parameter `minAlignment` which allows to specify custom, +minimum alignment to be used when placing the buffer inside a larger memory block, which may be needed e.g. +for interop with OpenGL. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBufferWithAlignment( +    VmaAllocator VMA_NOT_NULL allocator, +    const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, +    const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, +    VkDeviceSize minAlignment, +    VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer, +    VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, +    VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); + +/** \brief Creates a new `VkBuffer`, binds already created memory for it. + +\param allocator +\param allocation Allocation that provides memory to be used for binding new buffer to it. +\param pBufferCreateInfo +\param[out] pBuffer Buffer that was created. + +This function automatically: + +-# Creates buffer. +-# Binds the buffer with the supplied memory. + +If any of these operations fail, buffer is not created, +returned value is negative error code and `*pBuffer` is null. + +If the function succeeded, you must destroy the buffer when you +no longer need it using `vkDestroyBuffer()`. If you want to also destroy the corresponding +allocation you can use convenience function vmaDestroyBuffer(). + +\note There is a new version of this function augmented with parameter `allocationLocalOffset` - see vmaCreateAliasingBuffer2(). +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, +    VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer); + +/** \brief Creates a new `VkBuffer`, binds already created memory for it. + +\param allocator +\param allocation Allocation that provides memory to be used for binding new buffer to it. +\param allocationLocalOffset Additional offset to be added while binding, relative to the beginning of the allocation. Normally it should be 0. +\param pBufferCreateInfo  +\param[out] pBuffer Buffer that was created. + +This function automatically: + +-# Creates buffer. +-# Binds the buffer with the supplied memory. 
+ +If any of these operations fail, buffer is not created, +returned value is negative error code and `*pBuffer` is null. + +If the function succeeded, you must destroy the buffer when you +no longer need it using `vkDestroyBuffer()`. If you want to also destroy the corresponding +allocation you can use convenience function vmaDestroyBuffer(). + +\note This is a new version of the function augmented with parameter `allocationLocalOffset`. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer2( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    VkDeviceSize allocationLocalOffset, +    const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, +    VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer); + +/** \brief Destroys Vulkan buffer and frees allocated memory. + +This is just a convenience function equivalent to: + +\code +vkDestroyBuffer(device, buffer, allocationCallbacks); +vmaFreeMemory(allocator, allocation); +\endcode + +It is safe to pass null as buffer and/or allocation. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyBuffer( +    VmaAllocator VMA_NOT_NULL allocator, +    VkBuffer VMA_NULLABLE_NON_DISPATCHABLE buffer, +    VmaAllocation VMA_NULLABLE allocation); + +/// Function similar to vmaCreateBuffer(). +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage( +    VmaAllocator VMA_NOT_NULL allocator, +    const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, +    const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, +    VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage, +    VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, +    VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); + +/// Function similar to vmaCreateAliasingBuffer() but for images. +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, +    VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage); + +/// Function similar to vmaCreateAliasingBuffer2() but for images. +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage2( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    VkDeviceSize allocationLocalOffset, +    const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, +    VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage); + +/** \brief Destroys Vulkan image and frees allocated memory. + +This is just a convenience function equivalent to: + +\code +vkDestroyImage(device, image, allocationCallbacks); +vmaFreeMemory(allocator, allocation); +\endcode + +It is safe to pass null as image and/or allocation. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyImage( +    VmaAllocator VMA_NOT_NULL allocator, +    VkImage VMA_NULLABLE_NON_DISPATCHABLE image, +    VmaAllocation VMA_NULLABLE allocation); + +/** @} */ + +/** +\addtogroup group_virtual +@{ +*/ + +/** \brief Creates new #VmaVirtualBlock object. + +\param pCreateInfo Parameters for creation. +\param[out] pVirtualBlock Returned virtual block object or `VMA_NULL` if creation failed. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateVirtualBlock( +    const VmaVirtualBlockCreateInfo* VMA_NOT_NULL pCreateInfo, +    VmaVirtualBlock VMA_NULLABLE* VMA_NOT_NULL pVirtualBlock); + +/** \brief Destroys #VmaVirtualBlock object. + +Please note that you should consciously handle virtual allocations that could remain unfreed in the block. 
+You should either free them individually using vmaVirtualFree() or call vmaClearVirtualBlock()
+if you are sure this is what you want. If you do neither, an assert is called.
+
+If you keep pointers to some additional metadata associated with your virtual allocations in their `pUserData`,
+don't forget to free them.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaDestroyVirtualBlock(
+    VmaVirtualBlock VMA_NULLABLE virtualBlock);
+
+/** \brief Returns true if the #VmaVirtualBlock is empty - contains 0 virtual allocations and has all its space available for new allocations.
+*/
+VMA_CALL_PRE VkBool32 VMA_CALL_POST vmaIsVirtualBlockEmpty(
+    VmaVirtualBlock VMA_NOT_NULL virtualBlock);
+
+/** \brief Returns information about a specific virtual allocation within a virtual block, like its size and `pUserData` pointer.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualAllocationInfo(
+    VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+    VmaVirtualAllocation VMA_NOT_NULL_NON_DISPATCHABLE allocation, VmaVirtualAllocationInfo* VMA_NOT_NULL pVirtualAllocInfo);
+
+/** \brief Allocates new virtual allocation inside given #VmaVirtualBlock.
+
+If the allocation fails due to not enough free space available, `VK_ERROR_OUT_OF_DEVICE_MEMORY` is returned
+(even though the function doesn't ever allocate actual GPU memory).
+`pAllocation` is then set to `VK_NULL_HANDLE` and `pOffset`, if not null, is set to `UINT64_MAX`.
+
+\param virtualBlock Virtual block
+\param pCreateInfo Parameters for the allocation
+\param[out] pAllocation Returned handle of the new allocation
+\param[out] pOffset Returned offset of the new allocation. Optional, can be null.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaVirtualAllocate(
+    VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+    const VmaVirtualAllocationCreateInfo* VMA_NOT_NULL pCreateInfo,
+    VmaVirtualAllocation VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pAllocation,
+    VkDeviceSize* VMA_NULLABLE pOffset);
+
+/** \brief Frees virtual allocation inside given #VmaVirtualBlock.
+
+It is correct to call this function with `allocation == VK_NULL_HANDLE` - it does nothing.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaVirtualFree(
+    VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+    VmaVirtualAllocation VMA_NULLABLE_NON_DISPATCHABLE allocation);
+
+/** \brief Frees all virtual allocations inside given #VmaVirtualBlock.
+
+You must either call this function or free each virtual allocation individually with vmaVirtualFree()
+before destroying a virtual block. Otherwise, an assert is called.
+
+If you keep a pointer to some additional metadata associated with your virtual allocation in its `pUserData`,
+don't forget to free it as well.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaClearVirtualBlock(
+    VmaVirtualBlock VMA_NOT_NULL virtualBlock);
+
+/** \brief Changes custom pointer associated with given virtual allocation.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaSetVirtualAllocationUserData(
+    VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+    VmaVirtualAllocation VMA_NOT_NULL_NON_DISPATCHABLE allocation,
+    void* VMA_NULLABLE pUserData);
+
+/** \brief Calculates and returns statistics about virtual allocations and memory usage in given #VmaVirtualBlock.
+
+This function is fast to call. For more detailed statistics, see vmaCalculateVirtualBlockStatistics().
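An editorial end-to-end sketch of the virtual allocation API declared in this group (sizes are arbitrary; no Vulkan objects are involved at all):

\code
VmaVirtualBlockCreateInfo blockInfo = {0};
blockInfo.size = 1048576;                      // 1 MiB of purely virtual space

VmaVirtualBlock block;
if(vmaCreateVirtualBlock(&blockInfo, &block) == VK_SUCCESS)
{
    VmaVirtualAllocationCreateInfo allocInfo = {0};
    allocInfo.size      = 4096;
    allocInfo.alignment = 256;

    VmaVirtualAllocation alloc;
    VkDeviceSize offset;
    if(vmaVirtualAllocate(block, &allocInfo, &alloc, &offset) == VK_SUCCESS)
        vmaVirtualFree(block, alloc);          // or vmaClearVirtualBlock(block) to drop everything

    vmaDestroyVirtualBlock(block);
}
\endcode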
+*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualBlockStatistics( +    VmaVirtualBlock VMA_NOT_NULL virtualBlock, +    VmaStatistics* VMA_NOT_NULL pStats); + +/** \brief Calculates and returns detailed statistics about virtual allocations and memory usage in given #VmaVirtualBlock. + +This function is slow to call. Use for debugging purposes. +For less detailed statistics, see vmaGetVirtualBlockStatistics(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateVirtualBlockStatistics( +    VmaVirtualBlock VMA_NOT_NULL virtualBlock, +    VmaDetailedStatistics* VMA_NOT_NULL pStats); + +/** @} */ + +#if VMA_STATS_STRING_ENABLED +/** +\addtogroup group_stats +@{ +*/ + +/** \brief Builds and returns a null-terminated string in JSON format with information about given #VmaVirtualBlock. +\param virtualBlock Virtual block. +\param[out] ppStatsString Returned string. +\param detailedMap Pass `VK_FALSE` to only obtain statistics as returned by vmaCalculateVirtualBlockStatistics(). Pass `VK_TRUE` to also obtain full list of allocations and free spaces. + +Returned string must be freed using vmaFreeVirtualBlockStatsString(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaBuildVirtualBlockStatsString( +    VmaVirtualBlock VMA_NOT_NULL virtualBlock, +    char* VMA_NULLABLE* VMA_NOT_NULL ppStatsString, +    VkBool32 detailedMap); + +/// Frees a string returned by vmaBuildVirtualBlockStatsString(). +VMA_CALL_PRE void VMA_CALL_POST vmaFreeVirtualBlockStatsString( +    VmaVirtualBlock VMA_NOT_NULL virtualBlock, +    char* VMA_NULLABLE pStatsString); + +/** \brief Builds and returns statistics as a null-terminated string in JSON format. +\param allocator +\param[out] ppStatsString Must be freed using vmaFreeStatsString() function. +\param detailedMap +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaBuildStatsString( +    VmaAllocator VMA_NOT_NULL allocator, +    char* VMA_NULLABLE* VMA_NOT_NULL ppStatsString, +    VkBool32 detailedMap); + +VMA_CALL_PRE void VMA_CALL_POST vmaFreeStatsString( +    VmaAllocator VMA_NOT_NULL allocator, +    char* VMA_NULLABLE pStatsString); + +/** @} */ + +#endif // VMA_STATS_STRING_ENABLED + +#endif // _VMA_FUNCTION_HEADERS + +#ifdef __cplusplus +} +#endif + +#endif // AMD_VULKAN_MEMORY_ALLOCATOR_H + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +// +//    IMPLEMENTATION +// +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// + +// For Visual Studio IntelliSense. +#if defined(__cplusplus) && defined(__INTELLISENSE__) +#define VMA_IMPLEMENTATION +#endif + +#ifdef VMA_IMPLEMENTATION +#undef VMA_IMPLEMENTATION + +#include <cstdint> +#include <cstdlib> +#include <cstring> +#include <cinttypes> +#include <utility> +#include <type_traits> + +#if !defined(VMA_CPP20) +    #if __cplusplus >= 202002L || _MSVC_LANG >= 202002L // C++20 +        #define VMA_CPP20 1 +    #else +        #define VMA_CPP20 0 +    #endif +#endif + +#ifdef _MSC_VER +    #include <intrin.h> // For functions like __popcnt, _BitScanForward etc. 
+#endif +#if VMA_CPP20 +    #include <bit> +#endif + +#if VMA_STATS_STRING_ENABLED +    #include <cstdio> // For snprintf +#endif + +/******************************************************************************* +CONFIGURATION SECTION + +Define some of these macros before each #include of this header or change them +here if you need other then default behavior depending on your environment. +*/ +#ifndef _VMA_CONFIGURATION + +/* +Define this macro to 1 to make the library fetch pointers to Vulkan functions +internally, like: + +    vulkanFunctions.vkAllocateMemory = &vkAllocateMemory; +*/ +#if !defined(VMA_STATIC_VULKAN_FUNCTIONS) && !defined(VK_NO_PROTOTYPES) +    #define VMA_STATIC_VULKAN_FUNCTIONS 1 +#endif + +/* +Define this macro to 1 to make the library fetch pointers to Vulkan functions +internally, like: + +    vulkanFunctions.vkAllocateMemory = (PFN_vkAllocateMemory)vkGetDeviceProcAddr(device, "vkAllocateMemory"); + +To use this feature in new versions of VMA you now have to pass +VmaVulkanFunctions::vkGetInstanceProcAddr and vkGetDeviceProcAddr as +VmaAllocatorCreateInfo::pVulkanFunctions. Other members can be null. +*/ +#if !defined(VMA_DYNAMIC_VULKAN_FUNCTIONS) +    #define VMA_DYNAMIC_VULKAN_FUNCTIONS 1 +#endif + +#ifndef VMA_USE_STL_SHARED_MUTEX +    #if __cplusplus >= 201703L || _MSVC_LANG >= 201703L // C++17 +        #define VMA_USE_STL_SHARED_MUTEX 1 +    // Visual studio defines __cplusplus properly only when passed additional parameter: /Zc:__cplusplus +    // Otherwise it is always 199711L, despite shared_mutex works since Visual Studio 2015 Update 2. +    #elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 190023918 && __cplusplus == 199711L && _MSVC_LANG >= 201703L +        #define VMA_USE_STL_SHARED_MUTEX 1 +    #else +        #define VMA_USE_STL_SHARED_MUTEX 0 +    #endif +#endif + +/* +Define this macro to include custom header files without having to edit this file directly, e.g.: + +    // Inside of "my_vma_configuration_user_includes.h": + +    #include "my_custom_assert.h" // for MY_CUSTOM_ASSERT +    #include "my_custom_min.h" // for my_custom_min +    #include <algorithm> +    #include <mutex> + +    // Inside a different file, which includes "vk_mem_alloc.h": + +    #define VMA_CONFIGURATION_USER_INCLUDES_H "my_vma_configuration_user_includes.h" +    #define VMA_ASSERT(expr) MY_CUSTOM_ASSERT(expr) +    #define VMA_MIN(v1, v2)  (my_custom_min(v1, v2)) +    #include "vk_mem_alloc.h" +    ... + +The following headers are used in this CONFIGURATION section only, so feel free to +remove them if not needed. +*/ +#if !defined(VMA_CONFIGURATION_USER_INCLUDES_H) +    #include <cassert> // for assert +    #include <algorithm> // for min, max, swap +    #include <mutex> +#else +    #include VMA_CONFIGURATION_USER_INCLUDES_H +#endif + +#ifndef VMA_NULL +   // Value used as null pointer. Define it to e.g.: nullptr, NULL, 0, (void*)0. +   #define VMA_NULL   nullptr +#endif + +#ifndef VMA_FALLTHROUGH +    #if __cplusplus >= 201703L || _MSVC_LANG >= 201703L // C++17 +        #define VMA_FALLTHROUGH [[fallthrough]] +    #else +        #define VMA_FALLTHROUGH +    #endif +#endif + +// Normal assert to check for programmer's errors, especially in Debug configuration. +#ifndef VMA_ASSERT +   #ifdef NDEBUG +       #define VMA_ASSERT(expr) +   #else +       #define VMA_ASSERT(expr)         assert(expr) +   #endif +#endif + +// Assert that will be called very often, like inside data structures e.g. operator[]. +// Making it non-empty can make program slow. 
+#ifndef VMA_HEAVY_ASSERT +   #ifdef NDEBUG +       #define VMA_HEAVY_ASSERT(expr) +   #else +       #define VMA_HEAVY_ASSERT(expr)   //VMA_ASSERT(expr) +   #endif +#endif + +// Assert used for reporting memory leaks - unfreed allocations. +#ifndef VMA_ASSERT_LEAK +    #define VMA_ASSERT_LEAK(expr)   VMA_ASSERT(expr) +#endif + +// If your compiler is not compatible with C++17 and definition of +// aligned_alloc() function is missing, uncommenting following line may help: + +//#include <malloc.h> + +#if defined(__ANDROID_API__) && (__ANDROID_API__ < 16) +#include <cstdlib> +static void* vma_aligned_alloc(size_t alignment, size_t size) +{ +    // alignment must be >= sizeof(void*) +    if(alignment < sizeof(void*)) +    { +        alignment = sizeof(void*); +    } + +    return memalign(alignment, size); +} +#elif defined(__APPLE__) || defined(__ANDROID__) || (defined(__linux__) && defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC)) +#include <cstdlib> + +#if defined(__APPLE__) +#include <AvailabilityMacros.h> +#endif + +static void* vma_aligned_alloc(size_t alignment, size_t size) +{ +    // Unfortunately, aligned_alloc causes VMA to crash due to it returning null pointers. (At least under 11.4) +    // Therefore, for now disable this specific exception until a proper solution is found. +    //#if defined(__APPLE__) && (defined(MAC_OS_X_VERSION_10_16) || defined(__IPHONE_14_0)) +    //#if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_16 || __IPHONE_OS_VERSION_MAX_ALLOWED >= __IPHONE_14_0 +    //    // For C++14, usr/include/malloc/_malloc.h declares aligned_alloc()) only +    //    // with the MacOSX11.0 SDK in Xcode 12 (which is what adds +    //    // MAC_OS_X_VERSION_10_16), even though the function is marked +    //    // available for 10.15. That is why the preprocessor checks for 10.16 but +    //    // the __builtin_available checks for 10.15. +    //    // People who use C++17 could call aligned_alloc with the 10.15 SDK already. +    //    if (__builtin_available(macOS 10.15, iOS 13, *)) +    //        return aligned_alloc(alignment, size); +    //#endif +    //#endif + +    // alignment must be >= sizeof(void*) +    if(alignment < sizeof(void*)) +    { +        alignment = sizeof(void*); +    } + +    void *pointer; +    if(posix_memalign(&pointer, alignment, size) == 0) +        return pointer; +    return VMA_NULL; +} +#elif defined(_WIN32) +static void* vma_aligned_alloc(size_t alignment, size_t size) +{ +    return _aligned_malloc(size, alignment); +} +#elif __cplusplus >= 201703L || _MSVC_LANG >= 201703L // C++17 +static void* vma_aligned_alloc(size_t alignment, size_t size) +{ +    return aligned_alloc(alignment, size); +} +#else +static void* vma_aligned_alloc(size_t alignment, size_t size) +{ +    VMA_ASSERT(0 && "Could not implement aligned_alloc automatically. 
Please enable C++17 or later in your compiler or provide custom implementation of macro VMA_SYSTEM_ALIGNED_MALLOC (and VMA_SYSTEM_ALIGNED_FREE if needed) using the API of your system."); +    return VMA_NULL; +} +#endif + +#if defined(_WIN32) +static void vma_aligned_free(void* ptr) +{ +    _aligned_free(ptr); +} +#else +static void vma_aligned_free(void* VMA_NULLABLE ptr) +{ +    free(ptr); +} +#endif + +#ifndef VMA_ALIGN_OF +   #define VMA_ALIGN_OF(type)       (alignof(type)) +#endif + +#ifndef VMA_SYSTEM_ALIGNED_MALLOC +   #define VMA_SYSTEM_ALIGNED_MALLOC(size, alignment) vma_aligned_alloc((alignment), (size)) +#endif + +#ifndef VMA_SYSTEM_ALIGNED_FREE +   // VMA_SYSTEM_FREE is the old name, but might have been defined by the user +   #if defined(VMA_SYSTEM_FREE) +      #define VMA_SYSTEM_ALIGNED_FREE(ptr)     VMA_SYSTEM_FREE(ptr) +   #else +      #define VMA_SYSTEM_ALIGNED_FREE(ptr)     vma_aligned_free(ptr) +    #endif +#endif + +#ifndef VMA_COUNT_BITS_SET +    // Returns number of bits set to 1 in (v) +    #define VMA_COUNT_BITS_SET(v) VmaCountBitsSet(v) +#endif + +#ifndef VMA_BITSCAN_LSB +    // Scans integer for index of first nonzero value from the Least Significant Bit (LSB). If mask is 0 then returns UINT8_MAX +    #define VMA_BITSCAN_LSB(mask) VmaBitScanLSB(mask) +#endif + +#ifndef VMA_BITSCAN_MSB +    // Scans integer for index of first nonzero value from the Most Significant Bit (MSB). If mask is 0 then returns UINT8_MAX +    #define VMA_BITSCAN_MSB(mask) VmaBitScanMSB(mask) +#endif + +#ifndef VMA_MIN +   #define VMA_MIN(v1, v2)    ((std::min)((v1), (v2))) +#endif + +#ifndef VMA_MAX +   #define VMA_MAX(v1, v2)    ((std::max)((v1), (v2))) +#endif + +#ifndef VMA_SORT +   #define VMA_SORT(beg, end, cmp)  std::sort(beg, end, cmp) +#endif + +#ifndef VMA_DEBUG_LOG_FORMAT +   #define VMA_DEBUG_LOG_FORMAT(format, ...) +   /* +   #define VMA_DEBUG_LOG_FORMAT(format, ...) do { \ +       printf((format), __VA_ARGS__); \ +       printf("\n"); \ +   } while(false) +   */ +#endif + +#ifndef VMA_DEBUG_LOG +    #define VMA_DEBUG_LOG(str)   VMA_DEBUG_LOG_FORMAT("%s", (str)) +#endif + +#ifndef VMA_LEAK_LOG_FORMAT +    #define VMA_LEAK_LOG_FORMAT(format, ...)   VMA_DEBUG_LOG_FORMAT(format, __VA_ARGS__) +#endif + +#ifndef VMA_CLASS_NO_COPY +    #define VMA_CLASS_NO_COPY(className) \ +        private: \ +            className(const className&) = delete; \ +            className& operator=(const className&) = delete; +#endif +#ifndef VMA_CLASS_NO_COPY_NO_MOVE +    #define VMA_CLASS_NO_COPY_NO_MOVE(className) \ +        private: \ +            className(const className&) = delete; \ +            className(className&&) = delete; \ +            className& operator=(const className&) = delete; \ +            className& operator=(className&&) = delete; +#endif + +// Define this macro to 1 to enable functions: vmaBuildStatsString, vmaFreeStatsString. 
+#if VMA_STATS_STRING_ENABLED +    static inline void VmaUint32ToStr(char* VMA_NOT_NULL outStr, size_t strLen, uint32_t num) +    { +        snprintf(outStr, strLen, "%" PRIu32, num); +    } +    static inline void VmaUint64ToStr(char* VMA_NOT_NULL outStr, size_t strLen, uint64_t num) +    { +        snprintf(outStr, strLen, "%" PRIu64, num); +    } +    static inline void VmaPtrToStr(char* VMA_NOT_NULL outStr, size_t strLen, const void* ptr) +    { +        snprintf(outStr, strLen, "%p", ptr); +    } +#endif + +#ifndef VMA_MUTEX +    class VmaMutex +    { +    VMA_CLASS_NO_COPY_NO_MOVE(VmaMutex) +    public: +        VmaMutex() { } +        void Lock() { m_Mutex.lock(); } +        void Unlock() { m_Mutex.unlock(); } +        bool TryLock() { return m_Mutex.try_lock(); } +    private: +        std::mutex m_Mutex; +    }; +    #define VMA_MUTEX VmaMutex +#endif + +// Read-write mutex, where "read" is shared access, "write" is exclusive access. +#ifndef VMA_RW_MUTEX +    #if VMA_USE_STL_SHARED_MUTEX +        // Use std::shared_mutex from C++17. +        #include <shared_mutex> +        class VmaRWMutex +        { +        public: +            void LockRead() { m_Mutex.lock_shared(); } +            void UnlockRead() { m_Mutex.unlock_shared(); } +            bool TryLockRead() { return m_Mutex.try_lock_shared(); } +            void LockWrite() { m_Mutex.lock(); } +            void UnlockWrite() { m_Mutex.unlock(); } +            bool TryLockWrite() { return m_Mutex.try_lock(); } +        private: +            std::shared_mutex m_Mutex; +        }; +        #define VMA_RW_MUTEX VmaRWMutex +    #elif defined(_WIN32) && defined(WINVER) && defined(SRWLOCK_INIT) && WINVER >= 0x0600 +        // Use SRWLOCK from WinAPI. +        // Minimum supported client = Windows Vista, server = Windows Server 2008. +        class VmaRWMutex +        { +        public: +            VmaRWMutex() { InitializeSRWLock(&m_Lock); } +            void LockRead() { AcquireSRWLockShared(&m_Lock); } +            void UnlockRead() { ReleaseSRWLockShared(&m_Lock); } +            bool TryLockRead() { return TryAcquireSRWLockShared(&m_Lock) != FALSE; } +            void LockWrite() { AcquireSRWLockExclusive(&m_Lock); } +            void UnlockWrite() { ReleaseSRWLockExclusive(&m_Lock); } +            bool TryLockWrite() { return TryAcquireSRWLockExclusive(&m_Lock) != FALSE; } +        private: +            SRWLOCK m_Lock; +        }; +        #define VMA_RW_MUTEX VmaRWMutex +    #else +        // Less efficient fallback: Use normal mutex. +        class VmaRWMutex +        { +        public: +            void LockRead() { m_Mutex.Lock(); } +            void UnlockRead() { m_Mutex.Unlock(); } +            bool TryLockRead() { return m_Mutex.TryLock(); } +            void LockWrite() { m_Mutex.Lock(); } +            void UnlockWrite() { m_Mutex.Unlock(); } +            bool TryLockWrite() { return m_Mutex.TryLock(); } +        private: +            VMA_MUTEX m_Mutex; +        }; +        #define VMA_RW_MUTEX VmaRWMutex +    #endif // #if VMA_USE_STL_SHARED_MUTEX +#endif // #ifndef VMA_RW_MUTEX + +/* +If providing your own implementation, you need to implement a subset of std::atomic. +*/ +#ifndef VMA_ATOMIC_UINT32 +    #include <atomic> +    #define VMA_ATOMIC_UINT32 std::atomic<uint32_t> +#endif + +#ifndef VMA_ATOMIC_UINT64 +    #include <atomic> +    #define VMA_ATOMIC_UINT64 std::atomic<uint64_t> +#endif + +#ifndef VMA_DEBUG_ALWAYS_DEDICATED_MEMORY +    /** +    Every allocation will have its own memory block. 
+    Define to 1 for debugging purposes only. +    */ +    #define VMA_DEBUG_ALWAYS_DEDICATED_MEMORY (0) +#endif + +#ifndef VMA_MIN_ALIGNMENT +    /** +    Minimum alignment of all allocations, in bytes. +    Set to more than 1 for debugging purposes. Must be power of two. +    */ +    #ifdef VMA_DEBUG_ALIGNMENT // Old name +        #define VMA_MIN_ALIGNMENT VMA_DEBUG_ALIGNMENT +    #else +        #define VMA_MIN_ALIGNMENT (1) +    #endif +#endif + +#ifndef VMA_DEBUG_MARGIN +    /** +    Minimum margin after every allocation, in bytes. +    Set nonzero for debugging purposes only. +    */ +    #define VMA_DEBUG_MARGIN (0) +#endif + +#ifndef VMA_DEBUG_INITIALIZE_ALLOCATIONS +    /** +    Define this macro to 1 to automatically fill new allocations and destroyed +    allocations with some bit pattern. +    */ +    #define VMA_DEBUG_INITIALIZE_ALLOCATIONS (0) +#endif + +#ifndef VMA_DEBUG_DETECT_CORRUPTION +    /** +    Define this macro to 1 together with non-zero value of VMA_DEBUG_MARGIN to +    enable writing magic value to the margin after every allocation and +    validating it, so that memory corruptions (out-of-bounds writes) are detected. +    */ +    #define VMA_DEBUG_DETECT_CORRUPTION (0) +#endif + +#ifndef VMA_DEBUG_GLOBAL_MUTEX +    /** +    Set this to 1 for debugging purposes only, to enable single mutex protecting all +    entry calls to the library. Can be useful for debugging multithreading issues. +    */ +    #define VMA_DEBUG_GLOBAL_MUTEX (0) +#endif + +#ifndef VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY +    /** +    Minimum value for VkPhysicalDeviceLimits::bufferImageGranularity. +    Set to more than 1 for debugging purposes only. Must be power of two. +    */ +    #define VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY (1) +#endif + +#ifndef VMA_DEBUG_DONT_EXCEED_MAX_MEMORY_ALLOCATION_COUNT +    /* +    Set this to 1 to make VMA never exceed VkPhysicalDeviceLimits::maxMemoryAllocationCount +    and return error instead of leaving up to Vulkan implementation what to do in such cases. +    */ +    #define VMA_DEBUG_DONT_EXCEED_MAX_MEMORY_ALLOCATION_COUNT (0) +#endif + +#ifndef VMA_SMALL_HEAP_MAX_SIZE +   /// Maximum size of a memory heap in Vulkan to consider it "small". +   #define VMA_SMALL_HEAP_MAX_SIZE (1024ull * 1024 * 1024) +#endif + +#ifndef VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE +   /// Default size of a block allocated as single VkDeviceMemory from a "large" heap. +   #define VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE (256ull * 1024 * 1024) +#endif + +/* +Mapping hysteresis is a logic that launches when vmaMapMemory/vmaUnmapMemory is called +or a persistently mapped allocation is created and destroyed several times in a row. +It keeps additional +1 mapping of a device memory block to prevent calling actual +vkMapMemory/vkUnmapMemory too many times, which may improve performance and help +tools like RenderDoc. +*/ +#ifndef VMA_MAPPING_HYSTERESIS_ENABLED +    #define VMA_MAPPING_HYSTERESIS_ENABLED 1 +#endif + +#define VMA_VALIDATE(cond) do { if(!(cond)) { \ +        VMA_ASSERT(0 && "Validation failed: " #cond); \ +        return false; \ +    } } while(false) + +/******************************************************************************* +END OF CONFIGURATION +*/ +#endif // _VMA_CONFIGURATION + + +static const uint8_t VMA_ALLOCATION_FILL_PATTERN_CREATED = 0xDC; +static const uint8_t VMA_ALLOCATION_FILL_PATTERN_DESTROYED = 0xEF; +// Decimal 2139416166, float NaN, little-endian binary 66 E6 84 7F. 
+static const uint32_t VMA_CORRUPTION_DETECTION_MAGIC_VALUE = 0x7F84E666; + +// Copy of some Vulkan definitions so we don't need to check their existence just to handle few constants. +static const uint32_t VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY = 0x00000040; +static const uint32_t VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY = 0x00000080; +static const uint32_t VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY = 0x00020000; +static const uint32_t VK_IMAGE_CREATE_DISJOINT_BIT_COPY = 0x00000200; +static const int32_t VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT_COPY = 1000158000; +static const uint32_t VMA_ALLOCATION_INTERNAL_STRATEGY_MIN_OFFSET = 0x10000000u; +static const uint32_t VMA_ALLOCATION_TRY_COUNT = 32; +static const uint32_t VMA_VENDOR_ID_AMD = 4098; + +// This one is tricky. Vulkan specification defines this code as available since +// Vulkan 1.0, but doesn't actually define it in Vulkan SDK earlier than 1.2.131. +// See pull request #207. +#define VK_ERROR_UNKNOWN_COPY ((VkResult)-13) + + +#if VMA_STATS_STRING_ENABLED +// Correspond to values of enum VmaSuballocationType. +static const char* VMA_SUBALLOCATION_TYPE_NAMES[] = +{ +    "FREE", +    "UNKNOWN", +    "BUFFER", +    "IMAGE_UNKNOWN", +    "IMAGE_LINEAR", +    "IMAGE_OPTIMAL", +}; +#endif + +static VkAllocationCallbacks VmaEmptyAllocationCallbacks = +    { VMA_NULL, VMA_NULL, VMA_NULL, VMA_NULL, VMA_NULL, VMA_NULL }; + + +#ifndef _VMA_ENUM_DECLARATIONS + +enum VmaSuballocationType +{ +    VMA_SUBALLOCATION_TYPE_FREE = 0, +    VMA_SUBALLOCATION_TYPE_UNKNOWN = 1, +    VMA_SUBALLOCATION_TYPE_BUFFER = 2, +    VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN = 3, +    VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR = 4, +    VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL = 5, +    VMA_SUBALLOCATION_TYPE_MAX_ENUM = 0x7FFFFFFF +}; + +enum VMA_CACHE_OPERATION +{ +    VMA_CACHE_FLUSH, +    VMA_CACHE_INVALIDATE +}; + +enum class VmaAllocationRequestType +{ +    Normal, +    TLSF, +    // Used by "Linear" algorithm. +    UpperAddress, +    EndOf1st, +    EndOf2nd, +}; + +#endif // _VMA_ENUM_DECLARATIONS + +#ifndef _VMA_FORWARD_DECLARATIONS +// Opaque handle used by allocation algorithms to identify single allocation in any conforming way. 
+VK_DEFINE_NON_DISPATCHABLE_HANDLE(VmaAllocHandle); + +struct VmaMutexLock; +struct VmaMutexLockRead; +struct VmaMutexLockWrite; + +template<typename T> +struct AtomicTransactionalIncrement; + +template<typename T> +struct VmaStlAllocator; + +template<typename T, typename AllocatorT> +class VmaVector; + +template<typename T, typename AllocatorT, size_t N> +class VmaSmallVector; + +template<typename T> +class VmaPoolAllocator; + +template<typename T> +struct VmaListItem; + +template<typename T> +class VmaRawList; + +template<typename T, typename AllocatorT> +class VmaList; + +template<typename ItemTypeTraits> +class VmaIntrusiveLinkedList; + +#if VMA_STATS_STRING_ENABLED +class VmaStringBuilder; +class VmaJsonWriter; +#endif + +class VmaDeviceMemoryBlock; + +struct VmaDedicatedAllocationListItemTraits; +class VmaDedicatedAllocationList; + +struct VmaSuballocation; +struct VmaSuballocationOffsetLess; +struct VmaSuballocationOffsetGreater; +struct VmaSuballocationItemSizeLess; + +typedef VmaList<VmaSuballocation, VmaStlAllocator<VmaSuballocation>> VmaSuballocationList; + +struct VmaAllocationRequest; + +class VmaBlockMetadata; +class VmaBlockMetadata_Linear; +class VmaBlockMetadata_TLSF; + +class VmaBlockVector; + +struct VmaPoolListItemTraits; + +struct VmaCurrentBudgetData; + +class VmaAllocationObjectAllocator; + +#endif // _VMA_FORWARD_DECLARATIONS + + +#ifndef _VMA_FUNCTIONS + +/* +Returns number of bits set to 1 in (v). + +On specific platforms and compilers you can use intrinsics like: + +Visual Studio: +    return __popcnt(v); +GCC, Clang: +    return static_cast<uint32_t>(__builtin_popcount(v)); + +Define macro VMA_COUNT_BITS_SET to provide your optimized implementation. +But you need to check in runtime whether user's CPU supports these, as some old processors don't. 
+*/ +static inline uint32_t VmaCountBitsSet(uint32_t v) +{ +#if VMA_CPP20 +    return std::popcount(v); +#else +    uint32_t c = v - ((v >> 1) & 0x55555555); +    c = ((c >> 2) & 0x33333333) + (c & 0x33333333); +    c = ((c >> 4) + c) & 0x0F0F0F0F; +    c = ((c >> 8) + c) & 0x00FF00FF; +    c = ((c >> 16) + c) & 0x0000FFFF; +    return c; +#endif +} + +static inline uint8_t VmaBitScanLSB(uint64_t mask) +{ +#if defined(_MSC_VER) && defined(_WIN64) +    unsigned long pos; +    if (_BitScanForward64(&pos, mask)) +        return static_cast<uint8_t>(pos); +    return UINT8_MAX; +#elif VMA_CPP20 +    if(mask) +        return static_cast<uint8_t>(std::countr_zero(mask)); +    return UINT8_MAX; +#elif defined __GNUC__ || defined __clang__ +    return static_cast<uint8_t>(__builtin_ffsll(mask)) - 1U; +#else +    uint8_t pos = 0; +    uint64_t bit = 1; +    do +    { +        if (mask & bit) +            return pos; +        bit <<= 1; +    } while (pos++ < 63); +    return UINT8_MAX; +#endif +} + +static inline uint8_t VmaBitScanLSB(uint32_t mask) +{ +#ifdef _MSC_VER +    unsigned long pos; +    if (_BitScanForward(&pos, mask)) +        return static_cast<uint8_t>(pos); +    return UINT8_MAX; +#elif VMA_CPP20 +    if(mask) +        return static_cast<uint8_t>(std::countr_zero(mask)); +    return UINT8_MAX; +#elif defined __GNUC__ || defined __clang__ +    return static_cast<uint8_t>(__builtin_ffs(mask)) - 1U; +#else +    uint8_t pos = 0; +    uint32_t bit = 1; +    do +    { +        if (mask & bit) +            return pos; +        bit <<= 1; +    } while (pos++ < 31); +    return UINT8_MAX; +#endif +} + +static inline uint8_t VmaBitScanMSB(uint64_t mask) +{ +#if defined(_MSC_VER) && defined(_WIN64) +    unsigned long pos; +    if (_BitScanReverse64(&pos, mask)) +        return static_cast<uint8_t>(pos); +#elif VMA_CPP20 +    if(mask) +        return 63 - static_cast<uint8_t>(std::countl_zero(mask)); +#elif defined __GNUC__ || defined __clang__ +    if (mask) +        return 63 - static_cast<uint8_t>(__builtin_clzll(mask)); +#else +    uint8_t pos = 63; +    uint64_t bit = 1ULL << 63; +    do +    { +        if (mask & bit) +            return pos; +        bit >>= 1; +    } while (pos-- > 0); +#endif +    return UINT8_MAX; +} + +static inline uint8_t VmaBitScanMSB(uint32_t mask) +{ +#ifdef _MSC_VER +    unsigned long pos; +    if (_BitScanReverse(&pos, mask)) +        return static_cast<uint8_t>(pos); +#elif VMA_CPP20 +    if(mask) +        return 31 - static_cast<uint8_t>(std::countl_zero(mask)); +#elif defined __GNUC__ || defined __clang__ +    if (mask) +        return 31 - static_cast<uint8_t>(__builtin_clz(mask)); +#else +    uint8_t pos = 31; +    uint32_t bit = 1UL << 31; +    do +    { +        if (mask & bit) +            return pos; +        bit >>= 1; +    } while (pos-- > 0); +#endif +    return UINT8_MAX; +} + +/* +Returns true if given number is a power of two. +T must be unsigned integer number or signed integer but always nonnegative. +For 0 returns true. +*/ +template <typename T> +inline bool VmaIsPow2(T x) +{ +    return (x & (x - 1)) == 0; +} + +// Aligns given value up to nearest multiply of align value. For example: VmaAlignUp(11, 8) = 16. +// Use types like uint32_t, uint64_t as T. +template <typename T> +static inline T VmaAlignUp(T val, T alignment) +{ +    VMA_HEAVY_ASSERT(VmaIsPow2(alignment)); +    return (val + alignment - 1) & ~(alignment - 1); +} + +// Aligns given value down to nearest multiply of align value. For example: VmaAlignDown(11, 8) = 8. 
+// Use types like uint32_t, uint64_t as T. +template <typename T> +static inline T VmaAlignDown(T val, T alignment) +{ +    VMA_HEAVY_ASSERT(VmaIsPow2(alignment)); +    return val & ~(alignment - 1); +} + +// Division with mathematical rounding to nearest number. +template <typename T> +static inline T VmaRoundDiv(T x, T y) +{ +    return (x + (y / (T)2)) / y; +} + +// Divide by 'y' and round up to nearest integer. +template <typename T> +static inline T VmaDivideRoundingUp(T x, T y) +{ +    return (x + y - (T)1) / y; +} + +// Returns smallest power of 2 greater or equal to v. +static inline uint32_t VmaNextPow2(uint32_t v) +{ +    v--; +    v |= v >> 1; +    v |= v >> 2; +    v |= v >> 4; +    v |= v >> 8; +    v |= v >> 16; +    v++; +    return v; +} + +static inline uint64_t VmaNextPow2(uint64_t v) +{ +    v--; +    v |= v >> 1; +    v |= v >> 2; +    v |= v >> 4; +    v |= v >> 8; +    v |= v >> 16; +    v |= v >> 32; +    v++; +    return v; +} + +// Returns largest power of 2 less or equal to v. +static inline uint32_t VmaPrevPow2(uint32_t v) +{ +    v |= v >> 1; +    v |= v >> 2; +    v |= v >> 4; +    v |= v >> 8; +    v |= v >> 16; +    v = v ^ (v >> 1); +    return v; +} + +static inline uint64_t VmaPrevPow2(uint64_t v) +{ +    v |= v >> 1; +    v |= v >> 2; +    v |= v >> 4; +    v |= v >> 8; +    v |= v >> 16; +    v |= v >> 32; +    v = v ^ (v >> 1); +    return v; +} + +static inline bool VmaStrIsEmpty(const char* pStr) +{ +    return pStr == VMA_NULL || *pStr == '\0'; +} + +/* +Returns true if two memory blocks occupy overlapping pages. +ResourceA must be in less memory offset than ResourceB. + +Algorithm is based on "Vulkan 1.0.39 - A Specification (with all registered Vulkan extensions)" +chapter 11.6 "Resource Memory Association", paragraph "Buffer-Image Granularity". +*/ +static inline bool VmaBlocksOnSamePage( +    VkDeviceSize resourceAOffset, +    VkDeviceSize resourceASize, +    VkDeviceSize resourceBOffset, +    VkDeviceSize pageSize) +{ +    VMA_ASSERT(resourceAOffset + resourceASize <= resourceBOffset && resourceASize > 0 && pageSize > 0); +    VkDeviceSize resourceAEnd = resourceAOffset + resourceASize - 1; +    VkDeviceSize resourceAEndPage = resourceAEnd & ~(pageSize - 1); +    VkDeviceSize resourceBStart = resourceBOffset; +    VkDeviceSize resourceBStartPage = resourceBStart & ~(pageSize - 1); +    return resourceAEndPage == resourceBStartPage; +} + +/* +Returns true if given suballocation types could conflict and must respect +VkPhysicalDeviceLimits::bufferImageGranularity. They conflict if one is buffer +or linear image and another one is optimal image. If type is unknown, behave +conservatively. 
+*/ +static inline bool VmaIsBufferImageGranularityConflict( +    VmaSuballocationType suballocType1, +    VmaSuballocationType suballocType2) +{ +    if (suballocType1 > suballocType2) +    { +        std::swap(suballocType1, suballocType2); +    } + +    switch (suballocType1) +    { +    case VMA_SUBALLOCATION_TYPE_FREE: +        return false; +    case VMA_SUBALLOCATION_TYPE_UNKNOWN: +        return true; +    case VMA_SUBALLOCATION_TYPE_BUFFER: +        return +            suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN || +            suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL; +    case VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN: +        return +            suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN || +            suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR || +            suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL; +    case VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR: +        return +            suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL; +    case VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL: +        return false; +    default: +        VMA_ASSERT(0); +        return true; +    } +} + +static void VmaWriteMagicValue(void* pData, VkDeviceSize offset) +{ +#if VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_DETECT_CORRUPTION +    uint32_t* pDst = (uint32_t*)((char*)pData + offset); +    const size_t numberCount = VMA_DEBUG_MARGIN / sizeof(uint32_t); +    for (size_t i = 0; i < numberCount; ++i, ++pDst) +    { +        *pDst = VMA_CORRUPTION_DETECTION_MAGIC_VALUE; +    } +#else +    // no-op +#endif +} + +static bool VmaValidateMagicValue(const void* pData, VkDeviceSize offset) +{ +#if VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_DETECT_CORRUPTION +    const uint32_t* pSrc = (const uint32_t*)((const char*)pData + offset); +    const size_t numberCount = VMA_DEBUG_MARGIN / sizeof(uint32_t); +    for (size_t i = 0; i < numberCount; ++i, ++pSrc) +    { +        if (*pSrc != VMA_CORRUPTION_DETECTION_MAGIC_VALUE) +        { +            return false; +        } +    } +#endif +    return true; +} + +/* +Fills structure with parameters of an example buffer to be used for transfers +during GPU memory defragmentation. +*/ +static void VmaFillGpuDefragmentationBufferCreateInfo(VkBufferCreateInfo& outBufCreateInfo) +{ +    memset(&outBufCreateInfo, 0, sizeof(outBufCreateInfo)); +    outBufCreateInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; +    outBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; +    outBufCreateInfo.size = (VkDeviceSize)VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE; // Example size. +} + + +/* +Performs binary search and returns iterator to first element that is greater or +equal to (key), according to comparison (cmp). + +Cmp should return true if first argument is less than second argument. + +Returned value is the found element, if present in the collection or place where +new element with value (key) should be inserted. 
+*/ +template <typename CmpLess, typename IterT, typename KeyT> +static IterT VmaBinaryFindFirstNotLess(IterT beg, IterT end, const KeyT& key, const CmpLess& cmp) +{ +    size_t down = 0, up = size_t(end - beg); +    while (down < up) +    { +        const size_t mid = down + (up - down) / 2;  // Overflow-safe midpoint calculation +        if (cmp(*(beg + mid), key)) +        { +            down = mid + 1; +        } +        else +        { +            up = mid; +        } +    } +    return beg + down; +} + +template<typename CmpLess, typename IterT, typename KeyT> +IterT VmaBinaryFindSorted(const IterT& beg, const IterT& end, const KeyT& value, const CmpLess& cmp) +{ +    IterT it = VmaBinaryFindFirstNotLess<CmpLess, IterT, KeyT>( +        beg, end, value, cmp); +    if (it == end || +        (!cmp(*it, value) && !cmp(value, *it))) +    { +        return it; +    } +    return end; +} + +/* +Returns true if all pointers in the array are not-null and unique. +Warning! O(n^2) complexity. Use only inside VMA_HEAVY_ASSERT. +T must be pointer type, e.g. VmaAllocation, VmaPool. +*/ +template<typename T> +static bool VmaValidatePointerArray(uint32_t count, const T* arr) +{ +    for (uint32_t i = 0; i < count; ++i) +    { +        const T iPtr = arr[i]; +        if (iPtr == VMA_NULL) +        { +            return false; +        } +        for (uint32_t j = i + 1; j < count; ++j) +        { +            if (iPtr == arr[j]) +            { +                return false; +            } +        } +    } +    return true; +} + +template<typename MainT, typename NewT> +static inline void VmaPnextChainPushFront(MainT* mainStruct, NewT* newStruct) +{ +    newStruct->pNext = mainStruct->pNext; +    mainStruct->pNext = newStruct; +} +// Finds structure with s->sType == sType in mainStruct->pNext chain. +// Returns pointer to it. If not found, returns null. +template<typename FindT, typename MainT> +static inline const FindT* VmaPnextChainFind(const MainT* mainStruct, VkStructureType sType) +{ +    for(const VkBaseInStructure* s = (const VkBaseInStructure*)mainStruct->pNext; +        s != VMA_NULL; s = s->pNext) +    { +        if(s->sType == sType) +        { +            return (const FindT*)s; +        } +    } +    return VMA_NULL; +} + +// An abstraction over buffer or image `usage` flags, depending on available extensions. +struct VmaBufferImageUsage +{ +#if VMA_KHR_MAINTENANCE5 +    typedef uint64_t BaseType; // VkFlags64 +#else +    typedef uint32_t BaseType; // VkFlags32 +#endif + +    static const VmaBufferImageUsage UNKNOWN; + +    BaseType Value; + +    VmaBufferImageUsage() { *this = UNKNOWN; } +    explicit VmaBufferImageUsage(BaseType usage) : Value(usage) { } +    VmaBufferImageUsage(const VkBufferCreateInfo &createInfo, bool useKhrMaintenance5); +    explicit VmaBufferImageUsage(const VkImageCreateInfo &createInfo); + +    bool operator==(const VmaBufferImageUsage& rhs) const { return Value == rhs.Value; } +    bool operator!=(const VmaBufferImageUsage& rhs) const { return Value != rhs.Value; } + +    bool Contains(BaseType flag) const { return (Value & flag) != 0; } +    bool ContainsDeviceAccess() const +    { +        // This relies on values of VK_IMAGE_USAGE_TRANSFER* being the same as VK_BUFFER_IMAGE_TRANSFER*. 
+        return (Value & ~BaseType(VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT)) != 0; +    } +}; + +const VmaBufferImageUsage VmaBufferImageUsage::UNKNOWN = VmaBufferImageUsage(0); + +VmaBufferImageUsage::VmaBufferImageUsage(const VkBufferCreateInfo &createInfo, +    bool useKhrMaintenance5) +{ +#if VMA_KHR_MAINTENANCE5 +    if(useKhrMaintenance5) +    { +        // If VkBufferCreateInfo::pNext chain contains VkBufferUsageFlags2CreateInfoKHR, +        // take usage from it and ignore VkBufferCreateInfo::usage, per specification +        // of the VK_KHR_maintenance5 extension. +        const VkBufferUsageFlags2CreateInfoKHR* const usageFlags2 = +            VmaPnextChainFind<VkBufferUsageFlags2CreateInfoKHR>(&createInfo, VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR); +        if(usageFlags2) +        { +            this->Value = usageFlags2->usage; +            return; +        } +    } +#endif + +    this->Value = (BaseType)createInfo.usage; +} + +VmaBufferImageUsage::VmaBufferImageUsage(const VkImageCreateInfo &createInfo) +{ +    // Maybe in the future there will be VK_KHR_maintenanceN extension with structure +    // VkImageUsageFlags2CreateInfoKHR, like the one for buffers... + +    this->Value = (BaseType)createInfo.usage; +} + +// This is the main algorithm that guides the selection of a memory type best for an allocation - +// converts usage to required/preferred/not preferred flags. +static bool FindMemoryPreferences( +    bool isIntegratedGPU, +    const VmaAllocationCreateInfo& allocCreateInfo, +    VmaBufferImageUsage bufImgUsage, +    VkMemoryPropertyFlags& outRequiredFlags, +    VkMemoryPropertyFlags& outPreferredFlags, +    VkMemoryPropertyFlags& outNotPreferredFlags) +{ +    outRequiredFlags = allocCreateInfo.requiredFlags; +    outPreferredFlags = allocCreateInfo.preferredFlags; +    outNotPreferredFlags = 0; + +    switch(allocCreateInfo.usage) +    { +    case VMA_MEMORY_USAGE_UNKNOWN: +        break; +    case VMA_MEMORY_USAGE_GPU_ONLY: +        if(!isIntegratedGPU || (outPreferredFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) +        { +            outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; +        } +        break; +    case VMA_MEMORY_USAGE_CPU_ONLY: +        outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; +        break; +    case VMA_MEMORY_USAGE_CPU_TO_GPU: +        outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; +        if(!isIntegratedGPU || (outPreferredFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) +        { +            outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; +        } +        break; +    case VMA_MEMORY_USAGE_GPU_TO_CPU: +        outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; +        outPreferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; +        break; +    case VMA_MEMORY_USAGE_CPU_COPY: +        outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; +        break; +    case VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED: +        outRequiredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT; +        break; +    case VMA_MEMORY_USAGE_AUTO: +    case VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE: +    case VMA_MEMORY_USAGE_AUTO_PREFER_HOST: +    { +        if(bufImgUsage == VmaBufferImageUsage::UNKNOWN) +        { +            VMA_ASSERT(0 && "VMA_MEMORY_USAGE_AUTO* values can only be used with functions like vmaCreateBuffer, vmaCreateImage so that the details of the created resource are known." 
+                " Maybe you use VkBufferUsageFlags2CreateInfoKHR but forgot to use VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT?" ); +            return false; +        } + +        const bool deviceAccess = bufImgUsage.ContainsDeviceAccess(); +        const bool hostAccessSequentialWrite = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT) != 0; +        const bool hostAccessRandom = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT) != 0; +        const bool hostAccessAllowTransferInstead = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT) != 0; +        const bool preferDevice = allocCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; +        const bool preferHost = allocCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_HOST; + +        // CPU random access - e.g. a buffer written to or transferred from GPU to read back on CPU. +        if(hostAccessRandom) +        { +            // Prefer cached. Cannot require it, because some platforms don't have it (e.g. Raspberry Pi - see #362)! +            outPreferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + +            if (!isIntegratedGPU && deviceAccess && hostAccessAllowTransferInstead && !preferHost) +            { +                // Nice if it will end up in HOST_VISIBLE, but more importantly prefer DEVICE_LOCAL. +                // Omitting HOST_VISIBLE here is intentional. +                // In case there is DEVICE_LOCAL | HOST_VISIBLE | HOST_CACHED, it will pick that one. +                // Otherwise, this will give same weight to DEVICE_LOCAL as HOST_VISIBLE | HOST_CACHED and select the former if occurs first on the list. +                outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; +            } +            else +            { +                // Always CPU memory. +                outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; +            } +        } +        // CPU sequential write - may be CPU or host-visible GPU memory, uncached and write-combined. +        else if(hostAccessSequentialWrite) +        { +            // Want uncached and write-combined. +            outNotPreferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + +            if(!isIntegratedGPU && deviceAccess && hostAccessAllowTransferInstead && !preferHost) +            { +                outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; +            } +            else +            { +                outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; +                // Direct GPU access, CPU sequential write (e.g. a dynamic uniform buffer updated every frame) +                if(deviceAccess) +                { +                    // Could go to CPU memory or GPU BAR/unified. Up to the user to decide. If no preference, choose GPU memory. +                    if(preferHost) +                        outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; +                    else +                        outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; +                } +                // GPU no direct access, CPU sequential write (e.g. an upload buffer to be transferred to the GPU) +                else +                { +                    // Could go to CPU memory or GPU BAR/unified. Up to the user to decide. If no preference, choose CPU memory. 
+                    if(preferDevice) +                        outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; +                    else +                        outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; +                } +            } +        } +        // No CPU access +        else +        { +            // if(deviceAccess) +            // +            // GPU access, no CPU access (e.g. a color attachment image) - prefer GPU memory, +            // unless there is a clear preference from the user not to do so. +            // +            // else: +            // +            // No direct GPU access, no CPU access, just transfers. +            // It may be staging copy intended for e.g. preserving image for next frame (then better GPU memory) or +            // a "swap file" copy to free some GPU memory (then better CPU memory). +            // Up to the user to decide. If no preferece, assume the former and choose GPU memory. + +            if(preferHost) +                outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; +            else +                outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; +        } +        break; +    } +    default: +        VMA_ASSERT(0); +    } + +    // Avoid DEVICE_COHERENT unless explicitly requested. +    if(((allocCreateInfo.requiredFlags | allocCreateInfo.preferredFlags) & +        (VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY | VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY)) == 0) +    { +        outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY; +    } + +    return true; +} + +//////////////////////////////////////////////////////////////////////////////// +// Memory allocation + +static void* VmaMalloc(const VkAllocationCallbacks* pAllocationCallbacks, size_t size, size_t alignment) +{ +    void* result = VMA_NULL; +    if ((pAllocationCallbacks != VMA_NULL) && +        (pAllocationCallbacks->pfnAllocation != VMA_NULL)) +    { +        result = (*pAllocationCallbacks->pfnAllocation)( +            pAllocationCallbacks->pUserData, +            size, +            alignment, +            VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); +    } +    else +    { +        result = VMA_SYSTEM_ALIGNED_MALLOC(size, alignment); +    } +    VMA_ASSERT(result != VMA_NULL && "CPU memory allocation failed."); +    return result; +} + +static void VmaFree(const VkAllocationCallbacks* pAllocationCallbacks, void* ptr) +{ +    if ((pAllocationCallbacks != VMA_NULL) && +        (pAllocationCallbacks->pfnFree != VMA_NULL)) +    { +        (*pAllocationCallbacks->pfnFree)(pAllocationCallbacks->pUserData, ptr); +    } +    else +    { +        VMA_SYSTEM_ALIGNED_FREE(ptr); +    } +} + +template<typename T> +static T* VmaAllocate(const VkAllocationCallbacks* pAllocationCallbacks) +{ +    return (T*)VmaMalloc(pAllocationCallbacks, sizeof(T), VMA_ALIGN_OF(T)); +} + +template<typename T> +static T* VmaAllocateArray(const VkAllocationCallbacks* pAllocationCallbacks, size_t count) +{ +    return (T*)VmaMalloc(pAllocationCallbacks, sizeof(T) * count, VMA_ALIGN_OF(T)); +} + +#define vma_new(allocator, type)   new(VmaAllocate<type>(allocator))(type) + +#define vma_new_array(allocator, type, count)   new(VmaAllocateArray<type>((allocator), (count)))(type) + +template<typename T> +static void vma_delete(const VkAllocationCallbacks* pAllocationCallbacks, T* ptr) +{ +    ptr->~T(); +    VmaFree(pAllocationCallbacks, ptr); +} + +template<typename T> +static void vma_delete_array(const VkAllocationCallbacks* 
pAllocationCallbacks, T* ptr, size_t count) +{ +    if (ptr != VMA_NULL) +    { +        for (size_t i = count; i--; ) +        { +            ptr[i].~T(); +        } +        VmaFree(pAllocationCallbacks, ptr); +    } +} + +static char* VmaCreateStringCopy(const VkAllocationCallbacks* allocs, const char* srcStr) +{ +    if (srcStr != VMA_NULL) +    { +        const size_t len = strlen(srcStr); +        char* const result = vma_new_array(allocs, char, len + 1); +        memcpy(result, srcStr, len + 1); +        return result; +    } +    return VMA_NULL; +} + +#if VMA_STATS_STRING_ENABLED +static char* VmaCreateStringCopy(const VkAllocationCallbacks* allocs, const char* srcStr, size_t strLen) +{ +    if (srcStr != VMA_NULL) +    { +        char* const result = vma_new_array(allocs, char, strLen + 1); +        memcpy(result, srcStr, strLen); +        result[strLen] = '\0'; +        return result; +    } +    return VMA_NULL; +} +#endif // VMA_STATS_STRING_ENABLED + +static void VmaFreeString(const VkAllocationCallbacks* allocs, char* str) +{ +    if (str != VMA_NULL) +    { +        const size_t len = strlen(str); +        vma_delete_array(allocs, str, len + 1); +    } +} + +template<typename CmpLess, typename VectorT> +size_t VmaVectorInsertSorted(VectorT& vector, const typename VectorT::value_type& value) +{ +    const size_t indexToInsert = VmaBinaryFindFirstNotLess( +        vector.data(), +        vector.data() + vector.size(), +        value, +        CmpLess()) - vector.data(); +    VmaVectorInsert(vector, indexToInsert, value); +    return indexToInsert; +} + +template<typename CmpLess, typename VectorT> +bool VmaVectorRemoveSorted(VectorT& vector, const typename VectorT::value_type& value) +{ +    CmpLess comparator; +    typename VectorT::iterator it = VmaBinaryFindFirstNotLess( +        vector.begin(), +        vector.end(), +        value, +        comparator); +    if ((it != vector.end()) && !comparator(*it, value) && !comparator(value, *it)) +    { +        size_t indexToRemove = it - vector.begin(); +        VmaVectorRemove(vector, indexToRemove); +        return true; +    } +    return false; +} +#endif // _VMA_FUNCTIONS + +#ifndef _VMA_STATISTICS_FUNCTIONS + +static void VmaClearStatistics(VmaStatistics& outStats) +{ +    outStats.blockCount = 0; +    outStats.allocationCount = 0; +    outStats.blockBytes = 0; +    outStats.allocationBytes = 0; +} + +static void VmaAddStatistics(VmaStatistics& inoutStats, const VmaStatistics& src) +{ +    inoutStats.blockCount += src.blockCount; +    inoutStats.allocationCount += src.allocationCount; +    inoutStats.blockBytes += src.blockBytes; +    inoutStats.allocationBytes += src.allocationBytes; +} + +static void VmaClearDetailedStatistics(VmaDetailedStatistics& outStats) +{ +    VmaClearStatistics(outStats.statistics); +    outStats.unusedRangeCount = 0; +    outStats.allocationSizeMin = VK_WHOLE_SIZE; +    outStats.allocationSizeMax = 0; +    outStats.unusedRangeSizeMin = VK_WHOLE_SIZE; +    outStats.unusedRangeSizeMax = 0; +} + +static void VmaAddDetailedStatisticsAllocation(VmaDetailedStatistics& inoutStats, VkDeviceSize size) +{ +    inoutStats.statistics.allocationCount++; +    inoutStats.statistics.allocationBytes += size; +    inoutStats.allocationSizeMin = VMA_MIN(inoutStats.allocationSizeMin, size); +    inoutStats.allocationSizeMax = VMA_MAX(inoutStats.allocationSizeMax, size); +} + +static void VmaAddDetailedStatisticsUnusedRange(VmaDetailedStatistics& inoutStats, VkDeviceSize size) +{ +    inoutStats.unusedRangeCount++; 
+    inoutStats.unusedRangeSizeMin = VMA_MIN(inoutStats.unusedRangeSizeMin, size); +    inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, size); +} + +static void VmaAddDetailedStatistics(VmaDetailedStatistics& inoutStats, const VmaDetailedStatistics& src) +{ +    VmaAddStatistics(inoutStats.statistics, src.statistics); +    inoutStats.unusedRangeCount += src.unusedRangeCount; +    inoutStats.allocationSizeMin = VMA_MIN(inoutStats.allocationSizeMin, src.allocationSizeMin); +    inoutStats.allocationSizeMax = VMA_MAX(inoutStats.allocationSizeMax, src.allocationSizeMax); +    inoutStats.unusedRangeSizeMin = VMA_MIN(inoutStats.unusedRangeSizeMin, src.unusedRangeSizeMin); +    inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, src.unusedRangeSizeMax); +} + +#endif // _VMA_STATISTICS_FUNCTIONS + +#ifndef _VMA_MUTEX_LOCK +// Helper RAII class to lock a mutex in constructor and unlock it in destructor (at the end of scope). +struct VmaMutexLock +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaMutexLock) +public: +    VmaMutexLock(VMA_MUTEX& mutex, bool useMutex = true) : +        m_pMutex(useMutex ? &mutex : VMA_NULL) +    { +        if (m_pMutex) { m_pMutex->Lock(); } +    } +    ~VmaMutexLock() {  if (m_pMutex) { m_pMutex->Unlock(); } } + +private: +    VMA_MUTEX* m_pMutex; +}; + +// Helper RAII class to lock a RW mutex in constructor and unlock it in destructor (at the end of scope), for reading. +struct VmaMutexLockRead +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaMutexLockRead) +public: +    VmaMutexLockRead(VMA_RW_MUTEX& mutex, bool useMutex) : +        m_pMutex(useMutex ? &mutex : VMA_NULL) +    { +        if (m_pMutex) { m_pMutex->LockRead(); } +    } +    ~VmaMutexLockRead() { if (m_pMutex) { m_pMutex->UnlockRead(); } } + +private: +    VMA_RW_MUTEX* m_pMutex; +}; + +// Helper RAII class to lock a RW mutex in constructor and unlock it in destructor (at the end of scope), for writing. +struct VmaMutexLockWrite +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaMutexLockWrite) +public: +    VmaMutexLockWrite(VMA_RW_MUTEX& mutex, bool useMutex) +        : m_pMutex(useMutex ? &mutex : VMA_NULL) +    { +        if (m_pMutex) { m_pMutex->LockWrite(); } +    } +    ~VmaMutexLockWrite() { if (m_pMutex) { m_pMutex->UnlockWrite(); } } + +private: +    VMA_RW_MUTEX* m_pMutex; +}; + +#if VMA_DEBUG_GLOBAL_MUTEX +    static VMA_MUTEX gDebugGlobalMutex; +    #define VMA_DEBUG_GLOBAL_MUTEX_LOCK VmaMutexLock debugGlobalMutexLock(gDebugGlobalMutex, true); +#else +    #define VMA_DEBUG_GLOBAL_MUTEX_LOCK +#endif +#endif // _VMA_MUTEX_LOCK + +#ifndef _VMA_ATOMIC_TRANSACTIONAL_INCREMENT +// An object that increments given atomic but decrements it back in the destructor unless Commit() is called. +template<typename AtomicT> +struct AtomicTransactionalIncrement +{ +public: +    using T = decltype(AtomicT().load()); + +    ~AtomicTransactionalIncrement() +    { +        if(m_Atomic) +            --(*m_Atomic); +    } + +    void Commit() { m_Atomic = VMA_NULL; } +    T Increment(AtomicT* atomic) +    { +        m_Atomic = atomic; +        return m_Atomic->fetch_add(1); +    } + +private: +    AtomicT* m_Atomic = VMA_NULL; +}; +#endif // _VMA_ATOMIC_TRANSACTIONAL_INCREMENT + +#ifndef _VMA_STL_ALLOCATOR +// STL-compatible allocator. 
+template<typename T> +struct VmaStlAllocator +{ +    const VkAllocationCallbacks* const m_pCallbacks; +    typedef T value_type; + +    VmaStlAllocator(const VkAllocationCallbacks* pCallbacks) : m_pCallbacks(pCallbacks) {} +    template<typename U> +    VmaStlAllocator(const VmaStlAllocator<U>& src) : m_pCallbacks(src.m_pCallbacks) {} +    VmaStlAllocator(const VmaStlAllocator&) = default; +    VmaStlAllocator& operator=(const VmaStlAllocator&) = delete; + +    T* allocate(size_t n) { return VmaAllocateArray<T>(m_pCallbacks, n); } +    void deallocate(T* p, size_t n) { VmaFree(m_pCallbacks, p); } + +    template<typename U> +    bool operator==(const VmaStlAllocator<U>& rhs) const +    { +        return m_pCallbacks == rhs.m_pCallbacks; +    } +    template<typename U> +    bool operator!=(const VmaStlAllocator<U>& rhs) const +    { +        return m_pCallbacks != rhs.m_pCallbacks; +    } +}; +#endif // _VMA_STL_ALLOCATOR + +#ifndef _VMA_VECTOR +/* Class with interface compatible with subset of std::vector. +T must be POD because constructors and destructors are not called and memcpy is +used for these objects. */ +template<typename T, typename AllocatorT> +class VmaVector +{ +public: +    typedef T value_type; +    typedef T* iterator; +    typedef const T* const_iterator; + +    VmaVector(const AllocatorT& allocator); +    VmaVector(size_t count, const AllocatorT& allocator); +    // This version of the constructor is here for compatibility with pre-C++14 std::vector. +    // value is unused. +    VmaVector(size_t count, const T& value, const AllocatorT& allocator) : VmaVector(count, allocator) {} +    VmaVector(const VmaVector<T, AllocatorT>& src); +    VmaVector& operator=(const VmaVector& rhs); +    ~VmaVector() { VmaFree(m_Allocator.m_pCallbacks, m_pArray); } + +    bool empty() const { return m_Count == 0; } +    size_t size() const { return m_Count; } +    T* data() { return m_pArray; } +    T& front() { VMA_HEAVY_ASSERT(m_Count > 0); return m_pArray[0]; } +    T& back() { VMA_HEAVY_ASSERT(m_Count > 0); return m_pArray[m_Count - 1]; } +    const T* data() const { return m_pArray; } +    const T& front() const { VMA_HEAVY_ASSERT(m_Count > 0); return m_pArray[0]; } +    const T& back() const { VMA_HEAVY_ASSERT(m_Count > 0); return m_pArray[m_Count - 1]; } + +    iterator begin() { return m_pArray; } +    iterator end() { return m_pArray + m_Count; } +    const_iterator cbegin() const { return m_pArray; } +    const_iterator cend() const { return m_pArray + m_Count; } +    const_iterator begin() const { return cbegin(); } +    const_iterator end() const { return cend(); } + +    void pop_front() { VMA_HEAVY_ASSERT(m_Count > 0); remove(0); } +    void pop_back() { VMA_HEAVY_ASSERT(m_Count > 0); resize(size() - 1); } +    void push_front(const T& src) { insert(0, src); } + +    void push_back(const T& src); +    void reserve(size_t newCapacity, bool freeMemory = false); +    void resize(size_t newCount); +    void clear() { resize(0); } +    void shrink_to_fit(); +    void insert(size_t index, const T& src); +    void remove(size_t index); + +    T& operator[](size_t index) { VMA_HEAVY_ASSERT(index < m_Count); return m_pArray[index]; } +    const T& operator[](size_t index) const { VMA_HEAVY_ASSERT(index < m_Count); return m_pArray[index]; } + +private: +    AllocatorT m_Allocator; +    T* m_pArray; +    size_t m_Count; +    size_t m_Capacity; +}; + +#ifndef _VMA_VECTOR_FUNCTIONS +template<typename T, typename AllocatorT> +VmaVector<T, AllocatorT>::VmaVector(const AllocatorT& 
allocator) +    : m_Allocator(allocator), +    m_pArray(VMA_NULL), +    m_Count(0), +    m_Capacity(0) {} + +template<typename T, typename AllocatorT> +VmaVector<T, AllocatorT>::VmaVector(size_t count, const AllocatorT& allocator) +    : m_Allocator(allocator), +    m_pArray(count ? (T*)VmaAllocateArray<T>(allocator.m_pCallbacks, count) : VMA_NULL), +    m_Count(count), +    m_Capacity(count) {} + +template<typename T, typename AllocatorT> +VmaVector<T, AllocatorT>::VmaVector(const VmaVector& src) +    : m_Allocator(src.m_Allocator), +    m_pArray(src.m_Count ? (T*)VmaAllocateArray<T>(src.m_Allocator.m_pCallbacks, src.m_Count) : VMA_NULL), +    m_Count(src.m_Count), +    m_Capacity(src.m_Count) +{ +    if (m_Count != 0) +    { +        memcpy(m_pArray, src.m_pArray, m_Count * sizeof(T)); +    } +} + +template<typename T, typename AllocatorT> +VmaVector<T, AllocatorT>& VmaVector<T, AllocatorT>::operator=(const VmaVector& rhs) +{ +    if (&rhs != this) +    { +        resize(rhs.m_Count); +        if (m_Count != 0) +        { +            memcpy(m_pArray, rhs.m_pArray, m_Count * sizeof(T)); +        } +    } +    return *this; +} + +template<typename T, typename AllocatorT> +void VmaVector<T, AllocatorT>::push_back(const T& src) +{ +    const size_t newIndex = size(); +    resize(newIndex + 1); +    m_pArray[newIndex] = src; +} + +template<typename T, typename AllocatorT> +void VmaVector<T, AllocatorT>::reserve(size_t newCapacity, bool freeMemory) +{ +    newCapacity = VMA_MAX(newCapacity, m_Count); + +    if ((newCapacity < m_Capacity) && !freeMemory) +    { +        newCapacity = m_Capacity; +    } + +    if (newCapacity != m_Capacity) +    { +        T* const newArray = newCapacity ? VmaAllocateArray<T>(m_Allocator, newCapacity) : VMA_NULL; +        if (m_Count != 0) +        { +            memcpy(newArray, m_pArray, m_Count * sizeof(T)); +        } +        VmaFree(m_Allocator.m_pCallbacks, m_pArray); +        m_Capacity = newCapacity; +        m_pArray = newArray; +    } +} + +template<typename T, typename AllocatorT> +void VmaVector<T, AllocatorT>::resize(size_t newCount) +{ +    size_t newCapacity = m_Capacity; +    if (newCount > m_Capacity) +    { +        newCapacity = VMA_MAX(newCount, VMA_MAX(m_Capacity * 3 / 2, (size_t)8)); +    } + +    if (newCapacity != m_Capacity) +    { +        T* const newArray = newCapacity ? 
VmaAllocateArray<T>(m_Allocator.m_pCallbacks, newCapacity) : VMA_NULL; +        const size_t elementsToCopy = VMA_MIN(m_Count, newCount); +        if (elementsToCopy != 0) +        { +            memcpy(newArray, m_pArray, elementsToCopy * sizeof(T)); +        } +        VmaFree(m_Allocator.m_pCallbacks, m_pArray); +        m_Capacity = newCapacity; +        m_pArray = newArray; +    } + +    m_Count = newCount; +} + +template<typename T, typename AllocatorT> +void VmaVector<T, AllocatorT>::shrink_to_fit() +{ +    if (m_Capacity > m_Count) +    { +        T* newArray = VMA_NULL; +        if (m_Count > 0) +        { +            newArray = VmaAllocateArray<T>(m_Allocator.m_pCallbacks, m_Count); +            memcpy(newArray, m_pArray, m_Count * sizeof(T)); +        } +        VmaFree(m_Allocator.m_pCallbacks, m_pArray); +        m_Capacity = m_Count; +        m_pArray = newArray; +    } +} + +template<typename T, typename AllocatorT> +void VmaVector<T, AllocatorT>::insert(size_t index, const T& src) +{ +    VMA_HEAVY_ASSERT(index <= m_Count); +    const size_t oldCount = size(); +    resize(oldCount + 1); +    if (index < oldCount) +    { +        memmove(m_pArray + (index + 1), m_pArray + index, (oldCount - index) * sizeof(T)); +    } +    m_pArray[index] = src; +} + +template<typename T, typename AllocatorT> +void VmaVector<T, AllocatorT>::remove(size_t index) +{ +    VMA_HEAVY_ASSERT(index < m_Count); +    const size_t oldCount = size(); +    if (index < oldCount - 1) +    { +        memmove(m_pArray + index, m_pArray + (index + 1), (oldCount - index - 1) * sizeof(T)); +    } +    resize(oldCount - 1); +} +#endif // _VMA_VECTOR_FUNCTIONS + +template<typename T, typename allocatorT> +static void VmaVectorInsert(VmaVector<T, allocatorT>& vec, size_t index, const T& item) +{ +    vec.insert(index, item); +} + +template<typename T, typename allocatorT> +static void VmaVectorRemove(VmaVector<T, allocatorT>& vec, size_t index) +{ +    vec.remove(index); +} +#endif // _VMA_VECTOR + +#ifndef _VMA_SMALL_VECTOR +/* +This is a vector (a variable-sized array), optimized for the case when the array is small. + +It contains some number of elements in-place, which allows it to avoid heap allocation +when the actual number of elements is below that threshold. This allows normal "small" +cases to be fast without losing generality for large inputs. +*/ +template<typename T, typename AllocatorT, size_t N> +class VmaSmallVector +{ +public: +    typedef T value_type; +    typedef T* iterator; + +    VmaSmallVector(const AllocatorT& allocator); +    VmaSmallVector(size_t count, const AllocatorT& allocator); +    template<typename SrcT, typename SrcAllocatorT, size_t SrcN> +    VmaSmallVector(const VmaSmallVector<SrcT, SrcAllocatorT, SrcN>&) = delete; +    template<typename SrcT, typename SrcAllocatorT, size_t SrcN> +    VmaSmallVector<T, AllocatorT, N>& operator=(const VmaSmallVector<SrcT, SrcAllocatorT, SrcN>&) = delete; +    ~VmaSmallVector() = default; + +    bool empty() const { return m_Count == 0; } +    size_t size() const { return m_Count; } +    T* data() { return m_Count > N ? m_DynamicArray.data() : m_StaticArray; } +    T& front() { VMA_HEAVY_ASSERT(m_Count > 0); return data()[0]; } +    T& back() { VMA_HEAVY_ASSERT(m_Count > 0); return data()[m_Count - 1]; } +    const T* data() const { return m_Count > N ? 
m_DynamicArray.data() : m_StaticArray; } +    const T& front() const { VMA_HEAVY_ASSERT(m_Count > 0); return data()[0]; } +    const T& back() const { VMA_HEAVY_ASSERT(m_Count > 0); return data()[m_Count - 1]; } + +    iterator begin() { return data(); } +    iterator end() { return data() + m_Count; } + +    void pop_front() { VMA_HEAVY_ASSERT(m_Count > 0); remove(0); } +    void pop_back() { VMA_HEAVY_ASSERT(m_Count > 0); resize(size() - 1); } +    void push_front(const T& src) { insert(0, src); } + +    void push_back(const T& src); +    void resize(size_t newCount, bool freeMemory = false); +    void clear(bool freeMemory = false); +    void insert(size_t index, const T& src); +    void remove(size_t index); + +    T& operator[](size_t index) { VMA_HEAVY_ASSERT(index < m_Count); return data()[index]; } +    const T& operator[](size_t index) const { VMA_HEAVY_ASSERT(index < m_Count); return data()[index]; } + +private: +    size_t m_Count; +    T m_StaticArray[N]; // Used when m_Size <= N +    VmaVector<T, AllocatorT> m_DynamicArray; // Used when m_Size > N +}; + +#ifndef _VMA_SMALL_VECTOR_FUNCTIONS +template<typename T, typename AllocatorT, size_t N> +VmaSmallVector<T, AllocatorT, N>::VmaSmallVector(const AllocatorT& allocator) +    : m_Count(0), +    m_DynamicArray(allocator) {} + +template<typename T, typename AllocatorT, size_t N> +VmaSmallVector<T, AllocatorT, N>::VmaSmallVector(size_t count, const AllocatorT& allocator) +    : m_Count(count), +    m_DynamicArray(count > N ? count : 0, allocator) {} + +template<typename T, typename AllocatorT, size_t N> +void VmaSmallVector<T, AllocatorT, N>::push_back(const T& src) +{ +    const size_t newIndex = size(); +    resize(newIndex + 1); +    data()[newIndex] = src; +} + +template<typename T, typename AllocatorT, size_t N> +void VmaSmallVector<T, AllocatorT, N>::resize(size_t newCount, bool freeMemory) +{ +    if (newCount > N && m_Count > N) +    { +        // Any direction, staying in m_DynamicArray +        m_DynamicArray.resize(newCount); +        if (freeMemory) +        { +            m_DynamicArray.shrink_to_fit(); +        } +    } +    else if (newCount > N && m_Count <= N) +    { +        // Growing, moving from m_StaticArray to m_DynamicArray +        m_DynamicArray.resize(newCount); +        if (m_Count > 0) +        { +            memcpy(m_DynamicArray.data(), m_StaticArray, m_Count * sizeof(T)); +        } +    } +    else if (newCount <= N && m_Count > N) +    { +        // Shrinking, moving from m_DynamicArray to m_StaticArray +        if (newCount > 0) +        { +            memcpy(m_StaticArray, m_DynamicArray.data(), newCount * sizeof(T)); +        } +        m_DynamicArray.resize(0); +        if (freeMemory) +        { +            m_DynamicArray.shrink_to_fit(); +        } +    } +    else +    { +        // Any direction, staying in m_StaticArray - nothing to do here +    } +    m_Count = newCount; +} + +template<typename T, typename AllocatorT, size_t N> +void VmaSmallVector<T, AllocatorT, N>::clear(bool freeMemory) +{ +    m_DynamicArray.clear(); +    if (freeMemory) +    { +        m_DynamicArray.shrink_to_fit(); +    } +    m_Count = 0; +} + +template<typename T, typename AllocatorT, size_t N> +void VmaSmallVector<T, AllocatorT, N>::insert(size_t index, const T& src) +{ +    VMA_HEAVY_ASSERT(index <= m_Count); +    const size_t oldCount = size(); +    resize(oldCount + 1); +    T* const dataPtr = data(); +    if (index < oldCount) +    { +        //  I know, this could be more optimal for case where memmove can 
be memcpy directly from m_StaticArray to m_DynamicArray. +        memmove(dataPtr + (index + 1), dataPtr + index, (oldCount - index) * sizeof(T)); +    } +    dataPtr[index] = src; +} + +template<typename T, typename AllocatorT, size_t N> +void VmaSmallVector<T, AllocatorT, N>::remove(size_t index) +{ +    VMA_HEAVY_ASSERT(index < m_Count); +    const size_t oldCount = size(); +    if (index < oldCount - 1) +    { +        //  I know, this could be more optimal for case where memmove can be memcpy directly from m_DynamicArray to m_StaticArray. +        T* const dataPtr = data(); +        memmove(dataPtr + index, dataPtr + (index + 1), (oldCount - index - 1) * sizeof(T)); +    } +    resize(oldCount - 1); +} +#endif // _VMA_SMALL_VECTOR_FUNCTIONS +#endif // _VMA_SMALL_VECTOR + +#ifndef _VMA_POOL_ALLOCATOR +/* +Allocator for objects of type T using a list of arrays (pools) to speed up +allocation. Number of elements that can be allocated is not bounded because +allocator can create multiple blocks. +*/ +template<typename T> +class VmaPoolAllocator +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaPoolAllocator) +public: +    VmaPoolAllocator(const VkAllocationCallbacks* pAllocationCallbacks, uint32_t firstBlockCapacity); +    ~VmaPoolAllocator(); +    template<typename... Types> T* Alloc(Types&&... args); +    void Free(T* ptr); + +private: +    union Item +    { +        uint32_t NextFreeIndex; +        alignas(T) char Value[sizeof(T)]; +    }; +    struct ItemBlock +    { +        Item* pItems; +        uint32_t Capacity; +        uint32_t FirstFreeIndex; +    }; + +    const VkAllocationCallbacks* m_pAllocationCallbacks; +    const uint32_t m_FirstBlockCapacity; +    VmaVector<ItemBlock, VmaStlAllocator<ItemBlock>> m_ItemBlocks; + +    ItemBlock& CreateNewBlock(); +}; + +#ifndef _VMA_POOL_ALLOCATOR_FUNCTIONS +template<typename T> +VmaPoolAllocator<T>::VmaPoolAllocator(const VkAllocationCallbacks* pAllocationCallbacks, uint32_t firstBlockCapacity) +    : m_pAllocationCallbacks(pAllocationCallbacks), +    m_FirstBlockCapacity(firstBlockCapacity), +    m_ItemBlocks(VmaStlAllocator<ItemBlock>(pAllocationCallbacks)) +{ +    VMA_ASSERT(m_FirstBlockCapacity > 1); +} + +template<typename T> +VmaPoolAllocator<T>::~VmaPoolAllocator() +{ +    for (size_t i = m_ItemBlocks.size(); i--;) +        vma_delete_array(m_pAllocationCallbacks, m_ItemBlocks[i].pItems, m_ItemBlocks[i].Capacity); +    m_ItemBlocks.clear(); +} + +template<typename T> +template<typename... Types> T* VmaPoolAllocator<T>::Alloc(Types&&... args) +{ +    for (size_t i = m_ItemBlocks.size(); i--; ) +    { +        ItemBlock& block = m_ItemBlocks[i]; +        // This block has some free items: Use first one. +        if (block.FirstFreeIndex != UINT32_MAX) +        { +            Item* const pItem = &block.pItems[block.FirstFreeIndex]; +            block.FirstFreeIndex = pItem->NextFreeIndex; +            T* result = (T*)&pItem->Value; +            new(result)T(std::forward<Types>(args)...); // Explicit constructor call. +            return result; +        } +    } + +    // No block has free item: Create new one and use it. +    ItemBlock& newBlock = CreateNewBlock(); +    Item* const pItem = &newBlock.pItems[0]; +    newBlock.FirstFreeIndex = pItem->NextFreeIndex; +    T* result = (T*)&pItem->Value; +    new(result) T(std::forward<Types>(args)...); // Explicit constructor call. +    return result; +} + +template<typename T> +void VmaPoolAllocator<T>::Free(T* ptr) +{ +    // Search all memory blocks to find ptr. 
+    for (size_t i = m_ItemBlocks.size(); i--; ) +    { +        ItemBlock& block = m_ItemBlocks[i]; + +        // Casting to union. +        Item* pItemPtr; +        memcpy(&pItemPtr, &ptr, sizeof(pItemPtr)); + +        // Check if pItemPtr is in address range of this block. +        if ((pItemPtr >= block.pItems) && (pItemPtr < block.pItems + block.Capacity)) +        { +            ptr->~T(); // Explicit destructor call. +            const uint32_t index = static_cast<uint32_t>(pItemPtr - block.pItems); +            pItemPtr->NextFreeIndex = block.FirstFreeIndex; +            block.FirstFreeIndex = index; +            return; +        } +    } +    VMA_ASSERT(0 && "Pointer doesn't belong to this memory pool."); +} + +template<typename T> +typename VmaPoolAllocator<T>::ItemBlock& VmaPoolAllocator<T>::CreateNewBlock() +{ +    const uint32_t newBlockCapacity = m_ItemBlocks.empty() ? +        m_FirstBlockCapacity : m_ItemBlocks.back().Capacity * 3 / 2; + +    const ItemBlock newBlock = +    { +        vma_new_array(m_pAllocationCallbacks, Item, newBlockCapacity), +        newBlockCapacity, +        0 +    }; + +    m_ItemBlocks.push_back(newBlock); + +    // Setup singly-linked list of all free items in this block. +    for (uint32_t i = 0; i < newBlockCapacity - 1; ++i) +        newBlock.pItems[i].NextFreeIndex = i + 1; +    newBlock.pItems[newBlockCapacity - 1].NextFreeIndex = UINT32_MAX; +    return m_ItemBlocks.back(); +} +#endif // _VMA_POOL_ALLOCATOR_FUNCTIONS +#endif // _VMA_POOL_ALLOCATOR + +#ifndef _VMA_RAW_LIST +template<typename T> +struct VmaListItem +{ +    VmaListItem* pPrev; +    VmaListItem* pNext; +    T Value; +}; + +// Doubly linked list. +template<typename T> +class VmaRawList +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaRawList) +public: +    typedef VmaListItem<T> ItemType; + +    VmaRawList(const VkAllocationCallbacks* pAllocationCallbacks); +    // Intentionally not calling Clear, because that would be unnecessary +    // computations to return all items to m_ItemAllocator as free. +    ~VmaRawList() = default; + +    size_t GetCount() const { return m_Count; } +    bool IsEmpty() const { return m_Count == 0; } + +    ItemType* Front() { return m_pFront; } +    ItemType* Back() { return m_pBack; } +    const ItemType* Front() const { return m_pFront; } +    const ItemType* Back() const { return m_pBack; } + +    ItemType* PushFront(); +    ItemType* PushBack(); +    ItemType* PushFront(const T& value); +    ItemType* PushBack(const T& value); +    void PopFront(); +    void PopBack(); + +    // Item can be null - it means PushBack. +    ItemType* InsertBefore(ItemType* pItem); +    // Item can be null - it means PushFront. 
+    ItemType* InsertAfter(ItemType* pItem); +    ItemType* InsertBefore(ItemType* pItem, const T& value); +    ItemType* InsertAfter(ItemType* pItem, const T& value); + +    void Clear(); +    void Remove(ItemType* pItem); + +private: +    const VkAllocationCallbacks* const m_pAllocationCallbacks; +    VmaPoolAllocator<ItemType> m_ItemAllocator; +    ItemType* m_pFront; +    ItemType* m_pBack; +    size_t m_Count; +}; + +#ifndef _VMA_RAW_LIST_FUNCTIONS +template<typename T> +VmaRawList<T>::VmaRawList(const VkAllocationCallbacks* pAllocationCallbacks) +    : m_pAllocationCallbacks(pAllocationCallbacks), +    m_ItemAllocator(pAllocationCallbacks, 128), +    m_pFront(VMA_NULL), +    m_pBack(VMA_NULL), +    m_Count(0) {} + +template<typename T> +VmaListItem<T>* VmaRawList<T>::PushFront() +{ +    ItemType* const pNewItem = m_ItemAllocator.Alloc(); +    pNewItem->pPrev = VMA_NULL; +    if (IsEmpty()) +    { +        pNewItem->pNext = VMA_NULL; +        m_pFront = pNewItem; +        m_pBack = pNewItem; +        m_Count = 1; +    } +    else +    { +        pNewItem->pNext = m_pFront; +        m_pFront->pPrev = pNewItem; +        m_pFront = pNewItem; +        ++m_Count; +    } +    return pNewItem; +} + +template<typename T> +VmaListItem<T>* VmaRawList<T>::PushBack() +{ +    ItemType* const pNewItem = m_ItemAllocator.Alloc(); +    pNewItem->pNext = VMA_NULL; +    if(IsEmpty()) +    { +        pNewItem->pPrev = VMA_NULL; +        m_pFront = pNewItem; +        m_pBack = pNewItem; +        m_Count = 1; +    } +    else +    { +        pNewItem->pPrev = m_pBack; +        m_pBack->pNext = pNewItem; +        m_pBack = pNewItem; +        ++m_Count; +    } +    return pNewItem; +} + +template<typename T> +VmaListItem<T>* VmaRawList<T>::PushFront(const T& value) +{ +    ItemType* const pNewItem = PushFront(); +    pNewItem->Value = value; +    return pNewItem; +} + +template<typename T> +VmaListItem<T>* VmaRawList<T>::PushBack(const T& value) +{ +    ItemType* const pNewItem = PushBack(); +    pNewItem->Value = value; +    return pNewItem; +} + +template<typename T> +void VmaRawList<T>::PopFront() +{ +    VMA_HEAVY_ASSERT(m_Count > 0); +    ItemType* const pFrontItem = m_pFront; +    ItemType* const pNextItem = pFrontItem->pNext; +    if (pNextItem != VMA_NULL) +    { +        pNextItem->pPrev = VMA_NULL; +    } +    m_pFront = pNextItem; +    m_ItemAllocator.Free(pFrontItem); +    --m_Count; +} + +template<typename T> +void VmaRawList<T>::PopBack() +{ +    VMA_HEAVY_ASSERT(m_Count > 0); +    ItemType* const pBackItem = m_pBack; +    ItemType* const pPrevItem = pBackItem->pPrev; +    if(pPrevItem != VMA_NULL) +    { +        pPrevItem->pNext = VMA_NULL; +    } +    m_pBack = pPrevItem; +    m_ItemAllocator.Free(pBackItem); +    --m_Count; +} + +template<typename T> +void VmaRawList<T>::Clear() +{ +    if (IsEmpty() == false) +    { +        ItemType* pItem = m_pBack; +        while (pItem != VMA_NULL) +        { +            ItemType* const pPrevItem = pItem->pPrev; +            m_ItemAllocator.Free(pItem); +            pItem = pPrevItem; +        } +        m_pFront = VMA_NULL; +        m_pBack = VMA_NULL; +        m_Count = 0; +    } +} + +template<typename T> +void VmaRawList<T>::Remove(ItemType* pItem) +{ +    VMA_HEAVY_ASSERT(pItem != VMA_NULL); +    VMA_HEAVY_ASSERT(m_Count > 0); + +    if(pItem->pPrev != VMA_NULL) +    { +        pItem->pPrev->pNext = pItem->pNext; +    } +    else +    { +        VMA_HEAVY_ASSERT(m_pFront == pItem); +        m_pFront = pItem->pNext; +    } + +    if(pItem->pNext 
!= VMA_NULL) +    { +        pItem->pNext->pPrev = pItem->pPrev; +    } +    else +    { +        VMA_HEAVY_ASSERT(m_pBack == pItem); +        m_pBack = pItem->pPrev; +    } + +    m_ItemAllocator.Free(pItem); +    --m_Count; +} + +template<typename T> +VmaListItem<T>* VmaRawList<T>::InsertBefore(ItemType* pItem) +{ +    if(pItem != VMA_NULL) +    { +        ItemType* const prevItem = pItem->pPrev; +        ItemType* const newItem = m_ItemAllocator.Alloc(); +        newItem->pPrev = prevItem; +        newItem->pNext = pItem; +        pItem->pPrev = newItem; +        if(prevItem != VMA_NULL) +        { +            prevItem->pNext = newItem; +        } +        else +        { +            VMA_HEAVY_ASSERT(m_pFront == pItem); +            m_pFront = newItem; +        } +        ++m_Count; +        return newItem; +    } +    else +        return PushBack(); +} + +template<typename T> +VmaListItem<T>* VmaRawList<T>::InsertAfter(ItemType* pItem) +{ +    if(pItem != VMA_NULL) +    { +        ItemType* const nextItem = pItem->pNext; +        ItemType* const newItem = m_ItemAllocator.Alloc(); +        newItem->pNext = nextItem; +        newItem->pPrev = pItem; +        pItem->pNext = newItem; +        if(nextItem != VMA_NULL) +        { +            nextItem->pPrev = newItem; +        } +        else +        { +            VMA_HEAVY_ASSERT(m_pBack == pItem); +            m_pBack = newItem; +        } +        ++m_Count; +        return newItem; +    } +    else +        return PushFront(); +} + +template<typename T> +VmaListItem<T>* VmaRawList<T>::InsertBefore(ItemType* pItem, const T& value) +{ +    ItemType* const newItem = InsertBefore(pItem); +    newItem->Value = value; +    return newItem; +} + +template<typename T> +VmaListItem<T>* VmaRawList<T>::InsertAfter(ItemType* pItem, const T& value) +{ +    ItemType* const newItem = InsertAfter(pItem); +    newItem->Value = value; +    return newItem; +} +#endif // _VMA_RAW_LIST_FUNCTIONS +#endif // _VMA_RAW_LIST + +#ifndef _VMA_LIST +template<typename T, typename AllocatorT> +class VmaList +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaList) +public: +    class reverse_iterator; +    class const_iterator; +    class const_reverse_iterator; + +    class iterator +    { +        friend class const_iterator; +        friend class VmaList<T, AllocatorT>; +    public: +        iterator() :  m_pList(VMA_NULL), m_pItem(VMA_NULL) {} +        iterator(const reverse_iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + +        T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; } +        T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; } + +        bool operator==(const iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem == rhs.m_pItem; } +        bool operator!=(const iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; } + +        iterator operator++(int) { iterator result = *this; ++*this; return result; } +        iterator operator--(int) { iterator result = *this; --*this; return result; } + +        iterator& operator++() { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); m_pItem = m_pItem->pNext; return *this; } +        iterator& operator--(); + +    private: +        VmaRawList<T>* m_pList; +        VmaListItem<T>* m_pItem; + +        iterator(VmaRawList<T>* pList, VmaListItem<T>* pItem) : m_pList(pList),  m_pItem(pItem) {} +    }; +    class reverse_iterator +    { +        friend class const_reverse_iterator; + 
       friend class VmaList<T, AllocatorT>; +    public: +        reverse_iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {} +        reverse_iterator(const iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + +        T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; } +        T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; } + +        bool operator==(const reverse_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem == rhs.m_pItem; } +        bool operator!=(const reverse_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; } + +        reverse_iterator operator++(int) { reverse_iterator result = *this; ++* this; return result; } +        reverse_iterator operator--(int) { reverse_iterator result = *this; --* this; return result; } + +        reverse_iterator& operator++() { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); m_pItem = m_pItem->pPrev; return *this; } +        reverse_iterator& operator--(); + +    private: +        VmaRawList<T>* m_pList; +        VmaListItem<T>* m_pItem; + +        reverse_iterator(VmaRawList<T>* pList, VmaListItem<T>* pItem) : m_pList(pList),  m_pItem(pItem) {} +    }; +    class const_iterator +    { +        friend class VmaList<T, AllocatorT>; +    public: +        const_iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {} +        const_iterator(const iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} +        const_iterator(const reverse_iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + +        iterator drop_const() { return { const_cast<VmaRawList<T>*>(m_pList), const_cast<VmaListItem<T>*>(m_pItem) }; } + +        const T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; } +        const T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; } + +        bool operator==(const const_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem == rhs.m_pItem; } +        bool operator!=(const const_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; } + +        const_iterator operator++(int) { const_iterator result = *this; ++* this; return result; } +        const_iterator operator--(int) { const_iterator result = *this; --* this; return result; } + +        const_iterator& operator++() { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); m_pItem = m_pItem->pNext; return *this; } +        const_iterator& operator--(); + +    private: +        const VmaRawList<T>* m_pList; +        const VmaListItem<T>* m_pItem; + +        const_iterator(const VmaRawList<T>* pList, const VmaListItem<T>* pItem) : m_pList(pList), m_pItem(pItem) {} +    }; +    class const_reverse_iterator +    { +        friend class VmaList<T, AllocatorT>; +    public: +        const_reverse_iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {} +        const_reverse_iterator(const reverse_iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} +        const_reverse_iterator(const iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + +        reverse_iterator drop_const() { return { const_cast<VmaRawList<T>*>(m_pList), const_cast<VmaListItem<T>*>(m_pItem) }; } + +        const T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; } +        const T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; } + +        bool 
operator==(const const_reverse_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem == rhs.m_pItem; } +        bool operator!=(const const_reverse_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; } + +        const_reverse_iterator operator++(int) { const_reverse_iterator result = *this; ++* this; return result; } +        const_reverse_iterator operator--(int) { const_reverse_iterator result = *this; --* this; return result; } + +        const_reverse_iterator& operator++() { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); m_pItem = m_pItem->pPrev; return *this; } +        const_reverse_iterator& operator--(); + +    private: +        const VmaRawList<T>* m_pList; +        const VmaListItem<T>* m_pItem; + +        const_reverse_iterator(const VmaRawList<T>* pList, const VmaListItem<T>* pItem) : m_pList(pList), m_pItem(pItem) {} +    }; + +    VmaList(const AllocatorT& allocator) : m_RawList(allocator.m_pCallbacks) {} + +    bool empty() const { return m_RawList.IsEmpty(); } +    size_t size() const { return m_RawList.GetCount(); } + +    iterator begin() { return iterator(&m_RawList, m_RawList.Front()); } +    iterator end() { return iterator(&m_RawList, VMA_NULL); } + +    const_iterator cbegin() const { return const_iterator(&m_RawList, m_RawList.Front()); } +    const_iterator cend() const { return const_iterator(&m_RawList, VMA_NULL); } + +    const_iterator begin() const { return cbegin(); } +    const_iterator end() const { return cend(); } + +    reverse_iterator rbegin() { return reverse_iterator(&m_RawList, m_RawList.Back()); } +    reverse_iterator rend() { return reverse_iterator(&m_RawList, VMA_NULL); } + +    const_reverse_iterator crbegin() const { return const_reverse_iterator(&m_RawList, m_RawList.Back()); } +    const_reverse_iterator crend() const { return const_reverse_iterator(&m_RawList, VMA_NULL); } + +    const_reverse_iterator rbegin() const { return crbegin(); } +    const_reverse_iterator rend() const { return crend(); } + +    void push_back(const T& value) { m_RawList.PushBack(value); } +    iterator insert(iterator it, const T& value) { return iterator(&m_RawList, m_RawList.InsertBefore(it.m_pItem, value)); } + +    void clear() { m_RawList.Clear(); } +    void erase(iterator it) { m_RawList.Remove(it.m_pItem); } + +private: +    VmaRawList<T> m_RawList; +}; + +#ifndef _VMA_LIST_FUNCTIONS +template<typename T, typename AllocatorT> +typename VmaList<T, AllocatorT>::iterator& VmaList<T, AllocatorT>::iterator::operator--() +{ +    if (m_pItem != VMA_NULL) +    { +        m_pItem = m_pItem->pPrev; +    } +    else +    { +        VMA_HEAVY_ASSERT(!m_pList->IsEmpty()); +        m_pItem = m_pList->Back(); +    } +    return *this; +} + +template<typename T, typename AllocatorT> +typename VmaList<T, AllocatorT>::reverse_iterator& VmaList<T, AllocatorT>::reverse_iterator::operator--() +{ +    if (m_pItem != VMA_NULL) +    { +        m_pItem = m_pItem->pNext; +    } +    else +    { +        VMA_HEAVY_ASSERT(!m_pList->IsEmpty()); +        m_pItem = m_pList->Front(); +    } +    return *this; +} + +template<typename T, typename AllocatorT> +typename VmaList<T, AllocatorT>::const_iterator& VmaList<T, AllocatorT>::const_iterator::operator--() +{ +    if (m_pItem != VMA_NULL) +    { +        m_pItem = m_pItem->pPrev; +    } +    else +    { +        VMA_HEAVY_ASSERT(!m_pList->IsEmpty()); +        m_pItem = m_pList->Back(); +    } +    return *this; +} + +template<typename T, typename AllocatorT> +typename 
VmaList<T, AllocatorT>::const_reverse_iterator& VmaList<T, AllocatorT>::const_reverse_iterator::operator--() +{ +    if (m_pItem != VMA_NULL) +    { +        m_pItem = m_pItem->pNext; +    } +    else +    { +        VMA_HEAVY_ASSERT(!m_pList->IsEmpty()); +        m_pItem = m_pList->Back(); +    } +    return *this; +} +#endif // _VMA_LIST_FUNCTIONS +#endif // _VMA_LIST + +#ifndef _VMA_INTRUSIVE_LINKED_LIST +/* +Expected interface of ItemTypeTraits: +struct MyItemTypeTraits +{ +    typedef MyItem ItemType; +    static ItemType* GetPrev(const ItemType* item) { return item->myPrevPtr; } +    static ItemType* GetNext(const ItemType* item) { return item->myNextPtr; } +    static ItemType*& AccessPrev(ItemType* item) { return item->myPrevPtr; } +    static ItemType*& AccessNext(ItemType* item) { return item->myNextPtr; } +}; +*/ +template<typename ItemTypeTraits> +class VmaIntrusiveLinkedList +{ +public: +    typedef typename ItemTypeTraits::ItemType ItemType; +    static ItemType* GetPrev(const ItemType* item) { return ItemTypeTraits::GetPrev(item); } +    static ItemType* GetNext(const ItemType* item) { return ItemTypeTraits::GetNext(item); } + +    // Movable, not copyable. +    VmaIntrusiveLinkedList() = default; +    VmaIntrusiveLinkedList(VmaIntrusiveLinkedList && src); +    VmaIntrusiveLinkedList(const VmaIntrusiveLinkedList&) = delete; +    VmaIntrusiveLinkedList& operator=(VmaIntrusiveLinkedList&& src); +    VmaIntrusiveLinkedList& operator=(const VmaIntrusiveLinkedList&) = delete; +    ~VmaIntrusiveLinkedList() { VMA_HEAVY_ASSERT(IsEmpty()); } + +    size_t GetCount() const { return m_Count; } +    bool IsEmpty() const { return m_Count == 0; } +    ItemType* Front() { return m_Front; } +    ItemType* Back() { return m_Back; } +    const ItemType* Front() const { return m_Front; } +    const ItemType* Back() const { return m_Back; } + +    void PushBack(ItemType* item); +    void PushFront(ItemType* item); +    ItemType* PopBack(); +    ItemType* PopFront(); + +    // MyItem can be null - it means PushBack. +    void InsertBefore(ItemType* existingItem, ItemType* newItem); +    // MyItem can be null - it means PushFront. 
+    void InsertAfter(ItemType* existingItem, ItemType* newItem); +    void Remove(ItemType* item); +    void RemoveAll(); + +private: +    ItemType* m_Front = VMA_NULL; +    ItemType* m_Back = VMA_NULL; +    size_t m_Count = 0; +}; + +#ifndef _VMA_INTRUSIVE_LINKED_LIST_FUNCTIONS +template<typename ItemTypeTraits> +VmaIntrusiveLinkedList<ItemTypeTraits>::VmaIntrusiveLinkedList(VmaIntrusiveLinkedList&& src) +    : m_Front(src.m_Front), m_Back(src.m_Back), m_Count(src.m_Count) +{ +    src.m_Front = src.m_Back = VMA_NULL; +    src.m_Count = 0; +} + +template<typename ItemTypeTraits> +VmaIntrusiveLinkedList<ItemTypeTraits>& VmaIntrusiveLinkedList<ItemTypeTraits>::operator=(VmaIntrusiveLinkedList&& src) +{ +    if (&src != this) +    { +        VMA_HEAVY_ASSERT(IsEmpty()); +        m_Front = src.m_Front; +        m_Back = src.m_Back; +        m_Count = src.m_Count; +        src.m_Front = src.m_Back = VMA_NULL; +        src.m_Count = 0; +    } +    return *this; +} + +template<typename ItemTypeTraits> +void VmaIntrusiveLinkedList<ItemTypeTraits>::PushBack(ItemType* item) +{ +    VMA_HEAVY_ASSERT(ItemTypeTraits::GetPrev(item) == VMA_NULL && ItemTypeTraits::GetNext(item) == VMA_NULL); +    if (IsEmpty()) +    { +        m_Front = item; +        m_Back = item; +        m_Count = 1; +    } +    else +    { +        ItemTypeTraits::AccessPrev(item) = m_Back; +        ItemTypeTraits::AccessNext(m_Back) = item; +        m_Back = item; +        ++m_Count; +    } +} + +template<typename ItemTypeTraits> +void VmaIntrusiveLinkedList<ItemTypeTraits>::PushFront(ItemType* item) +{ +    VMA_HEAVY_ASSERT(ItemTypeTraits::GetPrev(item) == VMA_NULL && ItemTypeTraits::GetNext(item) == VMA_NULL); +    if (IsEmpty()) +    { +        m_Front = item; +        m_Back = item; +        m_Count = 1; +    } +    else +    { +        ItemTypeTraits::AccessNext(item) = m_Front; +        ItemTypeTraits::AccessPrev(m_Front) = item; +        m_Front = item; +        ++m_Count; +    } +} + +template<typename ItemTypeTraits> +typename VmaIntrusiveLinkedList<ItemTypeTraits>::ItemType* VmaIntrusiveLinkedList<ItemTypeTraits>::PopBack() +{ +    VMA_HEAVY_ASSERT(m_Count > 0); +    ItemType* const backItem = m_Back; +    ItemType* const prevItem = ItemTypeTraits::GetPrev(backItem); +    if (prevItem != VMA_NULL) +    { +        ItemTypeTraits::AccessNext(prevItem) = VMA_NULL; +    } +    m_Back = prevItem; +    --m_Count; +    ItemTypeTraits::AccessPrev(backItem) = VMA_NULL; +    ItemTypeTraits::AccessNext(backItem) = VMA_NULL; +    return backItem; +} + +template<typename ItemTypeTraits> +typename VmaIntrusiveLinkedList<ItemTypeTraits>::ItemType* VmaIntrusiveLinkedList<ItemTypeTraits>::PopFront() +{ +    VMA_HEAVY_ASSERT(m_Count > 0); +    ItemType* const frontItem = m_Front; +    ItemType* const nextItem = ItemTypeTraits::GetNext(frontItem); +    if (nextItem != VMA_NULL) +    { +        ItemTypeTraits::AccessPrev(nextItem) = VMA_NULL; +    } +    m_Front = nextItem; +    --m_Count; +    ItemTypeTraits::AccessPrev(frontItem) = VMA_NULL; +    ItemTypeTraits::AccessNext(frontItem) = VMA_NULL; +    return frontItem; +} + +template<typename ItemTypeTraits> +void VmaIntrusiveLinkedList<ItemTypeTraits>::InsertBefore(ItemType* existingItem, ItemType* newItem) +{ +    VMA_HEAVY_ASSERT(newItem != VMA_NULL && ItemTypeTraits::GetPrev(newItem) == VMA_NULL && ItemTypeTraits::GetNext(newItem) == VMA_NULL); +    if (existingItem != VMA_NULL) +    { +        ItemType* const prevItem = ItemTypeTraits::GetPrev(existingItem); +        
ItemTypeTraits::AccessPrev(newItem) = prevItem; +        ItemTypeTraits::AccessNext(newItem) = existingItem; +        ItemTypeTraits::AccessPrev(existingItem) = newItem; +        if (prevItem != VMA_NULL) +        { +            ItemTypeTraits::AccessNext(prevItem) = newItem; +        } +        else +        { +            VMA_HEAVY_ASSERT(m_Front == existingItem); +            m_Front = newItem; +        } +        ++m_Count; +    } +    else +        PushBack(newItem); +} + +template<typename ItemTypeTraits> +void VmaIntrusiveLinkedList<ItemTypeTraits>::InsertAfter(ItemType* existingItem, ItemType* newItem) +{ +    VMA_HEAVY_ASSERT(newItem != VMA_NULL && ItemTypeTraits::GetPrev(newItem) == VMA_NULL && ItemTypeTraits::GetNext(newItem) == VMA_NULL); +    if (existingItem != VMA_NULL) +    { +        ItemType* const nextItem = ItemTypeTraits::GetNext(existingItem); +        ItemTypeTraits::AccessNext(newItem) = nextItem; +        ItemTypeTraits::AccessPrev(newItem) = existingItem; +        ItemTypeTraits::AccessNext(existingItem) = newItem; +        if (nextItem != VMA_NULL) +        { +            ItemTypeTraits::AccessPrev(nextItem) = newItem; +        } +        else +        { +            VMA_HEAVY_ASSERT(m_Back == existingItem); +            m_Back = newItem; +        } +        ++m_Count; +    } +    else +        return PushFront(newItem); +} + +template<typename ItemTypeTraits> +void VmaIntrusiveLinkedList<ItemTypeTraits>::Remove(ItemType* item) +{ +    VMA_HEAVY_ASSERT(item != VMA_NULL && m_Count > 0); +    if (ItemTypeTraits::GetPrev(item) != VMA_NULL) +    { +        ItemTypeTraits::AccessNext(ItemTypeTraits::AccessPrev(item)) = ItemTypeTraits::GetNext(item); +    } +    else +    { +        VMA_HEAVY_ASSERT(m_Front == item); +        m_Front = ItemTypeTraits::GetNext(item); +    } + +    if (ItemTypeTraits::GetNext(item) != VMA_NULL) +    { +        ItemTypeTraits::AccessPrev(ItemTypeTraits::AccessNext(item)) = ItemTypeTraits::GetPrev(item); +    } +    else +    { +        VMA_HEAVY_ASSERT(m_Back == item); +        m_Back = ItemTypeTraits::GetPrev(item); +    } +    ItemTypeTraits::AccessPrev(item) = VMA_NULL; +    ItemTypeTraits::AccessNext(item) = VMA_NULL; +    --m_Count; +} + +template<typename ItemTypeTraits> +void VmaIntrusiveLinkedList<ItemTypeTraits>::RemoveAll() +{ +    if (!IsEmpty()) +    { +        ItemType* item = m_Back; +        while (item != VMA_NULL) +        { +            ItemType* const prevItem = ItemTypeTraits::AccessPrev(item); +            ItemTypeTraits::AccessPrev(item) = VMA_NULL; +            ItemTypeTraits::AccessNext(item) = VMA_NULL; +            item = prevItem; +        } +        m_Front = VMA_NULL; +        m_Back = VMA_NULL; +        m_Count = 0; +    } +} +#endif // _VMA_INTRUSIVE_LINKED_LIST_FUNCTIONS +#endif // _VMA_INTRUSIVE_LINKED_LIST + +#if !defined(_VMA_STRING_BUILDER) && VMA_STATS_STRING_ENABLED +class VmaStringBuilder +{ +public: +    VmaStringBuilder(const VkAllocationCallbacks* allocationCallbacks) : m_Data(VmaStlAllocator<char>(allocationCallbacks)) {} +    ~VmaStringBuilder() = default; + +    size_t GetLength() const { return m_Data.size(); } +    const char* GetData() const { return m_Data.data(); } +    void AddNewLine() { Add('\n'); } +    void Add(char ch) { m_Data.push_back(ch); } + +    void Add(const char* pStr); +    void AddNumber(uint32_t num); +    void AddNumber(uint64_t num); +    void AddPointer(const void* ptr); + +private: +    VmaVector<char, VmaStlAllocator<char>> m_Data; +}; + +#ifndef 
_VMA_STRING_BUILDER_FUNCTIONS +void VmaStringBuilder::Add(const char* pStr) +{ +    const size_t strLen = strlen(pStr); +    if (strLen > 0) +    { +        const size_t oldCount = m_Data.size(); +        m_Data.resize(oldCount + strLen); +        memcpy(m_Data.data() + oldCount, pStr, strLen); +    } +} + +void VmaStringBuilder::AddNumber(uint32_t num) +{ +    char buf[11]; +    buf[10] = '\0'; +    char* p = &buf[10]; +    do +    { +        *--p = '0' + (char)(num % 10); +        num /= 10; +    } while (num); +    Add(p); +} + +void VmaStringBuilder::AddNumber(uint64_t num) +{ +    char buf[21]; +    buf[20] = '\0'; +    char* p = &buf[20]; +    do +    { +        *--p = '0' + (char)(num % 10); +        num /= 10; +    } while (num); +    Add(p); +} + +void VmaStringBuilder::AddPointer(const void* ptr) +{ +    char buf[21]; +    VmaPtrToStr(buf, sizeof(buf), ptr); +    Add(buf); +} +#endif //_VMA_STRING_BUILDER_FUNCTIONS +#endif // _VMA_STRING_BUILDER + +#if !defined(_VMA_JSON_WRITER) && VMA_STATS_STRING_ENABLED +/* +Allows to conveniently build a correct JSON document to be written to the +VmaStringBuilder passed to the constructor. +*/ +class VmaJsonWriter +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaJsonWriter) +public: +    // sb - string builder to write the document to. Must remain alive for the whole lifetime of this object. +    VmaJsonWriter(const VkAllocationCallbacks* pAllocationCallbacks, VmaStringBuilder& sb); +    ~VmaJsonWriter(); + +    // Begins object by writing "{". +    // Inside an object, you must call pairs of WriteString and a value, e.g.: +    // j.BeginObject(true); j.WriteString("A"); j.WriteNumber(1); j.WriteString("B"); j.WriteNumber(2); j.EndObject(); +    // Will write: { "A": 1, "B": 2 } +    void BeginObject(bool singleLine = false); +    // Ends object by writing "}". +    void EndObject(); + +    // Begins array by writing "[". +    // Inside an array, you can write a sequence of any values. +    void BeginArray(bool singleLine = false); +    // Ends array by writing "[". +    void EndArray(); + +    // Writes a string value inside "". +    // pStr can contain any ANSI characters, including '"', new line etc. - they will be properly escaped. +    void WriteString(const char* pStr); + +    // Begins writing a string value. +    // Call BeginString, ContinueString, ContinueString, ..., EndString instead of +    // WriteString to conveniently build the string content incrementally, made of +    // parts including numbers. +    void BeginString(const char* pStr = VMA_NULL); +    // Posts next part of an open string. +    void ContinueString(const char* pStr); +    // Posts next part of an open string. The number is converted to decimal characters. +    void ContinueString(uint32_t n); +    void ContinueString(uint64_t n); +    // Posts next part of an open string. Pointer value is converted to characters +    // using "%p" formatting - shown as hexadecimal number, e.g.: 000000081276Ad00 +    void ContinueString_Pointer(const void* ptr); +    // Ends writing a string value by writing '"'. +    void EndString(const char* pStr = VMA_NULL); + +    // Writes a number value. +    void WriteNumber(uint32_t n); +    void WriteNumber(uint64_t n); +    // Writes a boolean value - false or true. +    void WriteBool(bool b); +    // Writes a null value. 
+    void WriteNull(); + +private: +    enum COLLECTION_TYPE +    { +        COLLECTION_TYPE_OBJECT, +        COLLECTION_TYPE_ARRAY, +    }; +    struct StackItem +    { +        COLLECTION_TYPE type; +        uint32_t valueCount; +        bool singleLineMode; +    }; + +    static const char* const INDENT; + +    VmaStringBuilder& m_SB; +    VmaVector< StackItem, VmaStlAllocator<StackItem> > m_Stack; +    bool m_InsideString; + +    void BeginValue(bool isString); +    void WriteIndent(bool oneLess = false); +}; +const char* const VmaJsonWriter::INDENT = "  "; + +#ifndef _VMA_JSON_WRITER_FUNCTIONS +VmaJsonWriter::VmaJsonWriter(const VkAllocationCallbacks* pAllocationCallbacks, VmaStringBuilder& sb) +    : m_SB(sb), +    m_Stack(VmaStlAllocator<StackItem>(pAllocationCallbacks)), +    m_InsideString(false) {} + +VmaJsonWriter::~VmaJsonWriter() +{ +    VMA_ASSERT(!m_InsideString); +    VMA_ASSERT(m_Stack.empty()); +} + +void VmaJsonWriter::BeginObject(bool singleLine) +{ +    VMA_ASSERT(!m_InsideString); + +    BeginValue(false); +    m_SB.Add('{'); + +    StackItem item; +    item.type = COLLECTION_TYPE_OBJECT; +    item.valueCount = 0; +    item.singleLineMode = singleLine; +    m_Stack.push_back(item); +} + +void VmaJsonWriter::EndObject() +{ +    VMA_ASSERT(!m_InsideString); + +    WriteIndent(true); +    m_SB.Add('}'); + +    VMA_ASSERT(!m_Stack.empty() && m_Stack.back().type == COLLECTION_TYPE_OBJECT); +    m_Stack.pop_back(); +} + +void VmaJsonWriter::BeginArray(bool singleLine) +{ +    VMA_ASSERT(!m_InsideString); + +    BeginValue(false); +    m_SB.Add('['); + +    StackItem item; +    item.type = COLLECTION_TYPE_ARRAY; +    item.valueCount = 0; +    item.singleLineMode = singleLine; +    m_Stack.push_back(item); +} + +void VmaJsonWriter::EndArray() +{ +    VMA_ASSERT(!m_InsideString); + +    WriteIndent(true); +    m_SB.Add(']'); + +    VMA_ASSERT(!m_Stack.empty() && m_Stack.back().type == COLLECTION_TYPE_ARRAY); +    m_Stack.pop_back(); +} + +void VmaJsonWriter::WriteString(const char* pStr) +{ +    BeginString(pStr); +    EndString(); +} + +void VmaJsonWriter::BeginString(const char* pStr) +{ +    VMA_ASSERT(!m_InsideString); + +    BeginValue(true); +    m_SB.Add('"'); +    m_InsideString = true; +    if (pStr != VMA_NULL && pStr[0] != '\0') +    { +        ContinueString(pStr); +    } +} + +void VmaJsonWriter::ContinueString(const char* pStr) +{ +    VMA_ASSERT(m_InsideString); + +    const size_t strLen = strlen(pStr); +    for (size_t i = 0; i < strLen; ++i) +    { +        char ch = pStr[i]; +        if (ch == '\\') +        { +            m_SB.Add("\\\\"); +        } +        else if (ch == '"') +        { +            m_SB.Add("\\\""); +        } +        else if ((uint8_t)ch >= 32) +        { +            m_SB.Add(ch); +        } +        else switch (ch) +        { +        case '\b': +            m_SB.Add("\\b"); +            break; +        case '\f': +            m_SB.Add("\\f"); +            break; +        case '\n': +            m_SB.Add("\\n"); +            break; +        case '\r': +            m_SB.Add("\\r"); +            break; +        case '\t': +            m_SB.Add("\\t"); +            break; +        default: +            VMA_ASSERT(0 && "Character not currently supported."); +        } +    } +} + +void VmaJsonWriter::ContinueString(uint32_t n) +{ +    VMA_ASSERT(m_InsideString); +    m_SB.AddNumber(n); +} + +void VmaJsonWriter::ContinueString(uint64_t n) +{ +    VMA_ASSERT(m_InsideString); +    m_SB.AddNumber(n); +} + +void 
VmaJsonWriter::ContinueString_Pointer(const void* ptr) +{ +    VMA_ASSERT(m_InsideString); +    m_SB.AddPointer(ptr); +} + +void VmaJsonWriter::EndString(const char* pStr) +{ +    VMA_ASSERT(m_InsideString); +    if (pStr != VMA_NULL && pStr[0] != '\0') +    { +        ContinueString(pStr); +    } +    m_SB.Add('"'); +    m_InsideString = false; +} + +void VmaJsonWriter::WriteNumber(uint32_t n) +{ +    VMA_ASSERT(!m_InsideString); +    BeginValue(false); +    m_SB.AddNumber(n); +} + +void VmaJsonWriter::WriteNumber(uint64_t n) +{ +    VMA_ASSERT(!m_InsideString); +    BeginValue(false); +    m_SB.AddNumber(n); +} + +void VmaJsonWriter::WriteBool(bool b) +{ +    VMA_ASSERT(!m_InsideString); +    BeginValue(false); +    m_SB.Add(b ? "true" : "false"); +} + +void VmaJsonWriter::WriteNull() +{ +    VMA_ASSERT(!m_InsideString); +    BeginValue(false); +    m_SB.Add("null"); +} + +void VmaJsonWriter::BeginValue(bool isString) +{ +    if (!m_Stack.empty()) +    { +        StackItem& currItem = m_Stack.back(); +        if (currItem.type == COLLECTION_TYPE_OBJECT && +            currItem.valueCount % 2 == 0) +        { +            VMA_ASSERT(isString); +        } + +        if (currItem.type == COLLECTION_TYPE_OBJECT && +            currItem.valueCount % 2 != 0) +        { +            m_SB.Add(": "); +        } +        else if (currItem.valueCount > 0) +        { +            m_SB.Add(", "); +            WriteIndent(); +        } +        else +        { +            WriteIndent(); +        } +        ++currItem.valueCount; +    } +} + +void VmaJsonWriter::WriteIndent(bool oneLess) +{ +    if (!m_Stack.empty() && !m_Stack.back().singleLineMode) +    { +        m_SB.AddNewLine(); + +        size_t count = m_Stack.size(); +        if (count > 0 && oneLess) +        { +            --count; +        } +        for (size_t i = 0; i < count; ++i) +        { +            m_SB.Add(INDENT); +        } +    } +} +#endif // _VMA_JSON_WRITER_FUNCTIONS + +static void VmaPrintDetailedStatistics(VmaJsonWriter& json, const VmaDetailedStatistics& stat) +{ +    json.BeginObject(); + +    json.WriteString("BlockCount"); +    json.WriteNumber(stat.statistics.blockCount); +    json.WriteString("BlockBytes"); +    json.WriteNumber(stat.statistics.blockBytes); +    json.WriteString("AllocationCount"); +    json.WriteNumber(stat.statistics.allocationCount); +    json.WriteString("AllocationBytes"); +    json.WriteNumber(stat.statistics.allocationBytes); +    json.WriteString("UnusedRangeCount"); +    json.WriteNumber(stat.unusedRangeCount); + +    if (stat.statistics.allocationCount > 1) +    { +        json.WriteString("AllocationSizeMin"); +        json.WriteNumber(stat.allocationSizeMin); +        json.WriteString("AllocationSizeMax"); +        json.WriteNumber(stat.allocationSizeMax); +    } +    if (stat.unusedRangeCount > 1) +    { +        json.WriteString("UnusedRangeSizeMin"); +        json.WriteNumber(stat.unusedRangeSizeMin); +        json.WriteString("UnusedRangeSizeMax"); +        json.WriteNumber(stat.unusedRangeSizeMax); +    } +    json.EndObject(); +} +#endif // _VMA_JSON_WRITER + +#ifndef _VMA_MAPPING_HYSTERESIS + +class VmaMappingHysteresis +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaMappingHysteresis) +public: +    VmaMappingHysteresis() = default; + +    uint32_t GetExtraMapping() const { return m_ExtraMapping; } + +    // Call when Map was called. +    // Returns true if switched to extra +1 mapping reference count. 
+    bool PostMap() +    { +#if VMA_MAPPING_HYSTERESIS_ENABLED +        if(m_ExtraMapping == 0) +        { +            ++m_MajorCounter; +            if(m_MajorCounter >= COUNTER_MIN_EXTRA_MAPPING) +            { +                m_ExtraMapping = 1; +                m_MajorCounter = 0; +                m_MinorCounter = 0; +                return true; +            } +        } +        else // m_ExtraMapping == 1 +            PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED +        return false; +    } + +    // Call when Unmap was called. +    void PostUnmap() +    { +#if VMA_MAPPING_HYSTERESIS_ENABLED +        if(m_ExtraMapping == 0) +            ++m_MajorCounter; +        else // m_ExtraMapping == 1 +            PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED +    } + +    // Call when allocation was made from the memory block. +    void PostAlloc() +    { +#if VMA_MAPPING_HYSTERESIS_ENABLED +        if(m_ExtraMapping == 1) +            ++m_MajorCounter; +        else // m_ExtraMapping == 0 +            PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED +    } + +    // Call when allocation was freed from the memory block. +    // Returns true if switched to extra -1 mapping reference count. +    bool PostFree() +    { +#if VMA_MAPPING_HYSTERESIS_ENABLED +        if(m_ExtraMapping == 1) +        { +            ++m_MajorCounter; +            if(m_MajorCounter >= COUNTER_MIN_EXTRA_MAPPING && +                m_MajorCounter > m_MinorCounter + 1) +            { +                m_ExtraMapping = 0; +                m_MajorCounter = 0; +                m_MinorCounter = 0; +                return true; +            } +        } +        else // m_ExtraMapping == 0 +            PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED +        return false; +    } + +private: +    static const int32_t COUNTER_MIN_EXTRA_MAPPING = 7; + +    uint32_t m_MinorCounter = 0; +    uint32_t m_MajorCounter = 0; +    uint32_t m_ExtraMapping = 0; // 0 or 1. + +    void PostMinorCounter() +    { +        if(m_MinorCounter < m_MajorCounter) +        { +            ++m_MinorCounter; +        } +        else if(m_MajorCounter > 0) +        { +            --m_MajorCounter; +            --m_MinorCounter; +        } +    } +}; + +#endif // _VMA_MAPPING_HYSTERESIS + +#if VMA_EXTERNAL_MEMORY_WIN32 +class VmaWin32Handle +{ +public: +    VmaWin32Handle() noexcept : m_hHandle(VMA_NULL) { } +    explicit VmaWin32Handle(HANDLE hHandle) noexcept : m_hHandle(hHandle) { } +    ~VmaWin32Handle() noexcept { if (m_hHandle != VMA_NULL) { ::CloseHandle(m_hHandle); } } +    VMA_CLASS_NO_COPY_NO_MOVE(VmaWin32Handle) + +public: +    // Strengthened +    VkResult GetHandle(VkDevice device, VkDeviceMemory memory, PFN_vkGetMemoryWin32HandleKHR pvkGetMemoryWin32HandleKHR, HANDLE hTargetProcess, bool useMutex, HANDLE* pHandle) noexcept +    { +        *pHandle = VMA_NULL; +        // Try to get handle first. +        if (m_hHandle != VMA_NULL) +        { +            *pHandle = Duplicate(hTargetProcess); +            return VK_SUCCESS; +        } + +        VkResult res = VK_SUCCESS; +        // If failed, try to create it. 
+        { +            VmaMutexLockWrite lock(m_Mutex, useMutex); +            if (m_hHandle == VMA_NULL) +            { +                res = Create(device, memory, pvkGetMemoryWin32HandleKHR, &m_hHandle); +            } +        } + +        *pHandle = Duplicate(hTargetProcess); +        return res; +    } + +    operator bool() const noexcept { return m_hHandle != VMA_NULL; } +private: +    // Not atomic +    static VkResult Create(VkDevice device, VkDeviceMemory memory, PFN_vkGetMemoryWin32HandleKHR pvkGetMemoryWin32HandleKHR, HANDLE* pHandle) noexcept +    { +        VkResult res = VK_ERROR_FEATURE_NOT_PRESENT; +        if (pvkGetMemoryWin32HandleKHR != VMA_NULL) +        { +            VkMemoryGetWin32HandleInfoKHR handleInfo{ }; +            handleInfo.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR; +            handleInfo.memory = memory; +            handleInfo.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR; +            res = pvkGetMemoryWin32HandleKHR(device, &handleInfo, pHandle); +        } +        return res; +    } +    HANDLE Duplicate(HANDLE hTargetProcess = VMA_NULL) const noexcept +    { +        if (!m_hHandle) +            return m_hHandle; + +        HANDLE hCurrentProcess = ::GetCurrentProcess(); +        HANDLE hDupHandle = VMA_NULL; +        if (!::DuplicateHandle(hCurrentProcess, m_hHandle, hTargetProcess ? hTargetProcess : hCurrentProcess, &hDupHandle, 0, FALSE, DUPLICATE_SAME_ACCESS)) +        { +            VMA_ASSERT(0 && "Failed to duplicate handle."); +        } +        return hDupHandle; +    } +private: +    HANDLE m_hHandle; +    VMA_RW_MUTEX m_Mutex; // Protects access m_Handle +}; +#else  +class VmaWin32Handle +{ +    // ABI compatibility +    void* placeholder = VMA_NULL; +    VMA_RW_MUTEX placeholder2; +}; +#endif // VMA_EXTERNAL_MEMORY_WIN32 + + +#ifndef _VMA_DEVICE_MEMORY_BLOCK +/* +Represents a single block of device memory (`VkDeviceMemory`) with all the +data about its regions (aka suballocations, #VmaAllocation), assigned and free. + +Thread-safety: +- Access to m_pMetadata must be externally synchronized. +- Map, Unmap, Bind* are synchronized internally. +*/ +class VmaDeviceMemoryBlock +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaDeviceMemoryBlock) +public: +    VmaBlockMetadata* m_pMetadata; + +    VmaDeviceMemoryBlock(VmaAllocator hAllocator); +    ~VmaDeviceMemoryBlock(); + +    // Always call after construction. +    void Init( +        VmaAllocator hAllocator, +        VmaPool hParentPool, +        uint32_t newMemoryTypeIndex, +        VkDeviceMemory newMemory, +        VkDeviceSize newSize, +        uint32_t id, +        uint32_t algorithm, +        VkDeviceSize bufferImageGranularity); +    // Always call before destruction. +    void Destroy(VmaAllocator allocator); + +    VmaPool GetParentPool() const { return m_hParentPool; } +    VkDeviceMemory GetDeviceMemory() const { return m_hMemory; } +    uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; } +    uint32_t GetId() const { return m_Id; } +    void* GetMappedData() const { return m_pMappedData; } +    uint32_t GetMapRefCount() const { return m_MapCount; } + +    // Call when allocation/free was made from m_pMetadata. +    // Used for m_MappingHysteresis. +    void PostAlloc(VmaAllocator hAllocator); +    void PostFree(VmaAllocator hAllocator); + +    // Validates all data structures inside this object. If not valid, returns false. +    bool Validate() const; +    VkResult CheckCorruption(VmaAllocator hAllocator); + +    // ppData can be null. 
+    VkResult Map(VmaAllocator hAllocator, uint32_t count, void** ppData); +    void Unmap(VmaAllocator hAllocator, uint32_t count); + +    VkResult WriteMagicValueAfterAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize); +    VkResult ValidateMagicValueAfterAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize); + +    VkResult BindBufferMemory( +        const VmaAllocator hAllocator, +        const VmaAllocation hAllocation, +        VkDeviceSize allocationLocalOffset, +        VkBuffer hBuffer, +        const void* pNext); +    VkResult BindImageMemory( +        const VmaAllocator hAllocator, +        const VmaAllocation hAllocation, +        VkDeviceSize allocationLocalOffset, +        VkImage hImage, +        const void* pNext); +#if VMA_EXTERNAL_MEMORY_WIN32 +    VkResult CreateWin32Handle( +        const VmaAllocator hAllocator, +        PFN_vkGetMemoryWin32HandleKHR pvkGetMemoryWin32HandleKHR, +        HANDLE hTargetProcess, +        HANDLE* pHandle)noexcept; +#endif // VMA_EXTERNAL_MEMORY_WIN32 +private: +    VmaPool m_hParentPool; // VK_NULL_HANDLE if not belongs to custom pool. +    uint32_t m_MemoryTypeIndex; +    uint32_t m_Id; +    VkDeviceMemory m_hMemory; + +    /* +    Protects access to m_hMemory so it is not used by multiple threads simultaneously, e.g. vkMapMemory, vkBindBufferMemory. +    Also protects m_MapCount, m_pMappedData. +    Allocations, deallocations, any change in m_pMetadata is protected by parent's VmaBlockVector::m_Mutex. +    */ +    VMA_MUTEX m_MapAndBindMutex; +    VmaMappingHysteresis m_MappingHysteresis; +    uint32_t m_MapCount; +    void* m_pMappedData; + +    VmaWin32Handle m_Handle; +}; +#endif // _VMA_DEVICE_MEMORY_BLOCK + +#ifndef _VMA_ALLOCATION_T +struct VmaAllocationExtraData +{ +    void* m_pMappedData = VMA_NULL; // Not null means memory is mapped. +    VmaWin32Handle m_Handle; +}; + +struct VmaAllocation_T +{ +    friend struct VmaDedicatedAllocationListItemTraits; + +    enum FLAGS +    { +        FLAG_PERSISTENT_MAP   = 0x01, +        FLAG_MAPPING_ALLOWED  = 0x02, +    }; + +public: +    enum ALLOCATION_TYPE +    { +        ALLOCATION_TYPE_NONE, +        ALLOCATION_TYPE_BLOCK, +        ALLOCATION_TYPE_DEDICATED, +    }; + +    // This struct is allocated using VmaPoolAllocator. +    VmaAllocation_T(bool mappingAllowed); +    ~VmaAllocation_T(); + +    void InitBlockAllocation( +        VmaDeviceMemoryBlock* block, +        VmaAllocHandle allocHandle, +        VkDeviceSize alignment, +        VkDeviceSize size, +        uint32_t memoryTypeIndex, +        VmaSuballocationType suballocationType, +        bool mapped); +    // pMappedData not null means allocation is created with MAPPED flag. 
+    void InitDedicatedAllocation( +        VmaAllocator allocator, +        VmaPool hParentPool, +        uint32_t memoryTypeIndex, +        VkDeviceMemory hMemory, +        VmaSuballocationType suballocationType, +        void* pMappedData, +        VkDeviceSize size); +    void Destroy(VmaAllocator allocator); + +    ALLOCATION_TYPE GetType() const { return (ALLOCATION_TYPE)m_Type; } +    VkDeviceSize GetAlignment() const { return m_Alignment; } +    VkDeviceSize GetSize() const { return m_Size; } +    void* GetUserData() const { return m_pUserData; } +    const char* GetName() const { return m_pName; } +    VmaSuballocationType GetSuballocationType() const { return (VmaSuballocationType)m_SuballocationType; } + +    VmaDeviceMemoryBlock* GetBlock() const { VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK); return m_BlockAllocation.m_Block; } +    uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; } +    bool IsPersistentMap() const { return (m_Flags & FLAG_PERSISTENT_MAP) != 0; } +    bool IsMappingAllowed() const { return (m_Flags & FLAG_MAPPING_ALLOWED) != 0; } + +    void SetUserData(VmaAllocator hAllocator, void* pUserData) { m_pUserData = pUserData; } +    void SetName(VmaAllocator hAllocator, const char* pName); +    void FreeName(VmaAllocator hAllocator); +    uint8_t SwapBlockAllocation(VmaAllocator hAllocator, VmaAllocation allocation); +    VmaAllocHandle GetAllocHandle() const; +    VkDeviceSize GetOffset() const; +    VmaPool GetParentPool() const; +    VkDeviceMemory GetMemory() const; +    void* GetMappedData() const; + +    void BlockAllocMap(); +    void BlockAllocUnmap(); +    VkResult DedicatedAllocMap(VmaAllocator hAllocator, void** ppData); +    void DedicatedAllocUnmap(VmaAllocator hAllocator); + +#if VMA_STATS_STRING_ENABLED +    VmaBufferImageUsage GetBufferImageUsage() const { return m_BufferImageUsage; } +    void InitBufferUsage(const VkBufferCreateInfo &createInfo, bool useKhrMaintenance5) +    { +        VMA_ASSERT(m_BufferImageUsage == VmaBufferImageUsage::UNKNOWN); +        m_BufferImageUsage = VmaBufferImageUsage(createInfo, useKhrMaintenance5); +    } +    void InitImageUsage(const VkImageCreateInfo &createInfo) +    { +        VMA_ASSERT(m_BufferImageUsage == VmaBufferImageUsage::UNKNOWN); +        m_BufferImageUsage = VmaBufferImageUsage(createInfo); +    } +    void PrintParameters(class VmaJsonWriter& json) const; +#endif + +#if VMA_EXTERNAL_MEMORY_WIN32 +    VkResult GetWin32Handle(VmaAllocator hAllocator, HANDLE hTargetProcess, HANDLE* hHandle) noexcept; +#endif // VMA_EXTERNAL_MEMORY_WIN32 + +private: +    // Allocation out of VmaDeviceMemoryBlock. +    struct BlockAllocation +    { +        VmaDeviceMemoryBlock* m_Block; +        VmaAllocHandle m_AllocHandle; +    }; +    // Allocation for an object that has its own private VkDeviceMemory. +    struct DedicatedAllocation +    { +        VmaPool m_hParentPool; // VK_NULL_HANDLE if not belongs to custom pool. +        VkDeviceMemory m_hMemory; +        VmaAllocationExtraData* m_ExtraData; +        VmaAllocation_T* m_Prev; +        VmaAllocation_T* m_Next; +    }; +    union +    { +        // Allocation out of VmaDeviceMemoryBlock. +        BlockAllocation m_BlockAllocation; +        // Allocation for an object that has its own private VkDeviceMemory. 
+        DedicatedAllocation m_DedicatedAllocation; +    }; + +    VkDeviceSize m_Alignment; +    VkDeviceSize m_Size; +    void* m_pUserData; +    char* m_pName; +    uint32_t m_MemoryTypeIndex; +    uint8_t m_Type; // ALLOCATION_TYPE +    uint8_t m_SuballocationType; // VmaSuballocationType +    // Reference counter for vmaMapMemory()/vmaUnmapMemory(). +    uint8_t m_MapCount; +    uint8_t m_Flags; // enum FLAGS +#if VMA_STATS_STRING_ENABLED +    VmaBufferImageUsage m_BufferImageUsage; // 0 if unknown. +#endif + +    void EnsureExtraData(VmaAllocator hAllocator); +}; +#endif // _VMA_ALLOCATION_T + +#ifndef _VMA_DEDICATED_ALLOCATION_LIST_ITEM_TRAITS +struct VmaDedicatedAllocationListItemTraits +{ +    typedef VmaAllocation_T ItemType; + +    static ItemType* GetPrev(const ItemType* item) +    { +        VMA_HEAVY_ASSERT(item->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); +        return item->m_DedicatedAllocation.m_Prev; +    } +    static ItemType* GetNext(const ItemType* item) +    { +        VMA_HEAVY_ASSERT(item->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); +        return item->m_DedicatedAllocation.m_Next; +    } +    static ItemType*& AccessPrev(ItemType* item) +    { +        VMA_HEAVY_ASSERT(item->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); +        return item->m_DedicatedAllocation.m_Prev; +    } +    static ItemType*& AccessNext(ItemType* item) +    { +        VMA_HEAVY_ASSERT(item->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); +        return item->m_DedicatedAllocation.m_Next; +    } +}; +#endif // _VMA_DEDICATED_ALLOCATION_LIST_ITEM_TRAITS + +#ifndef _VMA_DEDICATED_ALLOCATION_LIST +/* +Stores linked list of VmaAllocation_T objects. +Thread-safe, synchronized internally. +*/ +class VmaDedicatedAllocationList +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaDedicatedAllocationList) +public: +    VmaDedicatedAllocationList() {} +    ~VmaDedicatedAllocationList(); + +    void Init(bool useMutex) { m_UseMutex = useMutex; } +    bool Validate(); + +    void AddDetailedStatistics(VmaDetailedStatistics& inoutStats); +    void AddStatistics(VmaStatistics& inoutStats); +#if VMA_STATS_STRING_ENABLED +    // Writes JSON array with the list of allocations. 
+    void BuildStatsString(VmaJsonWriter& json); +#endif + +    bool IsEmpty(); +    void Register(VmaAllocation alloc); +    void Unregister(VmaAllocation alloc); + +private: +    typedef VmaIntrusiveLinkedList<VmaDedicatedAllocationListItemTraits> DedicatedAllocationLinkedList; + +    bool m_UseMutex = true; +    VMA_RW_MUTEX m_Mutex; +    DedicatedAllocationLinkedList m_AllocationList; +}; + +#ifndef _VMA_DEDICATED_ALLOCATION_LIST_FUNCTIONS + +VmaDedicatedAllocationList::~VmaDedicatedAllocationList() +{ +    VMA_HEAVY_ASSERT(Validate()); + +    if (!m_AllocationList.IsEmpty()) +    { +        VMA_ASSERT_LEAK(false && "Unfreed dedicated allocations found!"); +    } +} + +bool VmaDedicatedAllocationList::Validate() +{ +    const size_t declaredCount = m_AllocationList.GetCount(); +    size_t actualCount = 0; +    VmaMutexLockRead lock(m_Mutex, m_UseMutex); +    for (VmaAllocation alloc = m_AllocationList.Front(); +        alloc != VMA_NULL; alloc = m_AllocationList.GetNext(alloc)) +    { +        ++actualCount; +    } +    VMA_VALIDATE(actualCount == declaredCount); + +    return true; +} + +void VmaDedicatedAllocationList::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) +{ +    for(auto* item = m_AllocationList.Front(); item != VMA_NULL; item = DedicatedAllocationLinkedList::GetNext(item)) +    { +        const VkDeviceSize size = item->GetSize(); +        inoutStats.statistics.blockCount++; +        inoutStats.statistics.blockBytes += size; +        VmaAddDetailedStatisticsAllocation(inoutStats, item->GetSize()); +    } +} + +void VmaDedicatedAllocationList::AddStatistics(VmaStatistics& inoutStats) +{ +    VmaMutexLockRead lock(m_Mutex, m_UseMutex); + +    const uint32_t allocCount = (uint32_t)m_AllocationList.GetCount(); +    inoutStats.blockCount += allocCount; +    inoutStats.allocationCount += allocCount; + +    for(auto* item = m_AllocationList.Front(); item != VMA_NULL; item = DedicatedAllocationLinkedList::GetNext(item)) +    { +        const VkDeviceSize size = item->GetSize(); +        inoutStats.blockBytes += size; +        inoutStats.allocationBytes += size; +    } +} + +#if VMA_STATS_STRING_ENABLED +void VmaDedicatedAllocationList::BuildStatsString(VmaJsonWriter& json) +{ +    VmaMutexLockRead lock(m_Mutex, m_UseMutex); +    json.BeginArray(); +    for (VmaAllocation alloc = m_AllocationList.Front(); +        alloc != VMA_NULL; alloc = m_AllocationList.GetNext(alloc)) +    { +        json.BeginObject(true); +        alloc->PrintParameters(json); +        json.EndObject(); +    } +    json.EndArray(); +} +#endif // VMA_STATS_STRING_ENABLED + +bool VmaDedicatedAllocationList::IsEmpty() +{ +    VmaMutexLockRead lock(m_Mutex, m_UseMutex); +    return m_AllocationList.IsEmpty(); +} + +void VmaDedicatedAllocationList::Register(VmaAllocation alloc) +{ +    VmaMutexLockWrite lock(m_Mutex, m_UseMutex); +    m_AllocationList.PushBack(alloc); +} + +void VmaDedicatedAllocationList::Unregister(VmaAllocation alloc) +{ +    VmaMutexLockWrite lock(m_Mutex, m_UseMutex); +    m_AllocationList.Remove(alloc); +} +#endif // _VMA_DEDICATED_ALLOCATION_LIST_FUNCTIONS +#endif // _VMA_DEDICATED_ALLOCATION_LIST + +#ifndef _VMA_SUBALLOCATION +/* +Represents a region of VmaDeviceMemoryBlock that is either assigned and returned as +allocated memory block or free. +*/ +struct VmaSuballocation +{ +    VkDeviceSize offset; +    VkDeviceSize size; +    void* userData; +    VmaSuballocationType type; +}; + +// Comparator for offsets. 
+struct VmaSuballocationOffsetLess +{ +    bool operator()(const VmaSuballocation& lhs, const VmaSuballocation& rhs) const +    { +        return lhs.offset < rhs.offset; +    } +}; + +struct VmaSuballocationOffsetGreater +{ +    bool operator()(const VmaSuballocation& lhs, const VmaSuballocation& rhs) const +    { +        return lhs.offset > rhs.offset; +    } +}; + +struct VmaSuballocationItemSizeLess +{ +    bool operator()(const VmaSuballocationList::iterator lhs, +        const VmaSuballocationList::iterator rhs) const +    { +        return lhs->size < rhs->size; +    } + +    bool operator()(const VmaSuballocationList::iterator lhs, +        VkDeviceSize rhsSize) const +    { +        return lhs->size < rhsSize; +    } +}; +#endif // _VMA_SUBALLOCATION + +#ifndef _VMA_ALLOCATION_REQUEST +/* +Parameters of planned allocation inside a VmaDeviceMemoryBlock. +item points to a FREE suballocation. +*/ +struct VmaAllocationRequest +{ +    VmaAllocHandle allocHandle; +    VkDeviceSize size; +    VmaSuballocationList::iterator item; +    void* customData; +    uint64_t algorithmData; +    VmaAllocationRequestType type; +}; +#endif // _VMA_ALLOCATION_REQUEST + +#ifndef _VMA_BLOCK_METADATA +/* +Data structure used for bookkeeping of allocations and unused ranges of memory +in a single VkDeviceMemory block. +*/ +class VmaBlockMetadata +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockMetadata) +public: +    // pAllocationCallbacks, if not null, must be owned externally - alive and unchanged for the whole lifetime of this object. +    VmaBlockMetadata(const VkAllocationCallbacks* pAllocationCallbacks, +        VkDeviceSize bufferImageGranularity, bool isVirtual); +    virtual ~VmaBlockMetadata() = default; + +    virtual void Init(VkDeviceSize size) { m_Size = size; } +    bool IsVirtual() const { return m_IsVirtual; } +    VkDeviceSize GetSize() const { return m_Size; } + +    // Validates all data structures inside this object. If not valid, returns false. +    virtual bool Validate() const = 0; +    virtual size_t GetAllocationCount() const = 0; +    virtual size_t GetFreeRegionsCount() const = 0; +    virtual VkDeviceSize GetSumFreeSize() const = 0; +    // Returns true if this block is empty - contains only single free suballocation. +    virtual bool IsEmpty() const = 0; +    virtual void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) = 0; +    virtual VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const = 0; +    virtual void* GetAllocationUserData(VmaAllocHandle allocHandle) const = 0; + +    virtual VmaAllocHandle GetAllocationListBegin() const = 0; +    virtual VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const = 0; +    virtual VkDeviceSize GetNextFreeRegionSize(VmaAllocHandle alloc) const = 0; + +    // Shouldn't modify blockCount. +    virtual void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const = 0; +    virtual void AddStatistics(VmaStatistics& inoutStats) const = 0; + +#if VMA_STATS_STRING_ENABLED +    virtual void PrintDetailedMap(class VmaJsonWriter& json) const = 0; +#endif + +    // Tries to find a place for suballocation with given parameters inside this block. +    // If succeeded, fills pAllocationRequest and returns true. +    // If failed, returns false. 
+    virtual bool CreateAllocationRequest( +        VkDeviceSize allocSize, +        VkDeviceSize allocAlignment, +        bool upperAddress, +        VmaSuballocationType allocType, +        // Always one of VMA_ALLOCATION_CREATE_STRATEGY_* or VMA_ALLOCATION_INTERNAL_STRATEGY_* flags. +        uint32_t strategy, +        VmaAllocationRequest* pAllocationRequest) = 0; + +    virtual VkResult CheckCorruption(const void* pBlockData) = 0; + +    // Makes actual allocation based on request. Request must already be checked and valid. +    virtual void Alloc( +        const VmaAllocationRequest& request, +        VmaSuballocationType type, +        void* userData) = 0; + +    // Frees suballocation assigned to given memory region. +    virtual void Free(VmaAllocHandle allocHandle) = 0; + +    // Frees all allocations. +    // Careful! Don't call it if there are VmaAllocation objects owned by userData of cleared allocations! +    virtual void Clear() = 0; + +    virtual void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) = 0; +    virtual void DebugLogAllAllocations() const = 0; + +protected: +    const VkAllocationCallbacks* GetAllocationCallbacks() const { return m_pAllocationCallbacks; } +    VkDeviceSize GetBufferImageGranularity() const { return m_BufferImageGranularity; } +    VkDeviceSize GetDebugMargin() const { return VkDeviceSize(IsVirtual() ? 0 : VMA_DEBUG_MARGIN); } + +    void DebugLogAllocation(VkDeviceSize offset, VkDeviceSize size, void* userData) const; +#if VMA_STATS_STRING_ENABLED +    // mapRefCount == UINT32_MAX means unspecified. +    void PrintDetailedMap_Begin(class VmaJsonWriter& json, +        VkDeviceSize unusedBytes, +        size_t allocationCount, +        size_t unusedRangeCount) const; +    void PrintDetailedMap_Allocation(class VmaJsonWriter& json, +        VkDeviceSize offset, VkDeviceSize size, void* userData) const; +    void PrintDetailedMap_UnusedRange(class VmaJsonWriter& json, +        VkDeviceSize offset, +        VkDeviceSize size) const; +    void PrintDetailedMap_End(class VmaJsonWriter& json) const; +#endif + +private: +    VkDeviceSize m_Size; +    const VkAllocationCallbacks* m_pAllocationCallbacks; +    const VkDeviceSize m_BufferImageGranularity; +    const bool m_IsVirtual; +}; + +#ifndef _VMA_BLOCK_METADATA_FUNCTIONS +VmaBlockMetadata::VmaBlockMetadata(const VkAllocationCallbacks* pAllocationCallbacks, +    VkDeviceSize bufferImageGranularity, bool isVirtual) +    : m_Size(0), +    m_pAllocationCallbacks(pAllocationCallbacks), +    m_BufferImageGranularity(bufferImageGranularity), +    m_IsVirtual(isVirtual) {} + +void VmaBlockMetadata::DebugLogAllocation(VkDeviceSize offset, VkDeviceSize size, void* userData) const +{ +    if (IsVirtual()) +    { +        VMA_LEAK_LOG_FORMAT("UNFREED VIRTUAL ALLOCATION; Offset: %" PRIu64 "; Size: %" PRIu64 "; UserData: %p", offset, size, userData); +    } +    else +    { +        VMA_ASSERT(userData != VMA_NULL); +        VmaAllocation allocation = reinterpret_cast<VmaAllocation>(userData); + +        userData = allocation->GetUserData(); +        const char* name = allocation->GetName(); + +#if VMA_STATS_STRING_ENABLED +        VMA_LEAK_LOG_FORMAT("UNFREED ALLOCATION; Offset: %" PRIu64 "; Size: %" PRIu64 "; UserData: %p; Name: %s; Type: %s; Usage: %" PRIu64, +            offset, size, userData, name ? 
name : "vma_empty", +            VMA_SUBALLOCATION_TYPE_NAMES[allocation->GetSuballocationType()], +            (uint64_t)allocation->GetBufferImageUsage().Value); +#else +        VMA_LEAK_LOG_FORMAT("UNFREED ALLOCATION; Offset: %" PRIu64 "; Size: %" PRIu64 "; UserData: %p; Name: %s; Type: %u", +            offset, size, userData, name ? name : "vma_empty", +            (unsigned)allocation->GetSuballocationType()); +#endif // VMA_STATS_STRING_ENABLED +    } + +} + +#if VMA_STATS_STRING_ENABLED +void VmaBlockMetadata::PrintDetailedMap_Begin(class VmaJsonWriter& json, +    VkDeviceSize unusedBytes, size_t allocationCount, size_t unusedRangeCount) const +{ +    json.WriteString("TotalBytes"); +    json.WriteNumber(GetSize()); + +    json.WriteString("UnusedBytes"); +    json.WriteNumber(unusedBytes); + +    json.WriteString("Allocations"); +    json.WriteNumber((uint64_t)allocationCount); + +    json.WriteString("UnusedRanges"); +    json.WriteNumber((uint64_t)unusedRangeCount); + +    json.WriteString("Suballocations"); +    json.BeginArray(); +} + +void VmaBlockMetadata::PrintDetailedMap_Allocation(class VmaJsonWriter& json, +    VkDeviceSize offset, VkDeviceSize size, void* userData) const +{ +    json.BeginObject(true); + +    json.WriteString("Offset"); +    json.WriteNumber(offset); + +    if (IsVirtual()) +    { +        json.WriteString("Size"); +        json.WriteNumber(size); +        if (userData) +        { +            json.WriteString("CustomData"); +            json.BeginString(); +            json.ContinueString_Pointer(userData); +            json.EndString(); +        } +    } +    else +    { +        ((VmaAllocation)userData)->PrintParameters(json); +    } + +    json.EndObject(); +} + +void VmaBlockMetadata::PrintDetailedMap_UnusedRange(class VmaJsonWriter& json, +    VkDeviceSize offset, VkDeviceSize size) const +{ +    json.BeginObject(true); + +    json.WriteString("Offset"); +    json.WriteNumber(offset); + +    json.WriteString("Type"); +    json.WriteString(VMA_SUBALLOCATION_TYPE_NAMES[VMA_SUBALLOCATION_TYPE_FREE]); + +    json.WriteString("Size"); +    json.WriteNumber(size); + +    json.EndObject(); +} + +void VmaBlockMetadata::PrintDetailedMap_End(class VmaJsonWriter& json) const +{ +    json.EndArray(); +} +#endif // VMA_STATS_STRING_ENABLED +#endif // _VMA_BLOCK_METADATA_FUNCTIONS +#endif // _VMA_BLOCK_METADATA + +#ifndef _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY +// Before deleting object of this class remember to call 'Destroy()' +class VmaBlockBufferImageGranularity final +{ +public: +    struct ValidationContext +    { +        const VkAllocationCallbacks* allocCallbacks; +        uint16_t* pageAllocs; +    }; + +    VmaBlockBufferImageGranularity(VkDeviceSize bufferImageGranularity); +    ~VmaBlockBufferImageGranularity(); + +    bool IsEnabled() const { return m_BufferImageGranularity > MAX_LOW_BUFFER_IMAGE_GRANULARITY; } + +    void Init(const VkAllocationCallbacks* pAllocationCallbacks, VkDeviceSize size); +    // Before destroying object you must call free it's memory +    void Destroy(const VkAllocationCallbacks* pAllocationCallbacks); + +    void RoundupAllocRequest(VmaSuballocationType allocType, +        VkDeviceSize& inOutAllocSize, +        VkDeviceSize& inOutAllocAlignment) const; + +    bool CheckConflictAndAlignUp(VkDeviceSize& inOutAllocOffset, +        VkDeviceSize allocSize, +        VkDeviceSize blockOffset, +        VkDeviceSize blockSize, +        VmaSuballocationType allocType) const; + +    void AllocPages(uint8_t allocType, VkDeviceSize 
offset, VkDeviceSize size); +    void FreePages(VkDeviceSize offset, VkDeviceSize size); +    void Clear(); + +    ValidationContext StartValidation(const VkAllocationCallbacks* pAllocationCallbacks, +        bool isVirutal) const; +    bool Validate(ValidationContext& ctx, VkDeviceSize offset, VkDeviceSize size) const; +    bool FinishValidation(ValidationContext& ctx) const; + +private: +    static const uint16_t MAX_LOW_BUFFER_IMAGE_GRANULARITY = 256; + +    struct RegionInfo +    { +        uint8_t allocType; +        uint16_t allocCount; +    }; + +    VkDeviceSize m_BufferImageGranularity; +    uint32_t m_RegionCount; +    RegionInfo* m_RegionInfo; + +    uint32_t GetStartPage(VkDeviceSize offset) const { return OffsetToPageIndex(offset & ~(m_BufferImageGranularity - 1)); } +    uint32_t GetEndPage(VkDeviceSize offset, VkDeviceSize size) const { return OffsetToPageIndex((offset + size - 1) & ~(m_BufferImageGranularity - 1)); } + +    uint32_t OffsetToPageIndex(VkDeviceSize offset) const; +    void AllocPage(RegionInfo& page, uint8_t allocType); +}; + +#ifndef _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY_FUNCTIONS +VmaBlockBufferImageGranularity::VmaBlockBufferImageGranularity(VkDeviceSize bufferImageGranularity) +    : m_BufferImageGranularity(bufferImageGranularity), +    m_RegionCount(0), +    m_RegionInfo(VMA_NULL) {} + +VmaBlockBufferImageGranularity::~VmaBlockBufferImageGranularity() +{ +    VMA_ASSERT(m_RegionInfo == VMA_NULL && "Free not called before destroying object!"); +} + +void VmaBlockBufferImageGranularity::Init(const VkAllocationCallbacks* pAllocationCallbacks, VkDeviceSize size) +{ +    if (IsEnabled()) +    { +        m_RegionCount = static_cast<uint32_t>(VmaDivideRoundingUp(size, m_BufferImageGranularity)); +        m_RegionInfo = vma_new_array(pAllocationCallbacks, RegionInfo, m_RegionCount); +        memset(m_RegionInfo, 0, m_RegionCount * sizeof(RegionInfo)); +    } +} + +void VmaBlockBufferImageGranularity::Destroy(const VkAllocationCallbacks* pAllocationCallbacks) +{ +    if (m_RegionInfo) +    { +        vma_delete_array(pAllocationCallbacks, m_RegionInfo, m_RegionCount); +        m_RegionInfo = VMA_NULL; +    } +} + +void VmaBlockBufferImageGranularity::RoundupAllocRequest(VmaSuballocationType allocType, +    VkDeviceSize& inOutAllocSize, +    VkDeviceSize& inOutAllocAlignment) const +{ +    if (m_BufferImageGranularity > 1 && +        m_BufferImageGranularity <= MAX_LOW_BUFFER_IMAGE_GRANULARITY) +    { +        if (allocType == VMA_SUBALLOCATION_TYPE_UNKNOWN || +            allocType == VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN || +            allocType == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL) +        { +            inOutAllocAlignment = VMA_MAX(inOutAllocAlignment, m_BufferImageGranularity); +            inOutAllocSize = VmaAlignUp(inOutAllocSize, m_BufferImageGranularity); +        } +    } +} + +bool VmaBlockBufferImageGranularity::CheckConflictAndAlignUp(VkDeviceSize& inOutAllocOffset, +    VkDeviceSize allocSize, +    VkDeviceSize blockOffset, +    VkDeviceSize blockSize, +    VmaSuballocationType allocType) const +{ +    if (IsEnabled()) +    { +        uint32_t startPage = GetStartPage(inOutAllocOffset); +        if (m_RegionInfo[startPage].allocCount > 0 && +            VmaIsBufferImageGranularityConflict(static_cast<VmaSuballocationType>(m_RegionInfo[startPage].allocType), allocType)) +        { +            inOutAllocOffset = VmaAlignUp(inOutAllocOffset, m_BufferImageGranularity); +            if (blockSize < allocSize + inOutAllocOffset - blockOffset) +       
         return true; +            ++startPage; +        } +        uint32_t endPage = GetEndPage(inOutAllocOffset, allocSize); +        if (endPage != startPage && +            m_RegionInfo[endPage].allocCount > 0 && +            VmaIsBufferImageGranularityConflict(static_cast<VmaSuballocationType>(m_RegionInfo[endPage].allocType), allocType)) +        { +            return true; +        } +    } +    return false; +} + +void VmaBlockBufferImageGranularity::AllocPages(uint8_t allocType, VkDeviceSize offset, VkDeviceSize size) +{ +    if (IsEnabled()) +    { +        uint32_t startPage = GetStartPage(offset); +        AllocPage(m_RegionInfo[startPage], allocType); + +        uint32_t endPage = GetEndPage(offset, size); +        if (startPage != endPage) +            AllocPage(m_RegionInfo[endPage], allocType); +    } +} + +void VmaBlockBufferImageGranularity::FreePages(VkDeviceSize offset, VkDeviceSize size) +{ +    if (IsEnabled()) +    { +        uint32_t startPage = GetStartPage(offset); +        --m_RegionInfo[startPage].allocCount; +        if (m_RegionInfo[startPage].allocCount == 0) +            m_RegionInfo[startPage].allocType = VMA_SUBALLOCATION_TYPE_FREE; +        uint32_t endPage = GetEndPage(offset, size); +        if (startPage != endPage) +        { +            --m_RegionInfo[endPage].allocCount; +            if (m_RegionInfo[endPage].allocCount == 0) +                m_RegionInfo[endPage].allocType = VMA_SUBALLOCATION_TYPE_FREE; +        } +    } +} + +void VmaBlockBufferImageGranularity::Clear() +{ +    if (m_RegionInfo) +        memset(m_RegionInfo, 0, m_RegionCount * sizeof(RegionInfo)); +} + +VmaBlockBufferImageGranularity::ValidationContext VmaBlockBufferImageGranularity::StartValidation( +    const VkAllocationCallbacks* pAllocationCallbacks, bool isVirutal) const +{ +    ValidationContext ctx{ pAllocationCallbacks, VMA_NULL }; +    if (!isVirutal && IsEnabled()) +    { +        ctx.pageAllocs = vma_new_array(pAllocationCallbacks, uint16_t, m_RegionCount); +        memset(ctx.pageAllocs, 0, m_RegionCount * sizeof(uint16_t)); +    } +    return ctx; +} + +bool VmaBlockBufferImageGranularity::Validate(ValidationContext& ctx, +    VkDeviceSize offset, VkDeviceSize size) const +{ +    if (IsEnabled()) +    { +        uint32_t start = GetStartPage(offset); +        ++ctx.pageAllocs[start]; +        VMA_VALIDATE(m_RegionInfo[start].allocCount > 0); + +        uint32_t end = GetEndPage(offset, size); +        if (start != end) +        { +            ++ctx.pageAllocs[end]; +            VMA_VALIDATE(m_RegionInfo[end].allocCount > 0); +        } +    } +    return true; +} + +bool VmaBlockBufferImageGranularity::FinishValidation(ValidationContext& ctx) const +{ +    // Check proper page structure +    if (IsEnabled()) +    { +        VMA_ASSERT(ctx.pageAllocs != VMA_NULL && "Validation context not initialized!"); + +        for (uint32_t page = 0; page < m_RegionCount; ++page) +        { +            VMA_VALIDATE(ctx.pageAllocs[page] == m_RegionInfo[page].allocCount); +        } +        vma_delete_array(ctx.allocCallbacks, ctx.pageAllocs, m_RegionCount); +        ctx.pageAllocs = VMA_NULL; +    } +    return true; +} + +uint32_t VmaBlockBufferImageGranularity::OffsetToPageIndex(VkDeviceSize offset) const +{ +    return static_cast<uint32_t>(offset >> VMA_BITSCAN_MSB(m_BufferImageGranularity)); +} + +void VmaBlockBufferImageGranularity::AllocPage(RegionInfo& page, uint8_t allocType) +{ +    // When current alloc type is free then it can be overridden by new type +    if 
(page.allocCount == 0 || (page.allocCount > 0 && page.allocType == VMA_SUBALLOCATION_TYPE_FREE)) +        page.allocType = allocType; + +    ++page.allocCount; +} +#endif // _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY_FUNCTIONS +#endif // _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY + +#ifndef _VMA_BLOCK_METADATA_LINEAR +/* +Allocations and their references in internal data structure look like this: + +if(m_2ndVectorMode == SECOND_VECTOR_EMPTY): + +        0 +-------+ +          |       | +          |       | +          |       | +          +-------+ +          | Alloc |  1st[m_1stNullItemsBeginCount] +          +-------+ +          | Alloc |  1st[m_1stNullItemsBeginCount + 1] +          +-------+ +          |  ...  | +          +-------+ +          | Alloc |  1st[1st.size() - 1] +          +-------+ +          |       | +          |       | +          |       | +GetSize() +-------+ + +if(m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER): + +        0 +-------+ +          | Alloc |  2nd[0] +          +-------+ +          | Alloc |  2nd[1] +          +-------+ +          |  ...  | +          +-------+ +          | Alloc |  2nd[2nd.size() - 1] +          +-------+ +          |       | +          |       | +          |       | +          +-------+ +          | Alloc |  1st[m_1stNullItemsBeginCount] +          +-------+ +          | Alloc |  1st[m_1stNullItemsBeginCount + 1] +          +-------+ +          |  ...  | +          +-------+ +          | Alloc |  1st[1st.size() - 1] +          +-------+ +          |       | +GetSize() +-------+ + +if(m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK): + +        0 +-------+ +          |       | +          |       | +          |       | +          +-------+ +          | Alloc |  1st[m_1stNullItemsBeginCount] +          +-------+ +          | Alloc |  1st[m_1stNullItemsBeginCount + 1] +          +-------+ +          |  ...  | +          +-------+ +          | Alloc |  1st[1st.size() - 1] +          +-------+ +          |       | +          |       | +          |       | +          +-------+ +          | Alloc |  2nd[2nd.size() - 1] +          +-------+ +          |  ...  
| +          +-------+ +          | Alloc |  2nd[1] +          +-------+ +          | Alloc |  2nd[0] +GetSize() +-------+ + +*/ +class VmaBlockMetadata_Linear : public VmaBlockMetadata +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockMetadata_Linear) +public: +    VmaBlockMetadata_Linear(const VkAllocationCallbacks* pAllocationCallbacks, +        VkDeviceSize bufferImageGranularity, bool isVirtual); +    virtual ~VmaBlockMetadata_Linear() = default; + +    VkDeviceSize GetSumFreeSize() const override { return m_SumFreeSize; } +    bool IsEmpty() const override { return GetAllocationCount() == 0; } +    VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const override { return (VkDeviceSize)allocHandle - 1; } + +    void Init(VkDeviceSize size) override; +    bool Validate() const override; +    size_t GetAllocationCount() const override; +    size_t GetFreeRegionsCount() const override; + +    void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const override; +    void AddStatistics(VmaStatistics& inoutStats) const override; + +#if VMA_STATS_STRING_ENABLED +    void PrintDetailedMap(class VmaJsonWriter& json) const override; +#endif + +    bool CreateAllocationRequest( +        VkDeviceSize allocSize, +        VkDeviceSize allocAlignment, +        bool upperAddress, +        VmaSuballocationType allocType, +        uint32_t strategy, +        VmaAllocationRequest* pAllocationRequest) override; + +    VkResult CheckCorruption(const void* pBlockData) override; + +    void Alloc( +        const VmaAllocationRequest& request, +        VmaSuballocationType type, +        void* userData) override; + +    void Free(VmaAllocHandle allocHandle) override; +    void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) override; +    void* GetAllocationUserData(VmaAllocHandle allocHandle) const override; +    VmaAllocHandle GetAllocationListBegin() const override; +    VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const override; +    VkDeviceSize GetNextFreeRegionSize(VmaAllocHandle alloc) const override; +    void Clear() override; +    void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) override; +    void DebugLogAllAllocations() const override; + +private: +    /* +    There are two suballocation vectors, used in ping-pong way. +    The one with index m_1stVectorIndex is called 1st. +    The one with index (m_1stVectorIndex ^ 1) is called 2nd. +    2nd can be non-empty only when 1st is not empty. +    When 2nd is not empty, m_2ndVectorMode indicates its mode of operation. +    */ +    typedef VmaVector<VmaSuballocation, VmaStlAllocator<VmaSuballocation>> SuballocationVectorType; + +    enum SECOND_VECTOR_MODE +    { +        SECOND_VECTOR_EMPTY, +        /* +        Suballocations in 2nd vector are created later than the ones in 1st, but they +        all have smaller offset. +        */ +        SECOND_VECTOR_RING_BUFFER, +        /* +        Suballocations in 2nd vector are upper side of double stack. +        They all have offsets higher than those in 1st vector. +        Top of this stack means smaller offsets, but higher indices in this vector. +        */ +        SECOND_VECTOR_DOUBLE_STACK, +    }; + +    VkDeviceSize m_SumFreeSize; +    SuballocationVectorType m_Suballocations0, m_Suballocations1; +    uint32_t m_1stVectorIndex; +    SECOND_VECTOR_MODE m_2ndVectorMode; +    // Number of items in 1st vector with hAllocation = null at the beginning. 
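+    // (Freed items at the front are not erased right away: Free() only advances this count,
+    // and CleanupAfterFree() compacts the 1st vector once ShouldCompact1st() deems it worthwhile.)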
+    size_t m_1stNullItemsBeginCount; +    // Number of other items in 1st vector with hAllocation = null somewhere in the middle. +    size_t m_1stNullItemsMiddleCount; +    // Number of items in 2nd vector with hAllocation = null. +    size_t m_2ndNullItemsCount; + +    SuballocationVectorType& AccessSuballocations1st() { return m_1stVectorIndex ? m_Suballocations1 : m_Suballocations0; } +    SuballocationVectorType& AccessSuballocations2nd() { return m_1stVectorIndex ? m_Suballocations0 : m_Suballocations1; } +    const SuballocationVectorType& AccessSuballocations1st() const { return m_1stVectorIndex ? m_Suballocations1 : m_Suballocations0; } +    const SuballocationVectorType& AccessSuballocations2nd() const { return m_1stVectorIndex ? m_Suballocations0 : m_Suballocations1; } + +    VmaSuballocation& FindSuballocation(VkDeviceSize offset) const; +    bool ShouldCompact1st() const; +    void CleanupAfterFree(); + +    bool CreateAllocationRequest_LowerAddress( +        VkDeviceSize allocSize, +        VkDeviceSize allocAlignment, +        VmaSuballocationType allocType, +        uint32_t strategy, +        VmaAllocationRequest* pAllocationRequest); +    bool CreateAllocationRequest_UpperAddress( +        VkDeviceSize allocSize, +        VkDeviceSize allocAlignment, +        VmaSuballocationType allocType, +        uint32_t strategy, +        VmaAllocationRequest* pAllocationRequest); +}; + +#ifndef _VMA_BLOCK_METADATA_LINEAR_FUNCTIONS +VmaBlockMetadata_Linear::VmaBlockMetadata_Linear(const VkAllocationCallbacks* pAllocationCallbacks, +    VkDeviceSize bufferImageGranularity, bool isVirtual) +    : VmaBlockMetadata(pAllocationCallbacks, bufferImageGranularity, isVirtual), +    m_SumFreeSize(0), +    m_Suballocations0(VmaStlAllocator<VmaSuballocation>(pAllocationCallbacks)), +    m_Suballocations1(VmaStlAllocator<VmaSuballocation>(pAllocationCallbacks)), +    m_1stVectorIndex(0), +    m_2ndVectorMode(SECOND_VECTOR_EMPTY), +    m_1stNullItemsBeginCount(0), +    m_1stNullItemsMiddleCount(0), +    m_2ndNullItemsCount(0) {} + +void VmaBlockMetadata_Linear::Init(VkDeviceSize size) +{ +    VmaBlockMetadata::Init(size); +    m_SumFreeSize = size; +} + +bool VmaBlockMetadata_Linear::Validate() const +{ +    const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); +    const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + +    VMA_VALIDATE(suballocations2nd.empty() == (m_2ndVectorMode == SECOND_VECTOR_EMPTY)); +    VMA_VALIDATE(!suballocations1st.empty() || +        suballocations2nd.empty() || +        m_2ndVectorMode != SECOND_VECTOR_RING_BUFFER); + +    if (!suballocations1st.empty()) +    { +        // Null item at the beginning should be accounted into m_1stNullItemsBeginCount. +        VMA_VALIDATE(suballocations1st[m_1stNullItemsBeginCount].type != VMA_SUBALLOCATION_TYPE_FREE); +        // Null item at the end should be just pop_back(). +        VMA_VALIDATE(suballocations1st.back().type != VMA_SUBALLOCATION_TYPE_FREE); +    } +    if (!suballocations2nd.empty()) +    { +        // Null item at the end should be just pop_back(). 
+        VMA_VALIDATE(suballocations2nd.back().type != VMA_SUBALLOCATION_TYPE_FREE); +    } + +    VMA_VALIDATE(m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount <= suballocations1st.size()); +    VMA_VALIDATE(m_2ndNullItemsCount <= suballocations2nd.size()); + +    VkDeviceSize sumUsedSize = 0; +    const size_t suballoc1stCount = suballocations1st.size(); +    const VkDeviceSize debugMargin = GetDebugMargin(); +    VkDeviceSize offset = 0; + +    if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) +    { +        const size_t suballoc2ndCount = suballocations2nd.size(); +        size_t nullItem2ndCount = 0; +        for (size_t i = 0; i < suballoc2ndCount; ++i) +        { +            const VmaSuballocation& suballoc = suballocations2nd[i]; +            const bool currFree = (suballoc.type == VMA_SUBALLOCATION_TYPE_FREE); + +            VmaAllocation const alloc = (VmaAllocation)suballoc.userData; +            if (!IsVirtual()) +            { +                VMA_VALIDATE(currFree == (alloc == VK_NULL_HANDLE)); +            } +            VMA_VALIDATE(suballoc.offset >= offset); + +            if (!currFree) +            { +                if (!IsVirtual()) +                { +                    VMA_VALIDATE((VkDeviceSize)alloc->GetAllocHandle() == suballoc.offset + 1); +                    VMA_VALIDATE(alloc->GetSize() == suballoc.size); +                } +                sumUsedSize += suballoc.size; +            } +            else +            { +                ++nullItem2ndCount; +            } + +            offset = suballoc.offset + suballoc.size + debugMargin; +        } + +        VMA_VALIDATE(nullItem2ndCount == m_2ndNullItemsCount); +    } + +    for (size_t i = 0; i < m_1stNullItemsBeginCount; ++i) +    { +        const VmaSuballocation& suballoc = suballocations1st[i]; +        VMA_VALIDATE(suballoc.type == VMA_SUBALLOCATION_TYPE_FREE && +            suballoc.userData == VMA_NULL); +    } + +    size_t nullItem1stCount = m_1stNullItemsBeginCount; + +    for (size_t i = m_1stNullItemsBeginCount; i < suballoc1stCount; ++i) +    { +        const VmaSuballocation& suballoc = suballocations1st[i]; +        const bool currFree = (suballoc.type == VMA_SUBALLOCATION_TYPE_FREE); + +        VmaAllocation const alloc = (VmaAllocation)suballoc.userData; +        if (!IsVirtual()) +        { +            VMA_VALIDATE(currFree == (alloc == VK_NULL_HANDLE)); +        } +        VMA_VALIDATE(suballoc.offset >= offset); +        VMA_VALIDATE(i >= m_1stNullItemsBeginCount || currFree); + +        if (!currFree) +        { +            if (!IsVirtual()) +            { +                VMA_VALIDATE((VkDeviceSize)alloc->GetAllocHandle() == suballoc.offset + 1); +                VMA_VALIDATE(alloc->GetSize() == suballoc.size); +            } +            sumUsedSize += suballoc.size; +        } +        else +        { +            ++nullItem1stCount; +        } + +        offset = suballoc.offset + suballoc.size + debugMargin; +    } +    VMA_VALIDATE(nullItem1stCount == m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount); + +    if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) +    { +        const size_t suballoc2ndCount = suballocations2nd.size(); +        size_t nullItem2ndCount = 0; +        for (size_t i = suballoc2ndCount; i--; ) +        { +            const VmaSuballocation& suballoc = suballocations2nd[i]; +            const bool currFree = (suballoc.type == VMA_SUBALLOCATION_TYPE_FREE); + +            VmaAllocation const alloc = (VmaAllocation)suballoc.userData; +            
if (!IsVirtual()) +            { +                VMA_VALIDATE(currFree == (alloc == VK_NULL_HANDLE)); +            } +            VMA_VALIDATE(suballoc.offset >= offset); + +            if (!currFree) +            { +                if (!IsVirtual()) +                { +                    VMA_VALIDATE((VkDeviceSize)alloc->GetAllocHandle() == suballoc.offset + 1); +                    VMA_VALIDATE(alloc->GetSize() == suballoc.size); +                } +                sumUsedSize += suballoc.size; +            } +            else +            { +                ++nullItem2ndCount; +            } + +            offset = suballoc.offset + suballoc.size + debugMargin; +        } + +        VMA_VALIDATE(nullItem2ndCount == m_2ndNullItemsCount); +    } + +    VMA_VALIDATE(offset <= GetSize()); +    VMA_VALIDATE(m_SumFreeSize == GetSize() - sumUsedSize); + +    return true; +} + +size_t VmaBlockMetadata_Linear::GetAllocationCount() const +{ +    return AccessSuballocations1st().size() - m_1stNullItemsBeginCount - m_1stNullItemsMiddleCount + +        AccessSuballocations2nd().size() - m_2ndNullItemsCount; +} + +size_t VmaBlockMetadata_Linear::GetFreeRegionsCount() const +{ +    // Function only used for defragmentation, which is disabled for this algorithm +    VMA_ASSERT(0); +    return SIZE_MAX; +} + +void VmaBlockMetadata_Linear::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const +{ +    const VkDeviceSize size = GetSize(); +    const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); +    const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); +    const size_t suballoc1stCount = suballocations1st.size(); +    const size_t suballoc2ndCount = suballocations2nd.size(); + +    inoutStats.statistics.blockCount++; +    inoutStats.statistics.blockBytes += size; + +    VkDeviceSize lastOffset = 0; + +    if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) +    { +        const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; +        size_t nextAlloc2ndIndex = 0; +        while (lastOffset < freeSpace2ndTo1stEnd) +        { +            // Find next non-null allocation or move nextAllocIndex to the end. +            while (nextAlloc2ndIndex < suballoc2ndCount && +                suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) +            { +                ++nextAlloc2ndIndex; +            } + +            // Found non-null allocation. +            if (nextAlloc2ndIndex < suballoc2ndCount) +            { +                const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + +                // 1. Process free space before this allocation. +                if (lastOffset < suballoc.offset) +                { +                    // There is free space from lastOffset to suballoc.offset. +                    const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; +                    VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); +                } + +                // 2. Process this allocation. +                // There is allocation with suballoc.offset, suballoc.size. +                VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size); + +                // 3. Prepare for next iteration. +                lastOffset = suballoc.offset + suballoc.size; +                ++nextAlloc2ndIndex; +            } +            // We are at the end. 
+            else +            { +                // There is free space from lastOffset to freeSpace2ndTo1stEnd. +                if (lastOffset < freeSpace2ndTo1stEnd) +                { +                    const VkDeviceSize unusedRangeSize = freeSpace2ndTo1stEnd - lastOffset; +                    VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); +                } + +                // End of loop. +                lastOffset = freeSpace2ndTo1stEnd; +            } +        } +    } + +    size_t nextAlloc1stIndex = m_1stNullItemsBeginCount; +    const VkDeviceSize freeSpace1stTo2ndEnd = +        m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? suballocations2nd.back().offset : size; +    while (lastOffset < freeSpace1stTo2ndEnd) +    { +        // Find next non-null allocation or move nextAllocIndex to the end. +        while (nextAlloc1stIndex < suballoc1stCount && +            suballocations1st[nextAlloc1stIndex].userData == VMA_NULL) +        { +            ++nextAlloc1stIndex; +        } + +        // Found non-null allocation. +        if (nextAlloc1stIndex < suballoc1stCount) +        { +            const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + +            // 1. Process free space before this allocation. +            if (lastOffset < suballoc.offset) +            { +                // There is free space from lastOffset to suballoc.offset. +                const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; +                VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); +            } + +            // 2. Process this allocation. +            // There is allocation with suballoc.offset, suballoc.size. +            VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size); + +            // 3. Prepare for next iteration. +            lastOffset = suballoc.offset + suballoc.size; +            ++nextAlloc1stIndex; +        } +        // We are at the end. +        else +        { +            // There is free space from lastOffset to freeSpace1stTo2ndEnd. +            if (lastOffset < freeSpace1stTo2ndEnd) +            { +                const VkDeviceSize unusedRangeSize = freeSpace1stTo2ndEnd - lastOffset; +                VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); +            } + +            // End of loop. +            lastOffset = freeSpace1stTo2ndEnd; +        } +    } + +    if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) +    { +        size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; +        while (lastOffset < size) +        { +            // Find next non-null allocation or move nextAllocIndex to the end. +            while (nextAlloc2ndIndex != SIZE_MAX && +                suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) +            { +                --nextAlloc2ndIndex; +            } + +            // Found non-null allocation. +            if (nextAlloc2ndIndex != SIZE_MAX) +            { +                const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + +                // 1. Process free space before this allocation. +                if (lastOffset < suballoc.offset) +                { +                    // There is free space from lastOffset to suballoc.offset. +                    const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; +                    VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); +                } + +                // 2. Process this allocation. 
+                // There is allocation with suballoc.offset, suballoc.size. +                VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size); + +                // 3. Prepare for next iteration. +                lastOffset = suballoc.offset + suballoc.size; +                --nextAlloc2ndIndex; +            } +            // We are at the end. +            else +            { +                // There is free space from lastOffset to size. +                if (lastOffset < size) +                { +                    const VkDeviceSize unusedRangeSize = size - lastOffset; +                    VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); +                } + +                // End of loop. +                lastOffset = size; +            } +        } +    } +} + +void VmaBlockMetadata_Linear::AddStatistics(VmaStatistics& inoutStats) const +{ +    const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); +    const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); +    const VkDeviceSize size = GetSize(); +    const size_t suballoc1stCount = suballocations1st.size(); +    const size_t suballoc2ndCount = suballocations2nd.size(); + +    inoutStats.blockCount++; +    inoutStats.blockBytes += size; +    inoutStats.allocationBytes += size - m_SumFreeSize; + +    VkDeviceSize lastOffset = 0; + +    if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) +    { +        const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; +        size_t nextAlloc2ndIndex = m_1stNullItemsBeginCount; +        while (lastOffset < freeSpace2ndTo1stEnd) +        { +            // Find next non-null allocation or move nextAlloc2ndIndex to the end. +            while (nextAlloc2ndIndex < suballoc2ndCount && +                suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) +            { +                ++nextAlloc2ndIndex; +            } + +            // Found non-null allocation. +            if (nextAlloc2ndIndex < suballoc2ndCount) +            { +                const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + +                // Process this allocation. +                // There is allocation with suballoc.offset, suballoc.size. +                ++inoutStats.allocationCount; + +                // Prepare for next iteration. +                lastOffset = suballoc.offset + suballoc.size; +                ++nextAlloc2ndIndex; +            } +            // We are at the end. +            else +            { +                // End of loop. +                lastOffset = freeSpace2ndTo1stEnd; +            } +        } +    } + +    size_t nextAlloc1stIndex = m_1stNullItemsBeginCount; +    const VkDeviceSize freeSpace1stTo2ndEnd = +        m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? suballocations2nd.back().offset : size; +    while (lastOffset < freeSpace1stTo2ndEnd) +    { +        // Find next non-null allocation or move nextAllocIndex to the end. +        while (nextAlloc1stIndex < suballoc1stCount && +            suballocations1st[nextAlloc1stIndex].userData == VMA_NULL) +        { +            ++nextAlloc1stIndex; +        } + +        // Found non-null allocation. +        if (nextAlloc1stIndex < suballoc1stCount) +        { +            const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + +            // Process this allocation. +            // There is allocation with suballoc.offset, suballoc.size. 
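+            // (Unlike AddDetailedStatistics above, this pass only needs allocation counts;
+            // the byte totals were already computed from GetSize() and m_SumFreeSize at the
+            // top of the function.)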
+            ++inoutStats.allocationCount; + +            // Prepare for next iteration. +            lastOffset = suballoc.offset + suballoc.size; +            ++nextAlloc1stIndex; +        } +        // We are at the end. +        else +        { +            // End of loop. +            lastOffset = freeSpace1stTo2ndEnd; +        } +    } + +    if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) +    { +        size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; +        while (lastOffset < size) +        { +            // Find next non-null allocation or move nextAlloc2ndIndex to the end. +            while (nextAlloc2ndIndex != SIZE_MAX && +                suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) +            { +                --nextAlloc2ndIndex; +            } + +            // Found non-null allocation. +            if (nextAlloc2ndIndex != SIZE_MAX) +            { +                const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + +                // Process this allocation. +                // There is allocation with suballoc.offset, suballoc.size. +                ++inoutStats.allocationCount; + +                // Prepare for next iteration. +                lastOffset = suballoc.offset + suballoc.size; +                --nextAlloc2ndIndex; +            } +            // We are at the end. +            else +            { +                // End of loop. +                lastOffset = size; +            } +        } +    } +} + +#if VMA_STATS_STRING_ENABLED +void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json) const +{ +    const VkDeviceSize size = GetSize(); +    const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); +    const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); +    const size_t suballoc1stCount = suballocations1st.size(); +    const size_t suballoc2ndCount = suballocations2nd.size(); + +    // FIRST PASS + +    size_t unusedRangeCount = 0; +    VkDeviceSize usedBytes = 0; + +    VkDeviceSize lastOffset = 0; + +    size_t alloc2ndCount = 0; +    if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) +    { +        const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; +        size_t nextAlloc2ndIndex = 0; +        while (lastOffset < freeSpace2ndTo1stEnd) +        { +            // Find next non-null allocation or move nextAlloc2ndIndex to the end. +            while (nextAlloc2ndIndex < suballoc2ndCount && +                suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) +            { +                ++nextAlloc2ndIndex; +            } + +            // Found non-null allocation. +            if (nextAlloc2ndIndex < suballoc2ndCount) +            { +                const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + +                // 1. Process free space before this allocation. +                if (lastOffset < suballoc.offset) +                { +                    // There is free space from lastOffset to suballoc.offset. +                    ++unusedRangeCount; +                } + +                // 2. Process this allocation. +                // There is allocation with suballoc.offset, suballoc.size. +                ++alloc2ndCount; +                usedBytes += suballoc.size; + +                // 3. Prepare for next iteration. 
+                lastOffset = suballoc.offset + suballoc.size; +                ++nextAlloc2ndIndex; +            } +            // We are at the end. +            else +            { +                if (lastOffset < freeSpace2ndTo1stEnd) +                { +                    // There is free space from lastOffset to freeSpace2ndTo1stEnd. +                    ++unusedRangeCount; +                } + +                // End of loop. +                lastOffset = freeSpace2ndTo1stEnd; +            } +        } +    } + +    size_t nextAlloc1stIndex = m_1stNullItemsBeginCount; +    size_t alloc1stCount = 0; +    const VkDeviceSize freeSpace1stTo2ndEnd = +        m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? suballocations2nd.back().offset : size; +    while (lastOffset < freeSpace1stTo2ndEnd) +    { +        // Find next non-null allocation or move nextAllocIndex to the end. +        while (nextAlloc1stIndex < suballoc1stCount && +            suballocations1st[nextAlloc1stIndex].userData == VMA_NULL) +        { +            ++nextAlloc1stIndex; +        } + +        // Found non-null allocation. +        if (nextAlloc1stIndex < suballoc1stCount) +        { +            const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + +            // 1. Process free space before this allocation. +            if (lastOffset < suballoc.offset) +            { +                // There is free space from lastOffset to suballoc.offset. +                ++unusedRangeCount; +            } + +            // 2. Process this allocation. +            // There is allocation with suballoc.offset, suballoc.size. +            ++alloc1stCount; +            usedBytes += suballoc.size; + +            // 3. Prepare for next iteration. +            lastOffset = suballoc.offset + suballoc.size; +            ++nextAlloc1stIndex; +        } +        // We are at the end. +        else +        { +            if (lastOffset < freeSpace1stTo2ndEnd) +            { +                // There is free space from lastOffset to freeSpace1stTo2ndEnd. +                ++unusedRangeCount; +            } + +            // End of loop. +            lastOffset = freeSpace1stTo2ndEnd; +        } +    } + +    if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) +    { +        size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; +        while (lastOffset < size) +        { +            // Find next non-null allocation or move nextAlloc2ndIndex to the end. +            while (nextAlloc2ndIndex != SIZE_MAX && +                suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) +            { +                --nextAlloc2ndIndex; +            } + +            // Found non-null allocation. +            if (nextAlloc2ndIndex != SIZE_MAX) +            { +                const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + +                // 1. Process free space before this allocation. +                if (lastOffset < suballoc.offset) +                { +                    // There is free space from lastOffset to suballoc.offset. +                    ++unusedRangeCount; +                } + +                // 2. Process this allocation. +                // There is allocation with suballoc.offset, suballoc.size. +                ++alloc2ndCount; +                usedBytes += suballoc.size; + +                // 3. Prepare for next iteration. 
+                lastOffset = suballoc.offset + suballoc.size; +                --nextAlloc2ndIndex; +            } +            // We are at the end. +            else +            { +                if (lastOffset < size) +                { +                    // There is free space from lastOffset to size. +                    ++unusedRangeCount; +                } + +                // End of loop. +                lastOffset = size; +            } +        } +    } + +    const VkDeviceSize unusedBytes = size - usedBytes; +    PrintDetailedMap_Begin(json, unusedBytes, alloc1stCount + alloc2ndCount, unusedRangeCount); + +    // SECOND PASS +    lastOffset = 0; + +    if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) +    { +        const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; +        size_t nextAlloc2ndIndex = 0; +        while (lastOffset < freeSpace2ndTo1stEnd) +        { +            // Find next non-null allocation or move nextAlloc2ndIndex to the end. +            while (nextAlloc2ndIndex < suballoc2ndCount && +                suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) +            { +                ++nextAlloc2ndIndex; +            } + +            // Found non-null allocation. +            if (nextAlloc2ndIndex < suballoc2ndCount) +            { +                const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + +                // 1. Process free space before this allocation. +                if (lastOffset < suballoc.offset) +                { +                    // There is free space from lastOffset to suballoc.offset. +                    const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; +                    PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); +                } + +                // 2. Process this allocation. +                // There is allocation with suballoc.offset, suballoc.size. +                PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.userData); + +                // 3. Prepare for next iteration. +                lastOffset = suballoc.offset + suballoc.size; +                ++nextAlloc2ndIndex; +            } +            // We are at the end. +            else +            { +                if (lastOffset < freeSpace2ndTo1stEnd) +                { +                    // There is free space from lastOffset to freeSpace2ndTo1stEnd. +                    const VkDeviceSize unusedRangeSize = freeSpace2ndTo1stEnd - lastOffset; +                    PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); +                } + +                // End of loop. +                lastOffset = freeSpace2ndTo1stEnd; +            } +        } +    } + +    nextAlloc1stIndex = m_1stNullItemsBeginCount; +    while (lastOffset < freeSpace1stTo2ndEnd) +    { +        // Find next non-null allocation or move nextAllocIndex to the end. +        while (nextAlloc1stIndex < suballoc1stCount && +            suballocations1st[nextAlloc1stIndex].userData == VMA_NULL) +        { +            ++nextAlloc1stIndex; +        } + +        // Found non-null allocation. +        if (nextAlloc1stIndex < suballoc1stCount) +        { +            const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + +            // 1. Process free space before this allocation. +            if (lastOffset < suballoc.offset) +            { +                // There is free space from lastOffset to suballoc.offset. 
+                const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; +                PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); +            } + +            // 2. Process this allocation. +            // There is allocation with suballoc.offset, suballoc.size. +            PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.userData); + +            // 3. Prepare for next iteration. +            lastOffset = suballoc.offset + suballoc.size; +            ++nextAlloc1stIndex; +        } +        // We are at the end. +        else +        { +            if (lastOffset < freeSpace1stTo2ndEnd) +            { +                // There is free space from lastOffset to freeSpace1stTo2ndEnd. +                const VkDeviceSize unusedRangeSize = freeSpace1stTo2ndEnd - lastOffset; +                PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); +            } + +            // End of loop. +            lastOffset = freeSpace1stTo2ndEnd; +        } +    } + +    if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) +    { +        size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; +        while (lastOffset < size) +        { +            // Find next non-null allocation or move nextAlloc2ndIndex to the end. +            while (nextAlloc2ndIndex != SIZE_MAX && +                suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) +            { +                --nextAlloc2ndIndex; +            } + +            // Found non-null allocation. +            if (nextAlloc2ndIndex != SIZE_MAX) +            { +                const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + +                // 1. Process free space before this allocation. +                if (lastOffset < suballoc.offset) +                { +                    // There is free space from lastOffset to suballoc.offset. +                    const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; +                    PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); +                } + +                // 2. Process this allocation. +                // There is allocation with suballoc.offset, suballoc.size. +                PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.userData); + +                // 3. Prepare for next iteration. +                lastOffset = suballoc.offset + suballoc.size; +                --nextAlloc2ndIndex; +            } +            // We are at the end. +            else +            { +                if (lastOffset < size) +                { +                    // There is free space from lastOffset to size. +                    const VkDeviceSize unusedRangeSize = size - lastOffset; +                    PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); +                } + +                // End of loop. 
+                lastOffset = size; +            } +        } +    } + +    PrintDetailedMap_End(json); +} +#endif // VMA_STATS_STRING_ENABLED + +bool VmaBlockMetadata_Linear::CreateAllocationRequest( +    VkDeviceSize allocSize, +    VkDeviceSize allocAlignment, +    bool upperAddress, +    VmaSuballocationType allocType, +    uint32_t strategy, +    VmaAllocationRequest* pAllocationRequest) +{ +    VMA_ASSERT(allocSize > 0); +    VMA_ASSERT(allocType != VMA_SUBALLOCATION_TYPE_FREE); +    VMA_ASSERT(pAllocationRequest != VMA_NULL); +    VMA_HEAVY_ASSERT(Validate()); + +    if(allocSize > GetSize()) +        return false; + +    pAllocationRequest->size = allocSize; +    return upperAddress ? +        CreateAllocationRequest_UpperAddress( +            allocSize, allocAlignment, allocType, strategy, pAllocationRequest) : +        CreateAllocationRequest_LowerAddress( +            allocSize, allocAlignment, allocType, strategy, pAllocationRequest); +} + +VkResult VmaBlockMetadata_Linear::CheckCorruption(const void* pBlockData) +{ +    VMA_ASSERT(!IsVirtual()); +    SuballocationVectorType& suballocations1st = AccessSuballocations1st(); +    for (size_t i = m_1stNullItemsBeginCount, count = suballocations1st.size(); i < count; ++i) +    { +        const VmaSuballocation& suballoc = suballocations1st[i]; +        if (suballoc.type != VMA_SUBALLOCATION_TYPE_FREE) +        { +            if (!VmaValidateMagicValue(pBlockData, suballoc.offset + suballoc.size)) +            { +                VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!"); +                return VK_ERROR_UNKNOWN_COPY; +            } +        } +    } + +    SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); +    for (size_t i = 0, count = suballocations2nd.size(); i < count; ++i) +    { +        const VmaSuballocation& suballoc = suballocations2nd[i]; +        if (suballoc.type != VMA_SUBALLOCATION_TYPE_FREE) +        { +            if (!VmaValidateMagicValue(pBlockData, suballoc.offset + suballoc.size)) +            { +                VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!"); +                return VK_ERROR_UNKNOWN_COPY; +            } +        } +    } + +    return VK_SUCCESS; +} + +void VmaBlockMetadata_Linear::Alloc( +    const VmaAllocationRequest& request, +    VmaSuballocationType type, +    void* userData) +{ +    const VkDeviceSize offset = (VkDeviceSize)request.allocHandle - 1; +    const VmaSuballocation newSuballoc = { offset, request.size, userData, type }; + +    switch (request.type) +    { +    case VmaAllocationRequestType::UpperAddress: +    { +        VMA_ASSERT(m_2ndVectorMode != SECOND_VECTOR_RING_BUFFER && +            "CRITICAL ERROR: Trying to use linear allocator as double stack while it was already used as ring buffer."); +        SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); +        suballocations2nd.push_back(newSuballoc); +        m_2ndVectorMode = SECOND_VECTOR_DOUBLE_STACK; +    } +    break; +    case VmaAllocationRequestType::EndOf1st: +    { +        SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + +        VMA_ASSERT(suballocations1st.empty() || +            offset >= suballocations1st.back().offset + suballocations1st.back().size); +        // Check if it fits before the end of the block. 
+        VMA_ASSERT(offset + request.size <= GetSize()); + +        suballocations1st.push_back(newSuballoc); +    } +    break; +    case VmaAllocationRequestType::EndOf2nd: +    { +        SuballocationVectorType& suballocations1st = AccessSuballocations1st(); +        // New allocation at the end of 2-part ring buffer, so before first allocation from 1st vector. +        VMA_ASSERT(!suballocations1st.empty() && +            offset + request.size <= suballocations1st[m_1stNullItemsBeginCount].offset); +        SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + +        switch (m_2ndVectorMode) +        { +        case SECOND_VECTOR_EMPTY: +            // First allocation from second part ring buffer. +            VMA_ASSERT(suballocations2nd.empty()); +            m_2ndVectorMode = SECOND_VECTOR_RING_BUFFER; +            break; +        case SECOND_VECTOR_RING_BUFFER: +            // 2-part ring buffer is already started. +            VMA_ASSERT(!suballocations2nd.empty()); +            break; +        case SECOND_VECTOR_DOUBLE_STACK: +            VMA_ASSERT(0 && "CRITICAL ERROR: Trying to use linear allocator as ring buffer while it was already used as double stack."); +            break; +        default: +            VMA_ASSERT(0); +        } + +        suballocations2nd.push_back(newSuballoc); +    } +    break; +    default: +        VMA_ASSERT(0 && "CRITICAL INTERNAL ERROR."); +    } + +    m_SumFreeSize -= newSuballoc.size; +} + +void VmaBlockMetadata_Linear::Free(VmaAllocHandle allocHandle) +{ +    SuballocationVectorType& suballocations1st = AccessSuballocations1st(); +    SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); +    VkDeviceSize offset = (VkDeviceSize)allocHandle - 1; + +    if (!suballocations1st.empty()) +    { +        // First allocation: Mark it as next empty at the beginning. +        VmaSuballocation& firstSuballoc = suballocations1st[m_1stNullItemsBeginCount]; +        if (firstSuballoc.offset == offset) +        { +            firstSuballoc.type = VMA_SUBALLOCATION_TYPE_FREE; +            firstSuballoc.userData = VMA_NULL; +            m_SumFreeSize += firstSuballoc.size; +            ++m_1stNullItemsBeginCount; +            CleanupAfterFree(); +            return; +        } +    } + +    // Last allocation in 2-part ring buffer or top of upper stack (same logic). +    if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER || +        m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) +    { +        VmaSuballocation& lastSuballoc = suballocations2nd.back(); +        if (lastSuballoc.offset == offset) +        { +            m_SumFreeSize += lastSuballoc.size; +            suballocations2nd.pop_back(); +            CleanupAfterFree(); +            return; +        } +    } +    // Last allocation in 1st vector. +    else if (m_2ndVectorMode == SECOND_VECTOR_EMPTY) +    { +        VmaSuballocation& lastSuballoc = suballocations1st.back(); +        if (lastSuballoc.offset == offset) +        { +            m_SumFreeSize += lastSuballoc.size; +            suballocations1st.pop_back(); +            CleanupAfterFree(); +            return; +        } +    } + +    VmaSuballocation refSuballoc; +    refSuballoc.offset = offset; +    // Rest of members stays uninitialized intentionally for better performance. + +    // Item from the middle of 1st vector. 
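+    // (A middle item is only marked free and counted here; it is physically removed later
+    // by CleanupAfterFree(), which trims and, if needed, compacts the vectors.)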
+    { +        const SuballocationVectorType::iterator it = VmaBinaryFindSorted( +            suballocations1st.begin() + m_1stNullItemsBeginCount, +            suballocations1st.end(), +            refSuballoc, +            VmaSuballocationOffsetLess()); +        if (it != suballocations1st.end()) +        { +            it->type = VMA_SUBALLOCATION_TYPE_FREE; +            it->userData = VMA_NULL; +            ++m_1stNullItemsMiddleCount; +            m_SumFreeSize += it->size; +            CleanupAfterFree(); +            return; +        } +    } + +    if (m_2ndVectorMode != SECOND_VECTOR_EMPTY) +    { +        // Item from the middle of 2nd vector. +        const SuballocationVectorType::iterator it = m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER ? +            VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetLess()) : +            VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetGreater()); +        if (it != suballocations2nd.end()) +        { +            it->type = VMA_SUBALLOCATION_TYPE_FREE; +            it->userData = VMA_NULL; +            ++m_2ndNullItemsCount; +            m_SumFreeSize += it->size; +            CleanupAfterFree(); +            return; +        } +    } + +    VMA_ASSERT(0 && "Allocation to free not found in linear allocator!"); +} + +void VmaBlockMetadata_Linear::GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) +{ +    outInfo.offset = (VkDeviceSize)allocHandle - 1; +    VmaSuballocation& suballoc = FindSuballocation(outInfo.offset); +    outInfo.size = suballoc.size; +    outInfo.pUserData = suballoc.userData; +} + +void* VmaBlockMetadata_Linear::GetAllocationUserData(VmaAllocHandle allocHandle) const +{ +    return FindSuballocation((VkDeviceSize)allocHandle - 1).userData; +} + +VmaAllocHandle VmaBlockMetadata_Linear::GetAllocationListBegin() const +{ +    // Function only used for defragmentation, which is disabled for this algorithm +    VMA_ASSERT(0); +    return VK_NULL_HANDLE; +} + +VmaAllocHandle VmaBlockMetadata_Linear::GetNextAllocation(VmaAllocHandle prevAlloc) const +{ +    // Function only used for defragmentation, which is disabled for this algorithm +    VMA_ASSERT(0); +    return VK_NULL_HANDLE; +} + +VkDeviceSize VmaBlockMetadata_Linear::GetNextFreeRegionSize(VmaAllocHandle alloc) const +{ +    // Function only used for defragmentation, which is disabled for this algorithm +    VMA_ASSERT(0); +    return 0; +} + +void VmaBlockMetadata_Linear::Clear() +{ +    m_SumFreeSize = GetSize(); +    m_Suballocations0.clear(); +    m_Suballocations1.clear(); +    // Leaving m_1stVectorIndex unchanged - it doesn't matter. 
+    m_2ndVectorMode = SECOND_VECTOR_EMPTY; +    m_1stNullItemsBeginCount = 0; +    m_1stNullItemsMiddleCount = 0; +    m_2ndNullItemsCount = 0; +} + +void VmaBlockMetadata_Linear::SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) +{ +    VmaSuballocation& suballoc = FindSuballocation((VkDeviceSize)allocHandle - 1); +    suballoc.userData = userData; +} + +void VmaBlockMetadata_Linear::DebugLogAllAllocations() const +{ +    const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); +    for (auto it = suballocations1st.begin() + m_1stNullItemsBeginCount; it != suballocations1st.end(); ++it) +        if (it->type != VMA_SUBALLOCATION_TYPE_FREE) +            DebugLogAllocation(it->offset, it->size, it->userData); + +    const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); +    for (auto it = suballocations2nd.begin(); it != suballocations2nd.end(); ++it) +        if (it->type != VMA_SUBALLOCATION_TYPE_FREE) +            DebugLogAllocation(it->offset, it->size, it->userData); +} + +VmaSuballocation& VmaBlockMetadata_Linear::FindSuballocation(VkDeviceSize offset) const +{ +    const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); +    const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + +    VmaSuballocation refSuballoc; +    refSuballoc.offset = offset; +    // Rest of members stays uninitialized intentionally for better performance. + +    // Item from the 1st vector. +    { +        SuballocationVectorType::const_iterator it = VmaBinaryFindSorted( +            suballocations1st.begin() + m_1stNullItemsBeginCount, +            suballocations1st.end(), +            refSuballoc, +            VmaSuballocationOffsetLess()); +        if (it != suballocations1st.end()) +        { +            return const_cast<VmaSuballocation&>(*it); +        } +    } + +    if (m_2ndVectorMode != SECOND_VECTOR_EMPTY) +    { +        // Rest of members stays uninitialized intentionally for better performance. +        SuballocationVectorType::const_iterator it = m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER ? +            VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetLess()) : +            VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetGreater()); +        if (it != suballocations2nd.end()) +        { +            return const_cast<VmaSuballocation&>(*it); +        } +    } + +    VMA_ASSERT(0 && "Allocation not found in linear allocator!"); +    return const_cast<VmaSuballocation&>(suballocations1st.back()); // Should never occur. 
+} + +bool VmaBlockMetadata_Linear::ShouldCompact1st() const +{ +    const size_t nullItemCount = m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount; +    const size_t suballocCount = AccessSuballocations1st().size(); +    return suballocCount > 32 && nullItemCount * 2 >= (suballocCount - nullItemCount) * 3; +} + +void VmaBlockMetadata_Linear::CleanupAfterFree() +{ +    SuballocationVectorType& suballocations1st = AccessSuballocations1st(); +    SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + +    if (IsEmpty()) +    { +        suballocations1st.clear(); +        suballocations2nd.clear(); +        m_1stNullItemsBeginCount = 0; +        m_1stNullItemsMiddleCount = 0; +        m_2ndNullItemsCount = 0; +        m_2ndVectorMode = SECOND_VECTOR_EMPTY; +    } +    else +    { +        const size_t suballoc1stCount = suballocations1st.size(); +        const size_t nullItem1stCount = m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount; +        VMA_ASSERT(nullItem1stCount <= suballoc1stCount); + +        // Find more null items at the beginning of 1st vector. +        while (m_1stNullItemsBeginCount < suballoc1stCount && +            suballocations1st[m_1stNullItemsBeginCount].type == VMA_SUBALLOCATION_TYPE_FREE) +        { +            ++m_1stNullItemsBeginCount; +            --m_1stNullItemsMiddleCount; +        } + +        // Find more null items at the end of 1st vector. +        while (m_1stNullItemsMiddleCount > 0 && +            suballocations1st.back().type == VMA_SUBALLOCATION_TYPE_FREE) +        { +            --m_1stNullItemsMiddleCount; +            suballocations1st.pop_back(); +        } + +        // Find more null items at the end of 2nd vector. +        while (m_2ndNullItemsCount > 0 && +            suballocations2nd.back().type == VMA_SUBALLOCATION_TYPE_FREE) +        { +            --m_2ndNullItemsCount; +            suballocations2nd.pop_back(); +        } + +        // Find more null items at the beginning of 2nd vector. +        while (m_2ndNullItemsCount > 0 && +            suballocations2nd[0].type == VMA_SUBALLOCATION_TYPE_FREE) +        { +            --m_2ndNullItemsCount; +            VmaVectorRemove(suballocations2nd, 0); +        } + +        if (ShouldCompact1st()) +        { +            const size_t nonNullItemCount = suballoc1stCount - nullItem1stCount; +            size_t srcIndex = m_1stNullItemsBeginCount; +            for (size_t dstIndex = 0; dstIndex < nonNullItemCount; ++dstIndex) +            { +                while (suballocations1st[srcIndex].type == VMA_SUBALLOCATION_TYPE_FREE) +                { +                    ++srcIndex; +                } +                if (dstIndex != srcIndex) +                { +                    suballocations1st[dstIndex] = suballocations1st[srcIndex]; +                } +                ++srcIndex; +            } +            suballocations1st.resize(nonNullItemCount); +            m_1stNullItemsBeginCount = 0; +            m_1stNullItemsMiddleCount = 0; +        } + +        // 2nd vector became empty. +        if (suballocations2nd.empty()) +        { +            m_2ndVectorMode = SECOND_VECTOR_EMPTY; +        } + +        // 1st vector became empty. +        if (suballocations1st.size() - m_1stNullItemsBeginCount == 0) +        { +            suballocations1st.clear(); +            m_1stNullItemsBeginCount = 0; + +            if (!suballocations2nd.empty() && m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) +            { +                // Swap 1st with 2nd. Now 2nd is empty. 
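+                // (The actual swap happens by flipping m_1stVectorIndex at the end of this branch,
+                // after the null-item counters have been migrated from 2nd to 1st.)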
+                m_2ndVectorMode = SECOND_VECTOR_EMPTY; +                m_1stNullItemsMiddleCount = m_2ndNullItemsCount; +                while (m_1stNullItemsBeginCount < suballocations2nd.size() && +                    suballocations2nd[m_1stNullItemsBeginCount].type == VMA_SUBALLOCATION_TYPE_FREE) +                { +                    ++m_1stNullItemsBeginCount; +                    --m_1stNullItemsMiddleCount; +                } +                m_2ndNullItemsCount = 0; +                m_1stVectorIndex ^= 1; +            } +        } +    } + +    VMA_HEAVY_ASSERT(Validate()); +} + +bool VmaBlockMetadata_Linear::CreateAllocationRequest_LowerAddress( +    VkDeviceSize allocSize, +    VkDeviceSize allocAlignment, +    VmaSuballocationType allocType, +    uint32_t strategy, +    VmaAllocationRequest* pAllocationRequest) +{ +    const VkDeviceSize blockSize = GetSize(); +    const VkDeviceSize debugMargin = GetDebugMargin(); +    const VkDeviceSize bufferImageGranularity = GetBufferImageGranularity(); +    SuballocationVectorType& suballocations1st = AccessSuballocations1st(); +    SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + +    if (m_2ndVectorMode == SECOND_VECTOR_EMPTY || m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) +    { +        // Try to allocate at the end of 1st vector. + +        VkDeviceSize resultBaseOffset = 0; +        if (!suballocations1st.empty()) +        { +            const VmaSuballocation& lastSuballoc = suballocations1st.back(); +            resultBaseOffset = lastSuballoc.offset + lastSuballoc.size + debugMargin; +        } + +        // Start from offset equal to beginning of free space. +        VkDeviceSize resultOffset = resultBaseOffset; + +        // Apply alignment. +        resultOffset = VmaAlignUp(resultOffset, allocAlignment); + +        // Check previous suballocations for BufferImageGranularity conflicts. +        // Make bigger alignment if necessary. +        if (bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment && !suballocations1st.empty()) +        { +            bool bufferImageGranularityConflict = false; +            for (size_t prevSuballocIndex = suballocations1st.size(); prevSuballocIndex--; ) +            { +                const VmaSuballocation& prevSuballoc = suballocations1st[prevSuballocIndex]; +                if (VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity)) +                { +                    if (VmaIsBufferImageGranularityConflict(prevSuballoc.type, allocType)) +                    { +                        bufferImageGranularityConflict = true; +                        break; +                    } +                } +                else +                    // Already on previous page. +                    break; +            } +            if (bufferImageGranularityConflict) +            { +                resultOffset = VmaAlignUp(resultOffset, bufferImageGranularity); +            } +        } + +        const VkDeviceSize freeSpaceEnd = m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? +            suballocations2nd.back().offset : blockSize; + +        // There is enough free space at the end after alignment. +        if (resultOffset + allocSize + debugMargin <= freeSpaceEnd) +        { +            // Check next suballocations for BufferImageGranularity conflicts. +            // If conflict exists, allocation cannot be made here. 
+            if ((allocSize % bufferImageGranularity || resultOffset % bufferImageGranularity) && m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) +            { +                for (size_t nextSuballocIndex = suballocations2nd.size(); nextSuballocIndex--; ) +                { +                    const VmaSuballocation& nextSuballoc = suballocations2nd[nextSuballocIndex]; +                    if (VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity)) +                    { +                        if (VmaIsBufferImageGranularityConflict(allocType, nextSuballoc.type)) +                        { +                            return false; +                        } +                    } +                    else +                    { +                        // Already on previous page. +                        break; +                    } +                } +            } + +            // All tests passed: Success. +            pAllocationRequest->allocHandle = (VmaAllocHandle)(resultOffset + 1); +            // pAllocationRequest->item, customData unused. +            pAllocationRequest->type = VmaAllocationRequestType::EndOf1st; +            return true; +        } +    } + +    // Wrap-around to end of 2nd vector. Try to allocate there, watching for the +    // beginning of 1st vector as the end of free space. +    if (m_2ndVectorMode == SECOND_VECTOR_EMPTY || m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) +    { +        VMA_ASSERT(!suballocations1st.empty()); + +        VkDeviceSize resultBaseOffset = 0; +        if (!suballocations2nd.empty()) +        { +            const VmaSuballocation& lastSuballoc = suballocations2nd.back(); +            resultBaseOffset = lastSuballoc.offset + lastSuballoc.size + debugMargin; +        } + +        // Start from offset equal to beginning of free space. +        VkDeviceSize resultOffset = resultBaseOffset; + +        // Apply alignment. +        resultOffset = VmaAlignUp(resultOffset, allocAlignment); + +        // Check previous suballocations for BufferImageGranularity conflicts. +        // Make bigger alignment if necessary. +        if (bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment && !suballocations2nd.empty()) +        { +            bool bufferImageGranularityConflict = false; +            for (size_t prevSuballocIndex = suballocations2nd.size(); prevSuballocIndex--; ) +            { +                const VmaSuballocation& prevSuballoc = suballocations2nd[prevSuballocIndex]; +                if (VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity)) +                { +                    if (VmaIsBufferImageGranularityConflict(prevSuballoc.type, allocType)) +                    { +                        bufferImageGranularityConflict = true; +                        break; +                    } +                } +                else +                    // Already on previous page. +                    break; +            } +            if (bufferImageGranularityConflict) +            { +                resultOffset = VmaAlignUp(resultOffset, bufferImageGranularity); +            } +        } + +        size_t index1st = m_1stNullItemsBeginCount; + +        // There is enough free space at the end after alignment. 
+        if ((index1st == suballocations1st.size() && resultOffset + allocSize + debugMargin <= blockSize) || +            (index1st < suballocations1st.size() && resultOffset + allocSize + debugMargin <= suballocations1st[index1st].offset)) +        { +            // Check next suballocations for BufferImageGranularity conflicts. +            // If conflict exists, allocation cannot be made here. +            if (allocSize % bufferImageGranularity || resultOffset % bufferImageGranularity) +            { +                for (size_t nextSuballocIndex = index1st; +                    nextSuballocIndex < suballocations1st.size(); +                    nextSuballocIndex++) +                { +                    const VmaSuballocation& nextSuballoc = suballocations1st[nextSuballocIndex]; +                    if (VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity)) +                    { +                        if (VmaIsBufferImageGranularityConflict(allocType, nextSuballoc.type)) +                        { +                            return false; +                        } +                    } +                    else +                    { +                        // Already on next page. +                        break; +                    } +                } +            } + +            // All tests passed: Success. +            pAllocationRequest->allocHandle = (VmaAllocHandle)(resultOffset + 1); +            pAllocationRequest->type = VmaAllocationRequestType::EndOf2nd; +            // pAllocationRequest->item, customData unused. +            return true; +        } +    } + +    return false; +} + +bool VmaBlockMetadata_Linear::CreateAllocationRequest_UpperAddress( +    VkDeviceSize allocSize, +    VkDeviceSize allocAlignment, +    VmaSuballocationType allocType, +    uint32_t strategy, +    VmaAllocationRequest* pAllocationRequest) +{ +    const VkDeviceSize blockSize = GetSize(); +    const VkDeviceSize bufferImageGranularity = GetBufferImageGranularity(); +    SuballocationVectorType& suballocations1st = AccessSuballocations1st(); +    SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + +    if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) +    { +        VMA_ASSERT(0 && "Trying to use pool with linear algorithm as double stack, while it is already being used as ring buffer."); +        return false; +    } + +    // Try to allocate before 2nd.back(), or end of block if 2nd.empty(). +    if (allocSize > blockSize) +    { +        return false; +    } +    VkDeviceSize resultBaseOffset = blockSize - allocSize; +    if (!suballocations2nd.empty()) +    { +        const VmaSuballocation& lastSuballoc = suballocations2nd.back(); +        resultBaseOffset = lastSuballoc.offset - allocSize; +        if (allocSize > lastSuballoc.offset) +        { +            return false; +        } +    } + +    // Start from offset equal to end of free space. +    VkDeviceSize resultOffset = resultBaseOffset; + +    const VkDeviceSize debugMargin = GetDebugMargin(); + +    // Apply debugMargin at the end. +    if (debugMargin > 0) +    { +        if (resultOffset < debugMargin) +        { +            return false; +        } +        resultOffset -= debugMargin; +    } + +    // Apply alignment. +    resultOffset = VmaAlignDown(resultOffset, allocAlignment); + +    // Check next suballocations from 2nd for BufferImageGranularity conflicts. +    // Make bigger alignment if necessary. 
+    if (bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment && !suballocations2nd.empty()) +    { +        bool bufferImageGranularityConflict = false; +        for (size_t nextSuballocIndex = suballocations2nd.size(); nextSuballocIndex--; ) +        { +            const VmaSuballocation& nextSuballoc = suballocations2nd[nextSuballocIndex]; +            if (VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity)) +            { +                if (VmaIsBufferImageGranularityConflict(nextSuballoc.type, allocType)) +                { +                    bufferImageGranularityConflict = true; +                    break; +                } +            } +            else +                // Already on previous page. +                break; +        } +        if (bufferImageGranularityConflict) +        { +            resultOffset = VmaAlignDown(resultOffset, bufferImageGranularity); +        } +    } + +    // There is enough free space. +    const VkDeviceSize endOf1st = !suballocations1st.empty() ? +        suballocations1st.back().offset + suballocations1st.back().size : +        0; +    if (endOf1st + debugMargin <= resultOffset) +    { +        // Check previous suballocations for BufferImageGranularity conflicts. +        // If conflict exists, allocation cannot be made here. +        if (bufferImageGranularity > 1) +        { +            for (size_t prevSuballocIndex = suballocations1st.size(); prevSuballocIndex--; ) +            { +                const VmaSuballocation& prevSuballoc = suballocations1st[prevSuballocIndex]; +                if (VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity)) +                { +                    if (VmaIsBufferImageGranularityConflict(allocType, prevSuballoc.type)) +                    { +                        return false; +                    } +                } +                else +                { +                    // Already on next page. +                    break; +                } +            } +        } + +        // All tests passed: Success. +        pAllocationRequest->allocHandle = (VmaAllocHandle)(resultOffset + 1); +        // pAllocationRequest->item unused. +        pAllocationRequest->type = VmaAllocationRequestType::UpperAddress; +        return true; +    } + +    return false; +} +#endif // _VMA_BLOCK_METADATA_LINEAR_FUNCTIONS +#endif // _VMA_BLOCK_METADATA_LINEAR + +#ifndef _VMA_BLOCK_METADATA_TLSF +// To not search current larger region if first allocation won't succeed and skip to smaller range +// use with VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT as strategy in CreateAllocationRequest(). +// When fragmentation and reusal of previous blocks doesn't matter then use with +// VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT for fastest alloc time possible. 
+class VmaBlockMetadata_TLSF : public VmaBlockMetadata +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockMetadata_TLSF) +public: +    VmaBlockMetadata_TLSF(const VkAllocationCallbacks* pAllocationCallbacks, +        VkDeviceSize bufferImageGranularity, bool isVirtual); +    virtual ~VmaBlockMetadata_TLSF(); + +    size_t GetAllocationCount() const override { return m_AllocCount; } +    size_t GetFreeRegionsCount() const override { return m_BlocksFreeCount + 1; } +    VkDeviceSize GetSumFreeSize() const override { return m_BlocksFreeSize + m_NullBlock->size; } +    bool IsEmpty() const override { return m_NullBlock->offset == 0; } +    VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const override { return ((Block*)allocHandle)->offset; } + +    void Init(VkDeviceSize size) override; +    bool Validate() const override; + +    void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const override; +    void AddStatistics(VmaStatistics& inoutStats) const override; + +#if VMA_STATS_STRING_ENABLED +    void PrintDetailedMap(class VmaJsonWriter& json) const override; +#endif + +    bool CreateAllocationRequest( +        VkDeviceSize allocSize, +        VkDeviceSize allocAlignment, +        bool upperAddress, +        VmaSuballocationType allocType, +        uint32_t strategy, +        VmaAllocationRequest* pAllocationRequest) override; + +    VkResult CheckCorruption(const void* pBlockData) override; +    void Alloc( +        const VmaAllocationRequest& request, +        VmaSuballocationType type, +        void* userData) override; + +    void Free(VmaAllocHandle allocHandle) override; +    void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) override; +    void* GetAllocationUserData(VmaAllocHandle allocHandle) const override; +    VmaAllocHandle GetAllocationListBegin() const override; +    VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const override; +    VkDeviceSize GetNextFreeRegionSize(VmaAllocHandle alloc) const override; +    void Clear() override; +    void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) override; +    void DebugLogAllAllocations() const override; + +private: +    // According to original paper it should be preferable 4 or 5: +    // M. Masmano, I. Ripoll, A. Crespo, and J. 
Real "TLSF: a New Dynamic Memory Allocator for Real-Time Systems" +    // http://www.gii.upv.es/tlsf/files/ecrts04_tlsf.pdf +    static const uint8_t SECOND_LEVEL_INDEX = 5; +    static const uint16_t SMALL_BUFFER_SIZE = 256; +    static const uint32_t INITIAL_BLOCK_ALLOC_COUNT = 16; +    static const uint8_t MEMORY_CLASS_SHIFT = 7; +    static const uint8_t MAX_MEMORY_CLASSES = 65 - MEMORY_CLASS_SHIFT; + +    class Block +    { +    public: +        VkDeviceSize offset; +        VkDeviceSize size; +        Block* prevPhysical; +        Block* nextPhysical; + +        void MarkFree() { prevFree = VMA_NULL; } +        void MarkTaken() { prevFree = this; } +        bool IsFree() const { return prevFree != this; } +        void*& UserData() { VMA_HEAVY_ASSERT(!IsFree()); return userData; } +        Block*& PrevFree() { return prevFree; } +        Block*& NextFree() { VMA_HEAVY_ASSERT(IsFree()); return nextFree; } + +    private: +        Block* prevFree; // Address of the same block here indicates that block is taken +        union +        { +            Block* nextFree; +            void* userData; +        }; +    }; + +    size_t m_AllocCount; +    // Total number of free blocks besides null block +    size_t m_BlocksFreeCount; +    // Total size of free blocks excluding null block +    VkDeviceSize m_BlocksFreeSize; +    uint32_t m_IsFreeBitmap; +    uint8_t m_MemoryClasses; +    uint32_t m_InnerIsFreeBitmap[MAX_MEMORY_CLASSES]; +    uint32_t m_ListsCount; +    /* +    * 0: 0-3 lists for small buffers +    * 1+: 0-(2^SLI-1) lists for normal buffers +    */ +    Block** m_FreeList; +    VmaPoolAllocator<Block> m_BlockAllocator; +    Block* m_NullBlock; +    VmaBlockBufferImageGranularity m_GranularityHandler; + +    uint8_t SizeToMemoryClass(VkDeviceSize size) const; +    uint16_t SizeToSecondIndex(VkDeviceSize size, uint8_t memoryClass) const; +    uint32_t GetListIndex(uint8_t memoryClass, uint16_t secondIndex) const; +    uint32_t GetListIndex(VkDeviceSize size) const; + +    void RemoveFreeBlock(Block* block); +    void InsertFreeBlock(Block* block); +    void MergeBlock(Block* block, Block* prev); + +    Block* FindFreeBlock(VkDeviceSize size, uint32_t& listIndex) const; +    bool CheckBlock( +        Block& block, +        uint32_t listIndex, +        VkDeviceSize allocSize, +        VkDeviceSize allocAlignment, +        VmaSuballocationType allocType, +        VmaAllocationRequest* pAllocationRequest); +}; + +#ifndef _VMA_BLOCK_METADATA_TLSF_FUNCTIONS +VmaBlockMetadata_TLSF::VmaBlockMetadata_TLSF(const VkAllocationCallbacks* pAllocationCallbacks, +    VkDeviceSize bufferImageGranularity, bool isVirtual) +    : VmaBlockMetadata(pAllocationCallbacks, bufferImageGranularity, isVirtual), +    m_AllocCount(0), +    m_BlocksFreeCount(0), +    m_BlocksFreeSize(0), +    m_IsFreeBitmap(0), +    m_MemoryClasses(0), +    m_ListsCount(0), +    m_FreeList(VMA_NULL), +    m_BlockAllocator(pAllocationCallbacks, INITIAL_BLOCK_ALLOC_COUNT), +    m_NullBlock(VMA_NULL), +    m_GranularityHandler(bufferImageGranularity) {} + +VmaBlockMetadata_TLSF::~VmaBlockMetadata_TLSF() +{ +    if (m_FreeList) +        vma_delete_array(GetAllocationCallbacks(), m_FreeList, m_ListsCount); +    m_GranularityHandler.Destroy(GetAllocationCallbacks()); +} + +void VmaBlockMetadata_TLSF::Init(VkDeviceSize size) +{ +    VmaBlockMetadata::Init(size); + +    if (!IsVirtual()) +        m_GranularityHandler.Init(GetAllocationCallbacks(), size); + +    m_NullBlock = m_BlockAllocator.Alloc(); +    m_NullBlock->size = size; +    
m_NullBlock->offset = 0; +    m_NullBlock->prevPhysical = VMA_NULL; +    m_NullBlock->nextPhysical = VMA_NULL; +    m_NullBlock->MarkFree(); +    m_NullBlock->NextFree() = VMA_NULL; +    m_NullBlock->PrevFree() = VMA_NULL; +    uint8_t memoryClass = SizeToMemoryClass(size); +    uint16_t sli = SizeToSecondIndex(size, memoryClass); +    m_ListsCount = (memoryClass == 0 ? 0 : (memoryClass - 1) * (1UL << SECOND_LEVEL_INDEX) + sli) + 1; +    if (IsVirtual()) +        m_ListsCount += 1UL << SECOND_LEVEL_INDEX; +    else +        m_ListsCount += 4; + +    m_MemoryClasses = memoryClass + uint8_t(2); +    memset(m_InnerIsFreeBitmap, 0, MAX_MEMORY_CLASSES * sizeof(uint32_t)); + +    m_FreeList = vma_new_array(GetAllocationCallbacks(), Block*, m_ListsCount); +    memset(m_FreeList, 0, m_ListsCount * sizeof(Block*)); +} + +bool VmaBlockMetadata_TLSF::Validate() const +{ +    VMA_VALIDATE(GetSumFreeSize() <= GetSize()); + +    VkDeviceSize calculatedSize = m_NullBlock->size; +    VkDeviceSize calculatedFreeSize = m_NullBlock->size; +    size_t allocCount = 0; +    size_t freeCount = 0; + +    // Check integrity of free lists +    for (uint32_t list = 0; list < m_ListsCount; ++list) +    { +        Block* block = m_FreeList[list]; +        if (block != VMA_NULL) +        { +            VMA_VALIDATE(block->IsFree()); +            VMA_VALIDATE(block->PrevFree() == VMA_NULL); +            while (block->NextFree()) +            { +                VMA_VALIDATE(block->NextFree()->IsFree()); +                VMA_VALIDATE(block->NextFree()->PrevFree() == block); +                block = block->NextFree(); +            } +        } +    } + +    VkDeviceSize nextOffset = m_NullBlock->offset; +    auto validateCtx = m_GranularityHandler.StartValidation(GetAllocationCallbacks(), IsVirtual()); + +    VMA_VALIDATE(m_NullBlock->nextPhysical == VMA_NULL); +    if (m_NullBlock->prevPhysical) +    { +        VMA_VALIDATE(m_NullBlock->prevPhysical->nextPhysical == m_NullBlock); +    } +    // Check all blocks +    for (Block* prev = m_NullBlock->prevPhysical; prev != VMA_NULL; prev = prev->prevPhysical) +    { +        VMA_VALIDATE(prev->offset + prev->size == nextOffset); +        nextOffset = prev->offset; +        calculatedSize += prev->size; + +        uint32_t listIndex = GetListIndex(prev->size); +        if (prev->IsFree()) +        { +            ++freeCount; +            // Check if free block belongs to free list +            Block* freeBlock = m_FreeList[listIndex]; +            VMA_VALIDATE(freeBlock != VMA_NULL); + +            bool found = false; +            do +            { +                if (freeBlock == prev) +                    found = true; + +                freeBlock = freeBlock->NextFree(); +            } while (!found && freeBlock != VMA_NULL); + +            VMA_VALIDATE(found); +            calculatedFreeSize += prev->size; +        } +        else +        { +            ++allocCount; +            // Check if taken block is not on a free list +            Block* freeBlock = m_FreeList[listIndex]; +            while (freeBlock) +            { +                VMA_VALIDATE(freeBlock != prev); +                freeBlock = freeBlock->NextFree(); +            } + +            if (!IsVirtual()) +            { +                VMA_VALIDATE(m_GranularityHandler.Validate(validateCtx, prev->offset, prev->size)); +            } +        } + +        if (prev->prevPhysical) +        { +            VMA_VALIDATE(prev->prevPhysical->nextPhysical == prev); +        } +    } + +    if (!IsVirtual()) +    
{ +        VMA_VALIDATE(m_GranularityHandler.FinishValidation(validateCtx)); +    } + +    VMA_VALIDATE(nextOffset == 0); +    VMA_VALIDATE(calculatedSize == GetSize()); +    VMA_VALIDATE(calculatedFreeSize == GetSumFreeSize()); +    VMA_VALIDATE(allocCount == m_AllocCount); +    VMA_VALIDATE(freeCount == m_BlocksFreeCount); + +    return true; +} + +void VmaBlockMetadata_TLSF::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const +{ +    inoutStats.statistics.blockCount++; +    inoutStats.statistics.blockBytes += GetSize(); +    if (m_NullBlock->size > 0) +        VmaAddDetailedStatisticsUnusedRange(inoutStats, m_NullBlock->size); + +    for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical) +    { +        if (block->IsFree()) +            VmaAddDetailedStatisticsUnusedRange(inoutStats, block->size); +        else +            VmaAddDetailedStatisticsAllocation(inoutStats, block->size); +    } +} + +void VmaBlockMetadata_TLSF::AddStatistics(VmaStatistics& inoutStats) const +{ +    inoutStats.blockCount++; +    inoutStats.allocationCount += (uint32_t)m_AllocCount; +    inoutStats.blockBytes += GetSize(); +    inoutStats.allocationBytes += GetSize() - GetSumFreeSize(); +} + +#if VMA_STATS_STRING_ENABLED +void VmaBlockMetadata_TLSF::PrintDetailedMap(class VmaJsonWriter& json) const +{ +    size_t blockCount = m_AllocCount + m_BlocksFreeCount; +    VmaStlAllocator<Block*> allocator(GetAllocationCallbacks()); +    VmaVector<Block*, VmaStlAllocator<Block*>> blockList(blockCount, allocator); + +    size_t i = blockCount; +    for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical) +    { +        blockList[--i] = block; +    } +    VMA_ASSERT(i == 0); + +    VmaDetailedStatistics stats; +    VmaClearDetailedStatistics(stats); +    AddDetailedStatistics(stats); + +    PrintDetailedMap_Begin(json, +        stats.statistics.blockBytes - stats.statistics.allocationBytes, +        stats.statistics.allocationCount, +        stats.unusedRangeCount); + +    for (; i < blockCount; ++i) +    { +        Block* block = blockList[i]; +        if (block->IsFree()) +            PrintDetailedMap_UnusedRange(json, block->offset, block->size); +        else +            PrintDetailedMap_Allocation(json, block->offset, block->size, block->UserData()); +    } +    if (m_NullBlock->size > 0) +        PrintDetailedMap_UnusedRange(json, m_NullBlock->offset, m_NullBlock->size); + +    PrintDetailedMap_End(json); +} +#endif + +bool VmaBlockMetadata_TLSF::CreateAllocationRequest( +    VkDeviceSize allocSize, +    VkDeviceSize allocAlignment, +    bool upperAddress, +    VmaSuballocationType allocType, +    uint32_t strategy, +    VmaAllocationRequest* pAllocationRequest) +{ +    VMA_ASSERT(allocSize > 0 && "Cannot allocate empty block!"); +    VMA_ASSERT(!upperAddress && "VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT can be used only with linear algorithm."); + +    // For small granularity round up +    if (!IsVirtual()) +        m_GranularityHandler.RoundupAllocRequest(allocType, allocSize, allocAlignment); + +    allocSize += GetDebugMargin(); +    // Quick check for too small pool +    if (allocSize > GetSumFreeSize()) +        return false; + +    // If no free blocks in pool then check only null block +    if (m_BlocksFreeCount == 0) +        return CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest); + +    // Round up to the next block +    VkDeviceSize sizeForNextList = allocSize; +    
VkDeviceSize smallSizeStep = VkDeviceSize(SMALL_BUFFER_SIZE / (IsVirtual() ? 1 << SECOND_LEVEL_INDEX : 4)); +    if (allocSize > SMALL_BUFFER_SIZE) +    { +        sizeForNextList += (1ULL << (VMA_BITSCAN_MSB(allocSize) - SECOND_LEVEL_INDEX)); +    } +    else if (allocSize > SMALL_BUFFER_SIZE - smallSizeStep) +        sizeForNextList = SMALL_BUFFER_SIZE + 1; +    else +        sizeForNextList += smallSizeStep; + +    uint32_t nextListIndex = m_ListsCount; +    uint32_t prevListIndex = m_ListsCount; +    Block* nextListBlock = VMA_NULL; +    Block* prevListBlock = VMA_NULL; + +    // Check blocks according to strategies +    if (strategy & VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT) +    { +        // Quick check for larger block first +        nextListBlock = FindFreeBlock(sizeForNextList, nextListIndex); +        if (nextListBlock != VMA_NULL && CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) +            return true; + +        // If not fitted then null block +        if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest)) +            return true; + +        // Null block failed, search larger bucket +        while (nextListBlock) +        { +            if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) +                return true; +            nextListBlock = nextListBlock->NextFree(); +        } + +        // Failed again, check best fit bucket +        prevListBlock = FindFreeBlock(allocSize, prevListIndex); +        while (prevListBlock) +        { +            if (CheckBlock(*prevListBlock, prevListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) +                return true; +            prevListBlock = prevListBlock->NextFree(); +        } +    } +    else if (strategy & VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT) +    { +        // Check best fit bucket +        prevListBlock = FindFreeBlock(allocSize, prevListIndex); +        while (prevListBlock) +        { +            if (CheckBlock(*prevListBlock, prevListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) +                return true; +            prevListBlock = prevListBlock->NextFree(); +        } + +        // If failed check null block +        if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest)) +            return true; + +        // Check larger bucket +        nextListBlock = FindFreeBlock(sizeForNextList, nextListIndex); +        while (nextListBlock) +        { +            if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) +                return true; +            nextListBlock = nextListBlock->NextFree(); +        } +    } +    else if (strategy & VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT ) +    { +        // Perform search from the start +        VmaStlAllocator<Block*> allocator(GetAllocationCallbacks()); +        VmaVector<Block*, VmaStlAllocator<Block*>> blockList(m_BlocksFreeCount, allocator); + +        size_t i = m_BlocksFreeCount; +        for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical) +        { +            if (block->IsFree() && block->size >= allocSize) +                blockList[--i] = block; +        } + +        for (; i < m_BlocksFreeCount; ++i) +        { +            Block& block = *blockList[i]; +            if (CheckBlock(block, 
GetListIndex(block.size), allocSize, allocAlignment, allocType, pAllocationRequest)) +                return true; +        } + +        // If failed check null block +        if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest)) +            return true; + +        // Whole range searched, no more memory +        return false; +    } +    else +    { +        // Check larger bucket +        nextListBlock = FindFreeBlock(sizeForNextList, nextListIndex); +        while (nextListBlock) +        { +            if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) +                return true; +            nextListBlock = nextListBlock->NextFree(); +        } + +        // If failed check null block +        if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest)) +            return true; + +        // Check best fit bucket +        prevListBlock = FindFreeBlock(allocSize, prevListIndex); +        while (prevListBlock) +        { +            if (CheckBlock(*prevListBlock, prevListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) +                return true; +            prevListBlock = prevListBlock->NextFree(); +        } +    } + +    // Worst case, full search has to be done +    while (++nextListIndex < m_ListsCount) +    { +        nextListBlock = m_FreeList[nextListIndex]; +        while (nextListBlock) +        { +            if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) +                return true; +            nextListBlock = nextListBlock->NextFree(); +        } +    } + +    // No more memory sadly +    return false; +} + +VkResult VmaBlockMetadata_TLSF::CheckCorruption(const void* pBlockData) +{ +    for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical) +    { +        if (!block->IsFree()) +        { +            if (!VmaValidateMagicValue(pBlockData, block->offset + block->size)) +            { +                VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!"); +                return VK_ERROR_UNKNOWN_COPY; +            } +        } +    } + +    return VK_SUCCESS; +} + +void VmaBlockMetadata_TLSF::Alloc( +    const VmaAllocationRequest& request, +    VmaSuballocationType type, +    void* userData) +{ +    VMA_ASSERT(request.type == VmaAllocationRequestType::TLSF); + +    // Get block and pop it from the free list +    Block* currentBlock = (Block*)request.allocHandle; +    VkDeviceSize offset = request.algorithmData; +    VMA_ASSERT(currentBlock != VMA_NULL); +    VMA_ASSERT(currentBlock->offset <= offset); + +    if (currentBlock != m_NullBlock) +        RemoveFreeBlock(currentBlock); + +    VkDeviceSize debugMargin = GetDebugMargin(); +    VkDeviceSize missingAlignment = offset - currentBlock->offset; + +    // Append missing alignment to prev block or create new one +    if (missingAlignment) +    { +        Block* prevBlock = currentBlock->prevPhysical; +        VMA_ASSERT(prevBlock != VMA_NULL && "There should be no missing alignment at offset 0!"); + +        if (prevBlock->IsFree() && prevBlock->size != debugMargin) +        { +            uint32_t oldList = GetListIndex(prevBlock->size); +            prevBlock->size += missingAlignment; +            // Check if new size crosses list bucket +            if (oldList != GetListIndex(prevBlock->size)) +            { +                
prevBlock->size -= missingAlignment; +                RemoveFreeBlock(prevBlock); +                prevBlock->size += missingAlignment; +                InsertFreeBlock(prevBlock); +            } +            else +                m_BlocksFreeSize += missingAlignment; +        } +        else +        { +            Block* newBlock = m_BlockAllocator.Alloc(); +            currentBlock->prevPhysical = newBlock; +            prevBlock->nextPhysical = newBlock; +            newBlock->prevPhysical = prevBlock; +            newBlock->nextPhysical = currentBlock; +            newBlock->size = missingAlignment; +            newBlock->offset = currentBlock->offset; +            newBlock->MarkTaken(); + +            InsertFreeBlock(newBlock); +        } + +        currentBlock->size -= missingAlignment; +        currentBlock->offset += missingAlignment; +    } + +    VkDeviceSize size = request.size + debugMargin; +    if (currentBlock->size == size) +    { +        if (currentBlock == m_NullBlock) +        { +            // Setup new null block +            m_NullBlock = m_BlockAllocator.Alloc(); +            m_NullBlock->size = 0; +            m_NullBlock->offset = currentBlock->offset + size; +            m_NullBlock->prevPhysical = currentBlock; +            m_NullBlock->nextPhysical = VMA_NULL; +            m_NullBlock->MarkFree(); +            m_NullBlock->PrevFree() = VMA_NULL; +            m_NullBlock->NextFree() = VMA_NULL; +            currentBlock->nextPhysical = m_NullBlock; +            currentBlock->MarkTaken(); +        } +    } +    else +    { +        VMA_ASSERT(currentBlock->size > size && "Proper block already found, shouldn't find smaller one!"); + +        // Create new free block +        Block* newBlock = m_BlockAllocator.Alloc(); +        newBlock->size = currentBlock->size - size; +        newBlock->offset = currentBlock->offset + size; +        newBlock->prevPhysical = currentBlock; +        newBlock->nextPhysical = currentBlock->nextPhysical; +        currentBlock->nextPhysical = newBlock; +        currentBlock->size = size; + +        if (currentBlock == m_NullBlock) +        { +            m_NullBlock = newBlock; +            m_NullBlock->MarkFree(); +            m_NullBlock->NextFree() = VMA_NULL; +            m_NullBlock->PrevFree() = VMA_NULL; +            currentBlock->MarkTaken(); +        } +        else +        { +            newBlock->nextPhysical->prevPhysical = newBlock; +            newBlock->MarkTaken(); +            InsertFreeBlock(newBlock); +        } +    } +    currentBlock->UserData() = userData; + +    if (debugMargin > 0) +    { +        currentBlock->size -= debugMargin; +        Block* newBlock = m_BlockAllocator.Alloc(); +        newBlock->size = debugMargin; +        newBlock->offset = currentBlock->offset + currentBlock->size; +        newBlock->prevPhysical = currentBlock; +        newBlock->nextPhysical = currentBlock->nextPhysical; +        newBlock->MarkTaken(); +        currentBlock->nextPhysical->prevPhysical = newBlock; +        currentBlock->nextPhysical = newBlock; +        InsertFreeBlock(newBlock); +    } + +    if (!IsVirtual()) +        m_GranularityHandler.AllocPages((uint8_t)(uintptr_t)request.customData, +            currentBlock->offset, currentBlock->size); +    ++m_AllocCount; +} + +void VmaBlockMetadata_TLSF::Free(VmaAllocHandle allocHandle) +{ +    Block* block = (Block*)allocHandle; +    Block* next = block->nextPhysical; +    VMA_ASSERT(!block->IsFree() && "Block is already free!"); + +    if (!IsVirtual()) +        
m_GranularityHandler.FreePages(block->offset, block->size); +    --m_AllocCount; + +    VkDeviceSize debugMargin = GetDebugMargin(); +    if (debugMargin > 0) +    { +        RemoveFreeBlock(next); +        MergeBlock(next, block); +        block = next; +        next = next->nextPhysical; +    } + +    // Try merging +    Block* prev = block->prevPhysical; +    if (prev != VMA_NULL && prev->IsFree() && prev->size != debugMargin) +    { +        RemoveFreeBlock(prev); +        MergeBlock(block, prev); +    } + +    if (!next->IsFree()) +        InsertFreeBlock(block); +    else if (next == m_NullBlock) +        MergeBlock(m_NullBlock, block); +    else +    { +        RemoveFreeBlock(next); +        MergeBlock(next, block); +        InsertFreeBlock(next); +    } +} + +void VmaBlockMetadata_TLSF::GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) +{ +    Block* block = (Block*)allocHandle; +    VMA_ASSERT(!block->IsFree() && "Cannot get allocation info for free block!"); +    outInfo.offset = block->offset; +    outInfo.size = block->size; +    outInfo.pUserData = block->UserData(); +} + +void* VmaBlockMetadata_TLSF::GetAllocationUserData(VmaAllocHandle allocHandle) const +{ +    Block* block = (Block*)allocHandle; +    VMA_ASSERT(!block->IsFree() && "Cannot get user data for free block!"); +    return block->UserData(); +} + +VmaAllocHandle VmaBlockMetadata_TLSF::GetAllocationListBegin() const +{ +    if (m_AllocCount == 0) +        return VK_NULL_HANDLE; + +    for (Block* block = m_NullBlock->prevPhysical; block; block = block->prevPhysical) +    { +        if (!block->IsFree()) +            return (VmaAllocHandle)block; +    } +    VMA_ASSERT(false && "If m_AllocCount > 0 then should find any allocation!"); +    return VK_NULL_HANDLE; +} + +VmaAllocHandle VmaBlockMetadata_TLSF::GetNextAllocation(VmaAllocHandle prevAlloc) const +{ +    Block* startBlock = (Block*)prevAlloc; +    VMA_ASSERT(!startBlock->IsFree() && "Incorrect block!"); + +    for (Block* block = startBlock->prevPhysical; block; block = block->prevPhysical) +    { +        if (!block->IsFree()) +            return (VmaAllocHandle)block; +    } +    return VK_NULL_HANDLE; +} + +VkDeviceSize VmaBlockMetadata_TLSF::GetNextFreeRegionSize(VmaAllocHandle alloc) const +{ +    Block* block = (Block*)alloc; +    VMA_ASSERT(!block->IsFree() && "Incorrect block!"); + +    if (block->prevPhysical) +        return block->prevPhysical->IsFree() ? 
block->prevPhysical->size : 0; +    return 0; +} + +void VmaBlockMetadata_TLSF::Clear() +{ +    m_AllocCount = 0; +    m_BlocksFreeCount = 0; +    m_BlocksFreeSize = 0; +    m_IsFreeBitmap = 0; +    m_NullBlock->offset = 0; +    m_NullBlock->size = GetSize(); +    Block* block = m_NullBlock->prevPhysical; +    m_NullBlock->prevPhysical = VMA_NULL; +    while (block) +    { +        Block* prev = block->prevPhysical; +        m_BlockAllocator.Free(block); +        block = prev; +    } +    memset(m_FreeList, 0, m_ListsCount * sizeof(Block*)); +    memset(m_InnerIsFreeBitmap, 0, m_MemoryClasses * sizeof(uint32_t)); +    m_GranularityHandler.Clear(); +} + +void VmaBlockMetadata_TLSF::SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) +{ +    Block* block = (Block*)allocHandle; +    VMA_ASSERT(!block->IsFree() && "Trying to set user data for not allocated block!"); +    block->UserData() = userData; +} + +void VmaBlockMetadata_TLSF::DebugLogAllAllocations() const +{ +    for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical) +        if (!block->IsFree()) +            DebugLogAllocation(block->offset, block->size, block->UserData()); +} + +uint8_t VmaBlockMetadata_TLSF::SizeToMemoryClass(VkDeviceSize size) const +{ +    if (size > SMALL_BUFFER_SIZE) +        return uint8_t(VMA_BITSCAN_MSB(size) - MEMORY_CLASS_SHIFT); +    return 0; +} + +uint16_t VmaBlockMetadata_TLSF::SizeToSecondIndex(VkDeviceSize size, uint8_t memoryClass) const +{ +    if (memoryClass == 0) +    { +        if (IsVirtual()) +            return static_cast<uint16_t>((size - 1) / 8); +        else +            return static_cast<uint16_t>((size - 1) / 64); +    } +    return static_cast<uint16_t>((size >> (memoryClass + MEMORY_CLASS_SHIFT - SECOND_LEVEL_INDEX)) ^ (1U << SECOND_LEVEL_INDEX)); +} + +uint32_t VmaBlockMetadata_TLSF::GetListIndex(uint8_t memoryClass, uint16_t secondIndex) const +{ +    if (memoryClass == 0) +        return secondIndex; + +    const uint32_t index = static_cast<uint32_t>(memoryClass - 1) * (1 << SECOND_LEVEL_INDEX) + secondIndex; +    if (IsVirtual()) +        return index + (1 << SECOND_LEVEL_INDEX); +    else +        return index + 4; +} + +uint32_t VmaBlockMetadata_TLSF::GetListIndex(VkDeviceSize size) const +{ +    uint8_t memoryClass = SizeToMemoryClass(size); +    return GetListIndex(memoryClass, SizeToSecondIndex(size, memoryClass)); +} + +void VmaBlockMetadata_TLSF::RemoveFreeBlock(Block* block) +{ +    VMA_ASSERT(block != m_NullBlock); +    VMA_ASSERT(block->IsFree()); + +    if (block->NextFree() != VMA_NULL) +        block->NextFree()->PrevFree() = block->PrevFree(); +    if (block->PrevFree() != VMA_NULL) +        block->PrevFree()->NextFree() = block->NextFree(); +    else +    { +        uint8_t memClass = SizeToMemoryClass(block->size); +        uint16_t secondIndex = SizeToSecondIndex(block->size, memClass); +        uint32_t index = GetListIndex(memClass, secondIndex); +        VMA_ASSERT(m_FreeList[index] == block); +        m_FreeList[index] = block->NextFree(); +        if (block->NextFree() == VMA_NULL) +        { +            m_InnerIsFreeBitmap[memClass] &= ~(1U << secondIndex); +            if (m_InnerIsFreeBitmap[memClass] == 0) +                m_IsFreeBitmap &= ~(1UL << memClass); +        } +    } +    block->MarkTaken(); +    block->UserData() = VMA_NULL; +    --m_BlocksFreeCount; +    m_BlocksFreeSize -= block->size; +} + +void VmaBlockMetadata_TLSF::InsertFreeBlock(Block* block) +{ +    VMA_ASSERT(block != 
m_NullBlock);
+    VMA_ASSERT(!block->IsFree() && "Cannot insert block twice!");
+
+    uint8_t memClass = SizeToMemoryClass(block->size);
+    uint16_t secondIndex = SizeToSecondIndex(block->size, memClass);
+    uint32_t index = GetListIndex(memClass, secondIndex);
+    VMA_ASSERT(index < m_ListsCount);
+    block->PrevFree() = VMA_NULL;
+    block->NextFree() = m_FreeList[index];
+    m_FreeList[index] = block;
+    if (block->NextFree() != VMA_NULL)
+        block->NextFree()->PrevFree() = block;
+    else
+    {
+        m_InnerIsFreeBitmap[memClass] |= 1U << secondIndex;
+        m_IsFreeBitmap |= 1UL << memClass;
+    }
+    ++m_BlocksFreeCount;
+    m_BlocksFreeSize += block->size;
+}
+
+void VmaBlockMetadata_TLSF::MergeBlock(Block* block, Block* prev)
+{
+    VMA_ASSERT(block->prevPhysical == prev && "Cannot merge separate physical regions!");
+    VMA_ASSERT(!prev->IsFree() && "Cannot merge block that belongs to free list!");
+
+    block->offset = prev->offset;
+    block->size += prev->size;
+    block->prevPhysical = prev->prevPhysical;
+    if (block->prevPhysical)
+        block->prevPhysical->nextPhysical = block;
+    m_BlockAllocator.Free(prev);
+}
+
+VmaBlockMetadata_TLSF::Block* VmaBlockMetadata_TLSF::FindFreeBlock(VkDeviceSize size, uint32_t& listIndex) const
+{
+    uint8_t memoryClass = SizeToMemoryClass(size);
+    uint32_t innerFreeMap = m_InnerIsFreeBitmap[memoryClass] & (~0U << SizeToSecondIndex(size, memoryClass));
+    if (!innerFreeMap)
+    {
+        // Check higher levels for available blocks
+        uint32_t freeMap = m_IsFreeBitmap & (~0UL << (memoryClass + 1));
+        if (!freeMap)
+            return VMA_NULL; // No more memory available
+
+        // Find lowest free region
+        memoryClass = VMA_BITSCAN_LSB(freeMap);
+        innerFreeMap = m_InnerIsFreeBitmap[memoryClass];
+        VMA_ASSERT(innerFreeMap != 0);
+    }
+    // Find lowest free subregion
+    listIndex = GetListIndex(memoryClass, VMA_BITSCAN_LSB(innerFreeMap));
+    VMA_ASSERT(m_FreeList[listIndex]);
+    return m_FreeList[listIndex];
+}
+
+bool VmaBlockMetadata_TLSF::CheckBlock(
+    Block& block,
+    uint32_t listIndex,
+    VkDeviceSize allocSize,
+    VkDeviceSize allocAlignment,
+    VmaSuballocationType allocType,
+    VmaAllocationRequest* pAllocationRequest)
+{
+    VMA_ASSERT(block.IsFree() && "Block is already taken!");
+
+    VkDeviceSize alignedOffset = VmaAlignUp(block.offset, allocAlignment);
+    if (block.size < allocSize + alignedOffset - block.offset)
+        return false;
+
+    // Check for granularity conflicts
+    if (!IsVirtual() &&
+        m_GranularityHandler.CheckConflictAndAlignUp(alignedOffset, allocSize, block.offset, block.size, allocType))
+        return false;
+
+    // Alloc successful
+    pAllocationRequest->type = VmaAllocationRequestType::TLSF;
+    pAllocationRequest->allocHandle = (VmaAllocHandle)&block;
+    pAllocationRequest->size = allocSize - GetDebugMargin();
+    pAllocationRequest->customData = (void*)allocType;
+    pAllocationRequest->algorithmData = alignedOffset;
+
+    // Place block at the start of list if it's normal block
+    if (listIndex != m_ListsCount && block.PrevFree())
+    {
+        block.PrevFree()->NextFree() = block.NextFree();
+        if (block.NextFree())
+            block.NextFree()->PrevFree() = block.PrevFree();
+        block.PrevFree() = VMA_NULL;
+        block.NextFree() = m_FreeList[listIndex];
+        m_FreeList[listIndex] = &block;
+        if (block.NextFree())
+            block.NextFree()->PrevFree() 
= &block;
+    }
+
+    return true;
+}
+#endif // _VMA_BLOCK_METADATA_TLSF_FUNCTIONS
+#endif // _VMA_BLOCK_METADATA_TLSF
+
+#ifndef _VMA_BLOCK_VECTOR
+/*
+Sequence of VmaDeviceMemoryBlock. Represents memory blocks allocated for a specific
+Vulkan memory type.
+
+Synchronized internally with a mutex.
+*/
+class VmaBlockVector
+{
+    friend struct VmaDefragmentationContext_T;
+    VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockVector)
+public:
+    VmaBlockVector(
+        VmaAllocator hAllocator,
+        VmaPool hParentPool,
+        uint32_t memoryTypeIndex,
+        VkDeviceSize preferredBlockSize,
+        size_t minBlockCount,
+        size_t maxBlockCount,
+        VkDeviceSize bufferImageGranularity,
+        bool explicitBlockSize,
+        uint32_t algorithm,
+        float priority,
+        VkDeviceSize minAllocationAlignment,
+        void* pMemoryAllocateNext);
+    ~VmaBlockVector();
+
+    VmaAllocator GetAllocator() const { return m_hAllocator; }
+    VmaPool GetParentPool() const { return m_hParentPool; }
+    bool IsCustomPool() const { return m_hParentPool != VMA_NULL; }
+    uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; }
+    VkDeviceSize GetPreferredBlockSize() const { return m_PreferredBlockSize; }
+    VkDeviceSize GetBufferImageGranularity() const { return m_BufferImageGranularity; }
+    uint32_t GetAlgorithm() const { return m_Algorithm; }
+    bool HasExplicitBlockSize() const { return m_ExplicitBlockSize; }
+    float GetPriority() const { return m_Priority; }
+    const void* GetAllocationNextPtr() const { return m_pMemoryAllocateNext; }
+    // To be used only while the m_Mutex is locked. Used during defragmentation.
+    size_t GetBlockCount() const { return m_Blocks.size(); }
+    // To be used only while the m_Mutex is locked. Used during defragmentation.
+    VmaDeviceMemoryBlock* GetBlock(size_t index) const { return m_Blocks[index]; }
+    VMA_RW_MUTEX &GetMutex() { return m_Mutex; }
+
+    VkResult CreateMinBlocks();
+    void AddStatistics(VmaStatistics& inoutStats);
+    void AddDetailedStatistics(VmaDetailedStatistics& inoutStats);
+    bool IsEmpty();
+    bool IsCorruptionDetectionEnabled() const;
+
+    VkResult Allocate(
+        VkDeviceSize size,
+        VkDeviceSize alignment,
+        const VmaAllocationCreateInfo& createInfo,
+        VmaSuballocationType suballocType,
+        size_t allocationCount,
+        VmaAllocation* pAllocations);
+
+    void Free(const VmaAllocation hAllocation);
+
+#if VMA_STATS_STRING_ENABLED
+    void PrintDetailedMap(class VmaJsonWriter& json);
+#endif
+
+    VkResult CheckCorruption();
+
+private:
+    const VmaAllocator m_hAllocator;
+    const VmaPool m_hParentPool;
+    const uint32_t m_MemoryTypeIndex;
+    const VkDeviceSize m_PreferredBlockSize;
+    const size_t m_MinBlockCount;
+    const size_t m_MaxBlockCount;
+    const VkDeviceSize m_BufferImageGranularity;
+    const bool m_ExplicitBlockSize;
+    const uint32_t m_Algorithm;
+    const float m_Priority;
+    const VkDeviceSize m_MinAllocationAlignment;
+
+    void* const m_pMemoryAllocateNext;
+    VMA_RW_MUTEX m_Mutex;
+    // Incrementally sorted by sumFreeSize, ascending.
+    VmaVector<VmaDeviceMemoryBlock*, VmaStlAllocator<VmaDeviceMemoryBlock*>> m_Blocks;
+    uint32_t m_NextBlockId;
+    bool m_IncrementalSort = true;
+
+    void SetIncrementalSort(bool val) { m_IncrementalSort = val; }
+
+    VkDeviceSize CalcMaxBlockSize() const;
+    // Finds and removes given block from vector. 
+    void Remove(VmaDeviceMemoryBlock* pBlock); +    // Performs single step in sorting m_Blocks. They may not be fully sorted +    // after this call. +    void IncrementallySortBlocks(); +    void SortByFreeSize(); + +    VkResult AllocatePage( +        VkDeviceSize size, +        VkDeviceSize alignment, +        const VmaAllocationCreateInfo& createInfo, +        VmaSuballocationType suballocType, +        VmaAllocation* pAllocation); + +    VkResult AllocateFromBlock( +        VmaDeviceMemoryBlock* pBlock, +        VkDeviceSize size, +        VkDeviceSize alignment, +        VmaAllocationCreateFlags allocFlags, +        void* pUserData, +        VmaSuballocationType suballocType, +        uint32_t strategy, +        VmaAllocation* pAllocation); + +    VkResult CommitAllocationRequest( +        VmaAllocationRequest& allocRequest, +        VmaDeviceMemoryBlock* pBlock, +        VkDeviceSize alignment, +        VmaAllocationCreateFlags allocFlags, +        void* pUserData, +        VmaSuballocationType suballocType, +        VmaAllocation* pAllocation); + +    VkResult CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIndex); +    bool HasEmptyBlock(); +}; +#endif // _VMA_BLOCK_VECTOR + +#ifndef _VMA_DEFRAGMENTATION_CONTEXT +struct VmaDefragmentationContext_T +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaDefragmentationContext_T) +public: +    VmaDefragmentationContext_T( +        VmaAllocator hAllocator, +        const VmaDefragmentationInfo& info); +    ~VmaDefragmentationContext_T(); + +    void GetStats(VmaDefragmentationStats& outStats) { outStats = m_GlobalStats; } + +    VkResult DefragmentPassBegin(VmaDefragmentationPassMoveInfo& moveInfo); +    VkResult DefragmentPassEnd(VmaDefragmentationPassMoveInfo& moveInfo); + +private: +    // Max number of allocations to ignore due to size constraints before ending single pass +    static const uint8_t MAX_ALLOCS_TO_IGNORE = 16; +    enum class CounterStatus { Pass, Ignore, End }; + +    struct FragmentedBlock +    { +        uint32_t data; +        VmaDeviceMemoryBlock* block; +    }; +    struct StateBalanced +    { +        VkDeviceSize avgFreeSize = 0; +        VkDeviceSize avgAllocSize = UINT64_MAX; +    }; +    struct StateExtensive +    { +        enum class Operation : uint8_t +        { +            FindFreeBlockBuffer, FindFreeBlockTexture, FindFreeBlockAll, +            MoveBuffers, MoveTextures, MoveAll, +            Cleanup, Done +        }; + +        Operation operation = Operation::FindFreeBlockTexture; +        size_t firstFreeBlock = SIZE_MAX; +    }; +    struct MoveAllocationData +    { +        VkDeviceSize size; +        VkDeviceSize alignment; +        VmaSuballocationType type; +        VmaAllocationCreateFlags flags; +        VmaDefragmentationMove move = {}; +    }; + +    const VkDeviceSize m_MaxPassBytes; +    const uint32_t m_MaxPassAllocations; +    const PFN_vmaCheckDefragmentationBreakFunction m_BreakCallback; +    void* m_BreakCallbackUserData; + +    VmaStlAllocator<VmaDefragmentationMove> m_MoveAllocator; +    VmaVector<VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove>> m_Moves; + +    uint8_t m_IgnoredAllocs = 0; +    uint32_t m_Algorithm; +    uint32_t m_BlockVectorCount; +    VmaBlockVector* m_PoolBlockVector; +    VmaBlockVector** m_pBlockVectors; +    size_t m_ImmovableBlockCount = 0; +    VmaDefragmentationStats m_GlobalStats = { 0 }; +    VmaDefragmentationStats m_PassStats = { 0 }; +    void* m_AlgorithmState = VMA_NULL; + +    static MoveAllocationData GetMoveData(VmaAllocHandle handle, 
VmaBlockMetadata* metadata); +    CounterStatus CheckCounters(VkDeviceSize bytes); +    bool IncrementCounters(VkDeviceSize bytes); +    bool ReallocWithinBlock(VmaBlockVector& vector, VmaDeviceMemoryBlock* block); +    bool AllocInOtherBlock(size_t start, size_t end, MoveAllocationData& data, VmaBlockVector& vector); + +    bool ComputeDefragmentation(VmaBlockVector& vector, size_t index); +    bool ComputeDefragmentation_Fast(VmaBlockVector& vector); +    bool ComputeDefragmentation_Balanced(VmaBlockVector& vector, size_t index, bool update); +    bool ComputeDefragmentation_Full(VmaBlockVector& vector); +    bool ComputeDefragmentation_Extensive(VmaBlockVector& vector, size_t index); + +    void UpdateVectorStatistics(VmaBlockVector& vector, StateBalanced& state); +    bool MoveDataToFreeBlocks(VmaSuballocationType currentType, +        VmaBlockVector& vector, size_t firstFreeBlock, +        bool& texturePresent, bool& bufferPresent, bool& otherPresent); +}; +#endif // _VMA_DEFRAGMENTATION_CONTEXT + +#ifndef _VMA_POOL_T +struct VmaPool_T +{ +    friend struct VmaPoolListItemTraits; +    VMA_CLASS_NO_COPY_NO_MOVE(VmaPool_T) +public: +    VmaBlockVector m_BlockVector; +    VmaDedicatedAllocationList m_DedicatedAllocations; + +    VmaPool_T( +        VmaAllocator hAllocator, +        const VmaPoolCreateInfo& createInfo, +        VkDeviceSize preferredBlockSize); +    ~VmaPool_T(); + +    uint32_t GetId() const { return m_Id; } +    void SetId(uint32_t id) { VMA_ASSERT(m_Id == 0); m_Id = id; } + +    const char* GetName() const { return m_Name; } +    void SetName(const char* pName); + +#if VMA_STATS_STRING_ENABLED +    //void PrintDetailedMap(class VmaStringBuilder& sb); +#endif + +private: +    uint32_t m_Id; +    char* m_Name; +    VmaPool_T* m_PrevPool = VMA_NULL; +    VmaPool_T* m_NextPool = VMA_NULL; +}; + +struct VmaPoolListItemTraits +{ +    typedef VmaPool_T ItemType; + +    static ItemType* GetPrev(const ItemType* item) { return item->m_PrevPool; } +    static ItemType* GetNext(const ItemType* item) { return item->m_NextPool; } +    static ItemType*& AccessPrev(ItemType* item) { return item->m_PrevPool; } +    static ItemType*& AccessNext(ItemType* item) { return item->m_NextPool; } +}; +#endif // _VMA_POOL_T + +#ifndef _VMA_CURRENT_BUDGET_DATA +struct VmaCurrentBudgetData +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaCurrentBudgetData) +public: + +    VMA_ATOMIC_UINT32 m_BlockCount[VK_MAX_MEMORY_HEAPS]; +    VMA_ATOMIC_UINT32 m_AllocationCount[VK_MAX_MEMORY_HEAPS]; +    VMA_ATOMIC_UINT64 m_BlockBytes[VK_MAX_MEMORY_HEAPS]; +    VMA_ATOMIC_UINT64 m_AllocationBytes[VK_MAX_MEMORY_HEAPS]; + +#if VMA_MEMORY_BUDGET +    VMA_ATOMIC_UINT32 m_OperationsSinceBudgetFetch; +    VMA_RW_MUTEX m_BudgetMutex; +    uint64_t m_VulkanUsage[VK_MAX_MEMORY_HEAPS]; +    uint64_t m_VulkanBudget[VK_MAX_MEMORY_HEAPS]; +    uint64_t m_BlockBytesAtBudgetFetch[VK_MAX_MEMORY_HEAPS]; +#endif // VMA_MEMORY_BUDGET + +    VmaCurrentBudgetData(); + +    void AddAllocation(uint32_t heapIndex, VkDeviceSize allocationSize); +    void RemoveAllocation(uint32_t heapIndex, VkDeviceSize allocationSize); +}; + +#ifndef _VMA_CURRENT_BUDGET_DATA_FUNCTIONS +VmaCurrentBudgetData::VmaCurrentBudgetData() +{ +    for (uint32_t heapIndex = 0; heapIndex < VK_MAX_MEMORY_HEAPS; ++heapIndex) +    { +        m_BlockCount[heapIndex] = 0; +        m_AllocationCount[heapIndex] = 0; +        m_BlockBytes[heapIndex] = 0; +        m_AllocationBytes[heapIndex] = 0; +#if VMA_MEMORY_BUDGET +        m_VulkanUsage[heapIndex] = 0; +        
m_VulkanBudget[heapIndex] = 0; +        m_BlockBytesAtBudgetFetch[heapIndex] = 0; +#endif +    } + +#if VMA_MEMORY_BUDGET +    m_OperationsSinceBudgetFetch = 0; +#endif +} + +void VmaCurrentBudgetData::AddAllocation(uint32_t heapIndex, VkDeviceSize allocationSize) +{ +    m_AllocationBytes[heapIndex] += allocationSize; +    ++m_AllocationCount[heapIndex]; +#if VMA_MEMORY_BUDGET +    ++m_OperationsSinceBudgetFetch; +#endif +} + +void VmaCurrentBudgetData::RemoveAllocation(uint32_t heapIndex, VkDeviceSize allocationSize) +{ +    VMA_ASSERT(m_AllocationBytes[heapIndex] >= allocationSize); +    m_AllocationBytes[heapIndex] -= allocationSize; +    VMA_ASSERT(m_AllocationCount[heapIndex] > 0); +    --m_AllocationCount[heapIndex]; +#if VMA_MEMORY_BUDGET +    ++m_OperationsSinceBudgetFetch; +#endif +} +#endif // _VMA_CURRENT_BUDGET_DATA_FUNCTIONS +#endif // _VMA_CURRENT_BUDGET_DATA + +#ifndef _VMA_ALLOCATION_OBJECT_ALLOCATOR +/* +Thread-safe wrapper over VmaPoolAllocator free list, for allocation of VmaAllocation_T objects. +*/ +class VmaAllocationObjectAllocator +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaAllocationObjectAllocator) +public: +    VmaAllocationObjectAllocator(const VkAllocationCallbacks* pAllocationCallbacks) +        : m_Allocator(pAllocationCallbacks, 1024) {} + +    template<typename... Types> VmaAllocation Allocate(Types&&... args); +    void Free(VmaAllocation hAlloc); + +private: +    VMA_MUTEX m_Mutex; +    VmaPoolAllocator<VmaAllocation_T> m_Allocator; +}; + +template<typename... Types> +VmaAllocation VmaAllocationObjectAllocator::Allocate(Types&&... args) +{ +    VmaMutexLock mutexLock(m_Mutex); +    return m_Allocator.Alloc<Types...>(std::forward<Types>(args)...); +} + +void VmaAllocationObjectAllocator::Free(VmaAllocation hAlloc) +{ +    VmaMutexLock mutexLock(m_Mutex); +    m_Allocator.Free(hAlloc); +} +#endif // _VMA_ALLOCATION_OBJECT_ALLOCATOR + +#ifndef _VMA_VIRTUAL_BLOCK_T +struct VmaVirtualBlock_T +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaVirtualBlock_T) +public: +    const bool m_AllocationCallbacksSpecified; +    const VkAllocationCallbacks m_AllocationCallbacks; + +    VmaVirtualBlock_T(const VmaVirtualBlockCreateInfo& createInfo); +    ~VmaVirtualBlock_T(); + +    VkResult Init() { return VK_SUCCESS; } +    bool IsEmpty() const { return m_Metadata->IsEmpty(); } +    void Free(VmaVirtualAllocation allocation) { m_Metadata->Free((VmaAllocHandle)allocation); } +    void SetAllocationUserData(VmaVirtualAllocation allocation, void* userData) { m_Metadata->SetAllocationUserData((VmaAllocHandle)allocation, userData); } +    void Clear() { m_Metadata->Clear(); } + +    const VkAllocationCallbacks* GetAllocationCallbacks() const; +    void GetAllocationInfo(VmaVirtualAllocation allocation, VmaVirtualAllocationInfo& outInfo); +    VkResult Allocate(const VmaVirtualAllocationCreateInfo& createInfo, VmaVirtualAllocation& outAllocation, +        VkDeviceSize* outOffset); +    void GetStatistics(VmaStatistics& outStats) const; +    void CalculateDetailedStatistics(VmaDetailedStatistics& outStats) const; +#if VMA_STATS_STRING_ENABLED +    void BuildStatsString(bool detailedMap, VmaStringBuilder& sb) const; +#endif + +private: +    VmaBlockMetadata* m_Metadata; +}; + +#ifndef _VMA_VIRTUAL_BLOCK_T_FUNCTIONS +VmaVirtualBlock_T::VmaVirtualBlock_T(const VmaVirtualBlockCreateInfo& createInfo) +    : m_AllocationCallbacksSpecified(createInfo.pAllocationCallbacks != VMA_NULL), +    m_AllocationCallbacks(createInfo.pAllocationCallbacks != VMA_NULL ? 
*createInfo.pAllocationCallbacks : VmaEmptyAllocationCallbacks) +{ +    const uint32_t algorithm = createInfo.flags & VMA_VIRTUAL_BLOCK_CREATE_ALGORITHM_MASK; +    switch (algorithm) +    { +    case 0: +        m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_TLSF)(VK_NULL_HANDLE, 1, true); +        break; +    case VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT: +        m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_Linear)(VK_NULL_HANDLE, 1, true); +        break; +    default: +        VMA_ASSERT(0); +        m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_TLSF)(VK_NULL_HANDLE, 1, true); +    } + +    m_Metadata->Init(createInfo.size); +} + +VmaVirtualBlock_T::~VmaVirtualBlock_T() +{ +    // Define macro VMA_DEBUG_LOG_FORMAT or more specialized VMA_LEAK_LOG_FORMAT +    // to receive the list of the unfreed allocations. +    if (!m_Metadata->IsEmpty()) +        m_Metadata->DebugLogAllAllocations(); +    // This is the most important assert in the entire library. +    // Hitting it means you have some memory leak - unreleased virtual allocations. +    VMA_ASSERT_LEAK(m_Metadata->IsEmpty() && "Some virtual allocations were not freed before destruction of this virtual block!"); + +    vma_delete(GetAllocationCallbacks(), m_Metadata); +} + +const VkAllocationCallbacks* VmaVirtualBlock_T::GetAllocationCallbacks() const +{ +    return m_AllocationCallbacksSpecified ? &m_AllocationCallbacks : VMA_NULL; +} + +void VmaVirtualBlock_T::GetAllocationInfo(VmaVirtualAllocation allocation, VmaVirtualAllocationInfo& outInfo) +{ +    m_Metadata->GetAllocationInfo((VmaAllocHandle)allocation, outInfo); +} + +VkResult VmaVirtualBlock_T::Allocate(const VmaVirtualAllocationCreateInfo& createInfo, VmaVirtualAllocation& outAllocation, +    VkDeviceSize* outOffset) +{ +    VmaAllocationRequest request = {}; +    if (m_Metadata->CreateAllocationRequest( +        createInfo.size, // allocSize +        VMA_MAX(createInfo.alignment, (VkDeviceSize)1), // allocAlignment +        (createInfo.flags & VMA_VIRTUAL_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0, // upperAddress +        VMA_SUBALLOCATION_TYPE_UNKNOWN, // allocType - unimportant +        createInfo.flags & VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MASK, // strategy +        &request)) +    { +        m_Metadata->Alloc(request, +            VMA_SUBALLOCATION_TYPE_UNKNOWN, // type - unimportant +            createInfo.pUserData); +        outAllocation = (VmaVirtualAllocation)request.allocHandle; +        if(outOffset) +            *outOffset = m_Metadata->GetAllocationOffset(request.allocHandle); +        return VK_SUCCESS; +    } +    outAllocation = (VmaVirtualAllocation)VK_NULL_HANDLE; +    if (outOffset) +        *outOffset = UINT64_MAX; +    return VK_ERROR_OUT_OF_DEVICE_MEMORY; +} + +void VmaVirtualBlock_T::GetStatistics(VmaStatistics& outStats) const +{ +    VmaClearStatistics(outStats); +    m_Metadata->AddStatistics(outStats); +} + +void VmaVirtualBlock_T::CalculateDetailedStatistics(VmaDetailedStatistics& outStats) const +{ +    VmaClearDetailedStatistics(outStats); +    m_Metadata->AddDetailedStatistics(outStats); +} + +#if VMA_STATS_STRING_ENABLED +void VmaVirtualBlock_T::BuildStatsString(bool detailedMap, VmaStringBuilder& sb) const +{ +    VmaJsonWriter json(GetAllocationCallbacks(), sb); +    json.BeginObject(); + +    VmaDetailedStatistics stats; +    CalculateDetailedStatistics(stats); + +    json.WriteString("Stats"); +    VmaPrintDetailedStatistics(json, stats); + +    if (detailedMap) +    { +      
  json.WriteString("Details"); +        json.BeginObject(); +        m_Metadata->PrintDetailedMap(json); +        json.EndObject(); +    } + +    json.EndObject(); +} +#endif // VMA_STATS_STRING_ENABLED +#endif // _VMA_VIRTUAL_BLOCK_T_FUNCTIONS +#endif // _VMA_VIRTUAL_BLOCK_T + + +// Main allocator object. +struct VmaAllocator_T +{ +    VMA_CLASS_NO_COPY_NO_MOVE(VmaAllocator_T) +public: +    const bool m_UseMutex; +    const uint32_t m_VulkanApiVersion; +    bool m_UseKhrDedicatedAllocation; // Can be set only if m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0). +    bool m_UseKhrBindMemory2; // Can be set only if m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0). +    bool m_UseExtMemoryBudget; +    bool m_UseAmdDeviceCoherentMemory; +    bool m_UseKhrBufferDeviceAddress; +    bool m_UseExtMemoryPriority; +    bool m_UseKhrMaintenance4; +    bool m_UseKhrMaintenance5; +    bool m_UseKhrExternalMemoryWin32; +    const VkDevice m_hDevice; +    const VkInstance m_hInstance; +    const bool m_AllocationCallbacksSpecified; +    const VkAllocationCallbacks m_AllocationCallbacks; +    VmaDeviceMemoryCallbacks m_DeviceMemoryCallbacks; +    VmaAllocationObjectAllocator m_AllocationObjectAllocator; + +    // Each bit (1 << i) is set if HeapSizeLimit is enabled for that heap, so cannot allocate more than the heap size. +    uint32_t m_HeapSizeLimitMask; + +    VkPhysicalDeviceProperties m_PhysicalDeviceProperties; +    VkPhysicalDeviceMemoryProperties m_MemProps; + +    // Default pools. +    VmaBlockVector* m_pBlockVectors[VK_MAX_MEMORY_TYPES]; +    VmaDedicatedAllocationList m_DedicatedAllocations[VK_MAX_MEMORY_TYPES]; + +    VmaCurrentBudgetData m_Budget; +    VMA_ATOMIC_UINT32 m_DeviceMemoryCount; // Total number of VkDeviceMemory objects. + +    VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo); +    VkResult Init(const VmaAllocatorCreateInfo* pCreateInfo); +    ~VmaAllocator_T(); + +    const VkAllocationCallbacks* GetAllocationCallbacks() const +    { +        return m_AllocationCallbacksSpecified ? &m_AllocationCallbacks : VMA_NULL; +    } +    const VmaVulkanFunctions& GetVulkanFunctions() const +    { +        return m_VulkanFunctions; +    } + +    VkPhysicalDevice GetPhysicalDevice() const { return m_PhysicalDevice; } + +    VkDeviceSize GetBufferImageGranularity() const +    { +        return VMA_MAX( +            static_cast<VkDeviceSize>(VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY), +            m_PhysicalDeviceProperties.limits.bufferImageGranularity); +    } + +    uint32_t GetMemoryHeapCount() const { return m_MemProps.memoryHeapCount; } +    uint32_t GetMemoryTypeCount() const { return m_MemProps.memoryTypeCount; } + +    uint32_t MemoryTypeIndexToHeapIndex(uint32_t memTypeIndex) const +    { +        VMA_ASSERT(memTypeIndex < m_MemProps.memoryTypeCount); +        return m_MemProps.memoryTypes[memTypeIndex].heapIndex; +    } +    // True when specific memory type is HOST_VISIBLE but not HOST_COHERENT. +    bool IsMemoryTypeNonCoherent(uint32_t memTypeIndex) const +    { +        return (m_MemProps.memoryTypes[memTypeIndex].propertyFlags & (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) == +            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; +    } +    // Minimum alignment for all allocations in specific memory type. +    VkDeviceSize GetMemoryTypeMinAlignment(uint32_t memTypeIndex) const +    { +        return IsMemoryTypeNonCoherent(memTypeIndex) ? 
+            VMA_MAX((VkDeviceSize)VMA_MIN_ALIGNMENT, m_PhysicalDeviceProperties.limits.nonCoherentAtomSize) : +            (VkDeviceSize)VMA_MIN_ALIGNMENT; +    } + +    bool IsIntegratedGpu() const +    { +        return m_PhysicalDeviceProperties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU; +    } + +    uint32_t GetGlobalMemoryTypeBits() const { return m_GlobalMemoryTypeBits; } + +    void GetBufferMemoryRequirements( +        VkBuffer hBuffer, +        VkMemoryRequirements& memReq, +        bool& requiresDedicatedAllocation, +        bool& prefersDedicatedAllocation) const; +    void GetImageMemoryRequirements( +        VkImage hImage, +        VkMemoryRequirements& memReq, +        bool& requiresDedicatedAllocation, +        bool& prefersDedicatedAllocation) const; +    VkResult FindMemoryTypeIndex( +        uint32_t memoryTypeBits, +        const VmaAllocationCreateInfo* pAllocationCreateInfo, +        VmaBufferImageUsage bufImgUsage, +        uint32_t* pMemoryTypeIndex) const; + +    // Main allocation function. +    VkResult AllocateMemory( +        const VkMemoryRequirements& vkMemReq, +        bool requiresDedicatedAllocation, +        bool prefersDedicatedAllocation, +        VkBuffer dedicatedBuffer, +        VkImage dedicatedImage, +        VmaBufferImageUsage dedicatedBufferImageUsage, +        const VmaAllocationCreateInfo& createInfo, +        VmaSuballocationType suballocType, +        size_t allocationCount, +        VmaAllocation* pAllocations); + +    // Main deallocation function. +    void FreeMemory( +        size_t allocationCount, +        const VmaAllocation* pAllocations); + +    void CalculateStatistics(VmaTotalStatistics* pStats); + +    void GetHeapBudgets( +        VmaBudget* outBudgets, uint32_t firstHeap, uint32_t heapCount); + +#if VMA_STATS_STRING_ENABLED +    void PrintDetailedMap(class VmaJsonWriter& json); +#endif + +    void GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationInfo* pAllocationInfo); +    void GetAllocationInfo2(VmaAllocation hAllocation, VmaAllocationInfo2* pAllocationInfo); + +    VkResult CreatePool(const VmaPoolCreateInfo* pCreateInfo, VmaPool* pPool); +    void DestroyPool(VmaPool pool); +    void GetPoolStatistics(VmaPool pool, VmaStatistics* pPoolStats); +    void CalculatePoolStatistics(VmaPool pool, VmaDetailedStatistics* pPoolStats); + +    void SetCurrentFrameIndex(uint32_t frameIndex); +    uint32_t GetCurrentFrameIndex() const { return m_CurrentFrameIndex.load(); } + +    VkResult CheckPoolCorruption(VmaPool hPool); +    VkResult CheckCorruption(uint32_t memoryTypeBits); + +    // Call to Vulkan function vkAllocateMemory with accompanying bookkeeping. +    VkResult AllocateVulkanMemory(const VkMemoryAllocateInfo* pAllocateInfo, VkDeviceMemory* pMemory); +    // Call to Vulkan function vkFreeMemory with accompanying bookkeeping. +    void FreeVulkanMemory(uint32_t memoryType, VkDeviceSize size, VkDeviceMemory hMemory); +    // Call to Vulkan function vkBindBufferMemory or vkBindBufferMemory2KHR. +    VkResult BindVulkanBuffer( +        VkDeviceMemory memory, +        VkDeviceSize memoryOffset, +        VkBuffer buffer, +        const void* pNext); +    // Call to Vulkan function vkBindImageMemory or vkBindImageMemory2KHR. 
+    VkResult BindVulkanImage( +        VkDeviceMemory memory, +        VkDeviceSize memoryOffset, +        VkImage image, +        const void* pNext); + +    VkResult Map(VmaAllocation hAllocation, void** ppData); +    void Unmap(VmaAllocation hAllocation); + +    VkResult BindBufferMemory( +        VmaAllocation hAllocation, +        VkDeviceSize allocationLocalOffset, +        VkBuffer hBuffer, +        const void* pNext); +    VkResult BindImageMemory( +        VmaAllocation hAllocation, +        VkDeviceSize allocationLocalOffset, +        VkImage hImage, +        const void* pNext); + +    VkResult FlushOrInvalidateAllocation( +        VmaAllocation hAllocation, +        VkDeviceSize offset, VkDeviceSize size, +        VMA_CACHE_OPERATION op); +    VkResult FlushOrInvalidateAllocations( +        uint32_t allocationCount, +        const VmaAllocation* allocations, +        const VkDeviceSize* offsets, const VkDeviceSize* sizes, +        VMA_CACHE_OPERATION op); + +    VkResult CopyMemoryToAllocation( +        const void* pSrcHostPointer, +        VmaAllocation dstAllocation, +        VkDeviceSize dstAllocationLocalOffset, +        VkDeviceSize size); +    VkResult CopyAllocationToMemory( +        VmaAllocation srcAllocation, +        VkDeviceSize srcAllocationLocalOffset, +        void* pDstHostPointer, +        VkDeviceSize size); + +    void FillAllocation(const VmaAllocation hAllocation, uint8_t pattern); + +    /* +    Returns bit mask of memory types that can support defragmentation on GPU as +    they support creation of required buffer for copy operations. +    */ +    uint32_t GetGpuDefragmentationMemoryTypeBits(); + +#if VMA_EXTERNAL_MEMORY +    VkExternalMemoryHandleTypeFlagsKHR GetExternalMemoryHandleTypeFlags(uint32_t memTypeIndex) const +    { +        return m_TypeExternalMemoryHandleTypes[memTypeIndex]; +    } +#endif // #if VMA_EXTERNAL_MEMORY + +private: +    VkDeviceSize m_PreferredLargeHeapBlockSize; + +    VkPhysicalDevice m_PhysicalDevice; +    VMA_ATOMIC_UINT32 m_CurrentFrameIndex; +    VMA_ATOMIC_UINT32 m_GpuDefragmentationMemoryTypeBits; // UINT32_MAX means uninitialized. +#if VMA_EXTERNAL_MEMORY +    VkExternalMemoryHandleTypeFlagsKHR m_TypeExternalMemoryHandleTypes[VK_MAX_MEMORY_TYPES]; +#endif // #if VMA_EXTERNAL_MEMORY + +    VMA_RW_MUTEX m_PoolsMutex; +    typedef VmaIntrusiveLinkedList<VmaPoolListItemTraits> PoolList; +    // Protected by m_PoolsMutex. +    PoolList m_Pools; +    uint32_t m_NextPoolId; + +    VmaVulkanFunctions m_VulkanFunctions; + +    // Global bit mask AND-ed with any memoryTypeBits to disallow certain memory types. 
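+    // (A note on intent, not from upstream: by default this mask only excludes memory types the allocator
+    // must not touch, e.g. DEVICE_COHERENT_AMD types when that feature was not enabled at allocator creation.)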
+    uint32_t m_GlobalMemoryTypeBits; + +    void ImportVulkanFunctions(const VmaVulkanFunctions* pVulkanFunctions); + +#if VMA_STATIC_VULKAN_FUNCTIONS == 1 +    void ImportVulkanFunctions_Static(); +#endif + +    void ImportVulkanFunctions_Custom(const VmaVulkanFunctions* pVulkanFunctions); + +#if VMA_DYNAMIC_VULKAN_FUNCTIONS == 1 +    void ImportVulkanFunctions_Dynamic(); +#endif + +    void ValidateVulkanFunctions(); + +    VkDeviceSize CalcPreferredBlockSize(uint32_t memTypeIndex); + +    VkResult AllocateMemoryOfType( +        VmaPool pool, +        VkDeviceSize size, +        VkDeviceSize alignment, +        bool dedicatedPreferred, +        VkBuffer dedicatedBuffer, +        VkImage dedicatedImage, +        VmaBufferImageUsage dedicatedBufferImageUsage, +        const VmaAllocationCreateInfo& createInfo, +        uint32_t memTypeIndex, +        VmaSuballocationType suballocType, +        VmaDedicatedAllocationList& dedicatedAllocations, +        VmaBlockVector& blockVector, +        size_t allocationCount, +        VmaAllocation* pAllocations); + +    // Helper function only to be used inside AllocateDedicatedMemory. +    VkResult AllocateDedicatedMemoryPage( +        VmaPool pool, +        VkDeviceSize size, +        VmaSuballocationType suballocType, +        uint32_t memTypeIndex, +        const VkMemoryAllocateInfo& allocInfo, +        bool map, +        bool isUserDataString, +        bool isMappingAllowed, +        void* pUserData, +        VmaAllocation* pAllocation); + +    // Allocates and registers new VkDeviceMemory specifically for dedicated allocations. +    VkResult AllocateDedicatedMemory( +        VmaPool pool, +        VkDeviceSize size, +        VmaSuballocationType suballocType, +        VmaDedicatedAllocationList& dedicatedAllocations, +        uint32_t memTypeIndex, +        bool map, +        bool isUserDataString, +        bool isMappingAllowed, +        bool canAliasMemory, +        void* pUserData, +        float priority, +        VkBuffer dedicatedBuffer, +        VkImage dedicatedImage, +        VmaBufferImageUsage dedicatedBufferImageUsage, +        size_t allocationCount, +        VmaAllocation* pAllocations, +        const void* pNextChain = VMA_NULL); + +    void FreeDedicatedMemory(const VmaAllocation allocation); + +    VkResult CalcMemTypeParams( +        VmaAllocationCreateInfo& outCreateInfo, +        uint32_t memTypeIndex, +        VkDeviceSize size, +        size_t allocationCount); +    VkResult CalcAllocationParams( +        VmaAllocationCreateInfo& outCreateInfo, +        bool dedicatedRequired, +        bool dedicatedPreferred); + +    /* +    Calculates and returns bit mask of memory types that can support defragmentation +    on GPU as they support creation of required buffer for copy operations. 
+    */ +    uint32_t CalculateGpuDefragmentationMemoryTypeBits() const; +    uint32_t CalculateGlobalMemoryTypeBits() const; + +    bool GetFlushOrInvalidateRange( +        VmaAllocation allocation, +        VkDeviceSize offset, VkDeviceSize size, +        VkMappedMemoryRange& outRange) const; + +#if VMA_MEMORY_BUDGET +    void UpdateVulkanBudget(); +#endif // #if VMA_MEMORY_BUDGET +}; + + +#ifndef _VMA_MEMORY_FUNCTIONS +static void* VmaMalloc(VmaAllocator hAllocator, size_t size, size_t alignment) +{ +    return VmaMalloc(&hAllocator->m_AllocationCallbacks, size, alignment); +} + +static void VmaFree(VmaAllocator hAllocator, void* ptr) +{ +    VmaFree(&hAllocator->m_AllocationCallbacks, ptr); +} + +template<typename T> +static T* VmaAllocate(VmaAllocator hAllocator) +{ +    return (T*)VmaMalloc(hAllocator, sizeof(T), VMA_ALIGN_OF(T)); +} + +template<typename T> +static T* VmaAllocateArray(VmaAllocator hAllocator, size_t count) +{ +    return (T*)VmaMalloc(hAllocator, sizeof(T) * count, VMA_ALIGN_OF(T)); +} + +template<typename T> +static void vma_delete(VmaAllocator hAllocator, T* ptr) +{ +    if(ptr != VMA_NULL) +    { +        ptr->~T(); +        VmaFree(hAllocator, ptr); +    } +} + +template<typename T> +static void vma_delete_array(VmaAllocator hAllocator, T* ptr, size_t count) +{ +    if(ptr != VMA_NULL) +    { +        for(size_t i = count; i--; ) +            ptr[i].~T(); +        VmaFree(hAllocator, ptr); +    } +} +#endif // _VMA_MEMORY_FUNCTIONS + +#ifndef _VMA_DEVICE_MEMORY_BLOCK_FUNCTIONS +VmaDeviceMemoryBlock::VmaDeviceMemoryBlock(VmaAllocator hAllocator) +    : m_pMetadata(VMA_NULL), +    m_MemoryTypeIndex(UINT32_MAX), +    m_Id(0), +    m_hMemory(VK_NULL_HANDLE), +    m_MapCount(0), +    m_pMappedData(VMA_NULL){} + +VmaDeviceMemoryBlock::~VmaDeviceMemoryBlock() +{ +    VMA_ASSERT_LEAK(m_MapCount == 0 && "VkDeviceMemory block is being destroyed while it is still mapped."); +    VMA_ASSERT_LEAK(m_hMemory == VK_NULL_HANDLE); +} + +void VmaDeviceMemoryBlock::Init( +    VmaAllocator hAllocator, +    VmaPool hParentPool, +    uint32_t newMemoryTypeIndex, +    VkDeviceMemory newMemory, +    VkDeviceSize newSize, +    uint32_t id, +    uint32_t algorithm, +    VkDeviceSize bufferImageGranularity) +{ +    VMA_ASSERT(m_hMemory == VK_NULL_HANDLE); + +    m_hParentPool = hParentPool; +    m_MemoryTypeIndex = newMemoryTypeIndex; +    m_Id = id; +    m_hMemory = newMemory; + +    switch (algorithm) +    { +    case 0: +        m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_TLSF)(hAllocator->GetAllocationCallbacks(), +            bufferImageGranularity, false); // isVirtual +        break; +    case VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT: +        m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_Linear)(hAllocator->GetAllocationCallbacks(), +            bufferImageGranularity, false); // isVirtual +        break; +    default: +        VMA_ASSERT(0); +        m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_TLSF)(hAllocator->GetAllocationCallbacks(), +            bufferImageGranularity, false); // isVirtual +    } +    m_pMetadata->Init(newSize); +} + +void VmaDeviceMemoryBlock::Destroy(VmaAllocator allocator) +{ +    // Define macro VMA_DEBUG_LOG_FORMAT or more specialized VMA_LEAK_LOG_FORMAT +    // to receive the list of the unfreed allocations. +    if (!m_pMetadata->IsEmpty()) +        m_pMetadata->DebugLogAllAllocations(); +    // This is the most important assert in the entire library. +    // Hitting it means you have some memory leak - unreleased VmaAllocation objects. 
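+    // VMA_ASSERT_LEAK is a configuration macro (it defaults to VMA_ASSERT) and can be overridden to change how leaks are reported.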
+    VMA_ASSERT_LEAK(m_pMetadata->IsEmpty() && "Some allocations were not freed before destruction of this memory block!"); + +    VMA_ASSERT_LEAK(m_hMemory != VK_NULL_HANDLE); +    allocator->FreeVulkanMemory(m_MemoryTypeIndex, m_pMetadata->GetSize(), m_hMemory); +    m_hMemory = VK_NULL_HANDLE; + +    vma_delete(allocator, m_pMetadata); +    m_pMetadata = VMA_NULL; +} + +void VmaDeviceMemoryBlock::PostAlloc(VmaAllocator hAllocator) +{ +    VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); +    m_MappingHysteresis.PostAlloc(); +} + +void VmaDeviceMemoryBlock::PostFree(VmaAllocator hAllocator) +{ +    VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); +    if(m_MappingHysteresis.PostFree()) +    { +        VMA_ASSERT(m_MappingHysteresis.GetExtraMapping() == 0); +        if (m_MapCount == 0) +        { +            m_pMappedData = VMA_NULL; +            (*hAllocator->GetVulkanFunctions().vkUnmapMemory)(hAllocator->m_hDevice, m_hMemory); +        } +    } +} + +bool VmaDeviceMemoryBlock::Validate() const +{ +    VMA_VALIDATE((m_hMemory != VK_NULL_HANDLE) && +        (m_pMetadata->GetSize() != 0)); + +    return m_pMetadata->Validate(); +} + +VkResult VmaDeviceMemoryBlock::CheckCorruption(VmaAllocator hAllocator) +{ +    void* pData = VMA_NULL; +    VkResult res = Map(hAllocator, 1, &pData); +    if (res != VK_SUCCESS) +    { +        return res; +    } + +    res = m_pMetadata->CheckCorruption(pData); + +    Unmap(hAllocator, 1); + +    return res; +} + +VkResult VmaDeviceMemoryBlock::Map(VmaAllocator hAllocator, uint32_t count, void** ppData) +{ +    if (count == 0) +    { +        return VK_SUCCESS; +    } + +    VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); +    const uint32_t oldTotalMapCount = m_MapCount + m_MappingHysteresis.GetExtraMapping(); +    if (oldTotalMapCount != 0) +    { +        VMA_ASSERT(m_pMappedData != VMA_NULL); +        m_MappingHysteresis.PostMap(); +        m_MapCount += count; +        if (ppData != VMA_NULL) +        { +            *ppData = m_pMappedData; +        } +        return VK_SUCCESS; +    } +    else +    { +        VkResult result = (*hAllocator->GetVulkanFunctions().vkMapMemory)( +            hAllocator->m_hDevice, +            m_hMemory, +            0, // offset +            VK_WHOLE_SIZE, +            0, // flags +            &m_pMappedData); +        if (result == VK_SUCCESS) +        { +            VMA_ASSERT(m_pMappedData != VMA_NULL); +            m_MappingHysteresis.PostMap(); +            m_MapCount = count; +            if (ppData != VMA_NULL) +            { +                *ppData = m_pMappedData; +            } +        } +        return result; +    } +} + +void VmaDeviceMemoryBlock::Unmap(VmaAllocator hAllocator, uint32_t count) +{ +    if (count == 0) +    { +        return; +    } + +    VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); +    if (m_MapCount >= count) +    { +        m_MapCount -= count; +        const uint32_t totalMapCount = m_MapCount + m_MappingHysteresis.GetExtraMapping(); +        if (totalMapCount == 0) +        { +            m_pMappedData = VMA_NULL; +            (*hAllocator->GetVulkanFunctions().vkUnmapMemory)(hAllocator->m_hDevice, m_hMemory); +        } +        m_MappingHysteresis.PostUnmap(); +    } +    else +    { +        VMA_ASSERT(0 && "VkDeviceMemory block is being unmapped while it was not previously mapped."); +    } +} + +VkResult VmaDeviceMemoryBlock::WriteMagicValueAfterAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize 
allocSize) +{ +    VMA_ASSERT(VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_MARGIN % 4 == 0 && VMA_DEBUG_DETECT_CORRUPTION); + +    void* pData; +    VkResult res = Map(hAllocator, 1, &pData); +    if (res != VK_SUCCESS) +    { +        return res; +    } + +    VmaWriteMagicValue(pData, allocOffset + allocSize); + +    Unmap(hAllocator, 1); +    return VK_SUCCESS; +} + +VkResult VmaDeviceMemoryBlock::ValidateMagicValueAfterAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize) +{ +    VMA_ASSERT(VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_MARGIN % 4 == 0 && VMA_DEBUG_DETECT_CORRUPTION); + +    void* pData; +    VkResult res = Map(hAllocator, 1, &pData); +    if (res != VK_SUCCESS) +    { +        return res; +    } + +    if (!VmaValidateMagicValue(pData, allocOffset + allocSize)) +    { +        VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER FREED ALLOCATION!"); +    } + +    Unmap(hAllocator, 1); +    return VK_SUCCESS; +} + +VkResult VmaDeviceMemoryBlock::BindBufferMemory( +    const VmaAllocator hAllocator, +    const VmaAllocation hAllocation, +    VkDeviceSize allocationLocalOffset, +    VkBuffer hBuffer, +    const void* pNext) +{ +    VMA_ASSERT(hAllocation->GetType() == VmaAllocation_T::ALLOCATION_TYPE_BLOCK && +        hAllocation->GetBlock() == this); +    VMA_ASSERT(allocationLocalOffset < hAllocation->GetSize() && +        "Invalid allocationLocalOffset. Did you forget that this offset is relative to the beginning of the allocation, not the whole memory block?"); +    const VkDeviceSize memoryOffset = hAllocation->GetOffset() + allocationLocalOffset; +    // This lock is important so that we don't call vkBind... and/or vkMap... simultaneously on the same VkDeviceMemory from multiple threads. +    VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); +    return hAllocator->BindVulkanBuffer(m_hMemory, memoryOffset, hBuffer, pNext); +} + +VkResult VmaDeviceMemoryBlock::BindImageMemory( +    const VmaAllocator hAllocator, +    const VmaAllocation hAllocation, +    VkDeviceSize allocationLocalOffset, +    VkImage hImage, +    const void* pNext) +{ +    VMA_ASSERT(hAllocation->GetType() == VmaAllocation_T::ALLOCATION_TYPE_BLOCK && +        hAllocation->GetBlock() == this); +    VMA_ASSERT(allocationLocalOffset < hAllocation->GetSize() && +        "Invalid allocationLocalOffset. Did you forget that this offset is relative to the beginning of the allocation, not the whole memory block?"); +    const VkDeviceSize memoryOffset = hAllocation->GetOffset() + allocationLocalOffset; +    // This lock is important so that we don't call vkBind... and/or vkMap... simultaneously on the same VkDeviceMemory from multiple threads. 
+    VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); +    return hAllocator->BindVulkanImage(m_hMemory, memoryOffset, hImage, pNext); +} + +#if VMA_EXTERNAL_MEMORY_WIN32 +VkResult VmaDeviceMemoryBlock::CreateWin32Handle(const VmaAllocator hAllocator, PFN_vkGetMemoryWin32HandleKHR pvkGetMemoryWin32HandleKHR, HANDLE hTargetProcess, HANDLE* pHandle) noexcept +{ +    VMA_ASSERT(pHandle); +    return m_Handle.GetHandle(hAllocator->m_hDevice, m_hMemory, pvkGetMemoryWin32HandleKHR, hTargetProcess, hAllocator->m_UseMutex, pHandle); +} +#endif // VMA_EXTERNAL_MEMORY_WIN32 +#endif // _VMA_DEVICE_MEMORY_BLOCK_FUNCTIONS + +#ifndef _VMA_ALLOCATION_T_FUNCTIONS +VmaAllocation_T::VmaAllocation_T(bool mappingAllowed) +    : m_Alignment{ 1 }, +    m_Size{ 0 }, +    m_pUserData{ VMA_NULL }, +    m_pName{ VMA_NULL }, +    m_MemoryTypeIndex{ 0 }, +    m_Type{ (uint8_t)ALLOCATION_TYPE_NONE }, +    m_SuballocationType{ (uint8_t)VMA_SUBALLOCATION_TYPE_UNKNOWN }, +    m_MapCount{ 0 }, +    m_Flags{ 0 } +{ +    if(mappingAllowed) +        m_Flags |= (uint8_t)FLAG_MAPPING_ALLOWED; +} + +VmaAllocation_T::~VmaAllocation_T() +{ +    VMA_ASSERT_LEAK(m_MapCount == 0 && "Allocation was not unmapped before destruction."); + +    // Check if owned string was freed. +    VMA_ASSERT(m_pName == VMA_NULL); +} + +void VmaAllocation_T::InitBlockAllocation( +    VmaDeviceMemoryBlock* block, +    VmaAllocHandle allocHandle, +    VkDeviceSize alignment, +    VkDeviceSize size, +    uint32_t memoryTypeIndex, +    VmaSuballocationType suballocationType, +    bool mapped) +{ +    VMA_ASSERT(m_Type == ALLOCATION_TYPE_NONE); +    VMA_ASSERT(block != VMA_NULL); +    m_Type = (uint8_t)ALLOCATION_TYPE_BLOCK; +    m_Alignment = alignment; +    m_Size = size; +    m_MemoryTypeIndex = memoryTypeIndex; +    if(mapped) +    { +        VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); +        m_Flags |= (uint8_t)FLAG_PERSISTENT_MAP; +    } +    m_SuballocationType = (uint8_t)suballocationType; +    m_BlockAllocation.m_Block = block; +    m_BlockAllocation.m_AllocHandle = allocHandle; +} + +void VmaAllocation_T::InitDedicatedAllocation( +    VmaAllocator allocator, +    VmaPool hParentPool, +    uint32_t memoryTypeIndex, +    VkDeviceMemory hMemory, +    VmaSuballocationType suballocationType, +    void* pMappedData, +    VkDeviceSize size) +{ +    VMA_ASSERT(m_Type == ALLOCATION_TYPE_NONE); +    VMA_ASSERT(hMemory != VK_NULL_HANDLE); +    m_Type = (uint8_t)ALLOCATION_TYPE_DEDICATED; +    m_Alignment = 0; +    m_Size = size; +    m_MemoryTypeIndex = memoryTypeIndex; +    m_SuballocationType = (uint8_t)suballocationType; +    m_DedicatedAllocation.m_ExtraData = VMA_NULL; +    m_DedicatedAllocation.m_hParentPool = hParentPool; +    m_DedicatedAllocation.m_hMemory = hMemory; +    m_DedicatedAllocation.m_Prev = VMA_NULL; +    m_DedicatedAllocation.m_Next = VMA_NULL; + +    if (pMappedData != VMA_NULL) +    { +        VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! 
Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); +        m_Flags |= (uint8_t)FLAG_PERSISTENT_MAP; +        EnsureExtraData(allocator); +        m_DedicatedAllocation.m_ExtraData->m_pMappedData = pMappedData; +    } +} + +void VmaAllocation_T::Destroy(VmaAllocator allocator) +{ +    FreeName(allocator); + +    if (GetType() == ALLOCATION_TYPE_DEDICATED) +    { +        vma_delete(allocator, m_DedicatedAllocation.m_ExtraData); +    } +} + +void VmaAllocation_T::SetName(VmaAllocator hAllocator, const char* pName) +{ +    VMA_ASSERT(pName == VMA_NULL || pName != m_pName); + +    FreeName(hAllocator); + +    if (pName != VMA_NULL) +        m_pName = VmaCreateStringCopy(hAllocator->GetAllocationCallbacks(), pName); +} + +uint8_t VmaAllocation_T::SwapBlockAllocation(VmaAllocator hAllocator, VmaAllocation allocation) +{ +    VMA_ASSERT(allocation != VMA_NULL); +    VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK); +    VMA_ASSERT(allocation->m_Type == ALLOCATION_TYPE_BLOCK); + +    if (m_MapCount != 0) +        m_BlockAllocation.m_Block->Unmap(hAllocator, m_MapCount); + +    m_BlockAllocation.m_Block->m_pMetadata->SetAllocationUserData(m_BlockAllocation.m_AllocHandle, allocation); +    std::swap(m_BlockAllocation, allocation->m_BlockAllocation); +    m_BlockAllocation.m_Block->m_pMetadata->SetAllocationUserData(m_BlockAllocation.m_AllocHandle, this); + +#if VMA_STATS_STRING_ENABLED +    std::swap(m_BufferImageUsage, allocation->m_BufferImageUsage); +#endif +    return m_MapCount; +} + +VmaAllocHandle VmaAllocation_T::GetAllocHandle() const +{ +    switch (m_Type) +    { +    case ALLOCATION_TYPE_BLOCK: +        return m_BlockAllocation.m_AllocHandle; +    case ALLOCATION_TYPE_DEDICATED: +        return VK_NULL_HANDLE; +    default: +        VMA_ASSERT(0); +        return VK_NULL_HANDLE; +    } +} + +VkDeviceSize VmaAllocation_T::GetOffset() const +{ +    switch (m_Type) +    { +    case ALLOCATION_TYPE_BLOCK: +        return m_BlockAllocation.m_Block->m_pMetadata->GetAllocationOffset(m_BlockAllocation.m_AllocHandle); +    case ALLOCATION_TYPE_DEDICATED: +        return 0; +    default: +        VMA_ASSERT(0); +        return 0; +    } +} + +VmaPool VmaAllocation_T::GetParentPool() const +{ +    switch (m_Type) +    { +    case ALLOCATION_TYPE_BLOCK: +        return m_BlockAllocation.m_Block->GetParentPool(); +    case ALLOCATION_TYPE_DEDICATED: +        return m_DedicatedAllocation.m_hParentPool; +    default: +        VMA_ASSERT(0); +        return VK_NULL_HANDLE; +    } +} + +VkDeviceMemory VmaAllocation_T::GetMemory() const +{ +    switch (m_Type) +    { +    case ALLOCATION_TYPE_BLOCK: +        return m_BlockAllocation.m_Block->GetDeviceMemory(); +    case ALLOCATION_TYPE_DEDICATED: +        return m_DedicatedAllocation.m_hMemory; +    default: +        VMA_ASSERT(0); +        return VK_NULL_HANDLE; +    } +} + +void* VmaAllocation_T::GetMappedData() const +{ +    switch (m_Type) +    { +    case ALLOCATION_TYPE_BLOCK: +        if (m_MapCount != 0 || IsPersistentMap()) +        { +            void* pBlockData = m_BlockAllocation.m_Block->GetMappedData(); +            VMA_ASSERT(pBlockData != VMA_NULL); +            return (char*)pBlockData + GetOffset(); +        } +        else +        { +            return VMA_NULL; +        } +        break; +    case ALLOCATION_TYPE_DEDICATED: +        VMA_ASSERT((m_DedicatedAllocation.m_ExtraData != VMA_NULL && m_DedicatedAllocation.m_ExtraData->m_pMappedData != VMA_NULL) == +            (m_MapCount != 0 || 
IsPersistentMap())); +        return m_DedicatedAllocation.m_ExtraData != VMA_NULL ? m_DedicatedAllocation.m_ExtraData->m_pMappedData : VMA_NULL; +    default: +        VMA_ASSERT(0); +        return VMA_NULL; +    } +} + +void VmaAllocation_T::BlockAllocMap() +{ +    VMA_ASSERT(GetType() == ALLOCATION_TYPE_BLOCK); +    VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); + +    if (m_MapCount < 0xFF) +    { +        ++m_MapCount; +    } +    else +    { +        VMA_ASSERT(0 && "Allocation mapped too many times simultaneously."); +    } +} + +void VmaAllocation_T::BlockAllocUnmap() +{ +    VMA_ASSERT(GetType() == ALLOCATION_TYPE_BLOCK); + +    if (m_MapCount > 0) +    { +        --m_MapCount; +    } +    else +    { +        VMA_ASSERT(0 && "Unmapping allocation not previously mapped."); +    } +} + +VkResult VmaAllocation_T::DedicatedAllocMap(VmaAllocator hAllocator, void** ppData) +{ +    VMA_ASSERT(GetType() == ALLOCATION_TYPE_DEDICATED); +    VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); + +    EnsureExtraData(hAllocator); + +    if (m_MapCount != 0 || IsPersistentMap()) +    { +        if (m_MapCount < 0xFF) +        { +            VMA_ASSERT(m_DedicatedAllocation.m_ExtraData->m_pMappedData != VMA_NULL); +            *ppData = m_DedicatedAllocation.m_ExtraData->m_pMappedData; +            ++m_MapCount; +            return VK_SUCCESS; +        } +        else +        { +            VMA_ASSERT(0 && "Dedicated allocation mapped too many times simultaneously."); +            return VK_ERROR_MEMORY_MAP_FAILED; +        } +    } +    else +    { +        VkResult result = (*hAllocator->GetVulkanFunctions().vkMapMemory)( +            hAllocator->m_hDevice, +            m_DedicatedAllocation.m_hMemory, +            0, // offset +            VK_WHOLE_SIZE, +            0, // flags +            ppData); +        if (result == VK_SUCCESS) +        { +            m_DedicatedAllocation.m_ExtraData->m_pMappedData = *ppData; +            m_MapCount = 1; +        } +        return result; +    } +} + +void VmaAllocation_T::DedicatedAllocUnmap(VmaAllocator hAllocator) +{ +    VMA_ASSERT(GetType() == ALLOCATION_TYPE_DEDICATED); + +    if (m_MapCount > 0) +    { +        --m_MapCount; +        if (m_MapCount == 0 && !IsPersistentMap()) +        { +            VMA_ASSERT(m_DedicatedAllocation.m_ExtraData != VMA_NULL); +            m_DedicatedAllocation.m_ExtraData->m_pMappedData = VMA_NULL; +            (*hAllocator->GetVulkanFunctions().vkUnmapMemory)( +                hAllocator->m_hDevice, +                m_DedicatedAllocation.m_hMemory); +        } +    } +    else +    { +        VMA_ASSERT(0 && "Unmapping dedicated allocation not previously mapped."); +    } +} + +#if VMA_STATS_STRING_ENABLED +void VmaAllocation_T::PrintParameters(class VmaJsonWriter& json) const +{ +    json.WriteString("Type"); +    json.WriteString(VMA_SUBALLOCATION_TYPE_NAMES[m_SuballocationType]); + +    json.WriteString("Size"); +    json.WriteNumber(m_Size); +    json.WriteString("Usage"); +    json.WriteNumber(m_BufferImageUsage.Value); // It may be uint32_t or uint64_t. 
+ +    if (m_pUserData != VMA_NULL) +    { +        json.WriteString("CustomData"); +        json.BeginString(); +        json.ContinueString_Pointer(m_pUserData); +        json.EndString(); +    } +    if (m_pName != VMA_NULL) +    { +        json.WriteString("Name"); +        json.WriteString(m_pName); +    } +} +#if VMA_EXTERNAL_MEMORY_WIN32 +VkResult VmaAllocation_T::GetWin32Handle(VmaAllocator hAllocator, HANDLE hTargetProcess, HANDLE* pHandle) noexcept +{ +    auto pvkGetMemoryWin32HandleKHR = hAllocator->GetVulkanFunctions().vkGetMemoryWin32HandleKHR; +    switch (m_Type) +    { +    case ALLOCATION_TYPE_BLOCK: +        return m_BlockAllocation.m_Block->CreateWin32Handle(hAllocator, pvkGetMemoryWin32HandleKHR, hTargetProcess, pHandle); +    case ALLOCATION_TYPE_DEDICATED: +        EnsureExtraData(hAllocator); +        return m_DedicatedAllocation.m_ExtraData->m_Handle.GetHandle(hAllocator->m_hDevice, m_DedicatedAllocation.m_hMemory, pvkGetMemoryWin32HandleKHR, hTargetProcess, hAllocator->m_UseMutex, pHandle); +    default: +        VMA_ASSERT(0); +        return VK_ERROR_FEATURE_NOT_PRESENT; +    } +} +#endif // VMA_EXTERNAL_MEMORY_WIN32 +#endif // VMA_STATS_STRING_ENABLED + +void VmaAllocation_T::EnsureExtraData(VmaAllocator hAllocator) +{ +    if (m_DedicatedAllocation.m_ExtraData == VMA_NULL) +    { +        m_DedicatedAllocation.m_ExtraData = vma_new(hAllocator, VmaAllocationExtraData)(); +    } +} + +void VmaAllocation_T::FreeName(VmaAllocator hAllocator) +{ +    if(m_pName) +    { +        VmaFreeString(hAllocator->GetAllocationCallbacks(), m_pName); +        m_pName = VMA_NULL; +    } +} +#endif // _VMA_ALLOCATION_T_FUNCTIONS + +#ifndef _VMA_BLOCK_VECTOR_FUNCTIONS +VmaBlockVector::VmaBlockVector( +    VmaAllocator hAllocator, +    VmaPool hParentPool, +    uint32_t memoryTypeIndex, +    VkDeviceSize preferredBlockSize, +    size_t minBlockCount, +    size_t maxBlockCount, +    VkDeviceSize bufferImageGranularity, +    bool explicitBlockSize, +    uint32_t algorithm, +    float priority, +    VkDeviceSize minAllocationAlignment, +    void* pMemoryAllocateNext) +    : m_hAllocator(hAllocator), +    m_hParentPool(hParentPool), +    m_MemoryTypeIndex(memoryTypeIndex), +    m_PreferredBlockSize(preferredBlockSize), +    m_MinBlockCount(minBlockCount), +    m_MaxBlockCount(maxBlockCount), +    m_BufferImageGranularity(bufferImageGranularity), +    m_ExplicitBlockSize(explicitBlockSize), +    m_Algorithm(algorithm), +    m_Priority(priority), +    m_MinAllocationAlignment(minAllocationAlignment), +    m_pMemoryAllocateNext(pMemoryAllocateNext), +    m_Blocks(VmaStlAllocator<VmaDeviceMemoryBlock*>(hAllocator->GetAllocationCallbacks())), +    m_NextBlockId(0) {} + +VmaBlockVector::~VmaBlockVector() +{ +    for (size_t i = m_Blocks.size(); i--; ) +    { +        m_Blocks[i]->Destroy(m_hAllocator); +        vma_delete(m_hAllocator, m_Blocks[i]); +    } +} + +VkResult VmaBlockVector::CreateMinBlocks() +{ +    for (size_t i = 0; i < m_MinBlockCount; ++i) +    { +        VkResult res = CreateBlock(m_PreferredBlockSize, VMA_NULL); +        if (res != VK_SUCCESS) +        { +            return res; +        } +    } +    return VK_SUCCESS; +} + +void VmaBlockVector::AddStatistics(VmaStatistics& inoutStats) +{ +    VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); + +    const size_t blockCount = m_Blocks.size(); +    for (uint32_t blockIndex = 0; blockIndex < blockCount; ++blockIndex) +    { +        const VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex]; +        
VMA_ASSERT(pBlock); +        VMA_HEAVY_ASSERT(pBlock->Validate()); +        pBlock->m_pMetadata->AddStatistics(inoutStats); +    } +} + +void VmaBlockVector::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) +{ +    VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); + +    const size_t blockCount = m_Blocks.size(); +    for (uint32_t blockIndex = 0; blockIndex < blockCount; ++blockIndex) +    { +        const VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex]; +        VMA_ASSERT(pBlock); +        VMA_HEAVY_ASSERT(pBlock->Validate()); +        pBlock->m_pMetadata->AddDetailedStatistics(inoutStats); +    } +} + +bool VmaBlockVector::IsEmpty() +{ +    VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); +    return m_Blocks.empty(); +} + +bool VmaBlockVector::IsCorruptionDetectionEnabled() const +{ +    const uint32_t requiredMemFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; +    return (VMA_DEBUG_DETECT_CORRUPTION != 0) && +        (VMA_DEBUG_MARGIN > 0) && +        (m_Algorithm == 0 || m_Algorithm == VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT) && +        (m_hAllocator->m_MemProps.memoryTypes[m_MemoryTypeIndex].propertyFlags & requiredMemFlags) == requiredMemFlags; +} + +VkResult VmaBlockVector::Allocate( +    VkDeviceSize size, +    VkDeviceSize alignment, +    const VmaAllocationCreateInfo& createInfo, +    VmaSuballocationType suballocType, +    size_t allocationCount, +    VmaAllocation* pAllocations) +{ +    size_t allocIndex; +    VkResult res = VK_SUCCESS; + +    alignment = VMA_MAX(alignment, m_MinAllocationAlignment); + +    if (IsCorruptionDetectionEnabled()) +    { +        size = VmaAlignUp<VkDeviceSize>(size, sizeof(VMA_CORRUPTION_DETECTION_MAGIC_VALUE)); +        alignment = VmaAlignUp<VkDeviceSize>(alignment, sizeof(VMA_CORRUPTION_DETECTION_MAGIC_VALUE)); +    } + +    { +        VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex); +        for (allocIndex = 0; allocIndex < allocationCount; ++allocIndex) +        { +            res = AllocatePage( +                size, +                alignment, +                createInfo, +                suballocType, +                pAllocations + allocIndex); +            if (res != VK_SUCCESS) +            { +                break; +            } +        } +    } + +    if (res != VK_SUCCESS) +    { +        // Free all already created allocations. +        while (allocIndex--) +            Free(pAllocations[allocIndex]); +        memset(pAllocations, 0, sizeof(VmaAllocation) * allocationCount); +    } + +    return res; +} + +VkResult VmaBlockVector::AllocatePage( +    VkDeviceSize size, +    VkDeviceSize alignment, +    const VmaAllocationCreateInfo& createInfo, +    VmaSuballocationType suballocType, +    VmaAllocation* pAllocation) +{ +    const bool isUpperAddress = (createInfo.flags & VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0; + +    VkDeviceSize freeMemory; +    { +        const uint32_t heapIndex = m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex); +        VmaBudget heapBudget = {}; +        m_hAllocator->GetHeapBudgets(&heapBudget, heapIndex, 1); +        freeMemory = (heapBudget.usage < heapBudget.budget) ? 
(heapBudget.budget - heapBudget.usage) : 0; +    } + +    const bool canFallbackToDedicated = !HasExplicitBlockSize() && +        (createInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0; +    const bool canCreateNewBlock = +        ((createInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0) && +        (m_Blocks.size() < m_MaxBlockCount) && +        (freeMemory >= size || !canFallbackToDedicated); +    uint32_t strategy = createInfo.flags & VMA_ALLOCATION_CREATE_STRATEGY_MASK; + +    // Upper address can only be used with linear allocator and within single memory block. +    if (isUpperAddress && +        (m_Algorithm != VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT || m_MaxBlockCount > 1)) +    { +        return VK_ERROR_FEATURE_NOT_PRESENT; +    } + +    // Early reject: requested allocation size is larger that maximum block size for this block vector. +    if (size + VMA_DEBUG_MARGIN > m_PreferredBlockSize) +    { +        return VK_ERROR_OUT_OF_DEVICE_MEMORY; +    } + +    // 1. Search existing allocations. Try to allocate. +    if (m_Algorithm == VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT) +    { +        // Use only last block. +        if (!m_Blocks.empty()) +        { +            VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks.back(); +            VMA_ASSERT(pCurrBlock); +            VkResult res = AllocateFromBlock( +                pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); +            if (res == VK_SUCCESS) +            { +                VMA_DEBUG_LOG_FORMAT("    Returned from last block #%" PRIu32, pCurrBlock->GetId()); +                IncrementallySortBlocks(); +                return VK_SUCCESS; +            } +        } +    } +    else +    { +        if (strategy != VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT) // MIN_MEMORY or default +        { +            const bool isHostVisible = +                (m_hAllocator->m_MemProps.memoryTypes[m_MemoryTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0; +            if(isHostVisible) +            { +                const bool isMappingAllowed = (createInfo.flags & +                    (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0; +                /* +                For non-mappable allocations, check blocks that are not mapped first. +                For mappable allocations, check blocks that are already mapped first. +                This way, having many blocks, we will separate mappable and non-mappable allocations, +                hopefully limiting the number of blocks that are mapped, which will help tools like RenderDoc. +                */ +                for(size_t mappingI = 0; mappingI < 2; ++mappingI) +                { +                    // Forward order in m_Blocks - prefer blocks with smallest amount of free space. 
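+                    // m_Blocks is kept roughly sorted by ascending free size (see IncrementallySortBlocks below),
+                    // so the earliest blocks are the most occupied ones and get filled first.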
+                    for (size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) +                    { +                        VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; +                        VMA_ASSERT(pCurrBlock); +                        const bool isBlockMapped = pCurrBlock->GetMappedData() != VMA_NULL; +                        if((mappingI == 0) == (isMappingAllowed == isBlockMapped)) +                        { +                            VkResult res = AllocateFromBlock( +                                pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); +                            if (res == VK_SUCCESS) +                            { +                                VMA_DEBUG_LOG_FORMAT("    Returned from existing block #%" PRIu32, pCurrBlock->GetId()); +                                IncrementallySortBlocks(); +                                return VK_SUCCESS; +                            } +                        } +                    } +                } +            } +            else +            { +                // Forward order in m_Blocks - prefer blocks with smallest amount of free space. +                for (size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) +                { +                    VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; +                    VMA_ASSERT(pCurrBlock); +                    VkResult res = AllocateFromBlock( +                        pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); +                    if (res == VK_SUCCESS) +                    { +                        VMA_DEBUG_LOG_FORMAT("    Returned from existing block #%" PRIu32, pCurrBlock->GetId()); +                        IncrementallySortBlocks(); +                        return VK_SUCCESS; +                    } +                } +            } +        } +        else // VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT +        { +            // Backward order in m_Blocks - prefer blocks with largest amount of free space. +            for (size_t blockIndex = m_Blocks.size(); blockIndex--; ) +            { +                VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; +                VMA_ASSERT(pCurrBlock); +                VkResult res = AllocateFromBlock(pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); +                if (res == VK_SUCCESS) +                { +                    VMA_DEBUG_LOG_FORMAT("    Returned from existing block #%" PRIu32, pCurrBlock->GetId()); +                    IncrementallySortBlocks(); +                    return VK_SUCCESS; +                } +            } +        } +    } + +    // 2. Try to create new block. +    if (canCreateNewBlock) +    { +        // Calculate optimal size for new block. +        VkDeviceSize newBlockSize = m_PreferredBlockSize; +        uint32_t newBlockSizeShift = 0; +        const uint32_t NEW_BLOCK_SIZE_SHIFT_MAX = 3; + +        if (!m_ExplicitBlockSize) +        { +            // Allocate 1/8, 1/4, 1/2 as first blocks. 
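+            // This only applies when the block size was not set explicitly: early blocks start at a fraction of
+            // m_PreferredBlockSize and later ones grow, so a vector that stays small never commits a full block up front.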
+            const VkDeviceSize maxExistingBlockSize = CalcMaxBlockSize(); +            for (uint32_t i = 0; i < NEW_BLOCK_SIZE_SHIFT_MAX; ++i) +            { +                const VkDeviceSize smallerNewBlockSize = newBlockSize / 2; +                if (smallerNewBlockSize > maxExistingBlockSize && smallerNewBlockSize >= size * 2) +                { +                    newBlockSize = smallerNewBlockSize; +                    ++newBlockSizeShift; +                } +                else +                { +                    break; +                } +            } +        } + +        size_t newBlockIndex = 0; +        VkResult res = (newBlockSize <= freeMemory || !canFallbackToDedicated) ? +            CreateBlock(newBlockSize, &newBlockIndex) : VK_ERROR_OUT_OF_DEVICE_MEMORY; +        // Allocation of this size failed? Try 1/2, 1/4, 1/8 of m_PreferredBlockSize. +        if (!m_ExplicitBlockSize) +        { +            while (res < 0 && newBlockSizeShift < NEW_BLOCK_SIZE_SHIFT_MAX) +            { +                const VkDeviceSize smallerNewBlockSize = newBlockSize / 2; +                if (smallerNewBlockSize >= size) +                { +                    newBlockSize = smallerNewBlockSize; +                    ++newBlockSizeShift; +                    res = (newBlockSize <= freeMemory || !canFallbackToDedicated) ? +                        CreateBlock(newBlockSize, &newBlockIndex) : VK_ERROR_OUT_OF_DEVICE_MEMORY; +                } +                else +                { +                    break; +                } +            } +        } + +        if (res == VK_SUCCESS) +        { +            VmaDeviceMemoryBlock* const pBlock = m_Blocks[newBlockIndex]; +            VMA_ASSERT(pBlock->m_pMetadata->GetSize() >= size); + +            res = AllocateFromBlock( +                pBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); +            if (res == VK_SUCCESS) +            { +                VMA_DEBUG_LOG_FORMAT("    Created new block #%" PRIu32 " Size=%" PRIu64, pBlock->GetId(), newBlockSize); +                IncrementallySortBlocks(); +                return VK_SUCCESS; +            } +            else +            { +                // Allocation from new block failed, possibly due to VMA_DEBUG_MARGIN or alignment. +                return VK_ERROR_OUT_OF_DEVICE_MEMORY; +            } +        } +    } + +    return VK_ERROR_OUT_OF_DEVICE_MEMORY; +} + +void VmaBlockVector::Free(const VmaAllocation hAllocation) +{ +    VmaDeviceMemoryBlock* pBlockToDelete = VMA_NULL; + +    bool budgetExceeded = false; +    { +        const uint32_t heapIndex = m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex); +        VmaBudget heapBudget = {}; +        m_hAllocator->GetHeapBudgets(&heapBudget, heapIndex, 1); +        budgetExceeded = heapBudget.usage >= heapBudget.budget; +    } + +    // Scope for lock. 
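+    // All bookkeeping below runs under m_Mutex; a block picked for deletion is destroyed only after the lock is released.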
+    { +        VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex); + +        VmaDeviceMemoryBlock* pBlock = hAllocation->GetBlock(); + +        if (IsCorruptionDetectionEnabled()) +        { +            VkResult res = pBlock->ValidateMagicValueAfterAllocation(m_hAllocator, hAllocation->GetOffset(), hAllocation->GetSize()); +            VMA_ASSERT(res == VK_SUCCESS && "Couldn't map block memory to validate magic value."); +        } + +        if (hAllocation->IsPersistentMap()) +        { +            pBlock->Unmap(m_hAllocator, 1); +        } + +        const bool hadEmptyBlockBeforeFree = HasEmptyBlock(); +        pBlock->m_pMetadata->Free(hAllocation->GetAllocHandle()); +        pBlock->PostFree(m_hAllocator); +        VMA_HEAVY_ASSERT(pBlock->Validate()); + +        VMA_DEBUG_LOG_FORMAT("  Freed from MemoryTypeIndex=%" PRIu32, m_MemoryTypeIndex); + +        const bool canDeleteBlock = m_Blocks.size() > m_MinBlockCount; +        // pBlock became empty after this deallocation. +        if (pBlock->m_pMetadata->IsEmpty()) +        { +            // Already had empty block. We don't want to have two, so delete this one. +            if ((hadEmptyBlockBeforeFree || budgetExceeded) && canDeleteBlock) +            { +                pBlockToDelete = pBlock; +                Remove(pBlock); +            } +            // else: We now have one empty block - leave it. A hysteresis to avoid allocating whole block back and forth. +        } +        // pBlock didn't become empty, but we have another empty block - find and free that one. +        // (This is optional, heuristics.) +        else if (hadEmptyBlockBeforeFree && canDeleteBlock) +        { +            VmaDeviceMemoryBlock* pLastBlock = m_Blocks.back(); +            if (pLastBlock->m_pMetadata->IsEmpty()) +            { +                pBlockToDelete = pLastBlock; +                m_Blocks.pop_back(); +            } +        } + +        IncrementallySortBlocks(); + +        m_hAllocator->m_Budget.RemoveAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), hAllocation->GetSize()); +        hAllocation->Destroy(m_hAllocator); +        m_hAllocator->m_AllocationObjectAllocator.Free(hAllocation); +    } + +    // Destruction of a free block. Deferred until this point, outside of mutex +    // lock, for performance reason. +    if (pBlockToDelete != VMA_NULL) +    { +        VMA_DEBUG_LOG_FORMAT("    Deleted empty block #%" PRIu32, pBlockToDelete->GetId()); +        pBlockToDelete->Destroy(m_hAllocator); +        vma_delete(m_hAllocator, pBlockToDelete); +    } +} + +VkDeviceSize VmaBlockVector::CalcMaxBlockSize() const +{ +    VkDeviceSize result = 0; +    for (size_t i = m_Blocks.size(); i--; ) +    { +        result = VMA_MAX(result, m_Blocks[i]->m_pMetadata->GetSize()); +        if (result >= m_PreferredBlockSize) +        { +            break; +        } +    } +    return result; +} + +void VmaBlockVector::Remove(VmaDeviceMemoryBlock* pBlock) +{ +    for (uint32_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) +    { +        if (m_Blocks[blockIndex] == pBlock) +        { +            VmaVectorRemove(m_Blocks, blockIndex); +            return; +        } +    } +    VMA_ASSERT(0); +} + +void VmaBlockVector::IncrementallySortBlocks() +{ +    if (!m_IncrementalSort) +        return; +    if (m_Algorithm != VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT) +    { +        // Bubble sort only until first swap. 
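+        // A single swap per call keeps the cost low; because this runs after every allocation and free,
+        // m_Blocks gradually converges on ascending free-size order, which AllocatePage relies on.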
+        for (size_t i = 1; i < m_Blocks.size(); ++i) +        { +            if (m_Blocks[i - 1]->m_pMetadata->GetSumFreeSize() > m_Blocks[i]->m_pMetadata->GetSumFreeSize()) +            { +                std::swap(m_Blocks[i - 1], m_Blocks[i]); +                return; +            } +        } +    } +} + +void VmaBlockVector::SortByFreeSize() +{ +    VMA_SORT(m_Blocks.begin(), m_Blocks.end(), +        [](VmaDeviceMemoryBlock* b1, VmaDeviceMemoryBlock* b2) -> bool +        { +            return b1->m_pMetadata->GetSumFreeSize() < b2->m_pMetadata->GetSumFreeSize(); +        }); +} + +VkResult VmaBlockVector::AllocateFromBlock( +    VmaDeviceMemoryBlock* pBlock, +    VkDeviceSize size, +    VkDeviceSize alignment, +    VmaAllocationCreateFlags allocFlags, +    void* pUserData, +    VmaSuballocationType suballocType, +    uint32_t strategy, +    VmaAllocation* pAllocation) +{ +    const bool isUpperAddress = (allocFlags & VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0; + +    VmaAllocationRequest currRequest = {}; +    if (pBlock->m_pMetadata->CreateAllocationRequest( +        size, +        alignment, +        isUpperAddress, +        suballocType, +        strategy, +        &currRequest)) +    { +        return CommitAllocationRequest(currRequest, pBlock, alignment, allocFlags, pUserData, suballocType, pAllocation); +    } +    return VK_ERROR_OUT_OF_DEVICE_MEMORY; +} + +VkResult VmaBlockVector::CommitAllocationRequest( +    VmaAllocationRequest& allocRequest, +    VmaDeviceMemoryBlock* pBlock, +    VkDeviceSize alignment, +    VmaAllocationCreateFlags allocFlags, +    void* pUserData, +    VmaSuballocationType suballocType, +    VmaAllocation* pAllocation) +{ +    const bool mapped = (allocFlags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0; +    const bool isUserDataString = (allocFlags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0; +    const bool isMappingAllowed = (allocFlags & +        (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0; + +    pBlock->PostAlloc(m_hAllocator); +    // Allocate from pCurrBlock. +    if (mapped) +    { +        VkResult res = pBlock->Map(m_hAllocator, 1, VMA_NULL); +        if (res != VK_SUCCESS) +        { +            return res; +        } +    } + +    *pAllocation = m_hAllocator->m_AllocationObjectAllocator.Allocate(isMappingAllowed); +    pBlock->m_pMetadata->Alloc(allocRequest, suballocType, *pAllocation); +    (*pAllocation)->InitBlockAllocation( +        pBlock, +        allocRequest.allocHandle, +        alignment, +        allocRequest.size, // Not size, as actual allocation size may be larger than requested! 
+        m_MemoryTypeIndex, +        suballocType, +        mapped); +    VMA_HEAVY_ASSERT(pBlock->Validate()); +    if (isUserDataString) +        (*pAllocation)->SetName(m_hAllocator, (const char*)pUserData); +    else +        (*pAllocation)->SetUserData(m_hAllocator, pUserData); +    m_hAllocator->m_Budget.AddAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), allocRequest.size); +    if (VMA_DEBUG_INITIALIZE_ALLOCATIONS) +    { +        m_hAllocator->FillAllocation(*pAllocation, VMA_ALLOCATION_FILL_PATTERN_CREATED); +    } +    if (IsCorruptionDetectionEnabled()) +    { +        VkResult res = pBlock->WriteMagicValueAfterAllocation(m_hAllocator, (*pAllocation)->GetOffset(), allocRequest.size); +        VMA_ASSERT(res == VK_SUCCESS && "Couldn't map block memory to write magic value."); +    } +    return VK_SUCCESS; +} + +VkResult VmaBlockVector::CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIndex) +{ +    VkMemoryAllocateInfo allocInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; +    allocInfo.pNext = m_pMemoryAllocateNext; +    allocInfo.memoryTypeIndex = m_MemoryTypeIndex; +    allocInfo.allocationSize = blockSize; + +#if VMA_BUFFER_DEVICE_ADDRESS +    // Every standalone block can potentially contain a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT - always enable the feature. +    VkMemoryAllocateFlagsInfoKHR allocFlagsInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO_KHR }; +    if (m_hAllocator->m_UseKhrBufferDeviceAddress) +    { +        allocFlagsInfo.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR; +        VmaPnextChainPushFront(&allocInfo, &allocFlagsInfo); +    } +#endif // VMA_BUFFER_DEVICE_ADDRESS + +#if VMA_MEMORY_PRIORITY +    VkMemoryPriorityAllocateInfoEXT priorityInfo = { VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT }; +    if (m_hAllocator->m_UseExtMemoryPriority) +    { +        VMA_ASSERT(m_Priority >= 0.f && m_Priority <= 1.f); +        priorityInfo.priority = m_Priority; +        VmaPnextChainPushFront(&allocInfo, &priorityInfo); +    } +#endif // VMA_MEMORY_PRIORITY + +#if VMA_EXTERNAL_MEMORY +    // Attach VkExportMemoryAllocateInfoKHR if necessary. +    VkExportMemoryAllocateInfoKHR exportMemoryAllocInfo = { VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR }; +    exportMemoryAllocInfo.handleTypes = m_hAllocator->GetExternalMemoryHandleTypeFlags(m_MemoryTypeIndex); +    if (exportMemoryAllocInfo.handleTypes != 0) +    { +        VmaPnextChainPushFront(&allocInfo, &exportMemoryAllocInfo); +    } +#endif // VMA_EXTERNAL_MEMORY + +    VkDeviceMemory mem = VK_NULL_HANDLE; +    VkResult res = m_hAllocator->AllocateVulkanMemory(&allocInfo, &mem); +    if (res < 0) +    { +        return res; +    } + +    // New VkDeviceMemory successfully created. + +    // Create new Allocation for it. 
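Note: CreateBlock above funnels every optional feature into the pNext chain of a single VkMemoryAllocateInfo (device-address flags, memory priority, export-memory info) before calling vkAllocateMemory once. The device-address branch corresponds to the following plain-Vulkan sketch (Vulkan 1.2 names; the KHR-suffixed equivalents apply on older headers, and the handles here are placeholders):

    #include <vulkan/vulkan.h>

    // Plain-Vulkan equivalent of the VMA_BUFFER_DEVICE_ADDRESS branch above.
    VkResult AllocateWithDeviceAddress(VkDevice device, uint32_t memoryTypeIndex,
                                       VkDeviceSize size, VkDeviceMemory* outMemory)
    {
        VkMemoryAllocateFlagsInfo flagsInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO };
        flagsInfo.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT;

        VkMemoryAllocateInfo allocInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO };
        allocInfo.pNext = &flagsInfo; // extension struct pushed to the front of the chain
        allocInfo.allocationSize = size;
        allocInfo.memoryTypeIndex = memoryTypeIndex;

        return vkAllocateMemory(device, &allocInfo, nullptr, outMemory);
    }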
+    VmaDeviceMemoryBlock* const pBlock = vma_new(m_hAllocator, VmaDeviceMemoryBlock)(m_hAllocator); +    pBlock->Init( +        m_hAllocator, +        m_hParentPool, +        m_MemoryTypeIndex, +        mem, +        allocInfo.allocationSize, +        m_NextBlockId++, +        m_Algorithm, +        m_BufferImageGranularity); + +    m_Blocks.push_back(pBlock); +    if (pNewBlockIndex != VMA_NULL) +    { +        *pNewBlockIndex = m_Blocks.size() - 1; +    } + +    return VK_SUCCESS; +} + +bool VmaBlockVector::HasEmptyBlock() +{ +    for (size_t index = 0, count = m_Blocks.size(); index < count; ++index) +    { +        VmaDeviceMemoryBlock* const pBlock = m_Blocks[index]; +        if (pBlock->m_pMetadata->IsEmpty()) +        { +            return true; +        } +    } +    return false; +} + +#if VMA_STATS_STRING_ENABLED +void VmaBlockVector::PrintDetailedMap(class VmaJsonWriter& json) +{ +    VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); + + +    json.BeginObject(); +    for (size_t i = 0; i < m_Blocks.size(); ++i) +    { +        json.BeginString(); +        json.ContinueString(m_Blocks[i]->GetId()); +        json.EndString(); + +        json.BeginObject(); +        json.WriteString("MapRefCount"); +        json.WriteNumber(m_Blocks[i]->GetMapRefCount()); + +        m_Blocks[i]->m_pMetadata->PrintDetailedMap(json); +        json.EndObject(); +    } +    json.EndObject(); +} +#endif // VMA_STATS_STRING_ENABLED + +VkResult VmaBlockVector::CheckCorruption() +{ +    if (!IsCorruptionDetectionEnabled()) +    { +        return VK_ERROR_FEATURE_NOT_PRESENT; +    } + +    VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); +    for (uint32_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) +    { +        VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex]; +        VMA_ASSERT(pBlock); +        VkResult res = pBlock->CheckCorruption(m_hAllocator); +        if (res != VK_SUCCESS) +        { +            return res; +        } +    } +    return VK_SUCCESS; +} + +#endif // _VMA_BLOCK_VECTOR_FUNCTIONS + +#ifndef _VMA_DEFRAGMENTATION_CONTEXT_FUNCTIONS +VmaDefragmentationContext_T::VmaDefragmentationContext_T( +    VmaAllocator hAllocator, +    const VmaDefragmentationInfo& info) +    : m_MaxPassBytes(info.maxBytesPerPass == 0 ? VK_WHOLE_SIZE : info.maxBytesPerPass), +    m_MaxPassAllocations(info.maxAllocationsPerPass == 0 ? 
UINT32_MAX : info.maxAllocationsPerPass), +    m_BreakCallback(info.pfnBreakCallback), +    m_BreakCallbackUserData(info.pBreakCallbackUserData), +    m_MoveAllocator(hAllocator->GetAllocationCallbacks()), +    m_Moves(m_MoveAllocator) +{ +    m_Algorithm = info.flags & VMA_DEFRAGMENTATION_FLAG_ALGORITHM_MASK; + +    if (info.pool != VMA_NULL) +    { +        m_BlockVectorCount = 1; +        m_PoolBlockVector = &info.pool->m_BlockVector; +        m_pBlockVectors = &m_PoolBlockVector; +        m_PoolBlockVector->SetIncrementalSort(false); +        m_PoolBlockVector->SortByFreeSize(); +    } +    else +    { +        m_BlockVectorCount = hAllocator->GetMemoryTypeCount(); +        m_PoolBlockVector = VMA_NULL; +        m_pBlockVectors = hAllocator->m_pBlockVectors; +        for (uint32_t i = 0; i < m_BlockVectorCount; ++i) +        { +            VmaBlockVector* vector = m_pBlockVectors[i]; +            if (vector != VMA_NULL) +            { +                vector->SetIncrementalSort(false); +                vector->SortByFreeSize(); +            } +        } +    } + +    switch (m_Algorithm) +    { +    case 0: // Default algorithm +        m_Algorithm = VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT; +        m_AlgorithmState = vma_new_array(hAllocator, StateBalanced, m_BlockVectorCount); +        break; +    case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT: +        m_AlgorithmState = vma_new_array(hAllocator, StateBalanced, m_BlockVectorCount); +        break; +    case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: +        if (hAllocator->GetBufferImageGranularity() > 1) +        { +            m_AlgorithmState = vma_new_array(hAllocator, StateExtensive, m_BlockVectorCount); +        } +        break; +    } +} + +VmaDefragmentationContext_T::~VmaDefragmentationContext_T() +{ +    if (m_PoolBlockVector != VMA_NULL) +    { +        m_PoolBlockVector->SetIncrementalSort(true); +    } +    else +    { +        for (uint32_t i = 0; i < m_BlockVectorCount; ++i) +        { +            VmaBlockVector* vector = m_pBlockVectors[i]; +            if (vector != VMA_NULL) +                vector->SetIncrementalSort(true); +        } +    } + +    if (m_AlgorithmState) +    { +        switch (m_Algorithm) +        { +        case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT: +            vma_delete_array(m_MoveAllocator.m_pCallbacks, reinterpret_cast<StateBalanced*>(m_AlgorithmState), m_BlockVectorCount); +            break; +        case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: +            vma_delete_array(m_MoveAllocator.m_pCallbacks, reinterpret_cast<StateExtensive*>(m_AlgorithmState), m_BlockVectorCount); +            break; +        default: +            VMA_ASSERT(0); +        } +    } +} + +VkResult VmaDefragmentationContext_T::DefragmentPassBegin(VmaDefragmentationPassMoveInfo& moveInfo) +{ +    if (m_PoolBlockVector != VMA_NULL) +    { +        VmaMutexLockWrite lock(m_PoolBlockVector->GetMutex(), m_PoolBlockVector->GetAllocator()->m_UseMutex); + +        if (m_PoolBlockVector->GetBlockCount() > 1) +            ComputeDefragmentation(*m_PoolBlockVector, 0); +        else if (m_PoolBlockVector->GetBlockCount() == 1) +            ReallocWithinBlock(*m_PoolBlockVector, m_PoolBlockVector->GetBlock(0)); +    } +    else +    { +        for (uint32_t i = 0; i < m_BlockVectorCount; ++i) +        { +            if (m_pBlockVectors[i] != VMA_NULL) +            { +                VmaMutexLockWrite lock(m_pBlockVectors[i]->GetMutex(), 
m_pBlockVectors[i]->GetAllocator()->m_UseMutex); + +                if (m_pBlockVectors[i]->GetBlockCount() > 1) +                { +                    if (ComputeDefragmentation(*m_pBlockVectors[i], i)) +                        break; +                } +                else if (m_pBlockVectors[i]->GetBlockCount() == 1) +                { +                    if (ReallocWithinBlock(*m_pBlockVectors[i], m_pBlockVectors[i]->GetBlock(0))) +                        break; +                } +            } +        } +    } + +    moveInfo.moveCount = static_cast<uint32_t>(m_Moves.size()); +    if (moveInfo.moveCount > 0) +    { +        moveInfo.pMoves = m_Moves.data(); +        return VK_INCOMPLETE; +    } + +    moveInfo.pMoves = VMA_NULL; +    return VK_SUCCESS; +} + +VkResult VmaDefragmentationContext_T::DefragmentPassEnd(VmaDefragmentationPassMoveInfo& moveInfo) +{ +    VMA_ASSERT(moveInfo.moveCount > 0 ? moveInfo.pMoves != VMA_NULL : true); + +    VkResult result = VK_SUCCESS; +    VmaStlAllocator<FragmentedBlock> blockAllocator(m_MoveAllocator.m_pCallbacks); +    VmaVector<FragmentedBlock, VmaStlAllocator<FragmentedBlock>> immovableBlocks(blockAllocator); +    VmaVector<FragmentedBlock, VmaStlAllocator<FragmentedBlock>> mappedBlocks(blockAllocator); + +    VmaAllocator allocator = VMA_NULL; +    for (uint32_t i = 0; i < moveInfo.moveCount; ++i) +    { +        VmaDefragmentationMove& move = moveInfo.pMoves[i]; +        size_t prevCount = 0, currentCount = 0; +        VkDeviceSize freedBlockSize = 0; + +        uint32_t vectorIndex; +        VmaBlockVector* vector; +        if (m_PoolBlockVector != VMA_NULL) +        { +            vectorIndex = 0; +            vector = m_PoolBlockVector; +        } +        else +        { +            vectorIndex = move.srcAllocation->GetMemoryTypeIndex(); +            vector = m_pBlockVectors[vectorIndex]; +            VMA_ASSERT(vector != VMA_NULL); +        } + +        switch (move.operation) +        { +        case VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY: +        { +            uint8_t mapCount = move.srcAllocation->SwapBlockAllocation(vector->m_hAllocator, move.dstTmpAllocation); +            if (mapCount > 0) +            { +                allocator = vector->m_hAllocator; +                VmaDeviceMemoryBlock* newMapBlock = move.srcAllocation->GetBlock(); +                bool notPresent = true; +                for (FragmentedBlock& block : mappedBlocks) +                { +                    if (block.block == newMapBlock) +                    { +                        notPresent = false; +                        block.data += mapCount; +                        break; +                    } +                } +                if (notPresent) +                    mappedBlocks.push_back({ mapCount, newMapBlock }); +            } + +            // Scope for locks, Free have it's own lock +            { +                VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); +                prevCount = vector->GetBlockCount(); +                freedBlockSize = move.dstTmpAllocation->GetBlock()->m_pMetadata->GetSize(); +            } +            vector->Free(move.dstTmpAllocation); +            { +                VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); +                currentCount = vector->GetBlockCount(); +            } + +            result = VK_INCOMPLETE; +            break; +        } +        case VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE: +        { +            
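Note: DefragmentPassBegin above only plans moves and hands them to the caller (returning VK_INCOMPLETE while work remains, VK_SUCCESS once nothing is left), while DefragmentPassEnd, continuing below, commits or rolls back each move according to the operation the caller chose (COPY, IGNORE or DESTROY). From the application side this machinery is driven through the public VMA 3.x entry points roughly as follows; this is a minimal sketch and the GPU copy of each move is elided as a comment:

    #include "vk_mem_alloc.h"

    // Caller-side defragmentation loop over the pass machinery implemented here.
    void DefragmentAll(VmaAllocator allocator)
    {
        VmaDefragmentationInfo defragInfo = {};
        defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT;

        VmaDefragmentationContext ctx = VK_NULL_HANDLE;
        if (vmaBeginDefragmentation(allocator, &defragInfo, &ctx) != VK_SUCCESS)
            return;

        for (;;)
        {
            VmaDefragmentationPassMoveInfo pass = {};
            if (vmaBeginDefragmentationPass(allocator, ctx, &pass) == VK_SUCCESS)
                break; // nothing left to move

            // For each pass.pMoves[i]: copy srcAllocation's contents into
            // dstTmpAllocation (vkCmdCopyBuffer / vkCmdCopyImage), or set
            // operation to IGNORE / DESTROY instead of performing the copy.

            if (vmaEndDefragmentationPass(allocator, ctx, &pass) == VK_SUCCESS)
                break; // VK_INCOMPLETE would mean more passes are needed
        }

        VmaDefragmentationStats stats = {};
        vmaEndDefragmentation(allocator, ctx, &stats);
    }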
m_PassStats.bytesMoved -= move.srcAllocation->GetSize(); +            --m_PassStats.allocationsMoved; +            vector->Free(move.dstTmpAllocation); + +            VmaDeviceMemoryBlock* newBlock = move.srcAllocation->GetBlock(); +            bool notPresent = true; +            for (const FragmentedBlock& block : immovableBlocks) +            { +                if (block.block == newBlock) +                { +                    notPresent = false; +                    break; +                } +            } +            if (notPresent) +                immovableBlocks.push_back({ vectorIndex, newBlock }); +            break; +        } +        case VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY: +        { +            m_PassStats.bytesMoved -= move.srcAllocation->GetSize(); +            --m_PassStats.allocationsMoved; +            // Scope for locks, Free have it's own lock +            { +                VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); +                prevCount = vector->GetBlockCount(); +                freedBlockSize = move.srcAllocation->GetBlock()->m_pMetadata->GetSize(); +            } +            vector->Free(move.srcAllocation); +            { +                VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); +                currentCount = vector->GetBlockCount(); +            } +            freedBlockSize *= prevCount - currentCount; + +            VkDeviceSize dstBlockSize; +            { +                VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); +                dstBlockSize = move.dstTmpAllocation->GetBlock()->m_pMetadata->GetSize(); +            } +            vector->Free(move.dstTmpAllocation); +            { +                VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); +                freedBlockSize += dstBlockSize * (currentCount - vector->GetBlockCount()); +                currentCount = vector->GetBlockCount(); +            } + +            result = VK_INCOMPLETE; +            break; +        } +        default: +            VMA_ASSERT(0); +        } + +        if (prevCount > currentCount) +        { +            size_t freedBlocks = prevCount - currentCount; +            m_PassStats.deviceMemoryBlocksFreed += static_cast<uint32_t>(freedBlocks); +            m_PassStats.bytesFreed += freedBlockSize; +        } + +        if(m_Algorithm == VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT && +            m_AlgorithmState != VMA_NULL) +        { +            // Avoid unnecessary tries to allocate when new free block is available +            StateExtensive& state = reinterpret_cast<StateExtensive*>(m_AlgorithmState)[vectorIndex]; +            if (state.firstFreeBlock != SIZE_MAX) +            { +                const size_t diff = prevCount - currentCount; +                if (state.firstFreeBlock >= diff) +                { +                    state.firstFreeBlock -= diff; +                    if (state.firstFreeBlock != 0) +                        state.firstFreeBlock -= vector->GetBlock(state.firstFreeBlock - 1)->m_pMetadata->IsEmpty(); +                } +                else +                    state.firstFreeBlock = 0; +            } +        } +    } +    moveInfo.moveCount = 0; +    moveInfo.pMoves = VMA_NULL; +    m_Moves.clear(); + +    // Update stats +    m_GlobalStats.allocationsMoved += m_PassStats.allocationsMoved; +    m_GlobalStats.bytesFreed += m_PassStats.bytesFreed; +    m_GlobalStats.bytesMoved += 
m_PassStats.bytesMoved; +    m_GlobalStats.deviceMemoryBlocksFreed += m_PassStats.deviceMemoryBlocksFreed; +    m_PassStats = { 0 }; + +    // Move blocks with immovable allocations according to algorithm +    if (immovableBlocks.size() > 0) +    { +        do +        { +            if(m_Algorithm == VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT) +            { +                if (m_AlgorithmState != VMA_NULL) +                { +                    bool swapped = false; +                    // Move to the start of free blocks range +                    for (const FragmentedBlock& block : immovableBlocks) +                    { +                        StateExtensive& state = reinterpret_cast<StateExtensive*>(m_AlgorithmState)[block.data]; +                        if (state.operation != StateExtensive::Operation::Cleanup) +                        { +                            VmaBlockVector* vector = m_pBlockVectors[block.data]; +                            VmaMutexLockWrite lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + +                            for (size_t i = 0, count = vector->GetBlockCount() - m_ImmovableBlockCount; i < count; ++i) +                            { +                                if (vector->GetBlock(i) == block.block) +                                { +                                    std::swap(vector->m_Blocks[i], vector->m_Blocks[vector->GetBlockCount() - ++m_ImmovableBlockCount]); +                                    if (state.firstFreeBlock != SIZE_MAX) +                                    { +                                        if (i + 1 < state.firstFreeBlock) +                                        { +                                            if (state.firstFreeBlock > 1) +                                                std::swap(vector->m_Blocks[i], vector->m_Blocks[--state.firstFreeBlock]); +                                            else +                                                --state.firstFreeBlock; +                                        } +                                    } +                                    swapped = true; +                                    break; +                                } +                            } +                        } +                    } +                    if (swapped) +                        result = VK_INCOMPLETE; +                    break; +                } +            } + +            // Move to the beginning +            for (const FragmentedBlock& block : immovableBlocks) +            { +                VmaBlockVector* vector = m_pBlockVectors[block.data]; +                VmaMutexLockWrite lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + +                for (size_t i = m_ImmovableBlockCount; i < vector->GetBlockCount(); ++i) +                { +                    if (vector->GetBlock(i) == block.block) +                    { +                        std::swap(vector->m_Blocks[i], vector->m_Blocks[m_ImmovableBlockCount++]); +                        break; +                    } +                } +            } +        } while (false); +    } + +    // Bulk-map destination blocks +    for (const FragmentedBlock& block : mappedBlocks) +    { +        VkResult res = block.block->Map(allocator, block.data, VMA_NULL); +        VMA_ASSERT(res == VK_SUCCESS); +    } +    return result; +} + +bool VmaDefragmentationContext_T::ComputeDefragmentation(VmaBlockVector& vector, size_t index) +{ +    switch (m_Algorithm) +    { +    case 
VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT: +        return ComputeDefragmentation_Fast(vector); +    case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT: +        return ComputeDefragmentation_Balanced(vector, index, true); +    case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT: +        return ComputeDefragmentation_Full(vector); +    case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: +        return ComputeDefragmentation_Extensive(vector, index); +    default: +        VMA_ASSERT(0); +        return ComputeDefragmentation_Balanced(vector, index, true); +    } +} + +VmaDefragmentationContext_T::MoveAllocationData VmaDefragmentationContext_T::GetMoveData( +    VmaAllocHandle handle, VmaBlockMetadata* metadata) +{ +    MoveAllocationData moveData; +    moveData.move.srcAllocation = (VmaAllocation)metadata->GetAllocationUserData(handle); +    moveData.size = moveData.move.srcAllocation->GetSize(); +    moveData.alignment = moveData.move.srcAllocation->GetAlignment(); +    moveData.type = moveData.move.srcAllocation->GetSuballocationType(); +    moveData.flags = 0; + +    if (moveData.move.srcAllocation->IsPersistentMap()) +        moveData.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT; +    if (moveData.move.srcAllocation->IsMappingAllowed()) +        moveData.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; + +    return moveData; +} + +VmaDefragmentationContext_T::CounterStatus VmaDefragmentationContext_T::CheckCounters(VkDeviceSize bytes) +{ +    // Check custom criteria if exists +    if (m_BreakCallback && m_BreakCallback(m_BreakCallbackUserData)) +        return CounterStatus::End; + +    // Ignore allocation if will exceed max size for copy +    if (m_PassStats.bytesMoved + bytes > m_MaxPassBytes) +    { +        if (++m_IgnoredAllocs < MAX_ALLOCS_TO_IGNORE) +            return CounterStatus::Ignore; +        else +            return CounterStatus::End; +    } +    else +        m_IgnoredAllocs = 0; +    return CounterStatus::Pass; +} + +bool VmaDefragmentationContext_T::IncrementCounters(VkDeviceSize bytes) +{ +    m_PassStats.bytesMoved += bytes; +    // Early return when max found +    if (++m_PassStats.allocationsMoved >= m_MaxPassAllocations || m_PassStats.bytesMoved >= m_MaxPassBytes) +    { +        VMA_ASSERT((m_PassStats.allocationsMoved == m_MaxPassAllocations || +            m_PassStats.bytesMoved == m_MaxPassBytes) && "Exceeded maximal pass threshold!"); +        return true; +    } +    return false; +} + +bool VmaDefragmentationContext_T::ReallocWithinBlock(VmaBlockVector& vector, VmaDeviceMemoryBlock* block) +{ +    VmaBlockMetadata* metadata = block->m_pMetadata; + +    for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); +        handle != VK_NULL_HANDLE; +        handle = metadata->GetNextAllocation(handle)) +    { +        MoveAllocationData moveData = GetMoveData(handle, metadata); +        // Ignore newly created allocations by defragmentation algorithm +        if (moveData.move.srcAllocation->GetUserData() == this) +            continue; +        switch (CheckCounters(moveData.move.srcAllocation->GetSize())) +        { +        case CounterStatus::Ignore: +            continue; +        case CounterStatus::End: +            return true; +        case CounterStatus::Pass: +            break; +        default: +            VMA_ASSERT(0); +        } + +        VkDeviceSize offset = moveData.move.srcAllocation->GetOffset(); +        if (offset != 0 && metadata->GetSumFreeSize() >= 
moveData.size) +        { +            VmaAllocationRequest request = {}; +            if (metadata->CreateAllocationRequest( +                moveData.size, +                moveData.alignment, +                false, +                moveData.type, +                VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, +                &request)) +            { +                if (metadata->GetAllocationOffset(request.allocHandle) < offset) +                { +                    if (vector.CommitAllocationRequest( +                        request, +                        block, +                        moveData.alignment, +                        moveData.flags, +                        this, +                        moveData.type, +                        &moveData.move.dstTmpAllocation) == VK_SUCCESS) +                    { +                        m_Moves.push_back(moveData.move); +                        if (IncrementCounters(moveData.size)) +                            return true; +                    } +                } +            } +        } +    } +    return false; +} + +bool VmaDefragmentationContext_T::AllocInOtherBlock(size_t start, size_t end, MoveAllocationData& data, VmaBlockVector& vector) +{ +    for (; start < end; ++start) +    { +        VmaDeviceMemoryBlock* dstBlock = vector.GetBlock(start); +        if (dstBlock->m_pMetadata->GetSumFreeSize() >= data.size) +        { +            if (vector.AllocateFromBlock(dstBlock, +                data.size, +                data.alignment, +                data.flags, +                this, +                data.type, +                0, +                &data.move.dstTmpAllocation) == VK_SUCCESS) +            { +                m_Moves.push_back(data.move); +                if (IncrementCounters(data.size)) +                    return true; +                break; +            } +        } +    } +    return false; +} + +bool VmaDefragmentationContext_T::ComputeDefragmentation_Fast(VmaBlockVector& vector) +{ +    // Move only between blocks + +    // Go through allocations in last blocks and try to fit them inside first ones +    for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i) +    { +        VmaBlockMetadata* metadata = vector.GetBlock(i)->m_pMetadata; + +        for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); +            handle != VK_NULL_HANDLE; +            handle = metadata->GetNextAllocation(handle)) +        { +            MoveAllocationData moveData = GetMoveData(handle, metadata); +            // Ignore newly created allocations by defragmentation algorithm +            if (moveData.move.srcAllocation->GetUserData() == this) +                continue; +            switch (CheckCounters(moveData.move.srcAllocation->GetSize())) +            { +            case CounterStatus::Ignore: +                continue; +            case CounterStatus::End: +                return true; +            case CounterStatus::Pass: +                break; +            default: +                VMA_ASSERT(0); +            } + +            // Check all previous blocks for free space +            if (AllocInOtherBlock(0, i, moveData, vector)) +                return true; +        } +    } +    return false; +} + +bool VmaDefragmentationContext_T::ComputeDefragmentation_Balanced(VmaBlockVector& vector, size_t index, bool update) +{ +    // Go over every allocation and try to fit it in previous blocks at lowest offsets, +    // if not possible: realloc within single block to minimize offset 
(exclude offset == 0), +    // but only if there are noticeable gaps between them (some heuristic, ex. average size of allocation in block) +    VMA_ASSERT(m_AlgorithmState != VMA_NULL); + +    StateBalanced& vectorState = reinterpret_cast<StateBalanced*>(m_AlgorithmState)[index]; +    if (update && vectorState.avgAllocSize == UINT64_MAX) +        UpdateVectorStatistics(vector, vectorState); + +    const size_t startMoveCount = m_Moves.size(); +    VkDeviceSize minimalFreeRegion = vectorState.avgFreeSize / 2; +    for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i) +    { +        VmaDeviceMemoryBlock* block = vector.GetBlock(i); +        VmaBlockMetadata* metadata = block->m_pMetadata; +        VkDeviceSize prevFreeRegionSize = 0; + +        for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); +            handle != VK_NULL_HANDLE; +            handle = metadata->GetNextAllocation(handle)) +        { +            MoveAllocationData moveData = GetMoveData(handle, metadata); +            // Ignore newly created allocations by defragmentation algorithm +            if (moveData.move.srcAllocation->GetUserData() == this) +                continue; +            switch (CheckCounters(moveData.move.srcAllocation->GetSize())) +            { +            case CounterStatus::Ignore: +                continue; +            case CounterStatus::End: +                return true; +            case CounterStatus::Pass: +                break; +            default: +                VMA_ASSERT(0); +            } + +            // Check all previous blocks for free space +            const size_t prevMoveCount = m_Moves.size(); +            if (AllocInOtherBlock(0, i, moveData, vector)) +                return true; + +            VkDeviceSize nextFreeRegionSize = metadata->GetNextFreeRegionSize(handle); +            // If no room found then realloc within block for lower offset +            VkDeviceSize offset = moveData.move.srcAllocation->GetOffset(); +            if (prevMoveCount == m_Moves.size() && offset != 0 && metadata->GetSumFreeSize() >= moveData.size) +            { +                // Check if realloc will make sense +                if (prevFreeRegionSize >= minimalFreeRegion || +                    nextFreeRegionSize >= minimalFreeRegion || +                    moveData.size <= vectorState.avgFreeSize || +                    moveData.size <= vectorState.avgAllocSize) +                { +                    VmaAllocationRequest request = {}; +                    if (metadata->CreateAllocationRequest( +                        moveData.size, +                        moveData.alignment, +                        false, +                        moveData.type, +                        VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, +                        &request)) +                    { +                        if (metadata->GetAllocationOffset(request.allocHandle) < offset) +                        { +                            if (vector.CommitAllocationRequest( +                                request, +                                block, +                                moveData.alignment, +                                moveData.flags, +                                this, +                                moveData.type, +                                &moveData.move.dstTmpAllocation) == VK_SUCCESS) +                            { +                                m_Moves.push_back(moveData.move); +                                if 
(IncrementCounters(moveData.size)) +                                    return true; +                            } +                        } +                    } +                } +            } +            prevFreeRegionSize = nextFreeRegionSize; +        } +    } + +    // No moves performed, update statistics to current vector state +    if (startMoveCount == m_Moves.size() && !update) +    { +        vectorState.avgAllocSize = UINT64_MAX; +        return ComputeDefragmentation_Balanced(vector, index, false); +    } +    return false; +} + +bool VmaDefragmentationContext_T::ComputeDefragmentation_Full(VmaBlockVector& vector) +{ +    // Go over every allocation and try to fit it in previous blocks at lowest offsets, +    // if not possible: realloc within single block to minimize offset (exclude offset == 0) + +    for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i) +    { +        VmaDeviceMemoryBlock* block = vector.GetBlock(i); +        VmaBlockMetadata* metadata = block->m_pMetadata; + +        for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); +            handle != VK_NULL_HANDLE; +            handle = metadata->GetNextAllocation(handle)) +        { +            MoveAllocationData moveData = GetMoveData(handle, metadata); +            // Ignore newly created allocations by defragmentation algorithm +            if (moveData.move.srcAllocation->GetUserData() == this) +                continue; +            switch (CheckCounters(moveData.move.srcAllocation->GetSize())) +            { +            case CounterStatus::Ignore: +                continue; +            case CounterStatus::End: +                return true; +            case CounterStatus::Pass: +                break; +            default: +                VMA_ASSERT(0); +            } + +            // Check all previous blocks for free space +            const size_t prevMoveCount = m_Moves.size(); +            if (AllocInOtherBlock(0, i, moveData, vector)) +                return true; + +            // If no room found then realloc within block for lower offset +            VkDeviceSize offset = moveData.move.srcAllocation->GetOffset(); +            if (prevMoveCount == m_Moves.size() && offset != 0 && metadata->GetSumFreeSize() >= moveData.size) +            { +                VmaAllocationRequest request = {}; +                if (metadata->CreateAllocationRequest( +                    moveData.size, +                    moveData.alignment, +                    false, +                    moveData.type, +                    VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, +                    &request)) +                { +                    if (metadata->GetAllocationOffset(request.allocHandle) < offset) +                    { +                        if (vector.CommitAllocationRequest( +                            request, +                            block, +                            moveData.alignment, +                            moveData.flags, +                            this, +                            moveData.type, +                            &moveData.move.dstTmpAllocation) == VK_SUCCESS) +                        { +                            m_Moves.push_back(moveData.move); +                            if (IncrementCounters(moveData.size)) +                                return true; +                        } +                    } +                } +            } +        } +    } +    return false; +} + +bool 
VmaDefragmentationContext_T::ComputeDefragmentation_Extensive(VmaBlockVector& vector, size_t index) +{ +    // First free single block, then populate it to the brim, then free another block, and so on + +    // Fallback to previous algorithm since without granularity conflicts it can achieve max packing +    if (vector.m_BufferImageGranularity == 1) +        return ComputeDefragmentation_Full(vector); + +    VMA_ASSERT(m_AlgorithmState != VMA_NULL); + +    StateExtensive& vectorState = reinterpret_cast<StateExtensive*>(m_AlgorithmState)[index]; + +    bool texturePresent = false, bufferPresent = false, otherPresent = false; +    switch (vectorState.operation) +    { +    case StateExtensive::Operation::Done: // Vector defragmented +        return false; +    case StateExtensive::Operation::FindFreeBlockBuffer: +    case StateExtensive::Operation::FindFreeBlockTexture: +    case StateExtensive::Operation::FindFreeBlockAll: +    { +        // No more blocks to free, just perform fast realloc and move to cleanup +        if (vectorState.firstFreeBlock == 0) +        { +            vectorState.operation = StateExtensive::Operation::Cleanup; +            return ComputeDefragmentation_Fast(vector); +        } + +        // No free blocks, have to clear last one +        size_t last = (vectorState.firstFreeBlock == SIZE_MAX ? vector.GetBlockCount() : vectorState.firstFreeBlock) - 1; +        VmaBlockMetadata* freeMetadata = vector.GetBlock(last)->m_pMetadata; + +        const size_t prevMoveCount = m_Moves.size(); +        for (VmaAllocHandle handle = freeMetadata->GetAllocationListBegin(); +            handle != VK_NULL_HANDLE; +            handle = freeMetadata->GetNextAllocation(handle)) +        { +            MoveAllocationData moveData = GetMoveData(handle, freeMetadata); +            switch (CheckCounters(moveData.move.srcAllocation->GetSize())) +            { +            case CounterStatus::Ignore: +                continue; +            case CounterStatus::End: +                return true; +            case CounterStatus::Pass: +                break; +            default: +                VMA_ASSERT(0); +            } + +            // Check all previous blocks for free space +            if (AllocInOtherBlock(0, last, moveData, vector)) +            { +                // Full clear performed already +                if (prevMoveCount != m_Moves.size() && freeMetadata->GetNextAllocation(handle) == VK_NULL_HANDLE) +                    vectorState.firstFreeBlock = last; +                return true; +            } +        } + +        if (prevMoveCount == m_Moves.size()) +        { +            // Cannot perform full clear, have to move data in other blocks around +            if (last != 0) +            { +                for (size_t i = last - 1; i; --i) +                { +                    if (ReallocWithinBlock(vector, vector.GetBlock(i))) +                        return true; +                } +            } + +            if (prevMoveCount == m_Moves.size()) +            { +                // No possible reallocs within blocks, try to move them around fast +                return ComputeDefragmentation_Fast(vector); +            } +        } +        else +        { +            switch (vectorState.operation) +            { +            case StateExtensive::Operation::FindFreeBlockBuffer: +                vectorState.operation = StateExtensive::Operation::MoveBuffers; +                break; +            case StateExtensive::Operation::FindFreeBlockTexture: +                
vectorState.operation = StateExtensive::Operation::MoveTextures; +                break; +            case StateExtensive::Operation::FindFreeBlockAll: +                vectorState.operation = StateExtensive::Operation::MoveAll; +                break; +            default: +                VMA_ASSERT(0); +                vectorState.operation = StateExtensive::Operation::MoveTextures; +            } +            vectorState.firstFreeBlock = last; +            // Nothing done, block found without reallocations, can perform another reallocs in same pass +            return ComputeDefragmentation_Extensive(vector, index); +        } +        break; +    } +    case StateExtensive::Operation::MoveTextures: +    { +        if (MoveDataToFreeBlocks(VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL, vector, +            vectorState.firstFreeBlock, texturePresent, bufferPresent, otherPresent)) +        { +            if (texturePresent) +            { +                vectorState.operation = StateExtensive::Operation::FindFreeBlockTexture; +                return ComputeDefragmentation_Extensive(vector, index); +            } + +            if (!bufferPresent && !otherPresent) +            { +                vectorState.operation = StateExtensive::Operation::Cleanup; +                break; +            } + +            // No more textures to move, check buffers +            vectorState.operation = StateExtensive::Operation::MoveBuffers; +            bufferPresent = false; +            otherPresent = false; +        } +        else +            break; +        VMA_FALLTHROUGH; // Fallthrough +    } +    case StateExtensive::Operation::MoveBuffers: +    { +        if (MoveDataToFreeBlocks(VMA_SUBALLOCATION_TYPE_BUFFER, vector, +            vectorState.firstFreeBlock, texturePresent, bufferPresent, otherPresent)) +        { +            if (bufferPresent) +            { +                vectorState.operation = StateExtensive::Operation::FindFreeBlockBuffer; +                return ComputeDefragmentation_Extensive(vector, index); +            } + +            if (!otherPresent) +            { +                vectorState.operation = StateExtensive::Operation::Cleanup; +                break; +            } + +            // No more buffers to move, check all others +            vectorState.operation = StateExtensive::Operation::MoveAll; +            otherPresent = false; +        } +        else +            break; +        VMA_FALLTHROUGH; // Fallthrough +    } +    case StateExtensive::Operation::MoveAll: +    { +        if (MoveDataToFreeBlocks(VMA_SUBALLOCATION_TYPE_FREE, vector, +            vectorState.firstFreeBlock, texturePresent, bufferPresent, otherPresent)) +        { +            if (otherPresent) +            { +                vectorState.operation = StateExtensive::Operation::FindFreeBlockBuffer; +                return ComputeDefragmentation_Extensive(vector, index); +            } +            // Everything moved +            vectorState.operation = StateExtensive::Operation::Cleanup; +        } +        break; +    } +    case StateExtensive::Operation::Cleanup: +        // Cleanup is handled below so that other operations may reuse the cleanup code. This case is here to prevent the unhandled enum value warning (C4062). 
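Note: the extensive algorithm above is a small state machine: empty one block (the FindFreeBlock* states), refill the freed space one resource class at a time (MoveTextures, then MoveBuffers, then MoveAll) so optimal-tiling images and buffers stop sharing bufferImageGranularity pages, and finish with an in-block compaction (Cleanup) before reporting Done. The sketch below is a deliberately condensed view of that ordering; the real machine loops back to a FindFreeBlock* state between move phases, and the enum is illustrative rather than the library's type:

    // Condensed phase ordering of the 'extensive' defragmentation strategy.
    enum class ExtensivePhase { FindFreeBlock, MoveTextures, MoveBuffers, MoveAll, Cleanup, Done };

    ExtensivePhase NextPhase(ExtensivePhase p)
    {
        switch (p)
        {
        case ExtensivePhase::FindFreeBlock: return ExtensivePhase::MoveTextures;
        case ExtensivePhase::MoveTextures:  return ExtensivePhase::MoveBuffers;
        case ExtensivePhase::MoveBuffers:   return ExtensivePhase::MoveAll;
        case ExtensivePhase::MoveAll:       return ExtensivePhase::Cleanup;
        default:                            return ExtensivePhase::Done;
        }
    }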
+        break; +    } + +    if (vectorState.operation == StateExtensive::Operation::Cleanup) +    { +        // All other work done, pack data in blocks even tighter if possible +        const size_t prevMoveCount = m_Moves.size(); +        for (size_t i = 0; i < vector.GetBlockCount(); ++i) +        { +            if (ReallocWithinBlock(vector, vector.GetBlock(i))) +                return true; +        } + +        if (prevMoveCount == m_Moves.size()) +            vectorState.operation = StateExtensive::Operation::Done; +    } +    return false; +} + +void VmaDefragmentationContext_T::UpdateVectorStatistics(VmaBlockVector& vector, StateBalanced& state) +{ +    size_t allocCount = 0; +    size_t freeCount = 0; +    state.avgFreeSize = 0; +    state.avgAllocSize = 0; + +    for (size_t i = 0; i < vector.GetBlockCount(); ++i) +    { +        VmaBlockMetadata* metadata = vector.GetBlock(i)->m_pMetadata; + +        allocCount += metadata->GetAllocationCount(); +        freeCount += metadata->GetFreeRegionsCount(); +        state.avgFreeSize += metadata->GetSumFreeSize(); +        state.avgAllocSize += metadata->GetSize(); +    } + +    state.avgAllocSize = (state.avgAllocSize - state.avgFreeSize) / allocCount; +    state.avgFreeSize /= freeCount; +} + +bool VmaDefragmentationContext_T::MoveDataToFreeBlocks(VmaSuballocationType currentType, +    VmaBlockVector& vector, size_t firstFreeBlock, +    bool& texturePresent, bool& bufferPresent, bool& otherPresent) +{ +    const size_t prevMoveCount = m_Moves.size(); +    for (size_t i = firstFreeBlock ; i;) +    { +        VmaDeviceMemoryBlock* block = vector.GetBlock(--i); +        VmaBlockMetadata* metadata = block->m_pMetadata; + +        for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); +            handle != VK_NULL_HANDLE; +            handle = metadata->GetNextAllocation(handle)) +        { +            MoveAllocationData moveData = GetMoveData(handle, metadata); +            // Ignore newly created allocations by defragmentation algorithm +            if (moveData.move.srcAllocation->GetUserData() == this) +                continue; +            switch (CheckCounters(moveData.move.srcAllocation->GetSize())) +            { +            case CounterStatus::Ignore: +                continue; +            case CounterStatus::End: +                return true; +            case CounterStatus::Pass: +                break; +            default: +                VMA_ASSERT(0); +            } + +            // Move only single type of resources at once +            if (!VmaIsBufferImageGranularityConflict(moveData.type, currentType)) +            { +                // Try to fit allocation into free blocks +                if (AllocInOtherBlock(firstFreeBlock, vector.GetBlockCount(), moveData, vector)) +                    return false; +            } + +            if (!VmaIsBufferImageGranularityConflict(moveData.type, VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL)) +                texturePresent = true; +            else if (!VmaIsBufferImageGranularityConflict(moveData.type, VMA_SUBALLOCATION_TYPE_BUFFER)) +                bufferPresent = true; +            else +                otherPresent = true; +        } +    } +    return prevMoveCount == m_Moves.size(); +} +#endif // _VMA_DEFRAGMENTATION_CONTEXT_FUNCTIONS + +#ifndef _VMA_POOL_T_FUNCTIONS +VmaPool_T::VmaPool_T( +    VmaAllocator hAllocator, +    const VmaPoolCreateInfo& createInfo, +    VkDeviceSize preferredBlockSize) +    : m_BlockVector( +        hAllocator, +        this, // 
hParentPool +        createInfo.memoryTypeIndex, +        createInfo.blockSize != 0 ? createInfo.blockSize : preferredBlockSize, +        createInfo.minBlockCount, +        createInfo.maxBlockCount, +        (createInfo.flags& VMA_POOL_CREATE_IGNORE_BUFFER_IMAGE_GRANULARITY_BIT) != 0 ? 1 : hAllocator->GetBufferImageGranularity(), +        createInfo.blockSize != 0, // explicitBlockSize +        createInfo.flags & VMA_POOL_CREATE_ALGORITHM_MASK, // algorithm +        createInfo.priority, +        VMA_MAX(hAllocator->GetMemoryTypeMinAlignment(createInfo.memoryTypeIndex), createInfo.minAllocationAlignment), +        createInfo.pMemoryAllocateNext), +    m_Id(0), +    m_Name(VMA_NULL) {} + +VmaPool_T::~VmaPool_T() +{ +    VMA_ASSERT(m_PrevPool == VMA_NULL && m_NextPool == VMA_NULL); + +    const VkAllocationCallbacks* allocs = m_BlockVector.GetAllocator()->GetAllocationCallbacks(); +    VmaFreeString(allocs, m_Name); +} + +void VmaPool_T::SetName(const char* pName) +{ +    const VkAllocationCallbacks* allocs = m_BlockVector.GetAllocator()->GetAllocationCallbacks(); +    VmaFreeString(allocs, m_Name); + +    if (pName != VMA_NULL) +    { +        m_Name = VmaCreateStringCopy(allocs, pName); +    } +    else +    { +        m_Name = VMA_NULL; +    } +} +#endif // _VMA_POOL_T_FUNCTIONS + +#ifndef _VMA_ALLOCATOR_T_FUNCTIONS +VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) : +    m_UseMutex((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT) == 0), +    m_VulkanApiVersion(pCreateInfo->vulkanApiVersion != 0 ? pCreateInfo->vulkanApiVersion : VK_API_VERSION_1_0), +    m_UseKhrDedicatedAllocation((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT) != 0), +    m_UseKhrBindMemory2((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT) != 0), +    m_UseExtMemoryBudget((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT) != 0), +    m_UseAmdDeviceCoherentMemory((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT) != 0), +    m_UseKhrBufferDeviceAddress((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT) != 0), +    m_UseExtMemoryPriority((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT) != 0), +    m_UseKhrMaintenance4((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT) != 0), +    m_UseKhrMaintenance5((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT) != 0), +    m_UseKhrExternalMemoryWin32((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT) != 0), +    m_hDevice(pCreateInfo->device), +    m_hInstance(pCreateInfo->instance), +    m_AllocationCallbacksSpecified(pCreateInfo->pAllocationCallbacks != VMA_NULL), +    m_AllocationCallbacks(pCreateInfo->pAllocationCallbacks ? +        *pCreateInfo->pAllocationCallbacks : VmaEmptyAllocationCallbacks), +    m_AllocationObjectAllocator(&m_AllocationCallbacks), +    m_HeapSizeLimitMask(0), +    m_DeviceMemoryCount(0), +    m_PreferredLargeHeapBlockSize(0), +    m_PhysicalDevice(pCreateInfo->physicalDevice), +    m_GpuDefragmentationMemoryTypeBits(UINT32_MAX), +    m_NextPoolId(0), +    m_GlobalMemoryTypeBits(UINT32_MAX) +{ +    if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) +    { +        m_UseKhrDedicatedAllocation = false; +        m_UseKhrBindMemory2 = false; +    } + +    if(VMA_DEBUG_DETECT_CORRUPTION) +    { +        // Needs to be multiply of uint32_t size because we are going to write VMA_CORRUPTION_DETECTION_MAGIC_VALUE to it. 
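Note: VmaPool_T just above is the object behind the public custom-pool API; each field of VmaPoolCreateInfo maps directly onto a VmaBlockVector parameter (a nonzero blockSize makes the size explicit, the flags carry the algorithm, and priority, minAllocationAlignment and pMemoryAllocateNext are forwarded). A minimal caller-side sketch, assuming the memory type index has already been chosen (for example with vmaFindMemoryTypeIndex); the sizes and counts are arbitrary examples:

    #include "vk_mem_alloc.h"

    // Minimal custom-pool creation mirroring the constructor arguments above.
    VkResult CreateLinearPool(VmaAllocator allocator, uint32_t memoryTypeIndex, VmaPool* outPool)
    {
        VmaPoolCreateInfo poolInfo = {};
        poolInfo.memoryTypeIndex = memoryTypeIndex;
        poolInfo.flags = VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT; // becomes the 'algorithm' parameter
        poolInfo.blockSize = 64ull * 1024 * 1024;              // nonzero => explicitBlockSize
        poolInfo.minBlockCount = 1;                            // keep one block resident
        poolInfo.maxBlockCount = 8;

        return vmaCreatePool(allocator, &poolInfo, outPool);
    }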
+        VMA_ASSERT(VMA_DEBUG_MARGIN % sizeof(uint32_t) == 0); +    } + +    VMA_ASSERT(pCreateInfo->physicalDevice && pCreateInfo->device && pCreateInfo->instance); + +    if(m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0)) +    { +#if !(VMA_DEDICATED_ALLOCATION) +        if((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT) != 0) +        { +            VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT set but required extensions are disabled by preprocessor macros."); +        } +#endif +#if !(VMA_BIND_MEMORY2) +        if((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT) != 0) +        { +            VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT set but required extension is disabled by preprocessor macros."); +        } +#endif +    } +#if !(VMA_MEMORY_BUDGET) +    if((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT) != 0) +    { +        VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT set but required extension is disabled by preprocessor macros."); +    } +#endif +#if !(VMA_BUFFER_DEVICE_ADDRESS) +    if(m_UseKhrBufferDeviceAddress) +    { +        VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT is set but required extension or Vulkan 1.2 is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); +    } +#endif +#if VMA_VULKAN_VERSION < 1004000 +    VMA_ASSERT(m_VulkanApiVersion < VK_MAKE_VERSION(1, 4, 0) && "vulkanApiVersion >= VK_API_VERSION_1_4 but required Vulkan version is disabled by preprocessor macros."); +#endif +#if VMA_VULKAN_VERSION < 1003000 +    VMA_ASSERT(m_VulkanApiVersion < VK_MAKE_VERSION(1, 3, 0) && "vulkanApiVersion >= VK_API_VERSION_1_3 but required Vulkan version is disabled by preprocessor macros."); +#endif +#if VMA_VULKAN_VERSION < 1002000 +    VMA_ASSERT(m_VulkanApiVersion < VK_MAKE_VERSION(1, 2, 0) && "vulkanApiVersion >= VK_API_VERSION_1_2 but required Vulkan version is disabled by preprocessor macros."); +#endif +#if VMA_VULKAN_VERSION < 1001000 +    VMA_ASSERT(m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0) && "vulkanApiVersion >= VK_API_VERSION_1_1 but required Vulkan version is disabled by preprocessor macros."); +#endif +#if !(VMA_MEMORY_PRIORITY) +    if(m_UseExtMemoryPriority) +    { +        VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); +    } +#endif +#if !(VMA_KHR_MAINTENANCE4) +    if(m_UseKhrMaintenance4) +    { +        VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); +    } +#endif +#if !(VMA_KHR_MAINTENANCE5) +    if(m_UseKhrMaintenance5) +    { +        VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); +    } +#endif +#if !(VMA_KHR_MAINTENANCE5) +    if(m_UseKhrMaintenance5) +    { +        VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); +    } +#endif + +#if !(VMA_EXTERNAL_MEMORY_WIN32) +    if(m_UseKhrExternalMemoryWin32) +    { +        VMA_ASSERT(0 && 
"VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); +    } +#endif + +    memset(&m_DeviceMemoryCallbacks, 0 ,sizeof(m_DeviceMemoryCallbacks)); +    memset(&m_PhysicalDeviceProperties, 0, sizeof(m_PhysicalDeviceProperties)); +    memset(&m_MemProps, 0, sizeof(m_MemProps)); + +    memset(&m_pBlockVectors, 0, sizeof(m_pBlockVectors)); +    memset(&m_VulkanFunctions, 0, sizeof(m_VulkanFunctions)); + +#if VMA_EXTERNAL_MEMORY +    memset(&m_TypeExternalMemoryHandleTypes, 0, sizeof(m_TypeExternalMemoryHandleTypes)); +#endif // #if VMA_EXTERNAL_MEMORY + +    if(pCreateInfo->pDeviceMemoryCallbacks != VMA_NULL) +    { +        m_DeviceMemoryCallbacks.pUserData = pCreateInfo->pDeviceMemoryCallbacks->pUserData; +        m_DeviceMemoryCallbacks.pfnAllocate = pCreateInfo->pDeviceMemoryCallbacks->pfnAllocate; +        m_DeviceMemoryCallbacks.pfnFree = pCreateInfo->pDeviceMemoryCallbacks->pfnFree; +    } + +    ImportVulkanFunctions(pCreateInfo->pVulkanFunctions); + +    (*m_VulkanFunctions.vkGetPhysicalDeviceProperties)(m_PhysicalDevice, &m_PhysicalDeviceProperties); +    (*m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties)(m_PhysicalDevice, &m_MemProps); + +    VMA_ASSERT(VmaIsPow2(VMA_MIN_ALIGNMENT)); +    VMA_ASSERT(VmaIsPow2(VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY)); +    VMA_ASSERT(VmaIsPow2(m_PhysicalDeviceProperties.limits.bufferImageGranularity)); +    VMA_ASSERT(VmaIsPow2(m_PhysicalDeviceProperties.limits.nonCoherentAtomSize)); + +    m_PreferredLargeHeapBlockSize = (pCreateInfo->preferredLargeHeapBlockSize != 0) ? +        pCreateInfo->preferredLargeHeapBlockSize : static_cast<VkDeviceSize>(VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE); + +    m_GlobalMemoryTypeBits = CalculateGlobalMemoryTypeBits(); + +#if VMA_EXTERNAL_MEMORY +    if(pCreateInfo->pTypeExternalMemoryHandleTypes != VMA_NULL) +    { +        memcpy(m_TypeExternalMemoryHandleTypes, pCreateInfo->pTypeExternalMemoryHandleTypes, +            sizeof(VkExternalMemoryHandleTypeFlagsKHR) * GetMemoryTypeCount()); +    } +#endif // #if VMA_EXTERNAL_MEMORY + +    if(pCreateInfo->pHeapSizeLimit != VMA_NULL) +    { +        for(uint32_t heapIndex = 0; heapIndex < GetMemoryHeapCount(); ++heapIndex) +        { +            const VkDeviceSize limit = pCreateInfo->pHeapSizeLimit[heapIndex]; +            if(limit != VK_WHOLE_SIZE) +            { +                m_HeapSizeLimitMask |= 1u << heapIndex; +                if(limit < m_MemProps.memoryHeaps[heapIndex].size) +                { +                    m_MemProps.memoryHeaps[heapIndex].size = limit; +                } +            } +        } +    } + +    for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) +    { +        // Create only supported types +        if((m_GlobalMemoryTypeBits & (1u << memTypeIndex)) != 0) +        { +            const VkDeviceSize preferredBlockSize = CalcPreferredBlockSize(memTypeIndex); +            m_pBlockVectors[memTypeIndex] = vma_new(this, VmaBlockVector)( +                this, +                VK_NULL_HANDLE, // hParentPool +                memTypeIndex, +                preferredBlockSize, +                0, +                SIZE_MAX, +                GetBufferImageGranularity(), +                false, // explicitBlockSize +                0, // algorithm +                0.5f, // priority (0.5 is the default per Vulkan spec) +                GetMemoryTypeMinAlignment(memTypeIndex), // 
minAllocationAlignment +                VMA_NULL); // // pMemoryAllocateNext +            // No need to call m_pBlockVectors[memTypeIndex][blockVectorTypeIndex]->CreateMinBlocks here, +            // because minBlockCount is 0. +        } +    } +} + +VkResult VmaAllocator_T::Init(const VmaAllocatorCreateInfo* pCreateInfo) +{ +    VkResult res = VK_SUCCESS; + +#if VMA_MEMORY_BUDGET +    if(m_UseExtMemoryBudget) +    { +        UpdateVulkanBudget(); +    } +#endif // #if VMA_MEMORY_BUDGET + +    return res; +} + +VmaAllocator_T::~VmaAllocator_T() +{ +    VMA_ASSERT(m_Pools.IsEmpty()); + +    for(size_t memTypeIndex = GetMemoryTypeCount(); memTypeIndex--; ) +    { +        vma_delete(this, m_pBlockVectors[memTypeIndex]); +    } +} + +void VmaAllocator_T::ImportVulkanFunctions(const VmaVulkanFunctions* pVulkanFunctions) +{ +#if VMA_STATIC_VULKAN_FUNCTIONS == 1 +    ImportVulkanFunctions_Static(); +#endif + +    if(pVulkanFunctions != VMA_NULL) +    { +        ImportVulkanFunctions_Custom(pVulkanFunctions); +    } + +#if VMA_DYNAMIC_VULKAN_FUNCTIONS == 1 +    ImportVulkanFunctions_Dynamic(); +#endif + +    ValidateVulkanFunctions(); +} + +#if VMA_STATIC_VULKAN_FUNCTIONS == 1 + +void VmaAllocator_T::ImportVulkanFunctions_Static() +{ +    // Vulkan 1.0 +    m_VulkanFunctions.vkGetInstanceProcAddr = (PFN_vkGetInstanceProcAddr)vkGetInstanceProcAddr; +    m_VulkanFunctions.vkGetDeviceProcAddr = (PFN_vkGetDeviceProcAddr)vkGetDeviceProcAddr; +    m_VulkanFunctions.vkGetPhysicalDeviceProperties = (PFN_vkGetPhysicalDeviceProperties)vkGetPhysicalDeviceProperties; +    m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties = (PFN_vkGetPhysicalDeviceMemoryProperties)vkGetPhysicalDeviceMemoryProperties; +    m_VulkanFunctions.vkAllocateMemory = (PFN_vkAllocateMemory)vkAllocateMemory; +    m_VulkanFunctions.vkFreeMemory = (PFN_vkFreeMemory)vkFreeMemory; +    m_VulkanFunctions.vkMapMemory = (PFN_vkMapMemory)vkMapMemory; +    m_VulkanFunctions.vkUnmapMemory = (PFN_vkUnmapMemory)vkUnmapMemory; +    m_VulkanFunctions.vkFlushMappedMemoryRanges = (PFN_vkFlushMappedMemoryRanges)vkFlushMappedMemoryRanges; +    m_VulkanFunctions.vkInvalidateMappedMemoryRanges = (PFN_vkInvalidateMappedMemoryRanges)vkInvalidateMappedMemoryRanges; +    m_VulkanFunctions.vkBindBufferMemory = (PFN_vkBindBufferMemory)vkBindBufferMemory; +    m_VulkanFunctions.vkBindImageMemory = (PFN_vkBindImageMemory)vkBindImageMemory; +    m_VulkanFunctions.vkGetBufferMemoryRequirements = (PFN_vkGetBufferMemoryRequirements)vkGetBufferMemoryRequirements; +    m_VulkanFunctions.vkGetImageMemoryRequirements = (PFN_vkGetImageMemoryRequirements)vkGetImageMemoryRequirements; +    m_VulkanFunctions.vkCreateBuffer = (PFN_vkCreateBuffer)vkCreateBuffer; +    m_VulkanFunctions.vkDestroyBuffer = (PFN_vkDestroyBuffer)vkDestroyBuffer; +    m_VulkanFunctions.vkCreateImage = (PFN_vkCreateImage)vkCreateImage; +    m_VulkanFunctions.vkDestroyImage = (PFN_vkDestroyImage)vkDestroyImage; +    m_VulkanFunctions.vkCmdCopyBuffer = (PFN_vkCmdCopyBuffer)vkCmdCopyBuffer; + +    // Vulkan 1.1 +#if VMA_VULKAN_VERSION >= 1001000 +    if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) +    { +        m_VulkanFunctions.vkGetBufferMemoryRequirements2KHR = (PFN_vkGetBufferMemoryRequirements2)vkGetBufferMemoryRequirements2; +        m_VulkanFunctions.vkGetImageMemoryRequirements2KHR = (PFN_vkGetImageMemoryRequirements2)vkGetImageMemoryRequirements2; +        m_VulkanFunctions.vkBindBufferMemory2KHR = (PFN_vkBindBufferMemory2)vkBindBufferMemory2; +        
m_VulkanFunctions.vkBindImageMemory2KHR = (PFN_vkBindImageMemory2)vkBindImageMemory2; +    } +#endif + +#if VMA_VULKAN_VERSION >= 1001000 +    if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) +    { +        m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties2KHR = (PFN_vkGetPhysicalDeviceMemoryProperties2)vkGetPhysicalDeviceMemoryProperties2; +    } +#endif + +#if VMA_VULKAN_VERSION >= 1003000 +    if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 3, 0)) +    { +        m_VulkanFunctions.vkGetDeviceBufferMemoryRequirements = (PFN_vkGetDeviceBufferMemoryRequirements)vkGetDeviceBufferMemoryRequirements; +        m_VulkanFunctions.vkGetDeviceImageMemoryRequirements = (PFN_vkGetDeviceImageMemoryRequirements)vkGetDeviceImageMemoryRequirements; +    } +#endif +} + +#endif // VMA_STATIC_VULKAN_FUNCTIONS == 1 + +void VmaAllocator_T::ImportVulkanFunctions_Custom(const VmaVulkanFunctions* pVulkanFunctions) +{ +    VMA_ASSERT(pVulkanFunctions != VMA_NULL); + +#define VMA_COPY_IF_NOT_NULL(funcName) \ +    if(pVulkanFunctions->funcName != VMA_NULL) m_VulkanFunctions.funcName = pVulkanFunctions->funcName; + +    VMA_COPY_IF_NOT_NULL(vkGetInstanceProcAddr); +    VMA_COPY_IF_NOT_NULL(vkGetDeviceProcAddr); +    VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceProperties); +    VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceMemoryProperties); +    VMA_COPY_IF_NOT_NULL(vkAllocateMemory); +    VMA_COPY_IF_NOT_NULL(vkFreeMemory); +    VMA_COPY_IF_NOT_NULL(vkMapMemory); +    VMA_COPY_IF_NOT_NULL(vkUnmapMemory); +    VMA_COPY_IF_NOT_NULL(vkFlushMappedMemoryRanges); +    VMA_COPY_IF_NOT_NULL(vkInvalidateMappedMemoryRanges); +    VMA_COPY_IF_NOT_NULL(vkBindBufferMemory); +    VMA_COPY_IF_NOT_NULL(vkBindImageMemory); +    VMA_COPY_IF_NOT_NULL(vkGetBufferMemoryRequirements); +    VMA_COPY_IF_NOT_NULL(vkGetImageMemoryRequirements); +    VMA_COPY_IF_NOT_NULL(vkCreateBuffer); +    VMA_COPY_IF_NOT_NULL(vkDestroyBuffer); +    VMA_COPY_IF_NOT_NULL(vkCreateImage); +    VMA_COPY_IF_NOT_NULL(vkDestroyImage); +    VMA_COPY_IF_NOT_NULL(vkCmdCopyBuffer); + +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 +    VMA_COPY_IF_NOT_NULL(vkGetBufferMemoryRequirements2KHR); +    VMA_COPY_IF_NOT_NULL(vkGetImageMemoryRequirements2KHR); +#endif + +#if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000 +    VMA_COPY_IF_NOT_NULL(vkBindBufferMemory2KHR); +    VMA_COPY_IF_NOT_NULL(vkBindImageMemory2KHR); +#endif + +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 +    VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceMemoryProperties2KHR); +#endif + +#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 +    VMA_COPY_IF_NOT_NULL(vkGetDeviceBufferMemoryRequirements); +    VMA_COPY_IF_NOT_NULL(vkGetDeviceImageMemoryRequirements); +#endif +#if VMA_EXTERNAL_MEMORY_WIN32 +    VMA_COPY_IF_NOT_NULL(vkGetMemoryWin32HandleKHR); +#endif +#undef VMA_COPY_IF_NOT_NULL +} + +#if VMA_DYNAMIC_VULKAN_FUNCTIONS == 1 + +void VmaAllocator_T::ImportVulkanFunctions_Dynamic() +{ +    VMA_ASSERT(m_VulkanFunctions.vkGetInstanceProcAddr && m_VulkanFunctions.vkGetDeviceProcAddr && +        "To use VMA_DYNAMIC_VULKAN_FUNCTIONS in new versions of VMA you now have to pass " +        "VmaVulkanFunctions::vkGetInstanceProcAddr and vkGetDeviceProcAddr as VmaAllocatorCreateInfo::pVulkanFunctions. 
" +        "Other members can be null."); + +#define VMA_FETCH_INSTANCE_FUNC(memberName, functionPointerType, functionNameString) \ +    if(m_VulkanFunctions.memberName == VMA_NULL) \ +        m_VulkanFunctions.memberName = \ +            (functionPointerType)m_VulkanFunctions.vkGetInstanceProcAddr(m_hInstance, functionNameString); +#define VMA_FETCH_DEVICE_FUNC(memberName, functionPointerType, functionNameString) \ +    if(m_VulkanFunctions.memberName == VMA_NULL) \ +        m_VulkanFunctions.memberName = \ +            (functionPointerType)m_VulkanFunctions.vkGetDeviceProcAddr(m_hDevice, functionNameString); + +    VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceProperties, PFN_vkGetPhysicalDeviceProperties, "vkGetPhysicalDeviceProperties"); +    VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties, PFN_vkGetPhysicalDeviceMemoryProperties, "vkGetPhysicalDeviceMemoryProperties"); +    VMA_FETCH_DEVICE_FUNC(vkAllocateMemory, PFN_vkAllocateMemory, "vkAllocateMemory"); +    VMA_FETCH_DEVICE_FUNC(vkFreeMemory, PFN_vkFreeMemory, "vkFreeMemory"); +    VMA_FETCH_DEVICE_FUNC(vkMapMemory, PFN_vkMapMemory, "vkMapMemory"); +    VMA_FETCH_DEVICE_FUNC(vkUnmapMemory, PFN_vkUnmapMemory, "vkUnmapMemory"); +    VMA_FETCH_DEVICE_FUNC(vkFlushMappedMemoryRanges, PFN_vkFlushMappedMemoryRanges, "vkFlushMappedMemoryRanges"); +    VMA_FETCH_DEVICE_FUNC(vkInvalidateMappedMemoryRanges, PFN_vkInvalidateMappedMemoryRanges, "vkInvalidateMappedMemoryRanges"); +    VMA_FETCH_DEVICE_FUNC(vkBindBufferMemory, PFN_vkBindBufferMemory, "vkBindBufferMemory"); +    VMA_FETCH_DEVICE_FUNC(vkBindImageMemory, PFN_vkBindImageMemory, "vkBindImageMemory"); +    VMA_FETCH_DEVICE_FUNC(vkGetBufferMemoryRequirements, PFN_vkGetBufferMemoryRequirements, "vkGetBufferMemoryRequirements"); +    VMA_FETCH_DEVICE_FUNC(vkGetImageMemoryRequirements, PFN_vkGetImageMemoryRequirements, "vkGetImageMemoryRequirements"); +    VMA_FETCH_DEVICE_FUNC(vkCreateBuffer, PFN_vkCreateBuffer, "vkCreateBuffer"); +    VMA_FETCH_DEVICE_FUNC(vkDestroyBuffer, PFN_vkDestroyBuffer, "vkDestroyBuffer"); +    VMA_FETCH_DEVICE_FUNC(vkCreateImage, PFN_vkCreateImage, "vkCreateImage"); +    VMA_FETCH_DEVICE_FUNC(vkDestroyImage, PFN_vkDestroyImage, "vkDestroyImage"); +    VMA_FETCH_DEVICE_FUNC(vkCmdCopyBuffer, PFN_vkCmdCopyBuffer, "vkCmdCopyBuffer"); + +#if VMA_VULKAN_VERSION >= 1001000 +    if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) +    { +        VMA_FETCH_DEVICE_FUNC(vkGetBufferMemoryRequirements2KHR, PFN_vkGetBufferMemoryRequirements2, "vkGetBufferMemoryRequirements2"); +        VMA_FETCH_DEVICE_FUNC(vkGetImageMemoryRequirements2KHR, PFN_vkGetImageMemoryRequirements2, "vkGetImageMemoryRequirements2"); +        VMA_FETCH_DEVICE_FUNC(vkBindBufferMemory2KHR, PFN_vkBindBufferMemory2, "vkBindBufferMemory2"); +        VMA_FETCH_DEVICE_FUNC(vkBindImageMemory2KHR, PFN_vkBindImageMemory2, "vkBindImageMemory2"); +    } +#endif + +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 +    if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) +    { +        VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2"); +        // Try to fetch the pointer from the other name, based on suspected driver bug - see issue #410. 
+        VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2KHR"); +    } +    else if(m_UseExtMemoryBudget) +    { +        VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2KHR"); +        // Try to fetch the pointer from the other name, based on suspected driver bug - see issue #410. +        VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2"); +    } +#endif + +#if VMA_DEDICATED_ALLOCATION +    if(m_UseKhrDedicatedAllocation) +    { +        VMA_FETCH_DEVICE_FUNC(vkGetBufferMemoryRequirements2KHR, PFN_vkGetBufferMemoryRequirements2KHR, "vkGetBufferMemoryRequirements2KHR"); +        VMA_FETCH_DEVICE_FUNC(vkGetImageMemoryRequirements2KHR, PFN_vkGetImageMemoryRequirements2KHR, "vkGetImageMemoryRequirements2KHR"); +    } +#endif + +#if VMA_BIND_MEMORY2 +    if(m_UseKhrBindMemory2) +    { +        VMA_FETCH_DEVICE_FUNC(vkBindBufferMemory2KHR, PFN_vkBindBufferMemory2KHR, "vkBindBufferMemory2KHR"); +        VMA_FETCH_DEVICE_FUNC(vkBindImageMemory2KHR, PFN_vkBindImageMemory2KHR, "vkBindImageMemory2KHR"); +    } +#endif // #if VMA_BIND_MEMORY2 + +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 +    if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) +    { +        VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2"); +    } +    else if(m_UseExtMemoryBudget) +    { +        VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2KHR"); +    } +#endif // #if VMA_MEMORY_BUDGET + +#if VMA_VULKAN_VERSION >= 1003000 +    if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 3, 0)) +    { +        VMA_FETCH_DEVICE_FUNC(vkGetDeviceBufferMemoryRequirements, PFN_vkGetDeviceBufferMemoryRequirements, "vkGetDeviceBufferMemoryRequirements"); +        VMA_FETCH_DEVICE_FUNC(vkGetDeviceImageMemoryRequirements, PFN_vkGetDeviceImageMemoryRequirements, "vkGetDeviceImageMemoryRequirements"); +    } +#endif +#if VMA_KHR_MAINTENANCE4 +    if(m_UseKhrMaintenance4) +    { +        VMA_FETCH_DEVICE_FUNC(vkGetDeviceBufferMemoryRequirements, PFN_vkGetDeviceBufferMemoryRequirementsKHR, "vkGetDeviceBufferMemoryRequirementsKHR"); +        VMA_FETCH_DEVICE_FUNC(vkGetDeviceImageMemoryRequirements, PFN_vkGetDeviceImageMemoryRequirementsKHR, "vkGetDeviceImageMemoryRequirementsKHR"); +    } +#endif +#if VMA_EXTERNAL_MEMORY_WIN32 +    if (m_UseKhrExternalMemoryWin32) +    { +        VMA_FETCH_DEVICE_FUNC(vkGetMemoryWin32HandleKHR, PFN_vkGetMemoryWin32HandleKHR, "vkGetMemoryWin32HandleKHR"); +    } +#endif +#undef VMA_FETCH_DEVICE_FUNC +#undef VMA_FETCH_INSTANCE_FUNC +} + +#endif // VMA_DYNAMIC_VULKAN_FUNCTIONS == 1 + +void VmaAllocator_T::ValidateVulkanFunctions() +{ +    VMA_ASSERT(m_VulkanFunctions.vkGetPhysicalDeviceProperties != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkAllocateMemory != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkFreeMemory != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkMapMemory != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkUnmapMemory != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkFlushMappedMemoryRanges != VMA_NULL); +    
VMA_ASSERT(m_VulkanFunctions.vkInvalidateMappedMemoryRanges != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkBindBufferMemory != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkBindImageMemory != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkGetBufferMemoryRequirements != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkGetImageMemoryRequirements != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkCreateBuffer != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkDestroyBuffer != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkCreateImage != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkDestroyImage != VMA_NULL); +    VMA_ASSERT(m_VulkanFunctions.vkCmdCopyBuffer != VMA_NULL); + +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 +    if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0) || m_UseKhrDedicatedAllocation) +    { +        VMA_ASSERT(m_VulkanFunctions.vkGetBufferMemoryRequirements2KHR != VMA_NULL); +        VMA_ASSERT(m_VulkanFunctions.vkGetImageMemoryRequirements2KHR != VMA_NULL); +    } +#endif + +#if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000 +    if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0) || m_UseKhrBindMemory2) +    { +        VMA_ASSERT(m_VulkanFunctions.vkBindBufferMemory2KHR != VMA_NULL); +        VMA_ASSERT(m_VulkanFunctions.vkBindImageMemory2KHR != VMA_NULL); +    } +#endif + +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 +    if(m_UseExtMemoryBudget || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) +    { +        VMA_ASSERT(m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties2KHR != VMA_NULL); +    } +#endif +#if VMA_EXTERNAL_MEMORY_WIN32 +    if (m_UseKhrExternalMemoryWin32) +    { +        VMA_ASSERT(m_VulkanFunctions.vkGetMemoryWin32HandleKHR != VMA_NULL); +    } +#endif + +    // Not validating these due to suspected driver bugs with these function +    // pointers being null despite correct extension or Vulkan version is enabled. +    // See issue #397. Their usage in VMA is optional anyway. +    // +    // VMA_ASSERT(m_VulkanFunctions.vkGetDeviceBufferMemoryRequirements != VMA_NULL); +    // VMA_ASSERT(m_VulkanFunctions.vkGetDeviceImageMemoryRequirements != VMA_NULL); +} + +VkDeviceSize VmaAllocator_T::CalcPreferredBlockSize(uint32_t memTypeIndex) +{ +    const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex); +    const VkDeviceSize heapSize = m_MemProps.memoryHeaps[heapIndex].size; +    const bool isSmallHeap = heapSize <= VMA_SMALL_HEAP_MAX_SIZE; +    return VmaAlignUp(isSmallHeap ? 
(heapSize / 8) : m_PreferredLargeHeapBlockSize, (VkDeviceSize)32); +} + +VkResult VmaAllocator_T::AllocateMemoryOfType( +    VmaPool pool, +    VkDeviceSize size, +    VkDeviceSize alignment, +    bool dedicatedPreferred, +    VkBuffer dedicatedBuffer, +    VkImage dedicatedImage, +    VmaBufferImageUsage dedicatedBufferImageUsage, +    const VmaAllocationCreateInfo& createInfo, +    uint32_t memTypeIndex, +    VmaSuballocationType suballocType, +    VmaDedicatedAllocationList& dedicatedAllocations, +    VmaBlockVector& blockVector, +    size_t allocationCount, +    VmaAllocation* pAllocations) +{ +    VMA_ASSERT(pAllocations != VMA_NULL); +    VMA_DEBUG_LOG_FORMAT("  AllocateMemory: MemoryTypeIndex=%" PRIu32 ", AllocationCount=%zu, Size=%" PRIu64, memTypeIndex, allocationCount, size); + +    VmaAllocationCreateInfo finalCreateInfo = createInfo; +    VkResult res = CalcMemTypeParams( +        finalCreateInfo, +        memTypeIndex, +        size, +        allocationCount); +    if(res != VK_SUCCESS) +        return res; + +    if((finalCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0) +    { +        return AllocateDedicatedMemory( +            pool, +            size, +            suballocType, +            dedicatedAllocations, +            memTypeIndex, +            (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0, +            (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0, +            (finalCreateInfo.flags & +                (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0, +            (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT) != 0, +            finalCreateInfo.pUserData, +            finalCreateInfo.priority, +            dedicatedBuffer, +            dedicatedImage, +            dedicatedBufferImageUsage, +            allocationCount, +            pAllocations, +            blockVector.GetAllocationNextPtr()); +    } +    else +    { +        const bool canAllocateDedicated = +            (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0 && +            (pool == VK_NULL_HANDLE || !blockVector.HasExplicitBlockSize()); + +        if(canAllocateDedicated) +        { +            // Heuristics: Allocate dedicated memory if requested size if greater than half of preferred block size. +            if(size > blockVector.GetPreferredBlockSize() / 2) +            { +                dedicatedPreferred = true; +            } +            // Protection against creating each allocation as dedicated when we reach or exceed heap size/budget, +            // which can quickly deplete maxMemoryAllocationCount: Don't prefer dedicated allocations when above +            // 3/4 of the maximum allocation count. 
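// Not part of vk_mem_alloc.h: with VMA's default 256 MiB preferred block size for large heaps,
// the heuristic above means any single request larger than ~128 MiB tends to get its own
// VkDeviceMemory. A caller can also decide explicitly instead of relying on the heuristic;
// a minimal sketch, assuming `allocator` and a filled-in VkImageCreateInfo `imageInfo`
// already exist in the renderer (both names are assumptions, not from this commit):

VmaAllocationCreateInfo allocCreateInfo = {};
allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; // always give this image its own VkDeviceMemory
// ...or VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT to fail rather than allocate a new block.

VkImage image = VK_NULL_HANDLE;
VmaAllocation allocation = VK_NULL_HANDLE;
VkResult result = vmaCreateImage(allocator, &imageInfo, &allocCreateInfo,
                                 &image, &allocation, NULL);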
+            if(m_PhysicalDeviceProperties.limits.maxMemoryAllocationCount < UINT32_MAX / 4 && +                m_DeviceMemoryCount.load() > m_PhysicalDeviceProperties.limits.maxMemoryAllocationCount * 3 / 4) +            { +                dedicatedPreferred = false; +            } + +            if(dedicatedPreferred) +            { +                res = AllocateDedicatedMemory( +                    pool, +                    size, +                    suballocType, +                    dedicatedAllocations, +                    memTypeIndex, +                    (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0, +                    (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0, +                    (finalCreateInfo.flags & +                        (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0, +                    (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT) != 0, +                    finalCreateInfo.pUserData, +                    finalCreateInfo.priority, +                    dedicatedBuffer, +                    dedicatedImage, +                    dedicatedBufferImageUsage, +                    allocationCount, +                    pAllocations, +                    blockVector.GetAllocationNextPtr()); +                if(res == VK_SUCCESS) +                { +                    // Succeeded: AllocateDedicatedMemory function already filled pMemory, nothing more to do here. +                    VMA_DEBUG_LOG("    Allocated as DedicatedMemory"); +                    return VK_SUCCESS; +                } +            } +        } + +        res = blockVector.Allocate( +            size, +            alignment, +            finalCreateInfo, +            suballocType, +            allocationCount, +            pAllocations); +        if(res == VK_SUCCESS) +            return VK_SUCCESS; + +        // Try dedicated memory. +        if(canAllocateDedicated && !dedicatedPreferred) +        { +            res = AllocateDedicatedMemory( +                pool, +                size, +                suballocType, +                dedicatedAllocations, +                memTypeIndex, +                (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0, +                (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0, +                (finalCreateInfo.flags & +                    (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0, +                (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT) != 0, +                finalCreateInfo.pUserData, +                finalCreateInfo.priority, +                dedicatedBuffer, +                dedicatedImage, +                dedicatedBufferImageUsage, +                allocationCount, +                pAllocations, +                blockVector.GetAllocationNextPtr()); +            if(res == VK_SUCCESS) +            { +                // Succeeded: AllocateDedicatedMemory function already filled pMemory, nothing more to do here. +                VMA_DEBUG_LOG("    Allocated as DedicatedMemory"); +                return VK_SUCCESS; +            } +        } +        // Everything failed: Return error code. 
+        VMA_DEBUG_LOG("    vkAllocateMemory FAILED"); +        return res; +    } +} + +VkResult VmaAllocator_T::AllocateDedicatedMemory( +    VmaPool pool, +    VkDeviceSize size, +    VmaSuballocationType suballocType, +    VmaDedicatedAllocationList& dedicatedAllocations, +    uint32_t memTypeIndex, +    bool map, +    bool isUserDataString, +    bool isMappingAllowed, +    bool canAliasMemory, +    void* pUserData, +    float priority, +    VkBuffer dedicatedBuffer, +    VkImage dedicatedImage, +    VmaBufferImageUsage dedicatedBufferImageUsage, +    size_t allocationCount, +    VmaAllocation* pAllocations, +    const void* pNextChain) +{ +    VMA_ASSERT(allocationCount > 0 && pAllocations); + +    VkMemoryAllocateInfo allocInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; +    allocInfo.memoryTypeIndex = memTypeIndex; +    allocInfo.allocationSize = size; +    allocInfo.pNext = pNextChain; + +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 +    VkMemoryDedicatedAllocateInfoKHR dedicatedAllocInfo = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR }; +    if(!canAliasMemory) +    { +        if(m_UseKhrDedicatedAllocation || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) +        { +            if(dedicatedBuffer != VK_NULL_HANDLE) +            { +                VMA_ASSERT(dedicatedImage == VK_NULL_HANDLE); +                dedicatedAllocInfo.buffer = dedicatedBuffer; +                VmaPnextChainPushFront(&allocInfo, &dedicatedAllocInfo); +            } +            else if(dedicatedImage != VK_NULL_HANDLE) +            { +                dedicatedAllocInfo.image = dedicatedImage; +                VmaPnextChainPushFront(&allocInfo, &dedicatedAllocInfo); +            } +        } +    } +#endif // #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + +#if VMA_BUFFER_DEVICE_ADDRESS +    VkMemoryAllocateFlagsInfoKHR allocFlagsInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO_KHR }; +    if(m_UseKhrBufferDeviceAddress) +    { +        bool canContainBufferWithDeviceAddress = true; +        if(dedicatedBuffer != VK_NULL_HANDLE) +        { +            canContainBufferWithDeviceAddress = dedicatedBufferImageUsage == VmaBufferImageUsage::UNKNOWN || +                dedicatedBufferImageUsage.Contains(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT); +        } +        else if(dedicatedImage != VK_NULL_HANDLE) +        { +            canContainBufferWithDeviceAddress = false; +        } +        if(canContainBufferWithDeviceAddress) +        { +            allocFlagsInfo.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR; +            VmaPnextChainPushFront(&allocInfo, &allocFlagsInfo); +        } +    } +#endif // #if VMA_BUFFER_DEVICE_ADDRESS + +#if VMA_MEMORY_PRIORITY +    VkMemoryPriorityAllocateInfoEXT priorityInfo = { VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT }; +    if(m_UseExtMemoryPriority) +    { +        VMA_ASSERT(priority >= 0.f && priority <= 1.f); +        priorityInfo.priority = priority; +        VmaPnextChainPushFront(&allocInfo, &priorityInfo); +    } +#endif // #if VMA_MEMORY_PRIORITY + +#if VMA_EXTERNAL_MEMORY +    // Attach VkExportMemoryAllocateInfoKHR if necessary. 
+    VkExportMemoryAllocateInfoKHR exportMemoryAllocInfo = { VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR }; +    exportMemoryAllocInfo.handleTypes = GetExternalMemoryHandleTypeFlags(memTypeIndex); +    if(exportMemoryAllocInfo.handleTypes != 0) +    { +        VmaPnextChainPushFront(&allocInfo, &exportMemoryAllocInfo); +    } +#endif // #if VMA_EXTERNAL_MEMORY + +    size_t allocIndex; +    VkResult res = VK_SUCCESS; +    for(allocIndex = 0; allocIndex < allocationCount; ++allocIndex) +    { +        res = AllocateDedicatedMemoryPage( +            pool, +            size, +            suballocType, +            memTypeIndex, +            allocInfo, +            map, +            isUserDataString, +            isMappingAllowed, +            pUserData, +            pAllocations + allocIndex); +        if(res != VK_SUCCESS) +        { +            break; +        } +    } + +    if(res == VK_SUCCESS) +    { +        for (allocIndex = 0; allocIndex < allocationCount; ++allocIndex) +        { +            dedicatedAllocations.Register(pAllocations[allocIndex]); +        } +        VMA_DEBUG_LOG_FORMAT("    Allocated DedicatedMemory Count=%zu, MemoryTypeIndex=#%" PRIu32, allocationCount, memTypeIndex); +    } +    else +    { +        // Free all already created allocations. +        while(allocIndex--) +        { +            VmaAllocation currAlloc = pAllocations[allocIndex]; +            VkDeviceMemory hMemory = currAlloc->GetMemory(); + +            /* +            There is no need to call this, because Vulkan spec allows to skip vkUnmapMemory +            before vkFreeMemory. + +            if(currAlloc->GetMappedData() != VMA_NULL) +            { +                (*m_VulkanFunctions.vkUnmapMemory)(m_hDevice, hMemory); +            } +            */ + +            FreeVulkanMemory(memTypeIndex, currAlloc->GetSize(), hMemory); +            m_Budget.RemoveAllocation(MemoryTypeIndexToHeapIndex(memTypeIndex), currAlloc->GetSize()); +            m_AllocationObjectAllocator.Free(currAlloc); +        } + +        memset(pAllocations, 0, sizeof(VmaAllocation) * allocationCount); +    } + +    return res; +} + +VkResult VmaAllocator_T::AllocateDedicatedMemoryPage( +    VmaPool pool, +    VkDeviceSize size, +    VmaSuballocationType suballocType, +    uint32_t memTypeIndex, +    const VkMemoryAllocateInfo& allocInfo, +    bool map, +    bool isUserDataString, +    bool isMappingAllowed, +    void* pUserData, +    VmaAllocation* pAllocation) +{ +    VkDeviceMemory hMemory = VK_NULL_HANDLE; +    VkResult res = AllocateVulkanMemory(&allocInfo, &hMemory); +    if(res < 0) +    { +        VMA_DEBUG_LOG("    vkAllocateMemory FAILED"); +        return res; +    } + +    void* pMappedData = VMA_NULL; +    if(map) +    { +        res = (*m_VulkanFunctions.vkMapMemory)( +            m_hDevice, +            hMemory, +            0, +            VK_WHOLE_SIZE, +            0, +            &pMappedData); +        if(res < 0) +        { +            VMA_DEBUG_LOG("    vkMapMemory FAILED"); +            FreeVulkanMemory(memTypeIndex, size, hMemory); +            return res; +        } +    } + +    *pAllocation = m_AllocationObjectAllocator.Allocate(isMappingAllowed); +    (*pAllocation)->InitDedicatedAllocation(this, pool, memTypeIndex, hMemory, suballocType, pMappedData, size); +    if (isUserDataString) +        (*pAllocation)->SetName(this, (const char*)pUserData); +    else +        (*pAllocation)->SetUserData(this, pUserData); +    m_Budget.AddAllocation(MemoryTypeIndexToHeapIndex(memTypeIndex), size); 
+    if(VMA_DEBUG_INITIALIZE_ALLOCATIONS) +    { +        FillAllocation(*pAllocation, VMA_ALLOCATION_FILL_PATTERN_CREATED); +    } + +    return VK_SUCCESS; +} + +void VmaAllocator_T::GetBufferMemoryRequirements( +    VkBuffer hBuffer, +    VkMemoryRequirements& memReq, +    bool& requiresDedicatedAllocation, +    bool& prefersDedicatedAllocation) const +{ +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 +    if(m_UseKhrDedicatedAllocation || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) +    { +        VkBufferMemoryRequirementsInfo2KHR memReqInfo = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2_KHR }; +        memReqInfo.buffer = hBuffer; + +        VkMemoryDedicatedRequirementsKHR memDedicatedReq = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR }; + +        VkMemoryRequirements2KHR memReq2 = { VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR }; +        VmaPnextChainPushFront(&memReq2, &memDedicatedReq); + +        (*m_VulkanFunctions.vkGetBufferMemoryRequirements2KHR)(m_hDevice, &memReqInfo, &memReq2); + +        memReq = memReq2.memoryRequirements; +        requiresDedicatedAllocation = (memDedicatedReq.requiresDedicatedAllocation != VK_FALSE); +        prefersDedicatedAllocation  = (memDedicatedReq.prefersDedicatedAllocation  != VK_FALSE); +    } +    else +#endif // #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 +    { +        (*m_VulkanFunctions.vkGetBufferMemoryRequirements)(m_hDevice, hBuffer, &memReq); +        requiresDedicatedAllocation = false; +        prefersDedicatedAllocation  = false; +    } +} + +void VmaAllocator_T::GetImageMemoryRequirements( +    VkImage hImage, +    VkMemoryRequirements& memReq, +    bool& requiresDedicatedAllocation, +    bool& prefersDedicatedAllocation) const +{ +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 +    if(m_UseKhrDedicatedAllocation || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) +    { +        VkImageMemoryRequirementsInfo2KHR memReqInfo = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2_KHR }; +        memReqInfo.image = hImage; + +        VkMemoryDedicatedRequirementsKHR memDedicatedReq = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR }; + +        VkMemoryRequirements2KHR memReq2 = { VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR }; +        VmaPnextChainPushFront(&memReq2, &memDedicatedReq); + +        (*m_VulkanFunctions.vkGetImageMemoryRequirements2KHR)(m_hDevice, &memReqInfo, &memReq2); + +        memReq = memReq2.memoryRequirements; +        requiresDedicatedAllocation = (memDedicatedReq.requiresDedicatedAllocation != VK_FALSE); +        prefersDedicatedAllocation  = (memDedicatedReq.prefersDedicatedAllocation  != VK_FALSE); +    } +    else +#endif // #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 +    { +        (*m_VulkanFunctions.vkGetImageMemoryRequirements)(m_hDevice, hImage, &memReq); +        requiresDedicatedAllocation = false; +        prefersDedicatedAllocation  = false; +    } +} + +VkResult VmaAllocator_T::FindMemoryTypeIndex( +    uint32_t memoryTypeBits, +    const VmaAllocationCreateInfo* pAllocationCreateInfo, +    VmaBufferImageUsage bufImgUsage, +    uint32_t* pMemoryTypeIndex) const +{ +    memoryTypeBits &= GetGlobalMemoryTypeBits(); + +    if(pAllocationCreateInfo->memoryTypeBits != 0) +    { +        memoryTypeBits &= pAllocationCreateInfo->memoryTypeBits; +    } + +    VkMemoryPropertyFlags requiredFlags = 0, preferredFlags = 0, notPreferredFlags = 0; +    if(!FindMemoryPreferences( +        IsIntegratedGpu(), +        
*pAllocationCreateInfo, +        bufImgUsage, +        requiredFlags, preferredFlags, notPreferredFlags)) +    { +        return VK_ERROR_FEATURE_NOT_PRESENT; +    } + +    *pMemoryTypeIndex = UINT32_MAX; +    uint32_t minCost = UINT32_MAX; +    for(uint32_t memTypeIndex = 0, memTypeBit = 1; +        memTypeIndex < GetMemoryTypeCount(); +        ++memTypeIndex, memTypeBit <<= 1) +    { +        // This memory type is acceptable according to memoryTypeBits bitmask. +        if((memTypeBit & memoryTypeBits) != 0) +        { +            const VkMemoryPropertyFlags currFlags = +                m_MemProps.memoryTypes[memTypeIndex].propertyFlags; +            // This memory type contains requiredFlags. +            if((requiredFlags & ~currFlags) == 0) +            { +                // Calculate cost as number of bits from preferredFlags not present in this memory type. +                uint32_t currCost = VMA_COUNT_BITS_SET(preferredFlags & ~currFlags) + +                    VMA_COUNT_BITS_SET(currFlags & notPreferredFlags); +                // Remember memory type with lowest cost. +                if(currCost < minCost) +                { +                    *pMemoryTypeIndex = memTypeIndex; +                    if(currCost == 0) +                    { +                        return VK_SUCCESS; +                    } +                    minCost = currCost; +                } +            } +        } +    } +    return (*pMemoryTypeIndex != UINT32_MAX) ? VK_SUCCESS : VK_ERROR_FEATURE_NOT_PRESENT; +} + +VkResult VmaAllocator_T::CalcMemTypeParams( +    VmaAllocationCreateInfo& inoutCreateInfo, +    uint32_t memTypeIndex, +    VkDeviceSize size, +    size_t allocationCount) +{ +    // If memory type is not HOST_VISIBLE, disable MAPPED. +    if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0 && +        (m_MemProps.memoryTypes[memTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) +    { +        inoutCreateInfo.flags &= ~VMA_ALLOCATION_CREATE_MAPPED_BIT; +    } + +    if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0 && +        (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT) != 0) +    { +        const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex); +        VmaBudget heapBudget = {}; +        GetHeapBudgets(&heapBudget, heapIndex, 1); +        if(heapBudget.usage + size * allocationCount > heapBudget.budget) +        { +            return VK_ERROR_OUT_OF_DEVICE_MEMORY; +        } +    } +    return VK_SUCCESS; +} + +VkResult VmaAllocator_T::CalcAllocationParams( +    VmaAllocationCreateInfo& inoutCreateInfo, +    bool dedicatedRequired, +    bool dedicatedPreferred) +{ +    VMA_ASSERT((inoutCreateInfo.flags & +        (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != +        (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT) && +        "Specifying both flags VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT and VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT is incorrect."); +    VMA_ASSERT((((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT) == 0 || +        (inoutCreateInfo.flags & (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0)) && +        "Specifying VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT requires also VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or 
VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT."); +    if(inoutCreateInfo.usage == VMA_MEMORY_USAGE_AUTO || inoutCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE || inoutCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_HOST) +    { +        if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0) +        { +            VMA_ASSERT((inoutCreateInfo.flags & (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0 && +                "When using VMA_ALLOCATION_CREATE_MAPPED_BIT and usage = VMA_MEMORY_USAGE_AUTO*, you must also specify VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT."); +        } +    } + +    // If memory is lazily allocated, it should be always dedicated. +    if(dedicatedRequired || +        inoutCreateInfo.usage == VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED) +    { +        inoutCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; +    } + +    if(inoutCreateInfo.pool != VK_NULL_HANDLE) +    { +        if(inoutCreateInfo.pool->m_BlockVector.HasExplicitBlockSize() && +            (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0) +        { +            VMA_ASSERT(0 && "Specifying VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT while current custom pool doesn't support dedicated allocations."); +            return VK_ERROR_FEATURE_NOT_PRESENT; +        } +        inoutCreateInfo.priority = inoutCreateInfo.pool->m_BlockVector.GetPriority(); +    } + +    if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0 && +        (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) != 0) +    { +        VMA_ASSERT(0 && "Specifying VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT together with VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT makes no sense."); +        return VK_ERROR_FEATURE_NOT_PRESENT; +    } + +    if(VMA_DEBUG_ALWAYS_DEDICATED_MEMORY && +        (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) != 0) +    { +        inoutCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; +    } + +    // Non-auto USAGE values imply HOST_ACCESS flags. +    // And so does VMA_MEMORY_USAGE_UNKNOWN because it is used with custom pools. +    // Which specific flag is used doesn't matter. They change things only when used with VMA_MEMORY_USAGE_AUTO*. +    // Otherwise they just protect from assert on mapping. 
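// Not part of vk_mem_alloc.h: the assert above is what a caller hits when it asks for
// VMA_ALLOCATION_CREATE_MAPPED_BIT with VMA_MEMORY_USAGE_AUTO but forgets a HOST_ACCESS flag.
// A combination that satisfies the rule, e.g. a persistently mapped staging buffer; a sketch
// assuming `allocator`, `srcData` and `dataSize` exist in the caller:

VkBufferCreateInfo bufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
bufInfo.size = dataSize;
bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

VmaAllocationCreateInfo allocCreateInfo = {};
allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
                        VMA_ALLOCATION_CREATE_MAPPED_BIT;

VkBuffer stagingBuffer = VK_NULL_HANDLE;
VmaAllocation stagingAlloc = VK_NULL_HANDLE;
VmaAllocationInfo stagingInfo = {};
if (vmaCreateBuffer(allocator, &bufInfo, &allocCreateInfo,
                    &stagingBuffer, &stagingAlloc, &stagingInfo) == VK_SUCCESS)
{
    // Persistently mapped: write the upload data straight into the mapping.
    memcpy(stagingInfo.pMappedData, srcData, dataSize);
}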
+    if(inoutCreateInfo.usage != VMA_MEMORY_USAGE_AUTO && +        inoutCreateInfo.usage != VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE && +        inoutCreateInfo.usage != VMA_MEMORY_USAGE_AUTO_PREFER_HOST) +    { +        if((inoutCreateInfo.flags & (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) == 0) +        { +            inoutCreateInfo.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; +        } +    } + +    return VK_SUCCESS; +} + +VkResult VmaAllocator_T::AllocateMemory( +    const VkMemoryRequirements& vkMemReq, +    bool requiresDedicatedAllocation, +    bool prefersDedicatedAllocation, +    VkBuffer dedicatedBuffer, +    VkImage dedicatedImage, +    VmaBufferImageUsage dedicatedBufferImageUsage, +    const VmaAllocationCreateInfo& createInfo, +    VmaSuballocationType suballocType, +    size_t allocationCount, +    VmaAllocation* pAllocations) +{ +    memset(pAllocations, 0, sizeof(VmaAllocation) * allocationCount); + +    VMA_ASSERT(VmaIsPow2(vkMemReq.alignment)); + +    if(vkMemReq.size == 0) +    { +        return VK_ERROR_INITIALIZATION_FAILED; +    } + +    VmaAllocationCreateInfo createInfoFinal = createInfo; +    VkResult res = CalcAllocationParams(createInfoFinal, requiresDedicatedAllocation, prefersDedicatedAllocation); +    if(res != VK_SUCCESS) +        return res; + +    if(createInfoFinal.pool != VK_NULL_HANDLE) +    { +        VmaBlockVector& blockVector = createInfoFinal.pool->m_BlockVector; +        return AllocateMemoryOfType( +            createInfoFinal.pool, +            vkMemReq.size, +            vkMemReq.alignment, +            prefersDedicatedAllocation, +            dedicatedBuffer, +            dedicatedImage, +            dedicatedBufferImageUsage, +            createInfoFinal, +            blockVector.GetMemoryTypeIndex(), +            suballocType, +            createInfoFinal.pool->m_DedicatedAllocations, +            blockVector, +            allocationCount, +            pAllocations); +    } +    else +    { +        // Bit mask of memory Vulkan types acceptable for this allocation. +        uint32_t memoryTypeBits = vkMemReq.memoryTypeBits; +        uint32_t memTypeIndex = UINT32_MAX; +        res = FindMemoryTypeIndex(memoryTypeBits, &createInfoFinal, dedicatedBufferImageUsage, &memTypeIndex); +        // Can't find any single memory type matching requirements. res is VK_ERROR_FEATURE_NOT_PRESENT. +        if(res != VK_SUCCESS) +            return res; +        do +        { +            VmaBlockVector* blockVector = m_pBlockVectors[memTypeIndex]; +            VMA_ASSERT(blockVector && "Trying to use unsupported memory type!"); +            res = AllocateMemoryOfType( +                VK_NULL_HANDLE, +                vkMemReq.size, +                vkMemReq.alignment, +                requiresDedicatedAllocation || prefersDedicatedAllocation, +                dedicatedBuffer, +                dedicatedImage, +                dedicatedBufferImageUsage, +                createInfoFinal, +                memTypeIndex, +                suballocType, +                m_DedicatedAllocations[memTypeIndex], +                *blockVector, +                allocationCount, +                pAllocations); +            // Allocation succeeded +            if(res == VK_SUCCESS) +                return VK_SUCCESS; + +            // Remove old memTypeIndex from list of possibilities. +            memoryTypeBits &= ~(1u << memTypeIndex); +            // Find alternative memTypeIndex. 
+            res = FindMemoryTypeIndex(memoryTypeBits, &createInfoFinal, dedicatedBufferImageUsage, &memTypeIndex); +        } while(res == VK_SUCCESS); + +        // No other matching memory type index could be found. +        // Not returning res, which is VK_ERROR_FEATURE_NOT_PRESENT, because we already failed to allocate once. +        return VK_ERROR_OUT_OF_DEVICE_MEMORY; +    } +} + +void VmaAllocator_T::FreeMemory( +    size_t allocationCount, +    const VmaAllocation* pAllocations) +{ +    VMA_ASSERT(pAllocations); + +    for(size_t allocIndex = allocationCount; allocIndex--; ) +    { +        VmaAllocation allocation = pAllocations[allocIndex]; + +        if(allocation != VK_NULL_HANDLE) +        { +            if(VMA_DEBUG_INITIALIZE_ALLOCATIONS) +            { +                FillAllocation(allocation, VMA_ALLOCATION_FILL_PATTERN_DESTROYED); +            } + +            switch(allocation->GetType()) +            { +            case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: +                { +                    VmaBlockVector* pBlockVector = VMA_NULL; +                    VmaPool hPool = allocation->GetParentPool(); +                    if(hPool != VK_NULL_HANDLE) +                    { +                        pBlockVector = &hPool->m_BlockVector; +                    } +                    else +                    { +                        const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex(); +                        pBlockVector = m_pBlockVectors[memTypeIndex]; +                        VMA_ASSERT(pBlockVector && "Trying to free memory of unsupported type!"); +                    } +                    pBlockVector->Free(allocation); +                } +                break; +            case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: +                FreeDedicatedMemory(allocation); +                break; +            default: +                VMA_ASSERT(0); +            } +        } +    } +} + +void VmaAllocator_T::CalculateStatistics(VmaTotalStatistics* pStats) +{ +    // Initialize. +    VmaClearDetailedStatistics(pStats->total); +    for(uint32_t i = 0; i < VK_MAX_MEMORY_TYPES; ++i) +        VmaClearDetailedStatistics(pStats->memoryType[i]); +    for(uint32_t i = 0; i < VK_MAX_MEMORY_HEAPS; ++i) +        VmaClearDetailedStatistics(pStats->memoryHeap[i]); + +    // Process default pools. +    for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) +    { +        VmaBlockVector* const pBlockVector = m_pBlockVectors[memTypeIndex]; +        if (pBlockVector != VMA_NULL) +            pBlockVector->AddDetailedStatistics(pStats->memoryType[memTypeIndex]); +    } + +    // Process custom pools. +    { +        VmaMutexLockRead lock(m_PoolsMutex, m_UseMutex); +        for(VmaPool pool = m_Pools.Front(); pool != VMA_NULL; pool = m_Pools.GetNext(pool)) +        { +            VmaBlockVector& blockVector = pool->m_BlockVector; +            const uint32_t memTypeIndex = blockVector.GetMemoryTypeIndex(); +            blockVector.AddDetailedStatistics(pStats->memoryType[memTypeIndex]); +            pool->m_DedicatedAllocations.AddDetailedStatistics(pStats->memoryType[memTypeIndex]); +        } +    } + +    // Process dedicated allocations. +    for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) +    { +        m_DedicatedAllocations[memTypeIndex].AddDetailedStatistics(pStats->memoryType[memTypeIndex]); +    } + +    // Sum from memory types to memory heaps. 
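// Not part of vk_mem_alloc.h: CalculateStatistics() below is the exact-but-slower path behind
// the public vmaCalculateStatistics(). A sketch of dumping the totals, assuming `allocator`
// exists in the caller:

VmaTotalStatistics stats;
vmaCalculateStatistics(allocator, &stats);
printf("allocations: %u, allocated: %llu bytes, unused in blocks: %llu bytes\n",
       stats.total.statistics.allocationCount,
       (unsigned long long)stats.total.statistics.allocationBytes,
       (unsigned long long)(stats.total.statistics.blockBytes -
                            stats.total.statistics.allocationBytes)); // block bytes not covered by allocations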
+    for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) +    { +        const uint32_t memHeapIndex = m_MemProps.memoryTypes[memTypeIndex].heapIndex; +        VmaAddDetailedStatistics(pStats->memoryHeap[memHeapIndex], pStats->memoryType[memTypeIndex]); +    } + +    // Sum from memory heaps to total. +    for(uint32_t memHeapIndex = 0; memHeapIndex < GetMemoryHeapCount(); ++memHeapIndex) +        VmaAddDetailedStatistics(pStats->total, pStats->memoryHeap[memHeapIndex]); + +    VMA_ASSERT(pStats->total.statistics.allocationCount == 0 || +        pStats->total.allocationSizeMax >= pStats->total.allocationSizeMin); +    VMA_ASSERT(pStats->total.unusedRangeCount == 0 || +        pStats->total.unusedRangeSizeMax >= pStats->total.unusedRangeSizeMin); +} + +void VmaAllocator_T::GetHeapBudgets(VmaBudget* outBudgets, uint32_t firstHeap, uint32_t heapCount) +{ +#if VMA_MEMORY_BUDGET +    if(m_UseExtMemoryBudget) +    { +        if(m_Budget.m_OperationsSinceBudgetFetch < 30) +        { +            VmaMutexLockRead lockRead(m_Budget.m_BudgetMutex, m_UseMutex); +            for(uint32_t i = 0; i < heapCount; ++i, ++outBudgets) +            { +                const uint32_t heapIndex = firstHeap + i; + +                outBudgets->statistics.blockCount = m_Budget.m_BlockCount[heapIndex]; +                outBudgets->statistics.allocationCount = m_Budget.m_AllocationCount[heapIndex]; +                outBudgets->statistics.blockBytes = m_Budget.m_BlockBytes[heapIndex]; +                outBudgets->statistics.allocationBytes = m_Budget.m_AllocationBytes[heapIndex]; + +                if(m_Budget.m_VulkanUsage[heapIndex] + outBudgets->statistics.blockBytes > m_Budget.m_BlockBytesAtBudgetFetch[heapIndex]) +                { +                    outBudgets->usage = m_Budget.m_VulkanUsage[heapIndex] + +                        outBudgets->statistics.blockBytes - m_Budget.m_BlockBytesAtBudgetFetch[heapIndex]; +                } +                else +                { +                    outBudgets->usage = 0; +                } + +                // Have to take MIN with heap size because explicit HeapSizeLimit is included in it. +                outBudgets->budget = VMA_MIN( +                    m_Budget.m_VulkanBudget[heapIndex], m_MemProps.memoryHeaps[heapIndex].size); +            } +        } +        else +        { +            UpdateVulkanBudget(); // Outside of mutex lock +            GetHeapBudgets(outBudgets, firstHeap, heapCount); // Recursion +        } +    } +    else +#endif +    { +        for(uint32_t i = 0; i < heapCount; ++i, ++outBudgets) +        { +            const uint32_t heapIndex = firstHeap + i; + +            outBudgets->statistics.blockCount = m_Budget.m_BlockCount[heapIndex]; +            outBudgets->statistics.allocationCount = m_Budget.m_AllocationCount[heapIndex]; +            outBudgets->statistics.blockBytes = m_Budget.m_BlockBytes[heapIndex]; +            outBudgets->statistics.allocationBytes = m_Budget.m_AllocationBytes[heapIndex]; + +            outBudgets->usage = outBudgets->statistics.blockBytes; +            outBudgets->budget = m_MemProps.memoryHeaps[heapIndex].size * 8 / 10; // 80% heuristics. 
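// Not part of vk_mem_alloc.h: without VK_EXT_memory_budget the "budget" reported here is only
// the 80%-of-heap-size guess above; with the extension it is re-fetched after ~30 allocator
// operations or on vmaSetCurrentFrameIndex(). A sketch of the per-frame pattern, assuming
// `allocator` and `frameIndex` exist in the render loop:

vmaSetCurrentFrameIndex(allocator, frameIndex); // also refreshes the budget when the extension is enabled

const VkPhysicalDeviceMemoryProperties* memProps = NULL;
vmaGetMemoryProperties(allocator, &memProps);

VmaBudget budgets[VK_MAX_MEMORY_HEAPS];
vmaGetHeapBudgets(allocator, budgets);           // one entry per memory heap
for (uint32_t heap = 0; heap < memProps->memoryHeapCount; ++heap)
{
    if (budgets[heap].usage > budgets[heap].budget)
    {
        // Over budget: a caller would typically stop streaming new resources here.
    }
}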
+        } +    } +} + +void VmaAllocator_T::GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationInfo* pAllocationInfo) +{ +    pAllocationInfo->memoryType = hAllocation->GetMemoryTypeIndex(); +    pAllocationInfo->deviceMemory = hAllocation->GetMemory(); +    pAllocationInfo->offset = hAllocation->GetOffset(); +    pAllocationInfo->size = hAllocation->GetSize(); +    pAllocationInfo->pMappedData = hAllocation->GetMappedData(); +    pAllocationInfo->pUserData = hAllocation->GetUserData(); +    pAllocationInfo->pName = hAllocation->GetName(); +} + +void VmaAllocator_T::GetAllocationInfo2(VmaAllocation hAllocation, VmaAllocationInfo2* pAllocationInfo) +{ +    GetAllocationInfo(hAllocation, &pAllocationInfo->allocationInfo); + +    switch (hAllocation->GetType()) +    { +    case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: +        pAllocationInfo->blockSize = hAllocation->GetBlock()->m_pMetadata->GetSize(); +        pAllocationInfo->dedicatedMemory = VK_FALSE; +        break; +    case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: +        pAllocationInfo->blockSize = pAllocationInfo->allocationInfo.size; +        pAllocationInfo->dedicatedMemory = VK_TRUE; +        break; +    default: +        VMA_ASSERT(0); +    } +} + +VkResult VmaAllocator_T::CreatePool(const VmaPoolCreateInfo* pCreateInfo, VmaPool* pPool) +{ +    VMA_DEBUG_LOG_FORMAT("  CreatePool: MemoryTypeIndex=%" PRIu32 ", flags=%" PRIu32, pCreateInfo->memoryTypeIndex, pCreateInfo->flags); + +    VmaPoolCreateInfo newCreateInfo = *pCreateInfo; + +    // Protection against uninitialized new structure member. If garbage data are left there, this pointer dereference would crash. +    if(pCreateInfo->pMemoryAllocateNext) +    { +        VMA_ASSERT(((const VkBaseInStructure*)pCreateInfo->pMemoryAllocateNext)->sType != 0); +    } + +    if(newCreateInfo.maxBlockCount == 0) +    { +        newCreateInfo.maxBlockCount = SIZE_MAX; +    } +    if(newCreateInfo.minBlockCount > newCreateInfo.maxBlockCount) +    { +        return VK_ERROR_INITIALIZATION_FAILED; +    } +    // Memory type index out of range or forbidden. +    if(pCreateInfo->memoryTypeIndex >= GetMemoryTypeCount() || +        ((1u << pCreateInfo->memoryTypeIndex) & m_GlobalMemoryTypeBits) == 0) +    { +        return VK_ERROR_FEATURE_NOT_PRESENT; +    } +    if(newCreateInfo.minAllocationAlignment > 0) +    { +        VMA_ASSERT(VmaIsPow2(newCreateInfo.minAllocationAlignment)); +    } + +    const VkDeviceSize preferredBlockSize = CalcPreferredBlockSize(newCreateInfo.memoryTypeIndex); + +    *pPool = vma_new(this, VmaPool_T)(this, newCreateInfo, preferredBlockSize); + +    VkResult res = (*pPool)->m_BlockVector.CreateMinBlocks(); +    if(res != VK_SUCCESS) +    { +        vma_delete(this, *pPool); +        *pPool = VMA_NULL; +        return res; +    } + +    // Add to m_Pools. +    { +        VmaMutexLockWrite lock(m_PoolsMutex, m_UseMutex); +        (*pPool)->SetId(m_NextPoolId++); +        m_Pools.PushBack(*pPool); +    } + +    return VK_SUCCESS; +} + +void VmaAllocator_T::DestroyPool(VmaPool pool) +{ +    // Remove from m_Pools. 
+    { +        VmaMutexLockWrite lock(m_PoolsMutex, m_UseMutex); +        m_Pools.Remove(pool); +    } + +    vma_delete(this, pool); +} + +void VmaAllocator_T::GetPoolStatistics(VmaPool pool, VmaStatistics* pPoolStats) +{ +    VmaClearStatistics(*pPoolStats); +    pool->m_BlockVector.AddStatistics(*pPoolStats); +    pool->m_DedicatedAllocations.AddStatistics(*pPoolStats); +} + +void VmaAllocator_T::CalculatePoolStatistics(VmaPool pool, VmaDetailedStatistics* pPoolStats) +{ +    VmaClearDetailedStatistics(*pPoolStats); +    pool->m_BlockVector.AddDetailedStatistics(*pPoolStats); +    pool->m_DedicatedAllocations.AddDetailedStatistics(*pPoolStats); +} + +void VmaAllocator_T::SetCurrentFrameIndex(uint32_t frameIndex) +{ +    m_CurrentFrameIndex.store(frameIndex); + +#if VMA_MEMORY_BUDGET +    if(m_UseExtMemoryBudget) +    { +        UpdateVulkanBudget(); +    } +#endif // #if VMA_MEMORY_BUDGET +} + +VkResult VmaAllocator_T::CheckPoolCorruption(VmaPool hPool) +{ +    return hPool->m_BlockVector.CheckCorruption(); +} + +VkResult VmaAllocator_T::CheckCorruption(uint32_t memoryTypeBits) +{ +    VkResult finalRes = VK_ERROR_FEATURE_NOT_PRESENT; + +    // Process default pools. +    for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) +    { +        VmaBlockVector* const pBlockVector = m_pBlockVectors[memTypeIndex]; +        if(pBlockVector != VMA_NULL) +        { +            VkResult localRes = pBlockVector->CheckCorruption(); +            switch(localRes) +            { +            case VK_ERROR_FEATURE_NOT_PRESENT: +                break; +            case VK_SUCCESS: +                finalRes = VK_SUCCESS; +                break; +            default: +                return localRes; +            } +        } +    } + +    // Process custom pools. +    { +        VmaMutexLockRead lock(m_PoolsMutex, m_UseMutex); +        for(VmaPool pool = m_Pools.Front(); pool != VMA_NULL; pool = m_Pools.GetNext(pool)) +        { +            if(((1u << pool->m_BlockVector.GetMemoryTypeIndex()) & memoryTypeBits) != 0) +            { +                VkResult localRes = pool->m_BlockVector.CheckCorruption(); +                switch(localRes) +                { +                case VK_ERROR_FEATURE_NOT_PRESENT: +                    break; +                case VK_SUCCESS: +                    finalRes = VK_SUCCESS; +                    break; +                default: +                    return localRes; +                } +            } +        } +    } + +    return finalRes; +} + +VkResult VmaAllocator_T::AllocateVulkanMemory(const VkMemoryAllocateInfo* pAllocateInfo, VkDeviceMemory* pMemory) +{ +    AtomicTransactionalIncrement<VMA_ATOMIC_UINT32> deviceMemoryCountIncrement; +    const uint64_t prevDeviceMemoryCount = deviceMemoryCountIncrement.Increment(&m_DeviceMemoryCount); +#if VMA_DEBUG_DONT_EXCEED_MAX_MEMORY_ALLOCATION_COUNT +    if(prevDeviceMemoryCount >= m_PhysicalDeviceProperties.limits.maxMemoryAllocationCount) +    { +        return VK_ERROR_TOO_MANY_OBJECTS; +    } +#endif + +    const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(pAllocateInfo->memoryTypeIndex); + +    // HeapSizeLimit is in effect for this heap. 
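// Not part of vk_mem_alloc.h: the heap limit enforced below comes from
// VmaAllocatorCreateInfo::pHeapSizeLimit, an array with one VkDeviceSize per memory heap where
// VK_WHOLE_SIZE means "no limit". A sketch that caps heap 0 at 1 GiB, assuming the handles
// (`physicalDevice`, `device`, `instance`) are created elsewhere in the renderer:

VkDeviceSize heapLimits[VK_MAX_MEMORY_HEAPS];
for (uint32_t i = 0; i < VK_MAX_MEMORY_HEAPS; ++i)
    heapLimits[i] = VK_WHOLE_SIZE;               // no limit by default
heapLimits[0] = 1024ull * 1024 * 1024;           // treat heap 0 as if it only had 1 GiB

VmaAllocatorCreateInfo allocatorInfo = {};
allocatorInfo.physicalDevice = physicalDevice;
allocatorInfo.device = device;
allocatorInfo.instance = instance;
allocatorInfo.pHeapSizeLimit = heapLimits;

VmaAllocator limitedAllocator = VK_NULL_HANDLE;
VkResult res = vmaCreateAllocator(&allocatorInfo, &limitedAllocator);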
+    if((m_HeapSizeLimitMask & (1u << heapIndex)) != 0) +    { +        const VkDeviceSize heapSize = m_MemProps.memoryHeaps[heapIndex].size; +        VkDeviceSize blockBytes = m_Budget.m_BlockBytes[heapIndex]; +        for(;;) +        { +            const VkDeviceSize blockBytesAfterAllocation = blockBytes + pAllocateInfo->allocationSize; +            if(blockBytesAfterAllocation > heapSize) +            { +                return VK_ERROR_OUT_OF_DEVICE_MEMORY; +            } +            if(m_Budget.m_BlockBytes[heapIndex].compare_exchange_strong(blockBytes, blockBytesAfterAllocation)) +            { +                break; +            } +        } +    } +    else +    { +        m_Budget.m_BlockBytes[heapIndex] += pAllocateInfo->allocationSize; +    } +    ++m_Budget.m_BlockCount[heapIndex]; + +    // VULKAN CALL vkAllocateMemory. +    VkResult res = (*m_VulkanFunctions.vkAllocateMemory)(m_hDevice, pAllocateInfo, GetAllocationCallbacks(), pMemory); + +    if(res == VK_SUCCESS) +    { +#if VMA_MEMORY_BUDGET +        ++m_Budget.m_OperationsSinceBudgetFetch; +#endif + +        // Informative callback. +        if(m_DeviceMemoryCallbacks.pfnAllocate != VMA_NULL) +        { +            (*m_DeviceMemoryCallbacks.pfnAllocate)(this, pAllocateInfo->memoryTypeIndex, *pMemory, pAllocateInfo->allocationSize, m_DeviceMemoryCallbacks.pUserData); +        } + +        deviceMemoryCountIncrement.Commit(); +    } +    else +    { +        --m_Budget.m_BlockCount[heapIndex]; +        m_Budget.m_BlockBytes[heapIndex] -= pAllocateInfo->allocationSize; +    } + +    return res; +} + +void VmaAllocator_T::FreeVulkanMemory(uint32_t memoryType, VkDeviceSize size, VkDeviceMemory hMemory) +{ +    // Informative callback. +    if(m_DeviceMemoryCallbacks.pfnFree != VMA_NULL) +    { +        (*m_DeviceMemoryCallbacks.pfnFree)(this, memoryType, hMemory, size, m_DeviceMemoryCallbacks.pUserData); +    } + +    // VULKAN CALL vkFreeMemory. 
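// Not part of vk_mem_alloc.h: the pfnAllocate/pfnFree calls in AllocateVulkanMemory and just
// above are purely informative hooks. A sketch of wiring them up for logging; the callback
// names are chosen here and are not part of this commit:

static void onDeviceMemAlloc(VmaAllocator allocator, uint32_t memoryType,
                             VkDeviceMemory memory, VkDeviceSize size, void* pUserData)
{
    (void)allocator; (void)memory; (void)pUserData;
    printf("vkAllocateMemory: type %u, %llu bytes\n", memoryType, (unsigned long long)size);
}

static void onDeviceMemFree(VmaAllocator allocator, uint32_t memoryType,
                            VkDeviceMemory memory, VkDeviceSize size, void* pUserData)
{
    (void)allocator; (void)memory; (void)pUserData;
    printf("vkFreeMemory: type %u, %llu bytes\n", memoryType, (unsigned long long)size);
}

// ...then, in the VmaAllocatorCreateInfo passed to vmaCreateAllocator():
VmaDeviceMemoryCallbacks memCallbacks = {};
memCallbacks.pfnAllocate = onDeviceMemAlloc;
memCallbacks.pfnFree = onDeviceMemFree;
allocatorInfo.pDeviceMemoryCallbacks = &memCallbacks;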
+    (*m_VulkanFunctions.vkFreeMemory)(m_hDevice, hMemory, GetAllocationCallbacks()); + +    const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memoryType); +    --m_Budget.m_BlockCount[heapIndex]; +    m_Budget.m_BlockBytes[heapIndex] -= size; + +    --m_DeviceMemoryCount; +} + +VkResult VmaAllocator_T::BindVulkanBuffer( +    VkDeviceMemory memory, +    VkDeviceSize memoryOffset, +    VkBuffer buffer, +    const void* pNext) +{ +    if(pNext != VMA_NULL) +    { +#if VMA_VULKAN_VERSION >= 1001000 || VMA_BIND_MEMORY2 +        if((m_UseKhrBindMemory2 || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) && +            m_VulkanFunctions.vkBindBufferMemory2KHR != VMA_NULL) +        { +            VkBindBufferMemoryInfoKHR bindBufferMemoryInfo = { VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR }; +            bindBufferMemoryInfo.pNext = pNext; +            bindBufferMemoryInfo.buffer = buffer; +            bindBufferMemoryInfo.memory = memory; +            bindBufferMemoryInfo.memoryOffset = memoryOffset; +            return (*m_VulkanFunctions.vkBindBufferMemory2KHR)(m_hDevice, 1, &bindBufferMemoryInfo); +        } +        else +#endif // #if VMA_VULKAN_VERSION >= 1001000 || VMA_BIND_MEMORY2 +        { +            return VK_ERROR_EXTENSION_NOT_PRESENT; +        } +    } +    else +    { +        return (*m_VulkanFunctions.vkBindBufferMemory)(m_hDevice, buffer, memory, memoryOffset); +    } +} + +VkResult VmaAllocator_T::BindVulkanImage( +    VkDeviceMemory memory, +    VkDeviceSize memoryOffset, +    VkImage image, +    const void* pNext) +{ +    if(pNext != VMA_NULL) +    { +#if VMA_VULKAN_VERSION >= 1001000 || VMA_BIND_MEMORY2 +        if((m_UseKhrBindMemory2 || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) && +            m_VulkanFunctions.vkBindImageMemory2KHR != VMA_NULL) +        { +            VkBindImageMemoryInfoKHR bindBufferMemoryInfo = { VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO_KHR }; +            bindBufferMemoryInfo.pNext = pNext; +            bindBufferMemoryInfo.image = image; +            bindBufferMemoryInfo.memory = memory; +            bindBufferMemoryInfo.memoryOffset = memoryOffset; +            return (*m_VulkanFunctions.vkBindImageMemory2KHR)(m_hDevice, 1, &bindBufferMemoryInfo); +        } +        else +#endif // #if VMA_BIND_MEMORY2 +        { +            return VK_ERROR_EXTENSION_NOT_PRESENT; +        } +    } +    else +    { +        return (*m_VulkanFunctions.vkBindImageMemory)(m_hDevice, image, memory, memoryOffset); +    } +} + +VkResult VmaAllocator_T::Map(VmaAllocation hAllocation, void** ppData) +{ +    switch(hAllocation->GetType()) +    { +    case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: +        { +            VmaDeviceMemoryBlock* const pBlock = hAllocation->GetBlock(); +            char *pBytes = VMA_NULL; +            VkResult res = pBlock->Map(this, 1, (void**)&pBytes); +            if(res == VK_SUCCESS) +            { +                *ppData = pBytes + (ptrdiff_t)hAllocation->GetOffset(); +                hAllocation->BlockAllocMap(); +            } +            return res; +        } +    case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: +        return hAllocation->DedicatedAllocMap(this, ppData); +    default: +        VMA_ASSERT(0); +        return VK_ERROR_MEMORY_MAP_FAILED; +    } +} + +void VmaAllocator_T::Unmap(VmaAllocation hAllocation) +{ +    switch(hAllocation->GetType()) +    { +    case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: +        { +            VmaDeviceMemoryBlock* const pBlock = hAllocation->GetBlock(); +            
hAllocation->BlockAllocUnmap(); +            pBlock->Unmap(this, 1); +        } +        break; +    case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: +        hAllocation->DedicatedAllocUnmap(this); +        break; +    default: +        VMA_ASSERT(0); +    } +} + +VkResult VmaAllocator_T::BindBufferMemory( +    VmaAllocation hAllocation, +    VkDeviceSize allocationLocalOffset, +    VkBuffer hBuffer, +    const void* pNext) +{ +    VkResult res = VK_ERROR_UNKNOWN_COPY; +    switch(hAllocation->GetType()) +    { +    case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: +        res = BindVulkanBuffer(hAllocation->GetMemory(), allocationLocalOffset, hBuffer, pNext); +        break; +    case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: +    { +        VmaDeviceMemoryBlock* const pBlock = hAllocation->GetBlock(); +        VMA_ASSERT(pBlock && "Binding buffer to allocation that doesn't belong to any block."); +        res = pBlock->BindBufferMemory(this, hAllocation, allocationLocalOffset, hBuffer, pNext); +        break; +    } +    default: +        VMA_ASSERT(0); +    } +    return res; +} + +VkResult VmaAllocator_T::BindImageMemory( +    VmaAllocation hAllocation, +    VkDeviceSize allocationLocalOffset, +    VkImage hImage, +    const void* pNext) +{ +    VkResult res = VK_ERROR_UNKNOWN_COPY; +    switch(hAllocation->GetType()) +    { +    case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: +        res = BindVulkanImage(hAllocation->GetMemory(), allocationLocalOffset, hImage, pNext); +        break; +    case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: +    { +        VmaDeviceMemoryBlock* pBlock = hAllocation->GetBlock(); +        VMA_ASSERT(pBlock && "Binding image to allocation that doesn't belong to any block."); +        res = pBlock->BindImageMemory(this, hAllocation, allocationLocalOffset, hImage, pNext); +        break; +    } +    default: +        VMA_ASSERT(0); +    } +    return res; +} + +VkResult VmaAllocator_T::FlushOrInvalidateAllocation( +    VmaAllocation hAllocation, +    VkDeviceSize offset, VkDeviceSize size, +    VMA_CACHE_OPERATION op) +{ +    VkResult res = VK_SUCCESS; + +    VkMappedMemoryRange memRange = {}; +    if(GetFlushOrInvalidateRange(hAllocation, offset, size, memRange)) +    { +        switch(op) +        { +        case VMA_CACHE_FLUSH: +            res = (*GetVulkanFunctions().vkFlushMappedMemoryRanges)(m_hDevice, 1, &memRange); +            break; +        case VMA_CACHE_INVALIDATE: +            res = (*GetVulkanFunctions().vkInvalidateMappedMemoryRanges)(m_hDevice, 1, &memRange); +            break; +        default: +            VMA_ASSERT(0); +        } +    } +    // else: Just ignore this call. +    return res; +} + +VkResult VmaAllocator_T::FlushOrInvalidateAllocations( +    uint32_t allocationCount, +    const VmaAllocation* allocations, +    const VkDeviceSize* offsets, const VkDeviceSize* sizes, +    VMA_CACHE_OPERATION op) +{ +    typedef VmaStlAllocator<VkMappedMemoryRange> RangeAllocator; +    typedef VmaSmallVector<VkMappedMemoryRange, RangeAllocator, 16> RangeVector; +    RangeVector ranges = RangeVector(RangeAllocator(GetAllocationCallbacks())); + +    for(uint32_t allocIndex = 0; allocIndex < allocationCount; ++allocIndex) +    { +        const VmaAllocation alloc = allocations[allocIndex]; +        const VkDeviceSize offset = offsets != VMA_NULL ? offsets[allocIndex] : 0; +        const VkDeviceSize size = sizes != VMA_NULL ? 
sizes[allocIndex] : VK_WHOLE_SIZE; +        VkMappedMemoryRange newRange; +        if(GetFlushOrInvalidateRange(alloc, offset, size, newRange)) +        { +            ranges.push_back(newRange); +        } +    } + +    VkResult res = VK_SUCCESS; +    if(!ranges.empty()) +    { +        switch(op) +        { +        case VMA_CACHE_FLUSH: +            res = (*GetVulkanFunctions().vkFlushMappedMemoryRanges)(m_hDevice, (uint32_t)ranges.size(), ranges.data()); +            break; +        case VMA_CACHE_INVALIDATE: +            res = (*GetVulkanFunctions().vkInvalidateMappedMemoryRanges)(m_hDevice, (uint32_t)ranges.size(), ranges.data()); +            break; +        default: +            VMA_ASSERT(0); +        } +    } +    // else: Just ignore this call. +    return res; +} + +VkResult VmaAllocator_T::CopyMemoryToAllocation( +    const void* pSrcHostPointer, +    VmaAllocation dstAllocation, +    VkDeviceSize dstAllocationLocalOffset, +    VkDeviceSize size) +{ +    void* dstMappedData = VMA_NULL; +    VkResult res = Map(dstAllocation, &dstMappedData); +    if(res == VK_SUCCESS) +    { +        memcpy((char*)dstMappedData + dstAllocationLocalOffset, pSrcHostPointer, (size_t)size); +        Unmap(dstAllocation); +        res = FlushOrInvalidateAllocation(dstAllocation, dstAllocationLocalOffset, size, VMA_CACHE_FLUSH); +    } +    return res; +} + +VkResult VmaAllocator_T::CopyAllocationToMemory( +    VmaAllocation srcAllocation, +    VkDeviceSize srcAllocationLocalOffset, +    void* pDstHostPointer, +    VkDeviceSize size) +{ +    void* srcMappedData = VMA_NULL; +    VkResult res = Map(srcAllocation, &srcMappedData); +    if(res == VK_SUCCESS) +    { +        res = FlushOrInvalidateAllocation(srcAllocation, srcAllocationLocalOffset, size, VMA_CACHE_INVALIDATE); +        if(res == VK_SUCCESS) +        { +            memcpy(pDstHostPointer, (const char*)srcMappedData + srcAllocationLocalOffset, (size_t)size); +            Unmap(srcAllocation); +        } +    } +    return res; +} + +void VmaAllocator_T::FreeDedicatedMemory(const VmaAllocation allocation) +{ +    VMA_ASSERT(allocation && allocation->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); + +    const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex(); +    VmaPool parentPool = allocation->GetParentPool(); +    if(parentPool == VK_NULL_HANDLE) +    { +        // Default pool +        m_DedicatedAllocations[memTypeIndex].Unregister(allocation); +    } +    else +    { +        // Custom pool +        parentPool->m_DedicatedAllocations.Unregister(allocation); +    } + +    VkDeviceMemory hMemory = allocation->GetMemory(); + +    /* +    There is no need to call this, because Vulkan spec allows to skip vkUnmapMemory +    before vkFreeMemory. + +    if(allocation->GetMappedData() != VMA_NULL) +    { +        (*m_VulkanFunctions.vkUnmapMemory)(m_hDevice, hMemory); +    } +    */ + +    FreeVulkanMemory(memTypeIndex, allocation->GetSize(), hMemory); + +    m_Budget.RemoveAllocation(MemoryTypeIndexToHeapIndex(allocation->GetMemoryTypeIndex()), allocation->GetSize()); +    allocation->Destroy(this); +    m_AllocationObjectAllocator.Free(allocation); + +    VMA_DEBUG_LOG_FORMAT("    Freed DedicatedMemory MemoryTypeIndex=%" PRIu32, memTypeIndex); +} + +uint32_t VmaAllocator_T::CalculateGpuDefragmentationMemoryTypeBits() const +{ +    VkBufferCreateInfo dummyBufCreateInfo; +    VmaFillGpuDefragmentationBufferCreateInfo(dummyBufCreateInfo); + +    uint32_t memoryTypeBits = 0; + +    // Create buffer. 
+    VkBuffer buf = VK_NULL_HANDLE; +    VkResult res = (*GetVulkanFunctions().vkCreateBuffer)( +        m_hDevice, &dummyBufCreateInfo, GetAllocationCallbacks(), &buf); +    if(res == VK_SUCCESS) +    { +        // Query for supported memory types. +        VkMemoryRequirements memReq; +        (*GetVulkanFunctions().vkGetBufferMemoryRequirements)(m_hDevice, buf, &memReq); +        memoryTypeBits = memReq.memoryTypeBits; + +        // Destroy buffer. +        (*GetVulkanFunctions().vkDestroyBuffer)(m_hDevice, buf, GetAllocationCallbacks()); +    } + +    return memoryTypeBits; +} + +uint32_t VmaAllocator_T::CalculateGlobalMemoryTypeBits() const +{ +    // Make sure memory information is already fetched. +    VMA_ASSERT(GetMemoryTypeCount() > 0); + +    uint32_t memoryTypeBits = UINT32_MAX; + +    if(!m_UseAmdDeviceCoherentMemory) +    { +        // Exclude memory types that have VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD. +        for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) +        { +            if((m_MemProps.memoryTypes[memTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY) != 0) +            { +                memoryTypeBits &= ~(1u << memTypeIndex); +            } +        } +    } + +    return memoryTypeBits; +} + +bool VmaAllocator_T::GetFlushOrInvalidateRange( +    VmaAllocation allocation, +    VkDeviceSize offset, VkDeviceSize size, +    VkMappedMemoryRange& outRange) const +{ +    const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex(); +    if(size > 0 && IsMemoryTypeNonCoherent(memTypeIndex)) +    { +        const VkDeviceSize nonCoherentAtomSize = m_PhysicalDeviceProperties.limits.nonCoherentAtomSize; +        const VkDeviceSize allocationSize = allocation->GetSize(); +        VMA_ASSERT(offset <= allocationSize); + +        outRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; +        outRange.pNext = VMA_NULL; +        outRange.memory = allocation->GetMemory(); + +        switch(allocation->GetType()) +        { +        case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: +            outRange.offset = VmaAlignDown(offset, nonCoherentAtomSize); +            if(size == VK_WHOLE_SIZE) +            { +                outRange.size = allocationSize - outRange.offset; +            } +            else +            { +                VMA_ASSERT(offset + size <= allocationSize); +                outRange.size = VMA_MIN( +                    VmaAlignUp(size + (offset - outRange.offset), nonCoherentAtomSize), +                    allocationSize - outRange.offset); +            } +            break; +        case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: +        { +            // 1. Still within this allocation. +            outRange.offset = VmaAlignDown(offset, nonCoherentAtomSize); +            if(size == VK_WHOLE_SIZE) +            { +                size = allocationSize - offset; +            } +            else +            { +                VMA_ASSERT(offset + size <= allocationSize); +            } +            outRange.size = VmaAlignUp(size + (offset - outRange.offset), nonCoherentAtomSize); + +            // 2. Adjust to whole block. 
+            const VkDeviceSize allocationOffset = allocation->GetOffset(); +            VMA_ASSERT(allocationOffset % nonCoherentAtomSize == 0); +            const VkDeviceSize blockSize = allocation->GetBlock()->m_pMetadata->GetSize(); +            outRange.offset += allocationOffset; +            outRange.size = VMA_MIN(outRange.size, blockSize - outRange.offset); + +            break; +        } +        default: +            VMA_ASSERT(0); +        } +        return true; +    } +    return false; +} + +#if VMA_MEMORY_BUDGET +void VmaAllocator_T::UpdateVulkanBudget() +{ +    VMA_ASSERT(m_UseExtMemoryBudget); + +    VkPhysicalDeviceMemoryProperties2KHR memProps = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2_KHR }; + +    VkPhysicalDeviceMemoryBudgetPropertiesEXT budgetProps = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT }; +    VmaPnextChainPushFront(&memProps, &budgetProps); + +    GetVulkanFunctions().vkGetPhysicalDeviceMemoryProperties2KHR(m_PhysicalDevice, &memProps); + +    { +        VmaMutexLockWrite lockWrite(m_Budget.m_BudgetMutex, m_UseMutex); + +        for(uint32_t heapIndex = 0; heapIndex < GetMemoryHeapCount(); ++heapIndex) +        { +            m_Budget.m_VulkanUsage[heapIndex] = budgetProps.heapUsage[heapIndex]; +            m_Budget.m_VulkanBudget[heapIndex] = budgetProps.heapBudget[heapIndex]; +            m_Budget.m_BlockBytesAtBudgetFetch[heapIndex] = m_Budget.m_BlockBytes[heapIndex].load(); + +            // Some bugged drivers return the budget incorrectly, e.g. 0 or much bigger than heap size. +            if(m_Budget.m_VulkanBudget[heapIndex] == 0) +            { +                m_Budget.m_VulkanBudget[heapIndex] = m_MemProps.memoryHeaps[heapIndex].size * 8 / 10; // 80% heuristics. +            } +            else if(m_Budget.m_VulkanBudget[heapIndex] > m_MemProps.memoryHeaps[heapIndex].size) +            { +                m_Budget.m_VulkanBudget[heapIndex] = m_MemProps.memoryHeaps[heapIndex].size; +            } +            if(m_Budget.m_VulkanUsage[heapIndex] == 0 && m_Budget.m_BlockBytesAtBudgetFetch[heapIndex] > 0) +            { +                m_Budget.m_VulkanUsage[heapIndex] = m_Budget.m_BlockBytesAtBudgetFetch[heapIndex]; +            } +        } +        m_Budget.m_OperationsSinceBudgetFetch = 0; +    } +} +#endif // VMA_MEMORY_BUDGET + +void VmaAllocator_T::FillAllocation(const VmaAllocation hAllocation, uint8_t pattern) +{ +    if(VMA_DEBUG_INITIALIZE_ALLOCATIONS && +        hAllocation->IsMappingAllowed() && +        (m_MemProps.memoryTypes[hAllocation->GetMemoryTypeIndex()].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0) +    { +        void* pData = VMA_NULL; +        VkResult res = Map(hAllocation, &pData); +        if(res == VK_SUCCESS) +        { +            memset(pData, (int)pattern, (size_t)hAllocation->GetSize()); +            FlushOrInvalidateAllocation(hAllocation, 0, VK_WHOLE_SIZE, VMA_CACHE_FLUSH); +            Unmap(hAllocation); +        } +        else +        { +            VMA_ASSERT(0 && "VMA_DEBUG_INITIALIZE_ALLOCATIONS is enabled, but couldn't map memory to fill allocation."); +        } +    } +} + +uint32_t VmaAllocator_T::GetGpuDefragmentationMemoryTypeBits() +{ +    uint32_t memoryTypeBits = m_GpuDefragmentationMemoryTypeBits.load(); +    if(memoryTypeBits == UINT32_MAX) +    { +        memoryTypeBits = CalculateGpuDefragmentationMemoryTypeBits(); +        m_GpuDefragmentationMemoryTypeBits.store(memoryTypeBits); +    } +    return memoryTypeBits; +} + +#if 
VMA_STATS_STRING_ENABLED +void VmaAllocator_T::PrintDetailedMap(VmaJsonWriter& json) +{ +    json.WriteString("DefaultPools"); +    json.BeginObject(); +    { +        for (uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) +        { +            VmaBlockVector* pBlockVector = m_pBlockVectors[memTypeIndex]; +            VmaDedicatedAllocationList& dedicatedAllocList = m_DedicatedAllocations[memTypeIndex]; +            if (pBlockVector != VMA_NULL) +            { +                json.BeginString("Type "); +                json.ContinueString(memTypeIndex); +                json.EndString(); +                json.BeginObject(); +                { +                    json.WriteString("PreferredBlockSize"); +                    json.WriteNumber(pBlockVector->GetPreferredBlockSize()); + +                    json.WriteString("Blocks"); +                    pBlockVector->PrintDetailedMap(json); + +                    json.WriteString("DedicatedAllocations"); +                    dedicatedAllocList.BuildStatsString(json); +                } +                json.EndObject(); +            } +        } +    } +    json.EndObject(); + +    json.WriteString("CustomPools"); +    json.BeginObject(); +    { +        VmaMutexLockRead lock(m_PoolsMutex, m_UseMutex); +        if (!m_Pools.IsEmpty()) +        { +            for (uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) +            { +                bool displayType = true; +                size_t index = 0; +                for (VmaPool pool = m_Pools.Front(); pool != VMA_NULL; pool = m_Pools.GetNext(pool)) +                { +                    VmaBlockVector& blockVector = pool->m_BlockVector; +                    if (blockVector.GetMemoryTypeIndex() == memTypeIndex) +                    { +                        if (displayType) +                        { +                            json.BeginString("Type "); +                            json.ContinueString(memTypeIndex); +                            json.EndString(); +                            json.BeginArray(); +                            displayType = false; +                        } + +                        json.BeginObject(); +                        { +                            json.WriteString("Name"); +                            json.BeginString(); +                            json.ContinueString((uint64_t)index++); +                            if (pool->GetName()) +                            { +                                json.ContinueString(" - "); +                                json.ContinueString(pool->GetName()); +                            } +                            json.EndString(); + +                            json.WriteString("PreferredBlockSize"); +                            json.WriteNumber(blockVector.GetPreferredBlockSize()); + +                            json.WriteString("Blocks"); +                            blockVector.PrintDetailedMap(json); + +                            json.WriteString("DedicatedAllocations"); +                            pool->m_DedicatedAllocations.BuildStatsString(json); +                        } +                        json.EndObject(); +                    } +                } + +                if (!displayType) +                    json.EndArray(); +            } +        } +    } +    json.EndObject(); +} +#endif // VMA_STATS_STRING_ENABLED +#endif // _VMA_ALLOCATOR_T_FUNCTIONS + + +#ifndef _VMA_PUBLIC_INTERFACE +VMA_CALL_PRE VkResult VMA_CALL_POST 
vmaCreateAllocator( +    const VmaAllocatorCreateInfo* pCreateInfo, +    VmaAllocator* pAllocator) +{ +    VMA_ASSERT(pCreateInfo && pAllocator); +    VMA_ASSERT(pCreateInfo->vulkanApiVersion == 0 || +        (VK_VERSION_MAJOR(pCreateInfo->vulkanApiVersion) == 1 && VK_VERSION_MINOR(pCreateInfo->vulkanApiVersion) <= 4)); +    VMA_DEBUG_LOG("vmaCreateAllocator"); +    *pAllocator = vma_new(pCreateInfo->pAllocationCallbacks, VmaAllocator_T)(pCreateInfo); +    VkResult result = (*pAllocator)->Init(pCreateInfo); +    if(result < 0) +    { +        vma_delete(pCreateInfo->pAllocationCallbacks, *pAllocator); +        *pAllocator = VK_NULL_HANDLE; +    } +    return result; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyAllocator( +    VmaAllocator allocator) +{ +    if(allocator != VK_NULL_HANDLE) +    { +        VMA_DEBUG_LOG("vmaDestroyAllocator"); +        VkAllocationCallbacks allocationCallbacks = allocator->m_AllocationCallbacks; // Have to copy the callbacks when destroying. +        vma_delete(&allocationCallbacks, allocator); +    } +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocatorInfo(VmaAllocator allocator, VmaAllocatorInfo* pAllocatorInfo) +{ +    VMA_ASSERT(allocator && pAllocatorInfo); +    pAllocatorInfo->instance = allocator->m_hInstance; +    pAllocatorInfo->physicalDevice = allocator->GetPhysicalDevice(); +    pAllocatorInfo->device = allocator->m_hDevice; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetPhysicalDeviceProperties( +    VmaAllocator allocator, +    const VkPhysicalDeviceProperties **ppPhysicalDeviceProperties) +{ +    VMA_ASSERT(allocator && ppPhysicalDeviceProperties); +    *ppPhysicalDeviceProperties = &allocator->m_PhysicalDeviceProperties; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryProperties( +    VmaAllocator allocator, +    const VkPhysicalDeviceMemoryProperties** ppPhysicalDeviceMemoryProperties) +{ +    VMA_ASSERT(allocator && ppPhysicalDeviceMemoryProperties); +    *ppPhysicalDeviceMemoryProperties = &allocator->m_MemProps; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryTypeProperties( +    VmaAllocator allocator, +    uint32_t memoryTypeIndex, +    VkMemoryPropertyFlags* pFlags) +{ +    VMA_ASSERT(allocator && pFlags); +    VMA_ASSERT(memoryTypeIndex < allocator->GetMemoryTypeCount()); +    *pFlags = allocator->m_MemProps.memoryTypes[memoryTypeIndex].propertyFlags; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaSetCurrentFrameIndex( +    VmaAllocator allocator, +    uint32_t frameIndex) +{ +    VMA_ASSERT(allocator); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    allocator->SetCurrentFrameIndex(frameIndex); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateStatistics( +    VmaAllocator allocator, +    VmaTotalStatistics* pStats) +{ +    VMA_ASSERT(allocator && pStats); +    VMA_DEBUG_GLOBAL_MUTEX_LOCK +    allocator->CalculateStatistics(pStats); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetHeapBudgets( +    VmaAllocator allocator, +    VmaBudget* pBudgets) +{ +    VMA_ASSERT(allocator && pBudgets); +    VMA_DEBUG_GLOBAL_MUTEX_LOCK +    allocator->GetHeapBudgets(pBudgets, 0, allocator->GetMemoryHeapCount()); +} + +#if VMA_STATS_STRING_ENABLED + +VMA_CALL_PRE void VMA_CALL_POST vmaBuildStatsString( +    VmaAllocator allocator, +    char** ppStatsString, +    VkBool32 detailedMap) +{ +    VMA_ASSERT(allocator && ppStatsString); +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    VmaStringBuilder sb(allocator->GetAllocationCallbacks()); +    { +        VmaBudget budgets[VK_MAX_MEMORY_HEAPS]; +        allocator->GetHeapBudgets(budgets, 0, allocator->GetMemoryHeapCount()); 
+ +        VmaTotalStatistics stats; +        allocator->CalculateStatistics(&stats); + +        VmaJsonWriter json(allocator->GetAllocationCallbacks(), sb); +        json.BeginObject(); +        { +            json.WriteString("General"); +            json.BeginObject(); +            { +                const VkPhysicalDeviceProperties& deviceProperties = allocator->m_PhysicalDeviceProperties; +                const VkPhysicalDeviceMemoryProperties& memoryProperties = allocator->m_MemProps; + +                json.WriteString("API"); +                json.WriteString("Vulkan"); + +                json.WriteString("apiVersion"); +                json.BeginString(); +                json.ContinueString(VK_VERSION_MAJOR(deviceProperties.apiVersion)); +                json.ContinueString("."); +                json.ContinueString(VK_VERSION_MINOR(deviceProperties.apiVersion)); +                json.ContinueString("."); +                json.ContinueString(VK_VERSION_PATCH(deviceProperties.apiVersion)); +                json.EndString(); + +                json.WriteString("GPU"); +                json.WriteString(deviceProperties.deviceName); +                json.WriteString("deviceType"); +                json.WriteNumber(static_cast<uint32_t>(deviceProperties.deviceType)); + +                json.WriteString("maxMemoryAllocationCount"); +                json.WriteNumber(deviceProperties.limits.maxMemoryAllocationCount); +                json.WriteString("bufferImageGranularity"); +                json.WriteNumber(deviceProperties.limits.bufferImageGranularity); +                json.WriteString("nonCoherentAtomSize"); +                json.WriteNumber(deviceProperties.limits.nonCoherentAtomSize); + +                json.WriteString("memoryHeapCount"); +                json.WriteNumber(memoryProperties.memoryHeapCount); +                json.WriteString("memoryTypeCount"); +                json.WriteNumber(memoryProperties.memoryTypeCount); +            } +            json.EndObject(); +        } +        { +            json.WriteString("Total"); +            VmaPrintDetailedStatistics(json, stats.total); +        } +        { +            json.WriteString("MemoryInfo"); +            json.BeginObject(); +            { +                for (uint32_t heapIndex = 0; heapIndex < allocator->GetMemoryHeapCount(); ++heapIndex) +                { +                    json.BeginString("Heap "); +                    json.ContinueString(heapIndex); +                    json.EndString(); +                    json.BeginObject(); +                    { +                        const VkMemoryHeap& heapInfo = allocator->m_MemProps.memoryHeaps[heapIndex]; +                        json.WriteString("Flags"); +                        json.BeginArray(true); +                        { +                            if (heapInfo.flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) +                                json.WriteString("DEVICE_LOCAL"); +                        #if VMA_VULKAN_VERSION >= 1001000 +                            if (heapInfo.flags & VK_MEMORY_HEAP_MULTI_INSTANCE_BIT) +                                json.WriteString("MULTI_INSTANCE"); +                        #endif + +                            VkMemoryHeapFlags flags = heapInfo.flags & +                                ~(VK_MEMORY_HEAP_DEVICE_LOCAL_BIT +                        #if VMA_VULKAN_VERSION >= 1001000 +                                    | VK_MEMORY_HEAP_MULTI_INSTANCE_BIT +                        #endif +                                    ); +  
                          if (flags != 0) +                                json.WriteNumber(flags); +                        } +                        json.EndArray(); + +                        json.WriteString("Size"); +                        json.WriteNumber(heapInfo.size); + +                        json.WriteString("Budget"); +                        json.BeginObject(); +                        { +                            json.WriteString("BudgetBytes"); +                            json.WriteNumber(budgets[heapIndex].budget); +                            json.WriteString("UsageBytes"); +                            json.WriteNumber(budgets[heapIndex].usage); +                        } +                        json.EndObject(); + +                        json.WriteString("Stats"); +                        VmaPrintDetailedStatistics(json, stats.memoryHeap[heapIndex]); + +                        json.WriteString("MemoryPools"); +                        json.BeginObject(); +                        { +                            for (uint32_t typeIndex = 0; typeIndex < allocator->GetMemoryTypeCount(); ++typeIndex) +                            { +                                if (allocator->MemoryTypeIndexToHeapIndex(typeIndex) == heapIndex) +                                { +                                    json.BeginString("Type "); +                                    json.ContinueString(typeIndex); +                                    json.EndString(); +                                    json.BeginObject(); +                                    { +                                        json.WriteString("Flags"); +                                        json.BeginArray(true); +                                        { +                                            VkMemoryPropertyFlags flags = allocator->m_MemProps.memoryTypes[typeIndex].propertyFlags; +                                            if (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) +                                                json.WriteString("DEVICE_LOCAL"); +                                            if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) +                                                json.WriteString("HOST_VISIBLE"); +                                            if (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) +                                                json.WriteString("HOST_COHERENT"); +                                            if (flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) +                                                json.WriteString("HOST_CACHED"); +                                            if (flags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) +                                                json.WriteString("LAZILY_ALLOCATED"); +                                        #if VMA_VULKAN_VERSION >= 1001000 +                                            if (flags & VK_MEMORY_PROPERTY_PROTECTED_BIT) +                                                json.WriteString("PROTECTED"); +                                        #endif +                                        #if VK_AMD_device_coherent_memory +                                            if (flags & VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY) +                                                json.WriteString("DEVICE_COHERENT_AMD"); +                                            if (flags & VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY) +                                                json.WriteString("DEVICE_UNCACHED_AMD"); +   
                                     #endif + +                                            flags &= ~(VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT +                                        #if VMA_VULKAN_VERSION >= 1001000 +                                                | VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT +                                        #endif +                                        #if VK_AMD_device_coherent_memory +                                                | VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY +                                                | VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY +                                        #endif +                                                | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT +                                                | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT +                                                | VK_MEMORY_PROPERTY_HOST_CACHED_BIT); +                                            if (flags != 0) +                                                json.WriteNumber(flags); +                                        } +                                        json.EndArray(); + +                                        json.WriteString("Stats"); +                                        VmaPrintDetailedStatistics(json, stats.memoryType[typeIndex]); +                                    } +                                    json.EndObject(); +                                } +                            } + +                        } +                        json.EndObject(); +                    } +                    json.EndObject(); +                } +            } +            json.EndObject(); +        } + +        if (detailedMap == VK_TRUE) +            allocator->PrintDetailedMap(json); + +        json.EndObject(); +    } + +    *ppStatsString = VmaCreateStringCopy(allocator->GetAllocationCallbacks(), sb.GetData(), sb.GetLength()); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaFreeStatsString( +    VmaAllocator allocator, +    char* pStatsString) +{ +    if(pStatsString != VMA_NULL) +    { +        VMA_ASSERT(allocator); +        VmaFreeString(allocator->GetAllocationCallbacks(), pStatsString); +    } +} + +#endif // VMA_STATS_STRING_ENABLED + +/* +This function is not protected by any mutex because it just reads immutable data. 
+*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndex( +    VmaAllocator allocator, +    uint32_t memoryTypeBits, +    const VmaAllocationCreateInfo* pAllocationCreateInfo, +    uint32_t* pMemoryTypeIndex) +{ +    VMA_ASSERT(allocator != VK_NULL_HANDLE); +    VMA_ASSERT(pAllocationCreateInfo != VMA_NULL); +    VMA_ASSERT(pMemoryTypeIndex != VMA_NULL); + +    return allocator->FindMemoryTypeIndex(memoryTypeBits, pAllocationCreateInfo, VmaBufferImageUsage::UNKNOWN, pMemoryTypeIndex); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo( +    VmaAllocator allocator, +    const VkBufferCreateInfo* pBufferCreateInfo, +    const VmaAllocationCreateInfo* pAllocationCreateInfo, +    uint32_t* pMemoryTypeIndex) +{ +    VMA_ASSERT(allocator != VK_NULL_HANDLE); +    VMA_ASSERT(pBufferCreateInfo != VMA_NULL); +    VMA_ASSERT(pAllocationCreateInfo != VMA_NULL); +    VMA_ASSERT(pMemoryTypeIndex != VMA_NULL); + +    const VkDevice hDev = allocator->m_hDevice; +    const VmaVulkanFunctions* funcs = &allocator->GetVulkanFunctions(); +    VkResult res; + +#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 +    if(funcs->vkGetDeviceBufferMemoryRequirements) +    { +        // Can query straight from VkBufferCreateInfo :) +        VkDeviceBufferMemoryRequirementsKHR devBufMemReq = {VK_STRUCTURE_TYPE_DEVICE_BUFFER_MEMORY_REQUIREMENTS_KHR}; +        devBufMemReq.pCreateInfo = pBufferCreateInfo; + +        VkMemoryRequirements2 memReq = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2}; +        (*funcs->vkGetDeviceBufferMemoryRequirements)(hDev, &devBufMemReq, &memReq); + +        res = allocator->FindMemoryTypeIndex( +            memReq.memoryRequirements.memoryTypeBits, pAllocationCreateInfo, +            VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), pMemoryTypeIndex); +    } +    else +#endif // VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 +    { +        // Must create a dummy buffer to query :( +        VkBuffer hBuffer = VK_NULL_HANDLE; +        res = funcs->vkCreateBuffer( +            hDev, pBufferCreateInfo, allocator->GetAllocationCallbacks(), &hBuffer); +        if(res == VK_SUCCESS) +        { +            VkMemoryRequirements memReq = {}; +            funcs->vkGetBufferMemoryRequirements(hDev, hBuffer, &memReq); + +            res = allocator->FindMemoryTypeIndex( +                memReq.memoryTypeBits, pAllocationCreateInfo, +                VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), pMemoryTypeIndex); + +            funcs->vkDestroyBuffer( +                hDev, hBuffer, allocator->GetAllocationCallbacks()); +        } +    } +    return res; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForImageInfo( +    VmaAllocator allocator, +    const VkImageCreateInfo* pImageCreateInfo, +    const VmaAllocationCreateInfo* pAllocationCreateInfo, +    uint32_t* pMemoryTypeIndex) +{ +    VMA_ASSERT(allocator != VK_NULL_HANDLE); +    VMA_ASSERT(pImageCreateInfo != VMA_NULL); +    VMA_ASSERT(pAllocationCreateInfo != VMA_NULL); +    VMA_ASSERT(pMemoryTypeIndex != VMA_NULL); + +    const VkDevice hDev = allocator->m_hDevice; +    const VmaVulkanFunctions* funcs = &allocator->GetVulkanFunctions(); +    VkResult res; + +#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 +    if(funcs->vkGetDeviceImageMemoryRequirements) +    { +        // Can query straight from VkImageCreateInfo :) +        VkDeviceImageMemoryRequirementsKHR devImgMemReq = 
{VK_STRUCTURE_TYPE_DEVICE_IMAGE_MEMORY_REQUIREMENTS_KHR}; +        devImgMemReq.pCreateInfo = pImageCreateInfo; +        VMA_ASSERT(pImageCreateInfo->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT_COPY && (pImageCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT_COPY) == 0 && +            "Cannot use this VkImageCreateInfo with vmaFindMemoryTypeIndexForImageInfo as I don't know what to pass as VkDeviceImageMemoryRequirements::planeAspect."); + +        VkMemoryRequirements2 memReq = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2}; +        (*funcs->vkGetDeviceImageMemoryRequirements)(hDev, &devImgMemReq, &memReq); + +        res = allocator->FindMemoryTypeIndex( +            memReq.memoryRequirements.memoryTypeBits, pAllocationCreateInfo, +            VmaBufferImageUsage(*pImageCreateInfo), pMemoryTypeIndex); +    } +    else +#endif // VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 +    { +        // Must create a dummy image to query :( +        VkImage hImage = VK_NULL_HANDLE; +        res = funcs->vkCreateImage( +            hDev, pImageCreateInfo, allocator->GetAllocationCallbacks(), &hImage); +        if(res == VK_SUCCESS) +        { +            VkMemoryRequirements memReq = {}; +            funcs->vkGetImageMemoryRequirements(hDev, hImage, &memReq); + +            res = allocator->FindMemoryTypeIndex( +                memReq.memoryTypeBits, pAllocationCreateInfo, +                VmaBufferImageUsage(*pImageCreateInfo), pMemoryTypeIndex); + +            funcs->vkDestroyImage( +                hDev, hImage, allocator->GetAllocationCallbacks()); +        } +    } +    return res; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreatePool( +    VmaAllocator allocator, +    const VmaPoolCreateInfo* pCreateInfo, +    VmaPool* pPool) +{ +    VMA_ASSERT(allocator && pCreateInfo && pPool); + +    VMA_DEBUG_LOG("vmaCreatePool"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    return allocator->CreatePool(pCreateInfo, pPool); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyPool( +    VmaAllocator allocator, +    VmaPool pool) +{ +    VMA_ASSERT(allocator); + +    if(pool == VK_NULL_HANDLE) +    { +        return; +    } + +    VMA_DEBUG_LOG("vmaDestroyPool"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    allocator->DestroyPool(pool); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolStatistics( +    VmaAllocator allocator, +    VmaPool pool, +    VmaStatistics* pPoolStats) +{ +    VMA_ASSERT(allocator && pool && pPoolStats); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    allocator->GetPoolStatistics(pool, pPoolStats); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaCalculatePoolStatistics( +    VmaAllocator allocator, +    VmaPool pool, +    VmaDetailedStatistics* pPoolStats) +{ +    VMA_ASSERT(allocator && pool && pPoolStats); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    allocator->CalculatePoolStatistics(pool, pPoolStats); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckPoolCorruption(VmaAllocator allocator, VmaPool pool) +{ +    VMA_ASSERT(allocator && pool); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    VMA_DEBUG_LOG("vmaCheckPoolCorruption"); + +    return allocator->CheckPoolCorruption(pool); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolName( +    VmaAllocator allocator, +    VmaPool pool, +    const char** ppName) +{ +    VMA_ASSERT(allocator && pool && ppName); + +    VMA_DEBUG_LOG("vmaGetPoolName"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    *ppName = pool->GetName(); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaSetPoolName( +    VmaAllocator allocator, +    VmaPool pool, +    const char* pName) +{ +    
VMA_ASSERT(allocator && pool); + +    VMA_DEBUG_LOG("vmaSetPoolName"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    pool->SetName(pName); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemory( +    VmaAllocator allocator, +    const VkMemoryRequirements* pVkMemoryRequirements, +    const VmaAllocationCreateInfo* pCreateInfo, +    VmaAllocation* pAllocation, +    VmaAllocationInfo* pAllocationInfo) +{ +    VMA_ASSERT(allocator && pVkMemoryRequirements && pCreateInfo && pAllocation); + +    VMA_DEBUG_LOG("vmaAllocateMemory"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    VkResult result = allocator->AllocateMemory( +        *pVkMemoryRequirements, +        false, // requiresDedicatedAllocation +        false, // prefersDedicatedAllocation +        VK_NULL_HANDLE, // dedicatedBuffer +        VK_NULL_HANDLE, // dedicatedImage +        VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage +        *pCreateInfo, +        VMA_SUBALLOCATION_TYPE_UNKNOWN, +        1, // allocationCount +        pAllocation); + +    if(pAllocationInfo != VMA_NULL && result == VK_SUCCESS) +    { +        allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); +    } + +    return result; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryPages( +    VmaAllocator allocator, +    const VkMemoryRequirements* pVkMemoryRequirements, +    const VmaAllocationCreateInfo* pCreateInfo, +    size_t allocationCount, +    VmaAllocation* pAllocations, +    VmaAllocationInfo* pAllocationInfo) +{ +    if(allocationCount == 0) +    { +        return VK_SUCCESS; +    } + +    VMA_ASSERT(allocator && pVkMemoryRequirements && pCreateInfo && pAllocations); + +    VMA_DEBUG_LOG("vmaAllocateMemoryPages"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    VkResult result = allocator->AllocateMemory( +        *pVkMemoryRequirements, +        false, // requiresDedicatedAllocation +        false, // prefersDedicatedAllocation +        VK_NULL_HANDLE, // dedicatedBuffer +        VK_NULL_HANDLE, // dedicatedImage +        VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage +        *pCreateInfo, +        VMA_SUBALLOCATION_TYPE_UNKNOWN, +        allocationCount, +        pAllocations); + +    if(pAllocationInfo != VMA_NULL && result == VK_SUCCESS) +    { +        for(size_t i = 0; i < allocationCount; ++i) +        { +            allocator->GetAllocationInfo(pAllocations[i], pAllocationInfo + i); +        } +    } + +    return result; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForBuffer( +    VmaAllocator allocator, +    VkBuffer buffer, +    const VmaAllocationCreateInfo* pCreateInfo, +    VmaAllocation* pAllocation, +    VmaAllocationInfo* pAllocationInfo) +{ +    VMA_ASSERT(allocator && buffer != VK_NULL_HANDLE && pCreateInfo && pAllocation); + +    VMA_DEBUG_LOG("vmaAllocateMemoryForBuffer"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    VkMemoryRequirements vkMemReq = {}; +    bool requiresDedicatedAllocation = false; +    bool prefersDedicatedAllocation = false; +    allocator->GetBufferMemoryRequirements(buffer, vkMemReq, +        requiresDedicatedAllocation, +        prefersDedicatedAllocation); + +    VkResult result = allocator->AllocateMemory( +        vkMemReq, +        requiresDedicatedAllocation, +        prefersDedicatedAllocation, +        buffer, // dedicatedBuffer +        VK_NULL_HANDLE, // dedicatedImage +        VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage +        *pCreateInfo, +        VMA_SUBALLOCATION_TYPE_BUFFER, +        1, // allocationCount +        pAllocation); + +    
if(pAllocationInfo && result == VK_SUCCESS) +    { +        allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); +    } + +    return result; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForImage( +    VmaAllocator allocator, +    VkImage image, +    const VmaAllocationCreateInfo* pCreateInfo, +    VmaAllocation* pAllocation, +    VmaAllocationInfo* pAllocationInfo) +{ +    VMA_ASSERT(allocator && image != VK_NULL_HANDLE && pCreateInfo && pAllocation); + +    VMA_DEBUG_LOG("vmaAllocateMemoryForImage"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    VkMemoryRequirements vkMemReq = {}; +    bool requiresDedicatedAllocation = false; +    bool prefersDedicatedAllocation  = false; +    allocator->GetImageMemoryRequirements(image, vkMemReq, +        requiresDedicatedAllocation, prefersDedicatedAllocation); + +    VkResult result = allocator->AllocateMemory( +        vkMemReq, +        requiresDedicatedAllocation, +        prefersDedicatedAllocation, +        VK_NULL_HANDLE, // dedicatedBuffer +        image, // dedicatedImage +        VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage +        *pCreateInfo, +        VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN, +        1, // allocationCount +        pAllocation); + +    if(pAllocationInfo && result == VK_SUCCESS) +    { +        allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); +    } + +    return result; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemory( +    VmaAllocator allocator, +    VmaAllocation allocation) +{ +    VMA_ASSERT(allocator); + +    if(allocation == VK_NULL_HANDLE) +    { +        return; +    } + +    VMA_DEBUG_LOG("vmaFreeMemory"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    allocator->FreeMemory( +        1, // allocationCount +        &allocation); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemoryPages( +    VmaAllocator allocator, +    size_t allocationCount, +    const VmaAllocation* pAllocations) +{ +    if(allocationCount == 0) +    { +        return; +    } + +    VMA_ASSERT(allocator); + +    VMA_DEBUG_LOG("vmaFreeMemoryPages"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    allocator->FreeMemory(allocationCount, pAllocations); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo( +    VmaAllocator allocator, +    VmaAllocation allocation, +    VmaAllocationInfo* pAllocationInfo) +{ +    VMA_ASSERT(allocator && allocation && pAllocationInfo); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    allocator->GetAllocationInfo(allocation, pAllocationInfo); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo2( +    VmaAllocator allocator, +    VmaAllocation allocation, +    VmaAllocationInfo2* pAllocationInfo) +{ +    VMA_ASSERT(allocator && allocation && pAllocationInfo); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    allocator->GetAllocationInfo2(allocation, pAllocationInfo); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationUserData( +    VmaAllocator allocator, +    VmaAllocation allocation, +    void* pUserData) +{ +    VMA_ASSERT(allocator && allocation); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    allocation->SetUserData(allocator, pUserData); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationName( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    const char* VMA_NULLABLE pName) +{ +    allocation->SetName(allocator, pName); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationMemoryProperties( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    VkMemoryPropertyFlags* VMA_NOT_NULL pFlags) +{ +    
VMA_ASSERT(allocator && allocation && pFlags); +    const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex(); +    *pFlags = allocator->m_MemProps.memoryTypes[memTypeIndex].propertyFlags; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaMapMemory( +    VmaAllocator allocator, +    VmaAllocation allocation, +    void** ppData) +{ +    VMA_ASSERT(allocator && allocation && ppData); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    return allocator->Map(allocation, ppData); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaUnmapMemory( +    VmaAllocator allocator, +    VmaAllocation allocation) +{ +    VMA_ASSERT(allocator && allocation); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    allocator->Unmap(allocation); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocation( +    VmaAllocator allocator, +    VmaAllocation allocation, +    VkDeviceSize offset, +    VkDeviceSize size) +{ +    VMA_ASSERT(allocator && allocation); + +    VMA_DEBUG_LOG("vmaFlushAllocation"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    return allocator->FlushOrInvalidateAllocation(allocation, offset, size, VMA_CACHE_FLUSH); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocation( +    VmaAllocator allocator, +    VmaAllocation allocation, +    VkDeviceSize offset, +    VkDeviceSize size) +{ +    VMA_ASSERT(allocator && allocation); + +    VMA_DEBUG_LOG("vmaInvalidateAllocation"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    return allocator->FlushOrInvalidateAllocation(allocation, offset, size, VMA_CACHE_INVALIDATE); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocations( +    VmaAllocator allocator, +    uint32_t allocationCount, +    const VmaAllocation* allocations, +    const VkDeviceSize* offsets, +    const VkDeviceSize* sizes) +{ +    VMA_ASSERT(allocator); + +    if(allocationCount == 0) +    { +        return VK_SUCCESS; +    } + +    VMA_ASSERT(allocations); + +    VMA_DEBUG_LOG("vmaFlushAllocations"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    return allocator->FlushOrInvalidateAllocations(allocationCount, allocations, offsets, sizes, VMA_CACHE_FLUSH); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocations( +    VmaAllocator allocator, +    uint32_t allocationCount, +    const VmaAllocation* allocations, +    const VkDeviceSize* offsets, +    const VkDeviceSize* sizes) +{ +    VMA_ASSERT(allocator); + +    if(allocationCount == 0) +    { +        return VK_SUCCESS; +    } + +    VMA_ASSERT(allocations); + +    VMA_DEBUG_LOG("vmaInvalidateAllocations"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    return allocator->FlushOrInvalidateAllocations(allocationCount, allocations, offsets, sizes, VMA_CACHE_INVALIDATE); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyMemoryToAllocation( +    VmaAllocator allocator, +    const void* pSrcHostPointer, +    VmaAllocation dstAllocation, +    VkDeviceSize dstAllocationLocalOffset, +    VkDeviceSize size) +{ +    VMA_ASSERT(allocator && pSrcHostPointer && dstAllocation); + +    if(size == 0) +    { +        return VK_SUCCESS; +    } + +    VMA_DEBUG_LOG("vmaCopyMemoryToAllocation"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    return allocator->CopyMemoryToAllocation(pSrcHostPointer, dstAllocation, dstAllocationLocalOffset, size); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyAllocationToMemory( +    VmaAllocator allocator, +    VmaAllocation srcAllocation, +    VkDeviceSize srcAllocationLocalOffset, +    void* pDstHostPointer, +    VkDeviceSize size) +{ +    VMA_ASSERT(allocator && srcAllocation && pDstHostPointer); + +    if(size == 0) +    { +        return 
VK_SUCCESS; +    } + +    VMA_DEBUG_LOG("vmaCopyAllocationToMemory"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    return allocator->CopyAllocationToMemory(srcAllocation, srcAllocationLocalOffset, pDstHostPointer, size); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckCorruption( +    VmaAllocator allocator, +    uint32_t memoryTypeBits) +{ +    VMA_ASSERT(allocator); + +    VMA_DEBUG_LOG("vmaCheckCorruption"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    return allocator->CheckCorruption(memoryTypeBits); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentation( +    VmaAllocator allocator, +    const VmaDefragmentationInfo* pInfo, +    VmaDefragmentationContext* pContext) +{ +    VMA_ASSERT(allocator && pInfo && pContext); + +    VMA_DEBUG_LOG("vmaBeginDefragmentation"); + +    if (pInfo->pool != VMA_NULL) +    { +        // Check if run on supported algorithms +        if (pInfo->pool->m_BlockVector.GetAlgorithm() & VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT) +            return VK_ERROR_FEATURE_NOT_PRESENT; +    } + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    *pContext = vma_new(allocator, VmaDefragmentationContext_T)(allocator, *pInfo); +    return VK_SUCCESS; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaEndDefragmentation( +    VmaAllocator allocator, +    VmaDefragmentationContext context, +    VmaDefragmentationStats* pStats) +{ +    VMA_ASSERT(allocator && context); + +    VMA_DEBUG_LOG("vmaEndDefragmentation"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    if (pStats) +        context->GetStats(*pStats); +    vma_delete(allocator, context); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentationPass( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaDefragmentationContext VMA_NOT_NULL context, +    VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo) +{ +    VMA_ASSERT(context && pPassInfo); + +    VMA_DEBUG_LOG("vmaBeginDefragmentationPass"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    return context->DefragmentPassBegin(*pPassInfo); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaEndDefragmentationPass( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaDefragmentationContext VMA_NOT_NULL context, +    VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo) +{ +    VMA_ASSERT(context && pPassInfo); + +    VMA_DEBUG_LOG("vmaEndDefragmentationPass"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    return context->DefragmentPassEnd(*pPassInfo); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory( +    VmaAllocator allocator, +    VmaAllocation allocation, +    VkBuffer buffer) +{ +    VMA_ASSERT(allocator && allocation && buffer); + +    VMA_DEBUG_LOG("vmaBindBufferMemory"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    return allocator->BindBufferMemory(allocation, 0, buffer, VMA_NULL); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory2( +    VmaAllocator allocator, +    VmaAllocation allocation, +    VkDeviceSize allocationLocalOffset, +    VkBuffer buffer, +    const void* pNext) +{ +    VMA_ASSERT(allocator && allocation && buffer); + +    VMA_DEBUG_LOG("vmaBindBufferMemory2"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    return allocator->BindBufferMemory(allocation, allocationLocalOffset, buffer, pNext); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory( +    VmaAllocator allocator, +    VmaAllocation allocation, +    VkImage image) +{ +    VMA_ASSERT(allocator && allocation && image); + +    VMA_DEBUG_LOG("vmaBindImageMemory"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    return allocator->BindImageMemory(allocation, 0, image, VMA_NULL); +} + +VMA_CALL_PRE 
VkResult VMA_CALL_POST vmaBindImageMemory2( +    VmaAllocator allocator, +    VmaAllocation allocation, +    VkDeviceSize allocationLocalOffset, +    VkImage image, +    const void* pNext) +{ +    VMA_ASSERT(allocator && allocation && image); + +    VMA_DEBUG_LOG("vmaBindImageMemory2"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +        return allocator->BindImageMemory(allocation, allocationLocalOffset, image, pNext); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBuffer( +    VmaAllocator allocator, +    const VkBufferCreateInfo* pBufferCreateInfo, +    const VmaAllocationCreateInfo* pAllocationCreateInfo, +    VkBuffer* pBuffer, +    VmaAllocation* pAllocation, +    VmaAllocationInfo* pAllocationInfo) +{ +    VMA_ASSERT(allocator && pBufferCreateInfo && pAllocationCreateInfo && pBuffer && pAllocation); + +    if(pBufferCreateInfo->size == 0) +    { +        return VK_ERROR_INITIALIZATION_FAILED; +    } +    if((pBufferCreateInfo->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY) != 0 && +        !allocator->m_UseKhrBufferDeviceAddress) +    { +        VMA_ASSERT(0 && "Creating a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT is not valid if VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT was not used."); +        return VK_ERROR_INITIALIZATION_FAILED; +    } + +    VMA_DEBUG_LOG("vmaCreateBuffer"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    *pBuffer = VK_NULL_HANDLE; +    *pAllocation = VK_NULL_HANDLE; + +    // 1. Create VkBuffer. +    VkResult res = (*allocator->GetVulkanFunctions().vkCreateBuffer)( +        allocator->m_hDevice, +        pBufferCreateInfo, +        allocator->GetAllocationCallbacks(), +        pBuffer); +    if(res >= 0) +    { +        // 2. vkGetBufferMemoryRequirements. +        VkMemoryRequirements vkMemReq = {}; +        bool requiresDedicatedAllocation = false; +        bool prefersDedicatedAllocation  = false; +        allocator->GetBufferMemoryRequirements(*pBuffer, vkMemReq, +            requiresDedicatedAllocation, prefersDedicatedAllocation); + +        // 3. Allocate memory using allocator. +        res = allocator->AllocateMemory( +            vkMemReq, +            requiresDedicatedAllocation, +            prefersDedicatedAllocation, +            *pBuffer, // dedicatedBuffer +            VK_NULL_HANDLE, // dedicatedImage +            VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), // dedicatedBufferImageUsage +            *pAllocationCreateInfo, +            VMA_SUBALLOCATION_TYPE_BUFFER, +            1, // allocationCount +            pAllocation); + +        if(res >= 0) +        { +            // 3. Bind buffer with memory. +            if((pAllocationCreateInfo->flags & VMA_ALLOCATION_CREATE_DONT_BIND_BIT) == 0) +            { +                res = allocator->BindBufferMemory(*pAllocation, 0, *pBuffer, VMA_NULL); +            } +            if(res >= 0) +            { +                // All steps succeeded. 
+                #if VMA_STATS_STRING_ENABLED +                    (*pAllocation)->InitBufferUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5); +                #endif +                if(pAllocationInfo != VMA_NULL) +                { +                    allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); +                } + +                return VK_SUCCESS; +            } +            allocator->FreeMemory( +                1, // allocationCount +                pAllocation); +            *pAllocation = VK_NULL_HANDLE; +            (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); +            *pBuffer = VK_NULL_HANDLE; +            return res; +        } +        (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); +        *pBuffer = VK_NULL_HANDLE; +        return res; +    } +    return res; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBufferWithAlignment( +    VmaAllocator allocator, +    const VkBufferCreateInfo* pBufferCreateInfo, +    const VmaAllocationCreateInfo* pAllocationCreateInfo, +    VkDeviceSize minAlignment, +    VkBuffer* pBuffer, +    VmaAllocation* pAllocation, +    VmaAllocationInfo* pAllocationInfo) +{ +    VMA_ASSERT(allocator && pBufferCreateInfo && pAllocationCreateInfo && VmaIsPow2(minAlignment) && pBuffer && pAllocation); + +    if(pBufferCreateInfo->size == 0) +    { +        return VK_ERROR_INITIALIZATION_FAILED; +    } +    if((pBufferCreateInfo->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY) != 0 && +        !allocator->m_UseKhrBufferDeviceAddress) +    { +        VMA_ASSERT(0 && "Creating a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT is not valid if VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT was not used."); +        return VK_ERROR_INITIALIZATION_FAILED; +    } + +    VMA_DEBUG_LOG("vmaCreateBufferWithAlignment"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    *pBuffer = VK_NULL_HANDLE; +    *pAllocation = VK_NULL_HANDLE; + +    // 1. Create VkBuffer. +    VkResult res = (*allocator->GetVulkanFunctions().vkCreateBuffer)( +        allocator->m_hDevice, +        pBufferCreateInfo, +        allocator->GetAllocationCallbacks(), +        pBuffer); +    if(res >= 0) +    { +        // 2. vkGetBufferMemoryRequirements. +        VkMemoryRequirements vkMemReq = {}; +        bool requiresDedicatedAllocation = false; +        bool prefersDedicatedAllocation  = false; +        allocator->GetBufferMemoryRequirements(*pBuffer, vkMemReq, +            requiresDedicatedAllocation, prefersDedicatedAllocation); + +        // 2a. Include minAlignment +        vkMemReq.alignment = VMA_MAX(vkMemReq.alignment, minAlignment); + +        // 3. Allocate memory using allocator. +        res = allocator->AllocateMemory( +            vkMemReq, +            requiresDedicatedAllocation, +            prefersDedicatedAllocation, +            *pBuffer, // dedicatedBuffer +            VK_NULL_HANDLE, // dedicatedImage +            VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), // dedicatedBufferImageUsage +            *pAllocationCreateInfo, +            VMA_SUBALLOCATION_TYPE_BUFFER, +            1, // allocationCount +            pAllocation); + +        if(res >= 0) +        { +            // 3. Bind buffer with memory. 
+            if((pAllocationCreateInfo->flags & VMA_ALLOCATION_CREATE_DONT_BIND_BIT) == 0) +            { +                res = allocator->BindBufferMemory(*pAllocation, 0, *pBuffer, VMA_NULL); +            } +            if(res >= 0) +            { +                // All steps succeeded. +                #if VMA_STATS_STRING_ENABLED +                    (*pAllocation)->InitBufferUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5); +                #endif +                if(pAllocationInfo != VMA_NULL) +                { +                    allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); +                } + +                return VK_SUCCESS; +            } +            allocator->FreeMemory( +                1, // allocationCount +                pAllocation); +            *pAllocation = VK_NULL_HANDLE; +            (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); +            *pBuffer = VK_NULL_HANDLE; +            return res; +        } +        (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); +        *pBuffer = VK_NULL_HANDLE; +        return res; +    } +    return res; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, +    VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer) +{ +    return vmaCreateAliasingBuffer2(allocator, allocation, 0, pBufferCreateInfo, pBuffer); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer2( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    VkDeviceSize allocationLocalOffset, +    const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, +    VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer) +{ +    VMA_ASSERT(allocator && pBufferCreateInfo && pBuffer && allocation); +    VMA_ASSERT(allocationLocalOffset + pBufferCreateInfo->size <= allocation->GetSize()); + +    VMA_DEBUG_LOG("vmaCreateAliasingBuffer2"); + +    *pBuffer = VK_NULL_HANDLE; + +    if (pBufferCreateInfo->size == 0) +    { +        return VK_ERROR_INITIALIZATION_FAILED; +    } +    if ((pBufferCreateInfo->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY) != 0 && +        !allocator->m_UseKhrBufferDeviceAddress) +    { +        VMA_ASSERT(0 && "Creating a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT is not valid if VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT was not used."); +        return VK_ERROR_INITIALIZATION_FAILED; +    } + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    // 1. Create VkBuffer. +    VkResult res = (*allocator->GetVulkanFunctions().vkCreateBuffer)( +        allocator->m_hDevice, +        pBufferCreateInfo, +        allocator->GetAllocationCallbacks(), +        pBuffer); +    if (res >= 0) +    { +        // 2. Bind buffer with memory. 
+        res = allocator->BindBufferMemory(allocation, allocationLocalOffset, *pBuffer, VMA_NULL); +        if (res >= 0) +        { +            return VK_SUCCESS; +        } +        (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); +    } +    return res; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyBuffer( +    VmaAllocator allocator, +    VkBuffer buffer, +    VmaAllocation allocation) +{ +    VMA_ASSERT(allocator); + +    if(buffer == VK_NULL_HANDLE && allocation == VK_NULL_HANDLE) +    { +        return; +    } + +    VMA_DEBUG_LOG("vmaDestroyBuffer"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    if(buffer != VK_NULL_HANDLE) +    { +        (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, buffer, allocator->GetAllocationCallbacks()); +    } + +    if(allocation != VK_NULL_HANDLE) +    { +        allocator->FreeMemory( +            1, // allocationCount +            &allocation); +    } +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage( +    VmaAllocator allocator, +    const VkImageCreateInfo* pImageCreateInfo, +    const VmaAllocationCreateInfo* pAllocationCreateInfo, +    VkImage* pImage, +    VmaAllocation* pAllocation, +    VmaAllocationInfo* pAllocationInfo) +{ +    VMA_ASSERT(allocator && pImageCreateInfo && pAllocationCreateInfo && pImage && pAllocation); + +    if(pImageCreateInfo->extent.width == 0 || +        pImageCreateInfo->extent.height == 0 || +        pImageCreateInfo->extent.depth == 0 || +        pImageCreateInfo->mipLevels == 0 || +        pImageCreateInfo->arrayLayers == 0) +    { +        return VK_ERROR_INITIALIZATION_FAILED; +    } + +    VMA_DEBUG_LOG("vmaCreateImage"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    *pImage = VK_NULL_HANDLE; +    *pAllocation = VK_NULL_HANDLE; + +    // 1. Create VkImage. +    VkResult res = (*allocator->GetVulkanFunctions().vkCreateImage)( +        allocator->m_hDevice, +        pImageCreateInfo, +        allocator->GetAllocationCallbacks(), +        pImage); +    if(res == VK_SUCCESS) +    { +        VmaSuballocationType suballocType = pImageCreateInfo->tiling == VK_IMAGE_TILING_OPTIMAL ? +            VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL : +            VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR; + +        // 2. Allocate memory using allocator. +        VkMemoryRequirements vkMemReq = {}; +        bool requiresDedicatedAllocation = false; +        bool prefersDedicatedAllocation  = false; +        allocator->GetImageMemoryRequirements(*pImage, vkMemReq, +            requiresDedicatedAllocation, prefersDedicatedAllocation); + +        res = allocator->AllocateMemory( +            vkMemReq, +            requiresDedicatedAllocation, +            prefersDedicatedAllocation, +            VK_NULL_HANDLE, // dedicatedBuffer +            *pImage, // dedicatedImage +            VmaBufferImageUsage(*pImageCreateInfo), // dedicatedBufferImageUsage +            *pAllocationCreateInfo, +            suballocType, +            1, // allocationCount +            pAllocation); + +        if(res == VK_SUCCESS) +        { +            // 3. Bind image with memory. +            if((pAllocationCreateInfo->flags & VMA_ALLOCATION_CREATE_DONT_BIND_BIT) == 0) +            { +                res = allocator->BindImageMemory(*pAllocation, 0, *pImage, VMA_NULL); +            } +            if(res == VK_SUCCESS) +            { +                // All steps succeeded. 
+                #if VMA_STATS_STRING_ENABLED +                    (*pAllocation)->InitImageUsage(*pImageCreateInfo); +                #endif +                if(pAllocationInfo != VMA_NULL) +                { +                    allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); +                } + +                return VK_SUCCESS; +            } +            allocator->FreeMemory( +                1, // allocationCount +                pAllocation); +            *pAllocation = VK_NULL_HANDLE; +            (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, *pImage, allocator->GetAllocationCallbacks()); +            *pImage = VK_NULL_HANDLE; +            return res; +        } +        (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, *pImage, allocator->GetAllocationCallbacks()); +        *pImage = VK_NULL_HANDLE; +        return res; +    } +    return res; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, +    VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage) +{ +    return vmaCreateAliasingImage2(allocator, allocation, 0, pImageCreateInfo, pImage); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage2( +    VmaAllocator VMA_NOT_NULL allocator, +    VmaAllocation VMA_NOT_NULL allocation, +    VkDeviceSize allocationLocalOffset, +    const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, +    VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage) +{ +    VMA_ASSERT(allocator && pImageCreateInfo && pImage && allocation); + +    *pImage = VK_NULL_HANDLE; + +    VMA_DEBUG_LOG("vmaCreateImage2"); + +    if (pImageCreateInfo->extent.width == 0 || +        pImageCreateInfo->extent.height == 0 || +        pImageCreateInfo->extent.depth == 0 || +        pImageCreateInfo->mipLevels == 0 || +        pImageCreateInfo->arrayLayers == 0) +    { +        return VK_ERROR_INITIALIZATION_FAILED; +    } + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    // 1. Create VkImage. +    VkResult res = (*allocator->GetVulkanFunctions().vkCreateImage)( +        allocator->m_hDevice, +        pImageCreateInfo, +        allocator->GetAllocationCallbacks(), +        pImage); +    if (res >= 0) +    { +        // 2. Bind image with memory. 
+        res = allocator->BindImageMemory(allocation, allocationLocalOffset, *pImage, VMA_NULL); +        if (res >= 0) +        { +            return VK_SUCCESS; +        } +        (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, *pImage, allocator->GetAllocationCallbacks()); +    } +    return res; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyImage( +    VmaAllocator VMA_NOT_NULL allocator, +    VkImage VMA_NULLABLE_NON_DISPATCHABLE image, +    VmaAllocation VMA_NULLABLE allocation) +{ +    VMA_ASSERT(allocator); + +    if(image == VK_NULL_HANDLE && allocation == VK_NULL_HANDLE) +    { +        return; +    } + +    VMA_DEBUG_LOG("vmaDestroyImage"); + +    VMA_DEBUG_GLOBAL_MUTEX_LOCK + +    if(image != VK_NULL_HANDLE) +    { +        (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, image, allocator->GetAllocationCallbacks()); +    } +    if(allocation != VK_NULL_HANDLE) +    { +        allocator->FreeMemory( +            1, // allocationCount +            &allocation); +    } +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateVirtualBlock( +    const VmaVirtualBlockCreateInfo* VMA_NOT_NULL pCreateInfo, +    VmaVirtualBlock VMA_NULLABLE * VMA_NOT_NULL pVirtualBlock) +{ +    VMA_ASSERT(pCreateInfo && pVirtualBlock); +    VMA_ASSERT(pCreateInfo->size > 0); +    VMA_DEBUG_LOG("vmaCreateVirtualBlock"); +    VMA_DEBUG_GLOBAL_MUTEX_LOCK; +    *pVirtualBlock = vma_new(pCreateInfo->pAllocationCallbacks, VmaVirtualBlock_T)(*pCreateInfo); +    VkResult res = (*pVirtualBlock)->Init(); +    if(res < 0) +    { +        vma_delete(pCreateInfo->pAllocationCallbacks, *pVirtualBlock); +        *pVirtualBlock = VK_NULL_HANDLE; +    } +    return res; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyVirtualBlock(VmaVirtualBlock VMA_NULLABLE virtualBlock) +{ +    if(virtualBlock != VK_NULL_HANDLE) +    { +        VMA_DEBUG_LOG("vmaDestroyVirtualBlock"); +        VMA_DEBUG_GLOBAL_MUTEX_LOCK; +        VkAllocationCallbacks allocationCallbacks = virtualBlock->m_AllocationCallbacks; // Have to copy the callbacks when destroying. +        vma_delete(&allocationCallbacks, virtualBlock); +    } +} + +VMA_CALL_PRE VkBool32 VMA_CALL_POST vmaIsVirtualBlockEmpty(VmaVirtualBlock VMA_NOT_NULL virtualBlock) +{ +    VMA_ASSERT(virtualBlock != VK_NULL_HANDLE); +    VMA_DEBUG_LOG("vmaIsVirtualBlockEmpty"); +    VMA_DEBUG_GLOBAL_MUTEX_LOCK; +    return virtualBlock->IsEmpty() ? 
VK_TRUE : VK_FALSE; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualAllocationInfo(VmaVirtualBlock VMA_NOT_NULL virtualBlock, +    VmaVirtualAllocation VMA_NOT_NULL_NON_DISPATCHABLE allocation, VmaVirtualAllocationInfo* VMA_NOT_NULL pVirtualAllocInfo) +{ +    VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pVirtualAllocInfo != VMA_NULL); +    VMA_DEBUG_LOG("vmaGetVirtualAllocationInfo"); +    VMA_DEBUG_GLOBAL_MUTEX_LOCK; +    virtualBlock->GetAllocationInfo(allocation, *pVirtualAllocInfo); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaVirtualAllocate(VmaVirtualBlock VMA_NOT_NULL virtualBlock, +    const VmaVirtualAllocationCreateInfo* VMA_NOT_NULL pCreateInfo, VmaVirtualAllocation VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pAllocation, +    VkDeviceSize* VMA_NULLABLE pOffset) +{ +    VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pCreateInfo != VMA_NULL && pAllocation != VMA_NULL); +    VMA_DEBUG_LOG("vmaVirtualAllocate"); +    VMA_DEBUG_GLOBAL_MUTEX_LOCK; +    return virtualBlock->Allocate(*pCreateInfo, *pAllocation, pOffset); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaVirtualFree(VmaVirtualBlock VMA_NOT_NULL virtualBlock, VmaVirtualAllocation VMA_NULLABLE_NON_DISPATCHABLE allocation) +{ +    if(allocation != VK_NULL_HANDLE) +    { +        VMA_ASSERT(virtualBlock != VK_NULL_HANDLE); +        VMA_DEBUG_LOG("vmaVirtualFree"); +        VMA_DEBUG_GLOBAL_MUTEX_LOCK; +        virtualBlock->Free(allocation); +    } +} + +VMA_CALL_PRE void VMA_CALL_POST vmaClearVirtualBlock(VmaVirtualBlock VMA_NOT_NULL virtualBlock) +{ +    VMA_ASSERT(virtualBlock != VK_NULL_HANDLE); +    VMA_DEBUG_LOG("vmaClearVirtualBlock"); +    VMA_DEBUG_GLOBAL_MUTEX_LOCK; +    virtualBlock->Clear(); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaSetVirtualAllocationUserData(VmaVirtualBlock VMA_NOT_NULL virtualBlock, +    VmaVirtualAllocation VMA_NOT_NULL_NON_DISPATCHABLE allocation, void* VMA_NULLABLE pUserData) +{ +    VMA_ASSERT(virtualBlock != VK_NULL_HANDLE); +    VMA_DEBUG_LOG("vmaSetVirtualAllocationUserData"); +    VMA_DEBUG_GLOBAL_MUTEX_LOCK; +    virtualBlock->SetAllocationUserData(allocation, pUserData); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualBlockStatistics(VmaVirtualBlock VMA_NOT_NULL virtualBlock, +    VmaStatistics* VMA_NOT_NULL pStats) +{ +    VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pStats != VMA_NULL); +    VMA_DEBUG_LOG("vmaGetVirtualBlockStatistics"); +    VMA_DEBUG_GLOBAL_MUTEX_LOCK; +    virtualBlock->GetStatistics(*pStats); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateVirtualBlockStatistics(VmaVirtualBlock VMA_NOT_NULL virtualBlock, +    VmaDetailedStatistics* VMA_NOT_NULL pStats) +{ +    VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pStats != VMA_NULL); +    VMA_DEBUG_LOG("vmaCalculateVirtualBlockStatistics"); +    VMA_DEBUG_GLOBAL_MUTEX_LOCK; +    virtualBlock->CalculateDetailedStatistics(*pStats); +} + +#if VMA_STATS_STRING_ENABLED + +VMA_CALL_PRE void VMA_CALL_POST vmaBuildVirtualBlockStatsString(VmaVirtualBlock VMA_NOT_NULL virtualBlock, +    char* VMA_NULLABLE * VMA_NOT_NULL ppStatsString, VkBool32 detailedMap) +{ +    VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && ppStatsString != VMA_NULL); +    VMA_DEBUG_GLOBAL_MUTEX_LOCK; +    const VkAllocationCallbacks* allocationCallbacks = virtualBlock->GetAllocationCallbacks(); +    VmaStringBuilder sb(allocationCallbacks); +    virtualBlock->BuildStatsString(detailedMap != VK_FALSE, sb); +    *ppStatsString = VmaCreateStringCopy(allocationCallbacks, sb.GetData(), sb.GetLength()); +} + +VMA_CALL_PRE void VMA_CALL_POST 
vmaFreeVirtualBlockStatsString(VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+    char* VMA_NULLABLE pStatsString)
+{
+    if(pStatsString != VMA_NULL)
+    {
+        VMA_ASSERT(virtualBlock != VK_NULL_HANDLE);
+        VMA_DEBUG_GLOBAL_MUTEX_LOCK;
+        VmaFreeString(virtualBlock->GetAllocationCallbacks(), pStatsString);
+    }
+}
+#if VMA_EXTERNAL_MEMORY_WIN32
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaGetMemoryWin32Handle(VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation, HANDLE hTargetProcess, HANDLE* VMA_NOT_NULL pHandle)
+{
+    VMA_ASSERT(allocator && allocation && pHandle);
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK;
+    return allocation->GetWin32Handle(allocator, hTargetProcess, pHandle);
+}
+#endif // VMA_EXTERNAL_MEMORY_WIN32
+#endif // VMA_STATS_STRING_ENABLED
+#endif // _VMA_PUBLIC_INTERFACE
+#endif // VMA_IMPLEMENTATION
+
+/**
+\page quick_start Quick start
+
+\section quick_start_project_setup Project setup
+
+Vulkan Memory Allocator comes in the form of a "stb-style" single header file.
+While you can pull the entire repository e.g. as a Git submodule and a CMake script is also provided,
+you don't need to build it as a separate library project.
+You can add the file "vk_mem_alloc.h" directly to your project and commit it to your code repository next to your other source files.
+
+"Single header" doesn't mean that everything is contained in C/C++ declarations,
+as tends to be the case with inline functions or C++ templates.
+It means that the implementation is bundled with the interface in a single file and needs to be extracted using a preprocessor macro.
+If you don't do it properly, it will result in linker errors.
+
+To do it properly:
+
+-# Include the "vk_mem_alloc.h" file in each CPP file where you want to use the library.
+   This includes the declarations of all members of the library.
+-# In exactly one CPP file, define the following macro before this include.
+   It also enables the internal definitions.
+
+\code
+#define VMA_IMPLEMENTATION
+#include "vk_mem_alloc.h"
+\endcode
+
+It may be a good idea to create a dedicated CPP file just for this purpose, e.g. "VmaUsage.cpp".
+
+This library includes header `<vulkan/vulkan.h>`, which in turn
+includes `<windows.h>` on Windows. If you need some specific macros defined
+before including these headers (like `WIN32_LEAN_AND_MEAN` or
+`WINVER` for Windows, `VK_USE_PLATFORM_WIN32_KHR` for Vulkan), you must define
+them before every `#include` of this library.
+It may be a good idea to create a dedicated header file for this purpose, e.g. "VmaUsage.h",
+that will be included in other source files instead of the VMA header directly.
+
+This library is written in C++, but has a C-compatible interface.
+Thus, you can include and use "vk_mem_alloc.h" in C or C++ code, but the full
+implementation with the `VMA_IMPLEMENTATION` macro must be compiled as C++, NOT as C.
+Some features of C++14 are used and required. Features of C++20 are used optionally when available.
+Some headers of the standard C and C++ library are used, but STL containers, RTTI, and C++ exceptions are not used.
+
+
+\section quick_start_initialization Initialization
+
+VMA offers a library interface in a style similar to Vulkan, with object handles like #VmaAllocation,
+structures describing parameters of objects to be created like #VmaAllocationCreateInfo,
+and error codes returned from functions using the `VkResult` type.
+
+The first and the main object that needs to be created is #VmaAllocator.
+It represents the initialization of the entire library.
+Only one such object should be created per `VkDevice`.
+You should create it at program startup, after `VkDevice` has been created, and before any device memory allocation is made.
+It must be destroyed before `VkDevice` is destroyed.
+
+At program startup:
+
+-# Initialize Vulkan to have `VkInstance`, `VkPhysicalDevice`, and `VkDevice` objects.
+-# Fill the VmaAllocatorCreateInfo structure and call vmaCreateAllocator() to create the #VmaAllocator object.
+
+Only the members `physicalDevice`, `device`, and `instance` are required.
+However, you should inform the library which Vulkan version you use by setting
+VmaAllocatorCreateInfo::vulkanApiVersion and which extensions you enabled
+by setting VmaAllocatorCreateInfo::flags.
+Otherwise, VMA would use only features of Vulkan 1.0 core with no extensions.
+See below for details.
+
+\subsection quick_start_initialization_selecting_vulkan_version Selecting Vulkan version
+
+VMA supports Vulkan versions down to 1.0, for backward compatibility.
+If you want to use a higher version, you need to inform the library about it.
+This is a two-step process.
+
+<b>Step 1: Compile time.</b> By default, VMA compiles with code supporting the highest
+Vulkan version found in the included `<vulkan/vulkan.h>` that is also supported by the library.
+If this is OK, you don't need to do anything.
+However, if you want to compile VMA as if only some lower Vulkan version was available,
+define the macro `VMA_VULKAN_VERSION` before every `#include "vk_mem_alloc.h"`.
+It should have a decimal numeric value in the form ABBBCCC, where A = major, BBB = minor, CCC = patch Vulkan version.
+For example, to compile against Vulkan 1.2:
+
+\code
+#define VMA_VULKAN_VERSION 1002000 // Vulkan 1.2
+#include "vk_mem_alloc.h"
+\endcode
+
+<b>Step 2: Runtime.</b> Even when compiled with a higher Vulkan version available,
+VMA can use only features of a lower version, which is configurable during creation of the #VmaAllocator object.
+By default, only Vulkan 1.0 is used.
+To initialize the allocator with support for a higher Vulkan version, you need to set the member
+VmaAllocatorCreateInfo::vulkanApiVersion to an appropriate value, e.g. using constants like `VK_API_VERSION_1_2`.
+See the code sample below.
+
+\subsection quick_start_initialization_importing_vulkan_functions Importing Vulkan functions
+
+You may need to configure how Vulkan functions are imported. There are 3 ways to do this:
+
+-# **If you link with the Vulkan static library** (e.g. "vulkan-1.lib" on Windows):
+   - You don't need to do anything.
+   - VMA will use these, as the macro `VMA_STATIC_VULKAN_FUNCTIONS` is defined to 1 by default.
+-# **If you want VMA to fetch pointers to Vulkan functions dynamically** using `vkGetInstanceProcAddr`,
+   `vkGetDeviceProcAddr` (this is the option presented in the example below):
+   - Define `VMA_STATIC_VULKAN_FUNCTIONS` to 0 and `VMA_DYNAMIC_VULKAN_FUNCTIONS` to 1.
+   - Provide pointers to these two functions via VmaVulkanFunctions::vkGetInstanceProcAddr,
+     VmaVulkanFunctions::vkGetDeviceProcAddr.
+   - The library will fetch pointers to all other functions it needs internally.
+-# **If you fetch pointers to all Vulkan functions in a custom way**, e.g. using some loader like
+   [Volk](https://github.com/zeux/volk):
+   - Define `VMA_STATIC_VULKAN_FUNCTIONS` and `VMA_DYNAMIC_VULKAN_FUNCTIONS` to 0.
+   - Pass these pointers via the structure #VmaVulkanFunctions.
+
+\subsection quick_start_initialization_enabling_extensions Enabling extensions
+
+VMA can automatically use the following Vulkan extensions.
+If you found them available on the selected physical device and you enabled them +while creating `VkInstance` / `VkDevice` object, inform VMA about their availability +by setting appropriate flags in VmaAllocatorCreateInfo::flags. + +Vulkan extension              | VMA flag +------------------------------|----------------------------------------------------- +VK_KHR_dedicated_allocation   | #VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT +VK_KHR_bind_memory2           | #VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT +VK_KHR_maintenance4           | #VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT +VK_KHR_maintenance5           | #VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT +VK_EXT_memory_budget          | #VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT +VK_KHR_buffer_device_address  | #VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT +VK_EXT_memory_priority        | #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT +VK_AMD_device_coherent_memory | #VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT +VK_KHR_external_memory_win32  | #VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT + +Example with fetching pointers to Vulkan functions dynamically: + +\code +#define VMA_STATIC_VULKAN_FUNCTIONS 0 +#define VMA_DYNAMIC_VULKAN_FUNCTIONS 1 +#include "vk_mem_alloc.h" + +... + +VmaVulkanFunctions vulkanFunctions = {}; +vulkanFunctions.vkGetInstanceProcAddr = &vkGetInstanceProcAddr; +vulkanFunctions.vkGetDeviceProcAddr = &vkGetDeviceProcAddr; + +VmaAllocatorCreateInfo allocatorCreateInfo = {}; +allocatorCreateInfo.flags = VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT; +allocatorCreateInfo.vulkanApiVersion = VK_API_VERSION_1_2; +allocatorCreateInfo.physicalDevice = physicalDevice; +allocatorCreateInfo.device = device; +allocatorCreateInfo.instance = instance; +allocatorCreateInfo.pVulkanFunctions = &vulkanFunctions; + +VmaAllocator allocator; +vmaCreateAllocator(&allocatorCreateInfo, &allocator); + +// Entire program... + +// At the end, don't forget to: +vmaDestroyAllocator(allocator); +\endcode + + +\subsection quick_start_initialization_other_config Other configuration options + +There are additional configuration options available through preprocessor macros that you can define +before including VMA header and through parameters passed in #VmaAllocatorCreateInfo. +They include a possibility to use your own callbacks for host memory allocations (`VkAllocationCallbacks`), +callbacks for device memory allocations (instead of `vkAllocateMemory`, `vkFreeMemory`), +or your custom `VMA_ASSERT` macro, among others. +For more information, see: @ref configuration. + + +\section quick_start_resource_allocation Resource allocation + +When you want to create a buffer or image: + +-# Fill `VkBufferCreateInfo` / `VkImageCreateInfo` structure. +-# Fill VmaAllocationCreateInfo structure. +-# Call vmaCreateBuffer() / vmaCreateImage() to get `VkBuffer`/`VkImage` with memory +   already allocated and bound to it, plus #VmaAllocation objects that represents its underlying memory. 
+ +\code +VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufferInfo.size = 65536; +bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo allocInfo = {}; +allocInfo.usage = VMA_MEMORY_USAGE_AUTO; + +VkBuffer buffer; +VmaAllocation allocation; +vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); +\endcode + +Don't forget to destroy your buffer and allocation objects when no longer needed: + +\code +vmaDestroyBuffer(allocator, buffer, allocation); +\endcode + +If you need to map the buffer, you must set flag +#VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT +in VmaAllocationCreateInfo::flags. +There are many additional parameters that can control the choice of memory type to be used for the allocation +and other features. +For more information, see documentation chapters: @ref choosing_memory_type, @ref memory_mapping. + + +\page choosing_memory_type Choosing memory type + +Physical devices in Vulkan support various combinations of memory heaps and +types. Help with choosing correct and optimal memory type for your specific +resource is one of the key features of this library. You can use it by filling +appropriate members of VmaAllocationCreateInfo structure, as described below. +You can also combine multiple methods. + +-# If you just want to find memory type index that meets your requirements, you +   can use function: vmaFindMemoryTypeIndexForBufferInfo(), +   vmaFindMemoryTypeIndexForImageInfo(), vmaFindMemoryTypeIndex(). +-# If you want to allocate a region of device memory without association with any +   specific image or buffer, you can use function vmaAllocateMemory(). Usage of +   this function is not recommended and usually not needed. +   vmaAllocateMemoryPages() function is also provided for creating multiple allocations at once, +   which may be useful for sparse binding. +-# If you already have a buffer or an image created, you want to allocate memory +   for it and then you will bind it yourself, you can use function +   vmaAllocateMemoryForBuffer(), vmaAllocateMemoryForImage(). +   For binding you should use functions: vmaBindBufferMemory(), vmaBindImageMemory() +   or their extended versions: vmaBindBufferMemory2(), vmaBindImageMemory2(). +-# If you want to create a buffer or an image, allocate memory for it, and bind +   them together, all in one call, you can use function vmaCreateBuffer(), +   vmaCreateImage(). +   <b>This is the easiest and recommended way to use this library!</b> + +When using 3. or 4., the library internally queries Vulkan for memory types +supported for that buffer or image (function `vkGetBufferMemoryRequirements()`) +and uses only one of these types. + +If no memory type can be found that meets all the requirements, these functions +return `VK_ERROR_FEATURE_NOT_PRESENT`. + +You can leave VmaAllocationCreateInfo structure completely filled with zeros. +It means no requirements are specified for memory type. +It is valid, although not very useful. + +\section choosing_memory_type_usage Usage + +The easiest way to specify memory requirements is to fill member +VmaAllocationCreateInfo::usage using one of the values of enum #VmaMemoryUsage. +It defines high level, common usage types. +Since version 3 of the library, it is recommended to use #VMA_MEMORY_USAGE_AUTO to let it select best memory type for your resource automatically. 
+ +For example, if you want to create a uniform buffer that will be filled using +transfer only once or infrequently and then used for rendering every frame as a uniform buffer, you can +do it using following code. The buffer will most likely end up in a memory type with +`VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT` to be fast to access by the GPU device. + +\code +VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufferInfo.size = 65536; +bufferInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo allocInfo = {}; +allocInfo.usage = VMA_MEMORY_USAGE_AUTO; + +VkBuffer buffer; +VmaAllocation allocation; +vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); +\endcode + +If you have a preference for putting the resource in GPU (device) memory or CPU (host) memory +on systems with discrete graphics card that have the memories separate, you can use +#VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE or #VMA_MEMORY_USAGE_AUTO_PREFER_HOST. + +When using `VMA_MEMORY_USAGE_AUTO*` while you want to map the allocated memory, +you also need to specify one of the host access flags: +#VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT. +This will help the library decide about preferred memory type to ensure it has `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` +so you can map it. + +For example, a staging buffer that will be filled via mapped pointer and then +used as a source of transfer to the buffer described previously can be created like this. +It will likely end up in a memory type that is `HOST_VISIBLE` and `HOST_COHERENT` +but not `HOST_CACHED` (meaning uncached, write-combined) and not `DEVICE_LOCAL` (meaning system RAM). + +\code +VkBufferCreateInfo stagingBufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +stagingBufferInfo.size = 65536; +stagingBufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + +VmaAllocationCreateInfo stagingAllocInfo = {}; +stagingAllocInfo.usage = VMA_MEMORY_USAGE_AUTO; +stagingAllocInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; + +VkBuffer stagingBuffer; +VmaAllocation stagingAllocation; +vmaCreateBuffer(allocator, &stagingBufferInfo, &stagingAllocInfo, &stagingBuffer, &stagingAllocation, nullptr); +\endcode + +For more examples of creating different kinds of resources, see chapter \ref usage_patterns. +See also: @ref memory_mapping. + +Usage values `VMA_MEMORY_USAGE_AUTO*` are legal to use only when the library knows +about the resource being created by having `VkBufferCreateInfo` / `VkImageCreateInfo` passed, +so they work with functions like: vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo() etc. +If you allocate raw memory using function vmaAllocateMemory(), you have to use other means of selecting +memory type, as described below. + +\note +Old usage values (`VMA_MEMORY_USAGE_GPU_ONLY`, `VMA_MEMORY_USAGE_CPU_ONLY`, +`VMA_MEMORY_USAGE_CPU_TO_GPU`, `VMA_MEMORY_USAGE_GPU_TO_CPU`, `VMA_MEMORY_USAGE_CPU_COPY`) +are still available and work same way as in previous versions of the library +for backward compatibility, but they are deprecated. + +\section choosing_memory_type_required_preferred_flags Required and preferred flags + +You can specify more detailed requirements by filling members +VmaAllocationCreateInfo::requiredFlags and VmaAllocationCreateInfo::preferredFlags +with a combination of bits from enum `VkMemoryPropertyFlags`. 
For example, +if you want to create a buffer that will be persistently mapped on host (so it +must be `HOST_VISIBLE`) and preferably will also be `HOST_COHERENT` and `HOST_CACHED`, +use following code: + +\code +VmaAllocationCreateInfo allocInfo = {}; +allocInfo.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; +allocInfo.preferredFlags = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; +allocInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT; + +VkBuffer buffer; +VmaAllocation allocation; +vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); +\endcode + +A memory type is chosen that has all the required flags and as many preferred +flags set as possible. + +Value passed in VmaAllocationCreateInfo::usage is internally converted to a set of required and preferred flags, +plus some extra "magic" (heuristics). + +\section choosing_memory_type_explicit_memory_types Explicit memory types + +If you inspected memory types available on the physical device and <b>you have +a preference for memory types that you want to use</b>, you can fill member +VmaAllocationCreateInfo::memoryTypeBits. It is a bit mask, where each bit set +means that a memory type with that index is allowed to be used for the +allocation. Special value 0, just like `UINT32_MAX`, means there are no +restrictions to memory type index. + +Please note that this member is NOT just a memory type index. +Still you can use it to choose just one, specific memory type. +For example, if you already determined that your buffer should be created in +memory type 2, use following code: + +\code +uint32_t memoryTypeIndex = 2; + +VmaAllocationCreateInfo allocInfo = {}; +allocInfo.memoryTypeBits = 1u << memoryTypeIndex; + +VkBuffer buffer; +VmaAllocation allocation; +vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); +\endcode + +You can also use this parameter to <b>exclude some memory types</b>. +If you inspect memory heaps and types available on the current physical device and +you determine that for some reason you don't want to use a specific memory type for the allocation, +you can enable automatic memory type selection but exclude certain memory type or types +by setting all bits of `memoryTypeBits` to 1 except the ones you choose. + +\code +// ... +uint32_t excludedMemoryTypeIndex = 2; +VmaAllocationCreateInfo allocInfo = {}; +allocInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocInfo.memoryTypeBits = ~(1u << excludedMemoryTypeIndex); +// ... +\endcode + + +\section choosing_memory_type_custom_memory_pools Custom memory pools + +If you allocate from custom memory pool, all the ways of specifying memory +requirements described above are not applicable and the aforementioned members +of VmaAllocationCreateInfo structure are ignored. Memory type is selected +explicitly when creating the pool and then used to make all the allocations from +that pool. For further details, see \ref custom_memory_pools. + +\section choosing_memory_type_dedicated_allocations Dedicated allocations + +Memory for allocations is reserved out of larger block of `VkDeviceMemory` +allocated from Vulkan internally. That is the main feature of this whole library. +You can still request a separate memory block to be created for an allocation, +just like you would do in a trivial solution without using any allocator. +In that case, a buffer or image is always bound to that memory at offset 0. +This is called a "dedicated allocation". 
+You can explicitly request it by using flag #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. +The library can also internally decide to use dedicated allocation in some cases, e.g.: + +- When the size of the allocation is large. +- When [VK_KHR_dedicated_allocation](@ref vk_khr_dedicated_allocation) extension is enabled +  and it reports that dedicated allocation is required or recommended for the resource. +- When allocation of next big memory block fails due to not enough device memory, +  but allocation with the exact requested size succeeds. + + +\page memory_mapping Memory mapping + +To "map memory" in Vulkan means to obtain a CPU pointer to `VkDeviceMemory`, +to be able to read from it or write to it in CPU code. +Mapping is possible only of memory allocated from a memory type that has +`VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` flag. +Functions `vkMapMemory()`, `vkUnmapMemory()` are designed for this purpose. +You can use them directly with memory allocated by this library, +but it is not recommended because of following issue: +Mapping the same `VkDeviceMemory` block multiple times is illegal - only one mapping at a time is allowed. +This includes mapping disjoint regions. Mapping is not reference-counted internally by Vulkan. +It is also not thread-safe. +Because of this, Vulkan Memory Allocator provides following facilities: + +\note If you want to be able to map an allocation, you need to specify one of the flags +#VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT +in VmaAllocationCreateInfo::flags. These flags are required for an allocation to be mappable +when using #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` enum values. +For other usage values they are ignored and every such allocation made in `HOST_VISIBLE` memory type is mappable, +but these flags can still be used for consistency. + +\section memory_mapping_copy_functions Copy functions + +The easiest way to copy data from a host pointer to an allocation is to use convenience function vmaCopyMemoryToAllocation(). +It automatically maps the Vulkan memory temporarily (if not already mapped), performs `memcpy`, +and calls `vkFlushMappedMemoryRanges` (if required - if memory type is not `HOST_COHERENT`). + +It is also the safest one, because using `memcpy` avoids a risk of accidentally introducing memory reads +(e.g. by doing `pMappedVectors[i] += v`), which may be very slow on memory types that are not `HOST_CACHED`. + +\code +struct ConstantBuffer +{ +    ... +}; +ConstantBuffer constantBufferData = ... + +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = sizeof(ConstantBuffer); +bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; + +VkBuffer buf; +VmaAllocation alloc; +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr); + +vmaCopyMemoryToAllocation(allocator, &constantBufferData, alloc, 0, sizeof(ConstantBuffer)); +\endcode + +Copy in the other direction - from an allocation to a host pointer can be performed the same way using function vmaCopyAllocationToMemory(). + +\section memory_mapping_mapping_functions Mapping functions + +The library provides following functions for mapping of a specific allocation: vmaMapMemory(), vmaUnmapMemory(). 
+They are safer and more convenient to use than standard Vulkan functions. +You can map an allocation multiple times simultaneously - mapping is reference-counted internally. +You can also map different allocations simultaneously regardless of whether they use the same `VkDeviceMemory` block. +The way it is implemented is that the library always maps entire memory block, not just region of the allocation. +For further details, see description of vmaMapMemory() function. +Example: + +\code +// Having these objects initialized: +struct ConstantBuffer +{ +    ... +}; +ConstantBuffer constantBufferData = ... + +VmaAllocator allocator = ... +VkBuffer constantBuffer = ... +VmaAllocation constantBufferAllocation = ... + +// You can map and fill your buffer using following code: + +void* mappedData; +vmaMapMemory(allocator, constantBufferAllocation, &mappedData); +memcpy(mappedData, &constantBufferData, sizeof(constantBufferData)); +vmaUnmapMemory(allocator, constantBufferAllocation); +\endcode + +When mapping, you may see a warning from Vulkan validation layer similar to this one: + +<i>Mapping an image with layout VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL can result in undefined behavior if this memory is used by the device. Only GENERAL or PREINITIALIZED should be used.</i> + +It happens because the library maps entire `VkDeviceMemory` block, where different +types of images and buffers may end up together, especially on GPUs with unified memory like Intel. +You can safely ignore it if you are sure you access only memory of the intended +object that you wanted to map. + + +\section memory_mapping_persistently_mapped_memory Persistently mapped memory + +Keeping your memory persistently mapped is generally OK in Vulkan. +You don't need to unmap it before using its data on the GPU. +The library provides a special feature designed for that: +Allocations made with #VMA_ALLOCATION_CREATE_MAPPED_BIT flag set in +VmaAllocationCreateInfo::flags stay mapped all the time, +so you can just access CPU pointer to it any time +without a need to call any "map" or "unmap" function. +Example: + +\code +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = sizeof(ConstantBuffer); +bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | +    VMA_ALLOCATION_CREATE_MAPPED_BIT; + +VkBuffer buf; +VmaAllocation alloc; +VmaAllocationInfo allocInfo; +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); + +// Buffer is already mapped. You can access its memory. +memcpy(allocInfo.pMappedData, &constantBufferData, sizeof(constantBufferData)); +\endcode + +\note #VMA_ALLOCATION_CREATE_MAPPED_BIT by itself doesn't guarantee that the allocation will end up +in a mappable memory type. +For this, you need to also specify #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or +#VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT. +#VMA_ALLOCATION_CREATE_MAPPED_BIT only guarantees that if the memory is `HOST_VISIBLE`, the allocation will be mapped on creation. +For an example of how to make use of this fact, see section \ref usage_patterns_advanced_data_uploading. 
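+
+For example, a minimal sketch of such a check, assuming the buffer was created as above but with the
+additional flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT, so that a non-mappable
+memory type may also be chosen, and reusing `alloc`, `allocInfo`, and `constantBufferData` from the example above:
+
+\code
+VkMemoryPropertyFlags memPropFlags;
+vmaGetAllocationMemoryProperties(allocator, alloc, &memPropFlags);
+
+if((memPropFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0)
+{
+    // The memory is HOST_VISIBLE, so VMA_ALLOCATION_CREATE_MAPPED_BIT took effect
+    // and the persistently mapped pointer can be written directly.
+    memcpy(allocInfo.pMappedData, &constantBufferData, sizeof(constantBufferData));
+}
+else
+{
+    // The allocation did not end up in HOST_VISIBLE memory and was not mapped -
+    // the data has to be uploaded through a staging buffer and a transfer command instead.
+}
+\endcode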
+
+\section memory_mapping_cache_control Cache flush and invalidate
+
+Memory in Vulkan doesn't need to be unmapped before using it on the GPU,
+but unless a memory type has the `VK_MEMORY_PROPERTY_HOST_COHERENT_BIT` flag set,
+you need to manually **invalidate** the cache before reading from a mapped pointer
+and **flush** the cache after writing to a mapped pointer.
+Map/unmap operations don't do that automatically.
+Vulkan provides the following functions for this purpose: `vkFlushMappedMemoryRanges()`,
+`vkInvalidateMappedMemoryRanges()`, but this library provides more convenient
+functions that refer to a given allocation object: vmaFlushAllocation(),
+vmaInvalidateAllocation(),
+or multiple objects at once: vmaFlushAllocations(), vmaInvalidateAllocations().
+
+Regions of memory specified for flush/invalidate must be aligned to
+`VkPhysicalDeviceLimits::nonCoherentAtomSize`. This is automatically ensured by the library.
+In any memory type that is `HOST_VISIBLE` but not `HOST_COHERENT`, all allocations
+within blocks are aligned to this value, so their offsets are always multiples of
+`nonCoherentAtomSize` and two different allocations never share the same "line" of this size.
+
+Also, Windows drivers from all 3 PC GPU vendors (AMD, Intel, NVIDIA)
+currently provide the `HOST_COHERENT` flag on all memory types that are
+`HOST_VISIBLE`, so on PC you may not need to bother.
+
+
+\page staying_within_budget Staying within budget
+
+When developing a graphics-intensive game or program, it is important to avoid allocating
+more GPU memory than is physically available. When the memory is over-committed,
+various bad things can happen, depending on the specific GPU, graphics driver, and
+operating system:
+
+- It may just work without any problems.
+- The application may slow down because some memory blocks are moved to system RAM
+  and the GPU has to access them through the PCI Express bus.
+- A new allocation may take a very long time to complete, even a few seconds, and possibly
+  freeze the entire system.
+- The new allocation may fail with `VK_ERROR_OUT_OF_DEVICE_MEMORY`.
+- It may even result in a GPU crash (TDR), observed as `VK_ERROR_DEVICE_LOST`
+  returned somewhere later.
+
+\section staying_within_budget_querying_for_budget Querying for budget
+
+To query for current memory usage and available budget, use the function vmaGetHeapBudgets().
+The returned structure #VmaBudget contains quantities expressed in bytes, per Vulkan memory heap.
+
+Please note that this function returns different information and works faster than
+vmaCalculateStatistics(). vmaGetHeapBudgets() can be called every frame or even before every
+allocation, while vmaCalculateStatistics() is intended to be used rarely,
+only to obtain statistical information, e.g. for debugging purposes.
+
+It is recommended to use the <b>VK_EXT_memory_budget</b> device extension to obtain information
+about the budget from the Vulkan device. VMA is able to use this extension automatically.
+When not enabled, the allocator behaves the same way, but then it estimates current usage
+and available budget based on its internal information and Vulkan memory heap sizes,
+which may be less precise. In order to use this extension:
+
+1. Make sure the extensions VK_EXT_memory_budget and VK_KHR_get_physical_device_properties2
+   required by it are available and enable them. Please note that the first is a device
+   extension and the second is an instance extension!
+2. Use the flag #VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT when creating the #VmaAllocator object.
+3. Make sure to call vmaSetCurrentFrameIndex() every frame. Budget is queried from
+   Vulkan inside of it to avoid the overhead of querying it with every allocation.
+
+\section staying_within_budget_controlling_memory_usage Controlling memory usage
+
+There are many ways in which you can try to stay within the budget.
+
+First, when making a new allocation requires allocating a new memory block, the library
+tries not to exceed the budget automatically. If a block with the default recommended size
+(e.g. 256 MB) would go over budget, a smaller block is allocated, possibly even
+dedicated memory for just this resource.
+
+If the size of the requested resource plus current memory usage is more than the
+budget, by default the library still tries to create it, leaving it to the Vulkan
+implementation whether the allocation succeeds or fails. You can change this behavior
+by using the #VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT flag. With it, the allocation is
+not made if it would exceed the budget or if the budget is already exceeded.
+VMA then tries to make the allocation from the next eligible Vulkan memory type.
+If all of them fail, the call then fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY`.
+An example usage pattern may be to pass the #VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT flag
+when creating resources that are not essential for the application (e.g. the texture
+of a specific object) and not to pass it when creating critically important resources
+(e.g. render targets).
+
+On AMD graphics cards there is a custom vendor extension available: <b>VK_AMD_memory_overallocation_behavior</b>
+that allows controlling the behavior of the Vulkan implementation in out-of-memory cases -
+whether it should fail with an error code or still allow the allocation.
+Usage of this extension involves only passing an extra structure on Vulkan device creation,
+so it is out of the scope of this library.
+
+Finally, you can also use the #VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT flag to make sure
+a new allocation is created only when it fits inside one of the existing memory blocks.
+If it would require allocating a new block, it fails instead with `VK_ERROR_OUT_OF_DEVICE_MEMORY`.
+This also ensures that the function call is very fast because it never goes to Vulkan
+to obtain a new block.
+
+\note Creating \ref custom_memory_pools with VmaPoolCreateInfo::minBlockCount
+set to more than 0 will currently try to allocate memory blocks without checking whether they
+fit within the budget.
+
+
+\page resource_aliasing Resource aliasing (overlap)
+
+New explicit graphics APIs (Vulkan and Direct3D 12), thanks to manual memory
+management, give an opportunity to alias (overlap) multiple resources in the
+same region of memory - a feature not available in the old APIs (Direct3D 11, OpenGL).
+It can be useful to save video memory, but it must be used with caution.
+
+For example, if you know the flow of your whole render frame in advance, you
+are going to use some intermediate textures or buffers only during a small range of render passes,
+and you know these ranges don't overlap in time, you can bind these resources to
+the same place in memory, even if they have completely different parameters (width, height, format etc.).
+
+
+
+Such a scenario is possible using VMA, but you need to create your images manually.
+Then you need to calculate the parameters of the allocation to be made using this formula:
+
+- allocation size = max(size of each image)
+- allocation alignment = max(alignment of each image)
+- allocation memoryTypeBits = bitwise AND(memoryTypeBits of each image)
+
+The following example shows two different images bound to the same place in memory,
+allocated to fit the largest of them.
+
+\code
+// A 512x512 texture to be sampled.
+VkImageCreateInfo img1CreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
+img1CreateInfo.imageType = VK_IMAGE_TYPE_2D;
+img1CreateInfo.extent.width = 512;
+img1CreateInfo.extent.height = 512;
+img1CreateInfo.extent.depth = 1;
+img1CreateInfo.mipLevels = 10;
+img1CreateInfo.arrayLayers = 1;
+img1CreateInfo.format = VK_FORMAT_R8G8B8A8_SRGB;
+img1CreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
+img1CreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+img1CreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
+img1CreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
+
+// A full screen texture to be used as a color attachment.
+VkImageCreateInfo img2CreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
+img2CreateInfo.imageType = VK_IMAGE_TYPE_2D;
+img2CreateInfo.extent.width = 1920;
+img2CreateInfo.extent.height = 1080;
+img2CreateInfo.extent.depth = 1;
+img2CreateInfo.mipLevels = 1;
+img2CreateInfo.arrayLayers = 1;
+img2CreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
+img2CreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
+img2CreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+img2CreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+img2CreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
+
+VkImage img1;
+VkResult res = vkCreateImage(device, &img1CreateInfo, nullptr, &img1);
+VkImage img2;
+res = vkCreateImage(device, &img2CreateInfo, nullptr, &img2);
+
+VkMemoryRequirements img1MemReq;
+vkGetImageMemoryRequirements(device, img1, &img1MemReq);
+VkMemoryRequirements img2MemReq;
+vkGetImageMemoryRequirements(device, img2, &img2MemReq);
+
+VkMemoryRequirements finalMemReq = {};
+finalMemReq.size = std::max(img1MemReq.size, img2MemReq.size);
+finalMemReq.alignment = std::max(img1MemReq.alignment, img2MemReq.alignment);
+finalMemReq.memoryTypeBits = img1MemReq.memoryTypeBits & img2MemReq.memoryTypeBits;
+// Validate that finalMemReq.memoryTypeBits != 0 - otherwise no memory type supports both images.
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+
+VmaAllocation alloc;
+res = vmaAllocateMemory(allocator, &finalMemReq, &allocCreateInfo, &alloc, nullptr);
+
+res = vmaBindImageMemory(allocator, alloc, img1);
+res = vmaBindImageMemory(allocator, alloc, img2);
+
+// You can use img1, img2 here, but not at the same time!
+
+vmaFreeMemory(allocator, alloc);
+vkDestroyImage(device, img2, nullptr);
+vkDestroyImage(device, img1, nullptr);
+\endcode
+
+VMA also provides convenience functions that create a buffer or image and bind it to memory
+represented by an existing #VmaAllocation:
+vmaCreateAliasingBuffer(), vmaCreateAliasingBuffer2(),
+vmaCreateAliasingImage(), vmaCreateAliasingImage2().
+Versions with "2" offer the additional parameter `allocationLocalOffset`.
+
+Remember that using resources that alias in memory requires proper synchronization.
+You need to issue a memory barrier to make sure commands that use `img1` and `img2`
+don't overlap on the GPU timeline.
+You also need to treat a resource after aliasing as uninitialized - containing garbage data.
+For example, if you use `img1` and then want to use `img2`, you need to issue +an image memory barrier for `img2` with `oldLayout` = `VK_IMAGE_LAYOUT_UNDEFINED`. + +Additional considerations: + +- Vulkan also allows to interpret contents of memory between aliasing resources consistently in some cases. +See chapter 11.8. "Memory Aliasing" of Vulkan specification or `VK_IMAGE_CREATE_ALIAS_BIT` flag. +- You can create more complex layout where different images and buffers are bound +at different offsets inside one large allocation. For example, one can imagine +a big texture used in some render passes, aliasing with a set of many small buffers +used between in some further passes. To bind a resource at non-zero offset in an allocation, +use vmaBindBufferMemory2() / vmaBindImageMemory2(). +- Before allocating memory for the resources you want to alias, check `memoryTypeBits` +returned in memory requirements of each resource to make sure the bits overlap. +Some GPUs may expose multiple memory types suitable e.g. only for buffers or +images with `COLOR_ATTACHMENT` usage, so the sets of memory types supported by your +resources may be disjoint. Aliasing them is not possible in that case. + + +\page custom_memory_pools Custom memory pools + +A memory pool contains a number of `VkDeviceMemory` blocks. +The library automatically creates and manages default pool for each memory type available on the device. +Default memory pool automatically grows in size. +Size of allocated blocks is also variable and managed automatically. +You are using default pools whenever you leave VmaAllocationCreateInfo::pool = null. + +You can create custom pool and allocate memory out of it. +It can be useful if you want to: + +- Keep certain kind of allocations separate from others. +- Enforce particular, fixed size of Vulkan memory blocks. +- Limit maximum amount of Vulkan memory allocated for that pool. +- Reserve minimum or fixed amount of Vulkan memory always preallocated for that pool. +- Use extra parameters for a set of your allocations that are available in #VmaPoolCreateInfo but not in +  #VmaAllocationCreateInfo - e.g., custom minimum alignment, custom `pNext` chain. +- Perform defragmentation on a specific subset of your allocations. + +To use custom memory pools: + +-# Fill VmaPoolCreateInfo structure. +-# Call vmaCreatePool() to obtain #VmaPool handle. +-# When making an allocation, set VmaAllocationCreateInfo::pool to this handle. +   You don't need to specify any other parameters of this structure, like `usage`. + +Example: + +\code +// Find memoryTypeIndex for the pool. +VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +sampleBufCreateInfo.size = 0x10000; // Doesn't matter. +sampleBufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo sampleAllocCreateInfo = {}; +sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + +uint32_t memTypeIndex; +VkResult res = vmaFindMemoryTypeIndexForBufferInfo(allocator, +    &sampleBufCreateInfo, &sampleAllocCreateInfo, &memTypeIndex); +// Check res... + +// Create a pool that can have at most 2 blocks, 128 MiB each. +VmaPoolCreateInfo poolCreateInfo = {}; +poolCreateInfo.memoryTypeIndex = memTypeIndex; +poolCreateInfo.blockSize = 128ull * 1024 * 1024; +poolCreateInfo.maxBlockCount = 2; + +VmaPool pool; +res = vmaCreatePool(allocator, &poolCreateInfo, &pool); +// Check res... + +// Allocate a buffer out of it. 
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = 1024; +bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.pool = pool; + +VkBuffer buf; +VmaAllocation alloc; +res = vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr); +// Check res... +\endcode + +You have to free all allocations made from this pool before destroying it. + +\code +vmaDestroyBuffer(allocator, buf, alloc); +vmaDestroyPool(allocator, pool); +\endcode + +New versions of this library support creating dedicated allocations in custom pools. +It is supported only when VmaPoolCreateInfo::blockSize = 0. +To use this feature, set VmaAllocationCreateInfo::pool to the pointer to your custom pool and +VmaAllocationCreateInfo::flags to #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. + + +\section custom_memory_pools_MemTypeIndex Choosing memory type index + +When creating a pool, you must explicitly specify memory type index. +To find the one suitable for your buffers or images, you can use helper functions +vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo(). +You need to provide structures with example parameters of buffers or images +that you are going to create in that pool. + +\code +VkBufferCreateInfo exampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +exampleBufCreateInfo.size = 1024; // Doesn't matter +exampleBufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + +uint32_t memTypeIndex; +vmaFindMemoryTypeIndexForBufferInfo(allocator, &exampleBufCreateInfo, &allocCreateInfo, &memTypeIndex); + +VmaPoolCreateInfo poolCreateInfo = {}; +poolCreateInfo.memoryTypeIndex = memTypeIndex; +// ... +\endcode + +When creating buffers/images allocated in that pool, provide following parameters: + +- `VkBufferCreateInfo`: Prefer to pass same parameters as above. +  Otherwise you risk creating resources in a memory type that is not suitable for them, which may result in undefined behavior. +  Using different `VK_BUFFER_USAGE_` flags may work, but you shouldn't create images in a pool intended for buffers +  or the other way around. +- VmaAllocationCreateInfo: You don't need to pass same parameters. Fill only `pool` member. +  Other members are ignored anyway. + + +\section custom_memory_pools_when_not_use When not to use custom pools + +Custom pools are commonly overused by VMA users. +While it may feel natural to keep some logical groups of resources separate in memory, +in most cases it does more harm than good. +Using custom pool shouldn't be your first choice. +Instead, please make all allocations from default pools first and only use custom pools +if you can prove and measure that it is beneficial in some way, +e.g. it results in lower memory usage, better performance, etc. + +Using custom pools has disadvantages: + +- Each pool has its own collection of `VkDeviceMemory` blocks. +  Some of them may be partially or even completely empty. +  Spreading allocations across multiple pools increases the amount of wasted (allocated but unbound) memory. +- You must manually choose specific memory type to be used by a custom pool (set as VmaPoolCreateInfo::memoryTypeIndex). 
+  When using default pools, the best memory type for each of your allocations can be selected automatically
+  using a carefully designed algorithm that works across all kinds of GPUs.
+- If an allocation from a custom pool at a specific memory type fails, the entire allocation operation returns failure.
+  When using default pools, VMA tries another compatible memory type.
+- If you set VmaPoolCreateInfo::blockSize != 0, each memory block has the same size,
+  while default pools start from small blocks and allocate each next block larger and larger,
+  up to the preferred block size.
+
+Many of the common concerns can be addressed in a different way than using custom pools:
+
+- If you want to keep your allocations of certain size (small versus large) or certain lifetime (transient versus long lived)
+  separate, you likely don't need to.
+  VMA uses a high quality allocation algorithm that manages memory well in various cases.
+  Please measure and check whether using custom pools provides a benefit.
+- If you want to keep your images and buffers separate, you don't need to.
+  VMA respects the `bufferImageGranularity` limit automatically.
+- If you want to keep your mapped and not mapped allocations separate, you don't need to.
+  VMA respects the `nonCoherentAtomSize` limit automatically.
+  It also maps only those `VkDeviceMemory` blocks that need to map any allocation.
+  It even tries to keep mappable and non-mappable allocations in separate blocks to minimize the amount of mapped memory.
+- If you want to choose a custom size for the default memory block, you can set it globally instead
+  using VmaAllocatorCreateInfo::preferredLargeHeapBlockSize.
+- If you want to select a specific memory type for your allocation,
+  you can set VmaAllocationCreateInfo::memoryTypeBits to `(1u << myMemoryTypeIndex)` instead.
+- If you need to create a buffer with a certain minimum alignment, you can still do it
+  using default pools with the dedicated function vmaCreateBufferWithAlignment().
+
+
+\section linear_algorithm Linear allocation algorithm
+
+Each Vulkan memory block managed by this library has accompanying metadata that
+keeps track of used and unused regions. By default, the metadata structure and
+algorithm try to find the best place for new allocations among free regions to
+optimize memory usage. This way you can allocate and free objects in any order.
+
+
+
+Sometimes there is a need for a simpler, linear allocation algorithm. You can
+create a custom pool that uses this algorithm by adding the flag
+#VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT to VmaPoolCreateInfo::flags while creating the
+#VmaPool object. An alternative metadata management is then used. It always
+creates new allocations after the last one and doesn't reuse free regions left by
+allocations freed in the middle. It results in better allocation performance and
+less memory consumed by metadata.
+
+
+
+With this one flag, you can create a custom pool that can be used in many ways:
+free-at-once, stack, double stack, and ring buffer. See below for details.
+You don't need to specify explicitly which of these options you are going to use - it is detected automatically.
+
+\subsection linear_algorithm_free_at_once Free-at-once
+
+In a pool that uses the linear algorithm, you still need to free all the allocations
+individually, e.g. by using vmaFreeMemory() or vmaDestroyBuffer(). You can free
+them in any order. New allocations are always made after the last one - free space
+in the middle is not reused. 
However, when you release all the allocation and +the pool becomes empty, allocation starts from the beginning again. This way you +can use linear algorithm to speed up creation of allocations that you are going +to release all at once. + + + +This mode is also available for pools created with VmaPoolCreateInfo::maxBlockCount +value that allows multiple memory blocks. + +\subsection linear_algorithm_stack Stack + +When you free an allocation that was created last, its space can be reused. +Thanks to this, if you always release allocations in the order opposite to their +creation (LIFO - Last In First Out), you can achieve behavior of a stack. + + + +This mode is also available for pools created with VmaPoolCreateInfo::maxBlockCount +value that allows multiple memory blocks. + +\subsection linear_algorithm_double_stack Double stack + +The space reserved by a custom pool with linear algorithm may be used by two +stacks: + +- First, default one, growing up from offset 0. +- Second, "upper" one, growing down from the end towards lower offsets. + +To make allocation from the upper stack, add flag #VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT +to VmaAllocationCreateInfo::flags. + + + +Double stack is available only in pools with one memory block - +VmaPoolCreateInfo::maxBlockCount must be 1. Otherwise behavior is undefined. + +When the two stacks' ends meet so there is not enough space between them for a +new allocation, such allocation fails with usual +`VK_ERROR_OUT_OF_DEVICE_MEMORY` error. + +\subsection linear_algorithm_ring_buffer Ring buffer + +When you free some allocations from the beginning and there is not enough free space +for a new one at the end of a pool, allocator's "cursor" wraps around to the +beginning and starts allocation there. Thanks to this, if you always release +allocations in the same order as you created them (FIFO - First In First Out), +you can achieve behavior of a ring buffer / queue. + + + +Ring buffer is available only in pools with one memory block - +VmaPoolCreateInfo::maxBlockCount must be 1. Otherwise behavior is undefined. + +\note \ref defragmentation is not supported in custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT. + + +\page defragmentation Defragmentation + +Interleaved allocations and deallocations of many objects of varying size can +cause fragmentation over time, which can lead to a situation where the library is unable +to find a continuous range of free memory for a new allocation despite there is +enough free space, just scattered across many small free ranges between existing +allocations. + +To mitigate this problem, you can use defragmentation feature. +It doesn't happen automatically though and needs your cooperation, +because VMA is a low level library that only allocates memory. +It cannot recreate buffers and images in a new place as it doesn't remember the contents of `VkBufferCreateInfo` / `VkImageCreateInfo` structures. +It cannot copy their contents as it doesn't record any commands to a command buffer. + +Example: + +\code +VmaDefragmentationInfo defragInfo = {}; +defragInfo.pool = myPool; +defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT; + +VmaDefragmentationContext defragCtx; +VkResult res = vmaBeginDefragmentation(allocator, &defragInfo, &defragCtx); +// Check res... + +for(;;) +{ +    VmaDefragmentationPassMoveInfo pass; +    res = vmaBeginDefragmentationPass(allocator, defragCtx, &pass); +    if(res == VK_SUCCESS) +        break; +    else if(res != VK_INCOMPLETE) +        // Handle error... 
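+
+    // Reaching this point means res == VK_INCOMPLETE: the pass returned a list of
+    // allocations to move in pass.pMoves[], which is processed below.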
+
+    for(uint32_t i = 0; i < pass.moveCount; ++i)
+    {
+        // Inspect pass.pMoves[i].srcAllocation, identify what buffer/image it represents.
+        VmaAllocationInfo allocInfo;
+        vmaGetAllocationInfo(allocator, pass.pMoves[i].srcAllocation, &allocInfo);
+        MyEngineResourceData* resData = (MyEngineResourceData*)allocInfo.pUserData;
+
+        // Recreate and bind this buffer/image at: pass.pMoves[i].dstMemory, pass.pMoves[i].dstOffset.
+        VkImageCreateInfo imgCreateInfo = ...
+        VkImage newImg;
+        res = vkCreateImage(device, &imgCreateInfo, nullptr, &newImg);
+        // Check res...
+        res = vmaBindImageMemory(allocator, pass.pMoves[i].dstTmpAllocation, newImg);
+        // Check res...
+
+        // Issue a vkCmdCopyBuffer/vkCmdCopyImage to copy its content to the new place.
+        vkCmdCopyImage(cmdBuf, resData->img, ..., newImg, ...);
+    }
+
+    // Make sure the copy commands finished executing.
+    vkWaitForFences(...);
+
+    // Destroy old buffers/images bound with pass.pMoves[i].srcAllocation.
+    for(uint32_t i = 0; i < pass.moveCount; ++i)
+    {
+        // ...
+        vkDestroyImage(device, resData->img, nullptr);
+    }
+
+    // Update appropriate descriptors to point to the new places...
+
+    res = vmaEndDefragmentationPass(allocator, defragCtx, &pass);
+    if(res == VK_SUCCESS)
+        break;
+    else if(res != VK_INCOMPLETE)
+        // Handle error...
+}
+
+vmaEndDefragmentation(allocator, defragCtx, nullptr);
+\endcode
+
+Although functions like vmaCreateBuffer(), vmaCreateImage(), vmaDestroyBuffer(), vmaDestroyImage()
+create/destroy an allocation and a buffer/image at once, these are just a shortcut for
+creating the resource, allocating memory, and binding them together.
+Defragmentation works on memory allocations only. You must handle the rest manually.
+Defragmentation is an iterative process that should repeat "passes" as long as the related functions
+return `VK_INCOMPLETE` rather than `VK_SUCCESS`.
+In each pass:
+
+1. vmaBeginDefragmentationPass() function call:
+   - Calculates and returns the list of allocations to be moved in this pass.
+     Note this can be a time-consuming process.
+   - Reserves destination memory for them by creating temporary destination allocations
+     that you can query for their `VkDeviceMemory` + offset using vmaGetAllocationInfo().
+2. Inside the pass, **you should**:
+   - Inspect the returned list of allocations to be moved.
+   - Create new buffers/images and bind them at the returned destination temporary allocations.
+   - Copy data from source to destination resources if necessary.
+   - Destroy the source buffers/images, but NOT their allocations.
+3. vmaEndDefragmentationPass() function call:
+   - Frees the source memory reserved for the allocations that are moved.
+   - Modifies source #VmaAllocation objects that are moved to point to the destination reserved memory.
+   - Frees `VkDeviceMemory` blocks that became empty.
+
+Unlike in previous iterations of the defragmentation API, there is no list of "movable" allocations passed as a parameter.
+The defragmentation algorithm tries to move all suitable allocations.
+You can, however, refuse to move some of them inside a defragmentation pass by setting
+`pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE.
+This is not recommended and may result in suboptimal packing of the allocations after defragmentation. 
+If you cannot ensure any allocation can be moved, it is better to keep movable allocations separate in a custom pool. + +Inside a pass, for each allocation that should be moved: + +- You should copy its data from the source to the destination place by calling e.g. `vkCmdCopyBuffer()`, `vkCmdCopyImage()`. +  - You need to make sure these commands finished executing before destroying the source buffers/images and before calling vmaEndDefragmentationPass(). +- If a resource doesn't contain any meaningful data, e.g. it is a transient color attachment image to be cleared, +  filled, and used temporarily in each rendering frame, you can just recreate this image +  without copying its data. +- If the resource is in `HOST_VISIBLE` and `HOST_CACHED` memory, you can copy its data on the CPU +  using `memcpy()`. +- If you cannot move the allocation, you can set `pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE. +  This will cancel the move. +  - vmaEndDefragmentationPass() will then free the destination memory +    not the source memory of the allocation, leaving it unchanged. +- If you decide the allocation is unimportant and can be destroyed instead of moved (e.g. it wasn't used for long time), +  you can set `pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY. +  - vmaEndDefragmentationPass() will then free both source and destination memory, and will destroy the source #VmaAllocation object. + +You can defragment a specific custom pool by setting VmaDefragmentationInfo::pool +(like in the example above) or all the default pools by setting this member to null. + +Defragmentation is always performed in each pool separately. +Allocations are never moved between different Vulkan memory types. +The size of the destination memory reserved for a moved allocation is the same as the original one. +Alignment of an allocation as it was determined using `vkGetBufferMemoryRequirements()` etc. is also respected after defragmentation. +Buffers/images should be recreated with the same `VkBufferCreateInfo` / `VkImageCreateInfo` parameters as the original ones. + +You can perform the defragmentation incrementally to limit the number of allocations and bytes to be moved +in each pass, e.g. to call it in sync with render frames and not to experience too big hitches. +See members: VmaDefragmentationInfo::maxBytesPerPass, VmaDefragmentationInfo::maxAllocationsPerPass. + +It is also safe to perform the defragmentation asynchronously to render frames and other Vulkan and VMA +usage, possibly from multiple threads, with the exception that allocations +returned in VmaDefragmentationPassMoveInfo::pMoves shouldn't be destroyed until the defragmentation pass is ended. + +<b>Mapping</b> is preserved on allocations that are moved during defragmentation. +Whether through #VMA_ALLOCATION_CREATE_MAPPED_BIT or vmaMapMemory(), the allocations +are mapped at their new place. Of course, pointer to the mapped data changes, so it needs to be queried +using VmaAllocationInfo::pMappedData. + +\note Defragmentation is not supported in custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT. + + +\page statistics Statistics + +This library contains several functions that return information about its internal state, +especially the amount of memory allocated from Vulkan. 
+ +\section statistics_numeric_statistics Numeric statistics + +If you need to obtain basic statistics about memory usage per heap, together with current budget, +you can call function vmaGetHeapBudgets() and inspect structure #VmaBudget. +This is useful to keep track of memory usage and stay within budget +(see also \ref staying_within_budget). +Example: + +\code +uint32_t heapIndex = ... + +VmaBudget budgets[VK_MAX_MEMORY_HEAPS]; +vmaGetHeapBudgets(allocator, budgets); + +printf("My heap currently has %u allocations taking %llu B,\n", +    budgets[heapIndex].statistics.allocationCount, +    budgets[heapIndex].statistics.allocationBytes); +printf("allocated out of %u Vulkan device memory blocks taking %llu B,\n", +    budgets[heapIndex].statistics.blockCount, +    budgets[heapIndex].statistics.blockBytes); +printf("Vulkan reports total usage %llu B with budget %llu B.\n", +    budgets[heapIndex].usage, +    budgets[heapIndex].budget); +\endcode + +You can query for more detailed statistics per memory heap, type, and totals, +including minimum and maximum allocation size and unused range size, +by calling function vmaCalculateStatistics() and inspecting structure #VmaTotalStatistics. +This function is slower though, as it has to traverse all the internal data structures, +so it should be used only for debugging purposes. + +You can query for statistics of a custom pool using function vmaGetPoolStatistics() +or vmaCalculatePoolStatistics(). + +You can query for information about a specific allocation using function vmaGetAllocationInfo(). +It fill structure #VmaAllocationInfo. + +\section statistics_json_dump JSON dump + +You can dump internal state of the allocator to a string in JSON format using function vmaBuildStatsString(). +The result is guaranteed to be correct JSON. +It uses ANSI encoding. +Any strings provided by user (see [Allocation names](@ref allocation_names)) +are copied as-is and properly escaped for JSON, so if they use UTF-8, ISO-8859-2 or any other encoding, +this JSON string can be treated as using this encoding. +It must be freed using function vmaFreeStatsString(). + +The format of this JSON string is not part of official documentation of the library, +but it will not change in backward-incompatible way without increasing library major version number +and appropriate mention in changelog. + +The JSON string contains all the data that can be obtained using vmaCalculateStatistics(). +It can also contain detailed map of allocated memory blocks and their regions - +free and occupied by allocations. +This allows e.g. to visualize the memory or assess fragmentation. + + +\page allocation_annotation Allocation names and user data + +\section allocation_user_data Allocation user data + +You can annotate allocations with your own information, e.g. for debugging purposes. +To do that, fill VmaAllocationCreateInfo::pUserData field when creating +an allocation. It is an opaque `void*` pointer. You can use it e.g. as a pointer, +some handle, index, key, ordinal number or any other value that would associate +the allocation with your custom metadata. +It is useful to identify appropriate data structures in your engine given #VmaAllocation, +e.g. when doing \ref defragmentation. + +\code +VkBufferCreateInfo bufCreateInfo = ... 
+ +MyBufferMetadata* pMetadata = CreateBufferMetadata(); + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.pUserData = pMetadata; + +VkBuffer buffer; +VmaAllocation allocation; +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buffer, &allocation, nullptr); +\endcode + +The pointer may be later retrieved as VmaAllocationInfo::pUserData: + +\code +VmaAllocationInfo allocInfo; +vmaGetAllocationInfo(allocator, allocation, &allocInfo); +MyBufferMetadata* pMetadata = (MyBufferMetadata*)allocInfo.pUserData; +\endcode + +It can also be changed using function vmaSetAllocationUserData(). + +Values of (non-zero) allocations' `pUserData` are printed in JSON report created by +vmaBuildStatsString() in hexadecimal form. + +\section allocation_names Allocation names + +An allocation can also carry a null-terminated string, giving a name to the allocation. +To set it, call vmaSetAllocationName(). +The library creates internal copy of the string, so the pointer you pass doesn't need +to be valid for whole lifetime of the allocation. You can free it after the call. + +\code +std::string imageName = "Texture: "; +imageName += fileName; +vmaSetAllocationName(allocator, allocation, imageName.c_str()); +\endcode + +The string can be later retrieved by inspecting VmaAllocationInfo::pName. +It is also printed in JSON report created by vmaBuildStatsString(). + +\note Setting string name to VMA allocation doesn't automatically set it to the Vulkan buffer or image created with it. +You must do it manually using an extension like VK_EXT_debug_utils, which is independent of this library. + + +\page virtual_allocator Virtual allocator + +As an extra feature, the core allocation algorithm of the library is exposed through a simple and convenient API of "virtual allocator". +It doesn't allocate any real GPU memory. It just keeps track of used and free regions of a "virtual block". +You can use it to allocate your own memory or other objects, even completely unrelated to Vulkan. +A common use case is sub-allocation of pieces of one large GPU buffer. + +\section virtual_allocator_creating_virtual_block Creating virtual block + +To use this functionality, there is no main "allocator" object. +You don't need to have #VmaAllocator object created. +All you need to do is to create a separate #VmaVirtualBlock object for each block of memory you want to be managed by the allocator: + +-# Fill in #VmaVirtualBlockCreateInfo structure. +-# Call vmaCreateVirtualBlock(). Get new #VmaVirtualBlock object. + +Example: + +\code +VmaVirtualBlockCreateInfo blockCreateInfo = {}; +blockCreateInfo.size = 1048576; // 1 MB + +VmaVirtualBlock block; +VkResult res = vmaCreateVirtualBlock(&blockCreateInfo, &block); +\endcode + +\section virtual_allocator_making_virtual_allocations Making virtual allocations + +#VmaVirtualBlock object contains internal data structure that keeps track of free and occupied regions +using the same code as the main Vulkan memory allocator. +Similarly to #VmaAllocation for standard GPU allocations, there is #VmaVirtualAllocation type +that represents an opaque handle to an allocation within the virtual block. + +In order to make such allocation: + +-# Fill in #VmaVirtualAllocationCreateInfo structure. +-# Call vmaVirtualAllocate(). Get new #VmaVirtualAllocation object that represents the allocation. +   You can also receive `VkDeviceSize offset` that was assigned to the allocation. 
+ +Example: + +\code +VmaVirtualAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.size = 4096; // 4 KB + +VmaVirtualAllocation alloc; +VkDeviceSize offset; +res = vmaVirtualAllocate(block, &allocCreateInfo, &alloc, &offset); +if(res == VK_SUCCESS) +{ +    // Use the 4 KB of your memory starting at offset. +} +else +{ +    // Allocation failed - no space for it could be found. Handle this error! +} +\endcode + +\section virtual_allocator_deallocation Deallocation + +When no longer needed, an allocation can be freed by calling vmaVirtualFree(). +You can only pass to this function an allocation that was previously returned by vmaVirtualAllocate() +called for the same #VmaVirtualBlock. + +When whole block is no longer needed, the block object can be released by calling vmaDestroyVirtualBlock(). +All allocations must be freed before the block is destroyed, which is checked internally by an assert. +However, if you don't want to call vmaVirtualFree() for each allocation, you can use vmaClearVirtualBlock() to free them all at once - +a feature not available in normal Vulkan memory allocator. Example: + +\code +vmaVirtualFree(block, alloc); +vmaDestroyVirtualBlock(block); +\endcode + +\section virtual_allocator_allocation_parameters Allocation parameters + +You can attach a custom pointer to each allocation by using vmaSetVirtualAllocationUserData(). +Its default value is null. +It can be used to store any data that needs to be associated with that allocation - e.g. an index, a handle, or a pointer to some +larger data structure containing more information. Example: + +\code +struct CustomAllocData +{ +    std::string m_AllocName; +}; +CustomAllocData* allocData = new CustomAllocData(); +allocData->m_AllocName = "My allocation 1"; +vmaSetVirtualAllocationUserData(block, alloc, allocData); +\endcode + +The pointer can later be fetched, along with allocation offset and size, by passing the allocation handle to function +vmaGetVirtualAllocationInfo() and inspecting returned structure #VmaVirtualAllocationInfo. +If you allocated a new object to be used as the custom pointer, don't forget to delete that object before freeing the allocation! +Example: + +\code +VmaVirtualAllocationInfo allocInfo; +vmaGetVirtualAllocationInfo(block, alloc, &allocInfo); +delete (CustomAllocData*)allocInfo.pUserData; + +vmaVirtualFree(block, alloc); +\endcode + +\section virtual_allocator_alignment_and_units Alignment and units + +It feels natural to express sizes and offsets in bytes. +If an offset of an allocation needs to be aligned to a multiply of some number (e.g. 4 bytes), you can fill optional member +VmaVirtualAllocationCreateInfo::alignment to request it. Example: + +\code +VmaVirtualAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.size = 4096; // 4 KB +allocCreateInfo.alignment = 4; // Returned offset must be a multiply of 4 B + +VmaVirtualAllocation alloc; +res = vmaVirtualAllocate(block, &allocCreateInfo, &alloc, nullptr); +\endcode + +Alignments of different allocations made from one block may vary. +However, if all alignments and sizes are always multiply of some size e.g. 4 B or `sizeof(MyDataStruct)`, +you can express all sizes, alignments, and offsets in multiples of that size instead of individual bytes. 
+It might be more convenient, but you need to make sure to use this new unit consistently in all the places: + +- VmaVirtualBlockCreateInfo::size +- VmaVirtualAllocationCreateInfo::size and VmaVirtualAllocationCreateInfo::alignment +- Using offset returned by vmaVirtualAllocate() or in VmaVirtualAllocationInfo::offset + +\section virtual_allocator_statistics Statistics + +You can obtain statistics of a virtual block using vmaGetVirtualBlockStatistics() +(to get brief statistics that are fast to calculate) +or vmaCalculateVirtualBlockStatistics() (to get more detailed statistics, slower to calculate). +The functions fill structures #VmaStatistics, #VmaDetailedStatistics respectively - same as used by the normal Vulkan memory allocator. +Example: + +\code +VmaStatistics stats; +vmaGetVirtualBlockStatistics(block, &stats); +printf("My virtual block has %llu bytes used by %u virtual allocations\n", +    stats.allocationBytes, stats.allocationCount); +\endcode + +You can also request a full list of allocations and free regions as a string in JSON format by calling +vmaBuildVirtualBlockStatsString(). +Returned string must be later freed using vmaFreeVirtualBlockStatsString(). +The format of this string differs from the one returned by the main Vulkan allocator, but it is similar. + +\section virtual_allocator_additional_considerations Additional considerations + +The "virtual allocator" functionality is implemented on a level of individual memory blocks. +Keeping track of a whole collection of blocks, allocating new ones when out of free space, +deleting empty ones, and deciding which one to try first for a new allocation must be implemented by the user. + +Alternative allocation algorithms are supported, just like in custom pools of the real GPU memory. +See enum #VmaVirtualBlockCreateFlagBits to learn how to specify them (e.g. #VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT). +You can find their description in chapter \ref custom_memory_pools. +Allocation strategies are also supported. +See enum #VmaVirtualAllocationCreateFlagBits to learn how to specify them (e.g. #VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT). + +Following features are supported only by the allocator of the real GPU memory and not by virtual allocations: +buffer-image granularity, `VMA_DEBUG_MARGIN`, `VMA_MIN_ALIGNMENT`. + + +\page debugging_memory_usage Debugging incorrect memory usage + +If you suspect a bug with memory usage, like usage of uninitialized memory or +memory being overwritten out of bounds of an allocation, +you can use debug features of this library to verify this. + +\section debugging_memory_usage_initialization Memory initialization + +If you experience a bug with incorrect and nondeterministic data in your program and you suspect uninitialized memory to be used, +you can enable automatic memory initialization to verify this. +To do it, define macro `VMA_DEBUG_INITIALIZE_ALLOCATIONS` to 1. + +\code +#define VMA_DEBUG_INITIALIZE_ALLOCATIONS 1 +#include "vk_mem_alloc.h" +\endcode + +It makes memory of new allocations initialized to bit pattern `0xDCDCDCDC`. +Before an allocation is destroyed, its memory is filled with bit pattern `0xEFEFEFEF`. +Memory is automatically mapped and unmapped if necessary. + +If you find these values while debugging your program, good chances are that you incorrectly +read Vulkan memory that is allocated but not initialized, or already freed, respectively. + +Memory initialization works only with memory types that are `HOST_VISIBLE` and with allocations that can be mapped. 
+It works also with dedicated allocations. + +\section debugging_memory_usage_margins Margins + +By default, allocations are laid out in memory blocks next to each other if possible +(considering required alignment, `bufferImageGranularity`, and `nonCoherentAtomSize`). + + + +Define macro `VMA_DEBUG_MARGIN` to some non-zero value (e.g. 16) to enforce specified +number of bytes as a margin after every allocation. + +\code +#define VMA_DEBUG_MARGIN 16 +#include "vk_mem_alloc.h" +\endcode + + + +If your bug goes away after enabling margins, it means it may be caused by memory +being overwritten outside of allocation boundaries. It is not 100% certain though. +Change in application behavior may also be caused by different order and distribution +of allocations across memory blocks after margins are applied. + +Margins work with all types of memory. + +Margin is applied only to allocations made out of memory blocks and not to dedicated +allocations, which have their own memory block of specific size. +It is thus not applied to allocations made using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT flag +or those automatically decided to put into dedicated allocations, e.g. due to its +large size or recommended by VK_KHR_dedicated_allocation extension. + +Margins appear in [JSON dump](@ref statistics_json_dump) as part of free space. + +Note that enabling margins increases memory usage and fragmentation. + +Margins do not apply to \ref virtual_allocator. + +\section debugging_memory_usage_corruption_detection Corruption detection + +You can additionally define macro `VMA_DEBUG_DETECT_CORRUPTION` to 1 to enable validation +of contents of the margins. + +\code +#define VMA_DEBUG_MARGIN 16 +#define VMA_DEBUG_DETECT_CORRUPTION 1 +#include "vk_mem_alloc.h" +\endcode + +When this feature is enabled, number of bytes specified as `VMA_DEBUG_MARGIN` +(it must be multiply of 4) after every allocation is filled with a magic number. +This idea is also know as "canary". +Memory is automatically mapped and unmapped if necessary. + +This number is validated automatically when the allocation is destroyed. +If it is not equal to the expected value, `VMA_ASSERT()` is executed. +It clearly means that either CPU or GPU overwritten the memory outside of boundaries of the allocation, +which indicates a serious bug. + +You can also explicitly request checking margins of all allocations in all memory blocks +that belong to specified memory types by using function vmaCheckCorruption(), +or in memory blocks that belong to specified custom pool, by using function +vmaCheckPoolCorruption(). + +Margin validation (corruption detection) works only for memory types that are +`HOST_VISIBLE` and `HOST_COHERENT`. + + +\section debugging_memory_usage_leak_detection Leak detection features + +At allocation and allocator destruction time VMA checks for unfreed and unmapped blocks using +`VMA_ASSERT_LEAK()`. This macro defaults to an assertion, triggering a typically fatal error in Debug +builds, and doing nothing in Release builds. You can provide your own definition of `VMA_ASSERT_LEAK()` +to change this behavior. + +At memory block destruction time VMA lists out all unfreed allocations using the `VMA_LEAK_LOG_FORMAT()` +macro, which defaults to `VMA_DEBUG_LOG_FORMAT`, which in turn defaults to a no-op. 
+If you're having trouble with leaks - for example, the aforementioned assertion triggers but you don't
+quite know \em why - overriding this macro to print out the leaking blocks, combined with assigning
+individual names to allocations using vmaSetAllocationName(), can greatly aid in fixing them.
+
+\page other_api_interop Interop with other graphics APIs
+
+VMA provides some features that help with interoperability with other graphics APIs, e.g. OpenGL.
+
+\section opengl_interop_exporting_memory Exporting memory
+
+If you want to attach `VkExportMemoryAllocateInfoKHR` or another structure to the `pNext` chain of memory allocations made by the library:
+
+You can create \ref custom_memory_pools for such allocations.
+Define and fill in your `VkExportMemoryAllocateInfoKHR` structure and attach it to VmaPoolCreateInfo::pMemoryAllocateNext
+while creating the custom pool.
+Please note that the structure must remain alive and unchanged for the whole lifetime of the #VmaPool,
+not only while creating it, as no copy of the structure is made -
+its original pointer is used for each allocation instead.
+
+If you want to export all memory allocated by VMA from certain memory types,
+including dedicated allocations and other allocations made from default pools,
+an alternative solution is to fill in VmaAllocatorCreateInfo::pTypeExternalMemoryHandleTypes.
+It should point to an array with `VkExternalMemoryHandleTypeFlagsKHR` to be automatically passed by the library
+through `VkExportMemoryAllocateInfoKHR` on each allocation made from a specific memory type.
+Please note that new versions of the library also support dedicated allocations created in custom pools.
+
+You should not mix these two methods in a way that applies both to the same memory type.
+Otherwise, the `VkExportMemoryAllocateInfoKHR` structure would be attached twice to the `pNext` chain of `VkMemoryAllocateInfo`.
+
+
+\section opengl_interop_custom_alignment Custom alignment
+
+Buffers or images exported to a different API like OpenGL may require a different alignment,
+higher than the one used by the library automatically, queried from functions like `vkGetBufferMemoryRequirements`.
+To impose such alignment:
+
+You can create \ref custom_memory_pools for such allocations.
+Set the VmaPoolCreateInfo::minAllocationAlignment member to the minimum alignment required for each allocation
+made out of this pool.
+The alignment actually used will be the maximum of this member and the alignment returned for the specific buffer or image
+from a function like `vkGetBufferMemoryRequirements`, which is called by VMA automatically.
+
+If you want to create a buffer with a specific minimum alignment out of default pools,
+use the special function vmaCreateBufferWithAlignment(), which takes an additional parameter `minAlignment`.
+
+Note that the alignment problem affects only resources placed inside bigger `VkDeviceMemory` blocks and not dedicated
+allocations, as these are, by definition, bound at offset 0 of their own dedicated block, so any alignment requirement is trivially satisfied.
+You can ensure that an allocation is created as dedicated by using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT.
+Contrary to Direct3D 12, Vulkan doesn't have a concept of alignment of the entire memory block passed on its allocation.
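+
+As a rough illustration of the vmaCreateBufferWithAlignment() path described above
+(the buffer size, usage flags, and the 4096-byte minimum alignment are made-up example values):
+
+\code
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = 65536;
+bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+VkBuffer buf;
+VmaAllocation alloc;
+// Works like vmaCreateBuffer(), but additionally guarantees that the allocation offset
+// is a multiple of the `minAlignment` argument (4096 here).
+VkResult res = vmaCreateBufferWithAlignment(allocator, &bufCreateInfo, &allocCreateInfo,
+    4096, &buf, &alloc, nullptr);
+// Check res...
+\endcode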
+ +\section opengl_interop_extended_allocation_information Extended allocation information + +If you want to rely on VMA to allocate your buffers and images inside larger memory blocks, +but you need to know the size of the entire block and whether the allocation was made +with its own dedicated memory, use function vmaGetAllocationInfo2() to retrieve +extended allocation information in structure #VmaAllocationInfo2. + + + +\page usage_patterns Recommended usage patterns + +Vulkan gives great flexibility in memory allocation. +This chapter shows the most common patterns. + +See also slides from talk: +[Sawicki, Adam. Advanced Graphics Techniques Tutorial: Memory management in Vulkan and DX12. Game Developers Conference, 2018](https://www.gdcvault.com/play/1025458/Advanced-Graphics-Techniques-Tutorial-New) + + +\section usage_patterns_gpu_only GPU-only resource + +<b>When:</b> +Any resources that you frequently write and read on GPU, +e.g. images used as color attachments (aka "render targets"), depth-stencil attachments, +images/buffers used as storage image/buffer (aka "Unordered Access View (UAV)"). + +<b>What to do:</b> +Let the library select the optimal memory type, which will likely have `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. + +\code +VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; +imgCreateInfo.imageType = VK_IMAGE_TYPE_2D; +imgCreateInfo.extent.width = 3840; +imgCreateInfo.extent.height = 2160; +imgCreateInfo.extent.depth = 1; +imgCreateInfo.mipLevels = 1; +imgCreateInfo.arrayLayers = 1; +imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM; +imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; +imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; +imgCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; +imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; +allocCreateInfo.priority = 1.0f; + +VkImage img; +VmaAllocation alloc; +vmaCreateImage(allocator, &imgCreateInfo, &allocCreateInfo, &img, &alloc, nullptr); +\endcode + +<b>Also consider:</b> +Consider creating them as dedicated allocations using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT, +especially if they are large or if you plan to destroy and recreate them with different sizes +e.g. when display resolution changes. +Prefer to create such resources first and all other GPU resources (like textures and vertex buffers) later. +When VK_EXT_memory_priority extension is enabled, it is also worth setting high priority to such allocation +to decrease chances to be evicted to system memory by the operating system. + +\section usage_patterns_staging_copy_upload Staging copy for upload + +<b>When:</b> +A "staging" buffer than you want to map and fill from CPU code, then use as a source of transfer +to some GPU resource. + +<b>What to do:</b> +Use flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT. +Let the library select the optimal memory type, which will always have `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`. 
+ +\code +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = 65536; +bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | +    VMA_ALLOCATION_CREATE_MAPPED_BIT; + +VkBuffer buf; +VmaAllocation alloc; +VmaAllocationInfo allocInfo; +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); + +... + +memcpy(allocInfo.pMappedData, myData, myDataSize); +\endcode + +<b>Also consider:</b> +You can map the allocation using vmaMapMemory() or you can create it as persistenly mapped +using #VMA_ALLOCATION_CREATE_MAPPED_BIT, as in the example above. + + +\section usage_patterns_readback Readback + +<b>When:</b> +Buffers for data written by or transferred from the GPU that you want to read back on the CPU, +e.g. results of some computations. + +<b>What to do:</b> +Use flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT. +Let the library select the optimal memory type, which will always have `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` +and `VK_MEMORY_PROPERTY_HOST_CACHED_BIT`. + +\code +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = 65536; +bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT | +    VMA_ALLOCATION_CREATE_MAPPED_BIT; + +VkBuffer buf; +VmaAllocation alloc; +VmaAllocationInfo allocInfo; +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); + +... + +const float* downloadedData = (const float*)allocInfo.pMappedData; +\endcode + + +\section usage_patterns_advanced_data_uploading Advanced data uploading + +For resources that you frequently write on CPU via mapped pointer and +frequently read on GPU e.g. as a uniform buffer (also called "dynamic"), multiple options are possible: + +-# Easiest solution is to have one copy of the resource in `HOST_VISIBLE` memory, +   even if it means system RAM (not `DEVICE_LOCAL`) on systems with a discrete graphics card, +   and make the device reach out to that resource directly. +   - Reads performed by the device will then go through PCI Express bus. +     The performance of this access may be limited, but it may be fine depending on the size +     of this resource (whether it is small enough to quickly end up in GPU cache) and the sparsity +     of access. +-# On systems with unified memory (e.g. AMD APU or Intel integrated graphics, mobile chips), +   a memory type may be available that is both `HOST_VISIBLE` (available for mapping) and `DEVICE_LOCAL` +   (fast to access from the GPU). Then, it is likely the best choice for such type of resource. +-# Systems with a discrete graphics card and separate video memory may or may not expose +   a memory type that is both `HOST_VISIBLE` and `DEVICE_LOCAL`, also known as Base Address Register (BAR). +   If they do, it represents a piece of VRAM (or entire VRAM, if ReBAR is enabled in the motherboard BIOS) +   that is available to CPU for mapping. +   - Writes performed by the host to that memory go through PCI Express bus. 
+     The performance of these writes may be limited, but it may be fine, especially on PCIe 4.0, +     as long as rules of using uncached and write-combined memory are followed - only sequential writes and no reads. +-# Finally, you may need or prefer to create a separate copy of the resource in `DEVICE_LOCAL` memory, +   a separate "staging" copy in `HOST_VISIBLE` memory and perform an explicit transfer command between them. + +Thankfully, VMA offers an aid to create and use such resources in the the way optimal +for the current Vulkan device. To help the library make the best choice, +use flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT together with +#VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT. +It will then prefer a memory type that is both `DEVICE_LOCAL` and `HOST_VISIBLE` (integrated memory or BAR), +but if no such memory type is available or allocation from it fails +(PC graphics cards have only 256 MB of BAR by default, unless ReBAR is supported and enabled in BIOS), +it will fall back to `DEVICE_LOCAL` memory for fast GPU access. +It is then up to you to detect that the allocation ended up in a memory type that is not `HOST_VISIBLE`, +so you need to create another "staging" allocation and perform explicit transfers. + +\code +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = 65536; +bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | +    VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT | +    VMA_ALLOCATION_CREATE_MAPPED_BIT; + +VkBuffer buf; +VmaAllocation alloc; +VmaAllocationInfo allocInfo; +VkResult result = vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); +// Check result... + +VkMemoryPropertyFlags memPropFlags; +vmaGetAllocationMemoryProperties(allocator, alloc, &memPropFlags); + +if(memPropFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) +{ +    // Allocation ended up in a mappable memory and is already mapped - write to it directly. + +    // [Executed in runtime]: +    memcpy(allocInfo.pMappedData, myData, myDataSize); +    result = vmaFlushAllocation(allocator, alloc, 0, VK_WHOLE_SIZE); +    // Check result... + +    VkBufferMemoryBarrier bufMemBarrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER }; +    bufMemBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; +    bufMemBarrier.dstAccessMask = VK_ACCESS_UNIFORM_READ_BIT; +    bufMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; +    bufMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; +    bufMemBarrier.buffer = buf; +    bufMemBarrier.offset = 0; +    bufMemBarrier.size = VK_WHOLE_SIZE; + +    vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, +        0, 0, nullptr, 1, &bufMemBarrier, 0, nullptr); +} +else +{ +    // Allocation ended up in a non-mappable memory - a transfer using a staging buffer is required. 
+    VkBufferCreateInfo stagingBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +    stagingBufCreateInfo.size = 65536; +    stagingBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + +    VmaAllocationCreateInfo stagingAllocCreateInfo = {}; +    stagingAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +    stagingAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | +        VMA_ALLOCATION_CREATE_MAPPED_BIT; + +    VkBuffer stagingBuf; +    VmaAllocation stagingAlloc; +    VmaAllocationInfo stagingAllocInfo; +    result = vmaCreateBuffer(allocator, &stagingBufCreateInfo, &stagingAllocCreateInfo, +        &stagingBuf, &stagingAlloc, &stagingAllocInfo); +    // Check result... + +    // [Executed in runtime]: +    memcpy(stagingAllocInfo.pMappedData, myData, myDataSize); +    result = vmaFlushAllocation(allocator, stagingAlloc, 0, VK_WHOLE_SIZE); +    // Check result... + +    VkBufferMemoryBarrier bufMemBarrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER }; +    bufMemBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; +    bufMemBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; +    bufMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; +    bufMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; +    bufMemBarrier.buffer = stagingBuf; +    bufMemBarrier.offset = 0; +    bufMemBarrier.size = VK_WHOLE_SIZE; + +    vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, +        0, 0, nullptr, 1, &bufMemBarrier, 0, nullptr); + +    VkBufferCopy bufCopy = { +        0, // srcOffset +        0, // dstOffset, +        myDataSize, // size +    }; + +    vkCmdCopyBuffer(cmdBuf, stagingBuf, buf, 1, &bufCopy); + +    VkBufferMemoryBarrier bufMemBarrier2 = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER }; +    bufMemBarrier2.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; +    bufMemBarrier2.dstAccessMask = VK_ACCESS_UNIFORM_READ_BIT; // We created a uniform buffer +    bufMemBarrier2.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; +    bufMemBarrier2.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; +    bufMemBarrier2.buffer = buf; +    bufMemBarrier2.offset = 0; +    bufMemBarrier2.size = VK_WHOLE_SIZE; + +    vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, +        0, 0, nullptr, 1, &bufMemBarrier2, 0, nullptr); +} +\endcode + +\section usage_patterns_other_use_cases Other use cases + +Here are some other, less obvious use cases and their recommended settings: + +- An image that is used only as transfer source and destination, but it should stay on the device, +  as it is used to temporarily store a copy of some texture, e.g. from the current to the next frame, +  for temporal antialiasing or other temporal effects. +  - Use `VkImageCreateInfo::usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT` +  - Use VmaAllocationCreateInfo::usage = #VMA_MEMORY_USAGE_AUTO +- An image that is used only as transfer source and destination, but it should be placed +  in the system RAM despite it doesn't need to be mapped, because it serves as a "swap" copy to evict +  least recently used textures from VRAM. +  - Use `VkImageCreateInfo::usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT` +  - Use VmaAllocationCreateInfo::usage = #VMA_MEMORY_USAGE_AUTO_PREFER_HOST, +    as VMA needs a hint here to differentiate from the previous case. +- A buffer that you want to map and write from the CPU, directly read from the GPU +  (e.g. 
as a uniform or vertex buffer), but you have a clear preference to place it in device or +  host memory due to its large size. +  - Use `VkBufferCreateInfo::usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT` +  - Use VmaAllocationCreateInfo::usage = #VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE or #VMA_MEMORY_USAGE_AUTO_PREFER_HOST +  - Use VmaAllocationCreateInfo::flags = #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT + + +\page configuration Configuration + +Please check "CONFIGURATION SECTION" in the code to find macros that you can define +before each include of this file or change directly in this file to provide +your own implementation of basic facilities like assert, `min()` and `max()` functions, +mutex, atomic etc. + +For example, define `VMA_ASSERT(expr)` before including the library to provide +custom implementation of the assertion, compatible with your project. +By default it is defined to standard C `assert(expr)` in `_DEBUG` configuration +and empty otherwise. + +Similarly, you can define `VMA_LEAK_LOG_FORMAT` macro to enable printing of leaked (unfreed) allocations, +including their names and other parameters. Example: + +\code +#define VMA_LEAK_LOG_FORMAT(format, ...) do { \ +        printf((format), __VA_ARGS__); \ +        printf("\n"); \ +    } while(false) +\endcode + +\section config_Vulkan_functions Pointers to Vulkan functions + +There are multiple ways to import pointers to Vulkan functions in the library. +In the simplest case you don't need to do anything. +If the compilation or linking of your program or the initialization of the #VmaAllocator +doesn't work for you, you can try to reconfigure it. + +First, the allocator tries to fetch pointers to Vulkan functions linked statically, +like this: + +\code +m_VulkanFunctions.vkAllocateMemory = (PFN_vkAllocateMemory)vkAllocateMemory; +\endcode + +If you want to disable this feature, set configuration macro: `#define VMA_STATIC_VULKAN_FUNCTIONS 0`. + +Second, you can provide the pointers yourself by setting member VmaAllocatorCreateInfo::pVulkanFunctions. +You can fetch them e.g. using functions `vkGetInstanceProcAddr` and `vkGetDeviceProcAddr` or +by using a helper library like [volk](https://github.com/zeux/volk). + +Third, VMA tries to fetch remaining pointers that are still null by calling +`vkGetInstanceProcAddr` and `vkGetDeviceProcAddr` on its own. +You need to only fill in VmaVulkanFunctions::vkGetInstanceProcAddr and VmaVulkanFunctions::vkGetDeviceProcAddr. +Other pointers will be fetched automatically. +If you want to disable this feature, set configuration macro: `#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0`. + +Finally, all the function pointers required by the library (considering selected +Vulkan version and enabled extensions) are checked with `VMA_ASSERT` if they are not null. + + +\section custom_memory_allocator Custom host memory allocator + +If you use custom allocator for CPU memory rather than default operator `new` +and `delete` from C++, you can make this library using your allocator as well +by filling optional member VmaAllocatorCreateInfo::pAllocationCallbacks. These +functions will be passed to Vulkan, as well as used by the library itself to +make any CPU-side allocations. + +\section allocation_callbacks Device memory allocation callbacks + +The library makes calls to `vkAllocateMemory()` and `vkFreeMemory()` internally. +You can setup callbacks to be informed about these calls, e.g. for the purpose +of gathering some statistics. 
To do it, fill optional member
+VmaAllocatorCreateInfo::pDeviceMemoryCallbacks.
+
+\section heap_memory_limit Device heap memory limit
+
+When device memory of a certain heap runs out of free space, new allocations may
+fail (returning an error code) or they may succeed, silently pushing some existing
+memory blocks from GPU VRAM to system RAM (which degrades performance). This
+behavior is implementation-dependent - it depends on the GPU vendor and graphics
+driver.
+
+On AMD cards it can be controlled while creating the Vulkan device object by using
+the VK_AMD_memory_overallocation_behavior extension, if available.
+
+Alternatively, if you want to test how your program behaves with a limited amount of Vulkan device
+memory available without switching your graphics card to one that really has
+smaller VRAM, you can use a feature of this library intended for this purpose.
+To do it, fill optional member VmaAllocatorCreateInfo::pHeapSizeLimit.
+
+
+
+\page vk_khr_dedicated_allocation VK_KHR_dedicated_allocation
+
+VK_KHR_dedicated_allocation is a Vulkan extension which can be used to improve
+performance on some GPUs. It augments the Vulkan API with the possibility to query
+the driver whether it prefers a particular buffer or image to have its own, dedicated
+allocation (separate `VkDeviceMemory` block) for better efficiency - to be able
+to do some internal optimizations. The extension is supported by this library.
+It will be used automatically when enabled.
+
+It has been promoted to core Vulkan 1.1, so if you use an eligible Vulkan version
+and inform VMA about it by setting VmaAllocatorCreateInfo::vulkanApiVersion,
+you are all set.
+
+Otherwise, if you want to use it as an extension:
+
+1 . When creating the Vulkan device, check if the following 2 device extensions are
+supported (call `vkEnumerateDeviceExtensionProperties()`).
+If yes, enable them (fill `VkDeviceCreateInfo::ppEnabledExtensionNames`).
+
+- VK_KHR_get_memory_requirements2
+- VK_KHR_dedicated_allocation
+
+If you enabled these extensions:
+
+2 . Use the #VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT flag when creating
+your #VmaAllocator to inform the library that you enabled the required extensions
+and you want the library to use them.
+
+\code
+allocatorInfo.flags |= VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT;
+
+vmaCreateAllocator(&allocatorInfo, &allocator);
+\endcode
+
+That is all. The extension will be automatically used whenever you create a
+buffer using vmaCreateBuffer() or an image using vmaCreateImage().
+
+When using the extension together with the Vulkan Validation Layer, you will receive
+warnings like this:
+
+_vkBindBufferMemory(): Binding memory to buffer 0x33 but vkGetBufferMemoryRequirements() has not been called on that buffer._
+
+It is OK, you should just ignore it. It happens because you use function
+`vkGetBufferMemoryRequirements2KHR()` instead of standard
+`vkGetBufferMemoryRequirements()`, while the validation layer seems to be
+unaware of it.
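+
+If you target Vulkan 1.1 or later, a minimal sketch of the core-version path mentioned above could
+look as follows (the rest of `allocatorInfo` is assumed to be filled in as usual; no extension flag is needed):
+
+\code
+allocatorInfo.vulkanApiVersion = VK_API_VERSION_1_1;
+
+vmaCreateAllocator(&allocatorInfo, &allocator);
+\endcode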
+
+To learn more about this extension, see:
+
+- [VK_KHR_dedicated_allocation in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap50.html#VK_KHR_dedicated_allocation)
+- [VK_KHR_dedicated_allocation unofficial manual](http://asawicki.info/articles/VK_KHR_dedicated_allocation.php5)
+
+
+
+\page vk_ext_memory_priority VK_EXT_memory_priority
+
+VK_EXT_memory_priority is a device extension that allows to pass additional "priority"
+value to Vulkan memory allocations that the implementation may use to prefer certain
+buffers and images that are critical for performance to stay in device-local memory
+in cases when the memory is over-subscribed, while some others may be moved to the system memory.
+
+VMA offers convenient usage of this extension.
+If you enable it, you can pass "priority" parameter when creating allocations or custom pools
+and the library automatically passes the value to Vulkan using this extension.
+
+If you want to use this extension in connection with VMA, follow these steps:
+
+\section vk_ext_memory_priority_initialization Initialization
+
+1) Call `vkEnumerateDeviceExtensionProperties` for the physical device.
+Check if the extension is supported - if returned array of `VkExtensionProperties` contains "VK_EXT_memory_priority".
+
+2) Call `vkGetPhysicalDeviceFeatures2` for the physical device instead of old `vkGetPhysicalDeviceFeatures`.
+Attach additional structure `VkPhysicalDeviceMemoryPriorityFeaturesEXT` to `VkPhysicalDeviceFeatures2::pNext` to be returned.
+Check if the device feature is really supported - check if `VkPhysicalDeviceMemoryPriorityFeaturesEXT::memoryPriority` is true.
+
+3) While creating device with `vkCreateDevice`, enable this extension - add "VK_EXT_memory_priority"
+to the list passed as `VkDeviceCreateInfo::ppEnabledExtensionNames`.
+
+4) While creating the device, also don't set `VkDeviceCreateInfo::pEnabledFeatures`.
+Fill in `VkPhysicalDeviceFeatures2` structure instead and pass it as `VkDeviceCreateInfo::pNext`.
+Enable this device feature - attach additional structure `VkPhysicalDeviceMemoryPriorityFeaturesEXT` to
+`VkPhysicalDeviceFeatures2::pNext` chain and set its member `memoryPriority` to `VK_TRUE`.
+
+5) While creating #VmaAllocator with vmaCreateAllocator() inform VMA that you
+have enabled this extension and feature - add #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT
+to VmaAllocatorCreateInfo::flags.
+
+\section vk_ext_memory_priority_usage Usage
+
+When using this extension, you should initialize the following members:
+
+- VmaAllocationCreateInfo::priority when creating a dedicated allocation with #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT.
+- VmaPoolCreateInfo::priority when creating a custom pool.
+
+It should be a floating-point value between `0.0f` and `1.0f`, where the recommended default is `0.5f`.
+Memory allocated with higher value can be treated by the Vulkan implementation as higher priority
+and so it can have lower chances of being pushed out to system memory, experiencing degraded performance.
+
+It might be a good idea to create performance-critical resources like color-attachment or depth-stencil images
+as dedicated and set high priority to them.
For example: + +\code +VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; +imgCreateInfo.imageType = VK_IMAGE_TYPE_2D; +imgCreateInfo.extent.width = 3840; +imgCreateInfo.extent.height = 2160; +imgCreateInfo.extent.depth = 1; +imgCreateInfo.mipLevels = 1; +imgCreateInfo.arrayLayers = 1; +imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM; +imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; +imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; +imgCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; +imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; +allocCreateInfo.priority = 1.0f; + +VkImage img; +VmaAllocation alloc; +vmaCreateImage(allocator, &imgCreateInfo, &allocCreateInfo, &img, &alloc, nullptr); +\endcode + +`priority` member is ignored in the following situations: + +- Allocations created in custom pools: They inherit the priority, along with all other allocation parameters +  from the parameters passed in #VmaPoolCreateInfo when the pool was created. +- Allocations created in default pools: They inherit the priority from the parameters +  VMA used when creating default pools, which means `priority == 0.5f`. + + +\page vk_amd_device_coherent_memory VK_AMD_device_coherent_memory + +VK_AMD_device_coherent_memory is a device extension that enables access to +additional memory types with `VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD` and +`VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD` flag. It is useful mostly for +allocation of buffers intended for writing "breadcrumb markers" in between passes +or draw calls, which in turn are useful for debugging GPU crash/hang/TDR cases. + +When the extension is available but has not been enabled, Vulkan physical device +still exposes those memory types, but their usage is forbidden. VMA automatically +takes care of that - it returns `VK_ERROR_FEATURE_NOT_PRESENT` when an attempt +to allocate memory of such type is made. + +If you want to use this extension in connection with VMA, follow these steps: + +\section vk_amd_device_coherent_memory_initialization Initialization + +1) Call `vkEnumerateDeviceExtensionProperties` for the physical device. +Check if the extension is supported - if returned array of `VkExtensionProperties` contains "VK_AMD_device_coherent_memory". + +2) Call `vkGetPhysicalDeviceFeatures2` for the physical device instead of old `vkGetPhysicalDeviceFeatures`. +Attach additional structure `VkPhysicalDeviceCoherentMemoryFeaturesAMD` to `VkPhysicalDeviceFeatures2::pNext` to be returned. +Check if the device feature is really supported - check if `VkPhysicalDeviceCoherentMemoryFeaturesAMD::deviceCoherentMemory` is true. + +3) While creating device with `vkCreateDevice`, enable this extension - add "VK_AMD_device_coherent_memory" +to the list passed as `VkDeviceCreateInfo::ppEnabledExtensionNames`. + +4) While creating the device, also don't set `VkDeviceCreateInfo::pEnabledFeatures`. +Fill in `VkPhysicalDeviceFeatures2` structure instead and pass it as `VkDeviceCreateInfo::pNext`. +Enable this device feature - attach additional structure `VkPhysicalDeviceCoherentMemoryFeaturesAMD` to +`VkPhysicalDeviceFeatures2::pNext` and set its member `deviceCoherentMemory` to `VK_TRUE`. 
+
+5) While creating #VmaAllocator with vmaCreateAllocator() inform VMA that you
+have enabled this extension and feature - add #VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT
+to VmaAllocatorCreateInfo::flags.
+
+\section vk_amd_device_coherent_memory_usage Usage
+
+After following steps described above, you can create VMA allocations and custom pools
+out of the special `DEVICE_COHERENT` and `DEVICE_UNCACHED` memory types on eligible
+devices. There are multiple ways to do it, for example:
+
+- You can request or prefer to allocate out of such memory types by adding
+  `VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD` to VmaAllocationCreateInfo::requiredFlags
+  or VmaAllocationCreateInfo::preferredFlags. Those flags can be freely mixed with
+  other ways of \ref choosing_memory_type, like setting VmaAllocationCreateInfo::usage.
+- If you manually found memory type index to use for this purpose, force allocation
+  from this specific index by setting VmaAllocationCreateInfo::memoryTypeBits `= 1u << index`.
+
+\section vk_amd_device_coherent_memory_more_information More information
+
+To learn more about this extension, see [VK_AMD_device_coherent_memory in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VK_AMD_device_coherent_memory.html)
+
+Example use of this extension can be found in the code of the sample and test suite
+accompanying this library.
+
+
+\page vk_khr_external_memory_win32 VK_KHR_external_memory_win32
+
+On Windows, the VK_KHR_external_memory_win32 device extension allows exporting a Win32 `HANDLE`
+of a `VkDeviceMemory` block, to be able to reference the memory on other Vulkan logical devices or instances,
+in multiple processes, and/or in multiple APIs.
+VMA offers support for it.
+
+\section vk_khr_external_memory_win32_initialization Initialization
+
+1) Make sure the extension is defined in the code by including following header before including VMA:
+
+\code
+#include <vulkan/vulkan_win32.h>
+\endcode
+
+2) Check if "VK_KHR_external_memory_win32" is available among device extensions.
+Enable it when creating the `VkDevice` object.
+
+3) Enable the usage of this extension in VMA by setting flag #VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT
+when calling vmaCreateAllocator().
+
+4) Make sure that VMA has access to the `vkGetMemoryWin32HandleKHR` function by either enabling `VMA_DYNAMIC_VULKAN_FUNCTIONS` macro
+or setting VmaVulkanFunctions::vkGetMemoryWin32HandleKHR explicitly.
+For more information, see \ref quick_start_initialization_importing_vulkan_functions.
+
+\section vk_khr_external_memory_win32_preparations Preparations
+
+You can find example usage among tests, in file "Tests.cpp", function `TestWin32Handles()`.
+
+To use the extension, buffers need to be created with `VkExternalMemoryBufferCreateInfoKHR` attached to their `pNext` chain,
+and memory allocations need to be made with `VkExportMemoryAllocateInfoKHR` attached to their `pNext` chain.
+To make use of them, you need to use \ref custom_memory_pools. Example:
+
+\code
+// Define an example buffer and allocation parameters.
+VkExternalMemoryBufferCreateInfoKHR externalMemBufCreateInfo = {
+    VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR,
+    nullptr,
+    VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
+};
+VkBufferCreateInfo exampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+exampleBufCreateInfo.size = 0x10000; // Doesn't matter here.
+exampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+exampleBufCreateInfo.pNext = &externalMemBufCreateInfo;
+
+VmaAllocationCreateInfo exampleAllocCreateInfo = {};
+exampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+// Find memory type index to use for the custom pool.
+uint32_t memTypeIndex;
+VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_Allocator,
+    &exampleBufCreateInfo, &exampleAllocCreateInfo, &memTypeIndex);
+// Check res...
+
+// Create a custom pool.
+constexpr static VkExportMemoryAllocateInfoKHR exportMemAllocInfo = {
+    VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR,
+    nullptr,
+    VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
+};
+VmaPoolCreateInfo poolCreateInfo = {};
+poolCreateInfo.memoryTypeIndex = memTypeIndex;
+poolCreateInfo.pMemoryAllocateNext = (void*)&exportMemAllocInfo;
+
+VmaPool pool;
+res = vmaCreatePool(g_Allocator, &poolCreateInfo, &pool);
+// Check res...
+
+// YOUR OTHER CODE COMES HERE....
+
+// At the end, don't forget to destroy it!
+vmaDestroyPool(g_Allocator, pool);
+\endcode
+
+Note that the structure passed as VmaPoolCreateInfo::pMemoryAllocateNext must remain alive and unchanged
+for the whole lifetime of the custom pool, because it will be used when the pool allocates a new device memory block.
+No copy is made internally. This is why variable `exportMemAllocInfo` is defined as `static`.
+
+\section vk_khr_external_memory_win32_memory_allocation Memory allocation
+
+Finally, you can create a buffer with an allocation out of the custom pool.
+The buffer should use the same flags as the sample buffer used to find the memory type.
+It should also specify `VkExternalMemoryBufferCreateInfoKHR` in its `pNext` chain.
+
+\code
+VkExternalMemoryBufferCreateInfoKHR externalMemBufCreateInfo = {
+    VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR,
+    nullptr,
+    VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
+};
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = // Your desired buffer size.
+bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+bufCreateInfo.pNext = &externalMemBufCreateInfo;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.pool = pool;  // It is enough to set this one member.
+
+VkBuffer buf;
+VmaAllocation alloc;
+res = vmaCreateBuffer(g_Allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr);
+// Check res...
+
+// YOUR OTHER CODE COMES HERE....
+
+// At the end, don't forget to destroy it!
+vmaDestroyBuffer(g_Allocator, buf, alloc);
+\endcode
+
+If you need each allocation to have its own device memory block and start at offset 0, you can still do it
+by using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT flag. It also works with custom pools.
+
+\section vk_khr_external_memory_win32_exporting_win32_handle Exporting Win32 handle
+
+After the allocation is created, you can acquire a Win32 `HANDLE` to the `VkDeviceMemory` block it belongs to.
+VMA function vmaGetMemoryWin32Handle() is a replacement for the Vulkan function `vkGetMemoryWin32HandleKHR`.
+
+\code
+HANDLE handle;
+res = vmaGetMemoryWin32Handle(g_Allocator, alloc, nullptr, &handle);
+// Check res...
+
+// YOUR OTHER CODE COMES HERE....
+
+// At the end, you must close the handle.
+CloseHandle(handle);
+\endcode
+
+Documentation of the VK_KHR_external_memory_win32 extension states that:
+
+> If handleType is defined as an NT handle, vkGetMemoryWin32HandleKHR must be called no more than once for each valid unique combination of memory and handleType.
+
+This is ensured automatically inside VMA.
+The library fetches the handle on first use, remembers it internally, and closes it when the memory block or dedicated allocation is destroyed.
+Every time you call vmaGetMemoryWin32Handle(), VMA calls `DuplicateHandle` and returns a new handle that you need to close.
+
+For further information, please check documentation of the vmaGetMemoryWin32Handle() function.
+
+
+\page enabling_buffer_device_address Enabling buffer device address
+
+Device extension VK_KHR_buffer_device_address
+allows fetching a raw GPU pointer to a buffer and passing it for use in shader code.
+It has been promoted to core Vulkan 1.2.
+
+If you want to use this feature in connection with VMA, follow these steps:
+
+\section enabling_buffer_device_address_initialization Initialization
+
+1) (For Vulkan version < 1.2) Call `vkEnumerateDeviceExtensionProperties` for the physical device.
+Check if the extension is supported - if returned array of `VkExtensionProperties` contains
+"VK_KHR_buffer_device_address".
+
+2) Call `vkGetPhysicalDeviceFeatures2` for the physical device instead of old `vkGetPhysicalDeviceFeatures`.
+Attach additional structure `VkPhysicalDeviceBufferDeviceAddressFeatures*` to `VkPhysicalDeviceFeatures2::pNext` to be returned.
+Check if the device feature is really supported - check if `VkPhysicalDeviceBufferDeviceAddressFeatures::bufferDeviceAddress` is true.
+
+3) (For Vulkan version < 1.2) While creating device with `vkCreateDevice`, enable this extension - add
+"VK_KHR_buffer_device_address" to the list passed as `VkDeviceCreateInfo::ppEnabledExtensionNames`.
+
+4) While creating the device, also don't set `VkDeviceCreateInfo::pEnabledFeatures`.
+Fill in `VkPhysicalDeviceFeatures2` structure instead and pass it as `VkDeviceCreateInfo::pNext`.
+Enable this device feature - attach additional structure `VkPhysicalDeviceBufferDeviceAddressFeatures*` to
+`VkPhysicalDeviceFeatures2::pNext` and set its member `bufferDeviceAddress` to `VK_TRUE`.
+
+5) While creating #VmaAllocator with vmaCreateAllocator() inform VMA that you
+have enabled this feature - add #VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT
+to VmaAllocatorCreateInfo::flags.
+
+\section enabling_buffer_device_address_usage Usage
+
+After following steps described above, you can create buffers with `VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT*` using VMA.
+The library automatically adds `VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT*` to
+allocated memory blocks wherever it might be needed.
+
+Please note that the library supports only `VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT*`.
+The second part of this functionality related to "capture and replay" is not supported,
+as it is intended for usage in debugging tools like RenderDoc, not in everyday Vulkan usage.
+
+\section enabling_buffer_device_address_more_information More information
+
+To learn more about this extension, see [VK_KHR_buffer_device_address in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap46.html#VK_KHR_buffer_device_address)
+
+Example use of this extension can be found in the code of the sample and test suite
+accompanying this library.
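+
+In addition, for quick reference, here is a minimal sketch of the usage described above. It is
+not taken from the library's samples; it assumes `allocator` and `device` are already created and
+that the feature was enabled during device creation as described in the Initialization section:
+
+\code
+// Create a buffer that can be used with buffer device address.
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = 65536;
+bufCreateInfo.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+    VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+VkBuffer buf;
+VmaAllocation alloc;
+VkResult res = vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr);
+// Check res...
+
+// Query the raw GPU address of the buffer (core Vulkan 1.2).
+VkBufferDeviceAddressInfo addressInfo = { VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO };
+addressInfo.buffer = buf;
+VkDeviceAddress gpuAddress = vkGetBufferDeviceAddress(device, &addressInfo);
+// Pass gpuAddress to a shader, e.g. through a push constant or another buffer.
+\endcode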
+ +\page general_considerations General considerations + +\section general_considerations_thread_safety Thread safety + +- The library has no global state, so separate #VmaAllocator objects can be used +  independently. +  There should be no need to create multiple such objects though - one per `VkDevice` is enough. +- By default, all calls to functions that take #VmaAllocator as first parameter +  are safe to call from multiple threads simultaneously because they are +  synchronized internally when needed. +  This includes allocation and deallocation from default memory pool, as well as custom #VmaPool. +- When the allocator is created with #VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT +  flag, calls to functions that take such #VmaAllocator object must be +  synchronized externally. +- Access to a #VmaAllocation object must be externally synchronized. For example, +  you must not call vmaGetAllocationInfo() and vmaMapMemory() from different +  threads at the same time if you pass the same #VmaAllocation object to these +  functions. +- #VmaVirtualBlock is not safe to be used from multiple threads simultaneously. + +\section general_considerations_versioning_and_compatibility Versioning and compatibility + +The library uses [**Semantic Versioning**](https://semver.org/), +which means version numbers follow convention: Major.Minor.Patch (e.g. 2.3.0), where: + +- Incremented Patch version means a release is backward- and forward-compatible, +  introducing only some internal improvements, bug fixes, optimizations etc. +  or changes that are out of scope of the official API described in this documentation. +- Incremented Minor version means a release is backward-compatible, +  so existing code that uses the library should continue to work, while some new +  symbols could have been added: new structures, functions, new values in existing +  enums and bit flags, new structure members, but not new function parameters. +- Incrementing Major version means a release could break some backward compatibility. + +All changes between official releases are documented in file "CHANGELOG.md". + +\warning Backward compatibility is considered on the level of C++ source code, not binary linkage. +Adding new members to existing structures is treated as backward compatible if initializing +the new members to binary zero results in the old behavior. +You should always fully initialize all library structures to zeros and not rely on their +exact binary size. + +\section general_considerations_validation_layer_warnings Validation layer warnings + +When using this library, you can meet following types of warnings issued by +Vulkan validation layer. They don't necessarily indicate a bug, so you may need +to just ignore them. + +- *vkBindBufferMemory(): Binding memory to buffer 0xeb8e4 but vkGetBufferMemoryRequirements() has not been called on that buffer.* +  - It happens when VK_KHR_dedicated_allocation extension is enabled. +    `vkGetBufferMemoryRequirements2KHR` function is used instead, while validation layer seems to be unaware of it. +- *Mapping an image with layout VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL can result in undefined behavior if this memory is used by the device. Only GENERAL or PREINITIALIZED should be used.* +  - It happens when you map a buffer or image, because the library maps entire +    `VkDeviceMemory` block, where different types of images and buffers may end +    up together, especially on GPUs with unified memory like Intel. 
+- *Non-linear image 0xebc91 is aliased with linear buffer 0xeb8e4 which may indicate a bug.* +  - It may happen when you use [defragmentation](@ref defragmentation). + +\section general_considerations_allocation_algorithm Allocation algorithm + +The library uses following algorithm for allocation, in order: + +-# Try to find free range of memory in existing blocks. +-# If failed, try to create a new block of `VkDeviceMemory`, with preferred block size. +-# If failed, try to create such block with size / 2, size / 4, size / 8. +-# If failed, try to allocate separate `VkDeviceMemory` for this allocation, +   just like when you use #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. +-# If failed, choose other memory type that meets the requirements specified in +   VmaAllocationCreateInfo and go to point 1. +-# If failed, return `VK_ERROR_OUT_OF_DEVICE_MEMORY`. + +\section general_considerations_features_not_supported Features not supported + +Features deliberately excluded from the scope of this library: + +-# **Data transfer.** Uploading (streaming) and downloading data of buffers and images +   between CPU and GPU memory and related synchronization is responsibility of the user. +   Defining some "texture" object that would automatically stream its data from a +   staging copy in CPU memory to GPU memory would rather be a feature of another, +   higher-level library implemented on top of VMA. +   VMA doesn't record any commands to a `VkCommandBuffer`. It just allocates memory. +-# **Recreation of buffers and images.** Although the library has functions for +   buffer and image creation: vmaCreateBuffer(), vmaCreateImage(), you need to +   recreate these objects yourself after defragmentation. That is because the big +   structures `VkBufferCreateInfo`, `VkImageCreateInfo` are not stored in +   #VmaAllocation object. +-# **Handling CPU memory allocation failures.** When dynamically creating small C++ +   objects in CPU memory (not Vulkan memory), allocation failures are not checked +   and handled gracefully, because that would complicate code significantly and +   is usually not needed in desktop PC applications anyway. +   Success of an allocation is just checked with an assert. +-# **Code free of any compiler warnings.** Maintaining the library to compile and +   work correctly on so many different platforms is hard enough. Being free of +   any warnings, on any version of any compiler, is simply not feasible. +   There are many preprocessor macros that make some variables unused, function parameters unreferenced, +   or conditional expressions constant in some configurations. +   The code of this library should not be bigger or more complicated just to silence these warnings. +   It is recommended to disable such warnings instead. +-# This is a C++ library with C interface. **Bindings or ports to any other programming languages** are welcome as external projects but +   are not going to be included into this repository. +*/  | 
