diff options
| author | Clement Sibille <clements+git@lisible.xyz> | 2024-05-20 01:26:03 +0900 |
|---|---|---|
| committer | Clement Sibille <clements+git@lisible.xyz> | 2024-05-20 01:28:21 +0900 |
| commit | ec7c66794b993fd6a0c6db8795f8508df84e23c3 (patch) | |
| tree | d9948a89cef4d4e2178d93909ed8c0cc2c6b57b5 | |
| parent | d73919a0c717d4f4b6b7c50789f20432ec936361 (diff) | |
Add hash module
| -rw-r--r-- | meson.build | 5 | ||||
| -rw-r--r-- | src/lisiblestd/hash.c | 353 | ||||
| -rw-r--r-- | src/lisiblestd/hash.h | 72 | ||||
| -rw-r--r-- | tests/hash_table.c | 176 |
4 files changed, 605 insertions, 1 deletions
diff --git a/meson.build b/meson.build index 1918463..2aa11cd 100644 --- a/meson.build +++ b/meson.build @@ -6,7 +6,8 @@ lisiblestd_lib = library('lisiblestd', 'src/lisiblestd/memory.c', 'src/lisiblestd/string.c', 'src/lisiblestd/bytes.c', - 'src/lisiblestd/vec.c' + 'src/lisiblestd/vec.c', + 'src/lisiblestd/hash.c', ) lisiblestd_dep = declare_dependency(include_directories: lisiblestd_incdir, link_with: [lisiblestd_lib]) @@ -16,3 +17,5 @@ test_bytes = executable('test_bytes', 'tests/test_runner.c', 'tests/bytes.c', de test('test_bytes', test_bytes) test_memory = executable('test_memory', 'tests/test_runner.c', 'tests/memory.c', dependencies: [lisiblestd_dep]) test('test_memory', test_memory) +test_hash_table = executable('test_hash_table', 'tests/test_runner.c', 'tests/hash_table.c', dependencies: [lisiblestd_dep]) +test('test_hash_table', test_hash_table) diff --git a/src/lisiblestd/hash.c b/src/lisiblestd/hash.c new file mode 100644 index 0000000..46ce323 --- /dev/null +++ b/src/lisiblestd/hash.c @@ -0,0 +1,353 @@ +#include "hash.h" +#include "assert.h" +#include <stddef.h> +#include <string.h> + +void HashTable_noop_dctor_fn(Allocator *allocator, void *v) { + (void)allocator; + (void)v; +} +bool HashTable_init(Allocator *allocator, HashTable *hash_table, + const size_t initial_capacity, + HashTableKeyHashFn key_hash_fn, + HashTableKeyEqFn key_eq_fn) { + LSTD_ASSERT(allocator != NULL); + LSTD_ASSERT(hash_table != NULL); + hash_table->items = Allocator_allocate_array(allocator, initial_capacity, + sizeof(HashTableKV)); + if (!hash_table->items) { + return false; + } + + hash_table->allocator = allocator; + hash_table->capacity = initial_capacity; + hash_table->length = 0; + hash_table->key_hash_fn = key_hash_fn; + hash_table->key_eq_fn = key_eq_fn; + hash_table->key_dctor_fn = HashTable_noop_dctor_fn; + hash_table->value_dctor_fn = HashTable_noop_dctor_fn; + return true; +} + +bool HashTable_init_with_dctors(Allocator *allocator, HashTable *hash_table, + const size_t initial_capacity, + HashTableKeyHashFn key_hash_fn, + HashTableKeyEqFn key_eq_fn, + HashTableDctorFn key_dctor_fn, + HashTableDctorFn value_dctor_fn) { + LSTD_ASSERT(hash_table != NULL); + if (!HashTable_init(allocator, hash_table, initial_capacity, key_hash_fn, + key_eq_fn)) { + return false; + } + hash_table->key_dctor_fn = key_dctor_fn; + hash_table->value_dctor_fn = value_dctor_fn; + return true; +} + +void HashTable_deinit(HashTable *hash_table) { + LSTD_ASSERT(hash_table != NULL); + HashTable_clear(hash_table); + Allocator_free(hash_table->allocator, hash_table->items); +} + +static size_t HashTable_index_for_key(const HashTable *hash_table, + const void *key) { + LSTD_ASSERT(hash_table != NULL); + LSTD_ASSERT(key != NULL); + uint64_t hashed_key = hash_table->key_hash_fn(key); + return hashed_key % hash_table->capacity; +} + +bool HashTable_expand(HashTable *hash_table) { + LSTD_ASSERT(hash_table != NULL); + size_t new_capacity = hash_table->capacity * 2; + + // Checking for eventual overflow + if (new_capacity < hash_table->capacity) { + return false; + } + + HashTableKV *new_items = Allocator_allocate_array( + hash_table->allocator, new_capacity, sizeof(HashTableKV)); + if (!new_items) { + return false; + } + + for (size_t i = 0; i < hash_table->capacity; i++) { + HashTableKV *item = &hash_table->items[i]; + if (item->is_present) { + uint64_t key_hash = hash_table->key_hash_fn(item->key); + size_t new_index = key_hash % new_capacity; + bool found = false; + while (new_items[new_index].is_present) { + if (hash_table->key_eq_fn(new_items[new_index].key, item->key)) { + new_items[new_index].value = item->value; + found = true; + break; + } + + new_index++; + if (new_index >= new_capacity) { + new_index = 0; + } + } + + if (!found) { + new_items[new_index].key = item->key; + new_items[new_index].value = item->value; + new_items[new_index].is_present = true; + } + } + } + + Allocator_free(hash_table->allocator, hash_table->items); + hash_table->items = new_items; + hash_table->capacity = new_capacity; + return true; +} + +bool HashTable_insert(HashTable *hash_table, void *key, void *value) { + LSTD_ASSERT(hash_table != NULL); + LSTD_ASSERT(key != NULL); + + if (hash_table->length >= hash_table->capacity / 2) { + if (!HashTable_expand(hash_table)) { + return false; + } + } + + size_t index = HashTable_index_for_key(hash_table, key); + while (hash_table->items[index].is_present) { + if (hash_table->key_eq_fn(hash_table->items[index].key, key)) { + hash_table->items[index].value = value; + return true; + } + index++; + if (index >= hash_table->capacity) { + index = 0; + } + } + + hash_table->items[index].key = key; + hash_table->items[index].value = value; + hash_table->items[index].is_present = true; + hash_table->length++; + return true; +} + +void *HashTable_get(const HashTable *hash_table, const void *key) { + LSTD_ASSERT(hash_table != NULL); + LSTD_ASSERT(key != NULL); + size_t index = HashTable_index_for_key(hash_table, key); + while (hash_table->items[index].is_present) { + if (hash_table->key_eq_fn(hash_table->items[index].key, key)) { + return hash_table->items[index].value; + } + index++; + if (index >= hash_table->capacity) { + index = 0; + } + } + + return NULL; +} + +bool HashTable_has(const HashTable *hash_table, const void *key) { + LSTD_ASSERT(hash_table != NULL); + LSTD_ASSERT(key != NULL); + size_t index = HashTable_index_for_key(hash_table, key); + while (hash_table->items[index].is_present) { + if (hash_table->key_eq_fn(hash_table->items[index].key, key)) { + return true; + } + index++; + if (index >= hash_table->capacity) { + index = 0; + } + } + + return false; +} + +size_t HashTable_length(const HashTable *hash_table) { + LSTD_ASSERT(hash_table != NULL); + return hash_table->length; +} + +void HashTable_steal(HashTable *hash_table, const void *key) { + LSTD_ASSERT(hash_table != NULL); + LSTD_ASSERT(key != NULL); + size_t index = HashTable_index_for_key(hash_table, key); + while (hash_table->items[index].is_present) { + if (hash_table->key_eq_fn(hash_table->items[index].key, key)) { + hash_table->key_dctor_fn(hash_table->allocator, + hash_table->items[index].key); + hash_table->items[index].key = NULL; + hash_table->items[index].value = NULL; + hash_table->items[index].is_present = false; + break; + } + index++; + if (index >= hash_table->capacity) { + index = 0; + } + } + hash_table->length--; +} +void HashTable_clear(HashTable *hash_table) { + LSTD_ASSERT(hash_table != NULL); + + for (size_t i = 0; i < hash_table->capacity; i++) { + if (hash_table->items[i].is_present) { + hash_table->key_dctor_fn(hash_table->allocator, hash_table->items[i].key); + hash_table->value_dctor_fn(hash_table->allocator, + hash_table->items[i].value); + hash_table->items[i].is_present = false; + } + } + + hash_table->length = 0; +} + +bool HashSet_init(Allocator *allocator, HashSet *hash_set, + const size_t initial_capacity, HashTableKeyHashFn hash_fn, + HashTableKeyEqFn eq_fn) { + LSTD_ASSERT(allocator != NULL); + LSTD_ASSERT(hash_set != NULL); + return HashTable_init(allocator, hash_set, initial_capacity, hash_fn, eq_fn); +} +bool HashSet_init_with_dctor(Allocator *allocator, HashSet *hash_set, + const size_t initial_capacity, + HashTableKeyHashFn hash_fn, HashTableKeyEqFn eq_fn, + HashTableDctorFn dctor_fn) { + LSTD_ASSERT(allocator != NULL); + LSTD_ASSERT(hash_set != NULL); + return HashTable_init_with_dctors(allocator, hash_set, initial_capacity, + hash_fn, eq_fn, dctor_fn, + HashTable_noop_dctor_fn); +} +void HashSet_deinit(HashSet *hash_set) { + LSTD_ASSERT(hash_set != NULL); + HashTable_deinit(hash_set); +} +bool HashSet_insert(HashSet *hash_set, void *set_value) { + LSTD_ASSERT(hash_set != NULL); + return HashTable_insert(hash_set, set_value, NULL); +} +bool HashSet_has(const HashSet *hash_set, const void *set_value) { + LSTD_ASSERT(hash_set != NULL); + return HashTable_has(hash_set, set_value); +} +size_t HashSet_length(const HashSet *hash_set) { + LSTD_ASSERT(hash_set != NULL); + return HashTable_length(hash_set); +} +void HashSet_clear(HashSet *hash_set) { + LSTD_ASSERT(hash_set != NULL); + HashTable_clear(hash_set); +} + +struct HashTableIt { + HashTable *table; + size_t current_index; + bool iterating; +}; + +HashTableIt *HashTableIt_create(Allocator *allocator, HashTable *table) { + LSTD_ASSERT(allocator != NULL); + LSTD_ASSERT(table != NULL); + HashTableIt *it = Allocator_allocate(allocator, sizeof(HashTableIt)); + it->table = table; + it->current_index = 0; + it->iterating = false; + return it; +} + +void HashTableIt_destroy(Allocator *allocator, HashTableIt *it) { + LSTD_ASSERT(allocator != NULL); + LSTD_ASSERT(it != NULL); + Allocator_free(allocator, it); +} + +bool HashTableIt_next(HashTableIt *it) { + LSTD_ASSERT(it != NULL); + if (it->table->length < 1) { + return false; + } + + if (it->iterating) { + it->current_index++; + } else { + it->iterating = true; + } + + if (it->current_index >= it->table->capacity) { + return false; + } + + while (!it->table->items[it->current_index].is_present) { + it->current_index++; + if (it->current_index >= it->table->capacity) { + return false; + } + } + + return true; +} +void *HashTableIt_key(HashTableIt *it) { + LSTD_ASSERT(it != NULL); + if (!it->iterating || it->current_index >= it->table->capacity) + return NULL; + + if (!it->table->items[it->current_index].is_present) { + return NULL; + } + + return it->table->items[it->current_index].key; +} +void *HashTableIt_value(HashTableIt *it) { + LSTD_ASSERT(it != NULL); + if (!it->iterating || it->current_index >= it->table->capacity) + return NULL; + + if (!it->table->items[it->current_index].is_present) { + return NULL; + } + + return it->table->items[it->current_index].value; +} + +HashTableIt *HashTable_iter(Allocator *allocator, HashTable *hash_table) { + LSTD_ASSERT(hash_table != NULL); + return HashTableIt_create(allocator, hash_table); +} + +uint64_t hash_fnv_1a(const char *bytes, size_t nbytes) { + LSTD_ASSERT(bytes != NULL); + static const uint64_t FNV_OFFSET_BASIS = 14695981039346656037u; + static const uint64_t FNV_PRIME = 1099511628211u; + uint64_t hash = FNV_OFFSET_BASIS; + for (size_t i = 0; i < nbytes; i++) { + hash = hash ^ bytes[i]; + hash = hash * FNV_PRIME; + } + + return hash; +} + +uint64_t hash_str_hash(const void *str) { + LSTD_ASSERT(str != NULL); + return hash_fnv_1a(str, strlen(str)); +} + +bool hash_str_eq(const void *a, const void *b) { + LSTD_ASSERT(a != NULL); + LSTD_ASSERT(b != NULL); + return strcmp(a, b) == 0; +} +void hash_str_dctor(Allocator *allocator, void *str) { + LSTD_ASSERT(allocator != NULL); + LSTD_ASSERT(str != NULL); + Allocator_free(allocator, str); +} diff --git a/src/lisiblestd/hash.h b/src/lisiblestd/hash.h new file mode 100644 index 0000000..0c9190c --- /dev/null +++ b/src/lisiblestd/hash.h @@ -0,0 +1,72 @@ +#ifndef LSTD_HASH_H +#define LSTD_HASH_H + +#include "memory.h" +#include <stdbool.h> + +typedef struct { + void *key; + void *value; + bool is_present; +} HashTableKV; + +typedef uint64_t (*HashTableKeyHashFn)(const void *); +typedef void (*HashTableDctorFn)(Allocator *, void *); +typedef bool (*HashTableKeyEqFn)(const void *, const void *); +typedef struct { + Allocator *allocator; + HashTableKV *items; + HashTableKeyHashFn key_hash_fn; + HashTableKeyEqFn key_eq_fn; + HashTableDctorFn key_dctor_fn; + HashTableDctorFn value_dctor_fn; + size_t capacity; + size_t length; +} HashTable; + +typedef struct HashTableIt HashTableIt; +void HashTableIt_destroy(Allocator *allocator, HashTableIt *it); +bool HashTableIt_next(HashTableIt *it); +void *HashTableIt_key(HashTableIt *it); +void *HashTableIt_value(HashTableIt *it); + +bool HashTable_init(Allocator *allocator, HashTable *hash_table, + const size_t initial_capacity, + HashTableKeyHashFn key_hash_fn, HashTableKeyEqFn key_eq_fn); +bool HashTable_init_with_dctors(Allocator *allocator, HashTable *hash_table, + const size_t initial_capacity, + HashTableKeyHashFn key_hash_fn, + HashTableKeyEqFn key_eq_fn, + HashTableDctorFn key_dctor_fn, + HashTableDctorFn value_dctor_fn); +void HashTable_deinit(HashTable *hash_table); +bool HashTable_insert(HashTable *hash_table, void *key, void *value); +HashTableIt *HashTable_iter(Allocator *allocator, HashTable *hash_table); +void *HashTable_get(const HashTable *hash_table, const void *key); +bool HashTable_has(const HashTable *hash_table, const void *key); +size_t HashTable_length(const HashTable *hash_table); +void HashTable_steal(HashTable *hash_table, const void *key); +void HashTable_clear(HashTable *hash_table); + +typedef HashTable HashSet; + +bool HashSet_init(Allocator *allocator, HashSet *hash_set, + const size_t initial_capacity, HashTableKeyHashFn hash_fn, + HashTableKeyEqFn eq_fn); +bool HashSet_init_with_dctor(Allocator *allocator, HashSet *hash_set, + const size_t initial_capacity, + HashTableKeyHashFn hash_fn, HashTableKeyEqFn eq_fn, + HashTableDctorFn dctor_fn); +void HashSet_deinit(HashSet *hash_set); +bool HashSet_insert(HashSet *hash_set, void *set_value); +bool HashSet_has(const HashSet *hash_set, const void *set_value); +size_t HashSet_length(const HashSet *hash_set); +void HashSet_clear(HashSet *hash_set); + +void HashTable_noop_dctor_fn(Allocator *, void *v); + +uint64_t hash_str_hash(const void *str); +bool hash_str_eq(const void *a, const void *b); +void hash_str_dctor(Allocator *allocator, void *str); + +#endif // LSTD_HASH_H diff --git a/tests/hash_table.c b/tests/hash_table.c new file mode 100644 index 0000000..4c3591a --- /dev/null +++ b/tests/hash_table.c @@ -0,0 +1,176 @@ +#include "test.h" +#include <lisiblestd/hash.h> + +static void hash_table_create(void) { + HashTable hash_table; + T_ASSERT(HashTable_init(&system_allocator, &hash_table, 16, hash_str_hash, + hash_str_eq)); + T_ASSERT_EQ(hash_table.capacity, 16); + T_ASSERT_EQ(hash_table.length, 0); + HashTable_deinit(&hash_table); +} + +static void hash_table_insert(void) { + HashTable hash_table; + T_ASSERT(HashTable_init(&system_allocator, &hash_table, 16, hash_str_hash, + hash_str_eq)); + T_ASSERT_EQ(hash_table.length, 0); + + char *first_key = "first_key"; + int first_value = 5; + T_ASSERT(HashTable_insert(&hash_table, first_key, &first_value)); + T_ASSERT_EQ(hash_table.length, 1); + + char *second_key = "second_key"; + int second_value = 7; + T_ASSERT(HashTable_insert(&hash_table, second_key, &second_value)); + char *third_key = "third_key"; + int third_value = 23; + T_ASSERT(HashTable_insert(&hash_table, third_key, &third_value)); + T_ASSERT_EQ(hash_table.length, 3); + HashTable_deinit(&hash_table); +} + +static void hash_table_get(void) { + HashTable hash_table; + T_ASSERT(HashTable_init(&system_allocator, &hash_table, 16, hash_str_hash, + hash_str_eq)); + char *first_key = "first_key"; + int first_value = 5; + T_ASSERT(HashTable_insert(&hash_table, first_key, &first_value)); + char *second_key = "second_key"; + int second_value = 7; + T_ASSERT(HashTable_insert(&hash_table, second_key, &second_value)); + char *third_key = "third_key"; + int third_value = 23; + T_ASSERT(HashTable_insert(&hash_table, third_key, &third_value)); + + int *returned_value = HashTable_get(&hash_table, "second_key"); + T_ASSERT(returned_value != NULL); + T_ASSERT_EQ(*returned_value, 7); + HashTable_deinit(&hash_table); +} + +static void hash_table_has(void) { + HashTable hash_table; + T_ASSERT(HashTable_init(&system_allocator, &hash_table, 16, hash_str_hash, + hash_str_eq)); + char *first_key = "first_key"; + int first_value = 5; + T_ASSERT(HashTable_insert(&hash_table, first_key, &first_value)); + char *second_key = "second_key"; + int second_value = 7; + T_ASSERT(HashTable_insert(&hash_table, second_key, &second_value)); + char *third_key = "third_key"; + int third_value = 23; + T_ASSERT(HashTable_insert(&hash_table, third_key, &third_value)); + + T_ASSERT(HashTable_has(&hash_table, "first_key")); + T_ASSERT(HashTable_has(&hash_table, "second_key")); + T_ASSERT(!HashTable_has(&hash_table, "thrid_key")); + T_ASSERT(HashTable_has(&hash_table, "third_key")); + HashTable_deinit(&hash_table); +} + +static void hash_table_expand(void) { + HashTable hash_table; + T_ASSERT(HashTable_init(&system_allocator, &hash_table, 4, hash_str_hash, + hash_str_eq)); + char *first_key = "first_key"; + int first_value = 5; + T_ASSERT(HashTable_insert(&hash_table, first_key, &first_value)); + char *second_key = "second_key"; + int second_value = 7; + T_ASSERT(HashTable_insert(&hash_table, second_key, &second_value)); + char *third_key = "third_key"; + int third_value = 23; + T_ASSERT(HashTable_insert(&hash_table, third_key, &third_value)); + char *fourth_key = "fourth_key"; + int fourth_value = 24; + T_ASSERT(HashTable_insert(&hash_table, fourth_key, &fourth_value)); + char *fifth_key = "fifth_key"; + int fifth_value = 25; + T_ASSERT(HashTable_insert(&hash_table, fifth_key, &fifth_value)); + + T_ASSERT(HashTable_has(&hash_table, "first_key")); + T_ASSERT(HashTable_has(&hash_table, "second_key")); + T_ASSERT(!HashTable_has(&hash_table, "thrid_key")); + T_ASSERT(HashTable_has(&hash_table, "third_key")); + T_ASSERT(HashTable_has(&hash_table, "fourth_key")); + T_ASSERT(HashTable_has(&hash_table, "fifth_key")); + HashTable_deinit(&hash_table); +} + +static void str_dctor_fn(Allocator *allocator, void *v) { + Allocator_free(allocator, v); +} + +static void hash_table_heap_allocated_str(void) { + HashTable hash_table; + T_ASSERT(HashTable_init_with_dctors(&system_allocator, &hash_table, 16, + hash_str_hash, hash_str_eq, + HashTable_noop_dctor_fn, str_dctor_fn)); + + char *first_value = calloc(8, sizeof(char)); + strcpy(first_value, "bonjour"); + + T_ASSERT(HashTable_insert(&hash_table, "first_key", first_value)); + + char *returned_value = HashTable_get(&hash_table, "first_key"); + T_ASSERT(strcmp(returned_value, "bonjour") == 0); + HashTable_deinit(&hash_table); +} + +static uint64_t int_hash_fn(const void *v) { return *(uint64_t *)v; } +static bool int_eq_fn(const void *a, const void *b) { + return *(uint64_t *)a == *(uint64_t *)b; +} + +static void hash_set_init(void) { + HashSet set; + HashSet_init(&system_allocator, &set, 16, int_hash_fn, int_eq_fn); + T_ASSERT_EQ(HashSet_length(&set), 0u); + HashSet_deinit(&set); +} + +static void hash_set_insert(void) { + HashSet set; + HashSet_init(&system_allocator, &set, 16, int_hash_fn, int_eq_fn); + T_ASSERT_EQ(HashSet_length(&set), 0u); + + uint64_t a = 15; + HashSet_insert(&set, &a); + T_ASSERT_EQ(HashSet_length(&set), 1u); + uint64_t b = 26; + HashSet_insert(&set, &b); + uint64_t c = 28; + HashSet_insert(&set, &c); + T_ASSERT_EQ(HashSet_length(&set), 3u); + HashSet_deinit(&set); +} + +static void hash_set_has(void) { + HashSet set; + HashSet_init(&system_allocator, &set, 16, int_hash_fn, int_eq_fn); + T_ASSERT_EQ(HashSet_length(&set), 0u); + + uint64_t a = 15; + HashSet_insert(&set, &a); + uint64_t b = 26; + HashSet_insert(&set, &b); + uint64_t c = 28; + HashSet_insert(&set, &c); + + uint64_t d = 0; + T_ASSERT(HashSet_has(&set, &a)); + T_ASSERT(HashSet_has(&set, &b)); + T_ASSERT(HashSet_has(&set, &c)); + T_ASSERT(!HashSet_has(&set, &d)); + T_ASSERT_EQ(HashSet_length(&set), 3u); + HashSet_deinit(&set); +} + +TEST_SUITE(TEST(hash_table_create), TEST(hash_table_insert), + TEST(hash_table_get), TEST(hash_table_heap_allocated_str), + TEST(hash_table_has), TEST(hash_table_expand), TEST(hash_set_init), + TEST(hash_set_insert), TEST(hash_set_has)) |
