From ec7c66794b993fd6a0c6db8795f8508df84e23c3 Mon Sep 17 00:00:00 2001 From: Clement Sibille Date: Mon, 20 May 2024 01:26:03 +0900 Subject: Add hash module --- src/lisiblestd/hash.c | 353 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/lisiblestd/hash.h | 72 ++++++++++ 2 files changed, 425 insertions(+) create mode 100644 src/lisiblestd/hash.c create mode 100644 src/lisiblestd/hash.h (limited to 'src/lisiblestd') diff --git a/src/lisiblestd/hash.c b/src/lisiblestd/hash.c new file mode 100644 index 0000000..46ce323 --- /dev/null +++ b/src/lisiblestd/hash.c @@ -0,0 +1,353 @@ +#include "hash.h" +#include "assert.h" +#include +#include + +void HashTable_noop_dctor_fn(Allocator *allocator, void *v) { + (void)allocator; + (void)v; +} +bool HashTable_init(Allocator *allocator, HashTable *hash_table, + const size_t initial_capacity, + HashTableKeyHashFn key_hash_fn, + HashTableKeyEqFn key_eq_fn) { + LSTD_ASSERT(allocator != NULL); + LSTD_ASSERT(hash_table != NULL); + hash_table->items = Allocator_allocate_array(allocator, initial_capacity, + sizeof(HashTableKV)); + if (!hash_table->items) { + return false; + } + + hash_table->allocator = allocator; + hash_table->capacity = initial_capacity; + hash_table->length = 0; + hash_table->key_hash_fn = key_hash_fn; + hash_table->key_eq_fn = key_eq_fn; + hash_table->key_dctor_fn = HashTable_noop_dctor_fn; + hash_table->value_dctor_fn = HashTable_noop_dctor_fn; + return true; +} + +bool HashTable_init_with_dctors(Allocator *allocator, HashTable *hash_table, + const size_t initial_capacity, + HashTableKeyHashFn key_hash_fn, + HashTableKeyEqFn key_eq_fn, + HashTableDctorFn key_dctor_fn, + HashTableDctorFn value_dctor_fn) { + LSTD_ASSERT(hash_table != NULL); + if (!HashTable_init(allocator, hash_table, initial_capacity, key_hash_fn, + key_eq_fn)) { + return false; + } + hash_table->key_dctor_fn = key_dctor_fn; + hash_table->value_dctor_fn = value_dctor_fn; + return true; +} + +void HashTable_deinit(HashTable *hash_table) { + LSTD_ASSERT(hash_table != NULL); + HashTable_clear(hash_table); + Allocator_free(hash_table->allocator, hash_table->items); +} + +static size_t HashTable_index_for_key(const HashTable *hash_table, + const void *key) { + LSTD_ASSERT(hash_table != NULL); + LSTD_ASSERT(key != NULL); + uint64_t hashed_key = hash_table->key_hash_fn(key); + return hashed_key % hash_table->capacity; +} + +bool HashTable_expand(HashTable *hash_table) { + LSTD_ASSERT(hash_table != NULL); + size_t new_capacity = hash_table->capacity * 2; + + // Checking for eventual overflow + if (new_capacity < hash_table->capacity) { + return false; + } + + HashTableKV *new_items = Allocator_allocate_array( + hash_table->allocator, new_capacity, sizeof(HashTableKV)); + if (!new_items) { + return false; + } + + for (size_t i = 0; i < hash_table->capacity; i++) { + HashTableKV *item = &hash_table->items[i]; + if (item->is_present) { + uint64_t key_hash = hash_table->key_hash_fn(item->key); + size_t new_index = key_hash % new_capacity; + bool found = false; + while (new_items[new_index].is_present) { + if (hash_table->key_eq_fn(new_items[new_index].key, item->key)) { + new_items[new_index].value = item->value; + found = true; + break; + } + + new_index++; + if (new_index >= new_capacity) { + new_index = 0; + } + } + + if (!found) { + new_items[new_index].key = item->key; + new_items[new_index].value = item->value; + new_items[new_index].is_present = true; + } + } + } + + Allocator_free(hash_table->allocator, hash_table->items); + hash_table->items = new_items; + hash_table->capacity = new_capacity; + return true; +} + +bool HashTable_insert(HashTable *hash_table, void *key, void *value) { + LSTD_ASSERT(hash_table != NULL); + LSTD_ASSERT(key != NULL); + + if (hash_table->length >= hash_table->capacity / 2) { + if (!HashTable_expand(hash_table)) { + return false; + } + } + + size_t index = HashTable_index_for_key(hash_table, key); + while (hash_table->items[index].is_present) { + if (hash_table->key_eq_fn(hash_table->items[index].key, key)) { + hash_table->items[index].value = value; + return true; + } + index++; + if (index >= hash_table->capacity) { + index = 0; + } + } + + hash_table->items[index].key = key; + hash_table->items[index].value = value; + hash_table->items[index].is_present = true; + hash_table->length++; + return true; +} + +void *HashTable_get(const HashTable *hash_table, const void *key) { + LSTD_ASSERT(hash_table != NULL); + LSTD_ASSERT(key != NULL); + size_t index = HashTable_index_for_key(hash_table, key); + while (hash_table->items[index].is_present) { + if (hash_table->key_eq_fn(hash_table->items[index].key, key)) { + return hash_table->items[index].value; + } + index++; + if (index >= hash_table->capacity) { + index = 0; + } + } + + return NULL; +} + +bool HashTable_has(const HashTable *hash_table, const void *key) { + LSTD_ASSERT(hash_table != NULL); + LSTD_ASSERT(key != NULL); + size_t index = HashTable_index_for_key(hash_table, key); + while (hash_table->items[index].is_present) { + if (hash_table->key_eq_fn(hash_table->items[index].key, key)) { + return true; + } + index++; + if (index >= hash_table->capacity) { + index = 0; + } + } + + return false; +} + +size_t HashTable_length(const HashTable *hash_table) { + LSTD_ASSERT(hash_table != NULL); + return hash_table->length; +} + +void HashTable_steal(HashTable *hash_table, const void *key) { + LSTD_ASSERT(hash_table != NULL); + LSTD_ASSERT(key != NULL); + size_t index = HashTable_index_for_key(hash_table, key); + while (hash_table->items[index].is_present) { + if (hash_table->key_eq_fn(hash_table->items[index].key, key)) { + hash_table->key_dctor_fn(hash_table->allocator, + hash_table->items[index].key); + hash_table->items[index].key = NULL; + hash_table->items[index].value = NULL; + hash_table->items[index].is_present = false; + break; + } + index++; + if (index >= hash_table->capacity) { + index = 0; + } + } + hash_table->length--; +} +void HashTable_clear(HashTable *hash_table) { + LSTD_ASSERT(hash_table != NULL); + + for (size_t i = 0; i < hash_table->capacity; i++) { + if (hash_table->items[i].is_present) { + hash_table->key_dctor_fn(hash_table->allocator, hash_table->items[i].key); + hash_table->value_dctor_fn(hash_table->allocator, + hash_table->items[i].value); + hash_table->items[i].is_present = false; + } + } + + hash_table->length = 0; +} + +bool HashSet_init(Allocator *allocator, HashSet *hash_set, + const size_t initial_capacity, HashTableKeyHashFn hash_fn, + HashTableKeyEqFn eq_fn) { + LSTD_ASSERT(allocator != NULL); + LSTD_ASSERT(hash_set != NULL); + return HashTable_init(allocator, hash_set, initial_capacity, hash_fn, eq_fn); +} +bool HashSet_init_with_dctor(Allocator *allocator, HashSet *hash_set, + const size_t initial_capacity, + HashTableKeyHashFn hash_fn, HashTableKeyEqFn eq_fn, + HashTableDctorFn dctor_fn) { + LSTD_ASSERT(allocator != NULL); + LSTD_ASSERT(hash_set != NULL); + return HashTable_init_with_dctors(allocator, hash_set, initial_capacity, + hash_fn, eq_fn, dctor_fn, + HashTable_noop_dctor_fn); +} +void HashSet_deinit(HashSet *hash_set) { + LSTD_ASSERT(hash_set != NULL); + HashTable_deinit(hash_set); +} +bool HashSet_insert(HashSet *hash_set, void *set_value) { + LSTD_ASSERT(hash_set != NULL); + return HashTable_insert(hash_set, set_value, NULL); +} +bool HashSet_has(const HashSet *hash_set, const void *set_value) { + LSTD_ASSERT(hash_set != NULL); + return HashTable_has(hash_set, set_value); +} +size_t HashSet_length(const HashSet *hash_set) { + LSTD_ASSERT(hash_set != NULL); + return HashTable_length(hash_set); +} +void HashSet_clear(HashSet *hash_set) { + LSTD_ASSERT(hash_set != NULL); + HashTable_clear(hash_set); +} + +struct HashTableIt { + HashTable *table; + size_t current_index; + bool iterating; +}; + +HashTableIt *HashTableIt_create(Allocator *allocator, HashTable *table) { + LSTD_ASSERT(allocator != NULL); + LSTD_ASSERT(table != NULL); + HashTableIt *it = Allocator_allocate(allocator, sizeof(HashTableIt)); + it->table = table; + it->current_index = 0; + it->iterating = false; + return it; +} + +void HashTableIt_destroy(Allocator *allocator, HashTableIt *it) { + LSTD_ASSERT(allocator != NULL); + LSTD_ASSERT(it != NULL); + Allocator_free(allocator, it); +} + +bool HashTableIt_next(HashTableIt *it) { + LSTD_ASSERT(it != NULL); + if (it->table->length < 1) { + return false; + } + + if (it->iterating) { + it->current_index++; + } else { + it->iterating = true; + } + + if (it->current_index >= it->table->capacity) { + return false; + } + + while (!it->table->items[it->current_index].is_present) { + it->current_index++; + if (it->current_index >= it->table->capacity) { + return false; + } + } + + return true; +} +void *HashTableIt_key(HashTableIt *it) { + LSTD_ASSERT(it != NULL); + if (!it->iterating || it->current_index >= it->table->capacity) + return NULL; + + if (!it->table->items[it->current_index].is_present) { + return NULL; + } + + return it->table->items[it->current_index].key; +} +void *HashTableIt_value(HashTableIt *it) { + LSTD_ASSERT(it != NULL); + if (!it->iterating || it->current_index >= it->table->capacity) + return NULL; + + if (!it->table->items[it->current_index].is_present) { + return NULL; + } + + return it->table->items[it->current_index].value; +} + +HashTableIt *HashTable_iter(Allocator *allocator, HashTable *hash_table) { + LSTD_ASSERT(hash_table != NULL); + return HashTableIt_create(allocator, hash_table); +} + +uint64_t hash_fnv_1a(const char *bytes, size_t nbytes) { + LSTD_ASSERT(bytes != NULL); + static const uint64_t FNV_OFFSET_BASIS = 14695981039346656037u; + static const uint64_t FNV_PRIME = 1099511628211u; + uint64_t hash = FNV_OFFSET_BASIS; + for (size_t i = 0; i < nbytes; i++) { + hash = hash ^ bytes[i]; + hash = hash * FNV_PRIME; + } + + return hash; +} + +uint64_t hash_str_hash(const void *str) { + LSTD_ASSERT(str != NULL); + return hash_fnv_1a(str, strlen(str)); +} + +bool hash_str_eq(const void *a, const void *b) { + LSTD_ASSERT(a != NULL); + LSTD_ASSERT(b != NULL); + return strcmp(a, b) == 0; +} +void hash_str_dctor(Allocator *allocator, void *str) { + LSTD_ASSERT(allocator != NULL); + LSTD_ASSERT(str != NULL); + Allocator_free(allocator, str); +} diff --git a/src/lisiblestd/hash.h b/src/lisiblestd/hash.h new file mode 100644 index 0000000..0c9190c --- /dev/null +++ b/src/lisiblestd/hash.h @@ -0,0 +1,72 @@ +#ifndef LSTD_HASH_H +#define LSTD_HASH_H + +#include "memory.h" +#include + +typedef struct { + void *key; + void *value; + bool is_present; +} HashTableKV; + +typedef uint64_t (*HashTableKeyHashFn)(const void *); +typedef void (*HashTableDctorFn)(Allocator *, void *); +typedef bool (*HashTableKeyEqFn)(const void *, const void *); +typedef struct { + Allocator *allocator; + HashTableKV *items; + HashTableKeyHashFn key_hash_fn; + HashTableKeyEqFn key_eq_fn; + HashTableDctorFn key_dctor_fn; + HashTableDctorFn value_dctor_fn; + size_t capacity; + size_t length; +} HashTable; + +typedef struct HashTableIt HashTableIt; +void HashTableIt_destroy(Allocator *allocator, HashTableIt *it); +bool HashTableIt_next(HashTableIt *it); +void *HashTableIt_key(HashTableIt *it); +void *HashTableIt_value(HashTableIt *it); + +bool HashTable_init(Allocator *allocator, HashTable *hash_table, + const size_t initial_capacity, + HashTableKeyHashFn key_hash_fn, HashTableKeyEqFn key_eq_fn); +bool HashTable_init_with_dctors(Allocator *allocator, HashTable *hash_table, + const size_t initial_capacity, + HashTableKeyHashFn key_hash_fn, + HashTableKeyEqFn key_eq_fn, + HashTableDctorFn key_dctor_fn, + HashTableDctorFn value_dctor_fn); +void HashTable_deinit(HashTable *hash_table); +bool HashTable_insert(HashTable *hash_table, void *key, void *value); +HashTableIt *HashTable_iter(Allocator *allocator, HashTable *hash_table); +void *HashTable_get(const HashTable *hash_table, const void *key); +bool HashTable_has(const HashTable *hash_table, const void *key); +size_t HashTable_length(const HashTable *hash_table); +void HashTable_steal(HashTable *hash_table, const void *key); +void HashTable_clear(HashTable *hash_table); + +typedef HashTable HashSet; + +bool HashSet_init(Allocator *allocator, HashSet *hash_set, + const size_t initial_capacity, HashTableKeyHashFn hash_fn, + HashTableKeyEqFn eq_fn); +bool HashSet_init_with_dctor(Allocator *allocator, HashSet *hash_set, + const size_t initial_capacity, + HashTableKeyHashFn hash_fn, HashTableKeyEqFn eq_fn, + HashTableDctorFn dctor_fn); +void HashSet_deinit(HashSet *hash_set); +bool HashSet_insert(HashSet *hash_set, void *set_value); +bool HashSet_has(const HashSet *hash_set, const void *set_value); +size_t HashSet_length(const HashSet *hash_set); +void HashSet_clear(HashSet *hash_set); + +void HashTable_noop_dctor_fn(Allocator *, void *v); + +uint64_t hash_str_hash(const void *str); +bool hash_str_eq(const void *a, const void *b); +void hash_str_dctor(Allocator *allocator, void *str); + +#endif // LSTD_HASH_H -- cgit v1.2.3