cosmopolitan/third_party/chibicc/hashmap.c

114 lines
3.4 KiB
C
Raw Normal View History

2020-12-01 11:43:40 +00:00
// This is an implementation of the open-addressing hash table.
#include "third_party/chibicc/chibicc.h"
2020-12-09 21:53:02 +00:00
#define INIT_SIZE 16 // initial hash bucket size
#define LOW_WATERMARK 50 // keep usage below 50% after rehashing
#define HIGH_WATERMARK 70 // perform rehash when usage exceeds 70%
#define TOMBSTONE ((void *)-1) // represents deleted hash table entry
2020-12-01 11:43:40 +00:00
static uint64_t fnv_hash(char *s, int len) {
uint64_t hash = 0xcbf29ce484222325;
for (int i = 0; i < len; i++) {
hash *= 0x100000001b3;
hash ^= (unsigned char)s[i];
}
return hash;
}
// Make room for new entires in a given hashmap by removing
// tombstones and possibly extending the bucket size.
static void rehash(HashMap *map) {
// Compute the size of the new hashmap.
int nkeys = 0;
2020-12-09 12:00:48 +00:00
for (int i = 0; i < map->capacity; i++) {
if (map->buckets[i].key && map->buckets[i].key != TOMBSTONE) {
nkeys++;
}
}
size_t cap = map->capacity;
2020-12-09 21:53:02 +00:00
while ((nkeys * 100) / cap >= LOW_WATERMARK) cap = cap * 2;
assert(cap > 0);
2020-12-01 11:43:40 +00:00
// Create a new hashmap and copy all key-values.
HashMap map2 = {};
map2.buckets = calloc(cap, sizeof(HashEntry));
map2.capacity = cap;
for (int i = 0; i < map->capacity; i++) {
HashEntry *ent = &map->buckets[i];
2020-12-24 07:42:56 +00:00
if (ent->key && ent->key != TOMBSTONE) {
2020-12-01 11:43:40 +00:00
hashmap_put2(&map2, ent->key, ent->keylen, ent->val);
2020-12-24 07:42:56 +00:00
}
2020-12-01 11:43:40 +00:00
}
assert(map2.used == nkeys);
*map = map2;
}
static bool match(HashEntry *ent, char *key, int keylen) {
return ent->key && ent->key != TOMBSTONE && ent->keylen == keylen &&
memcmp(ent->key, key, keylen) == 0;
}
static HashEntry *get_entry(HashMap *map, char *key, int keylen) {
if (!map->buckets) return NULL;
uint64_t hash = fnv_hash(key, keylen);
for (int i = 0; i < map->capacity; i++) {
HashEntry *ent = &map->buckets[(hash + i) & (map->capacity - 1)];
2020-12-01 11:43:40 +00:00
if (match(ent, key, keylen)) return ent;
if (ent->key == NULL) return NULL;
}
UNREACHABLE();
}
static HashEntry *get_or_insert_entry(HashMap *map, char *key, int keylen) {
if (!map->buckets) {
2020-12-09 21:53:02 +00:00
map->buckets = calloc(INIT_SIZE, sizeof(HashEntry));
map->capacity = INIT_SIZE;
} else if ((map->used * 100) / map->capacity >= HIGH_WATERMARK) {
rehash(map);
2020-12-01 11:43:40 +00:00
}
uint64_t hash = fnv_hash(key, keylen);
for (int i = 0; i < map->capacity; i++) {
HashEntry *ent = &map->buckets[(hash + i) & (map->capacity - 1)];
2020-12-01 11:43:40 +00:00
if (match(ent, key, keylen)) return ent;
if (ent->key == TOMBSTONE) {
ent->key = key;
ent->keylen = keylen;
return ent;
}
if (ent->key == NULL) {
ent->key = key;
ent->keylen = keylen;
map->used++;
return ent;
}
}
UNREACHABLE();
}
void *hashmap_get(HashMap *map, char *key) {
return hashmap_get2(map, key, strlen(key));
}
void *hashmap_get2(HashMap *map, char *key, int keylen) {
HashEntry *ent = get_entry(map, key, keylen);
return ent ? ent->val : NULL;
}
void hashmap_put(HashMap *map, char *key, void *val) {
hashmap_put2(map, key, strlen(key), val);
}
void hashmap_put2(HashMap *map, char *key, int keylen, void *val) {
HashEntry *ent = get_or_insert_entry(map, key, keylen);
ent->val = val;
}
void hashmap_delete(HashMap *map, char *key) {
hashmap_delete2(map, key, strlen(key));
}
void hashmap_delete2(HashMap *map, char *key, int keylen) {
HashEntry *ent = get_entry(map, key, keylen);
if (ent) ent->key = TOMBSTONE;
}