Browse Source

Fix ymap_grow()

The new test found the problem.
master
Gavin Howard 1 month ago
parent
commit
c2982735e2
Signed by: gavin GPG Key ID: C08038BDF280D33E
  1. 273
      src/map/map.c
  2. 55
      src/map/map.h

273
src/map/map.c

@ -49,6 +49,75 @@
#include <stdlib.h>
#include <string.h>
#ifdef YDEBUG
/**
* This checks the validity of the map after any operation adding or removing
* elements. I added this because of a tough bug I encountered and could not
* find. This all helped me find it, and I decided to keep it to complain loudly
* should I break something.
* @param m The map to check.
* @param func The calling function. (Used for assert messages.)
* @pre @a m must not be NULL.
* @pre @a func must not be NULL.
*/
static void
ymap_checkValidity(ymap* m, const char* const func) yallnonnull;
#endif // YDEBUG
#ifdef YDEBUG
yallnonnull
static void
ymap_checkValidity(ymap* m, const char* const func)
{
size_t i;
size_t count = 0, cap = m->cap;
uint8_t* info = m->info;
for (i = 0; i < m->cap; ++i)
{
count += (m->info[i] >= YMAP_TAKEN_BIT);
}
yassertv(count == m->size,
"%s() corrupted the map size; this is a bug", func);
// This loop checks that all of the items are inserted correctly.
for (i = 0; i < m->cap; ++i)
{
if (!(info[i] & YMAP_TAKEN_BIT)) continue;
if (info[i] > YMAP_TAKEN_BIT)
{
size_t idx;
size_t displacement = info[i] & ~YMAP_TAKEN_BIT;
size_t end_idx = i - displacement;
for (idx = i - 1; displacement; --displacement)
{
size_t wrapped = YMAP_CAP_IDX(cap, idx);
yassertv(info[wrapped] & YMAP_TAKEN_BIT,
"item at index %zu was not properly moved in %s(); "
"this is a bug", i, func);
size_t dis2 = info[wrapped] & ~YMAP_TAKEN_BIT;
size_t end_idx2 = idx - dis2;
yassertv(end_idx2 <= end_idx ||
(end_idx2 > cap && end_idx < cap),
"after function %s(): "
"items at index %zu and %zu are not in the "
"correct order; this is a bug",
func, end_idx2, end_idx);
idx -= 1;
}
}
}
}
#endif // YDEBUG
/**
* This is the procedure that prepares for the insertion.
* @param m The map to insert into.
@ -142,68 +211,96 @@ ymap_numarrays(size_t cap)
* @pre @a m must not be NULL.
*/
static YStatus
ymap_grow(ymap* m) yallnonnull
ymap_grow(ymap* m) yallnonnull;
yallnonnull
static YStatus
ymap_grow(ymap* m)
{
YStatus status = YSTATUS_MALLOC_FAIL;
size_t oldalloc = m->cap;
size_t cap = m->cap << 1;
size_t ksize = m->ksize;
size_t esize = m->esize;
size_t i;
if (m->growing) return YSTATUS_MISC_ERROR;
m->growing = true;
m->cap = cap;
size_t narrays = ymap_numarrays(oldalloc);
bool need_array_realloc =
((narrays & (narrays - 1)) == 0 && narrays >= YMAP_INITIAL_ARRAY_ALLOC);
size_t cap = m->cap;
if (need_array_realloc)
// I want to grow the map 4 times bigger each time. This is to relieve
// hash pressure when moving things, and also to move them less often.
// Both things should increase performance.
do
{
unsigned char** arrays;
size_t narrays = ymap_numarrays(cap);
bool need_array_realloc = ((narrays & (narrays - 1)) == 0 &&
narrays >= YMAP_INITIAL_ARRAY_ALLOC);
if (need_array_realloc)
{
unsigned char** arrays;
arrays = yrealloc(m->keys, (narrays << 1) * sizeof(unsigned char*));
if (yerror(arrays == NULL))
{
if (cap == oldalloc) goto err;
else break;
}
arrays = yrealloc(m->keys, (narrays << 1) * sizeof(unsigned char*));
if (yerror(arrays == NULL)) goto err;
m->keys = arrays;
m->keys = arrays;
arrays = yrealloc(m->map, (narrays << 1) * sizeof(unsigned char*));
if (yerror(arrays == NULL))
{
if (cap == oldalloc) goto err;
else break;
}
arrays = yrealloc(m->map, (narrays << 1) * sizeof(unsigned char*));
if (yerror(arrays == NULL)) goto err;
m->map = arrays;
}
m->map = arrays;
m->keys[narrays] = ymalloc(cap * ksize);
if (yerror(m->keys[narrays] == NULL))
{
if (cap == oldalloc) goto err;
else break;
}
m->map[narrays] = ymalloc(cap * esize);
if (yerror(m->map[narrays] == NULL))
{
yfree(m->keys[narrays]);
if (cap == oldalloc) goto err;
else break;
}
// Don't move this anywhere else. This is last to ensure that everything
// above worked before the cap was updated because it's used in the rest
// of the procedure if only one time around this loop was successful.
cap <<= 1;
// Don't try to optimize by removing this; if only one time around the
// loop here worked, we want to have the updated capacity in the map.
m->cap = cap;
m->max = (size_t) (m->load * (float) cap);
}
while (cap < (oldalloc << 2));
uint8_t* info = yrealloc(m->info, cap * sizeof(uint8_t));
if (yerror(info == NULL)) goto err;
// We need to make sure to zero all of the info.
memset(info + oldalloc, 0, oldalloc * sizeof(uint8_t));
memset(info + oldalloc, 0, (cap - oldalloc) * sizeof(uint8_t));
m->info = info;
m->keys[narrays] = ymalloc(oldalloc * ksize);
if (yerror(m->keys[narrays] == NULL)) goto err;
m->map[narrays] = ymalloc(oldalloc * esize);
if (yerror(m->map[narrays] == NULL))
{
yfree(m->keys[narrays]);
goto err;
}
m->max = (size_t)(m->load * (float) m->cap);
size_t moved = 0;
size_t len = m->size;
bool do_last = false;
for (size_t i = oldalloc - 1; i < oldalloc; --i)
for (i = oldalloc - 1; i < oldalloc; --i)
{
size_t midx = YMAP_CAP_IDX(oldalloc, do_last ? oldalloc - 1 : i);
do_last = false;
size_t midx = YMAP_CAP_IDX(oldalloc, i);
if (!(info[midx] & YMAP_TAKEN_BIT)) continue;
@ -229,47 +326,71 @@ ymap_grow(ymap* m) yallnonnull
}
info[midx] = 0;
}
}
size_t old_midx = midx;
midx = YMAP_CAP_IDX(oldalloc, midx + 1);
while (info[midx] > YMAP_TAKEN_BIT)
{
do_last = do_last || (midx < old_midx);
arrayIdx = YMAP_ARRAY_IDX(midx);
itemIdx = YMAP_ITEM_IDX(arrayIdx, midx);
info[old_midx] = info[midx] - 1;
unsigned char* key_ptr2 = key_ptr;
unsigned char* val_ptr2 = val_ptr;
// TODO: I am completely changing this. I am first rehashing everything and
// moving them, if necessary. That is the above loop. Notice that it does
// not move any items other than those that hash differently from what they
// did last time (or were wrapped around). That is the job of the next loop.
// It needs to find all of the items that need to be moved forward, and it
// needs to move them forward.
//
// The end condition should be that i is greater than oldalloc and that
// something was moved. This is because I need to go through the entire old
// range and ensure everything is moved, but I also need to ensure that
// those items which wrapped around (and were thus moved) are also bumped
// forward because other items might have been in the way when they were
// moved.
//
// After completing the TODO, touch up this comment and leave it in as an
// explanation of the algorithm.
unsigned char* const key_ptr2 = YMAP_KSWAP3(m, ksize);
unsigned char* const val_ptr2 = YMAP_VSWAP3(key_ptr2, ksize, esize);
// We want to make sure we iterate over the entire range that can be
// affected. This includes 2^7 beyond oldalloc because there are seven bits
// in the info for each item for displacement. However, sometimes, oldalloc
// will be smaller than that, so we want to only go that far in that case.
len = 1 << 7;
len = (oldalloc < len ? oldalloc : len) + oldalloc;
status = YSTATUS_SUCCESS;
key_ptr = YMAP_KEY(m, arrayIdx, itemIdx, ksize);
val_ptr = YMAP_ITEM(m, arrayIdx, itemIdx, esize);
for (i = 0; status == YSTATUS_SUCCESS && i < len; ++i)
{
size_t midx = YMAP_CAP_IDX(cap, i);
memcpy(key_ptr2, key_ptr, ksize);
memcpy(val_ptr2, val_ptr, esize);
if (!(info[midx] & YMAP_TAKEN_BIT) || info[midx] == YMAP_TAKEN_BIT)
{
continue;
}
old_midx = midx;
midx = YMAP_CAP_IDX(oldalloc, midx + 1);
}
size_t arrayIdx = YMAP_ARRAY_IDX(midx);
size_t itemIdx = YMAP_ITEM_IDX(arrayIdx, midx);
unsigned char* key_ptr = YMAP_KEY(m, arrayIdx, itemIdx, ksize);
unsigned char* val_ptr = YMAP_ITEM(m, arrayIdx, itemIdx, esize);
uint64_t h = m->hash(key_ptr);
uint64_t hnew = YMAP_CAP_IDX(cap, h);
info[old_midx] = 0;
}
memcpy(key_ptr2, key_ptr, ksize);
memcpy(val_ptr2, val_ptr, esize);
info[midx] = 0;
i += do_last;
status = ymap_insertHelper2(m, key_ptr2, val_ptr2, hnew);
if (++moved == len) break;
yassert(status == YSTATUS_SUCCESS,
"insert must always succeed in this case");
}
status = YSTATUS_SUCCESS;
err:
m->growing = false;
#ifdef YDEBUG
ymap_checkValidity(m, __func__);
#endif // YDEBUG
return status;
}
@ -343,7 +464,7 @@ ymap_create(const float load, const size_t ksize, const size_t esize,
yc_assert(eq || ksize == sizeof(uint64_t), YC_ASSERT_MAP_INVALID_EQ);
yc_assert(load <= 1.0f && load > 0.0f, YC_ASSERT_MAP_LOAD_RANGE);
ymap* m = ycalloc(1, sizeof(ymap) + 2 * ksize + 2 * esize);
ymap* m = ycalloc(1, sizeof(ymap) + 3 * ksize + 3 * esize);
if (yerror(!m)) return NULL;
YStatus status = ymap_init(m, load, ksize, esize, hash, eq, kdtor, vdtor);
@ -589,12 +710,16 @@ ymap_insertHelper2(ymap* m, void* key, void* val, size_t idx)
midx = YMAP_IDX(m, idx);
}
unsigned char* const kswap = YMAP_KSWAP(m);
unsigned char* const vswap = YMAP_VSWAP(kswap, ksize);
// Loop while we have not found an empty spot, and while no info overflow.
while ((m->info[midx] & YMAP_TAKEN_BIT) && info)
{
// If we need to swap...
uint8_t info2 = m->info[midx];
// Moving...
if (info > info2)
{
// Only swap when necessary.
@ -602,9 +727,6 @@ ymap_insertHelper2(ymap* m, void* key, void* val, size_t idx)
arrayIdx = YMAP_ARRAY_IDX(midx);
itemIdx = YMAP_ITEM_IDX(arrayIdx, midx);
unsigned char* kswap = (unsigned char*) (m + 1);
unsigned char* vswap = kswap + ksize + ksize;
kptr = YMAP_KEY(m, arrayIdx, itemIdx, ksize);
vptr = YMAP_ITEM(m, arrayIdx, itemIdx, esize);
@ -664,9 +786,13 @@ ymap_insertHelper(ymap* m, void* key, void* val)
status = ymap_insertHelper2(m, key, val, idx);
m->size += 1;
m->size += (status == YSTATUS_SUCCESS);
return YSTATUS_SUCCESS;
#ifdef YDEBUG
ymap_checkValidity(m, __func__);
#endif // YDEBUG
return status;
}
YStatus
@ -717,7 +843,7 @@ ymap_insertStringKey(YMap m, const char* key, const void* val)
size_t esize = m->esize;
size_t ksize = m->ksize;
unsigned char* vptr = ((unsigned char*) (m + 1)) + ksize + ksize + esize;
unsigned char* vptr = YMAP_VSWAP2(YMAP_KSWAP2(m, ksize), ksize, esize);
memcpy(vptr, val, esize);
status = ymap_insertHelper(m, &s, vptr);
@ -739,8 +865,8 @@ ymap_insert(YMap m, const void* key, const void* val)
size_t ksize = m->ksize;
size_t esize = m->esize;
unsigned char* kptr = ((unsigned char*) (m + 1)) + ksize;
unsigned char* vptr = kptr + ksize + esize;
unsigned char* kptr = YMAP_KSWAP2(m, ksize);
unsigned char* vptr = YMAP_VSWAP2(kptr, ksize, esize);
memcpy(kptr, key, ksize);
memcpy(vptr, val, esize);
@ -787,6 +913,7 @@ ymap_remove(YMap m, const void* key)
kptr = YMAP_KEY(m, arrayIdx, itemIdx, ksize);
vptr = YMAP_ITEM(m, arrayIdx, itemIdx, esize);
// Moving...
while (info[midx] > YMAP_TAKEN_BIT)
{
arrayIdx = YMAP_ARRAY_IDX(midx);
@ -813,6 +940,10 @@ ymap_remove(YMap m, const void* key)
m->info[YMAP_IDX(m, start - 1)] = 0;
m->size -= 1;
#ifdef YDEBUG
ymap_checkValidity(m, __func__);
#endif // YDEBUG
return YSTATUS_SUCCESS;
}

55
src/map/map.h

@ -164,6 +164,61 @@ extern "C" {
*/
#define YMAP_ITEM(m, ai, ii, s) ((m)->map[(ai)] + (ii) * (s))
/**
* @def YMAP_KSWAP
* Returns a pointer to the first swap area for keys.
* @param m The map to query.
* @return A pointer to the first key swap area.
*/
#define YMAP_KSWAP(m) ((unsigned char*) ((m) + 1))
/**
* @def YMAP_KSWAP2
* Returns a pointer to the second swap area for keys.
* @param m The map to query.
* @param ks The key size of the map.
* @return A pointer to the second key swap area.
*/
#define YMAP_KSWAP2(m, ks) (YMAP_KSWAP(m) + (ks))
/**
* @def YMAP_KSWAP3
* Returns a pointer to the third swap area for keys.
* @param m The map to query.
* @param ks The key size of the map.
* @return A pointer to the third key swap area.
*/
#define YMAP_KSWAP3(m, ks) (YMAP_KSWAP(m) + (ks) + (ks))
/**
* @def YMAP_VSWAP
* Returns a pointer to the first swap area for values.
* @param kp A pointer to the first key swap area.
* @param ks The key size.
* @return A pointer to the first value swap area.
*/
#define YMAP_VSWAP(kp, ks) ((kp) + 3 * (ks))
/**
* @def YMAP_VSWAP2
* Returns a pointer to the second swap area for values.
* @param kp A pointer to the second key swap area.
* @param ks The key size of the map.
* @param es The value size of the map.
* @return A pointer to the second value swap area.
*/
#define YMAP_VSWAP2(kp, ks, es) ((kp) + 2 * (ks) + (es))
/**
* @def YMAP_VSWAP3
* Returns a pointer to the third swap area for values.
* @param kp A pointer to the third key swap area.
* @param ks The key size of the map.
* @param es The value size of the map.
* @return A pointer to the third value swap area.
*/
#define YMAP_VSWAP3(kp, ks, es) ((kp) + (ks) + 2 * (es))
/**
* This is the actual map struct.
*/

Loading…
Cancel
Save