cosmopolitan/third_party/duktape/duk_heap_stringtable.c

1044 lines
31 KiB
C

/*
* Heap string table handling, string interning.
*/
#include "third_party/duktape/duk_internal.h"
/* DUK__STRTAB_RESIZE_CHECK enables the grow/shrink code in this file.
* Resize checks are not needed if minsize == maxsize (typical for low
* memory targets), so the define is removed again in that case.
*/
#define DUK__STRTAB_RESIZE_CHECK
#if (DUK_USE_STRTAB_MINSIZE == DUK_USE_STRTAB_MAXSIZE)
#undef DUK__STRTAB_RESIZE_CHECK
#endif
/* String table slot access helpers. With DUK_USE_STRTAB_PTRCOMP the slot
* values are converted using DUK_USE_HEAPPTR_ENC16/DEC16 (which are given
* heap->heap_udata) and the table is heap->strtable16. Otherwise the
* encode/decode macros pass the value through unchanged and the table is
* heap->strtable.
*/
#if defined(DUK_USE_STRTAB_PTRCOMP)
#define DUK__HEAPPTR_ENC16(heap,ptr) DUK_USE_HEAPPTR_ENC16((heap)->heap_udata, (ptr))
#define DUK__HEAPPTR_DEC16(heap,val) DUK_USE_HEAPPTR_DEC16((heap)->heap_udata, (val))
#define DUK__GET_STRTABLE(heap) ((heap)->strtable16)
#else
#define DUK__HEAPPTR_ENC16(heap,ptr) (ptr)
#define DUK__HEAPPTR_DEC16(heap,val) (val)
#define DUK__GET_STRTABLE(heap) ((heap)->strtable)
#endif
/* Size of the digit buffer used when interning a 32-bit unsigned value as
* a decimal string; the largest value has 10 digits.
*/
#define DUK__STRTAB_U32_MAX_STRLEN 10 /* 4'294'967'295 */
/*
* Debug dump stringtable.
*/
#if defined(DUK_USE_DEBUG)
/* Log a one-line summary of the string table: table pointer, total number
 * of strings, minimum / maximum / average chain length, and the number of
 * buckets whose chain length is 0, 1, ..., 7 (longer chains are not counted
 * in the histogram).
 *
 * If 'heap' is NULL or the heap has no string table, only a short message
 * is logged.
 */
DUK_INTERNAL void duk_heap_strtable_dump(duk_heap *heap) {
#if defined(DUK_USE_STRTAB_PTRCOMP)
    duk_uint16_t *strtable;
#else
    duk_hstring **strtable;
#endif
    duk_uint32_t i;
    duk_hstring *h;
    duk_size_t count_total = 0;
    duk_size_t count_chain;
    duk_size_t count_chain_min = DUK_SIZE_MAX;
    duk_size_t count_chain_max = 0;
    duk_size_t count_len[8]; /* chain lengths from 0 to 7 */

    if (heap == NULL) {
        DUK_D(DUK_DPRINT("string table, heap=NULL"));
        return;
    }

    strtable = DUK__GET_STRTABLE(heap);
    if (strtable == NULL) {
        DUK_D(DUK_DPRINT("string table, strtab=NULL"));
        return;
    }

    duk_memzero((void *) count_len, sizeof(count_len));

    for (i = 0; i < heap->st_size; i++) {
        /* Walk the chain of bucket 'i' to get its length. */
        h = DUK__HEAPPTR_DEC16(heap, strtable[i]);
        count_chain = 0;
        while (h != NULL) {
            count_chain++;
            h = h->hdr.h_next;
        }

        /* Only chains short enough to fit the histogram are recorded. */
        if (count_chain < sizeof(count_len) / sizeof(duk_size_t)) {
            count_len[count_chain]++;
        }
        count_chain_max = (count_chain > count_chain_max ? count_chain : count_chain_max);
        count_chain_min = (count_chain < count_chain_min ? count_chain : count_chain_min);
        count_total += count_chain;
    }

    /* Print the table pointer obtained through DUK__GET_STRTABLE() so
     * that the logged pointer is the table that was actually walked;
     * with DUK_USE_STRTAB_PTRCOMP the active table is heap->strtable16,
     * not heap->strtable.
     */
    DUK_D(DUK_DPRINT("string table, strtab=%p, count=%lu, chain min=%lu max=%lu avg=%lf: "
                     "counts: %lu %lu %lu %lu %lu %lu %lu %lu ...",
                     (void *) strtable, (unsigned long) count_total,
                     (unsigned long) count_chain_min, (unsigned long) count_chain_max,
                     (double) count_total / (double) heap->st_size,
                     (unsigned long) count_len[0], (unsigned long) count_len[1],
                     (unsigned long) count_len[2], (unsigned long) count_len[3],
                     (unsigned long) count_len[4], (unsigned long) count_len[5],
                     (unsigned long) count_len[6], (unsigned long) count_len[7]));
}
#endif /* DUK_USE_DEBUG */
/*
* Assertion helper to ensure strtable is populated correctly.
*/
#if defined(DUK_USE_ASSERTIONS)
/* Consistency checks for the string table (assertion builds only).
 *
 * Without a table, size and mask must both be zero. With a table, the
 * mask must equal size - 1 and every string must be chained in the bucket
 * selected by its hash. When resize checks are enabled the number of
 * strings found must match heap->st_count.
 */
DUK_LOCAL void duk__strtable_assert_checks(duk_heap *heap) {
#if defined(DUK_USE_STRTAB_PTRCOMP)
    duk_uint16_t *table;
#else
    duk_hstring **table;
#endif
    duk_uint32_t bucket;
    duk_hstring *str;
    duk_size_t n_strings = 0;

    DUK_ASSERT(heap != NULL);

    table = DUK__GET_STRTABLE(heap);
    if (table == NULL) {
        DUK_ASSERT(heap->st_size == 0);
        DUK_ASSERT(heap->st_mask == 0);
    } else {
        DUK_ASSERT(heap->st_size != 0);
        DUK_ASSERT(heap->st_mask == heap->st_size - 1);
        for (bucket = 0; bucket < heap->st_size; bucket++) {
            for (str = DUK__HEAPPTR_DEC16(heap, table[bucket]); str != NULL; str = str->hdr.h_next) {
                DUK_ASSERT((DUK_HSTRING_GET_HASH(str) & heap->st_mask) == bucket);
                n_strings++;
            }
        }
    }

#if defined(DUK__STRTAB_RESIZE_CHECK)
    /* st_count is only maintained when resize checks are compiled in. */
    DUK_ASSERT(n_strings == (duk_size_t) heap->st_count);
#endif
}
#endif /* DUK_USE_ASSERTIONS */
/*
* Allocate and initialize a duk_hstring.
*
* Returns NULL if allocation or initialization fails for some reason.
*
* The string won't be inserted into the string table and isn't tracked in
* any way (link pointers will be NULL). The caller must place the string
* into the string table without any risk of a longjmp, otherwise the string
* is leaked.
*
* Arguments:
*   heap:    heap whose allocator (DUK_ALLOC) is used.
*   str:     source bytes; 'blen' bytes are copied after the duk_hstring
*            header unless external data is used.
*   blen:    byte length of the string data.
*   strhash: hash value stored into the new string as is.
*   extdata: when non-NULL (and external string support is compiled in),
*            the string data is referenced through this pointer instead
*            of being copied.
*/
DUK_LOCAL duk_hstring *duk__strtable_alloc_hstring(duk_heap *heap,
const duk_uint8_t *str,
duk_uint32_t blen,
duk_uint32_t strhash,
const duk_uint8_t *extdata) {
duk_hstring *res;
const duk_uint8_t *data;
#if !defined(DUK_USE_HSTRING_ARRIDX)
/* Receives the array index parse result when it is not stored in 'res'. */
duk_uarridx_t dummy;
#endif
DUK_ASSERT(heap != NULL);
/* 'extdata' is not referenced when external string support is compiled out. */
DUK_UNREF(extdata);
#if defined(DUK_USE_STRLEN16)
/* If blen <= 0xffffUL, clen is also guaranteed to be <= 0xffffUL. */
if (blen > 0xffffUL) {
DUK_D(DUK_DPRINT("16-bit string blen/clen active and blen over 16 bits, reject intern"));
goto alloc_error;
}
#endif
/* XXX: Memzeroing the allocated structure is not really necessary
* because we could just initialize all fields explicitly (almost
* all fields are initialized explicitly anyway).
*/
#if defined(DUK_USE_HSTRING_EXTDATA) && defined(DUK_USE_EXTSTR_INTERN_CHECK)
if (extdata) {
/* External string: only the header is allocated, the data stays
* in the caller provided 'extdata' area.
*/
res = (duk_hstring *) DUK_ALLOC(heap, sizeof(duk_hstring_external));
if (DUK_UNLIKELY(res == NULL)) {
goto alloc_error;
}
duk_memzero(res, sizeof(duk_hstring_external));
#if defined(DUK_USE_EXPLICIT_NULL_INIT)
DUK_HEAPHDR_STRING_INIT_NULLS(&res->hdr);
#endif
DUK_HEAPHDR_SET_TYPE_AND_FLAGS(&res->hdr, DUK_HTYPE_STRING, DUK_HSTRING_FLAG_EXTDATA);
DUK_ASSERT(extdata[blen] == 0); /* Application responsibility. */
data = extdata;
((duk_hstring_external *) res)->extdata = extdata;
} else
#endif /* DUK_USE_HSTRING_EXTDATA && DUK_USE_EXTSTR_INTERN_CHECK */
{
duk_uint8_t *data_tmp;
/* NUL terminate for convenient C access */
DUK_ASSERT(sizeof(duk_hstring) + blen + 1 > blen); /* No wrap, limits ensure. */
res = (duk_hstring *) DUK_ALLOC(heap, sizeof(duk_hstring) + blen + 1);
if (DUK_UNLIKELY(res == NULL)) {
goto alloc_error;
}
/* Only the header is zeroed; the data area is written below. */
duk_memzero(res, sizeof(duk_hstring));
#if defined(DUK_USE_EXPLICIT_NULL_INIT)
DUK_HEAPHDR_STRING_INIT_NULLS(&res->hdr);
#endif
DUK_HEAPHDR_SET_TYPE_AND_FLAGS(&res->hdr, DUK_HTYPE_STRING, 0);
/* String data is stored immediately after the duk_hstring header. */
data_tmp = (duk_uint8_t *) (res + 1);
/* NOTE(review): duk_heap_strtable_intern() only asserts
* 'blen == 0 || str != NULL', so 'str' may be NULL here when blen
* is zero; confirm duk_memcpy() tolerates that.
*/
duk_memcpy(data_tmp, str, blen);
data_tmp[blen] = (duk_uint8_t) 0;
data = (const duk_uint8_t *) data_tmp;
}
DUK_HSTRING_SET_BYTELEN(res, blen);
DUK_HSTRING_SET_HASH(res, strhash);
DUK_ASSERT(!DUK_HSTRING_HAS_ARRIDX(res));
/* The array index check result is either stored in the string or only
* used to decide which flags to set, depending on DUK_USE_HSTRING_ARRIDX.
*/
#if defined(DUK_USE_HSTRING_ARRIDX)
res->arridx = duk_js_to_arrayindex_string(data, blen);
if (res->arridx != DUK_HSTRING_NO_ARRAY_INDEX) {
#else
dummy = duk_js_to_arrayindex_string(data, blen);
if (dummy != DUK_HSTRING_NO_ARRAY_INDEX) {
#endif
/* Array index strings cannot be symbol strings,
* and they're always pure ASCII so blen == clen.
*/
DUK_HSTRING_SET_ARRIDX(res);
DUK_HSTRING_SET_ASCII(res);
DUK_ASSERT(duk_unicode_unvalidated_utf8_length(data, (duk_size_t) blen) == blen);
} else {
/* Because 'data' is NUL-terminated, we don't need a
* blen > 0 check here. For NUL (0x00) the symbol
* checks will be false.
*/
/* Initial byte 0x80 or 0x81 marks a symbol; 0x82 or 0xff marks a
* symbol which is also flagged hidden. Other initial bytes >= 0x80
* set no flags here.
*/
if (DUK_UNLIKELY(data[0] >= 0x80U)) {
if (data[0] <= 0x81) {
DUK_HSTRING_SET_SYMBOL(res);
} else if (data[0] == 0x82U || data[0] == 0xffU) {
DUK_HSTRING_SET_HIDDEN(res);
DUK_HSTRING_SET_SYMBOL(res);
}
}
/* Using an explicit 'ASCII' flag has larger footprint (one call site
* only) but is quite useful for the case when there's no explicit
* 'clen' in duk_hstring.
*
* The flag is set lazily for RAM strings.
*/
DUK_ASSERT(!DUK_HSTRING_HAS_ASCII(res));
#if defined(DUK_USE_HSTRING_LAZY_CLEN)
/* Charlen initialized to 0, updated on-the-fly. */
#else
duk_hstring_init_charlen(res); /* Also sets ASCII flag. */
#endif
}
DUK_DDD(DUK_DDDPRINT("interned string, hash=0x%08lx, blen=%ld, has_arridx=%ld, has_extdata=%ld",
(unsigned long) DUK_HSTRING_GET_HASH(res),
(long) DUK_HSTRING_GET_BYTELEN(res),
(long) (DUK_HSTRING_HAS_ARRIDX(res) ? 1 : 0),
(long) (DUK_HSTRING_HAS_EXTDATA(res) ? 1 : 0)));
DUK_ASSERT(res != NULL);
return res;
/* Reached before anything has been allocated, so there is nothing to free. */
alloc_error:
return NULL;
}
/*
* Grow strtable allocation in-place.
*/
#if defined(DUK__STRTAB_RESIZE_CHECK)
/* Double the size of the string table and redistribute the strings.
 *
 * The table allocation is first reallocated to twice its size. If that
 * fails, the table is left untouched and the function returns silently.
 * Otherwise every old bucket 'idx' is split into bucket 'idx' and bucket
 * 'idx + old size' based on the hash bit which becomes significant with
 * the new size. Strings staying in the low bucket keep their relative
 * order; strings moved to the high bucket are prepended one by one.
 *
 * The caller must have set heap->st_resizing.
 */
DUK_LOCAL void duk__strtable_grow_inplace(duk_heap *heap) {
    duk_uint32_t size_after;
    duk_uint32_t size_before;
    duk_uint32_t idx;
#if defined(DUK_USE_STRTAB_PTRCOMP)
    duk_uint16_t *table;
    duk_uint16_t *table_upper;
#else
    duk_hstring **table;
    duk_hstring **table_upper;
#endif

    DUK_DD(DUK_DDPRINT("grow in-place: %lu -> %lu", (unsigned long) heap->st_size, (unsigned long) heap->st_size * 2));

    DUK_ASSERT(heap != NULL);
    DUK_ASSERT(heap->st_resizing == 1);
    DUK_ASSERT(heap->st_size >= 2);
    DUK_ASSERT((heap->st_size & (heap->st_size - 1)) == 0); /* 2^N */
    DUK_ASSERT(DUK__GET_STRTABLE(heap) != NULL);

    DUK_STATS_INC(heap, stats_strtab_resize_grow);

    size_after = heap->st_size << 1U;
    DUK_ASSERT(size_after > heap->st_size); /* No overflow. */

    /* A plain (non-indirect) realloc is enough: a GC triggered by the
     * allocation cannot start a nested resize because the resize flag
     * is set.
     */
#if defined(DUK_USE_STRTAB_PTRCOMP)
    table = (duk_uint16_t *) DUK_REALLOC(heap, heap->strtable16, sizeof(duk_uint16_t) * size_after);
#else
    table = (duk_hstring **) DUK_REALLOC(heap, heap->strtable, sizeof(duk_hstring *) * size_after);
#endif
    if (DUK_UNLIKELY(table == NULL)) {
        /* Not fatal: chains just keep getting longer and a later
         * resize check will retry.
         */
        DUK_D(DUK_DPRINT("string table grow failed, ignoring"));
        return;
    }
#if defined(DUK_USE_STRTAB_PTRCOMP)
    heap->strtable16 = table;
#else
    heap->strtable = table;
#endif

    /* The old size is also the hash bit deciding between the low and
     * the high bucket, e.g. old size 0x100 with old mask 0xFF.
     */
    size_before = heap->st_size;
    table_upper = table + size_before;

    for (idx = 0; idx < size_before; idx++) {
        duk_hstring *low_head;
        duk_hstring *high_head;
        duk_hstring *last_low;
        duk_hstring *cur;
        duk_hstring *following;

        low_head = DUK__HEAPPTR_DEC16(heap, table[idx]);
        high_head = NULL;
        last_low = NULL;

        for (cur = low_head; cur != NULL; cur = following) {
            DUK_ASSERT((DUK_HSTRING_GET_HASH(cur) & heap->st_mask) == idx);
            following = cur->hdr.h_next;
            DUK_ASSERT(heap->st_mask == size_before - 1);

            if ((DUK_HSTRING_GET_HASH(cur) & size_before) == 0) {
                /* Stays in the low bucket, already linked correctly. */
                last_low = cur;
                continue;
            }

            /* Unlink from the low chain... */
            if (last_low == NULL) {
                DUK_ASSERT(cur == low_head);
                low_head = following;
            } else {
                last_low->hdr.h_next = following;
            }

            /* ...and push to the front of the high chain. */
            cur->hdr.h_next = high_head;
            high_head = cur;
        }

        table[idx] = DUK__HEAPPTR_ENC16(heap, low_head);
        table_upper[idx] = DUK__HEAPPTR_ENC16(heap, high_head);
    }

    heap->st_size = size_after;
    heap->st_mask = size_after - 1;

#if defined(DUK_USE_ASSERTIONS)
    duk__strtable_assert_checks(heap);
#endif
}
#endif /* DUK__STRTAB_RESIZE_CHECK */
/*
* Shrink strtable allocation in-place.
*/
#if defined(DUK__STRTAB_RESIZE_CHECK)
/* Halve the size of the string table.
 *
 * Each pair of buckets (i, i + new size) is first merged into bucket 'i'
 * by appending the high chain to the end of the low chain. Size and mask
 * are then updated, and finally the table allocation is reallocated to
 * the smaller size.
 *
 * If the final realloc fails, the old (now oversized) allocation is kept:
 * the table is already consistent for the new size, and a failed realloc
 * leaves the original block valid, so nothing is lost except the memory
 * saving.
 *
 * The caller must have set heap->st_resizing.
 */
DUK_LOCAL void duk__strtable_shrink_inplace(duk_heap *heap) {
    duk_uint32_t new_st_size;
    duk_uint32_t i;
    duk_hstring *h;
    duk_hstring *other;
    duk_hstring *root;
#if defined(DUK_USE_STRTAB_PTRCOMP)
    duk_uint16_t *old_ptr;
    duk_uint16_t *old_ptr_high;
    duk_uint16_t *new_ptr;
#else
    duk_hstring **old_ptr;
    duk_hstring **old_ptr_high;
    duk_hstring **new_ptr;
#endif

    DUK_DD(DUK_DDPRINT("shrink in-place: %lu -> %lu", (unsigned long) heap->st_size, (unsigned long) heap->st_size / 2));

    DUK_ASSERT(heap != NULL);
    DUK_ASSERT(heap->st_resizing == 1);
    DUK_ASSERT(heap->st_size >= 2);
    DUK_ASSERT((heap->st_size & (heap->st_size - 1)) == 0); /* 2^N */
    DUK_ASSERT(DUK__GET_STRTABLE(heap) != NULL);

    DUK_STATS_INC(heap, stats_strtab_resize_shrink);

    new_st_size = heap->st_size >> 1U;

    /* Combine two buckets into a single one. When we shrink, one hash
     * bit (highest) disappears.
     */
    old_ptr = DUK__GET_STRTABLE(heap);
    old_ptr_high = old_ptr + new_st_size;
    for (i = 0; i < new_st_size; i++) {
        h = DUK__HEAPPTR_DEC16(heap, old_ptr[i]);
        other = DUK__HEAPPTR_DEC16(heap, old_ptr_high[i]);

        if (h == NULL) {
            /* First chain is empty, so use second one as is. */
            root = other;
        } else {
            /* Find end of first chain, and link in the second. */
            root = h;
            while (h->hdr.h_next != NULL) {
                h = h->hdr.h_next;
            }
            h->hdr.h_next = other;
        }

        old_ptr[i] = DUK__HEAPPTR_ENC16(heap, root);
    }

    heap->st_size = new_st_size;
    heap->st_mask = new_st_size - 1;

    /* The strtable is now consistent and we can realloc safely. Even
     * if side effects cause string interning or removal the strtable
     * updates are safe. Recursive resize has been prevented by caller.
     * This is also why we don't need to use DUK_REALLOC_INDIRECT().
     */
#if defined(DUK_USE_STRTAB_PTRCOMP)
    new_ptr = (duk_uint16_t *) DUK_REALLOC(heap, heap->strtable16, sizeof(duk_uint16_t) * new_st_size);
#else
    new_ptr = (duk_hstring **) DUK_REALLOC(heap, heap->strtable, sizeof(duk_hstring *) * new_st_size);
#endif
    if (DUK_UNLIKELY(new_ptr == NULL)) {
        /* A realloc() to a smaller size is not guaranteed to succeed.
         * Never overwrite the table pointer with NULL: keep using the
         * old allocation, which is merely larger than necessary.
         */
        DUK_D(DUK_DPRINT("string table shrink realloc failed, keeping old allocation"));
    } else {
#if defined(DUK_USE_STRTAB_PTRCOMP)
        heap->strtable16 = new_ptr;
#else
        heap->strtable = new_ptr;
#endif
    }

#if defined(DUK_USE_ASSERTIONS)
    duk__strtable_assert_checks(heap);
#endif
}
#endif /* DUK__STRTAB_RESIZE_CHECK */
/*
* Grow/shrink check.
*/
#if defined(DUK__STRTAB_RESIZE_CHECK)
/* Check the string table load factor and perform at most one grow or
 * shrink step.
 *
 * The load factor is computed as st_count / (st_size / 16). The table is
 * grown when the value reaches DUK_USE_STRTAB_GROW_LIMIT (unless already
 * at DUK_USE_STRTAB_MAXSIZE) and shrunk when it is at or below
 * DUK_USE_STRTAB_SHRINK_LIMIT (unless already at DUK_USE_STRTAB_MINSIZE).
 * A call made while another resize is in progress returns immediately.
 */
DUK_LOCAL DUK_COLD DUK_NOINLINE void duk__strtable_resize_check(duk_heap *heap) {
    duk_uint32_t fill; /* fixed point */

    DUK_ASSERT(heap != NULL);
    DUK_ASSERT(DUK__GET_STRTABLE(heap) != NULL);

    DUK_STATS_INC(heap, stats_strtab_resize_check);

    /* Guard against re-entry from side effects of a resize. */
    if (DUK_UNLIKELY(heap->st_resizing != 0U)) {
        DUK_D(DUK_DPRINT("prevent recursive strtable resize"));
        return;
    }
    heap->st_resizing = 1;

    DUK_ASSERT(heap->st_size >= 16U);
    DUK_ASSERT((heap->st_size >> 4U) >= 1);
    fill = heap->st_count / (heap->st_size >> 4U);

    DUK_DD(DUK_DDPRINT("resize check string table: size=%lu, count=%lu, load_factor=%lu (fixed point .4; float %lf)",
                       (unsigned long) heap->st_size, (unsigned long) heap->st_count,
                       (unsigned long) fill,
                       (double) heap->st_count / (double) heap->st_size));

    if (fill >= DUK_USE_STRTAB_GROW_LIMIT) {
        if (heap->st_size < DUK_USE_STRTAB_MAXSIZE) {
            DUK_D(DUK_DPRINT("grow string table: %lu -> %lu", (unsigned long) heap->st_size, (unsigned long) heap->st_size * 2));
#if defined(DUK_USE_DEBUG)
            duk_heap_strtable_dump(heap);
#endif
            duk__strtable_grow_inplace(heap);
        } else {
            DUK_DD(DUK_DDPRINT("want to grow strtable (based on load factor) but already maximum size"));
        }
    } else if (fill > DUK_USE_STRTAB_SHRINK_LIMIT) {
        /* Between the limits: leave the table alone. */
        DUK_DD(DUK_DDPRINT("no need for strtable resize"));
    } else if (heap->st_size > DUK_USE_STRTAB_MINSIZE) {
        DUK_D(DUK_DPRINT("shrink string table: %lu -> %lu", (unsigned long) heap->st_size, (unsigned long) heap->st_size / 2));
#if defined(DUK_USE_DEBUG)
        duk_heap_strtable_dump(heap);
#endif
        duk__strtable_shrink_inplace(heap);
    } else {
        DUK_DD(DUK_DDPRINT("want to shrink strtable (based on load factor) but already minimum size"));
    }

    heap->st_resizing = 0;
}
#endif /* DUK__STRTAB_RESIZE_CHECK */
/*
* Torture grow/shrink: unconditionally grow and shrink back.
*/
#if defined(DUK_USE_STRTAB_TORTURE) && defined(DUK__STRTAB_RESIZE_CHECK)
/* Torture helper: grow the string table by one step and, if the grow
 * actually took effect, immediately shrink it back. Does nothing when the
 * table is already at DUK_USE_STRTAB_MAXSIZE.
 */
DUK_LOCAL void duk__strtable_resize_torture(duk_heap *heap) {
    duk_uint32_t size_at_entry;

    DUK_ASSERT(heap != NULL);

    size_at_entry = heap->st_size;
    if (size_at_entry < DUK_USE_STRTAB_MAXSIZE) {
        heap->st_resizing = 1;
        duk__strtable_grow_inplace(heap);
        /* The grow step is skipped silently if its realloc fails. */
        if (heap->st_size > size_at_entry) {
            duk__strtable_shrink_inplace(heap);
        }
        heap->st_resizing = 0;
    }
}
#endif /* DUK_USE_STRTAB_TORTURE && DUK__STRTAB_RESIZE_CHECK */
/*
* Raw intern; string already checked not to be present.
*/
/* Allocate a new duk_hstring for str/blen and link it into the string table.
*
* The caller has already verified that the string is not present in the
* table; 'strhash' is the hash of the string and selects the bucket.
*
* Returns the new string, which is in the string table but not yet
* INCREF'd (see the note before the final return), or NULL if the
* duk_hstring allocation fails.
*/
DUK_LOCAL duk_hstring *duk__strtable_do_intern(duk_heap *heap, const duk_uint8_t *str, duk_uint32_t blen, duk_uint32_t strhash) {
duk_hstring *res;
const duk_uint8_t *extdata;
#if defined(DUK_USE_STRTAB_PTRCOMP)
duk_uint16_t *slot;
#else
duk_hstring **slot;
#endif
DUK_DDD(DUK_DDDPRINT("do_intern: heap=%p, str=%p, blen=%lu, strhash=%lx, st_size=%lu, st_count=%lu, load=%lf",
(void *) heap, (const void *) str, (unsigned long) blen, (unsigned long) strhash,
(unsigned long) heap->st_size, (unsigned long) heap->st_count,
(double) heap->st_count / (double) heap->st_size));
DUK_ASSERT(heap != NULL);
/* Prevent any side effects on the string table and the caller provided
* str/blen arguments while interning is in progress. For example, if
* the caller provided str/blen from a dynamic buffer, a finalizer
* might resize or modify that dynamic buffer, invalidating the call
* arguments.
*
* While finalizers must be prevented, mark-and-sweep itself is fine.
* Recursive string table resize is prevented explicitly here.
*/
heap->pf_prevent_count++;
DUK_ASSERT(heap->pf_prevent_count != 0); /* Wrap. */
#if defined(DUK_USE_STRTAB_TORTURE) && defined(DUK__STRTAB_RESIZE_CHECK)
duk__strtable_resize_torture(heap);
#endif
/* String table grow/shrink check. Because of chaining (and no
* accumulation issues as with hash probe chains and DELETED
* markers) there's never a mandatory need to resize right now.
* Check for the resize only periodically, based on st_count
* bit pattern. Because string table removal doesn't do a shrink
* check, we do that also here.
*
* Do the resize and possible grow/shrink before the new duk_hstring
* has been allocated. Otherwise we may trigger a GC when the result
* duk_hstring is not yet strongly referenced.
*/
#if defined(DUK__STRTAB_RESIZE_CHECK)
if (DUK_UNLIKELY((heap->st_count & DUK_USE_STRTAB_RESIZE_CHECK_MASK) == 0)) {
duk__strtable_resize_check(heap);
}
#endif
/* External string check (low memory optimization). */
#if defined(DUK_USE_HSTRING_EXTDATA) && defined(DUK_USE_EXTSTR_INTERN_CHECK)
extdata = (const duk_uint8_t *) DUK_USE_EXTSTR_INTERN_CHECK(heap->heap_udata, (void *) DUK_LOSE_CONST(str), (duk_size_t) blen);
#else
extdata = (const duk_uint8_t *) NULL;
#endif
/* Allocate and initialize string, not yet linked. This may cause a
* GC which may cause other strings to be interned and inserted into
* the string table before we insert our string. Finalizer execution
* is disabled intentionally to avoid a finalizer from e.g. resizing
* a buffer used as a data area for 'str'.
*/
res = duk__strtable_alloc_hstring(heap, str, blen, strhash, extdata);
/* Allow side effects again: GC must be avoided until duk_hstring
* result (if successful) has been INCREF'd.
*/
DUK_ASSERT(heap->pf_prevent_count > 0);
heap->pf_prevent_count--;
/* Alloc error handling. */
if (DUK_UNLIKELY(res == NULL)) {
#if defined(DUK_USE_HSTRING_EXTDATA) && defined(DUK_USE_EXTSTR_INTERN_CHECK)
/* No duk_hstring was created to reference the external data, so
* hand it to DUK_USE_EXTSTR_FREE here.
*/
if (extdata != NULL) {
DUK_USE_EXTSTR_FREE(heap->heap_udata, (const void *) extdata);
}
#endif
return NULL;
}
/* Insert into string table. */
/* The bucket is looked up only now, after all allocations above, so
* a table resized during those steps is handled correctly.
*/
#if defined(DUK_USE_STRTAB_PTRCOMP)
slot = heap->strtable16 + (strhash & heap->st_mask);
#else
slot = heap->strtable + (strhash & heap->st_mask);
#endif
DUK_ASSERT(res->hdr.h_next == NULL); /* This is the case now, but unnecessary zeroing/NULLing. */
/* The new string becomes the head of its bucket chain. */
res->hdr.h_next = DUK__HEAPPTR_DEC16(heap, *slot);
*slot = DUK__HEAPPTR_ENC16(heap, res);
/* Update string count only for successful inserts. */
#if defined(DUK__STRTAB_RESIZE_CHECK)
heap->st_count++;
#endif
/* The duk_hstring is in the string table but is not yet strongly
* reachable. Calling code MUST NOT make any allocations or other
* side effects before the duk_hstring has been INCREF'd and made
* reachable.
*/
return res;
}
/*
* Intern a string from str/blen, returning either an existing duk_hstring
* or adding a new one into the string table. The input string does -not-
* need to be NUL terminated.
*
* The input 'str' argument may point to a Duktape managed data area such as
* the data area of a dynamic buffer. It's crucial to avoid any side effects
* that might affect the data area (e.g. resize the dynamic buffer, or write
* to the buffer) before the string is fully interned.
*/
#if defined(DUK_USE_ROM_STRINGS)
/* Look up str/blen from the ROM string lookup table.
 *
 * The bucket index is ((blen << 4) + first byte) & 0xff, with no first
 * byte added for an empty string. The bucket chain is searched for an
 * entry with matching hash, byte length and data.
 *
 * Returns the matching ROM string, or NULL if there is none.
 */
DUK_LOCAL duk_hstring *duk__strtab_romstring_lookup(duk_heap *heap, const duk_uint8_t *str, duk_size_t blen, duk_uint32_t strhash) {
    duk_size_t bucket;
    duk_hstring *cand;

    DUK_ASSERT(heap != NULL);
    DUK_UNREF(heap);

    bucket = blen << 4;
    if (blen != 0) {
        bucket += str[0];
    }
    bucket &= 0xff;

    for (cand = (duk_hstring *) DUK_LOSE_CONST(duk_rom_strings_lookup[bucket]);
         cand != NULL;
         cand = cand->hdr.h_next) {
        /* Cheap comparisons first, data comparison last. */
        if (strhash != DUK_HSTRING_GET_HASH(cand)) {
            continue;
        }
        if (blen != DUK_HSTRING_GET_BYTELEN(cand)) {
            continue;
        }
        /* Unsafe memcmp() because for zero blen, str may be NULL. */
        if (duk_memcmp_unsafe((const void *) str, (const void *) DUK_HSTRING_GET_DATA(cand), blen) != 0) {
            continue;
        }

        DUK_DDD(DUK_DDDPRINT("intern check: rom string: %!O, computed hash 0x%08lx, rom hash 0x%08lx",
                             cand, (unsigned long) strhash, (unsigned long) DUK_HSTRING_GET_HASH(cand)));
        return cand;
    }

    return NULL;
}
#endif /* DUK_USE_ROM_STRINGS */
/* Intern the string str/blen.
 *
 * Looks for an existing entry first in the RAM string table and then (if
 * ROM strings are enabled) in the ROM string table. If neither has the
 * string, a new duk_hstring is created and inserted.
 *
 * Returns the interned string, or NULL if a new string was needed and
 * could not be allocated. 'str' may be NULL only when 'blen' is zero.
 */
DUK_INTERNAL duk_hstring *duk_heap_strtable_intern(duk_heap *heap, const duk_uint8_t *str, duk_uint32_t blen) {
    duk_uint32_t hash;
    duk_hstring *cand;

    DUK_DDD(DUK_DDDPRINT("intern check: heap=%p, str=%p, blen=%lu", (void *) heap, (const void *) str, (unsigned long) blen));

    /* Argument checks. */
    /* XXX: maybe just require 'str != NULL' even for zero size? */
    DUK_ASSERT(heap != NULL);
    DUK_ASSERT(blen == 0 || str != NULL);
    DUK_ASSERT(blen <= DUK_HSTRING_MAX_BYTELEN); /* Caller is responsible for ensuring this. */

    hash = duk_heap_hashstring(heap, str, (duk_size_t) blen);

    /* RAM string table: scan the bucket chain selected by the hash. */
    DUK_ASSERT(DUK__GET_STRTABLE(heap) != NULL);
    DUK_ASSERT(heap->st_size > 0);
    DUK_ASSERT(heap->st_size == heap->st_mask + 1);

    for (cand = DUK__HEAPPTR_DEC16(heap, DUK__GET_STRTABLE(heap)[hash & heap->st_mask]);
         cand != NULL;
         cand = cand->hdr.h_next) {
        if (DUK_HSTRING_GET_HASH(cand) == hash &&
            DUK_HSTRING_GET_BYTELEN(cand) == blen &&
            duk_memcmp_unsafe((const void *) str, (const void *) DUK_HSTRING_GET_DATA(cand), (size_t) blen) == 0) {
            /* Already interned. */
            DUK_STATS_INC(heap, stats_strtab_intern_hit);
            return cand;
        }
    }

    /* ROM table lookup is slower, so it is done only after the RAM
     * lookup. This works because no ROM string is ever interned into
     * the RAM string table.
     */
#if defined(DUK_USE_ROM_STRINGS)
    cand = duk__strtab_romstring_lookup(heap, str, blen, hash);
    if (cand != NULL) {
        DUK_STATS_INC(heap, stats_strtab_intern_hit);
        return cand;
    }
#endif

    /* No existing entry: create and insert one. */
    DUK_STATS_INC(heap, stats_strtab_intern_miss);
    return duk__strtable_do_intern(heap, str, blen, hash); /* may be NULL */
}
/*
* Intern a string from u32.
*/
/* XXX: Could arrange some special handling because we know that the result
* will have an arridx flag and an ASCII flag, won't need a clen check, etc.
*/
/* Intern the decimal string representation of 'val'.
 *
 * Digits are generated from least to most significant into the end of a
 * stack buffer, and the resulting byte range is interned. Returns the
 * interned string or NULL on allocation failure.
 */
DUK_INTERNAL duk_hstring *duk_heap_strtable_intern_u32(duk_heap *heap, duk_uint32_t val) {
    duk_uint8_t digits[DUK__STRTAB_U32_MAX_STRLEN];
    duk_uint8_t *end = digits + sizeof(digits);
    duk_uint8_t *cursor = end;

    DUK_ASSERT(heap != NULL);

    /* This is smaller and faster than a %lu sprintf. The loop body runs
     * at least once so that zero is formatted as a single '0'.
     */
    do {
        *--cursor = duk_lc_digits[val % 10];
        val /= 10;
    } while (val != 0);
    DUK_ASSERT(cursor >= digits);

    return duk_heap_strtable_intern(heap, (const duk_uint8_t *) cursor, (duk_uint32_t) (end - cursor));
}
/*
* Checked convenience variants.
*
* XXX: Because the main use case is for the checked variants, make them the
* main functionality and provide a safe variant separately (it is only needed
* during heap init). The problem with that is that the checked variants can
* only be used once longjmp state and error creation are available, so that
* an error can actually be thrown.
*/
/* Intern str/blen using the heap of 'thr'. An allocation failure error is
 * raised through 'thr' if interning fails, so a normal return always
 * provides a non-NULL string.
 */
DUK_INTERNAL duk_hstring *duk_heap_strtable_intern_checked(duk_hthread *thr, const duk_uint8_t *str, duk_uint32_t blen) {
    duk_hstring *interned;

    DUK_ASSERT(thr != NULL);
    DUK_ASSERT(thr->heap != NULL);
    DUK_ASSERT(blen == 0 || str != NULL);

    interned = duk_heap_strtable_intern(thr->heap, str, blen);
    if (DUK_UNLIKELY(interned == NULL)) {
        DUK_ERROR_ALLOC_FAILED(thr);
        DUK_WO_NORETURN(return NULL;);
    }

    return interned;
}
#if defined(DUK_USE_LITCACHE_SIZE)
/* Compute the literal cache slot index for a literal: the address of the
 * literal XORed with its byte length, masked to the cache size (which
 * must be a power of two).
 */
DUK_LOCAL duk_uint_t duk__strtable_litcache_key(const duk_uint8_t *str, duk_uint32_t blen) {
    duk_uintptr_t mixed;

    DUK_ASSERT(DUK_USE_LITCACHE_SIZE > 0);
    DUK_ASSERT(DUK_IS_POWER_OF_TWO((duk_uint_t) DUK_USE_LITCACHE_SIZE));

    mixed = (duk_uintptr_t) blen ^ (duk_uintptr_t) str;
    mixed = mixed & (duk_uintptr_t) (DUK_USE_LITCACHE_SIZE - 1); /* Assumes size is power of 2. */

    /* After masking the value fits into duk_uint_t. */
    DUK_ASSERT(mixed <= DUK_UINT_MAX);
    return (duk_uint_t) mixed;
}
/* Intern a literal, using the literal cache as a fast path.
 *
 * The cache slot is selected from the literal's address and length. If
 * the slot already holds this address, its cached duk_hstring is returned
 * directly. Otherwise the string is interned normally (raising an error
 * on allocation failure), the slot is overwritten, and the string is
 * pinned (INCREF'd and flagged) unless it has been pinned before.
 */
DUK_INTERNAL duk_hstring *duk_heap_strtable_intern_literal_checked(duk_hthread *thr, const duk_uint8_t *str, duk_uint32_t blen) {
    duk_litcache_entry *slot;
    duk_hstring *res;

    /* Fast path: address found in the literal cache. */
    slot = thr->heap->litcache + duk__strtable_litcache_key(str, blen);
    if (slot->addr == str) {
        DUK_DD(DUK_DDPRINT("intern check for cached, pinned literal: str=%p, blen=%ld -> duk_hstring %!O",
                           (const void *) str, (long) blen, (duk_heaphdr *) slot->h));
        DUK_ASSERT(slot->h != NULL);
        DUK_ASSERT(DUK_HSTRING_HAS_PINNED_LITERAL(slot->h));
        DUK_STATS_INC(thr->heap, stats_strtab_litcache_hit);
        return slot->h;
    }

    /* Slow path: intern, then replace whatever the slot held. */
    res = duk_heap_strtable_intern_checked(thr, str, blen);
    slot->addr = str;
    slot->h = res;
    DUK_STATS_INC(thr->heap, stats_strtab_litcache_miss);

    if (DUK_HSTRING_HAS_PINNED_LITERAL(res)) {
        /* Pinned by an earlier call; the pin outlives cache overwrites. */
        return res;
    }

    /* Pin the duk_hstring until the next mark-and-sweep. Cache entries
     * then need no invalidation before that mark-and-sweep because the
     * target duk_hstring is not freed before it happens. The pin also
     * helps to avoid string table traffic.
     */
    DUK_DD(DUK_DDPRINT("pin duk_hstring because it is a literal: %!O", (duk_heaphdr *) res));
    DUK_ASSERT(!DUK_HEAPHDR_HAS_READONLY((duk_heaphdr *) res));
    DUK_HSTRING_INCREF(thr, res);
    DUK_HSTRING_SET_PINNED_LITERAL(res);
    DUK_STATS_INC(thr->heap, stats_strtab_litcache_pin);

    return res;
}
#endif /* DUK_USE_LITCACHE_SIZE */
/* Intern the decimal string for 'val' using the heap of 'thr'. An
 * allocation failure error is raised through 'thr' if interning fails.
 */
DUK_INTERNAL duk_hstring *duk_heap_strtable_intern_u32_checked(duk_hthread *thr, duk_uint32_t val) {
    duk_hstring *interned;

    DUK_ASSERT(thr != NULL);
    DUK_ASSERT(thr->heap != NULL);

    interned = duk_heap_strtable_intern_u32(thr->heap, val);
    if (DUK_UNLIKELY(interned == NULL)) {
        DUK_ERROR_ALLOC_FAILED(thr);
        DUK_WO_NORETURN(return NULL;);
    }

    return interned;
}
/*
* Remove (unlink) a string from the string table.
*
* Just unlinks the duk_hstring, leaving link pointers as garbage.
* Caller must free the string itself.
*/
#if defined(DUK_USE_REFERENCE_COUNTING)
/* Unlink without a 'prev' pointer. */
/* Unlink 'h' from the string table when its predecessor is not known.
 *
 * The bucket chain selected by the hash of 'h' is walked from its head
 * to find the predecessor (if any), after which 'h' is unlinked. The
 * string must be present in the table. The string itself is not freed.
 */
DUK_INTERNAL void duk_heap_strtable_unlink(duk_heap *heap, duk_hstring *h) {
#if defined(DUK_USE_STRTAB_PTRCOMP)
    duk_uint16_t *bucket;
#else
    duk_hstring **bucket;
#endif
    duk_hstring *cur;
    duk_hstring *before = NULL;

    DUK_DDD(DUK_DDDPRINT("remove: heap=%p, h=%p, blen=%lu, strhash=%lx",
                         (void *) heap, (void *) h,
                         (unsigned long) (h != NULL ? DUK_HSTRING_GET_BYTELEN(h) : 0),
                         (unsigned long) (h != NULL ? DUK_HSTRING_GET_HASH(h) : 0)));

    DUK_ASSERT(heap != NULL);
    DUK_ASSERT(h != NULL);

#if defined(DUK__STRTAB_RESIZE_CHECK)
    DUK_ASSERT(heap->st_count > 0);
    heap->st_count--;
#endif

    bucket = DUK__GET_STRTABLE(heap) + (DUK_HSTRING_GET_HASH(h) & heap->st_mask);
    cur = DUK__HEAPPTR_DEC16(heap, *bucket);
    DUK_ASSERT(cur != NULL); /* At least argument string is in the chain. */

    /* Advance until 'cur' is the string to remove. */
    while (cur != h) {
        before = cur;
        cur = cur->hdr.h_next;
        DUK_ASSERT(cur != NULL); /* We'll eventually find 'h'. */
    }

    if (before == NULL) {
        /* 'h' is the chain head: update the bucket slot. */
        *bucket = DUK__HEAPPTR_ENC16(heap, h->hdr.h_next);
    } else {
        /* 'h' has a predecessor: bypass it. */
        before->hdr.h_next = h->hdr.h_next;
    }

    /* No resize check is done on removal; the next string intern
     * will do one.
     */
}
#endif /* DUK_USE_REFERENCE_COUNTING */
/* Unlink with a 'prev' pointer. */
/* Unlink 'h' from the string table when the caller knows its predecessor.
 *
 * 'prev' is the string preceding 'h' in its bucket chain, or NULL when
 * 'h' is the head of the chain. The string itself is not freed.
 */
DUK_INTERNAL void duk_heap_strtable_unlink_prev(duk_heap *heap, duk_hstring *h, duk_hstring *prev) {
#if defined(DUK_USE_STRTAB_PTRCOMP)
    duk_uint16_t *bucket;
#else
    duk_hstring **bucket;
#endif

    DUK_DDD(DUK_DDDPRINT("remove: heap=%p, prev=%p, h=%p, blen=%lu, strhash=%lx",
                         (void *) heap, (void *) prev, (void *) h,
                         (unsigned long) (h != NULL ? DUK_HSTRING_GET_BYTELEN(h) : 0),
                         (unsigned long) (h != NULL ? DUK_HSTRING_GET_HASH(h) : 0)));

    DUK_ASSERT(heap != NULL);
    DUK_ASSERT(h != NULL);
    DUK_ASSERT(prev == NULL || prev->hdr.h_next == h);

#if defined(DUK__STRTAB_RESIZE_CHECK)
    DUK_ASSERT(heap->st_count > 0);
    heap->st_count--;
#endif

    if (prev != NULL) {
        /* Middle of list: bypass 'h'. */
        prev->hdr.h_next = h->hdr.h_next;
        return;
    }

    /* Head of list: the bucket slot must be updated instead. */
    bucket = DUK__GET_STRTABLE(heap) + (DUK_HSTRING_GET_HASH(h) & heap->st_mask);
    DUK_ASSERT(DUK__HEAPPTR_DEC16(heap, *bucket) == h);
    *bucket = DUK__HEAPPTR_ENC16(heap, h->hdr.h_next);
}
/*
* Force string table resize check in mark-and-sweep.
*/
/* Run a string table resize check right away (at most one grow/shrink
 * step). Nothing is done if the heap has no string table or if resize
 * checks are compiled out. Recursive resizing is prevented by the
 * heap->st_resizing flag inside the check itself.
 */
DUK_INTERNAL void duk_heap_strtable_force_resize(duk_heap *heap) {
    DUK_ASSERT(heap != NULL);
    DUK_UNREF(heap);

#if defined(DUK__STRTAB_RESIZE_CHECK)
    if (DUK__GET_STRTABLE(heap) == NULL) {
        return;
    }
    duk__strtable_resize_check(heap);
#endif
}
/*
* Free strings in the string table and the string table itself.
*/
/* Free every string in the string table and then the table itself.
 *
 * Buckets are processed from the last one to the first one. The table
 * may be NULL (heap init failure), in which case st_size is zero and no
 * bucket is visited.
 */
DUK_INTERNAL void duk_heap_strtable_free(duk_heap *heap) {
#if defined(DUK_USE_STRTAB_PTRCOMP)
    duk_uint16_t *table;
#else
    duk_hstring **table;
#endif
    duk_uint32_t remaining;
    duk_hstring *cur;
    duk_hstring *following;

    DUK_ASSERT(heap != NULL);

#if defined(DUK_USE_ASSERTIONS)
    duk__strtable_assert_checks(heap);
#endif

    table = DUK__GET_STRTABLE(heap);
    DUK_ASSERT(table != NULL || heap->st_size == 0);

    for (remaining = heap->st_size; remaining > 0; remaining--) {
        for (cur = DUK__HEAPPTR_DEC16(heap, table[remaining - 1]); cur != NULL; cur = following) {
            /* Read the link before the string is freed. */
            following = cur->hdr.h_next;

            /* Strings may have inner refs (extdata) in some cases. */
            duk_free_hstring(heap, cur);
        }
    }

    DUK_FREE(heap, table);
}