cosmopolitan/third_party/duktape/duk_heap_stringcache.c

310 lines
9.8 KiB
C

/*
* String cache.
*
* Provides a cache to optimize indexed string lookups. The cache keeps
* track of (byte offset, char offset) states for a fixed number of strings.
* Otherwise we'd need to scan from either end of the string, as we store
* strings in (extended) UTF-8.
*/
#include "third_party/duktape/duk_internal.h"
/*
* Delete references to given hstring from the heap string cache.
*
* String cache references are 'weak': they are not counted towards
* reference counts, nor serve as roots for mark-and-sweep. When an
* object is about to be freed, such references need to be removed.
*/
DUK_INTERNAL void duk_heap_strcache_string_remove(duk_heap *heap, duk_hstring *h) {
duk_uint_t i;
for (i = 0; i < DUK_HEAP_STRCACHE_SIZE; i++) {
duk_strcache_entry *c = heap->strcache + i;
if (c->h == h) {
DUK_DD(DUK_DDPRINT("deleting weak strcache reference to hstring %p from heap %p",
(void *) h, (void *) heap));
c->h = NULL;
/* XXX: the string shouldn't appear twice, but we now loop to the
* end anyway; if fixed, add a looping assertion to ensure there
* is no duplicate.
*/
}
}
}
/*
* String scanning helpers
*
* All bytes other than UTF-8 continuation bytes ([0x80,0xbf]) are
* considered to contribute a character. This must match how string
* character length is computed.
*/
DUK_LOCAL const duk_uint8_t *duk__scan_forwards(const duk_uint8_t *p, const duk_uint8_t *q, duk_uint_fast32_t n) {
while (n > 0) {
for (;;) {
p++;
if (p >= q) {
return NULL;
}
if ((*p & 0xc0) != 0x80) {
break;
}
}
n--;
}
return p;
}
DUK_LOCAL const duk_uint8_t *duk__scan_backwards(const duk_uint8_t *p, const duk_uint8_t *q, duk_uint_fast32_t n) {
while (n > 0) {
for (;;) {
p--;
if (p < q) {
return NULL;
}
if ((*p & 0xc0) != 0x80) {
break;
}
}
n--;
}
return p;
}
/*
* Convert char offset to byte offset
*
* Avoid using the string cache if possible: for ASCII strings byte and
* char offsets are equal and for short strings direct scanning may be
* better than using the string cache (which may evict a more important
* entry).
*
* Typing now assumes 32-bit string byte/char offsets (duk_uint_fast32_t).
* Better typing might be to use duk_size_t.
*
* Caller should ensure 'char_offset' is within the string bounds [0,charlen]
* (endpoint is inclusive). If this is not the case, no memory unsafe
* behavior will happen but an error will be thrown.
*/
DUK_INTERNAL duk_uint_fast32_t duk_heap_strcache_offset_char2byte(duk_hthread *thr, duk_hstring *h, duk_uint_fast32_t char_offset) {
duk_heap *heap;
duk_strcache_entry *sce;
duk_uint_fast32_t byte_offset;
duk_uint_t i;
duk_bool_t use_cache;
duk_uint_fast32_t dist_start, dist_end, dist_sce;
duk_uint_fast32_t char_length;
const duk_uint8_t *p_start;
const duk_uint8_t *p_end;
const duk_uint8_t *p_found;
/*
* For ASCII strings, the answer is simple.
*/
if (DUK_LIKELY(DUK_HSTRING_IS_ASCII(h))) {
return char_offset;
}
char_length = (duk_uint_fast32_t) DUK_HSTRING_GET_CHARLEN(h);
DUK_ASSERT(char_offset <= char_length);
if (DUK_LIKELY(DUK_HSTRING_IS_ASCII(h))) {
/* Must recheck because the 'is ascii' flag may be set
* lazily. Alternatively, we could just compare charlen
* to bytelen.
*/
return char_offset;
}
/*
* For non-ASCII strings, we need to scan forwards or backwards
* from some starting point. The starting point may be the start
* or end of the string, or some cached midpoint in the string
* cache.
*
* For "short" strings we simply scan without checking or updating
* the cache. For longer strings we check and update the cache as
* necessary, inserting a new cache entry if none exists.
*/
DUK_DDD(DUK_DDDPRINT("non-ascii string %p, char_offset=%ld, clen=%ld, blen=%ld",
(void *) h, (long) char_offset,
(long) DUK_HSTRING_GET_CHARLEN(h),
(long) DUK_HSTRING_GET_BYTELEN(h)));
heap = thr->heap;
sce = NULL;
use_cache = (char_length > DUK_HEAP_STRINGCACHE_NOCACHE_LIMIT);
if (use_cache) {
#if defined(DUK_USE_DEBUG_LEVEL) && (DUK_USE_DEBUG_LEVEL >= 2)
DUK_DDD(DUK_DDDPRINT("stringcache before char2byte (using cache):"));
for (i = 0; i < DUK_HEAP_STRCACHE_SIZE; i++) {
duk_strcache_entry *c = heap->strcache + i;
DUK_DDD(DUK_DDDPRINT(" [%ld] -> h=%p, cidx=%ld, bidx=%ld",
(long) i, (void *) c->h, (long) c->cidx, (long) c->bidx));
}
#endif
for (i = 0; i < DUK_HEAP_STRCACHE_SIZE; i++) {
duk_strcache_entry *c = heap->strcache + i;
if (c->h == h) {
sce = c;
break;
}
}
}
/*
* Scan from shortest distance:
* - start of string
* - end of string
* - cache entry (if exists)
*/
DUK_ASSERT(DUK_HSTRING_GET_CHARLEN(h) >= char_offset);
dist_start = char_offset;
dist_end = char_length - char_offset;
dist_sce = 0; DUK_UNREF(dist_sce); /* initialize for debug prints, needed if sce==NULL */
p_start = (const duk_uint8_t *) DUK_HSTRING_GET_DATA(h);
p_end = (const duk_uint8_t *) (p_start + DUK_HSTRING_GET_BYTELEN(h));
p_found = NULL;
if (sce) {
if (char_offset >= sce->cidx) {
dist_sce = char_offset - sce->cidx;
if ((dist_sce <= dist_start) && (dist_sce <= dist_end)) {
DUK_DDD(DUK_DDDPRINT("non-ascii string, use_cache=%ld, sce=%p:%ld:%ld, "
"dist_start=%ld, dist_end=%ld, dist_sce=%ld => "
"scan forwards from sce",
(long) use_cache, (void *) (sce ? sce->h : NULL),
(sce ? (long) sce->cidx : (long) -1),
(sce ? (long) sce->bidx : (long) -1),
(long) dist_start, (long) dist_end, (long) dist_sce));
p_found = duk__scan_forwards(p_start + sce->bidx,
p_end,
dist_sce);
goto scan_done;
}
} else {
dist_sce = sce->cidx - char_offset;
if ((dist_sce <= dist_start) && (dist_sce <= dist_end)) {
DUK_DDD(DUK_DDDPRINT("non-ascii string, use_cache=%ld, sce=%p:%ld:%ld, "
"dist_start=%ld, dist_end=%ld, dist_sce=%ld => "
"scan backwards from sce",
(long) use_cache, (void *) (sce ? sce->h : NULL),
(sce ? (long) sce->cidx : (long) -1),
(sce ? (long) sce->bidx : (long) -1),
(long) dist_start, (long) dist_end, (long) dist_sce));
p_found = duk__scan_backwards(p_start + sce->bidx,
p_start,
dist_sce);
goto scan_done;
}
}
}
/* no sce, or sce scan not best */
if (dist_start <= dist_end) {
DUK_DDD(DUK_DDDPRINT("non-ascii string, use_cache=%ld, sce=%p:%ld:%ld, "
"dist_start=%ld, dist_end=%ld, dist_sce=%ld => "
"scan forwards from string start",
(long) use_cache, (void *) (sce ? sce->h : NULL),
(sce ? (long) sce->cidx : (long) -1),
(sce ? (long) sce->bidx : (long) -1),
(long) dist_start, (long) dist_end, (long) dist_sce));
p_found = duk__scan_forwards(p_start,
p_end,
dist_start);
} else {
DUK_DDD(DUK_DDDPRINT("non-ascii string, use_cache=%ld, sce=%p:%ld:%ld, "
"dist_start=%ld, dist_end=%ld, dist_sce=%ld => "
"scan backwards from string end",
(long) use_cache, (void *) (sce ? sce->h : NULL),
(sce ? (long) sce->cidx : (long) -1),
(sce ? (long) sce->bidx : (long) -1),
(long) dist_start, (long) dist_end, (long) dist_sce));
p_found = duk__scan_backwards(p_end,
p_start,
dist_end);
}
scan_done:
if (DUK_UNLIKELY(p_found == NULL)) {
/* Scan error: this shouldn't normally happen; it could happen if
* string is not valid UTF-8 data, and clen/blen are not consistent
* with the scanning algorithm.
*/
goto scan_error;
}
DUK_ASSERT(p_found >= p_start);
DUK_ASSERT(p_found <= p_end); /* may be equal */
byte_offset = (duk_uint32_t) (p_found - p_start);
DUK_DDD(DUK_DDDPRINT("-> string %p, cidx %ld -> bidx %ld",
(void *) h, (long) char_offset, (long) byte_offset));
/*
* Update cache entry (allocating if necessary), and move the
* cache entry to the first place (in an "LRU" policy).
*/
if (use_cache) {
/* update entry, allocating if necessary */
if (!sce) {
sce = heap->strcache + DUK_HEAP_STRCACHE_SIZE - 1; /* take last entry */
sce->h = h;
}
DUK_ASSERT(sce != NULL);
sce->bidx = (duk_uint32_t) (p_found - p_start);
sce->cidx = (duk_uint32_t) char_offset;
/* LRU: move our entry to first */
if (sce > &heap->strcache[0]) {
/*
* A C
* B A
* C <- sce ==> B
* D D
*/
duk_strcache_entry tmp;
tmp = *sce;
duk_memmove((void *) (&heap->strcache[1]),
(const void *) (&heap->strcache[0]),
(size_t) (((char *) sce) - ((char *) &heap->strcache[0])));
heap->strcache[0] = tmp;
/* 'sce' points to the wrong entry here, but is no longer used */
}
#if defined(DUK_USE_DEBUG_LEVEL) && (DUK_USE_DEBUG_LEVEL >= 2)
DUK_DDD(DUK_DDDPRINT("stringcache after char2byte (using cache):"));
for (i = 0; i < DUK_HEAP_STRCACHE_SIZE; i++) {
duk_strcache_entry *c = heap->strcache + i;
DUK_DDD(DUK_DDDPRINT(" [%ld] -> h=%p, cidx=%ld, bidx=%ld",
(long) i, (void *) c->h, (long) c->cidx, (long) c->bidx));
}
#endif
}
return byte_offset;
scan_error:
DUK_ERROR_INTERNAL(thr);
DUK_WO_NORETURN(return 0;);
}