1635 lines
48 KiB
C
1635 lines
48 KiB
C
/*
 *  String built-ins
 *
 *  Most String built-ins must only accept strings (or String objects).
 *  Symbols, represented internally as strings, must be generally rejected.
 *  The duk_push_this_coercible_to_string() helper does this automatically.
 */
|
|
|
|
/* XXX: There are several limitations in the current implementation for
 * strings with >= 0x80000000UL characters.  In some cases one would need
 * to be able to represent the range [-0xffffffff,0xffffffff] and so on.
 * Generally character and byte length are assumed to fit into signed 32
 * bits (< 0x80000000UL).  Places with issues are not marked explicitly
 * below in all cases; look for signed type usage (duk_int_t etc) for
 * offsets/lengths.
 */
|
|
|
|
#include "third_party/duktape/duk_internal.h"
|
|
|
|
#if defined(DUK_USE_STRING_BUILTIN)
|
|
|
|
/*
|
|
* Helpers
|
|
*/
|
|
|
|
DUK_LOCAL duk_hstring *duk__str_tostring_notregexp(duk_hthread *thr,
|
|
duk_idx_t idx) {
|
|
duk_hstring *h;
|
|
|
|
if (duk_get_class_number(thr, idx) == DUK_HOBJECT_CLASS_REGEXP) {
|
|
DUK_ERROR_TYPE_INVALID_ARGS(thr);
|
|
DUK_WO_NORETURN(return NULL;);
|
|
}
|
|
h = duk_to_hstring(thr, idx);
|
|
DUK_ASSERT(h != NULL);
|
|
|
|
return h;
|
|
}
|
|
|
|
DUK_LOCAL duk_int_t duk__str_search_shared(duk_hthread *thr,
|
|
duk_hstring *h_this,
|
|
duk_hstring *h_search,
|
|
duk_int_t start_cpos,
|
|
duk_bool_t backwards) {
|
|
duk_int_t cpos;
|
|
duk_int_t bpos;
|
|
const duk_uint8_t *p_start, *p_end, *p;
|
|
const duk_uint8_t *q_start;
|
|
duk_int_t q_blen;
|
|
duk_uint8_t firstbyte;
|
|
duk_uint8_t t;
|
|
|
|
cpos = start_cpos;
|
|
|
|
/* Empty searchstring always matches; cpos must be clamped here.
|
|
* (If q_blen were < 0 due to clamped coercion, it would also be
|
|
* caught here.)
|
|
*/
|
|
q_start = DUK_HSTRING_GET_DATA(h_search);
|
|
q_blen = (duk_int_t)DUK_HSTRING_GET_BYTELEN(h_search);
|
|
if (q_blen <= 0) {
|
|
return cpos;
|
|
}
|
|
DUK_ASSERT(q_blen > 0);
|
|
|
|
bpos = (duk_int_t)duk_heap_strcache_offset_char2byte(thr, h_this,
|
|
(duk_uint32_t)cpos);
|
|
|
|
p_start = DUK_HSTRING_GET_DATA(h_this);
|
|
p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_this);
|
|
p = p_start + bpos;
|
|
|
|
/* This loop is optimized for size. For speed, there should be
|
|
* two separate loops, and we should ensure that memcmp() can be
|
|
* used without an extra "will searchstring fit" check. Doing
|
|
* the preconditioning for 'p' and 'p_end' is easy but cpos
|
|
* must be updated if 'p' is wound back (backward scanning).
|
|
*/
|
|
|
|
firstbyte = q_start[0]; /* leading byte of match string */
|
|
while (p <= p_end && p >= p_start) {
|
|
t = *p;
|
|
|
|
/* For ECMAScript strings, this check can only match for
|
|
* initial UTF-8 bytes (not continuation bytes). For other
|
|
* strings all bets are off.
|
|
*/
|
|
|
|
if ((t == firstbyte) && ((duk_size_t)(p_end - p) >= (duk_size_t)q_blen)) {
|
|
DUK_ASSERT(q_blen > 0);
|
|
if (duk_memcmp((const void *)p, (const void *)q_start, (size_t)q_blen) ==
|
|
0) {
|
|
return cpos;
|
|
}
|
|
}
|
|
|
|
/* track cpos while scanning */
|
|
if (backwards) {
|
|
/* when going backwards, we decrement cpos 'early';
|
|
* 'p' may point to a continuation byte of the char
|
|
* at offset 'cpos', but that's OK because we'll
|
|
* backtrack all the way to the initial byte.
|
|
*/
|
|
if ((t & 0xc0) != 0x80) {
|
|
cpos--;
|
|
}
|
|
p--;
|
|
} else {
|
|
if ((t & 0xc0) != 0x80) {
|
|
cpos++;
|
|
}
|
|
p++;
|
|
}
|
|
}
|
|
|
|
/* Not found. Empty string case is handled specially above. */
|
|
return -1;
|
|
}
|
|
|
|
/*
|
|
* Constructor
|
|
*/
|
|
|
|
DUK_INTERNAL duk_ret_t duk_bi_string_constructor(duk_hthread *thr) {
|
|
duk_hstring *h;
|
|
duk_uint_t flags;
|
|
|
|
/* String constructor needs to distinguish between an argument not given at
|
|
* all vs. given as 'undefined'. We're a vararg function to handle this
|
|
* properly.
|
|
*/
|
|
|
|
/* XXX: copy current activation flags to thr, including current magic,
|
|
* is_constructor_call etc. This takes a few bytes in duk_hthread but
|
|
* makes call sites smaller (there are >30 is_constructor_call and get
|
|
* current magic call sites.
|
|
*/
|
|
|
|
if (duk_get_top(thr) == 0) {
|
|
duk_push_hstring_empty(thr);
|
|
} else {
|
|
h = duk_to_hstring_acceptsymbol(thr, 0);
|
|
if (DUK_UNLIKELY(DUK_HSTRING_HAS_SYMBOL(h) &&
|
|
!duk_is_constructor_call(thr))) {
|
|
duk_push_symbol_descriptive_string(thr, h);
|
|
duk_replace(thr, 0);
|
|
}
|
|
}
|
|
duk_to_string(thr, 0); /* catches symbol argument for constructor call */
|
|
DUK_ASSERT(duk_is_string(thr, 0));
|
|
duk_set_top(thr, 1); /* Top may be 1 or larger. */
|
|
|
|
if (duk_is_constructor_call(thr)) {
|
|
/* String object internal value is immutable */
|
|
flags = DUK_HOBJECT_FLAG_EXTENSIBLE | DUK_HOBJECT_FLAG_FASTREFS |
|
|
DUK_HOBJECT_FLAG_EXOTIC_STRINGOBJ |
|
|
DUK_HOBJECT_CLASS_AS_FLAGS(DUK_HOBJECT_CLASS_STRING);
|
|
duk_push_object_helper(thr, flags, DUK_BIDX_STRING_PROTOTYPE);
|
|
duk_dup_0(thr);
|
|
duk_xdef_prop_stridx_short(thr, -2, DUK_STRIDX_INT_VALUE,
|
|
DUK_PROPDESC_FLAGS_NONE);
|
|
}
|
|
/* Note: unbalanced stack on purpose */
|
|
|
|
return 1;
|
|
}
|
|
|
|
DUK_LOCAL duk_ret_t duk__construct_from_codepoints(duk_hthread *thr,
|
|
duk_bool_t nonbmp) {
|
|
duk_bufwriter_ctx bw_alloc;
|
|
duk_bufwriter_ctx *bw;
|
|
duk_idx_t i, n;
|
|
duk_ucodepoint_t cp;
|
|
|
|
/* XXX: It would be nice to build the string directly but ToUint16()
|
|
* coercion is needed so a generic helper would not be very
|
|
* helpful (perhaps coerce the value stack first here and then
|
|
* build a string from a duk_tval number sequence in one go?).
|
|
*/
|
|
|
|
n = duk_get_top(thr);
|
|
|
|
bw = &bw_alloc;
|
|
DUK_BW_INIT_PUSHBUF(
|
|
thr, bw, (duk_size_t)n); /* initial estimate for ASCII only codepoints */
|
|
|
|
for (i = 0; i < n; i++) {
|
|
/* XXX: could improve bufwriter handling to write multiple codepoints
|
|
* with one ensure call but the relative benefit would be quite small.
|
|
*/
|
|
|
|
if (nonbmp) {
|
|
/* ES2015 requires that (1) SameValue(cp, ToInteger(cp)) and
|
|
* (2) cp >= 0 and cp <= 0x10ffff. This check does not
|
|
* implement the steps exactly but the outcome should be
|
|
* the same.
|
|
*/
|
|
duk_int32_t i32 = 0;
|
|
if (!duk_is_whole_get_int32(duk_to_number(thr, i), &i32) || i32 < 0 ||
|
|
i32 > 0x10ffffL) {
|
|
DUK_DCERROR_RANGE_INVALID_ARGS(thr);
|
|
}
|
|
DUK_ASSERT(i32 >= 0 && i32 <= 0x10ffffL);
|
|
cp = (duk_ucodepoint_t)i32;
|
|
DUK_BW_WRITE_ENSURE_CESU8(thr, bw, cp);
|
|
} else {
|
|
#if defined(DUK_USE_NONSTD_STRING_FROMCHARCODE_32BIT)
|
|
/* ToUint16() coercion is mandatory in the E5.1 specification, but
|
|
* this non-compliant behavior makes more sense because we support
|
|
* non-BMP codepoints. Don't use CESU-8 because that'd create
|
|
* surrogate pairs.
|
|
*/
|
|
cp = (duk_ucodepoint_t)duk_to_uint32(thr, i);
|
|
DUK_BW_WRITE_ENSURE_XUTF8(thr, bw, cp);
|
|
#else
|
|
cp = (duk_ucodepoint_t)duk_to_uint16(thr, i);
|
|
DUK_ASSERT(cp >= 0 && cp <= 0x10ffffL);
|
|
DUK_BW_WRITE_ENSURE_CESU8(thr, bw, cp);
|
|
#endif
|
|
}
|
|
}
|
|
|
|
DUK_BW_COMPACT(thr, bw);
|
|
(void)duk_buffer_to_string(thr,
|
|
-1); /* Safe, extended UTF-8 or CESU-8 encoded. */
|
|
return 1;
|
|
}
|
|
|
|
DUK_INTERNAL duk_ret_t
|
|
duk_bi_string_constructor_from_char_code(duk_hthread *thr) {
|
|
return duk__construct_from_codepoints(thr, 0 /*nonbmp*/);
|
|
}
|
|
|
|
#if defined(DUK_USE_ES6)
|
|
DUK_INTERNAL duk_ret_t
|
|
duk_bi_string_constructor_from_code_point(duk_hthread *thr) {
|
|
return duk__construct_from_codepoints(thr, 1 /*nonbmp*/);
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* toString(), valueOf()
|
|
*/
|
|
|
|
DUK_INTERNAL duk_ret_t duk_bi_string_prototype_to_string(duk_hthread *thr) {
|
|
duk_tval *tv;
|
|
|
|
duk_push_this(thr);
|
|
tv = duk_require_tval(thr, -1);
|
|
DUK_ASSERT(tv != NULL);
|
|
|
|
if (DUK_TVAL_IS_STRING(tv)) {
|
|
/* return as is */
|
|
} else if (DUK_TVAL_IS_OBJECT(tv)) {
|
|
duk_hobject *h = DUK_TVAL_GET_OBJECT(tv);
|
|
DUK_ASSERT(h != NULL);
|
|
|
|
/* Must be a "string object", i.e. class "String" */
|
|
if (DUK_HOBJECT_GET_CLASS_NUMBER(h) != DUK_HOBJECT_CLASS_STRING) {
|
|
goto type_error;
|
|
}
|
|
|
|
duk_xget_owndataprop_stridx_short(thr, -1, DUK_STRIDX_INT_VALUE);
|
|
DUK_ASSERT(duk_is_string(thr, -1));
|
|
} else {
|
|
goto type_error;
|
|
}
|
|
|
|
(void)duk_require_hstring_notsymbol(
|
|
thr, -1); /* Reject symbols (and wrapped symbols). */
|
|
return 1;
|
|
|
|
type_error:
|
|
DUK_DCERROR_TYPE_INVALID_ARGS(thr);
|
|
}
|
|
|
|
/*
|
|
* Character and charcode access
|
|
*/
|
|
|
|
DUK_INTERNAL duk_ret_t duk_bi_string_prototype_char_at(duk_hthread *thr) {
|
|
duk_hstring *h;
|
|
duk_int_t pos;
|
|
|
|
/* XXX: faster implementation */
|
|
|
|
h = duk_push_this_coercible_to_string(thr);
|
|
DUK_ASSERT(h != NULL);
|
|
|
|
pos = duk_to_int(thr, 0);
|
|
|
|
if (sizeof(duk_size_t) >= sizeof(duk_uint_t)) {
|
|
/* Cast to duk_size_t works in this case:
|
|
* - If pos < 0, (duk_size_t) pos will always be
|
|
* >= max_charlen, and result will be the empty string
|
|
* (see duk_substring()).
|
|
* - If pos >= 0, pos + 1 cannot wrap.
|
|
*/
|
|
DUK_ASSERT((duk_size_t)DUK_INT_MIN >= DUK_HSTRING_MAX_BYTELEN);
|
|
DUK_ASSERT((duk_size_t)DUK_INT_MAX + 1U > (duk_size_t)DUK_INT_MAX);
|
|
duk_substring(thr, -1, (duk_size_t)pos, (duk_size_t)pos + 1U);
|
|
} else {
|
|
/* If size_t is smaller than int, explicit bounds checks
|
|
* are needed because an int may wrap multiple times.
|
|
*/
|
|
if (DUK_UNLIKELY(pos < 0 || (duk_uint_t)pos >=
|
|
(duk_uint_t)DUK_HSTRING_GET_CHARLEN(h))) {
|
|
duk_push_hstring_empty(thr);
|
|
} else {
|
|
duk_substring(thr, -1, (duk_size_t)pos, (duk_size_t)pos + 1U);
|
|
}
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
/* Magic: 0=charCodeAt, 1=codePointAt */
|
|
DUK_INTERNAL duk_ret_t duk_bi_string_prototype_char_code_at(duk_hthread *thr) {
|
|
duk_int_t pos;
|
|
duk_hstring *h;
|
|
duk_bool_t clamped;
|
|
duk_uint32_t cp;
|
|
duk_int_t magic;
|
|
|
|
/* XXX: faster implementation */
|
|
|
|
DUK_DDD(DUK_DDDPRINT("arg=%!T", (duk_tval *)duk_get_tval(thr, 0)));
|
|
|
|
h = duk_push_this_coercible_to_string(thr);
|
|
DUK_ASSERT(h != NULL);
|
|
|
|
pos = duk_to_int_clamped_raw(
|
|
thr, 0 /*index*/, 0 /*min(incl)*/,
|
|
(duk_int_t)DUK_HSTRING_GET_CHARLEN(h) - 1 /*max(incl)*/,
|
|
&clamped /*out_clamped*/);
|
|
#if defined(DUK_USE_ES6)
|
|
magic = duk_get_current_magic(thr);
|
|
#else
|
|
DUK_ASSERT(duk_get_current_magic(thr) == 0);
|
|
magic = 0;
|
|
#endif
|
|
if (clamped) {
|
|
/* For out-of-bounds indices .charCodeAt() returns NaN and
|
|
* .codePointAt() returns undefined.
|
|
*/
|
|
if (magic != 0) {
|
|
return 0;
|
|
}
|
|
duk_push_nan(thr);
|
|
} else {
|
|
DUK_ASSERT(pos >= 0);
|
|
cp = (duk_uint32_t)duk_hstring_char_code_at_raw(
|
|
thr, h, (duk_uint_t)pos, (duk_bool_t)magic /*surrogate_aware*/);
|
|
duk_push_u32(thr, cp);
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* substring(), substr(), slice()
|
|
*/
|
|
|
|
/* XXX: any chance of merging these three similar but still slightly
 * different algorithms so that footprint would be reduced?
 */
|
|
|
|
DUK_INTERNAL duk_ret_t duk_bi_string_prototype_substring(duk_hthread *thr) {
|
|
duk_hstring *h;
|
|
duk_int_t start_pos, end_pos;
|
|
duk_int_t len;
|
|
|
|
h = duk_push_this_coercible_to_string(thr);
|
|
DUK_ASSERT(h != NULL);
|
|
len = (duk_int_t)DUK_HSTRING_GET_CHARLEN(h);
|
|
|
|
/* [ start end str ] */
|
|
|
|
start_pos = duk_to_int_clamped(thr, 0, 0, len);
|
|
if (duk_is_undefined(thr, 1)) {
|
|
end_pos = len;
|
|
} else {
|
|
end_pos = duk_to_int_clamped(thr, 1, 0, len);
|
|
}
|
|
DUK_ASSERT(start_pos >= 0 && start_pos <= len);
|
|
DUK_ASSERT(end_pos >= 0 && end_pos <= len);
|
|
|
|
if (start_pos > end_pos) {
|
|
duk_int_t tmp = start_pos;
|
|
start_pos = end_pos;
|
|
end_pos = tmp;
|
|
}
|
|
|
|
DUK_ASSERT(end_pos >= start_pos);
|
|
|
|
duk_substring(thr, -1, (duk_size_t)start_pos, (duk_size_t)end_pos);
|
|
return 1;
|
|
}
|
|
|
|
#if defined(DUK_USE_SECTION_B)
|
|
DUK_INTERNAL duk_ret_t duk_bi_string_prototype_substr(duk_hthread *thr) {
|
|
duk_hstring *h;
|
|
duk_int_t start_pos, end_pos;
|
|
duk_int_t len;
|
|
|
|
/* Unlike non-obsolete String calls, substr() algorithm in E5.1
|
|
* specification will happily coerce undefined and null to strings
|
|
* ("undefined" and "null").
|
|
*/
|
|
duk_push_this(thr);
|
|
h = duk_to_hstring_m1(thr); /* Reject Symbols. */
|
|
DUK_ASSERT(h != NULL);
|
|
len = (duk_int_t)DUK_HSTRING_GET_CHARLEN(h);
|
|
|
|
/* [ start length str ] */
|
|
|
|
/* The implementation for computing of start_pos and end_pos differs
|
|
* from the standard algorithm, but is intended to result in the exactly
|
|
* same behavior. This is not always obvious.
|
|
*/
|
|
|
|
/* combines steps 2 and 5; -len ensures max() not needed for step 5 */
|
|
start_pos = duk_to_int_clamped(thr, 0, -len, len);
|
|
if (start_pos < 0) {
|
|
start_pos = len + start_pos;
|
|
}
|
|
DUK_ASSERT(start_pos >= 0 && start_pos <= len);
|
|
|
|
/* combines steps 3, 6; step 7 is not needed */
|
|
if (duk_is_undefined(thr, 1)) {
|
|
end_pos = len;
|
|
} else {
|
|
DUK_ASSERT(start_pos <= len);
|
|
end_pos = start_pos + duk_to_int_clamped(thr, 1, 0, len - start_pos);
|
|
}
|
|
DUK_ASSERT(start_pos >= 0 && start_pos <= len);
|
|
DUK_ASSERT(end_pos >= 0 && end_pos <= len);
|
|
DUK_ASSERT(end_pos >= start_pos);
|
|
|
|
duk_substring(thr, -1, (duk_size_t)start_pos, (duk_size_t)end_pos);
|
|
return 1;
|
|
}
|
|
#endif /* DUK_USE_SECTION_B */
|
|
|
|
DUK_INTERNAL duk_ret_t duk_bi_string_prototype_slice(duk_hthread *thr) {
|
|
duk_hstring *h;
|
|
duk_int_t start_pos, end_pos;
|
|
duk_int_t len;
|
|
|
|
h = duk_push_this_coercible_to_string(thr);
|
|
DUK_ASSERT(h != NULL);
|
|
len = (duk_int_t)DUK_HSTRING_GET_CHARLEN(h);
|
|
|
|
/* [ start end str ] */
|
|
|
|
start_pos = duk_to_int_clamped(thr, 0, -len, len);
|
|
if (start_pos < 0) {
|
|
start_pos = len + start_pos;
|
|
}
|
|
if (duk_is_undefined(thr, 1)) {
|
|
end_pos = len;
|
|
} else {
|
|
end_pos = duk_to_int_clamped(thr, 1, -len, len);
|
|
if (end_pos < 0) {
|
|
end_pos = len + end_pos;
|
|
}
|
|
}
|
|
DUK_ASSERT(start_pos >= 0 && start_pos <= len);
|
|
DUK_ASSERT(end_pos >= 0 && end_pos <= len);
|
|
|
|
if (end_pos < start_pos) {
|
|
end_pos = start_pos;
|
|
}
|
|
|
|
DUK_ASSERT(end_pos >= start_pos);
|
|
|
|
duk_substring(thr, -1, (duk_size_t)start_pos, (duk_size_t)end_pos);
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* Case conversion
|
|
*/
|
|
|
|
DUK_INTERNAL duk_ret_t
|
|
duk_bi_string_prototype_caseconv_shared(duk_hthread *thr) {
|
|
duk_small_int_t uppercase = duk_get_current_magic(thr);
|
|
|
|
(void)duk_push_this_coercible_to_string(thr);
|
|
duk_unicode_case_convert_string(thr, (duk_bool_t)uppercase);
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* indexOf() and lastIndexOf()
|
|
*/
|
|
|
|
DUK_INTERNAL duk_ret_t
|
|
duk_bi_string_prototype_indexof_shared(duk_hthread *thr) {
|
|
duk_hstring *h_this;
|
|
duk_hstring *h_search;
|
|
duk_int_t clen_this;
|
|
duk_int_t cpos;
|
|
duk_small_uint_t is_lastindexof = (duk_small_uint_t)duk_get_current_magic(
|
|
thr); /* 0=indexOf, 1=lastIndexOf */
|
|
|
|
h_this = duk_push_this_coercible_to_string(thr);
|
|
DUK_ASSERT(h_this != NULL);
|
|
clen_this = (duk_int_t)DUK_HSTRING_GET_CHARLEN(h_this);
|
|
|
|
h_search = duk_to_hstring(thr, 0);
|
|
DUK_ASSERT(h_search != NULL);
|
|
|
|
duk_to_number(thr, 1);
|
|
if (duk_is_nan(thr, 1) && is_lastindexof) {
|
|
/* indexOf: NaN should cause pos to be zero.
|
|
* lastIndexOf: NaN should cause pos to be +Infinity
|
|
* (and later be clamped to len).
|
|
*/
|
|
cpos = clen_this;
|
|
} else {
|
|
cpos = duk_to_int_clamped(thr, 1, 0, clen_this);
|
|
}
|
|
|
|
cpos = duk__str_search_shared(thr, h_this, h_search, cpos,
|
|
is_lastindexof /*backwards*/);
|
|
duk_push_int(thr, cpos);
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* replace()
|
|
*/
|
|
|
|
/* XXX: the current implementation works but is quite clunky; it compiles
 * to almost 1.4kB of x86 code so it needs to be simplified (better approach,
 * shared helpers, etc).  Some ideas for refactoring:
 *
 * - a primitive to convert a string into a regexp matcher (reduces matching
 *   code at the cost of making matching much slower)
 * - use replace() as a basic helper for match() and split(), which are both
 *   much simpler
 * - API call to get_prop and to_boolean
 */
|
|
|
|
DUK_INTERNAL duk_ret_t duk_bi_string_prototype_replace(duk_hthread *thr) {
|
|
duk_hstring *h_input;
|
|
duk_hstring *h_match;
|
|
duk_hstring *h_search;
|
|
duk_hobject *h_re;
|
|
duk_bufwriter_ctx bw_alloc;
|
|
duk_bufwriter_ctx *bw;
|
|
#if defined(DUK_USE_REGEXP_SUPPORT)
|
|
duk_bool_t is_regexp;
|
|
duk_bool_t is_global;
|
|
#endif
|
|
duk_bool_t is_repl_func;
|
|
duk_uint32_t match_start_coff, match_start_boff;
|
|
#if defined(DUK_USE_REGEXP_SUPPORT)
|
|
duk_int_t match_caps;
|
|
#endif
|
|
duk_uint32_t prev_match_end_boff;
|
|
const duk_uint8_t *r_start, *r_end, *r; /* repl string scan */
|
|
duk_size_t tmp_sz;
|
|
|
|
DUK_ASSERT_TOP(thr, 2);
|
|
h_input = duk_push_this_coercible_to_string(thr);
|
|
DUK_ASSERT(h_input != NULL);
|
|
|
|
bw = &bw_alloc;
|
|
DUK_BW_INIT_PUSHBUF(
|
|
thr, bw,
|
|
DUK_HSTRING_GET_BYTELEN(
|
|
h_input)); /* input size is good output starting point */
|
|
|
|
DUK_ASSERT_TOP(thr, 4);
|
|
|
|
/* stack[0] = search value
|
|
* stack[1] = replace value
|
|
* stack[2] = input string
|
|
* stack[3] = result buffer
|
|
*/
|
|
|
|
h_re = duk_get_hobject_with_class(thr, 0, DUK_HOBJECT_CLASS_REGEXP);
|
|
if (h_re) {
|
|
#if defined(DUK_USE_REGEXP_SUPPORT)
|
|
is_regexp = 1;
|
|
is_global = duk_get_prop_stridx_boolean(thr, 0, DUK_STRIDX_GLOBAL, NULL);
|
|
|
|
if (is_global) {
|
|
/* start match from beginning */
|
|
duk_push_int(thr, 0);
|
|
duk_put_prop_stridx_short(thr, 0, DUK_STRIDX_LAST_INDEX);
|
|
}
|
|
#else /* DUK_USE_REGEXP_SUPPORT */
|
|
DUK_DCERROR_UNSUPPORTED(thr);
|
|
#endif /* DUK_USE_REGEXP_SUPPORT */
|
|
} else {
|
|
duk_to_string(thr, 0); /* rejects symbols */
|
|
#if defined(DUK_USE_REGEXP_SUPPORT)
|
|
is_regexp = 0;
|
|
is_global = 0;
|
|
#endif
|
|
}
|
|
|
|
if (duk_is_function(thr, 1)) {
|
|
is_repl_func = 1;
|
|
r_start = NULL;
|
|
r_end = NULL;
|
|
} else {
|
|
duk_hstring *h_repl;
|
|
|
|
is_repl_func = 0;
|
|
h_repl = duk_to_hstring(thr, 1); /* reject symbols */
|
|
DUK_ASSERT(h_repl != NULL);
|
|
r_start = DUK_HSTRING_GET_DATA(h_repl);
|
|
r_end = r_start + DUK_HSTRING_GET_BYTELEN(h_repl);
|
|
}
|
|
|
|
prev_match_end_boff = 0;
|
|
|
|
for (;;) {
|
|
/*
|
|
* If matching with a regexp:
|
|
* - non-global RegExp: lastIndex not touched on a match, zeroed
|
|
* on a non-match
|
|
* - global RegExp: on match, lastIndex will be updated by regexp
|
|
* executor to point to next char after the matching part (so that
|
|
* characters in the matching part are not matched again)
|
|
*
|
|
* If matching with a string:
|
|
* - always non-global match, find first occurrence
|
|
*
|
|
* We need:
|
|
* - The character offset of start-of-match for the replacer function
|
|
* - The byte offsets for start-of-match and end-of-match to implement
|
|
* the replacement values $&, $`, and $', and to copy non-matching
|
|
* input string portions (including header and trailer) verbatim.
|
|
*
|
|
* NOTE: the E5.1 specification is a bit vague how the RegExp should
|
|
* behave in the replacement process; e.g. is matching done first for
|
|
* all matches (in the global RegExp case) before any replacer calls
|
|
* are made? See: test-bi-string-proto-replace.js for discussion.
|
|
*/
|
|
|
|
DUK_ASSERT_TOP(thr, 4);
|
|
|
|
#if defined(DUK_USE_REGEXP_SUPPORT)
|
|
if (is_regexp) {
|
|
duk_dup_0(thr);
|
|
duk_dup_2(thr);
|
|
duk_regexp_match(thr); /* [ ... regexp input ] -> [ res_obj ] */
|
|
if (!duk_is_object(thr, -1)) {
|
|
duk_pop(thr);
|
|
break;
|
|
}
|
|
|
|
duk_get_prop_stridx_short(thr, -1, DUK_STRIDX_INDEX);
|
|
DUK_ASSERT(duk_is_number(thr, -1));
|
|
match_start_coff = duk_get_uint(thr, -1);
|
|
duk_pop(thr);
|
|
|
|
duk_get_prop_index(thr, -1, 0);
|
|
DUK_ASSERT(duk_is_string(thr, -1));
|
|
h_match = duk_known_hstring(thr, -1);
|
|
duk_pop(
|
|
thr); /* h_match is borrowed, remains reachable through match_obj */
|
|
|
|
if (DUK_HSTRING_GET_BYTELEN(h_match) == 0) {
|
|
/* This should be equivalent to match() algorithm step 8.f.iii.2:
|
|
* detect an empty match and allow it, but don't allow it twice.
|
|
*/
|
|
duk_uint32_t last_index;
|
|
|
|
duk_get_prop_stridx_short(thr, 0, DUK_STRIDX_LAST_INDEX);
|
|
last_index = (duk_uint32_t)duk_get_uint(thr, -1);
|
|
DUK_DDD(DUK_DDDPRINT("empty match, bump lastIndex: %ld -> %ld",
|
|
(long)last_index, (long)(last_index + 1)));
|
|
duk_pop(thr);
|
|
duk_push_uint(thr, (duk_uint_t)(last_index + 1));
|
|
duk_put_prop_stridx_short(thr, 0, DUK_STRIDX_LAST_INDEX);
|
|
}
|
|
|
|
DUK_ASSERT(duk_get_length(thr, -1) <= DUK_INT_MAX); /* string limits */
|
|
match_caps = (duk_int_t)duk_get_length(thr, -1);
|
|
} else {
|
|
#else /* DUK_USE_REGEXP_SUPPORT */
|
|
{ /* unconditionally */
|
|
#endif /* DUK_USE_REGEXP_SUPPORT */
|
|
const duk_uint8_t *p_start, *p_end, *p; /* input string scan */
|
|
const duk_uint8_t *q_start; /* match string */
|
|
duk_size_t q_blen;
|
|
|
|
#if defined(DUK_USE_REGEXP_SUPPORT)
|
|
DUK_ASSERT(!is_global); /* single match always */
|
|
#endif
|
|
|
|
p_start = DUK_HSTRING_GET_DATA(h_input);
|
|
p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_input);
|
|
p = p_start;
|
|
|
|
h_search = duk_known_hstring(thr, 0);
|
|
q_start = DUK_HSTRING_GET_DATA(h_search);
|
|
q_blen = (duk_size_t)DUK_HSTRING_GET_BYTELEN(h_search);
|
|
|
|
p_end -= q_blen; /* ensure full memcmp() fits in while */
|
|
|
|
match_start_coff = 0;
|
|
|
|
while (p <= p_end) {
|
|
DUK_ASSERT(p + q_blen <= DUK_HSTRING_GET_DATA(h_input) +
|
|
DUK_HSTRING_GET_BYTELEN(h_input));
|
|
if (duk_memcmp((const void *)p, (const void *)q_start,
|
|
(size_t)q_blen) == 0) {
|
|
duk_dup_0(thr);
|
|
h_match = duk_known_hstring(thr, -1);
|
|
#if defined(DUK_USE_REGEXP_SUPPORT)
|
|
match_caps = 0;
|
|
#endif
|
|
goto found;
|
|
}
|
|
|
|
/* track utf-8 non-continuation bytes */
|
|
if ((p[0] & 0xc0) != 0x80) {
|
|
match_start_coff++;
|
|
}
|
|
p++;
|
|
}
|
|
|
|
/* not found */
|
|
break;
|
|
}
|
|
found:
|
|
|
|
/* stack[0] = search value
|
|
* stack[1] = replace value
|
|
* stack[2] = input string
|
|
* stack[3] = result buffer
|
|
* stack[4] = regexp match OR match string
|
|
*/
|
|
|
|
match_start_boff = (duk_uint32_t)duk_heap_strcache_offset_char2byte(
|
|
thr, h_input, match_start_coff);
|
|
|
|
tmp_sz = (duk_size_t)(match_start_boff - prev_match_end_boff);
|
|
DUK_BW_WRITE_ENSURE_BYTES(
|
|
thr, bw, DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff, tmp_sz);
|
|
|
|
prev_match_end_boff = match_start_boff + DUK_HSTRING_GET_BYTELEN(h_match);
|
|
|
|
if (is_repl_func) {
|
|
duk_idx_t idx_args;
|
|
duk_hstring *h_repl;
|
|
|
|
/* regexp res_obj is at index 4 */
|
|
|
|
duk_dup_1(thr);
|
|
idx_args = duk_get_top(thr);
|
|
|
|
#if defined(DUK_USE_REGEXP_SUPPORT)
|
|
if (is_regexp) {
|
|
duk_int_t idx;
|
|
duk_require_stack(thr, match_caps + 2);
|
|
for (idx = 0; idx < match_caps; idx++) {
|
|
/* match followed by capture(s) */
|
|
duk_get_prop_index(thr, 4, (duk_uarridx_t)idx);
|
|
}
|
|
} else {
|
|
#else /* DUK_USE_REGEXP_SUPPORT */
|
|
{ /* unconditionally */
|
|
#endif /* DUK_USE_REGEXP_SUPPORT */
|
|
/* match == search string, by definition */
|
|
duk_dup_0(thr);
|
|
}
|
|
duk_push_uint(thr, (duk_uint_t)match_start_coff);
|
|
duk_dup_2(thr);
|
|
|
|
/* [ ... replacer match [captures] match_char_offset input ] */
|
|
|
|
duk_call(thr, duk_get_top(thr) - idx_args);
|
|
h_repl = duk_to_hstring_m1(thr); /* -> [ ... repl_value ] */
|
|
DUK_ASSERT(h_repl != NULL);
|
|
|
|
DUK_BW_WRITE_ENSURE_HSTRING(thr, bw, h_repl);
|
|
|
|
duk_pop(thr); /* repl_value */
|
|
} else {
|
|
r = r_start;
|
|
|
|
while (r < r_end) {
|
|
duk_int_t ch1;
|
|
duk_int_t ch2;
|
|
#if defined(DUK_USE_REGEXP_SUPPORT)
|
|
duk_int_t ch3;
|
|
#endif
|
|
duk_size_t left;
|
|
|
|
ch1 = *r++;
|
|
if (ch1 != DUK_ASC_DOLLAR) {
|
|
goto repl_write;
|
|
}
|
|
DUK_ASSERT(r <= r_end);
|
|
left = (duk_size_t)(r_end - r);
|
|
|
|
if (left <= 0) {
|
|
goto repl_write;
|
|
}
|
|
|
|
ch2 = r[0];
|
|
switch (ch2) {
|
|
case DUK_ASC_DOLLAR: {
|
|
ch1 = (1u << 8) + DUK_ASC_DOLLAR;
|
|
goto repl_write;
|
|
}
|
|
case DUK_ASC_AMP: {
|
|
DUK_BW_WRITE_ENSURE_HSTRING(thr, bw, h_match);
|
|
r++;
|
|
continue;
|
|
}
|
|
case DUK_ASC_GRAVE: {
|
|
tmp_sz = (duk_size_t)match_start_boff;
|
|
DUK_BW_WRITE_ENSURE_BYTES(thr, bw, DUK_HSTRING_GET_DATA(h_input),
|
|
tmp_sz);
|
|
r++;
|
|
continue;
|
|
}
|
|
case DUK_ASC_SINGLEQUOTE: {
|
|
duk_uint32_t match_end_boff;
|
|
|
|
/* Use match charlen instead of bytelen, just in case the input and
|
|
* match codepoint encodings would have different lengths.
|
|
*/
|
|
/* XXX: charlen computed here, and also in char2byte helper. */
|
|
match_end_boff = (duk_uint32_t)duk_heap_strcache_offset_char2byte(
|
|
thr, h_input,
|
|
match_start_coff +
|
|
(duk_uint_fast32_t)DUK_HSTRING_GET_CHARLEN(h_match));
|
|
|
|
tmp_sz =
|
|
(duk_size_t)(DUK_HSTRING_GET_BYTELEN(h_input) - match_end_boff);
|
|
DUK_BW_WRITE_ENSURE_BYTES(
|
|
thr, bw, DUK_HSTRING_GET_DATA(h_input) + match_end_boff,
|
|
tmp_sz);
|
|
r++;
|
|
continue;
|
|
}
|
|
default: {
|
|
#if defined(DUK_USE_REGEXP_SUPPORT)
|
|
duk_int_t capnum, captmp, capadv;
|
|
/* XXX: optional check, match_caps is zero if no regexp,
|
|
* so dollar will be interpreted literally anyway.
|
|
*/
|
|
|
|
if (!is_regexp) {
|
|
goto repl_write;
|
|
}
|
|
|
|
if (!(ch2 >= DUK_ASC_0 && ch2 <= DUK_ASC_9)) {
|
|
goto repl_write;
|
|
}
|
|
capnum = ch2 - DUK_ASC_0;
|
|
capadv = 1;
|
|
|
|
if (left >= 2) {
|
|
ch3 = r[1];
|
|
if (ch3 >= DUK_ASC_0 && ch3 <= DUK_ASC_9) {
|
|
captmp = capnum * 10 + (ch3 - DUK_ASC_0);
|
|
if (captmp < match_caps) {
|
|
capnum = captmp;
|
|
capadv = 2;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (capnum > 0 && capnum < match_caps) {
|
|
DUK_ASSERT(is_regexp != 0); /* match_caps == 0 without regexps */
|
|
|
|
/* regexp res_obj is at offset 4 */
|
|
duk_get_prop_index(thr, 4, (duk_uarridx_t)capnum);
|
|
if (duk_is_string(thr, -1)) {
|
|
duk_hstring *h_tmp_str;
|
|
|
|
h_tmp_str = duk_known_hstring(thr, -1);
|
|
|
|
DUK_BW_WRITE_ENSURE_HSTRING(thr, bw, h_tmp_str);
|
|
} else {
|
|
/* undefined -> skip (replaced with empty) */
|
|
}
|
|
duk_pop(thr);
|
|
r += capadv;
|
|
continue;
|
|
} else {
|
|
goto repl_write;
|
|
}
|
|
#else /* DUK_USE_REGEXP_SUPPORT */
|
|
goto repl_write; /* unconditionally */
|
|
#endif /* DUK_USE_REGEXP_SUPPORT */
|
|
} /* default case */
|
|
} /* switch (ch2) */
|
|
|
|
repl_write:
|
|
/* ch1 = (r_increment << 8) + byte */
|
|
|
|
DUK_BW_WRITE_ENSURE_U8(thr, bw, (duk_uint8_t)(ch1 & 0xff));
|
|
r += ch1 >> 8;
|
|
} /* while repl */
|
|
} /* if (is_repl_func) */
|
|
|
|
duk_pop(thr); /* pop regexp res_obj or match string */
|
|
|
|
#if defined(DUK_USE_REGEXP_SUPPORT)
|
|
if (!is_global) {
|
|
#else
|
|
{ /* unconditionally; is_global==0 */
|
|
#endif
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* trailer */
|
|
tmp_sz = (duk_size_t)(DUK_HSTRING_GET_BYTELEN(h_input) - prev_match_end_boff);
|
|
DUK_BW_WRITE_ENSURE_BYTES(
|
|
thr, bw, DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff, tmp_sz);
|
|
|
|
DUK_ASSERT_TOP(thr, 4);
|
|
DUK_BW_COMPACT(thr, bw);
|
|
(void)duk_buffer_to_string(thr, -1); /* Safe if inputs are safe. */
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* split()
|
|
*/
|
|
|
|
/* XXX: very messy now, but works; clean up, remove unused variables (nominally
 * used so compiler doesn't complain).
 */
|
|
|
|
DUK_INTERNAL duk_ret_t duk_bi_string_prototype_split(duk_hthread *thr) {
|
|
duk_hstring *h_input;
|
|
duk_hstring *h_sep;
|
|
duk_uint32_t limit;
|
|
duk_uint32_t arr_idx;
|
|
#if defined(DUK_USE_REGEXP_SUPPORT)
|
|
duk_bool_t is_regexp;
|
|
#endif
|
|
duk_bool_t matched; /* set to 1 if any match exists (needed for empty input
|
|
special case) */
|
|
duk_uint32_t prev_match_end_coff, prev_match_end_boff;
|
|
duk_uint32_t match_start_boff, match_start_coff;
|
|
duk_uint32_t match_end_boff, match_end_coff;
|
|
|
|
h_input = duk_push_this_coercible_to_string(thr);
|
|
DUK_ASSERT(h_input != NULL);
|
|
|
|
duk_push_array(thr);
|
|
|
|
if (duk_is_undefined(thr, 1)) {
|
|
limit = 0xffffffffUL;
|
|
} else {
|
|
limit = duk_to_uint32(thr, 1);
|
|
}
|
|
|
|
if (limit == 0) {
|
|
return 1;
|
|
}
|
|
|
|
/* If the separator is a RegExp, make a "clone" of it. The specification
|
|
* algorithm calls [[Match]] directly for specific indices; we emulate this
|
|
* by tweaking lastIndex and using a "force global" variant of
|
|
* duk_regexp_match() which will use global-style matching even when the
|
|
* RegExp itself is non-global.
|
|
*/
|
|
|
|
if (duk_is_undefined(thr, 0)) {
|
|
/* The spec algorithm first does "R = ToString(separator)" before checking
|
|
* whether separator is undefined. Since this is side effect free, we can
|
|
* skip the ToString() here.
|
|
*/
|
|
duk_dup_2(thr);
|
|
duk_put_prop_index(thr, 3, 0);
|
|
return 1;
|
|
} else if (duk_get_hobject_with_class(thr, 0, DUK_HOBJECT_CLASS_REGEXP) !=
|
|
NULL) {
|
|
#if defined(DUK_USE_REGEXP_SUPPORT)
|
|
duk_push_hobject_bidx(thr, DUK_BIDX_REGEXP_CONSTRUCTOR);
|
|
duk_dup_0(thr);
|
|
duk_new(thr, 1); /* [ ... RegExp val ] -> [ ... res ] */
|
|
duk_replace(thr, 0);
|
|
/* lastIndex is initialized to zero by new RegExp() */
|
|
is_regexp = 1;
|
|
#else
|
|
DUK_DCERROR_UNSUPPORTED(thr);
|
|
#endif
|
|
} else {
|
|
duk_to_string(thr, 0);
|
|
#if defined(DUK_USE_REGEXP_SUPPORT)
|
|
is_regexp = 0;
|
|
#endif
|
|
}
|
|
|
|
/* stack[0] = separator (string or regexp)
|
|
* stack[1] = limit
|
|
* stack[2] = input string
|
|
* stack[3] = result array
|
|
*/
|
|
|
|
prev_match_end_boff = 0;
|
|
prev_match_end_coff = 0;
|
|
arr_idx = 0;
|
|
matched = 0;
|
|
|
|
for (;;) {
|
|
/*
|
|
* The specification uses RegExp [[Match]] to attempt match at specific
|
|
* offsets. We don't have such a primitive, so we use an actual RegExp
|
|
* and tweak lastIndex. Since the RegExp may be non-global, we use a
|
|
* special variant which forces global-like behavior for matching.
|
|
*/
|
|
|
|
DUK_ASSERT_TOP(thr, 4);
|
|
|
|
#if defined(DUK_USE_REGEXP_SUPPORT)
|
|
if (is_regexp) {
|
|
duk_dup_0(thr);
|
|
duk_dup_2(thr);
|
|
duk_regexp_match_force_global(
|
|
thr); /* [ ... regexp input ] -> [ res_obj ] */
|
|
if (!duk_is_object(thr, -1)) {
|
|
duk_pop(thr);
|
|
break;
|
|
}
|
|
matched = 1;
|
|
|
|
duk_get_prop_stridx_short(thr, -1, DUK_STRIDX_INDEX);
|
|
DUK_ASSERT(duk_is_number(thr, -1));
|
|
match_start_coff = duk_get_uint(thr, -1);
|
|
match_start_boff = (duk_uint32_t)duk_heap_strcache_offset_char2byte(
|
|
thr, h_input, match_start_coff);
|
|
duk_pop(thr);
|
|
|
|
if (match_start_coff == DUK_HSTRING_GET_CHARLEN(h_input)) {
|
|
/* don't allow an empty match at the end of the string */
|
|
duk_pop(thr);
|
|
break;
|
|
}
|
|
|
|
duk_get_prop_stridx_short(thr, 0, DUK_STRIDX_LAST_INDEX);
|
|
DUK_ASSERT(duk_is_number(thr, -1));
|
|
match_end_coff = duk_get_uint(thr, -1);
|
|
match_end_boff = (duk_uint32_t)duk_heap_strcache_offset_char2byte(
|
|
thr, h_input, match_end_coff);
|
|
duk_pop(thr);
|
|
|
|
/* empty match -> bump and continue */
|
|
if (prev_match_end_boff == match_end_boff) {
|
|
duk_push_uint(thr, (duk_uint_t)(match_end_coff + 1));
|
|
duk_put_prop_stridx_short(thr, 0, DUK_STRIDX_LAST_INDEX);
|
|
duk_pop(thr);
|
|
continue;
|
|
}
|
|
} else {
|
|
#else /* DUK_USE_REGEXP_SUPPORT */
|
|
{ /* unconditionally */
|
|
#endif /* DUK_USE_REGEXP_SUPPORT */
|
|
const duk_uint8_t *p_start, *p_end, *p; /* input string scan */
|
|
const duk_uint8_t *q_start; /* match string */
|
|
duk_size_t q_blen, q_clen;
|
|
|
|
p_start = DUK_HSTRING_GET_DATA(h_input);
|
|
p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_input);
|
|
p = p_start + prev_match_end_boff;
|
|
|
|
h_sep = duk_known_hstring(thr, 0); /* symbol already rejected above */
|
|
q_start = DUK_HSTRING_GET_DATA(h_sep);
|
|
q_blen = (duk_size_t)DUK_HSTRING_GET_BYTELEN(h_sep);
|
|
q_clen = (duk_size_t)DUK_HSTRING_GET_CHARLEN(h_sep);
|
|
|
|
p_end -= q_blen; /* ensure full memcmp() fits in while */
|
|
|
|
match_start_coff = prev_match_end_coff;
|
|
|
|
if (q_blen == 0) {
|
|
/* Handle empty separator case: it will always match, and always
|
|
* triggers the check in step 13.c.iii initially. Note that we
|
|
* must skip to either end of string or start of first codepoint,
|
|
* skipping over any continuation bytes!
|
|
*
|
|
* Don't allow an empty string to match at the end of the input.
|
|
*/
|
|
|
|
matched = 1; /* empty separator can always match */
|
|
|
|
match_start_coff++;
|
|
p++;
|
|
while (p < p_end) {
|
|
if ((p[0] & 0xc0) != 0x80) {
|
|
goto found;
|
|
}
|
|
p++;
|
|
}
|
|
goto not_found;
|
|
}
|
|
|
|
DUK_ASSERT(q_blen > 0 && q_clen > 0);
|
|
while (p <= p_end) {
|
|
DUK_ASSERT(p + q_blen <= DUK_HSTRING_GET_DATA(h_input) +
|
|
DUK_HSTRING_GET_BYTELEN(h_input));
|
|
DUK_ASSERT(q_blen > 0); /* no issues with empty memcmp() */
|
|
if (duk_memcmp((const void *)p, (const void *)q_start,
|
|
(size_t)q_blen) == 0) {
|
|
/* never an empty match, so step 13.c.iii can't be triggered */
|
|
goto found;
|
|
}
|
|
|
|
/* track utf-8 non-continuation bytes */
|
|
if ((p[0] & 0xc0) != 0x80) {
|
|
match_start_coff++;
|
|
}
|
|
p++;
|
|
}
|
|
|
|
not_found:
|
|
/* not found */
|
|
break;
|
|
|
|
found:
|
|
matched = 1;
|
|
match_start_boff = (duk_uint32_t)(p - p_start);
|
|
match_end_coff = (duk_uint32_t)(
|
|
match_start_coff + q_clen); /* constrained by string length */
|
|
match_end_boff = (duk_uint32_t)(match_start_boff + q_blen); /* ditto */
|
|
|
|
/* empty match (may happen with empty separator) -> bump and continue */
|
|
if (prev_match_end_boff == match_end_boff) {
|
|
prev_match_end_boff++;
|
|
prev_match_end_coff++;
|
|
continue;
|
|
}
|
|
} /* if (is_regexp) */
|
|
|
|
/* stack[0] = separator (string or regexp)
|
|
* stack[1] = limit
|
|
* stack[2] = input string
|
|
* stack[3] = result array
|
|
* stack[4] = regexp res_obj (if is_regexp)
|
|
*/
|
|
|
|
DUK_DDD(DUK_DDDPRINT("split; match_start b=%ld,c=%ld, match_end "
|
|
"b=%ld,c=%ld, prev_end b=%ld,c=%ld",
|
|
(long)match_start_boff, (long)match_start_coff,
|
|
(long)match_end_boff, (long)match_end_coff,
|
|
(long)prev_match_end_boff, (long)prev_match_end_coff));
|
|
|
|
duk_push_lstring(
|
|
thr,
|
|
(const char *)(DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff),
|
|
(duk_size_t)(match_start_boff - prev_match_end_boff));
|
|
duk_put_prop_index(thr, 3, arr_idx);
|
|
arr_idx++;
|
|
if (arr_idx >= limit) {
|
|
goto hit_limit;
|
|
}
|
|
|
|
#if defined(DUK_USE_REGEXP_SUPPORT)
|
|
if (is_regexp) {
|
|
duk_size_t i, len;
|
|
|
|
len = duk_get_length(thr, 4);
|
|
for (i = 1; i < len; i++) {
|
|
DUK_ASSERT(i <= DUK_UARRIDX_MAX); /* cannot have >4G captures */
|
|
duk_get_prop_index(thr, 4, (duk_uarridx_t)i);
|
|
duk_put_prop_index(thr, 3, arr_idx);
|
|
arr_idx++;
|
|
if (arr_idx >= limit) {
|
|
goto hit_limit;
|
|
}
|
|
}
|
|
|
|
duk_pop(thr);
|
|
/* lastIndex already set up for next match */
|
|
} else {
|
|
#else /* DUK_USE_REGEXP_SUPPORT */
|
|
{
|
|
/* unconditionally */
|
|
#endif /* DUK_USE_REGEXP_SUPPORT */
|
|
/* no action */
|
|
}
|
|
|
|
prev_match_end_boff = match_end_boff;
|
|
prev_match_end_coff = match_end_coff;
|
|
continue;
|
|
} /* for */
|
|
|
|
/* Combined step 11 (empty string special case) and 14-15. */
|
|
|
|
DUK_DDD(DUK_DDDPRINT("split trailer; prev_end b=%ld,c=%ld",
|
|
(long)prev_match_end_boff, (long)prev_match_end_coff));
|
|
|
|
if (DUK_HSTRING_GET_BYTELEN(h_input) > 0 || !matched) {
|
|
/* Add trailer if:
|
|
* a) non-empty input
|
|
* b) empty input and no (zero size) match found (step 11)
|
|
*/
|
|
|
|
duk_push_lstring(
|
|
thr, (const char *)DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff,
|
|
(duk_size_t)(DUK_HSTRING_GET_BYTELEN(h_input) - prev_match_end_boff));
|
|
duk_put_prop_index(thr, 3, arr_idx);
|
|
/* No arr_idx update or limit check */
|
|
}
|
|
|
|
return 1;
|
|
|
|
hit_limit:
|
|
#if defined(DUK_USE_REGEXP_SUPPORT)
|
|
if (is_regexp) {
|
|
duk_pop(thr);
|
|
}
|
|
#endif
|
|
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* Various
|
|
*/
|
|
|
|
#if defined(DUK_USE_REGEXP_SUPPORT)
|
|
DUK_LOCAL void duk__to_regexp_helper(duk_hthread *thr, duk_idx_t idx,
|
|
duk_bool_t force_new) {
|
|
duk_hobject *h;
|
|
|
|
/* Shared helper for match() steps 3-4, search() steps 3-4. */
|
|
|
|
DUK_ASSERT(idx >= 0);
|
|
|
|
if (force_new) {
|
|
goto do_new;
|
|
}
|
|
|
|
h = duk_get_hobject_with_class(thr, idx, DUK_HOBJECT_CLASS_REGEXP);
|
|
if (!h) {
|
|
goto do_new;
|
|
}
|
|
return;
|
|
|
|
do_new:
|
|
duk_push_hobject_bidx(thr, DUK_BIDX_REGEXP_CONSTRUCTOR);
|
|
duk_dup(thr, idx);
|
|
duk_new(thr, 1); /* [ ... RegExp val ] -> [ ... res ] */
|
|
duk_replace(thr, idx);
|
|
}
|
|
#endif /* DUK_USE_REGEXP_SUPPORT */
|
|
|
|
#if defined(DUK_USE_REGEXP_SUPPORT)
|
|
/* String.prototype.search(regexp)
 *
 * Implemented as a single RegExp match against a freshly constructed RegExp.
 * The argument is always "cloned" through the RegExp constructor
 * (force_new), so a RegExp passed in by the caller is never modified; the
 * original notes that the clone starts with lastIndex at zero, which makes
 * the 'global' flag irrelevant for the result.
 *
 * Returns 1 with either the 'index' property of the match result or the
 * number -1 (no match) on the value stack top.
 */
DUK_INTERNAL duk_ret_t duk_bi_string_prototype_search(duk_hthread *thr) {
  DUK_ASSERT_TOP(thr, 1);
  (void)duk_push_this_coercible_to_string(thr); /* at index 1 */
  duk__to_regexp_helper(thr, 0 /*index*/, 1 /*force_new*/);

  /* Value stack layout now:
   *   stack[0] = regexp (fresh instance)
   *   stack[1] = string
   */

  /* Call the internal matcher directly instead of going through
   * RegExp.prototype methods: those are writable and configurable and
   * could have been replaced by user code.
   */
  duk_dup_0(thr);
  duk_dup_1(thr);        /* [ ... re_obj input ] */
  duk_regexp_match(thr); /* -> [ ... res_obj ] */

  if (duk_is_object(thr, -1)) {
    /* Match found: result is the match start index. */
    duk_get_prop_stridx_short(thr, -1, DUK_STRIDX_INDEX);
    DUK_ASSERT(duk_is_number(thr, -1));
  } else {
    /* No match. */
    duk_push_int(thr, -1);
  }

  return 1;
}
|
|
#endif /* DUK_USE_REGEXP_SUPPORT */
|
|
|
|
#if defined(DUK_USE_REGEXP_SUPPORT)
|
|
/* String.prototype.match(regexp)
 *
 * The argument is converted to a RegExp only when it is not one already
 * (no forced clone).  For a non-global RegExp the result of a single match
 * is returned as is.  For a global RegExp lastIndex is reset to zero and
 * the matcher is run repeatedly, collecting element 0 of every match result
 * into a new array; null is returned instead when nothing matched.
 *
 * Returns 1 with the result (match object, array, or null) on the value
 * stack top.
 */
DUK_INTERNAL duk_ret_t duk_bi_string_prototype_match(duk_hthread *thr) {
  duk_bool_t is_global;
  duk_int_t last_seen = 0; /* lastIndex after the previous iteration */
  duk_int_t cur;           /* lastIndex after the current match */
  duk_int_t n_matches = 0; /* next free index in the result array */

  DUK_ASSERT_TOP(thr, 1);
  (void)duk_push_this_coercible_to_string(thr);
  duk__to_regexp_helper(thr, 0 /*index*/, 0 /*force_new*/);
  is_global = duk_get_prop_stridx_boolean(thr, 0, DUK_STRIDX_GLOBAL, NULL);
  DUK_ASSERT_TOP(thr, 2);

  /* Value stack layout now:
   *   stack[0] = regexp
   *   stack[1] = string
   */

  if (!is_global) {
    /* Simple case: one match attempt, its result is the return value. */
    duk_regexp_match(thr); /* -> [ res_obj ] */
    return 1;
  }

  /* Global case: start from lastIndex 0 and gather all matches. */

  /* [ regexp string ] */

  duk_push_int(thr, 0);
  duk_put_prop_stridx_short(thr, 0, DUK_STRIDX_LAST_INDEX);
  duk_push_array(thr);

  /* [ regexp string res_arr ] */

  for (;;) {
    DUK_ASSERT_TOP(thr, 3);

    duk_dup_0(thr);
    duk_dup_1(thr);
    duk_regexp_match(thr); /* -> [ ... regexp string ] -> [ ... res_obj ] */

    if (!duk_is_object(thr, -1)) {
      /* No further matches. */
      duk_pop(thr);
      break;
    }

    /* Read back lastIndex from the RegExp after the match. */
    duk_get_prop_stridx_short(thr, 0, DUK_STRIDX_LAST_INDEX);
    DUK_ASSERT(duk_is_number(thr, -1));
    cur = duk_get_int(thr, -1);
    duk_pop(thr);

    if (cur == last_seen) {
      /* lastIndex did not advance: bump it by one and write it back so
       * that the next attempt starts further along the input.
       */
      cur++;
      duk_push_int(thr, cur);
      duk_put_prop_stridx_short(thr, 0, DUK_STRIDX_LAST_INDEX);
    }
    last_seen = cur;

    /* Append the matched string (element 0 of the match result). */
    duk_get_prop_index(thr, -1, 0);
    duk_put_prop_index(thr, 2, (duk_uarridx_t)n_matches);
    n_matches++;
    duk_pop(thr); /* res_obj */
  }

  if (n_matches == 0) {
    /* Nothing matched: null shadows the (empty) result array. */
    duk_push_null(thr);
  }

  return 1; /* return 'res_arr' or 'null' */
}
|
|
#endif /* DUK_USE_REGEXP_SUPPORT */
|
|
|
|
/* String.prototype.concat(...args)
 *
 * The string-coerced 'this' value is moved to the bottom of the value stack
 * so that it precedes all call arguments, and then everything on the stack
 * is concatenated.  Per the original note, duk_concat() applies ToString()
 * to the values in the correct order.
 *
 * Returns 1 with the concatenated string on the value stack top.
 */
DUK_INTERNAL duk_ret_t duk_bi_string_prototype_concat(duk_hthread *thr) {
  duk_idx_t n_parts;

  (void)duk_push_this_coercible_to_string(thr);
  duk_insert(thr, 0); /* this is relatively expensive */

  /* 'this' plus every argument. */
  n_parts = duk_get_top(thr);
  duk_concat(thr, n_parts);

  return 1;
}
|
|
|
|
/* String.prototype.trim()
 *
 * Called with an empty value stack (asserted): the string-coerced 'this'
 * value is pushed to index 0 and trimmed in place with duk_trim().
 *
 * Returns 1 with the trimmed string as the only value on the stack.
 */
DUK_INTERNAL duk_ret_t duk_bi_string_prototype_trim(duk_hthread *thr) {
  const duk_idx_t idx_this = 0; /* where the coerced 'this' ends up */

  DUK_ASSERT_TOP(thr, 0);

  (void)duk_push_this_coercible_to_string(thr);
  duk_trim(thr, idx_this);

  DUK_ASSERT_TOP(thr, 1);
  return 1;
}
|
|
|
|
#if defined(DUK_USE_ES6)
|
|
/* String.prototype.repeat(count)
 *
 * Builds the result in a temporary fixed buffer of (count * byte length of
 * 'this') bytes and converts the buffer to a string at the end.
 *
 * The range-error exit is taken when:
 *   - count coerces to +Infinity,
 *   - count is negative after integer clamping (this also covers
 *     -Infinity), or
 *   - the result byte length would overflow duk_size_t.
 *
 * Returns 1 with the repeated string on the value stack top.
 */
DUK_INTERNAL duk_ret_t duk_bi_string_prototype_repeat(duk_hthread *thr) {
  duk_hstring *h_this;
  duk_size_t unit_blen;  /* byte length of one copy of the input */
  duk_size_t total_blen; /* byte length of the whole result */
  duk_int_t n_signed;
  duk_uint_t n;
  duk_double_t count_num;
  const duk_uint8_t *from;
  duk_uint8_t *out;
  duk_uint8_t *dst;
#if !defined(DUK_USE_PREFER_SIZE)
  duk_size_t chunk;
  duk_uint8_t *dst_end;
  duk_size_t left;
#endif

  DUK_ASSERT_TOP(thr, 1);
  h_this = duk_push_this_coercible_to_string(thr);
  DUK_ASSERT(h_this != NULL);
  unit_blen = DUK_HSTRING_GET_BYTELEN(h_this);

  /* The count is ToNumber() coerced.  +Infinity is always rejected, even
   * for a zero length input, and needs an explicit check.  -Infinity needs
   * none: duk_get_int() clamps it to DUK_INT_MIN, which is then rejected
   * together with all other negative values.
   */
  count_num = duk_to_number(thr, 0);
  if (duk_double_is_posinf(count_num)) {
    goto fail_range;
  }
  n_signed = duk_get_int(thr, 0);
  if (n_signed < 0) {
    goto fail_range;
  }
  n = (duk_uint_t)n_signed;

  /* Multiply, then verify by division that the product did not wrap. */
  total_blen = n * unit_blen;
  if (n != 0 && total_blen / n != unit_blen) {
    goto fail_range;
  }

  /* Temporary fixed buffer, converted to a string once filled. */
  out = (duk_uint8_t *)duk_push_fixed_buffer_nozero(thr, total_blen);
  DUK_ASSERT(out != NULL);
  from = (const duk_uint8_t *)DUK_HSTRING_GET_DATA(h_this);
  DUK_ASSERT(from != NULL);

#if defined(DUK_USE_PREFER_SIZE)
  /* Small footprint variant: one copy of the input per repetition. */
  for (dst = out; n > 0; n--) {
    duk_memcpy((void *)dst, (const void *)from,
               unit_blen); /* copy size may be zero, but pointers are valid */
    dst += unit_blen;
  }
#else  /* DUK_USE_PREFER_SIZE */
  /* Faster variant: after the first copy the already written part of the
   * output is used as the copy source, so the copied chunk grows on every
   * round.  This helps especially with short inputs.
   */
  dst = out;
  dst_end = out + total_blen;
  chunk = unit_blen;
  for (;;) {
    left = (duk_size_t)(dst_end - dst);
    DUK_DDD(DUK_DDDPRINT(
        "remain=%ld, copy_size=%ld, input_blen=%ld, result_len=%ld",
        (long)left, (long)chunk, (long)unit_blen, (long)total_blen));

    if (left > chunk) {
      duk_memcpy((void *)dst, (const void *)from, chunk);
      dst += chunk;

      /* From now on copy from the output itself, using all of it. */
      from = (const duk_uint8_t *)out;
      chunk = (duk_size_t)(dst - out);
      continue;
    }

    /* Final (possibly partial) chunk.  When the result is empty this is
     * the only copy performed and it is zero sized, with valid pointers.
     */
    duk_memcpy((void *)dst, (const void *)from, left);
    break;
  }
#endif /* DUK_USE_PREFER_SIZE */

  /* XXX: It would be useful to be able to create a duk_hstring with
   * a certain byte size whose data area wasn't initialized and which
   * wasn't in the string table yet.  This would allow a string to be
   * constructed directly without a buffer temporary and when it was
   * finished, it could be injected into the string table.  Currently
   * this isn't possible because duk_hstrings are only tracked by the
   * intern table (they are not in heap_allocated).
   */

  duk_buffer_to_string(thr, -1); /* Safe if input is safe. */
  return 1;

fail_range:
  DUK_DCERROR_RANGE_INVALID_ARGS(thr);
}
|
|
#endif /* DUK_USE_ES6 */
|
|
|
|
DUK_INTERNAL duk_ret_t
|
|
duk_bi_string_prototype_locale_compare(duk_hthread *thr) {
|
|
duk_hstring *h1;
|
|
duk_hstring *h2;
|
|
duk_size_t h1_len, h2_len, prefix_len;
|
|
duk_small_int_t ret = 0;
|
|
duk_small_int_t rc;
|
|
|
|
/* The current implementation of localeCompare() is simply a codepoint
|
|
* by codepoint comparison, implemented with a simple string compare
|
|
* because UTF-8 should preserve codepoint ordering (assuming valid
|
|
* shortest UTF-8 encoding).
|
|
*
|
|
* The specification requires that the return value must be related
|
|
* to the sort order: e.g. negative means that 'this' comes before
|
|
* 'that' in sort order. We assume an ascending sort order.
|
|
*/
|
|
|
|
/* XXX: could share code with duk_js_ops.c, duk_js_compare_helper */
|
|
|
|
h1 = duk_push_this_coercible_to_string(thr);
|
|
DUK_ASSERT(h1 != NULL);
|
|
|
|
h2 = duk_to_hstring(thr, 0);
|
|
DUK_ASSERT(h2 != NULL);
|
|
|
|
h1_len = (duk_size_t)DUK_HSTRING_GET_BYTELEN(h1);
|
|
h2_len = (duk_size_t)DUK_HSTRING_GET_BYTELEN(h2);
|
|
prefix_len = (h1_len <= h2_len ? h1_len : h2_len);
|
|
|
|
rc = (duk_small_int_t)duk_memcmp((const void *)DUK_HSTRING_GET_DATA(h1),
|
|
(const void *)DUK_HSTRING_GET_DATA(h2),
|
|
(size_t)prefix_len);
|
|
|
|
if (rc < 0) {
|
|
ret = -1;
|
|
goto done;
|
|
} else if (rc > 0) {
|
|
ret = 1;
|
|
goto done;
|
|
}
|
|
|
|
/* prefix matches, lengths matter now */
|
|
if (h1_len > h2_len) {
|
|
ret = 1;
|
|
goto done;
|
|
} else if (h1_len == h2_len) {
|
|
DUK_ASSERT(ret == 0);
|
|
goto done;
|
|
}
|
|
ret = -1;
|
|
goto done;
|
|
|
|
done:
|
|
duk_push_int(thr, (duk_int_t)ret);
|
|
return 1;
|
|
}
|
|
|
|
#if defined(DUK_USE_ES6)
|
|
DUK_INTERNAL duk_ret_t
|
|
duk_bi_string_prototype_startswith_endswith(duk_hthread *thr) {
|
|
duk_int_t magic;
|
|
duk_hstring *h;
|
|
duk_hstring *h_search;
|
|
duk_size_t blen_search;
|
|
const duk_uint8_t *p_cmp_start;
|
|
duk_bool_t result;
|
|
|
|
h = duk_push_this_coercible_to_string(thr);
|
|
DUK_ASSERT(h != NULL);
|
|
|
|
h_search = duk__str_tostring_notregexp(thr, 0);
|
|
DUK_ASSERT(h_search != NULL);
|
|
|
|
magic = duk_get_current_magic(thr);
|
|
|
|
p_cmp_start = (const duk_uint8_t *)DUK_HSTRING_GET_DATA(h);
|
|
blen_search = DUK_HSTRING_GET_BYTELEN(h_search);
|
|
|
|
if (duk_is_undefined(thr, 1)) {
|
|
if (magic) {
|
|
p_cmp_start = p_cmp_start + DUK_HSTRING_GET_BYTELEN(h) - blen_search;
|
|
} else {
|
|
/* p_cmp_start already OK */
|
|
}
|
|
} else {
|
|
duk_int_t len;
|
|
duk_int_t pos;
|
|
|
|
DUK_ASSERT(DUK_HSTRING_MAX_BYTELEN <= DUK_INT_MAX);
|
|
len = (duk_int_t)DUK_HSTRING_GET_CHARLEN(h);
|
|
pos = duk_to_int_clamped(thr, 1, 0, len);
|
|
DUK_ASSERT(pos >= 0 && pos <= len);
|
|
|
|
if (magic) {
|
|
p_cmp_start -=
|
|
blen_search; /* Conceptually subtracted last, but do already here. */
|
|
}
|
|
DUK_ASSERT(pos >= 0 && pos <= len);
|
|
|
|
p_cmp_start +=
|
|
duk_heap_strcache_offset_char2byte(thr, h, (duk_uint_fast32_t)pos);
|
|
}
|
|
|
|
/* The main comparison can be done using a memcmp() rather than
|
|
* doing codepoint comparisons: for CESU-8 strings there is a
|
|
* canonical representation for every codepoint. But we do need
|
|
* to deal with the char/byte offset translation to find the
|
|
* comparison range.
|
|
*/
|
|
|
|
result = 0;
|
|
if (p_cmp_start >= DUK_HSTRING_GET_DATA(h) &&
|
|
(duk_size_t)(p_cmp_start - (const duk_uint8_t *)DUK_HSTRING_GET_DATA(h)) +
|
|
blen_search <=
|
|
DUK_HSTRING_GET_BYTELEN(h)) {
|
|
if (duk_memcmp((const void *)p_cmp_start,
|
|
(const void *)DUK_HSTRING_GET_DATA(h_search),
|
|
(size_t)blen_search) == 0) {
|
|
result = 1;
|
|
}
|
|
}
|
|
|
|
duk_push_boolean(thr, result);
|
|
return 1;
|
|
}
|
|
#endif /* DUK_USE_ES6 */
|
|
|
|
#if defined(DUK_USE_ES6)
|
|
/* String.prototype.includes(search, pos)
 *
 * The search argument is string coerced, with a RegExp rejected by
 * duk__str_tostring_notregexp().  The start position (second argument) is
 * integer coerced and clamped to [0, character length of 'this'].  A
 * forward search is then run with duk__str_search_shared(); a non-negative
 * return value from it is treated as "found".
 *
 * Returns 1 with a boolean result on the value stack top.
 */
DUK_INTERNAL duk_ret_t duk_bi_string_prototype_includes(duk_hthread *thr) {
  duk_hstring *h_this;
  duk_hstring *h_needle;
  duk_int_t clen;
  duk_int_t start_cpos;
  duk_int_t found_cpos;

  h_this = duk_push_this_coercible_to_string(thr);
  DUK_ASSERT(h_this != NULL);

  h_needle = duk__str_tostring_notregexp(thr, 0);
  DUK_ASSERT(h_needle != NULL);

  /* Clamp the starting character offset into the input string. */
  clen = (duk_int_t)DUK_HSTRING_GET_CHARLEN(h_this);
  start_cpos = duk_to_int_clamped(thr, 1, 0, clen);
  DUK_ASSERT(start_cpos >= 0 && start_cpos <= clen);

  found_cpos = duk__str_search_shared(thr, h_this, h_needle, start_cpos,
                                      0 /*backwards*/);

  duk_push_boolean(thr, found_cpos >= 0);
  return 1;
}
|
|
#endif /* DUK_USE_ES6 */
|
|
#endif /* DUK_USE_STRING_BUILTIN */
|