/*
 *  String built-ins
 *
 *  Most String built-ins must only accept strings (or String objects).
 *  Symbols, represented internally as strings, must be generally rejected.
 *  The duk_push_this_coercible_to_string() helper does this automatically.
 */

/* XXX: There are several limitations in the current implementation for
 * strings with >= 0x80000000UL characters.  In some cases one would need
 * to be able to represent the range [-0xffffffff,0xffffffff] and so on.
 * Generally character and byte length are assumed to fit into signed 32
 * bits (< 0x80000000UL).  Places with issues are not marked explicitly
 * below in all cases, look for signed type usage (duk_int_t etc) for
 * offsets/lengths.
 */

#include "third_party/duktape/duk_internal.h"

#if defined(DUK_USE_STRING_BUILTIN)

/*
 *  Helpers
 */

/* Coerce the value at value stack index 'idx' to a string and return it.
 *
 * A value whose class number is RegExp is not coerced; a TypeError
 * (invalid args) is thrown instead.  The returned pointer is never NULL.
 */
DUK_LOCAL duk_hstring *duk__str_tostring_notregexp(duk_hthread *thr, duk_idx_t idx) {
  duk_hstring *h;

  if (duk_get_class_number(thr, idx) == DUK_HOBJECT_CLASS_REGEXP) {
    DUK_ERROR_TYPE_INVALID_ARGS(thr);
    DUK_WO_NORETURN(return NULL;);
  }
  h = duk_to_hstring(thr, idx);
  DUK_ASSERT(h != NULL);

  return h;
}

/* Shared substring search (used e.g. by the indexOf()/lastIndexOf() built-in).
 *
 *   thr         current thread
 *   h_this      string to search in
 *   h_search    string to search for
 *   start_cpos  character offset at which scanning starts; it is converted
 *               to a byte offset with the string cache helper
 *   backwards   zero: scan towards the end of h_this,
 *               non-zero: scan towards the start of h_this
 *
 * Returns the character offset of the first match found in the scanning
 * direction, or -1 if there is no match.  An empty search string matches
 * immediately and start_cpos is returned as is.
 */
DUK_LOCAL duk_int_t duk__str_search_shared(duk_hthread *thr, duk_hstring *h_this, duk_hstring *h_search, duk_int_t start_cpos, duk_bool_t backwards) {
  duk_int_t cpos;
  duk_int_t bpos;
  const duk_uint8_t *p_start, *p_end, *p;
  const duk_uint8_t *q_start;
  duk_int_t q_blen;
  duk_uint8_t firstbyte;
  duk_uint8_t t;

  cpos = start_cpos;

  /* Empty searchstring always matches; cpos must be clamped here.
   * (If q_blen were < 0 due to clamped coercion, it would also be
   * caught here.)
   */
  q_start = DUK_HSTRING_GET_DATA(h_search);
  q_blen = (duk_int_t)DUK_HSTRING_GET_BYTELEN(h_search);
  if (q_blen <= 0) {
    return cpos;
  }
  DUK_ASSERT(q_blen > 0);

  bpos = (duk_int_t)duk_heap_strcache_offset_char2byte(thr, h_this, (duk_uint32_t)cpos);

  p_start = DUK_HSTRING_GET_DATA(h_this);
  p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_this);
  p = p_start + bpos;

  /* This loop is optimized for size.  For speed, there should be
   * two separate loops, and we should ensure that memcmp() can be
   * used without an extra "will searchstring fit" check.  Doing
   * the preconditioning for 'p' and 'p_end' is easy but cpos
   * must be updated if 'p' is wound back (backward scanning).
   */

  firstbyte = q_start[0]; /* leading byte of match string */
  while (p <= p_end && p >= p_start) {
    t = *p;

    /* For ECMAScript strings, this check can only match for
     * initial UTF-8 bytes (not continuation bytes).  For other
     * strings all bets are off.
     */

    if ((t == firstbyte) && ((duk_size_t)(p_end - p) >= (duk_size_t)q_blen)) {
      DUK_ASSERT(q_blen > 0);
      if (duk_memcmp((const void *)p, (const void *)q_start, (size_t)q_blen) == 0) {
        return cpos;
      }
    }

    /* track cpos while scanning */
    if (backwards) {
      /* when going backwards, we decrement cpos 'early';
       * 'p' may point to a continuation byte of the char
       * at offset 'cpos', but that's OK because we'll
       * backtrack all the way to the initial byte.
       */
      if ((t & 0xc0) != 0x80) {
        cpos--;
      }
      /* The first byte has just been examined: stop here rather than
       * stepping to p_start - 1, because forming (and then comparing)
       * a pointer before the start of the string data is undefined
       * behavior in C.  The outcome is the same "not found" result.
       */
      if (p == p_start) {
        break;
      }
      p--;
    } else {
      if ((t & 0xc0) != 0x80) {
        cpos++;
      }
      p++;
    }
  }

  /* Not found.  Empty string case is handled specially above. */
  return -1;
}

/*
 *  Constructor
 */

DUK_INTERNAL duk_ret_t duk_bi_string_constructor(duk_hthread *thr) {
  duk_hstring *h;
  duk_uint_t flags;

  /* String constructor needs to distinguish between an argument not given at
   * all vs. given as 'undefined'.  We're a vararg function to handle this
   * properly.
   */

  /* XXX: copy current activation flags to thr, including current magic,
   * is_constructor_call etc.  This takes a few bytes in duk_hthread but
   * makes call sites smaller (there are >30 is_constructor_call and get
   * current magic call sites).
*/

  /* No arguments at all: the result is the empty string.  Otherwise the
   * first argument is coerced in place; a Symbol given to a plain function
   * call (not a constructor call) is replaced by its descriptive string.
   */
  if (duk_get_top(thr) == 0) {
    duk_push_hstring_empty(thr);
  } else {
    h = duk_to_hstring_acceptsymbol(thr, 0);
    if (DUK_UNLIKELY(DUK_HSTRING_HAS_SYMBOL(h) && !duk_is_constructor_call(thr))) {
      duk_push_symbol_descriptive_string(thr, h);
      duk_replace(thr, 0);
    }
  }
  duk_to_string(thr, 0); /* catches symbol argument for constructor call */
  DUK_ASSERT(duk_is_string(thr, 0));
  duk_set_top(thr, 1); /* Top may be 1 or larger. */

  /* Constructor call: wrap the string at index 0 into a new object of
   * class String whose internal value property holds the string.
   */
  if (duk_is_constructor_call(thr)) {
    /* String object internal value is immutable */
    flags = DUK_HOBJECT_FLAG_EXTENSIBLE | DUK_HOBJECT_FLAG_FASTREFS |
            DUK_HOBJECT_FLAG_EXOTIC_STRINGOBJ |
            DUK_HOBJECT_CLASS_AS_FLAGS(DUK_HOBJECT_CLASS_STRING);
    duk_push_object_helper(thr, flags, DUK_BIDX_STRING_PROTOTYPE);
    duk_dup_0(thr);
    duk_xdef_prop_stridx_short(thr, -2, DUK_STRIDX_INT_VALUE, DUK_PROPDESC_FLAGS_NONE);
  }
  /* Note: unbalanced stack on purpose */

  return 1;
}

/* Shared implementation of the two "string from codepoints" constructors
 * defined below.
 *
 * Every argument on the value stack is converted to a codepoint and its
 * encoding is appended to a buffer, which is finally converted to the
 * result string.
 *
 *   thr     current thread
 *   nonbmp  non-zero: each argument must be a whole number in the range
 *           [0, 0x10ffff], otherwise a RangeError (invalid args) is
 *           raised; the codepoint is written as CESU-8.
 *           zero: arguments are coerced with ToUint16() (or ToUint32()
 *           when DUK_USE_NONSTD_STRING_FROMCHARCODE_32BIT is defined).
 *
 * Returns 1 (the result string is on the value stack top).
 */
DUK_LOCAL duk_ret_t duk__construct_from_codepoints(duk_hthread *thr, duk_bool_t nonbmp) {
  duk_bufwriter_ctx bw_alloc;
  duk_bufwriter_ctx *bw;
  duk_idx_t i, n;
  duk_ucodepoint_t cp;

  /* XXX: It would be nice to build the string directly but ToUint16()
   * coercion is needed so a generic helper would not be very
   * helpful (perhaps coerce the value stack first here and then
   * build a string from a duk_tval number sequence in one go?).
   */

  n = duk_get_top(thr);

  bw = &bw_alloc;
  DUK_BW_INIT_PUSHBUF(thr, bw, (duk_size_t)n); /* initial estimate for ASCII only codepoints */

  for (i = 0; i < n; i++) {
    /* XXX: could improve bufwriter handling to write multiple codepoints
     * with one ensure call but the relative benefit would be quite small.
     */

    if (nonbmp) {
      /* ES2015 requires that (1) SameValue(cp, ToInteger(cp)) and
       * (2) cp >= 0 and cp <= 0x10ffff.  This check does not
       * implement the steps exactly but the outcome should be
       * the same.
       */
      duk_int32_t i32 = 0;
      if (!duk_is_whole_get_int32(duk_to_number(thr, i), &i32) || i32 < 0 || i32 > 0x10ffffL) {
        DUK_DCERROR_RANGE_INVALID_ARGS(thr);
      }
      DUK_ASSERT(i32 >= 0 && i32 <= 0x10ffffL);
      cp = (duk_ucodepoint_t)i32;
      DUK_BW_WRITE_ENSURE_CESU8(thr, bw, cp);
    } else {
#if defined(DUK_USE_NONSTD_STRING_FROMCHARCODE_32BIT)
      /* ToUint16() coercion is mandatory in the E5.1 specification, but
       * this non-compliant behavior makes more sense because we support
       * non-BMP codepoints.  Don't use CESU-8 because that'd create
       * surrogate pairs.
       */
      cp = (duk_ucodepoint_t)duk_to_uint32(thr, i);
      DUK_BW_WRITE_ENSURE_XUTF8(thr, bw, cp);
#else
      cp = (duk_ucodepoint_t)duk_to_uint16(thr, i);
      DUK_ASSERT(cp >= 0 && cp <= 0x10ffffL);
      DUK_BW_WRITE_ENSURE_CESU8(thr, bw, cp);
#endif
    }
  }

  /* Shrink the buffer to the written size, then turn it into a string. */
  DUK_BW_COMPACT(thr, bw);
  (void)duk_buffer_to_string(thr, -1); /* Safe, extended UTF-8 or CESU-8 encoded. */
  return 1;
}

/* Entry point which runs the shared helper with nonbmp == 0. */
DUK_INTERNAL duk_ret_t duk_bi_string_constructor_from_char_code(duk_hthread *thr) {
  return duk__construct_from_codepoints(thr, 0 /*nonbmp*/);
}

#if defined(DUK_USE_ES6)
/* Entry point which runs the shared helper with nonbmp == 1 (range checked
 * codepoints); only compiled when DUK_USE_ES6 is defined.
 */
DUK_INTERNAL duk_ret_t duk_bi_string_constructor_from_code_point(duk_hthread *thr) {
  return duk__construct_from_codepoints(thr, 1 /*nonbmp*/);
}
#endif

/*
 *  toString(), valueOf()
 */

/* Return the string value of the 'this' binding.
 *
 * A plain string is returned as is.  For an object of class String the
 * internal value property is read and returned.  Any other 'this' value
 * results in a TypeError (invalid args).
 */
DUK_INTERNAL duk_ret_t duk_bi_string_prototype_to_string(duk_hthread *thr) {
  duk_tval *tv;

  duk_push_this(thr);
  tv = duk_require_tval(thr, -1);
  DUK_ASSERT(tv != NULL);

  if (DUK_TVAL_IS_STRING(tv)) {
    /* return as is */
  } else if (DUK_TVAL_IS_OBJECT(tv)) {
    duk_hobject *h = DUK_TVAL_GET_OBJECT(tv);
    DUK_ASSERT(h != NULL);

    /* Must be a "string object", i.e. class "String" */
    if (DUK_HOBJECT_GET_CLASS_NUMBER(h) != DUK_HOBJECT_CLASS_STRING) {
      goto type_error;
    }

    duk_xget_owndataprop_stridx_short(thr, -1, DUK_STRIDX_INT_VALUE);
    DUK_ASSERT(duk_is_string(thr, -1));
  } else {
    goto type_error;
  }

  (void)duk_require_hstring_notsymbol(thr, -1); /* Reject symbols (and wrapped symbols).
*/ return 1; type_error: DUK_DCERROR_TYPE_INVALID_ARGS(thr); } /* * Character and charcode access */ DUK_INTERNAL duk_ret_t duk_bi_string_prototype_char_at(duk_hthread *thr) { duk_hstring *h; duk_int_t pos; /* XXX: faster implementation */ h = duk_push_this_coercible_to_string(thr); DUK_ASSERT(h != NULL); pos = duk_to_int(thr, 0); if (sizeof(duk_size_t) >= sizeof(duk_uint_t)) { /* Cast to duk_size_t works in this case: * - If pos < 0, (duk_size_t) pos will always be * >= max_charlen, and result will be the empty string * (see duk_substring()). * - If pos >= 0, pos + 1 cannot wrap. */ DUK_ASSERT((duk_size_t)DUK_INT_MIN >= DUK_HSTRING_MAX_BYTELEN); DUK_ASSERT((duk_size_t)DUK_INT_MAX + 1U > (duk_size_t)DUK_INT_MAX); duk_substring(thr, -1, (duk_size_t)pos, (duk_size_t)pos + 1U); } else { /* If size_t is smaller than int, explicit bounds checks * are needed because an int may wrap multiple times. */ if (DUK_UNLIKELY(pos < 0 || (duk_uint_t)pos >= (duk_uint_t)DUK_HSTRING_GET_CHARLEN(h))) { duk_push_hstring_empty(thr); } else { duk_substring(thr, -1, (duk_size_t)pos, (duk_size_t)pos + 1U); } } return 1; } /* Magic: 0=charCodeAt, 1=codePointAt */ DUK_INTERNAL duk_ret_t duk_bi_string_prototype_char_code_at(duk_hthread *thr) { duk_int_t pos; duk_hstring *h; duk_bool_t clamped; duk_uint32_t cp; duk_int_t magic; /* XXX: faster implementation */ DUK_DDD(DUK_DDDPRINT("arg=%!T", (duk_tval *)duk_get_tval(thr, 0))); h = duk_push_this_coercible_to_string(thr); DUK_ASSERT(h != NULL); pos = duk_to_int_clamped_raw( thr, 0 /*index*/, 0 /*min(incl)*/, (duk_int_t)DUK_HSTRING_GET_CHARLEN(h) - 1 /*max(incl)*/, &clamped /*out_clamped*/); #if defined(DUK_USE_ES6) magic = duk_get_current_magic(thr); #else DUK_ASSERT(duk_get_current_magic(thr) == 0); magic = 0; #endif if (clamped) { /* For out-of-bounds indices .charCodeAt() returns NaN and * .codePointAt() returns undefined. 
*/ if (magic != 0) { return 0; } duk_push_nan(thr); } else { DUK_ASSERT(pos >= 0); cp = (duk_uint32_t)duk_hstring_char_code_at_raw( thr, h, (duk_uint_t)pos, (duk_bool_t)magic /*surrogate_aware*/); duk_push_u32(thr, cp); } return 1; } /* * substring(), substr(), slice() */ /* XXX: any chance of merging these three similar but still slightly * different algorithms so that footprint would be reduced? */ DUK_INTERNAL duk_ret_t duk_bi_string_prototype_substring(duk_hthread *thr) { duk_hstring *h; duk_int_t start_pos, end_pos; duk_int_t len; h = duk_push_this_coercible_to_string(thr); DUK_ASSERT(h != NULL); len = (duk_int_t)DUK_HSTRING_GET_CHARLEN(h); /* [ start end str ] */ start_pos = duk_to_int_clamped(thr, 0, 0, len); if (duk_is_undefined(thr, 1)) { end_pos = len; } else { end_pos = duk_to_int_clamped(thr, 1, 0, len); } DUK_ASSERT(start_pos >= 0 && start_pos <= len); DUK_ASSERT(end_pos >= 0 && end_pos <= len); if (start_pos > end_pos) { duk_int_t tmp = start_pos; start_pos = end_pos; end_pos = tmp; } DUK_ASSERT(end_pos >= start_pos); duk_substring(thr, -1, (duk_size_t)start_pos, (duk_size_t)end_pos); return 1; } #if defined(DUK_USE_SECTION_B) DUK_INTERNAL duk_ret_t duk_bi_string_prototype_substr(duk_hthread *thr) { duk_hstring *h; duk_int_t start_pos, end_pos; duk_int_t len; /* Unlike non-obsolete String calls, substr() algorithm in E5.1 * specification will happily coerce undefined and null to strings * ("undefined" and "null"). */ duk_push_this(thr); h = duk_to_hstring_m1(thr); /* Reject Symbols. */ DUK_ASSERT(h != NULL); len = (duk_int_t)DUK_HSTRING_GET_CHARLEN(h); /* [ start length str ] */ /* The implementation for computing of start_pos and end_pos differs * from the standard algorithm, but is intended to result in the exactly * same behavior. This is not always obvious. 
*/ /* combines steps 2 and 5; -len ensures max() not needed for step 5 */ start_pos = duk_to_int_clamped(thr, 0, -len, len); if (start_pos < 0) { start_pos = len + start_pos; } DUK_ASSERT(start_pos >= 0 && start_pos <= len); /* combines steps 3, 6; step 7 is not needed */ if (duk_is_undefined(thr, 1)) { end_pos = len; } else { DUK_ASSERT(start_pos <= len); end_pos = start_pos + duk_to_int_clamped(thr, 1, 0, len - start_pos); } DUK_ASSERT(start_pos >= 0 && start_pos <= len); DUK_ASSERT(end_pos >= 0 && end_pos <= len); DUK_ASSERT(end_pos >= start_pos); duk_substring(thr, -1, (duk_size_t)start_pos, (duk_size_t)end_pos); return 1; } #endif /* DUK_USE_SECTION_B */ DUK_INTERNAL duk_ret_t duk_bi_string_prototype_slice(duk_hthread *thr) { duk_hstring *h; duk_int_t start_pos, end_pos; duk_int_t len; h = duk_push_this_coercible_to_string(thr); DUK_ASSERT(h != NULL); len = (duk_int_t)DUK_HSTRING_GET_CHARLEN(h); /* [ start end str ] */ start_pos = duk_to_int_clamped(thr, 0, -len, len); if (start_pos < 0) { start_pos = len + start_pos; } if (duk_is_undefined(thr, 1)) { end_pos = len; } else { end_pos = duk_to_int_clamped(thr, 1, -len, len); if (end_pos < 0) { end_pos = len + end_pos; } } DUK_ASSERT(start_pos >= 0 && start_pos <= len); DUK_ASSERT(end_pos >= 0 && end_pos <= len); if (end_pos < start_pos) { end_pos = start_pos; } DUK_ASSERT(end_pos >= start_pos); duk_substring(thr, -1, (duk_size_t)start_pos, (duk_size_t)end_pos); return 1; } /* * Case conversion */ DUK_INTERNAL duk_ret_t duk_bi_string_prototype_caseconv_shared(duk_hthread *thr) { duk_small_int_t uppercase = duk_get_current_magic(thr); (void)duk_push_this_coercible_to_string(thr); duk_unicode_case_convert_string(thr, (duk_bool_t)uppercase); return 1; } /* * indexOf() and lastIndexOf() */ DUK_INTERNAL duk_ret_t duk_bi_string_prototype_indexof_shared(duk_hthread *thr) { duk_hstring *h_this; duk_hstring *h_search; duk_int_t clen_this; duk_int_t cpos; duk_small_uint_t is_lastindexof = 
(duk_small_uint_t)duk_get_current_magic( thr); /* 0=indexOf, 1=lastIndexOf */ h_this = duk_push_this_coercible_to_string(thr); DUK_ASSERT(h_this != NULL); clen_this = (duk_int_t)DUK_HSTRING_GET_CHARLEN(h_this); h_search = duk_to_hstring(thr, 0); DUK_ASSERT(h_search != NULL); duk_to_number(thr, 1); if (duk_is_nan(thr, 1) && is_lastindexof) { /* indexOf: NaN should cause pos to be zero. * lastIndexOf: NaN should cause pos to be +Infinity * (and later be clamped to len). */ cpos = clen_this; } else { cpos = duk_to_int_clamped(thr, 1, 0, clen_this); } cpos = duk__str_search_shared(thr, h_this, h_search, cpos, is_lastindexof /*backwards*/); duk_push_int(thr, cpos); return 1; } /* * replace() */ /* XXX: the current implementation works but is quite clunky; it compiles * to almost 1,4kB of x86 code so it needs to be simplified (better approach, * shared helpers, etc). Some ideas for refactoring: * * - a primitive to convert a string into a regexp matcher (reduces matching * code at the cost of making matching much slower) * - use replace() as a basic helper for match() and split(), which are both * much simpler * - API call to get_prop and to_boolean */ DUK_INTERNAL duk_ret_t duk_bi_string_prototype_replace(duk_hthread *thr) { duk_hstring *h_input; duk_hstring *h_match; duk_hstring *h_search; duk_hobject *h_re; duk_bufwriter_ctx bw_alloc; duk_bufwriter_ctx *bw; #if defined(DUK_USE_REGEXP_SUPPORT) duk_bool_t is_regexp; duk_bool_t is_global; #endif duk_bool_t is_repl_func; duk_uint32_t match_start_coff, match_start_boff; #if defined(DUK_USE_REGEXP_SUPPORT) duk_int_t match_caps; #endif duk_uint32_t prev_match_end_boff; const duk_uint8_t *r_start, *r_end, *r; /* repl string scan */ duk_size_t tmp_sz; DUK_ASSERT_TOP(thr, 2); h_input = duk_push_this_coercible_to_string(thr); DUK_ASSERT(h_input != NULL); bw = &bw_alloc; DUK_BW_INIT_PUSHBUF( thr, bw, DUK_HSTRING_GET_BYTELEN( h_input)); /* input size is good output starting point */ DUK_ASSERT_TOP(thr, 4); /* stack[0] = search 
value * stack[1] = replace value * stack[2] = input string * stack[3] = result buffer */ h_re = duk_get_hobject_with_class(thr, 0, DUK_HOBJECT_CLASS_REGEXP); if (h_re) { #if defined(DUK_USE_REGEXP_SUPPORT) is_regexp = 1; is_global = duk_get_prop_stridx_boolean(thr, 0, DUK_STRIDX_GLOBAL, NULL); if (is_global) { /* start match from beginning */ duk_push_int(thr, 0); duk_put_prop_stridx_short(thr, 0, DUK_STRIDX_LAST_INDEX); } #else /* DUK_USE_REGEXP_SUPPORT */ DUK_DCERROR_UNSUPPORTED(thr); #endif /* DUK_USE_REGEXP_SUPPORT */ } else { duk_to_string(thr, 0); /* rejects symbols */ #if defined(DUK_USE_REGEXP_SUPPORT) is_regexp = 0; is_global = 0; #endif } if (duk_is_function(thr, 1)) { is_repl_func = 1; r_start = NULL; r_end = NULL; } else { duk_hstring *h_repl; is_repl_func = 0; h_repl = duk_to_hstring(thr, 1); /* reject symbols */ DUK_ASSERT(h_repl != NULL); r_start = DUK_HSTRING_GET_DATA(h_repl); r_end = r_start + DUK_HSTRING_GET_BYTELEN(h_repl); } prev_match_end_boff = 0; for (;;) { /* * If matching with a regexp: * - non-global RegExp: lastIndex not touched on a match, zeroed * on a non-match * - global RegExp: on match, lastIndex will be updated by regexp * executor to point to next char after the matching part (so that * characters in the matching part are not matched again) * * If matching with a string: * - always non-global match, find first occurrence * * We need: * - The character offset of start-of-match for the replacer function * - The byte offsets for start-of-match and end-of-match to implement * the replacement values $&, $`, and $', and to copy non-matching * input string portions (including header and trailer) verbatim. * * NOTE: the E5.1 specification is a bit vague how the RegExp should * behave in the replacement process; e.g. is matching done first for * all matches (in the global RegExp case) before any replacer calls * are made? See: test-bi-string-proto-replace.js for discussion. 
*/ DUK_ASSERT_TOP(thr, 4); #if defined(DUK_USE_REGEXP_SUPPORT) if (is_regexp) { duk_dup_0(thr); duk_dup_2(thr); duk_regexp_match(thr); /* [ ... regexp input ] -> [ res_obj ] */ if (!duk_is_object(thr, -1)) { duk_pop(thr); break; } duk_get_prop_stridx_short(thr, -1, DUK_STRIDX_INDEX); DUK_ASSERT(duk_is_number(thr, -1)); match_start_coff = duk_get_uint(thr, -1); duk_pop(thr); duk_get_prop_index(thr, -1, 0); DUK_ASSERT(duk_is_string(thr, -1)); h_match = duk_known_hstring(thr, -1); duk_pop( thr); /* h_match is borrowed, remains reachable through match_obj */ if (DUK_HSTRING_GET_BYTELEN(h_match) == 0) { /* This should be equivalent to match() algorithm step 8.f.iii.2: * detect an empty match and allow it, but don't allow it twice. */ duk_uint32_t last_index; duk_get_prop_stridx_short(thr, 0, DUK_STRIDX_LAST_INDEX); last_index = (duk_uint32_t)duk_get_uint(thr, -1); DUK_DDD(DUK_DDDPRINT("empty match, bump lastIndex: %ld -> %ld", (long)last_index, (long)(last_index + 1))); duk_pop(thr); duk_push_uint(thr, (duk_uint_t)(last_index + 1)); duk_put_prop_stridx_short(thr, 0, DUK_STRIDX_LAST_INDEX); } DUK_ASSERT(duk_get_length(thr, -1) <= DUK_INT_MAX); /* string limits */ match_caps = (duk_int_t)duk_get_length(thr, -1); } else { #else /* DUK_USE_REGEXP_SUPPORT */ { /* unconditionally */ #endif /* DUK_USE_REGEXP_SUPPORT */ const duk_uint8_t *p_start, *p_end, *p; /* input string scan */ const duk_uint8_t *q_start; /* match string */ duk_size_t q_blen; #if defined(DUK_USE_REGEXP_SUPPORT) DUK_ASSERT(!is_global); /* single match always */ #endif p_start = DUK_HSTRING_GET_DATA(h_input); p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_input); p = p_start; h_search = duk_known_hstring(thr, 0); q_start = DUK_HSTRING_GET_DATA(h_search); q_blen = (duk_size_t)DUK_HSTRING_GET_BYTELEN(h_search); p_end -= q_blen; /* ensure full memcmp() fits in while */ match_start_coff = 0; while (p <= p_end) { DUK_ASSERT(p + q_blen <= DUK_HSTRING_GET_DATA(h_input) + DUK_HSTRING_GET_BYTELEN(h_input)); if 
(duk_memcmp((const void *)p, (const void *)q_start, (size_t)q_blen) == 0) { duk_dup_0(thr); h_match = duk_known_hstring(thr, -1); #if defined(DUK_USE_REGEXP_SUPPORT) match_caps = 0; #endif goto found; } /* track utf-8 non-continuation bytes */ if ((p[0] & 0xc0) != 0x80) { match_start_coff++; } p++; } /* not found */ break; } found: /* stack[0] = search value * stack[1] = replace value * stack[2] = input string * stack[3] = result buffer * stack[4] = regexp match OR match string */ match_start_boff = (duk_uint32_t)duk_heap_strcache_offset_char2byte( thr, h_input, match_start_coff); tmp_sz = (duk_size_t)(match_start_boff - prev_match_end_boff); DUK_BW_WRITE_ENSURE_BYTES( thr, bw, DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff, tmp_sz); prev_match_end_boff = match_start_boff + DUK_HSTRING_GET_BYTELEN(h_match); if (is_repl_func) { duk_idx_t idx_args; duk_hstring *h_repl; /* regexp res_obj is at index 4 */ duk_dup_1(thr); idx_args = duk_get_top(thr); #if defined(DUK_USE_REGEXP_SUPPORT) if (is_regexp) { duk_int_t idx; duk_require_stack(thr, match_caps + 2); for (idx = 0; idx < match_caps; idx++) { /* match followed by capture(s) */ duk_get_prop_index(thr, 4, (duk_uarridx_t)idx); } } else { #else /* DUK_USE_REGEXP_SUPPORT */ { /* unconditionally */ #endif /* DUK_USE_REGEXP_SUPPORT */ /* match == search string, by definition */ duk_dup_0(thr); } duk_push_uint(thr, (duk_uint_t)match_start_coff); duk_dup_2(thr); /* [ ... replacer match [captures] match_char_offset input ] */ duk_call(thr, duk_get_top(thr) - idx_args); h_repl = duk_to_hstring_m1(thr); /* -> [ ... 
repl_value ] */ DUK_ASSERT(h_repl != NULL); DUK_BW_WRITE_ENSURE_HSTRING(thr, bw, h_repl); duk_pop(thr); /* repl_value */ } else { r = r_start; while (r < r_end) { duk_int_t ch1; duk_int_t ch2; #if defined(DUK_USE_REGEXP_SUPPORT) duk_int_t ch3; #endif duk_size_t left; ch1 = *r++; if (ch1 != DUK_ASC_DOLLAR) { goto repl_write; } DUK_ASSERT(r <= r_end); left = (duk_size_t)(r_end - r); if (left <= 0) { goto repl_write; } ch2 = r[0]; switch (ch2) { case DUK_ASC_DOLLAR: { ch1 = (1u << 8) + DUK_ASC_DOLLAR; goto repl_write; } case DUK_ASC_AMP: { DUK_BW_WRITE_ENSURE_HSTRING(thr, bw, h_match); r++; continue; } case DUK_ASC_GRAVE: { tmp_sz = (duk_size_t)match_start_boff; DUK_BW_WRITE_ENSURE_BYTES(thr, bw, DUK_HSTRING_GET_DATA(h_input), tmp_sz); r++; continue; } case DUK_ASC_SINGLEQUOTE: { duk_uint32_t match_end_boff; /* Use match charlen instead of bytelen, just in case the input and * match codepoint encodings would have different lengths. */ /* XXX: charlen computed here, and also in char2byte helper. */ match_end_boff = (duk_uint32_t)duk_heap_strcache_offset_char2byte( thr, h_input, match_start_coff + (duk_uint_fast32_t)DUK_HSTRING_GET_CHARLEN(h_match)); tmp_sz = (duk_size_t)(DUK_HSTRING_GET_BYTELEN(h_input) - match_end_boff); DUK_BW_WRITE_ENSURE_BYTES( thr, bw, DUK_HSTRING_GET_DATA(h_input) + match_end_boff, tmp_sz); r++; continue; } default: { #if defined(DUK_USE_REGEXP_SUPPORT) duk_int_t capnum, captmp, capadv; /* XXX: optional check, match_caps is zero if no regexp, * so dollar will be interpreted literally anyway. 
*/ if (!is_regexp) { goto repl_write; } if (!(ch2 >= DUK_ASC_0 && ch2 <= DUK_ASC_9)) { goto repl_write; } capnum = ch2 - DUK_ASC_0; capadv = 1; if (left >= 2) { ch3 = r[1]; if (ch3 >= DUK_ASC_0 && ch3 <= DUK_ASC_9) { captmp = capnum * 10 + (ch3 - DUK_ASC_0); if (captmp < match_caps) { capnum = captmp; capadv = 2; } } } if (capnum > 0 && capnum < match_caps) { DUK_ASSERT(is_regexp != 0); /* match_caps == 0 without regexps */ /* regexp res_obj is at offset 4 */ duk_get_prop_index(thr, 4, (duk_uarridx_t)capnum); if (duk_is_string(thr, -1)) { duk_hstring *h_tmp_str; h_tmp_str = duk_known_hstring(thr, -1); DUK_BW_WRITE_ENSURE_HSTRING(thr, bw, h_tmp_str); } else { /* undefined -> skip (replaced with empty) */ } duk_pop(thr); r += capadv; continue; } else { goto repl_write; } #else /* DUK_USE_REGEXP_SUPPORT */ goto repl_write; /* unconditionally */ #endif /* DUK_USE_REGEXP_SUPPORT */ } /* default case */ } /* switch (ch2) */ repl_write: /* ch1 = (r_increment << 8) + byte */ DUK_BW_WRITE_ENSURE_U8(thr, bw, (duk_uint8_t)(ch1 & 0xff)); r += ch1 >> 8; } /* while repl */ } /* if (is_repl_func) */ duk_pop(thr); /* pop regexp res_obj or match string */ #if defined(DUK_USE_REGEXP_SUPPORT) if (!is_global) { #else { /* unconditionally; is_global==0 */ #endif break; } } /* trailer */ tmp_sz = (duk_size_t)(DUK_HSTRING_GET_BYTELEN(h_input) - prev_match_end_boff); DUK_BW_WRITE_ENSURE_BYTES( thr, bw, DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff, tmp_sz); DUK_ASSERT_TOP(thr, 4); DUK_BW_COMPACT(thr, bw); (void)duk_buffer_to_string(thr, -1); /* Safe if inputs are safe. */ return 1; } /* * split() */ /* XXX: very messy now, but works; clean up, remove unused variables (nomimally * used so compiler doesn't complain). 
*/ DUK_INTERNAL duk_ret_t duk_bi_string_prototype_split(duk_hthread *thr) { duk_hstring *h_input; duk_hstring *h_sep; duk_uint32_t limit; duk_uint32_t arr_idx; #if defined(DUK_USE_REGEXP_SUPPORT) duk_bool_t is_regexp; #endif duk_bool_t matched; /* set to 1 if any match exists (needed for empty input special case) */ duk_uint32_t prev_match_end_coff, prev_match_end_boff; duk_uint32_t match_start_boff, match_start_coff; duk_uint32_t match_end_boff, match_end_coff; h_input = duk_push_this_coercible_to_string(thr); DUK_ASSERT(h_input != NULL); duk_push_array(thr); if (duk_is_undefined(thr, 1)) { limit = 0xffffffffUL; } else { limit = duk_to_uint32(thr, 1); } if (limit == 0) { return 1; } /* If the separator is a RegExp, make a "clone" of it. The specification * algorithm calls [[Match]] directly for specific indices; we emulate this * by tweaking lastIndex and using a "force global" variant of * duk_regexp_match() which will use global-style matching even when the * RegExp itself is non-global. */ if (duk_is_undefined(thr, 0)) { /* The spec algorithm first does "R = ToString(separator)" before checking * whether separator is undefined. Since this is side effect free, we can * skip the ToString() here. */ duk_dup_2(thr); duk_put_prop_index(thr, 3, 0); return 1; } else if (duk_get_hobject_with_class(thr, 0, DUK_HOBJECT_CLASS_REGEXP) != NULL) { #if defined(DUK_USE_REGEXP_SUPPORT) duk_push_hobject_bidx(thr, DUK_BIDX_REGEXP_CONSTRUCTOR); duk_dup_0(thr); duk_new(thr, 1); /* [ ... RegExp val ] -> [ ... 
res ] */ duk_replace(thr, 0); /* lastIndex is initialized to zero by new RegExp() */ is_regexp = 1; #else DUK_DCERROR_UNSUPPORTED(thr); #endif } else { duk_to_string(thr, 0); #if defined(DUK_USE_REGEXP_SUPPORT) is_regexp = 0; #endif } /* stack[0] = separator (string or regexp) * stack[1] = limit * stack[2] = input string * stack[3] = result array */ prev_match_end_boff = 0; prev_match_end_coff = 0; arr_idx = 0; matched = 0; for (;;) { /* * The specification uses RegExp [[Match]] to attempt match at specific * offsets. We don't have such a primitive, so we use an actual RegExp * and tweak lastIndex. Since the RegExp may be non-global, we use a * special variant which forces global-like behavior for matching. */ DUK_ASSERT_TOP(thr, 4); #if defined(DUK_USE_REGEXP_SUPPORT) if (is_regexp) { duk_dup_0(thr); duk_dup_2(thr); duk_regexp_match_force_global( thr); /* [ ... regexp input ] -> [ res_obj ] */ if (!duk_is_object(thr, -1)) { duk_pop(thr); break; } matched = 1; duk_get_prop_stridx_short(thr, -1, DUK_STRIDX_INDEX); DUK_ASSERT(duk_is_number(thr, -1)); match_start_coff = duk_get_uint(thr, -1); match_start_boff = (duk_uint32_t)duk_heap_strcache_offset_char2byte( thr, h_input, match_start_coff); duk_pop(thr); if (match_start_coff == DUK_HSTRING_GET_CHARLEN(h_input)) { /* don't allow an empty match at the end of the string */ duk_pop(thr); break; } duk_get_prop_stridx_short(thr, 0, DUK_STRIDX_LAST_INDEX); DUK_ASSERT(duk_is_number(thr, -1)); match_end_coff = duk_get_uint(thr, -1); match_end_boff = (duk_uint32_t)duk_heap_strcache_offset_char2byte( thr, h_input, match_end_coff); duk_pop(thr); /* empty match -> bump and continue */ if (prev_match_end_boff == match_end_boff) { duk_push_uint(thr, (duk_uint_t)(match_end_coff + 1)); duk_put_prop_stridx_short(thr, 0, DUK_STRIDX_LAST_INDEX); duk_pop(thr); continue; } } else { #else /* DUK_USE_REGEXP_SUPPORT */ { /* unconditionally */ #endif /* DUK_USE_REGEXP_SUPPORT */ const duk_uint8_t *p_start, *p_end, *p; /* input string scan */ 
const duk_uint8_t *q_start; /* match string */ duk_size_t q_blen, q_clen; p_start = DUK_HSTRING_GET_DATA(h_input); p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_input); p = p_start + prev_match_end_boff; h_sep = duk_known_hstring(thr, 0); /* symbol already rejected above */ q_start = DUK_HSTRING_GET_DATA(h_sep); q_blen = (duk_size_t)DUK_HSTRING_GET_BYTELEN(h_sep); q_clen = (duk_size_t)DUK_HSTRING_GET_CHARLEN(h_sep); p_end -= q_blen; /* ensure full memcmp() fits in while */ match_start_coff = prev_match_end_coff; if (q_blen == 0) { /* Handle empty separator case: it will always match, and always * triggers the check in step 13.c.iii initially. Note that we * must skip to either end of string or start of first codepoint, * skipping over any continuation bytes! * * Don't allow an empty string to match at the end of the input. */ matched = 1; /* empty separator can always match */ match_start_coff++; p++; while (p < p_end) { if ((p[0] & 0xc0) != 0x80) { goto found; } p++; } goto not_found; } DUK_ASSERT(q_blen > 0 && q_clen > 0); while (p <= p_end) { DUK_ASSERT(p + q_blen <= DUK_HSTRING_GET_DATA(h_input) + DUK_HSTRING_GET_BYTELEN(h_input)); DUK_ASSERT(q_blen > 0); /* no issues with empty memcmp() */ if (duk_memcmp((const void *)p, (const void *)q_start, (size_t)q_blen) == 0) { /* never an empty match, so step 13.c.iii can't be triggered */ goto found; } /* track utf-8 non-continuation bytes */ if ((p[0] & 0xc0) != 0x80) { match_start_coff++; } p++; } not_found: /* not found */ break; found: matched = 1; match_start_boff = (duk_uint32_t)(p - p_start); match_end_coff = (duk_uint32_t)( match_start_coff + q_clen); /* constrained by string length */ match_end_boff = (duk_uint32_t)(match_start_boff + q_blen); /* ditto */ /* empty match (may happen with empty separator) -> bump and continue */ if (prev_match_end_boff == match_end_boff) { prev_match_end_boff++; prev_match_end_coff++; continue; } } /* if (is_regexp) */ /* stack[0] = separator (string or regexp) * stack[1] = limit 
* stack[2] = input string * stack[3] = result array * stack[4] = regexp res_obj (if is_regexp) */ DUK_DDD(DUK_DDDPRINT("split; match_start b=%ld,c=%ld, match_end " "b=%ld,c=%ld, prev_end b=%ld,c=%ld", (long)match_start_boff, (long)match_start_coff, (long)match_end_boff, (long)match_end_coff, (long)prev_match_end_boff, (long)prev_match_end_coff)); duk_push_lstring( thr, (const char *)(DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff), (duk_size_t)(match_start_boff - prev_match_end_boff)); duk_put_prop_index(thr, 3, arr_idx); arr_idx++; if (arr_idx >= limit) { goto hit_limit; } #if defined(DUK_USE_REGEXP_SUPPORT) if (is_regexp) { duk_size_t i, len; len = duk_get_length(thr, 4); for (i = 1; i < len; i++) { DUK_ASSERT(i <= DUK_UARRIDX_MAX); /* cannot have >4G captures */ duk_get_prop_index(thr, 4, (duk_uarridx_t)i); duk_put_prop_index(thr, 3, arr_idx); arr_idx++; if (arr_idx >= limit) { goto hit_limit; } } duk_pop(thr); /* lastIndex already set up for next match */ } else { #else /* DUK_USE_REGEXP_SUPPORT */ { /* unconditionally */ #endif /* DUK_USE_REGEXP_SUPPORT */ /* no action */ } prev_match_end_boff = match_end_boff; prev_match_end_coff = match_end_coff; continue; } /* for */ /* Combined step 11 (empty string special case) and 14-15. 
*/ DUK_DDD(DUK_DDDPRINT("split trailer; prev_end b=%ld,c=%ld", (long)prev_match_end_boff, (long)prev_match_end_coff)); if (DUK_HSTRING_GET_BYTELEN(h_input) > 0 || !matched) { /* Add trailer if: * a) non-empty input * b) empty input and no (zero size) match found (step 11) */ duk_push_lstring( thr, (const char *)DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff, (duk_size_t)(DUK_HSTRING_GET_BYTELEN(h_input) - prev_match_end_boff)); duk_put_prop_index(thr, 3, arr_idx); /* No arr_idx update or limit check */ } return 1; hit_limit: #if defined(DUK_USE_REGEXP_SUPPORT) if (is_regexp) { duk_pop(thr); } #endif return 1; } /* * Various */ #if defined(DUK_USE_REGEXP_SUPPORT) DUK_LOCAL void duk__to_regexp_helper(duk_hthread *thr, duk_idx_t idx, duk_bool_t force_new) { duk_hobject *h; /* Shared helper for match() steps 3-4, search() steps 3-4. */ DUK_ASSERT(idx >= 0); if (force_new) { goto do_new; } h = duk_get_hobject_with_class(thr, idx, DUK_HOBJECT_CLASS_REGEXP); if (!h) { goto do_new; } return; do_new: duk_push_hobject_bidx(thr, DUK_BIDX_REGEXP_CONSTRUCTOR); duk_dup(thr, idx); duk_new(thr, 1); /* [ ... RegExp val ] -> [ ... res ] */ duk_replace(thr, idx); } #endif /* DUK_USE_REGEXP_SUPPORT */ #if defined(DUK_USE_REGEXP_SUPPORT) DUK_INTERNAL duk_ret_t duk_bi_string_prototype_search(duk_hthread *thr) { /* Easiest way to implement the search required by the specification * is to do a RegExp test() with lastIndex forced to zero. To avoid * side effects on the argument, "clone" the RegExp if a RegExp was * given as input. * * The global flag of the RegExp should be ignored; setting lastIndex * to zero (which happens when "cloning" the RegExp) should have an * equivalent effect. */ DUK_ASSERT_TOP(thr, 1); (void)duk_push_this_coercible_to_string(thr); /* at index 1 */ duk__to_regexp_helper(thr, 0 /*index*/, 1 /*force_new*/); /* stack[0] = regexp * stack[1] = string */ /* Avoid using RegExp.prototype methods, as they're writable and * configurable and may have been changed. 
*/ duk_dup_0(thr); duk_dup_1(thr); /* [ ... re_obj input ] */ duk_regexp_match(thr); /* -> [ ... res_obj ] */ if (!duk_is_object(thr, -1)) { duk_push_int(thr, -1); return 1; } duk_get_prop_stridx_short(thr, -1, DUK_STRIDX_INDEX); DUK_ASSERT(duk_is_number(thr, -1)); return 1; } #endif /* DUK_USE_REGEXP_SUPPORT */ #if defined(DUK_USE_REGEXP_SUPPORT) DUK_INTERNAL duk_ret_t duk_bi_string_prototype_match(duk_hthread *thr) { duk_bool_t global; duk_int_t prev_last_index; duk_int_t this_index; duk_int_t arr_idx; DUK_ASSERT_TOP(thr, 1); (void)duk_push_this_coercible_to_string(thr); duk__to_regexp_helper(thr, 0 /*index*/, 0 /*force_new*/); global = duk_get_prop_stridx_boolean(thr, 0, DUK_STRIDX_GLOBAL, NULL); DUK_ASSERT_TOP(thr, 2); /* stack[0] = regexp * stack[1] = string */ if (!global) { duk_regexp_match(thr); /* -> [ res_obj ] */ return 1; /* return 'res_obj' */ } /* Global case is more complex. */ /* [ regexp string ] */ duk_push_int(thr, 0); duk_put_prop_stridx_short(thr, 0, DUK_STRIDX_LAST_INDEX); duk_push_array(thr); /* [ regexp string res_arr ] */ prev_last_index = 0; arr_idx = 0; for (;;) { DUK_ASSERT_TOP(thr, 3); duk_dup_0(thr); duk_dup_1(thr); duk_regexp_match(thr); /* -> [ ... regexp string ] -> [ ... 
res_obj ] */ if (!duk_is_object(thr, -1)) { duk_pop(thr); break; } duk_get_prop_stridx_short(thr, 0, DUK_STRIDX_LAST_INDEX); DUK_ASSERT(duk_is_number(thr, -1)); this_index = duk_get_int(thr, -1); duk_pop(thr); if (this_index == prev_last_index) { this_index++; duk_push_int(thr, this_index); duk_put_prop_stridx_short(thr, 0, DUK_STRIDX_LAST_INDEX); } prev_last_index = this_index; duk_get_prop_index(thr, -1, 0); /* match string */ duk_put_prop_index(thr, 2, (duk_uarridx_t)arr_idx); arr_idx++; duk_pop(thr); /* res_obj */ } if (arr_idx == 0) { duk_push_null(thr); } return 1; /* return 'res_arr' or 'null' */ } #endif /* DUK_USE_REGEXP_SUPPORT */ DUK_INTERNAL duk_ret_t duk_bi_string_prototype_concat(duk_hthread *thr) { /* duk_concat() coerces arguments with ToString() in correct order */ (void)duk_push_this_coercible_to_string(thr); duk_insert(thr, 0); /* this is relatively expensive */ duk_concat(thr, duk_get_top(thr)); return 1; } DUK_INTERNAL duk_ret_t duk_bi_string_prototype_trim(duk_hthread *thr) { DUK_ASSERT_TOP(thr, 0); (void)duk_push_this_coercible_to_string(thr); duk_trim(thr, 0); DUK_ASSERT_TOP(thr, 1); return 1; } #if defined(DUK_USE_ES6) DUK_INTERNAL duk_ret_t duk_bi_string_prototype_repeat(duk_hthread *thr) { duk_hstring *h_input; duk_size_t input_blen; duk_size_t result_len; duk_int_t count_signed; duk_uint_t count; const duk_uint8_t *src; duk_uint8_t *buf; duk_uint8_t *p; duk_double_t d; #if !defined(DUK_USE_PREFER_SIZE) duk_size_t copy_size; duk_uint8_t *p_end; #endif DUK_ASSERT_TOP(thr, 1); h_input = duk_push_this_coercible_to_string(thr); DUK_ASSERT(h_input != NULL); input_blen = DUK_HSTRING_GET_BYTELEN(h_input); /* Count is ToNumber() coerced; +Infinity must be always rejected * (even if input string is zero length), as well as negative values * and -Infinity. -Infinity doesn't require an explicit check * because duk_get_int() clamps it to DUK_INT_MIN which gets rejected * as a negative value (regardless of input string length). 
*/ d = duk_to_number(thr, 0); if (duk_double_is_posinf(d)) { goto fail_range; } count_signed = duk_get_int(thr, 0); if (count_signed < 0) { goto fail_range; } count = (duk_uint_t)count_signed; /* Overflow check for result length. */ result_len = count * input_blen; if (count != 0 && result_len / count != input_blen) { goto fail_range; } /* Temporary fixed buffer, later converted to string. */ buf = (duk_uint8_t *)duk_push_fixed_buffer_nozero(thr, result_len); DUK_ASSERT(buf != NULL); src = (const duk_uint8_t *)DUK_HSTRING_GET_DATA(h_input); DUK_ASSERT(src != NULL); #if defined(DUK_USE_PREFER_SIZE) p = buf; while (count-- > 0) { duk_memcpy((void *)p, (const void *)src, input_blen); /* copy size may be zero, but pointers are valid */ p += input_blen; } #else /* DUK_USE_PREFER_SIZE */ /* Take advantage of already copied pieces to speed up the process * especially for small repeated strings. */ p = buf; p_end = p + result_len; copy_size = input_blen; for (;;) { duk_size_t remain = (duk_size_t)(p_end - p); DUK_DDD(DUK_DDDPRINT( "remain=%ld, copy_size=%ld, input_blen=%ld, result_len=%ld", (long)remain, (long)copy_size, (long)input_blen, (long)result_len)); if (remain <= copy_size) { /* If result_len is zero, this case is taken and does * a zero size copy (with valid pointers). */ duk_memcpy((void *)p, (const void *)src, remain); break; } else { duk_memcpy((void *)p, (const void *)src, copy_size); p += copy_size; } src = (const duk_uint8_t *)buf; /* Use buf as source for larger copies. */ copy_size = (duk_size_t)(p - buf); } #endif /* DUK_USE_PREFER_SIZE */ /* XXX: It would be useful to be able to create a duk_hstring with * a certain byte size whose data area wasn't initialized and which * wasn't in the string table yet. This would allow a string to be * constructed directly without a buffer temporary and when it was * finished, it could be injected into the string table. 
Currently * this isn't possible because duk_hstrings are only tracked by the * intern table (they are not in heap_allocated). */ duk_buffer_to_string(thr, -1); /* Safe if input is safe. */ return 1; fail_range: DUK_DCERROR_RANGE_INVALID_ARGS(thr); } #endif /* DUK_USE_ES6 */ DUK_INTERNAL duk_ret_t duk_bi_string_prototype_locale_compare(duk_hthread *thr) { duk_hstring *h1; duk_hstring *h2; duk_size_t h1_len, h2_len, prefix_len; duk_small_int_t ret = 0; duk_small_int_t rc; /* The current implementation of localeCompare() is simply a codepoint * by codepoint comparison, implemented with a simple string compare * because UTF-8 should preserve codepoint ordering (assuming valid * shortest UTF-8 encoding). * * The specification requires that the return value must be related * to the sort order: e.g. negative means that 'this' comes before * 'that' in sort order. We assume an ascending sort order. */ /* XXX: could share code with duk_js_ops.c, duk_js_compare_helper */ h1 = duk_push_this_coercible_to_string(thr); DUK_ASSERT(h1 != NULL); h2 = duk_to_hstring(thr, 0); DUK_ASSERT(h2 != NULL); h1_len = (duk_size_t)DUK_HSTRING_GET_BYTELEN(h1); h2_len = (duk_size_t)DUK_HSTRING_GET_BYTELEN(h2); prefix_len = (h1_len <= h2_len ? 
h1_len : h2_len); rc = (duk_small_int_t)duk_memcmp((const void *)DUK_HSTRING_GET_DATA(h1), (const void *)DUK_HSTRING_GET_DATA(h2), (size_t)prefix_len); if (rc < 0) { ret = -1; goto done; } else if (rc > 0) { ret = 1; goto done; } /* prefix matches, lengths matter now */ if (h1_len > h2_len) { ret = 1; goto done; } else if (h1_len == h2_len) { DUK_ASSERT(ret == 0); goto done; } ret = -1; goto done; done: duk_push_int(thr, (duk_int_t)ret); return 1; } #if defined(DUK_USE_ES6) DUK_INTERNAL duk_ret_t duk_bi_string_prototype_startswith_endswith(duk_hthread *thr) { duk_int_t magic; duk_hstring *h; duk_hstring *h_search; duk_size_t blen_search; const duk_uint8_t *p_cmp_start; duk_bool_t result; h = duk_push_this_coercible_to_string(thr); DUK_ASSERT(h != NULL); h_search = duk__str_tostring_notregexp(thr, 0); DUK_ASSERT(h_search != NULL); magic = duk_get_current_magic(thr); p_cmp_start = (const duk_uint8_t *)DUK_HSTRING_GET_DATA(h); blen_search = DUK_HSTRING_GET_BYTELEN(h_search); if (duk_is_undefined(thr, 1)) { if (magic) { p_cmp_start = p_cmp_start + DUK_HSTRING_GET_BYTELEN(h) - blen_search; } else { /* p_cmp_start already OK */ } } else { duk_int_t len; duk_int_t pos; DUK_ASSERT(DUK_HSTRING_MAX_BYTELEN <= DUK_INT_MAX); len = (duk_int_t)DUK_HSTRING_GET_CHARLEN(h); pos = duk_to_int_clamped(thr, 1, 0, len); DUK_ASSERT(pos >= 0 && pos <= len); if (magic) { p_cmp_start -= blen_search; /* Conceptually subtracted last, but do already here. */ } DUK_ASSERT(pos >= 0 && pos <= len); p_cmp_start += duk_heap_strcache_offset_char2byte(thr, h, (duk_uint_fast32_t)pos); } /* The main comparison can be done using a memcmp() rather than * doing codepoint comparisons: for CESU-8 strings there is a * canonical representation for every codepoint. But we do need * to deal with the char/byte offset translation to find the * comparison range. 
*/ result = 0; if (p_cmp_start >= DUK_HSTRING_GET_DATA(h) && (duk_size_t)(p_cmp_start - (const duk_uint8_t *)DUK_HSTRING_GET_DATA(h)) + blen_search <= DUK_HSTRING_GET_BYTELEN(h)) { if (duk_memcmp((const void *)p_cmp_start, (const void *)DUK_HSTRING_GET_DATA(h_search), (size_t)blen_search) == 0) { result = 1; } } duk_push_boolean(thr, result); return 1; } #endif /* DUK_USE_ES6 */ #if defined(DUK_USE_ES6) DUK_INTERNAL duk_ret_t duk_bi_string_prototype_includes(duk_hthread *thr) { duk_hstring *h; duk_hstring *h_search; duk_int_t len; duk_int_t pos; h = duk_push_this_coercible_to_string(thr); DUK_ASSERT(h != NULL); h_search = duk__str_tostring_notregexp(thr, 0); DUK_ASSERT(h_search != NULL); len = (duk_int_t)DUK_HSTRING_GET_CHARLEN(h); pos = duk_to_int_clamped(thr, 1, 0, len); DUK_ASSERT(pos >= 0 && pos <= len); pos = duk__str_search_shared(thr, h, h_search, pos, 0 /*backwards*/); duk_push_boolean(thr, pos >= 0); return 1; } #endif /* DUK_USE_ES6 */ #endif /* DUK_USE_STRING_BUILTIN */