ref: ef891c5c9e8a49415674edf768a6f88c8f353809
dir: /jsstring.c/
#include "jsi.h" #include "jsvalue.h" #include "jsbuiltin.h" #include "utf.h" #include "regex.h" #define nelem(a) (sizeof (a) / sizeof (a)[0]) int js_runeat(js_State *J, const char *s, int i) { Rune rune = 0; while (i-- >= 0) { rune = *(unsigned char*)s; if (rune < Runeself) { if (rune == 0) return 0; ++s; } else s += chartorune(&rune, s); } return rune; } const char *js_utfidxtoptr(const char *s, int i) { Rune rune; while (i-- > 0) { rune = *(unsigned char*)s; if (rune < Runeself) { if (rune == 0) return NULL; ++s; } else s += chartorune(&rune, s); } return s; } int js_utfptrtoidx(const char *s, const char *p) { Rune rune; int i = 0; while (s < p) { if (*(unsigned char *)s < Runeself) ++s; else s += chartorune(&rune, s); ++i; } return i; } static int jsB_new_String(js_State *J, int argc) { js_newstring(J, js_isdefined(J, 1) ? js_tostring(J, 1) : ""); return 1; } static int jsB_String(js_State *J, int argc) { js_pushliteral(J, js_isdefined(J, 1) ? js_tostring(J, 1) : ""); return 1; } static int Sp_toString(js_State *J, int argc) { js_Object *self = js_toobject(J, 0); if (self->type != JS_CSTRING) js_typeerror(J, "not a string"); js_pushliteral(J, self->u.s.string); return 1; } static int Sp_valueOf(js_State *J, int argc) { js_Object *self = js_toobject(J, 0); if (self->type != JS_CSTRING) js_typeerror(J, "not a string"); js_pushliteral(J, self->u.s.string); return 1; } static int Sp_charAt(js_State *J, int argc) { char buf[UTFmax + 1]; const char *s = js_tostring(J, 0); int pos = js_tointeger(J, 1); Rune rune = js_runeat(J, s, pos); if (rune > 0) { buf[runetochar(buf, &rune)] = 0; js_pushstring(J, buf); } else { js_pushliteral(J, ""); } return 1; } static int Sp_charCodeAt(js_State *J, int argc) { const char *s = js_tostring(J, 0); int pos = js_tointeger(J, 1); Rune rune = js_runeat(J, s, pos); if (rune > 0) js_pushnumber(J, rune); else js_pushnumber(J, NAN); return 1; } static int Sp_concat(js_State *J, int argc) { char * volatile out; const char *s; int i, n; if (argc == 0) return 1; s = js_tostring(J, 0); n = strlen(s); out = malloc(n + 1); strcpy(out, s); if (js_try(J)) { free(out); js_throw(J); } for (i = 1; i <= argc; ++i) { s = js_tostring(J, i); n += strlen(s); out = realloc(out, n + 1); strcat(out, s); } js_pushstring(J, out); js_endtry(J); free(out); return 1; } static int Sp_indexOf(js_State *J, int argc) { const char *haystack = js_tostring(J, 0); const char *needle = js_tostring(J, 1); int pos = js_tointeger(J, 2); int len = strlen(needle); int k = 0; Rune rune; while (*haystack) { if (k >= pos && !strncmp(haystack, needle, len)) { js_pushnumber(J, k); return 1; } haystack += chartorune(&rune, haystack); ++k; } js_pushnumber(J, -1); return 1; } static int Sp_lastIndexOf(js_State *J, int argc) { const char *haystack = js_tostring(J, 0); const char *needle = js_tostring(J, 1); int pos = js_isdefined(J, 2) ? js_tointeger(J, 2) : strlen(haystack); int len = strlen(needle); int k = 0, last = -1; Rune rune; while (*haystack && k <= pos) { if (!strncmp(haystack, needle, len)) last = k; haystack += chartorune(&rune, haystack); ++k; } js_pushnumber(J, last); return 1; } static int Sp_localeCompare(js_State *J, int argc) { const char *a = js_tostring(J, 0); const char *b = js_tostring(J, 1); return strcmp(a, b); } static int Sp_slice(js_State *J, int argc) { const char *str = js_tostring(J, 0); const char *ss, *ee; int len = utflen(str); int s = js_tointeger(J, 1); int e = js_isdefined(J, 2) ? js_tointeger(J, 2) : len; s = s < 0 ? s + len : s; e = e < 0 ? e + len : e; s = s < 0 ? 0 : s > len ? len : s; e = e < 0 ? 0 : e > len ? len : e; if (s < e) { ss = js_utfidxtoptr(str, s); ee = js_utfidxtoptr(ss, e - s); } else { ss = js_utfidxtoptr(str, e); ee = js_utfidxtoptr(ss, s - e); } js_pushlstring(J, ss, ee - ss); return 1; } static int Sp_substring(js_State *J, int argc) { const char *str = js_tostring(J, 0); const char *ss, *ee; int len = utflen(str); int s = js_tointeger(J, 1); int e = js_isdefined(J, 2) ? js_tointeger(J, 2) : len; s = s < 0 ? 0 : s > len ? len : s; e = e < 0 ? 0 : e > len ? len : e; if (s < e) { ss = js_utfidxtoptr(str, s); ee = js_utfidxtoptr(ss, e - s); } else { ss = js_utfidxtoptr(str, e); ee = js_utfidxtoptr(ss, s - e); } js_pushlstring(J, ss, ee - ss); return 1; } static int Sp_toLowerCase(js_State *J, int argc) { const char *src = js_tostring(J, 0); char *dst = malloc(UTFmax * strlen(src) + 1); const char *s = src; char *d = dst; Rune rune; while (*s) { s += chartorune(&rune, s); rune = tolowerrune(rune); d += runetochar(d, &rune); } if (js_try(J)) { free(dst); js_throw(J); } js_pushstring(J, dst); js_endtry(J); free(dst); return 1; } static int Sp_toUpperCase(js_State *J, int argc) { const char *src = js_tostring(J, 0); char *dst = malloc(UTFmax * strlen(src) + 1); const char *s = src; char *d = dst; Rune rune; while (*s) { s += chartorune(&rune, s); rune = toupperrune(rune); d += runetochar(d, &rune); } if (js_try(J)) { free(dst); js_throw(J); } js_pushstring(J, dst); js_endtry(J); free(dst); return 1; } static int istrim(int c) { return c == 0x9 || c == 0xB || c == 0xC || c == 0x20 || c == 0xA0 || c == 0xFEFF || c == 0xA || c == 0xD || c == 0x2028 || c == 0x2029; } static int Sp_trim(js_State *J, int argc) { const char *s, *e; s = js_tostring(J, 0); while (istrim(*s)) ++s; e = s + strlen(s); while (e > s && istrim(e[-1])) --e; js_pushlstring(J, s, e - s); return 1; } static int S_fromCharCode(js_State *J, int argc) { int i; Rune c; char *s, *p; s = p = malloc(argc * UTFmax + 1); if (js_try(J)) { free(s); js_throw(J); } for (i = 1; i <= argc; ++i) { c = js_tointeger(J, i); // TODO: ToUInt16() p += runetochar(p, &c); } *p = 0; js_pushstring(J, s); js_endtry(J); free(s); return 1; } static int Sp_match(js_State *J, int argc) { js_Regexp *re; Resub m[10]; const char *text; unsigned int len; const char *a, *b, *c, *e; text = js_tostring(J, 0); if (js_isregexp(J, 1)) js_copy(J, 1); else if (js_isundefined(J, 1)) js_newregexp(J, "", 0); else js_newregexp(J, js_tostring(J, 1), 0); re = js_toregexp(J, -1); if (!(re->flags & JS_REGEXP_G)) return js_RegExp_prototype_exec(J, re, text); re->last = 0; js_newarray(J); len = 0; a = text; e = text + strlen(text); while (a <= e) { if (js_regexec(re->prog, a, nelem(m), m, a > text ? REG_NOTBOL : 0)) break; b = m[0].sp; c = m[0].ep; js_pushlstring(J, b, c - b); js_setindex(J, -2, len++); a = c; if (c - b == 0) ++a; } return 1; } static int Sp_search(js_State *J, int argc) { js_Regexp *re; Resub m[10]; const char *text; text = js_tostring(J, 0); if (js_isregexp(J, 1)) js_copy(J, 1); else if (js_isundefined(J, 1)) js_newregexp(J, "", 0); else js_newregexp(J, js_tostring(J, 1), 0); re = js_toregexp(J, -1); if (!js_regexec(re->prog, text, nelem(m), m, 0)) js_pushnumber(J, js_utfptrtoidx(text, m[0].sp)); else js_pushnumber(J, -1); return 1; } static int Sp_replace_regexp(js_State *J, int argc) { js_Regexp *re; Resub m[10]; const char *source, *s, *r; js_Buffer *sb = NULL; int n, x; source = js_tostring(J, 0); re = js_toregexp(J, 1); if (js_regexec(re->prog, source, nelem(m), m, 0)) { js_copy(J, 0); return 1; } re->last = 0; loop: s = m[0].sp; n = m[0].ep - m[0].sp; if (js_iscallable(J, 2)) { js_copy(J, 2); js_pushglobal(J); for (x = 0; m[x].sp; ++x) /* arg 0..x: substring and subexps that matched */ js_pushlstring(J, m[x].sp, m[x].ep - m[x].sp); js_pushnumber(J, s - source); /* arg x+2: offset within search string */ js_copy(J, 0); /* arg x+3: search string */ js_call(J, 2 + x); r = js_tostring(J, -1); sb_putm(&sb, source, s); sb_puts(&sb, r); js_pop(J, 1); } else { r = js_tostring(J, 2); sb_putm(&sb, source, s); while (*r) { if (*r == '$') { switch (*(++r)) { case '$': sb_putc(&sb, '$'); break; case '`': sb_putm(&sb, source, s); break; case '\'': sb_puts(&sb, s + n); break; case '&': sb_putm(&sb, s, s + n); break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': x = *r - '0'; if (m[x].sp) { sb_putm(&sb, m[x].sp, m[x].ep); } else { sb_putc(&sb, '$'); sb_putc(&sb, '0'+x); } break; default: sb_putc(&sb, '$'); sb_putc(&sb, *r); break; } ++r; } else { sb_putc(&sb, *r++); } } } if (re->flags & JS_REGEXP_G) { source = m[0].ep; if (n == 0) { if (*source) sb_putc(&sb, *source++); else goto end; } if (!js_regexec(re->prog, source, nelem(m), m, REG_NOTBOL)) goto loop; } end: sb_puts(&sb, s + n); sb_putc(&sb, 0); if (js_try(J)) { free(sb); js_throw(J); } js_pushstring(J, sb ? sb->s : ""); js_endtry(J); free(sb); return 1; } static int Sp_replace_string(js_State *J, int argc) { const char *source, *needle, *s, *r; js_Buffer *sb = NULL; int n; source = js_tostring(J, 0); needle = js_tostring(J, 1); s = strstr(source, needle); if (!s) { js_copy(J, 0); return 1; } n = strlen(needle); if (js_iscallable(J, 2)) { js_copy(J, 2); js_pushglobal(J); js_pushlstring(J, s, n); /* arg 1: substring that matched */ js_pushnumber(J, s - source); /* arg 2: offset within search string */ js_copy(J, 0); /* arg 3: search string */ js_call(J, 3); r = js_tostring(J, -1); sb_putm(&sb, source, s); sb_puts(&sb, r); sb_puts(&sb, s + n); sb_putc(&sb, 0); js_pop(J, 1); } else { r = js_tostring(J, 2); sb_putm(&sb, source, s); while (*r) { if (*r == '$') { switch (*(++r)) { case '$': sb_putc(&sb, '$'); break; case '&': sb_putm(&sb, s, s + n); break; case '`': sb_putm(&sb, source, s); break; case '\'': sb_puts(&sb, s + n); break; default: sb_putc(&sb, '$'); sb_putc(&sb, *r); break; } ++r; } else { sb_putc(&sb, *r++); } } sb_puts(&sb, s + n); sb_putc(&sb, 0); } if (js_try(J)) { free(sb); js_throw(J); } js_pushstring(J, sb ? sb->s : ""); js_endtry(J); free(sb); return 1; } static int Sp_replace(js_State *J, int argc) { if (js_isregexp(J, 1)) return Sp_replace_regexp(J, argc); return Sp_replace_string(J, argc); } static int Sp_split_regexp(js_State *J, int argc) { js_Regexp *re; Resub m[10]; const char *text; unsigned int limit, len, k; const char *p, *a, *b, *c, *e; text = js_tostring(J, 0); re = js_toregexp(J, 1); limit = js_isdefined(J, 2) ? js_touint32(J, 2) : 1 << 30; js_newarray(J); len = 0; e = text + strlen(text); /* splitting the empty string */ if (e == 0) { if (js_regexec(re->prog, text, nelem(m), m, 0)) { if (len == limit) return 1; js_pushliteral(J, ""); js_setindex(J, -2, 0); } return 1; } p = a = text; while (a < e) { if (js_regexec(re->prog, a, nelem(m), m, a > text ? REG_NOTBOL : 0)) break; /* no match */ b = m[0].sp; c = m[0].ep; /* empty string at end of last match */ if (b == p) { ++a; continue; } if (len == limit) return 1; js_pushlstring(J, p, b - p); js_setindex(J, -2, len++); for (k = 1; k < nelem(m) && m[k].sp; ++k) { if (len == limit) return 1; js_pushlstring(J, m[k].sp, m[k].ep - m[k].sp); js_setindex(J, -2, len++); } a = p = c; } if (len == limit) return 1; js_pushstring(J, p); js_setindex(J, -2, len); return 1; } static int Sp_split_string(js_State *J, int argc) { const char *str = js_tostring(J, 0); const char *sep = js_tostring(J, 1); unsigned int limit = js_isdefined(J, 2) ? js_touint32(J, 2) : 1 << 30; unsigned int i, n; js_newarray(J); n = strlen(sep); /* empty string */ if (n == 0) { Rune rune; for (i = 0; *str && i < limit; ++i) { n = chartorune(&rune, str); js_pushlstring(J, str, n); js_setindex(J, -2, i); str += n; } return 1; } for (i = 0; str && i < limit; ++i) { const char *s = strstr(str, sep); if (s) { js_pushlstring(J, str, s-str); js_setindex(J, -2, i); str = s + n; } else { js_pushstring(J, str); js_setindex(J, -2, i); str = NULL; } } return 1; } static int Sp_split(js_State *J, int argc) { if (js_isundefined(J, 1)) { js_newarray(J); js_copy(J, 0); js_setindex(J, -2, 0); return 1; } if (js_isregexp(J, 1)) return Sp_split_regexp(J, argc); return Sp_split_string(J, argc); } void jsB_initstring(js_State *J) { J->String_prototype->u.s.string = ""; J->String_prototype->u.s.length = 0; js_pushobject(J, J->String_prototype); { jsB_propf(J, "toString", Sp_toString, 0); jsB_propf(J, "valueOf", Sp_valueOf, 0); jsB_propf(J, "charAt", Sp_charAt, 1); jsB_propf(J, "charCodeAt", Sp_charCodeAt, 1); jsB_propf(J, "concat", Sp_concat, 1); jsB_propf(J, "indexOf", Sp_indexOf, 1); jsB_propf(J, "lastIndexOf", Sp_lastIndexOf, 1); jsB_propf(J, "localeCompare", Sp_localeCompare, 1); jsB_propf(J, "match", Sp_match, 1); jsB_propf(J, "replace", Sp_replace, 2); jsB_propf(J, "search", Sp_search, 1); jsB_propf(J, "slice", Sp_slice, 2); jsB_propf(J, "split", Sp_split, 2); jsB_propf(J, "substring", Sp_substring, 2); jsB_propf(J, "toLowerCase", Sp_toLowerCase, 0); jsB_propf(J, "toLocaleLowerCase", Sp_toLowerCase, 0); jsB_propf(J, "toUpperCase", Sp_toUpperCase, 0); jsB_propf(J, "toLocaleUpperCase", Sp_toUpperCase, 0); /* ES5 */ jsB_propf(J, "trim", Sp_trim, 0); } js_newcconstructor(J, jsB_String, jsB_new_String, 1); { jsB_propf(J, "fromCharCode", S_fromCharCode, 1); } js_defglobal(J, "String", JS_DONTENUM); }