shithub: libmujs

ref: a3975e7b0a4a2b694ee635cb9d27b413234c846a
dir: /jsstring.c/

View raw version
#include "jsi.h"
#include "utf.h"
#include "regexp.h"

/*
 * Run js_regexec() and pass its result through unchanged.
 * A negative result is reported to the script via js_error().
 */
static int js_doregexec(js_State *J, Reprog *prog, const char *string, Resub *sub, int eflags)
{
	int status = js_regexec(prog, string, sub, eflags);
	if (status >= 0)
		return status;
	js_error(J, "regexec failed");
	return status;
}

/*
 * Fetch stack slot 'idx' as a C string, raising a TypeError first when the
 * value is not coercible (the error message names null and undefined).
 */
static const char *checkstring(js_State *J, int idx)
{
	int coercible = js_iscoercible(J, idx);
	if (!coercible) {
		js_typeerror(J, "string function called on null or undefined");
	}
	return js_tostring(J, idx);
}

/*
 * Return the code unit at index 'i' of the UTF-8 string 's', where runes
 * >= 0x10000 occupy two index positions (a surrogate pair).
 * Returns EOF when 'i' is negative or the string ends first.
 */
int js_runeat(js_State *J, const char *s, int i)
{
	Rune r = EOF;

	while (i >= 0) {
		r = *(unsigned char*)s;
		if (r >= Runeself) {
			/* multi-byte sequence: decode it and consume one or two index slots */
			s += chartorune(&r, s);
			i -= (r >= 0x10000) ? 2 : 1;
			continue;
		}
		if (r == 0)
			return EOF;
		++s;
		--i;
	}

	if (r < 0x10000)
		return r;

	/* i == -2: the index addressed the first half of the pair (high surrogate) */
	if (i == -2)
		return 0xd800 + ((r - 0x10000) >> 10);

	/* otherwise it addressed the second half (low surrogate) */
	return 0xdc00 + ((r - 0x10000) & 0x3ff);
}

/*
 * Length of the NUL-terminated UTF-8 string 's', counting each rune
 * >= 0x10000 as two units and every other rune as one.
 */
int js_utflen(const char *s)
{
	Rune r;
	int units = 0;
	int byte;

	while ((byte = *(unsigned char *)s) != 0) {
		if (byte < Runeself) {
			++s;
			++units;
			continue;
		}
		s += chartorune(&r, s);
		units += (r >= 0x10000) ? 2 : 1;
	}
	return units;
}

/*
 * Convert the byte pointer 'p' (inside the UTF-8 string starting at 's')
 * into an index, counting runes >= 0x10000 as two units and all others as
 * one -- the same counting rule js_utflen() uses.
 *
 * The rune value is only consulted on the multi-byte path: single-byte
 * characters never call chartorune(), so looking at 'rune' after them would
 * read an uninitialised (or stale) value.
 */
int js_utfptrtoidx(const char *s, const char *p)
{
	Rune rune;
	int i = 0;
	while (s < p) {
		if (*(unsigned char *)s < Runeself) {
			++s;
			++i;
		} else {
			s += chartorune(&rune, s);
			i += (rune >= 0x10000) ? 2 : 1;
		}
	}
	return i;
}

/* Constructor form: js_newstring() of argument 1 as a string, or "" when the stack holds no argument. */
static void jsB_new_String(js_State *J)
{
	const char *value = "";
	if (js_gettop(J) > 1)
		value = js_tostring(J, 1);
	js_newstring(J, value);
}

/* Function form: js_pushstring() of argument 1 as a string, or "" when the stack holds no argument. */
static void jsB_String(js_State *J)
{
	const char *value = "";
	if (js_gettop(J) > 1)
		value = js_tostring(J, 1);
	js_pushstring(J, value);
}

/* String.prototype.toString: push the string held by a JS_CSTRING object; TypeError for any other object type. */
static void Sp_toString(js_State *J)
{
	js_Object *obj = js_toobject(J, 0);
	if (obj->type != JS_CSTRING) {
		js_typeerror(J, "not a string");
	}
	js_pushstring(J, obj->u.s.string);
}

/* String.prototype.valueOf: push the string held by a JS_CSTRING object; TypeError for any other object type. */
static void Sp_valueOf(js_State *J)
{
	js_Object *obj = js_toobject(J, 0);
	if (obj->type != JS_CSTRING) {
		js_typeerror(J, "not a string");
	}
	js_pushstring(J, obj->u.s.string);
}

/*
 * String.prototype.charAt: push the character at the index given by
 * argument 1, or "" when js_runeat() reports nothing there.
 */
static void Sp_charAt(js_State *J)
{
	char utf[UTFmax + 1];
	const char *str = checkstring(J, 0);
	int index = js_tointeger(J, 1);
	Rune r = js_runeat(J, str, index);
	int nbytes;

	if (!(r >= 0)) {
		js_pushliteral(J, "");
		return;
	}

	/* encode the rune into the scratch buffer and terminate it */
	nbytes = runetochar(utf, &r);
	utf[nbytes] = 0;
	js_pushstring(J, utf);
}

/*
 * String.prototype.charCodeAt: push the numeric value of the code unit at
 * the index given by argument 1, or NaN when js_runeat() reports nothing there.
 */
static void Sp_charCodeAt(js_State *J)
{
	const char *str = checkstring(J, 0);
	int index = js_tointeger(J, 1);
	Rune r = js_runeat(J, str, index);

	if (!(r >= 0)) {
		js_pushnumber(J, NAN);
		return;
	}
	js_pushnumber(J, r);
}

/*
 * String.prototype.concat: append the string form of every argument to the
 * string form of 'this'. A RangeError is raised once the accumulated size
 * (including the terminator) exceeds JS_STRLIMIT.
 */
static void Sp_concat(js_State *J)
{
	int argc = js_gettop(J);
	int idx;
	int size;
	/* volatile: read in the js_try error path after being modified below */
	char * volatile buf = NULL;
	const char *part;

	/* no arguments: leave the stack as it is */
	if (argc == 1)
		return;

	part = checkstring(J, 0);
	size = 1 + strlen(part);

	if (js_try(J)) {
		/* error path: release the work buffer, then propagate */
		js_free(J, buf);
		js_throw(J);
	}

	if (size > JS_STRLIMIT)
		js_rangeerror(J, "invalid string length");
	buf = js_malloc(J, size);
	strcpy(buf, part);

	idx = 1;
	while (idx < argc) {
		part = js_tostring(J, idx);
		size += strlen(part);
		if (size > JS_STRLIMIT)
			js_rangeerror(J, "invalid string length");
		buf = js_realloc(J, buf, size);
		strcat(buf, part);
		++idx;
	}

	js_pushstring(J, buf);
	js_endtry(J);
	js_free(J, buf);
}

/*
 * String.prototype.indexOf(needle, pos): push the index of the first
 * occurrence of 'needle' at or after 'pos', or -1 when there is none.
 *
 * Indices count runes >= 0x10000 as two units, matching js_utflen() and
 * js_runeat(), so the result can be fed back into charAt/substring.
 * An empty needle also matches at the very end of the string (so
 * "".indexOf("") is 0 and "abc".indexOf("", 9) is 3).
 */
static void Sp_indexOf(js_State *J)
{
	const char *haystack = checkstring(J, 0);
	const char *needle = js_tostring(J, 1);
	int pos = js_tointeger(J, 2);
	int len = strlen(needle);
	int k = 0;
	Rune rune;
	while (*haystack) {
		if (k >= pos && !strncmp(haystack, needle, len)) {
			js_pushnumber(J, k);
			return;
		}
		haystack += chartorune(&rune, haystack);
		k += (rune >= 0x10000) ? 2 : 1;
	}
	/* end of string reached: only the empty needle can still match here */
	if (len == 0)
		js_pushnumber(J, k);
	else
		js_pushnumber(J, -1);
}

/*
 * String.prototype.lastIndexOf(needle, pos): push the index of the last
 * occurrence of 'needle' that starts at or before 'pos', or -1.
 *
 * Indices count runes >= 0x10000 as two units, matching js_utflen().
 * A negative 'pos' is clamped to 0, and an empty needle may match at the
 * very end of the string (so "abc".lastIndexOf("") is 3).
 */
static void Sp_lastIndexOf(js_State *J)
{
	const char *haystack = checkstring(J, 0);
	const char *needle = js_tostring(J, 1);
	/* default: byte length, which is never smaller than the unit length */
	int pos = js_isdefined(J, 2) ? js_tointeger(J, 2) : (int)strlen(haystack);
	int len = strlen(needle);
	int k = 0, last = -1;
	Rune rune;
	if (pos < 0)
		pos = 0;
	while (*haystack && k <= pos) {
		if (!strncmp(haystack, needle, len))
			last = k;
		haystack += chartorune(&rune, haystack);
		k += (rune >= 0x10000) ? 2 : 1;
	}
	/* the loop never tests the end-of-string position; the empty needle matches there */
	if (len == 0 && !*haystack && k <= pos)
		last = k;
	js_pushnumber(J, last);
}

/* String.prototype.localeCompare: push the strcmp() result of 'this' against argument 1 (byte-wise comparison). */
static void Sp_localeCompare(js_State *J)
{
	const char *lhs = checkstring(J, 0);
	const char *rhs = js_tostring(J, 1);
	int order = strcmp(lhs, rhs);
	js_pushnumber(J, order);
}

/*
 * Push the substring of 's' that starts at unit index 'a' and spans 'n'
 * units, where runes >= 0x10000 count as two units (a surrogate pair).
 * Callers are expected to pass indices already clamped to the string.
 *
 * When a boundary falls in the middle of such a rune, the half that lies
 * inside the range is emitted as a lone surrogate. Both boundaries may be
 * split at once; the result is assembled in a single buffer and pushed
 * exactly once.
 */
static void Sp_substring_imp(js_State *J, const char *s, int a, int n)
{
	Rune head_rune = 0, tail_rune = 0;
	const char *head, *tail;
	/* volatile + NULL: read by the js_try error path, possibly before allocation */
	char * volatile p = NULL;
	char *out;
	int i, k, len;

	/* an empty range is always the empty string, even if 'a' splits a pair */
	if (n <= 0) {
		js_pushliteral(J, "");
		return;
	}

	/* find start of substring */
	head = s;
	for (i = 0; i < a; ++i) {
		head += chartorune(&head_rune, head);
		if (head_rune >= 0x10000)
			++i;
	}

	/* find end of substring (k starts at 1 when the low surrogate is already inside) */
	tail = head;
	for (k = i - a; k < n; ++k) {
		tail += chartorune(&tail_rune, tail);
		if (tail_rune >= 0x10000)
			++k;
	}

	/* no surrogate pair splits! */
	if (i == a && k == n) {
		js_pushlstring(J, head, tail - head);
		return;
	}

	if (js_try(J)) {
		js_free(J, p);
		js_throw(J);
	}

	/* room for the copied bytes plus one encoded surrogate at each end */
	p = out = js_malloc(J, 2 * UTFmax + (tail - head));
	len = 0;

	/* substring starts with low surrogate (head is just after character) */
	if (i > a) {
		head_rune = 0xdc00 + ((head_rune - 0x10000) & 0x3ff);
		len += runetochar(out, &head_rune);
	}

	/* substring ends with high surrogate: drop the full character from the copy */
	if (k > n)
		tail -= runelen(tail_rune);

	memcpy(out + len, head, tail - head);
	len += tail - head;

	if (k > n) {
		tail_rune = 0xd800 + ((tail_rune - 0x10000) >> 10);
		len += runetochar(out + len, &tail_rune);
	}

	js_pushlstring(J, out, len);

	js_endtry(J);
	js_free(J, out);
}

/*
 * String.prototype.slice(start, end): negative indices count from the end
 * of the string; both are then clamped to [0, len].
 *
 * Unlike substring(), slice() never swaps its arguments: when start is not
 * before end the result is the empty string.
 */
static void Sp_slice(js_State *J)
{
	const char *str = checkstring(J, 0);
	int len = js_utflen(str);
	int s = js_tointeger(J, 1);
	int e = js_isdefined(J, 2) ? js_tointeger(J, 2) : len;

	/* negative indices are relative to the end */
	s = s < 0 ? s + len : s;
	e = e < 0 ? e + len : e;

	s = s < 0 ? 0 : s > len ? len : s;
	e = e < 0 ? 0 : e > len ? len : e;

	if (s < e)
		Sp_substring_imp(J, str, s, e - s);
	else
		js_pushliteral(J, "");
}

/*
 * String.prototype.substring(start, end): clamp both indices to [0, len]
 * and extract the range between the smaller and the larger of the two.
 */
static void Sp_substring(js_State *J)
{
	const char *str = checkstring(J, 0);
	int len = js_utflen(str);
	int lo = js_tointeger(J, 1);
	int hi = len;

	if (js_isdefined(J, 2))
		hi = js_tointeger(J, 2);

	if (lo < 0)
		lo = 0;
	else if (lo > len)
		lo = len;

	if (hi < 0)
		hi = 0;
	else if (hi > len)
		hi = len;

	/* arguments given in reverse order are swapped */
	if (hi < lo) {
		int tmp = lo;
		lo = hi;
		hi = tmp;
	}

	Sp_substring_imp(J, str, lo, hi - lo);
}

/* String.prototype.toLowerCase: map every rune through tolowerrune() and push the re-encoded string. */
static void Sp_toLowerCase(js_State *J)
{
	const char *src = checkstring(J, 0);
	/* volatile: read in the js_try error path after being assigned below */
	char * volatile buf = NULL;
	char *out;
	Rune r;

	if (js_try(J)) {
		js_free(J, buf);
		js_throw(J);
	}

	/* UTFmax output bytes are reserved per input byte, plus the terminator */
	buf = js_malloc(J, UTFmax * strlen(src) + 1);
	out = buf;
	while (*src != 0) {
		src += chartorune(&r, src);
		r = tolowerrune(r);
		out += runetochar(out, &r);
	}
	*out = 0;

	js_pushstring(J, buf);
	js_endtry(J);
	js_free(J, buf);
}

/* String.prototype.toUpperCase: map every rune through toupperrune() and push the re-encoded string. */
static void Sp_toUpperCase(js_State *J)
{
	const char *src = checkstring(J, 0);
	/* volatile: read in the js_try error path after being assigned below */
	char * volatile buf = NULL;
	char *out;
	Rune r;

	if (js_try(J)) {
		js_free(J, buf);
		js_throw(J);
	}

	/* UTFmax output bytes are reserved per input byte, plus the terminator */
	buf = js_malloc(J, UTFmax * strlen(src) + 1);
	out = buf;
	while (*src != 0) {
		src += chartorune(&r, src);
		r = toupperrune(r);
		out += runetochar(out, &r);
	}
	*out = 0;

	js_pushstring(J, buf);
	js_endtry(J);
	js_free(J, buf);
}

/* Return 1 when 'c' is one of the characters removed by trim(), 0 otherwise. */
static int istrim(int c)
{
	switch (c) {
	case 0x9:    /* tab */
	case 0xA:    /* line feed */
	case 0xB:    /* vertical tab */
	case 0xC:    /* form feed */
	case 0xD:    /* carriage return */
	case 0x20:   /* space */
	case 0xA0:   /* no-break space */
	case 0x2028: /* line separator */
	case 0x2029: /* paragraph separator */
	case 0xFEFF: /* byte order mark */
		return 1;
	default:
		return 0;
	}
}

/*
 * String.prototype.trim: push 'this' with leading and trailing istrim()
 * characters removed.
 *
 * The string is decoded rune by rune so that the multi-byte members of the
 * istrim() set (U+00A0, U+FEFF, U+2028, U+2029) are recognised, and so that
 * a trailing byte of some other character is never mistaken for one.
 */
static void Sp_trim(js_State *J)
{
	const char *s, *e, *p;
	Rune rune;
	int n;

	s = checkstring(J, 0);

	/* skip leading whitespace; the terminator decodes to rune 0, which stops the loop */
	for (;;) {
		n = chartorune(&rune, s);
		if (!istrim(rune))
			break;
		s += n;
	}

	/* scan forward, remembering the end of the last non-whitespace rune */
	e = p = s;
	while (*p) {
		p += chartorune(&rune, p);
		if (!istrim(rune))
			e = p;
	}

	js_pushlstring(J, s, e - s);
}

/* String.fromCharCode: encode each argument (converted with js_touint32) as a rune and push the resulting string. */
static void S_fromCharCode(js_State *J)
{
	int argc = js_gettop(J);
	int idx;
	/* volatile: read in the js_try error path after being assigned below */
	char * volatile buf = NULL;
	char *out;
	Rune code;

	if (js_try(J)) {
		js_free(J, buf);
		js_throw(J);
	}

	/* UTFmax bytes per argument, plus the terminator */
	out = js_malloc(J, (argc-1) * UTFmax + 1);
	buf = out;

	idx = 1;
	while (idx < argc) {
		code = js_touint32(J, idx);
		out += runetochar(out, &code);
		++idx;
	}
	*out = 0;

	js_pushstring(J, buf);
	js_endtry(J);
	js_free(J, buf);
}

/*
 * String.prototype.match(regexp).
 *
 * Argument 1 is used as-is when it is a regexp; otherwise a new regexp is
 * built from its string form ("" when undefined). Without the JS_REGEXP_G
 * flag the work is delegated to js_RegExp_prototype_exec(). With it, every
 * match is collected into an array; null is pushed instead when nothing
 * matched.
 */
static void Sp_match(js_State *J)
{
	js_Regexp *re;
	const char *text;
	int len;
	const char *a, *b, *c, *e;
	Resub m;

	text = checkstring(J, 0);

	/* leave the regexp to use on top of the stack */
	if (js_isregexp(J, 1))
		js_copy(J, 1);
	else if (js_isundefined(J, 1))
		js_newregexp(J, "", 0);
	else
		js_newregexp(J, js_tostring(J, 1), 0);

	re = js_toregexp(J, -1);
	if (!(re->flags & JS_REGEXP_G)) {
		js_RegExp_prototype_exec(J, re, text);
		return;
	}

	re->last = 0;

	js_newarray(J);

	/* a: where the next search starts; e: the terminator of text */
	len = 0;
	a = text;
	e = text + strlen(text);
	while (a <= e) {
		/* nonzero means no match; past the start, REG_NOTBOL is passed */
		if (js_doregexec(J, re->prog, a, &m, a > text ? REG_NOTBOL : 0))
			break;

		/* b..c is the span of the whole match */
		b = m.sub[0].sp;
		c = m.sub[0].ep;

		js_pushlstring(J, b, c - b);
		js_setindex(J, -2, len++);

		/* resume after the match; step one byte past an empty match to make progress */
		a = c;
		if (c - b == 0)
			++a;
	}

	/* no matches: replace the empty array with null */
	if (len == 0) {
		js_pop(J, 1);
		js_pushnull(J);
	}
}

/*
 * String.prototype.search(regexp): push the index of the first match of the
 * regexp in 'this', or -1 when there is none. A non-regexp argument is
 * turned into a regexp from its string form ("" when undefined).
 */
static void Sp_search(js_State *J)
{
	js_Regexp *re;
	const char *str;
	Resub match;

	str = checkstring(J, 0);

	if (js_isregexp(J, 1)) {
		js_copy(J, 1);
	} else {
		const char *pattern = js_isundefined(J, 1) ? "" : js_tostring(J, 1);
		js_newregexp(J, pattern, 0);
	}

	re = js_toregexp(J, -1);

	if (js_doregexec(J, re->prog, str, &match, 0)) {
		/* nonzero: no match */
		js_pushnumber(J, -1);
		return;
	}
	js_pushnumber(J, js_utfptrtoidx(str, match.sub[0].sp));
}

/*
 * String.prototype.replace(regexp, replacement).
 *
 * Replaces the first match, or every match when the regexp has the
 * JS_REGEXP_G flag. The replacement (argument 2) is either a function,
 * called with (match, capture 1..n, offset, whole string), or a template
 * string in which $$, $&, $`, $' and $n / $nn are expanded.
 *
 * 'text' always points at the start of the subject string; 'source' moves
 * forward to the end of the previous match during a global replace.
 *
 * NOTE(review): 'sb' is only protected by js_try at the very end; an error
 * raised by the callback or by a string conversion before that point does
 * not appear to release it. The callback offset is a byte offset, not a
 * UTF-16 unit index.
 */
static void Sp_replace_regexp(js_State *J)
{
	js_Regexp *re;
	const char *text, *source, *s, *r;
	js_Buffer *sb = NULL;
	int n, x;
	Resub m;

	text = source = checkstring(J, 0);
	re = js_toregexp(J, 1);

	/* no match at all: the result is the subject itself */
	if (js_doregexec(J, re->prog, source, &m, 0)) {
		js_copy(J, 0);
		return;
	}

	re->last = 0;

loop:
	/* s..s+n is the span of the current match */
	s = m.sub[0].sp;
	n = m.sub[0].ep - m.sub[0].sp;

	if (js_iscallable(J, 2)) {
		js_copy(J, 2);
		js_pushundefined(J);
		/* arg 0..nsub-1: whole match and captures; unmatched captures are undefined */
		for (x = 0; x < m.nsub; ++x) {
			if (m.sub[x].sp)
				js_pushlstring(J, m.sub[x].sp, m.sub[x].ep - m.sub[x].sp);
			else
				js_pushundefined(J);
		}
		js_pushnumber(J, s - text); /* arg x+2: offset within search string */
		js_copy(J, 0); /* arg x+3: search string */
		js_call(J, 2 + x);
		r = js_tostring(J, -1);
		js_putm(J, &sb, source, s);
		js_puts(J, &sb, r);
		js_pop(J, 1);
	} else {
		r = js_tostring(J, 2);
		js_putm(J, &sb, source, s);
		while (*r) {
			if (*r == '$') {
				switch (*(++r)) {
				case 0: --r; /* end of string; back up */
				/* fallthrough */
				case '$': js_putc(J, &sb, '$'); break;
				/* everything before the match, measured from the start of the subject */
				case '`': js_putm(J, &sb, text, s); break;
				case '\'': js_puts(J, &sb, s + n); break;
				case '&':
					js_putm(J, &sb, s, s + n);
					break;
				case '0': case '1': case '2': case '3': case '4':
				case '5': case '6': case '7': case '8': case '9':
					x = *r - '0';
					if (r[1] >= '0' && r[1] <= '9')
						x = x * 10 + *(++r) - '0';
					if (x > 0 && x < m.nsub) {
						js_putm(J, &sb, m.sub[x].sp, m.sub[x].ep);
					} else {
						/* not a valid capture number: emit the reference literally */
						js_putc(J, &sb, '$');
						if (x >= 10) {
							js_putc(J, &sb, '0' + x / 10);
							js_putc(J, &sb, '0' + x % 10);
						} else {
							js_putc(J, &sb, '0' + x);
						}
					}
					break;
				default:
					js_putc(J, &sb, '$');
					js_putc(J, &sb, *r);
					break;
				}
				++r;
			} else {
				js_putc(J, &sb, *r++);
			}
		}
	}

	if (re->flags & JS_REGEXP_G) {
		source = m.sub[0].ep;
		if (n == 0) {
			/* empty match: copy one byte through so the search advances */
			if (*source)
				js_putc(J, &sb, *source++);
			else
				goto end;
		}
		if (!js_doregexec(J, re->prog, source, &m, REG_NOTBOL))
			goto loop;
	}

end:
	/* append whatever follows the last match, then terminate the buffer */
	js_puts(J, &sb, s + n);
	js_putc(J, &sb, 0);

	if (js_try(J)) {
		js_free(J, sb);
		js_throw(J);
	}
	js_pushstring(J, sb ? sb->s : "");
	js_endtry(J);
	js_free(J, sb);
}

/*
 * String.prototype.replace(string, replacement): replace the first
 * occurrence of the search string (found with strstr). The replacement is
 * either a function, called with (match, offset, whole string), or a
 * template string in which $$, $&, $` and $' are expanded.
 */
static void Sp_replace_string(js_State *J)
{
	const char *text, *pattern, *hit, *repl;
	js_Buffer *out = NULL;
	int plen;

	text = checkstring(J, 0);
	pattern = js_tostring(J, 1);

	hit = strstr(text, pattern);
	if (hit == NULL) {
		/* nothing to replace: the result is the subject itself */
		js_copy(J, 0);
		return;
	}
	plen = strlen(pattern);

	if (!js_iscallable(J, 2)) {
		repl = js_tostring(J, 2);
		js_putm(J, &out, text, hit);
		for (; *repl; ++repl) {
			if (*repl != '$') {
				js_putc(J, &out, *repl);
				continue;
			}
			++repl;
			switch (*repl) {
			case 0:
				/* '$' was the last character: back up and emit it literally */
				--repl;
				js_putc(J, &out, '$');
				break;
			case '$':
				js_putc(J, &out, '$');
				break;
			case '&':
				js_putm(J, &out, hit, hit + plen);
				break;
			case '`':
				js_putm(J, &out, text, hit);
				break;
			case '\'':
				js_puts(J, &out, hit + plen);
				break;
			default:
				js_putc(J, &out, '$');
				js_putc(J, &out, *repl);
				break;
			}
		}
		js_puts(J, &out, hit + plen);
		js_putc(J, &out, 0);
	} else {
		js_copy(J, 2);
		js_pushundefined(J);
		js_pushlstring(J, hit, plen); /* arg 1: substring that matched */
		js_pushnumber(J, hit - text); /* arg 2: offset within search string */
		js_copy(J, 0); /* arg 3: search string */
		js_call(J, 3);
		repl = js_tostring(J, -1);
		js_putm(J, &out, text, hit);
		js_puts(J, &out, repl);
		js_puts(J, &out, hit + plen);
		js_putc(J, &out, 0);
		js_pop(J, 1);
	}

	if (js_try(J)) {
		js_free(J, out);
		js_throw(J);
	}
	js_pushstring(J, out ? out->s : "");
	js_endtry(J);
	js_free(J, out);
}

/* String.prototype.replace: dispatch on whether argument 1 is a regexp. */
static void Sp_replace(js_State *J)
{
	if (!js_isregexp(J, 1)) {
		Sp_replace_string(J);
		return;
	}
	Sp_replace_regexp(J);
}

/*
 * String.prototype.split(regexp, limit).
 *
 * Pushes an array holding the pieces of 'this' between matches of the
 * regexp; after each piece, the sub-matches 1..nsub-1 of that match are
 * appended too. At most 'limit' elements are stored (1 << 30 when argument
 * 2 is not defined).
 */
static void Sp_split_regexp(js_State *J)
{
	js_Regexp *re;
	const char *text;
	int limit, len, k;
	const char *p, *a, *b, *c, *e;
	Resub m;

	text = checkstring(J, 0);
	re = js_toregexp(J, 1);
	limit = js_isdefined(J, 2) ? js_tointeger(J, 2) : 1 << 30;

	js_newarray(J);
	len = 0;

	if (limit == 0)
		return;

	e = text + strlen(text);

	/* splitting the empty string */
	if (e == text) {
		/* nonzero means no match: the result is [""]; on a match it stays [] */
		if (js_doregexec(J, re->prog, text, &m, 0)) {
			js_pushliteral(J, "");
			js_setindex(J, -2, 0);
		}
		return;
	}

	/* p: start of the current piece; a: where the next search starts */
	p = a = text;
	while (a < e) {
		if (js_doregexec(J, re->prog, a, &m, a > text ? REG_NOTBOL : 0))
			break; /* no match */

		/* b..c is the span of the separator match */
		b = m.sub[0].sp;
		c = m.sub[0].ep;

		/* empty string at end of last match */
		if (b == c && b == p) {
			++a;
			continue;
		}

		if (len == limit) return;
		js_pushlstring(J, p, b - p);
		js_setindex(J, -2, len++);

		/* append the sub-matches of the separator */
		for (k = 1; k < m.nsub; ++k) {
			if (len == limit) return;
			js_pushlstring(J, m.sub[k].sp, m.sub[k].ep - m.sub[k].sp);
			js_setindex(J, -2, len++);
		}

		a = p = c;
	}

	/* the remainder after the last separator is the final piece */
	if (len == limit) return;
	js_pushstring(J, p);
	js_setindex(J, -2, len);
}

/*
 * String.prototype.split(string, limit): push an array of the pieces of
 * 'this' between occurrences of the separator string. An empty separator
 * splits into individual runes. At most 'limit' elements are stored
 * (1 << 30 when argument 2 is not defined).
 */
static void Sp_split_string(js_State *J)
{
	const char *str = checkstring(J, 0);
	const char *sep = js_tostring(J, 1);
	int limit = js_isdefined(J, 2) ? js_tointeger(J, 2) : 1 << 30;
	int count, seplen;

	js_newarray(J);

	if (limit == 0)
		return;

	seplen = strlen(sep);

	/* empty separator: one element per rune */
	if (seplen == 0) {
		Rune r;
		count = 0;
		while (*str && count < limit) {
			int nbytes = chartorune(&r, str);
			js_pushlstring(J, str, nbytes);
			js_setindex(J, -2, count);
			str += nbytes;
			++count;
		}
		return;
	}

	count = 0;
	while (count < limit) {
		const char *hit = strstr(str, sep);
		if (!hit) {
			/* no further separator: the rest is the last piece */
			js_pushstring(J, str);
			js_setindex(J, -2, count);
			break;
		}
		js_pushlstring(J, str, hit - str);
		js_setindex(J, -2, count);
		str = hit + seplen;
		++count;
	}
}

/*
 * String.prototype.split: with an undefined separator the result is a
 * one-element array holding the whole string; otherwise dispatch on whether
 * the separator is a regexp.
 *
 * Every branch obtains 'this' through checkstring(), so calling split on
 * null or undefined raises a TypeError like the other string methods.
 */
static void Sp_split(js_State *J)
{
	if (js_isundefined(J, 1)) {
		js_newarray(J);
		js_pushstring(J, checkstring(J, 0));
		js_setindex(J, -2, 0);
	} else if (js_isregexp(J, 1)) {
		Sp_split_regexp(J);
	} else {
		Sp_split_string(J);
	}
}

/*
 * Set up the String built-in: give String.prototype an empty string value,
 * register the prototype methods and String.fromCharCode, and define the
 * global "String" constructor.
 */
void jsB_initstring(js_State *J)
{
	/* the prototype object holds the empty string, stored in its inline buffer */
	J->String_prototype->u.s.shrstr[0] = 0;
	J->String_prototype->u.s.string = J->String_prototype->u.s.shrstr;
	J->String_prototype->u.s.length = 0;

	js_pushobject(J, J->String_prototype);
	{
		/* NOTE(review): the last argument is presumably the declared argument count; where a trailing comment gives a different number, confirm which is intended */
		jsB_propf(J, "String.prototype.toString", Sp_toString, 0);
		jsB_propf(J, "String.prototype.valueOf", Sp_valueOf, 0);
		jsB_propf(J, "String.prototype.charAt", Sp_charAt, 1);
		jsB_propf(J, "String.prototype.charCodeAt", Sp_charCodeAt, 1);
		jsB_propf(J, "String.prototype.concat", Sp_concat, 0); /* 1 */
		jsB_propf(J, "String.prototype.indexOf", Sp_indexOf, 1);
		jsB_propf(J, "String.prototype.lastIndexOf", Sp_lastIndexOf, 1);
		jsB_propf(J, "String.prototype.localeCompare", Sp_localeCompare, 1);
		jsB_propf(J, "String.prototype.match", Sp_match, 1);
		jsB_propf(J, "String.prototype.replace", Sp_replace, 2);
		jsB_propf(J, "String.prototype.search", Sp_search, 1);
		jsB_propf(J, "String.prototype.slice", Sp_slice, 2);
		jsB_propf(J, "String.prototype.split", Sp_split, 2);
		jsB_propf(J, "String.prototype.substring", Sp_substring, 2);
		/* the locale variants share the implementation of the plain case conversions */
		jsB_propf(J, "String.prototype.toLowerCase", Sp_toLowerCase, 0);
		jsB_propf(J, "String.prototype.toLocaleLowerCase", Sp_toLowerCase, 0);
		jsB_propf(J, "String.prototype.toUpperCase", Sp_toUpperCase, 0);
		jsB_propf(J, "String.prototype.toLocaleUpperCase", Sp_toUpperCase, 0);

		/* ES5 */
		jsB_propf(J, "String.prototype.trim", Sp_trim, 0);
	}
	/* jsB_String handles a plain call, jsB_new_String handles construction */
	js_newcconstructor(J, jsB_String, jsB_new_String, "String", 0); /* 1 */
	{
		jsB_propf(J, "String.fromCharCode", S_fromCharCode, 0); /* 1 */
	}
	js_defglobal(J, "String", JS_DONTENUM);
}