shithub: libmujs

Download patch

ref: ab5236f243874178ac55207301a6d9a3ca0eeade
parent: 0ddbab57c46882e2fcce1f73917aa875c84f9185
author: Tor Andersson <[email protected]>
date: Mon Feb 10 10:01:52 EST 2014

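UTF string index/pointer juggling: export js_utfidxtoptr (formerly the static utfidx), add js_utfptrtoidx, replace js_pushcharat with a static js_pushrune in jsrun.c, and convert the regexp search match offset to a rune index.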

--- a/jsi.h
+++ b/jsi.h
@@ -41,7 +41,8 @@
 js_Regexp *js_toregexp(js_State *J, int idx);
 int js_isarrayindex(js_State *J, const char *str, unsigned int *idx);
 int js_runeat(js_State *J, const char *s, int i);
-void js_pushcharat(js_State *J, const char *s, int pos);
+int js_utfptrtoidx(const char *s, const char *p);
+const char *js_utfidxtoptr(const char *s, int i);
 
 void js_dup(js_State *J);
 void js_rot2(js_State *J);
--- a/jsrun.c
+++ b/jsrun.c
@@ -3,6 +3,8 @@
 #include "jsvalue.h"
 #include "jsrun.h"
 
+#include "utf.h"
+
 static void jsR_run(js_State *J, js_Function *F);
 
 /* Push values on stack */
@@ -365,6 +367,18 @@
 	return !strcmp(buf, str);
 }
 
+static void js_pushrune(js_State *J, Rune rune)
+{
+	char buf[UTFmax + 1];
+	if (rune > 0) {
+		buf[runetochar(buf, &rune)] = 0;
+		js_pushstring(J, buf);
+	} else {
+		js_pushundefined(J);
+	}
+}
+
+
 static int jsR_hasproperty(js_State *J, js_Object *obj, const char *name)
 {
 	js_Property *ref;
@@ -383,7 +397,7 @@
 			return 1;
 		}
 		if (js_isarrayindex(J, name, &k)) {
-			js_pushcharat(J, obj->u.s.string, k);
+			js_pushrune(J, js_runeat(J, obj->u.s.string, k));
 			return 1;
 		}
 	}
--- a/jsstring.c
+++ b/jsstring.c
@@ -7,34 +7,6 @@
 
 #define nelem(a) (sizeof (a) / sizeof (a)[0])
 
-static int jsB_new_String(js_State *J, int argc)
-{
-	js_newstring(J, js_isdefined(J, 1) ? js_tostring(J, 1) : "");
-	return 1;
-}
-
-static int jsB_String(js_State *J, int argc)
-{
-	js_pushliteral(J, js_isdefined(J, 1) ? js_tostring(J, 1) : "");
-	return 1;
-}
-
-static int Sp_toString(js_State *J, int argc)
-{
-	js_Object *self = js_toobject(J, 0);
-	if (self->type != JS_CSTRING) js_typeerror(J, "not a string");
-	js_pushliteral(J, self->u.s.string);
-	return 1;
-}
-
-static int Sp_valueOf(js_State *J, int argc)
-{
-	js_Object *self = js_toobject(J, 0);
-	if (self->type != JS_CSTRING) js_typeerror(J, "not a string");
-	js_pushliteral(J, self->u.s.string);
-	return 1;
-}
-
 int js_runeat(js_State *J, const char *s, int i)
 {
 	Rune rune = 0;
@@ -50,9 +22,9 @@
 	return rune;
 }
 
-static inline const char *utfidx(const char *s, int i)
+const char *js_utfidxtoptr(const char *s, int i)
 {
-	Rune rune = 0;
+	Rune rune;
 	while (i-- > 0) {
 		rune = *(unsigned char*)s;
 		if (rune < Runeself) {
@@ -65,18 +37,48 @@
 	return s;
 }
 
-void js_pushcharat(js_State *J, const char *s, int pos)
+int js_utfptrtoidx(const char *s, const char *p)
 {
-	char buf[UTFmax + 1];
-	Rune rune = js_runeat(J, s, pos);
-	if (rune > 0) {
-		buf[runetochar(buf, &rune)] = 0;
-		js_pushstring(J, buf);
-	} else {
-		js_pushundefined(J);
+	Rune rune;
+	int i = 0;
+	while (s < p) {
+		if (*(unsigned char *)s < Runeself)
+			++s;
+		else
+			s += chartorune(&rune, s);
+		++i;
 	}
+	return i;
 }
 
+static int jsB_new_String(js_State *J, int argc)
+{
+	js_newstring(J, js_isdefined(J, 1) ? js_tostring(J, 1) : "");
+	return 1;
+}
+
+static int jsB_String(js_State *J, int argc)
+{
+	js_pushliteral(J, js_isdefined(J, 1) ? js_tostring(J, 1) : "");
+	return 1;
+}
+
+static int Sp_toString(js_State *J, int argc)
+{
+	js_Object *self = js_toobject(J, 0);
+	if (self->type != JS_CSTRING) js_typeerror(J, "not a string");
+	js_pushliteral(J, self->u.s.string);
+	return 1;
+}
+
+static int Sp_valueOf(js_State *J, int argc)
+{
+	js_Object *self = js_toobject(J, 0);
+	if (self->type != JS_CSTRING) js_typeerror(J, "not a string");
+	js_pushliteral(J, self->u.s.string);
+	return 1;
+}
+
 static int Sp_charAt(js_State *J, int argc)
 {
 	char buf[UTFmax + 1];
@@ -196,11 +198,11 @@
 	e = e < 0 ? 0 : e > len ? len : e;
 
 	if (s < e) {
-		ss = utfidx(str, s);
-		ee = utfidx(ss, e - s);
+		ss = js_utfidxtoptr(str, s);
+		ee = js_utfidxtoptr(ss, e - s);
 	} else {
-		ss = utfidx(str, e);
-		ee = utfidx(ss, s - e);
+		ss = js_utfidxtoptr(str, e);
+		ee = js_utfidxtoptr(ss, s - e);
 	}
 
 	js_pushlstring(J, ss, ee - ss);
@@ -219,11 +221,11 @@
 	e = e < 0 ? 0 : e > len ? len : e;
 
 	if (s < e) {
-		ss = utfidx(str, s);
-		ee = utfidx(ss, e - s);
+		ss = js_utfidxtoptr(str, s);
+		ee = js_utfidxtoptr(ss, e - s);
 	} else {
-		ss = utfidx(str, e);
-		ee = utfidx(ss, s - e);
+		ss = js_utfidxtoptr(str, e);
+		ee = js_utfidxtoptr(ss, s - e);
 	}
 
 	js_pushlstring(J, ss, ee - ss);
@@ -381,7 +383,7 @@
 	re = js_toregexp(J, -1);
 
 	if (!regexec(re->prog, text, nelem(m), m, 0))
-		js_pushnumber(J, m[0].rm_so); // TODO: convert to utf-8 index offset
+		js_pushnumber(J, js_utfptrtoidx(text, text + m[0].rm_so));
 	else
 		js_pushnumber(J, -1);