ref: ab5236f243874178ac55207301a6d9a3ca0eeade
parent: 0ddbab57c46882e2fcce1f73917aa875c84f9185
author: Tor Andersson <[email protected]>
date: Mon Feb 10 10:01:52 EST 2014
UTF string index/pointer juggling.
--- a/jsi.h
+++ b/jsi.h
@@ -41,7 +41,8 @@
js_Regexp *js_toregexp(js_State *J, int idx);
int js_isarrayindex(js_State *J, const char *str, unsigned int *idx);
int js_runeat(js_State *J, const char *s, int i);
-void js_pushcharat(js_State *J, const char *s, int pos);
+int js_utfptrtoidx(const char *s, const char *p);
+const char *js_utfidxtoptr(const char *s, int i);
void js_dup(js_State *J);
void js_rot2(js_State *J);
--- a/jsrun.c
+++ b/jsrun.c
@@ -3,6 +3,8 @@
#include "jsvalue.h"
#include "jsrun.h"
+#include "utf.h"
+
static void jsR_run(js_State *J, js_Function *F);
/* Push values on stack */
@@ -365,6 +367,18 @@
return !strcmp(buf, str);
}
+static void js_pushrune(js_State *J, Rune rune)
+{
+ char buf[UTFmax + 1];
+ if (rune > 0) {
+ buf[runetochar(buf, &rune)] = 0;
+ js_pushstring(J, buf);
+ } else {
+ js_pushundefined(J);
+ }
+}
+
+
static int jsR_hasproperty(js_State *J, js_Object *obj, const char *name)
{
js_Property *ref;
@@ -383,7 +397,7 @@
return 1;
}
if (js_isarrayindex(J, name, &k)) {
- js_pushcharat(J, obj->u.s.string, k);
+ js_pushrune(J, js_runeat(J, obj->u.s.string, k));
return 1;
}
}
--- a/jsstring.c
+++ b/jsstring.c
@@ -7,34 +7,6 @@
#define nelem(a) (sizeof (a) / sizeof (a)[0])
-static int jsB_new_String(js_State *J, int argc)
-{
- js_newstring(J, js_isdefined(J, 1) ? js_tostring(J, 1) : "");
- return 1;
-}
-
-static int jsB_String(js_State *J, int argc)
-{
- js_pushliteral(J, js_isdefined(J, 1) ? js_tostring(J, 1) : "");
- return 1;
-}
-
-static int Sp_toString(js_State *J, int argc)
-{
- js_Object *self = js_toobject(J, 0);
- if (self->type != JS_CSTRING) js_typeerror(J, "not a string");
- js_pushliteral(J, self->u.s.string);
- return 1;
-}
-
-static int Sp_valueOf(js_State *J, int argc)
-{
- js_Object *self = js_toobject(J, 0);
- if (self->type != JS_CSTRING) js_typeerror(J, "not a string");
- js_pushliteral(J, self->u.s.string);
- return 1;
-}
-
int js_runeat(js_State *J, const char *s, int i)
{
Rune rune = 0;
@@ -50,9 +22,9 @@
return rune;
}
-static inline const char *utfidx(const char *s, int i)
+const char *js_utfidxtoptr(const char *s, int i)
{
- Rune rune = 0;
+ Rune rune;
while (i-- > 0) {
rune = *(unsigned char*)s;
if (rune < Runeself) {
@@ -65,18 +37,48 @@
return s;
}
-void js_pushcharat(js_State *J, const char *s, int pos)
+int js_utfptrtoidx(const char *s, const char *p)
{
- char buf[UTFmax + 1];
- Rune rune = js_runeat(J, s, pos);
- if (rune > 0) {
- buf[runetochar(buf, &rune)] = 0;
- js_pushstring(J, buf);
- } else {
- js_pushundefined(J);
+ Rune rune;
+ int i = 0;
+ while (s < p) {
+ if (*(unsigned char *)s < Runeself)
+ ++s;
+ else
+ s += chartorune(&rune, s);
+ ++i;
}
+ return i;
}
+static int jsB_new_String(js_State *J, int argc)
+{
+ js_newstring(J, js_isdefined(J, 1) ? js_tostring(J, 1) : "");
+ return 1;
+}
+
+static int jsB_String(js_State *J, int argc)
+{
+ js_pushliteral(J, js_isdefined(J, 1) ? js_tostring(J, 1) : "");
+ return 1;
+}
+
+static int Sp_toString(js_State *J, int argc)
+{
+ js_Object *self = js_toobject(J, 0);
+ if (self->type != JS_CSTRING) js_typeerror(J, "not a string");
+ js_pushliteral(J, self->u.s.string);
+ return 1;
+}
+
+static int Sp_valueOf(js_State *J, int argc)
+{
+ js_Object *self = js_toobject(J, 0);
+ if (self->type != JS_CSTRING) js_typeerror(J, "not a string");
+ js_pushliteral(J, self->u.s.string);
+ return 1;
+}
+
static int Sp_charAt(js_State *J, int argc)
{
char buf[UTFmax + 1];
@@ -196,11 +198,11 @@
e = e < 0 ? 0 : e > len ? len : e;
if (s < e) {
- ss = utfidx(str, s);
- ee = utfidx(ss, e - s);
+ ss = js_utfidxtoptr(str, s);
+ ee = js_utfidxtoptr(ss, e - s);
} else {
- ss = utfidx(str, e);
- ee = utfidx(ss, s - e);
+ ss = js_utfidxtoptr(str, e);
+ ee = js_utfidxtoptr(ss, s - e);
}
js_pushlstring(J, ss, ee - ss);
@@ -219,11 +221,11 @@
e = e < 0 ? 0 : e > len ? len : e;
if (s < e) {
- ss = utfidx(str, s);
- ee = utfidx(ss, e - s);
+ ss = js_utfidxtoptr(str, s);
+ ee = js_utfidxtoptr(ss, e - s);
} else {
- ss = utfidx(str, e);
- ee = utfidx(ss, s - e);
+ ss = js_utfidxtoptr(str, e);
+ ee = js_utfidxtoptr(ss, s - e);
}
js_pushlstring(J, ss, ee - ss);
@@ -381,7 +383,7 @@
re = js_toregexp(J, -1);
if (!regexec(re->prog, text, nelem(m), m, 0))
- js_pushnumber(J, m[0].rm_so); // TODO: convert to utf-8 index offset
+ js_pushnumber(J, js_utfptrtoidx(text, text + m[0].rm_so));
else
js_pushnumber(J, -1);