ref: 8837293e1085d71e10ef9216c457458450419801
dir: /string.c/
/* string functions */ #include "llt.h" #include "flisp.h" BUILTIN("string?", stringp) { argcount(nargs, 1); return fl_isstring(args[0]) ? FL_T : FL_F; } BUILTIN("string.count", string_count) { size_t start = 0; if (nargs < 1 || nargs > 3) argcount(nargs, 1); if (!fl_isstring(args[0])) type_error("string", args[0]); size_t len = cv_len((cvalue_t*)ptr(args[0])); size_t stop = len; if (nargs > 1) { start = toulong(args[1]); if (start > len) bounds_error(args[0], args[1]); if (nargs > 2) { stop = toulong(args[2]); if (stop > len) bounds_error(args[0], args[2]); if (stop <= start) return fixnum(0); } } char *str = cvalue_data(args[0]); return size_wrap(u8_charnum(str+start, stop-start)); } BUILTIN("string.width", string_width) { argcount(nargs, 1); if (iscprim(args[0])) { cprim_t *cp = (cprim_t*)ptr(args[0]); if (cp_class(cp) == wchartype) { int w = wcwidth(*(wchar_t*)cp_data(cp)); if (w < 0) return FL_F; return fixnum(w); } } char *s = tostring(args[0]); return size_wrap(u8_strwidth(s)); } BUILTIN("string.reverse", string_reverse) { argcount(nargs, 1); if (!fl_isstring(args[0])) type_error("string", args[0]); size_t len = cv_len((cvalue_t*)ptr(args[0])); value_t ns = cvalue_string(len); u8_reverse(cvalue_data(ns), cvalue_data(args[0]), len); return ns; } BUILTIN("string.encode", string_encode) { argcount(nargs, 1); if (iscvalue(args[0])) { cvalue_t *cv = (cvalue_t*)ptr(args[0]); fltype_t *t = cv_class(cv); if (t->eltype == wchartype) { size_t nc = cv_len(cv) / sizeof(uint32_t); uint32_t *ptr = (uint32_t*)cv_data(cv); size_t nbytes = u8_codingsize(ptr, nc); value_t str = cvalue_string(nbytes); ptr = cv_data((cvalue_t*)ptr(args[0])); // relocatable pointer u8_toutf8(cvalue_data(str), nbytes, ptr, nc); return str; } } type_error("wchar array", args[0]); } BUILTIN("string.decode", string_decode) { int term=0; if (nargs == 2) { term = (args[1] != FL_F); } else { argcount(nargs, 1); } if (!fl_isstring(args[0])) type_error("string", args[0]); cvalue_t *cv = (cvalue_t*)ptr(args[0]); char *ptr = (char*)cv_data(cv); size_t nb = cv_len(cv); size_t nc = u8_charnum(ptr, nb); size_t newsz = nc*sizeof(uint32_t); if (term) newsz += sizeof(uint32_t); value_t wcstr = cvalue(wcstringtype, newsz); ptr = cv_data((cvalue_t*)ptr(args[0])); // relocatable pointer uint32_t *pwc = cvalue_data(wcstr); u8_toucs(pwc, nc, ptr, nb); if (term) pwc[nc] = 0; return wcstr; } extern BUILTIN("buffer", buffer); extern value_t stream_to_string(value_t *ps); BUILTIN("string", string) { if (nargs == 1 && fl_isstring(args[0])) return args[0]; value_t arg, buf = fn_builtin_buffer(nil, 0); fl_gc_handle(&buf); ios_t *s = value2c(ios_t*,buf); int i; value_t oldpr = symbol_value(printreadablysym); value_t oldpp = symbol_value(printprettysym); set(printreadablysym, FL_F); set(printprettysym, FL_F); FOR_ARGS(i,0,arg,args) { USED(arg); fl_print(s, args[i]); } set(printreadablysym, oldpr); set(printprettysym, oldpp); value_t outp = stream_to_string(&buf); fl_free_gc_handles(1); return outp; } BUILTIN("string.split", string_split) { argcount(nargs, 2); char *s = tostring(args[0]); char *delim = tostring(args[1]); size_t len = cv_len((cvalue_t*)ptr(args[0])); size_t dlen = cv_len((cvalue_t*)ptr(args[1])); size_t ssz, tokend, tokstart, i=0; value_t first=FL_NIL, c=FL_NIL, last; size_t junk; fl_gc_handle(&first); fl_gc_handle(&last); do { // find and allocate next token tokstart = tokend = i; while (i < len && !u8_memchr(delim, u8_nextmemchar(s, &i), dlen, &junk)) tokend = i; ssz = tokend - tokstart; last = c; // save previous cons cell c = fl_cons(cvalue_string(ssz), FL_NIL); // we've done allocation; reload movable pointers s = cv_data((cvalue_t*)ptr(args[0])); delim = cv_data((cvalue_t*)ptr(args[1])); if (ssz) memmove(cv_data((cvalue_t*)ptr(car_(c))), &s[tokstart], ssz); // link new cell if (last == FL_NIL) first = c; // first time, save first cons else ((cons_t*)ptr(last))->cdr = c; // note this tricky condition: if the string ends with a // delimiter, we need to go around one more time to add an // empty string. this happens when (i==len && tokend<i) } while (i < len || (i==len && (tokend!=i))); fl_free_gc_handles(2); return first; } BUILTIN("string.sub", string_sub) { if (nargs != 2) argcount(nargs, 3); char *s = tostring(args[0]); size_t len = cv_len((cvalue_t*)ptr(args[0])); size_t i1, i2; i1 = toulong(args[1]); if (i1 > len) bounds_error(args[0], args[1]); if (nargs == 3) { i2 = toulong(args[2]); if (i2 > len) bounds_error(args[0], args[2]); } else { i2 = len; } if (i2 <= i1) return cvalue_string(0); value_t ns = cvalue_string(i2-i1); memmove(cv_data((cvalue_t*)ptr(ns)), &s[i1], i2-i1); return ns; } BUILTIN("string.char", string_char) { argcount(nargs, 2); char *s = tostring(args[0]); size_t len = cv_len((cvalue_t*)ptr(args[0])); size_t i = toulong(args[1]); if (i >= len) bounds_error(args[0], args[1]); size_t sl = u8_seqlen(&s[i]); if (sl > len || i > len-sl) bounds_error(args[0], args[1]); return mk_wchar(u8_nextchar(s, &i)); } BUILTIN("char.upcase", char_upcase) { argcount(nargs, 1); cprim_t *cp = (cprim_t*)ptr(args[0]); if (!iscprim(args[0]) || cp_class(cp) != wchartype) type_error("wchar", args[0]); return mk_wchar(towupper(*(int32_t*)cp_data(cp))); } BUILTIN("char.downcase", char_downcase) { argcount(nargs, 1); cprim_t *cp = (cprim_t*)ptr(args[0]); if (!iscprim(args[0]) || cp_class(cp) != wchartype) type_error("wchar", args[0]); return mk_wchar(towlower(*(int32_t*)cp_data(cp))); } BUILTIN("char-alphabetic?", char_alphabeticp) { argcount(nargs, 1); cprim_t *cp = (cprim_t*)ptr(args[0]); if (!iscprim(args[0]) || cp_class(cp) != wchartype) type_error("wchar", args[0]); return iswalpha(*(int32_t*)cp_data(cp)) ? FL_T : FL_F; } static value_t mem_find_byte(char *s, char c, size_t start, size_t len) { char *p = memchr(s+start, c, len-start); if (p == nil) return FL_F; return size_wrap((size_t)(p - s)); } BUILTIN("string.find", string_find) { char cbuf[8]; size_t start = 0; if (nargs == 3) start = toulong(args[2]); else argcount(nargs, 2); char *s = tostring(args[0]); size_t len = cv_len((cvalue_t*)ptr(args[0])); if (start > len) bounds_error(args[0], args[2]); char *needle; size_t needlesz; value_t v = args[1]; cprim_t *cp = (cprim_t*)ptr(v); if (iscprim(v) && cp_class(cp) == wchartype) { uint32_t c = *(uint32_t*)cp_data(cp); if (c <= 0x7f) return mem_find_byte(s, (char)c, start, len); needlesz = u8_toutf8(cbuf, sizeof(cbuf), &c, 1); needle = cbuf; } else if (iscprim(v) && cp_class(cp) == bytetype) { return mem_find_byte(s, *(char*)cp_data(cp), start, len); } else if (fl_isstring(v)) { cvalue_t *cv = (cvalue_t*)ptr(v); needlesz = cv_len(cv); needle = (char*)cv_data(cv); } else { type_error("string", args[1]); } if (needlesz > len-start) return FL_F; else if (needlesz == 1) return mem_find_byte(s, needle[0], start, len); else if (needlesz == 0) return size_wrap(start); size_t i; for(i=start; i < len-needlesz+1; i++) { if (s[i] == needle[0]) { if (!memcmp(&s[i+1], needle+1, needlesz-1)) return size_wrap(i); } } return FL_F; } BUILTIN("string.inc", string_inc) { if (nargs < 2 || nargs > 3) argcount(nargs, 2); char *s = tostring(args[0]); size_t len = cv_len((cvalue_t*)ptr(args[0])); size_t i = toulong(args[1]); size_t cnt = 1; if (nargs == 3) cnt = toulong(args[2]); while (cnt--) { if (i >= len) bounds_error(args[0], args[1]); (void)(isutf(s[++i]) || isutf(s[++i]) || isutf(s[++i]) || ++i); } return size_wrap(i); } BUILTIN("string.dec", string_dec) { if (nargs < 2 || nargs > 3) argcount(nargs, 2); char *s = tostring(args[0]); size_t len = cv_len((cvalue_t*)ptr(args[0])); size_t i = toulong(args[1]); size_t cnt = 1; if (nargs == 3) cnt = toulong(args[2]); // note: i is allowed to start at index len if (i > len) bounds_error(args[0], args[1]); while (cnt--) { if (i == 0) bounds_error(args[0], args[1]); (void)(isutf(s[--i]) || isutf(s[--i]) || isutf(s[--i]) || --i); } return size_wrap(i); } static unsigned long get_radix_arg(value_t arg) { unsigned long radix = toulong(arg); if (radix < 2 || radix > 36) lerrorf(ArgError, "invalid radix"); return radix; } BUILTIN("number->string", number_2_string) { if (nargs < 1 || nargs > 2) argcount(nargs, 2); value_t n = args[0]; int neg = 0; uint64_t num; if (isfixnum(n)) num = numval(n); else if (!iscprim(n)) type_error("integer", n); else num = conv_to_uint64(cp_data((cprim_t*)ptr(n)), cp_numtype((cprim_t*)ptr(n))); if (numval(fl_compare(args[0],fixnum(0))) < 0) { num = -num; neg = 1; } unsigned long radix = 10; if (nargs == 2) radix = get_radix_arg(args[1]); char buf[128]; char *str = uint2str(buf, sizeof(buf), num, radix); if (neg && str > &buf[0]) *(--str) = '-'; return string_from_cstr(str); } BUILTIN("string->number", string_2_number) { if (nargs < 1 || nargs > 2) argcount(nargs, 2); char *str = tostring(args[0]); value_t n; unsigned long radix = 0; if (nargs == 2) radix = get_radix_arg(args[1]); if (!isnumtok_base(str, &n, (int)radix)) return FL_F; return n; } BUILTIN("string.isutf8", string_isutf8) { argcount(nargs, 1); char *s = tostring(args[0]); size_t len = cv_len((cvalue_t*)ptr(args[0])); return u8_isvalid(s, len) ? FL_T : FL_F; }