ref: 274bea2da8ccb512cb47f6fc02e6acbd3d561c67
dir: /string.c/
/* string functions */ #include "flisp.h" #include "operators.h" #include "cvalues.h" #include "print.h" #include "read.h" #include "equal.h" #include "iostream.h" BUILTIN("string?", stringp) { argcount(nargs, 1); return fl_isstring(args[0]) ? FL(t) : FL(f); } BUILTIN("string-length", string_length) { size_t start = 0; if(nargs < 1 || nargs > 3) argcount(nargs, 1); if(!fl_isstring(args[0])) type_error("string", args[0]); size_t len = cv_len((cvalue_t*)ptr(args[0])); size_t stop = len; if(nargs > 1){ start = tosize(args[1]); if(start > len) bounds_error(args[0], args[1]); if(nargs > 2){ stop = tosize(args[2]); if(stop > len) bounds_error(args[0], args[2]); if(stop <= start) return fixnum(0); } } char *str = cvalue_data(args[0]); return size_wrap(u8_charnum(str+start, stop-start)); } BUILTIN("string-width", string_width) { argcount(nargs, 1); if(iscprim(args[0])){ cprim_t *cp = ptr(args[0]); if(cp_class(cp) == FL(runetype)){ int w = wcwidth(*(Rune*)cp_data(cp)); return w < 0 ? FL(f) : fixnum(w); } } return size_wrap(u8_strwidth(tostring(args[0]))); } BUILTIN("string-reverse", string_reverse) { argcount(nargs, 1); if(!fl_isstring(args[0])) type_error("string", args[0]); size_t len = cv_len(ptr(args[0])); value_t ns = cvalue_string(len); u8_reverse(cvalue_data(ns), cvalue_data(args[0]), len); return ns; } BUILTIN("string-encode", string_encode) { argcount(nargs, 1); if(iscvalue(args[0])){ cvalue_t *cv = ptr(args[0]); fltype_t *t = cv_class(cv); if(t->eltype == FL(runetype)){ size_t nr = cv_len(cv) / sizeof(Rune); Rune *r = (Rune*)cv_data(cv); size_t nb = runenlen(r, nr); value_t str = cvalue_string(nb); char *s = cvalue_data(str); for(size_t i = 0; i < nr; i++) s += runetochar(s, r+i); return str; } } type_error("rune array", args[0]); } BUILTIN("string-decode", string_decode) { int term = 0; if(nargs == 2) term = args[1] != FL(f); else argcount(nargs, 1); if(!fl_isstring(args[0])) type_error("string", args[0]); cvalue_t *cv = ptr(args[0]); char *ptr = (char*)cv_data(cv); size_t nb = cv_len(cv); size_t nc = utfnlen(ptr, nb); size_t newsz = nc*sizeof(Rune); if(term) newsz += sizeof(Rune); value_t runestr = cvalue(FL(runestringtype), newsz); ptr = cvalue_data(args[0]); // relocatable pointer Rune *r = cvalue_data(runestr); for(size_t i = 0; i < nb; i++) ptr += chartorune(r+i, ptr); if(term) r[nb] = 0; return runestr; } extern BUILTIN("buffer", buffer); BUILTIN("string", string) { if(nargs == 1 && fl_isstring(args[0])) return args[0]; value_t arg, buf = fn_builtin_buffer(nil, 0); fl_gc_handle(&buf); ios_t *s = value2c(ios_t*, buf); value_t oldpr = symbol_value(FL(printreadablysym)); value_t oldpp = symbol_value(FL(printprettysym)); set(FL(printreadablysym), FL(f)); set(FL(printprettysym), FL(f)); uint32_t i; FOR_ARGS(i, 0, arg, args){ USED(arg); fl_print(s, args[i]); } set(FL(printreadablysym), oldpr); set(FL(printprettysym), oldpp); value_t outp = stream_to_string(&buf); fl_free_gc_handles(1); return outp; } BUILTIN("string-split", string_split) { argcount(nargs, 2); char *s = tostring(args[0]); char *delim = tostring(args[1]); size_t len = cv_len(ptr(args[0])); size_t dlen = cv_len(ptr(args[1])); size_t ssz, tokend, tokstart, i = 0; value_t first = FL(Nil), c = FL(Nil), last; size_t junk; fl_gc_handle(&first); fl_gc_handle(&last); do{ // find and allocate next token tokstart = tokend = i; while(i < len && !u8_memchr(delim, u8_nextmemchar(s, &i), dlen, &junk)) tokend = i; ssz = tokend - tokstart; last = c; // save previous cons cell c = fl_cons(cvalue_string(ssz), FL(Nil)); // we've done allocation; reload movable pointers s = cvalue_data(args[0]); delim = cvalue_data(args[1]); if(ssz) memmove(cvalue_data(car_(c)), &s[tokstart], ssz); // link new cell if(last == FL(Nil)) first = c; // first time, save first cons else ((cons_t*)ptr(last))->cdr = c; // note this tricky condition: if the string ends with a // delimiter, we need to go around one more time to add an // empty string. this happens when (i == len && tokend < i) }while(i < len || (i == len && (tokend != i))); fl_free_gc_handles(2); return first; } BUILTIN("string-sub", string_sub) { if(nargs != 2) argcount(nargs, 3); char *s = tostring(args[0]); size_t lenbytes = cv_len((cvalue_t*)ptr(args[0])); size_t startbytes, n, startchar = tosize(args[1]); for(startbytes = n = 0; n < startchar && startbytes < lenbytes; n++) startbytes += u8_seqlen(s+startbytes); if(n != startchar) bounds_error(args[0], args[1]); size_t endbytes = lenbytes; if(nargs == 3){ size_t endchar = tosize(args[2]); for(endbytes = startbytes; n < endchar && endbytes < lenbytes; n++) endbytes += u8_seqlen(s+endbytes); if(n != endchar) bounds_error(args[0], args[2]); } if(endbytes == startbytes) return symbol_value(FL(emptystringsym)); value_t ns = cvalue_string(endbytes-startbytes); s = cvalue_data(args[0]); // reload after alloc memmove(cvalue_data(ns), s+startbytes, endbytes-startbytes); return ns; } BUILTIN("string-char", string_char) { argcount(nargs, 2); char *s = tostring(args[0]); size_t lenbytes = cv_len(ptr(args[0])); size_t startbytes, n, startchar = tosize(args[1]); for(startbytes = n = 0; n < startchar && startbytes < lenbytes; n++) startbytes += u8_seqlen(s+startbytes); if(n != startchar) bounds_error(args[0], args[1]); Rune r; chartorune(&r, s+startbytes); return mk_rune(r); } BUILTIN("char-upcase", char_upcase) { argcount(nargs, 1); cprim_t *cp = (cprim_t*)ptr(args[0]); if(!iscprim(args[0]) || cp_class(cp) != FL(runetype)) type_error("rune", args[0]); return mk_rune(toupperrune(*(Rune*)cp_data(cp))); } BUILTIN("char-downcase", char_downcase) { argcount(nargs, 1); cprim_t *cp = ptr(args[0]); if(!iscprim(args[0]) || cp_class(cp) != FL(runetype)) type_error("rune", args[0]); return mk_rune(tolowerrune(*(Rune*)cp_data(cp))); } BUILTIN("char-titlecase", char_titlecase) { argcount(nargs, 1); cprim_t *cp = ptr(args[0]); if(!iscprim(args[0]) || cp_class(cp) != FL(runetype)) type_error("rune", args[0]); return mk_rune(totitlerune(*(Rune*)cp_data(cp))); } BUILTIN("char-alphabetic?", char_alphabeticp) { argcount(nargs, 1); cprim_t *cp = ptr(args[0]); if(!iscprim(args[0]) || cp_class(cp) != FL(runetype)) type_error("rune", args[0]); return isalpharune(*(Rune*)cp_data(cp)) ? FL(t) : FL(f); } BUILTIN("char-lower-case?", char_lower_casep) { argcount(nargs, 1); cprim_t *cp = ptr(args[0]); if(!iscprim(args[0]) || cp_class(cp) != FL(runetype)) type_error("rune", args[0]); return islowerrune(*(Rune*)cp_data(cp)) ? FL(t) : FL(f); } BUILTIN("char-upper-case?", char_upper_casep) { argcount(nargs, 1); cprim_t *cp = ptr(args[0]); if(!iscprim(args[0]) || cp_class(cp) != FL(runetype)) type_error("rune", args[0]); return isupperrune(*(Rune*)cp_data(cp)) ? FL(t) : FL(f); } BUILTIN("char-title-case?", char_title_casep) { argcount(nargs, 1); cprim_t *cp = ptr(args[0]); if(!iscprim(args[0]) || cp_class(cp) != FL(runetype)) type_error("rune", args[0]); return istitlerune(*(Rune*)cp_data(cp)) ? FL(t) : FL(f); } BUILTIN("char-numeric?", char_numericp) { argcount(nargs, 1); cprim_t *cp = ptr(args[0]); if(!iscprim(args[0]) || cp_class(cp) != FL(runetype)) type_error("rune", args[0]); return isdigitrune(*(Rune*)cp_data(cp)) ? FL(t) : FL(f); } BUILTIN("char-whitespace?", char_whitespacep) { argcount(nargs, 1); cprim_t *cp = ptr(args[0]); if(!iscprim(args[0]) || cp_class(cp) != FL(runetype)) type_error("rune", args[0]); return isspacerune(*(Rune*)cp_data(cp)) ? FL(t) : FL(f); } BUILTIN("string-find", string_find) { char cbuf[UTFmax+1]; size_t start = 0; if(nargs == 3) start = tosize(args[2]); else argcount(nargs, 2); char *s = tostring(args[0]); size_t len = cv_len(ptr(args[0])); if(start > len) bounds_error(args[0], args[2]); char *needle; size_t needlesz; value_t v = args[1]; cprim_t *cp = ptr(v); if(iscprim(v) && cp_class(cp) == FL(runetype)){ Rune r = *(Rune*)cp_data(cp); needlesz = runetochar(cbuf, &r); needle = cbuf; needle[needlesz] = 0; }else if(iscprim(v) && cp_class(cp) == FL(bytetype)){ needlesz = 1; needle = cbuf; needle[0] = *(char*)cp_data(cp); needle[needlesz] = 0; }else if(fl_isstring(v)){ cvalue_t *cv = (cvalue_t*)ptr(v); needlesz = cv_len(cv); needle = (char*)cv_data(cv); }else{ type_error("string", args[1]); } if(needlesz > len-start) return FL(f); if(needlesz == 0) return size_wrap(start); size_t i; for(i = start; i < len-needlesz+1; i++){ if(s[i] == needle[0] && memcmp(&s[i+1], needle+1, needlesz-1) == 0) return size_wrap(i); } return FL(f); } static unsigned long get_radix_arg(value_t arg) { unsigned long radix = tosize(arg); if(radix < 2 || radix > 36) lerrorf(FL(ArgError), "invalid radix"); return radix; } BUILTIN("number->string", number_2_string) { if(nargs < 1 || nargs > 2) argcount(nargs, 2); value_t n = args[0]; int neg = 0; uint64_t num; if(isfixnum(n)) num = numval(n); else if(!iscprim(n)) type_error("integer", n); else num = conv_to_uint64(cp_data(ptr(n)), cp_numtype(ptr(n))); if(numval(fl_compare(args[0], fixnum(0))) < 0){ num = -num; neg = 1; } unsigned long radix = 10; if(nargs == 2) radix = get_radix_arg(args[1]); char buf[128]; char *str = uint2str(buf, sizeof(buf), num, radix); if(neg && str > &buf[0]) *(--str) = '-'; return string_from_cstr(str); } BUILTIN("string->number", string_2_number) { if(nargs < 1 || nargs > 2) argcount(nargs, 2); char *str = tostring(args[0]); value_t n; unsigned long radix = 0; if(nargs == 2) radix = get_radix_arg(args[1]); if(!fl_read_numtok(str, &n, (int)radix)) return FL(f); return n; } BUILTIN("string-utf8?", string_utf8p) { argcount(nargs, 1); char *s = tostring(args[0]); size_t len = cv_len((cvalue_t*)ptr(args[0])); return u8_isvalid(s, len) ? FL(t) : FL(f); }