ref: b9872910ce781e5818322af3e31c655ca8d87821
dir: /utf16.c/
/* Horror stories: http://en.wikipedia.org/wiki/UTF-16 */
#include "tagspriv.h"

/*
 * Read the 16-bit code unit stored at s. The byte order is taken from a
 * variable named `be` that must be in scope at the point of use.
 */
#define rchr(s) (be ? ((s)[0]<<8 | (s)[1]) : ((s)[1]<<8 | (s)[0]))

/* UTF-8 lead-byte prefix, indexed by the sequence length in bytes (1..4). */
static const uint8_t mark[] = {0x00, 0x00, 0xc0, 0xe0, 0xf0};

/*
 * Convert UTF-16 text into NUL-terminated UTF-8.
 *
 * o, osz: output buffer and its capacity in bytes, terminator included.
 * s, sz:  input bytes and their count. A leading FE FF selects big-endian,
 *         FF FE selects little-endian; with no BOM big-endian is assumed.
 *         A trailing odd byte is ignored.
 *
 * Returns the number of input bytes consumed (BOM included), or -1 when
 * osz < 1 or when a surrogate is malformed (a low surrogate without a
 * preceding high one, or a high surrogate followed by something that is
 * not a low surrogate). On a surrogate error the output is left
 * unterminated.
 *
 * Conversion stops early once the output is full; the returned count then
 * still includes the code point that did not fit.
 *
 * NOTE: a high surrogate that is the very last code unit of the input is
 * not rejected; it is encoded as a lone 3-byte sequence.
 */
int
utf16to8(uint8_t *o, int osz, const uint8_t *s, int sz)
{
	uint32_t c, c2;
	int i, wr, j;
	bool be;

	/* without room for the terminator nothing can be written safely */
	if(osz < 1)
		return -1;

	i = 0;
	be = true;
	/* only look for a BOM when both of its bytes are actually present */
	if(sz >= 2){
		if(s[0] == 0xfe && s[1] == 0xff)
			i += 2;
		else if(s[0] == 0xff && s[1] == 0xfe){
			be = false;
			i += 2;
		}
	}

	/* need a whole code unit of input and room for one byte plus the NUL */
	while(i+1 < sz && osz > 1){
		c = rchr(&s[i]);
		i += 2;
		if(c >= 0xd800 && c <= 0xdbff && i+1 < sz){
			c2 = rchr(&s[i]);
			if(c2 < 0xdc00 || c2 > 0xdfff)
				return -1;
			/*
			 * The 20 payload bits are an offset from U+10000: this has to
			 * be an addition, OR-ing loses bit 16 for high surrogates
			 * >= 0xd840 (i.e. everything from U+20000 upwards).
			 */
			c = 0x10000 + ((c - 0xd800)<<10 | (c2 - 0xdc00));
			i += 2;
		}else if(c >= 0xdc00 && c <= 0xdfff)
			return -1;

		if(c < 0x80)
			wr = 1;
		else if(c < 0x800)
			wr = 2;
		else if(c < 0x10000)
			wr = 3;
		else
			wr = 4;

		/* always keep one byte in reserve for the terminator */
		osz -= wr;
		if(osz < 1)
			break;

		/* emit back to front: continuation bytes first, lead byte last */
		o += wr;
		for(j = wr; j > 1; j--){
			*(--o) = (c & 0x3f) | 0x80;
			c >>= 6;
		}
		*(--o) = (uint8_t)c | mark[wr];
		o += wr;
	}

	*o = 0;
	return i;
}