shithub: femtolisp

Download patch

ref: 54d05728b0fd7042a66c147ea25cc7d78f1a5a4f
parent: 64afa666f159be2f11dc99a3f42a72c48fa38b6f
author: Sigrid Solveig Haflínudóttir <[email protected]>
date: Wed Oct 23 08:45:29 EDT 2024

reduce custom unicode logic, reuse the one in Plan 9 instead

--- a/3rd/wcwidth.c
+++ b/3rd/wcwidth.c
@@ -18,8 +18,8 @@
 #include "../llt.h"
 
 struct width_interval {
-        int start;
-        int end;
+	int start;
+	int end;
 };
 
 // From https://github.com/jquast/wcwidth/blob/master/wcwidth/table_zero.py
@@ -26,349 +26,349 @@
 // from https://github.com/jquast/wcwidth/pull/64
 // at commit 1b9b6585b0080ea5cb88dc9815796505724793fe (2022-12-16):
 static struct width_interval ZERO_WIDTH[] = {
-        {0x00300, 0x0036f},  // Combining Grave Accent  ..Combining Latin Small Le
-        {0x00483, 0x00489},  // Combining Cyrillic Titlo..Combining Cyrillic Milli
-        {0x00591, 0x005bd},  // Hebrew Accent Etnahta   ..Hebrew Point Meteg
-        {0x005bf, 0x005bf},  // Hebrew Point Rafe       ..Hebrew Point Rafe
-        {0x005c1, 0x005c2},  // Hebrew Point Shin Dot   ..Hebrew Point Sin Dot
-        {0x005c4, 0x005c5},  // Hebrew Mark Upper Dot   ..Hebrew Mark Lower Dot
-        {0x005c7, 0x005c7},  // Hebrew Point Qamats Qata..Hebrew Point Qamats Qata
-        {0x00610, 0x0061a},  // Arabic Sign Sallallahou ..Arabic Small Kasra
-        {0x0064b, 0x0065f},  // Arabic Fathatan         ..Arabic Wavy Hamza Below
-        {0x00670, 0x00670},  // Arabic Letter Superscrip..Arabic Letter Superscrip
-        {0x006d6, 0x006dc},  // Arabic Small High Ligatu..Arabic Small High Seen
-        {0x006df, 0x006e4},  // Arabic Small High Rounde..Arabic Small High Madda
-        {0x006e7, 0x006e8},  // Arabic Small High Yeh   ..Arabic Small High Noon
-        {0x006ea, 0x006ed},  // Arabic Empty Centre Low ..Arabic Small Low Meem
-        {0x00711, 0x00711},  // Syriac Letter Superscrip..Syriac Letter Superscrip
-        {0x00730, 0x0074a},  // Syriac Pthaha Above     ..Syriac Barrekh
-        {0x007a6, 0x007b0},  // Thaana Abafili          ..Thaana Sukun
-        {0x007eb, 0x007f3},  // Nko Combining Short High..Nko Combining Double Dot
-        {0x007fd, 0x007fd},  // Nko Dantayalan          ..Nko Dantayalan
-        {0x00816, 0x00819},  // Samaritan Mark In       ..Samaritan Mark Dagesh
-        {0x0081b, 0x00823},  // Samaritan Mark Epentheti..Samaritan Vowel Sign A
-        {0x00825, 0x00827},  // Samaritan Vowel Sign Sho..Samaritan Vowel Sign U
-        {0x00829, 0x0082d},  // Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa
-        {0x00859, 0x0085b},  // Mandaic Affrication Mark..Mandaic Gemination Mark
-        {0x00898, 0x0089f},  // Arabic Small High Word A..Arabic Half Madda Over M
-        {0x008ca, 0x008e1},  // Arabic Small High Farsi ..Arabic Small High Sign S
-        {0x008e3, 0x00902},  // Arabic Turned Damma Belo..Devanagari Sign Anusvara
-        {0x0093a, 0x0093a},  // Devanagari Vowel Sign Oe..Devanagari Vowel Sign Oe
-        {0x0093c, 0x0093c},  // Devanagari Sign Nukta   ..Devanagari Sign Nukta
-        {0x00941, 0x00948},  // Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai
-        {0x0094d, 0x0094d},  // Devanagari Sign Virama  ..Devanagari Sign Virama
-        {0x00951, 0x00957},  // Devanagari Stress Sign U..Devanagari Vowel Sign Uu
-        {0x00962, 0x00963},  // Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo
-        {0x00981, 0x00981},  // Bengali Sign Candrabindu..Bengali Sign Candrabindu
-        {0x009bc, 0x009bc},  // Bengali Sign Nukta      ..Bengali Sign Nukta
-        {0x009c1, 0x009c4},  // Bengali Vowel Sign U    ..Bengali Vowel Sign Vocal
-        {0x009cd, 0x009cd},  // Bengali Sign Virama     ..Bengali Sign Virama
-        {0x009e2, 0x009e3},  // Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal
-        {0x009fe, 0x009fe},  // Bengali Sandhi Mark     ..Bengali Sandhi Mark
-        {0x00a01, 0x00a02},  // Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi
-        {0x00a3c, 0x00a3c},  // Gurmukhi Sign Nukta     ..Gurmukhi Sign Nukta
-        {0x00a41, 0x00a42},  // Gurmukhi Vowel Sign U   ..Gurmukhi Vowel Sign Uu
-        {0x00a47, 0x00a48},  // Gurmukhi Vowel Sign Ee  ..Gurmukhi Vowel Sign Ai
-        {0x00a4b, 0x00a4d},  // Gurmukhi Vowel Sign Oo  ..Gurmukhi Sign Virama
-        {0x00a51, 0x00a51},  // Gurmukhi Sign Udaat     ..Gurmukhi Sign Udaat
-        {0x00a70, 0x00a71},  // Gurmukhi Tippi          ..Gurmukhi Addak
-        {0x00a75, 0x00a75},  // Gurmukhi Sign Yakash    ..Gurmukhi Sign Yakash
-        {0x00a81, 0x00a82},  // Gujarati Sign Candrabind..Gujarati Sign Anusvara
-        {0x00abc, 0x00abc},  // Gujarati Sign Nukta     ..Gujarati Sign Nukta
-        {0x00ac1, 0x00ac5},  // Gujarati Vowel Sign U   ..Gujarati Vowel Sign Cand
-        {0x00ac7, 0x00ac8},  // Gujarati Vowel Sign E   ..Gujarati Vowel Sign Ai
-        {0x00acd, 0x00acd},  // Gujarati Sign Virama    ..Gujarati Sign Virama
-        {0x00ae2, 0x00ae3},  // Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca
-        {0x00afa, 0x00aff},  // Gujarati Sign Sukun     ..Gujarati Sign Two-circle
-        {0x00b01, 0x00b01},  // Oriya Sign Candrabindu  ..Oriya Sign Candrabindu
-        {0x00b3c, 0x00b3c},  // Oriya Sign Nukta        ..Oriya Sign Nukta
-        {0x00b3f, 0x00b3f},  // Oriya Vowel Sign I      ..Oriya Vowel Sign I
-        {0x00b41, 0x00b44},  // Oriya Vowel Sign U      ..Oriya Vowel Sign Vocalic
-        {0x00b4d, 0x00b4d},  // Oriya Sign Virama       ..Oriya Sign Virama
-        {0x00b55, 0x00b56},  // Oriya Sign Overline     ..Oriya Ai Length Mark
-        {0x00b62, 0x00b63},  // Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic
-        {0x00b82, 0x00b82},  // Tamil Sign Anusvara     ..Tamil Sign Anusvara
-        {0x00bc0, 0x00bc0},  // Tamil Vowel Sign Ii     ..Tamil Vowel Sign Ii
-        {0x00bcd, 0x00bcd},  // Tamil Sign Virama       ..Tamil Sign Virama
-        {0x00c00, 0x00c00},  // Telugu Sign Combining Ca..Telugu Sign Combining Ca
-        {0x00c04, 0x00c04},  // Telugu Sign Combining An..Telugu Sign Combining An
-        {0x00c3c, 0x00c3c},  // Telugu Sign Nukta       ..Telugu Sign Nukta
-        {0x00c3e, 0x00c40},  // Telugu Vowel Sign Aa    ..Telugu Vowel Sign Ii
-        {0x00c46, 0x00c48},  // Telugu Vowel Sign E     ..Telugu Vowel Sign Ai
-        {0x00c4a, 0x00c4d},  // Telugu Vowel Sign O     ..Telugu Sign Virama
-        {0x00c55, 0x00c56},  // Telugu Length Mark      ..Telugu Ai Length Mark
-        {0x00c62, 0x00c63},  // Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali
-        {0x00c81, 0x00c81},  // Kannada Sign Candrabindu..Kannada Sign Candrabindu
-        {0x00cbc, 0x00cbc},  // Kannada Sign Nukta      ..Kannada Sign Nukta
-        {0x00cbf, 0x00cbf},  // Kannada Vowel Sign I    ..Kannada Vowel Sign I
-        {0x00cc6, 0x00cc6},  // Kannada Vowel Sign E    ..Kannada Vowel Sign E
-        {0x00ccc, 0x00ccd},  // Kannada Vowel Sign Au   ..Kannada Sign Virama
-        {0x00ce2, 0x00ce3},  // Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal
-        {0x00d00, 0x00d01},  // Malayalam Sign Combining..Malayalam Sign Candrabin
-        {0x00d3b, 0x00d3c},  // Malayalam Sign Vertical ..Malayalam Sign Circular
-        {0x00d41, 0x00d44},  // Malayalam Vowel Sign U  ..Malayalam Vowel Sign Voc
-        {0x00d4d, 0x00d4d},  // Malayalam Sign Virama   ..Malayalam Sign Virama
-        {0x00d62, 0x00d63},  // Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc
-        {0x00d81, 0x00d81},  // Sinhala Sign Candrabindu..Sinhala Sign Candrabindu
-        {0x00dca, 0x00dca},  // Sinhala Sign Al-lakuna  ..Sinhala Sign Al-lakuna
-        {0x00dd2, 0x00dd4},  // Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti
-        {0x00dd6, 0x00dd6},  // Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga
-        {0x00e31, 0x00e31},  // Thai Character Mai Han-a..Thai Character Mai Han-a
-        {0x00e34, 0x00e3a},  // Thai Character Sara I   ..Thai Character Phinthu
-        {0x00e47, 0x00e4e},  // Thai Character Maitaikhu..Thai Character Yamakkan
-        {0x00eb1, 0x00eb1},  // Lao Vowel Sign Mai Kan  ..Lao Vowel Sign Mai Kan
-        {0x00eb4, 0x00ebc},  // Lao Vowel Sign I        ..Lao Semivowel Sign Lo
-        {0x00ec8, 0x00ece},  // Lao Tone Mai Ek         ..(nil)
-        {0x00f18, 0x00f19},  // Tibetan Astrological Sig..Tibetan Astrological Sig
-        {0x00f35, 0x00f35},  // Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung
-        {0x00f37, 0x00f37},  // Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung
-        {0x00f39, 0x00f39},  // Tibetan Mark Tsa -phru  ..Tibetan Mark Tsa -phru
-        {0x00f71, 0x00f7e},  // Tibetan Vowel Sign Aa   ..Tibetan Sign Rjes Su Nga
-        {0x00f80, 0x00f84},  // Tibetan Vowel Sign Rever..Tibetan Mark Halanta
-        {0x00f86, 0x00f87},  // Tibetan Sign Lci Rtags  ..Tibetan Sign Yang Rtags
-        {0x00f8d, 0x00f97},  // Tibetan Subjoined Sign L..Tibetan Subjoined Letter
-        {0x00f99, 0x00fbc},  // Tibetan Subjoined Letter..Tibetan Subjoined Letter
-        {0x00fc6, 0x00fc6},  // Tibetan Symbol Padma Gda..Tibetan Symbol Padma Gda
-        {0x0102d, 0x01030},  // Myanmar Vowel Sign I    ..Myanmar Vowel Sign Uu
-        {0x01032, 0x01037},  // Myanmar Vowel Sign Ai   ..Myanmar Sign Dot Below
-        {0x01039, 0x0103a},  // Myanmar Sign Virama     ..Myanmar Sign Asat
-        {0x0103d, 0x0103e},  // Myanmar Consonant Sign M..Myanmar Consonant Sign M
-        {0x01058, 0x01059},  // Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal
-        {0x0105e, 0x01060},  // Myanmar Consonant Sign M..Myanmar Consonant Sign M
-        {0x01071, 0x01074},  // Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah
-        {0x01082, 0x01082},  // Myanmar Consonant Sign S..Myanmar Consonant Sign S
-        {0x01085, 0x01086},  // Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan
-        {0x0108d, 0x0108d},  // Myanmar Sign Shan Counci..Myanmar Sign Shan Counci
-        {0x0109d, 0x0109d},  // Myanmar Vowel Sign Aiton..Myanmar Vowel Sign Aiton
-        {0x0135d, 0x0135f},  // Ethiopic Combining Gemin..Ethiopic Combining Gemin
-        {0x01712, 0x01714},  // Tagalog Vowel Sign I    ..Tagalog Sign Virama
-        {0x01732, 0x01733},  // Hanunoo Vowel Sign I    ..Hanunoo Vowel Sign U
-        {0x01752, 0x01753},  // Buhid Vowel Sign I      ..Buhid Vowel Sign U
-        {0x01772, 0x01773},  // Tagbanwa Vowel Sign I   ..Tagbanwa Vowel Sign U
-        {0x017b4, 0x017b5},  // Khmer Vowel Inherent Aq ..Khmer Vowel Inherent Aa
-        {0x017b7, 0x017bd},  // Khmer Vowel Sign I      ..Khmer Vowel Sign Ua
-        {0x017c6, 0x017c6},  // Khmer Sign Nikahit      ..Khmer Sign Nikahit
-        {0x017c9, 0x017d3},  // Khmer Sign Muusikatoan  ..Khmer Sign Bathamasat
-        {0x017dd, 0x017dd},  // Khmer Sign Atthacan     ..Khmer Sign Atthacan
-        {0x0180b, 0x0180d},  // Mongolian Free Variation..Mongolian Free Variation
-        {0x0180f, 0x0180f},  // Mongolian Free Variation..Mongolian Free Variation
-        {0x01885, 0x01886},  // Mongolian Letter Ali Gal..Mongolian Letter Ali Gal
-        {0x018a9, 0x018a9},  // Mongolian Letter Ali Gal..Mongolian Letter Ali Gal
-        {0x01920, 0x01922},  // Limbu Vowel Sign A      ..Limbu Vowel Sign U
-        {0x01927, 0x01928},  // Limbu Vowel Sign E      ..Limbu Vowel Sign O
-        {0x01932, 0x01932},  // Limbu Small Letter Anusv..Limbu Small Letter Anusv
-        {0x01939, 0x0193b},  // Limbu Sign Mukphreng    ..Limbu Sign Sa-i
-        {0x01a17, 0x01a18},  // Buginese Vowel Sign I   ..Buginese Vowel Sign U
-        {0x01a1b, 0x01a1b},  // Buginese Vowel Sign Ae  ..Buginese Vowel Sign Ae
-        {0x01a56, 0x01a56},  // Tai Tham Consonant Sign ..Tai Tham Consonant Sign
-        {0x01a58, 0x01a5e},  // Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign
-        {0x01a60, 0x01a60},  // Tai Tham Sign Sakot     ..Tai Tham Sign Sakot
-        {0x01a62, 0x01a62},  // Tai Tham Vowel Sign Mai ..Tai Tham Vowel Sign Mai
-        {0x01a65, 0x01a6c},  // Tai Tham Vowel Sign I   ..Tai Tham Vowel Sign Oa B
-        {0x01a73, 0x01a7c},  // Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue
-        {0x01a7f, 0x01a7f},  // Tai Tham Combining Crypt..Tai Tham Combining Crypt
-        {0x01ab0, 0x01ace},  // Combining Doubled Circum..Combining Latin Small Le
-        {0x01b00, 0x01b03},  // Balinese Sign Ulu Ricem ..Balinese Sign Surang
-        {0x01b34, 0x01b34},  // Balinese Sign Rerekan   ..Balinese Sign Rerekan
-        {0x01b36, 0x01b3a},  // Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R
-        {0x01b3c, 0x01b3c},  // Balinese Vowel Sign La L..Balinese Vowel Sign La L
-        {0x01b42, 0x01b42},  // Balinese Vowel Sign Pepe..Balinese Vowel Sign Pepe
-        {0x01b6b, 0x01b73},  // Balinese Musical Symbol ..Balinese Musical Symbol
-        {0x01b80, 0x01b81},  // Sundanese Sign Panyecek ..Sundanese Sign Panglayar
-        {0x01ba2, 0x01ba5},  // Sundanese Consonant Sign..Sundanese Vowel Sign Pan
-        {0x01ba8, 0x01ba9},  // Sundanese Vowel Sign Pam..Sundanese Vowel Sign Pan
-        {0x01bab, 0x01bad},  // Sundanese Sign Virama   ..Sundanese Consonant Sign
-        {0x01be6, 0x01be6},  // Batak Sign Tompi        ..Batak Sign Tompi
-        {0x01be8, 0x01be9},  // Batak Vowel Sign Pakpak ..Batak Vowel Sign Ee
-        {0x01bed, 0x01bed},  // Batak Vowel Sign Karo O ..Batak Vowel Sign Karo O
-        {0x01bef, 0x01bf1},  // Batak Vowel Sign U For S..Batak Consonant Sign H
-        {0x01c2c, 0x01c33},  // Lepcha Vowel Sign E     ..Lepcha Consonant Sign T
-        {0x01c36, 0x01c37},  // Lepcha Sign Ran         ..Lepcha Sign Nukta
-        {0x01cd0, 0x01cd2},  // Vedic Tone Karshana     ..Vedic Tone Prenkha
-        {0x01cd4, 0x01ce0},  // Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash
-        {0x01ce2, 0x01ce8},  // Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda
-        {0x01ced, 0x01ced},  // Vedic Sign Tiryak       ..Vedic Sign Tiryak
-        {0x01cf4, 0x01cf4},  // Vedic Tone Candra Above ..Vedic Tone Candra Above
-        {0x01cf8, 0x01cf9},  // Vedic Tone Ring Above   ..Vedic Tone Double Ring A
-        {0x01dc0, 0x01dff},  // Combining Dotted Grave A..Combining Right Arrowhea
-        {0x020d0, 0x020f0},  // Combining Left Harpoon A..Combining Asterisk Above
-        {0x02cef, 0x02cf1},  // Coptic Combining Ni Abov..Coptic Combining Spiritu
-        {0x02d7f, 0x02d7f},  // Tifinagh Consonant Joine..Tifinagh Consonant Joine
-        {0x02de0, 0x02dff},  // Combining Cyrillic Lette..Combining Cyrillic Lette
-        {0x0302a, 0x0302d},  // Ideographic Level Tone M..Ideographic Entering Ton
-        {0x03099, 0x0309a},  // Combining Katakana-hirag..Combining Katakana-hirag
-        {0x0a66f, 0x0a672},  // Combining Cyrillic Vzmet..Combining Cyrillic Thous
-        {0x0a674, 0x0a67d},  // Combining Cyrillic Lette..Combining Cyrillic Payer
-        {0x0a69e, 0x0a69f},  // Combining Cyrillic Lette..Combining Cyrillic Lette
-        {0x0a6f0, 0x0a6f1},  // Bamum Combining Mark Koq..Bamum Combining Mark Tuk
-        {0x0a802, 0x0a802},  // Syloti Nagri Sign Dvisva..Syloti Nagri Sign Dvisva
-        {0x0a806, 0x0a806},  // Syloti Nagri Sign Hasant..Syloti Nagri Sign Hasant
-        {0x0a80b, 0x0a80b},  // Syloti Nagri Sign Anusva..Syloti Nagri Sign Anusva
-        {0x0a825, 0x0a826},  // Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign
-        {0x0a82c, 0x0a82c},  // Syloti Nagri Sign Altern..Syloti Nagri Sign Altern
-        {0x0a8c4, 0x0a8c5},  // Saurashtra Sign Virama  ..Saurashtra Sign Candrabi
-        {0x0a8e0, 0x0a8f1},  // Combining Devanagari Dig..Combining Devanagari Sig
-        {0x0a8ff, 0x0a8ff},  // Devanagari Vowel Sign Ay..Devanagari Vowel Sign Ay
-        {0x0a926, 0x0a92d},  // Kayah Li Vowel Ue       ..Kayah Li Tone Calya Plop
-        {0x0a947, 0x0a951},  // Rejang Vowel Sign I     ..Rejang Consonant Sign R
-        {0x0a980, 0x0a982},  // Javanese Sign Panyangga ..Javanese Sign Layar
-        {0x0a9b3, 0x0a9b3},  // Javanese Sign Cecak Telu..Javanese Sign Cecak Telu
-        {0x0a9b6, 0x0a9b9},  // Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku
-        {0x0a9bc, 0x0a9bd},  // Javanese Vowel Sign Pepe..Javanese Consonant Sign
-        {0x0a9e5, 0x0a9e5},  // Myanmar Sign Shan Saw   ..Myanmar Sign Shan Saw
-        {0x0aa29, 0x0aa2e},  // Cham Vowel Sign Aa      ..Cham Vowel Sign Oe
-        {0x0aa31, 0x0aa32},  // Cham Vowel Sign Au      ..Cham Vowel Sign Ue
-        {0x0aa35, 0x0aa36},  // Cham Consonant Sign La  ..Cham Consonant Sign Wa
-        {0x0aa43, 0x0aa43},  // Cham Consonant Sign Fina..Cham Consonant Sign Fina
-        {0x0aa4c, 0x0aa4c},  // Cham Consonant Sign Fina..Cham Consonant Sign Fina
-        {0x0aa7c, 0x0aa7c},  // Myanmar Sign Tai Laing T..Myanmar Sign Tai Laing T
-        {0x0aab0, 0x0aab0},  // Tai Viet Mai Kang       ..Tai Viet Mai Kang
-        {0x0aab2, 0x0aab4},  // Tai Viet Vowel I        ..Tai Viet Vowel U
-        {0x0aab7, 0x0aab8},  // Tai Viet Mai Khit       ..Tai Viet Vowel Ia
-        {0x0aabe, 0x0aabf},  // Tai Viet Vowel Am       ..Tai Viet Tone Mai Ek
-        {0x0aac1, 0x0aac1},  // Tai Viet Tone Mai Tho   ..Tai Viet Tone Mai Tho
-        {0x0aaec, 0x0aaed},  // Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign
-        {0x0aaf6, 0x0aaf6},  // Meetei Mayek Virama     ..Meetei Mayek Virama
-        {0x0abe5, 0x0abe5},  // Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign
-        {0x0abe8, 0x0abe8},  // Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign
-        {0x0abed, 0x0abed},  // Meetei Mayek Apun Iyek  ..Meetei Mayek Apun Iyek
-        {0x0fb1e, 0x0fb1e},  // Hebrew Point Judeo-spani..Hebrew Point Judeo-spani
-        {0x0fe00, 0x0fe0f},  // Variation Selector-1    ..Variation Selector-16
-        {0x0fe20, 0x0fe2f},  // Combining Ligature Left ..Combining Cyrillic Titlo
-        {0x101fd, 0x101fd},  // Phaistos Disc Sign Combi..Phaistos Disc Sign Combi
-        {0x102e0, 0x102e0},  // Coptic Epact Thousands M..Coptic Epact Thousands M
-        {0x10376, 0x1037a},  // Combining Old Permic Let..Combining Old Permic Let
-        {0x10a01, 0x10a03},  // Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo
-        {0x10a05, 0x10a06},  // Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O
-        {0x10a0c, 0x10a0f},  // Kharoshthi Vowel Length ..Kharoshthi Sign Visarga
-        {0x10a38, 0x10a3a},  // Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo
-        {0x10a3f, 0x10a3f},  // Kharoshthi Virama       ..Kharoshthi Virama
-        {0x10ae5, 0x10ae6},  // Manichaean Abbreviation ..Manichaean Abbreviation
-        {0x10d24, 0x10d27},  // Hanifi Rohingya Sign Har..Hanifi Rohingya Sign Tas
-        {0x10eab, 0x10eac},  // Yezidi Combining Hamza M..Yezidi Combining Madda M
-        {0x10efd, 0x10eff},  // (nil)                   ..(nil)
-        {0x10f46, 0x10f50},  // Sogdian Combining Dot Be..Sogdian Combining Stroke
-        {0x10f82, 0x10f85},  // Old Uyghur Combining Dot..Old Uyghur Combining Two
-        {0x11001, 0x11001},  // Brahmi Sign Anusvara    ..Brahmi Sign Anusvara
-        {0x11038, 0x11046},  // Brahmi Vowel Sign Aa    ..Brahmi Virama
-        {0x11070, 0x11070},  // Brahmi Sign Old Tamil Vi..Brahmi Sign Old Tamil Vi
-        {0x11073, 0x11074},  // Brahmi Vowel Sign Old Ta..Brahmi Vowel Sign Old Ta
-        {0x1107f, 0x11081},  // Brahmi Number Joiner    ..Kaithi Sign Anusvara
-        {0x110b3, 0x110b6},  // Kaithi Vowel Sign U     ..Kaithi Vowel Sign Ai
-        {0x110b9, 0x110ba},  // Kaithi Sign Virama      ..Kaithi Sign Nukta
-        {0x110c2, 0x110c2},  // Kaithi Vowel Sign Vocali..Kaithi Vowel Sign Vocali
-        {0x11100, 0x11102},  // Chakma Sign Candrabindu ..Chakma Sign Visarga
-        {0x11127, 0x1112b},  // Chakma Vowel Sign A     ..Chakma Vowel Sign Uu
-        {0x1112d, 0x11134},  // Chakma Vowel Sign Ai    ..Chakma Maayyaa
-        {0x11173, 0x11173},  // Mahajani Sign Nukta     ..Mahajani Sign Nukta
-        {0x11180, 0x11181},  // Sharada Sign Candrabindu..Sharada Sign Anusvara
-        {0x111b6, 0x111be},  // Sharada Vowel Sign U    ..Sharada Vowel Sign O
-        {0x111c9, 0x111cc},  // Sharada Sandhi Mark     ..Sharada Extra Short Vowe
-        {0x111cf, 0x111cf},  // Sharada Sign Inverted Ca..Sharada Sign Inverted Ca
-        {0x1122f, 0x11231},  // Khojki Vowel Sign U     ..Khojki Vowel Sign Ai
-        {0x11234, 0x11234},  // Khojki Sign Anusvara    ..Khojki Sign Anusvara
-        {0x11236, 0x11237},  // Khojki Sign Nukta       ..Khojki Sign Shadda
-        {0x1123e, 0x1123e},  // Khojki Sign Sukun       ..Khojki Sign Sukun
-        {0x11241, 0x11241},  // (nil)                   ..(nil)
-        {0x112df, 0x112df},  // Khudawadi Sign Anusvara ..Khudawadi Sign Anusvara
-        {0x112e3, 0x112ea},  // Khudawadi Vowel Sign U  ..Khudawadi Sign Virama
-        {0x11300, 0x11301},  // Grantha Sign Combining A..Grantha Sign Candrabindu
-        {0x1133b, 0x1133c},  // Combining Bindu Below   ..Grantha Sign Nukta
-        {0x11340, 0x11340},  // Grantha Vowel Sign Ii   ..Grantha Vowel Sign Ii
-        {0x11366, 0x1136c},  // Combining Grantha Digit ..Combining Grantha Digit
-        {0x11370, 0x11374},  // Combining Grantha Letter..Combining Grantha Letter
-        {0x11438, 0x1143f},  // Newa Vowel Sign U       ..Newa Vowel Sign Ai
-        {0x11442, 0x11444},  // Newa Sign Virama        ..Newa Sign Anusvara
-        {0x11446, 0x11446},  // Newa Sign Nukta         ..Newa Sign Nukta
-        {0x1145e, 0x1145e},  // Newa Sandhi Mark        ..Newa Sandhi Mark
-        {0x114b3, 0x114b8},  // Tirhuta Vowel Sign U    ..Tirhuta Vowel Sign Vocal
-        {0x114ba, 0x114ba},  // Tirhuta Vowel Sign Short..Tirhuta Vowel Sign Short
-        {0x114bf, 0x114c0},  // Tirhuta Sign Candrabindu..Tirhuta Sign Anusvara
-        {0x114c2, 0x114c3},  // Tirhuta Sign Virama     ..Tirhuta Sign Nukta
-        {0x115b2, 0x115b5},  // Siddham Vowel Sign U    ..Siddham Vowel Sign Vocal
-        {0x115bc, 0x115bd},  // Siddham Sign Candrabindu..Siddham Sign Anusvara
-        {0x115bf, 0x115c0},  // Siddham Sign Virama     ..Siddham Sign Nukta
-        {0x115dc, 0x115dd},  // Siddham Vowel Sign Alter..Siddham Vowel Sign Alter
-        {0x11633, 0x1163a},  // Modi Vowel Sign U       ..Modi Vowel Sign Ai
-        {0x1163d, 0x1163d},  // Modi Sign Anusvara      ..Modi Sign Anusvara
-        {0x1163f, 0x11640},  // Modi Sign Virama        ..Modi Sign Ardhacandra
-        {0x116ab, 0x116ab},  // Takri Sign Anusvara     ..Takri Sign Anusvara
-        {0x116ad, 0x116ad},  // Takri Vowel Sign Aa     ..Takri Vowel Sign Aa
-        {0x116b0, 0x116b5},  // Takri Vowel Sign U      ..Takri Vowel Sign Au
-        {0x116b7, 0x116b7},  // Takri Sign Nukta        ..Takri Sign Nukta
-        {0x1171d, 0x1171f},  // Ahom Consonant Sign Medi..Ahom Consonant Sign Medi
-        {0x11722, 0x11725},  // Ahom Vowel Sign I       ..Ahom Vowel Sign Uu
-        {0x11727, 0x1172b},  // Ahom Vowel Sign Aw      ..Ahom Sign Killer
-        {0x1182f, 0x11837},  // Dogra Vowel Sign U      ..Dogra Sign Anusvara
-        {0x11839, 0x1183a},  // Dogra Sign Virama       ..Dogra Sign Nukta
-        {0x1193b, 0x1193c},  // Dives Akuru Sign Anusvar..Dives Akuru Sign Candrab
-        {0x1193e, 0x1193e},  // Dives Akuru Virama      ..Dives Akuru Virama
-        {0x11943, 0x11943},  // Dives Akuru Sign Nukta  ..Dives Akuru Sign Nukta
-        {0x119d4, 0x119d7},  // Nandinagari Vowel Sign U..Nandinagari Vowel Sign V
-        {0x119da, 0x119db},  // Nandinagari Vowel Sign E..Nandinagari Vowel Sign A
-        {0x119e0, 0x119e0},  // Nandinagari Sign Virama ..Nandinagari Sign Virama
-        {0x11a01, 0x11a0a},  // Zanabazar Square Vowel S..Zanabazar Square Vowel L
-        {0x11a33, 0x11a38},  // Zanabazar Square Final C..Zanabazar Square Sign An
-        {0x11a3b, 0x11a3e},  // Zanabazar Square Cluster..Zanabazar Square Cluster
-        {0x11a47, 0x11a47},  // Zanabazar Square Subjoin..Zanabazar Square Subjoin
-        {0x11a51, 0x11a56},  // Soyombo Vowel Sign I    ..Soyombo Vowel Sign Oe
-        {0x11a59, 0x11a5b},  // Soyombo Vowel Sign Vocal..Soyombo Vowel Length Mar
-        {0x11a8a, 0x11a96},  // Soyombo Final Consonant ..Soyombo Sign Anusvara
-        {0x11a98, 0x11a99},  // Soyombo Gemination Mark ..Soyombo Subjoiner
-        {0x11c30, 0x11c36},  // Bhaiksuki Vowel Sign I  ..Bhaiksuki Vowel Sign Voc
-        {0x11c38, 0x11c3d},  // Bhaiksuki Vowel Sign E  ..Bhaiksuki Sign Anusvara
-        {0x11c3f, 0x11c3f},  // Bhaiksuki Sign Virama   ..Bhaiksuki Sign Virama
-        {0x11c92, 0x11ca7},  // Marchen Subjoined Letter..Marchen Subjoined Letter
-        {0x11caa, 0x11cb0},  // Marchen Subjoined Letter..Marchen Vowel Sign Aa
-        {0x11cb2, 0x11cb3},  // Marchen Vowel Sign U    ..Marchen Vowel Sign E
-        {0x11cb5, 0x11cb6},  // Marchen Sign Anusvara   ..Marchen Sign Candrabindu
-        {0x11d31, 0x11d36},  // Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign
-        {0x11d3a, 0x11d3a},  // Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign
-        {0x11d3c, 0x11d3d},  // Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign
-        {0x11d3f, 0x11d45},  // Masaram Gondi Vowel Sign..Masaram Gondi Virama
-        {0x11d47, 0x11d47},  // Masaram Gondi Ra-kara   ..Masaram Gondi Ra-kara
-        {0x11d90, 0x11d91},  // Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign
-        {0x11d95, 0x11d95},  // Gunjala Gondi Sign Anusv..Gunjala Gondi Sign Anusv
-        {0x11d97, 0x11d97},  // Gunjala Gondi Virama    ..Gunjala Gondi Virama
-        {0x11ef3, 0x11ef4},  // Makasar Vowel Sign I    ..Makasar Vowel Sign U
-        {0x11f00, 0x11f01},  // (nil)                   ..(nil)
-        {0x11f36, 0x11f3a},  // (nil)                   ..(nil)
-        {0x11f40, 0x11f40},  // (nil)                   ..(nil)
-        {0x11f42, 0x11f42},  // (nil)                   ..(nil)
-        {0x13440, 0x13440},  // (nil)                   ..(nil)
-        {0x13447, 0x13455},  // (nil)                   ..(nil)
-        {0x16af0, 0x16af4},  // Bassa Vah Combining High..Bassa Vah Combining High
-        {0x16b30, 0x16b36},  // Pahawh Hmong Mark Cim Tu..Pahawh Hmong Mark Cim Ta
-        {0x16f4f, 0x16f4f},  // Miao Sign Consonant Modi..Miao Sign Consonant Modi
-        {0x16f8f, 0x16f92},  // Miao Tone Right         ..Miao Tone Below
-        {0x16fe4, 0x16fe4},  // Khitan Small Script Fill..Khitan Small Script Fill
-        {0x1bc9d, 0x1bc9e},  // Duployan Thick Letter Se..Duployan Double Mark
-        {0x1cf00, 0x1cf2d},  // Znamenny Combining Mark ..Znamenny Combining Mark
-        {0x1cf30, 0x1cf46},  // Znamenny Combining Tonal..Znamenny Priznak Modifie
-        {0x1d167, 0x1d169},  // Musical Symbol Combining..Musical Symbol Combining
-        {0x1d17b, 0x1d182},  // Musical Symbol Combining..Musical Symbol Combining
-        {0x1d185, 0x1d18b},  // Musical Symbol Combining..Musical Symbol Combining
-        {0x1d1aa, 0x1d1ad},  // Musical Symbol Combining..Musical Symbol Combining
-        {0x1d242, 0x1d244},  // Combining Greek Musical ..Combining Greek Musical
-        {0x1da00, 0x1da36},  // Signwriting Head Rim    ..Signwriting Air Sucking
-        {0x1da3b, 0x1da6c},  // Signwriting Mouth Closed..Signwriting Excitement
-        {0x1da75, 0x1da75},  // Signwriting Upper Body T..Signwriting Upper Body T
-        {0x1da84, 0x1da84},  // Signwriting Location Hea..Signwriting Location Hea
-        {0x1da9b, 0x1da9f},  // Signwriting Fill Modifie..Signwriting Fill Modifie
-        {0x1daa1, 0x1daaf},  // Signwriting Rotation Mod..Signwriting Rotation Mod
-        {0x1e000, 0x1e006},  // Combining Glagolitic Let..Combining Glagolitic Let
-        {0x1e008, 0x1e018},  // Combining Glagolitic Let..Combining Glagolitic Let
-        {0x1e01b, 0x1e021},  // Combining Glagolitic Let..Combining Glagolitic Let
-        {0x1e023, 0x1e024},  // Combining Glagolitic Let..Combining Glagolitic Let
-        {0x1e026, 0x1e02a},  // Combining Glagolitic Let..Combining Glagolitic Let
-        {0x1e08f, 0x1e08f},  // (nil)                   ..(nil)
-        {0x1e130, 0x1e136},  // Nyiakeng Puachue Hmong T..Nyiakeng Puachue Hmong T
-        {0x1e2ae, 0x1e2ae},  // Toto Sign Rising Tone   ..Toto Sign Rising Tone
-        {0x1e2ec, 0x1e2ef},  // Wancho Tone Tup         ..Wancho Tone Koini
-        {0x1e4ec, 0x1e4ef},  // (nil)                   ..(nil)
-        {0x1e8d0, 0x1e8d6},  // Mende Kikakui Combining ..Mende Kikakui Combining
-        {0x1e944, 0x1e94a},  // Adlam Alif Lengthener   ..Adlam Nukta
-        {0xe0100, 0xe01ef},  // Variation Selector-17   ..Variation Selector-256
+	{0x00300, 0x0036f},  // Combining Grave Accent  ..Combining Latin Small Le
+	{0x00483, 0x00489},  // Combining Cyrillic Titlo..Combining Cyrillic Milli
+	{0x00591, 0x005bd},  // Hebrew Accent Etnahta   ..Hebrew Point Meteg
+	{0x005bf, 0x005bf},  // Hebrew Point Rafe       ..Hebrew Point Rafe
+	{0x005c1, 0x005c2},  // Hebrew Point Shin Dot   ..Hebrew Point Sin Dot
+	{0x005c4, 0x005c5},  // Hebrew Mark Upper Dot   ..Hebrew Mark Lower Dot
+	{0x005c7, 0x005c7},  // Hebrew Point Qamats Qata..Hebrew Point Qamats Qata
+	{0x00610, 0x0061a},  // Arabic Sign Sallallahou ..Arabic Small Kasra
+	{0x0064b, 0x0065f},  // Arabic Fathatan         ..Arabic Wavy Hamza Below
+	{0x00670, 0x00670},  // Arabic Letter Superscrip..Arabic Letter Superscrip
+	{0x006d6, 0x006dc},  // Arabic Small High Ligatu..Arabic Small High Seen
+	{0x006df, 0x006e4},  // Arabic Small High Rounde..Arabic Small High Madda
+	{0x006e7, 0x006e8},  // Arabic Small High Yeh   ..Arabic Small High Noon
+	{0x006ea, 0x006ed},  // Arabic Empty Centre Low ..Arabic Small Low Meem
+	{0x00711, 0x00711},  // Syriac Letter Superscrip..Syriac Letter Superscrip
+	{0x00730, 0x0074a},  // Syriac Pthaha Above     ..Syriac Barrekh
+	{0x007a6, 0x007b0},  // Thaana Abafili          ..Thaana Sukun
+	{0x007eb, 0x007f3},  // Nko Combining Short High..Nko Combining Double Dot
+	{0x007fd, 0x007fd},  // Nko Dantayalan          ..Nko Dantayalan
+	{0x00816, 0x00819},  // Samaritan Mark In       ..Samaritan Mark Dagesh
+	{0x0081b, 0x00823},  // Samaritan Mark Epentheti..Samaritan Vowel Sign A
+	{0x00825, 0x00827},  // Samaritan Vowel Sign Sho..Samaritan Vowel Sign U
+	{0x00829, 0x0082d},  // Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa
+	{0x00859, 0x0085b},  // Mandaic Affrication Mark..Mandaic Gemination Mark
+	{0x00898, 0x0089f},  // Arabic Small High Word A..Arabic Half Madda Over M
+	{0x008ca, 0x008e1},  // Arabic Small High Farsi ..Arabic Small High Sign S
+	{0x008e3, 0x00902},  // Arabic Turned Damma Belo..Devanagari Sign Anusvara
+	{0x0093a, 0x0093a},  // Devanagari Vowel Sign Oe..Devanagari Vowel Sign Oe
+	{0x0093c, 0x0093c},  // Devanagari Sign Nukta   ..Devanagari Sign Nukta
+	{0x00941, 0x00948},  // Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai
+	{0x0094d, 0x0094d},  // Devanagari Sign Virama  ..Devanagari Sign Virama
+	{0x00951, 0x00957},  // Devanagari Stress Sign U..Devanagari Vowel Sign Uu
+	{0x00962, 0x00963},  // Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo
+	{0x00981, 0x00981},  // Bengali Sign Candrabindu..Bengali Sign Candrabindu
+	{0x009bc, 0x009bc},  // Bengali Sign Nukta      ..Bengali Sign Nukta
+	{0x009c1, 0x009c4},  // Bengali Vowel Sign U    ..Bengali Vowel Sign Vocal
+	{0x009cd, 0x009cd},  // Bengali Sign Virama     ..Bengali Sign Virama
+	{0x009e2, 0x009e3},  // Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal
+	{0x009fe, 0x009fe},  // Bengali Sandhi Mark     ..Bengali Sandhi Mark
+	{0x00a01, 0x00a02},  // Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi
+	{0x00a3c, 0x00a3c},  // Gurmukhi Sign Nukta     ..Gurmukhi Sign Nukta
+	{0x00a41, 0x00a42},  // Gurmukhi Vowel Sign U   ..Gurmukhi Vowel Sign Uu
+	{0x00a47, 0x00a48},  // Gurmukhi Vowel Sign Ee  ..Gurmukhi Vowel Sign Ai
+	{0x00a4b, 0x00a4d},  // Gurmukhi Vowel Sign Oo  ..Gurmukhi Sign Virama
+	{0x00a51, 0x00a51},  // Gurmukhi Sign Udaat     ..Gurmukhi Sign Udaat
+	{0x00a70, 0x00a71},  // Gurmukhi Tippi          ..Gurmukhi Addak
+	{0x00a75, 0x00a75},  // Gurmukhi Sign Yakash    ..Gurmukhi Sign Yakash
+	{0x00a81, 0x00a82},  // Gujarati Sign Candrabind..Gujarati Sign Anusvara
+	{0x00abc, 0x00abc},  // Gujarati Sign Nukta     ..Gujarati Sign Nukta
+	{0x00ac1, 0x00ac5},  // Gujarati Vowel Sign U   ..Gujarati Vowel Sign Cand
+	{0x00ac7, 0x00ac8},  // Gujarati Vowel Sign E   ..Gujarati Vowel Sign Ai
+	{0x00acd, 0x00acd},  // Gujarati Sign Virama    ..Gujarati Sign Virama
+	{0x00ae2, 0x00ae3},  // Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca
+	{0x00afa, 0x00aff},  // Gujarati Sign Sukun     ..Gujarati Sign Two-circle
+	{0x00b01, 0x00b01},  // Oriya Sign Candrabindu  ..Oriya Sign Candrabindu
+	{0x00b3c, 0x00b3c},  // Oriya Sign Nukta	..Oriya Sign Nukta
+	{0x00b3f, 0x00b3f},  // Oriya Vowel Sign I      ..Oriya Vowel Sign I
+	{0x00b41, 0x00b44},  // Oriya Vowel Sign U      ..Oriya Vowel Sign Vocalic
+	{0x00b4d, 0x00b4d},  // Oriya Sign Virama       ..Oriya Sign Virama
+	{0x00b55, 0x00b56},  // Oriya Sign Overline     ..Oriya Ai Length Mark
+	{0x00b62, 0x00b63},  // Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic
+	{0x00b82, 0x00b82},  // Tamil Sign Anusvara     ..Tamil Sign Anusvara
+	{0x00bc0, 0x00bc0},  // Tamil Vowel Sign Ii     ..Tamil Vowel Sign Ii
+	{0x00bcd, 0x00bcd},  // Tamil Sign Virama       ..Tamil Sign Virama
+	{0x00c00, 0x00c00},  // Telugu Sign Combining Ca..Telugu Sign Combining Ca
+	{0x00c04, 0x00c04},  // Telugu Sign Combining An..Telugu Sign Combining An
+	{0x00c3c, 0x00c3c},  // Telugu Sign Nukta       ..Telugu Sign Nukta
+	{0x00c3e, 0x00c40},  // Telugu Vowel Sign Aa    ..Telugu Vowel Sign Ii
+	{0x00c46, 0x00c48},  // Telugu Vowel Sign E     ..Telugu Vowel Sign Ai
+	{0x00c4a, 0x00c4d},  // Telugu Vowel Sign O     ..Telugu Sign Virama
+	{0x00c55, 0x00c56},  // Telugu Length Mark      ..Telugu Ai Length Mark
+	{0x00c62, 0x00c63},  // Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali
+	{0x00c81, 0x00c81},  // Kannada Sign Candrabindu..Kannada Sign Candrabindu
+	{0x00cbc, 0x00cbc},  // Kannada Sign Nukta      ..Kannada Sign Nukta
+	{0x00cbf, 0x00cbf},  // Kannada Vowel Sign I    ..Kannada Vowel Sign I
+	{0x00cc6, 0x00cc6},  // Kannada Vowel Sign E    ..Kannada Vowel Sign E
+	{0x00ccc, 0x00ccd},  // Kannada Vowel Sign Au   ..Kannada Sign Virama
+	{0x00ce2, 0x00ce3},  // Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal
+	{0x00d00, 0x00d01},  // Malayalam Sign Combining..Malayalam Sign Candrabin
+	{0x00d3b, 0x00d3c},  // Malayalam Sign Vertical ..Malayalam Sign Circular
+	{0x00d41, 0x00d44},  // Malayalam Vowel Sign U  ..Malayalam Vowel Sign Voc
+	{0x00d4d, 0x00d4d},  // Malayalam Sign Virama   ..Malayalam Sign Virama
+	{0x00d62, 0x00d63},  // Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc
+	{0x00d81, 0x00d81},  // Sinhala Sign Candrabindu..Sinhala Sign Candrabindu
+	{0x00dca, 0x00dca},  // Sinhala Sign Al-lakuna  ..Sinhala Sign Al-lakuna
+	{0x00dd2, 0x00dd4},  // Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti
+	{0x00dd6, 0x00dd6},  // Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga
+	{0x00e31, 0x00e31},  // Thai Character Mai Han-a..Thai Character Mai Han-a
+	{0x00e34, 0x00e3a},  // Thai Character Sara I   ..Thai Character Phinthu
+	{0x00e47, 0x00e4e},  // Thai Character Maitaikhu..Thai Character Yamakkan
+	{0x00eb1, 0x00eb1},  // Lao Vowel Sign Mai Kan  ..Lao Vowel Sign Mai Kan
+	{0x00eb4, 0x00ebc},  // Lao Vowel Sign I	..Lao Semivowel Sign Lo
+	{0x00ec8, 0x00ece},  // Lao Tone Mai Ek	 ..(nil)
+	{0x00f18, 0x00f19},  // Tibetan Astrological Sig..Tibetan Astrological Sig
+	{0x00f35, 0x00f35},  // Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung
+	{0x00f37, 0x00f37},  // Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung
+	{0x00f39, 0x00f39},  // Tibetan Mark Tsa -phru  ..Tibetan Mark Tsa -phru
+	{0x00f71, 0x00f7e},  // Tibetan Vowel Sign Aa   ..Tibetan Sign Rjes Su Nga
+	{0x00f80, 0x00f84},  // Tibetan Vowel Sign Rever..Tibetan Mark Halanta
+	{0x00f86, 0x00f87},  // Tibetan Sign Lci Rtags  ..Tibetan Sign Yang Rtags
+	{0x00f8d, 0x00f97},  // Tibetan Subjoined Sign L..Tibetan Subjoined Letter
+	{0x00f99, 0x00fbc},  // Tibetan Subjoined Letter..Tibetan Subjoined Letter
+	{0x00fc6, 0x00fc6},  // Tibetan Symbol Padma Gda..Tibetan Symbol Padma Gda
+	{0x0102d, 0x01030},  // Myanmar Vowel Sign I    ..Myanmar Vowel Sign Uu
+	{0x01032, 0x01037},  // Myanmar Vowel Sign Ai   ..Myanmar Sign Dot Below
+	{0x01039, 0x0103a},  // Myanmar Sign Virama     ..Myanmar Sign Asat
+	{0x0103d, 0x0103e},  // Myanmar Consonant Sign M..Myanmar Consonant Sign M
+	{0x01058, 0x01059},  // Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal
+	{0x0105e, 0x01060},  // Myanmar Consonant Sign M..Myanmar Consonant Sign M
+	{0x01071, 0x01074},  // Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah
+	{0x01082, 0x01082},  // Myanmar Consonant Sign S..Myanmar Consonant Sign S
+	{0x01085, 0x01086},  // Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan
+	{0x0108d, 0x0108d},  // Myanmar Sign Shan Counci..Myanmar Sign Shan Counci
+	{0x0109d, 0x0109d},  // Myanmar Vowel Sign Aiton..Myanmar Vowel Sign Aiton
+	{0x0135d, 0x0135f},  // Ethiopic Combining Gemin..Ethiopic Combining Gemin
+	{0x01712, 0x01714},  // Tagalog Vowel Sign I    ..Tagalog Sign Virama
+	{0x01732, 0x01733},  // Hanunoo Vowel Sign I    ..Hanunoo Vowel Sign U
+	{0x01752, 0x01753},  // Buhid Vowel Sign I      ..Buhid Vowel Sign U
+	{0x01772, 0x01773},  // Tagbanwa Vowel Sign I   ..Tagbanwa Vowel Sign U
+	{0x017b4, 0x017b5},  // Khmer Vowel Inherent Aq ..Khmer Vowel Inherent Aa
+	{0x017b7, 0x017bd},  // Khmer Vowel Sign I      ..Khmer Vowel Sign Ua
+	{0x017c6, 0x017c6},  // Khmer Sign Nikahit      ..Khmer Sign Nikahit
+	{0x017c9, 0x017d3},  // Khmer Sign Muusikatoan  ..Khmer Sign Bathamasat
+	{0x017dd, 0x017dd},  // Khmer Sign Atthacan     ..Khmer Sign Atthacan
+	{0x0180b, 0x0180d},  // Mongolian Free Variation..Mongolian Free Variation
+	{0x0180f, 0x0180f},  // Mongolian Free Variation..Mongolian Free Variation
+	{0x01885, 0x01886},  // Mongolian Letter Ali Gal..Mongolian Letter Ali Gal
+	{0x018a9, 0x018a9},  // Mongolian Letter Ali Gal..Mongolian Letter Ali Gal
+	{0x01920, 0x01922},  // Limbu Vowel Sign A      ..Limbu Vowel Sign U
+	{0x01927, 0x01928},  // Limbu Vowel Sign E      ..Limbu Vowel Sign O
+	{0x01932, 0x01932},  // Limbu Small Letter Anusv..Limbu Small Letter Anusv
+	{0x01939, 0x0193b},  // Limbu Sign Mukphreng    ..Limbu Sign Sa-i
+	{0x01a17, 0x01a18},  // Buginese Vowel Sign I   ..Buginese Vowel Sign U
+	{0x01a1b, 0x01a1b},  // Buginese Vowel Sign Ae  ..Buginese Vowel Sign Ae
+	{0x01a56, 0x01a56},  // Tai Tham Consonant Sign ..Tai Tham Consonant Sign
+	{0x01a58, 0x01a5e},  // Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign
+	{0x01a60, 0x01a60},  // Tai Tham Sign Sakot     ..Tai Tham Sign Sakot
+	{0x01a62, 0x01a62},  // Tai Tham Vowel Sign Mai ..Tai Tham Vowel Sign Mai
+	{0x01a65, 0x01a6c},  // Tai Tham Vowel Sign I   ..Tai Tham Vowel Sign Oa B
+	{0x01a73, 0x01a7c},  // Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue
+	{0x01a7f, 0x01a7f},  // Tai Tham Combining Crypt..Tai Tham Combining Crypt
+	{0x01ab0, 0x01ace},  // Combining Doubled Circum..Combining Latin Small Le
+	{0x01b00, 0x01b03},  // Balinese Sign Ulu Ricem ..Balinese Sign Surang
+	{0x01b34, 0x01b34},  // Balinese Sign Rerekan   ..Balinese Sign Rerekan
+	{0x01b36, 0x01b3a},  // Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R
+	{0x01b3c, 0x01b3c},  // Balinese Vowel Sign La L..Balinese Vowel Sign La L
+	{0x01b42, 0x01b42},  // Balinese Vowel Sign Pepe..Balinese Vowel Sign Pepe
+	{0x01b6b, 0x01b73},  // Balinese Musical Symbol ..Balinese Musical Symbol
+	{0x01b80, 0x01b81},  // Sundanese Sign Panyecek ..Sundanese Sign Panglayar
+	{0x01ba2, 0x01ba5},  // Sundanese Consonant Sign..Sundanese Vowel Sign Pan
+	{0x01ba8, 0x01ba9},  // Sundanese Vowel Sign Pam..Sundanese Vowel Sign Pan
+	{0x01bab, 0x01bad},  // Sundanese Sign Virama   ..Sundanese Consonant Sign
+	{0x01be6, 0x01be6},  // Batak Sign Tompi	..Batak Sign Tompi
+	{0x01be8, 0x01be9},  // Batak Vowel Sign Pakpak ..Batak Vowel Sign Ee
+	{0x01bed, 0x01bed},  // Batak Vowel Sign Karo O ..Batak Vowel Sign Karo O
+	{0x01bef, 0x01bf1},  // Batak Vowel Sign U For S..Batak Consonant Sign H
+	{0x01c2c, 0x01c33},  // Lepcha Vowel Sign E     ..Lepcha Consonant Sign T
+	{0x01c36, 0x01c37},  // Lepcha Sign Ran	 ..Lepcha Sign Nukta
+	{0x01cd0, 0x01cd2},  // Vedic Tone Karshana     ..Vedic Tone Prenkha
+	{0x01cd4, 0x01ce0},  // Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash
+	{0x01ce2, 0x01ce8},  // Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda
+	{0x01ced, 0x01ced},  // Vedic Sign Tiryak       ..Vedic Sign Tiryak
+	{0x01cf4, 0x01cf4},  // Vedic Tone Candra Above ..Vedic Tone Candra Above
+	{0x01cf8, 0x01cf9},  // Vedic Tone Ring Above   ..Vedic Tone Double Ring A
+	{0x01dc0, 0x01dff},  // Combining Dotted Grave A..Combining Right Arrowhea
+	{0x020d0, 0x020f0},  // Combining Left Harpoon A..Combining Asterisk Above
+	{0x02cef, 0x02cf1},  // Coptic Combining Ni Abov..Coptic Combining Spiritu
+	{0x02d7f, 0x02d7f},  // Tifinagh Consonant Joine..Tifinagh Consonant Joine
+	{0x02de0, 0x02dff},  // Combining Cyrillic Lette..Combining Cyrillic Lette
+	{0x0302a, 0x0302d},  // Ideographic Level Tone M..Ideographic Entering Ton
+	{0x03099, 0x0309a},  // Combining Katakana-hirag..Combining Katakana-hirag
+	{0x0a66f, 0x0a672},  // Combining Cyrillic Vzmet..Combining Cyrillic Thous
+	{0x0a674, 0x0a67d},  // Combining Cyrillic Lette..Combining Cyrillic Payer
+	{0x0a69e, 0x0a69f},  // Combining Cyrillic Lette..Combining Cyrillic Lette
+	{0x0a6f0, 0x0a6f1},  // Bamum Combining Mark Koq..Bamum Combining Mark Tuk
+	{0x0a802, 0x0a802},  // Syloti Nagri Sign Dvisva..Syloti Nagri Sign Dvisva
+	{0x0a806, 0x0a806},  // Syloti Nagri Sign Hasant..Syloti Nagri Sign Hasant
+	{0x0a80b, 0x0a80b},  // Syloti Nagri Sign Anusva..Syloti Nagri Sign Anusva
+	{0x0a825, 0x0a826},  // Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign
+	{0x0a82c, 0x0a82c},  // Syloti Nagri Sign Altern..Syloti Nagri Sign Altern
+	{0x0a8c4, 0x0a8c5},  // Saurashtra Sign Virama  ..Saurashtra Sign Candrabi
+	{0x0a8e0, 0x0a8f1},  // Combining Devanagari Dig..Combining Devanagari Sig
+	{0x0a8ff, 0x0a8ff},  // Devanagari Vowel Sign Ay..Devanagari Vowel Sign Ay
+	{0x0a926, 0x0a92d},  // Kayah Li Vowel Ue       ..Kayah Li Tone Calya Plop
+	{0x0a947, 0x0a951},  // Rejang Vowel Sign I     ..Rejang Consonant Sign R
+	{0x0a980, 0x0a982},  // Javanese Sign Panyangga ..Javanese Sign Layar
+	{0x0a9b3, 0x0a9b3},  // Javanese Sign Cecak Telu..Javanese Sign Cecak Telu
+	{0x0a9b6, 0x0a9b9},  // Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku
+	{0x0a9bc, 0x0a9bd},  // Javanese Vowel Sign Pepe..Javanese Consonant Sign
+	{0x0a9e5, 0x0a9e5},  // Myanmar Sign Shan Saw   ..Myanmar Sign Shan Saw
+	{0x0aa29, 0x0aa2e},  // Cham Vowel Sign Aa      ..Cham Vowel Sign Oe
+	{0x0aa31, 0x0aa32},  // Cham Vowel Sign Au      ..Cham Vowel Sign Ue
+	{0x0aa35, 0x0aa36},  // Cham Consonant Sign La  ..Cham Consonant Sign Wa
+	{0x0aa43, 0x0aa43},  // Cham Consonant Sign Fina..Cham Consonant Sign Fina
+	{0x0aa4c, 0x0aa4c},  // Cham Consonant Sign Fina..Cham Consonant Sign Fina
+	{0x0aa7c, 0x0aa7c},  // Myanmar Sign Tai Laing T..Myanmar Sign Tai Laing T
+	{0x0aab0, 0x0aab0},  // Tai Viet Mai Kang       ..Tai Viet Mai Kang
+	{0x0aab2, 0x0aab4},  // Tai Viet Vowel I	..Tai Viet Vowel U
+	{0x0aab7, 0x0aab8},  // Tai Viet Mai Khit       ..Tai Viet Vowel Ia
+	{0x0aabe, 0x0aabf},  // Tai Viet Vowel Am       ..Tai Viet Tone Mai Ek
+	{0x0aac1, 0x0aac1},  // Tai Viet Tone Mai Tho   ..Tai Viet Tone Mai Tho
+	{0x0aaec, 0x0aaed},  // Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign
+	{0x0aaf6, 0x0aaf6},  // Meetei Mayek Virama     ..Meetei Mayek Virama
+	{0x0abe5, 0x0abe5},  // Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign
+	{0x0abe8, 0x0abe8},  // Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign
+	{0x0abed, 0x0abed},  // Meetei Mayek Apun Iyek  ..Meetei Mayek Apun Iyek
+	{0x0fb1e, 0x0fb1e},  // Hebrew Point Judeo-spani..Hebrew Point Judeo-spani
+	{0x0fe00, 0x0fe0f},  // Variation Selector-1    ..Variation Selector-16
+	{0x0fe20, 0x0fe2f},  // Combining Ligature Left ..Combining Cyrillic Titlo
+	{0x101fd, 0x101fd},  // Phaistos Disc Sign Combi..Phaistos Disc Sign Combi
+	{0x102e0, 0x102e0},  // Coptic Epact Thousands M..Coptic Epact Thousands M
+	{0x10376, 0x1037a},  // Combining Old Permic Let..Combining Old Permic Let
+	{0x10a01, 0x10a03},  // Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo
+	{0x10a05, 0x10a06},  // Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O
+	{0x10a0c, 0x10a0f},  // Kharoshthi Vowel Length ..Kharoshthi Sign Visarga
+	{0x10a38, 0x10a3a},  // Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo
+	{0x10a3f, 0x10a3f},  // Kharoshthi Virama       ..Kharoshthi Virama
+	{0x10ae5, 0x10ae6},  // Manichaean Abbreviation ..Manichaean Abbreviation
+	{0x10d24, 0x10d27},  // Hanifi Rohingya Sign Har..Hanifi Rohingya Sign Tas
+	{0x10eab, 0x10eac},  // Yezidi Combining Hamza M..Yezidi Combining Madda M
+	{0x10efd, 0x10eff},  // (nil)		   ..(nil)
+	{0x10f46, 0x10f50},  // Sogdian Combining Dot Be..Sogdian Combining Stroke
+	{0x10f82, 0x10f85},  // Old Uyghur Combining Dot..Old Uyghur Combining Two
+	{0x11001, 0x11001},  // Brahmi Sign Anusvara    ..Brahmi Sign Anusvara
+	{0x11038, 0x11046},  // Brahmi Vowel Sign Aa    ..Brahmi Virama
+	{0x11070, 0x11070},  // Brahmi Sign Old Tamil Vi..Brahmi Sign Old Tamil Vi
+	{0x11073, 0x11074},  // Brahmi Vowel Sign Old Ta..Brahmi Vowel Sign Old Ta
+	{0x1107f, 0x11081},  // Brahmi Number Joiner    ..Kaithi Sign Anusvara
+	{0x110b3, 0x110b6},  // Kaithi Vowel Sign U     ..Kaithi Vowel Sign Ai
+	{0x110b9, 0x110ba},  // Kaithi Sign Virama      ..Kaithi Sign Nukta
+	{0x110c2, 0x110c2},  // Kaithi Vowel Sign Vocali..Kaithi Vowel Sign Vocali
+	{0x11100, 0x11102},  // Chakma Sign Candrabindu ..Chakma Sign Visarga
+	{0x11127, 0x1112b},  // Chakma Vowel Sign A     ..Chakma Vowel Sign Uu
+	{0x1112d, 0x11134},  // Chakma Vowel Sign Ai    ..Chakma Maayyaa
+	{0x11173, 0x11173},  // Mahajani Sign Nukta     ..Mahajani Sign Nukta
+	{0x11180, 0x11181},  // Sharada Sign Candrabindu..Sharada Sign Anusvara
+	{0x111b6, 0x111be},  // Sharada Vowel Sign U    ..Sharada Vowel Sign O
+	{0x111c9, 0x111cc},  // Sharada Sandhi Mark     ..Sharada Extra Short Vowe
+	{0x111cf, 0x111cf},  // Sharada Sign Inverted Ca..Sharada Sign Inverted Ca
+	{0x1122f, 0x11231},  // Khojki Vowel Sign U     ..Khojki Vowel Sign Ai
+	{0x11234, 0x11234},  // Khojki Sign Anusvara    ..Khojki Sign Anusvara
+	{0x11236, 0x11237},  // Khojki Sign Nukta       ..Khojki Sign Shadda
+	{0x1123e, 0x1123e},  // Khojki Sign Sukun       ..Khojki Sign Sukun
+	{0x11241, 0x11241},  // (nil)		   ..(nil)
+	{0x112df, 0x112df},  // Khudawadi Sign Anusvara ..Khudawadi Sign Anusvara
+	{0x112e3, 0x112ea},  // Khudawadi Vowel Sign U  ..Khudawadi Sign Virama
+	{0x11300, 0x11301},  // Grantha Sign Combining A..Grantha Sign Candrabindu
+	{0x1133b, 0x1133c},  // Combining Bindu Below   ..Grantha Sign Nukta
+	{0x11340, 0x11340},  // Grantha Vowel Sign Ii   ..Grantha Vowel Sign Ii
+	{0x11366, 0x1136c},  // Combining Grantha Digit ..Combining Grantha Digit
+	{0x11370, 0x11374},  // Combining Grantha Letter..Combining Grantha Letter
+	{0x11438, 0x1143f},  // Newa Vowel Sign U       ..Newa Vowel Sign Ai
+	{0x11442, 0x11444},  // Newa Sign Virama	..Newa Sign Anusvara
+	{0x11446, 0x11446},  // Newa Sign Nukta	 ..Newa Sign Nukta
+	{0x1145e, 0x1145e},  // Newa Sandhi Mark	..Newa Sandhi Mark
+	{0x114b3, 0x114b8},  // Tirhuta Vowel Sign U    ..Tirhuta Vowel Sign Vocal
+	{0x114ba, 0x114ba},  // Tirhuta Vowel Sign Short..Tirhuta Vowel Sign Short
+	{0x114bf, 0x114c0},  // Tirhuta Sign Candrabindu..Tirhuta Sign Anusvara
+	{0x114c2, 0x114c3},  // Tirhuta Sign Virama     ..Tirhuta Sign Nukta
+	{0x115b2, 0x115b5},  // Siddham Vowel Sign U    ..Siddham Vowel Sign Vocal
+	{0x115bc, 0x115bd},  // Siddham Sign Candrabindu..Siddham Sign Anusvara
+	{0x115bf, 0x115c0},  // Siddham Sign Virama     ..Siddham Sign Nukta
+	{0x115dc, 0x115dd},  // Siddham Vowel Sign Alter..Siddham Vowel Sign Alter
+	{0x11633, 0x1163a},  // Modi Vowel Sign U       ..Modi Vowel Sign Ai
+	{0x1163d, 0x1163d},  // Modi Sign Anusvara      ..Modi Sign Anusvara
+	{0x1163f, 0x11640},  // Modi Sign Virama	..Modi Sign Ardhacandra
+	{0x116ab, 0x116ab},  // Takri Sign Anusvara     ..Takri Sign Anusvara
+	{0x116ad, 0x116ad},  // Takri Vowel Sign Aa     ..Takri Vowel Sign Aa
+	{0x116b0, 0x116b5},  // Takri Vowel Sign U      ..Takri Vowel Sign Au
+	{0x116b7, 0x116b7},  // Takri Sign Nukta	..Takri Sign Nukta
+	{0x1171d, 0x1171f},  // Ahom Consonant Sign Medi..Ahom Consonant Sign Medi
+	{0x11722, 0x11725},  // Ahom Vowel Sign I       ..Ahom Vowel Sign Uu
+	{0x11727, 0x1172b},  // Ahom Vowel Sign Aw      ..Ahom Sign Killer
+	{0x1182f, 0x11837},  // Dogra Vowel Sign U      ..Dogra Sign Anusvara
+	{0x11839, 0x1183a},  // Dogra Sign Virama       ..Dogra Sign Nukta
+	{0x1193b, 0x1193c},  // Dives Akuru Sign Anusvar..Dives Akuru Sign Candrab
+	{0x1193e, 0x1193e},  // Dives Akuru Virama      ..Dives Akuru Virama
+	{0x11943, 0x11943},  // Dives Akuru Sign Nukta  ..Dives Akuru Sign Nukta
+	{0x119d4, 0x119d7},  // Nandinagari Vowel Sign U..Nandinagari Vowel Sign V
+	{0x119da, 0x119db},  // Nandinagari Vowel Sign E..Nandinagari Vowel Sign A
+	{0x119e0, 0x119e0},  // Nandinagari Sign Virama ..Nandinagari Sign Virama
+	{0x11a01, 0x11a0a},  // Zanabazar Square Vowel S..Zanabazar Square Vowel L
+	{0x11a33, 0x11a38},  // Zanabazar Square Final C..Zanabazar Square Sign An
+	{0x11a3b, 0x11a3e},  // Zanabazar Square Cluster..Zanabazar Square Cluster
+	{0x11a47, 0x11a47},  // Zanabazar Square Subjoin..Zanabazar Square Subjoin
+	{0x11a51, 0x11a56},  // Soyombo Vowel Sign I    ..Soyombo Vowel Sign Oe
+	{0x11a59, 0x11a5b},  // Soyombo Vowel Sign Vocal..Soyombo Vowel Length Mar
+	{0x11a8a, 0x11a96},  // Soyombo Final Consonant ..Soyombo Sign Anusvara
+	{0x11a98, 0x11a99},  // Soyombo Gemination Mark ..Soyombo Subjoiner
+	{0x11c30, 0x11c36},  // Bhaiksuki Vowel Sign I  ..Bhaiksuki Vowel Sign Voc
+	{0x11c38, 0x11c3d},  // Bhaiksuki Vowel Sign E  ..Bhaiksuki Sign Anusvara
+	{0x11c3f, 0x11c3f},  // Bhaiksuki Sign Virama   ..Bhaiksuki Sign Virama
+	{0x11c92, 0x11ca7},  // Marchen Subjoined Letter..Marchen Subjoined Letter
+	{0x11caa, 0x11cb0},  // Marchen Subjoined Letter..Marchen Vowel Sign Aa
+	{0x11cb2, 0x11cb3},  // Marchen Vowel Sign U    ..Marchen Vowel Sign E
+	{0x11cb5, 0x11cb6},  // Marchen Sign Anusvara   ..Marchen Sign Candrabindu
+	{0x11d31, 0x11d36},  // Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign
+	{0x11d3a, 0x11d3a},  // Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign
+	{0x11d3c, 0x11d3d},  // Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign
+	{0x11d3f, 0x11d45},  // Masaram Gondi Vowel Sign..Masaram Gondi Virama
+	{0x11d47, 0x11d47},  // Masaram Gondi Ra-kara   ..Masaram Gondi Ra-kara
+	{0x11d90, 0x11d91},  // Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign
+	{0x11d95, 0x11d95},  // Gunjala Gondi Sign Anusv..Gunjala Gondi Sign Anusv
+	{0x11d97, 0x11d97},  // Gunjala Gondi Virama    ..Gunjala Gondi Virama
+	{0x11ef3, 0x11ef4},  // Makasar Vowel Sign I    ..Makasar Vowel Sign U
+	{0x11f00, 0x11f01},  // (nil)		   ..(nil)
+	{0x11f36, 0x11f3a},  // (nil)		   ..(nil)
+	{0x11f40, 0x11f40},  // (nil)		   ..(nil)
+	{0x11f42, 0x11f42},  // (nil)		   ..(nil)
+	{0x13440, 0x13440},  // (nil)		   ..(nil)
+	{0x13447, 0x13455},  // (nil)		   ..(nil)
+	{0x16af0, 0x16af4},  // Bassa Vah Combining High..Bassa Vah Combining High
+	{0x16b30, 0x16b36},  // Pahawh Hmong Mark Cim Tu..Pahawh Hmong Mark Cim Ta
+	{0x16f4f, 0x16f4f},  // Miao Sign Consonant Modi..Miao Sign Consonant Modi
+	{0x16f8f, 0x16f92},  // Miao Tone Right	 ..Miao Tone Below
+	{0x16fe4, 0x16fe4},  // Khitan Small Script Fill..Khitan Small Script Fill
+	{0x1bc9d, 0x1bc9e},  // Duployan Thick Letter Se..Duployan Double Mark
+	{0x1cf00, 0x1cf2d},  // Znamenny Combining Mark ..Znamenny Combining Mark
+	{0x1cf30, 0x1cf46},  // Znamenny Combining Tonal..Znamenny Priznak Modifie
+	{0x1d167, 0x1d169},  // Musical Symbol Combining..Musical Symbol Combining
+	{0x1d17b, 0x1d182},  // Musical Symbol Combining..Musical Symbol Combining
+	{0x1d185, 0x1d18b},  // Musical Symbol Combining..Musical Symbol Combining
+	{0x1d1aa, 0x1d1ad},  // Musical Symbol Combining..Musical Symbol Combining
+	{0x1d242, 0x1d244},  // Combining Greek Musical ..Combining Greek Musical
+	{0x1da00, 0x1da36},  // Signwriting Head Rim    ..Signwriting Air Sucking
+	{0x1da3b, 0x1da6c},  // Signwriting Mouth Closed..Signwriting Excitement
+	{0x1da75, 0x1da75},  // Signwriting Upper Body T..Signwriting Upper Body T
+	{0x1da84, 0x1da84},  // Signwriting Location Hea..Signwriting Location Hea
+	{0x1da9b, 0x1da9f},  // Signwriting Fill Modifie..Signwriting Fill Modifie
+	{0x1daa1, 0x1daaf},  // Signwriting Rotation Mod..Signwriting Rotation Mod
+	{0x1e000, 0x1e006},  // Combining Glagolitic Let..Combining Glagolitic Let
+	{0x1e008, 0x1e018},  // Combining Glagolitic Let..Combining Glagolitic Let
+	{0x1e01b, 0x1e021},  // Combining Glagolitic Let..Combining Glagolitic Let
+	{0x1e023, 0x1e024},  // Combining Glagolitic Let..Combining Glagolitic Let
+	{0x1e026, 0x1e02a},  // Combining Glagolitic Let..Combining Glagolitic Let
+	{0x1e08f, 0x1e08f},  // (nil)		   ..(nil)
+	{0x1e130, 0x1e136},  // Nyiakeng Puachue Hmong T..Nyiakeng Puachue Hmong T
+	{0x1e2ae, 0x1e2ae},  // Toto Sign Rising Tone   ..Toto Sign Rising Tone
+	{0x1e2ec, 0x1e2ef},  // Wancho Tone Tup	 ..Wancho Tone Koini
+	{0x1e4ec, 0x1e4ef},  // (nil)		   ..(nil)
+	{0x1e8d0, 0x1e8d6},  // Mende Kikakui Combining ..Mende Kikakui Combining
+	{0x1e944, 0x1e94a},  // Adlam Alif Lengthener   ..Adlam Nukta
+	{0xe0100, 0xe01ef},  // Variation Selector-17   ..Variation Selector-256
 };
 
 // https://github.com/jquast/wcwidth/blob/master/wcwidth/table_wide.py
@@ -375,168 +375,175 @@
 // from https://github.com/jquast/wcwidth/pull/64
 // at commit 1b9b6585b0080ea5cb88dc9815796505724793fe (2022-12-16):
 static struct width_interval WIDE_EASTASIAN[] = {
-        {0x01100, 0x0115f},  // Hangul Choseong Kiyeok  ..Hangul Choseong Filler
-        {0x0231a, 0x0231b},  // Watch                   ..Hourglass
-        {0x02329, 0x0232a},  // Left-pointing Angle Brac..Right-pointing Angle Bra
-        {0x023e9, 0x023ec},  // Black Right-pointing Dou..Black Down-pointing Doub
-        {0x023f0, 0x023f0},  // Alarm Clock             ..Alarm Clock
-        {0x023f3, 0x023f3},  // Hourglass With Flowing S..Hourglass With Flowing S
-        {0x025fd, 0x025fe},  // White Medium Small Squar..Black Medium Small Squar
-        {0x02614, 0x02615},  // Umbrella With Rain Drops..Hot Beverage
-        {0x02648, 0x02653},  // Aries                   ..Pisces
-        {0x0267f, 0x0267f},  // Wheelchair Symbol       ..Wheelchair Symbol
-        {0x02693, 0x02693},  // Anchor                  ..Anchor
-        {0x026a1, 0x026a1},  // High Voltage Sign       ..High Voltage Sign
-        {0x026aa, 0x026ab},  // Medium White Circle     ..Medium Black Circle
-        {0x026bd, 0x026be},  // Soccer Ball             ..Baseball
-        {0x026c4, 0x026c5},  // Snowman Without Snow    ..Sun Behind Cloud
-        {0x026ce, 0x026ce},  // Ophiuchus               ..Ophiuchus
-        {0x026d4, 0x026d4},  // No Entry                ..No Entry
-        {0x026ea, 0x026ea},  // Church                  ..Church
-        {0x026f2, 0x026f3},  // Fountain                ..Flag In Hole
-        {0x026f5, 0x026f5},  // Sailboat                ..Sailboat
-        {0x026fa, 0x026fa},  // Tent                    ..Tent
-        {0x026fd, 0x026fd},  // Fuel Pump               ..Fuel Pump
-        {0x02705, 0x02705},  // White Heavy Check Mark  ..White Heavy Check Mark
-        {0x0270a, 0x0270b},  // Raised Fist             ..Raised Hand
-        {0x02728, 0x02728},  // Sparkles                ..Sparkles
-        {0x0274c, 0x0274c},  // Cross Mark              ..Cross Mark
-        {0x0274e, 0x0274e},  // Negative Squared Cross M..Negative Squared Cross M
-        {0x02753, 0x02755},  // Black Question Mark Orna..White Exclamation Mark O
-        {0x02757, 0x02757},  // Heavy Exclamation Mark S..Heavy Exclamation Mark S
-        {0x02795, 0x02797},  // Heavy Plus Sign         ..Heavy Division Sign
-        {0x027b0, 0x027b0},  // Curly Loop              ..Curly Loop
-        {0x027bf, 0x027bf},  // Double Curly Loop       ..Double Curly Loop
-        {0x02b1b, 0x02b1c},  // Black Large Square      ..White Large Square
-        {0x02b50, 0x02b50},  // White Medium Star       ..White Medium Star
-        {0x02b55, 0x02b55},  // Heavy Large Circle      ..Heavy Large Circle
-        {0x02e80, 0x02e99},  // Cjk Radical Repeat      ..Cjk Radical Rap
-        {0x02e9b, 0x02ef3},  // Cjk Radical Choke       ..Cjk Radical C-simplified
-        {0x02f00, 0x02fd5},  // Kangxi Radical One      ..Kangxi Radical Flute
-        {0x02ff0, 0x02ffb},  // Ideographic Description ..Ideographic Description
-        {0x03000, 0x0303e},  // Ideographic Space       ..Ideographic Variation In
-        {0x03041, 0x03096},  // Hiragana Letter Small A ..Hiragana Letter Small Ke
-        {0x03099, 0x030ff},  // Combining Katakana-hirag..Katakana Digraph Koto
-        {0x03105, 0x0312f},  // Bopomofo Letter B       ..Bopomofo Letter Nn
-        {0x03131, 0x0318e},  // Hangul Letter Kiyeok    ..Hangul Letter Araeae
-        {0x03190, 0x031e3},  // Ideographic Annotation L..Cjk Stroke Q
-        {0x031f0, 0x0321e},  // Katakana Letter Small Ku..Parenthesized Korean Cha
-        {0x03220, 0x03247},  // Parenthesized Ideograph ..Circled Ideograph Koto
-        {0x03250, 0x04dbf},  // Partnership Sign        ..Cjk Unified Ideograph-4d
-        {0x04e00, 0x0a48c},  // Cjk Unified Ideograph-4e..Yi Syllable Yyr
-        {0x0a490, 0x0a4c6},  // Yi Radical Qot          ..Yi Radical Ke
-        {0x0a960, 0x0a97c},  // Hangul Choseong Tikeut-m..Hangul Choseong Ssangyeo
-        {0x0ac00, 0x0d7a3},  // Hangul Syllable Ga      ..Hangul Syllable Hih
-        {0x0f900, 0x0faff},  // Cjk Compatibility Ideogr..(nil)
-        {0x0fe10, 0x0fe19},  // Presentation Form For Ve..Presentation Form For Ve
-        {0x0fe30, 0x0fe52},  // Presentation Form For Ve..Small Full Stop
-        {0x0fe54, 0x0fe66},  // Small Semicolon         ..Small Equals Sign
-        {0x0fe68, 0x0fe6b},  // Small Reverse Solidus   ..Small Commercial At
-        {0x0ff01, 0x0ff60},  // Fullwidth Exclamation Ma..Fullwidth Right White Pa
-        {0x0ffe0, 0x0ffe6},  // Fullwidth Cent Sign     ..Fullwidth Won Sign
-        {0x16fe0, 0x16fe4},  // Tangut Iteration Mark   ..Khitan Small Script Fill
-        {0x16ff0, 0x16ff1},  // Vietnamese Alternate Rea..Vietnamese Alternate Rea
-        {0x17000, 0x187f7},  // (nil)                   ..(nil)
-        {0x18800, 0x18cd5},  // Tangut Component-001    ..Khitan Small Script Char
-        {0x18d00, 0x18d08},  // (nil)                   ..(nil)
-        {0x1aff0, 0x1aff3},  // Katakana Letter Minnan T..Katakana Letter Minnan T
-        {0x1aff5, 0x1affb},  // Katakana Letter Minnan T..Katakana Letter Minnan N
-        {0x1affd, 0x1affe},  // Katakana Letter Minnan N..Katakana Letter Minnan N
-        {0x1b000, 0x1b122},  // Katakana Letter Archaic ..Katakana Letter Archaic
-        {0x1b132, 0x1b132},  // (nil)                   ..(nil)
-        {0x1b150, 0x1b152},  // Hiragana Letter Small Wi..Hiragana Letter Small Wo
-        {0x1b155, 0x1b155},  // (nil)                   ..(nil)
-        {0x1b164, 0x1b167},  // Katakana Letter Small Wi..Katakana Letter Small N
-        {0x1b170, 0x1b2fb},  // Nushu Character-1b170   ..Nushu Character-1b2fb
-        {0x1f004, 0x1f004},  // Mahjong Tile Red Dragon ..Mahjong Tile Red Dragon
-        {0x1f0cf, 0x1f0cf},  // Playing Card Black Joker..Playing Card Black Joker
-        {0x1f18e, 0x1f18e},  // Negative Squared Ab     ..Negative Squared Ab
-        {0x1f191, 0x1f19a},  // Squared Cl              ..Squared Vs
-        {0x1f200, 0x1f202},  // Square Hiragana Hoka    ..Squared Katakana Sa
-        {0x1f210, 0x1f23b},  // Squared Cjk Unified Ideo..Squared Cjk Unified Ideo
-        {0x1f240, 0x1f248},  // Tortoise Shell Bracketed..Tortoise Shell Bracketed
-        {0x1f250, 0x1f251},  // Circled Ideograph Advant..Circled Ideograph Accept
-        {0x1f260, 0x1f265},  // Rounded Symbol For Fu   ..Rounded Symbol For Cai
-        {0x1f300, 0x1f320},  // Cyclone                 ..Shooting Star
-        {0x1f32d, 0x1f335},  // Hot Dog                 ..Cactus
-        {0x1f337, 0x1f37c},  // Tulip                   ..Baby Bottle
-        {0x1f37e, 0x1f393},  // Bottle With Popping Cork..Graduation Cap
-        {0x1f3a0, 0x1f3ca},  // Carousel Horse          ..Swimmer
-        {0x1f3cf, 0x1f3d3},  // Cricket Bat And Ball    ..Table Tennis Paddle And
-        {0x1f3e0, 0x1f3f0},  // House Building          ..European Castle
-        {0x1f3f4, 0x1f3f4},  // Waving Black Flag       ..Waving Black Flag
-        {0x1f3f8, 0x1f43e},  // Badminton Racquet And Sh..Paw Prints
-        {0x1f440, 0x1f440},  // Eyes                    ..Eyes
-        {0x1f442, 0x1f4fc},  // Ear                     ..Videocassette
-        {0x1f4ff, 0x1f53d},  // Prayer Beads            ..Down-pointing Small Red
-        {0x1f54b, 0x1f54e},  // Kaaba                   ..Menorah With Nine Branch
-        {0x1f550, 0x1f567},  // Clock Face One Oclock   ..Clock Face Twelve-thirty
-        {0x1f57a, 0x1f57a},  // Man Dancing             ..Man Dancing
-        {0x1f595, 0x1f596},  // Reversed Hand With Middl..Raised Hand With Part Be
-        {0x1f5a4, 0x1f5a4},  // Black Heart             ..Black Heart
-        {0x1f5fb, 0x1f64f},  // Mount Fuji              ..Person With Folded Hands
-        {0x1f680, 0x1f6c5},  // Rocket                  ..Left Luggage
-        {0x1f6cc, 0x1f6cc},  // Sleeping Accommodation  ..Sleeping Accommodation
-        {0x1f6d0, 0x1f6d2},  // Place Of Worship        ..Shopping Trolley
-        {0x1f6d5, 0x1f6d7},  // Hindu Temple            ..Elevator
-        {0x1f6dc, 0x1f6df},  // (nil)                   ..Ring Buoy
-        {0x1f6eb, 0x1f6ec},  // Airplane Departure      ..Airplane Arriving
-        {0x1f6f4, 0x1f6fc},  // Scooter                 ..Roller Skate
-        {0x1f7e0, 0x1f7eb},  // Large Orange Circle     ..Large Brown Square
-        {0x1f7f0, 0x1f7f0},  // Heavy Equals Sign       ..Heavy Equals Sign
-        {0x1f90c, 0x1f93a},  // Pinched Fingers         ..Fencer
-        {0x1f93c, 0x1f945},  // Wrestlers               ..Goal Net
-        {0x1f947, 0x1f9ff},  // First Place Medal       ..Nazar Amulet
-        {0x1fa70, 0x1fa7c},  // Ballet Shoes            ..Crutch
-        {0x1fa80, 0x1fa88},  // Yo-yo                   ..(nil)
-        {0x1fa90, 0x1fabd},  // Ringed Planet           ..(nil)
-        {0x1fabf, 0x1fac5},  // (nil)                   ..Person With Crown
-        {0x1face, 0x1fadb},  // (nil)                   ..(nil)
-        {0x1fae0, 0x1fae8},  // Melting Face            ..(nil)
-        {0x1faf0, 0x1faf8},  // Hand With Index Finger A..(nil)
-        {0x20000, 0x2fffd},  // Cjk Unified Ideograph-20..(nil)
-        {0x30000, 0x3fffd},  // Cjk Unified Ideograph-30..(nil)
+	{0x01100, 0x0115f},  // Hangul Choseong Kiyeok  ..Hangul Choseong Filler
+	{0x0231a, 0x0231b},  // Watch		   ..Hourglass
+	{0x02329, 0x0232a},  // Left-pointing Angle Brac..Right-pointing Angle Bra
+	{0x023e9, 0x023ec},  // Black Right-pointing Dou..Black Down-pointing Doub
+	{0x023f0, 0x023f0},  // Alarm Clock	     ..Alarm Clock
+	{0x023f3, 0x023f3},  // Hourglass With Flowing S..Hourglass With Flowing S
+	{0x025fd, 0x025fe},  // White Medium Small Squar..Black Medium Small Squar
+	{0x02614, 0x02615},  // Umbrella With Rain Drops..Hot Beverage
+	{0x02648, 0x02653},  // Aries		   ..Pisces
+	{0x0267f, 0x0267f},  // Wheelchair Symbol       ..Wheelchair Symbol
+	{0x02693, 0x02693},  // Anchor		  ..Anchor
+	{0x026a1, 0x026a1},  // High Voltage Sign       ..High Voltage Sign
+	{0x026aa, 0x026ab},  // Medium White Circle     ..Medium Black Circle
+	{0x026bd, 0x026be},  // Soccer Ball	     ..Baseball
+	{0x026c4, 0x026c5},  // Snowman Without Snow    ..Sun Behind Cloud
+	{0x026ce, 0x026ce},  // Ophiuchus	       ..Ophiuchus
+	{0x026d4, 0x026d4},  // No Entry		..No Entry
+	{0x026ea, 0x026ea},  // Church		  ..Church
+	{0x026f2, 0x026f3},  // Fountain		..Flag In Hole
+	{0x026f5, 0x026f5},  // Sailboat		..Sailboat
+	{0x026fa, 0x026fa},  // Tent		    ..Tent
+	{0x026fd, 0x026fd},  // Fuel Pump	       ..Fuel Pump
+	{0x02705, 0x02705},  // White Heavy Check Mark  ..White Heavy Check Mark
+	{0x0270a, 0x0270b},  // Raised Fist	     ..Raised Hand
+	{0x02728, 0x02728},  // Sparkles		..Sparkles
+	{0x0274c, 0x0274c},  // Cross Mark	      ..Cross Mark
+	{0x0274e, 0x0274e},  // Negative Squared Cross M..Negative Squared Cross M
+	{0x02753, 0x02755},  // Black Question Mark Orna..White Exclamation Mark O
+	{0x02757, 0x02757},  // Heavy Exclamation Mark S..Heavy Exclamation Mark S
+	{0x02795, 0x02797},  // Heavy Plus Sign	 ..Heavy Division Sign
+	{0x027b0, 0x027b0},  // Curly Loop	      ..Curly Loop
+	{0x027bf, 0x027bf},  // Double Curly Loop       ..Double Curly Loop
+	{0x02b1b, 0x02b1c},  // Black Large Square      ..White Large Square
+	{0x02b50, 0x02b50},  // White Medium Star       ..White Medium Star
+	{0x02b55, 0x02b55},  // Heavy Large Circle      ..Heavy Large Circle
+	{0x02e80, 0x02e99},  // Cjk Radical Repeat      ..Cjk Radical Rap
+	{0x02e9b, 0x02ef3},  // Cjk Radical Choke       ..Cjk Radical C-simplified
+	{0x02f00, 0x02fd5},  // Kangxi Radical One      ..Kangxi Radical Flute
+	{0x02ff0, 0x02ffb},  // Ideographic Description ..Ideographic Description
+	{0x03000, 0x0303e},  // Ideographic Space       ..Ideographic Variation In
+	{0x03041, 0x03096},  // Hiragana Letter Small A ..Hiragana Letter Small Ke
+	{0x03099, 0x030ff},  // Combining Katakana-hirag..Katakana Digraph Koto
+	{0x03105, 0x0312f},  // Bopomofo Letter B       ..Bopomofo Letter Nn
+	{0x03131, 0x0318e},  // Hangul Letter Kiyeok    ..Hangul Letter Araeae
+	{0x03190, 0x031e3},  // Ideographic Annotation L..Cjk Stroke Q
+	{0x031f0, 0x0321e},  // Katakana Letter Small Ku..Parenthesized Korean Cha
+	{0x03220, 0x03247},  // Parenthesized Ideograph ..Circled Ideograph Koto
+	{0x03250, 0x04dbf},  // Partnership Sign	..Cjk Unified Ideograph-4d
+	{0x04e00, 0x0a48c},  // Cjk Unified Ideograph-4e..Yi Syllable Yyr
+	{0x0a490, 0x0a4c6},  // Yi Radical Qot	  ..Yi Radical Ke
+	{0x0a960, 0x0a97c},  // Hangul Choseong Tikeut-m..Hangul Choseong Ssangyeo
+	{0x0ac00, 0x0d7a3},  // Hangul Syllable Ga      ..Hangul Syllable Hih
+	{0x0f900, 0x0faff},  // Cjk Compatibility Ideogr..(nil)
+	{0x0fe10, 0x0fe19},  // Presentation Form For Ve..Presentation Form For Ve
+	{0x0fe30, 0x0fe52},  // Presentation Form For Ve..Small Full Stop
+	{0x0fe54, 0x0fe66},  // Small Semicolon	 ..Small Equals Sign
+	{0x0fe68, 0x0fe6b},  // Small Reverse Solidus   ..Small Commercial At
+	{0x0ff01, 0x0ff60},  // Fullwidth Exclamation Ma..Fullwidth Right White Pa
+	{0x0ffe0, 0x0ffe6},  // Fullwidth Cent Sign     ..Fullwidth Won Sign
+	{0x16fe0, 0x16fe4},  // Tangut Iteration Mark   ..Khitan Small Script Fill
+	{0x16ff0, 0x16ff1},  // Vietnamese Alternate Rea..Vietnamese Alternate Rea
+	{0x17000, 0x187f7},  // (nil)		   ..(nil)
+	{0x18800, 0x18cd5},  // Tangut Component-001    ..Khitan Small Script Char
+	{0x18d00, 0x18d08},  // (nil)		   ..(nil)
+	{0x1aff0, 0x1aff3},  // Katakana Letter Minnan T..Katakana Letter Minnan T
+	{0x1aff5, 0x1affb},  // Katakana Letter Minnan T..Katakana Letter Minnan N
+	{0x1affd, 0x1affe},  // Katakana Letter Minnan N..Katakana Letter Minnan N
+	{0x1b000, 0x1b122},  // Katakana Letter Archaic ..Katakana Letter Archaic
+	{0x1b132, 0x1b132},  // (nil)		   ..(nil)
+	{0x1b150, 0x1b152},  // Hiragana Letter Small Wi..Hiragana Letter Small Wo
+	{0x1b155, 0x1b155},  // (nil)		   ..(nil)
+	{0x1b164, 0x1b167},  // Katakana Letter Small Wi..Katakana Letter Small N
+	{0x1b170, 0x1b2fb},  // Nushu Character-1b170   ..Nushu Character-1b2fb
+	{0x1f004, 0x1f004},  // Mahjong Tile Red Dragon ..Mahjong Tile Red Dragon
+	{0x1f0cf, 0x1f0cf},  // Playing Card Black Joker..Playing Card Black Joker
+	{0x1f18e, 0x1f18e},  // Negative Squared Ab     ..Negative Squared Ab
+	{0x1f191, 0x1f19a},  // Squared Cl	      ..Squared Vs
+	{0x1f200, 0x1f202},  // Square Hiragana Hoka    ..Squared Katakana Sa
+	{0x1f210, 0x1f23b},  // Squared Cjk Unified Ideo..Squared Cjk Unified Ideo
+	{0x1f240, 0x1f248},  // Tortoise Shell Bracketed..Tortoise Shell Bracketed
+	{0x1f250, 0x1f251},  // Circled Ideograph Advant..Circled Ideograph Accept
+	{0x1f260, 0x1f265},  // Rounded Symbol For Fu   ..Rounded Symbol For Cai
+	{0x1f300, 0x1f320},  // Cyclone		 ..Shooting Star
+	{0x1f32d, 0x1f335},  // Hot Dog		 ..Cactus
+	{0x1f337, 0x1f37c},  // Tulip		   ..Baby Bottle
+	{0x1f37e, 0x1f393},  // Bottle With Popping Cork..Graduation Cap
+	{0x1f3a0, 0x1f3ca},  // Carousel Horse	  ..Swimmer
+	{0x1f3cf, 0x1f3d3},  // Cricket Bat And Ball    ..Table Tennis Paddle And
+	{0x1f3e0, 0x1f3f0},  // House Building	  ..European Castle
+	{0x1f3f4, 0x1f3f4},  // Waving Black Flag       ..Waving Black Flag
+	{0x1f3f8, 0x1f43e},  // Badminton Racquet And Sh..Paw Prints
+	{0x1f440, 0x1f440},  // Eyes		    ..Eyes
+	{0x1f442, 0x1f4fc},  // Ear		     ..Videocassette
+	{0x1f4ff, 0x1f53d},  // Prayer Beads	    ..Down-pointing Small Red
+	{0x1f54b, 0x1f54e},  // Kaaba		   ..Menorah With Nine Branch
+	{0x1f550, 0x1f567},  // Clock Face One Oclock   ..Clock Face Twelve-thirty
+	{0x1f57a, 0x1f57a},  // Man Dancing	     ..Man Dancing
+	{0x1f595, 0x1f596},  // Reversed Hand With Middl..Raised Hand With Part Be
+	{0x1f5a4, 0x1f5a4},  // Black Heart	     ..Black Heart
+	{0x1f5fb, 0x1f64f},  // Mount Fuji	      ..Person With Folded Hands
+	{0x1f680, 0x1f6c5},  // Rocket		  ..Left Luggage
+	{0x1f6cc, 0x1f6cc},  // Sleeping Accommodation  ..Sleeping Accommodation
+	{0x1f6d0, 0x1f6d2},  // Place Of Worship	..Shopping Trolley
+	{0x1f6d5, 0x1f6d7},  // Hindu Temple	    ..Elevator
+	{0x1f6dc, 0x1f6df},  // (nil)		   ..Ring Buoy
+	{0x1f6eb, 0x1f6ec},  // Airplane Departure      ..Airplane Arriving
+	{0x1f6f4, 0x1f6fc},  // Scooter		 ..Roller Skate
+	{0x1f7e0, 0x1f7eb},  // Large Orange Circle     ..Large Brown Square
+	{0x1f7f0, 0x1f7f0},  // Heavy Equals Sign       ..Heavy Equals Sign
+	{0x1f90c, 0x1f93a},  // Pinched Fingers	 ..Fencer
+	{0x1f93c, 0x1f945},  // Wrestlers	       ..Goal Net
+	{0x1f947, 0x1f9ff},  // First Place Medal       ..Nazar Amulet
+	{0x1fa70, 0x1fa7c},  // Ballet Shoes	    ..Crutch
+	{0x1fa80, 0x1fa88},  // Yo-yo		   ..(nil)
+	{0x1fa90, 0x1fabd},  // Ringed Planet	   ..(nil)
+	{0x1fabf, 0x1fac5},  // (nil)		   ..Person With Crown
+	{0x1face, 0x1fadb},  // (nil)		   ..(nil)
+	{0x1fae0, 0x1fae8},  // Melting Face	    ..(nil)
+	{0x1faf0, 0x1faf8},  // Hand With Index Finger A..(nil)
+	{0x20000, 0x2fffd},  // Cjk Unified Ideograph-20..(nil)
+	{0x30000, 0x3fffd},  // Cjk Unified Ideograph-30..(nil)
 };
 
-static bool intable(struct width_interval* table, int table_length, int c) {
-        // First quick check for Latin1 etc. characters.
-        if (c < table[0].start) return false;
+static bool
+intable(struct width_interval* table, int table_length, int c)
+{
+	// First quick check for Latin1 etc. characters.
+	if(c < table[0].start)
+		return false;
 
-        // Binary search in table.
-        int bot = 0;
-        int top = table_length - 1;
-        while (top >= bot) {
-                int mid = (bot + top) / 2;
-                if (table[mid].end < c) {
-                        bot = mid + 1;
-                } else if (table[mid].start > c) {
-                        top = mid - 1;
-                } else {
-                        return true;
-                }
-        }
-        return false;
-}
+	// Binary search in table.
+	int bot = 0;
+	int top = table_length - 1;
+	while(top >= bot){
+		int mid = (bot + top) / 2;
+		if(table[mid].end < c){
+			bot = mid + 1;
+		}else if (table[mid].start > c){
+			top = mid - 1;
+		}else{
+			return true;
+		}
+	}
+	return false;
+}
 
-int wcwidth(wchar_t ucs) {
+int
+wcwidth(Rune ucs)
+{
 	// NOTE: created by hand, there isn't anything identifiable other than
 	// general Cf category code to identify these, and some characters in Cf
 	// category code are of non-zero width.
-        if (ucs == 0 ||
-                        ucs == 0x034F ||
-                        (0x200B <= ucs && ucs <= 0x200F) ||
-                        ucs == 0x2028 ||
-                        ucs == 0x2029 ||
-                        (0x202A <= ucs && ucs <= 0x202E) ||
-                        (0x2060 <= ucs && ucs <= 0x2063)) {
-                return 0;
-        }
+	if(ucs == 0 ||
+	   ucs == 0x034F ||
+	   (0x200B <= ucs && ucs <= 0x200F) ||
+	   ucs == 0x2028 ||
+	   ucs == 0x2029 ||
+	   (0x202A <= ucs && ucs <= 0x202E) ||
+	   (0x2060 <= ucs && ucs <= 0x2063)){
+		return 0;
+	}
 
-        // C0/C1 control characters.
-        if (ucs < 32 || (0x07F <= ucs && ucs < 0x0A0)) return -1;
+	// C0/C1 control characters.
+	if(ucs < 32 || (0x07F <= ucs && ucs < 0x0A0))
+		return -1;
 
-        // Combining characters with zero width.
-        if (intable(ZERO_WIDTH, sizeof(ZERO_WIDTH)/sizeof(struct width_interval), ucs)) return 0;
+	// Combining characters with zero width.
+	if(intable(ZERO_WIDTH, sizeof(ZERO_WIDTH)/sizeof(struct width_interval), ucs))
+		return 0;
 
-        return intable(WIDE_EASTASIAN, sizeof(WIDE_EASTASIAN)/sizeof(struct width_interval), ucs) ? 2 : 1;
+	return intable(WIDE_EASTASIAN, sizeof(WIDE_EASTASIAN)/sizeof(struct width_interval), ucs) ? 2 : 1;
 }
--- a/aliases.scm
+++ b/aliases.scm
@@ -62,7 +62,7 @@
 (define (infinite? x) (or (equal? x +inf.0) (equal? x -inf.0)))
 
 (define (char->integer c) (fixnum c))
-(define (integer->char i) (wchar i))
+(define (integer->char i) (rune i))
 (define char-upcase char.upcase)
 (define char-downcase char.downcase)
 (define char=? eqv?)
--- a/builtins.c
+++ b/builtins.c
@@ -94,8 +94,8 @@
 		cv = (cvalue_t*)ptr(a);
 		if(cp_class(cv) == bytetype)
 			return fixnum(1);
-		if(cp_class(cv) == wchartype)
-			return fixnum(u8_charlen(*(uint32_t*)cp_data(cv)));
+		if(cp_class(cv) == runetype)
+			return fixnum(runelen(*(Rune*)cp_data(cv)));
 	}
 	if(iscvalue(a) && cv_class(ptr(a))->eltype != nil)
 		return size_wrap(cvalue_arraylen(a));
--- a/cvalues.c
+++ b/cvalues.c
@@ -10,9 +10,9 @@
 
 value_t int8sym, uint8sym, int16sym, uint16sym, int32sym, uint32sym;
 value_t int64sym, uint64sym, bignumsym;
-value_t longsym, ulongsym, bytesym, wcharsym;
+value_t longsym, ulongsym, bytesym, runesym;
 value_t floatsym, doublesym;
-value_t gftypesym, stringtypesym, wcstringtypesym;
+value_t gftypesym, stringtypesym, runestringtypesym;
 value_t emptystringsym;
 
 value_t structsym, arraysym, enumsym, cfunctionsym, voidsym, pointersym;
@@ -27,8 +27,8 @@
 static fltype_t *int64type, *uint64type;
 static fltype_t *longtype, *ulongtype;
 static fltype_t *floattype, *doubletype;
-fltype_t *bytetype, *wchartype;
-fltype_t *stringtype, *wcstringtype;
+fltype_t *bytetype, *runetype;
+fltype_t *stringtype, *runestringtype;
 fltype_t *builtintype;
 
 static size_t malloc_pressure = 0;
@@ -307,7 +307,6 @@
 num_ctor(int64, int64_t, T_INT64)
 num_ctor(uint64, uint64_t, T_UINT64)
 num_ctor(byte,  uint8_t, T_UINT8)
-num_ctor(wchar, int32_t, T_INT32)
 #if defined(ULONG64)
 num_ctor(long, int64_t, T_INT64)
 num_ctor(ulong, uint64_t, T_UINT64)
@@ -317,6 +316,7 @@
 #endif
 num_ctor(float, float, T_FLOAT)
 num_ctor(double, double, T_DOUBLE)
+num_ctor(rune, uint32_t, T_UINT32)
 
 static int
 cvalue_mpint_init(fltype_t *type, value_t arg, void *dest)
@@ -1622,7 +1622,6 @@
 	ctor_cv_intern(int64, T_INT64, int64_t);
 	ctor_cv_intern(uint64, T_UINT64, uint64_t);
 	ctor_cv_intern(byte, T_UINT8, uint8_t);
-	ctor_cv_intern(wchar, T_INT32, int32_t);
 #if defined(ULONG64)
 	ctor_cv_intern(long, T_INT64, int64_t);
 	ctor_cv_intern(ulong, T_UINT64, uint64_t);
@@ -1630,6 +1629,7 @@
 	ctor_cv_intern(long, T_INT32, int32_t);
 	ctor_cv_intern(ulong, T_UINT32, uint32_t);
 #endif
+	ctor_cv_intern(rune, T_UINT32, uint32_t);
 	ctor_cv_intern(float, T_FLOAT, float);
 	ctor_cv_intern(double, T_DOUBLE, double);
 
@@ -1644,8 +1644,8 @@
 	stringtypesym = symbol("*string-type*");
 	setc(stringtypesym, fl_list2(arraysym, bytesym));
 
-	wcstringtypesym = symbol("*wcstring-type*");
-	setc(wcstringtypesym, fl_list2(arraysym, wcharsym));
+	runestringtypesym = symbol("*runestring-type*");
+	setc(runestringtypesym, fl_list2(arraysym, runesym));
 
 	mk_primtype(int8, int8_t);
 	mk_primtype(uint8, uint8_t);
@@ -1663,7 +1663,7 @@
 	mk_primtype(ulong, uint32_t);
 #endif
 	mk_primtype(byte, uint8_t);
-	mk_primtype(wchar, int32_t);
+	mk_primtype(rune, uint32_t);
 	mk_primtype(float, float);
 	mk_primtype(double, double);
 
@@ -1673,7 +1673,7 @@
 	mpinttype->vtable = &mpint_vtable;
 
 	stringtype = get_type(symbol_value(stringtypesym));
-	wcstringtype = get_type(symbol_value(wcstringtypesym));
+	runestringtype = get_type(symbol_value(runestringtypesym));
 
 	emptystringsym = symbol("*empty-string*");
 	setc(emptystringsym, cvalue_static_cstring(""));
--- a/cvalues.h
+++ b/cvalues.h
@@ -17,13 +17,13 @@
 
 extern value_t int8sym, uint8sym, int16sym, uint16sym, int32sym, uint32sym;
 extern value_t int64sym, uint64sym, bignumsym;
-extern value_t longsym, ulongsym, bytesym, wcharsym;
+extern value_t longsym, ulongsym, bytesym, runesym;
 extern value_t structsym, arraysym, enumsym, cfunctionsym, voidsym, pointersym;
-extern value_t stringtypesym, wcstringtypesym, emptystringsym;
+extern value_t stringtypesym, runestringtypesym, emptystringsym;
 extern value_t unionsym, floatsym, doublesym;
 
-extern fltype_t *bytetype, *wchartype;
-extern fltype_t *stringtype, *wcstringtype;
+extern fltype_t *bytetype, *runetype;
+extern fltype_t *stringtype, *runestringtype;
 extern fltype_t *builtintype;
 
 extern htable_t TypeTable;
@@ -76,7 +76,7 @@
 value_t mk_uint32(uint32_t n);
 value_t mk_int64(int64_t n);
 value_t mk_uint64(uint64_t n);
-value_t mk_wchar(int32_t n);
+value_t mk_rune(Rune n);
 
 /* builtins.c */
 size_t llength(value_t v);
--- a/equal.c
+++ b/equal.c
@@ -85,7 +85,7 @@
 		if(isfixnum(b))
 			return (numval(a) < numval(b)) ? fixnum(-1) : fixnum(1);
 		if(iscprim(b)){
-			if(cp_class((cprim_t*)ptr(b)) == wchartype)
+			if(cp_class((cprim_t*)ptr(b)) == runetype)
 				return fixnum(1);
 			return fixnum(numeric_compare(a, b, eq, 1, 0));
 		}
@@ -106,10 +106,10 @@
 			return bounded_vector_compare(a, b, bound, eq);
 		break;
 	case TAG_CPRIM:
-		if(cp_class((cprim_t*)ptr(a)) == wchartype){
-			if(!iscprim(b) || cp_class(ptr(b)) != wchartype)
+		if(cp_class((cprim_t*)ptr(a)) == runetype){
+			if(!iscprim(b) || cp_class(ptr(b)) != runetype)
 				return fixnum(-1);
-		}else if(iscprim(b) && cp_class(ptr(b)) == wchartype)
+		}else if(iscprim(b) && cp_class(ptr(b)) == runetype)
 			return fixnum(1);
 		c = numeric_compare(a, b, eq, 1, 0);
 		if(c != 2)
@@ -366,8 +366,8 @@
 	case TAG_CPRIM:
 		cp = ptr(a);
 		data = cp_data(cp);
-		if(cp_class(cp) == wchartype)
-			return inthash(*(int32_t*)data);
+		if(cp_class(cp) == runetype)
+			return inthash(*(Rune*)data);
 		nt = cp_numtype(cp);
 		u.d = conv_to_double(data, nt);
 		return doublehash(u.i64);
--- a/flisp.boot
+++ b/flisp.boot
@@ -56,7 +56,7 @@
   length=) 1arg-lambda?)
 	    <= #fn("7000n210L;IB0470051;380470151S:" #(nan?) <=) >
 	    #fn("7000n210L:" #() >) >= #fn("7000n201L;IB0470051;380470151S:" #(nan?) >=)
-	    Instructions #table(brne 19  vargc 76  load1 27  = 60  setc.l 75  sub2 80  brne.l 85  largc 81  brnn 26  loadc.l 70  loadi8 66  < 28  nop 46  set-cdr! 30  loada 8  neg 37  bound? 42  / 58  brn.l 88  lvargc 82  brt 25  trycatch 77  null? 38  load0 21  jmp.l 48  loadv 2  seta 15  keyargs 91  * 57  function? 44  builtin? 43  aref 23  optargs 89  loadt 20  vector? 45  cdr 13  brf 3  loadc00 17  symbol? 34  cadr 36  pop 4  pair? 18  for 78  closure 14  loadf 31  compare 61  loadv.l 67  setg.l 72  brn 87  eqv? 51  aset! 64  atom? 24  eq? 33  boolean? 39  brt.l 50  tapply 79  dummy_nil 94  loada0 0  brbound 90  dup 11  loadc01 22  list 53  loadc 9  apply 54  dummy_t 93  setg 71  loada1 1  tcall.l 84  jmp 16  fixnum? 41  cons 32  loadg.l 68  tcall 6  dummy_eof 95  call 5  - 56  brf.l 49  + 55  dummy_f 92  add2 29  seta.l 73  loadnil 65  brnn.l 86  setc 74  set-car! 47  loadg 7  vector 63  loada.l 69  argc 62  div0 59  ret 10  car 12  number? 40  equal? 52  call.l 83  not 35)
+	    Instructions #table(not 35  vargc 76  load1 27  = 60  setc.l 75  sub2 80  brne.l 85  largc 81  brnn 26  loadc.l 70  loadi8 66  < 28  nop 46  set-cdr! 30  loada 8  neg 37  bound? 42  / 58  brn.l 88  lvargc 82  brt 25  trycatch 77  null? 38  load0 21  jmp.l 48  loadv 2  seta 15  keyargs 91  * 57  function? 44  builtin? 43  aref 23  optargs 89  loadt 20  vector? 45  cdr 13  brf 3  loadc00 17  symbol? 34  cadr 36  pop 4  pair? 18  for 78  closure 14  loadf 31  compare 61  loadv.l 67  setg.l 72  brn 87  eqv? 51  aset! 64  atom? 24  eq? 33  boolean? 39  brt.l 50  tapply 79  dummy_nil 94  loada0 0  brbound 90  dup 11  loadc01 22  list 53  loadc 9  apply 54  dummy_t 93  setg 71  loada1 1  tcall.l 84  jmp 16  fixnum? 41  cons 32  loadg.l 68  tcall 6  dummy_eof 95  call 5  - 56  brf.l 49  + 55  dummy_f 92  add2 29  seta.l 73  loadnil 65  brnn.l 86  setc 74  set-car! 47  loadg 7  vector 63  loada.l 69  argc 62  div0 59  ret 10  car 12  number? 40  equal? 52  call.l 83  brne 19)
 	    __init_globals #fn("6000n020w1422w3474w5476w7478w9:" #("/"
 								   *directory-separator*
 								   "\n"
@@ -128,7 +128,7 @@
 	    #fn("6000n10<===:" #() cdddar) cddddr #fn("6000n10====:" #() cddddr)
 	    cdddr #fn("6000n10===:" #() cdddr) cddr
 	    #fn("6000n10==:" #() cddr) char? #fn("7000n12005121Q:" #(#fn(typeof)
-  wchar) char?)
+  rune) char?)
 	    closure? #fn("7000n10\\;36040[S:" #() closure?) compile
 	    #fn("8000n170q062:" #(compile-f) compile) compile-and #fn("<000n470018283D2166:" #(compile-short-circuit
   brf) compile-and)
--- a/flisp.c
+++ b/flisp.c
@@ -714,7 +714,7 @@
 		return 1;
 	if(iscprim(v)){
 		cprim_t *c = ptr(v);
-		return c->type != wchartype;
+		return c->type != runetype;
 	}
 	if(iscvalue(v)){
 		cvalue_t *c = ptr(v);
--- a/ios.c
+++ b/ios.c
@@ -844,77 +844,29 @@
 }
 
 int
-ios_getutf8(ios_t *s, uint32_t *pwc)
+ios_getutf8(ios_t *s, Rune *r)
 {
-	int c;
-	size_t sz;
-	char c0;
-	char buf[8];
+	int c, i;
+	char buf[UTFmax];
 
-	c = ios_peekc(s);
-	if(c == IOS_EOF){
-		s->_eof = 1;
-		return IOS_EOF;
+	for(i = 0; i < sizeof(buf); i++){
+		if((c = ios_getc(s)) == IOS_EOF){
+			s->_eof = 1;
+			return IOS_EOF;
+		}
+		buf[i] = c;
+		if(fullrune(buf, i+1))
+			break;
 	}
-	c0 = (char)c;
-	if((uint8_t)c0 < 0x80){
-		ios_getc(s);
-		*pwc = (uint32_t)(uint8_t)c0;
-		return 1;
-	}
-	sz = u8_seqlen(&c0)-1;
-	if(!isutf(c0) || sz > 3)
-		return 0;
-	if(ios_readprep(s, sz) < sz){
-		// NOTE: this returns EOF even though some bytes are available
-		// so we do not set s->_eof on this code path
-		return IOS_EOF;
-	}
-	if(u8_isvalid(&s->buf[s->bpos], sz+1)){
-		size_t i = s->bpos;
-		*pwc = u8_nextchar(s->buf, &i);
-		ios_read(s, buf, sz+1);
-		return 1;
-	}
-	return 0;
+	// bad input decodes to Runeerror with length 1; a longer Runeerror is a real U+FFFD
+	return (chartorune(r, buf) == 1 && *r == Runeerror) ? 0 : 1;
 }
 
 int
-ios_peekutf8(ios_t *s, uint32_t *pwc)
+ios_pututf8(ios_t *s, Rune r)
 {
-	int c;
-	size_t sz;
-	char c0;
-
-	c = ios_peekc(s);
-	if(c == IOS_EOF)
-		return IOS_EOF;
-	c0 = (char)c;
-	if((uint8_t)c0 < 0x80){
-		*pwc = (uint32_t)(uint8_t)c0;
-		return 1;
-	}
-	sz = u8_seqlen(&c0)-1;
-	if(!isutf(c0) || sz > 3)
-		return 0;
-	if(ios_readprep(s, sz) < sz)
-		return IOS_EOF;
-	if(u8_isvalid(&s->buf[s->bpos], sz+1)){
-		size_t i = s->bpos;
-		*pwc = u8_nextchar(s->buf, &i);
-		return 1;
-	}
-	return 0;
-}
-
-int
-ios_pututf8(ios_t *s, uint32_t wc)
-{
-	char buf[8];
-	if(wc < 0x80)
-		return ios_putc((int)wc, s);
-	size_t n = u8_toutf8(buf, 8, &wc, 1);
-	return ios_write(s, buf, n);
+	char buf[UTFmax];
+	return ios_write(s, buf, runetochar(buf, &r));
 }
 
 void
--- a/ios.h
+++ b/ios.h
@@ -95,7 +95,7 @@
 void ios_init_stdstreams(void);
 
 /* high-level functions - output */
-int ios_pututf8(ios_t *s, uint32_t wc);
+int ios_pututf8(ios_t *s, Rune r);
 int ios_printf(ios_t *s, const char *format, ...);
 int ios_vprintf(ios_t *s, const char *format, va_list args);
 
@@ -102,8 +102,7 @@
 void hexdump(ios_t *dest, const char *buffer, size_t len, size_t startoffs);
 
 /* high-level stream functions - input */
-int ios_getutf8(ios_t *s, uint32_t *pwc);
-int ios_peekutf8(ios_t *s, uint32_t *pwc);
+int ios_getutf8(ios_t *s, Rune *r);
 
 // discard data buffered for reading
 void ios_purge(ios_t *s);
--- a/iostream.c
+++ b/iostream.c
@@ -134,37 +134,24 @@
 {
 	argcount(nargs, 1);
 	ios_t *s = fl_toiostream(args[0]);
-	uint32_t wc;
+	Rune r;
 	int res;
-	if((res = ios_getutf8(s, &wc)) == IOS_EOF)
+	if((res = ios_getutf8(s, &r)) == IOS_EOF)
 		//lerrorf(IOError, "end of file reached");
 		return FL_EOF;
 	if(res == 0)
 		lerrorf(IOError, "invalid UTF-8 sequence");
-	return mk_wchar(wc);
+	return mk_rune(r);
 }
 
-BUILTIN("io.peekc", io_peekc)
-{
-	argcount(nargs, 1);
-	ios_t *s = fl_toiostream(args[0]);
-	uint32_t wc;
-	int res;
-	if((res = ios_peekutf8(s, &wc)) == IOS_EOF)
-		return FL_EOF;
-	if(res == 0)
-		lerrorf(IOError, "invalid UTF-8 sequence");
-	return mk_wchar(wc);
-}
-
 BUILTIN("io.putc", io_putc)
 {
 	argcount(nargs, 2);
 	ios_t *s = fl_toiostream(args[0]);
-	if(!iscprim(args[1]) || ((cprim_t*)ptr(args[1]))->type != wchartype)
-		type_error("wchar", args[1]);
-	uint32_t wc = *(uint32_t*)cp_data((cprim_t*)ptr(args[1]));
-	return fixnum(ios_pututf8(s, wc));
+	if(!iscprim(args[1]) || ((cprim_t*)ptr(args[1]))->type != runetype)
+		type_error("rune", args[1]);
+	Rune r = *(Rune*)cp_data((cprim_t*)ptr(args[1]));
+	return fixnum(ios_pututf8(s, r));
 }
 
 BUILTIN("io.skip", io_skip)
@@ -281,11 +268,11 @@
 	if(nargs < 2 || nargs > 4)
 		argcount(nargs, 2);
 	ios_t *s = fl_toiostream(args[0]);
-	if(iscprim(args[1]) && ((cprim_t*)ptr(args[1]))->type == wchartype){
+	if(iscprim(args[1]) && ((cprim_t*)ptr(args[1]))->type == runetype){
 		if(nargs > 2)
 			lerrorf(ArgError, "offset argument not supported for characters");
-		uint32_t wc = *(uint32_t*)cp_data(ptr(args[1]));
-		return fixnum(ios_pututf8(s, wc));
+		Rune r = *(Rune*)cp_data(ptr(args[1]));
+		return fixnum(ios_pututf8(s, r));
 	}
 	char *data;
 	size_t sz, offs = 0;
@@ -320,8 +307,8 @@
 {
 	size_t uldelim = toulong(arg);
 	if(uldelim > 0x7f){
-		// wchars > 0x7f, or anything else > 0xff, are out of range
-		if((iscprim(arg) && cp_class(ptr(arg)) == wchartype) || uldelim > 0xff)
+		// runes > 0x7f, or anything else > 0xff, are out of range
+		if((iscprim(arg) && cp_class(ptr(arg)) == runetype) || uldelim > 0xff)
 			lerrorf(ArgError, "delimiter out of range");
 	}
 	return (char)uldelim;
--- a/operators.h
+++ b/operators.h
@@ -14,6 +14,7 @@
 uint64_t conv_to_uint64(void *data, numerictype_t tag);
 int32_t conv_to_int32(void *data, numerictype_t tag);
 uint32_t conv_to_uint32(void *data, numerictype_t tag);
+Rune conv_to_Rune(void *data, numerictype_t tag);
 
 #if defined(ULONG64)
 #define conv_to_long conv_to_int64
--- a/plan9/platform.h
+++ b/plan9/platform.h
@@ -100,7 +100,6 @@
 typedef uintptr uintptr_t;
 typedef intptr ssize_t;
 typedef uintptr size_t;
-typedef Rune wchar_t;
 typedef enum { false, true } bool;
 
-int wcwidth(wchar_t c);
+int wcwidth(Rune c);
--- a/print.c
+++ b/print.c
@@ -227,7 +227,7 @@
 	// get the width of an expression if we can do so cheaply
 	if(issymbol(v))
 		return u8_strwidth(symbol_name(v));
-	if(iscprim(v) && ptr(v) != nil && cp_class((cprim_t*)ptr(v)) == wchartype)
+	if(iscprim(v) && ptr(v) != nil && cp_class((cprim_t*)ptr(v)) == runetype)
 		return 4;
 	return -1;
 }
@@ -642,37 +642,32 @@
 			HPOS += ios_printf(f, "0x%hhx", ch);
 		else
 			HPOS += ios_printf(f, "#byte(0x%hhx)", ch);
-	}else if(type == wcharsym){
-		uint32_t wc = *(uint32_t*)data;
-		char seq[8];
-		size_t nb = u8_toutf8(seq, sizeof(seq), &wc, 1);
+	}else if(type == runesym){
+		Rune r = *(Rune*)data;
+		char seq[UTFmax+1];
+		int nb = runetochar(seq, &r);
 		seq[nb] = '\0';
 		if(print_princ){
-			// TODO: better multibyte handling
-			if(wc == 0)
-				ios_putc(0, f);
-			else
-				outs(seq, f);
+			outsn(seq, f, nb);
 		}else{
 			outsn("#\\", f, 2);
-			switch(wc){
+			switch(r){
 			case 0x00: outsn("nul", f, 3); break;
 			case 0x07: outsn("alarm", f, 5); break;
 			case 0x08: outsn("backspace", f, 9); break;
 			case 0x09: outsn("tab", f, 3); break;
-			case 'l':  outsn("linefeed", f, 8); break;
 			case 0x0a: outsn("newline", f, 7); break;
-			case 0x0B: outsn("vtab", f, 4); break;
-			case 0x0C: outsn("page", f, 4); break;
-			case 0x0D: outsn("return", f, 6); break;
-			case 0x1B: outsn("esc", f, 3); break;
-			case 's':  outsn("space", f, 5); break;
-			case 0x7F: outsn("delete", f, 6); break;
+			case 0x0b: outsn("vtab", f, 4); break;
+			case 0x0c: outsn("page", f, 4); break;
+			case 0x0d: outsn("return", f, 6); break;
+			case 0x1b: outsn("esc", f, 3); break;
+			case ' ':  outsn("space", f, 5); break;
+			case 0x7f: outsn("delete", f, 6); break;
 			default:
-				if(u8_iswprint(wc))
+				if(u8_iswprint(r))
 					outs(seq, f);
 				else
-					HPOS += ios_printf(f, "x%04x", (int)wc);
+					HPOS += ios_printf(f, "x%04x", r);
 				break;
 			}
 		}
@@ -771,9 +766,12 @@
 					print_string(f, (char*)data, len);
 				}
 				return;
-			}else if(eltype == wcharsym){
-				// TODO wchar
+			}else if(eltype == runesym){
+				char buf[UTFmax];
+				print_string(f, buf, runetochar(buf, (Rune*)data));
 			}else{
+				/* FIXME */
+				assert(0 == 1);
 			}
 			size_t i;
 			if(!weak){
--- a/read.c
+++ b/read.c
@@ -314,7 +314,7 @@
 					lerrorf(ParseError, "unknown character #\\%s", buf);
 			}
 			toktype = TOK_NUM;
-			tokval = mk_wchar(cval);
+			tokval = mk_rune(cval);
 		}else if(c == '('){
 			toktype = TOK_SHARPOPEN;
 		}else if(c == '<'){
@@ -482,7 +482,7 @@
 	size_t i = 0, j, sz = 64, ndig;
 	int c;
 	value_t s;
-	uint32_t wc = 0;
+	Rune r = 0;
 
 	buf = malloc(sz);
 	while(1){
@@ -518,9 +518,9 @@
 					ios_getc(F);
 				}
 				eseq[j] = '\0';
-				wc = strtol(eseq, nil, 8);
+				r = strtol(eseq, nil, 8);
 				// \DDD and \xXX read bytes, not characters
-				buf[i++] = ((char)wc);
+				buf[i++] = (char)r;
 			}else if((c == 'x' && (ndig = 2)) || (c == 'u' && (ndig = 4)) || (c == 'U' && (ndig = 8))){
 				while(1){
 					c = ios_peekc(F);
@@ -531,15 +531,15 @@
 				}
 				eseq[j] = '\0';
 				if(j)
-					wc = strtol(eseq, nil, 16);
-				if(!j || wc > 0x10ffff){
+					r = strtol(eseq, nil, 16);
+				if(!j || r > 0x10ffff){
 					free(buf);
 					lerrorf(ParseError, "invalid escape sequence");
 				}
 				if(ndig == 2)
-					buf[i++] = ((char)wc);
+					buf[i++] = (char)r;
 				else
-					i += u8_wc_toutf8(&buf[i], wc);
+					i += runetochar(&buf[i], &r);
 			}else{
 				char esc = read_escape_control_char((char)c);
 				if(esc == (char)c && !strchr("\\'\"`", esc)){
--- a/string.c
+++ b/string.c
@@ -45,8 +45,8 @@
 	argcount(nargs, 1);
 	if(iscprim(args[0])){
 		cprim_t *cp = ptr(args[0]);
-		if(cp_class(cp) == wchartype){
-			int w = wcwidth(*(wchar_t*)cp_data(cp));
+		if(cp_class(cp) == runetype){
+			int w = wcwidth(*(Rune*)cp_data(cp));
 			return w < 0 ? FL_F : fixnum(w);
 		}
 	}
@@ -70,17 +70,18 @@
 	if(iscvalue(args[0])){
 		cvalue_t *cv = ptr(args[0]);
 		fltype_t *t = cv_class(cv);
-		if(t->eltype == wchartype){
-			size_t nc = cv_len(cv) / sizeof(uint32_t);
-			uint32_t *ptr = (uint32_t*)cv_data(cv);
-			size_t nbytes = u8_codingsize(ptr, nc);
-			value_t str = cvalue_string(nbytes);
-			ptr = cv_data(ptr(args[0]));  // relocatable pointer
-			u8_toutf8(cvalue_data(str), nbytes, ptr, nc);
+		if(t->eltype == runetype){
+			size_t nr = cv_len(cv) / sizeof(Rune); // number of runes in the source array
+			size_t nb = runenlen((Rune*)cv_data(cv), nr); // UTF-8 bytes needed for them
+			value_t str = cvalue_string(nb);
+			Rune *r = (Rune*)cv_data(ptr(args[0])); // relocatable pointer: re-read after allocating str
+			char *s = cvalue_data(str);
+			for(size_t i = 0; i < nr; i++)
+				s += runetochar(s, r+i);
 			return str;
 		}
 	}
-	type_error("wchar array", args[0]);
+	type_error("rune array", args[0]);
 }
 
 BUILTIN("string.decode", string_decode)
@@ -95,17 +96,18 @@
 	cvalue_t *cv = ptr(args[0]);
 	char *ptr = (char*)cv_data(cv);
 	size_t nb = cv_len(cv);
-	size_t nc = u8_charnum(ptr, nb);
-	size_t newsz = nc*sizeof(uint32_t);
+	size_t nc = utfnlen(ptr, nb);
+	size_t newsz = nc*sizeof(Rune);
 	if(term)
-		newsz += sizeof(uint32_t);
-	value_t wcstr = cvalue(wcstringtype, newsz);
+		newsz += sizeof(Rune);
+	value_t runestr = cvalue(runestringtype, newsz);
 	ptr = cv_data(ptr(args[0]));  // relocatable pointer
-	uint32_t *pwc = cvalue_data(wcstr);
-	u8_toucs(pwc, nc, ptr, nb);
+	Rune *r = cvalue_data(runestr);
+	for(size_t i = 0; i < nc; i++) // one step per rune (nc), not per byte (nb)
+		ptr += chartorune(r+i, ptr);
 	if(term)
-		pwc[nc] = 0;
-	return wcstr;
+		r[nc] = 0; // terminator goes in the extra slot after the nc decoded runes
+	return runestr;
 }
 
 extern BUILTIN("buffer", buffer);
@@ -212,7 +214,9 @@
 	size_t sl = u8_seqlen(&s[i]);
 	if(sl > len || i > len-sl)
 		bounds_error(args[0], args[1]);
-	return mk_wchar(u8_nextchar(s, &i));
+	Rune r;
+	chartorune(&r, s+i);
+	return mk_rune(r);
 }
 
 BUILTIN("char.upcase", char_upcase)
@@ -219,9 +223,9 @@
 {
 	argcount(nargs, 1);
 	cprim_t *cp = (cprim_t*)ptr(args[0]);
-	if(!iscprim(args[0]) || cp_class(cp) != wchartype)
-		type_error("wchar", args[0]);
-	return mk_wchar(towupper(*(int32_t*)cp_data(cp)));
+	if(!iscprim(args[0]) || cp_class(cp) != runetype)
+		type_error("rune", args[0]);
+	return mk_rune(toupperrune(*(Rune*)cp_data(cp)));
 }
 
 BUILTIN("char.downcase", char_downcase)
@@ -228,9 +232,9 @@
 {
 	argcount(nargs, 1);
 	cprim_t *cp = ptr(args[0]);
-	if(!iscprim(args[0]) || cp_class(cp) != wchartype)
-		type_error("wchar", args[0]);
-	return mk_wchar(towlower(*(int32_t*)cp_data(cp)));
+	if(!iscprim(args[0]) || cp_class(cp) != runetype)
+		type_error("rune", args[0]);
+	return mk_rune(tolowerrune(*(Rune*)cp_data(cp)));
 }
 
 BUILTIN("char-alphabetic?", char_alphabeticp)
@@ -237,23 +241,14 @@
 {
 	argcount(nargs, 1);
 	cprim_t *cp = (cprim_t*)ptr(args[0]);
-	if(!iscprim(args[0]) || cp_class(cp) != wchartype)
-		type_error("wchar", args[0]);
-	return iswalpha(*(int32_t*)cp_data(cp)) ? FL_T : FL_F;
+	if(!iscprim(args[0]) || cp_class(cp) != runetype)
+		type_error("rune", args[0]);
+	return isalpharune(*(Rune*)cp_data(cp)) ? FL_T : FL_F;
 }
 
-static value_t
-mem_find_byte(char *s, char c, size_t start, size_t len)
-{
-	char *p = memchr(s+start, c, len-start);
-	if(p == nil)
-		return FL_F;
-	return size_wrap((size_t)(p - s));
-}
-
 BUILTIN("string.find", string_find)
 {
-	char cbuf[8];
+	char cbuf[UTFmax+1];
 	size_t start = 0;
 	if(nargs == 3)
 		start = toulong(args[2]);
@@ -267,14 +262,16 @@
 
 	value_t v = args[1];
 	cprim_t *cp = ptr(v);
-	if(iscprim(v) && cp_class(cp) == wchartype){
-		uint32_t c = *(uint32_t*)cp_data(cp);
-		if(c <= 0x7f)
-			return mem_find_byte(s, (char)c, start, len);
-		needlesz = u8_toutf8(cbuf, sizeof(cbuf), &c, 1);
+	if(iscprim(v) && cp_class(cp) == runetype){
+		Rune r = *(Rune*)cp_data(cp);
+		needlesz = runetochar(cbuf, &r);
 		needle = cbuf;
+		needle[needlesz] = 0;
 	}else if(iscprim(v) && cp_class(cp) == bytetype){
-		return mem_find_byte(s, *(char*)cp_data(cp), start, len);
+		needlesz = 1;
+		needle = cbuf;
+		needle[0] = *(char*)cp_data(cp);
+		needle[needlesz] = 0;
 	}else if(fl_isstring(v)){
 		cvalue_t *cv = (cvalue_t*)ptr(v);
 		needlesz = cv_len(cv);
@@ -284,8 +281,6 @@
 	}
 	if(needlesz > len-start)
 		return FL_F;
-	if(needlesz == 1)
-		return mem_find_byte(s, needle[0], start, len);
 	if(needlesz == 0)
 		return size_wrap(start);
 	size_t i;
--- a/system.lsp
+++ b/system.lsp
@@ -144,7 +144,7 @@
 (define (min x0 . xs)
   (if (null? xs) x0
       (foldl (λ (a b) (if (< a b) a b)) x0 xs)))
-(define (char? x) (eq? (typeof x) 'wchar))
+(define (char? x) (eq? (typeof x) 'rune))
 (define (array? x) (or (vector? x)
 		       (let ((t (typeof x)))
 			 (and (pair? t) (eq? (car t) 'array)))))
@@ -679,7 +679,7 @@
 (define (string.tail s n) (string.sub s (string.inc s 0 n)))
 
 (define *whitespace*
-  (string.encode #array(wchar 9 10 11 12 13 32 133 160 5760 6158 8192
+  (string.encode #array(rune 9 10 11 12 13 32 133 160 5760 6158 8192
 			      8193 8194 8195 8196 8197 8198 8199 8200
 			      8201 8202 8232 8233 8239 8287 12288)))
 
--- a/test/unittest.lsp
+++ b/test/unittest.lsp
@@ -77,7 +77,7 @@
 
 (assert (equal? (uint64 (double -123)) #uint64(0xffffffffffffff85)))
 
-(assert (equal? (string 'sym #byte(65) #wchar(945) "blah") "symA\u03B1blah"))
+(assert (equal? (string 'sym #byte(65) #rune(945) "blah") "symA\u03B1blah"))
 (assert (= (length (string #\x0)) 1))
 
 (assert (> 9223372036854775808 9223372036854775807))
--- a/utf8.c
+++ b/utf8.c
@@ -49,148 +49,6 @@
 	return trailingBytesForUTF8[(unsigned int)(uint8_t)s[0]] + 1;
 }
 
-/* returns the # of bytes needed to encode a certain character
-   0 means the character cannot (or should not) be encoded. */
-size_t
-u8_charlen(uint32_t ch)
-{
-	if(ch < 0x80)
-		return 1;
-	if(ch < 0x800)
-		return 2;
-	if(ch < 0x10000)
-		return 3;
-	if(ch < 0x110000)
-		return 4;
-	return 0;
-}
-
-size_t
-u8_codingsize(uint32_t *wcstr, size_t n)
-{
-	size_t i, c = 0;
-
-	for(i = 0; i < n; i++)
-		c += u8_charlen(wcstr[i]);
-	return c;
-}
-
-/* conversions without error checking
-   only works for valid UTF-8, i.e. no 5- or 6-byte sequences
-   srcsz = source size in bytes
-   sz = dest size in # of wide characters
-
-   returns # characters converted
-   if sz == srcsz+1 (i.e. 4*srcsz+4 bytes), there will always be enough space.
-*/
-size_t
-u8_toucs(uint32_t *dest, size_t sz, const char *src, size_t srcsz)
-{
-	uint32_t ch;
-	const char *src_end = src + srcsz;
-	size_t nb, i = 0;
-
-	if(sz == 0 || srcsz == 0)
-		return 0;
-
-	while(i < sz){
-		if(!isutf(*src)){ // invalid sequence
-			dest[i++] = 0xFFFD;
-			src++;
-			if(src >= src_end)
-				break;
-			continue;
-		}
-		nb = trailingBytesForUTF8[(uint8_t)*src];
-		if(src + nb >= src_end)
-			break;
-		ch = 0;
-		switch(nb){
-		case 5: ch += (uint8_t)*src++; ch <<= 6; // fallthrough
-		case 4: ch += (uint8_t)*src++; ch <<= 6; // fallthrough
-		case 3: ch += (uint8_t)*src++; ch <<= 6; // fallthrough
-		case 2: ch += (uint8_t)*src++; ch <<= 6; // fallthrough
-		case 1: ch += (uint8_t)*src++; ch <<= 6; // fallthrough
-		case 0: ch += (uint8_t)*src++;
-		}
-		ch -= offsetsFromUTF8[nb];
-		dest[i++] = ch;
-	}
-	return i;
-}
-
-/*
- * srcsz = number of source characters
- * sz = size of dest buffer in bytes
- * returns # bytes stored in dest
- * the destination string will never be bigger than the source string.
-*/
-size_t
-u8_toutf8(char *dest, size_t sz, const uint32_t *src, size_t srcsz)
-{
-	uint32_t ch;
-	size_t i = 0;
-	char *dest0 = dest;
-	char *dest_end = dest + sz;
-
-	while(i < srcsz){
-		ch = src[i];
-		if(ch < 0x80){
-			if(dest >= dest_end)
-				break;
-			*dest++ = (char)ch;
-		}else if(ch < 0x800){
-			if(dest >= dest_end-1)
-				break;
-			*dest++ = (ch>>6) | 0xC0;
-			*dest++ = (ch & 0x3F) | 0x80;
-		}else if(ch < 0x10000){
-			if(dest >= dest_end-2)
-				break;
-			*dest++ = (ch>>12) | 0xE0;
-			*dest++ = ((ch>>6) & 0x3F) | 0x80;
-			*dest++ = (ch & 0x3F) | 0x80;
-		}else if(ch < 0x110000){
-			if(dest >= dest_end-3)
-				break;
-			*dest++ = (ch>>18) | 0xF0;
-			*dest++ = ((ch>>12) & 0x3F) | 0x80;
-			*dest++ = ((ch>>6) & 0x3F) | 0x80;
-			*dest++ = (ch & 0x3F) | 0x80;
-		}
-		i++;
-	}
-	return dest-dest0;
-}
-
-size_t
-u8_wc_toutf8(char *dest, uint32_t ch)
-{
-	if(ch < 0x80){
-		dest[0] = (char)ch;
-		return 1;
-	}
-	if(ch < 0x800){
-		dest[0] = (ch>>6) | 0xC0;
-		dest[1] = (ch & 0x3F) | 0x80;
-		return 2;
-	}
-	if(ch < 0x10000){
-		dest[0] = (ch>>12) | 0xE0;
-		dest[1] = ((ch>>6) & 0x3F) | 0x80;
-		dest[2] = (ch & 0x3F) | 0x80;
-		return 3;
-	}
-	if(ch < 0x110000){
-		dest[0] = (ch>>18) | 0xF0;
-		dest[1] = ((ch>>12) & 0x3F) | 0x80;
-		dest[2] = ((ch>>6) & 0x3F) | 0x80;
-		dest[3] = (ch & 0x3F) | 0x80;
-		return 4;
-	}
-	return 0;
-}
-
 /* byte offset => charnum */
 size_t
 u8_charnum(const char *s, size_t offset)
@@ -208,56 +66,19 @@
 size_t
 u8_strwidth(const char *s)
 {
-	uint32_t ch;
-	size_t nb, tot = 0;
-	int w;
-	signed char sc;
+	size_t i, w;
+	Rune r;
+	int cw;
 
-	while((sc = (signed char)*s) != 0){
-		if(sc >= 0){
-			s++;
-			if(sc)
-				tot++;
-		}else{
-			if(!isutf(sc)){
-				tot++;
-				s++;
-				continue;
-			}
-			nb = trailingBytesForUTF8[(uint8_t)sc];
-			ch = 0;
-			switch(nb){
-			case 5: ch += (uint8_t)*s++; ch <<= 6; // fallthrough
-			case 4: ch += (uint8_t)*s++; ch <<= 6; // fallthrough
-			case 3: ch += (uint8_t)*s++; ch <<= 6; // fallthrough
-			case 2: ch += (uint8_t)*s++; ch <<= 6; // fallthrough
-			case 1: ch += (uint8_t)*s++; ch <<= 6; // fallthrough
-			case 0: ch += (uint8_t)*s++;
-			}
-			ch -= offsetsFromUTF8[nb];
-			w = wcwidth(ch); // might return -1
-			if(w > 0)
-				tot += w;
-		}
+	for(i = w = 0; s[i];){
+		i += chartorune(&r, s+i);
+		cw = wcwidth(r); // might return -1, which would wrap the size_t total
+		if(cw > 0)
+			w += cw;
 	}
-	return tot;
+	return w;
 }
 
-/* reads the next utf-8 sequence out of a string, updating an index */
-uint32_t
-u8_nextchar(const char *s, size_t *i)
-{
-	uint32_t ch = 0;
-	size_t sz = 0;
-
-	do{
-		ch <<= 6;
-		ch += (uint8_t)s[(*i)];
-		sz++;
-	}while(s[*i] && (++(*i)) && !isutf(s[*i]));
-	return ch - offsetsFromUTF8[sz-1];
-}
-
 /* next character without NUL character terminator */
 uint32_t
 u8_nextmemchar(const char *s, size_t *i)
@@ -311,7 +129,7 @@
 }
 
 int
-u8_escape_wchar(char *buf, size_t sz, uint32_t ch)
+u8_escape_rune(char *buf, size_t sz, Rune ch)
 {
 	assert(sz > 2);
 	if(ch >= 0x20 && ch < 0x7f){
@@ -358,7 +176,7 @@
 			i0 = i;
 			ch = u8_nextmemchar(src, &i);
 			if(ascii || !u8_iswprint(ch)){
-				buf += u8_escape_wchar(buf, sz - (buf-start), ch);
+				buf += u8_escape_rune(buf, sz - (buf-start), ch);
 			}else{
 				i = i0;
 				do{
--- a/utf8.h
+++ b/utf8.h
@@ -6,21 +6,9 @@
 
 int u8_iswprint(uint32_t c);
 
-/* convert UTF-8 data to wide character */
-size_t u8_toucs(uint32_t *dest, size_t sz, const char *src, size_t srcsz);
-
-/* the opposite conversion */
-size_t u8_toutf8(char *dest, size_t sz, const uint32_t *src, size_t srcsz);
-
-/* single character to UTF-8, returns # bytes written */
-size_t u8_wc_toutf8(char *dest, uint32_t ch);
-
 /* byte offset to character number */
 size_t u8_charnum(const char *s, size_t offset);
 
-/* return next character, updating an index variable */
-uint32_t u8_nextchar(const char *s, size_t *i);
-
 /* next character without NUL character terminator */
 uint32_t u8_nextmemchar(const char *s, size_t *i);
 
@@ -27,18 +15,12 @@
 /* returns length of next utf-8 sequence */
 size_t u8_seqlen(const char *s);
 
-/* returns the # of bytes needed to encode a certain character */
-size_t u8_charlen(uint32_t ch);
-
-/* computes the # of bytes needed to encode a WC string as UTF-8 */
-size_t u8_codingsize(uint32_t *wcstr, size_t n);
-
 char read_escape_control_char(char c);
 
 /* given a wide character, convert it to an ASCII escape sequence stored in
    buf, where buf is "sz" bytes. returns the number of characters output.
    sz must be at least 3. */
-int u8_escape_wchar(char *buf, size_t sz, uint32_t ch);
+int u8_escape_rune(char *buf, size_t sz, Rune ch);
 
 /* convert UTF-8 "src" to escape sequences.