shithub: riscv

Download patch

ref: 1af732323852c6e6a06f7c3e84899d9055289c62
parent: 0ca4c2ea45cc83057e969862d32c02a015da1dbc
author: cinap_lenrek <[email protected]>
date: Sun Jul 14 12:31:59 EDT 2013

abaco: use uhtml to handle charset conversions

--- a/sys/src/cmd/abaco/charsets.awk
+++ /dev/null
@@ -1,36 +1,0 @@
-#!/bin/awk -f
-# makes a table of character sets from http://www.iana.org/assignments/character-sets
-# and tcs.txt
-
-BEGIN{
-	if(ARGC != 3){
-		print "Usage:  " ARGV[0] " charsets.txt  tcs.txt"
-		exit 1
-	}
-	while(getline<ARGV[1]){
-		if(/^Name:/){
-			i = 0
-			name=tolower($2)
-			names[name] = name
-			alias[name i] = name
-			nalias[name] = ++i
-			
-		}
-		if(/^Alias:/){
-			a = tolower($2)
-			if(a != "none"){
-				names[a] = name
-				alias[name i ] = a
-				nalias[name] = ++i
-			}
-		}
-	}
-}
-{
-	tcs = $1
-	if(tcs in names){
-		name = names[tcs]
-		for(i=0; i<nalias[name]; i++)
-			print "\"" alias[name i] "\", \"" $2 "\","
-	}
-}
--- a/sys/src/cmd/abaco/charsets.txt
+++ /dev/null
@@ -1,1868 +1,0 @@
-
-===================================================================
-CHARACTER SETS
-
-(last updated 28 January 2005)
-
-These are the official names for character sets that may be used in
-the Internet and may be referred to in Internet documentation.  These
-names are expressed in ANSI_X3.4-1968 which is commonly called
-US-ASCII or simply ASCII.  The character set most commonly use in the
-Internet and used especially in protocol standards is US-ASCII, this
-is strongly encouraged.  The use of the name US-ASCII is also
-encouraged.
-
-The character set names may be up to 40 characters taken from the
-printable characters of US-ASCII.  However, no distinction is made
-between use of upper and lower case letters.
-
-The MIBenum value is a unique value for use in MIBs to identify coded
-character sets.
-
-The value space for MIBenum values has been divided into three
-regions. The first region (3-999) consists of coded character sets
-that have been standardized by some standard setting organization.
-This region is intended for standards that do not have subset
-implementations. The second region (1000-1999) is for the Unicode and
-ISO/IEC 10646 coded character sets together with a specification of a
-(set of) sub-repertoires that may occur.  The third region (>1999) is
-intended for vendor specific coded character sets.
-
-	Assigned MIB enum Numbers
-	-------------------------
-	0-2		Reserved
-	3-999		Set By Standards Organizations
-	1000-1999	Unicode / 10646
-	2000-2999	Vendor
-
-The aliases that start with "cs" have been added for use with the
-IANA-CHARSET-MIB as originally defined in RFC3808, and as currently
-maintained by IANA at http://www.iana.org/assignments/ianacharset-mib.
-Note that the ianacharset-mib needs to be kept in sync with this
-registry.  These aliases that start with "cs" contain the standard 
-numbers along with suggestive names in order to facilitate applications 
-that want to display the names in user interfaces.  The "cs" stands 
-for character set and is provided for applications that need a lower 
-case first letter but want to use mixed case thereafter that cannot 
-contain any special characters, such as underbar ("_") and dash ("-").  
-
-If the character set is from an ISO standard, its cs alias is the ISO
-standard number or name.  If the character set is not from an ISO
-standard, but is registered with ISO (IPSJ/ITSCJ is the current ISO
-Registration Authority), the ISO Registry number is specified as
-ISOnnn followed by letters suggestive of the name or standards number
-of the code set.  When a national or international standard is
-revised, the year of revision is added to the cs alias of the new
-character set entry in the IANA Registry in order to distinguish the
-revised character set from the original character set.
-
-
-Character Set                                               Reference
--------------                                               ---------
-
-Name: ANSI_X3.4-1968                                   [RFC1345,KXS2]
-MIBenum: 3
-Source: ECMA registry
-Alias: iso-ir-6
-Alias: ANSI_X3.4-1986
-Alias: ISO_646.irv:1991
-Alias: ASCII
-Alias: ISO646-US
-Alias: US-ASCII (preferred MIME name)
-Alias: us
-Alias: IBM367
-Alias: cp367
-Alias: csASCII
-
-Name: ISO-10646-UTF-1
-MIBenum: 27
-Source: Universal Transfer Format (1), this is the multibyte
-        encoding, that subsets ASCII-7. It does not have byte
-        ordering issues.
-Alias: csISO10646UTF1
-
-Name: ISO_646.basic:1983                                [RFC1345,KXS2]
-MIBenum: 28
-Source: ECMA registry
-Alias: ref
-Alias: csISO646basic1983
-
-Name: INVARIANT                                         [RFC1345,KXS2]
-MIBenum: 29
-Alias: csINVARIANT
-
-Name: ISO_646.irv:1983                                  [RFC1345,KXS2]
-MIBenum: 30
-Source: ECMA registry
-Alias: iso-ir-2
-Alias: irv
-Alias: csISO2IntlRefVersion
-
-Name: BS_4730                                           [RFC1345,KXS2]
-MIBenum: 20
-Source: ECMA registry
-Alias: iso-ir-4
-Alias: ISO646-GB
-Alias: gb
-Alias: uk
-Alias: csISO4UnitedKingdom
-
-Name: NATS-SEFI                                         [RFC1345,KXS2]
-MIBenum: 31
-Source: ECMA registry
-Alias: iso-ir-8-1
-Alias: csNATSSEFI
-
-Name: NATS-SEFI-ADD                                     [RFC1345,KXS2]
-MIBenum: 32
-Source: ECMA registry
-Alias: iso-ir-8-2
-Alias: csNATSSEFIADD
-
-Name: NATS-DANO                                         [RFC1345,KXS2]
-MIBenum: 33
-Source: ECMA registry
-Alias: iso-ir-9-1
-Alias: csNATSDANO
-
-Name: NATS-DANO-ADD                                     [RFC1345,KXS2]
-MIBenum: 34
-Source: ECMA registry
-Alias: iso-ir-9-2
-Alias: csNATSDANOADD
-
-Name: SEN_850200_B                                      [RFC1345,KXS2]
-MIBenum: 35
-Source: ECMA registry
-Alias: iso-ir-10
-Alias: FI
-Alias: ISO646-FI
-Alias: ISO646-SE
-Alias: se
-Alias: csISO10Swedish
-
-Name: SEN_850200_C                                      [RFC1345,KXS2]
-MIBenum: 21
-Source: ECMA registry
-Alias: iso-ir-11
-Alias: ISO646-SE2
-Alias: se2
-Alias: csISO11SwedishForNames
-
-Name: KS_C_5601-1987                                    [RFC1345,KXS2]
-MIBenum: 36
-Source: ECMA registry
-Alias: iso-ir-149
-Alias: KS_C_5601-1989
-Alias: KSC_5601
-Alias: korean
-Alias: csKSC56011987
-
-Name: ISO-2022-KR  (preferred MIME name)                [RFC1557,Choi]
-MIBenum: 37
-Source: RFC-1557 (see also KS_C_5601-1987)
-Alias: csISO2022KR
-
-Name: EUC-KR  (preferred MIME name)                     [RFC1557,Choi]
-MIBenum: 38
-Source: RFC-1557 (see also KS_C_5861-1992)
-Alias: csEUCKR
-
-Name: ISO-2022-JP  (preferred MIME name)               [RFC1468,Murai]
-MIBenum: 39
-Source: RFC-1468 (see also RFC-2237)
-Alias: csISO2022JP
-
-Name: ISO-2022-JP-2  (preferred MIME name)              [RFC1554,Ohta]
-MIBenum: 40
-Source: RFC-1554
-Alias: csISO2022JP2
-
-Name: ISO-2022-CN                                            [RFC1922]
-MIBenum: 104
-Source: RFC-1922
-
-Name: ISO-2022-CN-EXT                                        [RFC1922]
-MIBenum: 105
-Source: RFC-1922
-
-Name: JIS_C6220-1969-jp                                 [RFC1345,KXS2]
-MIBenum: 41
-Source: ECMA registry
-Alias: JIS_C6220-1969
-Alias: iso-ir-13
-Alias: katakana
-Alias: x0201-7
-Alias: csISO13JISC6220jp
-
-Name: JIS_C6220-1969-ro                                 [RFC1345,KXS2]
-MIBenum: 42
-Source: ECMA registry
-Alias: iso-ir-14
-Alias: jp
-Alias: ISO646-JP
-Alias: csISO14JISC6220ro
-
-Name: IT                                                [RFC1345,KXS2]
-MIBenum: 22
-Source: ECMA registry
-Alias: iso-ir-15
-Alias: ISO646-IT
-Alias: csISO15Italian
-
-Name: PT                                                [RFC1345,KXS2]
-MIBenum: 43
-Source: ECMA registry
-Alias: iso-ir-16
-Alias: ISO646-PT
-Alias: csISO16Portuguese
-
-Name: ES                                                [RFC1345,KXS2]
-MIBenum: 23
-Source: ECMA registry
-Alias: iso-ir-17
-Alias: ISO646-ES
-Alias: csISO17Spanish
-
-Name: greek7-old                                        [RFC1345,KXS2]
-MIBenum: 44
-Source: ECMA registry
-Alias: iso-ir-18
-Alias: csISO18Greek7Old
-
-Name: latin-greek                                       [RFC1345,KXS2]
-MIBenum: 45
-Source: ECMA registry
-Alias: iso-ir-19
-Alias: csISO19LatinGreek
-
-Name: DIN_66003                                         [RFC1345,KXS2]
-MIBenum: 24
-Source: ECMA registry
-Alias: iso-ir-21
-Alias: de
-Alias: ISO646-DE
-Alias: csISO21German
-
-Name: NF_Z_62-010_(1973)                                [RFC1345,KXS2]
-MIBenum: 46
-Source: ECMA registry
-Alias: iso-ir-25
-Alias: ISO646-FR1
-Alias: csISO25French
-
-Name: Latin-greek-1                                     [RFC1345,KXS2]
-MIBenum: 47
-Source: ECMA registry
-Alias: iso-ir-27
-Alias: csISO27LatinGreek1
-
-Name: ISO_5427                                          [RFC1345,KXS2]
-MIBenum: 48
-Source: ECMA registry
-Alias: iso-ir-37
-Alias: csISO5427Cyrillic
-
-Name: JIS_C6226-1978                                    [RFC1345,KXS2]
-MIBenum: 49
-Source: ECMA registry
-Alias: iso-ir-42
-Alias: csISO42JISC62261978
-
-Name: BS_viewdata                                       [RFC1345,KXS2]
-MIBenum: 50
-Source: ECMA registry
-Alias: iso-ir-47
-Alias: csISO47BSViewdata
-
-Name: INIS                                              [RFC1345,KXS2]
-MIBenum: 51
-Source: ECMA registry
-Alias: iso-ir-49
-Alias: csISO49INIS
-
-Name: INIS-8                                            [RFC1345,KXS2]
-MIBenum: 52
-Source: ECMA registry
-Alias: iso-ir-50
-Alias: csISO50INIS8
-
-Name: INIS-cyrillic                                     [RFC1345,KXS2]
-MIBenum: 53
-Source: ECMA registry
-Alias: iso-ir-51
-Alias: csISO51INISCyrillic
-
-Name: ISO_5427:1981                                     [RFC1345,KXS2]
-MIBenum: 54
-Source: ECMA registry
-Alias: iso-ir-54
-Alias: ISO5427Cyrillic1981
-
-Name: ISO_5428:1980                                     [RFC1345,KXS2]
-MIBenum: 55
-Source: ECMA registry
-Alias: iso-ir-55
-Alias: csISO5428Greek
-
-Name: GB_1988-80                                        [RFC1345,KXS2]
-MIBenum: 56
-Source: ECMA registry
-Alias: iso-ir-57
-Alias: cn
-Alias: ISO646-CN
-Alias: csISO57GB1988
-
-Name: GB_2312-80                                        [RFC1345,KXS2]
-MIBenum: 57
-Source: ECMA registry
-Alias: iso-ir-58
-Alias: chinese
-Alias: csISO58GB231280
-
-Name: NS_4551-1                                         [RFC1345,KXS2]
-MIBenum: 25
-Source: ECMA registry
-Alias: iso-ir-60
-Alias: ISO646-NO
-Alias: no
-Alias: csISO60DanishNorwegian
-Alias: csISO60Norwegian1
-
-Name: NS_4551-2                                          [RFC1345,KXS2]
-MIBenum: 58
-Source: ECMA registry
-Alias: ISO646-NO2
-Alias: iso-ir-61
-Alias: no2
-Alias: csISO61Norwegian2
-
-Name: NF_Z_62-010                                        [RFC1345,KXS2]
-MIBenum: 26
-Source: ECMA registry
-Alias: iso-ir-69
-Alias: ISO646-FR
-Alias: fr
-Alias: csISO69French
-
-Name: videotex-suppl                                     [RFC1345,KXS2]
-MIBenum: 59
-Source: ECMA registry
-Alias: iso-ir-70
-Alias: csISO70VideotexSupp1
-
-Name: PT2                                                [RFC1345,KXS2]
-MIBenum: 60
-Source: ECMA registry
-Alias: iso-ir-84
-Alias: ISO646-PT2
-Alias: csISO84Portuguese2
-
-Name: ES2                                                [RFC1345,KXS2]
-MIBenum: 61
-Source: ECMA registry
-Alias: iso-ir-85
-Alias: ISO646-ES2
-Alias: csISO85Spanish2
-
-Name: MSZ_7795.3                                         [RFC1345,KXS2]
-MIBenum: 62
-Source: ECMA registry
-Alias: iso-ir-86
-Alias: ISO646-HU
-Alias: hu
-Alias: csISO86Hungarian
-
-Name: JIS_C6226-1983                                     [RFC1345,KXS2]
-MIBenum: 63
-Source: ECMA registry
-Alias: iso-ir-87
-Alias: x0208
-Alias: JIS_X0208-1983
-Alias: csISO87JISX0208
-
-Name: greek7                                             [RFC1345,KXS2]
-MIBenum: 64
-Source: ECMA registry
-Alias: iso-ir-88
-Alias: csISO88Greek7
-
-Name: ASMO_449                                           [RFC1345,KXS2]
-MIBenum: 65
-Source: ECMA registry
-Alias: ISO_9036
-Alias: arabic7
-Alias: iso-ir-89
-Alias: csISO89ASMO449
-
-Name: iso-ir-90                                          [RFC1345,KXS2]
-MIBenum: 66
-Source: ECMA registry
-Alias: csISO90
-
-Name: JIS_C6229-1984-a                                   [RFC1345,KXS2]
-MIBenum: 67
-Source: ECMA registry
-Alias: iso-ir-91
-Alias: jp-ocr-a
-Alias: csISO91JISC62291984a
-
-Name: JIS_C6229-1984-b                                   [RFC1345,KXS2]
-MIBenum: 68
-Source: ECMA registry
-Alias: iso-ir-92
-Alias: ISO646-JP-OCR-B
-Alias: jp-ocr-b
-Alias: csISO92JISC62991984b
-
-Name: JIS_C6229-1984-b-add                               [RFC1345,KXS2]
-MIBenum: 69
-Source: ECMA registry
-Alias: iso-ir-93
-Alias: jp-ocr-b-add
-Alias: csISO93JIS62291984badd
-
-Name: JIS_C6229-1984-hand                                [RFC1345,KXS2]
-MIBenum: 70
-Source: ECMA registry
-Alias: iso-ir-94
-Alias: jp-ocr-hand
-Alias: csISO94JIS62291984hand
-
-Name: JIS_C6229-1984-hand-add                            [RFC1345,KXS2]
-MIBenum: 71
-Source: ECMA registry
-Alias: iso-ir-95
-Alias: jp-ocr-hand-add
-Alias: csISO95JIS62291984handadd
-
-Name: JIS_C6229-1984-kana                                [RFC1345,KXS2]
-MIBenum: 72
-Source: ECMA registry
-Alias: iso-ir-96
-Alias: csISO96JISC62291984kana
-
-Name: ISO_2033-1983                                      [RFC1345,KXS2]
-MIBenum: 73
-Source: ECMA registry
-Alias: iso-ir-98
-Alias: e13b
-Alias: csISO2033
-
-Name: ANSI_X3.110-1983                                   [RFC1345,KXS2]
-MIBenum: 74
-Source: ECMA registry
-Alias: iso-ir-99
-Alias: CSA_T500-1983
-Alias: NAPLPS
-Alias: csISO99NAPLPS
-
-Name: ISO_8859-1:1987                                    [RFC1345,KXS2]
-MIBenum: 4
-Source: ECMA registry
-Alias: iso-ir-100
-Alias: ISO_8859-1
-Alias: ISO-8859-1 (preferred MIME name)
-Alias: latin1
-Alias: l1
-Alias: IBM819
-Alias: CP819
-Alias: csISOLatin1
-
-Name: ISO_8859-2:1987                                    [RFC1345,KXS2]
-MIBenum: 5
-Source: ECMA registry
-Alias: iso-ir-101
-Alias: ISO_8859-2
-Alias: ISO-8859-2 (preferred MIME name)
-Alias: latin2
-Alias: l2
-Alias: csISOLatin2
-
-Name: T.61-7bit                                          [RFC1345,KXS2]
-MIBenum: 75
-Source: ECMA registry
-Alias: iso-ir-102
-Alias: csISO102T617bit
-
-Name: T.61-8bit                                          [RFC1345,KXS2]
-MIBenum: 76
-Alias: T.61
-Source: ECMA registry
-Alias: iso-ir-103
-Alias: csISO103T618bit
-
-Name: ISO_8859-3:1988                                    [RFC1345,KXS2]
-MIBenum: 6
-Source: ECMA registry
-Alias: iso-ir-109
-Alias: ISO_8859-3
-Alias: ISO-8859-3 (preferred MIME name)
-Alias: latin3
-Alias: l3
-Alias: csISOLatin3
-
-Name: ISO_8859-4:1988                                    [RFC1345,KXS2]
-MIBenum: 7
-Source: ECMA registry
-Alias: iso-ir-110
-Alias: ISO_8859-4
-Alias: ISO-8859-4 (preferred MIME name)
-Alias: latin4
-Alias: l4
-Alias: csISOLatin4
-
-Name: ECMA-cyrillic                                     
-MIBenum: 77
-Source: ISO registry (formerly ECMA registry)
-         http://www.itscj.ipsj.jp/ISO-IR/111.pdf
-Alias: iso-ir-111
-Alias: KOI8-E
-Alias: csISO111ECMACyrillic
-
-Name: CSA_Z243.4-1985-1                                  [RFC1345,KXS2]
-MIBenum: 78
-Source: ECMA registry
-Alias: iso-ir-121
-Alias: ISO646-CA
-Alias: csa7-1
-Alias: ca
-Alias: csISO121Canadian1
-
-Name: CSA_Z243.4-1985-2                                  [RFC1345,KXS2]
-MIBenum: 79
-Source: ECMA registry
-Alias: iso-ir-122
-Alias: ISO646-CA2
-Alias: csa7-2
-Alias: csISO122Canadian2
-
-Name: CSA_Z243.4-1985-gr                                 [RFC1345,KXS2]
-MIBenum: 80
-Source: ECMA registry
-Alias: iso-ir-123
-Alias: csISO123CSAZ24341985gr
-
-Name: ISO_8859-6:1987                                    [RFC1345,KXS2]
-MIBenum: 9
-Source: ECMA registry
-Alias: iso-ir-127
-Alias: ISO_8859-6
-Alias: ISO-8859-6 (preferred MIME name)
-Alias: ECMA-114
-Alias: ASMO-708
-Alias: arabic
-Alias: csISOLatinArabic
-
-Name: ISO_8859-6-E                                       [RFC1556,IANA]
-MIBenum: 81
-Source: RFC1556
-Alias: csISO88596E
-Alias: ISO-8859-6-E (preferred MIME name)
-
-Name: ISO_8859-6-I                                       [RFC1556,IANA]
-MIBenum: 82
-Source: RFC1556
-Alias: csISO88596I
-Alias: ISO-8859-6-I (preferred MIME name)
-
-Name: ISO_8859-7:1987                            [RFC1947,RFC1345,KXS2]
-MIBenum: 10
-Source: ECMA registry
-Alias: iso-ir-126
-Alias: ISO_8859-7
-Alias: ISO-8859-7 (preferred MIME name)
-Alias: ELOT_928
-Alias: ECMA-118
-Alias: greek
-Alias: greek8
-Alias: csISOLatinGreek
-
-Name: T.101-G2                                            [RFC1345,KXS2]
-MIBenum: 83
-Source: ECMA registry
-Alias: iso-ir-128
-Alias: csISO128T101G2
-
-Name: ISO_8859-8:1988                                     [RFC1345,KXS2]
-MIBenum: 11
-Source: ECMA registry
-Alias: iso-ir-138
-Alias: ISO_8859-8
-Alias: ISO-8859-8 (preferred MIME name)
-Alias: hebrew
-Alias: csISOLatinHebrew
-
-Name: ISO_8859-8-E                                  [RFC1556,Nussbacher]
-MIBenum: 84
-Source: RFC1556
-Alias: csISO88598E
-Alias: ISO-8859-8-E (preferred MIME name)
-
-Name: ISO_8859-8-I                                  [RFC1556,Nussbacher]
-MIBenum: 85
-Source: RFC1556
-Alias: csISO88598I
-Alias: ISO-8859-8-I (preferred MIME name)
-
-Name: CSN_369103                                          [RFC1345,KXS2]
-MIBenum: 86
-Source: ECMA registry
-Alias: iso-ir-139
-Alias: csISO139CSN369103
-
-Name: JUS_I.B1.002                                        [RFC1345,KXS2]
-MIBenum: 87
-Source: ECMA registry
-Alias: iso-ir-141
-Alias: ISO646-YU
-Alias: js
-Alias: yu
-Alias: csISO141JUSIB1002
-
-Name: ISO_6937-2-add                                      [RFC1345,KXS2]
-MIBenum: 14
-Source: ECMA registry and ISO 6937-2:1983
-Alias: iso-ir-142
-Alias: csISOTextComm
-
-Name: IEC_P27-1                                           [RFC1345,KXS2]
-MIBenum: 88
-Source: ECMA registry
-Alias: iso-ir-143
-Alias: csISO143IECP271
-
-Name: ISO_8859-5:1988                                     [RFC1345,KXS2]
-MIBenum: 8
-Source: ECMA registry
-Alias: iso-ir-144
-Alias: ISO_8859-5
-Alias: ISO-8859-5 (preferred MIME name)
-Alias: cyrillic
-Alias: csISOLatinCyrillic
-
-Name: JUS_I.B1.003-serb                                   [RFC1345,KXS2]
-MIBenum: 89
-Source: ECMA registry
-Alias: iso-ir-146
-Alias: serbian
-Alias: csISO146Serbian
-
-Name: JUS_I.B1.003-mac                                    [RFC1345,KXS2]
-MIBenum: 90
-Source: ECMA registry
-Alias: macedonian
-Alias: iso-ir-147
-Alias: csISO147Macedonian
-
-Name: ISO_8859-9:1989                                     [RFC1345,KXS2]
-MIBenum: 12
-Source: ECMA registry
-Alias: iso-ir-148
-Alias: ISO_8859-9
-Alias: ISO-8859-9 (preferred MIME name)
-Alias: latin5
-Alias: l5
-Alias: csISOLatin5
-
-Name: greek-ccitt                                         [RFC1345,KXS2]
-MIBenum: 91
-Source: ECMA registry
-Alias: iso-ir-150
-Alias: csISO150
-Alias: csISO150GreekCCITT
-
-Name: NC_NC00-10:81                                       [RFC1345,KXS2]
-MIBenum: 92
-Source: ECMA registry
-Alias: cuba
-Alias: iso-ir-151
-Alias: ISO646-CU
-Alias: csISO151Cuba
-
-Name: ISO_6937-2-25                                       [RFC1345,KXS2]
-MIBenum: 93
-Source: ECMA registry
-Alias: iso-ir-152
-Alias: csISO6937Add
-
-Name: GOST_19768-74                                       [RFC1345,KXS2]
-MIBenum: 94
-Source: ECMA registry
-Alias: ST_SEV_358-88
-Alias: iso-ir-153
-Alias: csISO153GOST1976874
-
-Name: ISO_8859-supp                                       [RFC1345,KXS2]
-MIBenum: 95
-Source: ECMA registry
-Alias: iso-ir-154
-Alias: latin1-2-5
-Alias: csISO8859Supp
-
-Name: ISO_10367-box                                       [RFC1345,KXS2]
-MIBenum: 96
-Source: ECMA registry
-Alias: iso-ir-155
-Alias: csISO10367Box
-
-Name: ISO-8859-10 (preferred MIME name)			  [RFC1345,KXS2]
-MIBenum: 13
-Source: ECMA registry
-Alias: iso-ir-157
-Alias: l6
-Alias: ISO_8859-10:1992
-Alias: csISOLatin6
-Alias: latin6
-
-Name: latin-lap                                           [RFC1345,KXS2]
-MIBenum: 97
-Source: ECMA registry
-Alias: lap
-Alias: iso-ir-158
-Alias: csISO158Lap
-
-Name: JIS_X0212-1990                                      [RFC1345,KXS2]
-MIBenum: 98
-Source: ECMA registry
-Alias: x0212
-Alias: iso-ir-159
-Alias: csISO159JISX02121990
-
-Name: DS_2089                                             [RFC1345,KXS2]
-MIBenum: 99
-Source: Danish Standard, DS 2089, February 1974
-Alias: DS2089
-Alias: ISO646-DK
-Alias: dk
-Alias: csISO646Danish
-
-Name: us-dk                                               [RFC1345,KXS2]
-MIBenum: 100
-Alias: csUSDK
-
-Name: dk-us                                               [RFC1345,KXS2]
-MIBenum: 101
-Alias: csDKUS
-
-Name: JIS_X0201                                           [RFC1345,KXS2]
-MIBenum: 15
-Source: JIS X 0201-1976.   One byte only, this is equivalent to 
-        JIS/Roman (similar to ASCII) plus eight-bit half-width 
-        Katakana
-Alias: X0201
-Alias: csHalfWidthKatakana
-
-Name: KSC5636                                             [RFC1345,KXS2]
-MIBenum: 102
-Alias: ISO646-KR
-Alias: csKSC5636
-
-Name: ISO-10646-UCS-2
-MIBenum: 1000
-Source: the 2-octet Basic Multilingual Plane, aka Unicode
-        this needs to specify network byte order: the standard
-        does not specify (it is a 16-bit integer space)
-Alias: csUnicode
-
-Name: ISO-10646-UCS-4
-MIBenum: 1001
-Source: the full code space. (same comment about byte order,
-        these are 31-bit numbers.
-Alias: csUCS4
-
-Name: DEC-MCS                                             [RFC1345,KXS2]
-MIBenum: 2008
-Source: VAX/VMS User's Manual, 
-        Order Number: AI-Y517A-TE, April 1986.
-Alias: dec
-Alias: csDECMCS
-
-Name: hp-roman8                                  [HP-PCL5,RFC1345,KXS2]
-MIBenum: 2004
-Source: LaserJet IIP Printer User's Manual, 
-        HP part no 33471-90901, Hewlet-Packard, June 1989.
-Alias: roman8
-Alias: r8
-Alias: csHPRoman8
-
-Name: macintosh                                           [RFC1345,KXS2]
-MIBenum: 2027
-Source: The Unicode Standard ver1.0, ISBN 0-201-56788-1, Oct 1991
-Alias: mac
-Alias: csMacintosh
-
-Name: IBM037                                              [RFC1345,KXS2]
-MIBenum: 2028
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp037
-Alias: ebcdic-cp-us
-Alias: ebcdic-cp-ca
-Alias: ebcdic-cp-wt
-Alias: ebcdic-cp-nl
-Alias: csIBM037
-
-Name: IBM038                                              [RFC1345,KXS2]
-MIBenum: 2029
-Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
-Alias: EBCDIC-INT
-Alias: cp038
-Alias: csIBM038
-
-Name: IBM273                                              [RFC1345,KXS2]
-MIBenum: 2030
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: CP273
-Alias: csIBM273
-
-Name: IBM274                                              [RFC1345,KXS2]
-MIBenum: 2031
-Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
-Alias: EBCDIC-BE
-Alias: CP274
-Alias: csIBM274
-
-Name: IBM275                                              [RFC1345,KXS2]
-MIBenum: 2032
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: EBCDIC-BR
-Alias: cp275
-Alias: csIBM275
-
-Name: IBM277                                              [RFC1345,KXS2]
-MIBenum: 2033
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: EBCDIC-CP-DK
-Alias: EBCDIC-CP-NO
-Alias: csIBM277
-
-Name: IBM278                                              [RFC1345,KXS2]
-MIBenum: 2034
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: CP278
-Alias: ebcdic-cp-fi
-Alias: ebcdic-cp-se
-Alias: csIBM278
-
-Name: IBM280                                              [RFC1345,KXS2]
-MIBenum: 2035
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: CP280
-Alias: ebcdic-cp-it
-Alias: csIBM280
-
-Name: IBM281                                              [RFC1345,KXS2]
-MIBenum: 2036
-Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
-Alias: EBCDIC-JP-E
-Alias: cp281
-Alias: csIBM281
-
-Name: IBM284                                              [RFC1345,KXS2]
-MIBenum: 2037
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: CP284
-Alias: ebcdic-cp-es
-Alias: csIBM284
-
-Name: IBM285                                              [RFC1345,KXS2]
-MIBenum: 2038
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: CP285
-Alias: ebcdic-cp-gb
-Alias: csIBM285
-
-Name: IBM290                                              [RFC1345,KXS2]
-MIBenum: 2039
-Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
-Alias: cp290
-Alias: EBCDIC-JP-kana
-Alias: csIBM290
-
-Name: IBM297                                              [RFC1345,KXS2]
-MIBenum: 2040
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp297
-Alias: ebcdic-cp-fr
-Alias: csIBM297
-
-Name: IBM420                                              [RFC1345,KXS2]
-MIBenum: 2041
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990, 
-        IBM NLS RM p 11-11
-Alias: cp420
-Alias: ebcdic-cp-ar1
-Alias: csIBM420
-
-Name: IBM423                                              [RFC1345,KXS2]
-MIBenum: 2042
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp423
-Alias: ebcdic-cp-gr
-Alias: csIBM423
-
-Name: IBM424                                              [RFC1345,KXS2]
-MIBenum: 2043
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp424
-Alias: ebcdic-cp-he
-Alias: csIBM424
-
-Name: IBM437                                              [RFC1345,KXS2]
-MIBenum: 2011
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp437
-Alias: 437
-Alias: csPC8CodePage437
-
-Name: IBM500                                              [RFC1345,KXS2]
-MIBenum: 2044
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: CP500
-Alias: ebcdic-cp-be
-Alias: ebcdic-cp-ch
-Alias: csIBM500
-
-Name: IBM775                                                   [HP-PCL5]
-MIBenum: 2087
-Source: HP PCL 5 Comparison Guide (P/N 5021-0329) pp B-13, 1996
-Alias: cp775
-Alias: csPC775Baltic
-
-Name: IBM850                                              [RFC1345,KXS2]
-MIBenum: 2009
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp850
-Alias: 850
-Alias: csPC850Multilingual
-
-Name: IBM851                                              [RFC1345,KXS2]
-MIBenum: 2045
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp851
-Alias: 851
-Alias: csIBM851
-
-Name: IBM852                                              [RFC1345,KXS2]
-MIBenum: 2010
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp852
-Alias: 852
-Alias: csPCp852
-
-Name: IBM855                                              [RFC1345,KXS2]
-MIBenum: 2046
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp855
-Alias: 855
-Alias: csIBM855
-
-Name: IBM857                                              [RFC1345,KXS2]
-MIBenum: 2047
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp857
-Alias: 857
-Alias: csIBM857
-
-Name: IBM860                                              [RFC1345,KXS2]
-MIBenum: 2048
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp860
-Alias: 860
-Alias: csIBM860
-
-Name: IBM861                                              [RFC1345,KXS2]
-MIBenum: 2049
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp861
-Alias: 861
-Alias: cp-is
-Alias: csIBM861
-
-Name: IBM862                                              [RFC1345,KXS2]
-MIBenum: 2013
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp862
-Alias: 862
-Alias: csPC862LatinHebrew
-
-Name: IBM863                                              [RFC1345,KXS2]
-MIBenum: 2050
-Source: IBM Keyboard layouts and code pages, PN 07G4586 June 1991
-Alias: cp863
-Alias: 863
-Alias: csIBM863
-
-Name: IBM864                                              [RFC1345,KXS2]
-MIBenum: 2051
-Source: IBM Keyboard layouts and code pages, PN 07G4586 June 1991
-Alias: cp864
-Alias: csIBM864
-
-Name: IBM865                                              [RFC1345,KXS2]
-MIBenum: 2052
-Source: IBM DOS 3.3 Ref (Abridged), 94X9575 (Feb 1987)
-Alias: cp865
-Alias: 865
-Alias: csIBM865
-
-Name: IBM866                                                     [Pond]
-MIBenum: 2086
-Source: IBM NLDG Volume 2 (SE09-8002-03) August 1994
-Alias: cp866
-Alias: 866
-Alias: csIBM866
-
-Name: IBM868                                              [RFC1345,KXS2]
-MIBenum: 2053
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: CP868
-Alias: cp-ar
-Alias: csIBM868
-
-Name: IBM869                                              [RFC1345,KXS2]
-MIBenum: 2054
-Source: IBM Keyboard layouts and code pages, PN 07G4586 June 1991
-Alias: cp869
-Alias: 869
-Alias: cp-gr
-Alias: csIBM869
-
-Name: IBM870                                              [RFC1345,KXS2]
-MIBenum: 2055
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: CP870
-Alias: ebcdic-cp-roece
-Alias: ebcdic-cp-yu
-Alias: csIBM870
-
-Name: IBM871                                              [RFC1345,KXS2]
-MIBenum: 2056
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: CP871
-Alias: ebcdic-cp-is
-Alias: csIBM871
-
-Name: IBM880                                              [RFC1345,KXS2]
-MIBenum: 2057
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp880
-Alias: EBCDIC-Cyrillic
-Alias: csIBM880
-
-Name: IBM891                                              [RFC1345,KXS2]
-MIBenum: 2058
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp891
-Alias: csIBM891
-
-Name: IBM903                                              [RFC1345,KXS2]
-MIBenum: 2059
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp903
-Alias: csIBM903
-
-Name: IBM904                                              [RFC1345,KXS2]
-MIBenum: 2060
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp904
-Alias: 904
-Alias: csIBBM904
-
-Name: IBM905                                              [RFC1345,KXS2]
-MIBenum: 2061
-Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
-Alias: CP905
-Alias: ebcdic-cp-tr
-Alias: csIBM905
-
-Name: IBM918                                              [RFC1345,KXS2]
-MIBenum: 2062
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: CP918
-Alias: ebcdic-cp-ar2
-Alias: csIBM918
-
-Name: IBM1026                                             [RFC1345,KXS2]
-MIBenum: 2063
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: CP1026
-Alias: csIBM1026
-
-Name: EBCDIC-AT-DE                                        [RFC1345,KXS2]
-MIBenum: 2064
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csIBMEBCDICATDE
-
-Name: EBCDIC-AT-DE-A                                      [RFC1345,KXS2]
-MIBenum: 2065 
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 
-Alias: csEBCDICATDEA
-
-Name: EBCDIC-CA-FR                                        [RFC1345,KXS2]
-MIBenum: 2066
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csEBCDICCAFR
-
-Name: EBCDIC-DK-NO                                        [RFC1345,KXS2]
-MIBenum: 2067
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csEBCDICDKNO
-
-Name: EBCDIC-DK-NO-A                                      [RFC1345,KXS2]
-MIBenum: 2068
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csEBCDICDKNOA
-
-Name: EBCDIC-FI-SE                                        [RFC1345,KXS2]
-MIBenum: 2069
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csEBCDICFISE
-
-Name: EBCDIC-FI-SE-A                                      [RFC1345,KXS2]
-MIBenum: 2070
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csEBCDICFISEA
-
-Name: EBCDIC-FR                                           [RFC1345,KXS2]
-MIBenum: 2071
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csEBCDICFR
-
-Name: EBCDIC-IT                                           [RFC1345,KXS2]
-MIBenum: 2072
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csEBCDICIT
-
-Name: EBCDIC-PT                                           [RFC1345,KXS2]
-MIBenum: 2073
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csEBCDICPT
-
-Name: EBCDIC-ES                                           [RFC1345,KXS2]
-MIBenum: 2074
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csEBCDICES
-
-Name: EBCDIC-ES-A                                         [RFC1345,KXS2]
-MIBenum: 2075
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csEBCDICESA
-
-Name: EBCDIC-ES-S                                         [RFC1345,KXS2]
-MIBenum: 2076
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csEBCDICESS
-
-Name: EBCDIC-UK                                           [RFC1345,KXS2]
-MIBenum: 2077
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csEBCDICUK
-
-Name: EBCDIC-US                                           [RFC1345,KXS2]
-MIBenum: 2078
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csEBCDICUS
-
-Name: UNKNOWN-8BIT                                             [RFC1428]
-MIBenum: 2079
-Alias: csUnknown8BiT
-
-Name: MNEMONIC                                            [RFC1345,KXS2]
-MIBenum: 2080
-Source: RFC 1345, also known as "mnemonic+ascii+38"
-Alias: csMnemonic
-
-Name: MNEM                                                [RFC1345,KXS2]
-MIBenum: 2081
-Source: RFC 1345, also known as "mnemonic+ascii+8200"
-Alias: csMnem
-
-Name: VISCII                                                   [RFC1456]
-MIBenum: 2082
-Source: RFC 1456
-Alias: csVISCII
-
-Name: VIQR                                                     [RFC1456]
-MIBenum: 2083
-Source: RFC 1456
-Alias: csVIQR
-
-Name: KOI8-R  (preferred MIME name)                            [RFC1489]
-MIBenum: 2084
-Source: RFC 1489, based on GOST-19768-74, ISO-6937/8, 
-        INIS-Cyrillic, ISO-5427.
-Alias: csKOI8R
-
-Name: KOI8-U                                                   [RFC2319]
-MIBenum: 2088
-Source: RFC 2319
-
-Name: IBM00858
-MIBenum: 2089
-Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM00858)    [Mahdi]
-Alias: CCSID00858
-Alias: CP00858
-Alias: PC-Multilingual-850+euro
-
-Name: IBM00924
-MIBenum: 2090
-Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM00924)    [Mahdi]
-Alias: CCSID00924
-Alias: CP00924
-Alias: ebcdic-Latin9--euro
-
-Name: IBM01140
-MIBenum: 2091
-Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01140)    [Mahdi]
-Alias: CCSID01140
-Alias: CP01140
-Alias: ebcdic-us-37+euro
-
-Name: IBM01141
-MIBenum: 2092
-Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01141)    [Mahdi]
-Alias: CCSID01141
-Alias: CP01141
-Alias: ebcdic-de-273+euro
-
-Name: IBM01142
-MIBenum: 2093
-Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01142)    [Mahdi]
-Alias: CCSID01142
-Alias: CP01142
-Alias: ebcdic-dk-277+euro
-Alias: ebcdic-no-277+euro
-
-Name: IBM01143
-MIBenum: 2094
-Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01143)    [Mahdi]
-Alias: CCSID01143
-Alias: CP01143
-Alias: ebcdic-fi-278+euro
-Alias: ebcdic-se-278+euro
-
-Name: IBM01144
-MIBenum: 2095
-Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01144)    [Mahdi]
-Alias: CCSID01144
-Alias: CP01144
-Alias: ebcdic-it-280+euro
-
-Name: IBM01145
-MIBenum: 2096
-Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01145)    [Mahdi]
-Alias: CCSID01145
-Alias: CP01145
-Alias: ebcdic-es-284+euro
-
-Name: IBM01146
-MIBenum: 2097
-Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01146)    [Mahdi]
-Alias: CCSID01146
-Alias: CP01146
-Alias: ebcdic-gb-285+euro
-
-Name: IBM01147
-MIBenum: 2098
-Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01147)    [Mahdi]
-Alias: CCSID01147
-Alias: CP01147
-Alias: ebcdic-fr-297+euro
-
-Name: IBM01148
-MIBenum: 2099
-Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01148)    [Mahdi]
-Alias: CCSID01148
-Alias: CP01148
-Alias: ebcdic-international-500+euro
-
-Name: IBM01149
-MIBenum: 2100
-Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01149)    [Mahdi]
-Alias: CCSID01149
-Alias: CP01149
-Alias: ebcdic-is-871+euro
-
-Name: Big5-HKSCS                                                  [Yick]
-MIBenum: 2101
-Source:   See (http://www.iana.org/assignments/charset-reg/Big5-HKSCS) 
-Alias: None
-
-Name: IBM1047                                                [Robrigado]
-MIBenum: 2102
-Source: IBM1047 (EBCDIC Latin 1/Open Systems)
-http://www-1.ibm.com/servers/eserver/iseries/software/globalization/pdf/cp01047z.pdf
-Alias: IBM-1047
-
-Name: PTCP154                                                    [Uskov]
-MIBenum: 2103
-Source: See (http://www.iana.org/assignments/charset-reg/PTCP154)
-Alias: csPTCP154
-Alias: PT154
-Alias: CP154
-Alias: Cyrillic-Asian
-
-Name:  Amiga-1251
-MIBenum:  2104
-Source:  See (http://www.amiga.ultranet.ru/Amiga-1251.html)
-Alias:  Ami1251
-Alias:  Amiga1251
-Alias:  Ami-1251
-(Aliases are provided for historical reasons and should not be used)
-                                                              [Malyshev]
-															  
-Name:  KOI7-switched
-MIBenum:  2105
-Source:  See <http://www.iana.org/assignments/charset-reg/KOI7-switched>
-Aliases:  None
-
-Name: UNICODE-1-1                                              [RFC1641]
-MIBenum: 1010
-Source: RFC 1641
-Alias: csUnicode11
-
-Name: SCSU
-MIBenum: 1011
-Source: SCSU See (http://www.iana.org/assignments/charset-reg/SCSU)     [Scherer]
-Alias: None 
-
-Name: UTF-7                                                    [RFC2152]
-MIBenum: 1012
-Source: RFC 2152
-Alias: None
-
-Name: UTF-16BE                                                 [RFC2781]
-MIBenum: 1013
-Source: RFC 2781
-Alias: None
-
-Name: UTF-16LE                                                 [RFC2781]
-MIBenum: 1014
-Source: RFC 2781
-Alias: None
-
-Name: UTF-16                                                   [RFC2781]
-MIBenum: 1015
-Source: RFC 2781
-Alias: None
-
-Name: CESU-8                                                    [Phipps]
-MIBenum: 1016
-Source: <http://www.unicode.org/unicode/reports/tr26>
-Alias: csCESU-8
-
-Name: UTF-32                                                     [Davis] 
-MIBenum: 1017
-Source: <http://www.unicode.org/unicode/reports/tr19/>
-Alias: None
-
-Name: UTF-32BE                                                   [Davis]
-MIBenum: 1018
-Source: <http://www.unicode.org/unicode/reports/tr19/>
-Alias: None
-
-Name: UTF-32LE                                                   [Davis]
-MIBenum: 1019
-Source: <http://www.unicode.org/unicode/reports/tr19/>
-Alias: None
-
-Name: BOCU-1                                                   [Scherer]
-MIBenum: 1020
-Source: http://www.unicode.org/notes/tn6/
-Alias: csBOCU-1
-
-Name: UNICODE-1-1-UTF-7                                        [RFC1642]
-MIBenum: 103
-Source: RFC 1642
-Alias: csUnicode11UTF7
-
-Name: UTF-8                                                    [RFC3629]
-MIBenum: 106
-Source: RFC 3629
-Alias: None 
-
-Name: ISO-8859-13
-MIBenum: 109
-Source: ISO See (http://www.iana.org/assignments/charset-reg/iso-8859-13)[Tumasonis] 
-Alias: None
-
-Name: ISO-8859-14
-MIBenum: 110
-Source: ISO See (http://www.iana.org/assignments/charset-reg/iso-8859-14) [Simonsen]
-Alias: iso-ir-199
-Alias: ISO_8859-14:1998
-Alias: ISO_8859-14
-Alias: latin8
-Alias: iso-celtic
-Alias: l8
-
-Name: ISO-8859-15
-MIBenum: 111
-Source: ISO 
-        Please see: <http://www.iana.org/assignments/charset-reg/ISO-8859-15>
-Alias: ISO_8859-15
-Alias: Latin-9
-
-Name: ISO-8859-16
-MIBenum: 112
-Source: ISO
-Alias: iso-ir-226
-Alias: ISO_8859-16:2001
-Alias: ISO_8859-16
-Alias: latin10
-Alias: l10 
-
-Name: GBK                                                 
-MIBenum: 113
-Source: Chinese IT Standardization Technical Committee  
-        Please see: <http://www.iana.org/assignments/charset-reg/GBK>
-Alias: CP936
-Alias: MS936
-Alias: windows-936
-
-Name: GB18030
-MIBenum: 114
-Source: Chinese IT Standardization Technical Committee
-        Please see: <http://www.iana.org/assignments/charset-reg/GB18030>
-Alias: None
-
-Name:  OSD_EBCDIC_DF04_15
-MIBenum:  115
-Source:  Fujitsu-Siemens standard mainframe EBCDIC encoding
-         Please see: <http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-15>
-Alias:   None
-
-Name:  OSD_EBCDIC_DF03_IRV
-MIBenum:  116
-Source:  Fujitsu-Siemens standard mainframe EBCDIC encoding
-         Please see: <http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF03-IRV>
-Alias:  None
-
-Name:  OSD_EBCDIC_DF04_1
-MIBenum:  117
-Source:  Fujitsu-Siemens standard mainframe EBCDIC encoding
-         Please see: <http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-1>
-Alias:  None   
-
-Name: JIS_Encoding
-MIBenum: 16
-Source: JIS X 0202-1991.  Uses ISO 2022 escape sequences to
-        shift code sets as documented in JIS X 0202-1991.
-Alias: csJISEncoding
-
-Name: Shift_JIS  (preferred MIME name)
-MIBenum: 17
-Source: This charset is an extension of csHalfWidthKatakana by
-        adding graphic characters in JIS X 0208.  The CCS's are
-        JIS X0201:1997 and JIS X0208:1997.  The
-        complete definition is shown in Appendix 1 of JIS
-        X0208:1997.
-        This charset can be used for the top-level media type "text".
-Alias: MS_Kanji 
-Alias: csShiftJIS
-
-Name: Extended_UNIX_Code_Packed_Format_for_Japanese
-MIBenum: 18
-Source: Standardized by OSF, UNIX International, and UNIX Systems
-        Laboratories Pacific.  Uses ISO 2022 rules to select
-               code set 0: US-ASCII (a single 7-bit byte set)
-               code set 1: JIS X0208-1990 (a double 8-bit byte set)
-                           restricted to A0-FF in both bytes
-               code set 2: Half Width Katakana (a single 7-bit byte set)
-                           requiring SS2 as the character prefix
-               code set 3: JIS X0212-1990 (a double 7-bit byte set)
-                           restricted to A0-FF in both bytes
-                           requiring SS3 as the character prefix
-Alias: csEUCPkdFmtJapanese
-Alias: EUC-JP  (preferred MIME name)
-
-Name: Extended_UNIX_Code_Fixed_Width_for_Japanese
-MIBenum: 19
-Source: Used in Japan.  Each character is 2 octets.
-                code set 0: US-ASCII (a single 7-bit byte set)
-                              1st byte = 00
-                              2nd byte = 20-7E
-                code set 1: JIS X0208-1990 (a double 7-bit byte set)
-                            restricted  to A0-FF in both bytes 
-                code set 2: Half Width Katakana (a single 7-bit byte set)
-                              1st byte = 00
-                              2nd byte = A0-FF
-                code set 3: JIS X0212-1990 (a double 7-bit byte set)
-                            restricted to A0-FF in 
-                            the first byte
-                and 21-7E in the second byte
-Alias: csEUCFixWidJapanese
-
-Name: ISO-10646-UCS-Basic
-MIBenum: 1002
-Source: ASCII subset of Unicode.  Basic Latin = collection 1
-        See ISO 10646, Appendix A
-Alias: csUnicodeASCII
-
-Name: ISO-10646-Unicode-Latin1
-MIBenum: 1003
-Source: ISO Latin-1 subset of Unicode. Basic Latin and Latin-1 
-         Supplement  = collections 1 and 2.  See ISO 10646, 
-         Appendix A.  See RFC 1815.
-Alias: csUnicodeLatin1
-Alias: ISO-10646
-
-Name: ISO-10646-J-1
-Source: ISO 10646 Japanese, see RFC 1815.
-
-Name: ISO-Unicode-IBM-1261
-MIBenum: 1005
-Source: IBM Latin-2, -3, -5, Extended Presentation Set, GCSGID: 1261
-Alias: csUnicodeIBM1261
-
-Name: ISO-Unicode-IBM-1268
-MIBenum: 1006
-Source: IBM Latin-4 Extended Presentation Set, GCSGID: 1268
-Alias: csUnicodeIBM1268
-
-Name: ISO-Unicode-IBM-1276
-MIBenum: 1007
-Source: IBM Cyrillic Greek Extended Presentation Set, GCSGID: 1276
-Alias: csUnicodeIBM1276
-
-Name: ISO-Unicode-IBM-1264
-MIBenum: 1008
-Source: IBM Arabic Presentation Set, GCSGID: 1264
-Alias: csUnicodeIBM1264
-
-Name: ISO-Unicode-IBM-1265
-MIBenum: 1009
-Source: IBM Hebrew Presentation Set, GCSGID: 1265
-Alias: csUnicodeIBM1265
-
-Name: ISO-8859-1-Windows-3.0-Latin-1                           [HP-PCL5] 
-MIBenum: 2000
-Source: Extended ISO 8859-1 Latin-1 for Windows 3.0.  
-        PCL Symbol Set id: 9U
-Alias: csWindows30Latin1
-
-Name: ISO-8859-1-Windows-3.1-Latin-1                           [HP-PCL5] 
-MIBenum: 2001
-Source: Extended ISO 8859-1 Latin-1 for Windows 3.1.  
-        PCL Symbol Set id: 19U
-Alias: csWindows31Latin1
-
-Name: ISO-8859-2-Windows-Latin-2                               [HP-PCL5] 
-MIBenum: 2002
-Source: Extended ISO 8859-2.  Latin-2 for Windows 3.1.
-        PCL Symbol Set id: 9E
-Alias: csWindows31Latin2
-
-Name: ISO-8859-9-Windows-Latin-5                               [HP-PCL5] 
-MIBenum: 2003
-Source: Extended ISO 8859-9.  Latin-5 for Windows 3.1
-        PCL Symbol Set id: 5T
-Alias: csWindows31Latin5
-
-Name: Adobe-Standard-Encoding                                    [Adobe]
-MIBenum: 2005
-Source: PostScript Language Reference Manual
-        PCL Symbol Set id: 10J
-Alias: csAdobeStandardEncoding
-
-Name: Ventura-US                                               [HP-PCL5]
-MIBenum: 2006
-Source: Ventura US.  ASCII plus characters typically used in 
-        publishing, like pilcrow, copyright, registered, trade mark, 
-        section, dagger, and double dagger in the range A0 (hex) 
-        to FF (hex).  
-        PCL Symbol Set id: 14J
-Alias: csVenturaUS  
-
-Name: Ventura-International                                    [HP-PCL5]
-MIBenum: 2007
-Source: Ventura International.  ASCII plus coded characters similar 
-        to Roman8.
-        PCL Symbol Set id: 13J
-Alias: csVenturaInternational
-
-Name: PC8-Danish-Norwegian                                     [HP-PCL5]
-MIBenum: 2012
-Source: PC Danish Norwegian
-        8-bit PC set for Danish Norwegian
-        PCL Symbol Set id: 11U
-Alias: csPC8DanishNorwegian
-
-Name: PC8-Turkish                                              [HP-PCL5]
-MIBenum: 2014
-Source: PC Latin Turkish.  PCL Symbol Set id: 9T
-Alias: csPC8Turkish
-
-Name: IBM-Symbols                                             [IBM-CIDT] 
-MIBenum: 2015
-Source: Presentation Set, CPGID: 259
-Alias: csIBMSymbols
-
-Name: IBM-Thai                                                [IBM-CIDT] 
-MIBenum: 2016
-Source: Presentation Set, CPGID: 838
-Alias: csIBMThai
-
-Name: HP-Legal                                                 [HP-PCL5]
-MIBenum: 2017
-Source: PCL 5 Comparison Guide, Hewlett-Packard,
-        HP part number 5961-0510, October 1992
-        PCL Symbol Set id: 1U
-Alias: csHPLegal
-
-Name: HP-Pi-font                                               [HP-PCL5]
-MIBenum: 2018
-Source: PCL 5 Comparison Guide, Hewlett-Packard,
-        HP part number 5961-0510, October 1992
-        PCL Symbol Set id: 15U
-Alias: csHPPiFont
-
-Name: HP-Math8                                                 [HP-PCL5]
-MIBenum: 2019
-Source: PCL 5 Comparison Guide, Hewlett-Packard,
-        HP part number 5961-0510, October 1992
-        PCL Symbol Set id: 8M
-Alias: csHPMath8
-
-Name: Adobe-Symbol-Encoding                                      [Adobe]
-MIBenum: 2020
-Source: PostScript Language Reference Manual
-        PCL Symbol Set id: 5M
-Alias: csHPPSMath
-
-Name: HP-DeskTop                                               [HP-PCL5]
-MIBenum: 2021
-Source: PCL 5 Comparison Guide, Hewlett-Packard,
-        HP part number 5961-0510, October 1992
-        PCL Symbol Set id: 7J
-Alias: csHPDesktop
-
-Name: Ventura-Math                                             [HP-PCL5]
-MIBenum: 2022
-Source: PCL 5 Comparison Guide, Hewlett-Packard,
-        HP part number 5961-0510, October 1992
-        PCL Symbol Set id: 6M
-Alias: csVenturaMath
-
-Name: Microsoft-Publishing                                     [HP-PCL5]
-MIBenum: 2023
-Source: PCL 5 Comparison Guide, Hewlett-Packard,
-        HP part number 5961-0510, October 1992
-        PCL Symbol Set id: 6J
-Alias: csMicrosoftPublishing
-
-Name: Windows-31J
-MIBenum: 2024
-Source: Windows Japanese.  A further extension of Shift_JIS
-        to include NEC special characters (Row 13), NEC
-        selection of IBM extensions (Rows 89 to 92), and IBM
-        extensions (Rows 115 to 119).  The CCS's are
-        JIS X0201:1997, JIS X0208:1997, and these extensions.
-        This charset can be used for the top-level media type "text",
-        but it is of limited or specialized use (see RFC2278).
-        PCL Symbol Set id: 19K
-Alias: csWindows31J
-
-Name: GB2312  (preferred MIME name)
-MIBenum: 2025
-Source: Chinese for People's Republic of China (PRC) mixed one byte, 
-        two byte set: 
-          20-7E = one byte ASCII 
-          A1-FE = two byte PRC Kanji 
-        See GB 2312-80 
-        PCL Symbol Set Id: 18C
-Alias: csGB2312
-
-Name: Big5  (preferred MIME name)
-MIBenum: 2026
-Source: Chinese for Taiwan Multi-byte set.
-        PCL Symbol Set Id: 18T
-Alias: csBig5
-
-Name: windows-1250
-MIBenum: 2250
-Source: Microsoft  (http://www.iana.org/assignments/charset-reg/windows-1250) [Lazhintseva]
-Alias: None
-
-Name: windows-1251
-MIBenum: 2251
-Source: Microsoft  (http://www.iana.org/assignments/charset-reg/windows-1251) [Lazhintseva]
-Alias: None
-
-Name: windows-1252
-MIBenum: 2252
-Source: Microsoft  (http://www.iana.org/assignments/charset-reg/windows-1252)       [Wendt]
-Alias: None
-
-Name: windows-1253
-MIBenum: 2253
-Source: Microsoft  (http://www.iana.org/assignments/charset-reg/windows-1253) [Lazhintseva]
-Alias: None
-
-Name: windows-1254
-MIBenum: 2254
-Source: Microsoft  (http://www.iana.org/assignments/charset-reg/windows-1254) [Lazhintseva]
-Alias: None
-
-Name: windows-1255
-MIBenum: 2255
-Source: Microsoft  (http://www.iana.org/assignments/charset-reg/windows-1255) [Lazhintseva]
-Alias: None
-
-Name: windows-1256
-MIBenum: 2256
-Source: Microsoft  (http://www.iana.org/assignments/charset-reg/windows-1256) [Lazhintseva]
-Alias: None 
-
-Name: windows-1257
-MIBenum: 2257
-Source: Microsoft  (http://www.iana.org/assignments/charset-reg/windows-1257) [Lazhintseva]
-Alias: None
-
-Name: windows-1258
-MIBenum: 2258
-Source: Microsoft  (http://www.iana.org/assignments/charset-reg/windows-1258) [Lazhintseva]
-Alias: None
-
-Name: TIS-620
-MIBenum: 2259
-Source: Thai Industrial Standards Institute (TISI)	     [Tantsetthi]
-
-Name: HZ-GB-2312
-MIBenum: 2085
-Source: RFC 1842, RFC 1843                              [RFC1842, RFC1843]
-
-
-REFERENCES
-----------
-
-[RFC1345]  Simonsen, K., "Character Mnemonics & Character Sets",
-           RFC 1345, Rationel Almen Planlaegning, Rationel Almen
-           Planlaegning, June 1992.
-
-[RFC1428]  Vaudreuil, G., "Transition of Internet Mail from
-           Just-Send-8 to 8bit-SMTP/MIME", RFC1428, CNRI, February
-           1993.
-
-[RFC1456]  Vietnamese Standardization Working Group, "Conventions for
-           Encoding the Vietnamese Language VISCII: VIetnamese 
-           Standard Code for Information Interchange VIQR: VIetnamese 
-           Quoted-Readable Specification Revision 1.1", RFC 1456, May
-           1993.
-
-[RFC1468]  Murai, J., Crispin, M., and E. van der Poel, "Japanese
-           Character Encoding for Internet Messages", RFC 1468,
-           Keio University, Panda Programming, June 1993.
-
-[RFC1489]  Chernov, A., "Registration of a Cyrillic Character Set",
-           RFC1489, RELCOM Development Team, July 1993. 
-
-[RFC1554]  Ohta, M., and K. Handa, "ISO-2022-JP-2: Multilingual
-           Extension of ISO-2022-JP", RFC1554, Tokyo Institute of
-           Technology, ETL, December 1993. 
-
-[RFC1556]  Nussbacher, H., "Handling of Bi-directional Texts in MIME",
-           RFC1556, Israeli Inter-University, December 1993. 
-
-[RFC1557]  Choi, U., Chon, K., and H. Park, "Korean Character Encoding
-           for Internet Messages", KAIST, Solvit Chosun Media,
-           December 1993.
-
-[RFC1641]  Goldsmith, D., and M. Davis, "Using Unicode with MIME",
-           RFC1641, Taligent, Inc., July 1994. 
-
-[RFC1642]  Goldsmith, D., and M. Davis, "UTF-7", RFC1642, Taligent,
-           Inc., July 1994.
-
-[RFC1815]  Ohta, M., "Character Sets ISO-10646 and ISO-10646-J-1",
-           RFC 1815, Tokyo Institute of Technology, July 1995.
-
-
-[Adobe]    Adobe Systems Incorporated, PostScript Language Reference
-           Manual, second edition, Addison-Wesley Publishing Company,
-           Inc., 1990.
-
-[ECMA Registry]  ISO-IR: International Register of Escape Sequences
-           http://www.itscj.ipsj.or.jp/ISO-IE/  Note: The current
-           registration authority is IPSJ/ITSCJ, Japan.
-
-[HP-PCL5]  Hewlett-Packard Company, "HP PCL 5 Comparison Guide", 
-           (P/N 5021-0329) pp B-13, 1996.
-
-[IBM-CIDT] IBM Corporation, "ABOUT TYPE: IBM's Technical Reference
-           for Core Interchange Digitized Type", Publication number
-           S544-3708-01
-
-[RFC1842]  Wei, Y., J. Li, and Y. Jiang, "ASCII Printable
-           Characters-Based Chinese Character Encoding for Internet
-           Messages", RFC 1842, Harvard University, Rice University,
-           University of Maryland, August 1995.
-
-[RFC1843]  Lee, F., "HZ - A Data Format for Exchanging Files of
-           Arbitrarily Mixed Chinese and ASCII Characters", RFC 1843,
-           Stanford University, August 1995.
-
-[RFC2152]  Goldsmith, D., M. Davis, "UTF-7: A Mail-Safe Transformation
-	   Format of Unicode", RFC 2152, Apple Computer, Inc.,
-	   Taligent Inc., May 1997.
-
-[RFC2279]  Yergeau, F., "UTF-8, A Transformation Format of ISO 10646",
-           RFC 2279, Alis Technologies, January, 1998.
-
-[RFC2781]  Hoffman, P., Yergeau, F., "UTF-16, an encoding of ISO 10646",
-           RFC 2781, February 2000.
-
-[RFC3629]  Yergeau, F., "UTF-8, a transformation format of ISO 10646",
-           RFC3629, November 2003.
-
-PEOPLE
-------
-
-[KXS2] Keld Simonsen <[email protected]>
-
-[Choi] Woohyong Choi <[email protected]>
-
-[Davis] Mark Davis, <[email protected]>, April 2002.
-
-[Lazhintseva] Katya Lazhintseva, <[email protected]>, May 1996.
-
-[Mahdi] Tamer Mahdi, <[email protected]>, August 2000.
-
-[Malyshev] Michael Malyshev, <[email protected]>, January 2004
-
-[Murai] Jun Murai <[email protected]>
-
-[Nussbacher] Hank Nussbacher, <[email protected]>
-
-[Ohta] Masataka Ohta, <[email protected]>, July 1995.
-
-[Phipps] Toby Phipps, <[email protected]>, March 2002.
-
-[Pond] Rick Pond, <[email protected]>, March 1997.
-
-[Robrigado] Reuel Robrigado, <[email protected]>, September 2002.
-
-[Scherer] Markus Scherer, <[email protected]>, August 2000, 
-          September 2002.
-
-[Simonsen] Keld Simonsen, <[email protected]>, August 2000.
-
-[Tantsetthi] Trin Tantsetthi, <[email protected]>, September 1998.
-
-[Tumasonis] Vladas Tumasonis, <[email protected]>, August 2000.
-
-[Uskov] Alexander Uskov, <[email protected]>, September 2002.
-
-[Wendt] Chris Wendt, <[email protected]>, December 1999.
-
-[Yick] Nicky Yick, <[email protected]>, October 2000.
-
-[]
-
-
-
-
-
-
-
--- a/sys/src/cmd/abaco/mkfile
+++ b/sys/src/cmd/abaco/mkfile
@@ -22,7 +22,6 @@
 HFILES=\
 	dat.h\
 	fns.h\
-	tcs.h\
 
 UPDATE=\
 	mkfile\
@@ -30,12 +29,6 @@
 	${OFILES:%.$O=%.c}\
 
 </sys/src/cmd/mkone
-
-charsets.txt:
-	hget http://www.iana.org/assignments/character-sets | sed 's/
//' > charsets.txt
-
-tcs.h: charsets.awk charsets.txt tcs.txt
-	charsets.awk charsets.txt tcs.txt > tcs.h
 
 syms:V:
 	8c -a $CFLAGS main.c > syms
--- a/sys/src/cmd/abaco/util.c
+++ b/sys/src/cmd/abaco/util.c
@@ -715,88 +715,22 @@
 	sendul(sync, i);
 }
 
-struct {
-	char *mime;
-	char *tcs;
-}tcstab[] = {
-
-#include "tcs.h"
-
-	/* not generated by the script */
-	"euc_jp", "jis",
-	"euc_kr", "euc-k",
-	"windows-874", "tis",
-	nil,	nil,
-};
-
-enum {
-	Winstart = 127,
-	Winend = 159
-};
-
-static int winchars[] = {
-	 8226,	/* 8226 is a bullet */
-	8226, 8226, 8218, 402, 8222, 8230, 8224, 8225,
-	710, 8240, 352, 8249, 338, 8226, 8226, 8226,
-	8226, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
-	732, 8482, 353, 8250, 339, 8226, 8226, 376
-};
-
 char *
-tcs(char *cs, char *s, long *np)
+uhtml(char *cs, char *s, long *np)
 {
 	Channel *sync;
 	Exec *e;
-	Rune r;
 	long i, n;
 	void **a;
-	uchar *us;
 	char buf[BUFSIZE], cmd[50];
-	char *t, *u;
+	char *t;
 	int p[2], q[2];
 
-
 	if(s==nil || *s=='\0' || *np==0){
-		werrstr("tcs failed: no data");
+		werrstr("uhtml failed: no data");
 		return s;
 	}
 
-	if(cs == nil){
-		werrstr("tcs failed: no charset");
-		return s;
-	}
-
-	if(cistrncmp(cs, "utf-8", 5)==0 || cistrncmp(cs, "utf8", 4)==0)
-		return s;
-
-	for(i=0; tcstab[i].mime!=nil; i++)
-		if(cistrncmp(cs, tcstab[i].mime, strlen(tcstab[i].mime)) == 0)
-			break;
-
-	if(tcstab[i].mime == nil){
-		fprint(2, "abaco: charset: %s not supported\n", cs);
-		goto latin1;
-	}
-	if(cistrcmp(tcstab[i].tcs, "8859-1")==0 || cistrcmp(tcstab[i].tcs, "ascii")==0){
-latin1:
-		n = 0;
-		for(us=(uchar*)s; *us; us++)
-			n += runelen(*us);
-		n++;
-		t = emalloc(n);
-		for(us=(uchar*)s, u=t; *us; us++){
-			if(*us>=Winstart && *us<=Winend)
-				*u++ = winchars[*us-Winstart];
-			else{
-				r = *us;
-				u += runetochar(u, &r);
-			}
-		}
-		*u = 0;
-		free(s);
-		return t;
-	}
-
 	if(pipe(p)<0 || pipe(q)<0)
 		error("can't create pipe");
 
@@ -804,7 +738,7 @@
 	if(sync == nil)
 		error("can't create channel");
 
-	snprint(cmd, sizeof cmd, "tcs -f %s", tcstab[i].tcs);
+	snprint(cmd, sizeof cmd, (cs != nil && *cs != '\0') ? "uhtml -c %s" : "uthml", cs);
 	e = emalloc(sizeof(Exec));
 	e->p[0] = p[0];
 	e->p[1] = p[1];
@@ -818,7 +752,7 @@
 	close(p[0]);
 	close(q[1]);
 
-	/* in case tcs fails */
+	/* in case uhtml fails */
 	t = s;
 	sync = chancreate(sizeof(ulong), 0);
 	if(sync == nil)
@@ -831,6 +765,7 @@
 	a[3] = (void *)*np;
 	proccreate(writeproc, a, STACK);
 
+	i = 0;
 	s = nil;
 	while((n = read(q[0], buf, sizeof(buf))) > 0){
 		s = erealloc(s, i+n+1);
@@ -840,7 +775,7 @@
 	}
 	n = recvul(sync);
 	if(n != *np)
-		fprint(2, "tcs: did not write %ld; wrote %uld\n", *np, n);
+		fprint(2, "uhtml failed: did not write %ld; wrote %uld\n", *np, n);
 
 	*np = i;
 	chanfree(sync);
@@ -847,7 +782,7 @@
 	close(q[0]);
 
 	if(s == nil){
-		fprint(2, "tcs failed: can't convert charset=%s to %s\n", cs, tcstab[i].tcs);
+		fprint(2, "uhtml failed: can't convert charset=%s\n", cs);
 		return t;
 	}
 	free(t);
@@ -901,46 +836,6 @@
 	return 0;
 }
 
-static
-int
-finddocctype(char *b, int l, char *s)
-{
-	char *p, *e;
-
-	p = cistrstr(s, "<meta");
-	if(!p)
-		return -1;
-	p += 5;
-	e = strchr(s, '>');
-	if(!e)
-		return -1;
-	snprint(b, l, "%.*s", (int)(e-p), p);
-	return 0;
-}
-
-static
-int
-findxmltype(char *b, int l, char *s)
-{
-	char *p, *e;
-
-	p = cistrstr(s, "<?xml ");
-	if(!p)
-		return -1;
-
-	p += 6;
-	e = strstr(p, "?>");
-	if(!e)
-		return -1;
-	snprint(b, l, "%.*s", (int)(e-p), p);
-
-	return 0;
-}
-
-/*
- * servers can lie about lie about the charset,
- * so we use the charset based on the priority.
- */
 char *
 convert(Runestr ctype, char *s, long *np)
 {
@@ -951,14 +846,7 @@
 		snprint(buf, sizeof(buf), "%.*S", ctype.nr, ctype.r);
 		findctype(t, sizeof(t), "charset", buf);
 	}
-	if(findxmltype(buf, sizeof(buf), s)==0)
-		findctype(t, sizeof(t), "encoding", buf);
-	if(finddocctype(buf, sizeof(buf), s) == 0)
-		findctype(t, sizeof(t), "charset", buf);
-
-	if(*t == '\0')
-		strcpy(t, charset);
-	return tcs(t, s, np);
+	return uhtml(t, s, np);
 }
 
 int