~ chicken-core (master) /utf.c


   1/* utf.c - UTF-8 support code
   2;
   3; Copyright (c) 2022-2022, The CHICKEN Team
   4; All rights reserved.
   5;
   6; Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following
   7; conditions are met:
   8;
   9;   Redistributions of source code must retain the above copyright notice, this list of conditions and the following
  10;     disclaimer.
  11;   Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
  12;     disclaimer in the documentation and/or other materials provided with the distribution.
  13;   Neither the name of the author nor the names of its contributors may be used to endorse or promote
  14;     products derived from this software without specific prior written permission.
  15;
  16; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
  17; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
  18; AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
  19; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  20; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  21; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  22; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  23; OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  24; POSSIBILITY OF SUCH DAMAGE.
  25*/
  26
  27
  28#include "chicken.h"
  29
  30#if defined(_WIN32) && !defined(__CYGWIN__)
  31# include <windows.h>
  32#endif
  33
  34/* partially generated by mkrunetype.awk from http://git.suckless.org/ubase/
  35   see LICENSE for more information */
  36
/* Number of elements (rows) in the array `x`; only valid on true arrays, not pointers. */
#define nelem(x)  (sizeof (x) / sizeof (x)[0])
  38
/*
 * bsearch() comparator for tables keyed by a single code point.
 *
 * v1 points at the probe code point, v2 at the first int of a table row.
 * Returns a negative, zero or positive value when the probe is below,
 * equal to or above the row key.
 *
 * The ordering is derived from comparisons rather than a subtraction so
 * that extreme probe values (e.g. INT_MIN) cannot overflow a signed int.
 */
static int
rune1cmp(const void *v1, const void *v2)
{
	const int key = *(const int *)v1;
	const int entry = *(const int *)v2;

	return (key > entry) - (key < entry);
}
  46
/*
 * bsearch() comparator for tables whose rows begin with an inclusive
 * code point range { first, last, ... }.
 *
 * v1 points at the probe code point, v2 at the first int of a row.
 * Returns 0 when first <= probe <= last; otherwise the sign of the result
 * tells whether the probe lies below or above the start of the range.
 *
 * Comparisons are used instead of a subtraction so that extreme probe
 * values (e.g. INT_MIN) cannot overflow a signed int.
 */
static int
rune2cmp(const void *v1, const void *v2)
{
	const int key = *(const int *)v1;
	const int *range = v2;

	if (key >= range[0] && key <= range[1])
		return 0;

	return (key > range[0]) - (key < range[0]);
}
  57
/*
 * Comparator ordering two ints by value: v1 and v2 each point at an int.
 * Returns a negative, zero or positive value when *v1 is below, equal to
 * or above *v2.
 *
 * Implemented with comparisons rather than a subtraction so that operands
 * of opposite sign and large magnitude cannot overflow a signed int.
 */
static int
runemapcmp(const void *v1, const void *v2)
{
	const int lhs = *(const int *)v1;
	const int rhs = *(const int *)v2;

	return (lhs > rhs) - (lhs < rhs);
}
  63
  64/* generated by mkrunetype.awk from  http://git.suckless.org/ubase/
  65   see LICENSE for more information */
  66
/*
 * Inclusive ranges { first, last } in which cases alternate: as used by
 * C_utf_isupper and C_utf_char_downcase, a code point at an even offset
 * from `first` is upper case and its lower-case form is the next code
 * point (r + 1). Rows are sorted and non-overlapping, as bsearch() with
 * rune2cmp requires. The table is never written, hence const.
 */
static const int upper3[][2] = {
	{ 0x0100, 0x012E },
	{ 0x0132, 0x0136 },
	{ 0x0139, 0x0147 },
	{ 0x014A, 0x0176 },
	{ 0x0179, 0x017D },
	{ 0x0182, 0x0184 },
	{ 0x01A0, 0x01A4 },
	{ 0x01B3, 0x01B5 },
	{ 0x01CD, 0x01DB },
	{ 0x01DE, 0x01EE },
	{ 0x01F8, 0x021E },
	{ 0x0222, 0x0232 },
	{ 0x0246, 0x024E },
	{ 0x0370, 0x0372 },
	{ 0x03D8, 0x03EE },
	{ 0x0460, 0x0480 },
	{ 0x048A, 0x04BE },
	{ 0x04C1, 0x04CD },
	{ 0x04D0, 0x052E },
	{ 0x1E00, 0x1E94 },
	{ 0x1EA0, 0x1EFE },
	{ 0x2C67, 0x2C6B },
	{ 0x2C80, 0x2CE2 },
	{ 0x2CEB, 0x2CED },
	{ 0xA640, 0xA66C },
	{ 0xA680, 0xA69A },
	{ 0xA722, 0xA72E },
	{ 0xA732, 0xA76E },
	{ 0xA779, 0xA77B },
	{ 0xA77E, 0xA786 },
	{ 0xA790, 0xA792 },
	{ 0xA796, 0xA7A8 },
	{ 0xA7B4, 0xA7C2 },
	{ 0xA7C7, 0xA7C9 },
	{ 0xA7D6, 0xA7D8 },
};
 104
/*
 * Rows { first, last, lower } describing inclusive ranges in which every
 * code point is upper case. As used by C_utf_char_downcase, the lower-case
 * form of r is lower + (r - first); rows whose third column equals `first`
 * therefore map each code point to itself. Rows are sorted and
 * non-overlapping, as bsearch() with rune2cmp requires. The table is never
 * written, hence const.
 */
static const int upper2[][3] = {
	{ 0x0041, 0x005A, 0x0061 },
	{ 0x00C0, 0x00D6, 0x00E0 },
	{ 0x00D8, 0x00DE, 0x00F8 },
	{ 0x0189, 0x018A, 0x0256 },
	{ 0x01B1, 0x01B2, 0x028A },
	{ 0x0388, 0x038A, 0x03AD },
	{ 0x038E, 0x038F, 0x03CD },
	{ 0x0391, 0x03A1, 0x03B1 },
	{ 0x03A3, 0x03AB, 0x03C3 },
	{ 0x03D2, 0x03D4, 0x03D2 },
	{ 0x03FD, 0x03FF, 0x037B },
	{ 0x0400, 0x040F, 0x0450 },
	{ 0x0410, 0x042F, 0x0430 },
	{ 0x0531, 0x0556, 0x0561 },
	{ 0x10A0, 0x10C5, 0x2D00 },
	{ 0x13A0, 0x13EF, 0xAB70 },
	{ 0x13F0, 0x13F5, 0x13F8 },
	{ 0x1C90, 0x1CBA, 0x10D0 },
	{ 0x1CBD, 0x1CBF, 0x10FD },
	{ 0x1F08, 0x1F0F, 0x1F00 },
	{ 0x1F18, 0x1F1D, 0x1F10 },
	{ 0x1F28, 0x1F2F, 0x1F20 },
	{ 0x1F38, 0x1F3F, 0x1F30 },
	{ 0x1F48, 0x1F4D, 0x1F40 },
	{ 0x1F68, 0x1F6F, 0x1F60 },
	{ 0x1FB8, 0x1FB9, 0x1FB0 },
	{ 0x1FBA, 0x1FBB, 0x1F70 },
	{ 0x1FC8, 0x1FCB, 0x1F72 },
	{ 0x1FD8, 0x1FD9, 0x1FD0 },
	{ 0x1FDA, 0x1FDB, 0x1F76 },
	{ 0x1FE8, 0x1FE9, 0x1FE0 },
	{ 0x1FEA, 0x1FEB, 0x1F7A },
	{ 0x1FF8, 0x1FF9, 0x1F78 },
	{ 0x1FFA, 0x1FFB, 0x1F7C },
	{ 0x210B, 0x210D, 0x210B },
	{ 0x2110, 0x2112, 0x2110 },
	{ 0x2119, 0x211D, 0x2119 },
	{ 0x212C, 0x212D, 0x212C },
	{ 0x2130, 0x2131, 0x2130 },
	{ 0x213E, 0x213F, 0x213E },
	{ 0x2C00, 0x2C2F, 0x2C30 },
	{ 0x2C7E, 0x2C7F, 0x023F },
	{ 0xFF21, 0xFF3A, 0xFF41 },
	{ 0x10400, 0x10427, 0x10428 },
	{ 0x104B0, 0x104D3, 0x104D8 },
	{ 0x10570, 0x1057A, 0x10597 },
	{ 0x1057C, 0x1058A, 0x105A3 },
	{ 0x1058C, 0x10592, 0x105B3 },
	{ 0x10594, 0x10595, 0x105BB },
	{ 0x10C80, 0x10CB2, 0x10CC0 },
	{ 0x118A0, 0x118BF, 0x118C0 },
	{ 0x16E40, 0x16E5F, 0x16E60 },
	{ 0x1D400, 0x1D419, 0x1D400 },
	{ 0x1D434, 0x1D44D, 0x1D434 },
	{ 0x1D468, 0x1D481, 0x1D468 },
	{ 0x1D49E, 0x1D49F, 0x1D49E },
	{ 0x1D4A5, 0x1D4A6, 0x1D4A5 },
	{ 0x1D4A9, 0x1D4AC, 0x1D4A9 },
	{ 0x1D4AE, 0x1D4B5, 0x1D4AE },
	{ 0x1D4D0, 0x1D4E9, 0x1D4D0 },
	{ 0x1D504, 0x1D505, 0x1D504 },
	{ 0x1D507, 0x1D50A, 0x1D507 },
	{ 0x1D50D, 0x1D514, 0x1D50D },
	{ 0x1D516, 0x1D51C, 0x1D516 },
	{ 0x1D538, 0x1D539, 0x1D538 },
	{ 0x1D53B, 0x1D53E, 0x1D53B },
	{ 0x1D540, 0x1D544, 0x1D540 },
	{ 0x1D54A, 0x1D550, 0x1D54A },
	{ 0x1D56C, 0x1D585, 0x1D56C },
	{ 0x1D5A0, 0x1D5B9, 0x1D5A0 },
	{ 0x1D5D4, 0x1D5ED, 0x1D5D4 },
	{ 0x1D608, 0x1D621, 0x1D608 },
	{ 0x1D63C, 0x1D655, 0x1D63C },
	{ 0x1D670, 0x1D689, 0x1D670 },
	{ 0x1D6A8, 0x1D6C0, 0x1D6A8 },
	{ 0x1D6E2, 0x1D6FA, 0x1D6E2 },
	{ 0x1D71C, 0x1D734, 0x1D71C },
	{ 0x1D756, 0x1D76E, 0x1D756 },
	{ 0x1D790, 0x1D7A8, 0x1D790 },
	{ 0x1E900, 0x1E921, 0x1E922 },
};
 187
/*
 * Single upper-case code points as rows { upper, lower }. As used by
 * C_utf_char_downcase, the second column is returned as the lower-case
 * form; rows whose columns are equal map the code point to itself.
 * Rows are sorted by the first column, as bsearch() with rune1cmp
 * requires. The table is never written, hence const.
 */
static const int upper1[][2] = {
	{ 0x0130, 0x0069 },
	{ 0x0178, 0x00FF },
	{ 0x0181, 0x0253 },
	{ 0x0186, 0x0254 },
	{ 0x0187, 0x0188 },
	{ 0x018B, 0x018C },
	{ 0x018E, 0x01DD },
	{ 0x018F, 0x0259 },
	{ 0x0190, 0x025B },
	{ 0x0191, 0x0192 },
	{ 0x0193, 0x0260 },
	{ 0x0194, 0x0263 },
	{ 0x0196, 0x0269 },
	{ 0x0197, 0x0268 },
	{ 0x0198, 0x0199 },
	{ 0x019C, 0x026F },
	{ 0x019D, 0x0272 },
	{ 0x019F, 0x0275 },
	{ 0x01A6, 0x0280 },
	{ 0x01A7, 0x01A8 },
	{ 0x01A9, 0x0283 },
	{ 0x01AC, 0x01AD },
	{ 0x01AE, 0x0288 },
	{ 0x01AF, 0x01B0 },
	{ 0x01B7, 0x0292 },
	{ 0x01B8, 0x01B9 },
	{ 0x01BC, 0x01BD },
	{ 0x01C4, 0x01C6 },
	{ 0x01C7, 0x01C9 },
	{ 0x01CA, 0x01CC },
	{ 0x01F1, 0x01F3 },
	{ 0x01F4, 0x01F5 },
	{ 0x01F6, 0x0195 },
	{ 0x01F7, 0x01BF },
	{ 0x0220, 0x019E },
	{ 0x023A, 0x2C65 },
	{ 0x023B, 0x023C },
	{ 0x023D, 0x019A },
	{ 0x023E, 0x2C66 },
	{ 0x0241, 0x0242 },
	{ 0x0243, 0x0180 },
	{ 0x0244, 0x0289 },
	{ 0x0245, 0x028C },
	{ 0x0376, 0x0377 },
	{ 0x037F, 0x03F3 },
	{ 0x0386, 0x03AC },
	{ 0x038C, 0x03CC },
	{ 0x03CF, 0x03D7 },
	{ 0x03F4, 0x03B8 },
	{ 0x03F7, 0x03F8 },
	{ 0x03F9, 0x03F2 },
	{ 0x03FA, 0x03FB },
	{ 0x04C0, 0x04CF },
	{ 0x10C7, 0x2D27 },
	{ 0x10CD, 0x2D2D },
	{ 0x1E9E, 0x00DF },
	{ 0x1F59, 0x1F51 },
	{ 0x1F5B, 0x1F53 },
	{ 0x1F5D, 0x1F55 },
	{ 0x1F5F, 0x1F57 },
	{ 0x1FEC, 0x1FE5 },
	{ 0x2102, 0x2102 },
	{ 0x2107, 0x2107 },
	{ 0x2115, 0x2115 },
	{ 0x2124, 0x2124 },
	{ 0x2126, 0x03C9 },
	{ 0x2128, 0x2128 },
	{ 0x212A, 0x006B },
	{ 0x212B, 0x00E5 },
	{ 0x2132, 0x214E },
	{ 0x2133, 0x2133 },
	{ 0x2145, 0x2145 },
	{ 0x2183, 0x2184 },
	{ 0x2C60, 0x2C61 },
	{ 0x2C62, 0x026B },
	{ 0x2C63, 0x1D7D },
	{ 0x2C64, 0x027D },
	{ 0x2C6D, 0x0251 },
	{ 0x2C6E, 0x0271 },
	{ 0x2C6F, 0x0250 },
	{ 0x2C70, 0x0252 },
	{ 0x2C72, 0x2C73 },
	{ 0x2C75, 0x2C76 },
	{ 0x2CF2, 0x2CF3 },
	{ 0xA77D, 0x1D79 },
	{ 0xA78B, 0xA78C },
	{ 0xA78D, 0x0265 },
	{ 0xA7AA, 0x0266 },
	{ 0xA7AB, 0x025C },
	{ 0xA7AC, 0x0261 },
	{ 0xA7AD, 0x026C },
	{ 0xA7AE, 0x026A },
	{ 0xA7B0, 0x029E },
	{ 0xA7B1, 0x0287 },
	{ 0xA7B2, 0x029D },
	{ 0xA7B3, 0xAB53 },
	{ 0xA7C4, 0xA794 },
	{ 0xA7C5, 0x0282 },
	{ 0xA7C6, 0x1D8E },
	{ 0xA7D0, 0xA7D1 },
	{ 0xA7F5, 0xA7F6 },
	{ 0x1D49C, 0x1D49C },
	{ 0x1D4A2, 0x1D4A2 },
	{ 0x1D546, 0x1D546 },
	{ 0x1D7CA, 0x1D7CA },
};
 295
 296C_regparm int C_utf_isupper(int r)
 297{
 298	int *match;
 299
 300	if((match = bsearch(&r, upper3, nelem(upper3), sizeof *upper3, &rune2cmp)))
 301		return !((r - match[0]) % 2);
 302	if(bsearch(&r, upper2, nelem(upper2), sizeof *upper2, &rune2cmp))
 303		return 1;
 304	if(bsearch(&r, upper1, nelem(upper1), sizeof *upper1, &rune1cmp))
 305		return 1;
 306	return 0;
 307}
 308
 309C_regparm int C_utf_char_downcase(int r)
 310{
 311	int *match;
 312
 313	match = bsearch(&r, upper3, nelem(upper3), sizeof *upper3, &rune2cmp);
 314	if (match)
 315		return ((r - match[0]) % 2) ? r : r + 1;
 316	match = bsearch(&r, upper2, nelem(upper2), sizeof *upper2, &rune2cmp);
 317	if (match)
 318		return match[2] + (r - match[0]);
 319	match = bsearch(&r, upper1, nelem(upper1), sizeof *upper1, &rune1cmp);
 320	if (match)
 321		return match[1];
 322	return r;
 323}
 324
/*
 * Inclusive ranges { first, last } in which cases alternate: as used by
 * C_utf_islower and C_utf_char_upcase, a code point at an even offset
 * from `first` is lower case and its upper-case form is the previous code
 * point (r - 1). Rows are sorted and non-overlapping, as bsearch() with
 * rune2cmp requires. The table is never written, hence const.
 */
static const int lower4[][2] = {
	{ 0x0101, 0x012F },
	{ 0x0133, 0x0137 },
	{ 0x013A, 0x0148 },
	{ 0x014B, 0x0177 },
	{ 0x017A, 0x017E },
	{ 0x0183, 0x0185 },
	{ 0x01A1, 0x01A5 },
	{ 0x01B4, 0x01B6 },
	{ 0x01CE, 0x01DC },
	{ 0x01DF, 0x01EF },
	{ 0x01F9, 0x021F },
	{ 0x0223, 0x0233 },
	{ 0x0247, 0x024F },
	{ 0x0371, 0x0373 },
	{ 0x03D9, 0x03EF },
	{ 0x0461, 0x0481 },
	{ 0x048B, 0x04BF },
	{ 0x04C2, 0x04CE },
	{ 0x04D1, 0x052F },
	{ 0x1E01, 0x1E95 },
	{ 0x1EA1, 0x1EFF },
	{ 0x2C68, 0x2C6C },
	{ 0x2C81, 0x2CE3 },
	{ 0x2CEC, 0x2CEE },
	{ 0xA641, 0xA66D },
	{ 0xA681, 0xA69B },
	{ 0xA723, 0xA72F },
	{ 0xA733, 0xA76F },
	{ 0xA77A, 0xA77C },
	{ 0xA77F, 0xA787 },
	{ 0xA791, 0xA793 },
	{ 0xA797, 0xA7A9 },
	{ 0xA7B5, 0xA7C3 },
	{ 0xA7C8, 0xA7CA },
	{ 0xA7D7, 0xA7D9 },
};
 362
/*
 * Rows { first, last, upper } describing inclusive ranges in which every
 * code point is lower case. As used by C_utf_char_upcase, the upper-case
 * form of r is upper + (r - first); rows whose third column equals `first`
 * therefore map each code point to itself. Rows are sorted and
 * non-overlapping, as bsearch() with rune2cmp requires. The table is never
 * written, hence const.
 */
static const int lower2[][3] = {
	{ 0x0061, 0x007A, 0x0041 },
	{ 0x00E0, 0x00F6, 0x00C0 },
	{ 0x00F8, 0x00FE, 0x00D8 },
	{ 0x01AA, 0x01AB, 0x01AA },
	{ 0x0234, 0x0239, 0x0234 },
	{ 0x023F, 0x0240, 0x2C7E },
	{ 0x0256, 0x0257, 0x0189 },
	{ 0x025D, 0x025F, 0x025D },
	{ 0x026D, 0x026E, 0x026D },
	{ 0x0273, 0x0274, 0x0273 },
	{ 0x0276, 0x027C, 0x0276 },
	{ 0x027E, 0x027F, 0x027E },
	{ 0x0284, 0x0286, 0x0284 },
	{ 0x028A, 0x028B, 0x01B1 },
	{ 0x028D, 0x0291, 0x028D },
	{ 0x0295, 0x029C, 0x0295 },
	{ 0x029F, 0x02AF, 0x029F },
	{ 0x037B, 0x037D, 0x03FD },
	{ 0x03AD, 0x03AF, 0x0388 },
	{ 0x03B1, 0x03C1, 0x0391 },
	{ 0x03C3, 0x03CB, 0x03A3 },
	{ 0x03CD, 0x03CE, 0x038E },
	{ 0x0430, 0x044F, 0x0410 },
	{ 0x0450, 0x045F, 0x0400 },
	{ 0x0561, 0x0586, 0x0531 },
	{ 0x0587, 0x0588, 0x0587 },
	{ 0x10D0, 0x10FA, 0x1C90 },
	{ 0x10FD, 0x10FF, 0x1CBD },
	{ 0x13F8, 0x13FD, 0x13F0 },
	{ 0x1C83, 0x1C84, 0x0421 },
	{ 0x1D00, 0x1D2B, 0x1D00 },
	{ 0x1D6B, 0x1D77, 0x1D6B },
	{ 0x1D7A, 0x1D7C, 0x1D7A },
	{ 0x1D7E, 0x1D8D, 0x1D7E },
	{ 0x1D8F, 0x1D9A, 0x1D8F },
	{ 0x1E96, 0x1E9A, 0x1E96 },
	{ 0x1E9C, 0x1E9D, 0x1E9C },
	{ 0x1F00, 0x1F07, 0x1F08 },
	{ 0x1F10, 0x1F15, 0x1F18 },
	{ 0x1F20, 0x1F27, 0x1F28 },
	{ 0x1F30, 0x1F37, 0x1F38 },
	{ 0x1F40, 0x1F45, 0x1F48 },
	{ 0x1F60, 0x1F67, 0x1F68 },
	{ 0x1F70, 0x1F71, 0x1FBA },
	{ 0x1F72, 0x1F75, 0x1FC8 },
	{ 0x1F76, 0x1F77, 0x1FDA },
	{ 0x1F78, 0x1F79, 0x1FF8 },
	{ 0x1F7A, 0x1F7B, 0x1FEA },
	{ 0x1F7C, 0x1F7D, 0x1FFA },
	{ 0x1F80, 0x1F87, 0x1F88 },
	{ 0x1F90, 0x1F97, 0x1F98 },
	{ 0x1FA0, 0x1FA7, 0x1FA8 },
	{ 0x1FB0, 0x1FB1, 0x1FB8 },
	{ 0x1FB6, 0x1FB7, 0x1FB6 },
	{ 0x1FC6, 0x1FC7, 0x1FC6 },
	{ 0x1FD0, 0x1FD1, 0x1FD8 },
	{ 0x1FD2, 0x1FD3, 0x1FD2 },
	{ 0x1FD6, 0x1FD7, 0x1FD6 },
	{ 0x1FE0, 0x1FE1, 0x1FE8 },
	{ 0x1FE2, 0x1FE4, 0x1FE2 },
	{ 0x1FE6, 0x1FE7, 0x1FE6 },
	{ 0x1FF6, 0x1FF7, 0x1FF6 },
	{ 0x210E, 0x210F, 0x210E },
	{ 0x213C, 0x213D, 0x213C },
	{ 0x2146, 0x2149, 0x2146 },
	{ 0x2C30, 0x2C5F, 0x2C00 },
	{ 0x2C77, 0x2C7B, 0x2C77 },
	{ 0x2D00, 0x2D25, 0x10A0 },
	{ 0xA730, 0xA731, 0xA730 },
	{ 0xA771, 0xA778, 0xA771 },
	{ 0xAB30, 0xAB52, 0xAB30 },
	{ 0xAB54, 0xAB5A, 0xAB54 },
	{ 0xAB60, 0xAB68, 0xAB60 },
	{ 0xAB70, 0xABBF, 0x13A0 },
	{ 0xFB00, 0xFB06, 0xFB00 },
	{ 0xFB13, 0xFB17, 0xFB13 },
	{ 0xFF41, 0xFF5A, 0xFF21 },
	{ 0x10428, 0x1044F, 0x10400 },
	{ 0x104D8, 0x104FB, 0x104B0 },
	{ 0x10597, 0x105A1, 0x10570 },
	{ 0x105A3, 0x105B1, 0x1057C },
	{ 0x105B3, 0x105B9, 0x1058C },
	{ 0x105BB, 0x105BC, 0x10594 },
	{ 0x10CC0, 0x10CF2, 0x10C80 },
	{ 0x118C0, 0x118DF, 0x118A0 },
	{ 0x16E60, 0x16E7F, 0x16E40 },
	{ 0x1D41A, 0x1D433, 0x1D41A },
	{ 0x1D44E, 0x1D454, 0x1D44E },
	{ 0x1D456, 0x1D467, 0x1D456 },
	{ 0x1D482, 0x1D49B, 0x1D482 },
	{ 0x1D4B6, 0x1D4B9, 0x1D4B6 },
	{ 0x1D4BD, 0x1D4C3, 0x1D4BD },
	{ 0x1D4C5, 0x1D4CF, 0x1D4C5 },
	{ 0x1D4EA, 0x1D503, 0x1D4EA },
	{ 0x1D51E, 0x1D537, 0x1D51E },
	{ 0x1D552, 0x1D56B, 0x1D552 },
	{ 0x1D586, 0x1D59F, 0x1D586 },
	{ 0x1D5BA, 0x1D5D3, 0x1D5BA },
	{ 0x1D5EE, 0x1D607, 0x1D5EE },
	{ 0x1D622, 0x1D63B, 0x1D622 },
	{ 0x1D656, 0x1D66F, 0x1D656 },
	{ 0x1D68A, 0x1D6A5, 0x1D68A },
	{ 0x1D6C2, 0x1D6DA, 0x1D6C2 },
	{ 0x1D6DC, 0x1D6E1, 0x1D6DC },
	{ 0x1D6FC, 0x1D714, 0x1D6FC },
	{ 0x1D716, 0x1D71B, 0x1D716 },
	{ 0x1D736, 0x1D74E, 0x1D736 },
	{ 0x1D750, 0x1D755, 0x1D750 },
	{ 0x1D770, 0x1D788, 0x1D770 },
	{ 0x1D78A, 0x1D78F, 0x1D78A },
	{ 0x1D7AA, 0x1D7C2, 0x1D7AA },
	{ 0x1D7C4, 0x1D7C9, 0x1D7C4 },
	{ 0x1DF00, 0x1DF09, 0x1DF00 },
	{ 0x1DF0B, 0x1DF1E, 0x1DF0B },
	{ 0x1E922, 0x1E943, 0x1E900 },
};
 480
/*
 * Single lower-case code points as rows { lower, upper }. As used by
 * C_utf_char_upcase, the second column is returned as the upper-case
 * form; rows whose columns are equal map the code point to itself.
 * Rows are sorted by the first column, as bsearch() with rune1cmp
 * requires. The table is never written, hence const.
 */
static const int lower1[][2] = {
	{ 0x00B5, 0x039C },
	{ 0x00DF, 0x00DF },
	{ 0x00FF, 0x0178 },
	{ 0x0131, 0x0049 },
	{ 0x0138, 0x0138 },
	{ 0x0149, 0x0149 },
	{ 0x017F, 0x0053 },
	{ 0x0180, 0x0243 },
	{ 0x0188, 0x0187 },
	{ 0x018C, 0x018B },
	{ 0x018D, 0x018D },
	{ 0x0192, 0x0191 },
	{ 0x0195, 0x01F6 },
	{ 0x0199, 0x0198 },
	{ 0x019A, 0x023D },
	{ 0x019B, 0x019B },
	{ 0x019E, 0x0220 },
	{ 0x01A8, 0x01A7 },
	{ 0x01AD, 0x01AC },
	{ 0x01B0, 0x01AF },
	{ 0x01B9, 0x01B8 },
	{ 0x01BA, 0x01BA },
	{ 0x01BD, 0x01BC },
	{ 0x01BE, 0x01BE },
	{ 0x01BF, 0x01F7 },
	{ 0x01C6, 0x01C4 },
	{ 0x01C9, 0x01C7 },
	{ 0x01CC, 0x01CA },
	{ 0x01DD, 0x018E },
	{ 0x01F0, 0x01F0 },
	{ 0x01F3, 0x01F1 },
	{ 0x01F5, 0x01F4 },
	{ 0x0221, 0x0221 },
	{ 0x023C, 0x023B },
	{ 0x0242, 0x0241 },
	{ 0x0250, 0x2C6F },
	{ 0x0251, 0x2C6D },
	{ 0x0252, 0x2C70 },
	{ 0x0253, 0x0181 },
	{ 0x0254, 0x0186 },
	{ 0x0255, 0x0255 },
	{ 0x0258, 0x0258 },
	{ 0x0259, 0x018F },
	{ 0x025A, 0x025A },
	{ 0x025B, 0x0190 },
	{ 0x025C, 0xA7AB },
	{ 0x0260, 0x0193 },
	{ 0x0261, 0xA7AC },
	{ 0x0262, 0x0262 },
	{ 0x0263, 0x0194 },
	{ 0x0264, 0x0264 },
	{ 0x0265, 0xA78D },
	{ 0x0266, 0xA7AA },
	{ 0x0267, 0x0267 },
	{ 0x0268, 0x0197 },
	{ 0x0269, 0x0196 },
	{ 0x026A, 0xA7AE },
	{ 0x026B, 0x2C62 },
	{ 0x026C, 0xA7AD },
	{ 0x026F, 0x019C },
	{ 0x0270, 0x0270 },
	{ 0x0271, 0x2C6E },
	{ 0x0272, 0x019D },
	{ 0x0275, 0x019F },
	{ 0x027D, 0x2C64 },
	{ 0x0280, 0x01A6 },
	{ 0x0281, 0x0281 },
	{ 0x0282, 0xA7C5 },
	{ 0x0283, 0x01A9 },
	{ 0x0287, 0xA7B1 },
	{ 0x0288, 0x01AE },
	{ 0x0289, 0x0244 },
	{ 0x028C, 0x0245 },
	{ 0x0292, 0x01B7 },
	{ 0x0293, 0x0293 },
	{ 0x029D, 0xA7B2 },
	{ 0x029E, 0xA7B0 },
	{ 0x0377, 0x0376 },
	{ 0x0390, 0x0390 },
	{ 0x03AC, 0x0386 },
	{ 0x03B0, 0x03B0 },
	{ 0x03C2, 0x03A3 },
	{ 0x03CC, 0x038C },
	{ 0x03D0, 0x0392 },
	{ 0x03D1, 0x0398 },
	{ 0x03D5, 0x03A6 },
	{ 0x03D6, 0x03A0 },
	{ 0x03D7, 0x03CF },
	{ 0x03F0, 0x039A },
	{ 0x03F1, 0x03A1 },
	{ 0x03F2, 0x03F9 },
	{ 0x03F3, 0x037F },
	{ 0x03F5, 0x0395 },
	{ 0x03F8, 0x03F7 },
	{ 0x03FB, 0x03FA },
	{ 0x03FC, 0x03FC },
	{ 0x04CF, 0x04C0 },
	{ 0x0560, 0x0560 },
	{ 0x1C80, 0x0412 },
	{ 0x1C81, 0x0414 },
	{ 0x1C82, 0x041E },
	{ 0x1C85, 0x0422 },
	{ 0x1C86, 0x042A },
	{ 0x1C87, 0x0462 },
	{ 0x1C88, 0xA64A },
	{ 0x1D79, 0xA77D },
	{ 0x1D7D, 0x2C63 },
	{ 0x1D8E, 0xA7C6 },
	{ 0x1E9B, 0x1E60 },
	{ 0x1E9F, 0x1E9F },
	{ 0x1F50, 0x1F50 },
	{ 0x1F51, 0x1F59 },
	{ 0x1F52, 0x1F52 },
	{ 0x1F53, 0x1F5B },
	{ 0x1F54, 0x1F54 },
	{ 0x1F55, 0x1F5D },
	{ 0x1F56, 0x1F56 },
	{ 0x1F57, 0x1F5F },
	{ 0x1FB2, 0x1FB2 },
	{ 0x1FB3, 0x1FBC },
	{ 0x1FB4, 0x1FB4 },
	{ 0x1FBE, 0x0399 },
	{ 0x1FC2, 0x1FC2 },
	{ 0x1FC3, 0x1FCC },
	{ 0x1FC4, 0x1FC4 },
	{ 0x1FE5, 0x1FEC },
	{ 0x1FF2, 0x1FF2 },
	{ 0x1FF3, 0x1FFC },
	{ 0x1FF4, 0x1FF4 },
	{ 0x210A, 0x210A },
	{ 0x2113, 0x2113 },
	{ 0x212F, 0x212F },
	{ 0x2134, 0x2134 },
	{ 0x2139, 0x2139 },
	{ 0x214E, 0x2132 },
	{ 0x2184, 0x2183 },
	{ 0x2C61, 0x2C60 },
	{ 0x2C65, 0x023A },
	{ 0x2C66, 0x023E },
	{ 0x2C71, 0x2C71 },
	{ 0x2C73, 0x2C72 },
	{ 0x2C74, 0x2C74 },
	{ 0x2C76, 0x2C75 },
	{ 0x2CE4, 0x2CE4 },
	{ 0x2CF3, 0x2CF2 },
	{ 0x2D27, 0x10C7 },
	{ 0x2D2D, 0x10CD },
	{ 0xA78C, 0xA78B },
	{ 0xA78E, 0xA78E },
	{ 0xA794, 0xA7C4 },
	{ 0xA795, 0xA795 },
	{ 0xA7AF, 0xA7AF },
	{ 0xA7D1, 0xA7D0 },
	{ 0xA7D3, 0xA7D3 },
	{ 0xA7D5, 0xA7D5 },
	{ 0xA7F6, 0xA7F5 },
	{ 0xA7FA, 0xA7FA },
	{ 0xAB53, 0xA7B3 },
	{ 0x1D4BB, 0x1D4BB },
	{ 0x1D7CB, 0x1D7CB },
};
 643
 644C_regparm int C_utf_islower(int r)
 645{
 646	int *match;
 647
 648	if((match = bsearch(&r, lower4, nelem(lower4), sizeof *lower4, &rune2cmp)))
 649		return !((r - match[0]) % 2);
 650	if(bsearch(&r, lower2, nelem(lower2), sizeof *lower2, &rune2cmp))
 651		return 1;
 652	if(bsearch(&r, lower1, nelem(lower1), sizeof *lower1, &rune1cmp))
 653		return 1;
 654	return 0;
 655}
 656
 657C_regparm int C_utf_char_upcase(int r)
 658{
 659	int *match;
 660
 661	match = bsearch(&r, lower4, nelem(lower4), sizeof *lower4, &rune2cmp);
 662	if (match)
 663		return ((r - match[0]) % 2) ? r : r - 1;
 664	match = bsearch(&r, lower2, nelem(lower2), sizeof *lower2, &rune2cmp);
 665	if (match)
 666		return match[2] + (r - match[0]);
 667	match = bsearch(&r, lower1, nelem(lower1), sizeof *lower1, &rune1cmp);
 668	if (match)
 669		return match[1];
 670	return r;
 671}
 672
/*
 * Inclusive ranges { first, last } of digit code points, consulted by
 * C_utf_isdigit through bsearch() with rune2cmp. Rows are sorted and
 * non-overlapping. Every row spans ten code points except
 * 0x1D7CE-0x1D7FF, which spans fifty. The table is never written,
 * hence const.
 */
static const int digit2[][2] = {
	{ 0x0030, 0x0039 },
	{ 0x0660, 0x0669 },
	{ 0x06F0, 0x06F9 },
	{ 0x07C0, 0x07C9 },
	{ 0x0966, 0x096F },
	{ 0x09E6, 0x09EF },
	{ 0x0A66, 0x0A6F },
	{ 0x0AE6, 0x0AEF },
	{ 0x0B66, 0x0B6F },
	{ 0x0BE6, 0x0BEF },
	{ 0x0C66, 0x0C6F },
	{ 0x0CE6, 0x0CEF },
	{ 0x0D66, 0x0D6F },
	{ 0x0DE6, 0x0DEF },
	{ 0x0E50, 0x0E59 },
	{ 0x0ED0, 0x0ED9 },
	{ 0x0F20, 0x0F29 },
	{ 0x1040, 0x1049 },
	{ 0x1090, 0x1099 },
	{ 0x17E0, 0x17E9 },
	{ 0x1810, 0x1819 },
	{ 0x1946, 0x194F },
	{ 0x19D0, 0x19D9 },
	{ 0x1A80, 0x1A89 },
	{ 0x1A90, 0x1A99 },
	{ 0x1B50, 0x1B59 },
	{ 0x1BB0, 0x1BB9 },
	{ 0x1C40, 0x1C49 },
	{ 0x1C50, 0x1C59 },
	{ 0xA620, 0xA629 },
	{ 0xA8D0, 0xA8D9 },
	{ 0xA900, 0xA909 },
	{ 0xA9D0, 0xA9D9 },
	{ 0xA9F0, 0xA9F9 },
	{ 0xAA50, 0xAA59 },
	{ 0xABF0, 0xABF9 },
	{ 0xFF10, 0xFF19 },
	{ 0x104A0, 0x104A9 },
	{ 0x10D30, 0x10D39 },
	{ 0x11066, 0x1106F },
	{ 0x110F0, 0x110F9 },
	{ 0x11136, 0x1113F },
	{ 0x111D0, 0x111D9 },
	{ 0x112F0, 0x112F9 },
	{ 0x11450, 0x11459 },
	{ 0x114D0, 0x114D9 },
	{ 0x11650, 0x11659 },
	{ 0x116C0, 0x116C9 },
	{ 0x11730, 0x11739 },
	{ 0x118E0, 0x118E9 },
	{ 0x11950, 0x11959 },
	{ 0x11C50, 0x11C59 },
	{ 0x11D50, 0x11D59 },
	{ 0x11DA0, 0x11DA9 },
	{ 0x16A60, 0x16A69 },
	{ 0x16AC0, 0x16AC9 },
	{ 0x16B50, 0x16B59 },
	{ 0x1D7CE, 0x1D7FF },
	{ 0x1E140, 0x1E149 },
	{ 0x1E2F0, 0x1E2F9 },
	{ 0x1E950, 0x1E959 },
	{ 0x1FBF0, 0x1FBF9 },
};
 737
 738C_regparm int C_utf_isdigit(int r)
 739{
 740	int *dp = bsearch(&r, digit2, nelem(digit2), sizeof *digit2, &rune2cmp);
 741	if(dp != NULL) return 1 + r - dp[ 0 ];
 742	return 0;
 743}
 744
/*
 * Sorted, non-overlapping inclusive ranges { first, last } of code points.
 * The code that consults this table lies outside this part of the file.
 * NOTE(review): by analogy with upper3/lower4 this presumably lists ranges
 * in which only every other code point (even offset from `first`) is
 * alphabetic -- confirm against the lookup function.
 * The table is only read in the code visible here, hence const.
 */
static const int alpha3[][2] = {
	{ 0x00D6, 0x00D8 },
	{ 0x00F6, 0x00F8 },
	{ 0x02EC, 0x02EE },
	{ 0x0374, 0x0376 },
	{ 0x037D, 0x037F },
	{ 0x0386, 0x0388 },
	{ 0x038A, 0x038E },
	{ 0x03A1, 0x03A3 },
	{ 0x03F5, 0x03F7 },
	{ 0x052F, 0x0531 },
	{ 0x066F, 0x0671 },
	{ 0x06D3, 0x06D5 },
	{ 0x0710, 0x0712 },
	{ 0x0887, 0x0889 },
	{ 0x09A8, 0x09AA },
	{ 0x09B0, 0x09B2 },
	{ 0x09DD, 0x09DF },
	{ 0x0A28, 0x0A2A },
	{ 0x0A30, 0x0A32 },
	{ 0x0A33, 0x0A35 },
	{ 0x0A36, 0x0A38 },
	{ 0x0A5C, 0x0A5E },
	{ 0x0A8D, 0x0A8F },
	{ 0x0A91, 0x0A93 },
	{ 0x0AA8, 0x0AAA },
	{ 0x0AB0, 0x0AB2 },
	{ 0x0AB3, 0x0AB5 },
	{ 0x0B28, 0x0B2A },
	{ 0x0B30, 0x0B32 },
	{ 0x0B33, 0x0B35 },
	{ 0x0B5D, 0x0B5F },
	{ 0x0B83, 0x0B85 },
	{ 0x0B90, 0x0B92 },
	{ 0x0B9A, 0x0B9E },
	{ 0x0C0C, 0x0C0E },
	{ 0x0C10, 0x0C12 },
	{ 0x0C28, 0x0C2A },
	{ 0x0C8C, 0x0C8E },
	{ 0x0C90, 0x0C92 },
	{ 0x0CA8, 0x0CAA },
	{ 0x0CB3, 0x0CB5 },
	{ 0x0CDE, 0x0CE0 },
	{ 0x0D0C, 0x0D0E },
	{ 0x0D10, 0x0D12 },
	{ 0x0DB1, 0x0DB3 },
	{ 0x0DBB, 0x0DBD },
	{ 0x0E30, 0x0E32 },
	{ 0x0E82, 0x0E86 },
	{ 0x0E8A, 0x0E8C },
	{ 0x0EA3, 0x0EA7 },
	{ 0x0EB0, 0x0EB2 },
	{ 0x0EC4, 0x0EC6 },
	{ 0x0F47, 0x0F49 },
	{ 0x10C5, 0x10C7 },
	{ 0x10FA, 0x10FC },
	{ 0x1248, 0x124A },
	{ 0x1256, 0x125A },
	{ 0x1288, 0x128A },
	{ 0x12B0, 0x12B2 },
	{ 0x12BE, 0x12C2 },
	{ 0x12D6, 0x12D8 },
	{ 0x1310, 0x1312 },
	{ 0x167F, 0x1681 },
	{ 0x176C, 0x176E },
	{ 0x18A8, 0x18AA },
	{ 0x1CEC, 0x1CEE },
	{ 0x1CF3, 0x1CF5 },
	{ 0x1F57, 0x1F5F },
	{ 0x1FB4, 0x1FB6 },
	{ 0x1FBC, 0x1FBE },
	{ 0x1FC4, 0x1FC6 },
	{ 0x1FF4, 0x1FF6 },
	{ 0x2113, 0x2115 },
	{ 0x2124, 0x212A },
	{ 0x212D, 0x212F },
	{ 0x2D25, 0x2D27 },
	{ 0x2DA6, 0x2DA8 },
	{ 0x2DAE, 0x2DB0 },
	{ 0x2DB6, 0x2DB8 },
	{ 0x2DBE, 0x2DC0 },
	{ 0x2DC6, 0x2DC8 },
	{ 0x2DCE, 0x2DD0 },
	{ 0x2DD6, 0x2DD8 },
	{ 0x309F, 0x30A1 },
	{ 0x30FA, 0x30FC },
	{ 0x312F, 0x3131 },
	{ 0xA7D1, 0xA7D5 },
	{ 0xA801, 0xA803 },
	{ 0xA805, 0xA807 },
	{ 0xA80A, 0xA80C },
	{ 0xA8FB, 0xA8FD },
	{ 0xA9E4, 0xA9E6 },
	{ 0xA9FE, 0xAA00 },
	{ 0xAA42, 0xAA44 },
	{ 0xAAAF, 0xAAB1 },
	{ 0xAAC0, 0xAAC2 },
	{ 0xAB26, 0xAB28 },
	{ 0xAB2E, 0xAB30 },
	{ 0xAB5A, 0xAB5C },
	{ 0xFB1D, 0xFB1F },
	{ 0xFB28, 0xFB2A },
	{ 0xFB36, 0xFB38 },
	{ 0xFB3C, 0xFB40 },
	{ 0xFB41, 0xFB43 },
	{ 0xFB44, 0xFB46 },
	{ 0xFE74, 0xFE76 },
	{ 0x1000B, 0x1000D },
	{ 0x10026, 0x10028 },
	{ 0x1003A, 0x1003C },
	{ 0x1003D, 0x1003F },
	{ 0x10340, 0x10342 },
	{ 0x1057A, 0x1057C },
	{ 0x1058A, 0x1058C },
	{ 0x10592, 0x10594 },
	{ 0x10595, 0x10597 },
	{ 0x105A1, 0x105A3 },
	{ 0x105B1, 0x105B3 },
	{ 0x105B9, 0x105BB },
	{ 0x10785, 0x10787 },
	{ 0x107B0, 0x107B2 },
	{ 0x10808, 0x1080A },
	{ 0x10835, 0x10837 },
	{ 0x108F2, 0x108F4 },
	{ 0x10A13, 0x10A15 },
	{ 0x10A17, 0x10A19 },
	{ 0x10AC7, 0x10AC9 },
	{ 0x111DA, 0x111DC },
	{ 0x11211, 0x11213 },
	{ 0x11286, 0x1128A },
	{ 0x1128D, 0x1128F },
	{ 0x1129D, 0x1129F },
	{ 0x11328, 0x1132A },
	{ 0x11330, 0x11332 },
	{ 0x11333, 0x11335 },
	{ 0x114C5, 0x114C7 },
	{ 0x11913, 0x11915 },
	{ 0x11916, 0x11918 },
	{ 0x1193F, 0x11941 },
	{ 0x119E1, 0x119E3 },
	{ 0x11C08, 0x11C0A },
	{ 0x11D06, 0x11D08 },
	{ 0x11D09, 0x11D0B },
	{ 0x11D65, 0x11D67 },
	{ 0x11D68, 0x11D6A },
	{ 0x16FE1, 0x16FE3 },
	{ 0x1AFF3, 0x1AFF5 },
	{ 0x1AFFB, 0x1AFFD },
	{ 0x1AFFE, 0x1B000 },
	{ 0x1D454, 0x1D456 },
	{ 0x1D49C, 0x1D49E },
	{ 0x1D4AC, 0x1D4AE },
	{ 0x1D4B9, 0x1D4BD },
	{ 0x1D4C3, 0x1D4C5 },
	{ 0x1D505, 0x1D507 },
	{ 0x1D514, 0x1D516 },
	{ 0x1D51C, 0x1D51E },
	{ 0x1D539, 0x1D53B },
	{ 0x1D53E, 0x1D540 },
	{ 0x1D544, 0x1D546 },
	{ 0x1D550, 0x1D552 },
	{ 0x1D6C0, 0x1D6C2 },
	{ 0x1D6DA, 0x1D6DC },
	{ 0x1D6FA, 0x1D6FC },
	{ 0x1D714, 0x1D716 },
	{ 0x1D734, 0x1D736 },
	{ 0x1D74E, 0x1D750 },
	{ 0x1D76E, 0x1D770 },
	{ 0x1D788, 0x1D78A },
	{ 0x1D7A8, 0x1D7AA },
	{ 0x1D7C2, 0x1D7C4 },
	{ 0x1E7E6, 0x1E7E8 },
	{ 0x1E7EB, 0x1E7ED },
	{ 0x1E7EE, 0x1E7F0 },
	{ 0x1E7FE, 0x1E800 },
	{ 0x1EE03, 0x1EE05 },
	{ 0x1EE1F, 0x1EE21 },
	{ 0x1EE22, 0x1EE24 },
	{ 0x1EE27, 0x1EE29 },
	{ 0x1EE32, 0x1EE34 },
	{ 0x1EE37, 0x1EE3B },
	{ 0x1EE47, 0x1EE4D },
	{ 0x1EE4F, 0x1EE51 },
	{ 0x1EE52, 0x1EE54 },
	{ 0x1EE57, 0x1EE61 },
	{ 0x1EE62, 0x1EE64 },
	{ 0x1EE6A, 0x1EE6C },
	{ 0x1EE72, 0x1EE74 },
	{ 0x1EE77, 0x1EE79 },
	{ 0x1EE7C, 0x1EE80 },
	{ 0x1EE89, 0x1EE8B },
	{ 0x1EEA3, 0x1EEA5 },
	{ 0x1EEA9, 0x1EEAB },
};
 939
 940static int alpha2[][2] = {
 941	{ 0x0041, 0x005A },
 942	{ 0x0061, 0x007A },
 943	{ 0x00C0, 0x00D6 },
 944	{ 0x00D8, 0x00F6 },
 945	{ 0x00F8, 0x02C1 },
 946	{ 0x02C6, 0x02D1 },
 947	{ 0x02E0, 0x02E4 },
 948	{ 0x0370, 0x0374 },
 949	{ 0x0376, 0x0377 },
 950	{ 0x037A, 0x037D },
 951	{ 0x0388, 0x038A },
 952	{ 0x038E, 0x03A1 },
 953	{ 0x03A3, 0x03F5 },
 954	{ 0x03F7, 0x0481 },
 955	{ 0x048A, 0x052F },
 956	{ 0x0531, 0x0556 },
 957	{ 0x0560, 0x0588 },
 958	{ 0x05D0, 0x05EA },
 959	{ 0x05EF, 0x05F2 },
 960	{ 0x0620, 0x064A },
 961	{ 0x066E, 0x066F },
 962	{ 0x0671, 0x06D3 },
 963	{ 0x06E5, 0x06E6 },
 964	{ 0x06EE, 0x06EF },
 965	{ 0x06FA, 0x06FC },
 966	{ 0x0712, 0x072F },
 967	{ 0x074D, 0x07A5 },
 968	{ 0x07CA, 0x07EA },
 969	{ 0x07F4, 0x07F5 },
 970	{ 0x0800, 0x0815 },
 971	{ 0x0840, 0x0858 },
 972	{ 0x0860, 0x086A },
 973	{ 0x0870, 0x0887 },
 974	{ 0x0889, 0x088E },
 975	{ 0x08A0, 0x08C9 },
 976	{ 0x0904, 0x0939 },
 977	{ 0x0958, 0x0961 },
 978	{ 0x0971, 0x0980 },
 979	{ 0x0985, 0x098C },
 980	{ 0x098F, 0x0990 },
 981	{ 0x0993, 0x09A8 },
 982	{ 0x09AA, 0x09B0 },
 983	{ 0x09B6, 0x09B9 },
 984	{ 0x09DC, 0x09DD },
 985	{ 0x09DF, 0x09E1 },
 986	{ 0x09F0, 0x09F1 },
 987	{ 0x0A05, 0x0A0A },
 988	{ 0x0A0F, 0x0A10 },
 989	{ 0x0A13, 0x0A28 },
 990	{ 0x0A2A, 0x0A30 },
 991	{ 0x0A32, 0x0A33 },
 992	{ 0x0A35, 0x0A36 },
 993	{ 0x0A38, 0x0A39 },
 994	{ 0x0A59, 0x0A5C },
 995	{ 0x0A72, 0x0A74 },
 996	{ 0x0A85, 0x0A8D },
 997	{ 0x0A8F, 0x0A91 },
 998	{ 0x0A93, 0x0AA8 },
 999	{ 0x0AAA, 0x0AB0 },
 1000	{ 0x0AB2, 0x0AB3 },
1001	{ 0x0AB5, 0x0AB9 },
1002	{ 0x0AE0, 0x0AE1 },
1003	{ 0x0B05, 0x0B0C },
1004	{ 0x0B0F, 0x0B10 },
1005	{ 0x0B13, 0x0B28 },
1006	{ 0x0B2A, 0x0B30 },
1007	{ 0x0B32, 0x0B33 },
1008	{ 0x0B35, 0x0B39 },
1009	{ 0x0B5C, 0x0B5D },
1010	{ 0x0B5F, 0x0B61 },
1011	{ 0x0B85, 0x0B8A },
1012	{ 0x0B8E, 0x0B90 },
1013	{ 0x0B92, 0x0B95 },
1014	{ 0x0B99, 0x0B9A },
1015	{ 0x0B9E, 0x0B9F },
1016	{ 0x0BA3, 0x0BA4 },
1017	{ 0x0BA8, 0x0BAA },
1018	{ 0x0BAE, 0x0BB9 },
1019	{ 0x0C05, 0x0C0C },
1020	{ 0x0C0E, 0x0C10 },
1021	{ 0x0C12, 0x0C28 },
1022	{ 0x0C2A, 0x0C39 },
1023	{ 0x0C58, 0x0C5A },
1024	{ 0x0C60, 0x0C61 },
1025	{ 0x0C85, 0x0C8C },
1026	{ 0x0C8E, 0x0C90 },
1027	{ 0x0C92, 0x0CA8 },
1028	{ 0x0CAA, 0x0CB3 },
1029	{ 0x0CB5, 0x0CB9 },
1030	{ 0x0CDD, 0x0CDE },
1031	{ 0x0CE0, 0x0CE1 },
1032	{ 0x0CF1, 0x0CF2 },
1033	{ 0x0D04, 0x0D0C },
1034	{ 0x0D0E, 0x0D10 },
1035	{ 0x0D12, 0x0D3A },
1036	{ 0x0D54, 0x0D56 },
1037	{ 0x0D5F, 0x0D61 },
1038	{ 0x0D7A, 0x0D7F },
1039	{ 0x0D85, 0x0D96 },
1040	{ 0x0D9A, 0x0DB1 },
1041	{ 0x0DB3, 0x0DBB },
1042	{ 0x0DC0, 0x0DC6 },
1043	{ 0x0E01, 0x0E30 },
1044	{ 0x0E32, 0x0E33 },
1045	{ 0x0E40, 0x0E46 },
1046	{ 0x0E81, 0x0E82 },
1047	{ 0x0E86, 0x0E8A },
1048	{ 0x0E8C, 0x0EA3 },
1049	{ 0x0EA7, 0x0EB0 },
1050	{ 0x0EB2, 0x0EB3 },
1051	{ 0x0EC0, 0x0EC4 },
1052	{ 0x0EDC, 0x0EDF },
1053	{ 0x0F40, 0x0F47 },
1054	{ 0x0F49, 0x0F6C },
1055	{ 0x0F88, 0x0F8C },
1056	{ 0x1000, 0x102A },
1057	{ 0x1050, 0x1055 },
1058	{ 0x105A, 0x105D },
1059	{ 0x1065, 0x1066 },
1060	{ 0x106E, 0x1070 },
1061	{ 0x1075, 0x1081 },
1062	{ 0x10A0, 0x10C5 },
1063	{ 0x10D0, 0x10FA },
1064	{ 0x10FC, 0x1248 },
1065	{ 0x124A, 0x124D },
1066	{ 0x1250, 0x1256 },
1067	{ 0x125A, 0x125D },
1068	{ 0x1260, 0x1288 },
1069	{ 0x128A, 0x128D },
1070	{ 0x1290, 0x12B0 },
1071	{ 0x12B2, 0x12B5 },
1072	{ 0x12B8, 0x12BE },
1073	{ 0x12C2, 0x12C5 },
1074	{ 0x12C8, 0x12D6 },
1075	{ 0x12D8, 0x1310 },
1076	{ 0x1312, 0x1315 },
1077	{ 0x1318, 0x135A },
1078	{ 0x1380, 0x138F },
1079	{ 0x13A0, 0x13F5 },
1080	{ 0x13F8, 0x13FD },
1081	{ 0x1401, 0x166C },
1082	{ 0x166F, 0x167F },
1083	{ 0x1681, 0x169A },
1084	{ 0x16A0, 0x16EA },
1085	{ 0x16F1, 0x16F8 },
1086	{ 0x1700, 0x1711 },
1087	{ 0x171F, 0x1731 },
1088	{ 0x1740, 0x1751 },
1089	{ 0x1760, 0x176C },
1090	{ 0x176E, 0x1770 },
1091	{ 0x1780, 0x17B3 },
1092	{ 0x1820, 0x1878 },
1093	{ 0x1880, 0x1884 },
1094	{ 0x1887, 0x18A8 },
1095	{ 0x18B0, 0x18F5 },
1096	{ 0x1900, 0x191E },
1097	{ 0x1950, 0x196D },
1098	{ 0x1970, 0x1974 },
1099	{ 0x1980, 0x19AB },
1100	{ 0x19B0, 0x19C9 },
1101	{ 0x1A00, 0x1A16 },
1102	{ 0x1A20, 0x1A54 },
1103	{ 0x1B05, 0x1B33 },
1104	{ 0x1B45, 0x1B4C },
1105	{ 0x1B83, 0x1BA0 },
1106	{ 0x1BAE, 0x1BAF },
1107	{ 0x1BBA, 0x1BE5 },
1108	{ 0x1C00, 0x1C23 },
1109	{ 0x1C4D, 0x1C4F },
1110	{ 0x1C5A, 0x1C7D },
1111	{ 0x1C80, 0x1C88 },
1112	{ 0x1C90, 0x1CBA },
1113	{ 0x1CBD, 0x1CBF },
1114	{ 0x1CE9, 0x1CEC },
1115	{ 0x1CEE, 0x1CF3 },
1116	{ 0x1CF5, 0x1CF6 },
1117	{ 0x1D00, 0x1DBF },
1118	{ 0x1E00, 0x1F15 },
1119	{ 0x1F18, 0x1F1D },
1120	{ 0x1F20, 0x1F45 },
1121	{ 0x1F48, 0x1F4D },
1122	{ 0x1F50, 0x1F57 },
1123	{ 0x1F5F, 0x1F7D },
1124	{ 0x1F80, 0x1FB4 },
1125	{ 0x1FB6, 0x1FBC },
1126	{ 0x1FC2, 0x1FC4 },
1127	{ 0x1FC6, 0x1FCC },
1128	{ 0x1FD0, 0x1FD3 },
1129	{ 0x1FD6, 0x1FDB },
1130	{ 0x1FE0, 0x1FEC },
1131	{ 0x1FF2, 0x1FF4 },
1132	{ 0x1FF6, 0x1FFC },
1133	{ 0x2090, 0x209C },
1134	{ 0x210A, 0x2113 },
1135	{ 0x2119, 0x211D },
1136	{ 0x212A, 0x212D },
1137	{ 0x212F, 0x2139 },
1138	{ 0x213C, 0x213F },
1139	{ 0x2145, 0x2149 },
1140	{ 0x2183, 0x2184 },
1141	{ 0x2C00, 0x2CE4 },
1142	{ 0x2CEB, 0x2CEE },
1143	{ 0x2CF2, 0x2CF3 },
1144	{ 0x2D00, 0x2D25 },
1145	{ 0x2D30, 0x2D67 },
1146	{ 0x2D80, 0x2D96 },
1147	{ 0x2DA0, 0x2DA6 },
1148	{ 0x2DA8, 0x2DAE },
1149	{ 0x2DB0, 0x2DB6 },
1150	{ 0x2DB8, 0x2DBE },
1151	{ 0x2DC0, 0x2DC6 },
1152	{ 0x2DC8, 0x2DCE },
1153	{ 0x2DD0, 0x2DD6 },
1154	{ 0x2DD8, 0x2DDE },
1155	{ 0x3005, 0x3006 },
1156	{ 0x3031, 0x3035 },
1157	{ 0x303B, 0x303C },
1158	{ 0x3041, 0x3096 },
1159	{ 0x309D, 0x309F },
1160	{ 0x30A1, 0x30FA },
1161	{ 0x30FC, 0x30FF },
1162	{ 0x3105, 0x312F },
1163	{ 0x3131, 0x318E },
1164	{ 0x31A0, 0x31BF },
1165	{ 0x31F0, 0x31FF },
1166	{ 0x9FFF, 0xA48C },
1167	{ 0xA4D0, 0xA4FD },
1168	{ 0xA500, 0xA60C },
1169	{ 0xA610, 0xA61F },
1170	{ 0xA62A, 0xA62B },
1171	{ 0xA640, 0xA66E },
1172	{ 0xA67F, 0xA69D },
1173	{ 0xA6A0, 0xA6E5 },
1174	{ 0xA717, 0xA71F },
1175	{ 0xA722, 0xA788 },
1176	{ 0xA78B, 0xA7CA },
1177	{ 0xA7D0, 0xA7D1 },
1178	{ 0xA7D5, 0xA7D9 },
1179	{ 0xA7F2, 0xA801 },
1180	{ 0xA803, 0xA805 },
1181	{ 0xA807, 0xA80A },
1182	{ 0xA80C, 0xA822 },
1183	{ 0xA840, 0xA873 },
1184	{ 0xA882, 0xA8B3 },
1185	{ 0xA8F2, 0xA8F7 },
1186	{ 0xA8FD, 0xA8FE },
1187	{ 0xA90A, 0xA925 },
1188	{ 0xA930, 0xA946 },
1189	{ 0xA960, 0xA97C },
1190	{ 0xA984, 0xA9B2 },
1191	{ 0xA9E0, 0xA9E4 },
1192	{ 0xA9E6, 0xA9EF },
1193	{ 0xA9FA, 0xA9FE },
1194	{ 0xAA00, 0xAA28 },
1195	{ 0xAA40, 0xAA42 },
1196	{ 0xAA44, 0xAA4B },
1197	{ 0xAA60, 0xAA76 },
1198	{ 0xAA7E, 0xAAAF },
1199	{ 0xAAB5, 0xAAB6 },
1200	{ 0xAAB9, 0xAABD },
1201	{ 0xAADB, 0xAADD },
1202	{ 0xAAE0, 0xAAEA },
1203	{ 0xAAF2, 0xAAF4 },
1204	{ 0xAB01, 0xAB06 },
1205	{ 0xAB09, 0xAB0E },
1206	{ 0xAB11, 0xAB16 },
1207	{ 0xAB20, 0xAB26 },
1208	{ 0xAB28, 0xAB2E },
1209	{ 0xAB30, 0xAB5A },
1210	{ 0xAB5C, 0xAB69 },
1211	{ 0xAB70, 0xABE2 },
1212	{ 0xD7B0, 0xD7C6 },
1213	{ 0xD7CB, 0xD7FB },
1214	{ 0xF900, 0xFA6D },
1215	{ 0xFA70, 0xFAD9 },
1216	{ 0xFB00, 0xFB06 },
1217	{ 0xFB13, 0xFB17 },
1218	{ 0xFB1F, 0xFB28 },
1219	{ 0xFB2A, 0xFB36 },
1220	{ 0xFB38, 0xFB3C },
1221	{ 0xFB40, 0xFB41 },
1222	{ 0xFB43, 0xFB44 },
1223	{ 0xFB46, 0xFBB1 },
1224	{ 0xFBD3, 0xFD3D },
1225	{ 0xFD50, 0xFD8F },
1226	{ 0xFD92, 0xFDC7 },
1227	{ 0xFDF0, 0xFDFB },
1228	{ 0xFE70, 0xFE74 },
1229	{ 0xFE76, 0xFEFC },
1230	{ 0xFF21, 0xFF3A },
1231	{ 0xFF41, 0xFF5A },
1232	{ 0xFF66, 0xFFBE },
1233	{ 0xFFC2, 0xFFC7 },
1234	{ 0xFFCA, 0xFFCF },
1235	{ 0xFFD2, 0xFFD7 },
1236	{ 0xFFDA, 0xFFDC },
1237	{ 0x10000, 0x1000B },
1238	{ 0x1000D, 0x10026 },
1239	{ 0x10028, 0x1003A },
1240	{ 0x1003C, 0x1003D },
1241	{ 0x1003F, 0x1004D },
1242	{ 0x10050, 0x1005D },
1243	{ 0x10080, 0x100FA },
1244	{ 0x10280, 0x1029C },
1245	{ 0x102A0, 0x102D0 },
1246	{ 0x10300, 0x1031F },
1247	{ 0x1032D, 0x10340 },
1248	{ 0x10342, 0x10349 },
1249	{ 0x10350, 0x10375 },
1250	{ 0x10380, 0x1039D },
1251	{ 0x103A0, 0x103C3 },
1252	{ 0x103C8, 0x103CF },
1253	{ 0x10400, 0x1049D },
1254	{ 0x104B0, 0x104D3 },
1255	{ 0x104D8, 0x104FB },
1256	{ 0x10500, 0x10527 },
1257	{ 0x10530, 0x10563 },
1258	{ 0x10570, 0x1057A },
1259	{ 0x1057C, 0x1058A },
1260	{ 0x1058C, 0x10592 },
1261	{ 0x10594, 0x10595 },
1262	{ 0x10597, 0x105A1 },
1263	{ 0x105A3, 0x105B1 },
1264	{ 0x105B3, 0x105B9 },
1265	{ 0x105BB, 0x105BC },
1266	{ 0x10600, 0x10736 },
1267	{ 0x10740, 0x10755 },
1268	{ 0x10760, 0x10767 },
1269	{ 0x10780, 0x10785 },
1270	{ 0x10787, 0x107B0 },
1271	{ 0x107B2, 0x107BA },
1272	{ 0x10800, 0x10805 },
1273	{ 0x1080A, 0x10835 },
1274	{ 0x10837, 0x10838 },
1275	{ 0x1083F, 0x10855 },
1276	{ 0x10860, 0x10876 },
1277	{ 0x10880, 0x1089E },
1278	{ 0x108E0, 0x108F2 },
1279	{ 0x108F4, 0x108F5 },
1280	{ 0x10900, 0x10915 },
1281	{ 0x10920, 0x10939 },
1282	{ 0x10980, 0x109B7 },
1283	{ 0x109BE, 0x109BF },
1284	{ 0x10A10, 0x10A13 },
1285	{ 0x10A15, 0x10A17 },
1286	{ 0x10A19, 0x10A35 },
1287	{ 0x10A60, 0x10A7C },
1288	{ 0x10A80, 0x10A9C },
1289	{ 0x10AC0, 0x10AC7 },
1290	{ 0x10AC9, 0x10AE4 },
1291	{ 0x10B00, 0x10B35 },
1292	{ 0x10B40, 0x10B55 },
1293	{ 0x10B60, 0x10B72 },
1294	{ 0x10B80, 0x10B91 },
1295	{ 0x10C00, 0x10C48 },
1296	{ 0x10C80, 0x10CB2 },
1297	{ 0x10CC0, 0x10CF2 },
1298	{ 0x10D00, 0x10D23 },
1299	{ 0x10E80, 0x10EA9 },
1300	{ 0x10EB0, 0x10EB1 },
1301	{ 0x10F00, 0x10F1C },
1302	{ 0x10F30, 0x10F45 },
1303	{ 0x10F70, 0x10F81 },
1304	{ 0x10FB0, 0x10FC4 },
1305	{ 0x10FE0, 0x10FF6 },
1306	{ 0x11003, 0x11037 },
1307	{ 0x11071, 0x11072 },
1308	{ 0x11083, 0x110AF },
1309	{ 0x110D0, 0x110E8 },
1310	{ 0x11103, 0x11126 },
1311	{ 0x11150, 0x11172 },
1312	{ 0x11183, 0x111B2 },
1313	{ 0x111C1, 0x111C4 },
1314	{ 0x11200, 0x11211 },
1315	{ 0x11213, 0x1122B },
1316	{ 0x11280, 0x11286 },
1317	{ 0x1128A, 0x1128D },
1318	{ 0x1128F, 0x1129D },
1319	{ 0x1129F, 0x112A8 },
1320	{ 0x112B0, 0x112DE },
1321	{ 0x11305, 0x1130C },
1322	{ 0x1130F, 0x11310 },
1323	{ 0x11313, 0x11328 },
1324	{ 0x1132A, 0x11330 },
1325	{ 0x11332, 0x11333 },
1326	{ 0x11335, 0x11339 },
1327	{ 0x1135D, 0x11361 },
1328	{ 0x11400, 0x11434 },
1329	{ 0x11447, 0x1144A },
1330	{ 0x1145F, 0x11461 },
1331	{ 0x11480, 0x114AF },
1332	{ 0x114C4, 0x114C5 },
1333	{ 0x11580, 0x115AE },
1334	{ 0x115D8, 0x115DB },
1335	{ 0x11600, 0x1162F },
1336	{ 0x11680, 0x116AA },
1337	{ 0x11700, 0x1171A },
1338	{ 0x11740, 0x11746 },
1339	{ 0x11800, 0x1182B },
1340	{ 0x118A0, 0x118DF },
1341	{ 0x118FF, 0x11906 },
1342	{ 0x1190C, 0x11913 },
1343	{ 0x11915, 0x11916 },
1344	{ 0x11918, 0x1192F },
1345	{ 0x119A0, 0x119A7 },
1346	{ 0x119AA, 0x119D0 },
1347	{ 0x11A0B, 0x11A32 },
1348	{ 0x11A5C, 0x11A89 },
1349	{ 0x11AB0, 0x11AF8 },
1350	{ 0x11C00, 0x11C08 },
1351	{ 0x11C0A, 0x11C2E },
1352	{ 0x11C72, 0x11C8F },
1353	{ 0x11D00, 0x11D06 },
1354	{ 0x11D08, 0x11D09 },
1355	{ 0x11D0B, 0x11D30 },
1356	{ 0x11D60, 0x11D65 },
1357	{ 0x11D67, 0x11D68 },
1358	{ 0x11D6A, 0x11D89 },
1359	{ 0x11EE0, 0x11EF2 },
1360	{ 0x12000, 0x12399 },
1361	{ 0x12480, 0x12543 },
1362	{ 0x12F90, 0x12FF0 },
1363	{ 0x13000, 0x1342E },
1364	{ 0x14400, 0x14646 },
1365	{ 0x16800, 0x16A38 },
1366	{ 0x16A40, 0x16A5E },
1367	{ 0x16A70, 0x16ABE },
1368	{ 0x16AD0, 0x16AED },
1369	{ 0x16B00, 0x16B2F },
1370	{ 0x16B40, 0x16B43 },
1371	{ 0x16B63, 0x16B77 },
1372	{ 0x16B7D, 0x16B8F },
1373	{ 0x16E40, 0x16E7F },
1374	{ 0x16F00, 0x16F4A },
1375	{ 0x16F93, 0x16F9F },
1376	{ 0x16FE0, 0x16FE1 },
1377	{ 0x18800, 0x18CD5 },
1378	{ 0x1AFF0, 0x1AFF3 },
1379	{ 0x1AFF5, 0x1AFFB },
1380	{ 0x1AFFD, 0x1AFFE },
1381	{ 0x1B000, 0x1B122 },
1382	{ 0x1B150, 0x1B152 },
1383	{ 0x1B164, 0x1B167 },
1384	{ 0x1B170, 0x1B2FB },
1385	{ 0x1BC00, 0x1BC6A },
1386	{ 0x1BC70, 0x1BC7C },
1387	{ 0x1BC80, 0x1BC88 },
1388	{ 0x1BC90, 0x1BC99 },
1389	{ 0x1D400, 0x1D454 },
1390	{ 0x1D456, 0x1D49C },
1391	{ 0x1D49E, 0x1D49F },
1392	{ 0x1D4A5, 0x1D4A6 },
1393	{ 0x1D4A9, 0x1D4AC },
1394	{ 0x1D4AE, 0x1D4B9 },
1395	{ 0x1D4BD, 0x1D4C3 },
1396	{ 0x1D4C5, 0x1D505 },
1397	{ 0x1D507, 0x1D50A },
1398	{ 0x1D50D, 0x1D514 },
1399	{ 0x1D516, 0x1D51C },
1400	{ 0x1D51E, 0x1D539 },
1401	{ 0x1D53B, 0x1D53E },
1402	{ 0x1D540, 0x1D544 },
1403	{ 0x1D54A, 0x1D550 },
1404	{ 0x1D552, 0x1D6A5 },
1405	{ 0x1D6A8, 0x1D6C0 },
1406	{ 0x1D6C2, 0x1D6DA },
1407	{ 0x1D6DC, 0x1D6FA },
1408	{ 0x1D6FC, 0x1D714 },
1409	{ 0x1D716, 0x1D734 },
1410	{ 0x1D736, 0x1D74E },
1411	{ 0x1D750, 0x1D76E },
1412	{ 0x1D770, 0x1D788 },
1413	{ 0x1D78A, 0x1D7A8 },
1414	{ 0x1D7AA, 0x1D7C2 },
1415	{ 0x1D7C4, 0x1D7CB },
1416	{ 0x1DF00, 0x1DF1E },
1417	{ 0x1E100, 0x1E12C },
1418	{ 0x1E137, 0x1E13D },
1419	{ 0x1E290, 0x1E2AD },
1420	{ 0x1E2C0, 0x1E2EB },
1421	{ 0x1E7E0, 0x1E7E6 },
1422	{ 0x1E7E8, 0x1E7EB },
1423	{ 0x1E7ED, 0x1E7EE },
1424	{ 0x1E7F0, 0x1E7FE },
1425	{ 0x1E800, 0x1E8C4 },
1426	{ 0x1E900, 0x1E943 },
1427	{ 0x1EE00, 0x1EE03 },
1428	{ 0x1EE05, 0x1EE1F },
1429	{ 0x1EE21, 0x1EE22 },
1430	{ 0x1EE29, 0x1EE32 },
1431	{ 0x1EE34, 0x1EE37 },
1432	{ 0x1EE4D, 0x1EE4F },
1433	{ 0x1EE51, 0x1EE52 },
1434	{ 0x1EE61, 0x1EE62 },
1435	{ 0x1EE67, 0x1EE6A },
1436	{ 0x1EE6C, 0x1EE72 },
1437	{ 0x1EE74, 0x1EE77 },
1438	{ 0x1EE79, 0x1EE7C },
1439	{ 0x1EE80, 0x1EE89 },
1440	{ 0x1EE8B, 0x1EE9B },
1441	{ 0x1EEA1, 0x1EEA3 },
1442	{ 0x1EEA5, 0x1EEA9 },
1443	{ 0x1EEAB, 0x1EEBB },
1444	{ 0x2F800, 0x2FA1D },
1445};
1446
/*
 * Single code points looked up by C_utf_isalpha() with bsearch() and
 * rune1cmp.  The entries are kept in ascending order, which bsearch()
 * requires.
 */
static int alpha1[] = {
	0x00AA, 0x00B5, 0x00BA, 0x0559, 0x06FF, 0x07B1, 0x07FA, 0x081A,
	0x0824, 0x0828, 0x093D, 0x0950, 0x09BD, 0x09CE, 0x09FC, 0x0ABD,
	0x0AD0, 0x0AF9, 0x0B3D, 0x0B71, 0x0BD0, 0x0C3D, 0x0C5D, 0x0C80,
	0x0CBD, 0x0D3D, 0x0D4E, 0x0EBD, 0x0F00, 0x103F, 0x1061, 0x108E,
	0x10CD, 0x17D7, 0x17DC, 0x1AA7, 0x1CFA, 0x2071, 0x207F, 0x2102,
	0x2107, 0x214E, 0x2D2D, 0x2D6F, 0x2E2F, 0x3400, 0x4DBF, 0x4E00,
	0xA9CF, 0xAA7A, 0xAC00, 0xD7A3, 0x1083C, 0x10A00, 0x10F27, 0x11075,
	0x11144, 0x11147, 0x11176, 0x1133D, 0x11350, 0x11644, 0x116B8, 0x11909,
	0x11A00, 0x11A3A, 0x11A50, 0x11A9D, 0x11C40, 0x11D46, 0x11D98, 0x11FB0,
	0x16F50, 0x17000, 0x187F7, 0x18D00, 0x18D08, 0x1D4A2, 0x1E14E, 0x1E94B,
	0x1EE42, 0x20000, 0x2A6DF, 0x2A700, 0x2B738, 0x2B740, 0x2B81D, 0x2B820,
	0x2CEA1, 0x2CEB0, 0x2EBE0, 0x30000, 0x3134A,
};
1542
1543C_regparm int C_utf_isalpha(int r)
1544{
1545	int *match;
1546
1547	if((match = bsearch(&r, alpha3, nelem(alpha3), sizeof *alpha3, &rune2cmp)))
1548		return !((r - match[0]) % 2);
1549	if(bsearch(&r, alpha2, nelem(alpha2), sizeof *alpha2, &rune2cmp))
1550		return 1;
1551	if(bsearch(&r, alpha1, nelem(alpha1), sizeof *alpha1, &rune1cmp))
1552		return 1;
1553	return 0;
1554}
1555
/*
 * Pairs of code points searched by C_utf_isspace() with bsearch() and
 * rune2cmp.  Rows are in ascending order, as bsearch() requires.
 */
static int space2[][2] = {
	{ 0x0009, 0x000D }, { 0x001C, 0x0020 },
	{ 0x2000, 0x200A }, { 0x2028, 0x2029 },
};
1562
/*
 * Single code points searched by C_utf_isspace() with bsearch() and
 * rune1cmp.  Entries are in ascending order, as bsearch() requires.
 */
static int space1[] = {
	0x0085, 0x00A0, 0x1680,
	0x202F, 0x205F, 0x3000,
};
1571
1572C_regparm int C_utf_isspace(int r)
1573{
1574	if(bsearch(&r, space2, nelem(space2), sizeof *space2, &rune2cmp))
1575		return 1;
1576	if(bsearch(&r, space1, nelem(space1), sizeof *space1, &rune1cmp))
1577		return 1;
1578	return 0;
1579}
1580
1581static int fold1[][ 2 ] = {
1582  {0x0041, 0x0061},
1583  {0x0042, 0x0062},
1584  {0x0043, 0x0063},
1585  {0x0044, 0x0064},
1586  {0x0045, 0x0065},
1587  {0x0046, 0x0066},
1588  {0x0047, 0x0067},
1589  {0x0048, 0x0068},
1590  {0x0049, 0x0069},
1591  {0x004A, 0x006A},
1592  {0x004B, 0x006B},
1593  {0x004C, 0x006C},
1594  {0x004D, 0x006D},
1595  {0x004E, 0x006E},
1596  {0x004F, 0x006F},
1597  {0x0050, 0x0070},
1598  {0x0051, 0x0071},
1599  {0x0052, 0x0072},
1600  {0x0053, 0x0073},
1601  {0x0054, 0x0074},
1602  {0x0055, 0x0075},
1603  {0x0056, 0x0076},
1604  {0x0057, 0x0077},
1605  {0x0058, 0x0078},
1606  {0x0059, 0x0079},
1607  {0x005A, 0x007A},
1608  {0x00B5, 0x03BC},
1609  {0x00C0, 0x00E0},
1610  {0x00C1, 0x00E1},
1611  {0x00C2, 0x00E2},
1612  {0x00C3, 0x00E3},
1613  {0x00C4, 0x00E4},
1614  {0x00C5, 0x00E5},
1615  {0x00C6, 0x00E6},
1616  {0x00C7, 0x00E7},
1617  {0x00C8, 0x00E8},
1618  {0x00C9, 0x00E9},
1619  {0x00CA, 0x00EA},
1620  {0x00CB, 0x00EB},
1621  {0x00CC, 0x00EC},
1622  {0x00CD, 0x00ED},
1623  {0x00CE, 0x00EE},
1624  {0x00CF, 0x00EF},
1625  {0x00D0, 0x00F0},
1626  {0x00D1, 0x00F1},
1627  {0x00D2, 0x00F2},
1628  {0x00D3, 0x00F3},
1629  {0x00D4, 0x00F4},
1630  {0x00D5, 0x00F5},
1631  {0x00D6, 0x00F6},
1632  {0x00D8, 0x00F8},
1633  {0x00D9, 0x00F9},
1634  {0x00DA, 0x00FA},
1635  {0x00DB, 0x00FB},
1636  {0x00DC, 0x00FC},
1637  {0x00DD, 0x00FD},
1638  {0x00DE, 0x00FE},
1639  {0x0100, 0x0101},
1640  {0x0102, 0x0103},
1641  {0x0104, 0x0105},
1642  {0x0106, 0x0107},
1643  {0x0108, 0x0109},
1644  {0x010A, 0x010B},
1645  {0x010C, 0x010D},
1646  {0x010E, 0x010F},
1647  {0x0110, 0x0111},
1648  {0x0112, 0x0113},
1649  {0x0114, 0x0115},
1650  {0x0116, 0x0117},
1651  {0x0118, 0x0119},
1652  {0x011A, 0x011B},
1653  {0x011C, 0x011D},
1654  {0x011E, 0x011F},
1655  {0x0120, 0x0121},
1656  {0x0122, 0x0123},
1657  {0x0124, 0x0125},
1658  {0x0126, 0x0127},
1659  {0x0128, 0x0129},
1660  {0x012A, 0x012B},
1661  {0x012C, 0x012D},
1662  {0x012E, 0x012F},
1663  {0x0132, 0x0133},
1664  {0x0134, 0x0135},
1665  {0x0136, 0x0137},
1666  {0x0139, 0x013A},
1667  {0x013B, 0x013C},
1668  {0x013D, 0x013E},
1669  {0x013F, 0x0140},
1670  {0x0141, 0x0142},
1671  {0x0143, 0x0144},
1672  {0x0145, 0x0146},
1673  {0x0147, 0x0148},
1674  {0x014A, 0x014B},
1675  {0x014C, 0x014D},
1676  {0x014E, 0x014F},
1677  {0x0150, 0x0151},
1678  {0x0152, 0x0153},
1679  {0x0154, 0x0155},
1680  {0x0156, 0x0157},
1681  {0x0158, 0x0159},
1682  {0x015A, 0x015B},
1683  {0x015C, 0x015D},
1684  {0x015E, 0x015F},
1685  {0x0160, 0x0161},
1686  {0x0162, 0x0163},
1687  {0x0164, 0x0165},
1688  {0x0166, 0x0167},
1689  {0x0168, 0x0169},
1690  {0x016A, 0x016B},
1691  {0x016C, 0x016D},
1692  {0x016E, 0x016F},
1693  {0x0170, 0x0171},
1694  {0x0172, 0x0173},
1695  {0x0174, 0x0175},
1696  {0x0176, 0x0177},
1697  {0x0178, 0x00FF},
1698  {0x0179, 0x017A},
1699  {0x017B, 0x017C},
1700  {0x017D, 0x017E},
1701  {0x017F, 0x0073},
1702  {0x0181, 0x0253},
1703  {0x0182, 0x0183},
1704  {0x0184, 0x0185},
1705  {0x0186, 0x0254},
1706  {0x0187, 0x0188},
1707  {0x0189, 0x0256},
1708  {0x018A, 0x0257},
1709  {0x018B, 0x018C},
1710  {0x018E, 0x01DD},
1711  {0x018F, 0x0259},
1712  {0x0190, 0x025B},
1713  {0x0191, 0x0192},
1714  {0x0193, 0x0260},
1715  {0x0194, 0x0263},
1716  {0x0196, 0x0269},
1717  {0x0197, 0x0268},
1718  {0x0198, 0x0199},
1719  {0x019C, 0x026F},
1720  {0x019D, 0x0272},
1721  {0x019F, 0x0275},
1722  {0x01A0, 0x01A1},
1723  {0x01A2, 0x01A3},
1724  {0x01A4, 0x01A5},
1725  {0x01A6, 0x0280},
1726  {0x01A7, 0x01A8},
1727  {0x01A9, 0x0283},
1728  {0x01AC, 0x01AD},
1729  {0x01AE, 0x0288},
1730  {0x01AF, 0x01B0},
1731  {0x01B1, 0x028A},
1732  {0x01B2, 0x028B},
1733  {0x01B3, 0x01B4},
1734  {0x01B5, 0x01B6},
1735  {0x01B7, 0x0292},
1736  {0x01B8, 0x01B9},
1737  {0x01BC, 0x01BD},
1738  {0x01C4, 0x01C6},
1739  {0x01C5, 0x01C6},
1740  {0x01C7, 0x01C9},
1741  {0x01C8, 0x01C9},
1742  {0x01CA, 0x01CC},
1743  {0x01CB, 0x01CC},
1744  {0x01CD, 0x01CE},
1745  {0x01CF, 0x01D0},
1746  {0x01D1, 0x01D2},
1747  {0x01D3, 0x01D4},
1748  {0x01D5, 0x01D6},
1749  {0x01D7, 0x01D8},
1750  {0x01D9, 0x01DA},
1751  {0x01DB, 0x01DC},
1752  {0x01DE, 0x01DF},
1753  {0x01E0, 0x01E1},
1754  {0x01E2, 0x01E3},
1755  {0x01E4, 0x01E5},
1756  {0x01E6, 0x01E7},
1757  {0x01E8, 0x01E9},
1758  {0x01EA, 0x01EB},
1759  {0x01EC, 0x01ED},
1760  {0x01EE, 0x01EF},
1761  {0x01F1, 0x01F3},
1762  {0x01F2, 0x01F3},
1763  {0x01F4, 0x01F5},
1764  {0x01F6, 0x0195},
1765  {0x01F7, 0x01BF},
1766  {0x01F8, 0x01F9},
1767  {0x01FA, 0x01FB},
1768  {0x01FC, 0x01FD},
1769  {0x01FE, 0x01FF},
1770  {0x0200, 0x0201},
1771  {0x0202, 0x0203},
1772  {0x0204, 0x0205},
1773  {0x0206, 0x0207},
1774  {0x0208, 0x0209},
1775  {0x020A, 0x020B},
1776  {0x020C, 0x020D},
1777  {0x020E, 0x020F},
1778  {0x0210, 0x0211},
1779  {0x0212, 0x0213},
1780  {0x0214, 0x0215},
1781  {0x0216, 0x0217},
1782  {0x0218, 0x0219},
1783  {0x021A, 0x021B},
1784  {0x021C, 0x021D},
1785  {0x021E, 0x021F},
1786  {0x0220, 0x019E},
1787  {0x0222, 0x0223},
1788  {0x0224, 0x0225},
1789  {0x0226, 0x0227},
1790  {0x0228, 0x0229},
1791  {0x022A, 0x022B},
1792  {0x022C, 0x022D},
1793  {0x022E, 0x022F},
1794  {0x0230, 0x0231},
1795  {0x0232, 0x0233},
1796  {0x023A, 0x2C65},
1797  {0x023B, 0x023C},
1798  {0x023D, 0x019A},
1799  {0x023E, 0x2C66},
1800  {0x0241, 0x0242},
1801  {0x0243, 0x0180},
1802  {0x0244, 0x0289},
1803  {0x0245, 0x028C},
1804  {0x0246, 0x0247},
1805  {0x0248, 0x0249},
1806  {0x024A, 0x024B},
1807  {0x024C, 0x024D},
1808  {0x024E, 0x024F},
1809  {0x0345, 0x03B9},
1810  {0x0370, 0x0371},
1811  {0x0372, 0x0373},
1812  {0x0376, 0x0377},
1813  {0x037F, 0x03F3},
1814  {0x0386, 0x03AC},
1815  {0x0388, 0x03AD},
1816  {0x0389, 0x03AE},
1817  {0x038A, 0x03AF},
1818  {0x038C, 0x03CC},
1819  {0x038E, 0x03CD},
1820  {0x038F, 0x03CE},
1821  {0x0391, 0x03B1},
1822  {0x0392, 0x03B2},
1823  {0x0393, 0x03B3},
1824  {0x0394, 0x03B4},
1825  {0x0395, 0x03B5},
1826  {0x0396, 0x03B6},
1827  {0x0397, 0x03B7},
1828  {0x0398, 0x03B8},
1829  {0x0399, 0x03B9},
1830  {0x039A, 0x03BA},
1831  {0x039B, 0x03BB},
1832  {0x039C, 0x03BC},
1833  {0x039D, 0x03BD},
1834  {0x039E, 0x03BE},
1835  {0x039F, 0x03BF},
1836  {0x03A0, 0x03C0},
1837  {0x03A1, 0x03C1},
1838  {0x03A3, 0x03C3},
1839  {0x03A4, 0x03C4},
1840  {0x03A5, 0x03C5},
1841  {0x03A6, 0x03C6},
1842  {0x03A7, 0x03C7},
1843  {0x03A8, 0x03C8},
1844  {0x03A9, 0x03C9},
1845  {0x03AA, 0x03CA},
1846  {0x03AB, 0x03CB},
1847  {0x03C2, 0x03C3},
1848  {0x03CF, 0x03D7},
1849  {0x03D0, 0x03B2},
1850  {0x03D1, 0x03B8},
1851  {0x03D5, 0x03C6},
1852  {0x03D6, 0x03C0},
1853  {0x03D8, 0x03D9},
1854  {0x03DA, 0x03DB},
1855  {0x03DC, 0x03DD},
1856  {0x03DE, 0x03DF},
1857  {0x03E0, 0x03E1},
1858  {0x03E2, 0x03E3},
1859  {0x03E4, 0x03E5},
1860  {0x03E6, 0x03E7},
1861  {0x03E8, 0x03E9},
1862  {0x03EA, 0x03EB},
1863  {0x03EC, 0x03ED},
1864  {0x03EE, 0x03EF},
1865  {0x03F0, 0x03BA},
1866  {0x03F1, 0x03C1},
1867  {0x03F4, 0x03B8},
1868  {0x03F5, 0x03B5},
1869  {0x03F7, 0x03F8},
1870  {0x03F9, 0x03F2},
1871  {0x03FA, 0x03FB},
1872  {0x03FD, 0x037B},
1873  {0x03FE, 0x037C},
1874  {0x03FF, 0x037D},
1875  {0x0400, 0x0450},
1876  {0x0401, 0x0451},
1877  {0x0402, 0x0452},
1878  {0x0403, 0x0453},
1879  {0x0404, 0x0454},
1880  {0x0405, 0x0455},
1881  {0x0406, 0x0456},
1882  {0x0407, 0x0457},
1883  {0x0408, 0x0458},
1884  {0x0409, 0x0459},
1885  {0x040A, 0x045A},
1886  {0x040B, 0x045B},
1887  {0x040C, 0x045C},
1888  {0x040D, 0x045D},
1889  {0x040E, 0x045E},
1890  {0x040F, 0x045F},
1891  {0x0410, 0x0430},
1892  {0x0411, 0x0431},
1893  {0x0412, 0x0432},
1894  {0x0413, 0x0433},
1895  {0x0414, 0x0434},
1896  {0x0415, 0x0435},
1897  {0x0416, 0x0436},
1898  {0x0417, 0x0437},
1899  {0x0418, 0x0438},
1900  {0x0419, 0x0439},
1901  {0x041A, 0x043A},
1902  {0x041B, 0x043B},
1903  {0x041C, 0x043C},
1904  {0x041D, 0x043D},
1905  {0x041E, 0x043E},
1906  {0x041F, 0x043F},
1907  {0x0420, 0x0440},
1908  {0x0421, 0x0441},
1909  {0x0422, 0x0442},
1910  {0x0423, 0x0443},
1911  {0x0424, 0x0444},
1912  {0x0425, 0x0445},
1913  {0x0426, 0x0446},
1914  {0x0427, 0x0447},
1915  {0x0428, 0x0448},
1916  {0x0429, 0x0449},
1917  {0x042A, 0x044A},
1918  {0x042B, 0x044B},
1919  {0x042C, 0x044C},
1920  {0x042D, 0x044D},
1921  {0x042E, 0x044E},
1922  {0x042F, 0x044F},
1923  {0x0460, 0x0461},
1924  {0x0462, 0x0463},
1925  {0x0464, 0x0465},
1926  {0x0466, 0x0467},
1927  {0x0468, 0x0469},
1928  {0x046A, 0x046B},
1929  {0x046C, 0x046D},
1930  {0x046E, 0x046F},
1931  {0x0470, 0x0471},
1932  {0x0472, 0x0473},
1933  {0x0474, 0x0475},
1934  {0x0476, 0x0477},
1935  {0x0478, 0x0479},
1936  {0x047A, 0x047B},
1937  {0x047C, 0x047D},
1938  {0x047E, 0x047F},
1939  {0x0480, 0x0481},
1940  {0x048A, 0x048B},
1941  {0x048C, 0x048D},
1942  {0x048E, 0x048F},
1943  {0x0490, 0x0491},
1944  {0x0492, 0x0493},
1945  {0x0494, 0x0495},
1946  {0x0496, 0x0497},
1947  {0x0498, 0x0499},
1948  {0x049A, 0x049B},
1949  {0x049C, 0x049D},
1950  {0x049E, 0x049F},
1951  {0x04A0, 0x04A1},
1952  {0x04A2, 0x04A3},
1953  {0x04A4, 0x04A5},
1954  {0x04A6, 0x04A7},
1955  {0x04A8, 0x04A9},
1956  {0x04AA, 0x04AB},
1957  {0x04AC, 0x04AD},
1958  {0x04AE, 0x04AF},
1959  {0x04B0, 0x04B1},
1960  {0x04B2, 0x04B3},
1961  {0x04B4, 0x04B5},
1962  {0x04B6, 0x04B7},
1963  {0x04B8, 0x04B9},
1964  {0x04BA, 0x04BB},
1965  {0x04BC, 0x04BD},
1966  {0x04BE, 0x04BF},
1967  {0x04C0, 0x04CF},
1968  {0x04C1, 0x04C2},
1969  {0x04C3, 0x04C4},
1970  {0x04C5, 0x04C6},
1971  {0x04C7, 0x04C8},
1972  {0x04C9, 0x04CA},
1973  {0x04CB, 0x04CC},
1974  {0x04CD, 0x04CE},
1975  {0x04D0, 0x04D1},
1976  {0x04D2, 0x04D3},
1977  {0x04D4, 0x04D5},
1978  {0x04D6, 0x04D7},
1979  {0x04D8, 0x04D9},
1980  {0x04DA, 0x04DB},
1981  {0x04DC, 0x04DD},
1982  {0x04DE, 0x04DF},
1983  {0x04E0, 0x04E1},
1984  {0x04E2, 0x04E3},
1985  {0x04E4, 0x04E5},
1986  {0x04E6, 0x04E7},
1987  {0x04E8, 0x04E9},
1988  {0x04EA, 0x04EB},
1989  {0x04EC, 0x04ED},
1990  {0x04EE, 0x04EF},
1991  {0x04F0, 0x04F1},
1992  {0x04F2, 0x04F3},
1993  {0x04F4, 0x04F5},
1994  {0x04F6, 0x04F7},
1995  {0x04F8, 0x04F9},
1996  {0x04FA, 0x04FB},
1997  {0x04FC, 0x04FD},
1998  {0x04FE, 0x04FF},
1999  {0x0500, 0x0501},
2000  {0x0502, 0x0503},
2001  {0x0504, 0x0505},
2002  {0x0506, 0x0507},
2003  {0x0508, 0x0509},
2004  {0x050A, 0x050B},
2005  {0x050C, 0x050D},
2006  {0x050E, 0x050F},
2007  {0x0510, 0x0511},
2008  {0x0512, 0x0513},
2009  {0x0514, 0x0515},
2010  {0x0516, 0x0517},
2011  {0x0518, 0x0519},
2012  {0x051A, 0x051B},
2013  {0x051C, 0x051D},
2014  {0x051E, 0x051F},
2015  {0x0520, 0x0521},
2016  {0x0522, 0x0523},
2017  {0x0524, 0x0525},
2018  {0x0526, 0x0527},
2019  {0x0528, 0x0529},
2020  {0x052A, 0x052B},
2021  {0x052C, 0x052D},
2022  {0x052E, 0x052F},
2023  {0x0531, 0x0561},
2024  {0x0532, 0x0562},
2025  {0x0533, 0x0563},
2026  {0x0534, 0x0564},
2027  {0x0535, 0x0565},
2028  {0x0536, 0x0566},
2029  {0x0537, 0x0567},
2030  {0x0538, 0x0568},
2031  {0x0539, 0x0569},
2032  {0x053A, 0x056A},
2033  {0x053B, 0x056B},
2034  {0x053C, 0x056C},
2035  {0x053D, 0x056D},
2036  {0x053E, 0x056E},
2037  {0x053F, 0x056F},
2038  {0x0540, 0x0570},
2039  {0x0541, 0x0571},
2040  {0x0542, 0x0572},
2041  {0x0543, 0x0573},
2042  {0x0544, 0x0574},
2043  {0x0545, 0x0575},
2044  {0x0546, 0x0576},
2045  {0x0547, 0x0577},
2046  {0x0548, 0x0578},
2047  {0x0549, 0x0579},
2048  {0x054A, 0x057A},
2049  {0x054B, 0x057B},
2050  {0x054C, 0x057C},
2051  {0x054D, 0x057D},
2052  {0x054E, 0x057E},
2053  {0x054F, 0x057F},
2054  {0x0550, 0x0580},
2055  {0x0551, 0x0581},
2056  {0x0552, 0x0582},
2057  {0x0553, 0x0583},
2058  {0x0554, 0x0584},
2059  {0x0555, 0x0585},
2060  {0x0556, 0x0586},
2061  {0x10A0, 0x2D00},
2062  {0x10A1, 0x2D01},
2063  {0x10A2, 0x2D02},
2064  {0x10A3, 0x2D03},
2065  {0x10A4, 0x2D04},
2066  {0x10A5, 0x2D05},
2067  {0x10A6, 0x2D06},
2068  {0x10A7, 0x2D07},
2069  {0x10A8, 0x2D08},
2070  {0x10A9, 0x2D09},
2071  {0x10AA, 0x2D0A},
2072  {0x10AB, 0x2D0B},
2073  {0x10AC, 0x2D0C},
2074  {0x10AD, 0x2D0D},
2075  {0x10AE, 0x2D0E},
2076  {0x10AF, 0x2D0F},
2077  {0x10B0, 0x2D10},
2078  {0x10B1, 0x2D11},
2079  {0x10B2, 0x2D12},
2080  {0x10B3, 0x2D13},
2081  {0x10B4, 0x2D14},
2082  {0x10B5, 0x2D15},
2083  {0x10B6, 0x2D16},
2084  {0x10B7, 0x2D17},
2085  {0x10B8, 0x2D18},
2086  {0x10B9, 0x2D19},
2087  {0x10BA, 0x2D1A},
2088  {0x10BB, 0x2D1B},
2089  {0x10BC, 0x2D1C},
2090  {0x10BD, 0x2D1D},
2091  {0x10BE, 0x2D1E},
2092  {0x10BF, 0x2D1F},
2093  {0x10C0, 0x2D20},
2094  {0x10C1, 0x2D21},
2095  {0x10C2, 0x2D22},
2096  {0x10C3, 0x2D23},
2097  {0x10C4, 0x2D24},
2098  {0x10C5, 0x2D25},
2099  {0x10C7, 0x2D27},
2100  {0x10CD, 0x2D2D},
2101  {0x13F8, 0x13F0},
2102  {0x13F9, 0x13F1},
2103  {0x13FA, 0x13F2},
2104  {0x13FB, 0x13F3},
2105  {0x13FC, 0x13F4},
2106  {0x13FD, 0x13F5},
2107  {0x1C80, 0x0432},
2108  {0x1C81, 0x0434},
2109  {0x1C82, 0x043E},
2110  {0x1C83, 0x0441},
2111  {0x1C84, 0x0442},
2112  {0x1C85, 0x0442},
2113  {0x1C86, 0x044A},
2114  {0x1C87, 0x0463},
2115  {0x1C88, 0xA64B},
2116  {0x1C90, 0x10D0},
2117  {0x1C91, 0x10D1},
2118  {0x1C92, 0x10D2},
2119  {0x1C93, 0x10D3},
2120  {0x1C94, 0x10D4},
2121  {0x1C95, 0x10D5},
2122  {0x1C96, 0x10D6},
2123  {0x1C97, 0x10D7},
2124  {0x1C98, 0x10D8},
2125  {0x1C99, 0x10D9},
2126  {0x1C9A, 0x10DA},
2127  {0x1C9B, 0x10DB},
2128  {0x1C9C, 0x10DC},
2129  {0x1C9D, 0x10DD},
2130  {0x1C9E, 0x10DE},
2131  {0x1C9F, 0x10DF},
2132  {0x1CA0, 0x10E0},
2133  {0x1CA1, 0x10E1},
2134  {0x1CA2, 0x10E2},
2135  {0x1CA3, 0x10E3},
2136  {0x1CA4, 0x10E4},
2137  {0x1CA5, 0x10E5},
2138  {0x1CA6, 0x10E6},
2139  {0x1CA7, 0x10E7},
2140  {0x1CA8, 0x10E8},
2141  {0x1CA9, 0x10E9},
2142  {0x1CAA, 0x10EA},
2143  {0x1CAB, 0x10EB},
2144  {0x1CAC, 0x10EC},
2145  {0x1CAD, 0x10ED},
2146  {0x1CAE, 0x10EE},
2147  {0x1CAF, 0x10EF},
2148  {0x1CB0, 0x10F0},
2149  {0x1CB1, 0x10F1},
2150  {0x1CB2, 0x10F2},
2151  {0x1CB3, 0x10F3},
2152  {0x1CB4, 0x10F4},
2153  {0x1CB5, 0x10F5},
2154  {0x1CB6, 0x10F6},
2155  {0x1CB7, 0x10F7},
2156  {0x1CB8, 0x10F8},
2157  {0x1CB9, 0x10F9},
2158  {0x1CBA, 0x10FA},
2159  {0x1CBD, 0x10FD},
2160  {0x1CBE, 0x10FE},
2161  {0x1CBF, 0x10FF},
2162  {0x1E00, 0x1E01},
2163  {0x1E02, 0x1E03},
2164  {0x1E04, 0x1E05},
2165  {0x1E06, 0x1E07},
2166  {0x1E08, 0x1E09},
2167  {0x1E0A, 0x1E0B},
2168  {0x1E0C, 0x1E0D},
2169  {0x1E0E, 0x1E0F},
2170  {0x1E10, 0x1E11},
2171  {0x1E12, 0x1E13},
2172  {0x1E14, 0x1E15},
2173  {0x1E16, 0x1E17},
2174  {0x1E18, 0x1E19},
2175  {0x1E1A, 0x1E1B},
2176  {0x1E1C, 0x1E1D},
2177  {0x1E1E, 0x1E1F},
2178  {0x1E20, 0x1E21},
2179  {0x1E22, 0x1E23},
2180  {0x1E24, 0x1E25},
2181  {0x1E26, 0x1E27},
2182  {0x1E28, 0x1E29},
2183  {0x1E2A, 0x1E2B},
2184  {0x1E2C, 0x1E2D},
2185  {0x1E2E, 0x1E2F},
2186  {0x1E30, 0x1E31},
2187  {0x1E32, 0x1E33},
2188  {0x1E34, 0x1E35},
2189  {0x1E36, 0x1E37},
2190  {0x1E38, 0x1E39},
2191  {0x1E3A, 0x1E3B},
2192  {0x1E3C, 0x1E3D},
2193  {0x1E3E, 0x1E3F},
2194  {0x1E40, 0x1E41},
2195  {0x1E42, 0x1E43},
2196  {0x1E44, 0x1E45},
2197  {0x1E46, 0x1E47},
2198  {0x1E48, 0x1E49},
2199  {0x1E4A, 0x1E4B},
2200  {0x1E4C, 0x1E4D},
2201  {0x1E4E, 0x1E4F},
2202  {0x1E50, 0x1E51},
2203  {0x1E52, 0x1E53},
2204  {0x1E54, 0x1E55},
2205  {0x1E56, 0x1E57},
2206  {0x1E58, 0x1E59},
2207  {0x1E5A, 0x1E5B},
2208  {0x1E5C, 0x1E5D},
2209  {0x1E5E, 0x1E5F},
2210  {0x1E60, 0x1E61},
2211  {0x1E62, 0x1E63},
2212  {0x1E64, 0x1E65},
2213  {0x1E66, 0x1E67},
2214  {0x1E68, 0x1E69},
2215  {0x1E6A, 0x1E6B},
2216  {0x1E6C, 0x1E6D},
2217  {0x1E6E, 0x1E6F},
2218  {0x1E70, 0x1E71},
2219  {0x1E72, 0x1E73},
2220  {0x1E74, 0x1E75},
2221  {0x1E76, 0x1E77},
2222  {0x1E78, 0x1E79},
2223  {0x1E7A, 0x1E7B},
2224  {0x1E7C, 0x1E7D},
2225  {0x1E7E, 0x1E7F},
2226  {0x1E80, 0x1E81},
2227  {0x1E82, 0x1E83},
2228  {0x1E84, 0x1E85},
2229  {0x1E86, 0x1E87},
2230  {0x1E88, 0x1E89},
2231  {0x1E8A, 0x1E8B},
2232  {0x1E8C, 0x1E8D},
2233  {0x1E8E, 0x1E8F},
2234  {0x1E90, 0x1E91},
2235  {0x1E92, 0x1E93},
2236  {0x1E94, 0x1E95},
2237  {0x1E9B, 0x1E61},
2238  {0x1E9E, 0x00DF},
2239  {0x1EA0, 0x1EA1},
2240  {0x1EA2, 0x1EA3},
2241  {0x1EA4, 0x1EA5},
2242  {0x1EA6, 0x1EA7},
2243  {0x1EA8, 0x1EA9},
2244  {0x1EAA, 0x1EAB},
2245  {0x1EAC, 0x1EAD},
2246  {0x1EAE, 0x1EAF},
2247  {0x1EB0, 0x1EB1},
2248  {0x1EB2, 0x1EB3},
2249  {0x1EB4, 0x1EB5},
2250  {0x1EB6, 0x1EB7},
2251  {0x1EB8, 0x1EB9},
2252  {0x1EBA, 0x1EBB},
2253  {0x1EBC, 0x1EBD},
2254  {0x1EBE, 0x1EBF},
2255  {0x1EC0, 0x1EC1},
2256  {0x1EC2, 0x1EC3},
2257  {0x1EC4, 0x1EC5},
2258  {0x1EC6, 0x1EC7},
2259  {0x1EC8, 0x1EC9},
2260  {0x1ECA, 0x1ECB},
2261  {0x1ECC, 0x1ECD},
2262  {0x1ECE, 0x1ECF},
2263  {0x1ED0, 0x1ED1},
2264  {0x1ED2, 0x1ED3},
2265  {0x1ED4, 0x1ED5},
2266  {0x1ED6, 0x1ED7},
2267  {0x1ED8, 0x1ED9},
2268  {0x1EDA, 0x1EDB},
2269  {0x1EDC, 0x1EDD},
2270  {0x1EDE, 0x1EDF},
2271  {0x1EE0, 0x1EE1},
2272  {0x1EE2, 0x1EE3},
2273  {0x1EE4, 0x1EE5},
2274  {0x1EE6, 0x1EE7},
2275  {0x1EE8, 0x1EE9},
2276  {0x1EEA, 0x1EEB},
2277  {0x1EEC, 0x1EED},
2278  {0x1EEE, 0x1EEF},
2279  {0x1EF0, 0x1EF1},
2280  {0x1EF2, 0x1EF3},
2281  {0x1EF4, 0x1EF5},
2282  {0x1EF6, 0x1EF7},
2283  {0x1EF8, 0x1EF9},
2284  {0x1EFA, 0x1EFB},
2285  {0x1EFC, 0x1EFD},
2286  {0x1EFE, 0x1EFF},
2287  {0x1F08, 0x1F00},
2288  {0x1F09, 0x1F01},
2289  {0x1F0A, 0x1F02},
2290  {0x1F0B, 0x1F03},
2291  {0x1F0C, 0x1F04},
2292  {0x1F0D, 0x1F05},
2293  {0x1F0E, 0x1F06},
2294  {0x1F0F, 0x1F07},
2295  {0x1F18, 0x1F10},
2296  {0x1F19, 0x1F11},
2297  {0x1F1A, 0x1F12},
2298  {0x1F1B, 0x1F13},
2299  {0x1F1C, 0x1F14},
2300  {0x1F1D, 0x1F15},
2301  {0x1F28, 0x1F20},
2302  {0x1F29, 0x1F21},
2303  {0x1F2A, 0x1F22},
2304  {0x1F2B, 0x1F23},
2305  {0x1F2C, 0x1F24},
2306  {0x1F2D, 0x1F25},
2307  {0x1F2E, 0x1F26},
2308  {0x1F2F, 0x1F27},
2309  {0x1F38, 0x1F30},
2310  {0x1F39, 0x1F31},
2311  {0x1F3A, 0x1F32},
2312  {0x1F3B, 0x1F33},
2313  {0x1F3C, 0x1F34},
2314  {0x1F3D, 0x1F35},
2315  {0x1F3E, 0x1F36},
2316  {0x1F3F, 0x1F37},
2317  {0x1F48, 0x1F40},
2318  {0x1F49, 0x1F41},
2319  {0x1F4A, 0x1F42},
2320  {0x1F4B, 0x1F43},
2321  {0x1F4C, 0x1F44},
2322  {0x1F4D, 0x1F45},
2323  {0x1F59, 0x1F51},
2324  {0x1F5B, 0x1F53},
2325  {0x1F5D, 0x1F55},
2326  {0x1F5F, 0x1F57},
2327  {0x1F68, 0x1F60},
2328  {0x1F69, 0x1F61},
2329  {0x1F6A, 0x1F62},
2330  {0x1F6B, 0x1F63},
2331  {0x1F6C, 0x1F64},
2332  {0x1F6D, 0x1F65},
2333  {0x1F6E, 0x1F66},
2334  {0x1F6F, 0x1F67},
2335  {0x1F88, 0x1F80},
2336  {0x1F89, 0x1F81},
2337  {0x1F8A, 0x1F82},
2338  {0x1F8B, 0x1F83},
2339  {0x1F8C, 0x1F84},
2340  {0x1F8D, 0x1F85},
2341  {0x1F8E, 0x1F86},
2342  {0x1F8F, 0x1F87},
2343  {0x1F98, 0x1F90},
2344  {0x1F99, 0x1F91},
2345  {0x1F9A, 0x1F92},
2346  {0x1F9B, 0x1F93},
2347  {0x1F9C, 0x1F94},
2348  {0x1F9D, 0x1F95},
2349  {0x1F9E, 0x1F96},
2350  {0x1F9F, 0x1F97},
2351  {0x1FA8, 0x1FA0},
2352  {0x1FA9, 0x1FA1},
2353  {0x1FAA, 0x1FA2},
2354  {0x1FAB, 0x1FA3},
2355  {0x1FAC, 0x1FA4},
2356  {0x1FAD, 0x1FA5},
2357  {0x1FAE, 0x1FA6},
2358  {0x1FAF, 0x1FA7},
2359  {0x1FB8, 0x1FB0},
2360  {0x1FB9, 0x1FB1},
2361  {0x1FBA, 0x1F70},
2362  {0x1FBB, 0x1F71},
2363  {0x1FBC, 0x1FB3},
2364  {0x1FBE, 0x03B9},
2365  {0x1FC8, 0x1F72},
2366  {0x1FC9, 0x1F73},
2367  {0x1FCA, 0x1F74},
2368  {0x1FCB, 0x1F75},
2369  {0x1FCC, 0x1FC3},
2370  {0x1FD8, 0x1FD0},
2371  {0x1FD9, 0x1FD1},
2372  {0x1FDA, 0x1F76},
2373  {0x1FDB, 0x1F77},
2374  {0x1FE8, 0x1FE0},
2375  {0x1FE9, 0x1FE1},
2376  {0x1FEA, 0x1F7A},
2377  {0x1FEB, 0x1F7B},
2378  {0x1FEC, 0x1FE5},
2379  {0x1FF8, 0x1F78},
2380  {0x1FF9, 0x1F79},
2381  {0x1FFA, 0x1F7C},
2382  {0x1FFB, 0x1F7D},
2383  {0x1FFC, 0x1FF3},
2384  {0x2126, 0x03C9},
2385  {0x212A, 0x006B},
2386  {0x212B, 0x00E5},
2387  {0x2132, 0x214E},
2388  {0x2160, 0x2170},
2389  {0x2161, 0x2171},
2390  {0x2162, 0x2172},
2391  {0x2163, 0x2173},
2392  {0x2164, 0x2174},
2393  {0x2165, 0x2175},
2394  {0x2166, 0x2176},
2395  {0x2167, 0x2177},
2396  {0x2168, 0x2178},
2397  {0x2169, 0x2179},
2398  {0x216A, 0x217A},
2399  {0x216B, 0x217B},
2400  {0x216C, 0x217C},
2401  {0x216D, 0x217D},
2402  {0x216E, 0x217E},
2403  {0x216F, 0x217F},
2404  {0x2183, 0x2184},
2405  {0x24B6, 0x24D0},
2406  {0x24B7, 0x24D1},
2407  {0x24B8, 0x24D2},
2408  {0x24B9, 0x24D3},
2409  {0x24BA, 0x24D4},
2410  {0x24BB, 0x24D5},
2411  {0x24BC, 0x24D6},
2412  {0x24BD, 0x24D7},
2413  {0x24BE, 0x24D8},
2414  {0x24BF, 0x24D9},
2415  {0x24C0, 0x24DA},
2416  {0x24C1, 0x24DB},
2417  {0x24C2, 0x24DC},
2418  {0x24C3, 0x24DD},
2419  {0x24C4, 0x24DE},
2420  {0x24C5, 0x24DF},
2421  {0x24C6, 0x24E0},
2422  {0x24C7, 0x24E1},
2423  {0x24C8, 0x24E2},
2424  {0x24C9, 0x24E3},
2425  {0x24CA, 0x24E4},
2426  {0x24CB, 0x24E5},
2427  {0x24CC, 0x24E6},
2428  {0x24CD, 0x24E7},
2429  {0x24CE, 0x24E8},
2430  {0x24CF, 0x24E9},
2431  {0x2C00, 0x2C30},
2432  {0x2C01, 0x2C31},
2433  {0x2C02, 0x2C32},
2434  {0x2C03, 0x2C33},
2435  {0x2C04, 0x2C34},
2436  {0x2C05, 0x2C35},
2437  {0x2C06, 0x2C36},
2438  {0x2C07, 0x2C37},
2439  {0x2C08, 0x2C38},
2440  {0x2C09, 0x2C39},
2441  {0x2C0A, 0x2C3A},
2442  {0x2C0B, 0x2C3B},
2443  {0x2C0C, 0x2C3C},
2444  {0x2C0D, 0x2C3D},
2445  {0x2C0E, 0x2C3E},
2446  {0x2C0F, 0x2C3F},
2447  {0x2C10, 0x2C40},
2448  {0x2C11, 0x2C41},
2449  {0x2C12, 0x2C42},
2450  {0x2C13, 0x2C43},
2451  {0x2C14, 0x2C44},
2452  {0x2C15, 0x2C45},
2453  {0x2C16, 0x2C46},
2454  {0x2C17, 0x2C47},
2455  {0x2C18, 0x2C48},
2456  {0x2C19, 0x2C49},
2457  {0x2C1A, 0x2C4A},
2458  {0x2C1B, 0x2C4B},
2459  {0x2C1C, 0x2C4C},
2460  {0x2C1D, 0x2C4D},
2461  {0x2C1E, 0x2C4E},
2462  {0x2C1F, 0x2C4F},
2463  {0x2C20, 0x2C50},
2464  {0x2C21, 0x2C51},
2465  {0x2C22, 0x2C52},
2466  {0x2C23, 0x2C53},
2467  {0x2C24, 0x2C54},
2468  {0x2C25, 0x2C55},
2469  {0x2C26, 0x2C56},
2470  {0x2C27, 0x2C57},
2471  {0x2C28, 0x2C58},
2472  {0x2C29, 0x2C59},
2473  {0x2C2A, 0x2C5A},
2474  {0x2C2B, 0x2C5B},
2475  {0x2C2C, 0x2C5C},
2476  {0x2C2D, 0x2C5D},
2477  {0x2C2E, 0x2C5E},
2478  {0x2C2F, 0x2C5F},
2479  {0x2C60, 0x2C61},
2480  {0x2C62, 0x026B},
2481  {0x2C63, 0x1D7D},
2482  {0x2C64, 0x027D},
2483  {0x2C67, 0x2C68},
2484  {0x2C69, 0x2C6A},
2485  {0x2C6B, 0x2C6C},
2486  {0x2C6D, 0x0251},
2487  {0x2C6E, 0x0271},
2488  {0x2C6F, 0x0250},
2489  {0x2C70, 0x0252},
2490  {0x2C72, 0x2C73},
2491  {0x2C75, 0x2C76},
2492  {0x2C7E, 0x023F},
2493  {0x2C7F, 0x0240},
2494  {0x2C80, 0x2C81},
2495  {0x2C82, 0x2C83},
2496  {0x2C84, 0x2C85},
2497  {0x2C86, 0x2C87},
2498  {0x2C88, 0x2C89},
2499  {0x2C8A, 0x2C8B},
2500  {0x2C8C, 0x2C8D},
2501  {0x2C8E, 0x2C8F},
2502  {0x2C90, 0x2C91},
2503  {0x2C92, 0x2C93},
2504  {0x2C94, 0x2C95},
2505  {0x2C96, 0x2C97},
2506  {0x2C98, 0x2C99},
2507  {0x2C9A, 0x2C9B},
2508  {0x2C9C, 0x2C9D},
2509  {0x2C9E, 0x2C9F},
2510  {0x2CA0, 0x2CA1},
2511  {0x2CA2, 0x2CA3},
2512  {0x2CA4, 0x2CA5},
2513  {0x2CA6, 0x2CA7},
2514  {0x2CA8, 0x2CA9},
2515  {0x2CAA, 0x2CAB},
2516  {0x2CAC, 0x2CAD},
2517  {0x2CAE, 0x2CAF},
2518  {0x2CB0, 0x2CB1},
2519  {0x2CB2, 0x2CB3},
2520  {0x2CB4, 0x2CB5},
2521  {0x2CB6, 0x2CB7},
2522  {0x2CB8, 0x2CB9},
2523  {0x2CBA, 0x2CBB},
2524  {0x2CBC, 0x2CBD},
2525  {0x2CBE, 0x2CBF},
2526  {0x2CC0, 0x2CC1},
2527  {0x2CC2, 0x2CC3},
2528  {0x2CC4, 0x2CC5},
2529  {0x2CC6, 0x2CC7},
2530  {0x2CC8, 0x2CC9},
2531  {0x2CCA, 0x2CCB},
2532  {0x2CCC, 0x2CCD},
2533  {0x2CCE, 0x2CCF},
2534  {0x2CD0, 0x2CD1},
2535  {0x2CD2, 0x2CD3},
2536  {0x2CD4, 0x2CD5},
2537  {0x2CD6, 0x2CD7},
2538  {0x2CD8, 0x2CD9},
2539  {0x2CDA, 0x2CDB},
2540  {0x2CDC, 0x2CDD},
2541  {0x2CDE, 0x2CDF},
2542  {0x2CE0, 0x2CE1},
2543  {0x2CE2, 0x2CE3},
2544  {0x2CEB, 0x2CEC},
2545  {0x2CED, 0x2CEE},
2546  {0x2CF2, 0x2CF3},
2547  {0xA640, 0xA641},
2548  {0xA642, 0xA643},
2549  {0xA644, 0xA645},
2550  {0xA646, 0xA647},
2551  {0xA648, 0xA649},
2552  {0xA64A, 0xA64B},
2553  {0xA64C, 0xA64D},
2554  {0xA64E, 0xA64F},
2555  {0xA650, 0xA651},
2556  {0xA652, 0xA653},
2557  {0xA654, 0xA655},
2558  {0xA656, 0xA657},
2559  {0xA658, 0xA659},
2560  {0xA65A, 0xA65B},
2561  {0xA65C, 0xA65D},
2562  {0xA65E, 0xA65F},
2563  {0xA660, 0xA661},
2564  {0xA662, 0xA663},
2565  {0xA664, 0xA665},
2566  {0xA666, 0xA667},
2567  {0xA668, 0xA669},
2568  {0xA66A, 0xA66B},
2569  {0xA66C, 0xA66D},
2570  {0xA680, 0xA681},
2571  {0xA682, 0xA683},
2572  {0xA684, 0xA685},
2573  {0xA686, 0xA687},
2574  {0xA688, 0xA689},
2575  {0xA68A, 0xA68B},
2576  {0xA68C, 0xA68D},
2577  {0xA68E, 0xA68F},
2578  {0xA690, 0xA691},
2579  {0xA692, 0xA693},
2580  {0xA694, 0xA695},
2581  {0xA696, 0xA697},
2582  {0xA698, 0xA699},
2583  {0xA69A, 0xA69B},
2584  {0xA722, 0xA723},
2585  {0xA724, 0xA725},
2586  {0xA726, 0xA727},
2587  {0xA728, 0xA729},
2588  {0xA72A, 0xA72B},
2589  {0xA72C, 0xA72D},
2590  {0xA72E, 0xA72F},
2591  {0xA732, 0xA733},
2592  {0xA734, 0xA735},
2593  {0xA736, 0xA737},
2594  {0xA738, 0xA739},
2595  {0xA73A, 0xA73B},
2596  {0xA73C, 0xA73D},
2597  {0xA73E, 0xA73F},
2598  {0xA740, 0xA741},
2599  {0xA742, 0xA743},
2600  {0xA744, 0xA745},
2601  {0xA746, 0xA747},
2602  {0xA748, 0xA749},
2603  {0xA74A, 0xA74B},
2604  {0xA74C, 0xA74D},
2605  {0xA74E, 0xA74F},
2606  {0xA750, 0xA751},
2607  {0xA752, 0xA753},
2608  {0xA754, 0xA755},
2609  {0xA756, 0xA757},
2610  {0xA758, 0xA759},
2611  {0xA75A, 0xA75B},
2612  {0xA75C, 0xA75D},
2613  {0xA75E, 0xA75F},
2614  {0xA760, 0xA761},
2615  {0xA762, 0xA763},
2616  {0xA764, 0xA765},
2617  {0xA766, 0xA767},
2618  {0xA768, 0xA769},
2619  {0xA76A, 0xA76B},
2620  {0xA76C, 0xA76D},
2621  {0xA76E, 0xA76F},
2622  {0xA779, 0xA77A},
2623  {0xA77B, 0xA77C},
2624  {0xA77D, 0x1D79},
2625  {0xA77E, 0xA77F},
2626  {0xA780, 0xA781},
2627  {0xA782, 0xA783},
2628  {0xA784, 0xA785},
2629  {0xA786, 0xA787},
2630  {0xA78B, 0xA78C},
2631  {0xA78D, 0x0265},
2632  {0xA790, 0xA791},
2633  {0xA792, 0xA793},
2634  {0xA796, 0xA797},
2635  {0xA798, 0xA799},
2636  {0xA79A, 0xA79B},
2637  {0xA79C, 0xA79D},
2638  {0xA79E, 0xA79F},
2639  {0xA7A0, 0xA7A1},
2640  {0xA7A2, 0xA7A3},
2641  {0xA7A4, 0xA7A5},
2642  {0xA7A6, 0xA7A7},
2643  {0xA7A8, 0xA7A9},
2644  {0xA7AA, 0x0266},
2645  {0xA7AB, 0x025C},
2646  {0xA7AC, 0x0261},
2647  {0xA7AD, 0x026C},
2648  {0xA7AE, 0x026A},
2649  {0xA7B0, 0x029E},
2650  {0xA7B1, 0x0287},
2651  {0xA7B2, 0x029D},
2652  {0xA7B3, 0xAB53},
2653  {0xA7B4, 0xA7B5},
2654  {0xA7B6, 0xA7B7},
2655  {0xA7B8, 0xA7B9},
2656  {0xA7BA, 0xA7BB},
2657  {0xA7BC, 0xA7BD},
2658  {0xA7BE, 0xA7BF},
2659  {0xA7C0, 0xA7C1},
2660  {0xA7C2, 0xA7C3},
2661  {0xA7C4, 0xA794},
2662  {0xA7C5, 0x0282},
2663  {0xA7C6, 0x1D8E},
2664  {0xA7C7, 0xA7C8},
2665  {0xA7C9, 0xA7CA},
2666  {0xA7D0, 0xA7D1},
2667  {0xA7D6, 0xA7D7},
2668  {0xA7D8, 0xA7D9},
2669  {0xA7F5, 0xA7F6},
2670  {0xAB70, 0x13A0},
2671  {0xAB71, 0x13A1},
2672  {0xAB72, 0x13A2},
2673  {0xAB73, 0x13A3},
2674  {0xAB74, 0x13A4},
2675  {0xAB75, 0x13A5},
2676  {0xAB76, 0x13A6},
2677  {0xAB77, 0x13A7},
2678  {0xAB78, 0x13A8},
2679  {0xAB79, 0x13A9},
2680  {0xAB7A, 0x13AA},
2681  {0xAB7B, 0x13AB},
2682  {0xAB7C, 0x13AC},
2683  {0xAB7D, 0x13AD},
2684  {0xAB7E, 0x13AE},
2685  {0xAB7F, 0x13AF},
2686  {0xAB80, 0x13B0},
2687  {0xAB81, 0x13B1},
2688  {0xAB82, 0x13B2},
2689  {0xAB83, 0x13B3},
2690  {0xAB84, 0x13B4},
2691  {0xAB85, 0x13B5},
2692  {0xAB86, 0x13B6},
2693  {0xAB87, 0x13B7},
2694  {0xAB88, 0x13B8},
2695  {0xAB89, 0x13B9},
2696  {0xAB8A, 0x13BA},
2697  {0xAB8B, 0x13BB},
2698  {0xAB8C, 0x13BC},
2699  {0xAB8D, 0x13BD},
2700  {0xAB8E, 0x13BE},
2701  {0xAB8F, 0x13BF},
2702  {0xAB90, 0x13C0},
2703  {0xAB91, 0x13C1},
2704  {0xAB92, 0x13C2},
2705  {0xAB93, 0x13C3},
2706  {0xAB94, 0x13C4},
2707  {0xAB95, 0x13C5},
2708  {0xAB96, 0x13C6},
2709  {0xAB97, 0x13C7},
2710  {0xAB98, 0x13C8},
2711  {0xAB99, 0x13C9},
2712  {0xAB9A, 0x13CA},
2713  {0xAB9B, 0x13CB},
2714  {0xAB9C, 0x13CC},
2715  {0xAB9D, 0x13CD},
2716  {0xAB9E, 0x13CE},
2717  {0xAB9F, 0x13CF},
2718  {0xABA0, 0x13D0},
2719  {0xABA1, 0x13D1},
2720  {0xABA2, 0x13D2},
2721  {0xABA3, 0x13D3},
2722  {0xABA4, 0x13D4},
2723  {0xABA5, 0x13D5},
2724  {0xABA6, 0x13D6},
2725  {0xABA7, 0x13D7},
2726  {0xABA8, 0x13D8},
2727  {0xABA9, 0x13D9},
2728  {0xABAA, 0x13DA},
2729  {0xABAB, 0x13DB},
2730  {0xABAC, 0x13DC},
2731  {0xABAD, 0x13DD},
2732  {0xABAE, 0x13DE},
2733  {0xABAF, 0x13DF},
2734  {0xABB0, 0x13E0},
2735  {0xABB1, 0x13E1},
2736  {0xABB2, 0x13E2},
2737  {0xABB3, 0x13E3},
2738  {0xABB4, 0x13E4},
2739  {0xABB5, 0x13E5},
2740  {0xABB6, 0x13E6},
2741  {0xABB7, 0x13E7},
2742  {0xABB8, 0x13E8},
2743  {0xABB9, 0x13E9},
2744  {0xABBA, 0x13EA},
2745  {0xABBB, 0x13EB},
2746  {0xABBC, 0x13EC},
2747  {0xABBD, 0x13ED},
2748  {0xABBE, 0x13EE},
2749  {0xABBF, 0x13EF},
2750  {0xFF21, 0xFF41},
2751  {0xFF22, 0xFF42},
2752  {0xFF23, 0xFF43},
2753  {0xFF24, 0xFF44},
2754  {0xFF25, 0xFF45},
2755  {0xFF26, 0xFF46},
2756  {0xFF27, 0xFF47},
2757  {0xFF28, 0xFF48},
2758  {0xFF29, 0xFF49},
2759  {0xFF2A, 0xFF4A},
2760  {0xFF2B, 0xFF4B},
2761  {0xFF2C, 0xFF4C},
2762  {0xFF2D, 0xFF4D},
2763  {0xFF2E, 0xFF4E},
2764  {0xFF2F, 0xFF4F},
2765  {0xFF30, 0xFF50},
2766  {0xFF31, 0xFF51},
2767  {0xFF32, 0xFF52},
2768  {0xFF33, 0xFF53},
2769  {0xFF34, 0xFF54},
2770  {0xFF35, 0xFF55},
2771  {0xFF36, 0xFF56},
2772  {0xFF37, 0xFF57},
2773  {0xFF38, 0xFF58},
2774  {0xFF39, 0xFF59},
2775  {0xFF3A, 0xFF5A},
2776  {0x10400, 0x10428},
2777  {0x10401, 0x10429},
2778  {0x10402, 0x1042A},
2779  {0x10403, 0x1042B},
2780  {0x10404, 0x1042C},
2781  {0x10405, 0x1042D},
2782  {0x10406, 0x1042E},
2783  {0x10407, 0x1042F},
2784  {0x10408, 0x10430},
2785  {0x10409, 0x10431},
2786  {0x1040A, 0x10432},
2787  {0x1040B, 0x10433},
2788  {0x1040C, 0x10434},
2789  {0x1040D, 0x10435},
2790  {0x1040E, 0x10436},
2791  {0x1040F, 0x10437},
2792  {0x10410, 0x10438},
2793  {0x10411, 0x10439},
2794  {0x10412, 0x1043A},
2795  {0x10413, 0x1043B},
2796  {0x10414, 0x1043C},
2797  {0x10415, 0x1043D},
2798  {0x10416, 0x1043E},
2799  {0x10417, 0x1043F},
2800  {0x10418, 0x10440},
2801  {0x10419, 0x10441},
2802  {0x1041A, 0x10442},
2803  {0x1041B, 0x10443},
2804  {0x1041C, 0x10444},
2805  {0x1041D, 0x10445},
2806  {0x1041E, 0x10446},
2807  {0x1041F, 0x10447},
2808  {0x10420, 0x10448},
2809  {0x10421, 0x10449},
2810  {0x10422, 0x1044A},
2811  {0x10423, 0x1044B},
2812  {0x10424, 0x1044C},
2813  {0x10425, 0x1044D},
2814  {0x10426, 0x1044E},
2815  {0x10427, 0x1044F},
2816  {0x104B0, 0x104D8},
2817  {0x104B1, 0x104D9},
2818  {0x104B2, 0x104DA},
2819  {0x104B3, 0x104DB},
2820  {0x104B4, 0x104DC},
2821  {0x104B5, 0x104DD},
2822  {0x104B6, 0x104DE},
2823  {0x104B7, 0x104DF},
2824  {0x104B8, 0x104E0},
2825  {0x104B9, 0x104E1},
2826  {0x104BA, 0x104E2},
2827  {0x104BB, 0x104E3},
2828  {0x104BC, 0x104E4},
2829  {0x104BD, 0x104E5},
2830  {0x104BE, 0x104E6},
2831  {0x104BF, 0x104E7},
2832  {0x104C0, 0x104E8},
2833  {0x104C1, 0x104E9},
2834  {0x104C2, 0x104EA},
2835  {0x104C3, 0x104EB},
2836  {0x104C4, 0x104EC},
2837  {0x104C5, 0x104ED},
2838  {0x104C6, 0x104EE},
2839  {0x104C7, 0x104EF},
2840  {0x104C8, 0x104F0},
2841  {0x104C9, 0x104F1},
2842  {0x104CA, 0x104F2},
2843  {0x104CB, 0x104F3},
2844  {0x104CC, 0x104F4},
2845  {0x104CD, 0x104F5},
2846  {0x104CE, 0x104F6},
2847  {0x104CF, 0x104F7},
2848  {0x104D0, 0x104F8},
2849  {0x104D1, 0x104F9},
2850  {0x104D2, 0x104FA},
2851  {0x104D3, 0x104FB},
2852  {0x10570, 0x10597},
2853  {0x10571, 0x10598},
2854  {0x10572, 0x10599},
2855  {0x10573, 0x1059A},
2856  {0x10574, 0x1059B},
2857  {0x10575, 0x1059C},
2858  {0x10576, 0x1059D},
2859  {0x10577, 0x1059E},
2860  {0x10578, 0x1059F},
2861  {0x10579, 0x105A0},
2862  {0x1057A, 0x105A1},
2863  {0x1057C, 0x105A3},
2864  {0x1057D, 0x105A4},
2865  {0x1057E, 0x105A5},
2866  {0x1057F, 0x105A6},
2867  {0x10580, 0x105A7},
2868  {0x10581, 0x105A8},
2869  {0x10582, 0x105A9},
2870  {0x10583, 0x105AA},
2871  {0x10584, 0x105AB},
2872  {0x10585, 0x105AC},
2873  {0x10586, 0x105AD},
2874  {0x10587, 0x105AE},
2875  {0x10588, 0x105AF},
2876  {0x10589, 0x105B0},
2877  {0x1058A, 0x105B1},
2878  {0x1058C, 0x105B3},
2879  {0x1058D, 0x105B4},
2880  {0x1058E, 0x105B5},
2881  {0x1058F, 0x105B6},
2882  {0x10590, 0x105B7},
2883  {0x10591, 0x105B8},
2884  {0x10592, 0x105B9},
2885  {0x10594, 0x105BB},
2886  {0x10595, 0x105BC},
2887  {0x10C80, 0x10CC0},
2888  {0x10C81, 0x10CC1},
2889  {0x10C82, 0x10CC2},
2890  {0x10C83, 0x10CC3},
2891  {0x10C84, 0x10CC4},
2892  {0x10C85, 0x10CC5},
2893  {0x10C86, 0x10CC6},
2894  {0x10C87, 0x10CC7},
2895  {0x10C88, 0x10CC8},
2896  {0x10C89, 0x10CC9},
2897  {0x10C8A, 0x10CCA},
2898  {0x10C8B, 0x10CCB},
2899  {0x10C8C, 0x10CCC},
2900  {0x10C8D, 0x10CCD},
2901  {0x10C8E, 0x10CCE},
2902  {0x10C8F, 0x10CCF},
2903  {0x10C90, 0x10CD0},
2904  {0x10C91, 0x10CD1},
2905  {0x10C92, 0x10CD2},
2906  {0x10C93, 0x10CD3},
2907  {0x10C94, 0x10CD4},
2908  {0x10C95, 0x10CD5},
2909  {0x10C96, 0x10CD6},
2910  {0x10C97, 0x10CD7},
2911  {0x10C98, 0x10CD8},
2912  {0x10C99, 0x10CD9},
2913  {0x10C9A, 0x10CDA},
2914  {0x10C9B, 0x10CDB},
2915  {0x10C9C, 0x10CDC},
2916  {0x10C9D, 0x10CDD},
2917  {0x10C9E, 0x10CDE},
2918  {0x10C9F, 0x10CDF},
2919  {0x10CA0, 0x10CE0},
2920  {0x10CA1, 0x10CE1},
2921  {0x10CA2, 0x10CE2},
2922  {0x10CA3, 0x10CE3},
2923  {0x10CA4, 0x10CE4},
2924  {0x10CA5, 0x10CE5},
2925  {0x10CA6, 0x10CE6},
2926  {0x10CA7, 0x10CE7},
2927  {0x10CA8, 0x10CE8},
2928  {0x10CA9, 0x10CE9},
2929  {0x10CAA, 0x10CEA},
2930  {0x10CAB, 0x10CEB},
2931  {0x10CAC, 0x10CEC},
2932  {0x10CAD, 0x10CED},
2933  {0x10CAE, 0x10CEE},
2934  {0x10CAF, 0x10CEF},
2935  {0x10CB0, 0x10CF0},
2936  {0x10CB1, 0x10CF1},
2937  {0x10CB2, 0x10CF2},
2938  {0x118A0, 0x118C0},
2939  {0x118A1, 0x118C1},
2940  {0x118A2, 0x118C2},
2941  {0x118A3, 0x118C3},
2942  {0x118A4, 0x118C4},
2943  {0x118A5, 0x118C5},
2944  {0x118A6, 0x118C6},
2945  {0x118A7, 0x118C7},
2946  {0x118A8, 0x118C8},
2947  {0x118A9, 0x118C9},
2948  {0x118AA, 0x118CA},
2949  {0x118AB, 0x118CB},
2950  {0x118AC, 0x118CC},
2951  {0x118AD, 0x118CD},
2952  {0x118AE, 0x118CE},
2953  {0x118AF, 0x118CF},
2954  {0x118B0, 0x118D0},
2955  {0x118B1, 0x118D1},
2956  {0x118B2, 0x118D2},
2957  {0x118B3, 0x118D3},
2958  {0x118B4, 0x118D4},
2959  {0x118B5, 0x118D5},
2960  {0x118B6, 0x118D6},
2961  {0x118B7, 0x118D7},
2962  {0x118B8, 0x118D8},
2963  {0x118B9, 0x118D9},
2964  {0x118BA, 0x118DA},
2965  {0x118BB, 0x118DB},
2966  {0x118BC, 0x118DC},
2967  {0x118BD, 0x118DD},
2968  {0x118BE, 0x118DE},
2969  {0x118BF, 0x118DF},
2970  {0x16E40, 0x16E60},
2971  {0x16E41, 0x16E61},
2972  {0x16E42, 0x16E62},
2973  {0x16E43, 0x16E63},
2974  {0x16E44, 0x16E64},
2975  {0x16E45, 0x16E65},
2976  {0x16E46, 0x16E66},
2977  {0x16E47, 0x16E67},
2978  {0x16E48, 0x16E68},
2979  {0x16E49, 0x16E69},
2980  {0x16E4A, 0x16E6A},
2981  {0x16E4B, 0x16E6B},
2982  {0x16E4C, 0x16E6C},
2983  {0x16E4D, 0x16E6D},
2984  {0x16E4E, 0x16E6E},
2985  {0x16E4F, 0x16E6F},
2986  {0x16E50, 0x16E70},
2987  {0x16E51, 0x16E71},
2988  {0x16E52, 0x16E72},
2989  {0x16E53, 0x16E73},
2990  {0x16E54, 0x16E74},
2991  {0x16E55, 0x16E75},
2992  {0x16E56, 0x16E76},
2993  {0x16E57, 0x16E77},
2994  {0x16E58, 0x16E78},
2995  {0x16E59, 0x16E79},
2996  {0x16E5A, 0x16E7A},
2997  {0x16E5B, 0x16E7B},
2998  {0x16E5C, 0x16E7C},
2999  {0x16E5D, 0x16E7D},
3000  {0x16E5E, 0x16E7E},
3001  {0x16E5F, 0x16E7F},
3002  {0x1E900, 0x1E922},
3003  {0x1E901, 0x1E923},
3004  {0x1E902, 0x1E924},
3005  {0x1E903, 0x1E925},
3006  {0x1E904, 0x1E926},
3007  {0x1E905, 0x1E927},
3008  {0x1E906, 0x1E928},
3009  {0x1E907, 0x1E929},
3010  {0x1E908, 0x1E92A},
3011  {0x1E909, 0x1E92B},
3012  {0x1E90A, 0x1E92C},
3013  {0x1E90B, 0x1E92D},
3014  {0x1E90C, 0x1E92E},
3015  {0x1E90D, 0x1E92F},
3016  {0x1E90E, 0x1E930},
3017  {0x1E90F, 0x1E931},
3018  {0x1E910, 0x1E932},
3019  {0x1E911, 0x1E933},
3020  {0x1E912, 0x1E934},
3021  {0x1E913, 0x1E935},
3022  {0x1E914, 0x1E936},
3023  {0x1E915, 0x1E937},
3024  {0x1E916, 0x1E938},
3025  {0x1E917, 0x1E939},
3026  {0x1E918, 0x1E93A},
3027  {0x1E919, 0x1E93B},
3028  {0x1E91A, 0x1E93C},
3029  {0x1E91B, 0x1E93D},
3030  {0x1E91C, 0x1E93E},
3031  {0x1E91D, 0x1E93F},
3032  {0x1E91E, 0x1E940},
3033  {0x1E91F, 0x1E941},
3034  {0x1E920, 0x1E942},
3035  {0x1E921, 0x1E943},
3036};
3037
/* Multi-code-point case foldings.  Each row is
 *
 *     {code point, folded[0], folded[1], folded[2]}
 *
 * The entries appear to correspond to the full ("F") foldings of Unicode's
 * CaseFolding.txt.  Foldings shorter than three code points simply omit the
 * trailing slots: C zero-initialises them, and a zero slot marks the end of
 * the folded sequence.  Rows are kept sorted by their first element because
 * the table is searched with bsearch().
 */
static int fold2[][ 4 ] = {
  /* Latin */
  {0x00DF, 0x0073, 0x0073},
  {0x0130, 0x0069, 0x0307},
  {0x0149, 0x02BC, 0x006E},
  {0x01F0, 0x006A, 0x030C},

  /* Greek */
  {0x0390, 0x03B9, 0x0308, 0x0301},
  {0x03B0, 0x03C5, 0x0308, 0x0301},

  /* Armenian */
  {0x0587, 0x0565, 0x0582},

  /* Latin Extended Additional */
  {0x1E96, 0x0068, 0x0331},
  {0x1E97, 0x0074, 0x0308},
  {0x1E98, 0x0077, 0x030A},
  {0x1E99, 0x0079, 0x030A},
  {0x1E9A, 0x0061, 0x02BE},
  {0x1E9E, 0x0073, 0x0073},

  /* Greek Extended */
  {0x1F50, 0x03C5, 0x0313},
  {0x1F52, 0x03C5, 0x0313, 0x0300},
  {0x1F54, 0x03C5, 0x0313, 0x0301},
  {0x1F56, 0x03C5, 0x0313, 0x0342},

  {0x1F80, 0x1F00, 0x03B9},
  {0x1F81, 0x1F01, 0x03B9},
  {0x1F82, 0x1F02, 0x03B9},
  {0x1F83, 0x1F03, 0x03B9},
  {0x1F84, 0x1F04, 0x03B9},
  {0x1F85, 0x1F05, 0x03B9},
  {0x1F86, 0x1F06, 0x03B9},
  {0x1F87, 0x1F07, 0x03B9},
  {0x1F88, 0x1F00, 0x03B9},
  {0x1F89, 0x1F01, 0x03B9},
  {0x1F8A, 0x1F02, 0x03B9},
  {0x1F8B, 0x1F03, 0x03B9},
  {0x1F8C, 0x1F04, 0x03B9},
  {0x1F8D, 0x1F05, 0x03B9},
  {0x1F8E, 0x1F06, 0x03B9},
  {0x1F8F, 0x1F07, 0x03B9},

  {0x1F90, 0x1F20, 0x03B9},
  {0x1F91, 0x1F21, 0x03B9},
  {0x1F92, 0x1F22, 0x03B9},
  {0x1F93, 0x1F23, 0x03B9},
  {0x1F94, 0x1F24, 0x03B9},
  {0x1F95, 0x1F25, 0x03B9},
  {0x1F96, 0x1F26, 0x03B9},
  {0x1F97, 0x1F27, 0x03B9},
  {0x1F98, 0x1F20, 0x03B9},
  {0x1F99, 0x1F21, 0x03B9},
  {0x1F9A, 0x1F22, 0x03B9},
  {0x1F9B, 0x1F23, 0x03B9},
  {0x1F9C, 0x1F24, 0x03B9},
  {0x1F9D, 0x1F25, 0x03B9},
  {0x1F9E, 0x1F26, 0x03B9},
  {0x1F9F, 0x1F27, 0x03B9},

  {0x1FA0, 0x1F60, 0x03B9},
  {0x1FA1, 0x1F61, 0x03B9},
  {0x1FA2, 0x1F62, 0x03B9},
  {0x1FA3, 0x1F63, 0x03B9},
  {0x1FA4, 0x1F64, 0x03B9},
  {0x1FA5, 0x1F65, 0x03B9},
  {0x1FA6, 0x1F66, 0x03B9},
  {0x1FA7, 0x1F67, 0x03B9},
  {0x1FA8, 0x1F60, 0x03B9},
  {0x1FA9, 0x1F61, 0x03B9},
  {0x1FAA, 0x1F62, 0x03B9},
  {0x1FAB, 0x1F63, 0x03B9},
  {0x1FAC, 0x1F64, 0x03B9},
  {0x1FAD, 0x1F65, 0x03B9},
  {0x1FAE, 0x1F66, 0x03B9},
  {0x1FAF, 0x1F67, 0x03B9},

  {0x1FB2, 0x1F70, 0x03B9},
  {0x1FB3, 0x03B1, 0x03B9},
  {0x1FB4, 0x03AC, 0x03B9},
  {0x1FB6, 0x03B1, 0x0342},
  {0x1FB7, 0x03B1, 0x0342, 0x03B9},
  {0x1FBC, 0x03B1, 0x03B9},

  {0x1FC2, 0x1F74, 0x03B9},
  {0x1FC3, 0x03B7, 0x03B9},
  {0x1FC4, 0x03AE, 0x03B9},
  {0x1FC6, 0x03B7, 0x0342},
  {0x1FC7, 0x03B7, 0x0342, 0x03B9},
  {0x1FCC, 0x03B7, 0x03B9},

  {0x1FD2, 0x03B9, 0x0308, 0x0300},
  {0x1FD3, 0x03B9, 0x0308, 0x0301},
  {0x1FD6, 0x03B9, 0x0342},
  {0x1FD7, 0x03B9, 0x0308, 0x0342},

  {0x1FE2, 0x03C5, 0x0308, 0x0300},
  {0x1FE3, 0x03C5, 0x0308, 0x0301},
  {0x1FE4, 0x03C1, 0x0313},
  {0x1FE6, 0x03C5, 0x0342},
  {0x1FE7, 0x03C5, 0x0308, 0x0342},

  {0x1FF2, 0x1F7C, 0x03B9},
  {0x1FF3, 0x03C9, 0x03B9},
  {0x1FF4, 0x03CE, 0x03B9},
  {0x1FF6, 0x03C9, 0x0342},
  {0x1FF7, 0x03C9, 0x0342, 0x03B9},
  {0x1FFC, 0x03C9, 0x03B9},

  /* Latin ligatures */
  {0xFB00, 0x0066, 0x0066},
  {0xFB01, 0x0066, 0x0069},
  {0xFB02, 0x0066, 0x006C},
  {0xFB03, 0x0066, 0x0066, 0x0069},
  {0xFB04, 0x0066, 0x0066, 0x006C},
  {0xFB05, 0x0073, 0x0074},
  {0xFB06, 0x0073, 0x0074},

  /* Armenian ligatures */
  {0xFB13, 0x0574, 0x0576},
  {0xFB14, 0x0574, 0x0565},
  {0xFB15, 0x0574, 0x056B},
  {0xFB16, 0x057E, 0x0576},
  {0xFB17, 0x0574, 0x056D},
};
3144
3145
3146/* Branchless UTF-8 decoder
3147 * https://raw.githubusercontent.com/skeeto/branchless-utf8/
3148 * This is free and unencumbered software released into the public domain.
3149 */
3150
3151/* Decode the next character, C, from BUF, reporting errors in E.
3152 *
3153 * Since this is a branchless decoder, four bytes will be read from the
3154 * buffer regardless of the actual length of the next character. This
3155 * means the buffer _must_ have at least three bytes of zero padding
3156 * following the end of the data stream.
3157 *
3158 * Errors are reported in E, which will be non-zero if the parsed
3159 * character was somehow invalid: invalid byte sequence, non-canonical
3160 * encoding, or a surrogate half.
3161 *
 * The function returns a pointer to the next character. When an error
 * occurs, C is set to 0xDC00 | <first byte> and the returned pointer
 * advances by exactly one byte, so invalid input is consumed bytewise.
3165 */
/* Sequence length implied by a UTF-8 lead byte, indexed by the byte's top
 * five bits (byte >> 3).  A 0 entry marks bytes that cannot start a
 * sequence. */
static const char lengths[] = {
    /* 0x00-0x7F: single byte */
    1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x80-0xBF: continuation bytes, not a valid lead */
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC0-0xDF: two bytes */
    2, 2, 2, 2,
    /* 0xE0-0xEF: three bytes */
    3, 3,
    /* 0xF0-0xF7: four bytes */
    4,
    /* 0xF8-0xFF: not a valid lead */
    0
};
3170
3171static C_char *utf8_decode(C_char *buf, C_u32 *c, int *e)
3172{
3173    static const int masks[]  = {0x00, 0x7f, 0x1f, 0x0f, 0x07};
3174    static const uint32_t mins[] = {4194304, 0, 128, 2048, 65536};
3175    static const int shiftc[] = {0, 18, 12, 6, 0};
3176    static const int shifte[] = {0, 6, 4, 2, 0};
3177
3178#ifdef DEBUGBUILD
3179    if(buf == NULL)
3180        C_panic_hook(C_text("possibly invalid string index"));
3181#endif
3182
3183    unsigned char *s = (unsigned char *)buf;
3184    int len = lengths[s[0] >> 3];
3185
3186    /* Compute the pointer to the next character early so that the next
3187     * iteration can start working on the next character. Neither Clang
3188     * nor GCC figure out this reordering on their own.
3189     */
3190    unsigned char *next = s + len + !len;
3191
3192    /* Assume a four-byte character and load four bytes. Unused bits are
3193     * shifted out.
3194     */
3195    *c  = (C_u32)(s[0] & masks[len]) << 18;
3196    *c |= (C_u32)(s[1] & 0x3f) << 12;
3197    *c |= (C_u32)(s[2] & 0x3f) <<  6;
3198    *c |= (C_u32)(s[3] & 0x3f) <<  0;
3199    *c >>= shiftc[len];
3200
3201    /* Accumulate the various error conditions. */
3202    *e  = (*c < mins[len]) << 6; // non-canonical encoding
3203    *e |= ((*c >> 11) == 0x1b) << 7;  // surrogate half?
3204    *e |= (*c > 0x10FFFF) << 8;  // out of range?
3205    *e |= (s[1] & 0xc0) >> 2;
3206    *e |= (s[2] & 0xc0) >> 4;
3207    *e |= (s[3]       ) >> 6;
3208    *e ^= 0x2a; // top two bits of each tail byte correct?
3209    *e >>= shifte[len];
3210
3211    /* now make all that optimization pointless... */
3212    if(*e) {
3213        *c = 0xdc00 | *s;
3214        return (C_char *)s + 1;
3215    }
3216
3217    return (C_char *)next;
3218}
3219/* */
3220
3221static C_char *utf8_encode(C_u32 u, C_char *p1)
3222{
3223    unsigned char *p = (unsigned char *)p1;
3224    if(u < 0x80) *(p++) = u;
3225    else if((u & 0xff00) == 0xdc00) {
3226        *(p++) = u & 0xff;
3227    } else if(u < 0x800) {
3228        *(p++) = (u >> 6) | 0xC0;
3229        *(p++) = (u & 0x3F) | 0x80;
3230    } else if(u < 0x10000) {
3231        *(p++) = (u >> 12) | 0xE0;
3232        *(p++) = ((u >> 6) & 0x3F) | 0x80;
3233        *(p++) = (u & 0x3F) | 0x80;
3234    } else if(u < 0x110000) {
3235        *(p++) = (u >> 18) | 0xF0;
3236        *(p++) = ((u >> 12) & 0x3F) | 0x80;
3237        *(p++) = ((u >> 6) & 0x3F) | 0x80;
3238        *(p++) = (u & 0x3F) | 0x80;
3239    }
3240    return (C_char *)p;
3241}
3242
3243static C_char *utf_index1(C_word s, C_word i)
3244{
3245    C_word i0 = C_unfix(C_block_item(s, 2));
3246    C_word count = C_unfix(C_block_item(s, 1));
3247    C_word off = 0, index = 0;
3248    C_char *p1, *p = C_c_string(C_block_item(s, 0));
3249    int e;
3250    C_u32 c;
3251    if(i >= i0) {
3252        p += off = C_unfix(C_block_item(s, 3));
3253        index = i0;
3254    }
3255    while(index <= count) {
3256        if(index == i) {
3257            C_set_block_item(s, 2, C_fix(index));
3258            C_set_block_item(s, 3, C_fix(off));
3259            return p;
3260        }
3261        p1 = p;
3262        p = utf8_decode(p, &c, &e);
3263        ++index;
3264        off += p - p1;
3265    }
3266    return NULL;
3267}
3268
3269static C_char *utf_index(C_word s, C_word i)
3270{
3271    C_word bv = C_block_item(s, 0);
3272    if(i == 0) {
3273        C_set_block_item(s, 2, C_fix(0));
3274        C_set_block_item(s, 3, C_fix(0));
3275        return C_c_string(bv);
3276    } else if(C_header_size(bv) - 1 == C_unfix(C_block_item(s, 1))) {
3277        /* len == codepoints */
3278        C_set_block_item(s, 2, C_fix(i));
3279        C_set_block_item(s, 3, C_fix(i));
3280        return C_c_string(bv) + i;
3281    }
3282    return utf_index1(s, i);
3283}
3284
3285C_regparm C_word C_utf_subchar(C_word s, C_word i)
3286{
3287    C_char *p = utf_index(s, C_unfix(i));
3288    int e;
3289    C_u32 c;
3290    utf8_decode(p, &c, &e);
3291    return C_make_character(c);
3292}
3293
3294C_regparm C_word C_utf_setsubchar(C_word s, C_word i, C_word c)
3295{
3296    C_char buf[ 4 ];
3297    C_char *p = utf8_encode(C_character_code(c), buf);
3298    int e;
3299    C_u32 old;
3300    C_char *p1 = utf_index(s, C_unfix(i));
3301    C_char *p2 = utf8_decode(p1, &old, &e);
3302    int nl = p - buf, ol = p2 - p1;
3303    C_word bv = C_block_item(s, 0);
3304    C_word bvlen = C_header_size(bv) - 1;
3305    int prefix = C_unfix(C_block_item(s, 3));    /* offset */
3306    int suffix = bvlen - prefix - ol;
3307
3308    if(nl > ol) {
3309        int tl = bvlen + nl - ol;
3310        if(C_in_scratchspacep(bv)) C_mutate_scratch_slot(NULL, bv);
3311        C_word bvn = C_scratch_alloc(C_SIZEOF_BYTEVECTOR(tl + 1));
3312        C_block_header_init(bvn, C_make_header(C_BYTEVECTOR_TYPE, tl + 1));
3313        if(prefix) C_memcpy(C_c_string(bvn), C_c_string(bv), prefix);
3314        C_memcpy((C_char *)C_data_pointer(bvn) + prefix, buf, nl);
3315        C_memcpy((C_char *)C_data_pointer(bvn) + prefix + nl,
3316            (C_char *)C_data_pointer(bv) + prefix + ol, suffix + 1); /* include 0 byte */
3317        C_mutate_slot(&C_block_item(s, 0), bvn);
3318        C_mutate_scratch_slot(&C_block_item(s, 0), bvn);
3319    } else if(nl < ol) {
3320        C_memcpy(p1, buf, nl);
3321        C_memmove(p1 + nl, p1 + ol, suffix + 1); /* include 0 byte */
3322        C_block_header_init(bv, C_make_header(C_BYTEVECTOR_TYPE, bvlen - (ol - nl) + 1));
3323    } else C_memcpy(p1, buf, nl);
3324
3325    return C_SCHEME_UNDEFINED;
3326}
3327
3328/* copy c bytes of bv into s at position i, occupying len characters */
3329C_regparm C_word C_utf_overwrite(C_word s, C_word i, C_word len, C_word bv,
3330    C_word c)
3331{
3332    C_word bvs = C_block_item(s, 0);
3333    C_word bvlen = C_header_size(bvs) - 1;
3334    C_char *p1 = utf_index(s, C_unfix(i));
3335    C_char *p2 = utf_index(s, C_unfix(i) + C_unfix(len));
3336    int count = C_unfix(c);
3337    int d = p2 - p1;
3338    int prefix = p1 - (C_char *)C_data_pointer(bvs);
3339    int suffix = bvlen - prefix - d;
3340
3341    if(count > d) {
3342        int tl = bvlen + count - d;
3343        C_word bvn = C_scratch_alloc(C_SIZEOF_BYTEVECTOR(tl + 1));
3344        if(C_in_scratchspacep(bvs)) C_mutate_scratch_slot(NULL, bvs);
3345        C_block_header_init(bvn, C_make_header(C_BYTEVECTOR_TYPE, tl + 1));
3346        if(prefix) C_memcpy(C_c_string(bvn), C_data_pointer(bvs), prefix);
3347        C_memcpy((C_char *)C_data_pointer(bvn) + prefix, (C_char *)C_data_pointer(bv),
3348            count);
3349        C_memcpy((C_char *)C_data_pointer(bvn) + prefix + count,
3350            p2, suffix + 1); /* include 0 byte */
3351        C_mutate_slot(&C_block_item(s, 0), bvn);
3352        C_mutate_scratch_slot(&C_block_item(s, 0), bvn);
3353    } else if(count < d && count) {
3354        C_memcpy(p1, C_data_pointer(bv), count);
3355        C_memmove(p1 + count, p2, suffix + 1); /* include 0 byte */
3356        C_block_header_init(bvs, C_make_header(C_BYTEVECTOR_TYPE,
3357            bvlen - (d - count) + 1));
3358    } else if(count) C_memcpy(p1, C_data_pointer(bv), count);
3359
3360    return C_SCHEME_UNDEFINED;
3361}
3362
3363C_regparm C_word C_utf_set_bv_size(C_word bv, C_word sz)
3364{
3365    int i = C_unfix(sz);
3366    C_block_header_init(bv, C_make_header(C_BYTEVECTOR_TYPE, i + 1));
3367    C_char *p = (C_char *)C_data_pointer(bv);
3368    p[ i ] = 0;
3369    return bv;
3370}
3371
3372C_regparm C_word C_utf_compare(C_word s1, C_word s2, C_word start1, C_word start2,
3373    C_word len)
3374{
3375    C_char *p1 = utf_index(s1, C_unfix(start1));
3376    C_char *p2 = utf_index(s2, C_unfix(start2));
3377    int e, n = C_unfix(len);
3378    while(n--) {
3379        C_u32 c1, c2;
3380        p1 = utf8_decode(p1, &c1, &e);
3381        p2 = utf8_decode(p2, &c2, &e);
3382        if(c1 != c2) return C_fix((C_word)c1 - (C_word)c2);
3383    }
3384    return C_fix(0);
3385}
3386
3387C_regparm C_word C_utf_compare_ci(C_word s1, C_word s2, C_word start1, C_word start2, C_word len)
3388{
3389    C_char *p1 = utf_index(s1, C_unfix(start1));
3390    C_char *p2 = utf_index(s2, C_unfix(start2));
3391    int e, n = C_unfix(len);
3392    while(n--) {
3393        C_u32 c1, c2;
3394        int *m, r1, r2, i;
3395        p1 = utf8_decode(p1, &c1, &e);
3396        p2 = utf8_decode(p2, &c2, &e);
3397        if(c1 >= 'A' && c1 <= 'Z') r1 = c1 + 32;
3398        else r1 = c1;
3399        if(c2 >= 'A' && c2 <= 'Z') r2 = c2 + 32;
3400        else r2 = c2;
3401        if(r1 == r2) continue;
3402        if(r1 < 128 || r2 < 128) goto fail;
3403        m = bsearch(&r1, fold2, nelem(fold2), sizeof(*fold2), &runemapcmp);
3404        if(m) {
3405            for(i = 1; i < 3; ++i) {
3406                if(m[ i ] == 0) break;
3407                if(m[ i ] != c2) return C_fix(m[ i ] - c2);
3408                if(i != 2 && m[ i + 1 ] != 0) p2 = utf8_decode(p2, &c2, &e);
3409            }
3410        } else {
3411            m = bsearch(&r1, fold1, nelem(fold1), sizeof(*fold1), &runemapcmp);
3412            if(m) {
3413                if(m[ 1 ] != c2) return C_fix(m[ 1 ] - c2);
3414            }
3415        }
3416        m = bsearch(&r2, fold2, nelem(fold2), sizeof(*fold2), &runemapcmp);
3417        if(m) {
3418            for(i = 1; i < 3; ++i) {
3419                if(m[ i ] == 0) break;
3420                if(c1 != m[ i ]) return C_fix(c1 - m[ i ]);
3421                if(i != 2 && m[ i + 1 ]) p1 = utf8_decode(p1, &c1, &e);
3422            }
3423        } else {
3424            m = bsearch(&r2, fold1, nelem(fold1), sizeof(*fold1), &runemapcmp);
3425            if(m) {
3426                if(c1 != m[ 1 ]) return C_fix(c1 - m[ 1 ]);
3427            }
3428        }
3429        continue;
3430fail:
3431        return C_fix(r1 - r2);
3432    }
3433    return C_fix(0);
3434}
3435
3436/* XXX inline this? */
3437C_regparm C_word C_utf_equal(C_word s1, C_word s2)
3438{
3439    C_word b1 = C_block_item(s1, 0);
3440    C_word b2 = C_block_item(s2, 0);
3441    int n1 = C_header_size(b1);
3442    int n2 = C_header_size(b2);
3443    if(n1 != n2) return C_SCHEME_FALSE;
3444    return C_mk_bool(C_memcmp(C_c_string(b1), C_c_string(b2), n1) == 0);
3445}
3446
3447/* XXX inline this? */
3448C_regparm C_word C_utf_equal_ci(C_word s1, C_word s2)
3449{
3450    C_word n1 = C_block_item(s1, 1);
3451    if(n1 != C_block_item(s2, 1)) return C_SCHEME_FALSE;
3452    return C_mk_bool(C_utf_compare_ci(s1, s2, C_fix(0), C_fix(0), n1) == C_fix(0));
3453}
3454
3455C_regparm C_word C_utf_copy(C_word from, C_word to, C_word start1, C_word end1, C_word start2)
3456{
3457    C_char *p1 = utf_index(from, C_unfix(start1));
3458    C_char *p2 = utf_index(to, C_unfix(start2));
3459    C_char *p3 = utf_index(from, C_unfix(end1));
3460    C_memcpy(p2, p1, p3 - p1);
3461    return C_SCHEME_UNDEFINED;
3462}
3463
3464/* compute byte-index from char-index */
3465C_regparm C_word C_utf_position(C_word str, C_word index)
3466{
3467    C_char *p1 = utf_index(str, C_unfix(index));
3468    return C_fix(p1 - C_c_string(C_block_item(str, 0)));
3469}
3470
3471/* compute char-index from byte-index (slow, uncached) */
3472C_regparm int C_utf_char_position(C_word bv, int pos)
3473{
3474    int p = 0;
3475    C_u32 c;
3476    int e;
3477    C_char *ptr = C_c_string(bv), *ptr2;
3478    while(pos > 0) {
3479        ptr2 = utf8_decode(ptr, &c, &e);
3480        pos -= ptr2 - ptr;
3481        ptr = ptr2;
3482        ++p;
3483    }
3484    return p;
3485}
3486
3487/* compute byte-offset between two char-indices */
3488C_regparm C_word C_utf_range(C_word str, C_word start, C_word end)
3489{
3490    C_char *p1 = utf_index(str, C_unfix(start));
3491    C_char *p2 = utf_index(str, C_unfix(end));
3492    return C_fix(p2 - p1);
3493}
3494
3495/* Count characters - slow variant, handles invalid sequences */
3496C_regparm int C_utf_count(C_char *s, int len)
3497{
3498    int i = 0;
3499    C_u32 c;
3500    int e;
3501    C_char *s2;
3502    while (len > 0) {
3503        s2 = utf8_decode(s, &c, &e);
3504        len -= (s2 - s);
3505        s = s2;
3506        i++;
3507    }
3508    return i;
3509}
3510
3511/* Count characters - slow variant, detects invalid sequences */
3512C_regparm C_word C_utf_validate(C_word bv, C_word blen)
3513{
3514    int i = 0;
3515    C_u32 c;
3516    int e;
3517    C_char *s = C_c_string(bv), *s2;
3518    int len = C_unfix(blen);
3519    while (len > 0) {
3520        s2 = utf8_decode(s, &c, &e);
3521        if(e) return C_SCHEME_FALSE;
3522        len -= (s2 - s);
3523        s = s2;
3524        i++;
3525    }
3526    return C_fix(i);
3527}
3528
3529/* count characters, fast, unsafe variant
3530   http://canonical.org/~kragen/strlen-utf8.html */
3531C_regparm int C_utf_fast_count(C_char *s, int len)
3532{
3533    int i = 0, j = 0;
3534    while (len--) {
3535        if ((s[i] & 0xc0) != 0x80) j++;
3536        i++;
3537    }
3538    return j;
3539}
3540
3541C_regparm C_word C_utf_bytes(C_word chr)
3542{
3543    int e;
3544    char buf[ 5 ];
3545    C_char *p1 = utf8_encode(C_character_code(chr), buf);
3546    return C_fix(p1 - buf);
3547}
3548
3549C_regparm C_char * C_utf_encode(C_char *str, int chr)
3550{
3551    return utf8_encode(chr, str);
3552}
3553
3554C_regparm C_word C_utf_decode(C_word bv, C_word pos)
3555{
3556    C_u32 c;
3557    int e;
3558    utf8_decode(C_c_string(bv) + C_unfix(pos), &c, &e);
3559    return C_make_character(c);
3560}
3561
3562C_regparm C_word C_utf_decode_ptr(C_char *bv)
3563{
3564    C_u32 c;
3565    int e;
3566    utf8_decode(bv, &c, &e);
3567    return C_make_character(c);
3568}
3569
3570C_regparm C_word C_utf_advance(C_word bv, C_word pos)
3571{
3572    C_char *p1 = (C_char *)C_data_pointer(bv) + C_unfix(pos);
3573    C_u32 c;
3574    int e;
3575    C_char *p2 = utf8_decode(p1, &c, &e);
3576    return C_fix(C_unfix(pos) + p2 - p1);
3577}
3578
3579C_regparm C_word C_utf_insert(C_word bv, C_word pos, C_word c)
3580{
3581    C_char *p1 = C_c_string(bv) + C_unfix(pos);
3582    C_char *p2 = utf8_encode(C_character_code(c), p1);
3583    return C_fix(C_unfix(pos) + p2 - p1);
3584}
3585
3586C_regparm C_word C_utf_fill(C_word bv, C_word chr)
3587{
3588    char buf[ 5 ];
3589    int size = C_header_size(bv) - 1;
3590    int len = C_utf_encode(buf, C_character_code(chr)) - buf;
3591    C_char *p;
3592    int n;
3593
3594    if(len == 1) {
3595        C_memset(C_data_pointer(bv), *buf, size);
3596        return bv;
3597    }
3598
3599    p = C_data_pointer(bv);
3600    n = size / len;
3601
3602    while(n--) {
3603        C_memcpy(p, buf, len);
3604        p += len;
3605    }
3606    ((C_char *)C_data_pointer(bv))[ size ] = 0; /* terminating zero */
3607    return bv;
3608}
3609
3610C_regparm int C_utf_expect(int byte)
3611{
3612    int len = lengths[ byte >> 3 ];
3613    return len + !len;
3614}
3615
3616/* take bytevector section and compute full + incomplete codepoints */
3617C_regparm C_word C_utf_fragment_counts(C_word bv, C_word pos, C_word len)
3618{
3619    int full = 0;
3620    C_uchar *ptr = C_data_pointer(bv) + C_unfix(pos);
3621    int count = C_unfix(len);
3622
3623    while(count) {
3624        unsigned int byte = *(ptr++);
3625        int n = lengths[ byte >> 3 ];
3626        int bn = n + !n;
3627        if(count >= bn) {
3628            ++full;
3629            count -= bn;
3630            ptr += bn - 1;
3631        } else return C_fix((full << 4) | (bn - count));
3632    }
3633
3634    return C_fix(full << 4);
3635}
3636
3637C_regparm void C_utf_putc(int chr, C_FILEPTR fp)
3638{
3639    C_char buf[ 5 ];
3640    C_char *p = utf8_encode(chr, buf);
3641    *p = '\0';
3642    C_fputs(buf, fp);
3643}
3644
3645C_regparm C_word C_utf_list_size(C_word lst)
3646{
3647    int n = 0;
3648    while(!C_immediatep(lst) && C_header_bits(lst) == C_PAIR_TYPE) {
3649        C_word x = C_block_item(lst, 0);
3650        if(((x) & C_IMMEDIATE_TYPE_BITS) == C_CHARACTER_BITS)
3651            n += C_unfix(C_utf_bytes(x));
3652        lst = C_block_item(lst, 1);
3653    }
3654    return C_fix(n);
3655}
3656
3657C_regparm C_word C_latin_to_utf(C_word from, C_word to, C_word start, C_word len)
3658{
3659    int n = C_unfix(len);
3660    C_uchar *pf = (C_uchar *)C_c_string(from) + C_unfix(start);
3661    C_char *pt = C_c_string(to), *pt0 = pt;
3662    while(n-- > 0) {
3663        C_u32 c = *(pf++);
3664        pt = utf8_encode(c, pt);
3665    }
3666    return C_fix(pt - pt0);
3667}
3668
3669C_regparm C_word C_utf_to_latin(C_word from, C_word to, C_word start, C_word blen)
3670{
3671    int n = C_unfix(blen);
3672    C_char *pf = C_c_string(from) + C_unfix(start), *pf2;
3673    C_char *pt = C_c_string(to), *pt0 = pt;
3674    C_u32 c;
3675    int e;
3676    while(n > 0) {
3677        pf2 = utf8_decode(pf, &c, &e);
3678        n -= pf2 - pf;
3679        pf = pf2;
3680        *(pt++) = c & 0xff;
3681    }
3682    *pt = '\0';
3683    return C_fix(pt - pt0);
3684}
3685
3686C_regparm C_word C_utf_char_foldcase(C_word c)
3687{
3688    int r = C_character_code(c);
3689    int *m = bsearch(&r, fold1, nelem(fold1), sizeof(*fold1), &runemapcmp);
3690    if(m) return C_make_character(m[ 1 ]);
3691    return c;
3692}
3693
3694C_regparm C_word C_utf_string_foldcase(C_word from, C_word to, C_word len)
3695{
3696    C_u32 c;
3697    int e;
3698    C_char *pf = C_c_string(from), *pf2;
3699    C_char *pt = C_c_string(to), *pt0 = pt;
3700    int count = C_unfix(len);
3701    while(count > 0) {
3702        pf2 = utf8_decode(pf, &c, &e);
3703        if(!e) {
3704            int r = c;
3705            int *m = bsearch(&r, fold2, nelem(fold2), sizeof(*fold2), &runemapcmp);
3706            if(m) {
3707                pt = utf8_encode(m[ 1 ], pt);
3708                if(m[ 3 ] != 0) {
3709                    pt = utf8_encode(m[ 2 ], pt);
3710                    c = m[ 3 ];
3711                } else c = m[ 2 ];
3712            } else {
3713                m = bsearch(&r, fold1, nelem(fold1), sizeof(*fold1), &runemapcmp);
3714                if(m) c = m[ 1 ];
3715            }
3716        }
3717        pt = utf8_encode(c, pt);
3718        count -= pf2 - pf;
3719        pf = pf2;
3720    }
3721    return C_fix(pt - pt0);
3722}
3723
3724C_regparm C_word C_utf_string_downcase(C_word from, C_word to, C_word len)
3725{
3726    C_u32 c;
3727    int e;
3728    C_char *pf = C_c_string(from), *pf2;
3729    C_char *pt = C_c_string(to), *pt0 = pt;
3730    int count = C_unfix(len);
3731    while(count > 0) {
3732        pf2 = utf8_decode(pf, &c, &e);
3733        if(!e) c = C_utf_char_downcase(c);
3734        pt = utf8_encode(c, pt);
3735        count -= pf2 - pf;
3736        pf = pf2;
3737    }
3738    return C_fix(pt - pt0);
3739}
3740
3741C_regparm C_word C_utf_string_upcase(C_word from, C_word to, C_word len)
3742{
3743    C_u32 c;
3744    int e;
3745    C_char *pf = C_c_string(from), *pf2;
3746    C_char *pt = C_c_string(to), *pt0 = pt;
3747    int count = C_unfix(len);
3748    while(count > 0) {
3749        pf2 = utf8_decode(pf, &c, &e);
3750        if(!e) c = C_utf_char_upcase(c);
3751        pt = utf8_encode(c, pt);
3752        count -= pf2 - pf;
3753        pf = pf2;
3754    }
3755    return C_fix(pt - pt0);
3756}
3757
3758#if defined(_WIN32) && !defined(__CYGWIN__)
3759#define C_WCHAR_FNBUF_SIZE	2048
3760static C_WCHAR fnbuf[ C_WCHAR_FNBUF_SIZE ], *pfnbuf;
3761C_regparm C_WCHAR *C_utf16(C_word bv, int cont)
3762{
3763	int len = C_header_size(bv) - 1;
3764	C_WCHAR *p;
3765	if(!cont) pfnbuf = fnbuf;
3766	p = pfnbuf;
3767	int n = MultiByteToWideChar(CP_UTF8,
3768		0,
3769		C_c_string(bv),
3770		-1,
3771		pfnbuf,
3772		C_WCHAR_FNBUF_SIZE - (pfnbuf - fnbuf));
3773	if(n == 0) C_decoding_error(bv, C_fix(0));
3774	pfnbuf += n;
3775	return p;
3776}
3777
3778C_regparm C_char *C_utf8(C_WCHAR *str)
3779{
3780	int n = WideCharToMultiByte(CP_UTF8,
3781		0,
3782		str,
3783		-1,
3784		(C_char *)fnbuf,
3785		C_WCHAR_FNBUF_SIZE,
3786		NULL, NULL);
3787	if(n == 0) C_decoding_error(C_SCHEME_UNDEFINED, C_fix(0));
3788	((C_char *)fnbuf)[ n ] = '\0';
3789	return (C_char *)fnbuf;
3790}
3791#endif
Trap