/* utf.c - UTF-8 support code
;
; Copyright (c) 2022-2022, The CHICKEN Team
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following
; conditions are met:
;
; Redistributions of source code must retain the above copyright notice, this list of conditions and the following
; disclaimer.
; Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
; disclaimer in the documentation and/or other materials provided with the distribution.
; Neither the name of the author nor the names of its contributors may be used to endorse or promote
; products derived from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
; AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
; OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
; POSSIBILITY OF SUCH DAMAGE.
*/
26
27
28#include "chicken.h"
29
30#if defined(_WIN32) && !defined(__CYGWIN__)
31# include <windows.h>
32#endif
33
/* partially generated by mkrunetype.awk from http://git.suckless.org/ubase/
   see LICENSE for more information */
36
/* Number of elements (rows, for a 2-D table) in the array x.
   Only valid on a true array object, never on a pointer. */
#define nelem(x) (sizeof (x) / sizeof *(x))
38
/*
 * bsearch() comparator: v1 points at the key code point, v2 at a table row
 * whose first int is the code point to compare against.
 *
 * Returns <0, 0 or >0.  The comparison is done without subtraction so that
 * extreme key values cannot overflow a signed int.
 */
static int
rune1cmp(const void *v1, const void *v2)
{
    int key = *(const int *)v1;
    int entry = *(const int *)v2;

    return (key > entry) - (key < entry);
}
46
/*
 * bsearch() comparator for range tables: v1 points at the key code point,
 * v2 at a row whose first two ints are the inclusive bounds [first, last].
 *
 * Returns 0 when the key lies inside the range, <0 when it is below `first`
 * and >0 when it is above `last`.  Plain comparisons are used instead of
 * `r - first` so that extreme key values cannot overflow a signed int.
 */
static int
rune2cmp(const void *v1, const void *v2)
{
    int key = *(const int *)v1;
    const int *range = (const int *)v2;

    if (key < range[0])
        return -1;
    if (key > range[1])
        return 1;
    return 0;
}
57
/*
 * Comparator ordering two entries by the int each pointer refers to.
 * Returns <0, 0 or >0; written without subtraction so the result cannot
 * overflow a signed int.
 */
static int
runemapcmp(const void *v1, const void *v2)
{
    int lhs = *(const int *)v1;
    int rhs = *(const int *)v2;

    return (lhs > rhs) - (lhs < rhs);
}
63
/* generated by mkrunetype.awk from http://git.suckless.org/ubase/
   see LICENSE for more information */
66
/*
 * Inclusive { first, last } ranges, sorted for bsearch() with rune2cmp.
 * As used by C_utf_isupper and C_utf_char_downcase: inside a range, a code
 * point at an even offset from `first` is upper case and its lower-case
 * form is the next code point (r + 1).
 */
static int upper3[][2] = {
    { 0x0100, 0x012E },
    { 0x0132, 0x0136 },
    { 0x0139, 0x0147 },
    { 0x014A, 0x0176 },
    { 0x0179, 0x017D },
    { 0x0182, 0x0184 },
    { 0x01A0, 0x01A4 },
    { 0x01B3, 0x01B5 },
    { 0x01CD, 0x01DB },
    { 0x01DE, 0x01EE },
    { 0x01F8, 0x021E },
    { 0x0222, 0x0232 },
    { 0x0246, 0x024E },
    { 0x0370, 0x0372 },
    { 0x03D8, 0x03EE },
    { 0x0460, 0x0480 },
    { 0x048A, 0x04BE },
    { 0x04C1, 0x04CD },
    { 0x04D0, 0x052E },
    { 0x1E00, 0x1E94 },
    { 0x1EA0, 0x1EFE },
    { 0x2C67, 0x2C6B },
    { 0x2C80, 0x2CE2 },
    { 0x2CEB, 0x2CED },
    { 0xA640, 0xA66C },
    { 0xA680, 0xA69A },
    { 0xA722, 0xA72E },
    { 0xA732, 0xA76E },
    { 0xA779, 0xA77B },
    { 0xA77E, 0xA786 },
    { 0xA790, 0xA792 },
    { 0xA796, 0xA7A8 },
    { 0xA7B4, 0xA7C2 },
    { 0xA7C7, 0xA7C9 },
    { 0xA7D6, 0xA7D8 },
};
104
/*
 * { first, last, lower_first } rows, sorted for bsearch() with rune2cmp.
 * As used by C_utf_char_downcase: a code point r in [first, last] maps to
 * lower_first + (r - first).  Rows whose third value equals `first` map
 * each code point to itself.
 */
static int upper2[][3] = {
    { 0x0041, 0x005A, 0x0061 },
    { 0x00C0, 0x00D6, 0x00E0 },
    { 0x00D8, 0x00DE, 0x00F8 },
    { 0x0189, 0x018A, 0x0256 },
    { 0x01B1, 0x01B2, 0x028A },
    { 0x0388, 0x038A, 0x03AD },
    { 0x038E, 0x038F, 0x03CD },
    { 0x0391, 0x03A1, 0x03B1 },
    { 0x03A3, 0x03AB, 0x03C3 },
    { 0x03D2, 0x03D4, 0x03D2 },
    { 0x03FD, 0x03FF, 0x037B },
    { 0x0400, 0x040F, 0x0450 },
    { 0x0410, 0x042F, 0x0430 },
    { 0x0531, 0x0556, 0x0561 },
    { 0x10A0, 0x10C5, 0x2D00 },
    { 0x13A0, 0x13EF, 0xAB70 },
    { 0x13F0, 0x13F5, 0x13F8 },
    { 0x1C90, 0x1CBA, 0x10D0 },
    { 0x1CBD, 0x1CBF, 0x10FD },
    { 0x1F08, 0x1F0F, 0x1F00 },
    { 0x1F18, 0x1F1D, 0x1F10 },
    { 0x1F28, 0x1F2F, 0x1F20 },
    { 0x1F38, 0x1F3F, 0x1F30 },
    { 0x1F48, 0x1F4D, 0x1F40 },
    { 0x1F68, 0x1F6F, 0x1F60 },
    { 0x1FB8, 0x1FB9, 0x1FB0 },
    { 0x1FBA, 0x1FBB, 0x1F70 },
    { 0x1FC8, 0x1FCB, 0x1F72 },
    { 0x1FD8, 0x1FD9, 0x1FD0 },
    { 0x1FDA, 0x1FDB, 0x1F76 },
    { 0x1FE8, 0x1FE9, 0x1FE0 },
    { 0x1FEA, 0x1FEB, 0x1F7A },
    { 0x1FF8, 0x1FF9, 0x1F78 },
    { 0x1FFA, 0x1FFB, 0x1F7C },
    { 0x210B, 0x210D, 0x210B },
    { 0x2110, 0x2112, 0x2110 },
    { 0x2119, 0x211D, 0x2119 },
    { 0x212C, 0x212D, 0x212C },
    { 0x2130, 0x2131, 0x2130 },
    { 0x213E, 0x213F, 0x213E },
    { 0x2C00, 0x2C2F, 0x2C30 },
    { 0x2C7E, 0x2C7F, 0x023F },
    { 0xFF21, 0xFF3A, 0xFF41 },
    { 0x10400, 0x10427, 0x10428 },
    { 0x104B0, 0x104D3, 0x104D8 },
    { 0x10570, 0x1057A, 0x10597 },
    { 0x1057C, 0x1058A, 0x105A3 },
    { 0x1058C, 0x10592, 0x105B3 },
    { 0x10594, 0x10595, 0x105BB },
    { 0x10C80, 0x10CB2, 0x10CC0 },
    { 0x118A0, 0x118BF, 0x118C0 },
    { 0x16E40, 0x16E5F, 0x16E60 },
    { 0x1D400, 0x1D419, 0x1D400 },
    { 0x1D434, 0x1D44D, 0x1D434 },
    { 0x1D468, 0x1D481, 0x1D468 },
    { 0x1D49E, 0x1D49F, 0x1D49E },
    { 0x1D4A5, 0x1D4A6, 0x1D4A5 },
    { 0x1D4A9, 0x1D4AC, 0x1D4A9 },
    { 0x1D4AE, 0x1D4B5, 0x1D4AE },
    { 0x1D4D0, 0x1D4E9, 0x1D4D0 },
    { 0x1D504, 0x1D505, 0x1D504 },
    { 0x1D507, 0x1D50A, 0x1D507 },
    { 0x1D50D, 0x1D514, 0x1D50D },
    { 0x1D516, 0x1D51C, 0x1D516 },
    { 0x1D538, 0x1D539, 0x1D538 },
    { 0x1D53B, 0x1D53E, 0x1D53B },
    { 0x1D540, 0x1D544, 0x1D540 },
    { 0x1D54A, 0x1D550, 0x1D54A },
    { 0x1D56C, 0x1D585, 0x1D56C },
    { 0x1D5A0, 0x1D5B9, 0x1D5A0 },
    { 0x1D5D4, 0x1D5ED, 0x1D5D4 },
    { 0x1D608, 0x1D621, 0x1D608 },
    { 0x1D63C, 0x1D655, 0x1D63C },
    { 0x1D670, 0x1D689, 0x1D670 },
    { 0x1D6A8, 0x1D6C0, 0x1D6A8 },
    { 0x1D6E2, 0x1D6FA, 0x1D6E2 },
    { 0x1D71C, 0x1D734, 0x1D71C },
    { 0x1D756, 0x1D76E, 0x1D756 },
    { 0x1D790, 0x1D7A8, 0x1D790 },
    { 0x1E900, 0x1E921, 0x1E922 },
};
187
/*
 * { upper, lower } single-code-point rows, sorted by `upper` for bsearch()
 * with rune1cmp.  As used by C_utf_char_downcase: a hit on `upper` yields
 * `lower`.  Rows with equal values map the code point to itself.
 */
static int upper1[][2] = {
    { 0x0130, 0x0069 },
    { 0x0178, 0x00FF },
    { 0x0181, 0x0253 },
    { 0x0186, 0x0254 },
    { 0x0187, 0x0188 },
    { 0x018B, 0x018C },
    { 0x018E, 0x01DD },
    { 0x018F, 0x0259 },
    { 0x0190, 0x025B },
    { 0x0191, 0x0192 },
    { 0x0193, 0x0260 },
    { 0x0194, 0x0263 },
    { 0x0196, 0x0269 },
    { 0x0197, 0x0268 },
    { 0x0198, 0x0199 },
    { 0x019C, 0x026F },
    { 0x019D, 0x0272 },
    { 0x019F, 0x0275 },
    { 0x01A6, 0x0280 },
    { 0x01A7, 0x01A8 },
    { 0x01A9, 0x0283 },
    { 0x01AC, 0x01AD },
    { 0x01AE, 0x0288 },
    { 0x01AF, 0x01B0 },
    { 0x01B7, 0x0292 },
    { 0x01B8, 0x01B9 },
    { 0x01BC, 0x01BD },
    { 0x01C4, 0x01C6 },
    { 0x01C7, 0x01C9 },
    { 0x01CA, 0x01CC },
    { 0x01F1, 0x01F3 },
    { 0x01F4, 0x01F5 },
    { 0x01F6, 0x0195 },
    { 0x01F7, 0x01BF },
    { 0x0220, 0x019E },
    { 0x023A, 0x2C65 },
    { 0x023B, 0x023C },
    { 0x023D, 0x019A },
    { 0x023E, 0x2C66 },
    { 0x0241, 0x0242 },
    { 0x0243, 0x0180 },
    { 0x0244, 0x0289 },
    { 0x0245, 0x028C },
    { 0x0376, 0x0377 },
    { 0x037F, 0x03F3 },
    { 0x0386, 0x03AC },
    { 0x038C, 0x03CC },
    { 0x03CF, 0x03D7 },
    { 0x03F4, 0x03B8 },
    { 0x03F7, 0x03F8 },
    { 0x03F9, 0x03F2 },
    { 0x03FA, 0x03FB },
    { 0x04C0, 0x04CF },
    { 0x10C7, 0x2D27 },
    { 0x10CD, 0x2D2D },
    { 0x1E9E, 0x00DF },
    { 0x1F59, 0x1F51 },
    { 0x1F5B, 0x1F53 },
    { 0x1F5D, 0x1F55 },
    { 0x1F5F, 0x1F57 },
    { 0x1FEC, 0x1FE5 },
    { 0x2102, 0x2102 },
    { 0x2107, 0x2107 },
    { 0x2115, 0x2115 },
    { 0x2124, 0x2124 },
    { 0x2126, 0x03C9 },
    { 0x2128, 0x2128 },
    { 0x212A, 0x006B },
    { 0x212B, 0x00E5 },
    { 0x2132, 0x214E },
    { 0x2133, 0x2133 },
    { 0x2145, 0x2145 },
    { 0x2183, 0x2184 },
    { 0x2C60, 0x2C61 },
    { 0x2C62, 0x026B },
    { 0x2C63, 0x1D7D },
    { 0x2C64, 0x027D },
    { 0x2C6D, 0x0251 },
    { 0x2C6E, 0x0271 },
    { 0x2C6F, 0x0250 },
    { 0x2C70, 0x0252 },
    { 0x2C72, 0x2C73 },
    { 0x2C75, 0x2C76 },
    { 0x2CF2, 0x2CF3 },
    { 0xA77D, 0x1D79 },
    { 0xA78B, 0xA78C },
    { 0xA78D, 0x0265 },
    { 0xA7AA, 0x0266 },
    { 0xA7AB, 0x025C },
    { 0xA7AC, 0x0261 },
    { 0xA7AD, 0x026C },
    { 0xA7AE, 0x026A },
    { 0xA7B0, 0x029E },
    { 0xA7B1, 0x0287 },
    { 0xA7B2, 0x029D },
    { 0xA7B3, 0xAB53 },
    { 0xA7C4, 0xA794 },
    { 0xA7C5, 0x0282 },
    { 0xA7C6, 0x1D8E },
    { 0xA7D0, 0xA7D1 },
    { 0xA7F5, 0xA7F6 },
    { 0x1D49C, 0x1D49C },
    { 0x1D4A2, 0x1D4A2 },
    { 0x1D546, 0x1D546 },
    { 0x1D7CA, 0x1D7CA },
};
295
296C_regparm int C_utf_isupper(int r)
297{
298 int *match;
299
300 if((match = bsearch(&r, upper3, nelem(upper3), sizeof *upper3, &rune2cmp)))
301 return !((r - match[0]) % 2);
302 if(bsearch(&r, upper2, nelem(upper2), sizeof *upper2, &rune2cmp))
303 return 1;
304 if(bsearch(&r, upper1, nelem(upper1), sizeof *upper1, &rune1cmp))
305 return 1;
306 return 0;
307}
308
309C_regparm int C_utf_char_downcase(int r)
310{
311 int *match;
312
313 match = bsearch(&r, upper3, nelem(upper3), sizeof *upper3, &rune2cmp);
314 if (match)
315 return ((r - match[0]) % 2) ? r : r + 1;
316 match = bsearch(&r, upper2, nelem(upper2), sizeof *upper2, &rune2cmp);
317 if (match)
318 return match[2] + (r - match[0]);
319 match = bsearch(&r, upper1, nelem(upper1), sizeof *upper1, &rune1cmp);
320 if (match)
321 return match[1];
322 return r;
323}
324
/*
 * Inclusive { first, last } ranges, sorted for bsearch() with rune2cmp.
 * As used by C_utf_islower and C_utf_char_upcase: inside a range, a code
 * point at an even offset from `first` is lower case and its upper-case
 * form is the previous code point (r - 1).
 */
static int lower4[][2] = {
    { 0x0101, 0x012F },
    { 0x0133, 0x0137 },
    { 0x013A, 0x0148 },
    { 0x014B, 0x0177 },
    { 0x017A, 0x017E },
    { 0x0183, 0x0185 },
    { 0x01A1, 0x01A5 },
    { 0x01B4, 0x01B6 },
    { 0x01CE, 0x01DC },
    { 0x01DF, 0x01EF },
    { 0x01F9, 0x021F },
    { 0x0223, 0x0233 },
    { 0x0247, 0x024F },
    { 0x0371, 0x0373 },
    { 0x03D9, 0x03EF },
    { 0x0461, 0x0481 },
    { 0x048B, 0x04BF },
    { 0x04C2, 0x04CE },
    { 0x04D1, 0x052F },
    { 0x1E01, 0x1E95 },
    { 0x1EA1, 0x1EFF },
    { 0x2C68, 0x2C6C },
    { 0x2C81, 0x2CE3 },
    { 0x2CEC, 0x2CEE },
    { 0xA641, 0xA66D },
    { 0xA681, 0xA69B },
    { 0xA723, 0xA72F },
    { 0xA733, 0xA76F },
    { 0xA77A, 0xA77C },
    { 0xA77F, 0xA787 },
    { 0xA791, 0xA793 },
    { 0xA797, 0xA7A9 },
    { 0xA7B5, 0xA7C3 },
    { 0xA7C8, 0xA7CA },
    { 0xA7D7, 0xA7D9 },
};
362
/*
 * { first, last, upper_first } rows, sorted for bsearch() with rune2cmp.
 * As used by C_utf_char_upcase: a code point r in [first, last] maps to
 * upper_first + (r - first).  Rows whose third value equals `first` map
 * each code point to itself.
 */
static int lower2[][3] = {
    { 0x0061, 0x007A, 0x0041 },
    { 0x00E0, 0x00F6, 0x00C0 },
    { 0x00F8, 0x00FE, 0x00D8 },
    { 0x01AA, 0x01AB, 0x01AA },
    { 0x0234, 0x0239, 0x0234 },
    { 0x023F, 0x0240, 0x2C7E },
    { 0x0256, 0x0257, 0x0189 },
    { 0x025D, 0x025F, 0x025D },
    { 0x026D, 0x026E, 0x026D },
    { 0x0273, 0x0274, 0x0273 },
    { 0x0276, 0x027C, 0x0276 },
    { 0x027E, 0x027F, 0x027E },
    { 0x0284, 0x0286, 0x0284 },
    { 0x028A, 0x028B, 0x01B1 },
    { 0x028D, 0x0291, 0x028D },
    { 0x0295, 0x029C, 0x0295 },
    { 0x029F, 0x02AF, 0x029F },
    { 0x037B, 0x037D, 0x03FD },
    { 0x03AD, 0x03AF, 0x0388 },
    { 0x03B1, 0x03C1, 0x0391 },
    { 0x03C3, 0x03CB, 0x03A3 },
    { 0x03CD, 0x03CE, 0x038E },
    { 0x0430, 0x044F, 0x0410 },
    { 0x0450, 0x045F, 0x0400 },
    { 0x0561, 0x0586, 0x0531 },
    { 0x0587, 0x0588, 0x0587 },
    { 0x10D0, 0x10FA, 0x1C90 },
    { 0x10FD, 0x10FF, 0x1CBD },
    { 0x13F8, 0x13FD, 0x13F0 },
    { 0x1C83, 0x1C84, 0x0421 },
    { 0x1D00, 0x1D2B, 0x1D00 },
    { 0x1D6B, 0x1D77, 0x1D6B },
    { 0x1D7A, 0x1D7C, 0x1D7A },
    { 0x1D7E, 0x1D8D, 0x1D7E },
    { 0x1D8F, 0x1D9A, 0x1D8F },
    { 0x1E96, 0x1E9A, 0x1E96 },
    { 0x1E9C, 0x1E9D, 0x1E9C },
    { 0x1F00, 0x1F07, 0x1F08 },
    { 0x1F10, 0x1F15, 0x1F18 },
    { 0x1F20, 0x1F27, 0x1F28 },
    { 0x1F30, 0x1F37, 0x1F38 },
    { 0x1F40, 0x1F45, 0x1F48 },
    { 0x1F60, 0x1F67, 0x1F68 },
    { 0x1F70, 0x1F71, 0x1FBA },
    { 0x1F72, 0x1F75, 0x1FC8 },
    { 0x1F76, 0x1F77, 0x1FDA },
    { 0x1F78, 0x1F79, 0x1FF8 },
    { 0x1F7A, 0x1F7B, 0x1FEA },
    { 0x1F7C, 0x1F7D, 0x1FFA },
    { 0x1F80, 0x1F87, 0x1F88 },
    { 0x1F90, 0x1F97, 0x1F98 },
    { 0x1FA0, 0x1FA7, 0x1FA8 },
    { 0x1FB0, 0x1FB1, 0x1FB8 },
    { 0x1FB6, 0x1FB7, 0x1FB6 },
    { 0x1FC6, 0x1FC7, 0x1FC6 },
    { 0x1FD0, 0x1FD1, 0x1FD8 },
    { 0x1FD2, 0x1FD3, 0x1FD2 },
    { 0x1FD6, 0x1FD7, 0x1FD6 },
    { 0x1FE0, 0x1FE1, 0x1FE8 },
    { 0x1FE2, 0x1FE4, 0x1FE2 },
    { 0x1FE6, 0x1FE7, 0x1FE6 },
    { 0x1FF6, 0x1FF7, 0x1FF6 },
    { 0x210E, 0x210F, 0x210E },
    { 0x213C, 0x213D, 0x213C },
    { 0x2146, 0x2149, 0x2146 },
    { 0x2C30, 0x2C5F, 0x2C00 },
    { 0x2C77, 0x2C7B, 0x2C77 },
    { 0x2D00, 0x2D25, 0x10A0 },
    { 0xA730, 0xA731, 0xA730 },
    { 0xA771, 0xA778, 0xA771 },
    { 0xAB30, 0xAB52, 0xAB30 },
    { 0xAB54, 0xAB5A, 0xAB54 },
    { 0xAB60, 0xAB68, 0xAB60 },
    { 0xAB70, 0xABBF, 0x13A0 },
    { 0xFB00, 0xFB06, 0xFB00 },
    { 0xFB13, 0xFB17, 0xFB13 },
    { 0xFF41, 0xFF5A, 0xFF21 },
    { 0x10428, 0x1044F, 0x10400 },
    { 0x104D8, 0x104FB, 0x104B0 },
    { 0x10597, 0x105A1, 0x10570 },
    { 0x105A3, 0x105B1, 0x1057C },
    { 0x105B3, 0x105B9, 0x1058C },
    { 0x105BB, 0x105BC, 0x10594 },
    { 0x10CC0, 0x10CF2, 0x10C80 },
    { 0x118C0, 0x118DF, 0x118A0 },
    { 0x16E60, 0x16E7F, 0x16E40 },
    { 0x1D41A, 0x1D433, 0x1D41A },
    { 0x1D44E, 0x1D454, 0x1D44E },
    { 0x1D456, 0x1D467, 0x1D456 },
    { 0x1D482, 0x1D49B, 0x1D482 },
    { 0x1D4B6, 0x1D4B9, 0x1D4B6 },
    { 0x1D4BD, 0x1D4C3, 0x1D4BD },
    { 0x1D4C5, 0x1D4CF, 0x1D4C5 },
    { 0x1D4EA, 0x1D503, 0x1D4EA },
    { 0x1D51E, 0x1D537, 0x1D51E },
    { 0x1D552, 0x1D56B, 0x1D552 },
    { 0x1D586, 0x1D59F, 0x1D586 },
    { 0x1D5BA, 0x1D5D3, 0x1D5BA },
    { 0x1D5EE, 0x1D607, 0x1D5EE },
    { 0x1D622, 0x1D63B, 0x1D622 },
    { 0x1D656, 0x1D66F, 0x1D656 },
    { 0x1D68A, 0x1D6A5, 0x1D68A },
    { 0x1D6C2, 0x1D6DA, 0x1D6C2 },
    { 0x1D6DC, 0x1D6E1, 0x1D6DC },
    { 0x1D6FC, 0x1D714, 0x1D6FC },
    { 0x1D716, 0x1D71B, 0x1D716 },
    { 0x1D736, 0x1D74E, 0x1D736 },
    { 0x1D750, 0x1D755, 0x1D750 },
    { 0x1D770, 0x1D788, 0x1D770 },
    { 0x1D78A, 0x1D78F, 0x1D78A },
    { 0x1D7AA, 0x1D7C2, 0x1D7AA },
    { 0x1D7C4, 0x1D7C9, 0x1D7C4 },
    { 0x1DF00, 0x1DF09, 0x1DF00 },
    { 0x1DF0B, 0x1DF1E, 0x1DF0B },
    { 0x1E922, 0x1E943, 0x1E900 },
};
480
/*
 * { lower, upper } single-code-point rows, sorted by `lower` for bsearch()
 * with rune1cmp.  As used by C_utf_char_upcase: a hit on `lower` yields
 * `upper`.  Rows with equal values map the code point to itself.
 */
static int lower1[][2] = {
    { 0x00B5, 0x039C },
    { 0x00DF, 0x00DF },
    { 0x00FF, 0x0178 },
    { 0x0131, 0x0049 },
    { 0x0138, 0x0138 },
    { 0x0149, 0x0149 },
    { 0x017F, 0x0053 },
    { 0x0180, 0x0243 },
    { 0x0188, 0x0187 },
    { 0x018C, 0x018B },
    { 0x018D, 0x018D },
    { 0x0192, 0x0191 },
    { 0x0195, 0x01F6 },
    { 0x0199, 0x0198 },
    { 0x019A, 0x023D },
    { 0x019B, 0x019B },
    { 0x019E, 0x0220 },
    { 0x01A8, 0x01A7 },
    { 0x01AD, 0x01AC },
    { 0x01B0, 0x01AF },
    { 0x01B9, 0x01B8 },
    { 0x01BA, 0x01BA },
    { 0x01BD, 0x01BC },
    { 0x01BE, 0x01BE },
    { 0x01BF, 0x01F7 },
    { 0x01C6, 0x01C4 },
    { 0x01C9, 0x01C7 },
    { 0x01CC, 0x01CA },
    { 0x01DD, 0x018E },
    { 0x01F0, 0x01F0 },
    { 0x01F3, 0x01F1 },
    { 0x01F5, 0x01F4 },
    { 0x0221, 0x0221 },
    { 0x023C, 0x023B },
    { 0x0242, 0x0241 },
    { 0x0250, 0x2C6F },
    { 0x0251, 0x2C6D },
    { 0x0252, 0x2C70 },
    { 0x0253, 0x0181 },
    { 0x0254, 0x0186 },
    { 0x0255, 0x0255 },
    { 0x0258, 0x0258 },
    { 0x0259, 0x018F },
    { 0x025A, 0x025A },
    { 0x025B, 0x0190 },
    { 0x025C, 0xA7AB },
    { 0x0260, 0x0193 },
    { 0x0261, 0xA7AC },
    { 0x0262, 0x0262 },
    { 0x0263, 0x0194 },
    { 0x0264, 0x0264 },
    { 0x0265, 0xA78D },
    { 0x0266, 0xA7AA },
    { 0x0267, 0x0267 },
    { 0x0268, 0x0197 },
    { 0x0269, 0x0196 },
    { 0x026A, 0xA7AE },
    { 0x026B, 0x2C62 },
    { 0x026C, 0xA7AD },
    { 0x026F, 0x019C },
    { 0x0270, 0x0270 },
    { 0x0271, 0x2C6E },
    { 0x0272, 0x019D },
    { 0x0275, 0x019F },
    { 0x027D, 0x2C64 },
    { 0x0280, 0x01A6 },
    { 0x0281, 0x0281 },
    { 0x0282, 0xA7C5 },
    { 0x0283, 0x01A9 },
    { 0x0287, 0xA7B1 },
    { 0x0288, 0x01AE },
    { 0x0289, 0x0244 },
    { 0x028C, 0x0245 },
    { 0x0292, 0x01B7 },
    { 0x0293, 0x0293 },
    { 0x029D, 0xA7B2 },
    { 0x029E, 0xA7B0 },
    { 0x0377, 0x0376 },
    { 0x0390, 0x0390 },
    { 0x03AC, 0x0386 },
    { 0x03B0, 0x03B0 },
    { 0x03C2, 0x03A3 },
    { 0x03CC, 0x038C },
    { 0x03D0, 0x0392 },
    { 0x03D1, 0x0398 },
    { 0x03D5, 0x03A6 },
    { 0x03D6, 0x03A0 },
    { 0x03D7, 0x03CF },
    { 0x03F0, 0x039A },
    { 0x03F1, 0x03A1 },
    { 0x03F2, 0x03F9 },
    { 0x03F3, 0x037F },
    { 0x03F5, 0x0395 },
    { 0x03F8, 0x03F7 },
    { 0x03FB, 0x03FA },
    { 0x03FC, 0x03FC },
    { 0x04CF, 0x04C0 },
    { 0x0560, 0x0560 },
    { 0x1C80, 0x0412 },
    { 0x1C81, 0x0414 },
    { 0x1C82, 0x041E },
    { 0x1C85, 0x0422 },
    { 0x1C86, 0x042A },
    { 0x1C87, 0x0462 },
    { 0x1C88, 0xA64A },
    { 0x1D79, 0xA77D },
    { 0x1D7D, 0x2C63 },
    { 0x1D8E, 0xA7C6 },
    { 0x1E9B, 0x1E60 },
    { 0x1E9F, 0x1E9F },
    { 0x1F50, 0x1F50 },
    { 0x1F51, 0x1F59 },
    { 0x1F52, 0x1F52 },
    { 0x1F53, 0x1F5B },
    { 0x1F54, 0x1F54 },
    { 0x1F55, 0x1F5D },
    { 0x1F56, 0x1F56 },
    { 0x1F57, 0x1F5F },
    { 0x1FB2, 0x1FB2 },
    { 0x1FB3, 0x1FBC },
    { 0x1FB4, 0x1FB4 },
    { 0x1FBE, 0x0399 },
    { 0x1FC2, 0x1FC2 },
    { 0x1FC3, 0x1FCC },
    { 0x1FC4, 0x1FC4 },
    { 0x1FE5, 0x1FEC },
    { 0x1FF2, 0x1FF2 },
    { 0x1FF3, 0x1FFC },
    { 0x1FF4, 0x1FF4 },
    { 0x210A, 0x210A },
    { 0x2113, 0x2113 },
    { 0x212F, 0x212F },
    { 0x2134, 0x2134 },
    { 0x2139, 0x2139 },
    { 0x214E, 0x2132 },
    { 0x2184, 0x2183 },
    { 0x2C61, 0x2C60 },
    { 0x2C65, 0x023A },
    { 0x2C66, 0x023E },
    { 0x2C71, 0x2C71 },
    { 0x2C73, 0x2C72 },
    { 0x2C74, 0x2C74 },
    { 0x2C76, 0x2C75 },
    { 0x2CE4, 0x2CE4 },
    { 0x2CF3, 0x2CF2 },
    { 0x2D27, 0x10C7 },
    { 0x2D2D, 0x10CD },
    { 0xA78C, 0xA78B },
    { 0xA78E, 0xA78E },
    { 0xA794, 0xA7C4 },
    { 0xA795, 0xA795 },
    { 0xA7AF, 0xA7AF },
    { 0xA7D1, 0xA7D0 },
    { 0xA7D3, 0xA7D3 },
    { 0xA7D5, 0xA7D5 },
    { 0xA7F6, 0xA7F5 },
    { 0xA7FA, 0xA7FA },
    { 0xAB53, 0xA7B3 },
    { 0x1D4BB, 0x1D4BB },
    { 0x1D7CB, 0x1D7CB },
};
643
644C_regparm int C_utf_islower(int r)
645{
646 int *match;
647
648 if((match = bsearch(&r, lower4, nelem(lower4), sizeof *lower4, &rune2cmp)))
649 return !((r - match[0]) % 2);
650 if(bsearch(&r, lower2, nelem(lower2), sizeof *lower2, &rune2cmp))
651 return 1;
652 if(bsearch(&r, lower1, nelem(lower1), sizeof *lower1, &rune1cmp))
653 return 1;
654 return 0;
655}
656
657C_regparm int C_utf_char_upcase(int r)
658{
659 int *match;
660
661 match = bsearch(&r, lower4, nelem(lower4), sizeof *lower4, &rune2cmp);
662 if (match)
663 return ((r - match[0]) % 2) ? r : r - 1;
664 match = bsearch(&r, lower2, nelem(lower2), sizeof *lower2, &rune2cmp);
665 if (match)
666 return match[2] + (r - match[0]);
667 match = bsearch(&r, lower1, nelem(lower1), sizeof *lower1, &rune1cmp);
668 if (match)
669 return match[1];
670 return r;
671}
672
/*
 * Inclusive { first, last } ranges of digit code points, sorted for
 * bsearch() with rune2cmp (see C_utf_isdigit).  Every range spans ten code
 * points except 0x1D7CE..0x1D7FF, which spans fifty.
 */
static int digit2[][2] = {
    { 0x0030, 0x0039 },
    { 0x0660, 0x0669 },
    { 0x06F0, 0x06F9 },
    { 0x07C0, 0x07C9 },
    { 0x0966, 0x096F },
    { 0x09E6, 0x09EF },
    { 0x0A66, 0x0A6F },
    { 0x0AE6, 0x0AEF },
    { 0x0B66, 0x0B6F },
    { 0x0BE6, 0x0BEF },
    { 0x0C66, 0x0C6F },
    { 0x0CE6, 0x0CEF },
    { 0x0D66, 0x0D6F },
    { 0x0DE6, 0x0DEF },
    { 0x0E50, 0x0E59 },
    { 0x0ED0, 0x0ED9 },
    { 0x0F20, 0x0F29 },
    { 0x1040, 0x1049 },
    { 0x1090, 0x1099 },
    { 0x17E0, 0x17E9 },
    { 0x1810, 0x1819 },
    { 0x1946, 0x194F },
    { 0x19D0, 0x19D9 },
    { 0x1A80, 0x1A89 },
    { 0x1A90, 0x1A99 },
    { 0x1B50, 0x1B59 },
    { 0x1BB0, 0x1BB9 },
    { 0x1C40, 0x1C49 },
    { 0x1C50, 0x1C59 },
    { 0xA620, 0xA629 },
    { 0xA8D0, 0xA8D9 },
    { 0xA900, 0xA909 },
    { 0xA9D0, 0xA9D9 },
    { 0xA9F0, 0xA9F9 },
    { 0xAA50, 0xAA59 },
    { 0xABF0, 0xABF9 },
    { 0xFF10, 0xFF19 },
    { 0x104A0, 0x104A9 },
    { 0x10D30, 0x10D39 },
    { 0x11066, 0x1106F },
    { 0x110F0, 0x110F9 },
    { 0x11136, 0x1113F },
    { 0x111D0, 0x111D9 },
    { 0x112F0, 0x112F9 },
    { 0x11450, 0x11459 },
    { 0x114D0, 0x114D9 },
    { 0x11650, 0x11659 },
    { 0x116C0, 0x116C9 },
    { 0x11730, 0x11739 },
    { 0x118E0, 0x118E9 },
    { 0x11950, 0x11959 },
    { 0x11C50, 0x11C59 },
    { 0x11D50, 0x11D59 },
    { 0x11DA0, 0x11DA9 },
    { 0x16A60, 0x16A69 },
    { 0x16AC0, 0x16AC9 },
    { 0x16B50, 0x16B59 },
    { 0x1D7CE, 0x1D7FF },
    { 0x1E140, 0x1E149 },
    { 0x1E2F0, 0x1E2F9 },
    { 0x1E950, 0x1E959 },
    { 0x1FBF0, 0x1FBF9 },
};
737
738C_regparm int C_utf_isdigit(int r)
739{
740 int *dp = bsearch(&r, digit2, nelem(digit2), sizeof *digit2, &rune2cmp);
741 if(dp != NULL) return 1 + r - dp[ 0 ];
742 return 0;
743}
744
/*
 * Inclusive { first, last } ranges of alphabetic code points, sorted by
 * `first`.  The code that consumes this table is not visible in this part
 * of the file.
 * NOTE(review): by analogy with upper3/lower4 this is presumably a
 * "every second code point" table searched with rune2cmp -- confirm
 * against the lookup function.
 */
static int alpha3[][2] = {
    { 0x00D6, 0x00D8 },
    { 0x00F6, 0x00F8 },
    { 0x02EC, 0x02EE },
    { 0x0374, 0x0376 },
    { 0x037D, 0x037F },
    { 0x0386, 0x0388 },
    { 0x038A, 0x038E },
    { 0x03A1, 0x03A3 },
    { 0x03F5, 0x03F7 },
    { 0x052F, 0x0531 },
    { 0x066F, 0x0671 },
    { 0x06D3, 0x06D5 },
    { 0x0710, 0x0712 },
    { 0x0887, 0x0889 },
    { 0x09A8, 0x09AA },
    { 0x09B0, 0x09B2 },
    { 0x09DD, 0x09DF },
    { 0x0A28, 0x0A2A },
    { 0x0A30, 0x0A32 },
    { 0x0A33, 0x0A35 },
    { 0x0A36, 0x0A38 },
    { 0x0A5C, 0x0A5E },
    { 0x0A8D, 0x0A8F },
    { 0x0A91, 0x0A93 },
    { 0x0AA8, 0x0AAA },
    { 0x0AB0, 0x0AB2 },
    { 0x0AB3, 0x0AB5 },
    { 0x0B28, 0x0B2A },
    { 0x0B30, 0x0B32 },
    { 0x0B33, 0x0B35 },
    { 0x0B5D, 0x0B5F },
    { 0x0B83, 0x0B85 },
    { 0x0B90, 0x0B92 },
    { 0x0B9A, 0x0B9E },
    { 0x0C0C, 0x0C0E },
    { 0x0C10, 0x0C12 },
    { 0x0C28, 0x0C2A },
    { 0x0C8C, 0x0C8E },
    { 0x0C90, 0x0C92 },
    { 0x0CA8, 0x0CAA },
    { 0x0CB3, 0x0CB5 },
    { 0x0CDE, 0x0CE0 },
    { 0x0D0C, 0x0D0E },
    { 0x0D10, 0x0D12 },
    { 0x0DB1, 0x0DB3 },
    { 0x0DBB, 0x0DBD },
    { 0x0E30, 0x0E32 },
    { 0x0E82, 0x0E86 },
    { 0x0E8A, 0x0E8C },
    { 0x0EA3, 0x0EA7 },
    { 0x0EB0, 0x0EB2 },
    { 0x0EC4, 0x0EC6 },
    { 0x0F47, 0x0F49 },
    { 0x10C5, 0x10C7 },
    { 0x10FA, 0x10FC },
    { 0x1248, 0x124A },
    { 0x1256, 0x125A },
    { 0x1288, 0x128A },
    { 0x12B0, 0x12B2 },
    { 0x12BE, 0x12C2 },
    { 0x12D6, 0x12D8 },
    { 0x1310, 0x1312 },
    { 0x167F, 0x1681 },
    { 0x176C, 0x176E },
    { 0x18A8, 0x18AA },
    { 0x1CEC, 0x1CEE },
    { 0x1CF3, 0x1CF5 },
    { 0x1F57, 0x1F5F },
    { 0x1FB4, 0x1FB6 },
    { 0x1FBC, 0x1FBE },
    { 0x1FC4, 0x1FC6 },
    { 0x1FF4, 0x1FF6 },
    { 0x2113, 0x2115 },
    { 0x2124, 0x212A },
    { 0x212D, 0x212F },
    { 0x2D25, 0x2D27 },
    { 0x2DA6, 0x2DA8 },
    { 0x2DAE, 0x2DB0 },
    { 0x2DB6, 0x2DB8 },
    { 0x2DBE, 0x2DC0 },
    { 0x2DC6, 0x2DC8 },
    { 0x2DCE, 0x2DD0 },
    { 0x2DD6, 0x2DD8 },
    { 0x309F, 0x30A1 },
    { 0x30FA, 0x30FC },
    { 0x312F, 0x3131 },
    { 0xA7D1, 0xA7D5 },
    { 0xA801, 0xA803 },
    { 0xA805, 0xA807 },
    { 0xA80A, 0xA80C },
    { 0xA8FB, 0xA8FD },
    { 0xA9E4, 0xA9E6 },
    { 0xA9FE, 0xAA00 },
    { 0xAA42, 0xAA44 },
    { 0xAAAF, 0xAAB1 },
    { 0xAAC0, 0xAAC2 },
    { 0xAB26, 0xAB28 },
    { 0xAB2E, 0xAB30 },
    { 0xAB5A, 0xAB5C },
    { 0xFB1D, 0xFB1F },
    { 0xFB28, 0xFB2A },
    { 0xFB36, 0xFB38 },
    { 0xFB3C, 0xFB40 },
    { 0xFB41, 0xFB43 },
    { 0xFB44, 0xFB46 },
    { 0xFE74, 0xFE76 },
    { 0x1000B, 0x1000D },
    { 0x10026, 0x10028 },
    { 0x1003A, 0x1003C },
    { 0x1003D, 0x1003F },
    { 0x10340, 0x10342 },
    { 0x1057A, 0x1057C },
    { 0x1058A, 0x1058C },
    { 0x10592, 0x10594 },
    { 0x10595, 0x10597 },
    { 0x105A1, 0x105A3 },
    { 0x105B1, 0x105B3 },
    { 0x105B9, 0x105BB },
    { 0x10785, 0x10787 },
    { 0x107B0, 0x107B2 },
    { 0x10808, 0x1080A },
    { 0x10835, 0x10837 },
    { 0x108F2, 0x108F4 },
    { 0x10A13, 0x10A15 },
    { 0x10A17, 0x10A19 },
    { 0x10AC7, 0x10AC9 },
    { 0x111DA, 0x111DC },
    { 0x11211, 0x11213 },
    { 0x11286, 0x1128A },
    { 0x1128D, 0x1128F },
    { 0x1129D, 0x1129F },
    { 0x11328, 0x1132A },
    { 0x11330, 0x11332 },
    { 0x11333, 0x11335 },
    { 0x114C5, 0x114C7 },
    { 0x11913, 0x11915 },
    { 0x11916, 0x11918 },
    { 0x1193F, 0x11941 },
    { 0x119E1, 0x119E3 },
    { 0x11C08, 0x11C0A },
    { 0x11D06, 0x11D08 },
    { 0x11D09, 0x11D0B },
    { 0x11D65, 0x11D67 },
    { 0x11D68, 0x11D6A },
    { 0x16FE1, 0x16FE3 },
    { 0x1AFF3, 0x1AFF5 },
    { 0x1AFFB, 0x1AFFD },
    { 0x1AFFE, 0x1B000 },
    { 0x1D454, 0x1D456 },
    { 0x1D49C, 0x1D49E },
    { 0x1D4AC, 0x1D4AE },
    { 0x1D4B9, 0x1D4BD },
    { 0x1D4C3, 0x1D4C5 },
    { 0x1D505, 0x1D507 },
    { 0x1D514, 0x1D516 },
    { 0x1D51C, 0x1D51E },
    { 0x1D539, 0x1D53B },
    { 0x1D53E, 0x1D540 },
    { 0x1D544, 0x1D546 },
    { 0x1D550, 0x1D552 },
    { 0x1D6C0, 0x1D6C2 },
    { 0x1D6DA, 0x1D6DC },
    { 0x1D6FA, 0x1D6FC },
    { 0x1D714, 0x1D716 },
    { 0x1D734, 0x1D736 },
    { 0x1D74E, 0x1D750 },
    { 0x1D76E, 0x1D770 },
    { 0x1D788, 0x1D78A },
    { 0x1D7A8, 0x1D7AA },
    { 0x1D7C2, 0x1D7C4 },
    { 0x1E7E6, 0x1E7E8 },
    { 0x1E7EB, 0x1E7ED },
    { 0x1E7EE, 0x1E7F0 },
    { 0x1E7FE, 0x1E800 },
    { 0x1EE03, 0x1EE05 },
    { 0x1EE1F, 0x1EE21 },
    { 0x1EE22, 0x1EE24 },
    { 0x1EE27, 0x1EE29 },
    { 0x1EE32, 0x1EE34 },
    { 0x1EE37, 0x1EE3B },
    { 0x1EE47, 0x1EE4D },
    { 0x1EE4F, 0x1EE51 },
    { 0x1EE52, 0x1EE54 },
    { 0x1EE57, 0x1EE61 },
    { 0x1EE62, 0x1EE64 },
    { 0x1EE6A, 0x1EE6C },
    { 0x1EE72, 0x1EE74 },
    { 0x1EE77, 0x1EE79 },
    { 0x1EE7C, 0x1EE80 },
    { 0x1EE89, 0x1EE8B },
    { 0x1EEA3, 0x1EEA5 },
    { 0x1EEA9, 0x1EEAB },
};
939
940static int alpha2[][2] = {
941 { 0x0041, 0x005A },
942 { 0x0061, 0x007A },
943 { 0x00C0, 0x00D6 },
944 { 0x00D8, 0x00F6 },
945 { 0x00F8, 0x02C1 },
946 { 0x02C6, 0x02D1 },
947 { 0x02E0, 0x02E4 },
948 { 0x0370, 0x0374 },
949 { 0x0376, 0x0377 },
950 { 0x037A, 0x037D },
951 { 0x0388, 0x038A },
952 { 0x038E, 0x03A1 },
953 { 0x03A3, 0x03F5 },
954 { 0x03F7, 0x0481 },
955 { 0x048A, 0x052F },
956 { 0x0531, 0x0556 },
957 { 0x0560, 0x0588 },
958 { 0x05D0, 0x05EA },
959 { 0x05EF, 0x05F2 },
960 { 0x0620, 0x064A },
961 { 0x066E, 0x066F },
962 { 0x0671, 0x06D3 },
963 { 0x06E5, 0x06E6 },
964 { 0x06EE, 0x06EF },
965 { 0x06FA, 0x06FC },
966 { 0x0712, 0x072F },
967 { 0x074D, 0x07A5 },
968 { 0x07CA, 0x07EA },
969 { 0x07F4, 0x07F5 },
970 { 0x0800, 0x0815 },
971 { 0x0840, 0x0858 },
972 { 0x0860, 0x086A },
973 { 0x0870, 0x0887 },
974 { 0x0889, 0x088E },
975 { 0x08A0, 0x08C9 },
976 { 0x0904, 0x0939 },
977 { 0x0958, 0x0961 },
978 { 0x0971, 0x0980 },
979 { 0x0985, 0x098C },
980 { 0x098F, 0x0990 },
981 { 0x0993, 0x09A8 },
982 { 0x09AA, 0x09B0 },
983 { 0x09B6, 0x09B9 },
984 { 0x09DC, 0x09DD },
985 { 0x09DF, 0x09E1 },
986 { 0x09F0, 0x09F1 },
987 { 0x0A05, 0x0A0A },
988 { 0x0A0F, 0x0A10 },
989 { 0x0A13, 0x0A28 },
990 { 0x0A2A, 0x0A30 },
991 { 0x0A32, 0x0A33 },
992 { 0x0A35, 0x0A36 },
993 { 0x0A38, 0x0A39 },
994 { 0x0A59, 0x0A5C },
995 { 0x0A72, 0x0A74 },
996 { 0x0A85, 0x0A8D },
997 { 0x0A8F, 0x0A91 },
998 { 0x0A93, 0x0AA8 },
999 { 0x0AAA, 0x0AB0 },
1000 { 0x0AB2, 0x0AB3 },
1001 { 0x0AB5, 0x0AB9 },
1002 { 0x0AE0, 0x0AE1 },
1003 { 0x0B05, 0x0B0C },
1004 { 0x0B0F, 0x0B10 },
1005 { 0x0B13, 0x0B28 },
1006 { 0x0B2A, 0x0B30 },
1007 { 0x0B32, 0x0B33 },
1008 { 0x0B35, 0x0B39 },
1009 { 0x0B5C, 0x0B5D },
1010 { 0x0B5F, 0x0B61 },
1011 { 0x0B85, 0x0B8A },
1012 { 0x0B8E, 0x0B90 },
1013 { 0x0B92, 0x0B95 },
1014 { 0x0B99, 0x0B9A },
1015 { 0x0B9E, 0x0B9F },
1016 { 0x0BA3, 0x0BA4 },
1017 { 0x0BA8, 0x0BAA },
1018 { 0x0BAE, 0x0BB9 },
1019 { 0x0C05, 0x0C0C },
1020 { 0x0C0E, 0x0C10 },
1021 { 0x0C12, 0x0C28 },
1022 { 0x0C2A, 0x0C39 },
1023 { 0x0C58, 0x0C5A },
1024 { 0x0C60, 0x0C61 },
1025 { 0x0C85, 0x0C8C },
1026 { 0x0C8E, 0x0C90 },
1027 { 0x0C92, 0x0CA8 },
1028 { 0x0CAA, 0x0CB3 },
1029 { 0x0CB5, 0x0CB9 },
1030 { 0x0CDD, 0x0CDE },
1031 { 0x0CE0, 0x0CE1 },
1032 { 0x0CF1, 0x0CF2 },
1033 { 0x0D04, 0x0D0C },
1034 { 0x0D0E, 0x0D10 },
1035 { 0x0D12, 0x0D3A },
1036 { 0x0D54, 0x0D56 },
1037 { 0x0D5F, 0x0D61 },
1038 { 0x0D7A, 0x0D7F },
1039 { 0x0D85, 0x0D96 },
1040 { 0x0D9A, 0x0DB1 },
1041 { 0x0DB3, 0x0DBB },
1042 { 0x0DC0, 0x0DC6 },
1043 { 0x0E01, 0x0E30 },
1044 { 0x0E32, 0x0E33 },
1045 { 0x0E40, 0x0E46 },
1046 { 0x0E81, 0x0E82 },
1047 { 0x0E86, 0x0E8A },
1048 { 0x0E8C, 0x0EA3 },
1049 { 0x0EA7, 0x0EB0 },
1050 { 0x0EB2, 0x0EB3 },
1051 { 0x0EC0, 0x0EC4 },
1052 { 0x0EDC, 0x0EDF },
1053 { 0x0F40, 0x0F47 },
1054 { 0x0F49, 0x0F6C },
1055 { 0x0F88, 0x0F8C },
1056 { 0x1000, 0x102A },
1057 { 0x1050, 0x1055 },
1058 { 0x105A, 0x105D },
1059 { 0x1065, 0x1066 },
1060 { 0x106E, 0x1070 },
1061 { 0x1075, 0x1081 },
1062 { 0x10A0, 0x10C5 },
1063 { 0x10D0, 0x10FA },
1064 { 0x10FC, 0x1248 },
1065 { 0x124A, 0x124D },
1066 { 0x1250, 0x1256 },
1067 { 0x125A, 0x125D },
1068 { 0x1260, 0x1288 },
1069 { 0x128A, 0x128D },
1070 { 0x1290, 0x12B0 },
1071 { 0x12B2, 0x12B5 },
1072 { 0x12B8, 0x12BE },
1073 { 0x12C2, 0x12C5 },
1074 { 0x12C8, 0x12D6 },
1075 { 0x12D8, 0x1310 },
1076 { 0x1312, 0x1315 },
1077 { 0x1318, 0x135A },
1078 { 0x1380, 0x138F },
1079 { 0x13A0, 0x13F5 },
1080 { 0x13F8, 0x13FD },
1081 { 0x1401, 0x166C },
1082 { 0x166F, 0x167F },
1083 { 0x1681, 0x169A },
1084 { 0x16A0, 0x16EA },
1085 { 0x16F1, 0x16F8 },
1086 { 0x1700, 0x1711 },
1087 { 0x171F, 0x1731 },
1088 { 0x1740, 0x1751 },
1089 { 0x1760, 0x176C },
1090 { 0x176E, 0x1770 },
1091 { 0x1780, 0x17B3 },
1092 { 0x1820, 0x1878 },
1093 { 0x1880, 0x1884 },
1094 { 0x1887, 0x18A8 },
1095 { 0x18B0, 0x18F5 },
1096 { 0x1900, 0x191E },
1097 { 0x1950, 0x196D },
1098 { 0x1970, 0x1974 },
1099 { 0x1980, 0x19AB },
1100 { 0x19B0, 0x19C9 },
1101 { 0x1A00, 0x1A16 },
1102 { 0x1A20, 0x1A54 },
1103 { 0x1B05, 0x1B33 },
1104 { 0x1B45, 0x1B4C },
1105 { 0x1B83, 0x1BA0 },
1106 { 0x1BAE, 0x1BAF },
1107 { 0x1BBA, 0x1BE5 },
1108 { 0x1C00, 0x1C23 },
1109 { 0x1C4D, 0x1C4F },
1110 { 0x1C5A, 0x1C7D },
1111 { 0x1C80, 0x1C88 },
1112 { 0x1C90, 0x1CBA },
1113 { 0x1CBD, 0x1CBF },
1114 { 0x1CE9, 0x1CEC },
1115 { 0x1CEE, 0x1CF3 },
1116 { 0x1CF5, 0x1CF6 },
1117 { 0x1D00, 0x1DBF },
1118 { 0x1E00, 0x1F15 },
1119 { 0x1F18, 0x1F1D },
1120 { 0x1F20, 0x1F45 },
1121 { 0x1F48, 0x1F4D },
1122 { 0x1F50, 0x1F57 },
1123 { 0x1F5F, 0x1F7D },
1124 { 0x1F80, 0x1FB4 },
1125 { 0x1FB6, 0x1FBC },
1126 { 0x1FC2, 0x1FC4 },
1127 { 0x1FC6, 0x1FCC },
1128 { 0x1FD0, 0x1FD3 },
1129 { 0x1FD6, 0x1FDB },
1130 { 0x1FE0, 0x1FEC },
1131 { 0x1FF2, 0x1FF4 },
1132 { 0x1FF6, 0x1FFC },
1133 { 0x2090, 0x209C },
1134 { 0x210A, 0x2113 },
1135 { 0x2119, 0x211D },
1136 { 0x212A, 0x212D },
1137 { 0x212F, 0x2139 },
1138 { 0x213C, 0x213F },
1139 { 0x2145, 0x2149 },
1140 { 0x2183, 0x2184 },
1141 { 0x2C00, 0x2CE4 },
1142 { 0x2CEB, 0x2CEE },
1143 { 0x2CF2, 0x2CF3 },
1144 { 0x2D00, 0x2D25 },
1145 { 0x2D30, 0x2D67 },
1146 { 0x2D80, 0x2D96 },
1147 { 0x2DA0, 0x2DA6 },
1148 { 0x2DA8, 0x2DAE },
1149 { 0x2DB0, 0x2DB6 },
1150 { 0x2DB8, 0x2DBE },
1151 { 0x2DC0, 0x2DC6 },
1152 { 0x2DC8, 0x2DCE },
1153 { 0x2DD0, 0x2DD6 },
1154 { 0x2DD8, 0x2DDE },
1155 { 0x3005, 0x3006 },
1156 { 0x3031, 0x3035 },
1157 { 0x303B, 0x303C },
1158 { 0x3041, 0x3096 },
1159 { 0x309D, 0x309F },
1160 { 0x30A1, 0x30FA },
1161 { 0x30FC, 0x30FF },
1162 { 0x3105, 0x312F },
1163 { 0x3131, 0x318E },
1164 { 0x31A0, 0x31BF },
1165 { 0x31F0, 0x31FF },
1166 { 0x9FFF, 0xA48C },
1167 { 0xA4D0, 0xA4FD },
1168 { 0xA500, 0xA60C },
1169 { 0xA610, 0xA61F },
1170 { 0xA62A, 0xA62B },
1171 { 0xA640, 0xA66E },
1172 { 0xA67F, 0xA69D },
1173 { 0xA6A0, 0xA6E5 },
1174 { 0xA717, 0xA71F },
1175 { 0xA722, 0xA788 },
1176 { 0xA78B, 0xA7CA },
1177 { 0xA7D0, 0xA7D1 },
1178 { 0xA7D5, 0xA7D9 },
1179 { 0xA7F2, 0xA801 },
1180 { 0xA803, 0xA805 },
1181 { 0xA807, 0xA80A },
1182 { 0xA80C, 0xA822 },
1183 { 0xA840, 0xA873 },
1184 { 0xA882, 0xA8B3 },
1185 { 0xA8F2, 0xA8F7 },
1186 { 0xA8FD, 0xA8FE },
1187 { 0xA90A, 0xA925 },
1188 { 0xA930, 0xA946 },
1189 { 0xA960, 0xA97C },
1190 { 0xA984, 0xA9B2 },
1191 { 0xA9E0, 0xA9E4 },
1192 { 0xA9E6, 0xA9EF },
1193 { 0xA9FA, 0xA9FE },
1194 { 0xAA00, 0xAA28 },
1195 { 0xAA40, 0xAA42 },
1196 { 0xAA44, 0xAA4B },
1197 { 0xAA60, 0xAA76 },
1198 { 0xAA7E, 0xAAAF },
1199 { 0xAAB5, 0xAAB6 },
1200 { 0xAAB9, 0xAABD },
1201 { 0xAADB, 0xAADD },
1202 { 0xAAE0, 0xAAEA },
1203 { 0xAAF2, 0xAAF4 },
1204 { 0xAB01, 0xAB06 },
1205 { 0xAB09, 0xAB0E },
1206 { 0xAB11, 0xAB16 },
1207 { 0xAB20, 0xAB26 },
1208 { 0xAB28, 0xAB2E },
1209 { 0xAB30, 0xAB5A },
1210 { 0xAB5C, 0xAB69 },
1211 { 0xAB70, 0xABE2 },
1212 { 0xD7B0, 0xD7C6 },
1213 { 0xD7CB, 0xD7FB },
1214 { 0xF900, 0xFA6D },
1215 { 0xFA70, 0xFAD9 },
1216 { 0xFB00, 0xFB06 },
1217 { 0xFB13, 0xFB17 },
1218 { 0xFB1F, 0xFB28 },
1219 { 0xFB2A, 0xFB36 },
1220 { 0xFB38, 0xFB3C },
1221 { 0xFB40, 0xFB41 },
1222 { 0xFB43, 0xFB44 },
1223 { 0xFB46, 0xFBB1 },
1224 { 0xFBD3, 0xFD3D },
1225 { 0xFD50, 0xFD8F },
1226 { 0xFD92, 0xFDC7 },
1227 { 0xFDF0, 0xFDFB },
1228 { 0xFE70, 0xFE74 },
1229 { 0xFE76, 0xFEFC },
1230 { 0xFF21, 0xFF3A },
1231 { 0xFF41, 0xFF5A },
1232 { 0xFF66, 0xFFBE },
1233 { 0xFFC2, 0xFFC7 },
1234 { 0xFFCA, 0xFFCF },
1235 { 0xFFD2, 0xFFD7 },
1236 { 0xFFDA, 0xFFDC },
1237 { 0x10000, 0x1000B },
1238 { 0x1000D, 0x10026 },
1239 { 0x10028, 0x1003A },
1240 { 0x1003C, 0x1003D },
1241 { 0x1003F, 0x1004D },
1242 { 0x10050, 0x1005D },
1243 { 0x10080, 0x100FA },
1244 { 0x10280, 0x1029C },
1245 { 0x102A0, 0x102D0 },
1246 { 0x10300, 0x1031F },
1247 { 0x1032D, 0x10340 },
1248 { 0x10342, 0x10349 },
1249 { 0x10350, 0x10375 },
1250 { 0x10380, 0x1039D },
1251 { 0x103A0, 0x103C3 },
1252 { 0x103C8, 0x103CF },
1253 { 0x10400, 0x1049D },
1254 { 0x104B0, 0x104D3 },
1255 { 0x104D8, 0x104FB },
1256 { 0x10500, 0x10527 },
1257 { 0x10530, 0x10563 },
1258 { 0x10570, 0x1057A },
1259 { 0x1057C, 0x1058A },
1260 { 0x1058C, 0x10592 },
1261 { 0x10594, 0x10595 },
1262 { 0x10597, 0x105A1 },
1263 { 0x105A3, 0x105B1 },
1264 { 0x105B3, 0x105B9 },
1265 { 0x105BB, 0x105BC },
1266 { 0x10600, 0x10736 },
1267 { 0x10740, 0x10755 },
1268 { 0x10760, 0x10767 },
1269 { 0x10780, 0x10785 },
1270 { 0x10787, 0x107B0 },
1271 { 0x107B2, 0x107BA },
1272 { 0x10800, 0x10805 },
1273 { 0x1080A, 0x10835 },
1274 { 0x10837, 0x10838 },
1275 { 0x1083F, 0x10855 },
1276 { 0x10860, 0x10876 },
1277 { 0x10880, 0x1089E },
1278 { 0x108E0, 0x108F2 },
1279 { 0x108F4, 0x108F5 },
1280 { 0x10900, 0x10915 },
1281 { 0x10920, 0x10939 },
1282 { 0x10980, 0x109B7 },
1283 { 0x109BE, 0x109BF },
1284 { 0x10A10, 0x10A13 },
1285 { 0x10A15, 0x10A17 },
1286 { 0x10A19, 0x10A35 },
1287 { 0x10A60, 0x10A7C },
1288 { 0x10A80, 0x10A9C },
1289 { 0x10AC0, 0x10AC7 },
1290 { 0x10AC9, 0x10AE4 },
1291 { 0x10B00, 0x10B35 },
1292 { 0x10B40, 0x10B55 },
1293 { 0x10B60, 0x10B72 },
1294 { 0x10B80, 0x10B91 },
1295 { 0x10C00, 0x10C48 },
1296 { 0x10C80, 0x10CB2 },
1297 { 0x10CC0, 0x10CF2 },
1298 { 0x10D00, 0x10D23 },
1299 { 0x10E80, 0x10EA9 },
1300 { 0x10EB0, 0x10EB1 },
1301 { 0x10F00, 0x10F1C },
1302 { 0x10F30, 0x10F45 },
1303 { 0x10F70, 0x10F81 },
1304 { 0x10FB0, 0x10FC4 },
1305 { 0x10FE0, 0x10FF6 },
1306 { 0x11003, 0x11037 },
1307 { 0x11071, 0x11072 },
1308 { 0x11083, 0x110AF },
1309 { 0x110D0, 0x110E8 },
1310 { 0x11103, 0x11126 },
1311 { 0x11150, 0x11172 },
1312 { 0x11183, 0x111B2 },
1313 { 0x111C1, 0x111C4 },
1314 { 0x11200, 0x11211 },
1315 { 0x11213, 0x1122B },
1316 { 0x11280, 0x11286 },
1317 { 0x1128A, 0x1128D },
1318 { 0x1128F, 0x1129D },
1319 { 0x1129F, 0x112A8 },
1320 { 0x112B0, 0x112DE },
1321 { 0x11305, 0x1130C },
1322 { 0x1130F, 0x11310 },
1323 { 0x11313, 0x11328 },
1324 { 0x1132A, 0x11330 },
1325 { 0x11332, 0x11333 },
1326 { 0x11335, 0x11339 },
1327 { 0x1135D, 0x11361 },
1328 { 0x11400, 0x11434 },
1329 { 0x11447, 0x1144A },
1330 { 0x1145F, 0x11461 },
1331 { 0x11480, 0x114AF },
1332 { 0x114C4, 0x114C5 },
1333 { 0x11580, 0x115AE },
1334 { 0x115D8, 0x115DB },
1335 { 0x11600, 0x1162F },
1336 { 0x11680, 0x116AA },
1337 { 0x11700, 0x1171A },
1338 { 0x11740, 0x11746 },
1339 { 0x11800, 0x1182B },
1340 { 0x118A0, 0x118DF },
1341 { 0x118FF, 0x11906 },
1342 { 0x1190C, 0x11913 },
1343 { 0x11915, 0x11916 },
1344 { 0x11918, 0x1192F },
1345 { 0x119A0, 0x119A7 },
1346 { 0x119AA, 0x119D0 },
1347 { 0x11A0B, 0x11A32 },
1348 { 0x11A5C, 0x11A89 },
1349 { 0x11AB0, 0x11AF8 },
1350 { 0x11C00, 0x11C08 },
1351 { 0x11C0A, 0x11C2E },
1352 { 0x11C72, 0x11C8F },
1353 { 0x11D00, 0x11D06 },
1354 { 0x11D08, 0x11D09 },
1355 { 0x11D0B, 0x11D30 },
1356 { 0x11D60, 0x11D65 },
1357 { 0x11D67, 0x11D68 },
1358 { 0x11D6A, 0x11D89 },
1359 { 0x11EE0, 0x11EF2 },
1360 { 0x12000, 0x12399 },
1361 { 0x12480, 0x12543 },
1362 { 0x12F90, 0x12FF0 },
1363 { 0x13000, 0x1342E },
1364 { 0x14400, 0x14646 },
1365 { 0x16800, 0x16A38 },
1366 { 0x16A40, 0x16A5E },
1367 { 0x16A70, 0x16ABE },
1368 { 0x16AD0, 0x16AED },
1369 { 0x16B00, 0x16B2F },
1370 { 0x16B40, 0x16B43 },
1371 { 0x16B63, 0x16B77 },
1372 { 0x16B7D, 0x16B8F },
1373 { 0x16E40, 0x16E7F },
1374 { 0x16F00, 0x16F4A },
1375 { 0x16F93, 0x16F9F },
1376 { 0x16FE0, 0x16FE1 },
1377 { 0x18800, 0x18CD5 },
1378 { 0x1AFF0, 0x1AFF3 },
1379 { 0x1AFF5, 0x1AFFB },
1380 { 0x1AFFD, 0x1AFFE },
1381 { 0x1B000, 0x1B122 },
1382 { 0x1B150, 0x1B152 },
1383 { 0x1B164, 0x1B167 },
1384 { 0x1B170, 0x1B2FB },
1385 { 0x1BC00, 0x1BC6A },
1386 { 0x1BC70, 0x1BC7C },
1387 { 0x1BC80, 0x1BC88 },
1388 { 0x1BC90, 0x1BC99 },
1389 { 0x1D400, 0x1D454 },
1390 { 0x1D456, 0x1D49C },
1391 { 0x1D49E, 0x1D49F },
1392 { 0x1D4A5, 0x1D4A6 },
1393 { 0x1D4A9, 0x1D4AC },
1394 { 0x1D4AE, 0x1D4B9 },
1395 { 0x1D4BD, 0x1D4C3 },
1396 { 0x1D4C5, 0x1D505 },
1397 { 0x1D507, 0x1D50A },
1398 { 0x1D50D, 0x1D514 },
1399 { 0x1D516, 0x1D51C },
1400 { 0x1D51E, 0x1D539 },
1401 { 0x1D53B, 0x1D53E },
1402 { 0x1D540, 0x1D544 },
1403 { 0x1D54A, 0x1D550 },
1404 { 0x1D552, 0x1D6A5 },
1405 { 0x1D6A8, 0x1D6C0 },
1406 { 0x1D6C2, 0x1D6DA },
1407 { 0x1D6DC, 0x1D6FA },
1408 { 0x1D6FC, 0x1D714 },
1409 { 0x1D716, 0x1D734 },
1410 { 0x1D736, 0x1D74E },
1411 { 0x1D750, 0x1D76E },
1412 { 0x1D770, 0x1D788 },
1413 { 0x1D78A, 0x1D7A8 },
1414 { 0x1D7AA, 0x1D7C2 },
1415 { 0x1D7C4, 0x1D7CB },
1416 { 0x1DF00, 0x1DF1E },
1417 { 0x1E100, 0x1E12C },
1418 { 0x1E137, 0x1E13D },
1419 { 0x1E290, 0x1E2AD },
1420 { 0x1E2C0, 0x1E2EB },
1421 { 0x1E7E0, 0x1E7E6 },
1422 { 0x1E7E8, 0x1E7EB },
1423 { 0x1E7ED, 0x1E7EE },
1424 { 0x1E7F0, 0x1E7FE },
1425 { 0x1E800, 0x1E8C4 },
1426 { 0x1E900, 0x1E943 },
1427 { 0x1EE00, 0x1EE03 },
1428 { 0x1EE05, 0x1EE1F },
1429 { 0x1EE21, 0x1EE22 },
1430 { 0x1EE29, 0x1EE32 },
1431 { 0x1EE34, 0x1EE37 },
1432 { 0x1EE4D, 0x1EE4F },
1433 { 0x1EE51, 0x1EE52 },
1434 { 0x1EE61, 0x1EE62 },
1435 { 0x1EE67, 0x1EE6A },
1436 { 0x1EE6C, 0x1EE72 },
1437 { 0x1EE74, 0x1EE77 },
1438 { 0x1EE79, 0x1EE7C },
1439 { 0x1EE80, 0x1EE89 },
1440 { 0x1EE8B, 0x1EE9B },
1441 { 0x1EEA1, 0x1EEA3 },
1442 { 0x1EEA5, 0x1EEA9 },
1443 { 0x1EEAB, 0x1EEBB },
1444 { 0x2F800, 0x2FA1D },
1445};
1446
/*
 * alpha1: individual code points that C_utf_isalpha() accepts.
 *
 * The table is searched with bsearch() and rune1cmp, so the entries must
 * stay in ascending order.
 *
 * NOTE(review): several neighbouring entries (0x3400/0x4DBF, 0x4E00,
 * 0xAC00/0xD7A3, 0x17000/0x187F7, 0x18D00/0x18D08, 0x20000/0x2A6DF, ...)
 * appear to be the first/last code points of large blocks that the
 * generator saw only as "<..., First>"/"<..., Last>" lines; this table by
 * itself matches just those endpoints, not the code points in between.
 */
static int alpha1[] = {
 0x00AA,
 0x00B5,
 0x00BA,
 0x0559,
 0x06FF,
 0x07B1,
 0x07FA,
 0x081A,
 0x0824,
 0x0828,
 0x093D,
 0x0950,
 0x09BD,
 0x09CE,
 0x09FC,
 0x0ABD,
 0x0AD0,
 0x0AF9,
 0x0B3D,
 0x0B71,
 0x0BD0,
 0x0C3D,
 0x0C5D,
 0x0C80,
 0x0CBD,
 0x0D3D,
 0x0D4E,
 0x0EBD,
 0x0F00,
 0x103F,
 0x1061,
 0x108E,
 0x10CD,
 0x17D7,
 0x17DC,
 0x1AA7,
 0x1CFA,
 0x2071,
 0x207F,
 0x2102,
 0x2107,
 0x214E,
 0x2D2D,
 0x2D6F,
 0x2E2F,
 0x3400,
 0x4DBF,
 0x4E00,
 0xA9CF,
 0xAA7A,
 0xAC00,
 0xD7A3,
 0x1083C,
 0x10A00,
 0x10F27,
 0x11075,
 0x11144,
 0x11147,
 0x11176,
 0x1133D,
 0x11350,
 0x11644,
 0x116B8,
 0x11909,
 0x11A00,
 0x11A3A,
 0x11A50,
 0x11A9D,
 0x11C40,
 0x11D46,
 0x11D98,
 0x11FB0,
 0x16F50,
 0x17000,
 0x187F7,
 0x18D00,
 0x18D08,
 0x1D4A2,
 0x1E14E,
 0x1E94B,
 0x1EE42,
 0x20000,
 0x2A6DF,
 0x2A700,
 0x2B738,
 0x2B740,
 0x2B81D,
 0x2B820,
 0x2CEA1,
 0x2CEB0,
 0x2EBE0,
 0x30000,
 0x3134A,
};
1542
1543C_regparm int C_utf_isalpha(int r)
1544{
1545 int *match;
1546
1547 if((match = bsearch(&r, alpha3, nelem(alpha3), sizeof *alpha3, &rune2cmp)))
1548 return !((r - match[0]) % 2);
1549 if(bsearch(&r, alpha2, nelem(alpha2), sizeof *alpha2, &rune2cmp))
1550 return 1;
1551 if(bsearch(&r, alpha1, nelem(alpha1), sizeof *alpha1, &rune1cmp))
1552 return 1;
1553 return 0;
1554}
1555
/*
 * space2: inclusive { first, last } code-point pairs accepted by
 * C_utf_isspace().  Looked up with bsearch(), so keep them ascending.
 */
static int space2[][2] = {
	{ 0x0009, 0x000D },	/* U+0009..U+000D: TAB, LF, VT, FF, CR */
	{ 0x001C, 0x0020 },	/* U+001C..U+0020: FS, GS, RS, US, SPACE */
	{ 0x2000, 0x200A },	/* U+2000..U+200A: EN QUAD .. HAIR SPACE */
	{ 0x2028, 0x2029 },	/* U+2028, U+2029: LINE/PARAGRAPH SEPARATOR */
};
1562
/*
 * space1: stand-alone code points accepted by C_utf_isspace().
 * Looked up with bsearch(), so keep them ascending.
 */
static int space1[] = {
	0x0085,		/* NEXT LINE (NEL) */
	0x00A0,		/* NO-BREAK SPACE */
	0x1680,		/* OGHAM SPACE MARK */
	0x202F,		/* NARROW NO-BREAK SPACE */
	0x205F,		/* MEDIUM MATHEMATICAL SPACE */
	0x3000,		/* IDEOGRAPHIC SPACE */
};
1571
1572C_regparm int C_utf_isspace(int r)
1573{
1574 if(bsearch(&r, space2, nelem(space2), sizeof *space2, &rune2cmp))
1575 return 1;
1576 if(bsearch(&r, space1, nelem(space1), sizeof *space1, &rune1cmp))
1577 return 1;
1578 return 0;
1579}
1580
1581static int fold1[][ 2 ] = {
1582 {0x0041, 0x0061},
1583 {0x0042, 0x0062},
1584 {0x0043, 0x0063},
1585 {0x0044, 0x0064},
1586 {0x0045, 0x0065},
1587 {0x0046, 0x0066},
1588 {0x0047, 0x0067},
1589 {0x0048, 0x0068},
1590 {0x0049, 0x0069},
1591 {0x004A, 0x006A},
1592 {0x004B, 0x006B},
1593 {0x004C, 0x006C},
1594 {0x004D, 0x006D},
1595 {0x004E, 0x006E},
1596 {0x004F, 0x006F},
1597 {0x0050, 0x0070},
1598 {0x0051, 0x0071},
1599 {0x0052, 0x0072},
1600 {0x0053, 0x0073},
1601 {0x0054, 0x0074},
1602 {0x0055, 0x0075},
1603 {0x0056, 0x0076},
1604 {0x0057, 0x0077},
1605 {0x0058, 0x0078},
1606 {0x0059, 0x0079},
1607 {0x005A, 0x007A},
1608 {0x00B5, 0x03BC},
1609 {0x00C0, 0x00E0},
1610 {0x00C1, 0x00E1},
1611 {0x00C2, 0x00E2},
1612 {0x00C3, 0x00E3},
1613 {0x00C4, 0x00E4},
1614 {0x00C5, 0x00E5},
1615 {0x00C6, 0x00E6},
1616 {0x00C7, 0x00E7},
1617 {0x00C8, 0x00E8},
1618 {0x00C9, 0x00E9},
1619 {0x00CA, 0x00EA},
1620 {0x00CB, 0x00EB},
1621 {0x00CC, 0x00EC},
1622 {0x00CD, 0x00ED},
1623 {0x00CE, 0x00EE},
1624 {0x00CF, 0x00EF},
1625 {0x00D0, 0x00F0},
1626 {0x00D1, 0x00F1},
1627 {0x00D2, 0x00F2},
1628 {0x00D3, 0x00F3},
1629 {0x00D4, 0x00F4},
1630 {0x00D5, 0x00F5},
1631 {0x00D6, 0x00F6},
1632 {0x00D8, 0x00F8},
1633 {0x00D9, 0x00F9},
1634 {0x00DA, 0x00FA},
1635 {0x00DB, 0x00FB},
1636 {0x00DC, 0x00FC},
1637 {0x00DD, 0x00FD},
1638 {0x00DE, 0x00FE},
1639 {0x0100, 0x0101},
1640 {0x0102, 0x0103},
1641 {0x0104, 0x0105},
1642 {0x0106, 0x0107},
1643 {0x0108, 0x0109},
1644 {0x010A, 0x010B},
1645 {0x010C, 0x010D},
1646 {0x010E, 0x010F},
1647 {0x0110, 0x0111},
1648 {0x0112, 0x0113},
1649 {0x0114, 0x0115},
1650 {0x0116, 0x0117},
1651 {0x0118, 0x0119},
1652 {0x011A, 0x011B},
1653 {0x011C, 0x011D},
1654 {0x011E, 0x011F},
1655 {0x0120, 0x0121},
1656 {0x0122, 0x0123},
1657 {0x0124, 0x0125},
1658 {0x0126, 0x0127},
1659 {0x0128, 0x0129},
1660 {0x012A, 0x012B},
1661 {0x012C, 0x012D},
1662 {0x012E, 0x012F},
1663 {0x0132, 0x0133},
1664 {0x0134, 0x0135},
1665 {0x0136, 0x0137},
1666 {0x0139, 0x013A},
1667 {0x013B, 0x013C},
1668 {0x013D, 0x013E},
1669 {0x013F, 0x0140},
1670 {0x0141, 0x0142},
1671 {0x0143, 0x0144},
1672 {0x0145, 0x0146},
1673 {0x0147, 0x0148},
1674 {0x014A, 0x014B},
1675 {0x014C, 0x014D},
1676 {0x014E, 0x014F},
1677 {0x0150, 0x0151},
1678 {0x0152, 0x0153},
1679 {0x0154, 0x0155},
1680 {0x0156, 0x0157},
1681 {0x0158, 0x0159},
1682 {0x015A, 0x015B},
1683 {0x015C, 0x015D},
1684 {0x015E, 0x015F},
1685 {0x0160, 0x0161},
1686 {0x0162, 0x0163},
1687 {0x0164, 0x0165},
1688 {0x0166, 0x0167},
1689 {0x0168, 0x0169},
1690 {0x016A, 0x016B},
1691 {0x016C, 0x016D},
1692 {0x016E, 0x016F},
1693 {0x0170, 0x0171},
1694 {0x0172, 0x0173},
1695 {0x0174, 0x0175},
1696 {0x0176, 0x0177},
1697 {0x0178, 0x00FF},
1698 {0x0179, 0x017A},
1699 {0x017B, 0x017C},
1700 {0x017D, 0x017E},
1701 {0x017F, 0x0073},
1702 {0x0181, 0x0253},
1703 {0x0182, 0x0183},
1704 {0x0184, 0x0185},
1705 {0x0186, 0x0254},
1706 {0x0187, 0x0188},
1707 {0x0189, 0x0256},
1708 {0x018A, 0x0257},
1709 {0x018B, 0x018C},
1710 {0x018E, 0x01DD},
1711 {0x018F, 0x0259},
1712 {0x0190, 0x025B},
1713 {0x0191, 0x0192},
1714 {0x0193, 0x0260},
1715 {0x0194, 0x0263},
1716 {0x0196, 0x0269},
1717 {0x0197, 0x0268},
1718 {0x0198, 0x0199},
1719 {0x019C, 0x026F},
1720 {0x019D, 0x0272},
1721 {0x019F, 0x0275},
1722 {0x01A0, 0x01A1},
1723 {0x01A2, 0x01A3},
1724 {0x01A4, 0x01A5},
1725 {0x01A6, 0x0280},
1726 {0x01A7, 0x01A8},
1727 {0x01A9, 0x0283},
1728 {0x01AC, 0x01AD},
1729 {0x01AE, 0x0288},
1730 {0x01AF, 0x01B0},
1731 {0x01B1, 0x028A},
1732 {0x01B2, 0x028B},
1733 {0x01B3, 0x01B4},
1734 {0x01B5, 0x01B6},
1735 {0x01B7, 0x0292},
1736 {0x01B8, 0x01B9},
1737 {0x01BC, 0x01BD},
1738 {0x01C4, 0x01C6},
1739 {0x01C5, 0x01C6},
1740 {0x01C7, 0x01C9},
1741 {0x01C8, 0x01C9},
1742 {0x01CA, 0x01CC},
1743 {0x01CB, 0x01CC},
1744 {0x01CD, 0x01CE},
1745 {0x01CF, 0x01D0},
1746 {0x01D1, 0x01D2},
1747 {0x01D3, 0x01D4},
1748 {0x01D5, 0x01D6},
1749 {0x01D7, 0x01D8},
1750 {0x01D9, 0x01DA},
1751 {0x01DB, 0x01DC},
1752 {0x01DE, 0x01DF},
1753 {0x01E0, 0x01E1},
1754 {0x01E2, 0x01E3},
1755 {0x01E4, 0x01E5},
1756 {0x01E6, 0x01E7},
1757 {0x01E8, 0x01E9},
1758 {0x01EA, 0x01EB},
1759 {0x01EC, 0x01ED},
1760 {0x01EE, 0x01EF},
1761 {0x01F1, 0x01F3},
1762 {0x01F2, 0x01F3},
1763 {0x01F4, 0x01F5},
1764 {0x01F6, 0x0195},
1765 {0x01F7, 0x01BF},
1766 {0x01F8, 0x01F9},
1767 {0x01FA, 0x01FB},
1768 {0x01FC, 0x01FD},
1769 {0x01FE, 0x01FF},
1770 {0x0200, 0x0201},
1771 {0x0202, 0x0203},
1772 {0x0204, 0x0205},
1773 {0x0206, 0x0207},
1774 {0x0208, 0x0209},
1775 {0x020A, 0x020B},
1776 {0x020C, 0x020D},
1777 {0x020E, 0x020F},
1778 {0x0210, 0x0211},
1779 {0x0212, 0x0213},
1780 {0x0214, 0x0215},
1781 {0x0216, 0x0217},
1782 {0x0218, 0x0219},
1783 {0x021A, 0x021B},
1784 {0x021C, 0x021D},
1785 {0x021E, 0x021F},
1786 {0x0220, 0x019E},
1787 {0x0222, 0x0223},
1788 {0x0224, 0x0225},
1789 {0x0226, 0x0227},
1790 {0x0228, 0x0229},
1791 {0x022A, 0x022B},
1792 {0x022C, 0x022D},
1793 {0x022E, 0x022F},
1794 {0x0230, 0x0231},
1795 {0x0232, 0x0233},
1796 {0x023A, 0x2C65},
1797 {0x023B, 0x023C},
1798 {0x023D, 0x019A},
1799 {0x023E, 0x2C66},
1800 {0x0241, 0x0242},
1801 {0x0243, 0x0180},
1802 {0x0244, 0x0289},
1803 {0x0245, 0x028C},
1804 {0x0246, 0x0247},
1805 {0x0248, 0x0249},
1806 {0x024A, 0x024B},
1807 {0x024C, 0x024D},
1808 {0x024E, 0x024F},
1809 {0x0345, 0x03B9},
1810 {0x0370, 0x0371},
1811 {0x0372, 0x0373},
1812 {0x0376, 0x0377},
1813 {0x037F, 0x03F3},
1814 {0x0386, 0x03AC},
1815 {0x0388, 0x03AD},
1816 {0x0389, 0x03AE},
1817 {0x038A, 0x03AF},
1818 {0x038C, 0x03CC},
1819 {0x038E, 0x03CD},
1820 {0x038F, 0x03CE},
1821 {0x0391, 0x03B1},
1822 {0x0392, 0x03B2},
1823 {0x0393, 0x03B3},
1824 {0x0394, 0x03B4},
1825 {0x0395, 0x03B5},
1826 {0x0396, 0x03B6},
1827 {0x0397, 0x03B7},
1828 {0x0398, 0x03B8},
1829 {0x0399, 0x03B9},
1830 {0x039A, 0x03BA},
1831 {0x039B, 0x03BB},
1832 {0x039C, 0x03BC},
1833 {0x039D, 0x03BD},
1834 {0x039E, 0x03BE},
1835 {0x039F, 0x03BF},
1836 {0x03A0, 0x03C0},
1837 {0x03A1, 0x03C1},
1838 {0x03A3, 0x03C3},
1839 {0x03A4, 0x03C4},
1840 {0x03A5, 0x03C5},
1841 {0x03A6, 0x03C6},
1842 {0x03A7, 0x03C7},
1843 {0x03A8, 0x03C8},
1844 {0x03A9, 0x03C9},
1845 {0x03AA, 0x03CA},
1846 {0x03AB, 0x03CB},
1847 {0x03C2, 0x03C3},
1848 {0x03CF, 0x03D7},
1849 {0x03D0, 0x03B2},
1850 {0x03D1, 0x03B8},
1851 {0x03D5, 0x03C6},
1852 {0x03D6, 0x03C0},
1853 {0x03D8, 0x03D9},
1854 {0x03DA, 0x03DB},
1855 {0x03DC, 0x03DD},
1856 {0x03DE, 0x03DF},
1857 {0x03E0, 0x03E1},
1858 {0x03E2, 0x03E3},
1859 {0x03E4, 0x03E5},
1860 {0x03E6, 0x03E7},
1861 {0x03E8, 0x03E9},
1862 {0x03EA, 0x03EB},
1863 {0x03EC, 0x03ED},
1864 {0x03EE, 0x03EF},
1865 {0x03F0, 0x03BA},
1866 {0x03F1, 0x03C1},
1867 {0x03F4, 0x03B8},
1868 {0x03F5, 0x03B5},
1869 {0x03F7, 0x03F8},
1870 {0x03F9, 0x03F2},
1871 {0x03FA, 0x03FB},
1872 {0x03FD, 0x037B},
1873 {0x03FE, 0x037C},
1874 {0x03FF, 0x037D},
1875 {0x0400, 0x0450},
1876 {0x0401, 0x0451},
1877 {0x0402, 0x0452},
1878 {0x0403, 0x0453},
1879 {0x0404, 0x0454},
1880 {0x0405, 0x0455},
1881 {0x0406, 0x0456},
1882 {0x0407, 0x0457},
1883 {0x0408, 0x0458},
1884 {0x0409, 0x0459},
1885 {0x040A, 0x045A},
1886 {0x040B, 0x045B},
1887 {0x040C, 0x045C},
1888 {0x040D, 0x045D},
1889 {0x040E, 0x045E},
1890 {0x040F, 0x045F},
1891 {0x0410, 0x0430},
1892 {0x0411, 0x0431},
1893 {0x0412, 0x0432},
1894 {0x0413, 0x0433},
1895 {0x0414, 0x0434},
1896 {0x0415, 0x0435},
1897 {0x0416, 0x0436},
1898 {0x0417, 0x0437},
1899 {0x0418, 0x0438},
1900 {0x0419, 0x0439},
1901 {0x041A, 0x043A},
1902 {0x041B, 0x043B},
1903 {0x041C, 0x043C},
1904 {0x041D, 0x043D},
1905 {0x041E, 0x043E},
1906 {0x041F, 0x043F},
1907 {0x0420, 0x0440},
1908 {0x0421, 0x0441},
1909 {0x0422, 0x0442},
1910 {0x0423, 0x0443},
1911 {0x0424, 0x0444},
1912 {0x0425, 0x0445},
1913 {0x0426, 0x0446},
1914 {0x0427, 0x0447},
1915 {0x0428, 0x0448},
1916 {0x0429, 0x0449},
1917 {0x042A, 0x044A},
1918 {0x042B, 0x044B},
1919 {0x042C, 0x044C},
1920 {0x042D, 0x044D},
1921 {0x042E, 0x044E},
1922 {0x042F, 0x044F},
1923 {0x0460, 0x0461},
1924 {0x0462, 0x0463},
1925 {0x0464, 0x0465},
1926 {0x0466, 0x0467},
1927 {0x0468, 0x0469},
1928 {0x046A, 0x046B},
1929 {0x046C, 0x046D},
1930 {0x046E, 0x046F},
1931 {0x0470, 0x0471},
1932 {0x0472, 0x0473},
1933 {0x0474, 0x0475},
1934 {0x0476, 0x0477},
1935 {0x0478, 0x0479},
1936 {0x047A, 0x047B},
1937 {0x047C, 0x047D},
1938 {0x047E, 0x047F},
1939 {0x0480, 0x0481},
1940 {0x048A, 0x048B},
1941 {0x048C, 0x048D},
1942 {0x048E, 0x048F},
1943 {0x0490, 0x0491},
1944 {0x0492, 0x0493},
1945 {0x0494, 0x0495},
1946 {0x0496, 0x0497},
1947 {0x0498, 0x0499},
1948 {0x049A, 0x049B},
1949 {0x049C, 0x049D},
1950 {0x049E, 0x049F},
1951 {0x04A0, 0x04A1},
1952 {0x04A2, 0x04A3},
1953 {0x04A4, 0x04A5},
1954 {0x04A6, 0x04A7},
1955 {0x04A8, 0x04A9},
1956 {0x04AA, 0x04AB},
1957 {0x04AC, 0x04AD},
1958 {0x04AE, 0x04AF},
1959 {0x04B0, 0x04B1},
1960 {0x04B2, 0x04B3},
1961 {0x04B4, 0x04B5},
1962 {0x04B6, 0x04B7},
1963 {0x04B8, 0x04B9},
1964 {0x04BA, 0x04BB},
1965 {0x04BC, 0x04BD},
1966 {0x04BE, 0x04BF},
1967 {0x04C0, 0x04CF},
1968 {0x04C1, 0x04C2},
1969 {0x04C3, 0x04C4},
1970 {0x04C5, 0x04C6},
1971 {0x04C7, 0x04C8},
1972 {0x04C9, 0x04CA},
1973 {0x04CB, 0x04CC},
1974 {0x04CD, 0x04CE},
1975 {0x04D0, 0x04D1},
1976 {0x04D2, 0x04D3},
1977 {0x04D4, 0x04D5},
1978 {0x04D6, 0x04D7},
1979 {0x04D8, 0x04D9},
1980 {0x04DA, 0x04DB},
1981 {0x04DC, 0x04DD},
1982 {0x04DE, 0x04DF},
1983 {0x04E0, 0x04E1},
1984 {0x04E2, 0x04E3},
1985 {0x04E4, 0x04E5},
1986 {0x04E6, 0x04E7},
1987 {0x04E8, 0x04E9},
1988 {0x04EA, 0x04EB},
1989 {0x04EC, 0x04ED},
1990 {0x04EE, 0x04EF},
1991 {0x04F0, 0x04F1},
1992 {0x04F2, 0x04F3},
1993 {0x04F4, 0x04F5},
1994 {0x04F6, 0x04F7},
1995 {0x04F8, 0x04F9},
1996 {0x04FA, 0x04FB},
1997 {0x04FC, 0x04FD},
1998 {0x04FE, 0x04FF},
1999 {0x0500, 0x0501},
2000 {0x0502, 0x0503},
2001 {0x0504, 0x0505},
2002 {0x0506, 0x0507},
2003 {0x0508, 0x0509},
2004 {0x050A, 0x050B},
2005 {0x050C, 0x050D},
2006 {0x050E, 0x050F},
2007 {0x0510, 0x0511},
2008 {0x0512, 0x0513},
2009 {0x0514, 0x0515},
2010 {0x0516, 0x0517},
2011 {0x0518, 0x0519},
2012 {0x051A, 0x051B},
2013 {0x051C, 0x051D},
2014 {0x051E, 0x051F},
2015 {0x0520, 0x0521},
2016 {0x0522, 0x0523},
2017 {0x0524, 0x0525},
2018 {0x0526, 0x0527},
2019 {0x0528, 0x0529},
2020 {0x052A, 0x052B},
2021 {0x052C, 0x052D},
2022 {0x052E, 0x052F},
2023 {0x0531, 0x0561},
2024 {0x0532, 0x0562},
2025 {0x0533, 0x0563},
2026 {0x0534, 0x0564},
2027 {0x0535, 0x0565},
2028 {0x0536, 0x0566},
2029 {0x0537, 0x0567},
2030 {0x0538, 0x0568},
2031 {0x0539, 0x0569},
2032 {0x053A, 0x056A},
2033 {0x053B, 0x056B},
2034 {0x053C, 0x056C},
2035 {0x053D, 0x056D},
2036 {0x053E, 0x056E},
2037 {0x053F, 0x056F},
2038 {0x0540, 0x0570},
2039 {0x0541, 0x0571},
2040 {0x0542, 0x0572},
2041 {0x0543, 0x0573},
2042 {0x0544, 0x0574},
2043 {0x0545, 0x0575},
2044 {0x0546, 0x0576},
2045 {0x0547, 0x0577},
2046 {0x0548, 0x0578},
2047 {0x0549, 0x0579},
2048 {0x054A, 0x057A},
2049 {0x054B, 0x057B},
2050 {0x054C, 0x057C},
2051 {0x054D, 0x057D},
2052 {0x054E, 0x057E},
2053 {0x054F, 0x057F},
2054 {0x0550, 0x0580},
2055 {0x0551, 0x0581},
2056 {0x0552, 0x0582},
2057 {0x0553, 0x0583},
2058 {0x0554, 0x0584},
2059 {0x0555, 0x0585},
2060 {0x0556, 0x0586},
2061 {0x10A0, 0x2D00},
2062 {0x10A1, 0x2D01},
2063 {0x10A2, 0x2D02},
2064 {0x10A3, 0x2D03},
2065 {0x10A4, 0x2D04},
2066 {0x10A5, 0x2D05},
2067 {0x10A6, 0x2D06},
2068 {0x10A7, 0x2D07},
2069 {0x10A8, 0x2D08},
2070 {0x10A9, 0x2D09},
2071 {0x10AA, 0x2D0A},
2072 {0x10AB, 0x2D0B},
2073 {0x10AC, 0x2D0C},
2074 {0x10AD, 0x2D0D},
2075 {0x10AE, 0x2D0E},
2076 {0x10AF, 0x2D0F},
2077 {0x10B0, 0x2D10},
2078 {0x10B1, 0x2D11},
2079 {0x10B2, 0x2D12},
2080 {0x10B3, 0x2D13},
2081 {0x10B4, 0x2D14},
2082 {0x10B5, 0x2D15},
2083 {0x10B6, 0x2D16},
2084 {0x10B7, 0x2D17},
2085 {0x10B8, 0x2D18},
2086 {0x10B9, 0x2D19},
2087 {0x10BA, 0x2D1A},
2088 {0x10BB, 0x2D1B},
2089 {0x10BC, 0x2D1C},
2090 {0x10BD, 0x2D1D},
2091 {0x10BE, 0x2D1E},
2092 {0x10BF, 0x2D1F},
2093 {0x10C0, 0x2D20},
2094 {0x10C1, 0x2D21},
2095 {0x10C2, 0x2D22},
2096 {0x10C3, 0x2D23},
2097 {0x10C4, 0x2D24},
2098 {0x10C5, 0x2D25},
2099 {0x10C7, 0x2D27},
2100 {0x10CD, 0x2D2D},
2101 {0x13F8, 0x13F0},
2102 {0x13F9, 0x13F1},
2103 {0x13FA, 0x13F2},
2104 {0x13FB, 0x13F3},
2105 {0x13FC, 0x13F4},
2106 {0x13FD, 0x13F5},
2107 {0x1C80, 0x0432},
2108 {0x1C81, 0x0434},
2109 {0x1C82, 0x043E},
2110 {0x1C83, 0x0441},
2111 {0x1C84, 0x0442},
2112 {0x1C85, 0x0442},
2113 {0x1C86, 0x044A},
2114 {0x1C87, 0x0463},
2115 {0x1C88, 0xA64B},
2116 {0x1C90, 0x10D0},
2117 {0x1C91, 0x10D1},
2118 {0x1C92, 0x10D2},
2119 {0x1C93, 0x10D3},
2120 {0x1C94, 0x10D4},
2121 {0x1C95, 0x10D5},
2122 {0x1C96, 0x10D6},
2123 {0x1C97, 0x10D7},
2124 {0x1C98, 0x10D8},
2125 {0x1C99, 0x10D9},
2126 {0x1C9A, 0x10DA},
2127 {0x1C9B, 0x10DB},
2128 {0x1C9C, 0x10DC},
2129 {0x1C9D, 0x10DD},
2130 {0x1C9E, 0x10DE},
2131 {0x1C9F, 0x10DF},
2132 {0x1CA0, 0x10E0},
2133 {0x1CA1, 0x10E1},
2134 {0x1CA2, 0x10E2},
2135 {0x1CA3, 0x10E3},
2136 {0x1CA4, 0x10E4},
2137 {0x1CA5, 0x10E5},
2138 {0x1CA6, 0x10E6},
2139 {0x1CA7, 0x10E7},
2140 {0x1CA8, 0x10E8},
2141 {0x1CA9, 0x10E9},
2142 {0x1CAA, 0x10EA},
2143 {0x1CAB, 0x10EB},
2144 {0x1CAC, 0x10EC},
2145 {0x1CAD, 0x10ED},
2146 {0x1CAE, 0x10EE},
2147 {0x1CAF, 0x10EF},
2148 {0x1CB0, 0x10F0},
2149 {0x1CB1, 0x10F1},
2150 {0x1CB2, 0x10F2},
2151 {0x1CB3, 0x10F3},
2152 {0x1CB4, 0x10F4},
2153 {0x1CB5, 0x10F5},
2154 {0x1CB6, 0x10F6},
2155 {0x1CB7, 0x10F7},
2156 {0x1CB8, 0x10F8},
2157 {0x1CB9, 0x10F9},
2158 {0x1CBA, 0x10FA},
2159 {0x1CBD, 0x10FD},
2160 {0x1CBE, 0x10FE},
2161 {0x1CBF, 0x10FF},
2162 {0x1E00, 0x1E01},
2163 {0x1E02, 0x1E03},
2164 {0x1E04, 0x1E05},
2165 {0x1E06, 0x1E07},
2166 {0x1E08, 0x1E09},
2167 {0x1E0A, 0x1E0B},
2168 {0x1E0C, 0x1E0D},
2169 {0x1E0E, 0x1E0F},
2170 {0x1E10, 0x1E11},
2171 {0x1E12, 0x1E13},
2172 {0x1E14, 0x1E15},
2173 {0x1E16, 0x1E17},
2174 {0x1E18, 0x1E19},
2175 {0x1E1A, 0x1E1B},
2176 {0x1E1C, 0x1E1D},
2177 {0x1E1E, 0x1E1F},
2178 {0x1E20, 0x1E21},
2179 {0x1E22, 0x1E23},
2180 {0x1E24, 0x1E25},
2181 {0x1E26, 0x1E27},
2182 {0x1E28, 0x1E29},
2183 {0x1E2A, 0x1E2B},
2184 {0x1E2C, 0x1E2D},
2185 {0x1E2E, 0x1E2F},
2186 {0x1E30, 0x1E31},
2187 {0x1E32, 0x1E33},
2188 {0x1E34, 0x1E35},
2189 {0x1E36, 0x1E37},
2190 {0x1E38, 0x1E39},
2191 {0x1E3A, 0x1E3B},
2192 {0x1E3C, 0x1E3D},
2193 {0x1E3E, 0x1E3F},
2194 {0x1E40, 0x1E41},
2195 {0x1E42, 0x1E43},
2196 {0x1E44, 0x1E45},
2197 {0x1E46, 0x1E47},
2198 {0x1E48, 0x1E49},
2199 {0x1E4A, 0x1E4B},
2200 {0x1E4C, 0x1E4D},
2201 {0x1E4E, 0x1E4F},
2202 {0x1E50, 0x1E51},
2203 {0x1E52, 0x1E53},
2204 {0x1E54, 0x1E55},
2205 {0x1E56, 0x1E57},
2206 {0x1E58, 0x1E59},
2207 {0x1E5A, 0x1E5B},
2208 {0x1E5C, 0x1E5D},
2209 {0x1E5E, 0x1E5F},
2210 {0x1E60, 0x1E61},
2211 {0x1E62, 0x1E63},
2212 {0x1E64, 0x1E65},
2213 {0x1E66, 0x1E67},
2214 {0x1E68, 0x1E69},
2215 {0x1E6A, 0x1E6B},
2216 {0x1E6C, 0x1E6D},
2217 {0x1E6E, 0x1E6F},
2218 {0x1E70, 0x1E71},
2219 {0x1E72, 0x1E73},
2220 {0x1E74, 0x1E75},
2221 {0x1E76, 0x1E77},
2222 {0x1E78, 0x1E79},
2223 {0x1E7A, 0x1E7B},
2224 {0x1E7C, 0x1E7D},
2225 {0x1E7E, 0x1E7F},
2226 {0x1E80, 0x1E81},
2227 {0x1E82, 0x1E83},
2228 {0x1E84, 0x1E85},
2229 {0x1E86, 0x1E87},
2230 {0x1E88, 0x1E89},
2231 {0x1E8A, 0x1E8B},
2232 {0x1E8C, 0x1E8D},
2233 {0x1E8E, 0x1E8F},
2234 {0x1E90, 0x1E91},
2235 {0x1E92, 0x1E93},
2236 {0x1E94, 0x1E95},
2237 {0x1E9B, 0x1E61},
2238 {0x1E9E, 0x00DF},
2239 {0x1EA0, 0x1EA1},
2240 {0x1EA2, 0x1EA3},
2241 {0x1EA4, 0x1EA5},
2242 {0x1EA6, 0x1EA7},
2243 {0x1EA8, 0x1EA9},
2244 {0x1EAA, 0x1EAB},
2245 {0x1EAC, 0x1EAD},
2246 {0x1EAE, 0x1EAF},
2247 {0x1EB0, 0x1EB1},
2248 {0x1EB2, 0x1EB3},
2249 {0x1EB4, 0x1EB5},
2250 {0x1EB6, 0x1EB7},
2251 {0x1EB8, 0x1EB9},
2252 {0x1EBA, 0x1EBB},
2253 {0x1EBC, 0x1EBD},
2254 {0x1EBE, 0x1EBF},
2255 {0x1EC0, 0x1EC1},
2256 {0x1EC2, 0x1EC3},
2257 {0x1EC4, 0x1EC5},
2258 {0x1EC6, 0x1EC7},
2259 {0x1EC8, 0x1EC9},
2260 {0x1ECA, 0x1ECB},
2261 {0x1ECC, 0x1ECD},
2262 {0x1ECE, 0x1ECF},
2263 {0x1ED0, 0x1ED1},
2264 {0x1ED2, 0x1ED3},
2265 {0x1ED4, 0x1ED5},
2266 {0x1ED6, 0x1ED7},
2267 {0x1ED8, 0x1ED9},
2268 {0x1EDA, 0x1EDB},
2269 {0x1EDC, 0x1EDD},
2270 {0x1EDE, 0x1EDF},
2271 {0x1EE0, 0x1EE1},
2272 {0x1EE2, 0x1EE3},
2273 {0x1EE4, 0x1EE5},
2274 {0x1EE6, 0x1EE7},
2275 {0x1EE8, 0x1EE9},
2276 {0x1EEA, 0x1EEB},
2277 {0x1EEC, 0x1EED},
2278 {0x1EEE, 0x1EEF},
2279 {0x1EF0, 0x1EF1},
2280 {0x1EF2, 0x1EF3},
2281 {0x1EF4, 0x1EF5},
2282 {0x1EF6, 0x1EF7},
2283 {0x1EF8, 0x1EF9},
2284 {0x1EFA, 0x1EFB},
2285 {0x1EFC, 0x1EFD},
2286 {0x1EFE, 0x1EFF},
2287 {0x1F08, 0x1F00},
2288 {0x1F09, 0x1F01},
2289 {0x1F0A, 0x1F02},
2290 {0x1F0B, 0x1F03},
2291 {0x1F0C, 0x1F04},
2292 {0x1F0D, 0x1F05},
2293 {0x1F0E, 0x1F06},
2294 {0x1F0F, 0x1F07},
2295 {0x1F18, 0x1F10},
2296 {0x1F19, 0x1F11},
2297 {0x1F1A, 0x1F12},
2298 {0x1F1B, 0x1F13},
2299 {0x1F1C, 0x1F14},
2300 {0x1F1D, 0x1F15},
2301 {0x1F28, 0x1F20},
2302 {0x1F29, 0x1F21},
2303 {0x1F2A, 0x1F22},
2304 {0x1F2B, 0x1F23},
2305 {0x1F2C, 0x1F24},
2306 {0x1F2D, 0x1F25},
2307 {0x1F2E, 0x1F26},
2308 {0x1F2F, 0x1F27},
2309 {0x1F38, 0x1F30},
2310 {0x1F39, 0x1F31},
2311 {0x1F3A, 0x1F32},
2312 {0x1F3B, 0x1F33},
2313 {0x1F3C, 0x1F34},
2314 {0x1F3D, 0x1F35},
2315 {0x1F3E, 0x1F36},
2316 {0x1F3F, 0x1F37},
2317 {0x1F48, 0x1F40},
2318 {0x1F49, 0x1F41},
2319 {0x1F4A, 0x1F42},
2320 {0x1F4B, 0x1F43},
2321 {0x1F4C, 0x1F44},
2322 {0x1F4D, 0x1F45},
2323 {0x1F59, 0x1F51},
2324 {0x1F5B, 0x1F53},
2325 {0x1F5D, 0x1F55},
2326 {0x1F5F, 0x1F57},
2327 {0x1F68, 0x1F60},
2328 {0x1F69, 0x1F61},
2329 {0x1F6A, 0x1F62},
2330 {0x1F6B, 0x1F63},
2331 {0x1F6C, 0x1F64},
2332 {0x1F6D, 0x1F65},
2333 {0x1F6E, 0x1F66},
2334 {0x1F6F, 0x1F67},
2335 {0x1F88, 0x1F80},
2336 {0x1F89, 0x1F81},
2337 {0x1F8A, 0x1F82},
2338 {0x1F8B, 0x1F83},
2339 {0x1F8C, 0x1F84},
2340 {0x1F8D, 0x1F85},
2341 {0x1F8E, 0x1F86},
2342 {0x1F8F, 0x1F87},
2343 {0x1F98, 0x1F90},
2344 {0x1F99, 0x1F91},
2345 {0x1F9A, 0x1F92},
2346 {0x1F9B, 0x1F93},
2347 {0x1F9C, 0x1F94},
2348 {0x1F9D, 0x1F95},
2349 {0x1F9E, 0x1F96},
2350 {0x1F9F, 0x1F97},
2351 {0x1FA8, 0x1FA0},
2352 {0x1FA9, 0x1FA1},
2353 {0x1FAA, 0x1FA2},
2354 {0x1FAB, 0x1FA3},
2355 {0x1FAC, 0x1FA4},
2356 {0x1FAD, 0x1FA5},
2357 {0x1FAE, 0x1FA6},
2358 {0x1FAF, 0x1FA7},
2359 {0x1FB8, 0x1FB0},
2360 {0x1FB9, 0x1FB1},
2361 {0x1FBA, 0x1F70},
2362 {0x1FBB, 0x1F71},
2363 {0x1FBC, 0x1FB3},
2364 {0x1FBE, 0x03B9},
2365 {0x1FC8, 0x1F72},
2366 {0x1FC9, 0x1F73},
2367 {0x1FCA, 0x1F74},
2368 {0x1FCB, 0x1F75},
2369 {0x1FCC, 0x1FC3},
2370 {0x1FD8, 0x1FD0},
2371 {0x1FD9, 0x1FD1},
2372 {0x1FDA, 0x1F76},
2373 {0x1FDB, 0x1F77},
2374 {0x1FE8, 0x1FE0},
2375 {0x1FE9, 0x1FE1},
2376 {0x1FEA, 0x1F7A},
2377 {0x1FEB, 0x1F7B},
2378 {0x1FEC, 0x1FE5},
2379 {0x1FF8, 0x1F78},
2380 {0x1FF9, 0x1F79},
2381 {0x1FFA, 0x1F7C},
2382 {0x1FFB, 0x1F7D},
2383 {0x1FFC, 0x1FF3},
2384 {0x2126, 0x03C9},
2385 {0x212A, 0x006B},
2386 {0x212B, 0x00E5},
2387 {0x2132, 0x214E},
2388 {0x2160, 0x2170},
2389 {0x2161, 0x2171},
2390 {0x2162, 0x2172},
2391 {0x2163, 0x2173},
2392 {0x2164, 0x2174},
2393 {0x2165, 0x2175},
2394 {0x2166, 0x2176},
2395 {0x2167, 0x2177},
2396 {0x2168, 0x2178},
2397 {0x2169, 0x2179},
2398 {0x216A, 0x217A},
2399 {0x216B, 0x217B},
2400 {0x216C, 0x217C},
2401 {0x216D, 0x217D},
2402 {0x216E, 0x217E},
2403 {0x216F, 0x217F},
2404 {0x2183, 0x2184},
2405 {0x24B6, 0x24D0},
2406 {0x24B7, 0x24D1},
2407 {0x24B8, 0x24D2},
2408 {0x24B9, 0x24D3},
2409 {0x24BA, 0x24D4},
2410 {0x24BB, 0x24D5},
2411 {0x24BC, 0x24D6},
2412 {0x24BD, 0x24D7},
2413 {0x24BE, 0x24D8},
2414 {0x24BF, 0x24D9},
2415 {0x24C0, 0x24DA},
2416 {0x24C1, 0x24DB},
2417 {0x24C2, 0x24DC},
2418 {0x24C3, 0x24DD},
2419 {0x24C4, 0x24DE},
2420 {0x24C5, 0x24DF},
2421 {0x24C6, 0x24E0},
2422 {0x24C7, 0x24E1},
2423 {0x24C8, 0x24E2},
2424 {0x24C9, 0x24E3},
2425 {0x24CA, 0x24E4},
2426 {0x24CB, 0x24E5},
2427 {0x24CC, 0x24E6},
2428 {0x24CD, 0x24E7},
2429 {0x24CE, 0x24E8},
2430 {0x24CF, 0x24E9},
2431 {0x2C00, 0x2C30},
2432 {0x2C01, 0x2C31},
2433 {0x2C02, 0x2C32},
2434 {0x2C03, 0x2C33},
2435 {0x2C04, 0x2C34},
2436 {0x2C05, 0x2C35},
2437 {0x2C06, 0x2C36},
2438 {0x2C07, 0x2C37},
2439 {0x2C08, 0x2C38},
2440 {0x2C09, 0x2C39},
2441 {0x2C0A, 0x2C3A},
2442 {0x2C0B, 0x2C3B},
2443 {0x2C0C, 0x2C3C},
2444 {0x2C0D, 0x2C3D},
2445 {0x2C0E, 0x2C3E},
2446 {0x2C0F, 0x2C3F},
2447 {0x2C10, 0x2C40},
2448 {0x2C11, 0x2C41},
2449 {0x2C12, 0x2C42},
2450 {0x2C13, 0x2C43},
2451 {0x2C14, 0x2C44},
2452 {0x2C15, 0x2C45},
2453 {0x2C16, 0x2C46},
2454 {0x2C17, 0x2C47},
2455 {0x2C18, 0x2C48},
2456 {0x2C19, 0x2C49},
2457 {0x2C1A, 0x2C4A},
2458 {0x2C1B, 0x2C4B},
2459 {0x2C1C, 0x2C4C},
2460 {0x2C1D, 0x2C4D},
2461 {0x2C1E, 0x2C4E},
2462 {0x2C1F, 0x2C4F},
2463 {0x2C20, 0x2C50},
2464 {0x2C21, 0x2C51},
2465 {0x2C22, 0x2C52},
2466 {0x2C23, 0x2C53},
2467 {0x2C24, 0x2C54},
2468 {0x2C25, 0x2C55},
2469 {0x2C26, 0x2C56},
2470 {0x2C27, 0x2C57},
2471 {0x2C28, 0x2C58},
2472 {0x2C29, 0x2C59},
2473 {0x2C2A, 0x2C5A},
2474 {0x2C2B, 0x2C5B},
2475 {0x2C2C, 0x2C5C},
2476 {0x2C2D, 0x2C5D},
2477 {0x2C2E, 0x2C5E},
2478 {0x2C2F, 0x2C5F},
2479 {0x2C60, 0x2C61},
2480 {0x2C62, 0x026B},
2481 {0x2C63, 0x1D7D},
2482 {0x2C64, 0x027D},
2483 {0x2C67, 0x2C68},
2484 {0x2C69, 0x2C6A},
2485 {0x2C6B, 0x2C6C},
2486 {0x2C6D, 0x0251},
2487 {0x2C6E, 0x0271},
2488 {0x2C6F, 0x0250},
2489 {0x2C70, 0x0252},
2490 {0x2C72, 0x2C73},
2491 {0x2C75, 0x2C76},
2492 {0x2C7E, 0x023F},
2493 {0x2C7F, 0x0240},
2494 {0x2C80, 0x2C81},
2495 {0x2C82, 0x2C83},
2496 {0x2C84, 0x2C85},
2497 {0x2C86, 0x2C87},
2498 {0x2C88, 0x2C89},
2499 {0x2C8A, 0x2C8B},
2500 {0x2C8C, 0x2C8D},
2501 {0x2C8E, 0x2C8F},
2502 {0x2C90, 0x2C91},
2503 {0x2C92, 0x2C93},
2504 {0x2C94, 0x2C95},
2505 {0x2C96, 0x2C97},
2506 {0x2C98, 0x2C99},
2507 {0x2C9A, 0x2C9B},
2508 {0x2C9C, 0x2C9D},
2509 {0x2C9E, 0x2C9F},
2510 {0x2CA0, 0x2CA1},
2511 {0x2CA2, 0x2CA3},
2512 {0x2CA4, 0x2CA5},
2513 {0x2CA6, 0x2CA7},
2514 {0x2CA8, 0x2CA9},
2515 {0x2CAA, 0x2CAB},
2516 {0x2CAC, 0x2CAD},
2517 {0x2CAE, 0x2CAF},
2518 {0x2CB0, 0x2CB1},
2519 {0x2CB2, 0x2CB3},
2520 {0x2CB4, 0x2CB5},
2521 {0x2CB6, 0x2CB7},
2522 {0x2CB8, 0x2CB9},
2523 {0x2CBA, 0x2CBB},
2524 {0x2CBC, 0x2CBD},
2525 {0x2CBE, 0x2CBF},
2526 {0x2CC0, 0x2CC1},
2527 {0x2CC2, 0x2CC3},
2528 {0x2CC4, 0x2CC5},
2529 {0x2CC6, 0x2CC7},
2530 {0x2CC8, 0x2CC9},
2531 {0x2CCA, 0x2CCB},
2532 {0x2CCC, 0x2CCD},
2533 {0x2CCE, 0x2CCF},
2534 {0x2CD0, 0x2CD1},
2535 {0x2CD2, 0x2CD3},
2536 {0x2CD4, 0x2CD5},
2537 {0x2CD6, 0x2CD7},
2538 {0x2CD8, 0x2CD9},
2539 {0x2CDA, 0x2CDB},
2540 {0x2CDC, 0x2CDD},
2541 {0x2CDE, 0x2CDF},
2542 {0x2CE0, 0x2CE1},
2543 {0x2CE2, 0x2CE3},
2544 {0x2CEB, 0x2CEC},
2545 {0x2CED, 0x2CEE},
2546 {0x2CF2, 0x2CF3},
2547 {0xA640, 0xA641},
2548 {0xA642, 0xA643},
2549 {0xA644, 0xA645},
2550 {0xA646, 0xA647},
2551 {0xA648, 0xA649},
2552 {0xA64A, 0xA64B},
2553 {0xA64C, 0xA64D},
2554 {0xA64E, 0xA64F},
2555 {0xA650, 0xA651},
2556 {0xA652, 0xA653},
2557 {0xA654, 0xA655},
2558 {0xA656, 0xA657},
2559 {0xA658, 0xA659},
2560 {0xA65A, 0xA65B},
2561 {0xA65C, 0xA65D},
2562 {0xA65E, 0xA65F},
2563 {0xA660, 0xA661},
2564 {0xA662, 0xA663},
2565 {0xA664, 0xA665},
2566 {0xA666, 0xA667},
2567 {0xA668, 0xA669},
2568 {0xA66A, 0xA66B},
2569 {0xA66C, 0xA66D},
2570 {0xA680, 0xA681},
2571 {0xA682, 0xA683},
2572 {0xA684, 0xA685},
2573 {0xA686, 0xA687},
2574 {0xA688, 0xA689},
2575 {0xA68A, 0xA68B},
2576 {0xA68C, 0xA68D},
2577 {0xA68E, 0xA68F},
2578 {0xA690, 0xA691},
2579 {0xA692, 0xA693},
2580 {0xA694, 0xA695},
2581 {0xA696, 0xA697},
2582 {0xA698, 0xA699},
2583 {0xA69A, 0xA69B},
2584 {0xA722, 0xA723},
2585 {0xA724, 0xA725},
2586 {0xA726, 0xA727},
2587 {0xA728, 0xA729},
2588 {0xA72A, 0xA72B},
2589 {0xA72C, 0xA72D},
2590 {0xA72E, 0xA72F},
2591 {0xA732, 0xA733},
2592 {0xA734, 0xA735},
2593 {0xA736, 0xA737},
2594 {0xA738, 0xA739},
2595 {0xA73A, 0xA73B},
2596 {0xA73C, 0xA73D},
2597 {0xA73E, 0xA73F},
2598 {0xA740, 0xA741},
2599 {0xA742, 0xA743},
2600 {0xA744, 0xA745},
2601 {0xA746, 0xA747},
2602 {0xA748, 0xA749},
2603 {0xA74A, 0xA74B},
2604 {0xA74C, 0xA74D},
2605 {0xA74E, 0xA74F},
2606 {0xA750, 0xA751},
2607 {0xA752, 0xA753},
2608 {0xA754, 0xA755},
2609 {0xA756, 0xA757},
2610 {0xA758, 0xA759},
2611 {0xA75A, 0xA75B},
2612 {0xA75C, 0xA75D},
2613 {0xA75E, 0xA75F},
2614 {0xA760, 0xA761},
2615 {0xA762, 0xA763},
2616 {0xA764, 0xA765},
2617 {0xA766, 0xA767},
2618 {0xA768, 0xA769},
2619 {0xA76A, 0xA76B},
2620 {0xA76C, 0xA76D},
2621 {0xA76E, 0xA76F},
2622 {0xA779, 0xA77A},
2623 {0xA77B, 0xA77C},
2624 {0xA77D, 0x1D79},
2625 {0xA77E, 0xA77F},
2626 {0xA780, 0xA781},
2627 {0xA782, 0xA783},
2628 {0xA784, 0xA785},
2629 {0xA786, 0xA787},
2630 {0xA78B, 0xA78C},
2631 {0xA78D, 0x0265},
2632 {0xA790, 0xA791},
2633 {0xA792, 0xA793},
2634 {0xA796, 0xA797},
2635 {0xA798, 0xA799},
2636 {0xA79A, 0xA79B},
2637 {0xA79C, 0xA79D},
2638 {0xA79E, 0xA79F},
2639 {0xA7A0, 0xA7A1},
2640 {0xA7A2, 0xA7A3},
2641 {0xA7A4, 0xA7A5},
2642 {0xA7A6, 0xA7A7},
2643 {0xA7A8, 0xA7A9},
2644 {0xA7AA, 0x0266},
2645 {0xA7AB, 0x025C},
2646 {0xA7AC, 0x0261},
2647 {0xA7AD, 0x026C},
2648 {0xA7AE, 0x026A},
2649 {0xA7B0, 0x029E},
2650 {0xA7B1, 0x0287},
2651 {0xA7B2, 0x029D},
2652 {0xA7B3, 0xAB53},
2653 {0xA7B4, 0xA7B5},
2654 {0xA7B6, 0xA7B7},
2655 {0xA7B8, 0xA7B9},
2656 {0xA7BA, 0xA7BB},
2657 {0xA7BC, 0xA7BD},
2658 {0xA7BE, 0xA7BF},
2659 {0xA7C0, 0xA7C1},
2660 {0xA7C2, 0xA7C3},
2661 {0xA7C4, 0xA794},
2662 {0xA7C5, 0x0282},
2663 {0xA7C6, 0x1D8E},
2664 {0xA7C7, 0xA7C8},
2665 {0xA7C9, 0xA7CA},
2666 {0xA7D0, 0xA7D1},
2667 {0xA7D6, 0xA7D7},
2668 {0xA7D8, 0xA7D9},
2669 {0xA7F5, 0xA7F6},
2670 {0xAB70, 0x13A0},
2671 {0xAB71, 0x13A1},
2672 {0xAB72, 0x13A2},
2673 {0xAB73, 0x13A3},
2674 {0xAB74, 0x13A4},
2675 {0xAB75, 0x13A5},
2676 {0xAB76, 0x13A6},
2677 {0xAB77, 0x13A7},
2678 {0xAB78, 0x13A8},
2679 {0xAB79, 0x13A9},
2680 {0xAB7A, 0x13AA},
2681 {0xAB7B, 0x13AB},
2682 {0xAB7C, 0x13AC},
2683 {0xAB7D, 0x13AD},
2684 {0xAB7E, 0x13AE},
2685 {0xAB7F, 0x13AF},
2686 {0xAB80, 0x13B0},
2687 {0xAB81, 0x13B1},
2688 {0xAB82, 0x13B2},
2689 {0xAB83, 0x13B3},
2690 {0xAB84, 0x13B4},
2691 {0xAB85, 0x13B5},
2692 {0xAB86, 0x13B6},
2693 {0xAB87, 0x13B7},
2694 {0xAB88, 0x13B8},
2695 {0xAB89, 0x13B9},
2696 {0xAB8A, 0x13BA},
2697 {0xAB8B, 0x13BB},
2698 {0xAB8C, 0x13BC},
2699 {0xAB8D, 0x13BD},
2700 {0xAB8E, 0x13BE},
2701 {0xAB8F, 0x13BF},
2702 {0xAB90, 0x13C0},
2703 {0xAB91, 0x13C1},
2704 {0xAB92, 0x13C2},
2705 {0xAB93, 0x13C3},
2706 {0xAB94, 0x13C4},
2707 {0xAB95, 0x13C5},
2708 {0xAB96, 0x13C6},
2709 {0xAB97, 0x13C7},
2710 {0xAB98, 0x13C8},
2711 {0xAB99, 0x13C9},
2712 {0xAB9A, 0x13CA},
2713 {0xAB9B, 0x13CB},
2714 {0xAB9C, 0x13CC},
2715 {0xAB9D, 0x13CD},
2716 {0xAB9E, 0x13CE},
2717 {0xAB9F, 0x13CF},
2718 {0xABA0, 0x13D0},
2719 {0xABA1, 0x13D1},
2720 {0xABA2, 0x13D2},
2721 {0xABA3, 0x13D3},
2722 {0xABA4, 0x13D4},
2723 {0xABA5, 0x13D5},
2724 {0xABA6, 0x13D6},
2725 {0xABA7, 0x13D7},
2726 {0xABA8, 0x13D8},
2727 {0xABA9, 0x13D9},
2728 {0xABAA, 0x13DA},
2729 {0xABAB, 0x13DB},
2730 {0xABAC, 0x13DC},
2731 {0xABAD, 0x13DD},
2732 {0xABAE, 0x13DE},
2733 {0xABAF, 0x13DF},
2734 {0xABB0, 0x13E0},
2735 {0xABB1, 0x13E1},
2736 {0xABB2, 0x13E2},
2737 {0xABB3, 0x13E3},
2738 {0xABB4, 0x13E4},
2739 {0xABB5, 0x13E5},
2740 {0xABB6, 0x13E6},
2741 {0xABB7, 0x13E7},
2742 {0xABB8, 0x13E8},
2743 {0xABB9, 0x13E9},
2744 {0xABBA, 0x13EA},
2745 {0xABBB, 0x13EB},
2746 {0xABBC, 0x13EC},
2747 {0xABBD, 0x13ED},
2748 {0xABBE, 0x13EE},
2749 {0xABBF, 0x13EF},
2750 {0xFF21, 0xFF41},
2751 {0xFF22, 0xFF42},
2752 {0xFF23, 0xFF43},
2753 {0xFF24, 0xFF44},
2754 {0xFF25, 0xFF45},
2755 {0xFF26, 0xFF46},
2756 {0xFF27, 0xFF47},
2757 {0xFF28, 0xFF48},
2758 {0xFF29, 0xFF49},
2759 {0xFF2A, 0xFF4A},
2760 {0xFF2B, 0xFF4B},
2761 {0xFF2C, 0xFF4C},
2762 {0xFF2D, 0xFF4D},
2763 {0xFF2E, 0xFF4E},
2764 {0xFF2F, 0xFF4F},
2765 {0xFF30, 0xFF50},
2766 {0xFF31, 0xFF51},
2767 {0xFF32, 0xFF52},
2768 {0xFF33, 0xFF53},
2769 {0xFF34, 0xFF54},
2770 {0xFF35, 0xFF55},
2771 {0xFF36, 0xFF56},
2772 {0xFF37, 0xFF57},
2773 {0xFF38, 0xFF58},
2774 {0xFF39, 0xFF59},
2775 {0xFF3A, 0xFF5A},
2776 {0x10400, 0x10428},
2777 {0x10401, 0x10429},
2778 {0x10402, 0x1042A},
2779 {0x10403, 0x1042B},
2780 {0x10404, 0x1042C},
2781 {0x10405, 0x1042D},
2782 {0x10406, 0x1042E},
2783 {0x10407, 0x1042F},
2784 {0x10408, 0x10430},
2785 {0x10409, 0x10431},
2786 {0x1040A, 0x10432},
2787 {0x1040B, 0x10433},
2788 {0x1040C, 0x10434},
2789 {0x1040D, 0x10435},
2790 {0x1040E, 0x10436},
2791 {0x1040F, 0x10437},
2792 {0x10410, 0x10438},
2793 {0x10411, 0x10439},
2794 {0x10412, 0x1043A},
2795 {0x10413, 0x1043B},
2796 {0x10414, 0x1043C},
2797 {0x10415, 0x1043D},
2798 {0x10416, 0x1043E},
2799 {0x10417, 0x1043F},
2800 {0x10418, 0x10440},
2801 {0x10419, 0x10441},
2802 {0x1041A, 0x10442},
2803 {0x1041B, 0x10443},
2804 {0x1041C, 0x10444},
2805 {0x1041D, 0x10445},
2806 {0x1041E, 0x10446},
2807 {0x1041F, 0x10447},
2808 {0x10420, 0x10448},
2809 {0x10421, 0x10449},
2810 {0x10422, 0x1044A},
2811 {0x10423, 0x1044B},
2812 {0x10424, 0x1044C},
2813 {0x10425, 0x1044D},
2814 {0x10426, 0x1044E},
2815 {0x10427, 0x1044F},
2816 {0x104B0, 0x104D8},
2817 {0x104B1, 0x104D9},
2818 {0x104B2, 0x104DA},
2819 {0x104B3, 0x104DB},
2820 {0x104B4, 0x104DC},
2821 {0x104B5, 0x104DD},
2822 {0x104B6, 0x104DE},
2823 {0x104B7, 0x104DF},
2824 {0x104B8, 0x104E0},
2825 {0x104B9, 0x104E1},
2826 {0x104BA, 0x104E2},
2827 {0x104BB, 0x104E3},
2828 {0x104BC, 0x104E4},
2829 {0x104BD, 0x104E5},
2830 {0x104BE, 0x104E6},
2831 {0x104BF, 0x104E7},
2832 {0x104C0, 0x104E8},
2833 {0x104C1, 0x104E9},
2834 {0x104C2, 0x104EA},
2835 {0x104C3, 0x104EB},
2836 {0x104C4, 0x104EC},
2837 {0x104C5, 0x104ED},
2838 {0x104C6, 0x104EE},
2839 {0x104C7, 0x104EF},
2840 {0x104C8, 0x104F0},
2841 {0x104C9, 0x104F1},
2842 {0x104CA, 0x104F2},
2843 {0x104CB, 0x104F3},
2844 {0x104CC, 0x104F4},
2845 {0x104CD, 0x104F5},
2846 {0x104CE, 0x104F6},
2847 {0x104CF, 0x104F7},
2848 {0x104D0, 0x104F8},
2849 {0x104D1, 0x104F9},
2850 {0x104D2, 0x104FA},
2851 {0x104D3, 0x104FB},
2852 {0x10570, 0x10597},
2853 {0x10571, 0x10598},
2854 {0x10572, 0x10599},
2855 {0x10573, 0x1059A},
2856 {0x10574, 0x1059B},
2857 {0x10575, 0x1059C},
2858 {0x10576, 0x1059D},
2859 {0x10577, 0x1059E},
2860 {0x10578, 0x1059F},
2861 {0x10579, 0x105A0},
2862 {0x1057A, 0x105A1},
2863 {0x1057C, 0x105A3},
2864 {0x1057D, 0x105A4},
2865 {0x1057E, 0x105A5},
2866 {0x1057F, 0x105A6},
2867 {0x10580, 0x105A7},
2868 {0x10581, 0x105A8},
2869 {0x10582, 0x105A9},
2870 {0x10583, 0x105AA},
2871 {0x10584, 0x105AB},
2872 {0x10585, 0x105AC},
2873 {0x10586, 0x105AD},
2874 {0x10587, 0x105AE},
2875 {0x10588, 0x105AF},
2876 {0x10589, 0x105B0},
2877 {0x1058A, 0x105B1},
2878 {0x1058C, 0x105B3},
2879 {0x1058D, 0x105B4},
2880 {0x1058E, 0x105B5},
2881 {0x1058F, 0x105B6},
2882 {0x10590, 0x105B7},
2883 {0x10591, 0x105B8},
2884 {0x10592, 0x105B9},
2885 {0x10594, 0x105BB},
2886 {0x10595, 0x105BC},
2887 {0x10C80, 0x10CC0},
2888 {0x10C81, 0x10CC1},
2889 {0x10C82, 0x10CC2},
2890 {0x10C83, 0x10CC3},
2891 {0x10C84, 0x10CC4},
2892 {0x10C85, 0x10CC5},
2893 {0x10C86, 0x10CC6},
2894 {0x10C87, 0x10CC7},
2895 {0x10C88, 0x10CC8},
2896 {0x10C89, 0x10CC9},
2897 {0x10C8A, 0x10CCA},
2898 {0x10C8B, 0x10CCB},
2899 {0x10C8C, 0x10CCC},
2900 {0x10C8D, 0x10CCD},
2901 {0x10C8E, 0x10CCE},
2902 {0x10C8F, 0x10CCF},
2903 {0x10C90, 0x10CD0},
2904 {0x10C91, 0x10CD1},
2905 {0x10C92, 0x10CD2},
2906 {0x10C93, 0x10CD3},
2907 {0x10C94, 0x10CD4},
2908 {0x10C95, 0x10CD5},
2909 {0x10C96, 0x10CD6},
2910 {0x10C97, 0x10CD7},
2911 {0x10C98, 0x10CD8},
2912 {0x10C99, 0x10CD9},
2913 {0x10C9A, 0x10CDA},
2914 {0x10C9B, 0x10CDB},
2915 {0x10C9C, 0x10CDC},
2916 {0x10C9D, 0x10CDD},
2917 {0x10C9E, 0x10CDE},
2918 {0x10C9F, 0x10CDF},
2919 {0x10CA0, 0x10CE0},
2920 {0x10CA1, 0x10CE1},
2921 {0x10CA2, 0x10CE2},
2922 {0x10CA3, 0x10CE3},
2923 {0x10CA4, 0x10CE4},
2924 {0x10CA5, 0x10CE5},
2925 {0x10CA6, 0x10CE6},
2926 {0x10CA7, 0x10CE7},
2927 {0x10CA8, 0x10CE8},
2928 {0x10CA9, 0x10CE9},
2929 {0x10CAA, 0x10CEA},
2930 {0x10CAB, 0x10CEB},
2931 {0x10CAC, 0x10CEC},
2932 {0x10CAD, 0x10CED},
2933 {0x10CAE, 0x10CEE},
2934 {0x10CAF, 0x10CEF},
2935 {0x10CB0, 0x10CF0},
2936 {0x10CB1, 0x10CF1},
2937 {0x10CB2, 0x10CF2},
2938 {0x118A0, 0x118C0},
2939 {0x118A1, 0x118C1},
2940 {0x118A2, 0x118C2},
2941 {0x118A3, 0x118C3},
2942 {0x118A4, 0x118C4},
2943 {0x118A5, 0x118C5},
2944 {0x118A6, 0x118C6},
2945 {0x118A7, 0x118C7},
2946 {0x118A8, 0x118C8},
2947 {0x118A9, 0x118C9},
2948 {0x118AA, 0x118CA},
2949 {0x118AB, 0x118CB},
2950 {0x118AC, 0x118CC},
2951 {0x118AD, 0x118CD},
2952 {0x118AE, 0x118CE},
2953 {0x118AF, 0x118CF},
2954 {0x118B0, 0x118D0},
2955 {0x118B1, 0x118D1},
2956 {0x118B2, 0x118D2},
2957 {0x118B3, 0x118D3},
2958 {0x118B4, 0x118D4},
2959 {0x118B5, 0x118D5},
2960 {0x118B6, 0x118D6},
2961 {0x118B7, 0x118D7},
2962 {0x118B8, 0x118D8},
2963 {0x118B9, 0x118D9},
2964 {0x118BA, 0x118DA},
2965 {0x118BB, 0x118DB},
2966 {0x118BC, 0x118DC},
2967 {0x118BD, 0x118DD},
2968 {0x118BE, 0x118DE},
2969 {0x118BF, 0x118DF},
2970 {0x16E40, 0x16E60},
2971 {0x16E41, 0x16E61},
2972 {0x16E42, 0x16E62},
2973 {0x16E43, 0x16E63},
2974 {0x16E44, 0x16E64},
2975 {0x16E45, 0x16E65},
2976 {0x16E46, 0x16E66},
2977 {0x16E47, 0x16E67},
2978 {0x16E48, 0x16E68},
2979 {0x16E49, 0x16E69},
2980 {0x16E4A, 0x16E6A},
2981 {0x16E4B, 0x16E6B},
2982 {0x16E4C, 0x16E6C},
2983 {0x16E4D, 0x16E6D},
2984 {0x16E4E, 0x16E6E},
2985 {0x16E4F, 0x16E6F},
2986 {0x16E50, 0x16E70},
2987 {0x16E51, 0x16E71},
2988 {0x16E52, 0x16E72},
2989 {0x16E53, 0x16E73},
2990 {0x16E54, 0x16E74},
2991 {0x16E55, 0x16E75},
2992 {0x16E56, 0x16E76},
2993 {0x16E57, 0x16E77},
2994 {0x16E58, 0x16E78},
2995 {0x16E59, 0x16E79},
2996 {0x16E5A, 0x16E7A},
2997 {0x16E5B, 0x16E7B},
2998 {0x16E5C, 0x16E7C},
2999 {0x16E5D, 0x16E7D},
3000 {0x16E5E, 0x16E7E},
3001 {0x16E5F, 0x16E7F},
3002 {0x1E900, 0x1E922},
3003 {0x1E901, 0x1E923},
3004 {0x1E902, 0x1E924},
3005 {0x1E903, 0x1E925},
3006 {0x1E904, 0x1E926},
3007 {0x1E905, 0x1E927},
3008 {0x1E906, 0x1E928},
3009 {0x1E907, 0x1E929},
3010 {0x1E908, 0x1E92A},
3011 {0x1E909, 0x1E92B},
3012 {0x1E90A, 0x1E92C},
3013 {0x1E90B, 0x1E92D},
3014 {0x1E90C, 0x1E92E},
3015 {0x1E90D, 0x1E92F},
3016 {0x1E90E, 0x1E930},
3017 {0x1E90F, 0x1E931},
3018 {0x1E910, 0x1E932},
3019 {0x1E911, 0x1E933},
3020 {0x1E912, 0x1E934},
3021 {0x1E913, 0x1E935},
3022 {0x1E914, 0x1E936},
3023 {0x1E915, 0x1E937},
3024 {0x1E916, 0x1E938},
3025 {0x1E917, 0x1E939},
3026 {0x1E918, 0x1E93A},
3027 {0x1E919, 0x1E93B},
3028 {0x1E91A, 0x1E93C},
3029 {0x1E91B, 0x1E93D},
3030 {0x1E91C, 0x1E93E},
3031 {0x1E91D, 0x1E93F},
3032 {0x1E91E, 0x1E940},
3033 {0x1E91F, 0x1E941},
3034 {0x1E920, 0x1E942},
3035 {0x1E921, 0x1E943},
3036};
3037
/* Case-folding table for characters whose folded form is more than one
 * code point. Each row is {source, r1, r2, r3}: the source code point
 * followed by its two or three replacement code points; an unused third
 * slot holds 0. Rows are sorted by source code point because the table is
 * searched with bsearch(). The table is only ever read, so it is const.
 */
static const int fold2[][ 4 ] = {
    {0x00DF, 0x0073, 0x0073, 0x0},
    {0x0130, 0x0069, 0x0307, 0x0},
    {0x0149, 0x02BC, 0x006E, 0x0},
    {0x01F0, 0x006A, 0x030C, 0x0},
    {0x0390, 0x03B9, 0x0308, 0x0301},
    {0x03B0, 0x03C5, 0x0308, 0x0301},
    {0x0587, 0x0565, 0x0582, 0x0},
    {0x1E96, 0x0068, 0x0331, 0x0},
    {0x1E97, 0x0074, 0x0308, 0x0},
    {0x1E98, 0x0077, 0x030A, 0x0},
    {0x1E99, 0x0079, 0x030A, 0x0},
    {0x1E9A, 0x0061, 0x02BE, 0x0},
    {0x1E9E, 0x0073, 0x0073, 0x0},
    {0x1F50, 0x03C5, 0x0313, 0x0},
    {0x1F52, 0x03C5, 0x0313, 0x0300},
    {0x1F54, 0x03C5, 0x0313, 0x0301},
    {0x1F56, 0x03C5, 0x0313, 0x0342},
    {0x1F80, 0x1F00, 0x03B9, 0x0},
    {0x1F81, 0x1F01, 0x03B9, 0x0},
    {0x1F82, 0x1F02, 0x03B9, 0x0},
    {0x1F83, 0x1F03, 0x03B9, 0x0},
    {0x1F84, 0x1F04, 0x03B9, 0x0},
    {0x1F85, 0x1F05, 0x03B9, 0x0},
    {0x1F86, 0x1F06, 0x03B9, 0x0},
    {0x1F87, 0x1F07, 0x03B9, 0x0},
    {0x1F88, 0x1F00, 0x03B9, 0x0},
    {0x1F89, 0x1F01, 0x03B9, 0x0},
    {0x1F8A, 0x1F02, 0x03B9, 0x0},
    {0x1F8B, 0x1F03, 0x03B9, 0x0},
    {0x1F8C, 0x1F04, 0x03B9, 0x0},
    {0x1F8D, 0x1F05, 0x03B9, 0x0},
    {0x1F8E, 0x1F06, 0x03B9, 0x0},
    {0x1F8F, 0x1F07, 0x03B9, 0x0},
    {0x1F90, 0x1F20, 0x03B9, 0x0},
    {0x1F91, 0x1F21, 0x03B9, 0x0},
    {0x1F92, 0x1F22, 0x03B9, 0x0},
    {0x1F93, 0x1F23, 0x03B9, 0x0},
    {0x1F94, 0x1F24, 0x03B9, 0x0},
    {0x1F95, 0x1F25, 0x03B9, 0x0},
    {0x1F96, 0x1F26, 0x03B9, 0x0},
    {0x1F97, 0x1F27, 0x03B9, 0x0},
    {0x1F98, 0x1F20, 0x03B9, 0x0},
    {0x1F99, 0x1F21, 0x03B9, 0x0},
    {0x1F9A, 0x1F22, 0x03B9, 0x0},
    {0x1F9B, 0x1F23, 0x03B9, 0x0},
    {0x1F9C, 0x1F24, 0x03B9, 0x0},
    {0x1F9D, 0x1F25, 0x03B9, 0x0},
    {0x1F9E, 0x1F26, 0x03B9, 0x0},
    {0x1F9F, 0x1F27, 0x03B9, 0x0},
    {0x1FA0, 0x1F60, 0x03B9, 0x0},
    {0x1FA1, 0x1F61, 0x03B9, 0x0},
    {0x1FA2, 0x1F62, 0x03B9, 0x0},
    {0x1FA3, 0x1F63, 0x03B9, 0x0},
    {0x1FA4, 0x1F64, 0x03B9, 0x0},
    {0x1FA5, 0x1F65, 0x03B9, 0x0},
    {0x1FA6, 0x1F66, 0x03B9, 0x0},
    {0x1FA7, 0x1F67, 0x03B9, 0x0},
    {0x1FA8, 0x1F60, 0x03B9, 0x0},
    {0x1FA9, 0x1F61, 0x03B9, 0x0},
    {0x1FAA, 0x1F62, 0x03B9, 0x0},
    {0x1FAB, 0x1F63, 0x03B9, 0x0},
    {0x1FAC, 0x1F64, 0x03B9, 0x0},
    {0x1FAD, 0x1F65, 0x03B9, 0x0},
    {0x1FAE, 0x1F66, 0x03B9, 0x0},
    {0x1FAF, 0x1F67, 0x03B9, 0x0},
    {0x1FB2, 0x1F70, 0x03B9, 0x0},
    {0x1FB3, 0x03B1, 0x03B9, 0x0},
    {0x1FB4, 0x03AC, 0x03B9, 0x0},
    {0x1FB6, 0x03B1, 0x0342, 0x0},
    {0x1FB7, 0x03B1, 0x0342, 0x03B9},
    {0x1FBC, 0x03B1, 0x03B9, 0x0},
    {0x1FC2, 0x1F74, 0x03B9, 0x0},
    {0x1FC3, 0x03B7, 0x03B9, 0x0},
    {0x1FC4, 0x03AE, 0x03B9, 0x0},
    {0x1FC6, 0x03B7, 0x0342, 0x0},
    {0x1FC7, 0x03B7, 0x0342, 0x03B9},
    {0x1FCC, 0x03B7, 0x03B9, 0x0},
    {0x1FD2, 0x03B9, 0x0308, 0x0300},
    {0x1FD3, 0x03B9, 0x0308, 0x0301},
    {0x1FD6, 0x03B9, 0x0342, 0x0},
    {0x1FD7, 0x03B9, 0x0308, 0x0342},
    {0x1FE2, 0x03C5, 0x0308, 0x0300},
    {0x1FE3, 0x03C5, 0x0308, 0x0301},
    {0x1FE4, 0x03C1, 0x0313, 0x0},
    {0x1FE6, 0x03C5, 0x0342, 0x0},
    {0x1FE7, 0x03C5, 0x0308, 0x0342},
    {0x1FF2, 0x1F7C, 0x03B9, 0x0},
    {0x1FF3, 0x03C9, 0x03B9, 0x0},
    {0x1FF4, 0x03CE, 0x03B9, 0x0},
    {0x1FF6, 0x03C9, 0x0342, 0x0},
    {0x1FF7, 0x03C9, 0x0342, 0x03B9},
    {0x1FFC, 0x03C9, 0x03B9, 0x0},
    {0xFB00, 0x0066, 0x0066, 0x0},
    {0xFB01, 0x0066, 0x0069, 0x0},
    {0xFB02, 0x0066, 0x006C, 0x0},
    {0xFB03, 0x0066, 0x0066, 0x0069},
    {0xFB04, 0x0066, 0x0066, 0x006C},
    {0xFB05, 0x0073, 0x0074, 0x0},
    {0xFB06, 0x0073, 0x0074, 0x0},
    {0xFB13, 0x0574, 0x0576, 0x0},
    {0xFB14, 0x0574, 0x0565, 0x0},
    {0xFB15, 0x0574, 0x056B, 0x0},
    {0xFB16, 0x057E, 0x0576, 0x0},
    {0xFB17, 0x0574, 0x056D, 0x0},
};
3144
3145
3146/* Branchless UTF-8 decoder
3147 * https://raw.githubusercontent.com/skeeto/branchless-utf8/
3148 * This is free and unencumbered software released into the public domain.
3149 */
3150
3151/* Decode the next character, C, from BUF, reporting errors in E.
3152 *
3153 * Since this is a branchless decoder, four bytes will be read from the
3154 * buffer regardless of the actual length of the next character. This
3155 * means the buffer _must_ have at least three bytes of zero padding
3156 * following the end of the data stream.
3157 *
3158 * Errors are reported in E, which will be non-zero if the parsed
3159 * character was somehow invalid: invalid byte sequence, non-canonical
3160 * encoding, or a surrogate half.
3161 *
3162 * The function returns a pointer to the next character. When an error
3163 * occurs, this pointer will be a guess that depends on the particular
3164 * error, but it will always advance at least one byte.
3165 */
/* Sequence length selected by the top five bits of a lead byte
 * (index = byte >> 3, so every entry covers eight byte values).
 * 0 marks a byte that cannot start a sequence.
 */
static const char lengths[] = {
    /* 0x00-0x3F */ 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40-0x7F */ 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x80-0xBF */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC0-0xDF */ 2, 2, 2, 2,
    /* 0xE0-0xEF */ 3, 3,
    /* 0xF0-0xF7 */ 4,
    /* 0xF8-0xFF */ 0
};
3170
3171static C_char *utf8_decode(C_char *buf, C_u32 *c, int *e)
3172{
3173 static const int masks[] = {0x00, 0x7f, 0x1f, 0x0f, 0x07};
3174 static const uint32_t mins[] = {4194304, 0, 128, 2048, 65536};
3175 static const int shiftc[] = {0, 18, 12, 6, 0};
3176 static const int shifte[] = {0, 6, 4, 2, 0};
3177
3178#ifdef DEBUGBUILD
3179 if(buf == NULL)
3180 C_panic_hook(C_text("possibly invalid string index"));
3181#endif
3182
3183 unsigned char *s = (unsigned char *)buf;
3184 int len = lengths[s[0] >> 3];
3185
3186 /* Compute the pointer to the next character early so that the next
3187 * iteration can start working on the next character. Neither Clang
3188 * nor GCC figure out this reordering on their own.
3189 */
3190 unsigned char *next = s + len + !len;
3191
3192 /* Assume a four-byte character and load four bytes. Unused bits are
3193 * shifted out.
3194 */
3195 *c = (C_u32)(s[0] & masks[len]) << 18;
3196 *c |= (C_u32)(s[1] & 0x3f) << 12;
3197 *c |= (C_u32)(s[2] & 0x3f) << 6;
3198 *c |= (C_u32)(s[3] & 0x3f) << 0;
3199 *c >>= shiftc[len];
3200
3201 /* Accumulate the various error conditions. */
3202 *e = (*c < mins[len]) << 6; // non-canonical encoding
3203 *e |= ((*c >> 11) == 0x1b) << 7; // surrogate half?
3204 *e |= (*c > 0x10FFFF) << 8; // out of range?
3205 *e |= (s[1] & 0xc0) >> 2;
3206 *e |= (s[2] & 0xc0) >> 4;
3207 *e |= (s[3] ) >> 6;
3208 *e ^= 0x2a; // top two bits of each tail byte correct?
3209 *e >>= shifte[len];
3210
3211 /* now make all that optimization pointless... */
3212 if(*e) {
3213 *c = 0xdc00 | *s;
3214 return (C_char *)s + 1;
3215 }
3216
3217 return (C_char *)next;
3218}
3219/* */
3220
3221static C_char *utf8_encode(C_u32 u, C_char *p1)
3222{
3223 unsigned char *p = (unsigned char *)p1;
3224 if(u < 0x80) *(p++) = u;
3225 else if((u & 0xff00) == 0xdc00) {
3226 *(p++) = u & 0xff;
3227 } else if(u < 0x800) {
3228 *(p++) = (u >> 6) | 0xC0;
3229 *(p++) = (u & 0x3F) | 0x80;
3230 } else if(u < 0x10000) {
3231 *(p++) = (u >> 12) | 0xE0;
3232 *(p++) = ((u >> 6) & 0x3F) | 0x80;
3233 *(p++) = (u & 0x3F) | 0x80;
3234 } else if(u < 0x110000) {
3235 *(p++) = (u >> 18) | 0xF0;
3236 *(p++) = ((u >> 12) & 0x3F) | 0x80;
3237 *(p++) = ((u >> 6) & 0x3F) | 0x80;
3238 *(p++) = (u & 0x3F) | 0x80;
3239 }
3240 return (C_char *)p;
3241}
3242
3243static C_char *utf_index1(C_word s, C_word i)
3244{
3245 C_word i0 = C_unfix(C_block_item(s, 2));
3246 C_word count = C_unfix(C_block_item(s, 1));
3247 C_word off = 0, index = 0;
3248 C_char *p1, *p = C_c_string(C_block_item(s, 0));
3249 int e;
3250 C_u32 c;
3251 if(i >= i0) {
3252 p += off = C_unfix(C_block_item(s, 3));
3253 index = i0;
3254 }
3255 while(index <= count) {
3256 if(index == i) {
3257 C_set_block_item(s, 2, C_fix(index));
3258 C_set_block_item(s, 3, C_fix(off));
3259 return p;
3260 }
3261 p1 = p;
3262 p = utf8_decode(p, &c, &e);
3263 ++index;
3264 off += p - p1;
3265 }
3266 return NULL;
3267}
3268
3269static C_char *utf_index(C_word s, C_word i)
3270{
3271 C_word bv = C_block_item(s, 0);
3272 if(i == 0) {
3273 C_set_block_item(s, 2, C_fix(0));
3274 C_set_block_item(s, 3, C_fix(0));
3275 return C_c_string(bv);
3276 } else if(C_header_size(bv) - 1 == C_unfix(C_block_item(s, 1))) {
3277 /* len == codepoints */
3278 C_set_block_item(s, 2, C_fix(i));
3279 C_set_block_item(s, 3, C_fix(i));
3280 return C_c_string(bv) + i;
3281 }
3282 return utf_index1(s, i);
3283}
3284
3285C_regparm C_word C_utf_subchar(C_word s, C_word i)
3286{
3287 C_char *p = utf_index(s, C_unfix(i));
3288 int e;
3289 C_u32 c;
3290 utf8_decode(p, &c, &e);
3291 return C_make_character(c);
3292}
3293
3294C_regparm C_word C_utf_setsubchar(C_word s, C_word i, C_word c)
3295{
3296 C_char buf[ 4 ];
3297 C_char *p = utf8_encode(C_character_code(c), buf);
3298 int e;
3299 C_u32 old;
3300 C_char *p1 = utf_index(s, C_unfix(i));
3301 C_char *p2 = utf8_decode(p1, &old, &e);
3302 int nl = p - buf, ol = p2 - p1;
3303 C_word bv = C_block_item(s, 0);
3304 C_word bvlen = C_header_size(bv) - 1;
3305 int prefix = C_unfix(C_block_item(s, 3)); /* offset */
3306 int suffix = bvlen - prefix - ol;
3307
3308 if(nl > ol) {
3309 int tl = bvlen + nl - ol;
3310 if(C_in_scratchspacep(bv)) C_mutate_scratch_slot(NULL, bv);
3311 C_word bvn = C_scratch_alloc(C_SIZEOF_BYTEVECTOR(tl + 1));
3312 C_block_header_init(bvn, C_make_header(C_BYTEVECTOR_TYPE, tl + 1));
3313 if(prefix) C_memcpy(C_c_string(bvn), C_c_string(bv), prefix);
3314 C_memcpy((C_char *)C_data_pointer(bvn) + prefix, buf, nl);
3315 C_memcpy((C_char *)C_data_pointer(bvn) + prefix + nl,
3316 (C_char *)C_data_pointer(bv) + prefix + ol, suffix + 1); /* include 0 byte */
3317 C_mutate_slot(&C_block_item(s, 0), bvn);
3318 C_mutate_scratch_slot(&C_block_item(s, 0), bvn);
3319 } else if(nl < ol) {
3320 C_memcpy(p1, buf, nl);
3321 C_memmove(p1 + nl, p1 + ol, suffix + 1); /* include 0 byte */
3322 C_block_header_init(bv, C_make_header(C_BYTEVECTOR_TYPE, bvlen - (ol - nl) + 1));
3323 } else C_memcpy(p1, buf, nl);
3324
3325 return C_SCHEME_UNDEFINED;
3326}
3327
3328/* copy c bytes of bv into s at position i, occupying len characters */
3329C_regparm C_word C_utf_overwrite(C_word s, C_word i, C_word len, C_word bv,
3330 C_word c)
3331{
3332 C_word bvs = C_block_item(s, 0);
3333 C_word bvlen = C_header_size(bvs) - 1;
3334 C_char *p1 = utf_index(s, C_unfix(i));
3335 C_char *p2 = utf_index(s, C_unfix(i) + C_unfix(len));
3336 int count = C_unfix(c);
3337 int d = p2 - p1;
3338 int prefix = p1 - (C_char *)C_data_pointer(bvs);
3339 int suffix = bvlen - prefix - d;
3340
3341 if(count > d) {
3342 int tl = bvlen + count - d;
3343 C_word bvn = C_scratch_alloc(C_SIZEOF_BYTEVECTOR(tl + 1));
3344 if(C_in_scratchspacep(bvs)) C_mutate_scratch_slot(NULL, bvs);
3345 C_block_header_init(bvn, C_make_header(C_BYTEVECTOR_TYPE, tl + 1));
3346 if(prefix) C_memcpy(C_c_string(bvn), C_data_pointer(bvs), prefix);
3347 C_memcpy((C_char *)C_data_pointer(bvn) + prefix, (C_char *)C_data_pointer(bv),
3348 count);
3349 C_memcpy((C_char *)C_data_pointer(bvn) + prefix + count,
3350 p2, suffix + 1); /* include 0 byte */
3351 C_mutate_slot(&C_block_item(s, 0), bvn);
3352 C_mutate_scratch_slot(&C_block_item(s, 0), bvn);
3353 } else if(count < d && count) {
3354 C_memcpy(p1, C_data_pointer(bv), count);
3355 C_memmove(p1 + count, p2, suffix + 1); /* include 0 byte */
3356 C_block_header_init(bvs, C_make_header(C_BYTEVECTOR_TYPE,
3357 bvlen - (d - count) + 1));
3358 } else if(count) C_memcpy(p1, C_data_pointer(bv), count);
3359
3360 return C_SCHEME_UNDEFINED;
3361}
3362
3363C_regparm C_word C_utf_compare(C_word s1, C_word s2, C_word start1, C_word start2,
3364 C_word len)
3365{
3366 C_char *p1 = utf_index(s1, C_unfix(start1));
3367 C_char *p2 = utf_index(s2, C_unfix(start2));
3368 int e, n = C_unfix(len);
3369 while(n--) {
3370 C_u32 c1, c2;
3371 p1 = utf8_decode(p1, &c1, &e);
3372 p2 = utf8_decode(p2, &c2, &e);
3373 if(c1 != c2) return C_fix((C_word)c1 - (C_word)c2);
3374 }
3375 return C_fix(0);
3376}
3377
3378C_regparm C_word C_utf_compare_ci(C_word s1, C_word s2, C_word start1, C_word start2, C_word len)
3379{
3380 C_char *p1 = utf_index(s1, C_unfix(start1));
3381 C_char *p2 = utf_index(s2, C_unfix(start2));
3382 int e, n = C_unfix(len);
3383 while(n--) {
3384 C_u32 c1, c2;
3385 int *m, r1, r2, i;
3386 p1 = utf8_decode(p1, &c1, &e);
3387 p2 = utf8_decode(p2, &c2, &e);
3388 if(c1 >= 'A' && c1 <= 'Z') r1 = c1 + 32;
3389 else r1 = c1;
3390 if(c2 >= 'A' && c2 <= 'Z') r2 = c2 + 32;
3391 else r2 = c2;
3392 if(r1 == r2) continue;
3393 if(r1 < 128 || r2 < 128) goto fail;
3394 m = bsearch(&r1, fold2, nelem(fold2), sizeof(*fold2), &runemapcmp);
3395 if(m) {
3396 for(i = 1; i < 3; ++i) {
3397 if(m[ i ] == 0) break;
3398 if(m[ i ] != c2) return C_fix(m[ i ] - c2);
3399 if(i != 2 && m[ i + 1 ] != 0) p2 = utf8_decode(p2, &c2, &e);
3400 }
3401 } else {
3402 m = bsearch(&r1, fold1, nelem(fold1), sizeof(*fold1), &runemapcmp);
3403 if(m) {
3404 if(m[ 1 ] != c2) return C_fix(m[ 1 ] - c2);
3405 }
3406 }
3407 m = bsearch(&r2, fold2, nelem(fold2), sizeof(*fold2), &runemapcmp);
3408 if(m) {
3409 for(i = 1; i < 3; ++i) {
3410 if(m[ i ] == 0) break;
3411 if(c1 != m[ i ]) return C_fix(c1 - m[ i ]);
3412 if(i != 2 && m[ i + 1 ]) p1 = utf8_decode(p1, &c1, &e);
3413 }
3414 } else {
3415 m = bsearch(&r2, fold1, nelem(fold1), sizeof(*fold1), &runemapcmp);
3416 if(m) {
3417 if(c1 != m[ 1 ]) return C_fix(c1 - m[ 1 ]);
3418 }
3419 }
3420 continue;
3421fail:
3422 return C_fix(r1 - r2);
3423 }
3424 return C_fix(0);
3425}
3426
3427/* XXX inline this? */
3428C_regparm C_word C_utf_equal(C_word s1, C_word s2)
3429{
3430 C_word b1 = C_block_item(s1, 0);
3431 C_word b2 = C_block_item(s2, 0);
3432 int n1 = C_header_size(b1);
3433 int n2 = C_header_size(b2);
3434 if(n1 != n2) return C_SCHEME_FALSE;
3435 return C_mk_bool(C_memcmp(C_c_string(b1), C_c_string(b2), n1) == 0);
3436}
3437
3438/* XXX inline this? */
3439C_regparm C_word C_utf_equal_ci(C_word s1, C_word s2)
3440{
3441 C_word n1 = C_block_item(s1, 1);
3442 if(n1 != C_block_item(s2, 1)) return C_SCHEME_FALSE;
3443 return C_mk_bool(C_utf_compare_ci(s1, s2, C_fix(0), C_fix(0), n1) == C_fix(0));
3444}
3445
3446C_regparm C_word C_utf_copy(C_word from, C_word to, C_word start1, C_word end1, C_word start2)
3447{
3448 C_char *p1 = utf_index(from, C_unfix(start1));
3449 C_char *p2 = utf_index(to, C_unfix(start2));
3450 C_char *p3 = utf_index(from, C_unfix(end1));
3451 C_memcpy(p2, p1, p3 - p1);
3452 return C_SCHEME_UNDEFINED;
3453}
3454
3455/* compute byte-index from char-index */
3456C_regparm C_word C_utf_position(C_word str, C_word index)
3457{
3458 C_char *p1 = utf_index(str, C_unfix(index));
3459 return C_fix(p1 - C_c_string(C_block_item(str, 0)));
3460}
3461
3462/* compute char-index from byte-index (slow, uncached) */
3463C_regparm int C_utf_char_position(C_word bv, int pos)
3464{
3465 int p = 0;
3466 C_u32 c;
3467 int e;
3468 C_char *ptr = C_c_string(bv), *ptr2;
3469 while(pos > 0) {
3470 ptr2 = utf8_decode(ptr, &c, &e);
3471 pos -= ptr2 - ptr;
3472 ptr = ptr2;
3473 ++p;
3474 }
3475 return p;
3476}
3477
3478/* compute byte-offset between two char-indices */
3479C_regparm C_word C_utf_range(C_word str, C_word start, C_word end)
3480{
3481 C_char *p1 = utf_index(str, C_unfix(start));
3482 C_char *p2 = utf_index(str, C_unfix(end));
3483 return C_fix(p2 - p1);
3484}
3485
3486/* Count characters - slow variant, handles invalid sequences */
3487C_regparm int C_utf_count(C_char *s, int len)
3488{
3489 int i = 0;
3490 C_u32 c;
3491 int e;
3492 C_char *s2;
3493 while (len > 0) {
3494 s2 = utf8_decode(s, &c, &e);
3495 len -= (s2 - s);
3496 s = s2;
3497 i++;
3498 }
3499 return i;
3500}
3501
3502/* Count characters - slow variant, detects invalid sequences */
3503C_regparm C_word C_utf_validate(C_word bv, C_word blen)
3504{
3505 int i = 0;
3506 C_u32 c;
3507 int e;
3508 C_char *s = C_c_string(bv), *s2;
3509 int len = C_unfix(blen);
3510 while (len > 0) {
3511 s2 = utf8_decode(s, &c, &e);
3512 if(e) return C_SCHEME_FALSE;
3513 len -= (s2 - s);
3514 s = s2;
3515 i++;
3516 }
3517 return C_fix(i);
3518}
3519
3520/* count characters, fast, unsafe variant
3521 http://canonical.org/~kragen/strlen-utf8.html */
3522C_regparm int C_utf_fast_count(C_char *s, int len)
3523{
3524 int i = 0, j = 0;
3525 while (len--) {
3526 if ((s[i] & 0xc0) != 0x80) j++;
3527 i++;
3528 }
3529 return j;
3530}
3531
3532C_regparm C_word C_utf_bytes(C_word chr)
3533{
3534 int e;
3535 char buf[ 5 ];
3536 C_char *p1 = utf8_encode(C_character_code(chr), buf);
3537 return C_fix(p1 - buf);
3538}
3539
3540C_regparm C_char * C_utf_encode(C_char *str, int chr)
3541{
3542 return utf8_encode(chr, str);
3543}
3544
3545C_regparm C_word C_utf_decode(C_word bv, C_word pos)
3546{
3547 C_u32 c;
3548 int e;
3549 utf8_decode(C_c_string(bv) + C_unfix(pos), &c, &e);
3550 return C_make_character(c);
3551}
3552
3553C_regparm C_word C_utf_decode_ptr(C_char *bv)
3554{
3555 C_u32 c;
3556 int e;
3557 utf8_decode(bv, &c, &e);
3558 return C_make_character(c);
3559}
3560
3561C_regparm C_word C_utf_advance(C_word bv, C_word pos)
3562{
3563 C_char *p1 = (C_char *)C_data_pointer(bv) + C_unfix(pos);
3564 C_u32 c;
3565 int e;
3566 C_char *p2 = utf8_decode(p1, &c, &e);
3567 return C_fix(C_unfix(pos) + p2 - p1);
3568}
3569
3570C_regparm C_word C_utf_insert(C_word bv, C_word pos, C_word c)
3571{
3572 C_char *p1 = C_c_string(bv) + C_unfix(pos);
3573 C_char *p2 = utf8_encode(C_character_code(c), p1);
3574 return C_fix(C_unfix(pos) + p2 - p1);
3575}
3576
3577C_regparm C_word C_utf_fill(C_word bv, C_word chr)
3578{
3579 char buf[ 5 ];
3580 int size = C_header_size(bv) - 1;
3581 int len = C_utf_encode(buf, C_character_code(chr)) - buf;
3582 C_char *p;
3583 int n;
3584
3585 if(len == 1) {
3586 C_memset(C_data_pointer(bv), *buf, size);
3587 return bv;
3588 }
3589
3590 p = C_data_pointer(bv);
3591 n = size / len;
3592
3593 while(n--) {
3594 C_memcpy(p, buf, len);
3595 p += len;
3596 }
3597 ((C_char *)C_data_pointer(bv))[ size ] = 0; /* terminating zero */
3598 return bv;
3599}
3600
3601C_regparm int C_utf_expect(int byte)
3602{
3603 int len = lengths[ byte >> 3 ];
3604 return len + !len;
3605}
3606
3607/* take bytevector section and compute full + incomplete codepoints */
3608C_regparm C_word C_utf_fragment_counts(C_word bv, C_word pos, C_word len)
3609{
3610 int full = 0;
3611 C_uchar *ptr = C_data_pointer(bv) + C_unfix(pos);
3612 int count = C_unfix(len);
3613
3614 while(count) {
3615 unsigned int byte = *(ptr++);
3616 int n = lengths[ byte >> 3 ];
3617 int bn = n + !n;
3618 if(count >= bn) {
3619 ++full;
3620 count -= bn;
3621 ptr += bn - 1;
3622 } else return C_fix((full << 4) | (bn - count));
3623 }
3624
3625 return C_fix(full << 4);
3626}
3627
3628C_regparm void C_utf_putc(int chr, C_FILEPTR fp)
3629{
3630 C_char buf[ 5 ];
3631 C_char *p = utf8_encode(chr, buf);
3632 *p = '\0';
3633 C_fputs(buf, fp);
3634}
3635
3636C_regparm C_word C_utf_list_size(C_word lst)
3637{
3638 int n = 0;
3639 while(!C_immediatep(lst) && C_header_bits(lst) == C_PAIR_TYPE) {
3640 C_word x = C_block_item(lst, 0);
3641 if(((x) & C_IMMEDIATE_TYPE_BITS) == C_CHARACTER_BITS)
3642 n += C_unfix(C_utf_bytes(x));
3643 lst = C_block_item(lst, 1);
3644 }
3645 return C_fix(n);
3646}
3647
3648C_regparm C_word C_latin_to_utf(C_word from, C_word to, C_word start, C_word len)
3649{
3650 int n = C_unfix(len);
3651 C_uchar *pf = (C_uchar *)C_c_string(from) + C_unfix(start);
3652 C_char *pt = C_c_string(to), *pt0 = pt;
3653 while(n-- > 0) {
3654 C_u32 c = *(pf++);
3655 pt = utf8_encode(c, pt);
3656 }
3657 return C_fix(pt - pt0);
3658}
3659
3660C_regparm C_word C_utf_to_latin(C_word from, C_word to, C_word start, C_word blen)
3661{
3662 int n = C_unfix(blen);
3663 C_char *pf = C_c_string(from) + C_unfix(start), *pf2;
3664 C_char *pt = C_c_string(to), *pt0 = pt;
3665 C_u32 c;
3666 int e;
3667 while(n > 0) {
3668 pf2 = utf8_decode(pf, &c, &e);
3669 n -= pf2 - pf;
3670 pf = pf2;
3671 *(pt++) = c & 0xff;
3672 }
3673 *pt = '\0';
3674 return C_fix(pt - pt0);
3675}
3676
3677C_regparm C_word C_utf_char_foldcase(C_word c)
3678{
3679 int r = C_character_code(c);
3680 int *m = bsearch(&r, fold1, nelem(fold1), sizeof(*fold1), &runemapcmp);
3681 if(m) return C_make_character(m[ 1 ]);
3682 return c;
3683}
3684
3685C_regparm C_word C_utf_string_foldcase(C_word from, C_word to, C_word len)
3686{
3687 C_u32 c;
3688 int e;
3689 C_char *pf = C_c_string(from), *pf2;
3690 C_char *pt = C_c_string(to), *pt0 = pt;
3691 int count = C_unfix(len);
3692 while(count > 0) {
3693 pf2 = utf8_decode(pf, &c, &e);
3694 if(!e) {
3695 int r = c;
3696 int *m = bsearch(&r, fold2, nelem(fold2), sizeof(*fold2), &runemapcmp);
3697 if(m) {
3698 pt = utf8_encode(m[ 1 ], pt);
3699 if(m[ 3 ] != 0) {
3700 pt = utf8_encode(m[ 2 ], pt);
3701 c = m[ 3 ];
3702 } else c = m[ 2 ];
3703 } else {
3704 m = bsearch(&r, fold1, nelem(fold1), sizeof(*fold1), &runemapcmp);
3705 if(m) c = m[ 1 ];
3706 }
3707 }
3708 pt = utf8_encode(c, pt);
3709 count -= pf2 - pf;
3710 pf = pf2;
3711 }
3712 return C_fix(pt - pt0);
3713}
3714
3715#if defined(_WIN32) && !defined(__CYGWIN__)
3716#define C_WCHAR_FNBUF_SIZE 2048
3717static C_WCHAR fnbuf[ C_WCHAR_FNBUF_SIZE ], *pfnbuf;
3718C_regparm C_WCHAR *C_utf16(C_word bv, int cont)
3719{
3720 int len = C_header_size(bv) - 1;
3721 C_WCHAR *p;
3722 if(!cont) pfnbuf = fnbuf;
3723 p = pfnbuf;
3724 int n = MultiByteToWideChar(CP_UTF8,
3725 0,
3726 C_c_string(bv),
3727 -1,
3728 pfnbuf,
3729 C_WCHAR_FNBUF_SIZE - (pfnbuf - fnbuf));
3730 if(n == 0) C_decoding_error(bv, C_fix(0));
3731 pfnbuf += n;
3732 return p;
3733}
3734
3735C_regparm C_char *C_utf8(C_WCHAR *str)
3736{
3737 int n = WideCharToMultiByte(CP_UTF8,
3738 0,
3739 str,
3740 -1,
3741 (C_char *)fnbuf,
3742 C_WCHAR_FNBUF_SIZE,
3743 NULL, NULL);
3744 if(n == 0) C_decoding_error(C_SCHEME_UNDEFINED, C_fix(0));
3745 ((C_char *)fnbuf)[ n ] = '\0';
3746 return (C_char *)fnbuf;
3747}
3748#endif