libidn 1.42
pr29.c
Go to the documentation of this file.
1/* pr29.c --- Detect strings that are non-idempotent under NFKC in Unicode 3.2.
2 Copyright (C) 2004-2024 Simon Josefsson
3
4 This file is part of GNU Libidn.
5
6 GNU Libidn is free software: you can redistribute it and/or
7 modify it under the terms of either:
8
9 * the GNU Lesser General Public License as published by the Free
10 Software Foundation; either version 3 of the License, or (at
11 your option) any later version.
12
13 or
14
15 * the GNU General Public License as published by the Free
16 Software Foundation; either version 2 of the License, or (at
17 your option) any later version.
18
19 or both in parallel, as here.
20
21 GNU Libidn is distributed in the hope that it will be useful,
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 General Public License for more details.
25
26 You should have received copies of the GNU General Public License and
27 the GNU Lesser General Public License along with this program. If
28 not, see <https://www.gnu.org/licenses/>. */
29
30#include <config.h>
31
32#include "pr29.h"
33
34/* Get stringprep_utf8_to_ucs4. */
35#include <stringprep.h>
36
37/*
38 * The tables used in this file were extracted by Simon Josefsson from
39 * pr-29.html and DerivedCombiningClass-3.2.0.txt, as published by
40 * Unicode Inc., for the GNU Libidn project.
41 *
42 */
43
/* Code points with a non-zero combining class, extracted from
   DerivedCombiningClass-3.2.0.txt.  The comment in front of each group
   is the corresponding line of that file: the leading number is the
   combining class, a bracketed number is how many code points the
   range holds, and the names are those of the first and last
   character of the range.  The list is terminated by a 0 entry, so it
   can be walked without knowing its length.  The table is read-only
   and therefore declared const, like the other tables in this file. */
static const uint32_t nzcc[] = {
  /* 1 # Mn [5] COMBINING TILDE OVERLAY..
   * ..COMBINING LONG SOLIDUS OVERLAY */
  0x0334,
  0x0335,
  0x0336,
  0x0337,
  0x0338,
  /* 1 # Mn [2] COMBINING LONG VERTICAL LINE OVERLAY..
   * ..COMBINING SHORT VERTICAL LINE OVERLAY */
  0x20D2,
  0x20D3,
  /* 1 # Mn [3] COMBINING RING OVERLAY..
   * ..COMBINING ANTICLOCKWISE RING OVERLAY */
  0x20D8,
  0x20D9,
  0x20DA,
  /* 1 # Mn [2] COMBINING REVERSE SOLIDUS OVERLAY..
   * ..COMBINING DOUBLE VERTICAL STROKE OVERLAY */
  0x20E5,
  0x20E6,
  /* 1 # Mn COMBINING LEFTWARDS ARROW OVERLAY */
  0x20EA,
  /* 1 # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..
   * ..MUSICAL SYMBOL COMBINING TREMOLO-3 */
  0x1D167,
  0x1D168,
  0x1D169,
  /* 7 # Mn DEVANAGARI SIGN NUKTA */
  0x093C,
  /* 7 # Mn BENGALI SIGN NUKTA */
  0x09BC,
  /* 7 # Mn GURMUKHI SIGN NUKTA */
  0x0A3C,
  /* 7 # Mn GUJARATI SIGN NUKTA */
  0x0ABC,
  /* 7 # Mn ORIYA SIGN NUKTA */
  0x0B3C,
  /* 7 # Mn MYANMAR SIGN DOT BELOW */
  0x1037,
  /* 8 # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..
   * ..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK */
  0x3099,
  0x309A,
  /* 9 # Mn DEVANAGARI SIGN VIRAMA */
  0x094D,
  /* 9 # Mn BENGALI SIGN VIRAMA */
  0x09CD,
  /* 9 # Mn GURMUKHI SIGN VIRAMA */
  0x0A4D,
  /* 9 # Mn GUJARATI SIGN VIRAMA */
  0x0ACD,
  /* 9 # Mn ORIYA SIGN VIRAMA */
  0x0B4D,
  /* 9 # Mn TAMIL SIGN VIRAMA */
  0x0BCD,
  /* 9 # Mn TELUGU SIGN VIRAMA */
  0x0C4D,
  /* 9 # Mn KANNADA SIGN VIRAMA */
  0x0CCD,
  /* 9 # Mn MALAYALAM SIGN VIRAMA */
  0x0D4D,
  /* 9 # Mn SINHALA SIGN AL-LAKUNA */
  0x0DCA,
  /* 9 # Mn THAI CHARACTER PHINTHU */
  0x0E3A,
  /* 9 # Mn TIBETAN MARK HALANTA */
  0x0F84,
  /* 9 # Mn MYANMAR SIGN VIRAMA */
  0x1039,
  /* 9 # Mn TAGALOG SIGN VIRAMA */
  0x1714,
  /* 9 # Mn HANUNOO SIGN PAMUDPOD */
  0x1734,
  /* 9 # Mn KHMER SIGN COENG */
  0x17D2,
  /* 10 # Mn HEBREW POINT SHEVA */
  0x05B0,
  /* 11 # Mn HEBREW POINT HATAF SEGOL */
  0x05B1,
  /* 12 # Mn HEBREW POINT HATAF PATAH */
  0x05B2,
  /* 13 # Mn HEBREW POINT HATAF QAMATS */
  0x05B3,
  /* 14 # Mn HEBREW POINT HIRIQ */
  0x05B4,
  /* 15 # Mn HEBREW POINT TSERE */
  0x05B5,
  /* 16 # Mn HEBREW POINT SEGOL */
  0x05B6,
  /* 17 # Mn HEBREW POINT PATAH */
  0x05B7,
  /* 18 # Mn HEBREW POINT QAMATS */
  0x05B8,
  /* 19 # Mn HEBREW POINT HOLAM */
  0x05B9,
  /* 20 # Mn HEBREW POINT QUBUTS */
  0x05BB,
  /* 21 # Mn HEBREW POINT DAGESH OR MAPIQ */
  0x05BC,
  /* 22 # Mn HEBREW POINT METEG */
  0x05BD,
  /* 23 # Mn HEBREW POINT RAFE */
  0x05BF,
  /* 24 # Mn HEBREW POINT SHIN DOT */
  0x05C1,
  /* 25 # Mn HEBREW POINT SIN DOT */
  0x05C2,
  /* 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA */
  0xFB1E,
  /* 27 # Mn ARABIC FATHATAN */
  0x064B,
  /* 28 # Mn ARABIC DAMMATAN */
  0x064C,
  /* 29 # Mn ARABIC KASRATAN */
  0x064D,
  /* 30 # Mn ARABIC FATHA */
  0x064E,
  /* 31 # Mn ARABIC DAMMA */
  0x064F,
  /* 32 # Mn ARABIC KASRA */
  0x0650,
  /* 33 # Mn ARABIC SHADDA */
  0x0651,
  /* 34 # Mn ARABIC SUKUN */
  0x0652,
  /* 35 # Mn ARABIC LETTER SUPERSCRIPT ALEF */
  0x0670,
  /* 36 # Mn SYRIAC LETTER SUPERSCRIPT ALAPH */
  0x0711,
  /* 84 # Mn TELUGU LENGTH MARK */
  0x0C55,
  /* 91 # Mn TELUGU AI LENGTH MARK */
  0x0C56,
  /* 103 # Mn [2] THAI CHARACTER SARA U..
   * ..THAI CHARACTER SARA UU */
  0x0E38,
  0x0E39,
  /* 107 # Mn [4] THAI CHARACTER MAI EK..
   * ..THAI CHARACTER MAI CHATTAWA */
  0x0E48,
  0x0E49,
  0x0E4A,
  0x0E4B,
  /* 118 # Mn [2] LAO VOWEL SIGN U..
   * ..LAO VOWEL SIGN UU */
  0x0EB8,
  0x0EB9,
  /* 122 # Mn [4] LAO TONE MAI EK..
   * ..LAO TONE MAI CATAWA */
  0x0EC8,
  0x0EC9,
  0x0ECA,
  0x0ECB,
  /* 129 # Mn TIBETAN VOWEL SIGN AA */
  0x0F71,
  /* 130 # Mn TIBETAN VOWEL SIGN I */
  0x0F72,
  /* 130 # Mn [4] TIBETAN VOWEL SIGN E..
   * ..TIBETAN VOWEL SIGN OO */
  0x0F7A,
  0x0F7B,
  0x0F7C,
  0x0F7D,
  /* 130 # Mn TIBETAN VOWEL SIGN REVERSED I */
  0x0F80,
  /* 132 # Mn TIBETAN VOWEL SIGN U */
  0x0F74,
  /* 202 # Mn [2] COMBINING PALATALIZED HOOK BELOW..
   * ..COMBINING RETROFLEX HOOK BELOW */
  0x0321,
  0x0322,
  /* 202 # Mn [2] COMBINING CEDILLA..
   * ..COMBINING OGONEK */
  0x0327,
  0x0328,
  /* 216 # Mn COMBINING HORN */
  0x031B,
  /* 216 # Mn TIBETAN MARK TSA -PHRU */
  0x0F39,
  /* 216 # Mc [2] MUSICAL SYMBOL COMBINING STEM..
   * ..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM */
  0x1D165,
  0x1D166,
  /* 216 # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..
   * ..MUSICAL SYMBOL COMBINING FLAG-5 */
  0x1D16E,
  0x1D16F,
  0x1D170,
  0x1D171,
  0x1D172,
  /* 218 # Mn IDEOGRAPHIC LEVEL TONE MARK */
  0x302A,
  /* 220 # Mn [4] COMBINING GRAVE ACCENT BELOW..
   * ..COMBINING RIGHT TACK BELOW */
  0x0316,
  0x0317,
  0x0318,
  0x0319,
  /* 220 # Mn [5] COMBINING LEFT HALF RING BELOW..
   * ..COMBINING MINUS SIGN BELOW */
  0x031C,
  0x031D,
  0x031E,
  0x031F,
  0x0320,
  /* 220 # Mn [4] COMBINING DOT BELOW..
   * ..COMBINING COMMA BELOW */
  0x0323,
  0x0324,
  0x0325,
  0x0326,
  /* 220 # Mn [11] COMBINING VERTICAL LINE BELOW..
   * ..COMBINING DOUBLE LOW LINE */
  0x0329,
  0x032A,
  0x032B,
  0x032C,
  0x032D,
  0x032E,
  0x032F,
  0x0330,
  0x0331,
  0x0332,
  0x0333,
  /* 220 # Mn [4] COMBINING RIGHT HALF RING BELOW..
   * ..COMBINING SEAGULL BELOW */
  0x0339,
  0x033A,
  0x033B,
  0x033C,
  /* 220 # Mn [3] COMBINING EQUALS SIGN BELOW..
   * ..COMBINING LEFT ANGLE BELOW */
  0x0347,
  0x0348,
  0x0349,
  /* 220 # Mn [2] COMBINING LEFT RIGHT ARROW BELOW..
   * ..COMBINING UPWARDS ARROW BELOW */
  0x034D,
  0x034E,
  /* 220 # Mn HEBREW ACCENT ETNAHTA */
  0x0591,
  /* 220 # Mn HEBREW ACCENT TIPEHA */
  0x0596,
  /* 220 # Mn HEBREW ACCENT TEVIR */
  0x059B,
  /* 220 # Mn [5] HEBREW ACCENT MUNAH..
   * ..HEBREW ACCENT DARGA */
  0x05A3,
  0x05A4,
  0x05A5,
  0x05A6,
  0x05A7,
  /* 220 # Mn HEBREW ACCENT YERAH BEN YOMO */
  0x05AA,
  /* 220 # Mn ARABIC HAMZA BELOW */
  0x0655,
  /* 220 # Mn ARABIC SMALL LOW SEEN */
  0x06E3,
  /* 220 # Mn ARABIC EMPTY CENTRE LOW STOP */
  0x06EA,
  /* 220 # Mn ARABIC SMALL LOW MEEM */
  0x06ED,
  /* 220 # Mn SYRIAC PTHAHA BELOW */
  0x0731,
  /* 220 # Mn SYRIAC ZQAPHA BELOW */
  0x0734,
  /* 220 # Mn [3] SYRIAC RBASA BELOW..
   * ..SYRIAC DOTTED ZLAMA ANGULAR */
  0x0737,
  0x0738,
  0x0739,
  /* 220 # Mn [2] SYRIAC HBASA BELOW..
   * ..SYRIAC HBASA-ESASA DOTTED */
  0x073B,
  0x073C,
  /* 220 # Mn SYRIAC ESASA BELOW */
  0x073E,
  /* 220 # Mn SYRIAC RUKKAKHA */
  0x0742,
  /* 220 # Mn SYRIAC TWO VERTICAL DOTS BELOW */
  0x0744,
  /* 220 # Mn SYRIAC THREE DOTS BELOW */
  0x0746,
  /* 220 # Mn SYRIAC OBLIQUE LINE BELOW */
  0x0748,
  /* 220 # Mn DEVANAGARI STRESS SIGN ANUDATTA */
  0x0952,
  /* 220 # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..
   * ..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS */
  0x0F18,
  0x0F19,
  /* 220 # Mn TIBETAN MARK NGAS BZUNG NYI ZLA */
  0x0F35,
  /* 220 # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS */
  0x0F37,
  /* 220 # Mn TIBETAN SYMBOL PADMA GDAN */
  0x0FC6,
  /* 220 # Mn COMBINING TRIPLE UNDERDOT */
  0x20E8,
  /* 220 # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..
   * ..MUSICAL SYMBOL COMBINING LOURE */
  0x1D17B,
  0x1D17C,
  0x1D17D,
  0x1D17E,
  0x1D17F,
  0x1D180,
  0x1D181,
  0x1D182,
  /* 220 # Mn [2] MUSICAL SYMBOL COMBINING DOUBLE TONGUE..
   * ..MUSICAL SYMBOL COMBINING TRIPLE TONGUE */
  0x1D18A,
  0x1D18B,
  /* 222 # Mn HEBREW ACCENT YETIV */
  0x059A,
  /* 222 # Mn HEBREW ACCENT DEHI */
  0x05AD,
  /* 222 # Mn IDEOGRAPHIC ENTERING TONE MARK */
  0x302D,
  /* 224 # Mn [2] HANGUL SINGLE DOT TONE MARK..
   * ..HANGUL DOUBLE DOT TONE MARK */
  0x302E,
  0x302F,
  /* 226 # Mc MUSICAL SYMBOL COMBINING AUGMENTATION DOT */
  0x1D16D,
  /* 228 # Mn HEBREW ACCENT ZINOR */
  0x05AE,
  /* 228 # Mn MONGOLIAN LETTER ALI GALI DAGALGA */
  0x18A9,
  /* 228 # Mn IDEOGRAPHIC RISING TONE MARK */
  0x302B,
  /* 230 # Mn [21] COMBINING GRAVE ACCENT..
   * ..COMBINING REVERSED COMMA ABOVE */
  0x0300,
  0x0301,
  0x0302,
  0x0303,
  0x0304,
  0x0305,
  0x0306,
  0x0307,
  0x0308,
  0x0309,
  0x030A,
  0x030B,
  0x030C,
  0x030D,
  0x030E,
  0x030F,
  0x0310,
  0x0311,
  0x0312,
  0x0313,
  0x0314,
  /* 230 # Mn [8] COMBINING X ABOVE..
   * ..COMBINING GREEK DIALYTIKA TONOS */
  0x033D,
  0x033E,
  0x033F,
  0x0340,
  0x0341,
  0x0342,
  0x0343,
  0x0344,
  /* 230 # Mn COMBINING BRIDGE ABOVE */
  0x0346,
  /* 230 # Mn [3] COMBINING NOT TILDE ABOVE..
   * ..COMBINING ALMOST EQUAL TO ABOVE */
  0x034A,
  0x034B,
  0x034C,
  /* 230 # Mn [13] COMBINING LATIN SMALL LETTER A..
   * ..COMBINING LATIN SMALL LETTER X */
  0x0363,
  0x0364,
  0x0365,
  0x0366,
  0x0367,
  0x0368,
  0x0369,
  0x036A,
  0x036B,
  0x036C,
  0x036D,
  0x036E,
  0x036F,
  /* 230 # Mn [4] COMBINING CYRILLIC TITLO..
   * ..COMBINING CYRILLIC PSILI PNEUMATA */
  0x0483,
  0x0484,
  0x0485,
  0x0486,
  /* 230 # Mn [4] HEBREW ACCENT SEGOL..
   * ..HEBREW ACCENT ZAQEF GADOL */
  0x0592,
  0x0593,
  0x0594,
  0x0595,
  /* 230 # Mn [3] HEBREW ACCENT REVIA..
   * ..HEBREW ACCENT PASHTA */
  0x0597,
  0x0598,
  0x0599,
  /* 230 # Mn [6] HEBREW ACCENT GERESH..
   * ..HEBREW ACCENT PAZER */
  0x059C,
  0x059D,
  0x059E,
  0x059F,
  0x05A0,
  0x05A1,
  /* 230 # Mn [2] HEBREW ACCENT QADMA..
   * ..HEBREW ACCENT TELISHA QETANA */
  0x05A8,
  0x05A9,
  /* 230 # Mn [2] HEBREW ACCENT OLE..
   * ..HEBREW ACCENT ILUY */
  0x05AB,
  0x05AC,
  /* 230 # Mn HEBREW MARK MASORA CIRCLE */
  0x05AF,
  /* 230 # Mn HEBREW MARK UPPER DOT */
  0x05C4,
  /* 230 # Mn [2] ARABIC MADDAH ABOVE..
   * ..ARABIC HAMZA ABOVE */
  0x0653,
  0x0654,
  /* 230 # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..
   * ..ARABIC SMALL HIGH SEEN */
  0x06D6,
  0x06D7,
  0x06D8,
  0x06D9,
  0x06DA,
  0x06DB,
  0x06DC,
  /* 230 # Mn [4] ARABIC SMALL HIGH ROUNDED ZERO..
   * ..ARABIC SMALL HIGH MEEM ISOLATED FORM */
  0x06DF,
  0x06E0,
  0x06E1,
  0x06E2,
  /* 230 # Mn ARABIC SMALL HIGH MADDA */
  0x06E4,
  /* 230 # Mn [2] ARABIC SMALL HIGH YEH..
   * ..ARABIC SMALL HIGH NOON */
  0x06E7,
  0x06E8,
  /* 230 # Mn [2] ARABIC EMPTY CENTRE HIGH STOP..
   * ..ARABIC ROUNDED HIGH STOP WITH FILLED CENTRE */
  0x06EB,
  0x06EC,
  /* 230 # Mn SYRIAC PTHAHA ABOVE */
  0x0730,
  /* 230 # Mn [2] SYRIAC PTHAHA DOTTED..
   * ..SYRIAC ZQAPHA ABOVE */
  0x0732,
  0x0733,
  /* 230 # Mn [2] SYRIAC ZQAPHA DOTTED..
   * ..SYRIAC RBASA ABOVE */
  0x0735,
  0x0736,
  /* 230 # Mn SYRIAC HBASA ABOVE */
  0x073A,
  /* 230 # Mn SYRIAC ESASA ABOVE */
  0x073D,
  /* 230 # Mn [3] SYRIAC RWAHA..
   * ..SYRIAC QUSHSHAYA */
  0x073F,
  0x0740,
  0x0741,
  /* 230 # Mn SYRIAC TWO VERTICAL DOTS ABOVE */
  0x0743,
  /* 230 # Mn SYRIAC THREE DOTS ABOVE */
  0x0745,
  /* 230 # Mn SYRIAC OBLIQUE LINE ABOVE */
  0x0747,
  /* 230 # Mn [2] SYRIAC MUSIC..
   * ..SYRIAC BARREKH */
  0x0749,
  0x074A,
  /* 230 # Mn DEVANAGARI STRESS SIGN UDATTA */
  0x0951,
  /* 230 # Mn [2] DEVANAGARI GRAVE ACCENT..
   * ..DEVANAGARI ACUTE ACCENT */
  0x0953,
  0x0954,
  /* 230 # Mn [2] TIBETAN SIGN NYI ZLA NAA DA..
   * ..TIBETAN SIGN SNA LDAN */
  0x0F82,
  0x0F83,
  /* 230 # Mn [2] TIBETAN SIGN LCI RTAGS..
   * ..TIBETAN SIGN YANG RTAGS */
  0x0F86,
  0x0F87,
  /* 230 # Mn [2] COMBINING LEFT HARPOON ABOVE..
   * ..COMBINING RIGHT HARPOON ABOVE */
  0x20D0,
  0x20D1,
  /* 230 # Mn [4] COMBINING ANTICLOCKWISE ARROW ABOVE..
   * ..COMBINING RIGHT ARROW ABOVE */
  0x20D4,
  0x20D5,
  0x20D6,
  0x20D7,
  /* 230 # Mn [2] COMBINING THREE DOTS ABOVE..
   * ..COMBINING FOUR DOTS ABOVE */
  0x20DB,
  0x20DC,
  /* 230 # Mn COMBINING LEFT RIGHT ARROW ABOVE */
  0x20E1,
  /* 230 # Mn COMBINING ANNUITY SYMBOL */
  0x20E7,
  /* 230 # Mn COMBINING WIDE BRIDGE ABOVE */
  0x20E9,
  /* 230 # Mn [4] COMBINING LIGATURE LEFT HALF..
   * ..COMBINING DOUBLE TILDE RIGHT HALF */
  0xFE20,
  0xFE21,
  0xFE22,
  0xFE23,
  /* 230 # Mn [5] MUSICAL SYMBOL COMBINING DOIT..
   * ..MUSICAL SYMBOL COMBINING BEND */
  0x1D185,
  0x1D186,
  0x1D187,
  0x1D188,
  0x1D189,
  /* 230 # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..
   * ..MUSICAL SYMBOL COMBINING SNAP PIZZICATO */
  0x1D1AA,
  0x1D1AB,
  0x1D1AC,
  0x1D1AD,
  /* 232 # Mn COMBINING COMMA ABOVE RIGHT */
  0x0315,
  /* 232 # Mn COMBINING LEFT ANGLE ABOVE */
  0x031A,
  /* 232 # Mn IDEOGRAPHIC DEPARTING TONE MARK */
  0x302C,
  /* 233 # Mn COMBINING DOUBLE RIGHTWARDS ARROW BELOW */
  0x0362,
  /* 234 # Mn [2] COMBINING DOUBLE TILDE..
   * ..COMBINING DOUBLE INVERTED BREVE */
  0x0360,
  0x0361,
  /* 240 # Mn COMBINING GREEK YPOGEGRAMMENI */
  0x0345,
  /* End-of-list marker. */
  0
};
597
598/*
599 * 09C7 BENGALI VOWEL SIGN E 09BE BENGALI VOWEL SIGN AA or
600 * 09D7 BENGALI AU LENGTH MARK
601 */
602
/* Row 1, first code point; the list ends with a 0 entry. */
static const uint32_t pr29_1_1[] = {
  0x09C7,                       /* BENGALI VOWEL SIGN E */
  0
};
606
/* Row 1, alternative second code points; the list ends with a 0 entry. */
static const uint32_t pr29_1_2[] = {
  0x09BE,                       /* BENGALI VOWEL SIGN AA */
  0x09D7,                       /* BENGALI AU LENGTH MARK */
  0
};
610
611/*
612 * 0B47 ORIYA VOWEL SIGN E 0B3E ORIYA VOWEL SIGN AA or
613 * 0B56 ORIYA AI LENGTH MARK or
614 * 0B57 ORIYA AU LENGTH MARK
615 */
616
/* Row 2, first code point; the list ends with a 0 entry. */
static const uint32_t pr29_2_1[] = {
  0x0B47,                       /* ORIYA VOWEL SIGN E */
  0
};
620
/* Row 2, alternative second code points; the list ends with a 0 entry. */
static const uint32_t pr29_2_2[] = {
  0x0B3E,                       /* ORIYA VOWEL SIGN AA */
  0x0B56,                       /* ORIYA AI LENGTH MARK */
  0x0B57,                       /* ORIYA AU LENGTH MARK */
  0
};
624
625/*
626 * 0BC6 TAMIL VOWEL SIGN E 0BBE TAMIL VOWEL SIGN AA or
627 * 0BD7 TAMIL AU LENGTH MARK
628 */
629
/* Row 3, first code point; the list ends with a 0 entry. */
static const uint32_t pr29_3_1[] = {
  0x0BC6,                       /* TAMIL VOWEL SIGN E */
  0
};
633
/* Row 3, alternative second code points; the list ends with a 0 entry. */
static const uint32_t pr29_3_2[] = {
  0x0BBE,                       /* TAMIL VOWEL SIGN AA */
  0x0BD7,                       /* TAMIL AU LENGTH MARK */
  0
};
637
638/*
639 * 0BC7 TAMIL VOWEL SIGN EE 0BBE TAMIL VOWEL SIGN AA
640 */
641
/* Row 4, first code point; the list ends with a 0 entry. */
static const uint32_t pr29_4_1[] = {
  0x0BC7,                       /* TAMIL VOWEL SIGN EE */
  0
};
645
/* Row 4, second code point; the list ends with a 0 entry. */
static const uint32_t pr29_4_2[] = {
  0x0BBE,                       /* TAMIL VOWEL SIGN AA */
  0
};
649
650/*
651 * 0B92 TAMIL LETTER O 0BD7 TAMIL AU LENGTH MARK
652 */
653
/* Row 5, first code point; the list ends with a 0 entry. */
static const uint32_t pr29_5_1[] = {
  0x0B92,                       /* TAMIL LETTER O */
  0
};
657
/* Row 5, second code point; the list ends with a 0 entry. */
static const uint32_t pr29_5_2[] = {
  0x0BD7,                       /* TAMIL AU LENGTH MARK */
  0
};
661
662/*
663 * 0CC6 KANNADA VOWEL SIGN E 0CC2 KANNADA VOWEL SIGN UU or
664 * 0CD5 KANNADA LENGTH MARK or
665 * 0CD6 KANNADA AI LENGTH MARK
666 */
667
/* Row 6, first code point; the list ends with a 0 entry. */
static const uint32_t pr29_6_1[] = {
  0x0CC6,                       /* KANNADA VOWEL SIGN E */
  0
};
671
/* Row 6, alternative second code points; the list ends with a 0 entry. */
static const uint32_t pr29_6_2[] = {
  0x0CC2,                       /* KANNADA VOWEL SIGN UU */
  0x0CD5,                       /* KANNADA LENGTH MARK */
  0x0CD6,                       /* KANNADA AI LENGTH MARK */
  0
};
675
676/*
677 * 0CBF KANNADA VOWEL SIGN I or
678 * 0CCA KANNADA VOWEL SIGN O 0CD5 KANNADA LENGTH MARK
679 */
680
/* Row 7, alternative first code points; the list ends with a 0 entry. */
static const uint32_t pr29_7_1[] = {
  0x0CBF,                       /* KANNADA VOWEL SIGN I */
  0x0CCA,                       /* KANNADA VOWEL SIGN O */
  0
};
684
/* Row 7, second code point; the list ends with a 0 entry. */
static const uint32_t pr29_7_2[] = {
  0x0CD5,                       /* KANNADA LENGTH MARK */
  0
};
688
689/*
690 * 0D47 MALAYALAM VOWEL SIGN EE 0D3E MALAYALAM VOWEL SIGN AA
691 */
692
/* Row 8, first code point; the list ends with a 0 entry. */
static const uint32_t pr29_8_1[] = {
  0x0D47,                       /* MALAYALAM VOWEL SIGN EE */
  0
};
696
/* Row 8, second code point; the list ends with a 0 entry. */
static const uint32_t pr29_8_2[] = {
  0x0D3E,                       /* MALAYALAM VOWEL SIGN AA */
  0
};
700
701/*
702 * 0D46 MALAYALAM VOWEL SIGN E 0D3E MALAYALAM VOWEL SIGN AA or
703 * 0D57 MALAYALAM AU LENGTH MARK
704 */
705
/* Row 9, first code point; the list ends with a 0 entry. */
static const uint32_t pr29_9_1[] = {
  0x0D46,                       /* MALAYALAM VOWEL SIGN E */
  0
};
709
/* Row 9, alternative second code points; the list ends with a 0 entry. */
static const uint32_t pr29_9_2[] = {
  0x0D3E,                       /* MALAYALAM VOWEL SIGN AA */
  0x0D57,                       /* MALAYALAM AU LENGTH MARK */
  0
};
713
714/*
715 * 1025 MYANMAR LETTER U 102E MYANMAR VOWEL SIGN II
716 */
717
/* Row 10, first code point; the list ends with a 0 entry. */
static const uint32_t pr29_10_1[] = {
  0x1025,                       /* MYANMAR LETTER U */
  0
};
721
/* Row 10, second code point; the list ends with a 0 entry. */
static const uint32_t pr29_10_2[] = {
  0x102E,                       /* MYANMAR VOWEL SIGN II */
  0
};
725
726/*
727 * 0DD9 SINHALA VOWEL SIGN KOMBUVA 0DCF SINHALA VOWEL SIGN AELA-PILLA or
728 * 0DDF SINHALA VOWEL SIGN GAYANUKITTA
729 */
730
/* Row 11, first code point; the list ends with a 0 entry. */
static const uint32_t pr29_11_1[] = {
  0x0DD9,                       /* SINHALA VOWEL SIGN KOMBUVA */
  0
};
734
/* Row 11, alternative second code points; the list ends with a 0 entry. */
static const uint32_t pr29_11_2[] = {
  0x0DCF,                       /* SINHALA VOWEL SIGN AELA-PILLA */
  0x0DDF,                       /* SINHALA VOWEL SIGN GAYANUKITTA */
  0
};
738
739/*
740 * 1100..1112 HANGUL CHOSEONG KIYEOK..HIEUH [19 instances]
741 * 1161..1175 HANGUL JUNGSEONG A..I [21 instances]
742 */
743
/* Row 12, first code points: the 19 consecutive values U+1100..U+1112
   (HANGUL CHOSEONG KIYEOK..HIEUH), followed by a 0 entry that ends the
   list. */
static const uint32_t pr29_12_1[] = {
  0x1100, 0x1101, 0x1102, 0x1103,
  0x1104, 0x1105, 0x1106, 0x1107,
  0x1108, 0x1109, 0x110A, 0x110B,
  0x110C, 0x110D, 0x110E, 0x110F,
  0x1110, 0x1111, 0x1112,
  0
};
749
/* Row 12, second code points: the 21 consecutive values U+1161..U+1175
   (HANGUL JUNGSEONG A..I), followed by a 0 entry that ends the list. */
static const uint32_t pr29_12_2[] = {
  0x1161, 0x1162, 0x1163, 0x1164, 0x1165, 0x1166, 0x1167,
  0x1168, 0x1169, 0x116A, 0x116B, 0x116C, 0x116D, 0x116E,
  0x116F, 0x1170, 0x1171, 0x1172, 0x1173, 0x1174, 0x1175,
  0
};
755
756
757/*
758 * [:HangulSyllableType=LV:]
759 * 11A8..11C2 HANGUL JONGSEONG KIYEOK..HIEUH [27 instances]
760 */
761
762static const uint32_t pr29_13_1[] = {
763 0xAC00, /* LV # Lo HANGUL SYLLABLE GA */
764 0xAC1C, /* LV # Lo HANGUL SYLLABLE GAE */
765 0xAC38, /* LV # Lo HANGUL SYLLABLE GYA */
766 0xAC54, /* LV # Lo HANGUL SYLLABLE GYAE */
767 0xAC70, /* LV # Lo HANGUL SYLLABLE GEO */
768 0xAC8C, /* LV # Lo HANGUL SYLLABLE GE */
769 0xACA8, /* LV # Lo HANGUL SYLLABLE GYEO */
770 0xACC4, /* LV # Lo HANGUL SYLLABLE GYE */
771 0xACE0, /* LV # Lo HANGUL SYLLABLE GO */
772 0xACFC, /* LV # Lo HANGUL SYLLABLE GWA */
773 0xAD18, /* LV # Lo HANGUL SYLLABLE GWAE */
774 0xAD34, /* LV # Lo HANGUL SYLLABLE GOE */
775 0xAD50, /* LV # Lo HANGUL SYLLABLE GYO */
776 0xAD6C, /* LV # Lo HANGUL SYLLABLE GU */
777 0xAD88, /* LV # Lo HANGUL SYLLABLE GWEO */
778 0xADA4, /* LV # Lo HANGUL SYLLABLE GWE */
779 0xADC0, /* LV # Lo HANGUL SYLLABLE GWI */
780 0xADDC, /* LV # Lo HANGUL SYLLABLE GYU */
781 0xADF8, /* LV # Lo HANGUL SYLLABLE GEU */
782 0xAE14, /* LV # Lo HANGUL SYLLABLE GYI */
783 0xAE30, /* LV # Lo HANGUL SYLLABLE GI */
784 0xAE4C, /* LV # Lo HANGUL SYLLABLE GGA */
785 0xAE68, /* LV # Lo HANGUL SYLLABLE GGAE */
786 0xAE84, /* LV # Lo HANGUL SYLLABLE GGYA */
787 0xAEA0, /* LV # Lo HANGUL SYLLABLE GGYAE */
788 0xAEBC, /* LV # Lo HANGUL SYLLABLE GGEO */
789 0xAED8, /* LV # Lo HANGUL SYLLABLE GGE */
790 0xAEF4, /* LV # Lo HANGUL SYLLABLE GGYEO */
791 0xAF10, /* LV # Lo HANGUL SYLLABLE GGYE */
792 0xAF2C, /* LV # Lo HANGUL SYLLABLE GGO */
793 0xAF48, /* LV # Lo HANGUL SYLLABLE GGWA */
794 0xAF64, /* LV # Lo HANGUL SYLLABLE GGWAE */
795 0xAF80, /* LV # Lo HANGUL SYLLABLE GGOE */
796 0xAF9C, /* LV # Lo HANGUL SYLLABLE GGYO */
797 0xAFB8, /* LV # Lo HANGUL SYLLABLE GGU */
798 0xAFD4, /* LV # Lo HANGUL SYLLABLE GGWEO */
799 0xAFF0, /* LV # Lo HANGUL SYLLABLE GGWE */
800 0xB00C, /* LV # Lo HANGUL SYLLABLE GGWI */
801 0xB028, /* LV # Lo HANGUL SYLLABLE GGYU */
802 0xB044, /* LV # Lo HANGUL SYLLABLE GGEU */
803 0xB060, /* LV # Lo HANGUL SYLLABLE GGYI */
804 0xB07C, /* LV # Lo HANGUL SYLLABLE GGI */
805 0xB098, /* LV # Lo HANGUL SYLLABLE NA */
806 0xB0B4, /* LV # Lo HANGUL SYLLABLE NAE */
807 0xB0D0, /* LV # Lo HANGUL SYLLABLE NYA */
808 0xB0EC, /* LV # Lo HANGUL SYLLABLE NYAE */
809 0xB108, /* LV # Lo HANGUL SYLLABLE NEO */
810 0xB124, /* LV # Lo HANGUL SYLLABLE NE */
811 0xB140, /* LV # Lo HANGUL SYLLABLE NYEO */
812 0xB15C, /* LV # Lo HANGUL SYLLABLE NYE */
813 0xB178, /* LV # Lo HANGUL SYLLABLE NO */
814 0xB194, /* LV # Lo HANGUL SYLLABLE NWA */
815 0xB1B0, /* LV # Lo HANGUL SYLLABLE NWAE */
816 0xB1CC, /* LV # Lo HANGUL SYLLABLE NOE */
817 0xB1E8, /* LV # Lo HANGUL SYLLABLE NYO */
818 0xB204, /* LV # Lo HANGUL SYLLABLE NU */
819 0xB220, /* LV # Lo HANGUL SYLLABLE NWEO */
820 0xB23C, /* LV # Lo HANGUL SYLLABLE NWE */
821 0xB258, /* LV # Lo HANGUL SYLLABLE NWI */
822 0xB274, /* LV # Lo HANGUL SYLLABLE NYU */
823 0xB290, /* LV # Lo HANGUL SYLLABLE NEU */
824 0xB2AC, /* LV # Lo HANGUL SYLLABLE NYI */
825 0xB2C8, /* LV # Lo HANGUL SYLLABLE NI */
826 0xB2E4, /* LV # Lo HANGUL SYLLABLE DA */
827 0xB300, /* LV # Lo HANGUL SYLLABLE DAE */
828 0xB31C, /* LV # Lo HANGUL SYLLABLE DYA */
829 0xB338, /* LV # Lo HANGUL SYLLABLE DYAE */
830 0xB354, /* LV # Lo HANGUL SYLLABLE DEO */
831 0xB370, /* LV # Lo HANGUL SYLLABLE DE */
832 0xB38C, /* LV # Lo HANGUL SYLLABLE DYEO */
833 0xB3A8, /* LV # Lo HANGUL SYLLABLE DYE */
834 0xB3C4, /* LV # Lo HANGUL SYLLABLE DO */
835 0xB3E0, /* LV # Lo HANGUL SYLLABLE DWA */
836 0xB3FC, /* LV # Lo HANGUL SYLLABLE DWAE */
837 0xB418, /* LV # Lo HANGUL SYLLABLE DOE */
838 0xB434, /* LV # Lo HANGUL SYLLABLE DYO */
839 0xB450, /* LV # Lo HANGUL SYLLABLE DU */
840 0xB46C, /* LV # Lo HANGUL SYLLABLE DWEO */
841 0xB488, /* LV # Lo HANGUL SYLLABLE DWE */
842 0xB4A4, /* LV # Lo HANGUL SYLLABLE DWI */
843 0xB4C0, /* LV # Lo HANGUL SYLLABLE DYU */
844 0xB4DC, /* LV # Lo HANGUL SYLLABLE DEU */
845 0xB4F8, /* LV # Lo HANGUL SYLLABLE DYI */
846 0xB514, /* LV # Lo HANGUL SYLLABLE DI */
847 0xB530, /* LV # Lo HANGUL SYLLABLE DDA */
848 0xB54C, /* LV # Lo HANGUL SYLLABLE DDAE */
849 0xB568, /* LV # Lo HANGUL SYLLABLE DDYA */
850 0xB584, /* LV # Lo HANGUL SYLLABLE DDYAE */
851 0xB5A0, /* LV # Lo HANGUL SYLLABLE DDEO */
852 0xB5BC, /* LV # Lo HANGUL SYLLABLE DDE */
853 0xB5D8, /* LV # Lo HANGUL SYLLABLE DDYEO */
854 0xB5F4, /* LV # Lo HANGUL SYLLABLE DDYE */
855 0xB610, /* LV # Lo HANGUL SYLLABLE DDO */
856 0xB62C, /* LV # Lo HANGUL SYLLABLE DDWA */
857 0xB648, /* LV # Lo HANGUL SYLLABLE DDWAE */
858 0xB664, /* LV # Lo HANGUL SYLLABLE DDOE */
859 0xB680, /* LV # Lo HANGUL SYLLABLE DDYO */
860 0xB69C, /* LV # Lo HANGUL SYLLABLE DDU */
861 0xB6B8, /* LV # Lo HANGUL SYLLABLE DDWEO */
862 0xB6D4, /* LV # Lo HANGUL SYLLABLE DDWE */
863 0xB6F0, /* LV # Lo HANGUL SYLLABLE DDWI */
864 0xB70C, /* LV # Lo HANGUL SYLLABLE DDYU */
865 0xB728, /* LV # Lo HANGUL SYLLABLE DDEU */
866 0xB744, /* LV # Lo HANGUL SYLLABLE DDYI */
867 0xB760, /* LV # Lo HANGUL SYLLABLE DDI */
868 0xB77C, /* LV # Lo HANGUL SYLLABLE RA */
869 0xB798, /* LV # Lo HANGUL SYLLABLE RAE */
870 0xB7B4, /* LV # Lo HANGUL SYLLABLE RYA */
871 0xB7D0, /* LV # Lo HANGUL SYLLABLE RYAE */
872 0xB7EC, /* LV # Lo HANGUL SYLLABLE REO */
873 0xB808, /* LV # Lo HANGUL SYLLABLE RE */
874 0xB824, /* LV # Lo HANGUL SYLLABLE RYEO */
875 0xB840, /* LV # Lo HANGUL SYLLABLE RYE */
876 0xB85C, /* LV # Lo HANGUL SYLLABLE RO */
877 0xB878, /* LV # Lo HANGUL SYLLABLE RWA */
878 0xB894, /* LV # Lo HANGUL SYLLABLE RWAE */
879 0xB8B0, /* LV # Lo HANGUL SYLLABLE ROE */
880 0xB8CC, /* LV # Lo HANGUL SYLLABLE RYO */
881 0xB8E8, /* LV # Lo HANGUL SYLLABLE RU */
882 0xB904, /* LV # Lo HANGUL SYLLABLE RWEO */
883 0xB920, /* LV # Lo HANGUL SYLLABLE RWE */
884 0xB93C, /* LV # Lo HANGUL SYLLABLE RWI */
885 0xB958, /* LV # Lo HANGUL SYLLABLE RYU */
886 0xB974, /* LV # Lo HANGUL SYLLABLE REU */
887 0xB990, /* LV # Lo HANGUL SYLLABLE RYI */
888 0xB9AC, /* LV # Lo HANGUL SYLLABLE RI */
889 0xB9C8, /* LV # Lo HANGUL SYLLABLE MA */
890 0xB9E4, /* LV # Lo HANGUL SYLLABLE MAE */
891 0xBA00, /* LV # Lo HANGUL SYLLABLE MYA */
892 0xBA1C, /* LV # Lo HANGUL SYLLABLE MYAE */
893 0xBA38, /* LV # Lo HANGUL SYLLABLE MEO */
894 0xBA54, /* LV # Lo HANGUL SYLLABLE ME */
895 0xBA70, /* LV # Lo HANGUL SYLLABLE MYEO */
896 0xBA8C, /* LV # Lo HANGUL SYLLABLE MYE */
897 0xBAA8, /* LV # Lo HANGUL SYLLABLE MO */
898 0xBAC4, /* LV # Lo HANGUL SYLLABLE MWA */
899 0xBAE0, /* LV # Lo HANGUL SYLLABLE MWAE */
900 0xBAFC, /* LV # Lo HANGUL SYLLABLE MOE */
901 0xBB18, /* LV # Lo HANGUL SYLLABLE MYO */
902 0xBB34, /* LV # Lo HANGUL SYLLABLE MU */
903 0xBB50, /* LV # Lo HANGUL SYLLABLE MWEO */
904 0xBB6C, /* LV # Lo HANGUL SYLLABLE MWE */
905 0xBB88, /* LV # Lo HANGUL SYLLABLE MWI */
906 0xBBA4, /* LV # Lo HANGUL SYLLABLE MYU */
907 0xBBC0, /* LV # Lo HANGUL SYLLABLE MEU */
908 0xBBDC, /* LV # Lo HANGUL SYLLABLE MYI */
909 0xBBF8, /* LV # Lo HANGUL SYLLABLE MI */
910 0xBC14, /* LV # Lo HANGUL SYLLABLE BA */
911 0xBC30, /* LV # Lo HANGUL SYLLABLE BAE */
912 0xBC4C, /* LV # Lo HANGUL SYLLABLE BYA */
913 0xBC68, /* LV # Lo HANGUL SYLLABLE BYAE */
914 0xBC84, /* LV # Lo HANGUL SYLLABLE BEO */
915 0xBCA0, /* LV # Lo HANGUL SYLLABLE BE */
916 0xBCBC, /* LV # Lo HANGUL SYLLABLE BYEO */
917 0xBCD8, /* LV # Lo HANGUL SYLLABLE BYE */
918 0xBCF4, /* LV # Lo HANGUL SYLLABLE BO */
919 0xBD10, /* LV # Lo HANGUL SYLLABLE BWA */
920 0xBD2C, /* LV # Lo HANGUL SYLLABLE BWAE */
921 0xBD48, /* LV # Lo HANGUL SYLLABLE BOE */
922 0xBD64, /* LV # Lo HANGUL SYLLABLE BYO */
923 0xBD80, /* LV # Lo HANGUL SYLLABLE BU */
924 0xBD9C, /* LV # Lo HANGUL SYLLABLE BWEO */
925 0xBDB8, /* LV # Lo HANGUL SYLLABLE BWE */
926 0xBDD4, /* LV # Lo HANGUL SYLLABLE BWI */
927 0xBDF0, /* LV # Lo HANGUL SYLLABLE BYU */
928 0xBE0C, /* LV # Lo HANGUL SYLLABLE BEU */
929 0xBE28, /* LV # Lo HANGUL SYLLABLE BYI */
930 0xBE44, /* LV # Lo HANGUL SYLLABLE BI */
931 0xBE60, /* LV # Lo HANGUL SYLLABLE BBA */
932 0xBE7C, /* LV # Lo HANGUL SYLLABLE BBAE */
933 0xBE98, /* LV # Lo HANGUL SYLLABLE BBYA */
934 0xBEB4, /* LV # Lo HANGUL SYLLABLE BBYAE */
935 0xBED0, /* LV # Lo HANGUL SYLLABLE BBEO */
936 0xBEEC, /* LV # Lo HANGUL SYLLABLE BBE */
937 0xBF08, /* LV # Lo HANGUL SYLLABLE BBYEO */
938 0xBF24, /* LV # Lo HANGUL SYLLABLE BBYE */
939 0xBF40, /* LV # Lo HANGUL SYLLABLE BBO */
940 0xBF5C, /* LV # Lo HANGUL SYLLABLE BBWA */
941 0xBF78, /* LV # Lo HANGUL SYLLABLE BBWAE */
942 0xBF94, /* LV # Lo HANGUL SYLLABLE BBOE */
943 0xBFB0, /* LV # Lo HANGUL SYLLABLE BBYO */
944 0xBFCC, /* LV # Lo HANGUL SYLLABLE BBU */
945 0xBFE8, /* LV # Lo HANGUL SYLLABLE BBWEO */
946 0xC004, /* LV # Lo HANGUL SYLLABLE BBWE */
947 0xC020, /* LV # Lo HANGUL SYLLABLE BBWI */
948 0xC03C, /* LV # Lo HANGUL SYLLABLE BBYU */
949 0xC058, /* LV # Lo HANGUL SYLLABLE BBEU */
950 0xC074, /* LV # Lo HANGUL SYLLABLE BBYI */
951 0xC090, /* LV # Lo HANGUL SYLLABLE BBI */
952 0xC0AC, /* LV # Lo HANGUL SYLLABLE SA */
953 0xC0C8, /* LV # Lo HANGUL SYLLABLE SAE */
954 0xC0E4, /* LV # Lo HANGUL SYLLABLE SYA */
955 0xC100, /* LV # Lo HANGUL SYLLABLE SYAE */
956 0xC11C, /* LV # Lo HANGUL SYLLABLE SEO */
957 0xC138, /* LV # Lo HANGUL SYLLABLE SE */
958 0xC154, /* LV # Lo HANGUL SYLLABLE SYEO */
959 0xC170, /* LV # Lo HANGUL SYLLABLE SYE */
960 0xC18C, /* LV # Lo HANGUL SYLLABLE SO */
961 0xC1A8, /* LV # Lo HANGUL SYLLABLE SWA */
962 0xC1C4, /* LV # Lo HANGUL SYLLABLE SWAE */
963 0xC1E0, /* LV # Lo HANGUL SYLLABLE SOE */
964 0xC1FC, /* LV # Lo HANGUL SYLLABLE SYO */
965 0xC218, /* LV # Lo HANGUL SYLLABLE SU */
966 0xC234, /* LV # Lo HANGUL SYLLABLE SWEO */
967 0xC250, /* LV # Lo HANGUL SYLLABLE SWE */
968 0xC26C, /* LV # Lo HANGUL SYLLABLE SWI */
969 0xC288, /* LV # Lo HANGUL SYLLABLE SYU */
970 0xC2A4, /* LV # Lo HANGUL SYLLABLE SEU */
971 0xC2C0, /* LV # Lo HANGUL SYLLABLE SYI */
972 0xC2DC, /* LV # Lo HANGUL SYLLABLE SI */
973 0xC2F8, /* LV # Lo HANGUL SYLLABLE SSA */
974 0xC314, /* LV # Lo HANGUL SYLLABLE SSAE */
975 0xC330, /* LV # Lo HANGUL SYLLABLE SSYA */
976 0xC34C, /* LV # Lo HANGUL SYLLABLE SSYAE */
977 0xC368, /* LV # Lo HANGUL SYLLABLE SSEO */
978 0xC384, /* LV # Lo HANGUL SYLLABLE SSE */
979 0xC3A0, /* LV # Lo HANGUL SYLLABLE SSYEO */
980 0xC3BC, /* LV # Lo HANGUL SYLLABLE SSYE */
981 0xC3D8, /* LV # Lo HANGUL SYLLABLE SSO */
982 0xC3F4, /* LV # Lo HANGUL SYLLABLE SSWA */
983 0xC410, /* LV # Lo HANGUL SYLLABLE SSWAE */
984 0xC42C, /* LV # Lo HANGUL SYLLABLE SSOE */
985 0xC448, /* LV # Lo HANGUL SYLLABLE SSYO */
986 0xC464, /* LV # Lo HANGUL SYLLABLE SSU */
987 0xC480, /* LV # Lo HANGUL SYLLABLE SSWEO */
988 0xC49C, /* LV # Lo HANGUL SYLLABLE SSWE */
989 0xC4B8, /* LV # Lo HANGUL SYLLABLE SSWI */
990 0xC4D4, /* LV # Lo HANGUL SYLLABLE SSYU */
991 0xC4F0, /* LV # Lo HANGUL SYLLABLE SSEU */
992 0xC50C, /* LV # Lo HANGUL SYLLABLE SSYI */
993 0xC528, /* LV # Lo HANGUL SYLLABLE SSI */
994 0xC544, /* LV # Lo HANGUL SYLLABLE A */
995 0xC560, /* LV # Lo HANGUL SYLLABLE AE */
996 0xC57C, /* LV # Lo HANGUL SYLLABLE YA */
997 0xC598, /* LV # Lo HANGUL SYLLABLE YAE */
998 0xC5B4, /* LV # Lo HANGUL SYLLABLE EO */
999 0xC5D0, /* LV # Lo HANGUL SYLLABLE E */
1000 0xC5EC, /* LV # Lo HANGUL SYLLABLE YEO */
1001 0xC608, /* LV # Lo HANGUL SYLLABLE YE */
1002 0xC624, /* LV # Lo HANGUL SYLLABLE O */
1003 0xC640, /* LV # Lo HANGUL SYLLABLE WA */
1004 0xC65C, /* LV # Lo HANGUL SYLLABLE WAE */
1005 0xC678, /* LV # Lo HANGUL SYLLABLE OE */
1006 0xC694, /* LV # Lo HANGUL SYLLABLE YO */
1007 0xC6B0, /* LV # Lo HANGUL SYLLABLE U */
1008 0xC6CC, /* LV # Lo HANGUL SYLLABLE WEO */
1009 0xC6E8, /* LV # Lo HANGUL SYLLABLE WE */
1010 0xC704, /* LV # Lo HANGUL SYLLABLE WI */
1011 0xC720, /* LV # Lo HANGUL SYLLABLE YU */
1012 0xC73C, /* LV # Lo HANGUL SYLLABLE EU */
1013 0xC758, /* LV # Lo HANGUL SYLLABLE YI */
1014 0xC774, /* LV # Lo HANGUL SYLLABLE I */
1015 0xC790, /* LV # Lo HANGUL SYLLABLE JA */
1016 0xC7AC, /* LV # Lo HANGUL SYLLABLE JAE */
1017 0xC7C8, /* LV # Lo HANGUL SYLLABLE JYA */
1018 0xC7E4, /* LV # Lo HANGUL SYLLABLE JYAE */
1019 0xC800, /* LV # Lo HANGUL SYLLABLE JEO */
1020 0xC81C, /* LV # Lo HANGUL SYLLABLE JE */
1021 0xC838, /* LV # Lo HANGUL SYLLABLE JYEO */
1022 0xC854, /* LV # Lo HANGUL SYLLABLE JYE */
1023 0xC870, /* LV # Lo HANGUL SYLLABLE JO */
1024 0xC88C, /* LV # Lo HANGUL SYLLABLE JWA */
1025 0xC8A8, /* LV # Lo HANGUL SYLLABLE JWAE */
1026 0xC8C4, /* LV # Lo HANGUL SYLLABLE JOE */
1027 0xC8E0, /* LV # Lo HANGUL SYLLABLE JYO */
1028 0xC8FC, /* LV # Lo HANGUL SYLLABLE JU */
1029 0xC918, /* LV # Lo HANGUL SYLLABLE JWEO */
1030 0xC934, /* LV # Lo HANGUL SYLLABLE JWE */
1031 0xC950, /* LV # Lo HANGUL SYLLABLE JWI */
1032 0xC96C, /* LV # Lo HANGUL SYLLABLE JYU */
1033 0xC988, /* LV # Lo HANGUL SYLLABLE JEU */
1034 0xC9A4, /* LV # Lo HANGUL SYLLABLE JYI */
1035 0xC9C0, /* LV # Lo HANGUL SYLLABLE JI */
1036 0xC9DC, /* LV # Lo HANGUL SYLLABLE JJA */
1037 0xC9F8, /* LV # Lo HANGUL SYLLABLE JJAE */
1038 0xCA14, /* LV # Lo HANGUL SYLLABLE JJYA */
1039 0xCA30, /* LV # Lo HANGUL SYLLABLE JJYAE */
1040 0xCA4C, /* LV # Lo HANGUL SYLLABLE JJEO */
1041 0xCA68, /* LV # Lo HANGUL SYLLABLE JJE */
1042 0xCA84, /* LV # Lo HANGUL SYLLABLE JJYEO */
1043 0xCAA0, /* LV # Lo HANGUL SYLLABLE JJYE */
1044 0xCABC, /* LV # Lo HANGUL SYLLABLE JJO */
1045 0xCAD8, /* LV # Lo HANGUL SYLLABLE JJWA */
1046 0xCAF4, /* LV # Lo HANGUL SYLLABLE JJWAE */
1047 0xCB10, /* LV # Lo HANGUL SYLLABLE JJOE */
1048 0xCB2C, /* LV # Lo HANGUL SYLLABLE JJYO */
1049 0xCB48, /* LV # Lo HANGUL SYLLABLE JJU */
1050 0xCB64, /* LV # Lo HANGUL SYLLABLE JJWEO */
1051 0xCB80, /* LV # Lo HANGUL SYLLABLE JJWE */
1052 0xCB9C, /* LV # Lo HANGUL SYLLABLE JJWI */
1053 0xCBB8, /* LV # Lo HANGUL SYLLABLE JJYU */
1054 0xCBD4, /* LV # Lo HANGUL SYLLABLE JJEU */
1055 0xCBF0, /* LV # Lo HANGUL SYLLABLE JJYI */
1056 0xCC0C, /* LV # Lo HANGUL SYLLABLE JJI */
1057 0xCC28, /* LV # Lo HANGUL SYLLABLE CA */
1058 0xCC44, /* LV # Lo HANGUL SYLLABLE CAE */
1059 0xCC60, /* LV # Lo HANGUL SYLLABLE CYA */
1060 0xCC7C, /* LV # Lo HANGUL SYLLABLE CYAE */
1061 0xCC98, /* LV # Lo HANGUL SYLLABLE CEO */
1062 0xCCB4, /* LV # Lo HANGUL SYLLABLE CE */
1063 0xCCD0, /* LV # Lo HANGUL SYLLABLE CYEO */
1064 0xCCEC, /* LV # Lo HANGUL SYLLABLE CYE */
1065 0xCD08, /* LV # Lo HANGUL SYLLABLE CO */
1066 0xCD24, /* LV # Lo HANGUL SYLLABLE CWA */
1067 0xCD40, /* LV # Lo HANGUL SYLLABLE CWAE */
1068 0xCD5C, /* LV # Lo HANGUL SYLLABLE COE */
1069 0xCD78, /* LV # Lo HANGUL SYLLABLE CYO */
1070 0xCD94, /* LV # Lo HANGUL SYLLABLE CU */
1071 0xCDB0, /* LV # Lo HANGUL SYLLABLE CWEO */
1072 0xCDCC, /* LV # Lo HANGUL SYLLABLE CWE */
1073 0xCDE8, /* LV # Lo HANGUL SYLLABLE CWI */
1074 0xCE04, /* LV # Lo HANGUL SYLLABLE CYU */
1075 0xCE20, /* LV # Lo HANGUL SYLLABLE CEU */
1076 0xCE3C, /* LV # Lo HANGUL SYLLABLE CYI */
1077 0xCE58, /* LV # Lo HANGUL SYLLABLE CI */
1078 0xCE74, /* LV # Lo HANGUL SYLLABLE KA */
1079 0xCE90, /* LV # Lo HANGUL SYLLABLE KAE */
1080 0xCEAC, /* LV # Lo HANGUL SYLLABLE KYA */
1081 0xCEC8, /* LV # Lo HANGUL SYLLABLE KYAE */
1082 0xCEE4, /* LV # Lo HANGUL SYLLABLE KEO */
1083 0xCF00, /* LV # Lo HANGUL SYLLABLE KE */
1084 0xCF1C, /* LV # Lo HANGUL SYLLABLE KYEO */
1085 0xCF38, /* LV # Lo HANGUL SYLLABLE KYE */
1086 0xCF54, /* LV # Lo HANGUL SYLLABLE KO */
1087 0xCF70, /* LV # Lo HANGUL SYLLABLE KWA */
1088 0xCF8C, /* LV # Lo HANGUL SYLLABLE KWAE */
1089 0xCFA8, /* LV # Lo HANGUL SYLLABLE KOE */
1090 0xCFC4, /* LV # Lo HANGUL SYLLABLE KYO */
1091 0xCFE0, /* LV # Lo HANGUL SYLLABLE KU */
1092 0xCFFC, /* LV # Lo HANGUL SYLLABLE KWEO */
1093 0xD018, /* LV # Lo HANGUL SYLLABLE KWE */
1094 0xD034, /* LV # Lo HANGUL SYLLABLE KWI */
1095 0xD050, /* LV # Lo HANGUL SYLLABLE KYU */
1096 0xD06C, /* LV # Lo HANGUL SYLLABLE KEU */
1097 0xD088, /* LV # Lo HANGUL SYLLABLE KYI */
1098 0xD0A4, /* LV # Lo HANGUL SYLLABLE KI */
1099 0xD0C0, /* LV # Lo HANGUL SYLLABLE TA */
1100 0xD0DC, /* LV # Lo HANGUL SYLLABLE TAE */
1101 0xD0F8, /* LV # Lo HANGUL SYLLABLE TYA */
1102 0xD114, /* LV # Lo HANGUL SYLLABLE TYAE */
1103 0xD130, /* LV # Lo HANGUL SYLLABLE TEO */
1104 0xD14C, /* LV # Lo HANGUL SYLLABLE TE */
1105 0xD168, /* LV # Lo HANGUL SYLLABLE TYEO */
1106 0xD184, /* LV # Lo HANGUL SYLLABLE TYE */
1107 0xD1A0, /* LV # Lo HANGUL SYLLABLE TO */
1108 0xD1BC, /* LV # Lo HANGUL SYLLABLE TWA */
1109 0xD1D8, /* LV # Lo HANGUL SYLLABLE TWAE */
1110 0xD1F4, /* LV # Lo HANGUL SYLLABLE TOE */
1111 0xD210, /* LV # Lo HANGUL SYLLABLE TYO */
1112 0xD22C, /* LV # Lo HANGUL SYLLABLE TU */
1113 0xD248, /* LV # Lo HANGUL SYLLABLE TWEO */
1114 0xD264, /* LV # Lo HANGUL SYLLABLE TWE */
1115 0xD280, /* LV # Lo HANGUL SYLLABLE TWI */
1116 0xD29C, /* LV # Lo HANGUL SYLLABLE TYU */
1117 0xD2B8, /* LV # Lo HANGUL SYLLABLE TEU */
1118 0xD2D4, /* LV # Lo HANGUL SYLLABLE TYI */
1119 0xD2F0, /* LV # Lo HANGUL SYLLABLE TI */
1120 0xD30C, /* LV # Lo HANGUL SYLLABLE PA */
1121 0xD328, /* LV # Lo HANGUL SYLLABLE PAE */
1122 0xD344, /* LV # Lo HANGUL SYLLABLE PYA */
1123 0xD360, /* LV # Lo HANGUL SYLLABLE PYAE */
1124 0xD37C, /* LV # Lo HANGUL SYLLABLE PEO */
1125 0xD398, /* LV # Lo HANGUL SYLLABLE PE */
1126 0xD3B4, /* LV # Lo HANGUL SYLLABLE PYEO */
1127 0xD3D0, /* LV # Lo HANGUL SYLLABLE PYE */
1128 0xD3EC, /* LV # Lo HANGUL SYLLABLE PO */
1129 0xD408, /* LV # Lo HANGUL SYLLABLE PWA */
1130 0xD424, /* LV # Lo HANGUL SYLLABLE PWAE */
1131 0xD440, /* LV # Lo HANGUL SYLLABLE POE */
1132 0xD45C, /* LV # Lo HANGUL SYLLABLE PYO */
1133 0xD478, /* LV # Lo HANGUL SYLLABLE PU */
1134 0xD494, /* LV # Lo HANGUL SYLLABLE PWEO */
1135 0xD4B0, /* LV # Lo HANGUL SYLLABLE PWE */
1136 0xD4CC, /* LV # Lo HANGUL SYLLABLE PWI */
1137 0xD4E8, /* LV # Lo HANGUL SYLLABLE PYU */
1138 0xD504, /* LV # Lo HANGUL SYLLABLE PEU */
1139 0xD520, /* LV # Lo HANGUL SYLLABLE PYI */
1140 0xD53C, /* LV # Lo HANGUL SYLLABLE PI */
1141 0xD558, /* LV # Lo HANGUL SYLLABLE HA */
1142 0xD574, /* LV # Lo HANGUL SYLLABLE HAE */
1143 0xD590, /* LV # Lo HANGUL SYLLABLE HYA */
1144 0xD5AC, /* LV # Lo HANGUL SYLLABLE HYAE */
1145 0xD5C8, /* LV # Lo HANGUL SYLLABLE HEO */
1146 0xD5E4, /* LV # Lo HANGUL SYLLABLE HE */
1147 0xD600, /* LV # Lo HANGUL SYLLABLE HYEO */
1148 0xD61C, /* LV # Lo HANGUL SYLLABLE HYE */
1149 0xD638, /* LV # Lo HANGUL SYLLABLE HO */
1150 0xD654, /* LV # Lo HANGUL SYLLABLE HWA */
1151 0xD670, /* LV # Lo HANGUL SYLLABLE HWAE */
1152 0xD68C, /* LV # Lo HANGUL SYLLABLE HOE */
1153 0xD6A8, /* LV # Lo HANGUL SYLLABLE HYO */
1154 0xD6C4, /* LV # Lo HANGUL SYLLABLE HU */
1155 0xD6E0, /* LV # Lo HANGUL SYLLABLE HWEO */
1156 0xD6FC, /* LV # Lo HANGUL SYLLABLE HWE */
1157 0xD718, /* LV # Lo HANGUL SYLLABLE HWI */
1158 0xD734, /* LV # Lo HANGUL SYLLABLE HYU */
1159 0xD750, /* LV # Lo HANGUL SYLLABLE HEU */
1160 0xD76C, /* LV # Lo HANGUL SYLLABLE HYI */
1161 0xD788, /* LV # Lo HANGUL SYLLABLE HI */
1162 0
1163};
1164
/* Last-column code points for row 13 of the PR-29 table (paired with
   pr29_13_1 in the pr29[] array): the contiguous range U+11A8..U+11C2.
   The trailing 0 terminates the list. */
static const uint32_t pr29_13_2[] = {
  0x11A8, 0x11A9, 0x11AA, 0x11AB,
  0x11AC, 0x11AD, 0x11AE, 0x11AF,
  0x11B0, 0x11B1, 0x11B2, 0x11B3,
  0x11B4, 0x11B5, 0x11B6, 0x11B7,
  0x11B8, 0x11B9, 0x11BA, 0x11BB,
  0x11BC, 0x11BD, 0x11BE, 0x11BF,
  0x11C0, 0x11C1, 0x11C2,
  0				/* end of list */
};
1171
/* One row of the PR-29 table.  Both members point at zero-terminated
   arrays of code points; the lookup helpers below scan them until
   they reach the 0 sentinel. */
typedef struct
{
  /* Code points that may start a problem sequence for this row. */
  const uint32_t *first;
  /* Code points that may end a problem sequence for this row. */
  const uint32_t *last;
} Pr29;
1177
1178static const Pr29 pr29[] = {
1179 {&pr29_1_1[0], &pr29_1_2[0]},
1180 {&pr29_2_1[0], &pr29_2_2[0]},
1181 {&pr29_3_1[0], &pr29_3_2[0]},
1182 {&pr29_4_1[0], &pr29_4_2[0]},
1183 {&pr29_5_1[0], &pr29_5_2[0]},
1184 {&pr29_6_1[0], &pr29_6_2[0]},
1185 {&pr29_7_1[0], &pr29_7_2[0]},
1186 {&pr29_8_1[0], &pr29_8_2[0]},
1187 {&pr29_9_1[0], &pr29_9_2[0]},
1188 {&pr29_10_1[0], &pr29_10_2[0]},
1189 {&pr29_11_1[0], &pr29_11_2[0]},
1190 {&pr29_12_1[0], &pr29_12_2[0]},
1191 {&pr29_13_1[0], &pr29_13_2[0]},
1192 {NULL, NULL}
1193};
1194
1195static size_t
1196first_column (uint32_t c)
1197{
1198 size_t i, j;
1199
1200 for (i = 0; pr29[i].first; i++)
1201 for (j = 0; pr29[i].first[j]; j++)
1202 if (c == pr29[i].first[j])
1203 return i + 1;
1204
1205 return 0;
1206}
1207
1208static int
1209in_last_column_row (uint32_t c, size_t row)
1210{
1211 size_t i;
1212
1213 for (i = 0; pr29[row - 1].last[i]; i++)
1214 if (c == pr29[row - 1].last[i])
1215 return 1;
1216
1217 return 0;
1218}
1219
1220static size_t
1221combinationclass (uint32_t c)
1222{
1223 size_t i;
1224
1225 for (i = 0; nzcc[i]; i++)
1226 if (c == nzcc[i])
1227 return i + 1;
1228
1229 return 0;
1230}
1231
1246int
1247pr29_4 (const uint32_t *in, size_t len)
1248{
1249 size_t i, j, k, row;
1250
1251 /*
1252 * The problem sequence are of the form:
1253 *
1254 * first_character intervening_character+ last_character
1255 *
1256 * where the first_character and last_character come from the same
1257 * row in the following table, and there is at least one
1258 * intervening_character with non-zero Canonical Combining
1259 * Class. (The '+' above means one or more occurrences.)
1260 *
1261 */
1262
1263 for (i = 0; i < len; i++)
1264 if ((row = first_column (in[i])) > 0)
1265 for (j = i + 1; j < len; j++)
1266 if (combinationclass (in[j]))
1267 for (k = j + 1; k < len; k++)
1268 if (in_last_column_row (in[k], row))
1269 return PR29_PROBLEM;
1270
1271 return PR29_SUCCESS;
1272}
1273
/**
 * pr29_4z:
 * @in: zero terminated array of Unicode code points.
 *
 * Count the code points in @in up to, but not including, the
 * terminating zero and run pr29_4() on them.
 *
 * Return value: whatever pr29_4() returns for the counted prefix.
 **/
int
pr29_4z (const uint32_t *in)
{
  const uint32_t *end = in;

  /* Advance to the terminating zero to learn the length. */
  while (*end != 0)
    end++;

  return pr29_4 (in, (size_t) (end - in));
}
1297
1312int
1313pr29_8z (const char *in)
1314{
1315 uint32_t *p;
1316 int rc;
1317
1318 p = stringprep_utf8_to_ucs4 (in, -1, NULL);
1319 if (!p)
1320 return PR29_STRINGPREP_ERROR;
1321
1322 rc = pr29_4z (p);
1323
1324 free (p);
1325
1326 return rc;
1327}
1328
uint32_t * stringprep_utf8_to_ucs4(const char *str, ssize_t len, size_t *items_written)
Definition nfkc.c:1006
int pr29_8z(const char *in)
Definition pr29.c:1313
int pr29_4z(const uint32_t *in)
Definition pr29.c:1288
int pr29_4(const uint32_t *in, size_t len)
Definition pr29.c:1247
@ PR29_STRINGPREP_ERROR
Definition pr29.h:69
@ PR29_SUCCESS
Definition pr29.h:67
@ PR29_PROBLEM
Definition pr29.h:68
Definition pr29.c:1173
const uint32_t * first
Definition pr29.c:1174
const uint32_t * last
Definition pr29.c:1175