diff --git a/CMakeLists.txt b/CMakeLists.txt index 95b9d1957..84300bf5b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -153,6 +153,9 @@ set (MONGOCRYPT_SOURCES src/os_posix/os_mutex.c src/os_win/os_dll.c src/os_posix/os_dll.c + src/unicode/case-fold-map.c + src/unicode/diacritic-fold-map.c + src/unicode/fold.c ) # If MONGOCRYPT_CRYPTO is not set, choose a system default. @@ -513,6 +516,7 @@ set (TEST_MONGOCRYPT_SOURCES test/test-mongocrypt-util.c test/test-mongocrypt.c test/test-named-kms-providers.c + test/test-unicode-fold.c ) # Define test-mongocrypt diff --git a/src/mc-text-search-str-encode-private.h b/src/mc-text-search-str-encode-private.h index bd69619a8..7bdfeb37f 100644 --- a/src/mc-text-search-str-encode-private.h +++ b/src/mc-text-search-str-encode-private.h @@ -40,11 +40,6 @@ typedef struct { // Run StrEncode with the given spec. mc_str_encode_sets_t *mc_text_search_str_encode(const mc_FLE2TextSearchInsertSpec_t *spec, mongocrypt_status_t *status); -// TODO MONGOCRYPT-759 This helper only exists to test folded_len != unfolded_len; make the test actually use folding -mc_str_encode_sets_t *mc_text_search_str_encode_helper(const mc_FLE2TextSearchInsertSpec_t *spec, - uint32_t unfolded_len, - mongocrypt_status_t *status); - void mc_str_encode_sets_destroy(mc_str_encode_sets_t *sets); #endif /* MONGOCRYPT_TEXT_SEARCH_STR_ENCODE_PRIVATE_H */ \ No newline at end of file diff --git a/src/mc-text-search-str-encode.c b/src/mc-text-search-str-encode.c index 257bf5d9f..e8f2edd4e 100644 --- a/src/mc-text-search-str-encode.c +++ b/src/mc-text-search-str-encode.c @@ -18,6 +18,7 @@ #include "mc-text-search-str-encode-private.h" #include "mongocrypt-buffer-private.h" #include "mongocrypt.h" +#include "unicode/fold.h" #include #include @@ -170,23 +171,47 @@ static uint32_t mc_get_utf8_codepoint_length(const char *buf, uint32_t len) { return codepoint_len; } -// TODO MONGOCRYPT-759 This helper only exists to test folded len != unfolded len; make the test actually 
use folding -mc_str_encode_sets_t *mc_text_search_str_encode_helper(const mc_FLE2TextSearchInsertSpec_t *spec, - uint32_t unfolded_codepoint_len, - mongocrypt_status_t *status) { +mc_str_encode_sets_t *mc_text_search_str_encode(const mc_FLE2TextSearchInsertSpec_t *spec, + mongocrypt_status_t *status) { BSON_ASSERT_PARAM(spec); + if (spec->len > MAX_ENCODE_BYTE_LEN) { + CLIENT_ERR("StrEncode: String passed in was too long: String was %u bytes, but max is %u bytes", + spec->len, + MAX_ENCODE_BYTE_LEN); + return NULL; + } if (!bson_utf8_validate(spec->v, spec->len, false /* allow_null */)) { CLIENT_ERR("StrEncode: String passed in was not valid UTF-8"); return NULL; } + uint32_t unfolded_codepoint_len = mc_get_utf8_codepoint_length(spec->v, spec->len); + if (unfolded_codepoint_len == 0) { + // Empty string: We set unfolded length to 1 so that we generate fake tokens. + unfolded_codepoint_len = 1; + } - const char *folded_str = spec->v; - uint32_t folded_str_bytes_len = spec->len; + mc_utf8_string_with_bad_char_t *base_string; + if (spec->casef || spec->diacf) { + char *folded_str; + size_t folded_str_bytes_len; + if (!unicode_fold(spec->v, + spec->len, + (spec->casef * kUnicodeFoldToLower) | (spec->diacf * kUnicodeFoldRemoveDiacritics), + &folded_str, + &folded_str_bytes_len, + status)) { + return NULL; + } + base_string = mc_utf8_string_with_bad_char_from_buffer(folded_str, (uint32_t)folded_str_bytes_len); + bson_free(folded_str); + } else { + base_string = mc_utf8_string_with_bad_char_from_buffer(spec->v, spec->len); + } mc_str_encode_sets_t *sets = bson_malloc0(sizeof(mc_str_encode_sets_t)); // Base string is the folded string plus the 0xFF character - sets->base_string = mc_utf8_string_with_bad_char_from_buffer(folded_str, folded_str_bytes_len); + sets->base_string = base_string; if (spec->suffix.set) { sets->suffix_set = generate_suffix_tree(sets->base_string, unfolded_codepoint_len, &spec->suffix.value); } @@ -204,33 +229,11 @@ mc_str_encode_sets_t 
*mc_text_search_str_encode_helper(const mc_FLE2TextSearchIn } sets->substring_set = generate_substring_tree(sets->base_string, unfolded_codepoint_len, &spec->substr.value); } - // Exact string is always the first len characters of the base string - _mongocrypt_buffer_from_data(&sets->exact, sets->base_string->buf.data, folded_str_bytes_len); + // Exact string is always equal to the base string up until the bad character + _mongocrypt_buffer_from_data(&sets->exact, sets->base_string->buf.data, (uint32_t)sets->base_string->buf.len - 1); return sets; } -mc_str_encode_sets_t *mc_text_search_str_encode(const mc_FLE2TextSearchInsertSpec_t *spec, - mongocrypt_status_t *status) { - BSON_ASSERT_PARAM(spec); - if (spec->len > MAX_ENCODE_BYTE_LEN) { - CLIENT_ERR("StrEncode: String passed in was too long: String was %u bytes, but max is %u bytes", - spec->len, - MAX_ENCODE_BYTE_LEN); - return NULL; - } - // TODO MONGOCRYPT-759 Implement and use CFold - if (!bson_utf8_validate(spec->v, spec->len, false /* allow_null */)) { - CLIENT_ERR("StrEncode: String passed in was not valid UTF-8"); - return NULL; - } - uint32_t unfolded_codepoint_len = mc_get_utf8_codepoint_length(spec->v, spec->len); - if (unfolded_codepoint_len == 0) { - // Empty string: We set unfolded length to 1 so that we generate fake tokens. - unfolded_codepoint_len = 1; - } - return mc_text_search_str_encode_helper(spec, unfolded_codepoint_len, status); -} - void mc_str_encode_sets_destroy(mc_str_encode_sets_t *sets) { if (!sets) { return; diff --git a/src/unicode/case-fold-map.c b/src/unicode/case-fold-map.c new file mode 100644 index 000000000..67549f6da --- /dev/null +++ b/src/unicode/case-fold-map.c @@ -0,0 +1,1434 @@ +/** + * Copyright (C) 2025-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + * + * THIS IS A GENERATED FILE, DO NOT MODIFY. + */ + +#include "bson/bson.h" +#include "fold.h" + +bson_unichar_t unicode_codepoint_to_lower(bson_unichar_t codepoint) { + if (codepoint <= 0x7f) { + if (codepoint >= 'A' && codepoint <= 'Z') { + return codepoint | 0x20; // Set the ascii lowercase bit on the character. 
+ } + return codepoint; + } + + switch (codepoint) { + case 0xb5: return 0x3bc; + case 0xc0: return 0xe0; + case 0xc1: return 0xe1; + case 0xc2: return 0xe2; + case 0xc3: return 0xe3; + case 0xc4: return 0xe4; + case 0xc5: return 0xe5; + case 0xc6: return 0xe6; + case 0xc7: return 0xe7; + case 0xc8: return 0xe8; + case 0xc9: return 0xe9; + case 0xca: return 0xea; + case 0xcb: return 0xeb; + case 0xcc: return 0xec; + case 0xcd: return 0xed; + case 0xce: return 0xee; + case 0xcf: return 0xef; + case 0xd0: return 0xf0; + case 0xd1: return 0xf1; + case 0xd2: return 0xf2; + case 0xd3: return 0xf3; + case 0xd4: return 0xf4; + case 0xd5: return 0xf5; + case 0xd6: return 0xf6; + case 0xd8: return 0xf8; + case 0xd9: return 0xf9; + case 0xda: return 0xfa; + case 0xdb: return 0xfb; + case 0xdc: return 0xfc; + case 0xdd: return 0xfd; + case 0xde: return 0xfe; + case 0x100: return 0x101; + case 0x102: return 0x103; + case 0x104: return 0x105; + case 0x106: return 0x107; + case 0x108: return 0x109; + case 0x10a: return 0x10b; + case 0x10c: return 0x10d; + case 0x10e: return 0x10f; + case 0x110: return 0x111; + case 0x112: return 0x113; + case 0x114: return 0x115; + case 0x116: return 0x117; + case 0x118: return 0x119; + case 0x11a: return 0x11b; + case 0x11c: return 0x11d; + case 0x11e: return 0x11f; + case 0x120: return 0x121; + case 0x122: return 0x123; + case 0x124: return 0x125; + case 0x126: return 0x127; + case 0x128: return 0x129; + case 0x12a: return 0x12b; + case 0x12c: return 0x12d; + case 0x12e: return 0x12f; + case 0x132: return 0x133; + case 0x134: return 0x135; + case 0x136: return 0x137; + case 0x139: return 0x13a; + case 0x13b: return 0x13c; + case 0x13d: return 0x13e; + case 0x13f: return 0x140; + case 0x141: return 0x142; + case 0x143: return 0x144; + case 0x145: return 0x146; + case 0x147: return 0x148; + case 0x14a: return 0x14b; + case 0x14c: return 0x14d; + case 0x14e: return 0x14f; + case 0x150: return 0x151; + case 0x152: return 0x153; + case 0x154: 
return 0x155; + case 0x156: return 0x157; + case 0x158: return 0x159; + case 0x15a: return 0x15b; + case 0x15c: return 0x15d; + case 0x15e: return 0x15f; + case 0x160: return 0x161; + case 0x162: return 0x163; + case 0x164: return 0x165; + case 0x166: return 0x167; + case 0x168: return 0x169; + case 0x16a: return 0x16b; + case 0x16c: return 0x16d; + case 0x16e: return 0x16f; + case 0x170: return 0x171; + case 0x172: return 0x173; + case 0x174: return 0x175; + case 0x176: return 0x177; + case 0x178: return 0xff; + case 0x179: return 0x17a; + case 0x17b: return 0x17c; + case 0x17d: return 0x17e; + case 0x17f: return 0x73; + case 0x181: return 0x253; + case 0x182: return 0x183; + case 0x184: return 0x185; + case 0x186: return 0x254; + case 0x187: return 0x188; + case 0x189: return 0x256; + case 0x18a: return 0x257; + case 0x18b: return 0x18c; + case 0x18e: return 0x1dd; + case 0x18f: return 0x259; + case 0x190: return 0x25b; + case 0x191: return 0x192; + case 0x193: return 0x260; + case 0x194: return 0x263; + case 0x196: return 0x269; + case 0x197: return 0x268; + case 0x198: return 0x199; + case 0x19c: return 0x26f; + case 0x19d: return 0x272; + case 0x19f: return 0x275; + case 0x1a0: return 0x1a1; + case 0x1a2: return 0x1a3; + case 0x1a4: return 0x1a5; + case 0x1a6: return 0x280; + case 0x1a7: return 0x1a8; + case 0x1a9: return 0x283; + case 0x1ac: return 0x1ad; + case 0x1ae: return 0x288; + case 0x1af: return 0x1b0; + case 0x1b1: return 0x28a; + case 0x1b2: return 0x28b; + case 0x1b3: return 0x1b4; + case 0x1b5: return 0x1b6; + case 0x1b7: return 0x292; + case 0x1b8: return 0x1b9; + case 0x1bc: return 0x1bd; + case 0x1c4: return 0x1c6; + case 0x1c5: return 0x1c6; + case 0x1c7: return 0x1c9; + case 0x1c8: return 0x1c9; + case 0x1ca: return 0x1cc; + case 0x1cb: return 0x1cc; + case 0x1cd: return 0x1ce; + case 0x1cf: return 0x1d0; + case 0x1d1: return 0x1d2; + case 0x1d3: return 0x1d4; + case 0x1d5: return 0x1d6; + case 0x1d7: return 0x1d8; + case 0x1d9: return 0x1da; 
+ case 0x1db: return 0x1dc; + case 0x1de: return 0x1df; + case 0x1e0: return 0x1e1; + case 0x1e2: return 0x1e3; + case 0x1e4: return 0x1e5; + case 0x1e6: return 0x1e7; + case 0x1e8: return 0x1e9; + case 0x1ea: return 0x1eb; + case 0x1ec: return 0x1ed; + case 0x1ee: return 0x1ef; + case 0x1f1: return 0x1f3; + case 0x1f2: return 0x1f3; + case 0x1f4: return 0x1f5; + case 0x1f6: return 0x195; + case 0x1f7: return 0x1bf; + case 0x1f8: return 0x1f9; + case 0x1fa: return 0x1fb; + case 0x1fc: return 0x1fd; + case 0x1fe: return 0x1ff; + case 0x200: return 0x201; + case 0x202: return 0x203; + case 0x204: return 0x205; + case 0x206: return 0x207; + case 0x208: return 0x209; + case 0x20a: return 0x20b; + case 0x20c: return 0x20d; + case 0x20e: return 0x20f; + case 0x210: return 0x211; + case 0x212: return 0x213; + case 0x214: return 0x215; + case 0x216: return 0x217; + case 0x218: return 0x219; + case 0x21a: return 0x21b; + case 0x21c: return 0x21d; + case 0x21e: return 0x21f; + case 0x220: return 0x19e; + case 0x222: return 0x223; + case 0x224: return 0x225; + case 0x226: return 0x227; + case 0x228: return 0x229; + case 0x22a: return 0x22b; + case 0x22c: return 0x22d; + case 0x22e: return 0x22f; + case 0x230: return 0x231; + case 0x232: return 0x233; + case 0x23a: return 0x2c65; + case 0x23b: return 0x23c; + case 0x23d: return 0x19a; + case 0x23e: return 0x2c66; + case 0x241: return 0x242; + case 0x243: return 0x180; + case 0x244: return 0x289; + case 0x245: return 0x28c; + case 0x246: return 0x247; + case 0x248: return 0x249; + case 0x24a: return 0x24b; + case 0x24c: return 0x24d; + case 0x24e: return 0x24f; + case 0x345: return 0x3b9; + case 0x370: return 0x371; + case 0x372: return 0x373; + case 0x376: return 0x377; + case 0x37f: return 0x3f3; + case 0x386: return 0x3ac; + case 0x388: return 0x3ad; + case 0x389: return 0x3ae; + case 0x38a: return 0x3af; + case 0x38c: return 0x3cc; + case 0x38e: return 0x3cd; + case 0x38f: return 0x3ce; + case 0x391: return 0x3b1; + case 
0x392: return 0x3b2; + case 0x393: return 0x3b3; + case 0x394: return 0x3b4; + case 0x395: return 0x3b5; + case 0x396: return 0x3b6; + case 0x397: return 0x3b7; + case 0x398: return 0x3b8; + case 0x399: return 0x3b9; + case 0x39a: return 0x3ba; + case 0x39b: return 0x3bb; + case 0x39c: return 0x3bc; + case 0x39d: return 0x3bd; + case 0x39e: return 0x3be; + case 0x39f: return 0x3bf; + case 0x3a0: return 0x3c0; + case 0x3a1: return 0x3c1; + case 0x3a3: return 0x3c3; + case 0x3a4: return 0x3c4; + case 0x3a5: return 0x3c5; + case 0x3a6: return 0x3c6; + case 0x3a7: return 0x3c7; + case 0x3a8: return 0x3c8; + case 0x3a9: return 0x3c9; + case 0x3aa: return 0x3ca; + case 0x3ab: return 0x3cb; + case 0x3c2: return 0x3c3; + case 0x3cf: return 0x3d7; + case 0x3d0: return 0x3b2; + case 0x3d1: return 0x3b8; + case 0x3d5: return 0x3c6; + case 0x3d6: return 0x3c0; + case 0x3d8: return 0x3d9; + case 0x3da: return 0x3db; + case 0x3dc: return 0x3dd; + case 0x3de: return 0x3df; + case 0x3e0: return 0x3e1; + case 0x3e2: return 0x3e3; + case 0x3e4: return 0x3e5; + case 0x3e6: return 0x3e7; + case 0x3e8: return 0x3e9; + case 0x3ea: return 0x3eb; + case 0x3ec: return 0x3ed; + case 0x3ee: return 0x3ef; + case 0x3f0: return 0x3ba; + case 0x3f1: return 0x3c1; + case 0x3f4: return 0x3b8; + case 0x3f5: return 0x3b5; + case 0x3f7: return 0x3f8; + case 0x3f9: return 0x3f2; + case 0x3fa: return 0x3fb; + case 0x3fd: return 0x37b; + case 0x3fe: return 0x37c; + case 0x3ff: return 0x37d; + case 0x400: return 0x450; + case 0x401: return 0x451; + case 0x402: return 0x452; + case 0x403: return 0x453; + case 0x404: return 0x454; + case 0x405: return 0x455; + case 0x406: return 0x456; + case 0x407: return 0x457; + case 0x408: return 0x458; + case 0x409: return 0x459; + case 0x40a: return 0x45a; + case 0x40b: return 0x45b; + case 0x40c: return 0x45c; + case 0x40d: return 0x45d; + case 0x40e: return 0x45e; + case 0x40f: return 0x45f; + case 0x410: return 0x430; + case 0x411: return 0x431; + case 0x412: 
return 0x432; + case 0x413: return 0x433; + case 0x414: return 0x434; + case 0x415: return 0x435; + case 0x416: return 0x436; + case 0x417: return 0x437; + case 0x418: return 0x438; + case 0x419: return 0x439; + case 0x41a: return 0x43a; + case 0x41b: return 0x43b; + case 0x41c: return 0x43c; + case 0x41d: return 0x43d; + case 0x41e: return 0x43e; + case 0x41f: return 0x43f; + case 0x420: return 0x440; + case 0x421: return 0x441; + case 0x422: return 0x442; + case 0x423: return 0x443; + case 0x424: return 0x444; + case 0x425: return 0x445; + case 0x426: return 0x446; + case 0x427: return 0x447; + case 0x428: return 0x448; + case 0x429: return 0x449; + case 0x42a: return 0x44a; + case 0x42b: return 0x44b; + case 0x42c: return 0x44c; + case 0x42d: return 0x44d; + case 0x42e: return 0x44e; + case 0x42f: return 0x44f; + case 0x460: return 0x461; + case 0x462: return 0x463; + case 0x464: return 0x465; + case 0x466: return 0x467; + case 0x468: return 0x469; + case 0x46a: return 0x46b; + case 0x46c: return 0x46d; + case 0x46e: return 0x46f; + case 0x470: return 0x471; + case 0x472: return 0x473; + case 0x474: return 0x475; + case 0x476: return 0x477; + case 0x478: return 0x479; + case 0x47a: return 0x47b; + case 0x47c: return 0x47d; + case 0x47e: return 0x47f; + case 0x480: return 0x481; + case 0x48a: return 0x48b; + case 0x48c: return 0x48d; + case 0x48e: return 0x48f; + case 0x490: return 0x491; + case 0x492: return 0x493; + case 0x494: return 0x495; + case 0x496: return 0x497; + case 0x498: return 0x499; + case 0x49a: return 0x49b; + case 0x49c: return 0x49d; + case 0x49e: return 0x49f; + case 0x4a0: return 0x4a1; + case 0x4a2: return 0x4a3; + case 0x4a4: return 0x4a5; + case 0x4a6: return 0x4a7; + case 0x4a8: return 0x4a9; + case 0x4aa: return 0x4ab; + case 0x4ac: return 0x4ad; + case 0x4ae: return 0x4af; + case 0x4b0: return 0x4b1; + case 0x4b2: return 0x4b3; + case 0x4b4: return 0x4b5; + case 0x4b6: return 0x4b7; + case 0x4b8: return 0x4b9; + case 0x4ba: return 
0x4bb; + case 0x4bc: return 0x4bd; + case 0x4be: return 0x4bf; + case 0x4c0: return 0x4cf; + case 0x4c1: return 0x4c2; + case 0x4c3: return 0x4c4; + case 0x4c5: return 0x4c6; + case 0x4c7: return 0x4c8; + case 0x4c9: return 0x4ca; + case 0x4cb: return 0x4cc; + case 0x4cd: return 0x4ce; + case 0x4d0: return 0x4d1; + case 0x4d2: return 0x4d3; + case 0x4d4: return 0x4d5; + case 0x4d6: return 0x4d7; + case 0x4d8: return 0x4d9; + case 0x4da: return 0x4db; + case 0x4dc: return 0x4dd; + case 0x4de: return 0x4df; + case 0x4e0: return 0x4e1; + case 0x4e2: return 0x4e3; + case 0x4e4: return 0x4e5; + case 0x4e6: return 0x4e7; + case 0x4e8: return 0x4e9; + case 0x4ea: return 0x4eb; + case 0x4ec: return 0x4ed; + case 0x4ee: return 0x4ef; + case 0x4f0: return 0x4f1; + case 0x4f2: return 0x4f3; + case 0x4f4: return 0x4f5; + case 0x4f6: return 0x4f7; + case 0x4f8: return 0x4f9; + case 0x4fa: return 0x4fb; + case 0x4fc: return 0x4fd; + case 0x4fe: return 0x4ff; + case 0x500: return 0x501; + case 0x502: return 0x503; + case 0x504: return 0x505; + case 0x506: return 0x507; + case 0x508: return 0x509; + case 0x50a: return 0x50b; + case 0x50c: return 0x50d; + case 0x50e: return 0x50f; + case 0x510: return 0x511; + case 0x512: return 0x513; + case 0x514: return 0x515; + case 0x516: return 0x517; + case 0x518: return 0x519; + case 0x51a: return 0x51b; + case 0x51c: return 0x51d; + case 0x51e: return 0x51f; + case 0x520: return 0x521; + case 0x522: return 0x523; + case 0x524: return 0x525; + case 0x526: return 0x527; + case 0x528: return 0x529; + case 0x52a: return 0x52b; + case 0x52c: return 0x52d; + case 0x52e: return 0x52f; + case 0x531: return 0x561; + case 0x532: return 0x562; + case 0x533: return 0x563; + case 0x534: return 0x564; + case 0x535: return 0x565; + case 0x536: return 0x566; + case 0x537: return 0x567; + case 0x538: return 0x568; + case 0x539: return 0x569; + case 0x53a: return 0x56a; + case 0x53b: return 0x56b; + case 0x53c: return 0x56c; + case 0x53d: return 0x56d; + 
case 0x53e: return 0x56e; + case 0x53f: return 0x56f; + case 0x540: return 0x570; + case 0x541: return 0x571; + case 0x542: return 0x572; + case 0x543: return 0x573; + case 0x544: return 0x574; + case 0x545: return 0x575; + case 0x546: return 0x576; + case 0x547: return 0x577; + case 0x548: return 0x578; + case 0x549: return 0x579; + case 0x54a: return 0x57a; + case 0x54b: return 0x57b; + case 0x54c: return 0x57c; + case 0x54d: return 0x57d; + case 0x54e: return 0x57e; + case 0x54f: return 0x57f; + case 0x550: return 0x580; + case 0x551: return 0x581; + case 0x552: return 0x582; + case 0x553: return 0x583; + case 0x554: return 0x584; + case 0x555: return 0x585; + case 0x556: return 0x586; + case 0x10a0: return 0x2d00; + case 0x10a1: return 0x2d01; + case 0x10a2: return 0x2d02; + case 0x10a3: return 0x2d03; + case 0x10a4: return 0x2d04; + case 0x10a5: return 0x2d05; + case 0x10a6: return 0x2d06; + case 0x10a7: return 0x2d07; + case 0x10a8: return 0x2d08; + case 0x10a9: return 0x2d09; + case 0x10aa: return 0x2d0a; + case 0x10ab: return 0x2d0b; + case 0x10ac: return 0x2d0c; + case 0x10ad: return 0x2d0d; + case 0x10ae: return 0x2d0e; + case 0x10af: return 0x2d0f; + case 0x10b0: return 0x2d10; + case 0x10b1: return 0x2d11; + case 0x10b2: return 0x2d12; + case 0x10b3: return 0x2d13; + case 0x10b4: return 0x2d14; + case 0x10b5: return 0x2d15; + case 0x10b6: return 0x2d16; + case 0x10b7: return 0x2d17; + case 0x10b8: return 0x2d18; + case 0x10b9: return 0x2d19; + case 0x10ba: return 0x2d1a; + case 0x10bb: return 0x2d1b; + case 0x10bc: return 0x2d1c; + case 0x10bd: return 0x2d1d; + case 0x10be: return 0x2d1e; + case 0x10bf: return 0x2d1f; + case 0x10c0: return 0x2d20; + case 0x10c1: return 0x2d21; + case 0x10c2: return 0x2d22; + case 0x10c3: return 0x2d23; + case 0x10c4: return 0x2d24; + case 0x10c5: return 0x2d25; + case 0x10c7: return 0x2d27; + case 0x10cd: return 0x2d2d; + case 0x13f8: return 0x13f0; + case 0x13f9: return 0x13f1; + case 0x13fa: return 0x13f2; + case 
0x13fb: return 0x13f3; + case 0x13fc: return 0x13f4; + case 0x13fd: return 0x13f5; + case 0x1c80: return 0x432; + case 0x1c81: return 0x434; + case 0x1c82: return 0x43e; + case 0x1c83: return 0x441; + case 0x1c84: return 0x442; + case 0x1c85: return 0x442; + case 0x1c86: return 0x44a; + case 0x1c87: return 0x463; + case 0x1c88: return 0xa64b; + case 0x1c90: return 0x10d0; + case 0x1c91: return 0x10d1; + case 0x1c92: return 0x10d2; + case 0x1c93: return 0x10d3; + case 0x1c94: return 0x10d4; + case 0x1c95: return 0x10d5; + case 0x1c96: return 0x10d6; + case 0x1c97: return 0x10d7; + case 0x1c98: return 0x10d8; + case 0x1c99: return 0x10d9; + case 0x1c9a: return 0x10da; + case 0x1c9b: return 0x10db; + case 0x1c9c: return 0x10dc; + case 0x1c9d: return 0x10dd; + case 0x1c9e: return 0x10de; + case 0x1c9f: return 0x10df; + case 0x1ca0: return 0x10e0; + case 0x1ca1: return 0x10e1; + case 0x1ca2: return 0x10e2; + case 0x1ca3: return 0x10e3; + case 0x1ca4: return 0x10e4; + case 0x1ca5: return 0x10e5; + case 0x1ca6: return 0x10e6; + case 0x1ca7: return 0x10e7; + case 0x1ca8: return 0x10e8; + case 0x1ca9: return 0x10e9; + case 0x1caa: return 0x10ea; + case 0x1cab: return 0x10eb; + case 0x1cac: return 0x10ec; + case 0x1cad: return 0x10ed; + case 0x1cae: return 0x10ee; + case 0x1caf: return 0x10ef; + case 0x1cb0: return 0x10f0; + case 0x1cb1: return 0x10f1; + case 0x1cb2: return 0x10f2; + case 0x1cb3: return 0x10f3; + case 0x1cb4: return 0x10f4; + case 0x1cb5: return 0x10f5; + case 0x1cb6: return 0x10f6; + case 0x1cb7: return 0x10f7; + case 0x1cb8: return 0x10f8; + case 0x1cb9: return 0x10f9; + case 0x1cba: return 0x10fa; + case 0x1cbd: return 0x10fd; + case 0x1cbe: return 0x10fe; + case 0x1cbf: return 0x10ff; + case 0x1e00: return 0x1e01; + case 0x1e02: return 0x1e03; + case 0x1e04: return 0x1e05; + case 0x1e06: return 0x1e07; + case 0x1e08: return 0x1e09; + case 0x1e0a: return 0x1e0b; + case 0x1e0c: return 0x1e0d; + case 0x1e0e: return 0x1e0f; + case 0x1e10: return 0x1e11; + 
case 0x1e12: return 0x1e13; + case 0x1e14: return 0x1e15; + case 0x1e16: return 0x1e17; + case 0x1e18: return 0x1e19; + case 0x1e1a: return 0x1e1b; + case 0x1e1c: return 0x1e1d; + case 0x1e1e: return 0x1e1f; + case 0x1e20: return 0x1e21; + case 0x1e22: return 0x1e23; + case 0x1e24: return 0x1e25; + case 0x1e26: return 0x1e27; + case 0x1e28: return 0x1e29; + case 0x1e2a: return 0x1e2b; + case 0x1e2c: return 0x1e2d; + case 0x1e2e: return 0x1e2f; + case 0x1e30: return 0x1e31; + case 0x1e32: return 0x1e33; + case 0x1e34: return 0x1e35; + case 0x1e36: return 0x1e37; + case 0x1e38: return 0x1e39; + case 0x1e3a: return 0x1e3b; + case 0x1e3c: return 0x1e3d; + case 0x1e3e: return 0x1e3f; + case 0x1e40: return 0x1e41; + case 0x1e42: return 0x1e43; + case 0x1e44: return 0x1e45; + case 0x1e46: return 0x1e47; + case 0x1e48: return 0x1e49; + case 0x1e4a: return 0x1e4b; + case 0x1e4c: return 0x1e4d; + case 0x1e4e: return 0x1e4f; + case 0x1e50: return 0x1e51; + case 0x1e52: return 0x1e53; + case 0x1e54: return 0x1e55; + case 0x1e56: return 0x1e57; + case 0x1e58: return 0x1e59; + case 0x1e5a: return 0x1e5b; + case 0x1e5c: return 0x1e5d; + case 0x1e5e: return 0x1e5f; + case 0x1e60: return 0x1e61; + case 0x1e62: return 0x1e63; + case 0x1e64: return 0x1e65; + case 0x1e66: return 0x1e67; + case 0x1e68: return 0x1e69; + case 0x1e6a: return 0x1e6b; + case 0x1e6c: return 0x1e6d; + case 0x1e6e: return 0x1e6f; + case 0x1e70: return 0x1e71; + case 0x1e72: return 0x1e73; + case 0x1e74: return 0x1e75; + case 0x1e76: return 0x1e77; + case 0x1e78: return 0x1e79; + case 0x1e7a: return 0x1e7b; + case 0x1e7c: return 0x1e7d; + case 0x1e7e: return 0x1e7f; + case 0x1e80: return 0x1e81; + case 0x1e82: return 0x1e83; + case 0x1e84: return 0x1e85; + case 0x1e86: return 0x1e87; + case 0x1e88: return 0x1e89; + case 0x1e8a: return 0x1e8b; + case 0x1e8c: return 0x1e8d; + case 0x1e8e: return 0x1e8f; + case 0x1e90: return 0x1e91; + case 0x1e92: return 0x1e93; + case 0x1e94: return 0x1e95; + case 0x1e9b: return 
0x1e61; + case 0x1e9e: return 0xdf; + case 0x1ea0: return 0x1ea1; + case 0x1ea2: return 0x1ea3; + case 0x1ea4: return 0x1ea5; + case 0x1ea6: return 0x1ea7; + case 0x1ea8: return 0x1ea9; + case 0x1eaa: return 0x1eab; + case 0x1eac: return 0x1ead; + case 0x1eae: return 0x1eaf; + case 0x1eb0: return 0x1eb1; + case 0x1eb2: return 0x1eb3; + case 0x1eb4: return 0x1eb5; + case 0x1eb6: return 0x1eb7; + case 0x1eb8: return 0x1eb9; + case 0x1eba: return 0x1ebb; + case 0x1ebc: return 0x1ebd; + case 0x1ebe: return 0x1ebf; + case 0x1ec0: return 0x1ec1; + case 0x1ec2: return 0x1ec3; + case 0x1ec4: return 0x1ec5; + case 0x1ec6: return 0x1ec7; + case 0x1ec8: return 0x1ec9; + case 0x1eca: return 0x1ecb; + case 0x1ecc: return 0x1ecd; + case 0x1ece: return 0x1ecf; + case 0x1ed0: return 0x1ed1; + case 0x1ed2: return 0x1ed3; + case 0x1ed4: return 0x1ed5; + case 0x1ed6: return 0x1ed7; + case 0x1ed8: return 0x1ed9; + case 0x1eda: return 0x1edb; + case 0x1edc: return 0x1edd; + case 0x1ede: return 0x1edf; + case 0x1ee0: return 0x1ee1; + case 0x1ee2: return 0x1ee3; + case 0x1ee4: return 0x1ee5; + case 0x1ee6: return 0x1ee7; + case 0x1ee8: return 0x1ee9; + case 0x1eea: return 0x1eeb; + case 0x1eec: return 0x1eed; + case 0x1eee: return 0x1eef; + case 0x1ef0: return 0x1ef1; + case 0x1ef2: return 0x1ef3; + case 0x1ef4: return 0x1ef5; + case 0x1ef6: return 0x1ef7; + case 0x1ef8: return 0x1ef9; + case 0x1efa: return 0x1efb; + case 0x1efc: return 0x1efd; + case 0x1efe: return 0x1eff; + case 0x1f08: return 0x1f00; + case 0x1f09: return 0x1f01; + case 0x1f0a: return 0x1f02; + case 0x1f0b: return 0x1f03; + case 0x1f0c: return 0x1f04; + case 0x1f0d: return 0x1f05; + case 0x1f0e: return 0x1f06; + case 0x1f0f: return 0x1f07; + case 0x1f18: return 0x1f10; + case 0x1f19: return 0x1f11; + case 0x1f1a: return 0x1f12; + case 0x1f1b: return 0x1f13; + case 0x1f1c: return 0x1f14; + case 0x1f1d: return 0x1f15; + case 0x1f28: return 0x1f20; + case 0x1f29: return 0x1f21; + case 0x1f2a: return 0x1f22; + case 
0x1f2b: return 0x1f23; + case 0x1f2c: return 0x1f24; + case 0x1f2d: return 0x1f25; + case 0x1f2e: return 0x1f26; + case 0x1f2f: return 0x1f27; + case 0x1f38: return 0x1f30; + case 0x1f39: return 0x1f31; + case 0x1f3a: return 0x1f32; + case 0x1f3b: return 0x1f33; + case 0x1f3c: return 0x1f34; + case 0x1f3d: return 0x1f35; + case 0x1f3e: return 0x1f36; + case 0x1f3f: return 0x1f37; + case 0x1f48: return 0x1f40; + case 0x1f49: return 0x1f41; + case 0x1f4a: return 0x1f42; + case 0x1f4b: return 0x1f43; + case 0x1f4c: return 0x1f44; + case 0x1f4d: return 0x1f45; + case 0x1f59: return 0x1f51; + case 0x1f5b: return 0x1f53; + case 0x1f5d: return 0x1f55; + case 0x1f5f: return 0x1f57; + case 0x1f68: return 0x1f60; + case 0x1f69: return 0x1f61; + case 0x1f6a: return 0x1f62; + case 0x1f6b: return 0x1f63; + case 0x1f6c: return 0x1f64; + case 0x1f6d: return 0x1f65; + case 0x1f6e: return 0x1f66; + case 0x1f6f: return 0x1f67; + case 0x1f88: return 0x1f80; + case 0x1f89: return 0x1f81; + case 0x1f8a: return 0x1f82; + case 0x1f8b: return 0x1f83; + case 0x1f8c: return 0x1f84; + case 0x1f8d: return 0x1f85; + case 0x1f8e: return 0x1f86; + case 0x1f8f: return 0x1f87; + case 0x1f98: return 0x1f90; + case 0x1f99: return 0x1f91; + case 0x1f9a: return 0x1f92; + case 0x1f9b: return 0x1f93; + case 0x1f9c: return 0x1f94; + case 0x1f9d: return 0x1f95; + case 0x1f9e: return 0x1f96; + case 0x1f9f: return 0x1f97; + case 0x1fa8: return 0x1fa0; + case 0x1fa9: return 0x1fa1; + case 0x1faa: return 0x1fa2; + case 0x1fab: return 0x1fa3; + case 0x1fac: return 0x1fa4; + case 0x1fad: return 0x1fa5; + case 0x1fae: return 0x1fa6; + case 0x1faf: return 0x1fa7; + case 0x1fb8: return 0x1fb0; + case 0x1fb9: return 0x1fb1; + case 0x1fba: return 0x1f70; + case 0x1fbb: return 0x1f71; + case 0x1fbc: return 0x1fb3; + case 0x1fbe: return 0x3b9; + case 0x1fc8: return 0x1f72; + case 0x1fc9: return 0x1f73; + case 0x1fca: return 0x1f74; + case 0x1fcb: return 0x1f75; + case 0x1fcc: return 0x1fc3; + case 0x1fd8: return 
0x1fd0; + case 0x1fd9: return 0x1fd1; + case 0x1fda: return 0x1f76; + case 0x1fdb: return 0x1f77; + case 0x1fe8: return 0x1fe0; + case 0x1fe9: return 0x1fe1; + case 0x1fea: return 0x1f7a; + case 0x1feb: return 0x1f7b; + case 0x1fec: return 0x1fe5; + case 0x1ff8: return 0x1f78; + case 0x1ff9: return 0x1f79; + case 0x1ffa: return 0x1f7c; + case 0x1ffb: return 0x1f7d; + case 0x1ffc: return 0x1ff3; + case 0x2126: return 0x3c9; + case 0x212a: return 0x6b; + case 0x212b: return 0xe5; + case 0x2132: return 0x214e; + case 0x2160: return 0x2170; + case 0x2161: return 0x2171; + case 0x2162: return 0x2172; + case 0x2163: return 0x2173; + case 0x2164: return 0x2174; + case 0x2165: return 0x2175; + case 0x2166: return 0x2176; + case 0x2167: return 0x2177; + case 0x2168: return 0x2178; + case 0x2169: return 0x2179; + case 0x216a: return 0x217a; + case 0x216b: return 0x217b; + case 0x216c: return 0x217c; + case 0x216d: return 0x217d; + case 0x216e: return 0x217e; + case 0x216f: return 0x217f; + case 0x2183: return 0x2184; + case 0x24b6: return 0x24d0; + case 0x24b7: return 0x24d1; + case 0x24b8: return 0x24d2; + case 0x24b9: return 0x24d3; + case 0x24ba: return 0x24d4; + case 0x24bb: return 0x24d5; + case 0x24bc: return 0x24d6; + case 0x24bd: return 0x24d7; + case 0x24be: return 0x24d8; + case 0x24bf: return 0x24d9; + case 0x24c0: return 0x24da; + case 0x24c1: return 0x24db; + case 0x24c2: return 0x24dc; + case 0x24c3: return 0x24dd; + case 0x24c4: return 0x24de; + case 0x24c5: return 0x24df; + case 0x24c6: return 0x24e0; + case 0x24c7: return 0x24e1; + case 0x24c8: return 0x24e2; + case 0x24c9: return 0x24e3; + case 0x24ca: return 0x24e4; + case 0x24cb: return 0x24e5; + case 0x24cc: return 0x24e6; + case 0x24cd: return 0x24e7; + case 0x24ce: return 0x24e8; + case 0x24cf: return 0x24e9; + case 0x2c00: return 0x2c30; + case 0x2c01: return 0x2c31; + case 0x2c02: return 0x2c32; + case 0x2c03: return 0x2c33; + case 0x2c04: return 0x2c34; + case 0x2c05: return 0x2c35; + case 0x2c06: 
return 0x2c36; + case 0x2c07: return 0x2c37; + case 0x2c08: return 0x2c38; + case 0x2c09: return 0x2c39; + case 0x2c0a: return 0x2c3a; + case 0x2c0b: return 0x2c3b; + case 0x2c0c: return 0x2c3c; + case 0x2c0d: return 0x2c3d; + case 0x2c0e: return 0x2c3e; + case 0x2c0f: return 0x2c3f; + case 0x2c10: return 0x2c40; + case 0x2c11: return 0x2c41; + case 0x2c12: return 0x2c42; + case 0x2c13: return 0x2c43; + case 0x2c14: return 0x2c44; + case 0x2c15: return 0x2c45; + case 0x2c16: return 0x2c46; + case 0x2c17: return 0x2c47; + case 0x2c18: return 0x2c48; + case 0x2c19: return 0x2c49; + case 0x2c1a: return 0x2c4a; + case 0x2c1b: return 0x2c4b; + case 0x2c1c: return 0x2c4c; + case 0x2c1d: return 0x2c4d; + case 0x2c1e: return 0x2c4e; + case 0x2c1f: return 0x2c4f; + case 0x2c20: return 0x2c50; + case 0x2c21: return 0x2c51; + case 0x2c22: return 0x2c52; + case 0x2c23: return 0x2c53; + case 0x2c24: return 0x2c54; + case 0x2c25: return 0x2c55; + case 0x2c26: return 0x2c56; + case 0x2c27: return 0x2c57; + case 0x2c28: return 0x2c58; + case 0x2c29: return 0x2c59; + case 0x2c2a: return 0x2c5a; + case 0x2c2b: return 0x2c5b; + case 0x2c2c: return 0x2c5c; + case 0x2c2d: return 0x2c5d; + case 0x2c2e: return 0x2c5e; + case 0x2c60: return 0x2c61; + case 0x2c62: return 0x26b; + case 0x2c63: return 0x1d7d; + case 0x2c64: return 0x27d; + case 0x2c67: return 0x2c68; + case 0x2c69: return 0x2c6a; + case 0x2c6b: return 0x2c6c; + case 0x2c6d: return 0x251; + case 0x2c6e: return 0x271; + case 0x2c6f: return 0x250; + case 0x2c70: return 0x252; + case 0x2c72: return 0x2c73; + case 0x2c75: return 0x2c76; + case 0x2c7e: return 0x23f; + case 0x2c7f: return 0x240; + case 0x2c80: return 0x2c81; + case 0x2c82: return 0x2c83; + case 0x2c84: return 0x2c85; + case 0x2c86: return 0x2c87; + case 0x2c88: return 0x2c89; + case 0x2c8a: return 0x2c8b; + case 0x2c8c: return 0x2c8d; + case 0x2c8e: return 0x2c8f; + case 0x2c90: return 0x2c91; + case 0x2c92: return 0x2c93; + case 0x2c94: return 0x2c95; + case 
0x2c96: return 0x2c97; + case 0x2c98: return 0x2c99; + case 0x2c9a: return 0x2c9b; + case 0x2c9c: return 0x2c9d; + case 0x2c9e: return 0x2c9f; + case 0x2ca0: return 0x2ca1; + case 0x2ca2: return 0x2ca3; + case 0x2ca4: return 0x2ca5; + case 0x2ca6: return 0x2ca7; + case 0x2ca8: return 0x2ca9; + case 0x2caa: return 0x2cab; + case 0x2cac: return 0x2cad; + case 0x2cae: return 0x2caf; + case 0x2cb0: return 0x2cb1; + case 0x2cb2: return 0x2cb3; + case 0x2cb4: return 0x2cb5; + case 0x2cb6: return 0x2cb7; + case 0x2cb8: return 0x2cb9; + case 0x2cba: return 0x2cbb; + case 0x2cbc: return 0x2cbd; + case 0x2cbe: return 0x2cbf; + case 0x2cc0: return 0x2cc1; + case 0x2cc2: return 0x2cc3; + case 0x2cc4: return 0x2cc5; + case 0x2cc6: return 0x2cc7; + case 0x2cc8: return 0x2cc9; + case 0x2cca: return 0x2ccb; + case 0x2ccc: return 0x2ccd; + case 0x2cce: return 0x2ccf; + case 0x2cd0: return 0x2cd1; + case 0x2cd2: return 0x2cd3; + case 0x2cd4: return 0x2cd5; + case 0x2cd6: return 0x2cd7; + case 0x2cd8: return 0x2cd9; + case 0x2cda: return 0x2cdb; + case 0x2cdc: return 0x2cdd; + case 0x2cde: return 0x2cdf; + case 0x2ce0: return 0x2ce1; + case 0x2ce2: return 0x2ce3; + case 0x2ceb: return 0x2cec; + case 0x2ced: return 0x2cee; + case 0x2cf2: return 0x2cf3; + case 0xa640: return 0xa641; + case 0xa642: return 0xa643; + case 0xa644: return 0xa645; + case 0xa646: return 0xa647; + case 0xa648: return 0xa649; + case 0xa64a: return 0xa64b; + case 0xa64c: return 0xa64d; + case 0xa64e: return 0xa64f; + case 0xa650: return 0xa651; + case 0xa652: return 0xa653; + case 0xa654: return 0xa655; + case 0xa656: return 0xa657; + case 0xa658: return 0xa659; + case 0xa65a: return 0xa65b; + case 0xa65c: return 0xa65d; + case 0xa65e: return 0xa65f; + case 0xa660: return 0xa661; + case 0xa662: return 0xa663; + case 0xa664: return 0xa665; + case 0xa666: return 0xa667; + case 0xa668: return 0xa669; + case 0xa66a: return 0xa66b; + case 0xa66c: return 0xa66d; + case 0xa680: return 0xa681; + case 0xa682: return 
0xa683; + case 0xa684: return 0xa685; + case 0xa686: return 0xa687; + case 0xa688: return 0xa689; + case 0xa68a: return 0xa68b; + case 0xa68c: return 0xa68d; + case 0xa68e: return 0xa68f; + case 0xa690: return 0xa691; + case 0xa692: return 0xa693; + case 0xa694: return 0xa695; + case 0xa696: return 0xa697; + case 0xa698: return 0xa699; + case 0xa69a: return 0xa69b; + case 0xa722: return 0xa723; + case 0xa724: return 0xa725; + case 0xa726: return 0xa727; + case 0xa728: return 0xa729; + case 0xa72a: return 0xa72b; + case 0xa72c: return 0xa72d; + case 0xa72e: return 0xa72f; + case 0xa732: return 0xa733; + case 0xa734: return 0xa735; + case 0xa736: return 0xa737; + case 0xa738: return 0xa739; + case 0xa73a: return 0xa73b; + case 0xa73c: return 0xa73d; + case 0xa73e: return 0xa73f; + case 0xa740: return 0xa741; + case 0xa742: return 0xa743; + case 0xa744: return 0xa745; + case 0xa746: return 0xa747; + case 0xa748: return 0xa749; + case 0xa74a: return 0xa74b; + case 0xa74c: return 0xa74d; + case 0xa74e: return 0xa74f; + case 0xa750: return 0xa751; + case 0xa752: return 0xa753; + case 0xa754: return 0xa755; + case 0xa756: return 0xa757; + case 0xa758: return 0xa759; + case 0xa75a: return 0xa75b; + case 0xa75c: return 0xa75d; + case 0xa75e: return 0xa75f; + case 0xa760: return 0xa761; + case 0xa762: return 0xa763; + case 0xa764: return 0xa765; + case 0xa766: return 0xa767; + case 0xa768: return 0xa769; + case 0xa76a: return 0xa76b; + case 0xa76c: return 0xa76d; + case 0xa76e: return 0xa76f; + case 0xa779: return 0xa77a; + case 0xa77b: return 0xa77c; + case 0xa77d: return 0x1d79; + case 0xa77e: return 0xa77f; + case 0xa780: return 0xa781; + case 0xa782: return 0xa783; + case 0xa784: return 0xa785; + case 0xa786: return 0xa787; + case 0xa78b: return 0xa78c; + case 0xa78d: return 0x265; + case 0xa790: return 0xa791; + case 0xa792: return 0xa793; + case 0xa796: return 0xa797; + case 0xa798: return 0xa799; + case 0xa79a: return 0xa79b; + case 0xa79c: return 0xa79d; + case 
0xa79e: return 0xa79f; + case 0xa7a0: return 0xa7a1; + case 0xa7a2: return 0xa7a3; + case 0xa7a4: return 0xa7a5; + case 0xa7a6: return 0xa7a7; + case 0xa7a8: return 0xa7a9; + case 0xa7aa: return 0x266; + case 0xa7ab: return 0x25c; + case 0xa7ac: return 0x261; + case 0xa7ad: return 0x26c; + case 0xa7ae: return 0x26a; + case 0xa7b0: return 0x29e; + case 0xa7b1: return 0x287; + case 0xa7b2: return 0x29d; + case 0xa7b3: return 0xab53; + case 0xa7b4: return 0xa7b5; + case 0xa7b6: return 0xa7b7; + case 0xa7b8: return 0xa7b9; + case 0xa7ba: return 0xa7bb; + case 0xa7bc: return 0xa7bd; + case 0xa7be: return 0xa7bf; + case 0xa7c2: return 0xa7c3; + case 0xa7c4: return 0xa794; + case 0xa7c5: return 0x282; + case 0xa7c6: return 0x1d8e; + case 0xa7c7: return 0xa7c8; + case 0xa7c9: return 0xa7ca; + case 0xa7f5: return 0xa7f6; + case 0xab70: return 0x13a0; + case 0xab71: return 0x13a1; + case 0xab72: return 0x13a2; + case 0xab73: return 0x13a3; + case 0xab74: return 0x13a4; + case 0xab75: return 0x13a5; + case 0xab76: return 0x13a6; + case 0xab77: return 0x13a7; + case 0xab78: return 0x13a8; + case 0xab79: return 0x13a9; + case 0xab7a: return 0x13aa; + case 0xab7b: return 0x13ab; + case 0xab7c: return 0x13ac; + case 0xab7d: return 0x13ad; + case 0xab7e: return 0x13ae; + case 0xab7f: return 0x13af; + case 0xab80: return 0x13b0; + case 0xab81: return 0x13b1; + case 0xab82: return 0x13b2; + case 0xab83: return 0x13b3; + case 0xab84: return 0x13b4; + case 0xab85: return 0x13b5; + case 0xab86: return 0x13b6; + case 0xab87: return 0x13b7; + case 0xab88: return 0x13b8; + case 0xab89: return 0x13b9; + case 0xab8a: return 0x13ba; + case 0xab8b: return 0x13bb; + case 0xab8c: return 0x13bc; + case 0xab8d: return 0x13bd; + case 0xab8e: return 0x13be; + case 0xab8f: return 0x13bf; + case 0xab90: return 0x13c0; + case 0xab91: return 0x13c1; + case 0xab92: return 0x13c2; + case 0xab93: return 0x13c3; + case 0xab94: return 0x13c4; + case 0xab95: return 0x13c5; + case 0xab96: return 0x13c6; + 
case 0xab97: return 0x13c7; + case 0xab98: return 0x13c8; + case 0xab99: return 0x13c9; + case 0xab9a: return 0x13ca; + case 0xab9b: return 0x13cb; + case 0xab9c: return 0x13cc; + case 0xab9d: return 0x13cd; + case 0xab9e: return 0x13ce; + case 0xab9f: return 0x13cf; + case 0xaba0: return 0x13d0; + case 0xaba1: return 0x13d1; + case 0xaba2: return 0x13d2; + case 0xaba3: return 0x13d3; + case 0xaba4: return 0x13d4; + case 0xaba5: return 0x13d5; + case 0xaba6: return 0x13d6; + case 0xaba7: return 0x13d7; + case 0xaba8: return 0x13d8; + case 0xaba9: return 0x13d9; + case 0xabaa: return 0x13da; + case 0xabab: return 0x13db; + case 0xabac: return 0x13dc; + case 0xabad: return 0x13dd; + case 0xabae: return 0x13de; + case 0xabaf: return 0x13df; + case 0xabb0: return 0x13e0; + case 0xabb1: return 0x13e1; + case 0xabb2: return 0x13e2; + case 0xabb3: return 0x13e3; + case 0xabb4: return 0x13e4; + case 0xabb5: return 0x13e5; + case 0xabb6: return 0x13e6; + case 0xabb7: return 0x13e7; + case 0xabb8: return 0x13e8; + case 0xabb9: return 0x13e9; + case 0xabba: return 0x13ea; + case 0xabbb: return 0x13eb; + case 0xabbc: return 0x13ec; + case 0xabbd: return 0x13ed; + case 0xabbe: return 0x13ee; + case 0xabbf: return 0x13ef; + case 0xff21: return 0xff41; + case 0xff22: return 0xff42; + case 0xff23: return 0xff43; + case 0xff24: return 0xff44; + case 0xff25: return 0xff45; + case 0xff26: return 0xff46; + case 0xff27: return 0xff47; + case 0xff28: return 0xff48; + case 0xff29: return 0xff49; + case 0xff2a: return 0xff4a; + case 0xff2b: return 0xff4b; + case 0xff2c: return 0xff4c; + case 0xff2d: return 0xff4d; + case 0xff2e: return 0xff4e; + case 0xff2f: return 0xff4f; + case 0xff30: return 0xff50; + case 0xff31: return 0xff51; + case 0xff32: return 0xff52; + case 0xff33: return 0xff53; + case 0xff34: return 0xff54; + case 0xff35: return 0xff55; + case 0xff36: return 0xff56; + case 0xff37: return 0xff57; + case 0xff38: return 0xff58; + case 0xff39: return 0xff59; + case 0xff3a: return 
0xff5a; + case 0x10400: return 0x10428; + case 0x10401: return 0x10429; + case 0x10402: return 0x1042a; + case 0x10403: return 0x1042b; + case 0x10404: return 0x1042c; + case 0x10405: return 0x1042d; + case 0x10406: return 0x1042e; + case 0x10407: return 0x1042f; + case 0x10408: return 0x10430; + case 0x10409: return 0x10431; + case 0x1040a: return 0x10432; + case 0x1040b: return 0x10433; + case 0x1040c: return 0x10434; + case 0x1040d: return 0x10435; + case 0x1040e: return 0x10436; + case 0x1040f: return 0x10437; + case 0x10410: return 0x10438; + case 0x10411: return 0x10439; + case 0x10412: return 0x1043a; + case 0x10413: return 0x1043b; + case 0x10414: return 0x1043c; + case 0x10415: return 0x1043d; + case 0x10416: return 0x1043e; + case 0x10417: return 0x1043f; + case 0x10418: return 0x10440; + case 0x10419: return 0x10441; + case 0x1041a: return 0x10442; + case 0x1041b: return 0x10443; + case 0x1041c: return 0x10444; + case 0x1041d: return 0x10445; + case 0x1041e: return 0x10446; + case 0x1041f: return 0x10447; + case 0x10420: return 0x10448; + case 0x10421: return 0x10449; + case 0x10422: return 0x1044a; + case 0x10423: return 0x1044b; + case 0x10424: return 0x1044c; + case 0x10425: return 0x1044d; + case 0x10426: return 0x1044e; + case 0x10427: return 0x1044f; + case 0x104b0: return 0x104d8; + case 0x104b1: return 0x104d9; + case 0x104b2: return 0x104da; + case 0x104b3: return 0x104db; + case 0x104b4: return 0x104dc; + case 0x104b5: return 0x104dd; + case 0x104b6: return 0x104de; + case 0x104b7: return 0x104df; + case 0x104b8: return 0x104e0; + case 0x104b9: return 0x104e1; + case 0x104ba: return 0x104e2; + case 0x104bb: return 0x104e3; + case 0x104bc: return 0x104e4; + case 0x104bd: return 0x104e5; + case 0x104be: return 0x104e6; + case 0x104bf: return 0x104e7; + case 0x104c0: return 0x104e8; + case 0x104c1: return 0x104e9; + case 0x104c2: return 0x104ea; + case 0x104c3: return 0x104eb; + case 0x104c4: return 0x104ec; + case 0x104c5: return 0x104ed; + case 
0x104c6: return 0x104ee; + case 0x104c7: return 0x104ef; + case 0x104c8: return 0x104f0; + case 0x104c9: return 0x104f1; + case 0x104ca: return 0x104f2; + case 0x104cb: return 0x104f3; + case 0x104cc: return 0x104f4; + case 0x104cd: return 0x104f5; + case 0x104ce: return 0x104f6; + case 0x104cf: return 0x104f7; + case 0x104d0: return 0x104f8; + case 0x104d1: return 0x104f9; + case 0x104d2: return 0x104fa; + case 0x104d3: return 0x104fb; + case 0x10c80: return 0x10cc0; + case 0x10c81: return 0x10cc1; + case 0x10c82: return 0x10cc2; + case 0x10c83: return 0x10cc3; + case 0x10c84: return 0x10cc4; + case 0x10c85: return 0x10cc5; + case 0x10c86: return 0x10cc6; + case 0x10c87: return 0x10cc7; + case 0x10c88: return 0x10cc8; + case 0x10c89: return 0x10cc9; + case 0x10c8a: return 0x10cca; + case 0x10c8b: return 0x10ccb; + case 0x10c8c: return 0x10ccc; + case 0x10c8d: return 0x10ccd; + case 0x10c8e: return 0x10cce; + case 0x10c8f: return 0x10ccf; + case 0x10c90: return 0x10cd0; + case 0x10c91: return 0x10cd1; + case 0x10c92: return 0x10cd2; + case 0x10c93: return 0x10cd3; + case 0x10c94: return 0x10cd4; + case 0x10c95: return 0x10cd5; + case 0x10c96: return 0x10cd6; + case 0x10c97: return 0x10cd7; + case 0x10c98: return 0x10cd8; + case 0x10c99: return 0x10cd9; + case 0x10c9a: return 0x10cda; + case 0x10c9b: return 0x10cdb; + case 0x10c9c: return 0x10cdc; + case 0x10c9d: return 0x10cdd; + case 0x10c9e: return 0x10cde; + case 0x10c9f: return 0x10cdf; + case 0x10ca0: return 0x10ce0; + case 0x10ca1: return 0x10ce1; + case 0x10ca2: return 0x10ce2; + case 0x10ca3: return 0x10ce3; + case 0x10ca4: return 0x10ce4; + case 0x10ca5: return 0x10ce5; + case 0x10ca6: return 0x10ce6; + case 0x10ca7: return 0x10ce7; + case 0x10ca8: return 0x10ce8; + case 0x10ca9: return 0x10ce9; + case 0x10caa: return 0x10cea; + case 0x10cab: return 0x10ceb; + case 0x10cac: return 0x10cec; + case 0x10cad: return 0x10ced; + case 0x10cae: return 0x10cee; + case 0x10caf: return 0x10cef; + case 0x10cb0: return 
0x10cf0; + case 0x10cb1: return 0x10cf1; + case 0x10cb2: return 0x10cf2; + case 0x118a0: return 0x118c0; + case 0x118a1: return 0x118c1; + case 0x118a2: return 0x118c2; + case 0x118a3: return 0x118c3; + case 0x118a4: return 0x118c4; + case 0x118a5: return 0x118c5; + case 0x118a6: return 0x118c6; + case 0x118a7: return 0x118c7; + case 0x118a8: return 0x118c8; + case 0x118a9: return 0x118c9; + case 0x118aa: return 0x118ca; + case 0x118ab: return 0x118cb; + case 0x118ac: return 0x118cc; + case 0x118ad: return 0x118cd; + case 0x118ae: return 0x118ce; + case 0x118af: return 0x118cf; + case 0x118b0: return 0x118d0; + case 0x118b1: return 0x118d1; + case 0x118b2: return 0x118d2; + case 0x118b3: return 0x118d3; + case 0x118b4: return 0x118d4; + case 0x118b5: return 0x118d5; + case 0x118b6: return 0x118d6; + case 0x118b7: return 0x118d7; + case 0x118b8: return 0x118d8; + case 0x118b9: return 0x118d9; + case 0x118ba: return 0x118da; + case 0x118bb: return 0x118db; + case 0x118bc: return 0x118dc; + case 0x118bd: return 0x118dd; + case 0x118be: return 0x118de; + case 0x118bf: return 0x118df; + case 0x16e40: return 0x16e60; + case 0x16e41: return 0x16e61; + case 0x16e42: return 0x16e62; + case 0x16e43: return 0x16e63; + case 0x16e44: return 0x16e64; + case 0x16e45: return 0x16e65; + case 0x16e46: return 0x16e66; + case 0x16e47: return 0x16e67; + case 0x16e48: return 0x16e68; + case 0x16e49: return 0x16e69; + case 0x16e4a: return 0x16e6a; + case 0x16e4b: return 0x16e6b; + case 0x16e4c: return 0x16e6c; + case 0x16e4d: return 0x16e6d; + case 0x16e4e: return 0x16e6e; + case 0x16e4f: return 0x16e6f; + case 0x16e50: return 0x16e70; + case 0x16e51: return 0x16e71; + case 0x16e52: return 0x16e72; + case 0x16e53: return 0x16e73; + case 0x16e54: return 0x16e74; + case 0x16e55: return 0x16e75; + case 0x16e56: return 0x16e76; + case 0x16e57: return 0x16e77; + case 0x16e58: return 0x16e78; + case 0x16e59: return 0x16e79; + case 0x16e5a: return 0x16e7a; + case 0x16e5b: return 0x16e7b; + case 
0x16e5c: return 0x16e7c; + case 0x16e5d: return 0x16e7d; + case 0x16e5e: return 0x16e7e; + case 0x16e5f: return 0x16e7f; + case 0x1e900: return 0x1e922; + case 0x1e901: return 0x1e923; + case 0x1e902: return 0x1e924; + case 0x1e903: return 0x1e925; + case 0x1e904: return 0x1e926; + case 0x1e905: return 0x1e927; + case 0x1e906: return 0x1e928; + case 0x1e907: return 0x1e929; + case 0x1e908: return 0x1e92a; + case 0x1e909: return 0x1e92b; + case 0x1e90a: return 0x1e92c; + case 0x1e90b: return 0x1e92d; + case 0x1e90c: return 0x1e92e; + case 0x1e90d: return 0x1e92f; + case 0x1e90e: return 0x1e930; + case 0x1e90f: return 0x1e931; + case 0x1e910: return 0x1e932; + case 0x1e911: return 0x1e933; + case 0x1e912: return 0x1e934; + case 0x1e913: return 0x1e935; + case 0x1e914: return 0x1e936; + case 0x1e915: return 0x1e937; + case 0x1e916: return 0x1e938; + case 0x1e917: return 0x1e939; + case 0x1e918: return 0x1e93a; + case 0x1e919: return 0x1e93b; + case 0x1e91a: return 0x1e93c; + case 0x1e91b: return 0x1e93d; + case 0x1e91c: return 0x1e93e; + case 0x1e91d: return 0x1e93f; + case 0x1e91e: return 0x1e940; + case 0x1e91f: return 0x1e941; + case 0x1e920: return 0x1e942; + case 0x1e921: return 0x1e943; + default: return codepoint; + } +} \ No newline at end of file diff --git a/src/unicode/diacritic-fold-map.c b/src/unicode/diacritic-fold-map.c new file mode 100644 index 000000000..e78ca264a --- /dev/null +++ b/src/unicode/diacritic-fold-map.c @@ -0,0 +1,2884 @@ +/** + * Copyright (C) 2025-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. 
+ * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + * + * THIS IS A GENERATED FILE, DO NOT MODIFY. + */ + +#include "bson/bson.h" +#include "fold.h" + +bson_unichar_t unicode_codepoint_remove_diacritics(bson_unichar_t codepoint) { + switch (codepoint) { + case 0x5e: return 0x0; + case 0x60: return 0x0; + case 0xa8: return 0x0; + case 0xaf: return 0x0; + case 0xb4: return 0x0; + case 0xb7: return 0x0; + case 0xb8: return 0x0; + case 0xc0: return 0x41; + case 0xc1: return 0x41; + case 0xc2: return 0x41; + case 0xc3: return 0x41; + case 0xc4: return 0x41; + case 0xc5: return 0x41; + case 0xc7: return 0x43; + case 0xc8: return 0x45; + case 0xc9: return 0x45; + case 0xca: return 0x45; + case 0xcb: return 0x45; + case 0xcc: return 0x49; + case 0xcd: return 0x49; + case 0xce: return 0x49; + case 0xcf: return 0x49; + case 0xd1: return 0x4e; + case 0xd2: return 0x4f; + case 0xd3: return 0x4f; + case 0xd4: return 0x4f; + case 0xd5: return 0x4f; + case 0xd6: return 0x4f; + case 0xd9: return 0x55; + case 0xda: return 0x55; + case 0xdb: return 0x55; + case 0xdc: return 0x55; + case 0xdd: return 0x59; + case 0xe0: return 0x61; +
case 0xe1: return 0x61; + case 0xe2: return 0x61; + case 0xe3: return 0x61; + case 0xe4: return 0x61; + case 0xe5: return 0x61; + case 0xe7: return 0x63; + case 0xe8: return 0x65; + case 0xe9: return 0x65; + case 0xea: return 0x65; + case 0xeb: return 0x65; + case 0xec: return 0x69; + case 0xed: return 0x69; + case 0xee: return 0x69; + case 0xef: return 0x69; + case 0xf1: return 0x6e; + case 0xf2: return 0x6f; + case 0xf3: return 0x6f; + case 0xf4: return 0x6f; + case 0xf5: return 0x6f; + case 0xf6: return 0x6f; + case 0xf9: return 0x75; + case 0xfa: return 0x75; + case 0xfb: return 0x75; + case 0xfc: return 0x75; + case 0xfd: return 0x79; + case 0xff: return 0x79; + case 0x100: return 0x41; + case 0x101: return 0x61; + case 0x102: return 0x41; + case 0x103: return 0x61; + case 0x104: return 0x41; + case 0x105: return 0x61; + case 0x106: return 0x43; + case 0x107: return 0x63; + case 0x108: return 0x43; + case 0x109: return 0x63; + case 0x10a: return 0x43; + case 0x10b: return 0x63; + case 0x10c: return 0x43; + case 0x10d: return 0x63; + case 0x10e: return 0x44; + case 0x10f: return 0x64; + case 0x112: return 0x45; + case 0x113: return 0x65; + case 0x114: return 0x45; + case 0x115: return 0x65; + case 0x116: return 0x45; + case 0x117: return 0x65; + case 0x118: return 0x45; + case 0x119: return 0x65; + case 0x11a: return 0x45; + case 0x11b: return 0x65; + case 0x11c: return 0x47; + case 0x11d: return 0x67; + case 0x11e: return 0x47; + case 0x11f: return 0x67; + case 0x120: return 0x47; + case 0x121: return 0x67; + case 0x122: return 0x47; + case 0x123: return 0x67; + case 0x124: return 0x48; + case 0x125: return 0x68; + case 0x128: return 0x49; + case 0x129: return 0x69; + case 0x12a: return 0x49; + case 0x12b: return 0x69; + case 0x12c: return 0x49; + case 0x12d: return 0x69; + case 0x12e: return 0x49; + case 0x12f: return 0x69; + case 0x130: return 0x49; + case 0x134: return 0x4a; + case 0x135: return 0x6a; + case 0x136: return 0x4b; + case 0x137: return 0x6b; + 
case 0x139: return 0x4c; + case 0x13a: return 0x6c; + case 0x13b: return 0x4c; + case 0x13c: return 0x6c; + case 0x13d: return 0x4c; + case 0x13e: return 0x6c; + case 0x143: return 0x4e; + case 0x144: return 0x6e; + case 0x145: return 0x4e; + case 0x146: return 0x6e; + case 0x147: return 0x4e; + case 0x148: return 0x6e; + case 0x14c: return 0x4f; + case 0x14d: return 0x6f; + case 0x14e: return 0x4f; + case 0x14f: return 0x6f; + case 0x150: return 0x4f; + case 0x151: return 0x6f; + case 0x154: return 0x52; + case 0x155: return 0x72; + case 0x156: return 0x52; + case 0x157: return 0x72; + case 0x158: return 0x52; + case 0x159: return 0x72; + case 0x15a: return 0x53; + case 0x15b: return 0x73; + case 0x15c: return 0x53; + case 0x15d: return 0x73; + case 0x15e: return 0x53; + case 0x15f: return 0x73; + case 0x160: return 0x53; + case 0x161: return 0x73; + case 0x162: return 0x54; + case 0x163: return 0x74; + case 0x164: return 0x54; + case 0x165: return 0x74; + case 0x168: return 0x55; + case 0x169: return 0x75; + case 0x16a: return 0x55; + case 0x16b: return 0x75; + case 0x16c: return 0x55; + case 0x16d: return 0x75; + case 0x16e: return 0x55; + case 0x16f: return 0x75; + case 0x170: return 0x55; + case 0x171: return 0x75; + case 0x172: return 0x55; + case 0x173: return 0x75; + case 0x174: return 0x57; + case 0x175: return 0x77; + case 0x176: return 0x59; + case 0x177: return 0x79; + case 0x178: return 0x59; + case 0x179: return 0x5a; + case 0x17a: return 0x7a; + case 0x17b: return 0x5a; + case 0x17c: return 0x7a; + case 0x17d: return 0x5a; + case 0x17e: return 0x7a; + case 0x1a0: return 0x4f; + case 0x1a1: return 0x6f; + case 0x1af: return 0x55; + case 0x1b0: return 0x75; + case 0x1cd: return 0x41; + case 0x1ce: return 0x61; + case 0x1cf: return 0x49; + case 0x1d0: return 0x69; + case 0x1d1: return 0x4f; + case 0x1d2: return 0x6f; + case 0x1d3: return 0x55; + case 0x1d4: return 0x75; + case 0x1d5: return 0x55; + case 0x1d6: return 0x75; + case 0x1d7: return 0x55; + 
case 0x1d8: return 0x75; + case 0x1d9: return 0x55; + case 0x1da: return 0x75; + case 0x1db: return 0x55; + case 0x1dc: return 0x75; + case 0x1de: return 0x41; + case 0x1df: return 0x61; + case 0x1e0: return 0x41; + case 0x1e1: return 0x61; + case 0x1e2: return 0xc6; + case 0x1e3: return 0xe6; + case 0x1e6: return 0x47; + case 0x1e7: return 0x67; + case 0x1e8: return 0x4b; + case 0x1e9: return 0x6b; + case 0x1ea: return 0x4f; + case 0x1eb: return 0x6f; + case 0x1ec: return 0x4f; + case 0x1ed: return 0x6f; + case 0x1ee: return 0x1b7; + case 0x1ef: return 0x292; + case 0x1f0: return 0x6a; + case 0x1f4: return 0x47; + case 0x1f5: return 0x67; + case 0x1f8: return 0x4e; + case 0x1f9: return 0x6e; + case 0x1fa: return 0x41; + case 0x1fb: return 0x61; + case 0x1fc: return 0xc6; + case 0x1fd: return 0xe6; + case 0x1fe: return 0xd8; + case 0x1ff: return 0xf8; + case 0x200: return 0x41; + case 0x201: return 0x61; + case 0x202: return 0x41; + case 0x203: return 0x61; + case 0x204: return 0x45; + case 0x205: return 0x65; + case 0x206: return 0x45; + case 0x207: return 0x65; + case 0x208: return 0x49; + case 0x209: return 0x69; + case 0x20a: return 0x49; + case 0x20b: return 0x69; + case 0x20c: return 0x4f; + case 0x20d: return 0x6f; + case 0x20e: return 0x4f; + case 0x20f: return 0x6f; + case 0x210: return 0x52; + case 0x211: return 0x72; + case 0x212: return 0x52; + case 0x213: return 0x72; + case 0x214: return 0x55; + case 0x215: return 0x75; + case 0x216: return 0x55; + case 0x217: return 0x75; + case 0x218: return 0x53; + case 0x219: return 0x73; + case 0x21a: return 0x54; + case 0x21b: return 0x74; + case 0x21e: return 0x48; + case 0x21f: return 0x68; + case 0x226: return 0x41; + case 0x227: return 0x61; + case 0x228: return 0x45; + case 0x229: return 0x65; + case 0x22a: return 0x4f; + case 0x22b: return 0x6f; + case 0x22c: return 0x4f; + case 0x22d: return 0x6f; + case 0x22e: return 0x4f; + case 0x22f: return 0x6f; + case 0x230: return 0x4f; + case 0x231: return 0x6f; + 
case 0x232: return 0x59; + case 0x233: return 0x79; + case 0x2b0: return 0x0; + case 0x2b1: return 0x0; + case 0x2b2: return 0x0; + case 0x2b3: return 0x0; + case 0x2b4: return 0x0; + case 0x2b5: return 0x0; + case 0x2b6: return 0x0; + case 0x2b7: return 0x0; + case 0x2b8: return 0x0; + case 0x2b9: return 0x0; + case 0x2ba: return 0x0; + case 0x2bb: return 0x0; + case 0x2bc: return 0x0; + case 0x2bd: return 0x0; + case 0x2be: return 0x0; + case 0x2bf: return 0x0; + case 0x2c0: return 0x0; + case 0x2c1: return 0x0; + case 0x2c2: return 0x0; + case 0x2c3: return 0x0; + case 0x2c4: return 0x0; + case 0x2c5: return 0x0; + case 0x2c6: return 0x0; + case 0x2c7: return 0x0; + case 0x2c8: return 0x0; + case 0x2c9: return 0x0; + case 0x2ca: return 0x0; + case 0x2cb: return 0x0; + case 0x2cc: return 0x0; + case 0x2cd: return 0x0; + case 0x2ce: return 0x0; + case 0x2cf: return 0x0; + case 0x2d0: return 0x0; + case 0x2d1: return 0x0; + case 0x2d2: return 0x0; + case 0x2d3: return 0x0; + case 0x2d4: return 0x0; + case 0x2d5: return 0x0; + case 0x2d6: return 0x0; + case 0x2d7: return 0x0; + case 0x2d8: return 0x0; + case 0x2d9: return 0x0; + case 0x2da: return 0x0; + case 0x2db: return 0x0; + case 0x2dc: return 0x0; + case 0x2dd: return 0x0; + case 0x2de: return 0x0; + case 0x2df: return 0x0; + case 0x2e0: return 0x0; + case 0x2e1: return 0x0; + case 0x2e2: return 0x0; + case 0x2e3: return 0x0; + case 0x2e4: return 0x0; + case 0x2e5: return 0x0; + case 0x2e6: return 0x0; + case 0x2e7: return 0x0; + case 0x2e8: return 0x0; + case 0x2e9: return 0x0; + case 0x2ea: return 0x0; + case 0x2eb: return 0x0; + case 0x2ec: return 0x0; + case 0x2ed: return 0x0; + case 0x2ee: return 0x0; + case 0x2ef: return 0x0; + case 0x2f0: return 0x0; + case 0x2f1: return 0x0; + case 0x2f2: return 0x0; + case 0x2f3: return 0x0; + case 0x2f4: return 0x0; + case 0x2f5: return 0x0; + case 0x2f6: return 0x0; + case 0x2f7: return 0x0; + case 0x2f8: return 0x0; + case 0x2f9: return 0x0; + case 0x2fa: return 
0x0; + case 0x2fb: return 0x0; + case 0x2fc: return 0x0; + case 0x2fd: return 0x0; + case 0x2fe: return 0x0; + case 0x2ff: return 0x0; + case 0x300: return 0x0; + case 0x301: return 0x0; + case 0x302: return 0x0; + case 0x303: return 0x0; + case 0x304: return 0x0; + case 0x305: return 0x0; + case 0x306: return 0x0; + case 0x307: return 0x0; + case 0x308: return 0x0; + case 0x309: return 0x0; + case 0x30a: return 0x0; + case 0x30b: return 0x0; + case 0x30c: return 0x0; + case 0x30d: return 0x0; + case 0x30e: return 0x0; + case 0x30f: return 0x0; + case 0x310: return 0x0; + case 0x311: return 0x0; + case 0x312: return 0x0; + case 0x313: return 0x0; + case 0x314: return 0x0; + case 0x315: return 0x0; + case 0x316: return 0x0; + case 0x317: return 0x0; + case 0x318: return 0x0; + case 0x319: return 0x0; + case 0x31a: return 0x0; + case 0x31b: return 0x0; + case 0x31c: return 0x0; + case 0x31d: return 0x0; + case 0x31e: return 0x0; + case 0x31f: return 0x0; + case 0x320: return 0x0; + case 0x321: return 0x0; + case 0x322: return 0x0; + case 0x323: return 0x0; + case 0x324: return 0x0; + case 0x325: return 0x0; + case 0x326: return 0x0; + case 0x327: return 0x0; + case 0x328: return 0x0; + case 0x329: return 0x0; + case 0x32a: return 0x0; + case 0x32b: return 0x0; + case 0x32c: return 0x0; + case 0x32d: return 0x0; + case 0x32e: return 0x0; + case 0x32f: return 0x0; + case 0x330: return 0x0; + case 0x331: return 0x0; + case 0x332: return 0x0; + case 0x333: return 0x0; + case 0x334: return 0x0; + case 0x335: return 0x0; + case 0x336: return 0x0; + case 0x337: return 0x0; + case 0x338: return 0x0; + case 0x339: return 0x0; + case 0x33a: return 0x0; + case 0x33b: return 0x0; + case 0x33c: return 0x0; + case 0x33d: return 0x0; + case 0x33e: return 0x0; + case 0x33f: return 0x0; + case 0x340: return 0x0; + case 0x341: return 0x0; + case 0x342: return 0x0; + case 0x343: return 0x0; + case 0x344: return 0x0; + case 0x345: return 0x0; + case 0x346: return 0x0; + case 0x347: 
return 0x0; + case 0x348: return 0x0; + case 0x349: return 0x0; + case 0x34a: return 0x0; + case 0x34b: return 0x0; + case 0x34c: return 0x0; + case 0x34d: return 0x0; + case 0x34e: return 0x0; + case 0x350: return 0x0; + case 0x351: return 0x0; + case 0x352: return 0x0; + case 0x353: return 0x0; + case 0x354: return 0x0; + case 0x355: return 0x0; + case 0x356: return 0x0; + case 0x357: return 0x0; + case 0x35d: return 0x0; + case 0x35e: return 0x0; + case 0x35f: return 0x0; + case 0x360: return 0x0; + case 0x361: return 0x0; + case 0x362: return 0x0; + case 0x374: return 0x0; + case 0x375: return 0x0; + case 0x37a: return 0x0; + case 0x37e: return 0x3b; + case 0x384: return 0x0; + case 0x385: return 0x0; + case 0x386: return 0x391; + case 0x388: return 0x395; + case 0x389: return 0x397; + case 0x38a: return 0x399; + case 0x38c: return 0x39f; + case 0x38e: return 0x3a5; + case 0x38f: return 0x3a9; + case 0x390: return 0x3b9; + case 0x3aa: return 0x399; + case 0x3ab: return 0x3a5; + case 0x3ac: return 0x3b1; + case 0x3ad: return 0x3b5; + case 0x3ae: return 0x3b7; + case 0x3af: return 0x3b9; + case 0x3b0: return 0x3c5; + case 0x3ca: return 0x3b9; + case 0x3cb: return 0x3c5; + case 0x3cc: return 0x3bf; + case 0x3cd: return 0x3c5; + case 0x3ce: return 0x3c9; + case 0x3d3: return 0x3d2; + case 0x3d4: return 0x3d2; + case 0x400: return 0x415; + case 0x401: return 0x415; + case 0x403: return 0x413; + case 0x407: return 0x406; + case 0x40c: return 0x41a; + case 0x40d: return 0x418; + case 0x40e: return 0x423; + case 0x419: return 0x418; + case 0x439: return 0x438; + case 0x450: return 0x435; + case 0x451: return 0x435; + case 0x453: return 0x433; + case 0x457: return 0x456; + case 0x45c: return 0x43a; + case 0x45d: return 0x438; + case 0x45e: return 0x443; + case 0x476: return 0x474; + case 0x477: return 0x475; + case 0x483: return 0x0; + case 0x484: return 0x0; + case 0x485: return 0x0; + case 0x486: return 0x0; + case 0x487: return 0x0; + case 0x4c1: return 0x416; + case 
0x4c2: return 0x436; + case 0x4d0: return 0x410; + case 0x4d1: return 0x430; + case 0x4d2: return 0x410; + case 0x4d3: return 0x430; + case 0x4d6: return 0x415; + case 0x4d7: return 0x435; + case 0x4da: return 0x4d8; + case 0x4db: return 0x4d9; + case 0x4dc: return 0x416; + case 0x4dd: return 0x436; + case 0x4de: return 0x417; + case 0x4df: return 0x437; + case 0x4e2: return 0x418; + case 0x4e3: return 0x438; + case 0x4e4: return 0x418; + case 0x4e5: return 0x438; + case 0x4e6: return 0x41e; + case 0x4e7: return 0x43e; + case 0x4ea: return 0x4e8; + case 0x4eb: return 0x4e9; + case 0x4ec: return 0x42d; + case 0x4ed: return 0x44d; + case 0x4ee: return 0x423; + case 0x4ef: return 0x443; + case 0x4f0: return 0x423; + case 0x4f1: return 0x443; + case 0x4f2: return 0x423; + case 0x4f3: return 0x443; + case 0x4f4: return 0x427; + case 0x4f5: return 0x447; + case 0x4f8: return 0x42b; + case 0x4f9: return 0x44b; + case 0x559: return 0x0; + case 0x591: return 0x0; + case 0x592: return 0x0; + case 0x593: return 0x0; + case 0x594: return 0x0; + case 0x595: return 0x0; + case 0x596: return 0x0; + case 0x597: return 0x0; + case 0x598: return 0x0; + case 0x599: return 0x0; + case 0x59a: return 0x0; + case 0x59b: return 0x0; + case 0x59c: return 0x0; + case 0x59d: return 0x0; + case 0x59e: return 0x0; + case 0x59f: return 0x0; + case 0x5a0: return 0x0; + case 0x5a1: return 0x0; + case 0x5a3: return 0x0; + case 0x5a4: return 0x0; + case 0x5a5: return 0x0; + case 0x5a6: return 0x0; + case 0x5a7: return 0x0; + case 0x5a8: return 0x0; + case 0x5a9: return 0x0; + case 0x5aa: return 0x0; + case 0x5ab: return 0x0; + case 0x5ac: return 0x0; + case 0x5ad: return 0x0; + case 0x5ae: return 0x0; + case 0x5af: return 0x0; + case 0x5b0: return 0x0; + case 0x5b1: return 0x0; + case 0x5b2: return 0x0; + case 0x5b3: return 0x0; + case 0x5b4: return 0x0; + case 0x5b5: return 0x0; + case 0x5b6: return 0x0; + case 0x5b7: return 0x0; + case 0x5b8: return 0x0; + case 0x5b9: return 0x0; + case 0x5ba: 
return 0x0; + case 0x5bb: return 0x0; + case 0x5bc: return 0x0; + case 0x5bd: return 0x0; + case 0x5bf: return 0x0; + case 0x5c1: return 0x0; + case 0x5c2: return 0x0; + case 0x5c4: return 0x0; + case 0x64b: return 0x0; + case 0x64c: return 0x0; + case 0x64d: return 0x0; + case 0x64e: return 0x0; + case 0x64f: return 0x0; + case 0x650: return 0x0; + case 0x651: return 0x0; + case 0x652: return 0x0; + case 0x657: return 0x0; + case 0x658: return 0x0; + case 0x6df: return 0x0; + case 0x6e0: return 0x0; + case 0x6e5: return 0x0; + case 0x6e6: return 0x0; + case 0x6ea: return 0x0; + case 0x6eb: return 0x0; + case 0x6ec: return 0x0; + case 0x730: return 0x0; + case 0x731: return 0x0; + case 0x732: return 0x0; + case 0x733: return 0x0; + case 0x734: return 0x0; + case 0x735: return 0x0; + case 0x736: return 0x0; + case 0x737: return 0x0; + case 0x738: return 0x0; + case 0x739: return 0x0; + case 0x73a: return 0x0; + case 0x73b: return 0x0; + case 0x73c: return 0x0; + case 0x73d: return 0x0; + case 0x73e: return 0x0; + case 0x73f: return 0x0; + case 0x740: return 0x0; + case 0x741: return 0x0; + case 0x742: return 0x0; + case 0x743: return 0x0; + case 0x744: return 0x0; + case 0x745: return 0x0; + case 0x746: return 0x0; + case 0x747: return 0x0; + case 0x748: return 0x0; + case 0x749: return 0x0; + case 0x74a: return 0x0; + case 0x7a6: return 0x0; + case 0x7a7: return 0x0; + case 0x7a8: return 0x0; + case 0x7a9: return 0x0; + case 0x7aa: return 0x0; + case 0x7ab: return 0x0; + case 0x7ac: return 0x0; + case 0x7ad: return 0x0; + case 0x7ae: return 0x0; + case 0x7af: return 0x0; + case 0x7b0: return 0x0; + case 0x7eb: return 0x0; + case 0x7ec: return 0x0; + case 0x7ed: return 0x0; + case 0x7ee: return 0x0; + case 0x7ef: return 0x0; + case 0x7f0: return 0x0; + case 0x7f1: return 0x0; + case 0x7f2: return 0x0; + case 0x7f3: return 0x0; + case 0x7f4: return 0x0; + case 0x7f5: return 0x0; + case 0x818: return 0x0; + case 0x819: return 0x0; + case 0x8e3: return 0x0; + case 
0x8e4: return 0x0; + case 0x8e5: return 0x0; + case 0x8e6: return 0x0; + case 0x8e7: return 0x0; + case 0x8e8: return 0x0; + case 0x8e9: return 0x0; + case 0x8ea: return 0x0; + case 0x8eb: return 0x0; + case 0x8ec: return 0x0; + case 0x8ed: return 0x0; + case 0x8ee: return 0x0; + case 0x8ef: return 0x0; + case 0x8f0: return 0x0; + case 0x8f1: return 0x0; + case 0x8f2: return 0x0; + case 0x8f3: return 0x0; + case 0x8f4: return 0x0; + case 0x8f5: return 0x0; + case 0x8f6: return 0x0; + case 0x8f7: return 0x0; + case 0x8f8: return 0x0; + case 0x8f9: return 0x0; + case 0x8fa: return 0x0; + case 0x8fb: return 0x0; + case 0x8fc: return 0x0; + case 0x8fd: return 0x0; + case 0x8fe: return 0x0; + case 0x929: return 0x928; + case 0x931: return 0x930; + case 0x934: return 0x933; + case 0x93c: return 0x0; + case 0x94d: return 0x0; + case 0x951: return 0x0; + case 0x952: return 0x0; + case 0x953: return 0x0; + case 0x954: return 0x0; + case 0x958: return 0x915; + case 0x959: return 0x916; + case 0x95a: return 0x917; + case 0x95b: return 0x91c; + case 0x95c: return 0x921; + case 0x95d: return 0x922; + case 0x95e: return 0x92b; + case 0x95f: return 0x92f; + case 0x971: return 0x0; + case 0x9bc: return 0x0; + case 0x9cd: return 0x0; + case 0x9dc: return 0x9a1; + case 0x9dd: return 0x9a2; + case 0x9df: return 0x9af; + case 0xa33: return 0xa32; + case 0xa36: return 0xa38; + case 0xa3c: return 0x0; + case 0xa4d: return 0x0; + case 0xa59: return 0xa16; + case 0xa5a: return 0xa17; + case 0xa5b: return 0xa1c; + case 0xa5e: return 0xa2b; + case 0xabc: return 0x0; + case 0xacd: return 0x0; + case 0xafd: return 0x0; + case 0xafe: return 0x0; + case 0xaff: return 0x0; + case 0xb3c: return 0x0; + case 0xb4d: return 0x0; + case 0xb55: return 0x0; + case 0xb5c: return 0xb21; + case 0xb5d: return 0xb22; + case 0xbcd: return 0x0; + case 0xc4d: return 0x0; + case 0xcbc: return 0x0; + case 0xccd: return 0x0; + case 0xd3b: return 0x0; + case 0xd3c: return 0x0; + case 0xd4d: return 0x0; + case 
0xdca: return 0x0; + case 0xdda: return 0xdd9; + case 0xddd: return 0xddc; + case 0xe47: return 0x0; + case 0xe48: return 0x0; + case 0xe49: return 0x0; + case 0xe4a: return 0x0; + case 0xe4b: return 0x0; + case 0xe4c: return 0x0; + case 0xe4e: return 0x0; + case 0xeba: return 0x0; + case 0xec8: return 0x0; + case 0xec9: return 0x0; + case 0xeca: return 0x0; + case 0xecb: return 0x0; + case 0xecc: return 0x0; + case 0xf18: return 0x0; + case 0xf19: return 0x0; + case 0xf35: return 0x0; + case 0xf37: return 0x0; + case 0xf39: return 0x0; + case 0xf3e: return 0x0; + case 0xf3f: return 0x0; + case 0xf82: return 0x0; + case 0xf83: return 0x0; + case 0xf84: return 0x0; + case 0xf86: return 0x0; + case 0xf87: return 0x0; + case 0xfc6: return 0x0; + case 0x1037: return 0x0; + case 0x1039: return 0x0; + case 0x103a: return 0x0; + case 0x1063: return 0x0; + case 0x1064: return 0x0; + case 0x1069: return 0x0; + case 0x106a: return 0x0; + case 0x106b: return 0x0; + case 0x106c: return 0x0; + case 0x106d: return 0x0; + case 0x1087: return 0x0; + case 0x1088: return 0x0; + case 0x1089: return 0x0; + case 0x108a: return 0x0; + case 0x108b: return 0x0; + case 0x108c: return 0x0; + case 0x108d: return 0x0; + case 0x108f: return 0x0; + case 0x109a: return 0x0; + case 0x109b: return 0x0; + case 0x135d: return 0x0; + case 0x135e: return 0x0; + case 0x135f: return 0x0; + case 0x17c9: return 0x0; + case 0x17ca: return 0x0; + case 0x17cb: return 0x0; + case 0x17cc: return 0x0; + case 0x17cd: return 0x0; + case 0x17ce: return 0x0; + case 0x17cf: return 0x0; + case 0x17d0: return 0x0; + case 0x17d1: return 0x0; + case 0x17d2: return 0x0; + case 0x17d3: return 0x0; + case 0x17dd: return 0x0; + case 0x1939: return 0x0; + case 0x193a: return 0x0; + case 0x193b: return 0x0; + case 0x1a75: return 0x0; + case 0x1a76: return 0x0; + case 0x1a77: return 0x0; + case 0x1a78: return 0x0; + case 0x1a79: return 0x0; + case 0x1a7a: return 0x0; + case 0x1a7b: return 0x0; + case 0x1a7c: return 0x0; + case 
0x1a7f: return 0x0; + case 0x1ab0: return 0x0; + case 0x1ab1: return 0x0; + case 0x1ab2: return 0x0; + case 0x1ab3: return 0x0; + case 0x1ab4: return 0x0; + case 0x1ab5: return 0x0; + case 0x1ab6: return 0x0; + case 0x1ab7: return 0x0; + case 0x1ab8: return 0x0; + case 0x1ab9: return 0x0; + case 0x1aba: return 0x0; + case 0x1abb: return 0x0; + case 0x1abc: return 0x0; + case 0x1abd: return 0x0; + case 0x1b34: return 0x0; + case 0x1b44: return 0x0; + case 0x1b6b: return 0x0; + case 0x1b6c: return 0x0; + case 0x1b6d: return 0x0; + case 0x1b6e: return 0x0; + case 0x1b6f: return 0x0; + case 0x1b70: return 0x0; + case 0x1b71: return 0x0; + case 0x1b72: return 0x0; + case 0x1b73: return 0x0; + case 0x1baa: return 0x0; + case 0x1bab: return 0x0; + case 0x1c36: return 0x0; + case 0x1c37: return 0x0; + case 0x1c78: return 0x0; + case 0x1c79: return 0x0; + case 0x1c7a: return 0x0; + case 0x1c7b: return 0x0; + case 0x1c7c: return 0x0; + case 0x1c7d: return 0x0; + case 0x1cd0: return 0x0; + case 0x1cd1: return 0x0; + case 0x1cd2: return 0x0; + case 0x1cd3: return 0x0; + case 0x1cd4: return 0x0; + case 0x1cd5: return 0x0; + case 0x1cd6: return 0x0; + case 0x1cd7: return 0x0; + case 0x1cd8: return 0x0; + case 0x1cd9: return 0x0; + case 0x1cda: return 0x0; + case 0x1cdb: return 0x0; + case 0x1cdc: return 0x0; + case 0x1cdd: return 0x0; + case 0x1cde: return 0x0; + case 0x1cdf: return 0x0; + case 0x1ce0: return 0x0; + case 0x1ce1: return 0x0; + case 0x1ce2: return 0x0; + case 0x1ce3: return 0x0; + case 0x1ce4: return 0x0; + case 0x1ce5: return 0x0; + case 0x1ce6: return 0x0; + case 0x1ce7: return 0x0; + case 0x1ce8: return 0x0; + case 0x1ced: return 0x0; + case 0x1cf4: return 0x0; + case 0x1cf7: return 0x0; + case 0x1cf8: return 0x0; + case 0x1cf9: return 0x0; + case 0x1d2c: return 0x0; + case 0x1d2d: return 0x0; + case 0x1d2e: return 0x0; + case 0x1d2f: return 0x0; + case 0x1d30: return 0x0; + case 0x1d31: return 0x0; + case 0x1d32: return 0x0; + case 0x1d33: return 0x0; + case 
0x1d34: return 0x0; + case 0x1d35: return 0x0; + case 0x1d36: return 0x0; + case 0x1d37: return 0x0; + case 0x1d38: return 0x0; + case 0x1d39: return 0x0; + case 0x1d3a: return 0x0; + case 0x1d3b: return 0x0; + case 0x1d3c: return 0x0; + case 0x1d3d: return 0x0; + case 0x1d3e: return 0x0; + case 0x1d3f: return 0x0; + case 0x1d40: return 0x0; + case 0x1d41: return 0x0; + case 0x1d42: return 0x0; + case 0x1d43: return 0x0; + case 0x1d44: return 0x0; + case 0x1d45: return 0x0; + case 0x1d46: return 0x0; + case 0x1d47: return 0x0; + case 0x1d48: return 0x0; + case 0x1d49: return 0x0; + case 0x1d4a: return 0x0; + case 0x1d4b: return 0x0; + case 0x1d4c: return 0x0; + case 0x1d4d: return 0x0; + case 0x1d4e: return 0x0; + case 0x1d4f: return 0x0; + case 0x1d50: return 0x0; + case 0x1d51: return 0x0; + case 0x1d52: return 0x0; + case 0x1d53: return 0x0; + case 0x1d54: return 0x0; + case 0x1d55: return 0x0; + case 0x1d56: return 0x0; + case 0x1d57: return 0x0; + case 0x1d58: return 0x0; + case 0x1d59: return 0x0; + case 0x1d5a: return 0x0; + case 0x1d5b: return 0x0; + case 0x1d5c: return 0x0; + case 0x1d5d: return 0x0; + case 0x1d5e: return 0x0; + case 0x1d5f: return 0x0; + case 0x1d60: return 0x0; + case 0x1d61: return 0x0; + case 0x1d62: return 0x0; + case 0x1d63: return 0x0; + case 0x1d64: return 0x0; + case 0x1d65: return 0x0; + case 0x1d66: return 0x0; + case 0x1d67: return 0x0; + case 0x1d68: return 0x0; + case 0x1d69: return 0x0; + case 0x1d6a: return 0x0; + case 0x1dc4: return 0x0; + case 0x1dc5: return 0x0; + case 0x1dc6: return 0x0; + case 0x1dc7: return 0x0; + case 0x1dc8: return 0x0; + case 0x1dc9: return 0x0; + case 0x1dca: return 0x0; + case 0x1dcb: return 0x0; + case 0x1dcc: return 0x0; + case 0x1dcd: return 0x0; + case 0x1dce: return 0x0; + case 0x1dcf: return 0x0; + case 0x1df5: return 0x0; + case 0x1df6: return 0x0; + case 0x1df7: return 0x0; + case 0x1df8: return 0x0; + case 0x1df9: return 0x0; + case 0x1dfd: return 0x0; + case 0x1dfe: return 0x0; + case 
0x1dff: return 0x0; + case 0x1e00: return 0x41; + case 0x1e01: return 0x61; + case 0x1e02: return 0x42; + case 0x1e03: return 0x62; + case 0x1e04: return 0x42; + case 0x1e05: return 0x62; + case 0x1e06: return 0x42; + case 0x1e07: return 0x62; + case 0x1e08: return 0x43; + case 0x1e09: return 0x63; + case 0x1e0a: return 0x44; + case 0x1e0b: return 0x64; + case 0x1e0c: return 0x44; + case 0x1e0d: return 0x64; + case 0x1e0e: return 0x44; + case 0x1e0f: return 0x64; + case 0x1e10: return 0x44; + case 0x1e11: return 0x64; + case 0x1e12: return 0x44; + case 0x1e13: return 0x64; + case 0x1e14: return 0x45; + case 0x1e15: return 0x65; + case 0x1e16: return 0x45; + case 0x1e17: return 0x65; + case 0x1e18: return 0x45; + case 0x1e19: return 0x65; + case 0x1e1a: return 0x45; + case 0x1e1b: return 0x65; + case 0x1e1c: return 0x45; + case 0x1e1d: return 0x65; + case 0x1e1e: return 0x46; + case 0x1e1f: return 0x66; + case 0x1e20: return 0x47; + case 0x1e21: return 0x67; + case 0x1e22: return 0x48; + case 0x1e23: return 0x68; + case 0x1e24: return 0x48; + case 0x1e25: return 0x68; + case 0x1e26: return 0x48; + case 0x1e27: return 0x68; + case 0x1e28: return 0x48; + case 0x1e29: return 0x68; + case 0x1e2a: return 0x48; + case 0x1e2b: return 0x68; + case 0x1e2c: return 0x49; + case 0x1e2d: return 0x69; + case 0x1e2e: return 0x49; + case 0x1e2f: return 0x69; + case 0x1e30: return 0x4b; + case 0x1e31: return 0x6b; + case 0x1e32: return 0x4b; + case 0x1e33: return 0x6b; + case 0x1e34: return 0x4b; + case 0x1e35: return 0x6b; + case 0x1e36: return 0x4c; + case 0x1e37: return 0x6c; + case 0x1e38: return 0x4c; + case 0x1e39: return 0x6c; + case 0x1e3a: return 0x4c; + case 0x1e3b: return 0x6c; + case 0x1e3c: return 0x4c; + case 0x1e3d: return 0x6c; + case 0x1e3e: return 0x4d; + case 0x1e3f: return 0x6d; + case 0x1e40: return 0x4d; + case 0x1e41: return 0x6d; + case 0x1e42: return 0x4d; + case 0x1e43: return 0x6d; + case 0x1e44: return 0x4e; + case 0x1e45: return 0x6e; + case 0x1e46: 
return 0x4e; + case 0x1e47: return 0x6e; + case 0x1e48: return 0x4e; + case 0x1e49: return 0x6e; + case 0x1e4a: return 0x4e; + case 0x1e4b: return 0x6e; + case 0x1e4c: return 0x4f; + case 0x1e4d: return 0x6f; + case 0x1e4e: return 0x4f; + case 0x1e4f: return 0x6f; + case 0x1e50: return 0x4f; + case 0x1e51: return 0x6f; + case 0x1e52: return 0x4f; + case 0x1e53: return 0x6f; + case 0x1e54: return 0x50; + case 0x1e55: return 0x70; + case 0x1e56: return 0x50; + case 0x1e57: return 0x70; + case 0x1e58: return 0x52; + case 0x1e59: return 0x72; + case 0x1e5a: return 0x52; + case 0x1e5b: return 0x72; + case 0x1e5c: return 0x52; + case 0x1e5d: return 0x72; + case 0x1e5e: return 0x52; + case 0x1e5f: return 0x72; + case 0x1e60: return 0x53; + case 0x1e61: return 0x73; + case 0x1e62: return 0x53; + case 0x1e63: return 0x73; + case 0x1e64: return 0x53; + case 0x1e65: return 0x73; + case 0x1e66: return 0x53; + case 0x1e67: return 0x73; + case 0x1e68: return 0x53; + case 0x1e69: return 0x73; + case 0x1e6a: return 0x54; + case 0x1e6b: return 0x74; + case 0x1e6c: return 0x54; + case 0x1e6d: return 0x74; + case 0x1e6e: return 0x54; + case 0x1e6f: return 0x74; + case 0x1e70: return 0x54; + case 0x1e71: return 0x74; + case 0x1e72: return 0x55; + case 0x1e73: return 0x75; + case 0x1e74: return 0x55; + case 0x1e75: return 0x75; + case 0x1e76: return 0x55; + case 0x1e77: return 0x75; + case 0x1e78: return 0x55; + case 0x1e79: return 0x75; + case 0x1e7a: return 0x55; + case 0x1e7b: return 0x75; + case 0x1e7c: return 0x56; + case 0x1e7d: return 0x76; + case 0x1e7e: return 0x56; + case 0x1e7f: return 0x76; + case 0x1e80: return 0x57; + case 0x1e81: return 0x77; + case 0x1e82: return 0x57; + case 0x1e83: return 0x77; + case 0x1e84: return 0x57; + case 0x1e85: return 0x77; + case 0x1e86: return 0x57; + case 0x1e87: return 0x77; + case 0x1e88: return 0x57; + case 0x1e89: return 0x77; + case 0x1e8a: return 0x58; + case 0x1e8b: return 0x78; + case 0x1e8c: return 0x58; + case 0x1e8d: return 
0x78; + case 0x1e8e: return 0x59; + case 0x1e8f: return 0x79; + case 0x1e90: return 0x5a; + case 0x1e91: return 0x7a; + case 0x1e92: return 0x5a; + case 0x1e93: return 0x7a; + case 0x1e94: return 0x5a; + case 0x1e95: return 0x7a; + case 0x1e96: return 0x68; + case 0x1e97: return 0x74; + case 0x1e98: return 0x77; + case 0x1e99: return 0x79; + case 0x1e9b: return 0x17f; + case 0x1ea0: return 0x41; + case 0x1ea1: return 0x61; + case 0x1ea2: return 0x41; + case 0x1ea3: return 0x61; + case 0x1ea4: return 0x41; + case 0x1ea5: return 0x61; + case 0x1ea6: return 0x41; + case 0x1ea7: return 0x61; + case 0x1ea8: return 0x41; + case 0x1ea9: return 0x61; + case 0x1eaa: return 0x41; + case 0x1eab: return 0x61; + case 0x1eac: return 0x41; + case 0x1ead: return 0x61; + case 0x1eae: return 0x41; + case 0x1eaf: return 0x61; + case 0x1eb0: return 0x41; + case 0x1eb1: return 0x61; + case 0x1eb2: return 0x41; + case 0x1eb3: return 0x61; + case 0x1eb4: return 0x41; + case 0x1eb5: return 0x61; + case 0x1eb6: return 0x41; + case 0x1eb7: return 0x61; + case 0x1eb8: return 0x45; + case 0x1eb9: return 0x65; + case 0x1eba: return 0x45; + case 0x1ebb: return 0x65; + case 0x1ebc: return 0x45; + case 0x1ebd: return 0x65; + case 0x1ebe: return 0x45; + case 0x1ebf: return 0x65; + case 0x1ec0: return 0x45; + case 0x1ec1: return 0x65; + case 0x1ec2: return 0x45; + case 0x1ec3: return 0x65; + case 0x1ec4: return 0x45; + case 0x1ec5: return 0x65; + case 0x1ec6: return 0x45; + case 0x1ec7: return 0x65; + case 0x1ec8: return 0x49; + case 0x1ec9: return 0x69; + case 0x1eca: return 0x49; + case 0x1ecb: return 0x69; + case 0x1ecc: return 0x4f; + case 0x1ecd: return 0x6f; + case 0x1ece: return 0x4f; + case 0x1ecf: return 0x6f; + case 0x1ed0: return 0x4f; + case 0x1ed1: return 0x6f; + case 0x1ed2: return 0x4f; + case 0x1ed3: return 0x6f; + case 0x1ed4: return 0x4f; + case 0x1ed5: return 0x6f; + case 0x1ed6: return 0x4f; + case 0x1ed7: return 0x6f; + case 0x1ed8: return 0x4f; + case 0x1ed9: return 0x6f; + 
case 0x1eda: return 0x4f; + case 0x1edb: return 0x6f; + case 0x1edc: return 0x4f; + case 0x1edd: return 0x6f; + case 0x1ede: return 0x4f; + case 0x1edf: return 0x6f; + case 0x1ee0: return 0x4f; + case 0x1ee1: return 0x6f; + case 0x1ee2: return 0x4f; + case 0x1ee3: return 0x6f; + case 0x1ee4: return 0x55; + case 0x1ee5: return 0x75; + case 0x1ee6: return 0x55; + case 0x1ee7: return 0x75; + case 0x1ee8: return 0x55; + case 0x1ee9: return 0x75; + case 0x1eea: return 0x55; + case 0x1eeb: return 0x75; + case 0x1eec: return 0x55; + case 0x1eed: return 0x75; + case 0x1eee: return 0x55; + case 0x1eef: return 0x75; + case 0x1ef0: return 0x55; + case 0x1ef1: return 0x75; + case 0x1ef2: return 0x59; + case 0x1ef3: return 0x79; + case 0x1ef4: return 0x59; + case 0x1ef5: return 0x79; + case 0x1ef6: return 0x59; + case 0x1ef7: return 0x79; + case 0x1ef8: return 0x59; + case 0x1ef9: return 0x79; + case 0x1f00: return 0x3b1; + case 0x1f01: return 0x3b1; + case 0x1f02: return 0x3b1; + case 0x1f03: return 0x3b1; + case 0x1f04: return 0x3b1; + case 0x1f05: return 0x3b1; + case 0x1f06: return 0x3b1; + case 0x1f07: return 0x3b1; + case 0x1f08: return 0x391; + case 0x1f09: return 0x391; + case 0x1f0a: return 0x391; + case 0x1f0b: return 0x391; + case 0x1f0c: return 0x391; + case 0x1f0d: return 0x391; + case 0x1f0e: return 0x391; + case 0x1f0f: return 0x391; + case 0x1f10: return 0x3b5; + case 0x1f11: return 0x3b5; + case 0x1f12: return 0x3b5; + case 0x1f13: return 0x3b5; + case 0x1f14: return 0x3b5; + case 0x1f15: return 0x3b5; + case 0x1f18: return 0x395; + case 0x1f19: return 0x395; + case 0x1f1a: return 0x395; + case 0x1f1b: return 0x395; + case 0x1f1c: return 0x395; + case 0x1f1d: return 0x395; + case 0x1f20: return 0x3b7; + case 0x1f21: return 0x3b7; + case 0x1f22: return 0x3b7; + case 0x1f23: return 0x3b7; + case 0x1f24: return 0x3b7; + case 0x1f25: return 0x3b7; + case 0x1f26: return 0x3b7; + case 0x1f27: return 0x3b7; + case 0x1f28: return 0x397; + case 0x1f29: return 0x397; + 
case 0x1f2a: return 0x397; + case 0x1f2b: return 0x397; + case 0x1f2c: return 0x397; + case 0x1f2d: return 0x397; + case 0x1f2e: return 0x397; + case 0x1f2f: return 0x397; + case 0x1f30: return 0x3b9; + case 0x1f31: return 0x3b9; + case 0x1f32: return 0x3b9; + case 0x1f33: return 0x3b9; + case 0x1f34: return 0x3b9; + case 0x1f35: return 0x3b9; + case 0x1f36: return 0x3b9; + case 0x1f37: return 0x3b9; + case 0x1f38: return 0x399; + case 0x1f39: return 0x399; + case 0x1f3a: return 0x399; + case 0x1f3b: return 0x399; + case 0x1f3c: return 0x399; + case 0x1f3d: return 0x399; + case 0x1f3e: return 0x399; + case 0x1f3f: return 0x399; + case 0x1f40: return 0x3bf; + case 0x1f41: return 0x3bf; + case 0x1f42: return 0x3bf; + case 0x1f43: return 0x3bf; + case 0x1f44: return 0x3bf; + case 0x1f45: return 0x3bf; + case 0x1f48: return 0x39f; + case 0x1f49: return 0x39f; + case 0x1f4a: return 0x39f; + case 0x1f4b: return 0x39f; + case 0x1f4c: return 0x39f; + case 0x1f4d: return 0x39f; + case 0x1f50: return 0x3c5; + case 0x1f51: return 0x3c5; + case 0x1f52: return 0x3c5; + case 0x1f53: return 0x3c5; + case 0x1f54: return 0x3c5; + case 0x1f55: return 0x3c5; + case 0x1f56: return 0x3c5; + case 0x1f57: return 0x3c5; + case 0x1f59: return 0x3a5; + case 0x1f5b: return 0x3a5; + case 0x1f5d: return 0x3a5; + case 0x1f5f: return 0x3a5; + case 0x1f60: return 0x3c9; + case 0x1f61: return 0x3c9; + case 0x1f62: return 0x3c9; + case 0x1f63: return 0x3c9; + case 0x1f64: return 0x3c9; + case 0x1f65: return 0x3c9; + case 0x1f66: return 0x3c9; + case 0x1f67: return 0x3c9; + case 0x1f68: return 0x3a9; + case 0x1f69: return 0x3a9; + case 0x1f6a: return 0x3a9; + case 0x1f6b: return 0x3a9; + case 0x1f6c: return 0x3a9; + case 0x1f6d: return 0x3a9; + case 0x1f6e: return 0x3a9; + case 0x1f6f: return 0x3a9; + case 0x1f70: return 0x3b1; + case 0x1f71: return 0x3b1; + case 0x1f72: return 0x3b5; + case 0x1f73: return 0x3b5; + case 0x1f74: return 0x3b7; + case 0x1f75: return 0x3b7; + case 0x1f76: return 0x3b9; 
+ case 0x1f77: return 0x3b9; + case 0x1f78: return 0x3bf; + case 0x1f79: return 0x3bf; + case 0x1f7a: return 0x3c5; + case 0x1f7b: return 0x3c5; + case 0x1f7c: return 0x3c9; + case 0x1f7d: return 0x3c9; + case 0x1f80: return 0x3b1; + case 0x1f81: return 0x3b1; + case 0x1f82: return 0x3b1; + case 0x1f83: return 0x3b1; + case 0x1f84: return 0x3b1; + case 0x1f85: return 0x3b1; + case 0x1f86: return 0x3b1; + case 0x1f87: return 0x3b1; + case 0x1f88: return 0x391; + case 0x1f89: return 0x391; + case 0x1f8a: return 0x391; + case 0x1f8b: return 0x391; + case 0x1f8c: return 0x391; + case 0x1f8d: return 0x391; + case 0x1f8e: return 0x391; + case 0x1f8f: return 0x391; + case 0x1f90: return 0x3b7; + case 0x1f91: return 0x3b7; + case 0x1f92: return 0x3b7; + case 0x1f93: return 0x3b7; + case 0x1f94: return 0x3b7; + case 0x1f95: return 0x3b7; + case 0x1f96: return 0x3b7; + case 0x1f97: return 0x3b7; + case 0x1f98: return 0x397; + case 0x1f99: return 0x397; + case 0x1f9a: return 0x397; + case 0x1f9b: return 0x397; + case 0x1f9c: return 0x397; + case 0x1f9d: return 0x397; + case 0x1f9e: return 0x397; + case 0x1f9f: return 0x397; + case 0x1fa0: return 0x3c9; + case 0x1fa1: return 0x3c9; + case 0x1fa2: return 0x3c9; + case 0x1fa3: return 0x3c9; + case 0x1fa4: return 0x3c9; + case 0x1fa5: return 0x3c9; + case 0x1fa6: return 0x3c9; + case 0x1fa7: return 0x3c9; + case 0x1fa8: return 0x3a9; + case 0x1fa9: return 0x3a9; + case 0x1faa: return 0x3a9; + case 0x1fab: return 0x3a9; + case 0x1fac: return 0x3a9; + case 0x1fad: return 0x3a9; + case 0x1fae: return 0x3a9; + case 0x1faf: return 0x3a9; + case 0x1fb0: return 0x3b1; + case 0x1fb1: return 0x3b1; + case 0x1fb2: return 0x3b1; + case 0x1fb3: return 0x3b1; + case 0x1fb4: return 0x3b1; + case 0x1fb6: return 0x3b1; + case 0x1fb7: return 0x3b1; + case 0x1fb8: return 0x391; + case 0x1fb9: return 0x391; + case 0x1fba: return 0x391; + case 0x1fbb: return 0x391; + case 0x1fbc: return 0x391; + case 0x1fbd: return 0x0; + case 0x1fbe: return 0x3b9; 
+ case 0x1fbf: return 0x0; + case 0x1fc0: return 0x0; + case 0x1fc1: return 0x0; + case 0x1fc2: return 0x3b7; + case 0x1fc3: return 0x3b7; + case 0x1fc4: return 0x3b7; + case 0x1fc6: return 0x3b7; + case 0x1fc7: return 0x3b7; + case 0x1fc8: return 0x395; + case 0x1fc9: return 0x395; + case 0x1fca: return 0x397; + case 0x1fcb: return 0x397; + case 0x1fcc: return 0x397; + case 0x1fcd: return 0x0; + case 0x1fce: return 0x0; + case 0x1fcf: return 0x0; + case 0x1fd0: return 0x3b9; + case 0x1fd1: return 0x3b9; + case 0x1fd2: return 0x3b9; + case 0x1fd3: return 0x3b9; + case 0x1fd6: return 0x3b9; + case 0x1fd7: return 0x3b9; + case 0x1fd8: return 0x399; + case 0x1fd9: return 0x399; + case 0x1fda: return 0x399; + case 0x1fdb: return 0x399; + case 0x1fdd: return 0x0; + case 0x1fde: return 0x0; + case 0x1fdf: return 0x0; + case 0x1fe0: return 0x3c5; + case 0x1fe1: return 0x3c5; + case 0x1fe2: return 0x3c5; + case 0x1fe3: return 0x3c5; + case 0x1fe4: return 0x3c1; + case 0x1fe5: return 0x3c1; + case 0x1fe6: return 0x3c5; + case 0x1fe7: return 0x3c5; + case 0x1fe8: return 0x3a5; + case 0x1fe9: return 0x3a5; + case 0x1fea: return 0x3a5; + case 0x1feb: return 0x3a5; + case 0x1fec: return 0x3a1; + case 0x1fed: return 0x0; + case 0x1fee: return 0x0; + case 0x1fef: return 0x0; + case 0x1ff2: return 0x3c9; + case 0x1ff3: return 0x3c9; + case 0x1ff4: return 0x3c9; + case 0x1ff6: return 0x3c9; + case 0x1ff7: return 0x3c9; + case 0x1ff8: return 0x39f; + case 0x1ff9: return 0x39f; + case 0x1ffa: return 0x3a9; + case 0x1ffb: return 0x3a9; + case 0x1ffc: return 0x3a9; + case 0x1ffd: return 0x0; + case 0x1ffe: return 0x0; + case 0x2000: return 0x2002; + case 0x2001: return 0x2003; + case 0x2126: return 0x3a9; + case 0x212a: return 0x4b; + case 0x212b: return 0x41; + case 0x219a: return 0x2190; + case 0x219b: return 0x2192; + case 0x21ae: return 0x2194; + case 0x21cd: return 0x21d0; + case 0x21ce: return 0x21d4; + case 0x21cf: return 0x21d2; + case 0x2204: return 0x2203; + case 0x2209: 
return 0x2208; + case 0x220c: return 0x220b; + case 0x2224: return 0x2223; + case 0x2226: return 0x2225; + case 0x2241: return 0x223c; + case 0x2244: return 0x2243; + case 0x2247: return 0x2245; + case 0x2249: return 0x2248; + case 0x2260: return 0x3d; + case 0x2262: return 0x2261; + case 0x226d: return 0x224d; + case 0x226e: return 0x3c; + case 0x226f: return 0x3e; + case 0x2270: return 0x2264; + case 0x2271: return 0x2265; + case 0x2274: return 0x2272; + case 0x2275: return 0x2273; + case 0x2278: return 0x2276; + case 0x2279: return 0x2277; + case 0x2280: return 0x227a; + case 0x2281: return 0x227b; + case 0x2284: return 0x2282; + case 0x2285: return 0x2283; + case 0x2288: return 0x2286; + case 0x2289: return 0x2287; + case 0x22ac: return 0x22a2; + case 0x22ad: return 0x22a8; + case 0x22ae: return 0x22a9; + case 0x22af: return 0x22ab; + case 0x22e0: return 0x227c; + case 0x22e1: return 0x227d; + case 0x22e2: return 0x2291; + case 0x22e3: return 0x2292; + case 0x22ea: return 0x22b2; + case 0x22eb: return 0x22b3; + case 0x22ec: return 0x22b4; + case 0x22ed: return 0x22b5; + case 0x2329: return 0x3008; + case 0x232a: return 0x3009; + case 0x2adc: return 0x2add; + case 0x2cef: return 0x0; + case 0x2cf0: return 0x0; + case 0x2cf1: return 0x0; + case 0x2e2f: return 0x0; + case 0x302a: return 0x0; + case 0x302b: return 0x0; + case 0x302c: return 0x0; + case 0x302d: return 0x0; + case 0x302e: return 0x0; + case 0x302f: return 0x0; + case 0x304c: return 0x304b; + case 0x304e: return 0x304d; + case 0x3050: return 0x304f; + case 0x3052: return 0x3051; + case 0x3054: return 0x3053; + case 0x3056: return 0x3055; + case 0x3058: return 0x3057; + case 0x305a: return 0x3059; + case 0x305c: return 0x305b; + case 0x305e: return 0x305d; + case 0x3060: return 0x305f; + case 0x3062: return 0x3061; + case 0x3065: return 0x3064; + case 0x3067: return 0x3066; + case 0x3069: return 0x3068; + case 0x3070: return 0x306f; + case 0x3071: return 0x306f; + case 0x3073: return 0x3072; + case 
0x3074: return 0x3072; + case 0x3076: return 0x3075; + case 0x3077: return 0x3075; + case 0x3079: return 0x3078; + case 0x307a: return 0x3078; + case 0x307c: return 0x307b; + case 0x307d: return 0x307b; + case 0x3094: return 0x3046; + case 0x3099: return 0x0; + case 0x309a: return 0x0; + case 0x309b: return 0x0; + case 0x309c: return 0x0; + case 0x309e: return 0x309d; + case 0x30ac: return 0x30ab; + case 0x30ae: return 0x30ad; + case 0x30b0: return 0x30af; + case 0x30b2: return 0x30b1; + case 0x30b4: return 0x30b3; + case 0x30b6: return 0x30b5; + case 0x30b8: return 0x30b7; + case 0x30ba: return 0x30b9; + case 0x30bc: return 0x30bb; + case 0x30be: return 0x30bd; + case 0x30c0: return 0x30bf; + case 0x30c2: return 0x30c1; + case 0x30c5: return 0x30c4; + case 0x30c7: return 0x30c6; + case 0x30c9: return 0x30c8; + case 0x30d0: return 0x30cf; + case 0x30d1: return 0x30cf; + case 0x30d3: return 0x30d2; + case 0x30d4: return 0x30d2; + case 0x30d6: return 0x30d5; + case 0x30d7: return 0x30d5; + case 0x30d9: return 0x30d8; + case 0x30da: return 0x30d8; + case 0x30dc: return 0x30db; + case 0x30dd: return 0x30db; + case 0x30f4: return 0x30a6; + case 0x30f7: return 0x30ef; + case 0x30f8: return 0x30f0; + case 0x30f9: return 0x30f1; + case 0x30fa: return 0x30f2; + case 0x30fc: return 0x0; + case 0x30fe: return 0x30fd; + case 0xa66f: return 0x0; + case 0xa67c: return 0x0; + case 0xa67d: return 0x0; + case 0xa67f: return 0x0; + case 0xa69c: return 0x0; + case 0xa69d: return 0x0; + case 0xa6f0: return 0x0; + case 0xa6f1: return 0x0; + case 0xa700: return 0x0; + case 0xa701: return 0x0; + case 0xa702: return 0x0; + case 0xa703: return 0x0; + case 0xa704: return 0x0; + case 0xa705: return 0x0; + case 0xa706: return 0x0; + case 0xa707: return 0x0; + case 0xa708: return 0x0; + case 0xa709: return 0x0; + case 0xa70a: return 0x0; + case 0xa70b: return 0x0; + case 0xa70c: return 0x0; + case 0xa70d: return 0x0; + case 0xa70e: return 0x0; + case 0xa70f: return 0x0; + case 0xa710: return 
0x0; + case 0xa711: return 0x0; + case 0xa712: return 0x0; + case 0xa713: return 0x0; + case 0xa714: return 0x0; + case 0xa715: return 0x0; + case 0xa716: return 0x0; + case 0xa717: return 0x0; + case 0xa718: return 0x0; + case 0xa719: return 0x0; + case 0xa71a: return 0x0; + case 0xa71b: return 0x0; + case 0xa71c: return 0x0; + case 0xa71d: return 0x0; + case 0xa71e: return 0x0; + case 0xa71f: return 0x0; + case 0xa720: return 0x0; + case 0xa721: return 0x0; + case 0xa788: return 0x0; + case 0xa789: return 0x0; + case 0xa78a: return 0x0; + case 0xa7f8: return 0x0; + case 0xa7f9: return 0x0; + case 0xa8c4: return 0x0; + case 0xa8e0: return 0x0; + case 0xa8e1: return 0x0; + case 0xa8e2: return 0x0; + case 0xa8e3: return 0x0; + case 0xa8e4: return 0x0; + case 0xa8e5: return 0x0; + case 0xa8e6: return 0x0; + case 0xa8e7: return 0x0; + case 0xa8e8: return 0x0; + case 0xa8e9: return 0x0; + case 0xa8ea: return 0x0; + case 0xa8eb: return 0x0; + case 0xa8ec: return 0x0; + case 0xa8ed: return 0x0; + case 0xa8ee: return 0x0; + case 0xa8ef: return 0x0; + case 0xa8f0: return 0x0; + case 0xa8f1: return 0x0; + case 0xa92b: return 0x0; + case 0xa92c: return 0x0; + case 0xa92d: return 0x0; + case 0xa92e: return 0x0; + case 0xa953: return 0x0; + case 0xa9b3: return 0x0; + case 0xa9c0: return 0x0; + case 0xa9e5: return 0x0; + case 0xaa7b: return 0x0; + case 0xaa7c: return 0x0; + case 0xaa7d: return 0x0; + case 0xaabf: return 0x0; + case 0xaac0: return 0x0; + case 0xaac1: return 0x0; + case 0xaac2: return 0x0; + case 0xaaf6: return 0x0; + case 0xab5b: return 0x0; + case 0xab5c: return 0x0; + case 0xab5d: return 0x0; + case 0xab5e: return 0x0; + case 0xab5f: return 0x0; + case 0xab69: return 0x0; + case 0xab6a: return 0x0; + case 0xab6b: return 0x0; + case 0xabec: return 0x0; + case 0xabed: return 0x0; + case 0xf900: return 0x8c48; + case 0xf901: return 0x66f4; + case 0xf902: return 0x8eca; + case 0xf903: return 0x8cc8; + case 0xf904: return 0x6ed1; + case 0xf905: return 0x4e32; + 
case 0xf906: return 0x53e5; + case 0xf907: return 0x9f9c; + case 0xf908: return 0x9f9c; + case 0xf909: return 0x5951; + case 0xf90a: return 0x91d1; + case 0xf90b: return 0x5587; + case 0xf90c: return 0x5948; + case 0xf90d: return 0x61f6; + case 0xf90e: return 0x7669; + case 0xf90f: return 0x7f85; + case 0xf910: return 0x863f; + case 0xf911: return 0x87ba; + case 0xf912: return 0x88f8; + case 0xf913: return 0x908f; + case 0xf914: return 0x6a02; + case 0xf915: return 0x6d1b; + case 0xf916: return 0x70d9; + case 0xf917: return 0x73de; + case 0xf918: return 0x843d; + case 0xf919: return 0x916a; + case 0xf91a: return 0x99f1; + case 0xf91b: return 0x4e82; + case 0xf91c: return 0x5375; + case 0xf91d: return 0x6b04; + case 0xf91e: return 0x721b; + case 0xf91f: return 0x862d; + case 0xf920: return 0x9e1e; + case 0xf921: return 0x5d50; + case 0xf922: return 0x6feb; + case 0xf923: return 0x85cd; + case 0xf924: return 0x8964; + case 0xf925: return 0x62c9; + case 0xf926: return 0x81d8; + case 0xf927: return 0x881f; + case 0xf928: return 0x5eca; + case 0xf929: return 0x6717; + case 0xf92a: return 0x6d6a; + case 0xf92b: return 0x72fc; + case 0xf92c: return 0x90ce; + case 0xf92d: return 0x4f86; + case 0xf92e: return 0x51b7; + case 0xf92f: return 0x52de; + case 0xf930: return 0x64c4; + case 0xf931: return 0x6ad3; + case 0xf932: return 0x7210; + case 0xf933: return 0x76e7; + case 0xf934: return 0x8001; + case 0xf935: return 0x8606; + case 0xf936: return 0x865c; + case 0xf937: return 0x8def; + case 0xf938: return 0x9732; + case 0xf939: return 0x9b6f; + case 0xf93a: return 0x9dfa; + case 0xf93b: return 0x788c; + case 0xf93c: return 0x797f; + case 0xf93d: return 0x7da0; + case 0xf93e: return 0x83c9; + case 0xf93f: return 0x9304; + case 0xf940: return 0x9e7f; + case 0xf941: return 0x8ad6; + case 0xf942: return 0x58df; + case 0xf943: return 0x5f04; + case 0xf944: return 0x7c60; + case 0xf945: return 0x807e; + case 0xf946: return 0x7262; + case 0xf947: return 0x78ca; + case 0xf948: return 
0x8cc2; + case 0xf949: return 0x96f7; + case 0xf94a: return 0x58d8; + case 0xf94b: return 0x5c62; + case 0xf94c: return 0x6a13; + case 0xf94d: return 0x6dda; + case 0xf94e: return 0x6f0f; + case 0xf94f: return 0x7d2f; + case 0xf950: return 0x7e37; + case 0xf951: return 0x964b; + case 0xf952: return 0x52d2; + case 0xf953: return 0x808b; + case 0xf954: return 0x51dc; + case 0xf955: return 0x51cc; + case 0xf956: return 0x7a1c; + case 0xf957: return 0x7dbe; + case 0xf958: return 0x83f1; + case 0xf959: return 0x9675; + case 0xf95a: return 0x8b80; + case 0xf95b: return 0x62cf; + case 0xf95c: return 0x6a02; + case 0xf95d: return 0x8afe; + case 0xf95e: return 0x4e39; + case 0xf95f: return 0x5be7; + case 0xf960: return 0x6012; + case 0xf961: return 0x7387; + case 0xf962: return 0x7570; + case 0xf963: return 0x5317; + case 0xf964: return 0x78fb; + case 0xf965: return 0x4fbf; + case 0xf966: return 0x5fa9; + case 0xf967: return 0x4e0d; + case 0xf968: return 0x6ccc; + case 0xf969: return 0x6578; + case 0xf96a: return 0x7d22; + case 0xf96b: return 0x53c3; + case 0xf96c: return 0x585e; + case 0xf96d: return 0x7701; + case 0xf96e: return 0x8449; + case 0xf96f: return 0x8aaa; + case 0xf970: return 0x6bba; + case 0xf971: return 0x8fb0; + case 0xf972: return 0x6c88; + case 0xf973: return 0x62fe; + case 0xf974: return 0x82e5; + case 0xf975: return 0x63a0; + case 0xf976: return 0x7565; + case 0xf977: return 0x4eae; + case 0xf978: return 0x5169; + case 0xf979: return 0x51c9; + case 0xf97a: return 0x6881; + case 0xf97b: return 0x7ce7; + case 0xf97c: return 0x826f; + case 0xf97d: return 0x8ad2; + case 0xf97e: return 0x91cf; + case 0xf97f: return 0x52f5; + case 0xf980: return 0x5442; + case 0xf981: return 0x5973; + case 0xf982: return 0x5eec; + case 0xf983: return 0x65c5; + case 0xf984: return 0x6ffe; + case 0xf985: return 0x792a; + case 0xf986: return 0x95ad; + case 0xf987: return 0x9a6a; + case 0xf988: return 0x9e97; + case 0xf989: return 0x9ece; + case 0xf98a: return 0x529b; + case 
0xf98b: return 0x66c6; + case 0xf98c: return 0x6b77; + case 0xf98d: return 0x8f62; + case 0xf98e: return 0x5e74; + case 0xf98f: return 0x6190; + case 0xf990: return 0x6200; + case 0xf991: return 0x649a; + case 0xf992: return 0x6f23; + case 0xf993: return 0x7149; + case 0xf994: return 0x7489; + case 0xf995: return 0x79ca; + case 0xf996: return 0x7df4; + case 0xf997: return 0x806f; + case 0xf998: return 0x8f26; + case 0xf999: return 0x84ee; + case 0xf99a: return 0x9023; + case 0xf99b: return 0x934a; + case 0xf99c: return 0x5217; + case 0xf99d: return 0x52a3; + case 0xf99e: return 0x54bd; + case 0xf99f: return 0x70c8; + case 0xf9a0: return 0x88c2; + case 0xf9a1: return 0x8aaa; + case 0xf9a2: return 0x5ec9; + case 0xf9a3: return 0x5ff5; + case 0xf9a4: return 0x637b; + case 0xf9a5: return 0x6bae; + case 0xf9a6: return 0x7c3e; + case 0xf9a7: return 0x7375; + case 0xf9a8: return 0x4ee4; + case 0xf9a9: return 0x56f9; + case 0xf9aa: return 0x5be7; + case 0xf9ab: return 0x5dba; + case 0xf9ac: return 0x601c; + case 0xf9ad: return 0x73b2; + case 0xf9ae: return 0x7469; + case 0xf9af: return 0x7f9a; + case 0xf9b0: return 0x8046; + case 0xf9b1: return 0x9234; + case 0xf9b2: return 0x96f6; + case 0xf9b3: return 0x9748; + case 0xf9b4: return 0x9818; + case 0xf9b5: return 0x4f8b; + case 0xf9b6: return 0x79ae; + case 0xf9b7: return 0x91b4; + case 0xf9b8: return 0x96b8; + case 0xf9b9: return 0x60e1; + case 0xf9ba: return 0x4e86; + case 0xf9bb: return 0x50da; + case 0xf9bc: return 0x5bee; + case 0xf9bd: return 0x5c3f; + case 0xf9be: return 0x6599; + case 0xf9bf: return 0x6a02; + case 0xf9c0: return 0x71ce; + case 0xf9c1: return 0x7642; + case 0xf9c2: return 0x84fc; + case 0xf9c3: return 0x907c; + case 0xf9c4: return 0x9f8d; + case 0xf9c5: return 0x6688; + case 0xf9c6: return 0x962e; + case 0xf9c7: return 0x5289; + case 0xf9c8: return 0x677b; + case 0xf9c9: return 0x67f3; + case 0xf9ca: return 0x6d41; + case 0xf9cb: return 0x6e9c; + case 0xf9cc: return 0x7409; + case 0xf9cd: return 
0x7559; + case 0xf9ce: return 0x786b; + case 0xf9cf: return 0x7d10; + case 0xf9d0: return 0x985e; + case 0xf9d1: return 0x516d; + case 0xf9d2: return 0x622e; + case 0xf9d3: return 0x9678; + case 0xf9d4: return 0x502b; + case 0xf9d5: return 0x5d19; + case 0xf9d6: return 0x6dea; + case 0xf9d7: return 0x8f2a; + case 0xf9d8: return 0x5f8b; + case 0xf9d9: return 0x6144; + case 0xf9da: return 0x6817; + case 0xf9db: return 0x7387; + case 0xf9dc: return 0x9686; + case 0xf9dd: return 0x5229; + case 0xf9de: return 0x540f; + case 0xf9df: return 0x5c65; + case 0xf9e0: return 0x6613; + case 0xf9e1: return 0x674e; + case 0xf9e2: return 0x68a8; + case 0xf9e3: return 0x6ce5; + case 0xf9e4: return 0x7406; + case 0xf9e5: return 0x75e2; + case 0xf9e6: return 0x7f79; + case 0xf9e7: return 0x88cf; + case 0xf9e8: return 0x88e1; + case 0xf9e9: return 0x91cc; + case 0xf9ea: return 0x96e2; + case 0xf9eb: return 0x533f; + case 0xf9ec: return 0x6eba; + case 0xf9ed: return 0x541d; + case 0xf9ee: return 0x71d0; + case 0xf9ef: return 0x7498; + case 0xf9f0: return 0x85fa; + case 0xf9f1: return 0x96a3; + case 0xf9f2: return 0x9c57; + case 0xf9f3: return 0x9e9f; + case 0xf9f4: return 0x6797; + case 0xf9f5: return 0x6dcb; + case 0xf9f6: return 0x81e8; + case 0xf9f7: return 0x7acb; + case 0xf9f8: return 0x7b20; + case 0xf9f9: return 0x7c92; + case 0xf9fa: return 0x72c0; + case 0xf9fb: return 0x7099; + case 0xf9fc: return 0x8b58; + case 0xf9fd: return 0x4ec0; + case 0xf9fe: return 0x8336; + case 0xf9ff: return 0x523a; + case 0xfa00: return 0x5207; + case 0xfa01: return 0x5ea6; + case 0xfa02: return 0x62d3; + case 0xfa03: return 0x7cd6; + case 0xfa04: return 0x5b85; + case 0xfa05: return 0x6d1e; + case 0xfa06: return 0x66b4; + case 0xfa07: return 0x8f3b; + case 0xfa08: return 0x884c; + case 0xfa09: return 0x964d; + case 0xfa0a: return 0x898b; + case 0xfa0b: return 0x5ed3; + case 0xfa0c: return 0x5140; + case 0xfa0d: return 0x55c0; + case 0xfa10: return 0x585a; + case 0xfa12: return 0x6674; + case 
0xfa15: return 0x51de; + case 0xfa16: return 0x732a; + case 0xfa17: return 0x76ca; + case 0xfa18: return 0x793c; + case 0xfa19: return 0x795e; + case 0xfa1a: return 0x7965; + case 0xfa1b: return 0x798f; + case 0xfa1c: return 0x9756; + case 0xfa1d: return 0x7cbe; + case 0xfa1e: return 0x7fbd; + case 0xfa20: return 0x8612; + case 0xfa22: return 0x8af8; + case 0xfa25: return 0x9038; + case 0xfa26: return 0x90fd; + case 0xfa2a: return 0x98ef; + case 0xfa2b: return 0x98fc; + case 0xfa2c: return 0x9928; + case 0xfa2d: return 0x9db4; + case 0xfa2e: return 0x90de; + case 0xfa2f: return 0x96b7; + case 0xfa30: return 0x4fae; + case 0xfa31: return 0x50e7; + case 0xfa32: return 0x514d; + case 0xfa33: return 0x52c9; + case 0xfa34: return 0x52e4; + case 0xfa35: return 0x5351; + case 0xfa36: return 0x559d; + case 0xfa37: return 0x5606; + case 0xfa38: return 0x5668; + case 0xfa39: return 0x5840; + case 0xfa3a: return 0x58a8; + case 0xfa3b: return 0x5c64; + case 0xfa3c: return 0x5c6e; + case 0xfa3d: return 0x6094; + case 0xfa3e: return 0x6168; + case 0xfa3f: return 0x618e; + case 0xfa40: return 0x61f2; + case 0xfa41: return 0x654f; + case 0xfa42: return 0x65e2; + case 0xfa43: return 0x6691; + case 0xfa44: return 0x6885; + case 0xfa45: return 0x6d77; + case 0xfa46: return 0x6e1a; + case 0xfa47: return 0x6f22; + case 0xfa48: return 0x716e; + case 0xfa49: return 0x722b; + case 0xfa4a: return 0x7422; + case 0xfa4b: return 0x7891; + case 0xfa4c: return 0x793e; + case 0xfa4d: return 0x7949; + case 0xfa4e: return 0x7948; + case 0xfa4f: return 0x7950; + case 0xfa50: return 0x7956; + case 0xfa51: return 0x795d; + case 0xfa52: return 0x798d; + case 0xfa53: return 0x798e; + case 0xfa54: return 0x7a40; + case 0xfa55: return 0x7a81; + case 0xfa56: return 0x7bc0; + case 0xfa57: return 0x7df4; + case 0xfa58: return 0x7e09; + case 0xfa59: return 0x7e41; + case 0xfa5a: return 0x7f72; + case 0xfa5b: return 0x8005; + case 0xfa5c: return 0x81ed; + case 0xfa5d: return 0x8279; + case 0xfa5e: return 
0x8279; + case 0xfa5f: return 0x8457; + case 0xfa60: return 0x8910; + case 0xfa61: return 0x8996; + case 0xfa62: return 0x8b01; + case 0xfa63: return 0x8b39; + case 0xfa64: return 0x8cd3; + case 0xfa65: return 0x8d08; + case 0xfa66: return 0x8fb6; + case 0xfa67: return 0x9038; + case 0xfa68: return 0x96e3; + case 0xfa69: return 0x97ff; + case 0xfa6a: return 0x983b; + case 0xfa6b: return 0x6075; + case 0xfa6c: return 0x242ee; + case 0xfa6d: return 0x8218; + case 0xfa70: return 0x4e26; + case 0xfa71: return 0x51b5; + case 0xfa72: return 0x5168; + case 0xfa73: return 0x4f80; + case 0xfa74: return 0x5145; + case 0xfa75: return 0x5180; + case 0xfa76: return 0x52c7; + case 0xfa77: return 0x52fa; + case 0xfa78: return 0x559d; + case 0xfa79: return 0x5555; + case 0xfa7a: return 0x5599; + case 0xfa7b: return 0x55e2; + case 0xfa7c: return 0x585a; + case 0xfa7d: return 0x58b3; + case 0xfa7e: return 0x5944; + case 0xfa7f: return 0x5954; + case 0xfa80: return 0x5a62; + case 0xfa81: return 0x5b28; + case 0xfa82: return 0x5ed2; + case 0xfa83: return 0x5ed9; + case 0xfa84: return 0x5f69; + case 0xfa85: return 0x5fad; + case 0xfa86: return 0x60d8; + case 0xfa87: return 0x614e; + case 0xfa88: return 0x6108; + case 0xfa89: return 0x618e; + case 0xfa8a: return 0x6160; + case 0xfa8b: return 0x61f2; + case 0xfa8c: return 0x6234; + case 0xfa8d: return 0x63c4; + case 0xfa8e: return 0x641c; + case 0xfa8f: return 0x6452; + case 0xfa90: return 0x6556; + case 0xfa91: return 0x6674; + case 0xfa92: return 0x6717; + case 0xfa93: return 0x671b; + case 0xfa94: return 0x6756; + case 0xfa95: return 0x6b79; + case 0xfa96: return 0x6bba; + case 0xfa97: return 0x6d41; + case 0xfa98: return 0x6edb; + case 0xfa99: return 0x6ecb; + case 0xfa9a: return 0x6f22; + case 0xfa9b: return 0x701e; + case 0xfa9c: return 0x716e; + case 0xfa9d: return 0x77a7; + case 0xfa9e: return 0x7235; + case 0xfa9f: return 0x72af; + case 0xfaa0: return 0x732a; + case 0xfaa1: return 0x7471; + case 0xfaa2: return 0x7506; + case 
0xfaa3: return 0x753b; + case 0xfaa4: return 0x761d; + case 0xfaa5: return 0x761f; + case 0xfaa6: return 0x76ca; + case 0xfaa7: return 0x76db; + case 0xfaa8: return 0x76f4; + case 0xfaa9: return 0x774a; + case 0xfaaa: return 0x7740; + case 0xfaab: return 0x78cc; + case 0xfaac: return 0x7ab1; + case 0xfaad: return 0x7bc0; + case 0xfaae: return 0x7c7b; + case 0xfaaf: return 0x7d5b; + case 0xfab0: return 0x7df4; + case 0xfab1: return 0x7f3e; + case 0xfab2: return 0x8005; + case 0xfab3: return 0x8352; + case 0xfab4: return 0x83ef; + case 0xfab5: return 0x8779; + case 0xfab6: return 0x8941; + case 0xfab7: return 0x8986; + case 0xfab8: return 0x8996; + case 0xfab9: return 0x8abf; + case 0xfaba: return 0x8af8; + case 0xfabb: return 0x8acb; + case 0xfabc: return 0x8b01; + case 0xfabd: return 0x8afe; + case 0xfabe: return 0x8aed; + case 0xfabf: return 0x8b39; + case 0xfac0: return 0x8b8a; + case 0xfac1: return 0x8d08; + case 0xfac2: return 0x8f38; + case 0xfac3: return 0x9072; + case 0xfac4: return 0x9199; + case 0xfac5: return 0x9276; + case 0xfac6: return 0x967c; + case 0xfac7: return 0x96e3; + case 0xfac8: return 0x9756; + case 0xfac9: return 0x97db; + case 0xfaca: return 0x97ff; + case 0xfacb: return 0x980b; + case 0xfacc: return 0x983b; + case 0xfacd: return 0x9b12; + case 0xface: return 0x9f9c; + case 0xfacf: return 0x2284a; + case 0xfad0: return 0x22844; + case 0xfad1: return 0x233d5; + case 0xfad2: return 0x3b9d; + case 0xfad3: return 0x4018; + case 0xfad4: return 0x4039; + case 0xfad5: return 0x25249; + case 0xfad6: return 0x25cd0; + case 0xfad7: return 0x27ed3; + case 0xfad8: return 0x9f43; + case 0xfad9: return 0x9f8e; + case 0xfb1d: return 0x5d9; + case 0xfb1e: return 0x0; + case 0xfb1f: return 0x5f2; + case 0xfb2a: return 0x5e9; + case 0xfb2b: return 0x5e9; + case 0xfb2c: return 0x5e9; + case 0xfb2d: return 0x5e9; + case 0xfb2e: return 0x5d0; + case 0xfb2f: return 0x5d0; + case 0xfb30: return 0x5d0; + case 0xfb31: return 0x5d1; + case 0xfb32: return 0x5d2; + 
case 0xfb33: return 0x5d3; + case 0xfb34: return 0x5d4; + case 0xfb35: return 0x5d5; + case 0xfb36: return 0x5d6; + case 0xfb38: return 0x5d8; + case 0xfb39: return 0x5d9; + case 0xfb3a: return 0x5da; + case 0xfb3b: return 0x5db; + case 0xfb3c: return 0x5dc; + case 0xfb3e: return 0x5de; + case 0xfb40: return 0x5e0; + case 0xfb41: return 0x5e1; + case 0xfb43: return 0x5e3; + case 0xfb44: return 0x5e4; + case 0xfb46: return 0x5e6; + case 0xfb47: return 0x5e7; + case 0xfb48: return 0x5e8; + case 0xfb49: return 0x5e9; + case 0xfb4a: return 0x5ea; + case 0xfb4b: return 0x5d5; + case 0xfb4c: return 0x5d1; + case 0xfb4d: return 0x5db; + case 0xfb4e: return 0x5e4; + case 0xfe20: return 0x0; + case 0xfe21: return 0x0; + case 0xfe22: return 0x0; + case 0xfe23: return 0x0; + case 0xfe24: return 0x0; + case 0xfe25: return 0x0; + case 0xfe26: return 0x0; + case 0xfe27: return 0x0; + case 0xfe28: return 0x0; + case 0xfe29: return 0x0; + case 0xfe2a: return 0x0; + case 0xfe2b: return 0x0; + case 0xfe2c: return 0x0; + case 0xfe2d: return 0x0; + case 0xfe2e: return 0x0; + case 0xfe2f: return 0x0; + case 0xff3e: return 0x0; + case 0xff40: return 0x0; + case 0xff70: return 0x0; + case 0xff9e: return 0x0; + case 0xff9f: return 0x0; + case 0xffe3: return 0x0; + case 0x102e0: return 0x0; + case 0x10ae5: return 0x0; + case 0x10ae6: return 0x0; + case 0x10d22: return 0x0; + case 0x10d23: return 0x0; + case 0x10d24: return 0x0; + case 0x10d25: return 0x0; + case 0x10d26: return 0x0; + case 0x10d27: return 0x0; + case 0x10f46: return 0x0; + case 0x10f47: return 0x0; + case 0x10f48: return 0x0; + case 0x10f49: return 0x0; + case 0x10f4a: return 0x0; + case 0x10f4b: return 0x0; + case 0x10f4c: return 0x0; + case 0x10f4d: return 0x0; + case 0x10f4e: return 0x0; + case 0x10f4f: return 0x0; + case 0x10f50: return 0x0; + case 0x1109a: return 0x11099; + case 0x1109c: return 0x1109b; + case 0x110ab: return 0x110a5; + case 0x110b9: return 0x0; + case 0x110ba: return 0x0; + case 0x11133: return 0x0; 
+ case 0x11134: return 0x0; + case 0x11173: return 0x0; + case 0x111c0: return 0x0; + case 0x111ca: return 0x0; + case 0x111cb: return 0x0; + case 0x111cc: return 0x0; + case 0x11235: return 0x0; + case 0x11236: return 0x0; + case 0x112e9: return 0x0; + case 0x112ea: return 0x0; + case 0x1133c: return 0x0; + case 0x1134d: return 0x0; + case 0x11366: return 0x0; + case 0x11367: return 0x0; + case 0x11368: return 0x0; + case 0x11369: return 0x0; + case 0x1136a: return 0x0; + case 0x1136b: return 0x0; + case 0x1136c: return 0x0; + case 0x11370: return 0x0; + case 0x11371: return 0x0; + case 0x11372: return 0x0; + case 0x11373: return 0x0; + case 0x11374: return 0x0; + case 0x11442: return 0x0; + case 0x11446: return 0x0; + case 0x114c2: return 0x0; + case 0x114c3: return 0x0; + case 0x115bf: return 0x0; + case 0x115c0: return 0x0; + case 0x1163f: return 0x0; + case 0x116b6: return 0x0; + case 0x116b7: return 0x0; + case 0x1172b: return 0x0; + case 0x11839: return 0x0; + case 0x1183a: return 0x0; + case 0x1193d: return 0x0; + case 0x1193e: return 0x0; + case 0x11943: return 0x0; + case 0x119e0: return 0x0; + case 0x11a34: return 0x0; + case 0x11a47: return 0x0; + case 0x11a99: return 0x0; + case 0x11c3f: return 0x0; + case 0x11d42: return 0x0; + case 0x11d44: return 0x0; + case 0x11d45: return 0x0; + case 0x11d97: return 0x0; + case 0x16af0: return 0x0; + case 0x16af1: return 0x0; + case 0x16af2: return 0x0; + case 0x16af3: return 0x0; + case 0x16af4: return 0x0; + case 0x16b30: return 0x0; + case 0x16b31: return 0x0; + case 0x16b32: return 0x0; + case 0x16b33: return 0x0; + case 0x16b34: return 0x0; + case 0x16b35: return 0x0; + case 0x16b36: return 0x0; + case 0x16f8f: return 0x0; + case 0x16f90: return 0x0; + case 0x16f91: return 0x0; + case 0x16f92: return 0x0; + case 0x16f93: return 0x0; + case 0x16f94: return 0x0; + case 0x16f95: return 0x0; + case 0x16f96: return 0x0; + case 0x16f97: return 0x0; + case 0x16f98: return 0x0; + case 0x16f99: return 0x0; + case 
0x16f9a: return 0x0; + case 0x16f9b: return 0x0; + case 0x16f9c: return 0x0; + case 0x16f9d: return 0x0; + case 0x16f9e: return 0x0; + case 0x16f9f: return 0x0; + case 0x16ff0: return 0x0; + case 0x16ff1: return 0x0; + case 0x1d167: return 0x0; + case 0x1d168: return 0x0; + case 0x1d169: return 0x0; + case 0x1d16d: return 0x0; + case 0x1d16e: return 0x0; + case 0x1d16f: return 0x0; + case 0x1d170: return 0x0; + case 0x1d171: return 0x0; + case 0x1d172: return 0x0; + case 0x1d17b: return 0x0; + case 0x1d17c: return 0x0; + case 0x1d17d: return 0x0; + case 0x1d17e: return 0x0; + case 0x1d17f: return 0x0; + case 0x1d180: return 0x0; + case 0x1d181: return 0x0; + case 0x1d182: return 0x0; + case 0x1d185: return 0x0; + case 0x1d186: return 0x0; + case 0x1d187: return 0x0; + case 0x1d188: return 0x0; + case 0x1d189: return 0x0; + case 0x1d18a: return 0x0; + case 0x1d18b: return 0x0; + case 0x1d1aa: return 0x0; + case 0x1d1ab: return 0x0; + case 0x1d1ac: return 0x0; + case 0x1d1ad: return 0x0; + case 0x1e130: return 0x0; + case 0x1e131: return 0x0; + case 0x1e132: return 0x0; + case 0x1e133: return 0x0; + case 0x1e134: return 0x0; + case 0x1e135: return 0x0; + case 0x1e136: return 0x0; + case 0x1e2ec: return 0x0; + case 0x1e2ed: return 0x0; + case 0x1e2ee: return 0x0; + case 0x1e2ef: return 0x0; + case 0x1e8d0: return 0x0; + case 0x1e8d1: return 0x0; + case 0x1e8d2: return 0x0; + case 0x1e8d3: return 0x0; + case 0x1e8d4: return 0x0; + case 0x1e8d5: return 0x0; + case 0x1e8d6: return 0x0; + case 0x1e944: return 0x0; + case 0x1e945: return 0x0; + case 0x1e946: return 0x0; + case 0x1e948: return 0x0; + case 0x1e949: return 0x0; + case 0x1e94a: return 0x0; + case 0x2f800: return 0x4e3d; + case 0x2f801: return 0x4e38; + case 0x2f802: return 0x4e41; + case 0x2f803: return 0x20122; + case 0x2f804: return 0x4f60; + case 0x2f805: return 0x4fae; + case 0x2f806: return 0x4fbb; + case 0x2f807: return 0x5002; + case 0x2f808: return 0x507a; + case 0x2f809: return 0x5099; + case 0x2f80a: 
return 0x50e7; + case 0x2f80b: return 0x50cf; + case 0x2f80c: return 0x349e; + case 0x2f80d: return 0x2063a; + case 0x2f80e: return 0x514d; + case 0x2f80f: return 0x5154; + case 0x2f810: return 0x5164; + case 0x2f811: return 0x5177; + case 0x2f812: return 0x2051c; + case 0x2f813: return 0x34b9; + case 0x2f814: return 0x5167; + case 0x2f815: return 0x518d; + case 0x2f816: return 0x2054b; + case 0x2f817: return 0x5197; + case 0x2f818: return 0x51a4; + case 0x2f819: return 0x4ecc; + case 0x2f81a: return 0x51ac; + case 0x2f81b: return 0x51b5; + case 0x2f81c: return 0x291df; + case 0x2f81d: return 0x51f5; + case 0x2f81e: return 0x5203; + case 0x2f81f: return 0x34df; + case 0x2f820: return 0x523b; + case 0x2f821: return 0x5246; + case 0x2f822: return 0x5272; + case 0x2f823: return 0x5277; + case 0x2f824: return 0x3515; + case 0x2f825: return 0x52c7; + case 0x2f826: return 0x52c9; + case 0x2f827: return 0x52e4; + case 0x2f828: return 0x52fa; + case 0x2f829: return 0x5305; + case 0x2f82a: return 0x5306; + case 0x2f82b: return 0x5317; + case 0x2f82c: return 0x5349; + case 0x2f82d: return 0x5351; + case 0x2f82e: return 0x535a; + case 0x2f82f: return 0x5373; + case 0x2f830: return 0x537d; + case 0x2f831: return 0x537f; + case 0x2f832: return 0x537f; + case 0x2f833: return 0x537f; + case 0x2f834: return 0x20a2c; + case 0x2f835: return 0x7070; + case 0x2f836: return 0x53ca; + case 0x2f837: return 0x53df; + case 0x2f838: return 0x20b63; + case 0x2f839: return 0x53eb; + case 0x2f83a: return 0x53f1; + case 0x2f83b: return 0x5406; + case 0x2f83c: return 0x549e; + case 0x2f83d: return 0x5438; + case 0x2f83e: return 0x5448; + case 0x2f83f: return 0x5468; + case 0x2f840: return 0x54a2; + case 0x2f841: return 0x54f6; + case 0x2f842: return 0x5510; + case 0x2f843: return 0x5553; + case 0x2f844: return 0x5563; + case 0x2f845: return 0x5584; + case 0x2f846: return 0x5584; + case 0x2f847: return 0x5599; + case 0x2f848: return 0x55ab; + case 0x2f849: return 0x55b3; + case 0x2f84a: return 
0x55c2; + case 0x2f84b: return 0x5716; + case 0x2f84c: return 0x5606; + case 0x2f84d: return 0x5717; + case 0x2f84e: return 0x5651; + case 0x2f84f: return 0x5674; + case 0x2f850: return 0x5207; + case 0x2f851: return 0x58ee; + case 0x2f852: return 0x57ce; + case 0x2f853: return 0x57f4; + case 0x2f854: return 0x580d; + case 0x2f855: return 0x578b; + case 0x2f856: return 0x5832; + case 0x2f857: return 0x5831; + case 0x2f858: return 0x58ac; + case 0x2f859: return 0x214e4; + case 0x2f85a: return 0x58f2; + case 0x2f85b: return 0x58f7; + case 0x2f85c: return 0x5906; + case 0x2f85d: return 0x591a; + case 0x2f85e: return 0x5922; + case 0x2f85f: return 0x5962; + case 0x2f860: return 0x216a8; + case 0x2f861: return 0x216ea; + case 0x2f862: return 0x59ec; + case 0x2f863: return 0x5a1b; + case 0x2f864: return 0x5a27; + case 0x2f865: return 0x59d8; + case 0x2f866: return 0x5a66; + case 0x2f867: return 0x36ee; + case 0x2f868: return 0x36fc; + case 0x2f869: return 0x5b08; + case 0x2f86a: return 0x5b3e; + case 0x2f86b: return 0x5b3e; + case 0x2f86c: return 0x219c8; + case 0x2f86d: return 0x5bc3; + case 0x2f86e: return 0x5bd8; + case 0x2f86f: return 0x5be7; + case 0x2f870: return 0x5bf3; + case 0x2f871: return 0x21b18; + case 0x2f872: return 0x5bff; + case 0x2f873: return 0x5c06; + case 0x2f874: return 0x5f53; + case 0x2f875: return 0x5c22; + case 0x2f876: return 0x3781; + case 0x2f877: return 0x5c60; + case 0x2f878: return 0x5c6e; + case 0x2f879: return 0x5cc0; + case 0x2f87a: return 0x5c8d; + case 0x2f87b: return 0x21de4; + case 0x2f87c: return 0x5d43; + case 0x2f87d: return 0x21de6; + case 0x2f87e: return 0x5d6e; + case 0x2f87f: return 0x5d6b; + case 0x2f880: return 0x5d7c; + case 0x2f881: return 0x5de1; + case 0x2f882: return 0x5de2; + case 0x2f883: return 0x382f; + case 0x2f884: return 0x5dfd; + case 0x2f885: return 0x5e28; + case 0x2f886: return 0x5e3d; + case 0x2f887: return 0x5e69; + case 0x2f888: return 0x3862; + case 0x2f889: return 0x22183; + case 0x2f88a: return 0x387c; 
+ case 0x2f88b: return 0x5eb0; + case 0x2f88c: return 0x5eb3; + case 0x2f88d: return 0x5eb6; + case 0x2f88e: return 0x5eca; + case 0x2f88f: return 0x2a392; + case 0x2f890: return 0x5efe; + case 0x2f891: return 0x22331; + case 0x2f892: return 0x22331; + case 0x2f893: return 0x8201; + case 0x2f894: return 0x5f22; + case 0x2f895: return 0x5f22; + case 0x2f896: return 0x38c7; + case 0x2f897: return 0x232b8; + case 0x2f898: return 0x261da; + case 0x2f899: return 0x5f62; + case 0x2f89a: return 0x5f6b; + case 0x2f89b: return 0x38e3; + case 0x2f89c: return 0x5f9a; + case 0x2f89d: return 0x5fcd; + case 0x2f89e: return 0x5fd7; + case 0x2f89f: return 0x5ff9; + case 0x2f8a0: return 0x6081; + case 0x2f8a1: return 0x393a; + case 0x2f8a2: return 0x391c; + case 0x2f8a3: return 0x6094; + case 0x2f8a4: return 0x226d4; + case 0x2f8a5: return 0x60c7; + case 0x2f8a6: return 0x6148; + case 0x2f8a7: return 0x614c; + case 0x2f8a8: return 0x614e; + case 0x2f8a9: return 0x614c; + case 0x2f8aa: return 0x617a; + case 0x2f8ab: return 0x618e; + case 0x2f8ac: return 0x61b2; + case 0x2f8ad: return 0x61a4; + case 0x2f8ae: return 0x61af; + case 0x2f8af: return 0x61de; + case 0x2f8b0: return 0x61f2; + case 0x2f8b1: return 0x61f6; + case 0x2f8b2: return 0x6210; + case 0x2f8b3: return 0x621b; + case 0x2f8b4: return 0x625d; + case 0x2f8b5: return 0x62b1; + case 0x2f8b6: return 0x62d4; + case 0x2f8b7: return 0x6350; + case 0x2f8b8: return 0x22b0c; + case 0x2f8b9: return 0x633d; + case 0x2f8ba: return 0x62fc; + case 0x2f8bb: return 0x6368; + case 0x2f8bc: return 0x6383; + case 0x2f8bd: return 0x63e4; + case 0x2f8be: return 0x22bf1; + case 0x2f8bf: return 0x6422; + case 0x2f8c0: return 0x63c5; + case 0x2f8c1: return 0x63a9; + case 0x2f8c2: return 0x3a2e; + case 0x2f8c3: return 0x6469; + case 0x2f8c4: return 0x647e; + case 0x2f8c5: return 0x649d; + case 0x2f8c6: return 0x6477; + case 0x2f8c7: return 0x3a6c; + case 0x2f8c8: return 0x654f; + case 0x2f8c9: return 0x656c; + case 0x2f8ca: return 0x2300a; + case 
0x2f8cb: return 0x65e3; + case 0x2f8cc: return 0x66f8; + case 0x2f8cd: return 0x6649; + case 0x2f8ce: return 0x3b19; + case 0x2f8cf: return 0x6691; + case 0x2f8d0: return 0x3b08; + case 0x2f8d1: return 0x3ae4; + case 0x2f8d2: return 0x5192; + case 0x2f8d3: return 0x5195; + case 0x2f8d4: return 0x6700; + case 0x2f8d5: return 0x669c; + case 0x2f8d6: return 0x80ad; + case 0x2f8d7: return 0x43d9; + case 0x2f8d8: return 0x6717; + case 0x2f8d9: return 0x671b; + case 0x2f8da: return 0x6721; + case 0x2f8db: return 0x675e; + case 0x2f8dc: return 0x6753; + case 0x2f8dd: return 0x233c3; + case 0x2f8de: return 0x3b49; + case 0x2f8df: return 0x67fa; + case 0x2f8e0: return 0x6785; + case 0x2f8e1: return 0x6852; + case 0x2f8e2: return 0x6885; + case 0x2f8e3: return 0x2346d; + case 0x2f8e4: return 0x688e; + case 0x2f8e5: return 0x681f; + case 0x2f8e6: return 0x6914; + case 0x2f8e7: return 0x3b9d; + case 0x2f8e8: return 0x6942; + case 0x2f8e9: return 0x69a3; + case 0x2f8ea: return 0x69ea; + case 0x2f8eb: return 0x6aa8; + case 0x2f8ec: return 0x236a3; + case 0x2f8ed: return 0x6adb; + case 0x2f8ee: return 0x3c18; + case 0x2f8ef: return 0x6b21; + case 0x2f8f0: return 0x238a7; + case 0x2f8f1: return 0x6b54; + case 0x2f8f2: return 0x3c4e; + case 0x2f8f3: return 0x6b72; + case 0x2f8f4: return 0x6b9f; + case 0x2f8f5: return 0x6bba; + case 0x2f8f6: return 0x6bbb; + case 0x2f8f7: return 0x23a8d; + case 0x2f8f8: return 0x21d0b; + case 0x2f8f9: return 0x23afa; + case 0x2f8fa: return 0x6c4e; + case 0x2f8fb: return 0x23cbc; + case 0x2f8fc: return 0x6cbf; + case 0x2f8fd: return 0x6ccd; + case 0x2f8fe: return 0x6c67; + case 0x2f8ff: return 0x6d16; + case 0x2f900: return 0x6d3e; + case 0x2f901: return 0x6d77; + case 0x2f902: return 0x6d41; + case 0x2f903: return 0x6d69; + case 0x2f904: return 0x6d78; + case 0x2f905: return 0x6d85; + case 0x2f906: return 0x23d1e; + case 0x2f907: return 0x6d34; + case 0x2f908: return 0x6e2f; + case 0x2f909: return 0x6e6e; + case 0x2f90a: return 0x3d33; + case 
0x2f90b: return 0x6ecb; + case 0x2f90c: return 0x6ec7; + case 0x2f90d: return 0x23ed1; + case 0x2f90e: return 0x6df9; + case 0x2f90f: return 0x6f6e; + case 0x2f910: return 0x23f5e; + case 0x2f911: return 0x23f8e; + case 0x2f912: return 0x6fc6; + case 0x2f913: return 0x7039; + case 0x2f914: return 0x701e; + case 0x2f915: return 0x701b; + case 0x2f916: return 0x3d96; + case 0x2f917: return 0x704a; + case 0x2f918: return 0x707d; + case 0x2f919: return 0x7077; + case 0x2f91a: return 0x70ad; + case 0x2f91b: return 0x20525; + case 0x2f91c: return 0x7145; + case 0x2f91d: return 0x24263; + case 0x2f91e: return 0x719c; + case 0x2f91f: return 0x243ab; + case 0x2f920: return 0x7228; + case 0x2f921: return 0x7235; + case 0x2f922: return 0x7250; + case 0x2f923: return 0x24608; + case 0x2f924: return 0x7280; + case 0x2f925: return 0x7295; + case 0x2f926: return 0x24735; + case 0x2f927: return 0x24814; + case 0x2f928: return 0x737a; + case 0x2f929: return 0x738b; + case 0x2f92a: return 0x3eac; + case 0x2f92b: return 0x73a5; + case 0x2f92c: return 0x3eb8; + case 0x2f92d: return 0x3eb8; + case 0x2f92e: return 0x7447; + case 0x2f92f: return 0x745c; + case 0x2f930: return 0x7471; + case 0x2f931: return 0x7485; + case 0x2f932: return 0x74ca; + case 0x2f933: return 0x3f1b; + case 0x2f934: return 0x7524; + case 0x2f935: return 0x24c36; + case 0x2f936: return 0x753e; + case 0x2f937: return 0x24c92; + case 0x2f938: return 0x7570; + case 0x2f939: return 0x2219f; + case 0x2f93a: return 0x7610; + case 0x2f93b: return 0x24fa1; + case 0x2f93c: return 0x24fb8; + case 0x2f93d: return 0x25044; + case 0x2f93e: return 0x3ffc; + case 0x2f93f: return 0x4008; + case 0x2f940: return 0x76f4; + case 0x2f941: return 0x250f3; + case 0x2f942: return 0x250f2; + case 0x2f943: return 0x25119; + case 0x2f944: return 0x25133; + case 0x2f945: return 0x771e; + case 0x2f946: return 0x771f; + case 0x2f947: return 0x771f; + case 0x2f948: return 0x774a; + case 0x2f949: return 0x4039; + case 0x2f94a: return 0x778b; + 
case 0x2f94b: return 0x4046; + case 0x2f94c: return 0x4096; + case 0x2f94d: return 0x2541d; + case 0x2f94e: return 0x784e; + case 0x2f94f: return 0x788c; + case 0x2f950: return 0x78cc; + case 0x2f951: return 0x40e3; + case 0x2f952: return 0x25626; + case 0x2f953: return 0x7956; + case 0x2f954: return 0x2569a; + case 0x2f955: return 0x256c5; + case 0x2f956: return 0x798f; + case 0x2f957: return 0x79eb; + case 0x2f958: return 0x412f; + case 0x2f959: return 0x7a40; + case 0x2f95a: return 0x7a4a; + case 0x2f95b: return 0x7a4f; + case 0x2f95c: return 0x2597c; + case 0x2f95d: return 0x25aa7; + case 0x2f95e: return 0x25aa7; + case 0x2f95f: return 0x7aee; + case 0x2f960: return 0x4202; + case 0x2f961: return 0x25bab; + case 0x2f962: return 0x7bc6; + case 0x2f963: return 0x7bc9; + case 0x2f964: return 0x4227; + case 0x2f965: return 0x25c80; + case 0x2f966: return 0x7cd2; + case 0x2f967: return 0x42a0; + case 0x2f968: return 0x7ce8; + case 0x2f969: return 0x7ce3; + case 0x2f96a: return 0x7d00; + case 0x2f96b: return 0x25f86; + case 0x2f96c: return 0x7d63; + case 0x2f96d: return 0x4301; + case 0x2f96e: return 0x7dc7; + case 0x2f96f: return 0x7e02; + case 0x2f970: return 0x7e45; + case 0x2f971: return 0x4334; + case 0x2f972: return 0x26228; + case 0x2f973: return 0x26247; + case 0x2f974: return 0x4359; + case 0x2f975: return 0x262d9; + case 0x2f976: return 0x7f7a; + case 0x2f977: return 0x2633e; + case 0x2f978: return 0x7f95; + case 0x2f979: return 0x7ffa; + case 0x2f97a: return 0x8005; + case 0x2f97b: return 0x264da; + case 0x2f97c: return 0x26523; + case 0x2f97d: return 0x8060; + case 0x2f97e: return 0x265a8; + case 0x2f97f: return 0x8070; + case 0x2f980: return 0x2335f; + case 0x2f981: return 0x43d5; + case 0x2f982: return 0x80b2; + case 0x2f983: return 0x8103; + case 0x2f984: return 0x440b; + case 0x2f985: return 0x813e; + case 0x2f986: return 0x5ab5; + case 0x2f987: return 0x267a7; + case 0x2f988: return 0x267b5; + case 0x2f989: return 0x23393; + case 0x2f98a: return 
0x2339c; + case 0x2f98b: return 0x8201; + case 0x2f98c: return 0x8204; + case 0x2f98d: return 0x8f9e; + case 0x2f98e: return 0x446b; + case 0x2f98f: return 0x8291; + case 0x2f990: return 0x828b; + case 0x2f991: return 0x829d; + case 0x2f992: return 0x52b3; + case 0x2f993: return 0x82b1; + case 0x2f994: return 0x82b3; + case 0x2f995: return 0x82bd; + case 0x2f996: return 0x82e6; + case 0x2f997: return 0x26b3c; + case 0x2f998: return 0x82e5; + case 0x2f999: return 0x831d; + case 0x2f99a: return 0x8363; + case 0x2f99b: return 0x83ad; + case 0x2f99c: return 0x8323; + case 0x2f99d: return 0x83bd; + case 0x2f99e: return 0x83e7; + case 0x2f99f: return 0x8457; + case 0x2f9a0: return 0x8353; + case 0x2f9a1: return 0x83ca; + case 0x2f9a2: return 0x83cc; + case 0x2f9a3: return 0x83dc; + case 0x2f9a4: return 0x26c36; + case 0x2f9a5: return 0x26d6b; + case 0x2f9a6: return 0x26cd5; + case 0x2f9a7: return 0x452b; + case 0x2f9a8: return 0x84f1; + case 0x2f9a9: return 0x84f3; + case 0x2f9aa: return 0x8516; + case 0x2f9ab: return 0x273ca; + case 0x2f9ac: return 0x8564; + case 0x2f9ad: return 0x26f2c; + case 0x2f9ae: return 0x455d; + case 0x2f9af: return 0x4561; + case 0x2f9b0: return 0x26fb1; + case 0x2f9b1: return 0x270d2; + case 0x2f9b2: return 0x456b; + case 0x2f9b3: return 0x8650; + case 0x2f9b4: return 0x865c; + case 0x2f9b5: return 0x8667; + case 0x2f9b6: return 0x8669; + case 0x2f9b7: return 0x86a9; + case 0x2f9b8: return 0x8688; + case 0x2f9b9: return 0x870e; + case 0x2f9ba: return 0x86e2; + case 0x2f9bb: return 0x8779; + case 0x2f9bc: return 0x8728; + case 0x2f9bd: return 0x876b; + case 0x2f9be: return 0x8786; + case 0x2f9bf: return 0x45d7; + case 0x2f9c0: return 0x87e1; + case 0x2f9c1: return 0x8801; + case 0x2f9c2: return 0x45f9; + case 0x2f9c3: return 0x8860; + case 0x2f9c4: return 0x8863; + case 0x2f9c5: return 0x27667; + case 0x2f9c6: return 0x88d7; + case 0x2f9c7: return 0x88de; + case 0x2f9c8: return 0x4635; + case 0x2f9c9: return 0x88fa; + case 0x2f9ca: return 
0x34bb; + case 0x2f9cb: return 0x278ae; + case 0x2f9cc: return 0x27966; + case 0x2f9cd: return 0x46be; + case 0x2f9ce: return 0x46c7; + case 0x2f9cf: return 0x8aa0; + case 0x2f9d0: return 0x8aed; + case 0x2f9d1: return 0x8b8a; + case 0x2f9d2: return 0x8c55; + case 0x2f9d3: return 0x27ca8; + case 0x2f9d4: return 0x8cab; + case 0x2f9d5: return 0x8cc1; + case 0x2f9d6: return 0x8d1b; + case 0x2f9d7: return 0x8d77; + case 0x2f9d8: return 0x27f2f; + case 0x2f9d9: return 0x20804; + case 0x2f9da: return 0x8dcb; + case 0x2f9db: return 0x8dbc; + case 0x2f9dc: return 0x8df0; + case 0x2f9dd: return 0x208de; + case 0x2f9de: return 0x8ed4; + case 0x2f9df: return 0x8f38; + case 0x2f9e0: return 0x285d2; + case 0x2f9e1: return 0x285ed; + case 0x2f9e2: return 0x9094; + case 0x2f9e3: return 0x90f1; + case 0x2f9e4: return 0x9111; + case 0x2f9e5: return 0x2872e; + case 0x2f9e6: return 0x911b; + case 0x2f9e7: return 0x9238; + case 0x2f9e8: return 0x92d7; + case 0x2f9e9: return 0x92d8; + case 0x2f9ea: return 0x927c; + case 0x2f9eb: return 0x93f9; + case 0x2f9ec: return 0x9415; + case 0x2f9ed: return 0x28bfa; + case 0x2f9ee: return 0x958b; + case 0x2f9ef: return 0x4995; + case 0x2f9f0: return 0x95b7; + case 0x2f9f1: return 0x28d77; + case 0x2f9f2: return 0x49e6; + case 0x2f9f3: return 0x96c3; + case 0x2f9f4: return 0x5db2; + case 0x2f9f5: return 0x9723; + case 0x2f9f6: return 0x29145; + case 0x2f9f7: return 0x2921a; + case 0x2f9f8: return 0x4a6e; + case 0x2f9f9: return 0x4a76; + case 0x2f9fa: return 0x97e0; + case 0x2f9fb: return 0x2940a; + case 0x2f9fc: return 0x4ab2; + case 0x2f9fd: return 0x29496; + case 0x2f9fe: return 0x980b; + case 0x2f9ff: return 0x980b; + case 0x2fa00: return 0x9829; + case 0x2fa01: return 0x295b6; + case 0x2fa02: return 0x98e2; + case 0x2fa03: return 0x4b33; + case 0x2fa04: return 0x9929; + case 0x2fa05: return 0x99a7; + case 0x2fa06: return 0x99c2; + case 0x2fa07: return 0x99fe; + case 0x2fa08: return 0x4bce; + case 0x2fa09: return 0x29b30; + case 0x2fa0a: 
return 0x9b12; + case 0x2fa0b: return 0x9c40; + case 0x2fa0c: return 0x9cfd; + case 0x2fa0d: return 0x4cce; + case 0x2fa0e: return 0x4ced; + case 0x2fa0f: return 0x9d67; + case 0x2fa10: return 0x2a0ce; + case 0x2fa11: return 0x4cf8; + case 0x2fa12: return 0x2a105; + case 0x2fa13: return 0x2a20e; + case 0x2fa14: return 0x2a291; + case 0x2fa15: return 0x9ebb; + case 0x2fa16: return 0x4d56; + case 0x2fa17: return 0x9ef9; + case 0x2fa18: return 0x9efe; + case 0x2fa19: return 0x9f05; + case 0x2fa1a: return 0x9f0f; + case 0x2fa1b: return 0x9f16; + case 0x2fa1c: return 0x9f3b; + case 0x2fa1d: return 0x2a600; + default: return codepoint; + } +} \ No newline at end of file diff --git a/src/unicode/fold.c b/src/unicode/fold.c new file mode 100644 index 000000000..223910818 --- /dev/null +++ b/src/unicode/fold.c @@ -0,0 +1,139 @@ +/** + * Copyright (C) 2025-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. 
If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + * + * THIS IS A GENERATED FILE, DO NOT MODIFY. + */ + +#include "fold.h" +#include "../mc-range-edge-generation-private.h" // For mc_count_leading_zeros_u64 +#include "../mongocrypt-private.h" + +static bool append_utf8_codepoint(bson_unichar_t codepoint, char **output_it, mongocrypt_status_t *status) { + if (codepoint <= 0x7f /* max 1-byte codepoint */) { + *(*output_it)++ = (char)codepoint; + } else if (codepoint <= 0x7ff /* max 2-byte codepoint*/) { + *(*output_it)++ = (char)((codepoint >> (6 * 1)) | 0xc0); // 2 leading 1s. + *(*output_it)++ = (char)(((codepoint >> (6 * 0)) & 0x3f) | 0x80); + } else if (codepoint <= 0xffff /* max 3-byte codepoint*/) { + *(*output_it)++ = (char)((codepoint >> (6 * 2)) | 0xe0); // 3 leading 1s. + *(*output_it)++ = (char)(((codepoint >> (6 * 1)) & 0x3f) | 0x80); + *(*output_it)++ = (char)(((codepoint >> (6 * 0)) & 0x3f) | 0x80); + } else { + if (codepoint > 0x10FFFF) { + CLIENT_ERR("append_utf8_codepoint: codepoint was out of range for UTF-8"); + return false; + } + *(*output_it)++ = (char)((codepoint >> (6 * 3)) | 0xf0); // 4 leading 1s. + *(*output_it)++ = (char)(((codepoint >> (6 * 2)) & 0x3f) | 0x80); + *(*output_it)++ = (char)(((codepoint >> (6 * 1)) & 0x3f) | 0x80); + *(*output_it)++ = (char)(((codepoint >> (6 * 0)) & 0x3f) | 0x80); + } + return true; +} + +// C translation of mongo::unicode::String::caseFoldAndStripDiacritics. 
+bool unicode_fold(const char *str, + size_t len, + unicode_fold_options_t options, + char **out_str, + size_t *out_len, + mongocrypt_status_t *status) { + BSON_ASSERT_PARAM(str); + BSON_ASSERT_PARAM(out_str); + BSON_ASSERT_PARAM(out_len); + BSON_ASSERT_PARAM(status); + + if (!(options & (kUnicodeFoldRemoveDiacritics | kUnicodeFoldToLower))) { + CLIENT_ERR("unicode_fold: Either case or diacritic folding must be enabled"); + return false; + } + // Allocate space for possible growth. Folding characters may result in longer UTF-8 sequences. + // 2x is an upper bound. With current fold maps, the largest growth is a 2-byte sequence mapping to a 3-byte + // sequence. + *out_str = bson_malloc(2 * len + 1); + const char *input_it = str; + const char *end_it = str + len; + char *output_it = *out_str; + while (input_it < end_it) { + const uint8_t first_byte = (uint8_t)*input_it++; + bson_unichar_t codepoint = 0; + if (first_byte <= 0x7f) { + // ASCII special case. Can use faster operations. + if ((options & kUnicodeFoldToLower) && (first_byte >= 'A' && first_byte <= 'Z')) { + codepoint = first_byte | 0x20; // Set the ascii lowercase bit on the character. + } else { + // ASCII has two pure diacritics that should be skipped, and no characters that + // change when removing diacritics. + if ((options & kUnicodeFoldRemoveDiacritics) && (first_byte == '^' || first_byte == '`')) { + continue; + } + codepoint = first_byte; + } + } else { + // Multi-byte character + size_t leading_ones = mc_count_leading_zeros_u64(~(((uint64_t)first_byte) << (64 - 8))); + + // Only checking enough to ensure that this code doesn't crash in the face of malformed + // utf-8. We make no guarantees about what results will be returned in this case. 
+ if (!(leading_ones > 1 && leading_ones <= 4 && input_it + (leading_ones - 1) <= end_it)) { + CLIENT_ERR("unicode_fold: Text contains invalid UTF-8"); + bson_free(*out_str); + return false; + } + + codepoint = (bson_unichar_t)(first_byte & (0xff >> leading_ones)); // mask off the size indicator. + for (size_t sub_byte_index = 1; sub_byte_index < leading_ones; sub_byte_index++) { + const uint8_t sub_byte = (uint8_t)*input_it++; + codepoint <<= 6; + codepoint |= sub_byte & 0x3f; // mask off continuation bits. + } + + if (options & kUnicodeFoldToLower) { + bson_unichar_t new_cp = unicode_codepoint_to_lower(codepoint); + codepoint = new_cp; + } + + if ((options & kUnicodeFoldRemoveDiacritics)) { + codepoint = unicode_codepoint_remove_diacritics(codepoint); + if (!codepoint) { + continue; // codepoint is a pure diacritic. + } + } + } + + if (!append_utf8_codepoint(codepoint, &output_it, status)) { + bson_free(*out_str); + return false; + } + } + + // Null terminate + *output_it = '\0'; + *out_len = (size_t)(output_it - *out_str); + return true; +} \ No newline at end of file diff --git a/src/unicode/fold.h b/src/unicode/fold.h new file mode 100644 index 000000000..755d00805 --- /dev/null +++ b/src/unicode/fold.h @@ -0,0 +1,58 @@ +/** + * Copyright (C) 2025-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . 
+ * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#ifndef UNICODE_FOLD_H +#define UNICODE_FOLD_H + +#include "../mongocrypt-status-private.h" + +#include + +#include + +bson_unichar_t unicode_codepoint_to_lower(bson_unichar_t codepoint); +bson_unichar_t unicode_codepoint_remove_diacritics(bson_unichar_t codepoint); + +typedef enum { + kUnicodeFoldNone = 0, + kUnicodeFoldToLower = 1 << 0, + kUnicodeFoldRemoveDiacritics = 1 << 1 +} unicode_fold_options_t; + +// Fold unicode string str of length len according to options. len should not include the null terminator, if it exists. +// Returns true if successful, and returns the null-terminated folded string and its byte length, excluding the null +// terminator, as out_str and out_len. On failure, returns false and sets status accordingly. 
+bool unicode_fold(const char *str, + size_t len, + unicode_fold_options_t options, + char **out_str, + size_t *out_len, + mongocrypt_status_t *status); + +#endif diff --git a/test/test-mc-text-search-str-encode.c b/test/test-mc-text-search-str-encode.c index e0490ed96..cebe7ef38 100644 --- a/test/test-mc-text-search-str-encode.c +++ b/test/test-mc-text-search-str-encode.c @@ -20,10 +20,11 @@ #include "mc-fle2-encryption-placeholder-private.h" #include "mc-str-encode-string-sets-private.h" #include "mc-text-search-str-encode-private.h" +#include "unicode/fold.h" #include #include -uint32_t get_utf8_codepoint_length(const char *buf, uint32_t len) { +static uint32_t get_utf8_codepoint_length(const char *buf, uint32_t len) { const char *cur = buf; const char *end = buf + len; uint32_t codepoint_len = 0; @@ -34,22 +35,24 @@ uint32_t get_utf8_codepoint_length(const char *buf, uint32_t len) { return codepoint_len; } -// TODO MONGOCRYPT-759 Modify these tests not to take unfolded_codepoint_len, but to instead take strings with -// diacritics and fold them static void test_nofold_suffix_prefix_case(_mongocrypt_tester_t *tester, const char *str, uint32_t lb, uint32_t ub, - uint32_t unfolded_codepoint_len) { - TEST_PRINTF("Testing nofold suffix/prefix case: str=\"%s\", lb=%u, ub=%u, unfolded_codepoint_len=%u\n", + bool casef, + bool diacf, + int foldable_codepoints) { + TEST_PRINTF("Testing nofold suffix/prefix case: str=\"%s\", lb=%u, ub=%u, casef=%d, diacf=%d\n", str, lb, ub, - unfolded_codepoint_len); + casef, + diacf); uint32_t byte_len = (uint32_t)strlen(str); - uint32_t codepoint_len = get_utf8_codepoint_length(str, byte_len); + uint32_t unfolded_codepoint_len = byte_len == 0 ? 1 : get_utf8_codepoint_length(str, byte_len); + uint32_t folded_codepoint_len = byte_len == 0 ? 
0 : unfolded_codepoint_len - foldable_codepoints; uint32_t max_padded_len = 16 * (uint32_t)((unfolded_codepoint_len + 15) / 16); - uint32_t max_affix_len = BSON_MIN(ub, codepoint_len); + uint32_t max_affix_len = BSON_MIN(ub, folded_codepoint_len); uint32_t n_real_affixes = max_affix_len >= lb ? max_affix_len - lb + 1 : 0; uint32_t n_affixes = BSON_MIN(ub, max_padded_len) - lb + 1; uint32_t n_padding = n_affixes - n_real_affixes; @@ -58,20 +61,30 @@ static void test_nofold_suffix_prefix_case(_mongocrypt_tester_t *tester, mongocrypt_status_t *status = mongocrypt_status_new(); for (int suffix = 0; suffix <= 1; suffix++) { if (suffix) { - mc_FLE2TextSearchInsertSpec_t spec = {.v = str, .len = byte_len, .suffix = {{lb, ub}, true}}; - sets = mc_text_search_str_encode_helper(&spec, unfolded_codepoint_len, status); + mc_FLE2TextSearchInsertSpec_t spec = {.v = str, + .len = byte_len, + .suffix = {{lb, ub}, true}, + .casef = casef, + .diacf = diacf}; + sets = mc_text_search_str_encode(&spec, status); } else { - mc_FLE2TextSearchInsertSpec_t spec = {.v = str, .len = byte_len, .prefix = {{lb, ub}, true}}; - sets = mc_text_search_str_encode_helper(&spec, unfolded_codepoint_len, status); + mc_FLE2TextSearchInsertSpec_t spec = {.v = str, + .len = byte_len, + .prefix = {{lb, ub}, true}, + .casef = casef, + .diacf = diacf}; + sets = mc_text_search_str_encode(&spec, status); } ASSERT_OR_PRINT(sets, status); - ASSERT(sets->base_string->buf.len == byte_len + 1); - ASSERT(sets->base_string->codepoint_len == codepoint_len + 1); - ASSERT(0 == memcmp(sets->base_string->buf.data, str, byte_len)); - ASSERT(sets->base_string->buf.data[byte_len] == (uint8_t)0xFF); + ASSERT_CMPUINT32(sets->base_string->codepoint_len, ==, folded_codepoint_len + 1); + if (!casef && !diacf) { + ASSERT_CMPUINT32(sets->base_string->buf.len, ==, byte_len + 1); + ASSERT_CMPINT(0, ==, memcmp(sets->base_string->buf.data, str, byte_len)); + } + ASSERT_CMPUINT8(sets->base_string->buf.data[sets->base_string->buf.len - 1], 
==, (uint8_t)0xFF); ASSERT(sets->substring_set == NULL); - ASSERT(sets->exact.len == byte_len); - ASSERT(0 == memcmp(sets->exact.data, str, byte_len)); + ASSERT_CMPUINT32(sets->exact.len, ==, sets->base_string->buf.len - 1); + ASSERT_CMPINT(0, ==, memcmp(sets->exact.data, sets->base_string->buf.data, sets->exact.len)); if (lb > max_padded_len) { ASSERT(sets->suffix_set == NULL); @@ -103,45 +116,47 @@ static void test_nofold_suffix_prefix_case(_mongocrypt_tester_t *tester, uint32_t affix_count = 0; uint32_t total_real_affix_count = 0; while (mc_affix_set_iter_next(&it, &affix, &affix_len, &affix_count)) { - // Since all substrings are just views on the base string, we can use pointer math to find our start and end - // indices. + // Since all substrings are just views on the base string, we can use pointer math to find our start and + // end indices. TEST_PRINTF("Affix starting %lld, ending %lld, count %u\n", (long long)((uint8_t *)affix - sets->base_string->buf.data), (long long)((uint8_t *)affix - sets->base_string->buf.data + affix_len), affix_count); - if (affix_len == byte_len + 1) { + if (affix_len == sets->base_string->buf.len) { // This is padding, so there should be no more entries due to how we ordered them ASSERT(!mc_affix_set_iter_next(&it, NULL, NULL, NULL)); break; } - ASSERT(affix_len <= byte_len); - ASSERT(0 < affix_len); + ASSERT_CMPUINT32(affix_len, <=, sets->base_string->buf.len - 1); + ASSERT_CMPUINT32(0, <, affix_len); - // We happen to always order from smallest to largest in the suffix/prefix algorithm, which makes our life - // slightly easier when testing. + // We happen to always order from smallest to largest in the suffix/prefix algorithm, which makes our + // life slightly easier when testing. 
if (suffix) { - uint32_t start_offset = sets->base_string->codepoint_offsets[codepoint_len - (lb + idx)]; - ASSERT((uint8_t *)affix == sets->base_string->buf.data + start_offset); - ASSERT(affix_len == sets->base_string->codepoint_offsets[codepoint_len] - start_offset) + uint32_t start_offset = sets->base_string->codepoint_offsets[folded_codepoint_len - (lb + idx)]; + ASSERT_CMPPTR((uint8_t *)affix, ==, sets->base_string->buf.data + start_offset); + ASSERT_CMPUINT32(affix_len, + ==, + sets->base_string->codepoint_offsets[folded_codepoint_len] - start_offset) } else { uint32_t end_offset = sets->base_string->codepoint_offsets[lb + idx]; - ASSERT((uint8_t *)affix == sets->base_string->buf.data); - ASSERT(affix_len == end_offset); + ASSERT_CMPPTR((uint8_t *)affix, ==, sets->base_string->buf.data); + ASSERT_CMPUINT32(affix_len, ==, end_offset); } // The count should always be 1, except for padding. - ASSERT(1 == affix_count); + ASSERT_CMPUINT32(1, ==, affix_count); total_real_affix_count++; idx++; } - ASSERT(total_real_affix_count == n_real_affixes); - if (affix_len == byte_len + 1) { + ASSERT_CMPUINT32(total_real_affix_count, ==, n_real_affixes); + if (affix_len == sets->base_string->buf.len) { // Padding - ASSERT((uint8_t *)affix == sets->base_string->buf.data); - ASSERT(affix_count == n_padding); + ASSERT_CMPPTR((uint8_t *)affix, ==, sets->base_string->buf.data); + ASSERT_CMPUINT32(affix_count, ==, n_padding); } else { // No padding found - ASSERT(n_padding == 0); + ASSERT_CMPUINT32(n_padding, ==, 0); } CONTINUE: mc_str_encode_sets_destroy(sets); @@ -202,22 +217,30 @@ static void test_nofold_substring_case(_mongocrypt_tester_t *tester, uint32_t lb, uint32_t ub, uint32_t mlen, - uint32_t unfolded_codepoint_len) { - TEST_PRINTF("Testing nofold substring case: str=\"%s\", lb=%u, ub=%u, mlen=%u, unfolded_codepoint_len=%u\n", + bool casef, + bool diacf, + int foldable_codepoints) { + TEST_PRINTF("Testing nofold substring case: str=\"%s\", lb=%u, ub=%u, mlen=%u, casef=%d, 
diacf=%d\n", str, lb, ub, mlen, - unfolded_codepoint_len); + casef, + diacf); uint32_t byte_len = (uint32_t)strlen(str); - uint32_t codepoint_len = get_utf8_codepoint_length(str, byte_len); + uint32_t unfolded_codepoint_len = byte_len == 0 ? 1 : get_utf8_codepoint_length(str, byte_len); + uint32_t folded_codepoint_len = byte_len == 0 ? 0 : unfolded_codepoint_len - foldable_codepoints; uint32_t max_padded_len = 16 * (uint32_t)((unfolded_codepoint_len + 15) / 16); uint32_t n_substrings = calc_number_of_substrings(BSON_MIN(max_padded_len, mlen), lb, ub); mongocrypt_status_t *status = mongocrypt_status_new(); mc_str_encode_sets_t *sets; - mc_FLE2TextSearchInsertSpec_t spec = {.v = str, .len = byte_len, .substr = {{mlen, lb, ub}, true}}; - sets = mc_text_search_str_encode_helper(&spec, unfolded_codepoint_len, status); + mc_FLE2TextSearchInsertSpec_t spec = {.v = str, + .len = byte_len, + .substr = {{mlen, lb, ub}, true}, + .casef = casef, + .diacf = diacf}; + sets = mc_text_search_str_encode(&spec, status); if (unfolded_codepoint_len > mlen) { ASSERT_FAILS_STATUS(sets, status, "longer than the maximum length"); mongocrypt_status_destroy(status); @@ -225,14 +248,17 @@ static void test_nofold_substring_case(_mongocrypt_tester_t *tester, } ASSERT_OR_PRINT(sets, status); mongocrypt_status_destroy(status); - ASSERT(sets->base_string->buf.len == byte_len + 1); - ASSERT(sets->base_string->codepoint_len == codepoint_len + 1); - ASSERT(0 == memcmp(sets->base_string->buf.data, str, byte_len)); - ASSERT(sets->base_string->buf.data[byte_len] == (uint8_t)0xFF); - ASSERT(sets->suffix_set == NULL) + ASSERT_CMPUINT32(sets->base_string->codepoint_len, ==, folded_codepoint_len + 1); + if (!casef && !diacf) { + ASSERT_CMPUINT32(sets->base_string->buf.len, ==, byte_len + 1); + ASSERT_CMPINT(0, ==, memcmp(sets->base_string->buf.data, str, byte_len)); + } + + ASSERT_CMPUINT8(sets->base_string->buf.data[sets->base_string->buf.len - 1], ==, (uint8_t)0xFF); + ASSERT(sets->suffix_set == NULL); 
ASSERT(sets->prefix_set == NULL); - ASSERT(sets->exact.len == byte_len); - ASSERT(0 == memcmp(sets->exact.data, str, byte_len)); + ASSERT_CMPUINT32(sets->exact.len, ==, sets->base_string->buf.len - 1); + ASSERT_CMPINT(0, ==, memcmp(sets->exact.data, sets->base_string->buf.data, sets->base_string->buf.len - 1)); if (lb > max_padded_len) { ASSERT(sets->substring_set == NULL); @@ -264,26 +290,28 @@ static void test_nofold_substring_case(_mongocrypt_tester_t *tester, substring_count, substring_len, substring); - if (substring_len == byte_len + 1) { + if (substring_len == sets->base_string->buf.len) { // This is padding, so there should be no more entries due to how we ordered them ASSERT(!mc_substring_set_iter_next(&it, NULL, NULL, NULL)); break; } - ASSERT((uint8_t *)substring + substring_len <= sets->base_string->buf.data + byte_len); - ASSERT(substring_len <= byte_len); - ASSERT(0 < substring_len); - ASSERT(1 == substring_count); + ASSERT_CMPPTR((uint8_t *)substring + substring_len, + <=, + sets->base_string->buf.data + sets->base_string->buf.len); + ASSERT_CMPUINT32(substring_len, <=, sets->base_string->buf.len - 1); + ASSERT_CMPUINT32(0, <, substring_len); + ASSERT_CMPUINT32(1, ==, substring_count); total_real_substring_count++; } - ASSERT(total_real_substring_count == n_real_substrings); - if (substring_len == byte_len + 1) { + ASSERT_CMPUINT32(total_real_substring_count, ==, n_real_substrings); + if (substring_len == sets->base_string->buf.len) { // Padding - ASSERT((uint8_t *)substring == sets->base_string->buf.data); - ASSERT(substring_count == n_padding); + ASSERT_CMPPTR((uint8_t *)substring, ==, sets->base_string->buf.data); + ASSERT_CMPUINT32(substring_count, ==, n_padding); } else { // No padding found - ASSERT(n_padding == 0); + ASSERT_CMPUINT32(n_padding, ==, 0); } cleanup: mc_str_encode_sets_destroy(sets); @@ -293,246 +321,778 @@ static void test_nofold_substring_case_multiple_mlen(_mongocrypt_tester_t *teste const char *str, uint32_t lb, uint32_t ub, - 
uint32_t unfolded_codepoint_len) { + uint32_t unfolded_codepoint_len, + bool casef, + bool diacf, + int foldable_codepoints) { // mlen < unfolded_codepoint_len - test_nofold_substring_case(tester, str, lb, ub, unfolded_codepoint_len - 1, unfolded_codepoint_len); + test_nofold_substring_case(tester, str, lb, ub, unfolded_codepoint_len - 1, casef, diacf, foldable_codepoints); // mlen = unfolded_codepoint_len - test_nofold_substring_case(tester, str, lb, ub, unfolded_codepoint_len, unfolded_codepoint_len); + test_nofold_substring_case(tester, str, lb, ub, unfolded_codepoint_len, casef, diacf, foldable_codepoints); // mlen > unfolded_codepoint_len - test_nofold_substring_case(tester, str, lb, ub, unfolded_codepoint_len + 1, unfolded_codepoint_len); + test_nofold_substring_case(tester, str, lb, ub, unfolded_codepoint_len + 1, casef, diacf, foldable_codepoints); // mlen >> unfolded_codepoint_len - test_nofold_substring_case(tester, str, lb, ub, unfolded_codepoint_len + 64, unfolded_codepoint_len); + test_nofold_substring_case(tester, str, lb, ub, unfolded_codepoint_len + 64, casef, diacf, foldable_codepoints); // mlen = cbclen uint32_t max_padded_len = 16 * (uint32_t)((unfolded_codepoint_len + 15) / 16); - test_nofold_substring_case(tester, str, lb, ub, max_padded_len, unfolded_codepoint_len); + test_nofold_substring_case(tester, str, lb, ub, max_padded_len, casef, diacf, foldable_codepoints); } -const uint32_t UNFOLDED_CASES[] = {0, 1, 3, 16}; -const char short_string[] = "123456789"; -const char medium_string[] = "0123456789abcdef"; -const char long_string[] = "123456789123456789123458980"; -// The unicode test strings are a mix of 1, 2, and 3-byte unicode characters. 
-const char short_unicode_string[] = "1二𓀀4五六❼8𓀯"; -const char medium_unicode_string[] = "⓪1二𓀀4五六❼8𓀯あいうえおf"; -const char long_unicode_string[] = "1二𓀀4五六❼8𓀯1二𓀀4五六𓀯1二𓀀4❼8𓀯❼8五六"; -const uint32_t SHORT_LEN = sizeof(short_string) - 1; -const uint32_t MEDIUM_LEN = sizeof(medium_string) - 1; -const uint32_t LONG_LEN = sizeof(long_string) - 1; - -static void test_text_search_str_encode_suffix_prefix(_mongocrypt_tester_t *tester, - const char *short_s, - const char *medium_s, - const char *long_s) { - for (uint32_t i = 0; i < sizeof(UNFOLDED_CASES) / sizeof(UNFOLDED_CASES[0]); i++) { - uint32_t short_unfolded_codepoint_len = SHORT_LEN + UNFOLDED_CASES[i]; - uint32_t medium_unfolded_codepoint_len = MEDIUM_LEN + UNFOLDED_CASES[i]; - uint32_t long_unfolded_codepoint_len = LONG_LEN + UNFOLDED_CASES[i]; - // LB > 16 - test_nofold_suffix_prefix_case(tester, short_s, 17, 19, short_unfolded_codepoint_len); - // Simple cases - test_nofold_suffix_prefix_case(tester, short_s, 2, 4, short_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, short_s, 3, 6, short_unfolded_codepoint_len); - // LB = UB - test_nofold_suffix_prefix_case(tester, short_s, 2, 2, short_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, short_s, 9, 9, short_unfolded_codepoint_len); - // UB = len - test_nofold_suffix_prefix_case(tester, short_s, 2, 9, short_unfolded_codepoint_len); - // 16 > UB > len - test_nofold_suffix_prefix_case(tester, short_s, 2, 14, short_unfolded_codepoint_len); - // UB = 16 - test_nofold_suffix_prefix_case(tester, short_s, 2, 16, short_unfolded_codepoint_len); - // UB > 16 - test_nofold_suffix_prefix_case(tester, short_s, 2, 19, short_unfolded_codepoint_len); - // UB > 32 - test_nofold_suffix_prefix_case(tester, short_s, 2, 35, short_unfolded_codepoint_len); - // 16 >= LB > len - test_nofold_suffix_prefix_case(tester, short_s, 12, 19, short_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, short_s, 12, 16, short_unfolded_codepoint_len); - 
test_nofold_suffix_prefix_case(tester, short_s, 16, 19, short_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, short_s, 12, 35, short_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, short_s, 16, 35, short_unfolded_codepoint_len); - - // len = 16 cases - // LB > 16 - test_nofold_suffix_prefix_case(tester, medium_s, 17, 19, medium_unfolded_codepoint_len); - // Simple cases - test_nofold_suffix_prefix_case(tester, medium_s, 2, 4, medium_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, medium_s, 3, 6, medium_unfolded_codepoint_len); - // LB = UB - test_nofold_suffix_prefix_case(tester, medium_s, 2, 2, medium_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, medium_s, 16, 16, medium_unfolded_codepoint_len); - // UB = len - test_nofold_suffix_prefix_case(tester, medium_s, 2, 16, medium_unfolded_codepoint_len); - // UB > len - test_nofold_suffix_prefix_case(tester, medium_s, 2, 19, medium_unfolded_codepoint_len); - // UB = 32 - test_nofold_suffix_prefix_case(tester, medium_s, 2, 32, medium_unfolded_codepoint_len); - // UB > 32 - test_nofold_suffix_prefix_case(tester, medium_s, 2, 35, medium_unfolded_codepoint_len); - // LB = len - test_nofold_suffix_prefix_case(tester, medium_s, 16, 19, medium_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, medium_s, 16, 35, medium_unfolded_codepoint_len); - - // len > 16 cases - // LB > 32 - test_nofold_suffix_prefix_case(tester, long_s, 33, 38, long_unfolded_codepoint_len); - // Simple cases - test_nofold_suffix_prefix_case(tester, long_s, 2, 4, long_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, long_s, 3, 6, long_unfolded_codepoint_len); - // LB < 16 <= UB <= len - test_nofold_suffix_prefix_case(tester, long_s, 3, 18, long_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, long_s, 3, 16, long_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, long_s, 3, 27, long_unfolded_codepoint_len); - // 16 <= LB < 
UB <= len - test_nofold_suffix_prefix_case(tester, long_s, 18, 24, long_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, long_s, 16, 24, long_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, long_s, 18, 27, long_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, long_s, 16, 27, long_unfolded_codepoint_len); - // LB = UB - test_nofold_suffix_prefix_case(tester, long_s, 3, 3, long_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, long_s, 16, 16, long_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, long_s, 27, 27, long_unfolded_codepoint_len); - // 32 > UB > len - test_nofold_suffix_prefix_case(tester, long_s, 3, 29, long_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, long_s, 18, 29, long_unfolded_codepoint_len); - // UB = 32 - test_nofold_suffix_prefix_case(tester, long_s, 3, 32, long_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, long_s, 18, 32, long_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, long_s, 27, 32, long_unfolded_codepoint_len); - // UB > 32 - test_nofold_suffix_prefix_case(tester, long_s, 3, 35, long_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, long_s, 18, 35, long_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, long_s, 27, 32, long_unfolded_codepoint_len); - // UB > 48 - test_nofold_suffix_prefix_case(tester, long_s, 3, 49, long_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, long_s, 18, 49, long_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, long_s, 27, 32, long_unfolded_codepoint_len); - // 32 >= LB > len - test_nofold_suffix_prefix_case(tester, long_s, 28, 30, long_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, long_s, 28, 28, long_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, long_s, 28, 32, long_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, long_s, 28, 34, 
long_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, long_s, 28, 49, long_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, long_s, 32, 32, long_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, long_s, 32, 34, long_unfolded_codepoint_len); - test_nofold_suffix_prefix_case(tester, long_s, 32, 49, long_unfolded_codepoint_len); +const char *normal_ascii_strings[] = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f", + "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", + "w", "x", "y", "z", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", + "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"}; +const char *ascii_diacritics[] = {"^", "`"}; +const char *normal_unicode_strings[] = {"ぁ", "あ", "ぃ", "い", "ぅ", "う", "ぇ", "え", "ぉ", "お", "か", "が", + "き", "ぎ", "く", "け", "Ѐ", "Ё", "Ђ", "Ѓ", "Є", "Ѕ", "І", "Ї", + "Ј", "Љ", "Њ", "Ћ", "Ќ", "Ѝ", "Ў", "Џ", "𓀀", "𓀁", "𓀂", "𓀃", + "𓀄", "𓀅", "𓀆", "𓀇", "𓀈", "𓀉", "𓀊", "𓀋", "𓀌", "𓀍", "𓀎", "𓀏"}; +const char *unicode_diacritics[] = {"̀", "́", "̂", "̃", "̄", "̅", "̆", "̇", "̈", "̉", "̊", "̋", "̌", "̍", "̎", + "̏", "᷄", "᷅", "᷆", "᷇", "᷈", "᷉", "᷊", "᷋", "᷌", "᷍", "᷎", "᷏", "︠", "︡", + "︢", "︣", "︤", "︥", "︦", "︧", "︨", "︩", "︪", "︫", "︬", "︭", "︮", "︯"}; + +// Build a random string which has unfolded_len codepoints, but folds to folded_len codepoints after diacritic folding. +char *build_random_string_to_fold(uint32_t folded_len, uint32_t unfolded_len) { + ASSERT_CMPUINT32(unfolded_len, >=, folded_len); + // Max size in bytes is # unicode characters * 4 bytes for each character + 1 null terminator. 
+ char *str = malloc(unfolded_len * 4 + 1); + char *ptr = str; + uint32_t folded_size = 0; + uint32_t diacritics = unfolded_len - folded_len; + int dia_prob = (diacritics * 1000) / unfolded_len; + for (uint32_t n_codepoints = 0; n_codepoints < unfolded_len; n_codepoints++) { + const char *src_ptr; + bool must_add_diacritic = folded_size == folded_len; + bool must_add_normal = n_codepoints - folded_size == diacritics; + if (must_add_diacritic || (!must_add_normal && (rand() % 1000 < dia_prob))) { + // Add diacritic. + if (rand() % 2) { + int i = rand() % (sizeof(ascii_diacritics) / sizeof(char *)); + src_ptr = ascii_diacritics[i]; + } else { + int i = rand() % (sizeof(unicode_diacritics) / sizeof(char *)); + src_ptr = unicode_diacritics[i]; + } + } else { + // Add normal character. + if (rand() % 2) { + int i = rand() % (sizeof(normal_ascii_strings) / sizeof(char *)); + src_ptr = normal_ascii_strings[i]; + } else { + int i = rand() % (sizeof(normal_unicode_strings) / sizeof(char *)); + src_ptr = normal_unicode_strings[i]; + } + folded_size++; + } + strcpy(ptr, src_ptr); + ptr += strlen(src_ptr); } + + uint32_t len = (uint32_t)(ptr - str); + // ptr points to the final null character, include that in the final string. + str = realloc(str, len + 1); + + // Make sure we did everything right. 
+ ASSERT_CMPUINT32(unfolded_len, ==, get_utf8_codepoint_length(str, len)); + mongocrypt_status_t *status = mongocrypt_status_new(); + char *out_str; + size_t out_len; + ASSERT_OK_STATUS(unicode_fold(str, len, kUnicodeFoldRemoveDiacritics, &out_str, &out_len, status), status); + ASSERT_CMPUINT32(folded_len, ==, get_utf8_codepoint_length(out_str, (uint32_t)out_len)); + bson_free(out_str); + mongocrypt_status_destroy(status); + return str; } -static void test_text_search_str_encode_substring(_mongocrypt_tester_t *tester, - const char *short_s, - const char *medium_s, - const char *long_s) { +static void suffix_prefix_run_folding_case(_mongocrypt_tester_t *tester, + const char *short_s, + const char *medium_s, + const char *long_s, + bool casef, + bool diacf, + int foldable_codepoints) { + // LB > 16 + test_nofold_suffix_prefix_case(tester, short_s, 17, 19, casef, diacf, foldable_codepoints); + // Simple cases + test_nofold_suffix_prefix_case(tester, short_s, 2, 4, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, short_s, 3, 6, casef, diacf, foldable_codepoints); + // LB = UB + test_nofold_suffix_prefix_case(tester, short_s, 2, 2, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, short_s, 9, 9, casef, diacf, foldable_codepoints); + // UB = len + test_nofold_suffix_prefix_case(tester, short_s, 2, 9, casef, diacf, foldable_codepoints); + // 16 > UB > len + test_nofold_suffix_prefix_case(tester, short_s, 2, 14, casef, diacf, foldable_codepoints); + // UB = 16 + test_nofold_suffix_prefix_case(tester, short_s, 2, 16, casef, diacf, foldable_codepoints); + // UB > 16 + test_nofold_suffix_prefix_case(tester, short_s, 2, 19, casef, diacf, foldable_codepoints); + // UB > 32 + test_nofold_suffix_prefix_case(tester, short_s, 2, 35, casef, diacf, foldable_codepoints); + // 16 >= LB > len + test_nofold_suffix_prefix_case(tester, short_s, 12, 19, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, 
short_s, 12, 16, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, short_s, 16, 19, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, short_s, 12, 35, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, short_s, 16, 35, casef, diacf, foldable_codepoints); + + // len = 16 cases + // LB > 16 + test_nofold_suffix_prefix_case(tester, medium_s, 17, 19, casef, diacf, foldable_codepoints); + // Simple cases + test_nofold_suffix_prefix_case(tester, medium_s, 2, 4, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, medium_s, 3, 6, casef, diacf, foldable_codepoints); + // LB = UB + test_nofold_suffix_prefix_case(tester, medium_s, 2, 2, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, medium_s, 16, 16, casef, diacf, foldable_codepoints); + // UB = len + test_nofold_suffix_prefix_case(tester, medium_s, 2, 16, casef, diacf, foldable_codepoints); + // UB > len + test_nofold_suffix_prefix_case(tester, medium_s, 2, 19, casef, diacf, foldable_codepoints); + // UB = 32 + test_nofold_suffix_prefix_case(tester, medium_s, 2, 32, casef, diacf, foldable_codepoints); + // UB > 32 + test_nofold_suffix_prefix_case(tester, medium_s, 2, 35, casef, diacf, foldable_codepoints); + // LB = len + test_nofold_suffix_prefix_case(tester, medium_s, 16, 19, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, medium_s, 16, 35, casef, diacf, foldable_codepoints); + + // len > 16 cases + // LB > 32 + test_nofold_suffix_prefix_case(tester, long_s, 33, 38, casef, diacf, foldable_codepoints); + // Simple cases + test_nofold_suffix_prefix_case(tester, long_s, 2, 4, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, long_s, 3, 6, casef, diacf, foldable_codepoints); + // LB < 16 <= UB <= len + test_nofold_suffix_prefix_case(tester, long_s, 3, 18, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, long_s, 
3, 16, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, long_s, 3, 27, casef, diacf, foldable_codepoints); + // 16 <= LB < UB <= len + test_nofold_suffix_prefix_case(tester, long_s, 18, 24, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, long_s, 16, 24, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, long_s, 18, 27, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, long_s, 16, 27, casef, diacf, foldable_codepoints); + // LB = UB + test_nofold_suffix_prefix_case(tester, long_s, 3, 3, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, long_s, 16, 16, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, long_s, 27, 27, casef, diacf, foldable_codepoints); + // 32 > UB > len + test_nofold_suffix_prefix_case(tester, long_s, 3, 29, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, long_s, 18, 29, casef, diacf, foldable_codepoints); + // UB = 32 + test_nofold_suffix_prefix_case(tester, long_s, 3, 32, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, long_s, 18, 32, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, long_s, 27, 32, casef, diacf, foldable_codepoints); + // UB > 32 + test_nofold_suffix_prefix_case(tester, long_s, 3, 35, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, long_s, 18, 35, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, long_s, 27, 32, casef, diacf, foldable_codepoints); + // UB > 48 + test_nofold_suffix_prefix_case(tester, long_s, 3, 49, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, long_s, 18, 49, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, long_s, 27, 32, casef, diacf, foldable_codepoints); + // 32 >= LB > len + test_nofold_suffix_prefix_case(tester, long_s, 28, 30, casef, diacf, 
foldable_codepoints); + test_nofold_suffix_prefix_case(tester, long_s, 28, 28, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, long_s, 28, 32, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, long_s, 28, 34, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, long_s, 28, 49, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, long_s, 32, 32, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, long_s, 32, 34, casef, diacf, foldable_codepoints); + test_nofold_suffix_prefix_case(tester, long_s, 32, 49, casef, diacf, foldable_codepoints); +} + +const uint32_t UNFOLDED_CASES[] = {0, 1, 3, 16}; +// Predefined lengths to test a variety of cases +const uint32_t SHORT_LEN = 9; +const uint32_t MEDIUM_LEN = 16; +const uint32_t LONG_LEN = 27; + +static void _test_text_search_str_encode_suffix_prefix(_mongocrypt_tester_t *tester) { + unsigned int seed = (unsigned int)time(0); + TEST_PRINTF("Testing with seed: %u", seed); + srand(seed); + // Run diacritic folding and case+diacritic folding for a variety of folded/unfolded sizes. 
for (uint32_t i = 0; i < sizeof(UNFOLDED_CASES) / sizeof(UNFOLDED_CASES[0]); i++) { - uint32_t short_unfolded_codepoint_len = SHORT_LEN + UNFOLDED_CASES[i]; - uint32_t medium_unfolded_codepoint_len = MEDIUM_LEN + UNFOLDED_CASES[i]; - uint32_t long_unfolded_codepoint_len = LONG_LEN + UNFOLDED_CASES[i]; - // LB > 16 - test_nofold_substring_case_multiple_mlen(tester, short_s, 17, 19, short_unfolded_codepoint_len); - // Simple cases - test_nofold_substring_case_multiple_mlen(tester, short_s, 2, 4, short_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, short_s, 3, 6, short_unfolded_codepoint_len); - // LB = UB - test_nofold_substring_case_multiple_mlen(tester, short_s, 2, 2, short_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, short_s, 9, 9, short_unfolded_codepoint_len); - // UB = len - test_nofold_substring_case_multiple_mlen(tester, short_s, 2, 9, short_unfolded_codepoint_len); - // 16 > UB > len - test_nofold_substring_case_multiple_mlen(tester, short_s, 2, 14, short_unfolded_codepoint_len); - // UB = 16 - test_nofold_substring_case_multiple_mlen(tester, short_s, 2, 16, short_unfolded_codepoint_len); - // UB > 16 - test_nofold_substring_case_multiple_mlen(tester, short_s, 2, 19, short_unfolded_codepoint_len); - // UB > 32 - test_nofold_substring_case_multiple_mlen(tester, short_s, 2, 35, short_unfolded_codepoint_len); - // 16 >= LB > len - test_nofold_substring_case_multiple_mlen(tester, short_s, 12, 19, short_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, short_s, 12, 16, short_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, short_s, 16, 19, short_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, short_s, 12, 35, short_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, short_s, 16, 35, short_unfolded_codepoint_len); - - // len = 16 cases - // LB > 16 - test_nofold_substring_case_multiple_mlen(tester, 
medium_s, 17, 19, medium_unfolded_codepoint_len); - // Simple cases - test_nofold_substring_case_multiple_mlen(tester, medium_s, 2, 4, medium_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, medium_s, 3, 6, medium_unfolded_codepoint_len); - // LB = UB - test_nofold_substring_case_multiple_mlen(tester, medium_s, 2, 2, medium_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, medium_s, 16, 16, medium_unfolded_codepoint_len); - // UB = len - test_nofold_substring_case_multiple_mlen(tester, medium_s, 2, 16, medium_unfolded_codepoint_len); - // UB > len - test_nofold_substring_case_multiple_mlen(tester, medium_s, 2, 19, medium_unfolded_codepoint_len); - // UB = 32 - test_nofold_substring_case_multiple_mlen(tester, medium_s, 2, 32, medium_unfolded_codepoint_len); - // UB > 32 - test_nofold_substring_case_multiple_mlen(tester, medium_s, 2, 35, medium_unfolded_codepoint_len); - // LB = len - test_nofold_substring_case_multiple_mlen(tester, medium_s, 16, 19, medium_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, medium_s, 16, 35, medium_unfolded_codepoint_len); - - // len > 16 cases - // LB > 32 - test_nofold_substring_case_multiple_mlen(tester, long_s, 33, 38, long_unfolded_codepoint_len); - // Simple cases - test_nofold_substring_case_multiple_mlen(tester, long_s, 2, 4, long_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, long_s, 3, 6, long_unfolded_codepoint_len); - // LB < 16 <= UB <= len - test_nofold_substring_case_multiple_mlen(tester, long_s, 3, 18, long_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, long_s, 3, 16, long_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, long_s, 3, 27, long_unfolded_codepoint_len); - // 16 <= LB < UB <= len - test_nofold_substring_case_multiple_mlen(tester, long_s, 18, 24, long_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, long_s, 16, 24, 
long_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, long_s, 18, 27, long_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, long_s, 16, 27, long_unfolded_codepoint_len); - // LB = UB - test_nofold_substring_case_multiple_mlen(tester, long_s, 3, 3, long_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, long_s, 16, 16, long_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, long_s, 27, 27, long_unfolded_codepoint_len); - // 32 > UB > len - test_nofold_substring_case_multiple_mlen(tester, long_s, 3, 29, long_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, long_s, 18, 29, long_unfolded_codepoint_len); - // UB = 32 - test_nofold_substring_case_multiple_mlen(tester, long_s, 3, 32, long_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, long_s, 18, 32, long_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, long_s, 27, 32, long_unfolded_codepoint_len); - // UB > 32 - test_nofold_substring_case_multiple_mlen(tester, long_s, 3, 35, long_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, long_s, 18, 35, long_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, long_s, 27, 32, long_unfolded_codepoint_len); - // UB > 48 - test_nofold_substring_case_multiple_mlen(tester, long_s, 3, 49, long_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, long_s, 18, 49, long_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, long_s, 27, 32, long_unfolded_codepoint_len); - // 32 >= LB > len - test_nofold_substring_case_multiple_mlen(tester, long_s, 28, 30, long_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, long_s, 28, 28, long_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, long_s, 28, 32, long_unfolded_codepoint_len); - 
test_nofold_substring_case_multiple_mlen(tester, long_s, 28, 34, long_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, long_s, 28, 49, long_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, long_s, 32, 32, long_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, long_s, 32, 34, long_unfolded_codepoint_len); - test_nofold_substring_case_multiple_mlen(tester, long_s, 32, 49, long_unfolded_codepoint_len); + char *short_s = build_random_string_to_fold(SHORT_LEN, SHORT_LEN + UNFOLDED_CASES[i]); + char *medium_s = build_random_string_to_fold(MEDIUM_LEN, MEDIUM_LEN + UNFOLDED_CASES[i]); + char *long_s = build_random_string_to_fold(LONG_LEN, LONG_LEN + UNFOLDED_CASES[i]); + for (int casef = 0; casef <= 1; casef++) { + suffix_prefix_run_folding_case(tester, + short_s, + medium_s, + long_s, + casef, + true /* diacf */, + UNFOLDED_CASES[i]); + } + bson_free(short_s); + bson_free(medium_s); + bson_free(long_s); } + // Run case folding and no folding for different sizes. Only unfolded size matters. 
+ char *short_s = build_random_string_to_fold(SHORT_LEN, SHORT_LEN); + char *medium_s = build_random_string_to_fold(MEDIUM_LEN, MEDIUM_LEN); + char *long_s = build_random_string_to_fold(LONG_LEN, LONG_LEN); + for (int casef = 0; casef <= 1; casef++) { + suffix_prefix_run_folding_case(tester, short_s, medium_s, long_s, casef, false /* diacf*/, 0); + } + bson_free(short_s); + bson_free(medium_s); + bson_free(long_s); } -static void _test_text_search_str_encode_suffix_prefix_ascii(_mongocrypt_tester_t *tester) { - test_text_search_str_encode_suffix_prefix(tester, short_string, medium_string, long_string); -} +static void substring_run_folding_case(_mongocrypt_tester_t *tester, + const char *short_s, + uint32_t short_unfolded_codepoint_len, + const char *medium_s, + uint32_t medium_unfolded_codepoint_len, + const char *long_s, + uint32_t long_unfolded_codepoint_len, + bool casef, + bool diacf, + int foldable_codepoints) { + // LB > 16 + test_nofold_substring_case_multiple_mlen(tester, + short_s, + 17, + 19, + short_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + // Simple cases + test_nofold_substring_case_multiple_mlen(tester, + short_s, + 2, + 4, + short_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + short_s, + 3, + 6, + short_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + // LB = UB + test_nofold_substring_case_multiple_mlen(tester, + short_s, + 2, + 2, + short_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + short_s, + 9, + 9, + short_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + // UB = len + test_nofold_substring_case_multiple_mlen(tester, + short_s, + 2, + 9, + short_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + // 16 > UB > len + test_nofold_substring_case_multiple_mlen(tester, + short_s, + 2, + 14, + short_unfolded_codepoint_len, + 
casef, + diacf, + foldable_codepoints); + // UB = 16 + test_nofold_substring_case_multiple_mlen(tester, + short_s, + 2, + 16, + short_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + // UB > 16 + test_nofold_substring_case_multiple_mlen(tester, + short_s, + 2, + 19, + short_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + // UB > 32 + test_nofold_substring_case_multiple_mlen(tester, + short_s, + 2, + 35, + short_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + // 16 >= LB > len + test_nofold_substring_case_multiple_mlen(tester, + short_s, + 12, + 19, + short_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + short_s, + 12, + 16, + short_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + short_s, + 16, + 19, + short_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + short_s, + 12, + 35, + short_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + short_s, + 16, + 35, + short_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); -static void _test_text_search_str_encode_suffix_prefix_utf8(_mongocrypt_tester_t *tester) { - test_text_search_str_encode_suffix_prefix(tester, short_unicode_string, medium_unicode_string, long_unicode_string); -} + // len = 16 cases + // LB > 16 + test_nofold_substring_case_multiple_mlen(tester, + medium_s, + 17, + 19, + medium_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + // Simple cases + test_nofold_substring_case_multiple_mlen(tester, + medium_s, + 2, + 4, + medium_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + medium_s, + 3, + 6, + medium_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + // LB 
= UB + test_nofold_substring_case_multiple_mlen(tester, + medium_s, + 2, + 2, + medium_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + medium_s, + 16, + 16, + medium_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + // UB = len + test_nofold_substring_case_multiple_mlen(tester, + medium_s, + 2, + 16, + medium_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + // UB > len + test_nofold_substring_case_multiple_mlen(tester, + medium_s, + 2, + 19, + medium_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + // UB = 32 + test_nofold_substring_case_multiple_mlen(tester, + medium_s, + 2, + 32, + medium_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + // UB > 32 + test_nofold_substring_case_multiple_mlen(tester, + medium_s, + 2, + 35, + medium_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + // LB = len + test_nofold_substring_case_multiple_mlen(tester, + medium_s, + 16, + 19, + medium_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + medium_s, + 16, + 35, + medium_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); -static void _test_text_search_str_encode_substring_ascii(_mongocrypt_tester_t *tester) { - test_text_search_str_encode_substring(tester, short_string, medium_string, long_string); + // len > 16 cases + // LB > 32 + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 33, + 38, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + // Simple cases + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 2, + 4, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 3, + 6, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + // LB < 16 <= UB <= len + 
test_nofold_substring_case_multiple_mlen(tester, + long_s, + 3, + 18, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 3, + 16, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 3, + 27, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + // 16 <= LB < UB <= len + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 18, + 24, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 16, + 24, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 18, + 27, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 16, + 27, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + // LB = UB + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 3, + 3, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 16, + 16, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 27, + 27, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + // 32 > UB > len + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 3, + 29, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 18, + 29, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + // UB = 32 + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 3, + 32, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + 
test_nofold_substring_case_multiple_mlen(tester, + long_s, + 18, + 32, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 27, + 32, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + // UB > 32 + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 3, + 35, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 18, + 35, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 27, + 32, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + // UB > 48 + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 3, + 49, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 18, + 49, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 27, + 32, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + // 32 >= LB > len + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 28, + 30, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 28, + 28, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 28, + 32, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 28, + 34, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 28, + 49, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + 
long_s, + 32, + 32, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 32, + 34, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); + test_nofold_substring_case_multiple_mlen(tester, + long_s, + 32, + 49, + long_unfolded_codepoint_len, + casef, + diacf, + foldable_codepoints); } -static void _test_text_search_str_encode_substring_utf8(_mongocrypt_tester_t *tester) { - test_text_search_str_encode_substring(tester, short_unicode_string, medium_unicode_string, long_unicode_string); +static void _test_text_search_str_encode_substring(_mongocrypt_tester_t *tester) { + unsigned int seed = (unsigned int)time(0); + TEST_PRINTF("Testing with seed: %u", seed); + srand(seed); + // Run diacritic folding and case+diacritic folding for a variety of folded/unfolded sizes. + for (uint32_t i = 0; i < sizeof(UNFOLDED_CASES) / sizeof(UNFOLDED_CASES[0]); i++) { + char *short_s = build_random_string_to_fold(SHORT_LEN, SHORT_LEN + UNFOLDED_CASES[i]); + char *medium_s = build_random_string_to_fold(MEDIUM_LEN, MEDIUM_LEN + UNFOLDED_CASES[i]); + char *long_s = build_random_string_to_fold(LONG_LEN, LONG_LEN + UNFOLDED_CASES[i]); + for (int casef = 0; casef <= 1; casef++) { + substring_run_folding_case(tester, + short_s, + SHORT_LEN + UNFOLDED_CASES[i], + medium_s, + MEDIUM_LEN + UNFOLDED_CASES[i], + long_s, + LONG_LEN + UNFOLDED_CASES[i], + casef, + true /* diacf */, + UNFOLDED_CASES[i]); + } + bson_free(short_s); + bson_free(medium_s); + bson_free(long_s); + } + // Run case folding and no folding for different sizes. Only unfolded size matters. 
+ char *short_s = build_random_string_to_fold(SHORT_LEN, SHORT_LEN); + char *medium_s = build_random_string_to_fold(MEDIUM_LEN, MEDIUM_LEN); + char *long_s = build_random_string_to_fold(LONG_LEN, LONG_LEN); + for (int casef = 0; casef <= 1; casef++) { + substring_run_folding_case(tester, + short_s, + SHORT_LEN, + medium_s, + MEDIUM_LEN, + long_s, + LONG_LEN, + casef, + false /* diacf */, + 0); + } + bson_free(short_s); + bson_free(medium_s); + bson_free(long_s); } static void _test_text_search_str_encode_multiple(_mongocrypt_tester_t *tester) { @@ -554,27 +1114,27 @@ static void _test_text_search_str_encode_multiple(_mongocrypt_tester_t *tester) mc_affix_set_iter_t it; mc_affix_set_iter_init(&it, sets->suffix_set); ASSERT(mc_affix_set_iter_next(&it, &str, &len, &count)); - ASSERT(len == 1); - ASSERT(*str == '9'); - ASSERT(count == 1); + ASSERT_CMPUINT32(len, ==, 1); + ASSERT_CMPUINT8((uint8_t)*str, ==, (uint8_t)'9'); + ASSERT_CMPUINT32(count, ==, 1); ASSERT(sets->prefix_set != NULL); mc_affix_set_iter_init(&it, sets->prefix_set); ASSERT(mc_affix_set_iter_next(&it, &str, &len, &count)); - ASSERT(len == 6); - ASSERT(0 == memcmp("123456", str, 6)); - ASSERT(count == 1); + ASSERT_CMPUINT32(len, ==, 6); + ASSERT_CMPINT(0, ==, memcmp("123456", str, 6)); + ASSERT_CMPUINT32(count, ==, 1); ASSERT(sets->substring_set != NULL); mc_substring_set_iter_t ss_it; mc_substring_set_iter_init(&ss_it, sets->substring_set); ASSERT(mc_substring_set_iter_next(&ss_it, &str, &len, &count)); - ASSERT(len == 9); - ASSERT(0 == memcmp("123456789", str, 9)); - ASSERT(count == 1); + ASSERT_CMPUINT32(len, ==, 9); + ASSERT_CMPINT(0, ==, memcmp("123456789", str, 9)); + ASSERT_CMPUINT32(count, ==, 1); - ASSERT(sets->exact.len == 9); - ASSERT(0 == memcmp(sets->exact.data, str, 9)); + ASSERT_CMPUINT32(sets->exact.len, ==, 9); + ASSERT_CMPINT(0, ==, memcmp(sets->exact.data, str, 9)); mc_str_encode_sets_destroy(sets); } @@ -593,28 +1153,30 @@ static void 
_test_text_search_str_encode_bad_string(_mongocrypt_tester_t *tester } static void _test_text_search_str_encode_empty_string(_mongocrypt_tester_t *tester) { - test_nofold_suffix_prefix_case(tester, "", 1, 1, 1); - test_nofold_suffix_prefix_case(tester, "", 1, 2, 1); - test_nofold_suffix_prefix_case(tester, "", 2, 3, 1); - test_nofold_suffix_prefix_case(tester, "", 1, 16, 1); - test_nofold_suffix_prefix_case(tester, "", 1, 17, 1); - test_nofold_suffix_prefix_case(tester, "", 2, 16, 1); - test_nofold_suffix_prefix_case(tester, "", 2, 17, 1); - - test_nofold_substring_case_multiple_mlen(tester, "", 1, 1, 1); - test_nofold_substring_case_multiple_mlen(tester, "", 1, 2, 1); - test_nofold_substring_case_multiple_mlen(tester, "", 2, 3, 1); - test_nofold_substring_case_multiple_mlen(tester, "", 1, 16, 1); - test_nofold_substring_case_multiple_mlen(tester, "", 1, 17, 1); - test_nofold_substring_case_multiple_mlen(tester, "", 2, 16, 1); - test_nofold_substring_case_multiple_mlen(tester, "", 2, 17, 1); + for (int casef = 0; casef <= 1; casef++) { + for (int diacf = 0; diacf <= 1; diacf++) { + test_nofold_suffix_prefix_case(tester, "", 1, 1, casef, diacf, 0); + test_nofold_suffix_prefix_case(tester, "", 1, 2, casef, diacf, 0); + test_nofold_suffix_prefix_case(tester, "", 2, 3, casef, diacf, 0); + test_nofold_suffix_prefix_case(tester, "", 1, 16, casef, diacf, 0); + test_nofold_suffix_prefix_case(tester, "", 1, 17, casef, diacf, 0); + test_nofold_suffix_prefix_case(tester, "", 2, 16, casef, diacf, 0); + test_nofold_suffix_prefix_case(tester, "", 2, 17, casef, diacf, 0); + + test_nofold_substring_case_multiple_mlen(tester, "", 1, 1, 1, casef, diacf, 0); + test_nofold_substring_case_multiple_mlen(tester, "", 1, 2, 1, casef, diacf, 0); + test_nofold_substring_case_multiple_mlen(tester, "", 2, 3, 1, casef, diacf, 0); + test_nofold_substring_case_multiple_mlen(tester, "", 1, 16, 1, casef, diacf, 0); + test_nofold_substring_case_multiple_mlen(tester, "", 1, 17, 1, casef, diacf, 0); + 
test_nofold_substring_case_multiple_mlen(tester, "", 2, 16, 1, casef, diacf, 0); + test_nofold_substring_case_multiple_mlen(tester, "", 2, 17, 1, casef, diacf, 0); + } + } } void _mongocrypt_tester_install_text_search_str_encode(_mongocrypt_tester_t *tester) { - INSTALL_TEST(_test_text_search_str_encode_suffix_prefix_ascii); - INSTALL_TEST(_test_text_search_str_encode_suffix_prefix_utf8); - INSTALL_TEST(_test_text_search_str_encode_substring_ascii); - INSTALL_TEST(_test_text_search_str_encode_substring_utf8); + INSTALL_TEST(_test_text_search_str_encode_suffix_prefix); + INSTALL_TEST(_test_text_search_str_encode_substring); INSTALL_TEST(_test_text_search_str_encode_multiple); INSTALL_TEST(_test_text_search_str_encode_bad_string); INSTALL_TEST(_test_text_search_str_encode_empty_string); diff --git a/test/test-mongocrypt.c b/test/test-mongocrypt.c index c27e7c62e..ba622d139 100644 --- a/test/test-mongocrypt.c +++ b/test/test-mongocrypt.c @@ -924,6 +924,7 @@ int main(int argc, char **argv) { _mongocrypt_tester_install_named_kms_providers(&tester); _mongocrypt_tester_install_mc_cmp(&tester); _mongocrypt_tester_install_text_search_str_encode(&tester); + _mongocrypt_tester_install_unicode_fold(&tester); #ifdef MONGOCRYPT_ENABLE_CRYPTO_COMMON_CRYPTO char osversion[32]; diff --git a/test/test-mongocrypt.h b/test/test-mongocrypt.h index c46ede530..eb0683ca3 100644 --- a/test/test-mongocrypt.h +++ b/test/test-mongocrypt.h @@ -216,6 +216,8 @@ void _mongocrypt_tester_install_mc_cmp(_mongocrypt_tester_t *tester); void _mongocrypt_tester_install_text_search_str_encode(_mongocrypt_tester_t *tester); +void _mongocrypt_tester_install_unicode_fold(_mongocrypt_tester_t *tester); + /* Conveniences for getting test data. */ /* Get a temporary bson_t from a JSON string. Do not free it. 
*/ diff --git a/test/test-unicode-fold.c b/test/test-unicode-fold.c new file mode 100644 index 000000000..d0f78d172 --- /dev/null +++ b/test/test-unicode-fold.c @@ -0,0 +1,97 @@ +/* + * Copyright 2025-present MongoDB, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mongocrypt-status-private.h" +#include "test-mongocrypt-assert.h" +#include "test-mongocrypt.h" +#include "unicode/fold.h" + +#define TEST_UNICODE_FOLD(expected, expected_len, input, input_len, options) \ + do { \ + char *_buf; \ + size_t _len; \ + ASSERT_OR_PRINT(unicode_fold(input, input_len, options, &_buf, &_len, status), status); \ + TEST_PRINTF("Testing: input=%.*s, expected=%.*s, output=%.*s\n", \ + (int)input_len, \ + input, \ + (int)expected_len, \ + expected, \ + (int)_len, \ + _buf); \ + ASSERT_CMPSIZE_T(_len, ==, expected_len); \ + ASSERT_CMPBYTES((uint8_t *)_buf, _len, (uint8_t *)expected, expected_len); \ + ASSERT_CMPUINT8((uint8_t)(_buf[_len]), ==, 0); \ + bson_free(_buf); \ + } while (0) + +#define TEST_UNICODE_FOLD_ALL_CASES(input, case_folded, dia_folded, both_folded) \ + do { \ + size_t _input_len = strlen(input); \ + size_t _cf_len = strlen(case_folded); \ + size_t _df_len = strlen(dia_folded); \ + size_t _both_len = strlen(both_folded); \ + TEST_UNICODE_FOLD(case_folded, _cf_len, input, _input_len, kUnicodeFoldToLower); \ + TEST_UNICODE_FOLD(dia_folded, _df_len, input, _input_len, kUnicodeFoldRemoveDiacritics); \ + TEST_UNICODE_FOLD(both_folded, \ + 
_both_len, \ + input, \ + _input_len, \ + kUnicodeFoldToLower | kUnicodeFoldRemoveDiacritics); \ + } while (0) + +static void test_unicode_fold(_mongocrypt_tester_t *tester) { + mongocrypt_status_t *status = mongocrypt_status_new(); + // Test all ascii chars. + char *buf1 = bson_malloc0(2); + char *buf2 = bson_malloc0(2); + for (unsigned char ch = 0; ch <= 0x7f; ch++) { + buf1[0] = ch; + if (ch >= 'A' && ch <= 'Z') { + // Caps + buf2[0] = ch + 0x20; + TEST_UNICODE_FOLD_ALL_CASES(buf1, buf2, buf1, buf2); + } else if (ch == '^' || ch == '`') { + // Diacritics + TEST_UNICODE_FOLD_ALL_CASES(buf1, buf1, "", ""); + } else { + // Characters with no transformations + TEST_UNICODE_FOLD_ALL_CASES(buf1, buf1, buf1, buf1); + } + } + bson_free(buf1); + bson_free(buf2); + TEST_UNICODE_FOLD_ALL_CASES("abc", "abc", "abc", "abc"); + // Tests of composed unicode + TEST_UNICODE_FOLD_ALL_CASES("¿CUÁNTOS AÑOS tienes Tú?", + "¿cuántos años tienes tú?", + "¿CUANTOS ANOS tienes Tu?", + "¿cuantos anos tienes tu?"); + TEST_UNICODE_FOLD_ALL_CASES("СКОЛЬКО ТЕБЕ ЛЕТ?", "сколько тебе лет?", "СКОЛЬКО ТЕБЕ ЛЕТ?", "сколько тебе лет?"); + TEST_UNICODE_FOLD_ALL_CASES("Πόσο χρονών είσαι?", "πόσο χρονών είσαι?", "Ποσο χρονων εισαι?", "ποσο χρονων εισαι?"); + // Tests of decomposed unicode + TEST_UNICODE_FOLD_ALL_CASES("Cafe\xcc\x81", "cafe\xcc\x81", "Cafe", "cafe"); + TEST_UNICODE_FOLD_ALL_CASES("CafE\xcc\x81", "cafe\xcc\x81", "CafE", "cafe"); + // Test string with null bytes + TEST_UNICODE_FOLD("fo\0bar", 6, "fo\0bar", 6, kUnicodeFoldToLower | kUnicodeFoldRemoveDiacritics); + // Test strings with folded representations longer in bytes than the input + TEST_UNICODE_FOLD("\xe2\xb1\xa6", 3, "\xc8\xbe", 2, kUnicodeFoldToLower); + TEST_UNICODE_FOLD("\xf0\xa4\x8b\xae", 4, "\xef\xa9\xac", 3, kUnicodeFoldRemoveDiacritics); + mongocrypt_status_destroy(status); +} + +void _mongocrypt_tester_install_unicode_fold(_mongocrypt_tester_t *tester) { + INSTALL_TEST(test_unicode_fold); +}