From f2774810fd9cd75b5216743a26965f9448a8fb72 Mon Sep 17 00:00:00 2001 From: lijianglin Date: Mon, 8 May 2023 14:39:03 +0800 Subject: [PATCH] add GB18030-2022 charmap achieve GB18030-2022 level 2 after add and change some transcoding relationship of GB18030-2022.Details are as follows: add 25 transcoding relationship UE81E 0x82359037 UE826 0x82359038 UE82B 0x82359039 UE82C 0x82359130 UE832 0x82359131 UE843 0x82359132 UE854 0x82359133 UE864 0x82359134 UE78D 0x84318236 UE78F 0x84318237 UE78E 0x84318238 UE790 0x84318239 UE791 0x84318330 UE792 0x84318331 UE793 0x84318332 UE794 0x84318333 UE795 0x84318334 UE796 0x84318335 UE816 0xfe51 UE817 0xfe52 UE818 0xfe53 UE831 0xfe6c UE83B 0xfe76 UE855 0xfe91 change 6 transcoding relationship U20087 0x95329031 U20089 0x95329033 U200CC 0x95329730 U215D7 0x9536b937 U2298F 0x9630ba35 U241FE 0x9635b630 --- iconvdata/gb18030.c | 52 +++++++++++++++++++++++---- localedata/charmaps/GB18030 | 70 ++++++++++++++++--------------------- 2 files changed, 76 insertions(+), 46 deletions(-) diff --git a/iconvdata/gb18030.c b/iconvdata/gb18030.c index 0b03b9bb..7dca9b09 100644 --- a/iconvdata/gb18030.c +++ b/iconvdata/gb18030.c @@ -6021,14 +6021,17 @@ static const uint16_t __twobyte_to_ucs[] = [0x5dc6] = 0xfa14, [0x5dc7] = 0xfa18, [0x5dc8] = 0xfa1f, [0x5dc9] = 0xfa20, [0x5dca] = 0xfa21, [0x5dcb] = 0xfa23, [0x5dcc] = 0xfa24, [0x5dcd] = 0xfa27, [0x5dce] = 0xfa28, [0x5dcf] = 0xfa29, [0x5dd0] = 0x2e81, [0x5dd4] = 0x2e84, + [0x5dd1] = 0xe816, [0x5dd2] = 0xe817, [0x5dd3] = 0xe818, [0x5dd5] = 0x3473, [0x5dd6] = 0x3447, [0x5dd7] = 0x2e88, [0x5dd8] = 0x2e8b, [0x5dd9] = 0x9fb4, [0x5dda] = 0x359e, [0x5ddb] = 0x361a, [0x5ddc] = 0x360e, [0x5ddd] = 0x2e8c, [0x5dde] = 0x2e97, [0x5ddf] = 0x396e, [0x5de0] = 0x3918, [0x5de1] = 0x9fb5, [0x5de2] = 0x39cf, [0x5de3] = 0x39df, [0x5de4] = 0x3a73, [0x5de5] = 0x39d0, [0x5de6] = 0x9fb6, [0x5de7] = 0x9fb7, [0x5de8] = 0x3b4e, [0x5de9] = 0x3c6e, [0x5dea] = 0x3ce0, [0x5deb] = 0x2ea7, [0x5ded] = 0x9fb8, + [0x5dec] = 0xe831, [0x5dee] = 0x2eaa, [0x5def] = 0x4056, [0x5df0] = 0x415f, [0x5df1] = 0x2eae, [0x5df2] = 0x4337, [0x5df3] = 0x2eb3, [0x5df4] = 0x2eb6, [0x5df5] = 0x2eb7, + [0x5df6] = 0xe83b, [0x5df7] = 0x43b1, [0x5df8] = 0x43ac, [0x5df9] = 0x2ebb, [0x5dfa] = 0x43dd, [0x5dfb] = 0x44d6, [0x5dfc] = 0x4661, [0x5dfd] = 0x464c, [0x5dfe] = 0x9fb9, [0x5e00] = 0x4723, [0x5e01] = 0x4729, [0x5e02] = 0x477c, [0x5e03] = 0x478d, @@ -6036,6 +6039,7 @@ static const uint16_t __twobyte_to_ucs[] = [0x5e08] = 0x4982, [0x5e09] = 0x4983, [0x5e0a] = 0x4985, [0x5e0b] = 0x4986, [0x5e0c] = 0x499f, [0x5e0d] = 0x499b, [0x5e0e] = 0x49b7, [0x5e0f] = 0x49b6, [0x5e10] = 0x9fba, [0x5e12] = 0x4ca3, [0x5e13] = 0x4c9f, [0x5e14] = 0x4ca0, + [0x5e11] = 0xe855, [0x5e15] = 0x4ca1, [0x5e16] = 0x4c77, [0x5e17] = 0x4ca2, [0x5e18] = 0x4d13, [0x5e19] = 0x4d14, [0x5e1a] = 0x4d15, [0x5e1b] = 0x4d16, [0x5e1c] = 0x4d17, [0x5e1d] = 0x4d18, [0x5e1e] = 0x4d19, [0x5e1f] = 0x4dae, [0x5e20] = 0x9fbb, @@ -8692,6 +8696,8 @@ static const uint16_t __fourbyte_to_ucs[0x99e2 - 6637 - 2110 - 14404 - 4295] = [0x283c] = 0x9faa, [0x283d] = 0x9fab, [0x283e] = 0x9fac, [0x283f] = 0x9fad, [0x2840] = 0x9fae, [0x2841] = 0x9faf, [0x2842] = 0x9fb0, [0x2843] = 0x9fb1, [0x2844] = 0x9fb2, [0x2845] = 0x9fb3, [0x284e] = 0xe76c, [0x284f] = 0xe7c8, + [0x2846] = 0xe81e, [0x2847] = 0xe826, [0x2848] = 0xe82b, [0x2849] = 0xe82c, + [0x284a] = 0xe832, [0x284b] = 0xe843, [0x284c] = 0xe854, [0x284d] = 0xe864, [0x2850] = 0xe7e7, [0x2851] = 0xe7e8, [0x2852] = 0xe7e9, [0x2853] = 0xe7ea, [0x2854] = 0xe7eb, [0x2855] = 0xe7ec, [0x2856] = 0xe7ed, [0x2857] = 0xe7ee, [0x2858] = 0xe7ef, [0x2859] = 0xe7f0, [0x285a] = 0xe7f1, [0x285b] = 0xe7f2, @@ -9020,6 +9026,9 @@ static const uint16_t __fourbyte_to_ucs[0x99e2 - 6637 - 2110 - 14404 - 4295] = [0x2d64] = 0xfe06, [0x2d65] = 0xfe07, [0x2d66] = 0xfe08, [0x2d67] = 0xfe09, [0x2d68] = 0xfe0a, [0x2d69] = 0xfe0b, [0x2d6a] = 0xfe0c, [0x2d6b] = 0xfe0d, [0x2d6c] = 0xfe0e, [0x2d6d] = 0xfe0f, [0x2d78] = 0xfe1a, [0x2d79] = 0xfe1b, + [0x2d6e] = 0xe78d, [0x2d6f] = 0xe78f, [0x2d70] = 0xe78e, [0x2d71] = 0xe790, + [0x2d72] = 0xe791, [0x2d73] = 0xe792, [0x2d74] = 0xe793, [0x2d75] = 0xe794, + [0x2d76] = 0xe795, [0x2d77] = 0xe796, [0x2d7a] = 0xfe1c, [0x2d7b] = 0xfe1d, [0x2d7c] = 0xfe1e, [0x2d7d] = 0xfe1f, [0x2d7e] = 0xfe20, [0x2d7f] = 0xfe21, [0x2d80] = 0xfe22, [0x2d81] = 0xfe23, [0x2d82] = 0xfe24, [0x2d83] = 0xfe25, [0x2d84] = 0xfe26, [0x2d85] = 0xfe27, @@ -23449,6 +23458,10 @@ static const unsigned char __ucs_to_gb18030_tab2[][2] = [0x0786] = "\xa6\xba", [0x0787] = "\xa6\xbb", [0x0788] = "\xa6\xbc", [0x0789] = "\xa6\xbd", [0x078a] = "\xa6\xbe", [0x078b] = "\xa6\xbf", [0x078c] = "\xa6\xc0", [0x0797] = "\xa6\xf6", [0x0798] = "\xa6\xf7", + [0x078d] = "\x7b\x84", [0x078e] = "\x7b\x86", [0x078f] = "\x7b\x85", + [0x0790] = "\x7b\x87", [0x0791] = "\x7b\x88", [0x0792] = "\x7b\x89", + [0x0793] = "\x7b\x8a", [0x0794] = "\x7b\x8b", [0x0795] = "\x7b\x8c", + [0x0796] = "\x7b\x8d", [0x0799] = "\xa6\xf8", [0x079a] = "\xa6\xf9", [0x079b] = "\xa6\xfa", [0x079c] = "\xa6\xfb", [0x079d] = "\xa6\xfc", [0x079e] = "\xa6\xfd", [0x079f] = "\xa6\xfe", [0x07a0] = "\xa7\xc2", [0x07a1] = "\xa7\xc3", @@ -23491,28 +23504,37 @@ static const unsigned char __ucs_to_gb18030_tab2[][2] = [0x080e] = "\xa9\xfd", [0x080f] = "\xa9\xfe", [0x0810] = "\xd7\xfa", [0x0811] = "\xd7\xfb", [0x0812] = "\xd7\xfc", [0x0813] = "\xd7\xfd", [0x0814] = "\xd7\xfe", [0x0815] = "\x65\xac", [0x0819] = "\x65\xad", + [0x0816] = "\xfe\x51", [0x0817] = "\xfe\x52", [0x0818] = "\xfe\x53", [0x081a] = "\x65\xae", [0x081b] = "\x65\xaf", [0x081c] = "\x65\xb0", [0x081d] = "\x65\xb1", [0x081f] = "\x65\xb2", [0x0820] = "\x65\xb3", + [0x081e] = "\x2d\x51", [0x0821] = "\x65\xb4", [0x0822] = "\x65\xb5", [0x0823] = "\x65\xb6", [0x0824] = "\x65\xb7", [0x0825] = "\x65\xb8", [0x0827] = "\x65\xb9", + [0x0826] = "\x2d\x52", [0x0828] = "\x65\xba", [0x0829] = "\x65\xbb", [0x082a] = "\x65\xbc", + [0x082b] = "\x2d\x53", [0x082c] = "\x2d\x54", [0x082d] = "\x65\xbd", [0x082e] = "\x65\xbe", [0x082f] = "\x65\xbf", [0x0830] = "\x65\xc0", [0x0833] = "\x65\xc1", [0x0834] = "\x65\xc2", + [0x0831] = "\xfe\x6c", [0x0832] = "\x2d\x55", [0x0835] = "\x65\xc3", [0x0836] = "\x65\xc4", [0x0837] = "\x65\xc5", [0x0838] = "\x65\xc6", [0x0839] = "\x65\xc7", [0x083a] = "\x65\xc8", + [0x083b] = "\xfe\x76", [0x083c] = "\x65\xc9", [0x083d] = "\x65\xca", [0x083e] = "\x65\xcb", [0x083f] = "\x65\xcc", [0x0840] = "\x65\xcd", [0x0841] = "\x65\xce", [0x0842] = "\x65\xcf", [0x0844] = "\x65\xd0", [0x0845] = "\x65\xd1", + [0x0843] = "\x2d\x56", [0x0846] = "\x65\xd2", [0x0847] = "\x65\xd3", [0x0848] = "\x65\xd4", [0x0849] = "\x65\xd5", [0x084a] = "\x65\xd6", [0x084b] = "\x65\xd7", [0x084c] = "\x65\xd8", [0x084d] = "\x65\xd9", [0x084e] = "\x65\xda", [0x084f] = "\x65\xdb", [0x0850] = "\x65\xdc", [0x0851] = "\x65\xdd", [0x0852] = "\x65\xde", [0x0853] = "\x65\xdf", [0x0856] = "\x65\xe0", + [0x0854] = "\x2d\x57", [0x0855] = "\xfe\x91", [0x0857] = "\x65\xe1", [0x0858] = "\x65\xe2", [0x0859] = "\x65\xe3", [0x085a] = "\x65\xe4", [0x085b] = "\x65\xe5", [0x085c] = "\x65\xe6", [0x085d] = "\x65\xe7", [0x085e] = "\x65\xe8", [0x085f] = "\x65\xe9", [0x0860] = "\x65\xea", [0x0861] = "\x65\xeb", [0x0862] = "\x65\xec", [0x0863] = "\x65\xed", [0x0865] = "\xfd\x9c", [0x0866] = "\x76\xb5", + [0x0864] = "\x2d\x58", [0x0867] = "\x76\xb6", [0x0868] = "\x76\xb7", [0x0869] = "\x76\xb8", [0x086a] = "\x76\xb9", [0x086b] = "\x76\xba", [0x086c] = "\x76\xbb", [0x086d] = "\x76\xbc", [0x086e] = "\x76\xbd", [0x086f] = "\x76\xbe", @@ -24331,17 +24353,35 @@ static const unsigned char __ucs_to_gb18030_tab2[][2] = len = 4; \ } \ else if (ch == 0x20087) \ - cp = (const unsigned char *) "\xfe\x51"; \ + { \ + idx = 0x3E2CF; \ + len = 4; \ + } \ else if (ch == 0x20089) \ - cp = (const unsigned char *) "\xfe\x52"; \ + { \ + idx = 0x3E2D1; \ + len = 4; \ + } \ else if (ch == 0x200CC) \ - cp = (const unsigned char *) "\xfe\x53"; \ + { \ + idx = 0x3E314; \ + len = 4; \ + } \ else if (ch == 0x215d7) \ - cp = (const unsigned char *) "\xfe\x6c"; \ + { \ + idx = 0x3F81F; \ + len = 4; \ + } \ else if (ch == 0x2298F) \ - cp = (const unsigned char *) "\xfe\x76"; \ + { \ + idx = 0x40BD7; \ + len = 4; \ + } \ else if (ch == 0x241FE) \ - cp = (const unsigned char *) "\xfe\x91"; \ + { \ + idx = 0x42446; \ + len = 4; \ + } \ else if (ch >= 0x10000 && ch <= 0x10FFFF) \ { \ idx = ch + 0x1E248; \ diff --git a/localedata/charmaps/GB18030 b/localedata/charmaps/GB18030 index ad6728c5..228e63f4 100644 --- a/localedata/charmaps/GB18030 +++ b/localedata/charmaps/GB18030 @@ -57249,17 +57249,16 @@ CHARMAP % The recommendation to use the non-PUA code points, where available, % is based on "CJKV Information Processing" 2nd Ed. by Dr. Ken Lunde. % -% These 10 PUA mappings use equivalents from to . -% /xa6/xd9 -% /xa6/xda -% /xa6/xdb -% /xa6/xdc -% /xa6/xdd -% /xa6/xde -% /xa6/xdf -% /xa6/xec -% /xa6/xed -% /xa6/xf3 + /x84/x31/x82/x36 + /x84/x31/x82/x38 + /x84/x31/x82/x37 + /x84/x31/x82/x39 + /x84/x31/x83/x30 + /x84/x31/x83/x31 + /x84/x31/x83/x32 + /x84/x31/x83/x33 + /x84/x31/x83/x34 + /x84/x31/x83/x35 /xa6/xf6 /xa6/xf7 /xa6/xf8 @@ -57387,17 +57386,15 @@ CHARMAP /xd7/xfd /xd7/xfe /x83/x36/xc9/x34 -% These 3 PUA mappings use equivalents , and . -% /xfe/x51 -% /xfe/x52 -% /xfe/x53 + /xfe/x51 + /xfe/x52 + /xfe/x53 /x83/x36/xc9/x35 /x83/x36/xc9/x36 /x83/x36/xc9/x37 /x83/x36/xc9/x38 /x83/x36/xc9/x39 -% This 1 PUA mapping uses the equivalent . -% /xfe/x59 + /x82/x35/x90/x37 /x83/x36/xca/x30 /x83/x36/xca/x31 /x83/x36/xca/x32 @@ -57405,22 +57402,19 @@ CHARMAP /x83/x36/xca/x34 /x83/x36/xca/x35 /x83/x36/xca/x36 -% This 1 PUA mapping uses the equivalent . -% /xfe/x61 + /x82/x35/x90/x38 /x83/x36/xca/x37 /x83/x36/xca/x38 /x83/x36/xca/x39 /x83/x36/xcb/x30 -% These 2 PUA mappings use the equivalents and . -% /xfe/x66 -% /xfe/x67 + /x82/x35/x90/x39 + /x82/x35/x91/x30 /x83/x36/xcb/x31 /x83/x36/xcb/x32 /x83/x36/xcb/x33 /x83/x36/xcb/x34 -% These 2 PUA mappings use the equivalents and . -% /xfe/x6c -% /xfe/x6d + /xfe/x6c + /x82/x35/x91/x31 /x83/x36/xcb/x35 /x83/x36/xcb/x36 /x83/x36/xcb/x37 @@ -57429,8 +57423,7 @@ CHARMAP /x83/x36/xcc/x30 /x83/x36/xcc/x31 /x83/x36/xcc/x32 -% This 1 PUA mapping uses the equivalent . -% /xfe/x76 + /xfe/x76 /x83/x36/xcc/x33 /x83/x36/xcc/x34 /x83/x36/xcc/x35 @@ -57438,8 +57431,7 @@ CHARMAP /x83/x36/xcc/x37 /x83/x36/xcc/x38 /x83/x36/xcc/x39 -% This 1 PUA mapping uses the equivalent . -% /xfe/x7e + /x82/x35/x91/x32 /x83/x36/xcd/x30 /x83/x36/xcd/x31 /x83/x36/xcd/x32 @@ -57456,9 +57448,8 @@ CHARMAP /x83/x36/xce/x33 /x83/x36/xce/x34 /x83/x36/xce/x35 -% These 2 PUA mappings use the equivalents and . -% /xfe/x90 -% /xfe/x91 + /x82/x35/x91/x33 + /xfe/x91 /x83/x36/xce/x36 /x83/x36/xce/x37 /x83/x36/xce/x38 @@ -57473,8 +57464,7 @@ CHARMAP /x83/x36/xcf/x37 /x83/x36/xcf/x38 /x83/x36/xcf/x39 -% This 1 PUA mapping uses the equivalent . -% /xfe/xa0 + /x82/x35/x91/x34 /x83/x36/xd0/x30 /x83/x36/xd0/x31 /x83/x36/xd0/x32 @@ -70448,9 +70438,9 @@ CHARMAP .. /x95/x32/x8e/x30 .. /x95/x32/x8f/x30 /x95/x32/x90/x30 - /xfe/x51 + /x95/x32/x90/x31 /x95/x32/x90/x32 - /xfe/x52 + /x95/x32/x90/x33 .. /x95/x32/x90/x34 .. /x95/x32/x91/x30 .. /x95/x32/x92/x30 @@ -70458,7 +70448,7 @@ CHARMAP .. /x95/x32/x94/x30 .. /x95/x32/x95/x30 .. /x95/x32/x96/x30 - /xfe/x53 + /x95/x32/x97/x30 .. /x95/x32/x97/x31 .. /x95/x32/x98/x30 .. /x95/x32/x99/x30 @@ -70998,7 +70988,7 @@ CHARMAP .. /x95/x36/xb7/x30 .. /x95/x36/xb8/x30 .. /x95/x36/xb9/x30 - /xfe/x6c + /x95/x36/xb9/x37 .. /x95/x36/xb9/x38 .. /x95/x36/xba/x30 .. /x95/x36/xbb/x30 @@ -71505,7 +71495,7 @@ CHARMAP .. /x96/x30/xb8/x30 .. /x96/x30/xb9/x30 .. /x96/x30/xba/x30 - /xfe/x76 + /x96/x30/xba/x35 .. /x96/x30/xba/x36 .. /x96/x30/xbb/x30 .. /x96/x30/xbc/x30 @@ -72132,7 +72122,7 @@ CHARMAP .. /x96/x35/xb3/x30 .. /x96/x35/xb4/x30 .. /x96/x35/xb5/x30 - /xfe/x91 + /x96/x35/xb6/x30 .. /x96/x35/xb6/x31 .. /x96/x35/xb7/x30 .. /x96/x35/xb8/x30 -- 2.33.0