252 lines
8.0 KiB
Diff
252 lines
8.0 KiB
Diff
|
|
From c580e6466d6da8262820cdbad19f32c5546226cf Mon Sep 17 00:00:00 2001
|
||
|
|
From: Carlos O'Donell <carlos@redhat.com>
|
||
|
|
Date: Fri, 27 Mar 2020 17:03:36 -0400
|
||
|
|
Subject: [PATCH] Reset converter state after second wchar_t output (Bug 25734)
|
||
|
|
|
||
|
|
An input BIG5-HKSCS character may be converted into at most 2 wchar_t
|
||
|
|
characters. After outputting the second whcar_t character (which was
|
||
|
|
saved in the converter state) we must reset the state. If we fail
|
||
|
|
to reset the state we will be stuck continually copying that
|
||
|
|
character to the output even if we have further input to consider.
|
||
|
|
|
||
|
|
We add a new test case that covers the 4 BIG5-HKSCS characters
|
||
|
|
that may become 2 wchar_t characters.
|
||
|
|
|
||
|
|
Reviewed-by: Tom Honermann <tom@honermann.net>
|
||
|
|
---
|
||
|
|
iconvdata/Makefile | 17 ++-
|
||
|
|
iconvdata/big5hkscs.c | 3 +
|
||
|
|
iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c | 160 ++++++++++++++++++++++
|
||
|
|
3 files changed, 176 insertions(+), 4 deletions(-)
|
||
|
|
create mode 100644 iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c
|
||
|
|
|
||
|
|
diff --git a/iconvdata/Makefile b/iconvdata/Makefile
|
||
|
|
index c83962f351b..4ec2741cdce 100644
|
||
|
|
--- a/iconvdata/Makefile
|
||
|
|
+++ b/iconvdata/Makefile
|
||
|
|
@@ -73,7 +73,7 @@ modules.so := $(addsuffix .so, $(modules))
|
||
|
|
ifeq (yes,$(build-shared))
|
||
|
|
tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \
|
||
|
|
tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \
|
||
|
|
- bug-iconv10 bug-iconv11 bug-iconv12
|
||
|
|
+ bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4
|
||
|
|
ifeq ($(have-thread-library),yes)
|
||
|
|
tests += bug-iconv3
|
||
|
|
endif
|
||
|
|
@@ -275,16 +275,21 @@ endif
|
||
|
|
endif
|
||
|
|
endif
|
||
|
|
|
||
|
|
-include ../Rules
|
||
|
|
-
|
||
|
|
ifeq ($(run-built-tests),yes)
|
||
|
|
-LOCALES := de_DE.UTF-8
|
||
|
|
+LOCALES := \
|
||
|
|
+ de_DE.UTF-8 \
|
||
|
|
+ zh_HK.BIG5-HKSCS \
|
||
|
|
+ $(NULL)
|
||
|
|
+
|
||
|
|
include ../gen-locales.mk
|
||
|
|
|
||
|
|
$(objpfx)bug-iconv6.out: $(gen-locales)
|
||
|
|
$(objpfx)tst-iconv7.out: $(gen-locales)
|
||
|
|
+$(objpfx)tst-iconv-big5-hkscs-to-2ucs4.out: $(gen-locales)
|
||
|
|
endif
|
||
|
|
|
||
|
|
+include ../Rules
|
||
|
|
+
|
||
|
|
# Set libof-* for each routine.
|
||
|
|
cpp-srcs-left := $(modules) $(generated-modules) $(libJIS-routines) \
|
||
|
|
$(libKSC-routines) $(libGB-routines) $(libCNS-routines) \
|
||
|
|
@@ -340,3 +345,7 @@ tst-tables-clean:
|
||
|
|
|
||
|
|
$(objpfx)gconv-modules: gconv-modules
|
||
|
|
cat $(sysdeps-gconv-modules) $^ > $@
|
||
|
|
+
|
||
|
|
+# Test requires BIG5HKSCS.
|
||
|
|
+$(objpfx)tst-iconv-big5-hkscs-to-2ucs4.out: $(objpfx)gconv-modules \
|
||
|
|
+ $(addprefix $(objpfx),$(modules.so))
|
||
|
|
diff --git a/iconvdata/big5hkscs.c b/iconvdata/big5hkscs.c
|
||
|
|
index 01fcfeba76b..ef325119b18 100644
|
||
|
|
--- a/iconvdata/big5hkscs.c
|
||
|
|
+++ b/iconvdata/big5hkscs.c
|
||
|
|
@@ -17895,6 +17895,9 @@ static struct
|
||
|
|
else \
|
||
|
|
++inptr; \
|
||
|
|
} \
|
||
|
|
+ else \
|
||
|
|
+ /* Clear the queue and proceed to output the saved character. */ \
|
||
|
|
+ *statep = 0; \
|
||
|
|
\
|
||
|
|
put32 (outptr, ch); \
|
||
|
|
outptr += 4; \
|
||
|
|
diff --git a/iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c b/iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c
|
||
|
|
new file mode 100644
|
||
|
|
index 00000000000..8389adebf27
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c
|
||
|
|
@@ -0,0 +1,160 @@
|
||
|
|
+/* Verify the BIG5HKSCS outputs that generate 2 wchar_t's (Bug 25734).
|
||
|
|
+ Copyright (C) 2020 Free Software Foundation, Inc.
|
||
|
|
+ This file is part of the GNU C Library.
|
||
|
|
+
|
||
|
|
+ The GNU C Library is free software; you can redistribute it and/or
|
||
|
|
+ modify it under the terms of the GNU Lesser General Public
|
||
|
|
+ License as published by the Free Software Foundation; either
|
||
|
|
+ version 2.1 of the License, or (at your option) any later version.
|
||
|
|
+
|
||
|
|
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
|
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
|
|
+ Lesser General Public License for more details.
|
||
|
|
+
|
||
|
|
+ You should have received a copy of the GNU Lesser General Public
|
||
|
|
+ License along with the GNU C Library; if not, see
|
||
|
|
+ <https://www.gnu.org/licenses/>. */
|
||
|
|
+
|
||
|
|
+#include <stdio.h>
|
||
|
|
+#include <string.h>
|
||
|
|
+#include <locale.h>
|
||
|
|
+#include <wchar.h>
|
||
|
|
+#include <support/check.h>
|
||
|
|
+#include <support/support.h>
|
||
|
|
+
|
||
|
|
+/* A few BIG5-HKSCS characters map in two unicode code points.
|
||
|
|
+ They are:
|
||
|
|
+ /x88/x62 => <U00CA><U0304>
|
||
|
|
+ /x88/x64 => <U00CA><U030C>
|
||
|
|
+ /x88/xa3 => <U00EA><U0304>
|
||
|
|
+ /x88/xa5 => <U00EA><U030C>
|
||
|
|
+ Each of these is special cased in iconvdata/big5hkscs.c.
|
||
|
|
+ This test ensures that we correctly reset the shift state after
|
||
|
|
+ outputting any of these characters. We do this by converting
|
||
|
|
+ each them followed by converting an ASCII character. If we fail
|
||
|
|
+ to reset the shift state (bug 25734) then we'll see the last
|
||
|
|
+ character in the queue output again. */
|
||
|
|
+
|
||
|
|
+/* Each test has name, input bytes, and expected wide character
|
||
|
|
+ output. */
|
||
|
|
+struct testdata {
|
||
|
|
+ const char *name;
|
||
|
|
+ const char input[3];
|
||
|
|
+ wchar_t expected[3];
|
||
|
|
+};
|
||
|
|
+
|
||
|
|
+/* In BIG5-HKSCS (2008) there are 4 characters that generate multiple
|
||
|
|
+ wide characters. */
|
||
|
|
+struct testdata tests[4] = {
|
||
|
|
+ /* <H-8862>X => <U+00CA><U+0304>X */
|
||
|
|
+ { "<H-8862>", "\x88\x62\x58", { 0x00CA, 0x0304, 0x0058 } },
|
||
|
|
+ /* <H-8864>X => <U+00CA><U+030C>X */
|
||
|
|
+ { "<H-8864>", "\x88\x64\x58", { 0x00CA, 0x030C, 0x0058 } },
|
||
|
|
+ /* <H-88A3>X => <U+00EA><U+0304>X */
|
||
|
|
+ { "<H-88A3>", "\x88\xa3\x58", { 0x00EA, 0x0304, 0x0058 } },
|
||
|
|
+ /* <H-88A5>X => <U+00EA><U+030C>X */
|
||
|
|
+ { "<H-88A5>", "\x88\xa5\x58", { 0x00EA, 0x030C, 0x0058 } }
|
||
|
|
+};
|
||
|
|
+
|
||
|
|
+/* Each test is of the form:
|
||
|
|
+ - Translate first code sequence (two bytes)
|
||
|
|
+ - Translate second (zero bytes)
|
||
|
|
+ - Translate the third (one byte). */
|
||
|
|
+static int
|
||
|
|
+check_conversion (struct testdata test)
|
||
|
|
+{
|
||
|
|
+ int err = 0;
|
||
|
|
+ wchar_t wc;
|
||
|
|
+ mbstate_t st;
|
||
|
|
+ size_t ret;
|
||
|
|
+ const char *mbs = test.input;
|
||
|
|
+ int consumed = 0;
|
||
|
|
+ /* Input is always 3 bytes long. */
|
||
|
|
+ int inlen = 3;
|
||
|
|
+
|
||
|
|
+ memset (&st, 0, sizeof (st));
|
||
|
|
+ /* First conversion: Consumes first 2 bytes. */
|
||
|
|
+ ret = mbrtowc (&wc, mbs, inlen - consumed, &st);
|
||
|
|
+ if (ret != 2)
|
||
|
|
+ {
|
||
|
|
+ printf ("error: First conversion consumed only %zd bytes.\n", ret);
|
||
|
|
+ err++;
|
||
|
|
+ }
|
||
|
|
+ /* Advance the two consumed bytes. */
|
||
|
|
+ mbs += ret;
|
||
|
|
+ consumed += ret;
|
||
|
|
+ if (wc != test.expected[0])
|
||
|
|
+ {
|
||
|
|
+ printf ("error: Result of first conversion was wrong.\n");
|
||
|
|
+ err++;
|
||
|
|
+ }
|
||
|
|
+ /* Second conversion: Consumes 0 bytes. */
|
||
|
|
+ ret = mbrtowc (&wc, mbs, inlen - consumed, &st);
|
||
|
|
+ if (ret != 0)
|
||
|
|
+ {
|
||
|
|
+ printf ("error: Second conversion consumed only %zd bytes.\n", ret);
|
||
|
|
+ err++;
|
||
|
|
+ }
|
||
|
|
+ /* Advance the zero consumed bytes. */
|
||
|
|
+ mbs += ret;
|
||
|
|
+ consumed += ret;
|
||
|
|
+ if (wc != test.expected[1])
|
||
|
|
+ {
|
||
|
|
+ printf ("error: Result of second conversion was wrong.\n");
|
||
|
|
+ err++;
|
||
|
|
+ }
|
||
|
|
+ /* After the second conversion the state of the converter should be
|
||
|
|
+ in the initial state. It is in the initial state because the two
|
||
|
|
+ input BIG5-HKSCS bytes have been consumed and the 2 wchar_t's have
|
||
|
|
+ been output. */
|
||
|
|
+ if (mbsinit (&st) == 0)
|
||
|
|
+ {
|
||
|
|
+ printf ("error: Converter not in initial state.\n");
|
||
|
|
+ err++;
|
||
|
|
+ }
|
||
|
|
+ /* Third conversion: Consumes 1 byte (it's an ASCII character). */
|
||
|
|
+ ret = mbrtowc (&wc, mbs, inlen - consumed, &st);
|
||
|
|
+ if (ret != 1)
|
||
|
|
+ {
|
||
|
|
+ printf ("error: Third conversion consumed only %zd bytes.\n", ret);
|
||
|
|
+ err++;
|
||
|
|
+ }
|
||
|
|
+ /* Advance the one byte. */
|
||
|
|
+ mbs += ret;
|
||
|
|
+ consumed += ret;
|
||
|
|
+ if (wc != test.expected[2])
|
||
|
|
+ {
|
||
|
|
+ printf ("error: Result of third conversion was wrong.\n");
|
||
|
|
+ err++;
|
||
|
|
+ }
|
||
|
|
+ /* Return 0 if we saw no errors. */
|
||
|
|
+ return err;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+static int
|
||
|
|
+do_test (void)
|
||
|
|
+{
|
||
|
|
+ int err = 0;
|
||
|
|
+ int ret;
|
||
|
|
+ /* Testing BIG5-HKSCS. */
|
||
|
|
+ setlocale (LC_ALL, "zh_HK.BIG5-HKSCS");
|
||
|
|
+
|
||
|
|
+ /* Run all the special conversions. */
|
||
|
|
+ for (int i = 0; i < (sizeof (tests) / sizeof (struct testdata)); i++)
|
||
|
|
+ {
|
||
|
|
+ printf ("Running test for %s\n", tests[i].name);
|
||
|
|
+ ret = check_conversion (tests[i]);
|
||
|
|
+ if (ret > 0)
|
||
|
|
+ printf ("Test %s failed.\n", tests[i].name);
|
||
|
|
+ err += ret;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ /* Fail if any conversion had an error. */
|
||
|
|
+ if (err > 0)
|
||
|
|
+ FAIL_EXIT1 ("One or more conversions failed.");
|
||
|
|
+
|
||
|
|
+ return 0;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+#include <support/test-driver.c>
|
||
|
|
--
|
||
|
|
2.19.1
|
||
|
|
|