From de6d869a8ef5ca327231fb73489f4c9024d8757a Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sat, 27 Apr 2019 14:33:29 +0200 Subject: [PATCH 15/26] Fix numbering in non-Latin scripts The `token` type wasn't wide enough to hold a Unicode code point. --- libxslt/numbers.c | 24 +++++++++-------- tests/docs/bug-219.xml | 22 +++++++++++++++ tests/general/bug-219.out | 68 +++++++++++++++++++++++++++++++++++++++++++++++ tests/general/bug-219.xsl | 17 ++++++++++++ 4 files changed, 120 insertions(+), 11 deletions(-) create mode 100644 tests/docs/bug-219.xml create mode 100644 tests/general/bug-219.out create mode 100644 tests/general/bug-219.xsl diff --git a/libxslt/numbers.c b/libxslt/numbers.c index 75c31eb..0a2a51c 100644 --- a/libxslt/numbers.c +++ b/libxslt/numbers.c @@ -36,7 +36,7 @@ #define SYMBOL_QUOTE ((xmlChar)'\'') -#define DEFAULT_TOKEN (xmlChar)'0' +#define DEFAULT_TOKEN '0' #define DEFAULT_SEPARATOR "." #define MAX_TOKENS 1024 @@ -45,7 +45,7 @@ typedef struct _xsltFormatToken xsltFormatToken; typedef xsltFormatToken *xsltFormatTokenPtr; struct _xsltFormatToken { xmlChar *separator; - xmlChar token; + int token; int width; }; @@ -107,20 +107,22 @@ xsltUTF8Charcmp(xmlChar *utf1, xmlChar *utf2) { (xsltUTF8Charcmp((letter), (self)->patternSeparator) == 0)) #define IS_DIGIT_ZERO(x) xsltIsDigitZero(x) -#define IS_DIGIT_ONE(x) xsltIsDigitZero((xmlChar)(x)-1) +#define IS_DIGIT_ONE(x) xsltIsDigitZero((x)-1) static int xsltIsDigitZero(unsigned int ch) { /* * Reference: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt + * + * There are many more digit ranges in newer Unicode versions. These + * are only the zeros that match Digit in XML 1.0 (IS_DIGIT macro). 
*/ switch (ch) { case 0x0030: case 0x0660: case 0x06F0: case 0x0966: case 0x09E6: case 0x0A66: case 0x0AE6: case 0x0B66: case 0x0C66: case 0x0CE6: case 0x0D66: case 0x0E50: - case 0x0E60: case 0x0F20: case 0x1040: case 0x17E0: - case 0x1810: case 0xFF10: + case 0x0ED0: case 0x0F20: return TRUE; default: return FALSE; @@ -383,13 +385,13 @@ xsltNumberFormatTokenize(const xmlChar *format, ix += len; val = xmlStringCurrentChar(NULL, format+ix, &len); } else { - tokens->tokens[tokens->nTokens].token = (xmlChar)'0'; + tokens->tokens[tokens->nTokens].token = '0'; tokens->tokens[tokens->nTokens].width = 1; } - } else if ( (val == (xmlChar)'A') || - (val == (xmlChar)'a') || - (val == (xmlChar)'I') || - (val == (xmlChar)'i') ) { + } else if ( (val == 'A') || + (val == 'a') || + (val == 'I') || + (val == 'i') ) { tokens->tokens[tokens->nTokens].token = val; ix += len; val = xmlStringCurrentChar(NULL, format+ix, &len); @@ -400,7 +402,7 @@ xsltNumberFormatTokenize(const xmlChar *format, * not support a numbering sequence that starts with that * token, it must use a format token of 1." 
*/ - tokens->tokens[tokens->nTokens].token = (xmlChar)'0'; + tokens->tokens[tokens->nTokens].token = '0'; tokens->tokens[tokens->nTokens].width = 1; } /* diff --git a/tests/docs/bug-219.xml b/tests/docs/bug-219.xml new file mode 100644 index 0000000..6549781 --- /dev/null +++ b/tests/docs/bug-219.xml @@ -0,0 +1,22 @@ + + + ٠١ + ۰۱ + ०१ + ০১ + ੦੧ + ૦૧ + ୦୧ + ౦౧ + ೦೧ + ൦൧ + ๐๑ + ໐໑ + ༠༡ + + + 0 + 9 + 1234567890 + + diff --git a/tests/general/bug-219.out b/tests/general/bug-219.out new file mode 100644 index 0000000..908043c --- /dev/null +++ b/tests/general/bug-219.out @@ -0,0 +1,68 @@ + + + + ٠٠ + ٠٩ + ١٢٣٤٥٦٧٨٩٠ + + + ۰۰ + ۰۹ + ۱۲۳۴۵۶۷۸۹۰ + + + ०० + ०९ + १२३४५६७८९० + + + ০০ + ০৯ + ১২৩৪৫৬৭৮৯০ + + + ੦੦ + ੦੯ + ੧੨੩੪੫੬੭੮੯੦ + + + ૦૦ + ૦૯ + ૧૨૩૪૫૬૭૮૯૦ + + + ୦୦ + ୦୯ + ୧୨୩୪୫୬୭୮୯୦ + + + ౦౦ + ౦౯ + ౧౨౩౪౫౬౭౮౯౦ + + + ೦೦ + ೦೯ + ೧೨೩೪೫೬೭೮೯೦ + + + ൦൦ + ൦൯ + ൧൨൩൪൫൬൭൮൯൦ + + + ๐๐ + ๐๙ + ๑๒๓๔๕๖๗๘๙๐ + + + ໐໐ + ໐໙ + ໑໒໓໔໕໖໗໘໙໐ + + + ༠༠ + ༠༩ + ༡༢༣༤༥༦༧༨༩༠ + + diff --git a/tests/general/bug-219.xsl b/tests/general/bug-219.xsl new file mode 100644 index 0000000..e291994 --- /dev/null +++ b/tests/general/bug-219.xsl @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + + + + -- 1.8.3.1