glib2/gdatetime-Store-intermediate-result-of-g_date_time_f.patch
2019-09-30 10:40:42 -04:00

569 lines
17 KiB
Diff
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From bc59892b1af2a8f4374a1ad2054ff9444151732b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Mi=C4=85sko?= <tomasz.miasko@gmail.com>
Date: Fri, 30 Nov 2018 00:00:00 +0000
Subject: [PATCH 320/682] gdatetime: Store intermediate result of
g_date_time_format in UTF-8
In date time formatting routine, instead of converting from UTF-8 to
locale charset and then from locale charset to UTF-8, store all
intermediate result in UTF-8.
This solves the issue where user provided UTF-8 format string might be
unrepresentable in the current locale charset.
Fixes issue #1605.
---
glib/gdatetime.c | 320 +++++++++++++++++++++--------------------
glib/tests/gdatetime.c | 18 +++
2 files changed, 179 insertions(+), 159 deletions(-)
diff --git a/glib/gdatetime.c b/glib/gdatetime.c
index 32fd501aa..6afe14438 100644
--- a/glib/gdatetime.c
+++ b/glib/gdatetime.c
@@ -4,6 +4,7 @@
* Copyright (C) 2010 Thiago Santos <thiago.sousa.santos@collabora.co.uk>
* Copyright (C) 2010 Emmanuele Bassi <ebassi@linux.intel.com>
* Copyright © 2010 Codethink Limited
+ * Copyright © 2018 Tomasz Miąsko
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
@@ -2771,6 +2772,51 @@ format_z (GString *outstr,
return TRUE;
}
+#ifdef HAVE_LANGINFO_OUTDIGIT
+/** Initializes the array with UTF-8 encoded alternate digits suibtable for use
+ * in current locale. Returns NULL when current locale does not use alternate
+ * digits or there was an error converting them to UTF-8.
+ */
+static const gchar * const *
+initialize_alt_digits (void)
+{
+ guint i;
+ gsize digit_len;
+ gchar *digit;
+ const gchar *locale_digit;
+#define N_DIGITS 10
+#define MAX_UTF8_ENCODING_LEN 4
+ static gchar buffer[N_DIGITS * (MAX_UTF8_ENCODING_LEN + 1 /* null separator */)];
+#undef N_DIGITS
+#undef MAX_UTF8_ENCODING_LEN
+ gchar *buffer_end = buffer;
+ static const gchar *alt_digits[10];
+
+ for (i = 0; i != 10; ++i)
+ {
+ locale_digit = nl_langinfo (_NL_CTYPE_OUTDIGIT0_MB + i);
+
+ if (g_strcmp0 (locale_digit, "") == 0)
+ return NULL;
+
+ digit = g_locale_to_utf8 (locale_digit, -1, NULL, &digit_len, NULL);
+ if (digit == NULL)
+ return NULL;
+
+ g_assert (digit_len < buffer + sizeof (buffer) - buffer_end);
+
+ alt_digits[i] = buffer_end;
+ buffer_end = g_stpcpy (buffer_end, digit);
+ /* skip trailing null byte */
+ buffer_end += 1;
+
+ g_free (digit);
+ }
+
+ return alt_digits;
+}
+#endif /* HAVE_LANGINFO_OUTDIGIT */
+
static void
format_number (GString *str,
gboolean use_alt_digits,
@@ -2781,7 +2827,7 @@ format_number (GString *str,
const gchar *ascii_digits[10] = {
"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"
};
- const gchar **digits = ascii_digits;
+ const gchar * const *digits = ascii_digits;
const gchar *tmp[10];
gint i = 0;
@@ -2790,17 +2836,16 @@ format_number (GString *str,
#ifdef HAVE_LANGINFO_OUTDIGIT
if (use_alt_digits)
{
- static const gchar *alt_digits[10];
+ static const gchar * const *alt_digits = NULL;
static gsize initialised;
- /* 2^32 has 10 digits */
if G_UNLIKELY (g_once_init_enter (&initialised))
{
-#define DO_DIGIT(n) \
- alt_digits[n] = nl_langinfo (_NL_CTYPE_OUTDIGIT## n ##_MB)
- DO_DIGIT(0); DO_DIGIT(1); DO_DIGIT(2); DO_DIGIT(3); DO_DIGIT(4);
- DO_DIGIT(5); DO_DIGIT(6); DO_DIGIT(7); DO_DIGIT(8); DO_DIGIT(9);
-#undef DO_DIGIT
+ alt_digits = initialize_alt_digits ();
+
+ if (alt_digits == NULL)
+ alt_digits = ascii_digits;
+
g_once_init_leave (&initialised, TRUE);
}
@@ -2833,7 +2878,6 @@ format_ampm (GDateTime *datetime,
{
const gchar *ampm;
gchar *tmp = NULL, *ampm_dup;
- gsize len;
ampm = GET_AMPM (datetime);
@@ -2844,104 +2888,107 @@ format_ampm (GDateTime *datetime,
{
/* This assumes that locale encoding can't have embedded NULs */
ampm = tmp = g_locale_to_utf8 (ampm, -1, NULL, NULL, NULL);
- if (!tmp)
+ if (tmp == NULL)
return FALSE;
}
if (uppercase)
ampm_dup = g_utf8_strup (ampm, -1);
else
ampm_dup = g_utf8_strdown (ampm, -1);
- len = strlen (ampm_dup);
- if (!locale_is_utf8 && GET_AMPM_IS_LOCALE)
- {
- g_free (tmp);
- tmp = g_locale_from_utf8 (ampm_dup, -1, NULL, &len, NULL);
- g_free (ampm_dup);
- if (!tmp)
- return FALSE;
- ampm_dup = tmp;
- }
- g_string_append_len (outstr, ampm_dup, len);
+ g_free (tmp);
+
+ g_string_append (outstr, ampm_dup);
g_free (ampm_dup);
return TRUE;
}
-static gboolean g_date_time_format_locale (GDateTime *datetime,
- const gchar *format,
- GString *outstr,
- gboolean locale_is_utf8);
+static gboolean g_date_time_format_utf8 (GDateTime *datetime,
+ const gchar *format,
+ GString *outstr,
+ gboolean locale_is_utf8);
/* g_date_time_format() subroutine that takes a locale-encoded format
- * string and produces a locale-encoded date/time string.
+ * string and produces a UTF-8 encoded date/time string.
*/
static gboolean
-g_date_time_locale_format_locale (GDateTime *datetime,
- const gchar *format,
- GString *outstr,
- gboolean locale_is_utf8)
+g_date_time_format_locale (GDateTime *datetime,
+ const gchar *locale_format,
+ GString *outstr,
+ gboolean locale_is_utf8)
{
gchar *utf8_format;
gboolean success;
if (locale_is_utf8)
- return g_date_time_format_locale (datetime, format, outstr,
- locale_is_utf8);
+ return g_date_time_format_utf8 (datetime, locale_format, outstr, locale_is_utf8);
- utf8_format = g_locale_to_utf8 (format, -1, NULL, NULL, NULL);
- if (!utf8_format)
+ utf8_format = g_locale_to_utf8 (locale_format, -1, NULL, NULL, NULL);
+ if (utf8_format == NULL)
return FALSE;
- success = g_date_time_format_locale (datetime, utf8_format, outstr,
- locale_is_utf8);
+ success = g_date_time_format_utf8 (datetime, utf8_format, outstr,
+ locale_is_utf8);
g_free (utf8_format);
return success;
}
-/* g_date_time_format() subroutine that takes a UTF-8 format
- * string and produces a locale-encoded date/time string.
+static inline gboolean
+string_append (GString *string,
+ const gchar *s,
+ gboolean s_is_utf8)
+{
+ gchar *utf8;
+ gsize utf8_len;
+
+ if (s_is_utf8)
+ {
+ g_string_append (string, s);
+ }
+ else
+ {
+ utf8 = g_locale_to_utf8 (s, -1, NULL, &utf8_len, NULL);
+ if (utf8 == NULL)
+ return FALSE;
+ g_string_append_len (string, utf8, utf8_len);
+ g_free (utf8);
+ }
+
+ return TRUE;
+}
+
+/* g_date_time_format() subroutine that takes a UTF-8 encoded format
+ * string and produces a UTF-8 encoded date/time string.
*/
static gboolean
-g_date_time_format_locale (GDateTime *datetime,
- const gchar *format,
- GString *outstr,
- gboolean locale_is_utf8)
+g_date_time_format_utf8 (GDateTime *datetime,
+ const gchar *utf8_format,
+ GString *outstr,
+ gboolean locale_is_utf8)
{
guint len;
guint colons;
- gchar *tmp;
- gsize tmp_len;
gunichar c;
gboolean alt_digits = FALSE;
gboolean pad_set = FALSE;
+ gboolean name_is_utf8;
const gchar *pad = "";
const gchar *name;
const gchar *tz;
- while (*format)
+ while (*utf8_format)
{
- len = strcspn (format, "%");
+ len = strcspn (utf8_format, "%");
if (len)
- {
- if (locale_is_utf8)
- g_string_append_len (outstr, format, len);
- else
- {
- tmp = g_locale_from_utf8 (format, len, NULL, &tmp_len, NULL);
- if (!tmp)
- return FALSE;
- g_string_append_len (outstr, tmp, tmp_len);
- g_free (tmp);
- }
- }
+ g_string_append_len (outstr, utf8_format, len);
- format += len;
- if (!*format)
+ utf8_format += len;
+ if (!*utf8_format)
break;
- g_assert (*format == '%');
- format++;
- if (!*format)
+ g_assert (*utf8_format == '%');
+ utf8_format++;
+ if (!*utf8_format)
break;
colons = 0;
@@ -2949,91 +2996,67 @@ g_date_time_format_locale (GDateTime *datetime,
pad_set = FALSE;
next_mod:
- c = g_utf8_get_char (format);
- format = g_utf8_next_char (format);
+ c = g_utf8_get_char (utf8_format);
+ utf8_format = g_utf8_next_char (utf8_format);
switch (c)
{
case 'a':
name = WEEKDAY_ABBR (datetime);
if (g_strcmp0 (name, "") == 0)
return FALSE;
- if (!locale_is_utf8 && !WEEKDAY_ABBR_IS_LOCALE)
- {
- tmp = g_locale_from_utf8 (name, -1, NULL, &tmp_len, NULL);
- if (!tmp)
- return FALSE;
- g_string_append_len (outstr, tmp, tmp_len);
- g_free (tmp);
- }
- else
- {
- g_string_append (outstr, name);
- }
+
+ name_is_utf8 = locale_is_utf8 || !WEEKDAY_ABBR_IS_LOCALE;
+
+ if (!string_append (outstr, name, name_is_utf8))
+ return FALSE;
+
break;
case 'A':
name = WEEKDAY_FULL (datetime);
if (g_strcmp0 (name, "") == 0)
return FALSE;
- if (!locale_is_utf8 && !WEEKDAY_FULL_IS_LOCALE)
- {
- tmp = g_locale_from_utf8 (name, -1, NULL, &tmp_len, NULL);
- if (!tmp)
- return FALSE;
- g_string_append_len (outstr, tmp, tmp_len);
- g_free (tmp);
- }
- else
- {
- g_string_append (outstr, name);
- }
+
+ name_is_utf8 = locale_is_utf8 || !WEEKDAY_FULL_IS_LOCALE;
+
+ if (!string_append (outstr, name, name_is_utf8))
+ return FALSE;
+
break;
case 'b':
name = alt_digits ? MONTH_ABBR_STANDALONE (datetime)
: MONTH_ABBR_WITH_DAY (datetime);
if (g_strcmp0 (name, "") == 0)
return FALSE;
- if (!locale_is_utf8 &&
- ((alt_digits && !MONTH_ABBR_STANDALONE_IS_LOCALE) ||
- (!alt_digits && !MONTH_ABBR_WITH_DAY_IS_LOCALE)))
- {
- tmp = g_locale_from_utf8 (name, -1, NULL, &tmp_len, NULL);
- if (!tmp)
- return FALSE;
- g_string_append_len (outstr, tmp, tmp_len);
- g_free (tmp);
- }
- else
- {
- g_string_append (outstr, name);
- }
+
+ name_is_utf8 = locale_is_utf8 ||
+ ((alt_digits && !MONTH_ABBR_STANDALONE_IS_LOCALE) ||
+ (!alt_digits && !MONTH_ABBR_WITH_DAY_IS_LOCALE));
+
+ if (!string_append (outstr, name, name_is_utf8))
+ return FALSE;
+
break;
case 'B':
name = alt_digits ? MONTH_FULL_STANDALONE (datetime)
: MONTH_FULL_WITH_DAY (datetime);
if (g_strcmp0 (name, "") == 0)
return FALSE;
- if (!locale_is_utf8 &&
- ((alt_digits && !MONTH_FULL_STANDALONE_IS_LOCALE) ||
- (!alt_digits && !MONTH_FULL_WITH_DAY_IS_LOCALE)))
- {
- tmp = g_locale_from_utf8 (name, -1, NULL, &tmp_len, NULL);
- if (!tmp)
- return FALSE;
- g_string_append_len (outstr, tmp, tmp_len);
- g_free (tmp);
- }
- else
- {
- g_string_append (outstr, name);
- }
+
+ name_is_utf8 = locale_is_utf8 ||
+ ((alt_digits && !MONTH_FULL_STANDALONE_IS_LOCALE) ||
+ (!alt_digits && !MONTH_FULL_WITH_DAY_IS_LOCALE));
+
+ if (!string_append (outstr, name, name_is_utf8))
+ return FALSE;
+
break;
case 'c':
{
if (g_strcmp0 (PREFERRED_DATE_TIME_FMT, "") == 0)
return FALSE;
- if (!g_date_time_locale_format_locale (datetime, PREFERRED_DATE_TIME_FMT,
- outstr, locale_is_utf8))
- return FALSE;
+ if (!g_date_time_format_locale (datetime, PREFERRED_DATE_TIME_FMT,
+ outstr, locale_is_utf8))
+ return FALSE;
}
break;
case 'C':
@@ -3067,20 +3090,14 @@ g_date_time_format_locale (GDateTime *datetime,
: MONTH_ABBR_WITH_DAY (datetime);
if (g_strcmp0 (name, "") == 0)
return FALSE;
- if (!locale_is_utf8 &&
- ((alt_digits && !MONTH_ABBR_STANDALONE_IS_LOCALE) ||
- (!alt_digits && !MONTH_ABBR_WITH_DAY_IS_LOCALE)))
- {
- tmp = g_locale_from_utf8 (name, -1, NULL, &tmp_len, NULL);
- if (!tmp)
- return FALSE;
- g_string_append_len (outstr, tmp, tmp_len);
- g_free (tmp);
- }
- else
- {
- g_string_append (outstr, name);
- }
+
+ name_is_utf8 = locale_is_utf8 ||
+ ((alt_digits && !MONTH_ABBR_STANDALONE_IS_LOCALE) ||
+ (!alt_digits && !MONTH_ABBR_WITH_DAY_IS_LOCALE));
+
+ if (!string_append (outstr, name, name_is_utf8))
+ return FALSE;
+
break;
case 'H':
format_number (outstr, alt_digits, pad_set ? pad : "0", 2,
@@ -3128,8 +3145,8 @@ g_date_time_format_locale (GDateTime *datetime,
{
if (g_strcmp0 (PREFERRED_12HR_TIME_FMT, "") == 0)
return FALSE;
- if (!g_date_time_locale_format_locale (datetime, PREFERRED_12HR_TIME_FMT,
- outstr, locale_is_utf8))
+ if (!g_date_time_format_locale (datetime, PREFERRED_12HR_TIME_FMT,
+ outstr, locale_is_utf8))
return FALSE;
}
break;
@@ -3170,8 +3187,8 @@ g_date_time_format_locale (GDateTime *datetime,
{
if (g_strcmp0 (PREFERRED_DATE_FMT, "") == 0)
return FALSE;
- if (!g_date_time_locale_format_locale (datetime, PREFERRED_DATE_FMT,
- outstr, locale_is_utf8))
+ if (!g_date_time_format_locale (datetime, PREFERRED_DATE_FMT,
+ outstr, locale_is_utf8))
return FALSE;
}
break;
@@ -3179,8 +3196,8 @@ g_date_time_format_locale (GDateTime *datetime,
{
if (g_strcmp0 (PREFERRED_TIME_FMT, "") == 0)
return FALSE;
- if (!g_date_time_locale_format_locale (datetime, PREFERRED_TIME_FMT,
- outstr, locale_is_utf8))
+ if (!g_date_time_format_locale (datetime, PREFERRED_TIME_FMT,
+ outstr, locale_is_utf8))
return FALSE;
}
break;
@@ -3202,16 +3219,7 @@ g_date_time_format_locale (GDateTime *datetime,
break;
case 'Z':
tz = g_date_time_get_timezone_abbreviation (datetime);
- tmp = NULL;
- tmp_len = strlen (tz);
- if (!locale_is_utf8)
- {
- tz = tmp = g_locale_from_utf8 (tz, -1, NULL, &tmp_len, NULL);
- if (!tmp)
- return FALSE;
- }
- g_string_append_len (outstr, tz, tmp_len);
- g_free (tmp);
+ g_string_append (outstr, tz);
break;
case '%':
g_string_append_c (outstr, '%');
@@ -3230,7 +3238,7 @@ g_date_time_format_locale (GDateTime *datetime,
goto next_mod;
case ':':
/* Colons are only allowed before 'z' */
- if (*format && *format != 'z' && *format != ':')
+ if (*utf8_format && *utf8_format != 'z' && *utf8_format != ':')
return FALSE;
colons++;
goto next_mod;
@@ -3355,7 +3363,6 @@ g_date_time_format (GDateTime *datetime,
const gchar *format)
{
GString *outstr;
- gchar *utf8;
gboolean locale_is_utf8 = g_get_charset (NULL);
g_return_val_if_fail (datetime != NULL, NULL);
@@ -3364,18 +3371,13 @@ g_date_time_format (GDateTime *datetime,
outstr = g_string_sized_new (strlen (format) * 2);
- if (!g_date_time_format_locale (datetime, format, outstr, locale_is_utf8))
+ if (!g_date_time_format_utf8 (datetime, format, outstr, locale_is_utf8))
{
g_string_free (outstr, TRUE);
return NULL;
}
- if (locale_is_utf8)
- return g_string_free (outstr, FALSE);
-
- utf8 = g_locale_to_utf8 (outstr->str, outstr->len, NULL, NULL, NULL);
- g_string_free (outstr, TRUE);
- return utf8;
+ return g_string_free (outstr, FALSE);
}
diff --git a/glib/tests/gdatetime.c b/glib/tests/gdatetime.c
index 09f84cb21..1a46cf55a 100644
--- a/glib/tests/gdatetime.c
+++ b/glib/tests/gdatetime.c
@@ -1525,6 +1525,23 @@ test_non_utf8_printf (void)
g_free (oldlocale);
}
+/* Checks that it is possible to use format string that
+ * is unrepresentable in current locale charset. */
+static void
+test_format_unrepresentable (void)
+{
+ gchar *oldlocale = g_strdup (setlocale (LC_ALL, NULL));
+ setlocale (LC_ALL, "POSIX");
+
+ TEST_PRINTF ("ąśćł", "ąśćł");
+
+ /* We are using Unicode ratio symbol here, which is outside ASCII. */
+ TEST_PRINTF_TIME (23, 15, 0, "%H%M", "2315");
+
+ setlocale (LC_ALL, oldlocale);
+ g_free (oldlocale);
+}
+
static void
test_modifiers (void)
{
@@ -2493,6 +2510,7 @@ main (gint argc,
g_test_add_func ("/GDateTime/now", test_GDateTime_now);
g_test_add_func ("/GDateTime/printf", test_GDateTime_printf);
g_test_add_func ("/GDateTime/non_utf8_printf", test_non_utf8_printf);
+ g_test_add_func ("/GDateTime/format_unrepresentable", test_format_unrepresentable);
g_test_add_func ("/GDateTime/strftime", test_strftime);
g_test_add_func ("/GDateTime/strftime/error_handling", test_GDateTime_strftime_error_handling);
g_test_add_func ("/GDateTime/modifiers", test_modifiers);
--
2.19.1