glib2/gdatetime-Store-intermediate-result-of-g_date_time_f.patch

From bc59892b1af2a8f4374a1ad2054ff9444151732b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Mi=C4=85sko?= <tomasz.miasko@gmail.com>
Date: Fri, 30 Nov 2018 00:00:00 +0000
Subject: [PATCH 320/682] gdatetime: Store intermediate result of
 g_date_time_format in UTF-8

In the date-time formatting routine, instead of converting from UTF-8 to
the locale charset and then from the locale charset back to UTF-8, store
all intermediate results in UTF-8.

This solves the issue where a user-provided UTF-8 format string might be
unrepresentable in the current locale charset.

Fixes issue #1605.
---
 glib/gdatetime.c       | 320 +++++++++++++++++++++--------------------
 glib/tests/gdatetime.c |  18 +++
 2 files changed, 179 insertions(+), 159 deletions(-)

diff --git a/glib/gdatetime.c b/glib/gdatetime.c
index 32fd501aa..6afe14438 100644
--- a/glib/gdatetime.c
+++ b/glib/gdatetime.c
@@ -4,6 +4,7 @@
  * Copyright (C) 2010 Thiago Santos <thiago.sousa.santos@collabora.co.uk>
  * Copyright (C) 2010 Emmanuele Bassi <ebassi@linux.intel.com>
  * Copyright © 2010 Codethink Limited
+ * Copyright © 2018 Tomasz Miąsko
  *
  * This library is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
@@ -2771,6 +2772,51 @@ format_z (GString *outstr,
   return TRUE;
 }

+#ifdef HAVE_LANGINFO_OUTDIGIT
+/** Initializes the array with UTF-8 encoded alternate digits suitable for use
+ * in the current locale. Returns NULL when the current locale does not use
+ * alternate digits or there was an error converting them to UTF-8.
+ */
+static const gchar * const *
+initialize_alt_digits (void)
+{
+  guint i;
+  gsize digit_len;
+  gchar *digit;
+  const gchar *locale_digit;
+#define N_DIGITS 10
+#define MAX_UTF8_ENCODING_LEN 4
+  static gchar buffer[N_DIGITS * (MAX_UTF8_ENCODING_LEN + 1 /* null separator */)];
+#undef N_DIGITS
+#undef MAX_UTF8_ENCODING_LEN
+  gchar *buffer_end = buffer;
+  static const gchar *alt_digits[10];
+
+  for (i = 0; i != 10; ++i)
+    {
+      locale_digit = nl_langinfo (_NL_CTYPE_OUTDIGIT0_MB + i);
+
+      if (g_strcmp0 (locale_digit, "") == 0)
+        return NULL;
+
+      digit = g_locale_to_utf8 (locale_digit, -1, NULL, &digit_len, NULL);
+      if (digit == NULL)
+        return NULL;
+
+      g_assert (digit_len < buffer + sizeof (buffer) - buffer_end);
+
+      alt_digits[i] = buffer_end;
+      buffer_end = g_stpcpy (buffer_end, digit);
+      /* skip trailing null byte */
+      buffer_end += 1;
+
+      g_free (digit);
+    }
+
+  return alt_digits;
+}
+#endif /* HAVE_LANGINFO_OUTDIGIT */
+
 static void
 format_number (GString     *str,
                gboolean     use_alt_digits,
@@ -2781,7 +2827,7 @@ format_number (GString     *str,
   const gchar *ascii_digits[10] = {
     "0", "1", "2", "3", "4", "5", "6", "7", "8", "9"
   };
-  const gchar **digits = ascii_digits;
+  const gchar * const *digits = ascii_digits;
   const gchar *tmp[10];
   gint i = 0;

@@ -2790,17 +2836,16 @@ format_number (GString     *str,
 #ifdef HAVE_LANGINFO_OUTDIGIT
   if (use_alt_digits)
     {
-      static const gchar *alt_digits[10];
+      static const gchar * const *alt_digits = NULL;
       static gsize initialised;
-      /* 2^32 has 10 digits */

       if G_UNLIKELY (g_once_init_enter (&initialised))
         {
-#define DO_DIGIT(n) \
-        alt_digits[n] = nl_langinfo (_NL_CTYPE_OUTDIGIT## n ##_MB)
-          DO_DIGIT(0); DO_DIGIT(1); DO_DIGIT(2); DO_DIGIT(3); DO_DIGIT(4);
-          DO_DIGIT(5); DO_DIGIT(6); DO_DIGIT(7); DO_DIGIT(8); DO_DIGIT(9);
-#undef DO_DIGIT
+          alt_digits = initialize_alt_digits ();
+
+          if (alt_digits == NULL)
+            alt_digits = ascii_digits;
+
           g_once_init_leave (&initialised, TRUE);
         }

@@ -2833,7 +2878,6 @@ format_ampm (GDateTime *datetime,
 {
   const gchar *ampm;
   gchar       *tmp = NULL, *ampm_dup;
-  gsize        len;

   ampm = GET_AMPM (datetime);

@@ -2844,104 +2888,107 @@ format_ampm (GDateTime *datetime,
     {
       /* This assumes that locale encoding can't have embedded NULs */
       ampm = tmp = g_locale_to_utf8 (ampm, -1, NULL, NULL, NULL);
-      if (!tmp)
+      if (tmp == NULL)
         return FALSE;
     }
   if (uppercase)
     ampm_dup = g_utf8_strup (ampm, -1);
   else
     ampm_dup = g_utf8_strdown (ampm, -1);
-  len = strlen (ampm_dup);
-  if (!locale_is_utf8 && GET_AMPM_IS_LOCALE)
-    {
-      g_free (tmp);
-      tmp = g_locale_from_utf8 (ampm_dup, -1, NULL, &len, NULL);
-      g_free (ampm_dup);
-      if (!tmp)
-        return FALSE;
-      ampm_dup = tmp;
-    }
-  g_string_append_len (outstr, ampm_dup, len);
+  g_free (tmp);
+
+  g_string_append (outstr, ampm_dup);
   g_free (ampm_dup);

   return TRUE;
 }

-static gboolean g_date_time_format_locale (GDateTime   *datetime,
-					   const gchar *format,
-					   GString     *outstr,
-					   gboolean     locale_is_utf8);
+static gboolean g_date_time_format_utf8 (GDateTime   *datetime,
+					 const gchar *format,
+					 GString     *outstr,
+					 gboolean     locale_is_utf8);

 /* g_date_time_format() subroutine that takes a locale-encoded format
- * string and produces a locale-encoded date/time string.
+ * string and produces a UTF-8 encoded date/time string.
  */
 static gboolean
-g_date_time_locale_format_locale (GDateTime   *datetime,
-				  const gchar *format,
-				  GString     *outstr,
-				  gboolean     locale_is_utf8)
+g_date_time_format_locale (GDateTime   *datetime,
+			   const gchar *locale_format,
+			   GString     *outstr,
+			   gboolean     locale_is_utf8)
 {
   gchar *utf8_format;
   gboolean success;

   if (locale_is_utf8)
-    return g_date_time_format_locale (datetime, format, outstr,
-				      locale_is_utf8);
+    return g_date_time_format_utf8 (datetime, locale_format, outstr, locale_is_utf8);

-  utf8_format = g_locale_to_utf8 (format, -1, NULL, NULL, NULL);
-  if (!utf8_format)
+  utf8_format = g_locale_to_utf8 (locale_format, -1, NULL, NULL, NULL);
+  if (utf8_format == NULL)
     return FALSE;

-  success = g_date_time_format_locale (datetime, utf8_format, outstr,
-				       locale_is_utf8);
+  success = g_date_time_format_utf8 (datetime, utf8_format, outstr,
+                                     locale_is_utf8);
   g_free (utf8_format);
   return success;
 }

-/* g_date_time_format() subroutine that takes a UTF-8 format
- * string and produces a locale-encoded date/time string.
+static inline gboolean
+string_append (GString     *string,
+               const gchar *s,
+               gboolean     s_is_utf8)
+{
+  gchar *utf8;
+  gsize  utf8_len;
+
+  if (s_is_utf8)
+    {
+      g_string_append (string, s);
+    }
+  else
+    {
+      utf8 = g_locale_to_utf8 (s, -1, NULL, &utf8_len, NULL);
+      if (utf8 == NULL)
+        return FALSE;
+      g_string_append_len (string, utf8, utf8_len);
+      g_free (utf8);
+    }
+
+  return TRUE;
+}
+
+/* g_date_time_format() subroutine that takes a UTF-8 encoded format
+ * string and produces a UTF-8 encoded date/time string.
  */
 static gboolean
-g_date_time_format_locale (GDateTime   *datetime,
-			   const gchar *format,
-			   GString     *outstr,
-			   gboolean     locale_is_utf8)
+g_date_time_format_utf8 (GDateTime   *datetime,
+			 const gchar *utf8_format,
+			 GString     *outstr,
+			 gboolean     locale_is_utf8)
 {
   guint     len;
   guint     colons;
-  gchar    *tmp;
-  gsize     tmp_len;
   gunichar  c;
   gboolean  alt_digits = FALSE;
   gboolean  pad_set = FALSE;
+  gboolean  name_is_utf8;
   const gchar *pad = "";
   const gchar *name;
   const gchar *tz;

-  while (*format)
+  while (*utf8_format)
     {
-      len = strcspn (format, "%");
+      len = strcspn (utf8_format, "%");
       if (len)
-	{
-	  if (locale_is_utf8)
-	    g_string_append_len (outstr, format, len);
-	  else
-	    {
-	      tmp = g_locale_from_utf8 (format, len, NULL, &tmp_len, NULL);
-	      if (!tmp)
-		return FALSE;
-	      g_string_append_len (outstr, tmp, tmp_len);
-	      g_free (tmp);
-	    }
-	}
+        g_string_append_len (outstr, utf8_format, len);

-      format += len;
-      if (!*format)
+      utf8_format += len;
+      if (!*utf8_format)
 	break;

-      g_assert (*format == '%');
-      format++;
-      if (!*format)
+      g_assert (*utf8_format == '%');
+      utf8_format++;
+      if (!*utf8_format)
 	break;

       colons = 0;
@@ -2949,91 +2996,67 @@ g_date_time_format_locale (GDateTime   *datetime,
       pad_set = FALSE;

     next_mod:
-      c = g_utf8_get_char (format);
-      format = g_utf8_next_char (format);
+      c = g_utf8_get_char (utf8_format);
+      utf8_format = g_utf8_next_char (utf8_format);
       switch (c)
 	{
 	case 'a':
 	  name = WEEKDAY_ABBR (datetime);
           if (g_strcmp0 (name, "") == 0)
             return FALSE;
-	  if (!locale_is_utf8 && !WEEKDAY_ABBR_IS_LOCALE)
-	    {
-	      tmp = g_locale_from_utf8 (name, -1, NULL, &tmp_len, NULL);
-	      if (!tmp)
-		return FALSE;
-	      g_string_append_len (outstr, tmp, tmp_len);
-	      g_free (tmp);
-	    }
-	  else
-	    {
-	      g_string_append (outstr, name);
-	    }
+
+          name_is_utf8 = locale_is_utf8 || !WEEKDAY_ABBR_IS_LOCALE;
+
+          if (!string_append (outstr, name, name_is_utf8))
+            return FALSE;
+
 	  break;
 	case 'A':
 	  name = WEEKDAY_FULL (datetime);
           if (g_strcmp0 (name, "") == 0)
             return FALSE;
-	  if (!locale_is_utf8 && !WEEKDAY_FULL_IS_LOCALE)
-	    {
-	      tmp = g_locale_from_utf8 (name, -1, NULL, &tmp_len, NULL);
-	      if (!tmp)
-		return FALSE;
-	      g_string_append_len (outstr, tmp, tmp_len);
-	      g_free (tmp);
-	    }
-	  else
-	    {
-	      g_string_append (outstr, name);
-	    }
+
+          name_is_utf8 = locale_is_utf8 || !WEEKDAY_FULL_IS_LOCALE;
+
+          if (!string_append (outstr, name, name_is_utf8))
+            return FALSE;
+
 	  break;
 	case 'b':
 	  name = alt_digits ? MONTH_ABBR_STANDALONE (datetime)
 			    : MONTH_ABBR_WITH_DAY (datetime);
           if (g_strcmp0 (name, "") == 0)
             return FALSE;
-	  if (!locale_is_utf8 &&
-	      ((alt_digits && !MONTH_ABBR_STANDALONE_IS_LOCALE) ||
-	       (!alt_digits && !MONTH_ABBR_WITH_DAY_IS_LOCALE)))
-	    {
-	      tmp = g_locale_from_utf8 (name, -1, NULL, &tmp_len, NULL);
-	      if (!tmp)
-		return FALSE;
-	      g_string_append_len (outstr, tmp, tmp_len);
-	      g_free (tmp);
-	    }
-	  else
-	    {
-	      g_string_append (outstr, name);
-	    }
+
+          name_is_utf8 = locale_is_utf8 ||
+            ((alt_digits && !MONTH_ABBR_STANDALONE_IS_LOCALE) ||
+             (!alt_digits && !MONTH_ABBR_WITH_DAY_IS_LOCALE));
+
+          if (!string_append (outstr, name, name_is_utf8))
+            return FALSE;
+
 	  break;
 	case 'B':
 	  name = alt_digits ? MONTH_FULL_STANDALONE (datetime)
 			    : MONTH_FULL_WITH_DAY (datetime);
           if (g_strcmp0 (name, "") == 0)
             return FALSE;
-	  if (!locale_is_utf8 &&
-	      ((alt_digits && !MONTH_FULL_STANDALONE_IS_LOCALE) ||
-	       (!alt_digits && !MONTH_FULL_WITH_DAY_IS_LOCALE)))
-	    {
-	      tmp = g_locale_from_utf8 (name, -1, NULL, &tmp_len, NULL);
-	      if (!tmp)
-		return FALSE;
-	      g_string_append_len (outstr, tmp, tmp_len);
-	      g_free (tmp);
-	    }
-	  else
-	    {
-	      g_string_append (outstr, name);
-	    }
+
+          name_is_utf8 = locale_is_utf8 ||
+            ((alt_digits && !MONTH_FULL_STANDALONE_IS_LOCALE) ||
+             (!alt_digits && !MONTH_FULL_WITH_DAY_IS_LOCALE));
+
+          if (!string_append (outstr, name, name_is_utf8))
+              return FALSE;
+
 	  break;
 	case 'c':
 	  {
             if (g_strcmp0 (PREFERRED_DATE_TIME_FMT, "") == 0)
               return FALSE;
-	    if (!g_date_time_locale_format_locale (datetime, PREFERRED_DATE_TIME_FMT,
-						   outstr, locale_is_utf8))
-	      return FALSE;
+            if (!g_date_time_format_locale (datetime, PREFERRED_DATE_TIME_FMT,
+                                            outstr, locale_is_utf8))
+              return FALSE;
 	  }
 	  break;
 	case 'C':
@@ -3067,20 +3090,14 @@ g_date_time_format_locale (GDateTime   *datetime,
 			    : MONTH_ABBR_WITH_DAY (datetime);
           if (g_strcmp0 (name, "") == 0)
             return FALSE;
-	  if (!locale_is_utf8 &&
-	      ((alt_digits && !MONTH_ABBR_STANDALONE_IS_LOCALE) ||
-	       (!alt_digits && !MONTH_ABBR_WITH_DAY_IS_LOCALE)))
-	    {
-	      tmp = g_locale_from_utf8 (name, -1, NULL, &tmp_len, NULL);
-	      if (!tmp)
-		return FALSE;
-	      g_string_append_len (outstr, tmp, tmp_len);
-	      g_free (tmp);
-	    }
-	  else
-	    {
-	      g_string_append (outstr, name);
-	    }
+
+          name_is_utf8 = locale_is_utf8 ||
+            ((alt_digits && !MONTH_ABBR_STANDALONE_IS_LOCALE) ||
+             (!alt_digits && !MONTH_ABBR_WITH_DAY_IS_LOCALE));
+
+          if (!string_append (outstr, name, name_is_utf8))
+            return FALSE;
+
 	  break;
 	case 'H':
 	  format_number (outstr, alt_digits, pad_set ? pad : "0", 2,
@@ -3128,8 +3145,8 @@ g_date_time_format_locale (GDateTime   *datetime,
 	  {
             if (g_strcmp0 (PREFERRED_12HR_TIME_FMT, "") == 0)
               return FALSE;
-	    if (!g_date_time_locale_format_locale (datetime, PREFERRED_12HR_TIME_FMT,
-						   outstr, locale_is_utf8))
+	    if (!g_date_time_format_locale (datetime, PREFERRED_12HR_TIME_FMT,
+					    outstr, locale_is_utf8))
 	      return FALSE;
 	  }
 	  break;
@@ -3170,8 +3187,8 @@ g_date_time_format_locale (GDateTime   *datetime,
 	  {
             if (g_strcmp0 (PREFERRED_DATE_FMT, "") == 0)
               return FALSE;
-	    if (!g_date_time_locale_format_locale (datetime, PREFERRED_DATE_FMT,
-						   outstr, locale_is_utf8))
+	    if (!g_date_time_format_locale (datetime, PREFERRED_DATE_FMT,
+					    outstr, locale_is_utf8))
 	      return FALSE;
 	  }
 	  break;
@@ -3179,8 +3196,8 @@ g_date_time_format_locale (GDateTime   *datetime,
 	  {
             if (g_strcmp0 (PREFERRED_TIME_FMT, "") == 0)
               return FALSE;
-	    if (!g_date_time_locale_format_locale (datetime, PREFERRED_TIME_FMT,
-						   outstr, locale_is_utf8))
+	    if (!g_date_time_format_locale (datetime, PREFERRED_TIME_FMT,
+					    outstr, locale_is_utf8))
 	      return FALSE;
 	  }
 	  break;
@@ -3202,16 +3219,7 @@ g_date_time_format_locale (GDateTime   *datetime,
 	  break;
 	case 'Z':
 	  tz = g_date_time_get_timezone_abbreviation (datetime);
-	  tmp = NULL;
-	  tmp_len = strlen (tz);
-	  if (!locale_is_utf8)
-	    {
-	      tz = tmp = g_locale_from_utf8 (tz, -1, NULL, &tmp_len, NULL);
-	      if (!tmp)
-		return FALSE;
-	    }
-	  g_string_append_len (outstr, tz, tmp_len);
-	  g_free (tmp);
+          g_string_append (outstr, tz);
 	  break;
 	case '%':
 	  g_string_append_c (outstr, '%');
@@ -3230,7 +3238,7 @@ g_date_time_format_locale (GDateTime   *datetime,
 	  goto next_mod;
 	case ':':
 	  /* Colons are only allowed before 'z' */
-	  if (*format && *format != 'z' && *format != ':')
+	  if (*utf8_format && *utf8_format != 'z' && *utf8_format != ':')
 	    return FALSE;
 	  colons++;
 	  goto next_mod;
@@ -3355,7 +3363,6 @@ g_date_time_format (GDateTime   *datetime,
                     const gchar *format)
 {
   GString  *outstr;
-  gchar *utf8;
   gboolean locale_is_utf8 = g_get_charset (NULL);

   g_return_val_if_fail (datetime != NULL, NULL);
@@ -3364,18 +3371,13 @@ g_date_time_format (GDateTime   *datetime,

   outstr = g_string_sized_new (strlen (format) * 2);

-  if (!g_date_time_format_locale (datetime, format, outstr, locale_is_utf8))
+  if (!g_date_time_format_utf8 (datetime, format, outstr, locale_is_utf8))
     {
       g_string_free (outstr, TRUE);
       return NULL;
     }

-  if (locale_is_utf8)
-    return g_string_free (outstr, FALSE);
-
-  utf8 = g_locale_to_utf8 (outstr->str, outstr->len, NULL, NULL, NULL);
-  g_string_free (outstr, TRUE);
-  return utf8;
+  return g_string_free (outstr, FALSE);
 }


diff --git a/glib/tests/gdatetime.c b/glib/tests/gdatetime.c
index 09f84cb21..1a46cf55a 100644
--- a/glib/tests/gdatetime.c
+++ b/glib/tests/gdatetime.c
@@ -1525,6 +1525,23 @@ test_non_utf8_printf (void)
   g_free (oldlocale);
 }

+/* Checks that it is possible to use a format string that
+ * is unrepresentable in the current locale charset. */
+static void
+test_format_unrepresentable (void)
+{
+  gchar *oldlocale = g_strdup (setlocale (LC_ALL, NULL));
+  setlocale (LC_ALL, "POSIX");
+
+  TEST_PRINTF ("ąśćł", "ąśćł");
+
+  /* We are using Unicode ratio symbol here, which is outside ASCII. */
+  TEST_PRINTF_TIME (23, 15, 0, "%H∶%M", "23∶15");
+
+  setlocale (LC_ALL, oldlocale);
+  g_free (oldlocale);
+}
+
 static void
 test_modifiers (void)
 {
@@ -2493,6 +2510,7 @@ main (gint   argc,
   g_test_add_func ("/GDateTime/now", test_GDateTime_now);
   g_test_add_func ("/GDateTime/printf", test_GDateTime_printf);
   g_test_add_func ("/GDateTime/non_utf8_printf", test_non_utf8_printf);
+  g_test_add_func ("/GDateTime/format_unrepresentable", test_format_unrepresentable);
   g_test_add_func ("/GDateTime/strftime", test_strftime);
   g_test_add_func ("/GDateTime/strftime/error_handling", test_GDateTime_strftime_error_handling);
   g_test_add_func ("/GDateTime/modifiers", test_modifiers);
--
2.19.1