[glib: 3/4] Add private functions to correctly convert datetime when LC_TIME is not UTF8
- From: Philip Withnall <pwithnall src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [glib: 3/4] Add private functions to correctly convert datetime when LC_TIME is not UTF8
- Date: Wed, 31 Mar 2021 16:16:17 +0000 (UTC)
commit 782eb1f7af8a1e934b786102b49abf731c0ed606
Author: Frederic Martinsons <frederic martinsons sigfox com>
Date: Tue Dec 1 12:47:27 2020 +0100
Add private functions to correctly convert datetime when LC_TIME is not UTF8
Add functions (_g_get_time_charset and _g_get_ctype_charset) to get the LC_TIME and LC_CTYPE
charsets (by using nl_langinfo with _NL_TIME_CODESET and CODESET).
Add further functions (_g_time_locale_to_utf8 and _g_ctype_locale_to_utf8) which use them and
convert the input string to UTF-8 accordingly.
Add new test cases mixing UTF-8 and non-UTF-8 LC_TIME with UTF-8
and non-UTF-8 LC_MESSAGES.
Closed #2055
Signed-off-by: Frederic Martinsons <frederic martinsons sigfox com>
glib/gcharset.c | 87 +++++++++++++++++++++++++++++++++++
glib/gcharsetprivate.h | 4 ++
glib/gconvert.c | 47 +++++++++++++++++++
glib/gconvertprivate.h | 40 ++++++++++++++++
glib/gdatetime.c | 15 +++---
glib/tests/gdatetime.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++++
meson.build | 9 ++++
7 files changed, 318 insertions(+), 6 deletions(-)
---
diff --git a/glib/gcharset.c b/glib/gcharset.c
index bb775bda4..9f91a9b48 100644
--- a/glib/gcharset.c
+++ b/glib/gcharset.c
@@ -36,6 +36,12 @@
#include <string.h>
#include <stdio.h>
+
+#if (HAVE_LANGINFO_TIME_CODESET || HAVE_LANGINFO_CODESET)
+#include <langinfo.h>
+#endif
+
+#include <locale.h>
#ifdef G_OS_WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
@@ -215,6 +221,87 @@ g_get_charset (const char **charset)
return cache->is_utf8;
}
+/*
+ * Like g_get_charset(), but for LC_TIME. No locale is changed: the codeset is
+ * read with nl_langinfo (_NL_TIME_CODESET) if HAVE_LANGINFO_TIME_CODESET is set,
+ * else from _g_locale_charset_raw (). @charset (nullable) points into the cache.
+ * Returns: %TRUE if the returned charset is UTF-8
+ */
+gboolean
+_g_get_time_charset (const char **charset)
+{
+ static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free);
+ GCharsetCache *cache = g_private_get (&cache_private);
+ const gchar *raw;
+
+ if (!cache)
+ cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache));
+
+#ifdef HAVE_LANGINFO_TIME_CODESET
+ raw = nl_langinfo (_NL_TIME_CODESET);
+#else
+ G_LOCK (aliases);
+ raw = _g_locale_charset_raw ();
+ G_UNLOCK (aliases);
+#endif
+
+ if (cache->raw == NULL || strcmp (cache->raw, raw) != 0) /* raw codeset changed: refresh the cache */
+ {
+ const gchar *new_charset;
+
+ g_free (cache->raw);
+ g_free (cache->charset);
+ cache->raw = g_strdup (raw);
+ cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset);
+ cache->charset = g_strdup (new_charset);
+ }
+
+ if (charset)
+ *charset = cache->charset;
+
+ return cache->is_utf8;
+}
+/*
+ * Like g_get_charset(), but for LC_CTYPE. No locale is changed: the codeset is
+ * read with nl_langinfo (CODESET) if HAVE_LANGINFO_CODESET is defined, else
+ * from _g_locale_charset_raw (). @charset (nullable) points into the cache.
+ * Returns: %TRUE if the returned charset is UTF-8
+ */
+gboolean
+_g_get_ctype_charset (const char **charset)
+{
+ static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free);
+ GCharsetCache *cache = g_private_get (&cache_private);
+ const gchar *raw;
+
+ if (!cache)
+ cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache));
+
+#ifdef HAVE_LANGINFO_CODESET
+ raw = nl_langinfo (CODESET);
+#else
+ G_LOCK (aliases);
+ raw = _g_locale_charset_raw ();
+ G_UNLOCK (aliases);
+#endif
+
+ if (cache->raw == NULL || strcmp (cache->raw, raw) != 0) /* raw codeset changed: refresh the cache */
+ {
+ const gchar *new_charset;
+
+ g_free (cache->raw);
+ g_free (cache->charset);
+ cache->raw = g_strdup (raw);
+ cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset);
+ cache->charset = g_strdup (new_charset);
+ }
+
+ if (charset)
+ *charset = cache->charset;
+
+ return cache->is_utf8;
+}
+
/**
* g_get_codeset:
*
diff --git a/glib/gcharsetprivate.h b/glib/gcharsetprivate.h
index f6b68dcd7..9b1def278 100644
--- a/glib/gcharsetprivate.h
+++ b/glib/gcharsetprivate.h
@@ -25,6 +25,10 @@ G_BEGIN_DECLS
const char ** _g_charset_get_aliases (const char *canonical_name);
+gboolean _g_get_time_charset (const char **charset);
+
+gboolean _g_get_ctype_charset (const char **charset);
+
G_END_DECLS
#endif
diff --git a/glib/gconvert.c b/glib/gconvert.c
index f78cff01d..7697ff65d 100644
--- a/glib/gconvert.c
+++ b/glib/gconvert.c
@@ -40,6 +40,7 @@
#endif
#include "gconvert.h"
+#include "gconvertprivate.h"
#include "gcharsetprivate.h"
#include "gslist.h"
@@ -1015,6 +1016,52 @@ g_locale_to_utf8 (const gchar *opsysstring,
bytes_read, bytes_written, error);
}
+/*
+ * Do the exact same as g_locale_to_utf8(), except that the source charset is
+ * the one reported by _g_get_time_charset() (LC_TIME). If that charset is UTF-8
+ * the input goes through strdup_len(); otherwise it is converted to "UTF-8".
+ * Returns: The converted string, or %NULL on an error.
+ */
+gchar *
+_g_time_locale_to_utf8 (const gchar *opsysstring,
+ gssize len,
+ gsize *bytes_read,
+ gsize *bytes_written,
+ GError **error)
+{
+ const char *charset;
+
+ if (_g_get_time_charset (&charset))
+ return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
+ else
+ return convert_checked (opsysstring, len, "UTF-8", charset,
+ CONVERT_CHECK_NO_NULS_IN_OUTPUT,
+ bytes_read, bytes_written, error);
+}
+
+/*
+ * Do the exact same as g_locale_to_utf8(), except that the source charset is
+ * the one reported by _g_get_ctype_charset() (LC_CTYPE). If that charset is UTF-8
+ * the input goes through strdup_len(); otherwise it is converted to "UTF-8".
+ * Returns: The converted string, or %NULL on an error.
+ */
+gchar *
+_g_ctype_locale_to_utf8 (const gchar *opsysstring,
+ gssize len,
+ gsize *bytes_read,
+ gsize *bytes_written,
+ GError **error)
+{
+ const char *charset;
+
+ if (_g_get_ctype_charset (&charset))
+ return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
+ else
+ return convert_checked (opsysstring, len, "UTF-8", charset,
+ CONVERT_CHECK_NO_NULS_IN_OUTPUT,
+ bytes_read, bytes_written, error);
+}
+
/**
* g_locale_from_utf8:
* @utf8string: a UTF-8 encoded string
diff --git a/glib/gconvertprivate.h b/glib/gconvertprivate.h
new file mode 100644
index 000000000..5bdc87ff6
--- /dev/null
+++ b/glib/gconvertprivate.h
@@ -0,0 +1,40 @@
+/* gconvertprivate.h - Private GLib gconvert functions
+ *
+ * Copyright 2020 Frederic Martinsons
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __G_CONVERTPRIVATE_H__
+#define __G_CONVERTPRIVATE_H__
+
+#include "glib.h"
+
+G_BEGIN_DECLS
+/* Like g_locale_to_utf8(), but converts from the LC_TIME charset. */
+gchar *_g_time_locale_to_utf8 (const gchar *opsysstring,
+                               gssize len,
+                               gsize *bytes_read,
+                               gsize *bytes_written,
+                               GError **error) G_GNUC_MALLOC;
+/* Like g_locale_to_utf8(), but converts from the LC_CTYPE charset. */
+gchar *_g_ctype_locale_to_utf8 (const gchar *opsysstring,
+                                gssize len,
+                                gsize *bytes_read,
+                                gsize *bytes_written,
+                                GError **error) G_GNUC_MALLOC;
+
+G_END_DECLS
+
+#endif /* __G_CONVERTPRIVATE_H__ */
diff --git a/glib/gdatetime.c b/glib/gdatetime.c
index 219dfb7de..a31afe713 100644
--- a/glib/gdatetime.c
+++ b/glib/gdatetime.c
@@ -62,7 +62,9 @@
#include "gatomic.h"
#include "gcharset.h"
+#include "gcharsetprivate.h"
#include "gconvert.h"
+#include "gconvertprivate.h"
#include "gdatetime.h"
#include "gfileutils.h"
#include "ghash.h"
@@ -2869,7 +2871,7 @@ initialize_alt_digits (void)
if (g_strcmp0 (locale_digit, "") == 0)
return NULL;
- digit = g_locale_to_utf8 (locale_digit, -1, NULL, &digit_len, NULL);
+ digit = _g_ctype_locale_to_utf8 (locale_digit, -1, NULL, &digit_len, NULL);
if (digit == NULL)
return NULL;
@@ -2993,7 +2995,7 @@ g_date_time_format_locale (GDateTime *datetime,
if (locale_is_utf8)
return g_date_time_format_utf8 (datetime, locale_format, outstr, locale_is_utf8);
- utf8_format = g_locale_to_utf8 (locale_format, -1, NULL, NULL, NULL);
+ utf8_format = _g_time_locale_to_utf8 (locale_format, -1, NULL, NULL, NULL);
if (utf8_format == NULL)
return FALSE;
@@ -3017,7 +3019,7 @@ string_append (GString *string,
}
else
{
- utf8 = g_locale_to_utf8 (s, -1, NULL, &utf8_len, NULL);
+ utf8 = _g_time_locale_to_utf8 (s, -1, NULL, &utf8_len, NULL);
if (utf8 == NULL)
return FALSE;
g_string_append_len (string, utf8, utf8_len);
@@ -3443,10 +3445,11 @@ g_date_time_format (GDateTime *datetime,
{
GString *outstr;
const gchar *charset;
- /* Avoid conversions from locale charset to UTF-8 if charset is compatible
+ /* Avoid conversions from locale (for LC_TIME and not for LC_MESSAGES unless
+ * specified otherwise) charset to UTF-8 if charset is compatible
* with UTF-8 already. Check for UTF-8 and synonymous canonical names of
* ASCII. */
- gboolean locale_is_utf8_compatible = g_get_charset (&charset) ||
+ gboolean time_is_utf8_compatible = _g_get_time_charset (&charset) ||
g_strcmp0 ("ASCII", charset) == 0 ||
g_strcmp0 ("ANSI_X3.4-1968", charset) == 0;
@@ -3457,7 +3460,7 @@ g_date_time_format (GDateTime *datetime,
outstr = g_string_sized_new (strlen (format) * 2);
if (!g_date_time_format_utf8 (datetime, format, outstr,
- locale_is_utf8_compatible))
+ time_is_utf8_compatible))
{
g_string_free (outstr, TRUE);
return NULL;
diff --git a/glib/tests/gdatetime.c b/glib/tests/gdatetime.c
index bc4eba93a..12f332b44 100644
--- a/glib/tests/gdatetime.c
+++ b/glib/tests/gdatetime.c
@@ -2318,6 +2318,116 @@ test_format_iso8601 (void)
g_time_zone_unref (tz);
}
+typedef struct
+{
+ gboolean utf8_messages; /* LC_MESSAGES: TRUE selects "C.UTF-8", FALSE "de_DE.iso88591" */
+ gboolean utf8_time; /* LC_TIME: TRUE selects "C.UTF-8", FALSE "de_DE.iso88591" */
+} MixedUtf8TestData;
+
+static const MixedUtf8TestData utf8_time_non_utf8_messages = {
+ .utf8_messages = FALSE,
+ .utf8_time = TRUE
+};
+
+static const MixedUtf8TestData non_utf8_time_utf8_messages = {
+ .utf8_messages = TRUE,
+ .utf8_time = FALSE
+};
+
+static const MixedUtf8TestData utf8_time_utf8_messages = {
+ .utf8_messages = TRUE,
+ .utf8_time = TRUE
+};
+
+static const MixedUtf8TestData non_utf8_time_non_utf8_messages = {
+ .utf8_messages = FALSE,
+ .utf8_time = FALSE
+};
+
+/* Set @category to @name; on failure mark the test skipped and return FALSE. */
+static gboolean
+check_and_set_locale (int category, const gchar *name)
+{
+  const char *applied = setlocale (category, name); /* NULL if the request cannot be honoured */
+  if (applied == NULL || strstr (applied, name) == NULL)
+    {
+      g_print ("Unavailable '%s' locale\n", name);
+      g_test_skip ("required locale not available, skipping tests");
+      return FALSE;
+    }
+  return TRUE;
+}
+/* Formats %b/%B with LC_TIME and LC_MESSAGES set per the MixedUtf8TestData in @data. */
+static void
+test_format_time_mixed_utf8 (gconstpointer data)
+{
+ const MixedUtf8TestData *test_data;
+ gchar *old_time_locale;
+ gchar *old_messages_locale;
+ g_test_bug ("https://gitlab.gnome.org/GNOME/glib/-/issues/2055");
+
+ test_data = (MixedUtf8TestData *) data;
+ old_time_locale = g_strdup (setlocale (LC_TIME, NULL));
+ old_messages_locale = g_strdup (setlocale (LC_MESSAGES, NULL));
+ if (test_data->utf8_time)
+ {
+ if (!check_and_set_locale (LC_TIME, "C.UTF-8"))
+ {
+ g_free (old_time_locale);
+ setlocale (LC_MESSAGES, old_messages_locale);
+ g_free (old_messages_locale);
+ return;
+ }
+ }
+ else
+ {
+ if (!check_and_set_locale (LC_TIME, "de_DE.iso88591"))
+ {
+ g_free (old_time_locale);
+ setlocale (LC_MESSAGES, old_messages_locale);
+ g_free (old_messages_locale);
+ return;
+ }
+ }
+ if (test_data->utf8_messages)
+ {
+ if (!check_and_set_locale (LC_MESSAGES, "C.UTF-8"))
+ {
+ g_free (old_messages_locale);
+ setlocale (LC_TIME, old_time_locale);
+ g_free (old_time_locale);
+ return;
+ }
+ }
+ else
+ {
+ if (!check_and_set_locale (LC_MESSAGES, "de_DE.iso88591"))
+ {
+ g_free (old_messages_locale);
+ setlocale (LC_TIME, old_time_locale);
+ g_free (old_time_locale);
+ return;
+ }
+ }
+
+ if (!test_data->utf8_time)
+ {
+ /* March, so that the German month name (März) contains a non-ASCII character */
+ TEST_PRINTF_DATE (2020, 3, 1, "%b", "Mär");
+ TEST_PRINTF_DATE (2020, 3, 1, "%B", "März");
+ }
+ else
+ {
+ TEST_PRINTF_DATE (2020, 3, 1, "%b", "mar");
+ TEST_PRINTF_DATE (2020, 3, 1, "%B", "march");
+ }
+
+ setlocale (LC_TIME, old_time_locale);
+ setlocale (LC_MESSAGES, old_messages_locale);
+ g_free (old_time_locale);
+ g_free (old_messages_locale);
+}
+
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wformat-y2k"
static void
@@ -2980,6 +3090,18 @@ main (gint argc,
g_test_add_func ("/GDateTime/non_utf8_printf", test_non_utf8_printf);
g_test_add_func ("/GDateTime/format_unrepresentable", test_format_unrepresentable);
g_test_add_func ("/GDateTime/format_iso8601", test_format_iso8601);
+ g_test_add_data_func ("/GDateTime/format_mixed/utf8_time_non_utf8_messages",
+ &utf8_time_non_utf8_messages,
+ test_format_time_mixed_utf8);
+ g_test_add_data_func ("/GDateTime/format_mixed/utf8_time_utf8_messages",
+ &utf8_time_utf8_messages,
+ test_format_time_mixed_utf8);
+ g_test_add_data_func ("/GDateTime/format_mixed/non_utf8_time_non_utf8_messages",
+ &non_utf8_time_non_utf8_messages,
+ test_format_time_mixed_utf8);
+ g_test_add_data_func ("/GDateTime/format_mixed/non_utf8_time_utf8_messages",
+ &non_utf8_time_utf8_messages,
+ test_format_time_mixed_utf8);
g_test_add_func ("/GDateTime/strftime", test_strftime);
g_test_add_func ("/GDateTime/strftime/error_handling", test_GDateTime_strftime_error_handling);
g_test_add_func ("/GDateTime/modifiers", test_modifiers);
diff --git a/meson.build b/meson.build
index 2cf1c7b4f..eb401123c 100644
--- a/meson.build
+++ b/meson.build
@@ -1185,6 +1185,15 @@ if cc.links('''#ifndef _GNU_SOURCE
glib_conf.set('HAVE_LANGINFO_ABALTMON', 1)
endif
+# Check for nl_langinfo and _NL_TIME_CODESET (used to query the LC_TIME charset)
+if cc.links('''#include <langinfo.h>
+ int main (int argc, char ** argv) {
+ char *codeset = nl_langinfo (_NL_TIME_CODESET);
+ return 0;
+ }''', name : 'nl_langinfo and _NL_TIME_CODESET')
+ glib_conf.set('HAVE_LANGINFO_TIME_CODESET', 1)
+endif
+
# Check if C compiler supports the 'signed' keyword
if not cc.compiles('''signed char x;''', name : 'signed')
glib_conf.set('signed', '/* NOOP */')
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]