[evolution] I#1428 - Can't import UTF-16 encoded .ics files



commit ec8e8564ca8676cfdc28529e28f71b4eaca3d7f9
Author: Milan Crha <mcrha redhat com>
Date:   Thu May 27 10:26:36 2021 +0200

    I#1428 - Can't import UTF-16 encoded .ics files
    
    Closes https://gitlab.gnome.org/GNOME/evolution/-/issues/1428

 .../importers/evolution-vcard-importer.c           | 181 ++-------------------
 src/calendar/importers/icalendar-importer.c        |  17 +-
 src/e-util/e-import.c                              |  97 +++++++++++
 src/e-util/e-import.h                              |   3 +
 4 files changed, 124 insertions(+), 174 deletions(-)
---
diff --git a/src/addressbook/importers/evolution-vcard-importer.c b/src/addressbook/importers/evolution-vcard-importer.c
index 2eb7185f81..1bf8d6d4c7 100644
--- a/src/addressbook/importers/evolution-vcard-importer.c
+++ b/src/addressbook/importers/evolution-vcard-importer.c
@@ -40,15 +40,6 @@
 
 #include "evolution-addressbook-importers.h"
 
-enum _VCardEncoding {
-       VCARD_ENCODING_NONE,
-       VCARD_ENCODING_UTF8,
-       VCARD_ENCODING_UTF16,
-       VCARD_ENCODING_LOCALE
-};
-
-typedef enum _VCardEncoding VCardEncoding;
-
 typedef struct {
        EImport *import;
        EImportTarget *target;
@@ -67,7 +58,6 @@ typedef struct {
 
        /* when opening book */
        gchar *contents;
-       VCardEncoding encoding;
 } VCardImporter;
 
 static void vcard_import_done (VCardImporter *gci);
@@ -250,108 +240,6 @@ vcard_import_contacts (gpointer data)
        }
 }
 
-#define BOM (gunichar2)0xFEFF
-#define ANTIBOM (gunichar2)0xFFFE
-
-static gboolean
-has_bom (const gunichar2 *utf16)
-{
-
-       if ((utf16 == NULL) || (*utf16 == '\0')) {
-               return FALSE;
-       }
-
-       return ((*utf16 == BOM) || (*utf16 == ANTIBOM));
-}
-
-static void
-fix_utf16_endianness (gunichar2 *utf16)
-{
-       gunichar2 *it;
-
-       if ((utf16 == NULL) || (*utf16 == '\0')) {
-               return;
-       }
-
-       if (*utf16 != ANTIBOM) {
-               return;
-       }
-
-       for (it = utf16; *it != '\0'; it++) {
-               *it = GUINT16_SWAP_LE_BE (*it);
-       }
-}
-
-/* Converts an UTF-16 string to an UTF-8 string removing the BOM character
- * WARNING: this may modify the utf16 argument if the function detects the
- * string isn't using the local endianness
- */
-static gchar *
-utf16_to_utf8 (gunichar2 *utf16)
-{
-
-       if (utf16 == NULL) {
-               return NULL;
-       }
-
-       fix_utf16_endianness (utf16);
-
-       if (*utf16 == BOM) {
-               utf16++;
-       }
-
-       return g_utf16_to_utf8 (utf16, -1, NULL, NULL, NULL);
-}
-
-/* Actually check the contents of this file */
-static VCardEncoding
-guess_vcard_encoding (const gchar *filename)
-{
-       FILE *handle;
-       gchar line[4096];
-       gchar *line_utf8;
-       VCardEncoding encoding = VCARD_ENCODING_NONE;
-
-       handle = g_fopen (filename, "r");
-       if (handle == NULL) {
-               return VCARD_ENCODING_NONE;
-       }
-
-       if (fgets (line, 4096, handle) == NULL) {
-               fclose (handle);
-               return VCARD_ENCODING_NONE;
-       }
-       fclose (handle);
-
-       if (has_bom ((gunichar2 *) line)) {
-               gunichar2 *utf16 = (gunichar2 *) line;
-               /* Check for a BOM to try to detect UTF-16 encoded vcards
-                * (MacOSX address book creates such vcards for example)
-                */
-               line_utf8 = utf16_to_utf8 (utf16);
-               if (line_utf8 == NULL) {
-                       return VCARD_ENCODING_NONE;
-               }
-               encoding = VCARD_ENCODING_UTF16;
-       } else if (g_utf8_validate (line, -1, NULL)) {
-               line_utf8 = g_strdup (line);
-               encoding = VCARD_ENCODING_UTF8;
-       } else {
-               line_utf8 = g_locale_to_utf8 (line, -1, NULL, NULL, NULL);
-               if (line_utf8 == NULL) {
-                       return VCARD_ENCODING_NONE;
-               }
-               encoding = VCARD_ENCODING_LOCALE;
-       }
-
-       if (g_ascii_strncasecmp (line_utf8, "BEGIN:VCARD", 11) != 0) {
-               encoding = VCARD_ENCODING_NONE;
-       }
-
-       g_free (line_utf8);
-       return encoding;
-}
-
 static void
 primary_selection_changed_cb (ESourceSelector *selector,
                               EImportTarget *target)
@@ -427,7 +315,7 @@ vcard_supported (EImport *ei,
                  EImportImporter *im)
 {
        EImportTargetURI *s;
-       gchar *filename;
+       gchar *filename, *contents;
        gboolean retval;
 
        if (target->type != E_IMPORT_TARGET_URI)
@@ -443,7 +331,9 @@ vcard_supported (EImport *ei,
        filename = g_filename_from_uri (s->uri_src, NULL, NULL);
        if (filename == NULL)
                return FALSE;
-       retval = (guess_vcard_encoding (filename) != VCARD_ENCODING_NONE);
+       contents = e_import_util_get_file_contents (filename, NULL);
+       retval = contents && g_ascii_strncasecmp (contents, "BEGIN:VCARD", 11) == 0;
+       g_free (contents);
        g_free (filename);
 
        return retval;
@@ -480,22 +370,6 @@ book_client_connect_cb (GObject *source_object,
        }
 
        gci->book_client = E_BOOK_CLIENT (client);
-
-       if (gci->encoding == VCARD_ENCODING_UTF16) {
-               gchar *tmp;
-
-               gunichar2 *contents_utf16 = (gunichar2 *) gci->contents;
-               tmp = utf16_to_utf8 (contents_utf16);
-               g_free (gci->contents);
-               gci->contents = tmp;
-
-       } else if (gci->encoding == VCARD_ENCODING_LOCALE) {
-               gchar *tmp;
-               tmp = g_locale_to_utf8 (gci->contents, -1, NULL, NULL, NULL);
-               g_free (gci->contents);
-               gci->contents = tmp;
-       }
-
        gci->contactlist = eab_contact_list_from_string (gci->contents);
        g_free (gci->contents);
        gci->contents = NULL;
@@ -518,7 +392,6 @@ vcard_import (EImport *ei,
        EImportTargetURI *s = (EImportTargetURI *) target;
        gchar *filename;
        gchar *contents;
-       VCardEncoding encoding;
        GError *error = NULL;
 
        filename = g_filename_from_uri (s->uri_src, NULL, &error);
@@ -528,17 +401,9 @@ vcard_import (EImport *ei,
 
                return;
        }
-       encoding = guess_vcard_encoding (filename);
-       if (encoding == VCARD_ENCODING_NONE) {
-               g_free (filename);
-               /* This check is superfluous, we've already
-                * checked otherwise we can't get here ... */
-               e_import_complete (ei, target, NULL);
 
-               return;
-       }
-
-       if (!g_file_get_contents (filename, &contents, NULL, &error)) {
+       contents = e_import_util_get_file_contents (filename, &error);
+       if (!contents) {
                g_free (filename);
                e_import_complete (ei, target, error);
                g_clear_error (&error);
@@ -551,7 +416,6 @@ vcard_import (EImport *ei,
        g_datalist_set_data (&target->data, "vcard-data", gci);
        gci->import = g_object_ref (ei);
        gci->target = target;
-       gci->encoding = encoding;
        gci->contents = contents;
 
        source = g_datalist_get_data (&target->data, "vcard-source");
@@ -578,44 +442,27 @@ vcard_get_preview (EImport *ei,
        GtkWidget *preview;
        GSList *contacts;
        gchar *contents;
-       VCardEncoding encoding;
        EImportTargetURI *s = (EImportTargetURI *) target;
        gchar *filename;
+       GError *error = NULL;
 
-       filename = g_filename_from_uri (s->uri_src, NULL, NULL);
+       filename = g_filename_from_uri (s->uri_src, NULL, &error);
        if (filename == NULL) {
-               g_message (G_STRLOC ": Couldn't get filename from URI '%s'", s->uri_src);
-               return NULL;
-       }
-
-       encoding = guess_vcard_encoding (filename);
-       if (encoding == VCARD_ENCODING_NONE) {
-               g_free (filename);
+               g_message (G_STRLOC ": Couldn't get filename from URI '%s': %s", s->uri_src, error ? error->message : "Unknown error");
+               g_clear_error (&error);
                return NULL;
        }
 
-       if (!g_file_get_contents (filename, &contents, NULL, NULL)) {
-               g_message (G_STRLOC ": Couldn't read file.");
+       contents = e_import_util_get_file_contents (filename, &error);
+       if (!contents) {
+               g_message (G_STRLOC ": Couldn't read file '%s': %s", filename, error ? error->message : "Unknown error");
+               g_clear_error (&error);
                g_free (filename);
                return NULL;
        }
 
        g_free (filename);
 
-       if (encoding == VCARD_ENCODING_UTF16) {
-               gchar *tmp;
-
-               gunichar2 *contents_utf16 = (gunichar2 *) contents;
-               tmp = utf16_to_utf8 (contents_utf16);
-               g_free (contents);
-               contents = tmp;
-       } else if (encoding == VCARD_ENCODING_LOCALE) {
-               gchar *tmp;
-               tmp = g_locale_to_utf8 (contents, -1, NULL, NULL, NULL);
-               g_free (contents);
-               contents = tmp;
-       }
-
        contacts = eab_contact_list_from_string (contents);
        g_free (contents);
 
diff --git a/src/calendar/importers/icalendar-importer.c b/src/calendar/importers/icalendar-importer.c
index 97e26c9301..f2ddcaa018 100644
--- a/src/calendar/importers/icalendar-importer.c
+++ b/src/calendar/importers/icalendar-importer.c
@@ -572,7 +572,8 @@ ical_supported (EImport *ei,
        if (!filename)
                return FALSE;
 
-       if (g_file_get_contents (filename, &contents, NULL, NULL)) {
+       contents = e_import_util_get_file_contents (filename, NULL);
+       if (contents) {
                ICalComponent *icomp;
 
                icomp = e_cal_util_parse_ics_string (contents);
@@ -606,7 +607,8 @@ ical_import (EImport *ei,
                return;
        }
 
-       if (!g_file_get_contents (filename, &contents, NULL, &error)) {
+       contents = e_import_util_get_file_contents (filename, &error);
+       if (!contents) {
                g_free (filename);
                e_import_complete (ei, target, error);
                g_clear_error (&error);
@@ -640,7 +642,8 @@ ivcal_get_preview (EImport *ei,
                return NULL;
        }
 
-       if (!g_file_get_contents (filename, &contents, NULL, NULL)) {
+       contents = e_import_util_get_file_contents (filename, NULL);
+       if (!contents) {
                g_free (filename);
                return NULL;
        }
@@ -707,9 +710,8 @@ vcal_supported (EImport *ei,
        if (!filename)
                return FALSE;
 
-       /* Z: Wow, this is *efficient* */
-
-       if (g_file_get_contents (filename, &contents, NULL, NULL)) {
+       contents = e_import_util_get_file_contents (filename, NULL);
+       if (contents) {
                VObject *vcal;
                ICalComponent *icomp;
 
@@ -770,7 +772,8 @@ load_vcalendar_file (const gchar *filename)
        defaults.alarm_audio_fmttype = (gchar *) "audio/x-wav";
        defaults.alarm_description = (gchar *) _("Reminder!");
 
-       if (g_file_get_contents (filename, &contents, NULL, NULL)) {
+       contents = e_import_util_get_file_contents (filename, NULL);
+       if (contents) {
                VObject *vcal;
 
                /* parse the file */
diff --git a/src/e-util/e-import.c b/src/e-util/e-import.c
index 62822d7954..0a1b65301a 100644
--- a/src/e-util/e-import.c
+++ b/src/e-util/e-import.c
@@ -467,6 +467,103 @@ e_import_target_new_home (EImport *import)
                import, E_IMPORT_TARGET_HOME, sizeof (EImportTargetHome));
 }
 
+/**
+ * e_import_util_get_file_contents:
+ * @filename: a local file name to read the contents from
+ * @error: (nullable): a return location for a #GError, or %NULL
+ *
+ * Reads the @filename content and returns it as UTF-8 when it is UTF-8, UTF-16 or convertible from the locale encoding, otherwise as read.
+ *
+ * Returns: (transfer full) (nullable): the file content, or %NULL on error,
+ *    in which case the @error is set.
+ *
+ * Since: 3.42
+ **/
+gchar *
+e_import_util_get_file_contents (const gchar *filename,
+                                GError **error)
+{
+       gchar *raw_content = NULL;
+       gsize length = 0; /* size of raw_content, in bytes */
+       gunichar2 *utf16;
+       gboolean is_utf16, is_utf16_swapped;
+       gchar *res = NULL;
+
+       g_return_val_if_fail (filename != NULL, NULL);
+
+       if (!g_file_get_contents (filename, &raw_content, &length, error))
+               return NULL;
+
+       if (length < 2) /* too short to hold even one 16-bit unit, return it as read */
+               return raw_content;
+
+       utf16 = (gunichar2 *) raw_content;
+
+       /* Check the UTF-16 BOM; 0xFFFE is the BOM as seen with the opposite byte order */
+       is_utf16 = *utf16 == ((gunichar2) 0xFEFF);
+       is_utf16_swapped = *utf16 == ((gunichar2) 0xFFFE);
+
+       if (length > 4 && !is_utf16 && !is_utf16_swapped) {
+               /* No BOM: guess UTF-16 when both of the first two 16-bit units are non-zero with the upper byte zero (native order)
+                  or with the lower byte zero (swapped order); the guess fails when those characters use both bytes of the unit */
+               is_utf16 = utf16[0] && !(utf16[0] & 0xFF00) && utf16[1] && !(utf16[1] & 0xFF00);
+               is_utf16_swapped = utf16[0] && !(utf16[0] & 0xFF) && utf16[1] && !(utf16[1] & 0xFF);
+       }
+
+       if (is_utf16 || is_utf16_swapped) {
+               glong len = length / 2; /* count of 16-bit units; an odd trailing byte is left out */
+
+               /* Swap the bytes in place, to match the local endianness */
+               if (is_utf16_swapped) {
+                       gunichar2 *pos_str;
+                       gsize npos;
+
+                       for (npos = 0, pos_str = utf16; npos < len; npos++, pos_str++) {
+                               *pos_str = GUINT16_SWAP_LE_BE (*pos_str);
+                       }
+               }
+
+               if (*utf16 == ((gunichar2) 0xFEFF)) { /* skip the BOM, it is not passed to the conversion */
+                       utf16++;
+                       len--;
+               }
+
+               res = g_utf16_to_utf8 (utf16, len, NULL, NULL, NULL);
+
+               if (res) {
+                       g_free (raw_content);
+                       return res;
+               }
+
+               /* The conversion failed: undo the BOM skip and the byte swap, to have raw_content as it was read */
+               if (len != length / 2) { /* true only when the BOM was skipped above */
+                       utf16--;
+                       len++;
+               }
+
+               if (is_utf16_swapped) {
+                       gunichar2 *pos_str;
+                       gsize npos;
+
+                       for (npos = 0, pos_str = utf16; npos < len; npos++, pos_str++) {
+                               *pos_str = GUINT16_SWAP_LE_BE (*pos_str);
+                       }
+               }
+       }
+
+       if (g_utf8_validate (raw_content, -1, NULL)) /* already valid UTF-8, no conversion needed */
+               return raw_content;
+
+       res = g_locale_to_utf8 (raw_content, length, NULL, NULL, NULL);
+
+       if (res)
+               g_free (raw_content);
+       else
+               res = raw_content; /* the locale conversion failed too, return the content as read */
+
+       return res;
+}
+
 /* ********************************************************************** */
 
 /* Import menu plugin handler */
diff --git a/src/e-util/e-import.h b/src/e-util/e-import.h
index fd85a7ecff..1f9fef0974 100644
--- a/src/e-util/e-import.h
+++ b/src/e-util/e-import.h
@@ -245,6 +245,9 @@ EImportTargetURI *
 EImportTargetHome *
                e_import_target_new_home        (EImport *import);
 
+gchar *                e_import_util_get_file_contents (const gchar *filename,
+                                                GError **error);
+
 /* ********************************************************************** */
 
 /* import plugin target, they are closely integrated */


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]