[epiphany] Allow importing bookmarks from HTML files



commit 86694e0b04433f478d53cb4b89989c14d37fddcd
Author: ignapk <automat script gmail com>
Date:   Mon Jan 20 03:26:24 2020 +0100

    Allow importing bookmarks from HTML files
    
    Currently there is no way to import bookmarks from HTML files that use
    the Netscape Bookmark Format.
    
    To fix this issue, add an option to import from HTML in the bookmark
    import dialog and parse the chosen file using GMarkupParser to extract
    each bookmark's URL, date added and title.
    
    https://gitlab.gnome.org/GNOME/epiphany/issues/483

 src/bookmarks/ephy-bookmarks-import.c | 212 ++++++++++++++++++++++++++++++
 src/bookmarks/ephy-bookmarks-import.h |   4 +
 src/window-commands.c                 | 235 ++++++++++++++++++++++------------
 3 files changed, 366 insertions(+), 85 deletions(-)
---
diff --git a/src/bookmarks/ephy-bookmarks-import.c b/src/bookmarks/ephy-bookmarks-import.c
index 20c884615..de80a907b 100644
--- a/src/bookmarks/ephy-bookmarks-import.c
+++ b/src/bookmarks/ephy-bookmarks-import.c
@@ -23,6 +23,7 @@
 
 #include "ephy-shell.h"
 #include "ephy-sqlite-connection.h"
+#include "ephy-sync-utils.h"
 #include "gvdb-builder.h"
 #include "gvdb-reader.h"
 
@@ -308,3 +309,214 @@ out:
 
   return ret;
 }
+
+void replace_str (char **src,
+                  char  *find,
+                  char  *replace)
+{
+  g_auto (GStrv) split = g_strsplit (*src, find, -1);
+  g_free (*src);
+  *src = g_strjoinv (replace, split);
+}
+
+typedef struct {
+  GQueue *tags_stack;
+  GHashTable *urls_table;
+  GPtrArray *tags;
+  GPtrArray *urls;
+  GPtrArray *add_dates;
+  GPtrArray *titles;
+  gboolean read_title;
+  gboolean read_tag;
+} ParserData;
+
+static ParserData *
+parser_data_new ()
+{
+  ParserData *data;
+
+  data = g_new (ParserData, 1);
+  data->tags_stack = g_queue_new ();
+  data->urls_table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, (GDestroyNotify)g_ptr_array_unref);
+  data->tags = g_ptr_array_new_with_free_func (g_free);
+  data->urls = g_ptr_array_new_with_free_func (g_free);
+  data->add_dates = g_ptr_array_new_with_free_func (g_free);
+  data->titles = g_ptr_array_new_with_free_func (g_free);
+  data->read_title = FALSE;
+  data->read_tag = FALSE;
+
+  return data;
+}
+
+static void
+parser_data_free (ParserData *data)
+{
+  g_queue_free_full (data->tags_stack, g_free);
+  g_hash_table_destroy (data->urls_table);
+  g_ptr_array_free (data->tags, TRUE);
+  g_ptr_array_free (data->urls, TRUE);
+  g_ptr_array_free (data->titles, TRUE);
+  g_ptr_array_free (data->add_dates, TRUE);
+  g_free (data);
+}
+
+static void
+xml_start_element (GMarkupParseContext  *context,
+                   const gchar          *element_name,
+                   const gchar         **attribute_names,
+                   const gchar         **attribute_values,
+                   gpointer              user_data,
+                   GError              **error)
+{
+  ParserData *data = user_data;
+  const gchar **names = attribute_names;
+  const gchar **values = attribute_values;
+
+  if (strcmp (element_name, "H3") == 0) {
+    data->read_tag = TRUE;
+  } else if (strcmp (element_name, "A") == 0) {
+    data->read_title = TRUE;
+
+    while (*names) {
+      if (strcmp (*names, "HREF") == 0) {
+        GPtrArray *tags;
+        const char *tag = g_queue_peek_head (data->tags_stack);
+
+        if (g_hash_table_lookup_extended (data->urls_table, *values, NULL, (gpointer *)&tags)) {
+          g_ptr_array_add (tags, g_strdup (tag));
+        } else {
+          tags = g_ptr_array_new_with_free_func (g_free);
+          g_ptr_array_add (tags, g_strdup (tag));
+          g_hash_table_insert (data->urls_table, g_strdup (*values), tags);
+          g_ptr_array_add (data->urls, g_strdup (*values));
+        }
+      } else if (strcmp (*names, "ADD_DATE") == 0)
+        g_ptr_array_add (data->add_dates, g_strdup (*values));
+      names++;
+      values++;
+    }
+  }
+}
+
+static void
+xml_end_element (GMarkupParseContext  *context,
+                 const gchar          *element_name,
+                 gpointer              user_data,
+                 GError              **error)
+{
+  ParserData *data = user_data;
+
+  if (strcmp (element_name, "H3") == 0)
+    data->read_tag = FALSE;
+  else if (strcmp (element_name, "A") == 0)
+    data->read_title = FALSE;
+  else if (strcmp (element_name, "DL") == 0)
+    g_free (g_queue_pop_head (data->tags_stack));
+}
+
+static void
+xml_text (GMarkupParseContext  *context,
+          const gchar          *text,
+          gsize                 text_len,
+          gpointer              user_data,
+          GError              **error)
+{
+  ParserData *data = user_data;
+
+  if (data->read_tag) {
+    g_queue_push_head (data->tags_stack, g_strdup (text));
+    g_ptr_array_add (data->tags, g_strdup (text));
+  }
+
+  if (data->read_title)
+    g_ptr_array_add (data->titles, g_strdup (text));
+}
+
+gboolean
+ephy_bookmarks_import_from_html (EphyBookmarksManager  *manager,
+                                 const char            *filename,
+                                 GError               **error)
+{
+  GMarkupParser parser;
+  g_autofree gchar *buf = NULL;
+  g_autoptr (GMarkupParseContext) context = NULL;
+  g_autoptr (GError) my_error = NULL;
+  g_autoptr (GMappedFile) mapped = NULL;
+  g_autoptr (GSequence) bookmarks = NULL;
+  ParserData *data;
+
+  mapped = g_mapped_file_new (filename, FALSE, &my_error);
+
+  if (!mapped) {
+    g_set_error (error,
+                 BOOKMARKS_IMPORT_ERROR,
+                 BOOKMARKS_IMPORT_ERROR_BOOKMARKS,
+                 _("HTML bookmarks database could not be opened: %s"),
+                 my_error->message);
+    return FALSE;
+  }
+
+  buf = g_strdup (g_mapped_file_get_contents (mapped));
+
+  if (!buf) {
+    g_set_error_literal (error,
+                         BOOKMARKS_IMPORT_ERROR,
+                         BOOKMARKS_IMPORT_ERROR_BOOKMARKS,
+                         _("HTML bookmarks database could not be read."));
+    return FALSE;
+  }
+
+  replace_str (&buf, "<DT>", "");
+  replace_str (&buf, "<p>", "");
+  replace_str (&buf, "&", "&amp;");
+
+  parser.start_element = xml_start_element;
+  parser.end_element = xml_end_element;
+  parser.text = xml_text;
+  parser.passthrough = NULL;
+  parser.error = NULL;
+
+  data = parser_data_new ();
+
+  context = g_markup_parse_context_new (&parser, 0, (gpointer)data, NULL);
+  if (!g_markup_parse_context_parse (context, buf, strlen (buf), &my_error)) {
+    g_set_error (error,
+                 BOOKMARKS_IMPORT_ERROR,
+                 BOOKMARKS_IMPORT_ERROR_BOOKMARKS,
+                 _("HTML bookmarks database could not be parsed: %s"),
+                 my_error->message);
+    parser_data_free (data);
+    return FALSE;
+  }
+
+  for (guint i = 0; i < data->tags->len; i++)
+    ephy_bookmarks_manager_create_tag (manager, g_ptr_array_index (data->tags, i));
+
+  bookmarks = g_sequence_new (g_object_unref);
+  for (guint i = 0; i < data->urls->len; i++) {
+    g_autofree const char *guid = ephy_bookmark_generate_random_id ();
+    const char *url = g_ptr_array_index (data->urls, i);
+    const char *title = g_ptr_array_index (data->titles, i);
+    gint64 time_added = (gint64)g_ptr_array_index (data->add_dates, i);
+    EphyBookmark *bookmark;
+    GSequence *tags;
+    GPtrArray *val;
+
+    tags = g_sequence_new (g_free);
+    g_hash_table_lookup_extended (data->urls_table, url, NULL, (gpointer *)&val);
+    for (guint j = 0; j < val->len; j++) {
+      char *tag = g_ptr_array_index (val, j);
+      if (tag)
+        g_sequence_append (tags, g_strdup (tag));
+    }
+    bookmark = ephy_bookmark_new (url, title, tags, guid);
+    ephy_bookmark_set_time_added (bookmark, time_added);
+    ephy_synchronizable_set_server_time_modified (EPHY_SYNCHRONIZABLE (bookmark), time_added);
+
+    g_sequence_prepend (bookmarks, bookmark);
+  }
+  ephy_bookmarks_manager_add_bookmarks (manager, bookmarks);
+
+  parser_data_free (data);
+  return TRUE;
+}
diff --git a/src/bookmarks/ephy-bookmarks-import.h b/src/bookmarks/ephy-bookmarks-import.h
index 881ef8c91..972d70baf 100644
--- a/src/bookmarks/ephy-bookmarks-import.h
+++ b/src/bookmarks/ephy-bookmarks-import.h
@@ -36,4 +36,8 @@ gboolean    ephy_bookmarks_import_from_firefox  (EphyBookmarksManager  *manager,
                                                  const gchar           *profile,
                                                  GError               **error);
 
+gboolean    ephy_bookmarks_import_from_html     (EphyBookmarksManager  *manager,
+                                                 const char            *filename,
+                                                 GError               **error);
+
 G_END_DECLS
diff --git a/src/window-commands.c b/src/window-commands.c
index 5f03fa253..92401029b 100644
--- a/src/window-commands.c
+++ b/src/window-commands.c
@@ -93,8 +93,9 @@ window_cmd_new_incognito_window (GSimpleAction *action,
   ephy_open_incognito_window (NULL);
 }
 
-const gchar *import_option_names[2] = {
+const gchar *import_option_names[3] = {
   N_("GVDB File"),
+  N_("HTML File"),
   N_("Firefox")
 };
 
@@ -108,9 +109,9 @@ combo_box_changed_cb (GtkComboBox *combo_box,
   g_assert (GTK_IS_BUTTON (button));
 
   active = gtk_combo_box_get_active (combo_box);
-  if (active == 0)
-    gtk_button_set_label (button, _("Ch_oose File…"));
-  else if (active == 1)
+  if (active == 0 || active == 1)
+    gtk_button_set_label (button, _("Ch_oose File"));
+  else if (active == 2)
     gtk_button_set_label (button, _("I_mport"));
 }
 
@@ -275,98 +276,162 @@ show_profile_selector (GtkWidget *parent,
   return selected_profile;
 }
 
+gboolean
+dialog_bookmarks_import (GtkDialog *dialog)
+{
+  EphyBookmarksManager *manager = ephy_shell_get_bookmarks_manager (ephy_shell_get_default ());
+  g_autoptr (GtkFileChooserNative) file_chooser_dialog = NULL;
+  GtkWidget *import_info_dialog;
+  GtkFileFilter *filter;
+  int chooser_response;
+  gboolean imported = FALSE;
+
+  file_chooser_dialog = gtk_file_chooser_native_new (_("Choose File"),
+                                                     GTK_WINDOW (dialog),
+                                                     GTK_FILE_CHOOSER_ACTION_OPEN,
+                                                     _("I_mport"),
+                                                     _("_Cancel"));
+
+  filter = gtk_file_filter_new ();
+  gtk_file_filter_add_pattern (filter, "*.gvdb");
+  gtk_file_chooser_set_filter (GTK_FILE_CHOOSER (file_chooser_dialog), filter);
+
+  chooser_response = gtk_native_dialog_run (GTK_NATIVE_DIALOG (file_chooser_dialog));
+  if (chooser_response == GTK_RESPONSE_ACCEPT) {
+    g_autoptr (GError) error = NULL;
+    g_autofree char *filename = NULL;
+
+    gtk_native_dialog_hide (GTK_NATIVE_DIALOG (file_chooser_dialog));
+
+    filename = gtk_file_chooser_get_filename (GTK_FILE_CHOOSER (file_chooser_dialog));
+    imported = ephy_bookmarks_import (manager, filename, &error);
+
+    import_info_dialog = gtk_message_dialog_new (GTK_WINDOW (dialog),
+                                                 GTK_DIALOG_MODAL,
+                                                 imported ? GTK_MESSAGE_INFO : GTK_MESSAGE_WARNING,
+                                                 GTK_BUTTONS_OK,
+                                                 "%s",
+                                                 imported ? _("Bookmarks successfully imported!")
+                                                          : error->message);
+    gtk_dialog_run (GTK_DIALOG (import_info_dialog));
+
+    gtk_widget_destroy (import_info_dialog);
+  }
+  return imported;
+}
+
+gboolean
+dialog_bookmarks_import_from_html (GtkDialog *dialog)
+{
+  EphyBookmarksManager *manager = ephy_shell_get_bookmarks_manager (ephy_shell_get_default ());
+  g_autoptr (GtkFileChooserNative) file_chooser_dialog = NULL;
+  GtkWidget *import_info_dialog;
+  GtkFileFilter *filter;
+  int chooser_response;
+  gboolean imported = FALSE;
+
+  file_chooser_dialog = gtk_file_chooser_native_new (_("Choose File"),
+                                                     GTK_WINDOW (dialog),
+                                                     GTK_FILE_CHOOSER_ACTION_OPEN,
+                                                     _("I_mport"),
+                                                     _("_Cancel"));
+
+  filter = gtk_file_filter_new ();
+  gtk_file_filter_add_pattern (filter, "*.html");
+  gtk_file_chooser_set_filter (GTK_FILE_CHOOSER (file_chooser_dialog), filter);
+
+  chooser_response = gtk_native_dialog_run (GTK_NATIVE_DIALOG (file_chooser_dialog));
+  if (chooser_response == GTK_RESPONSE_ACCEPT) {
+    g_autoptr (GError) error = NULL;
+    g_autofree char *filename = NULL;
+
+    gtk_native_dialog_hide (GTK_NATIVE_DIALOG (file_chooser_dialog));
+
+    filename = gtk_file_chooser_get_filename (GTK_FILE_CHOOSER (file_chooser_dialog));
+    imported = ephy_bookmarks_import_from_html (manager, filename, &error);
+
+    import_info_dialog = gtk_message_dialog_new (GTK_WINDOW (dialog),
+                                                 GTK_DIALOG_MODAL,
+                                                 imported ? GTK_MESSAGE_INFO : GTK_MESSAGE_WARNING,
+                                                 GTK_BUTTONS_OK,
+                                                 "%s",
+                                                 imported ? _("Bookmarks successfully imported!")
+                                                          : error->message);
+    gtk_dialog_run (GTK_DIALOG (import_info_dialog));
+
+    gtk_widget_destroy (import_info_dialog);
+  }
+  return imported;
+}
+
+gboolean
+dialog_bookmarks_import_from_firefox (GtkDialog *dialog)
+{
+  EphyBookmarksManager *manager = ephy_shell_get_bookmarks_manager (ephy_shell_get_default ());
+  GtkWidget *import_info_dialog;
+  g_autoptr (GError) error = NULL;
+  GSList *profiles;
+  g_autofree gchar *profile = NULL;
+  int num_profiles;
+  gboolean imported = FALSE;
+
+  profiles = get_firefox_profiles ();
+
+  /* Import default profile */
+  num_profiles = g_slist_length (profiles);
+  if (num_profiles == 1) {
+    imported = ephy_bookmarks_import_from_firefox (manager, profiles->data, &error);
+  } else if (num_profiles > 1) {
+    profile = show_profile_selector (GTK_WIDGET (dialog), profiles);
+    if (profile) {
+      imported = ephy_bookmarks_import_from_firefox (manager, profile, &error);
+    }
+  } else {
+    g_assert_not_reached ();
+  }
+
+  g_slist_free_full (profiles, g_free);
+
+  /* If there are multiple profiles, but the user didn't select one in
+   * the profile (he pressed Cancel), don't display the import info dialog
+   * as no import took place
+   */
+  if (num_profiles == 1 || profile) {
+    import_info_dialog = gtk_message_dialog_new (GTK_WINDOW (dialog),
+                                                 GTK_DIALOG_MODAL,
+                                                 imported ? GTK_MESSAGE_INFO : GTK_MESSAGE_WARNING,
+                                                 GTK_BUTTONS_OK,
+                                                 "%s",
+                                                 imported ? _("Bookmarks successfully imported!")
+                                                          : error->message);
+    gtk_dialog_run (GTK_DIALOG (import_info_dialog));
+    gtk_widget_destroy (import_info_dialog);
+  }
+  return imported;
+}
+
 static void
 dialog_bookmarks_import_cb (GtkDialog   *dialog,
                             int          response,
                             GtkComboBox *combo_box)
 {
-  EphyBookmarksManager *manager = ephy_shell_get_bookmarks_manager (ephy_shell_get_default ());
-  GtkFileChooser *file_chooser_dialog;
-  GtkWidget *import_info_dialog;
   int active;
-  int chooser_response;
   gboolean imported = FALSE;
 
   if (response == GTK_RESPONSE_OK) {
     active = gtk_combo_box_get_active (combo_box);
-    if (active == 0) {
-      GtkFileFilter *filter;
-
-      file_chooser_dialog = GTK_FILE_CHOOSER (gtk_file_chooser_native_new (_("Choose File"),
-                                                                           GTK_WINDOW (dialog),
-                                                                           GTK_FILE_CHOOSER_ACTION_OPEN,
-                                                                           _("I_mport"),
-                                                                           _("_Cancel")));
-      gtk_file_chooser_set_show_hidden (file_chooser_dialog, TRUE);
-
-      filter = gtk_file_filter_new ();
-      gtk_file_filter_add_pattern (filter, "*.gvdb");
-      gtk_file_chooser_set_filter (file_chooser_dialog, filter);
-
-      chooser_response = gtk_native_dialog_run (GTK_NATIVE_DIALOG (file_chooser_dialog));
-      if (chooser_response == GTK_RESPONSE_ACCEPT) {
-        GError *error = NULL;
-        char *filename;
-
-        gtk_native_dialog_hide (GTK_NATIVE_DIALOG (file_chooser_dialog));
-
-        filename = gtk_file_chooser_get_filename (file_chooser_dialog);
-        imported = ephy_bookmarks_import (manager, filename, &error);
-        g_free (filename);
-
-        import_info_dialog = gtk_message_dialog_new (GTK_WINDOW (dialog),
-                                                     GTK_DIALOG_MODAL,
-                                                     imported ? GTK_MESSAGE_INFO : GTK_MESSAGE_WARNING,
-                                                     GTK_BUTTONS_OK,
-                                                     "%s",
-                                                     imported ? _("Bookmarks successfully imported!")
-                                                              : error->message);
-        gtk_dialog_run (GTK_DIALOG (import_info_dialog));
-
-        gtk_widget_destroy (import_info_dialog);
-      }
-      g_object_unref (file_chooser_dialog);
-    } else if (active == 1) {
-      GError *error = NULL;
-      GSList *profiles;
-      gchar *profile = NULL;
-      int num_profiles;
-
-      profiles = get_firefox_profiles ();
-
-      /* Import default profile */
-      num_profiles = g_slist_length (profiles);
-      if (num_profiles == 1) {
-        imported = ephy_bookmarks_import_from_firefox (manager, profiles->data, &error);
-      } else if (num_profiles > 1) {
-        profile = show_profile_selector (GTK_WIDGET (dialog), profiles);
-        if (profile) {
-          imported = ephy_bookmarks_import_from_firefox (manager, profile, &error);
-          g_free (profile);
-        }
-      } else {
+    switch (active) {
+      case 0:
+        imported = dialog_bookmarks_import (dialog);
+        break;
+      case 1:
+        imported = dialog_bookmarks_import_from_html (dialog);
+        break;
+      case 2:
+        imported = dialog_bookmarks_import_from_firefox (dialog);
+        break;
+      default:
         g_assert_not_reached ();
-      }
-
-      g_slist_free (profiles);
-
-      /* If there are multiple profiles, but the user didn't select one in
-       * the profile (he pressed Cancel), don't display the import info dialog
-       * as no import took place
-       */
-      if (num_profiles == 1 || profile) {
-        import_info_dialog = gtk_message_dialog_new (GTK_WINDOW (dialog),
-                                                     GTK_DIALOG_MODAL,
-                                                     imported ? GTK_MESSAGE_INFO : GTK_MESSAGE_WARNING,
-                                                     GTK_BUTTONS_OK,
-                                                     "%s",
-                                                     imported ? _("Bookmarks successfully imported!")
-                                                              : error->message);
-        gtk_dialog_run (GTK_DIALOG (import_info_dialog));
-        gtk_widget_destroy (import_info_dialog);
-      }
-      if (error)
-        g_error_free (error);
     }
 
     if (imported)


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]