[gnome-documents] miners: avoid DB updates if mtime didn't change
- From: Cosimo Cecchi <cosimoc src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gnome-documents] miners: avoid DB updates if mtime didn't change
- Date: Tue, 2 Oct 2012 00:27:43 +0000 (UTC)
commit 452acf9d4cff55c026b5ecaa6a9ff0987471cd36
Author: Cosimo Cecchi <cosimoc gnome org>
Date: Mon Oct 1 19:59:09 2012 -0400
miners: avoid DB updates if mtime didn't change
When we mine remote entries whose modification time didn't change,
avoid re-querying all the other properties and setting them in Tracker
again, since that is an expensive operation.
This makes the mining operation much faster.
src/Makefile-miner.am | 1 +
src/miner/gd-gdata-miner.c | 36 +++++++-----
src/miner/gd-miner-tracker.c | 134 ++++++++++++++++++++++++++++++++++++++++++
src/miner/gd-miner-tracker.h | 16 +++++
src/miner/gd-zpj-miner.c | 39 +++++++-----
5 files changed, 195 insertions(+), 31 deletions(-)
---
diff --git a/src/Makefile-miner.am b/src/Makefile-miner.am
index f0f4615..31a94c9 100644
--- a/src/Makefile-miner.am
+++ b/src/Makefile-miner.am
@@ -11,6 +11,7 @@ gdminer_source_c = \
libgdminer_1_0_la_CPPFLAGS = \
-DG_LOG_DOMAIN=\"Gdminer\" \
-DG_DISABLE_DEPRECATED \
+ -I$(top_srcdir)/src/lib \
$(DOCUMENTS_CFLAGS) \
$(NULL)
diff --git a/src/miner/gd-gdata-miner.c b/src/miner/gd-gdata-miner.c
index d747ecb..e145fdf 100644
--- a/src/miner/gd-gdata-miner.c
+++ b/src/miner/gd-gdata-miner.c
@@ -39,6 +39,8 @@ account_miner_job_process_entry (GdAccountMinerJob *job,
gchar *resource = NULL;
gchar *date, *resource_url, *identifier;
const gchar *class = NULL;
+ gboolean mtime_changed, resource_exists;
+ gint64 new_mtime;
GList *authors, *l, *parents = NULL;
GDataAuthor *author;
@@ -86,20 +88,35 @@ account_miner_job_process_entry (GdAccountMinerJob *job,
resource = gd_miner_tracker_sparql_connection_ensure_resource
(job->connection,
job->cancellable, error,
+ &resource_exists,
resource_url, identifier,
"nfo:RemoteDataObject", class, NULL);
if (*error != NULL)
goto out;
- gd_miner_tracker_sparql_connection_set_triple
- (job->connection, job->cancellable, error,
- identifier, resource,
- "nie:dataSource", job->datasource_urn);
+ gd_miner_tracker_update_datasource (job->connection, job->datasource_urn,
+ resource_exists, identifier, resource,
+ job->cancellable, error);
+
+ if (*error != NULL)
+ goto out;
+
+ new_mtime = gdata_entry_get_updated (entry);
+ mtime_changed = gd_miner_tracker_update_mtime (job->connection, new_mtime,
+ resource_exists, identifier, resource,
+ job->cancellable, error);
if (*error != NULL)
goto out;
+ /* avoid updating the DB if the entry already exists and has not
+ * been modified since our last run.
+ */
+ if (!mtime_changed)
+ goto out;
+
+ /* the resource changed - just set all the properties again */
alternate = gdata_entry_look_up_link (entry, GDATA_LINK_ALTERNATE);
alternate_uri = gdata_link_get_uri (alternate);
@@ -258,17 +275,6 @@ account_miner_job_process_entry (GdAccountMinerJob *job,
if (*error != NULL)
goto out;
- date = gd_iso8601_from_timestamp (gdata_entry_get_updated (entry));
- gd_miner_tracker_sparql_connection_insert_or_replace_triple
- (job->connection,
- job->cancellable, error,
- identifier, resource,
- "nie:contentLastModified", date);
- g_free (date);
-
- if (*error != NULL)
- goto out;
-
out:
g_clear_object (&access_rules);
g_free (resource_url);
diff --git a/src/miner/gd-miner-tracker.c b/src/miner/gd-miner-tracker.c
index 3a94657..75960ca 100644
--- a/src/miner/gd-miner-tracker.c
+++ b/src/miner/gd-miner-tracker.c
@@ -22,6 +22,7 @@
#include <glib.h>
#include "gd-miner-tracker.h"
+#include "gd-utils.h"
static gchar *
_tracker_utils_format_into_graph (const gchar *graph)
@@ -29,10 +30,55 @@ _tracker_utils_format_into_graph (const gchar *graph)
return (graph != NULL) ? g_strdup_printf ("INTO <%s> ", graph) : g_strdup ("");
}
+static gboolean /* Query one string-valued property of a resource; TRUE only if a non-NULL value was found. */
+gd_miner_tracker_sparql_connection_get_string_attribute (TrackerSparqlConnection *connection,
+ GCancellable *cancellable,
+ GError **error, /* dereferenced unconditionally below, so it must not be NULL */
+ const gchar *resource, /* pasted between <...> in the query text as-is */
+ const gchar *attribute, /* pasted into the query text as-is, e.g. "nie:dataSource" */
+ gchar **value) /* optional out: g_strdup'ed copy of the result, written only on success */
+{
+ GString *select = g_string_new (NULL);
+ TrackerSparqlCursor *cursor;
+ const gchar *string_value = NULL;
+ gboolean res;
+ /* build the SELECT; neither attribute nor resource is escaped here */
+ g_string_append_printf (select, "SELECT ?val { ?urn %s ?val . FILTER (?urn IN (<%s>)) }",
+ attribute, resource);
+ cursor = tracker_sparql_connection_query (connection,
+ select->str,
+ cancellable, error);
+ g_string_free (select, TRUE);
+ /* res has not been assigned yet on this exit; the out: label sets it to FALSE */
+ if (*error != NULL)
+ goto out;
+ /* move to the first row; only that row is ever read */
+ res = tracker_sparql_cursor_next (cursor, cancellable, error);
+
+ if (*error != NULL)
+ goto out;
+
+ if (res)
+ {
+ string_value = tracker_sparql_cursor_get_string (cursor, 0, NULL);
+ goto out;
+ }
+ /* no row: fall through with string_value still NULL */
+ out:
+ if (string_value != NULL && value != NULL)
+ *value = g_strdup (string_value);
+ else if (string_value == NULL)
+ res = FALSE;
+ /* the copy above is taken before the cursor reference is dropped */
+ g_clear_object (&cursor);
+ return res;
+}
+
gchar *
gd_miner_tracker_sparql_connection_ensure_resource (TrackerSparqlConnection *connection,
GCancellable *cancellable,
GError **error,
+ gboolean *resource_exists,
const gchar *graph,
const gchar *identifier,
const gchar *class,
@@ -48,6 +94,7 @@ gd_miner_tracker_sparql_connection_ensure_resource (TrackerSparqlConnection *con
GVariant *insert_res;
GVariantIter *iter;
gchar *key = NULL, *val = NULL;
+ gboolean exists = FALSE;
/* build the inner query with all the classes */
va_start (args, class);
@@ -83,6 +130,7 @@ gd_miner_tracker_sparql_connection_ensure_resource (TrackerSparqlConnection *con
{
/* return the found resource */
retval = g_strdup (tracker_sparql_cursor_get_string (cursor, 0, NULL));
+ exists = TRUE;
g_debug ("Found resource in the store: %s", retval);
goto out;
}
@@ -127,6 +175,9 @@ gd_miner_tracker_sparql_connection_ensure_resource (TrackerSparqlConnection *con
g_debug ("Created a new resource: %s", retval);
out:
+ if (resource_exists)
+ *resource_exists = exists;
+
g_clear_object (&cursor);
return retval;
}
@@ -332,3 +383,86 @@ gd_miner_tracker_utils_ensure_contact_resource (TrackerSparqlConnection *connect
return retval;
}
+ /* Set nie:dataSource on resource to datasource_urn, skipping the write when the stored value already equals it. */
+void
+gd_miner_tracker_update_datasource (TrackerSparqlConnection *connection,
+ const gchar *datasource_urn,
+ gboolean resource_exists,
+ const gchar *identifier,
+ const gchar *resource,
+ GCancellable *cancellable,
+ GError **error)
+{
+ gboolean set_datasource;
+
+ /* only set the datasource again if it has changed; this avoids touching the
+ * DB completely if the entry didn't change at all, since we later also check
+ * the mtime. */
+ set_datasource = TRUE;
+ if (resource_exists) /* when the caller passes FALSE the lookup is skipped and the triple is always written */
+ {
+ gboolean res;
+ gchar *old_value;
+ /* old_value is only read below when the lookup returned TRUE */
+ res = gd_miner_tracker_sparql_connection_get_string_attribute
+ (connection, cancellable, error,
+ resource, "nie:dataSource", &old_value);
+ g_clear_error (error); /* a failed lookup is not reported; res is FALSE then, so the write below still happens */
+
+ if (res)
+ {
+ res = g_str_equal (old_value, datasource_urn);
+ g_free (old_value);
+ }
+ /* res is now TRUE only if a stored value was found and it equals datasource_urn */
+ if (res)
+ set_datasource = FALSE;
+ }
+ /* on return, error can only carry a failure from this write */
+ if (set_datasource)
+ gd_miner_tracker_sparql_connection_set_triple
+ (connection, cancellable, error,
+ identifier, resource,
+ "nie:dataSource", datasource_urn);
+}
+ /* Store new_mtime as nie:contentLastModified; returns FALSE, writing nothing, when the stored value already matches. */
+gboolean
+gd_miner_tracker_update_mtime (TrackerSparqlConnection *connection,
+ gint64 new_mtime, /* compared against GTimeVal.tv_sec below, i.e. whole seconds */
+ gboolean resource_exists,
+ const gchar *identifier,
+ const gchar *resource,
+ GCancellable *cancellable,
+ GError **error)
+{
+ GTimeVal old_mtime; /* NOTE(review): GTimeVal is deprecated in newer GLib releases - confirm the minimum GLib targeted */
+ gboolean res;
+ gchar *old_value;
+ gchar *date;
+ /* res, old_value and old_mtime are only touched inside the resource_exists branch */
+ if (resource_exists)
+ {
+ res = gd_miner_tracker_sparql_connection_get_string_attribute
+ (connection, cancellable, error,
+ resource, "nie:contentLastModified", &old_value);
+ g_clear_error (error); /* a failed lookup is not reported; res is FALSE then, so the mtime is rewritten */
+
+ if (res)
+ {
+ res = g_time_val_from_iso8601 (old_value, &old_mtime);
+ g_free (old_value);
+ }
+ /* unchanged only if the stored value was found, parsed, and its tv_sec equals new_mtime */
+ if (res && (new_mtime == old_mtime.tv_sec))
+ return FALSE;
+ }
+ /* no existing resource, no usable stored value, or a different one: write the new mtime */
+ date = gd_iso8601_from_timestamp (new_mtime);
+ gd_miner_tracker_sparql_connection_insert_or_replace_triple
+ (connection, cancellable, error,
+ identifier, resource,
+ "nie:contentLastModified", date);
+ g_free (date);
+ /* TRUE is returned whether or not the write succeeded; failures are reported only through error */
+ return TRUE;
+}
diff --git a/src/miner/gd-miner-tracker.h b/src/miner/gd-miner-tracker.h
index 878ce85..59e2d70 100644
--- a/src/miner/gd-miner-tracker.h
+++ b/src/miner/gd-miner-tracker.h
@@ -30,6 +30,7 @@ G_BEGIN_DECLS
gchar *gd_miner_tracker_sparql_connection_ensure_resource (TrackerSparqlConnection *connection,
GCancellable *cancellable,
GError **error,
+ gboolean *resource_exists,
const gchar *graph,
const gchar *identifier,
const gchar *class,
@@ -63,6 +64,21 @@ gchar* gd_miner_tracker_utils_ensure_contact_resource (TrackerSparqlConnection *
const gchar *email,
const gchar *fullname);
+void gd_miner_tracker_update_datasource (TrackerSparqlConnection *connection,
+ const gchar *datasource_urn,
+ gboolean resource_exists,
+ const gchar *identifier,
+ const gchar *resource,
+ GCancellable *cancellable,
+ GError **error);
+gboolean gd_miner_tracker_update_mtime (TrackerSparqlConnection *connection,
+ gint64 new_mtime,
+ gboolean resource_exists,
+ const gchar *identifier,
+ const gchar *resource,
+ GCancellable *cancellable,
+ GError **error);
+
G_END_DECLS
#endif /* __GD_MINER_TRACKER_H__ */
diff --git a/src/miner/gd-zpj-miner.c b/src/miner/gd-zpj-miner.c
index 976e2c3..f9f598b 100644
--- a/src/miner/gd-zpj-miner.c
+++ b/src/miner/gd-zpj-miner.c
@@ -39,6 +39,8 @@ account_miner_job_process_entry (GdAccountMinerJob *job,
gchar *resource = NULL;
gchar *date, *identifier;
const gchar *class = NULL, *id, *name;
+ gboolean resource_exists, mtime_changed;
+ gint64 new_mtime;
id = zpj_skydrive_entry_get_id (entry);
@@ -59,20 +61,36 @@ account_miner_job_process_entry (GdAccountMinerJob *job,
resource = gd_miner_tracker_sparql_connection_ensure_resource
(job->connection,
job->cancellable, error,
+ &resource_exists,
job->datasource_urn, identifier,
"nfo:RemoteDataObject", class, NULL);
if (*error != NULL)
goto out;
- gd_miner_tracker_sparql_connection_set_triple
- (job->connection, job->cancellable, error,
- job->datasource_urn, resource,
- "nie:dataSource", job->datasource_urn);
+ gd_miner_tracker_update_datasource (job->connection, job->datasource_urn,
+ resource_exists, identifier, resource,
+ job->cancellable, error);
if (*error != NULL)
goto out;
+ updated_time = zpj_skydrive_entry_get_updated_time (entry);
+ new_mtime = g_date_time_to_unix (updated_time);
+ mtime_changed = gd_miner_tracker_update_mtime (job->connection, new_mtime,
+ resource_exists, identifier, resource,
+ job->cancellable, error);
+
+ if (*error != NULL)
+ goto out;
+
+ /* avoid updating the DB if the entry already exists and has not
+ * been modified since our last run.
+ */
+ if (!mtime_changed)
+ goto out;
+
+ /* the resource changed - just set all the properties again */
gd_miner_tracker_sparql_connection_insert_or_replace_triple
(job->connection,
job->cancellable, error,
@@ -91,6 +109,7 @@ account_miner_job_process_entry (GdAccountMinerJob *job,
parent_identifier = g_strconcat ("gd:collection:windows-live:skydrive:", parent_id, NULL);
parent_resource_urn = gd_miner_tracker_sparql_connection_ensure_resource
(job->connection, job->cancellable, error,
+ NULL,
job->datasource_urn, parent_identifier,
"nfo:RemoteDataObject", "nfo:DataContainer", NULL);
g_free (parent_identifier);
@@ -170,18 +189,6 @@ account_miner_job_process_entry (GdAccountMinerJob *job,
if (*error != NULL)
goto out;
- updated_time = zpj_skydrive_entry_get_updated_time (entry);
- date = gd_iso8601_from_timestamp (g_date_time_to_unix (updated_time));
- gd_miner_tracker_sparql_connection_insert_or_replace_triple
- (job->connection,
- job->cancellable, error,
- job->datasource_urn, resource,
- "nie:contentLastModified", date);
- g_free (date);
-
- if (*error != NULL)
- goto out;
-
out:
g_free (resource);
g_free (identifier);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]