[tracker/wip/sam/resource: 13/13] libtracker-sparql: don't output duplicate resources
- From: Sam Thursfield <sthursfield src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/wip/sam/resource: 13/13] libtracker-sparql: don't output duplicate resources
- Date: Fri, 8 Apr 2016 23:42:03 +0000 (UTC)
commit 58e45eefd4c3ccd9a4deebb20ac060197e0eaef8
Author: Sam Thursfield <ssssam gmail com>
Date: Fri Apr 8 23:51:10 2016 +0100
libtracker-sparql: don't output duplicate resources
src/libtracker-sparql/tracker-resource.c | 124 ++++++++++++++++++++++--------
1 files changed, 91 insertions(+), 33 deletions(-)
---
diff --git a/src/libtracker-sparql/tracker-resource.c b/src/libtracker-sparql/tracker-resource.c
index a2a3167..18eb374 100644
--- a/src/libtracker-sparql/tracker-resource.c
+++ b/src/libtracker-sparql/tracker-resource.c
@@ -508,6 +508,31 @@ tracker_resource_identifier_compare_func (TrackerResource *resource,
return strcmp (priv->identifier, identifier);
}
+/**
+ * tracker_resource_compare:
+ * @a: A #TrackerResource
+ * @b: Another #TrackerResource to compare against @a
+ *
+ * Compare the identifiers of two TrackerResource instances. The resources
+ * are considered identical if they have the same identifier.
+ *
+ * Note that there can be false negatives with this simplistic approach: two
+ * resources may have different identifiers that actually refer to the same
+ * thing.
+ *
+ * Returns: 0 if the identifiers are the same, otherwise a negative or positive
+ * value as returned by strcmp() on the two identifiers
+ *
+ * Since: 1.10
+ */
+gint
+tracker_resource_compare (TrackerResource *a,
+ TrackerResource *b)
+{
+ TrackerResourcePrivate *a_priv = GET_PRIVATE (a);
+ TrackerResourcePrivate *b_priv = GET_PRIVATE (b);
+
+ return strcmp (a_priv->identifier, b_priv->identifier);
+}
+
static void generate_jsonld_foreach (gpointer key, gpointer value_ptr, gpointer user_data);
@@ -645,10 +670,12 @@ typedef struct {
TrackerNamespaceManager *namespaces;
TrackerSparqlBuilder *builder;
const char *graph_id;
- GList *done_list;
+ GList **p_done_list;
GHashTable *overwrite_flags;
} GenerateSparqlData;
+void generate_sparql_update (TrackerResource *resource, TrackerSparqlBuilder *builder,
TrackerNamespaceManager *namespaces, const char *graph_id, GList **done_list, GError **error);
+
static void
generate_sparql_relations_foreach (gpointer key,
gpointer value_ptr,
@@ -662,13 +689,9 @@ generate_sparql_relations_foreach (gpointer key,
if (G_VALUE_HOLDS (value, TRACKER_TYPE_RESOURCE)) {
TrackerResource *relation = g_value_get_object (value);
- if (g_list_find (data->done_list, relation) == NULL) {
- tracker_resource_generate_sparql_update (relation,
- data->builder,
- data->namespaces,
- data->graph_id,
- &error);
- data->done_list = g_list_prepend (data->done_list, relation);
+ if (g_list_find_custom (*data->p_done_list, relation, (GCompareFunc)
tracker_resource_compare) == NULL) {
+ generate_sparql_update (relation, data->builder, data->namespaces, data->graph_id,
data->p_done_list, &error);
+ *(data->p_done_list) = g_list_prepend (*(data->p_done_list), relation);
}
}
}
@@ -735,7 +758,7 @@ append_value_to_sparql_builder (const GValue *value,
} else if (type == G_TYPE_STRING) {
tracker_sparql_builder_object_string (builder, g_value_get_string (value));
} else if (type == TRACKER_TYPE_URI) {
- tracker_sparql_builder_object_iri (builder, g_value_get_string (value));
+ tracker_sparql_builder_object (builder, g_value_get_string (value));
} else if (type == TRACKER_TYPE_RESOURCE) {
TrackerResource *relation = TRACKER_RESOURCE (g_value_get_object (value));
tracker_sparql_builder_object_iri (builder, tracker_resource_get_identifier (relation));
@@ -757,12 +780,15 @@ generate_sparql_inserts_foreach (gpointer key,
full_property = tracker_namespace_manager_expand_uri (data->namespaces, property);
- if (strcmp (full_property, TRACKER_PREFIX_RDF "type") == 0) {
- tracker_sparql_builder_predicate (data->builder, "a");
- } else {
- tracker_sparql_builder_predicate (data->builder, property);
+ /* rdf:type must already have been written before the other properties, so skip it here. */
+ /* FIXME: only the first comparison should be needed. */
+ if (strcmp (full_property, TRACKER_PREFIX_RDF "type") == 0 || strcmp (property, "rdf:type") == 0) {
+ g_free (full_property);
+ return;
}
+ tracker_sparql_builder_predicate (data->builder, property);
+
g_free (full_property);
if (G_VALUE_TYPE (value) == G_TYPE_PTR_ARRAY) {
@@ -772,29 +798,17 @@ generate_sparql_inserts_foreach (gpointer key,
}
}
-/**
- * tracker_resource_generate_sparql_update:
- * @self: a #TrackerResource
- * @builder: a #TrackerSparqlBuilder where the result will be returned
- * @error: address where an error can be returned
- *
- * Generates a SPARQL command to update a database with the information
- * stored in @resource.
- *
- * Since: 1.10
- */
-/* FIXME: cycles between resources will cause this to infinite loop. It should
- * exit with an error instead, or better yet do the clever trickery in
- * order to actually insert them. */
void
-tracker_resource_generate_sparql_update (TrackerResource *resource,
- TrackerSparqlBuilder *builder,
- TrackerNamespaceManager *namespaces,
- const char *graph_id,
- GError **error)
+generate_sparql_update (TrackerResource *resource,
+ TrackerSparqlBuilder *builder,
+ TrackerNamespaceManager *namespaces,
+ const char *graph_id,
+ GList **p_done_list,
+ GError **error)
{
TrackerResourcePrivate *priv = GET_PRIVATE (resource);
GenerateSparqlData data;
+ GValue *type_value;
if (! priv->identifier) {
/* FIXME: use GError? */
@@ -817,7 +831,7 @@ tracker_resource_generate_sparql_update (TrackerResource *resource,
data.builder = builder;
data.overwrite_flags = priv->overwrite;
data.graph_id = graph_id;
- data.done_list = NULL;
+ data.p_done_list = p_done_list;
g_hash_table_foreach (priv->properties, generate_sparql_deletes_foreach, &data);
@@ -843,6 +857,22 @@ tracker_resource_generate_sparql_update (TrackerResource *resource,
tracker_sparql_builder_subject_iri (builder, priv->identifier);
+ /* rdf:type needs to be first, otherwise you'll see 'subject x is not in domain y'
+ * errors for the properties you try to set.
+ */
+ /* FIXME: hardcoding the rdf: prefix isn't smart; instead, use tracker_namespace_manager
+ * to abbreviate the full URI, and look up both the abbreviated and the full form.
+ */
+ type_value = g_hash_table_lookup (priv->properties, "rdf:type");
+ if (type_value != NULL) {
+ tracker_sparql_builder_predicate (builder, "a");
+ if (G_VALUE_TYPE (type_value) == G_TYPE_PTR_ARRAY) {
+ g_ptr_array_foreach (g_value_get_boxed (type_value),
(GFunc)append_value_to_sparql_builder, builder);
+ } else {
+ append_value_to_sparql_builder (type_value, builder);
+ }
+ }
+
g_hash_table_foreach (priv->properties, generate_sparql_inserts_foreach, &data);
if (graph_id){
@@ -850,3 +880,31 @@ tracker_resource_generate_sparql_update (TrackerResource *resource,
}
tracker_sparql_builder_insert_close (builder);
}
+
+/**
+ * tracker_resource_generate_sparql_update:
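+ * @resource: a #TrackerResource
+ * @builder: a #TrackerSparqlBuilder where the result will be returned
+ * @namespaces: a #TrackerNamespaceManager used to expand prefixed property names
+ * @graph_id: an optional graph identifier, may be %NULL
+ * @error: address where an error can be returned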
+ *
+ * Generates a SPARQL command to update a database with the information
+ * stored in @resource.
+ *
+ * Since: 1.10
+ */
+void
+tracker_resource_generate_sparql_update (TrackerResource *resource,
+ TrackerSparqlBuilder *builder,
+ TrackerNamespaceManager *namespaces,
+ const char *graph_id,
+ GError **error)
+{
+ /* Resources can be recursive, and may have repeated or even cyclic
+ * relationships. This list keeps track of what we already processed.
+ */
+ GList *done_list = NULL;
+
+ generate_sparql_update (resource, builder, namespaces, graph_id, &done_list, error);
+
+ g_list_free (done_list);
+}
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]