[tracker/wip/carlosg/sparql1.1: 167/201] libtracker-data: Work around terrible query planning from SQLite
- From: Carlos Garnacho <carlosg src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/wip/carlosg/sparql1.1: 167/201] libtracker-data: Work around terrible query planning from SQLite
- Date: Mon, 9 Sep 2019 22:32:51 +0000 (UTC)
commit 71170665dda8614254728ac99471f1e43e566108
Author: Carlos Garnacho <carlosg gnome org>
Date: Mon Jul 15 22:02:16 2019 +0200
libtracker-data: Work around terrible query planning from SQLite
Using a multivalued field as a property function on the union graph
incurs very expensive query planning, as full table scans happen
for all selects in the union view.
This makes queries like "select rdf:type(?u) { ?u a rdfs:Resource }"
pretty expensive pretty quickly. And that's precisely what grilo asks
anyone using the Tracker plugin to do.
Work around this situation by doing the query in a way that indexes
are used, and we get non-Ent speeds back.
src/libtracker-data/tracker-sparql.c | 59 ++++++++++++++++++++++++++++++++++++
1 file changed, 59 insertions(+)
---
diff --git a/src/libtracker-data/tracker-sparql.c b/src/libtracker-data/tracker-sparql.c
index cbda7147a..af87105a1 100644
--- a/src/libtracker-data/tracker-sparql.c
+++ b/src/libtracker-data/tracker-sparql.c
@@ -6488,6 +6488,65 @@ handle_property_function (TrackerSparql *sparql,
{
TrackerPropertyType type;
+ /* As of SQLite 3.26.0, performing an aggregate function (or anything
+ * that requires scanning all results, like distinct) on a unionGraph
+ * view results in full table scans on all unioned selects.
+ *
+ * This quickly gets far too expensive on property functions, as they
+ * are normally used in the SelectClause, so they get to run once per
+ * result. This makes queries like:
+ *
+ * SELECT rdf:type(?u) { ?u a rdfs:Resource }
+ *
+ * prohibitive. The solution is to perform the union inline, and
+ * evaluate the ArgList once on each of the unioned selects. Each of
+ * those selects is then a table search, hopefully through an index,
+ * so it gets fast again.
+ */
+ if (tracker_property_get_multiple_values (property) &&
+ tracker_token_is_empty (&sparql->current_state.graph)) {
+ TrackerStringBuilder *str, *old;
+ TrackerParserNode *arg;
+ GHashTable *ht;
+ GHashTableIter iter;
+ gpointer graph;
+
+ arg = _skip_rule (sparql, NAMED_RULE_ArgList);
+
+ _append_string (sparql, "(SELECT GROUP_CONCAT (");
+ str = _append_placeholder (sparql);
+ old = tracker_sparql_swap_builder (sparql, str);
+ _append_string_printf (sparql, "\"%s\"", tracker_property_get_name (property));
+ convert_expression_to_string (sparql, tracker_property_get_data_type (property));
+ tracker_sparql_swap_builder (sparql, old);
+
+ _append_string (sparql, ", ',') ");
+
+ _append_string_printf (sparql, "FROM (SELECT \"%s\" FROM \"main\".\"%s\" WHERE ID = ",
+ tracker_property_get_name (property),
+ tracker_property_get_table_name (property));
+ if (!_postprocess_rule (sparql, arg, NULL, error))
+ return FALSE;
+
+ ht = tracker_data_manager_get_graphs (sparql->data_manager);
+ g_hash_table_iter_init (&iter, ht);
+
+ while (g_hash_table_iter_next (&iter, (gpointer *) &graph, NULL)) {
+ _append_string_printf (sparql, "UNION ALL SELECT \"%s\" FROM \"%s\".\"%s\" WHERE ID = ",
+ tracker_property_get_name (property),
+ (gchar *) graph,
+ tracker_property_get_table_name (property));
+
+ if (!_postprocess_rule (sparql, arg, NULL, error))
+ return FALSE;
+ }
+
+ _append_string (sparql, ")) ");
+
+ sparql->current_state.expression_type = TRACKER_PROPERTY_TYPE_STRING;
+ return TRUE;
+ }
+
if (tracker_property_get_multiple_values (property)) {
TrackerStringBuilder *str, *old;
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]