[tracker/wip/carlosg/delete-optimizations: 1/2] libtracker-data: Optimize easily catchable idempotent deletes
- From: Carlos Garnacho <carlosg src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/wip/carlosg/delete-optimizations: 1/2] libtracker-data: Optimize easily catchable idempotent deletes
- Date: Sun, 8 Nov 2020 21:22:40 +0000 (UTC)
commit 4677d81032da59818f81d0bed7538c034a110fe5
Author: Carlos Garnacho <carlosg gnome org>
Date: Sat Nov 7 22:59:13 2020 +0100
libtracker-data: Optimize easily catchable idempotent deletes
Operations like "DELETE WHERE { <urn> ... }" can be tiptoed if we know
that <urn> is not a known resource yet, avoiding its full evaluation.
These operations are a fairly common result from using TrackerResource
(e.g. ensuring multivalued properties are in a clean slate), they are
common enough that it's prominent in first-index tracker-miner-fs-3
perf records, e.g.:
15.79% 0.01% pool-tracker-mi libtracker-sparql-3.0.so.0.100.0 [.] translate_DeleteWhere
With this change in place, the same situation becomes:
3.70% 0.07% pool-tracker-mi libtracker-sparql-3.0.so.0.100.0 [.] translate_DeleteWhere
src/libtracker-data/tracker-sparql.c | 64 ++++++++++++++++++++++++++++++++++++
1 file changed, 64 insertions(+)
---
diff --git a/src/libtracker-data/tracker-sparql.c b/src/libtracker-data/tracker-sparql.c
index 06c4bb45c..6a914f255 100644
--- a/src/libtracker-data/tracker-sparql.c
+++ b/src/libtracker-data/tracker-sparql.c
@@ -4500,6 +4500,66 @@ iterate_solution (TrackerSparql *sparql,
return retval;
}
+static gboolean
+check_idempotent_delete (TrackerSparql *sparql,
+ TrackerParserNode *pattern,
+ TrackerParserNode *end)
+{
+ TrackerDBInterface *iface;
+ TrackerParserNode *node, *subject;
+ gint n_triples = 0;
+ gboolean skip = FALSE;
+ gchar *subject_str;
+
+ /* Detect the easy idempotent deletes (those that cannot change the
+ * RDF graph): returns TRUE only when the nodes from @pattern up to
+ * @end hold no more than one TriplesTemplate and its IRIREF subject
+ * has resource ID 0, i.e. it does not exist yet; FALSE otherwise.
+ */
+ for (node = tracker_sparql_parser_tree_find_first (pattern, FALSE);
+ node;
+ node = tracker_sparql_parser_tree_find_next (node, FALSE)) {
+ const TrackerGrammarRule *rule;
+
+ if (node == end) /* reached the caller-provided end of the pattern */
+ break;
+
+ rule = tracker_parser_node_get_rule (node);
+ if (tracker_grammar_rule_is_a (rule, RULE_TYPE_RULE,
+ NAMED_RULE_TriplesTemplate))
+ n_triples++;
+
+ if (n_triples > 1) { /* not the simple case: do not skip the delete */
+ skip = FALSE;
+ break;
+ }
+
+ if (!tracker_grammar_rule_is_a (rule, RULE_TYPE_RULE,
+ NAMED_RULE_TriplesSameSubject))
+ continue;
+
+ /* Find the subject node of this TriplesSameSubject */
+ subject = tracker_sparql_parser_tree_find_first (node, TRUE);
+
+ /* Not an IRIREF terminal: nothing to look up, go on to the next node */
+ rule = tracker_parser_node_get_rule (subject);
+ if (!tracker_grammar_rule_is_a (rule, RULE_TYPE_TERMINAL,
+ TERMINAL_TYPE_IRIREF))
+ continue;
+
+ subject_str = _extract_node_string (subject, sparql);
+ iface = tracker_data_manager_get_writable_db_interface (sparql->data_manager);
+ skip = tracker_data_query_resource_id (sparql->data_manager,
+ iface, subject_str) == 0;
+ g_free (subject_str);
+
+ if (!skip) /* subject has a non-zero resource ID: the delete must run */
+ break;
+ }
+
+ return skip;
+}
+
static gboolean
translate_DeleteWhere (TrackerSparql *sparql,
GError **error)
@@ -4515,6 +4575,10 @@ translate_DeleteWhere (TrackerSparql *sparql,
quad_pattern = _skip_rule (sparql, NAMED_RULE_QuadPattern);
+ if (check_idempotent_delete (sparql, quad_pattern,
+ sparql->current_state->node))
+ return TRUE;
+
/* 'DELETE WHERE' uses the same pattern for both query and update */
solution = get_solution_for_pattern (sparql, quad_pattern, error);
if (!solution)
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]