[tracker/wip/carlosg/delete-optimizations: 1/2] libtracker-data: Optimize easily catchable idempotent deletes




commit 4677d81032da59818f81d0bed7538c034a110fe5
Author: Carlos Garnacho <carlosg gnome org>
Date:   Sat Nov 7 22:59:13 2020 +0100

    libtracker-data: Optimize easily catchable idempotent deletes
    
    Operations like "DELETE WHERE { <urn> ... }" can be tiptoed around if we
    know that <urn> is not a known resource yet, avoiding its full evaluation.
    
    These operations are a fairly common result of using TrackerResource
    (e.g. ensuring multivalued properties start from a clean slate). They are
    common enough that translate_DeleteWhere is prominent in first-index
    tracker-miner-fs-3 perf records, e.g.:
    
      15.79%     0.01%  pool-tracker-mi  libtracker-sparql-3.0.so.0.100.0  [.] translate_DeleteWhere
    
    With this change in place, the same situation becomes:
    
      3.70%     0.07%  pool-tracker-mi  libtracker-sparql-3.0.so.0.100.0  [.] translate_DeleteWhere

 src/libtracker-data/tracker-sparql.c | 64 ++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)
---
diff --git a/src/libtracker-data/tracker-sparql.c b/src/libtracker-data/tracker-sparql.c
index 06c4bb45c..6a914f255 100644
--- a/src/libtracker-data/tracker-sparql.c
+++ b/src/libtracker-data/tracker-sparql.c
@@ -4500,6 +4500,66 @@ iterate_solution (TrackerSparql      *sparql,
        return retval;
 }
 
+static gboolean
+check_idempotent_delete (TrackerSparql     *sparql,
+                         TrackerParserNode *pattern,
+                         TrackerParserNode *end)
+{
+       TrackerDBInterface *iface;
+       TrackerParserNode *node, *subject;
+       gint n_triples = 0;
+       gboolean skip = FALSE;
+       gchar *subject_str;
+
+       /* Detect the easy idempotent deletes (those that don't change the
+        * RDF graph): walk the nodes from @pattern up to @end, and return
+        * TRUE if no more than one TriplesTemplate is seen and the IRI
+        * subject found has no resource ID yet (the lookup returned 0).
+        */
+       for (node = tracker_sparql_parser_tree_find_first (pattern, FALSE);
+            node;
+            node = tracker_sparql_parser_tree_find_next (node, FALSE)) {
+               const TrackerGrammarRule *rule;
+
+               if (node == end)
+                       break;
+
+               rule = tracker_parser_node_get_rule (node);
+               if (tracker_grammar_rule_is_a (rule, RULE_TYPE_RULE,
+                                              NAMED_RULE_TriplesTemplate))
+                       n_triples++;
+
+               if (n_triples > 1) {
+                       skip = FALSE;
+                       break;
+               }
+
+               if (!tracker_grammar_rule_is_a (rule, RULE_TYPE_RULE,
+                                               NAMED_RULE_TriplesSameSubject))
+                       continue;
+
+               /* Find subject (NOTE(review): presumably the first leaf node; confirm) */
+               subject = tracker_sparql_parser_tree_find_first (node, TRUE);
+
+               /* If it's not an IRIREF terminal, move on to the next node */
+               rule = tracker_parser_node_get_rule (subject);
+               if (!tracker_grammar_rule_is_a (rule, RULE_TYPE_TERMINAL,
+                                               TERMINAL_TYPE_IRIREF))
+                       continue;
+
+               subject_str = _extract_node_string (subject, sparql);
+               iface = tracker_data_manager_get_writable_db_interface (sparql->data_manager);
+               skip = tracker_data_query_resource_id (sparql->data_manager,
+                                                      iface, subject_str) == 0;
+               g_free (subject_str);
+
+               if (!skip)
+                       break;
+       }
+
+       return skip;
+}
+
 static gboolean
 translate_DeleteWhere (TrackerSparql  *sparql,
                        GError        **error)
@@ -4515,6 +4575,10 @@ translate_DeleteWhere (TrackerSparql  *sparql,
 
        quad_pattern = _skip_rule (sparql, NAMED_RULE_QuadPattern);
 
+       if (check_idempotent_delete (sparql, quad_pattern,
+                                    sparql->current_state->node))
+               return TRUE;
+
        /* 'DELETE WHERE' uses the same pattern for both query and update */
        solution = get_solution_for_pattern (sparql, quad_pattern, error);
        if (!solution)


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]