[libdazzle] fuzzy-index-builder: add keyword priority



commit 8d685264e4916ff26388aa87de962a2ad101b551
Author: Christian Hergert <chergert redhat com>
Date:   Sat Jun 3 15:22:33 2017 -0700

    fuzzy-index-builder: add keyword priority
    
    We want to use this so that we can prioritize results by the cost of the
    keyword priority. That allows us to have "tiered" search results.

 src/fuzzy/dzl-fuzzy-index-builder.c |   33 ++++++++++++++++++++++++++-------
 src/fuzzy/dzl-fuzzy-index-builder.h |    3 ++-
 src/fuzzy/dzl-fuzzy-index-cursor.c  |   31 +++++++++++++++----------------
 src/fuzzy/dzl-fuzzy-index.c         |    3 +++
 tests/test-desktop-index.c          |   15 ++++++++-------
 tests/test-fuzzy-index.c            |   24 ++++++++++++------------
 6 files changed, 66 insertions(+), 43 deletions(-)
---
diff --git a/src/fuzzy/dzl-fuzzy-index-builder.c b/src/fuzzy/dzl-fuzzy-index-builder.c
index 55e5df1..e1eef15 100644
--- a/src/fuzzy/dzl-fuzzy-index-builder.c
+++ b/src/fuzzy/dzl-fuzzy-index-builder.c
@@ -16,7 +16,8 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#define G_LOG_DOMAIN "dzl-fuzzy-index-builder"
+#define G_LOG_DOMAIN    "dzl-fuzzy-index-builder"
+#define MAX_KEY_ENTRIES (0x00FFFFFF)
 
 #include <stdlib.h>
 #include <string.h>
@@ -121,6 +122,12 @@ enum {
 
 static GParamSpec *properties [N_PROPS];
 
+static guint
+mask_priority (guint key_id)
+{
+  return key_id & 0x00FFFFFF;
+}
+
 static void
 dzl_fuzzy_index_builder_finalize (GObject *object)
 {
@@ -215,6 +222,7 @@ dzl_fuzzy_index_builder_new (void)
  * @self: A #DzlFuzzyIndexBuilder
  * @key: The UTF-8 encoded key for the document
  * @document: The document to store
+ * @priority: An optional priority for the keyword.
  *
  * Inserts @document into the index using @key as the lookup key.
  *
@@ -224,21 +232,32 @@ dzl_fuzzy_index_builder_new (void)
  * If @document is floating, it's floating reference will be sunk using
  * g_variant_ref_sink().
  *
+ * @priority may be used to group results by priority. Priority must be
+ * less than 256.
+ *
  * Returns: The document id registered for @document.
  */
 guint64
 dzl_fuzzy_index_builder_insert (DzlFuzzyIndexBuilder *self,
                                 const gchar          *key,
-                                GVariant             *document)
+                                GVariant             *document,
+                                guint                 priority)
 {
   GVariant *real_document = NULL;
   gpointer document_id = NULL;
   gpointer key_id = NULL;
   KVPair pair;
 
-  g_return_val_if_fail (DZL_IS_FUZZY_INDEX_BUILDER (self), 0);
-  g_return_val_if_fail (key != NULL, 0);
-  g_return_val_if_fail (document != NULL, 0);
+  g_return_val_if_fail (DZL_IS_FUZZY_INDEX_BUILDER (self), 0L);
+  g_return_val_if_fail (key != NULL, 0L);
+  g_return_val_if_fail (document != NULL, 0L);
+  g_return_val_if_fail (priority <= 0xFF, 0L);
+
+  if (self->keys->len > MAX_KEY_ENTRIES)
+    {
+      g_warning ("Index is full, cannot add more entries");
+      return 0L;
+    }
 
   key = g_string_chunk_insert_const (self->strings, key);
 
@@ -255,7 +274,7 @@ dzl_fuzzy_index_builder_insert (DzlFuzzyIndexBuilder *self,
 
   if (!g_hash_table_lookup_extended (self->key_ids, key, NULL, &key_id))
     {
-      key_id = GUINT_TO_POINTER (self->keys->len);
+      key_id = GUINT_TO_POINTER (self->keys->len | ((priority & 0xFF) << 24));
       g_ptr_array_add (self->keys, (gchar *)key);
     }
 
@@ -328,7 +347,7 @@ dzl_fuzzy_index_builder_build_index (DzlFuzzyIndexBuilder *self)
       const gchar *tmp;
       guint position = 0;
 
-      key = g_ptr_array_index (self->keys, kvpair->key_id);
+      key = g_ptr_array_index (self->keys, mask_priority (kvpair->key_id));
 
       if (!self->case_sensitive)
         key = lower = g_utf8_casefold (key, -1);
diff --git a/src/fuzzy/dzl-fuzzy-index-builder.h b/src/fuzzy/dzl-fuzzy-index-builder.h
index 39e783f..7be63cf 100644
--- a/src/fuzzy/dzl-fuzzy-index-builder.h
+++ b/src/fuzzy/dzl-fuzzy-index-builder.h
@@ -33,7 +33,8 @@ void                  dzl_fuzzy_index_builder_set_case_sensitive  (DzlFuzzyIndex
                                                                    gboolean               case_sensitive);
 guint64               dzl_fuzzy_index_builder_insert              (DzlFuzzyIndexBuilder  *self,
                                                                    const gchar           *key,
-                                                                   GVariant              *document);
+                                                                   GVariant              *document,
+                                                                   guint                  priority);
 gboolean              dzl_fuzzy_index_builder_write               (DzlFuzzyIndexBuilder  *self,
                                                                    GFile                 *file,
                                                                    gint                   io_priority,
diff --git a/src/fuzzy/dzl-fuzzy-index-cursor.c b/src/fuzzy/dzl-fuzzy-index-cursor.c
index a6388d9..85b57aa 100644
--- a/src/fuzzy/dzl-fuzzy-index-cursor.c
+++ b/src/fuzzy/dzl-fuzzy-index-cursor.c
@@ -26,14 +26,14 @@
 
 struct _DzlFuzzyIndexCursor
 {
-  GObject       object;
+  GObject          object;
 
   DzlFuzzyIndex   *index;
-  gchar        *query;
-  GVariantDict *tables;
-  GArray       *matches;
-  guint         max_matches;
-  guint         case_sensitive : 1;
+  gchar           *query;
+  GVariantDict    *tables;
+  GArray          *matches;
+  guint            max_matches;
+  guint            case_sensitive : 1;
 };
 
 typedef struct
@@ -53,12 +53,12 @@ typedef struct
 {
   DzlFuzzyIndex                   *index;
   const DzlFuzzyIndexItem * const *tables;
-  const gsize                  *tables_n_elements;
-  gint                         *tables_state;
-  guint                         n_tables;
-  guint                         max_matches;
-  const gchar                  *needle;
-  GHashTable                   *matches;
+  const gsize                     *tables_n_elements;
+  gint                            *tables_state;
+  guint                            n_tables;
+  guint                            max_matches;
+  const gchar                     *needle;
+  GHashTable                      *matches;
 } DzlFuzzyLookup;
 
 enum {
@@ -385,11 +385,9 @@ dzl_fuzzy_index_cursor_worker (GTask        *task,
 
       for (i = 0; i < lookup.tables_n_elements[0]; i++)
         {
-          const DzlFuzzyIndexItem *item;
+          const DzlFuzzyIndexItem *item = &lookup.tables[0][i];
           DzlFuzzyMatch match;
 
-          item = &lookup.tables[0][i];
-
           if (item->lookaside_id != last_id)
             {
               last_id = item->lookaside_id;
@@ -425,6 +423,7 @@ dzl_fuzzy_index_cursor_worker (GTask        *task,
         gpointer ptr;
         gfloat   fval;
       } other_score;
+      guint penalty = ((lookaside_id & 0xFF000000) >> 24) + 1;
 
       if G_UNLIKELY (!_dzl_fuzzy_index_resolve (self->index,
                                                 lookaside_id,
@@ -432,7 +431,7 @@ dzl_fuzzy_index_cursor_worker (GTask        *task,
                                                 &match.key))
         continue;
 
-      match.score = 1.0 / (strlen (match.key) + score);
+      match.score = 1.0 / ((strlen (match.key) + score) * penalty);
 
       if (g_hash_table_lookup_extended (by_document,
                                         GUINT_TO_POINTER (match.document_id),
diff --git a/src/fuzzy/dzl-fuzzy-index.c b/src/fuzzy/dzl-fuzzy-index.c
index c52738a..61ffabc 100644
--- a/src/fuzzy/dzl-fuzzy-index.c
+++ b/src/fuzzy/dzl-fuzzy-index.c
@@ -466,6 +466,9 @@ _dzl_fuzzy_index_resolve (DzlFuzzyIndex  *self,
   g_assert (DZL_IS_FUZZY_INDEX (self));
   g_assert (document_id != NULL);
 
+  /* Mask off the key priority */
+  lookaside_id &= 0x00FFFFFF;
+
   if G_UNLIKELY (lookaside_id >= self->lookaside_len)
     return FALSE;
 
diff --git a/tests/test-desktop-index.c b/tests/test-desktop-index.c
index e8d418a..8fd34f6 100644
--- a/tests/test-desktop-index.c
+++ b/tests/test-desktop-index.c
@@ -158,7 +158,8 @@ test_desktop_index_key (DzlFuzzyIndexBuilder *builder,
                         const gchar          *group,
                         const gchar          *key,
                         GVariant             *document,
-                        gboolean              can_tokenize)
+                        gboolean              can_tokenize,
+                        gint                  priority)
 {
   g_autofree gchar *str = NULL;
   g_auto(GStrv) words = NULL;
@@ -173,7 +174,7 @@ test_desktop_index_key (DzlFuzzyIndexBuilder *builder,
 
   if (!can_tokenize)
     {
-      dzl_fuzzy_index_builder_insert (builder, str, document);
+      dzl_fuzzy_index_builder_insert (builder, str, document, priority);
       return;
     }
 
@@ -182,7 +183,7 @@ test_desktop_index_key (DzlFuzzyIndexBuilder *builder,
   for (guint i = 0; words[i] != NULL; i++)
     {
       if (*words[i] && !g_ascii_isspace (*words[i]))
-        dzl_fuzzy_index_builder_insert (builder, words[i], document);
+        dzl_fuzzy_index_builder_insert (builder, words[i], document, priority);
     }
 }
 
@@ -225,10 +226,10 @@ test_desktop_index_file (DzlFuzzyIndexBuilder  *builder,
   g_variant_dict_insert (&dict, "title", "s", entry_name ?: "");
   document = g_variant_take_ref (g_variant_dict_end (&dict));
 
-  test_desktop_index_key (builder, key_file, "Desktop Entry", "Name", document, FALSE);
-  test_desktop_index_key (builder, key_file, "Desktop Entry", "Comment", document, TRUE);
-  test_desktop_index_key (builder, key_file, "Desktop Entry", "Categories", document, TRUE);
-  test_desktop_index_key (builder, key_file, "Desktop Entry", "Keywords", document, TRUE);
+  test_desktop_index_key (builder, key_file, "Desktop Entry", "Name", document, FALSE, 0);
+  test_desktop_index_key (builder, key_file, "Desktop Entry", "Keywords", document, TRUE, 20);
+  test_desktop_index_key (builder, key_file, "Desktop Entry", "Comment", document, TRUE, 40);
+  test_desktop_index_key (builder, key_file, "Desktop Entry", "Categories", document, TRUE, 60);
 
   return TRUE;
 }
diff --git a/tests/test-fuzzy-index.c b/tests/test-fuzzy-index.c
index c02359e..a159f81 100644
--- a/tests/test-fuzzy-index.c
+++ b/tests/test-fuzzy-index.c
@@ -55,11 +55,11 @@ test_index_builder_basic (void)
   builder = dzl_fuzzy_index_builder_new ();
   g_object_add_weak_pointer (G_OBJECT (builder), (gpointer *)&builder);
 
-  dzl_fuzzy_index_builder_insert (builder, "foo", g_variant_new_int32 (1));
-  dzl_fuzzy_index_builder_insert (builder, "FOO", g_variant_new_int32 (2));
-  dzl_fuzzy_index_builder_insert (builder, "Foo", g_variant_new_int32 (3));
-  dzl_fuzzy_index_builder_insert (builder, "bar", g_variant_new_int32 (4));
-  dzl_fuzzy_index_builder_insert (builder, "baz", g_variant_new_int32 (5));
+  dzl_fuzzy_index_builder_insert (builder, "foo", g_variant_new_int32 (1), 7);
+  dzl_fuzzy_index_builder_insert (builder, "FOO", g_variant_new_int32 (2), 7);
+  dzl_fuzzy_index_builder_insert (builder, "Foo", g_variant_new_int32 (3), 7);
+  dzl_fuzzy_index_builder_insert (builder, "bar", g_variant_new_int32 (4), 7);
+  dzl_fuzzy_index_builder_insert (builder, "baz", g_variant_new_int32 (5), 7);
 
   dzl_fuzzy_index_builder_write_async (builder,
                                        file,
@@ -233,13 +233,13 @@ test_index_basic (void)
    * We want to ensure we only get the highest scoring item for a
    * document (which are deduplicated in the index).
    */
-  dzl_fuzzy_index_builder_insert (builder, "gtk_widget_show", g_variant_new_int32 (1));
-  dzl_fuzzy_index_builder_insert (builder, "gtk_widget_show_all", g_variant_new_int32 (1));
-  dzl_fuzzy_index_builder_insert (builder, "gtk_widget_hide", g_variant_new_int32 (2));
-  dzl_fuzzy_index_builder_insert (builder, "gtk_widget_hide_all", g_variant_new_int32 (2));
-  dzl_fuzzy_index_builder_insert (builder, "gtk_widget_get_parent", g_variant_new_int32 (3));
-  dzl_fuzzy_index_builder_insert (builder, "gtk_widget_get_name", g_variant_new_int32 (4));
-  dzl_fuzzy_index_builder_insert (builder, "gtk_widget_set_name", g_variant_new_int32 (5));
+  dzl_fuzzy_index_builder_insert (builder, "gtk_widget_show", g_variant_new_int32 (1), 7);
+  dzl_fuzzy_index_builder_insert (builder, "gtk_widget_show_all", g_variant_new_int32 (1), 7);
+  dzl_fuzzy_index_builder_insert (builder, "gtk_widget_hide", g_variant_new_int32 (2), 7);
+  dzl_fuzzy_index_builder_insert (builder, "gtk_widget_hide_all", g_variant_new_int32 (2), 7);
+  dzl_fuzzy_index_builder_insert (builder, "gtk_widget_get_parent", g_variant_new_int32 (3), 7);
+  dzl_fuzzy_index_builder_insert (builder, "gtk_widget_get_name", g_variant_new_int32 (4), 7);
+  dzl_fuzzy_index_builder_insert (builder, "gtk_widget_set_name", g_variant_new_int32 (5), 7);
 
   dzl_fuzzy_index_builder_write_async (builder,
                                        file,


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]