[libdazzle] fuzzy-index-builder: add keyword priority
- From: Christian Hergert <chergert src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [libdazzle] fuzzy-index-builder: add keyword priority
- Date: Sun, 4 Jun 2017 02:07:17 +0000 (UTC)
commit 8d685264e4916ff26388aa87de962a2ad101b551
Author: Christian Hergert <chergert redhat com>
Date: Sat Jun 3 15:22:33 2017 -0700
fuzzy-index-builder: add keyword priority
We want to use this so that we can prioritize results by the cost of the
keyword priority. That allows us to have "tiered" search results.
src/fuzzy/dzl-fuzzy-index-builder.c | 33 ++++++++++++++++++++++++++-------
src/fuzzy/dzl-fuzzy-index-builder.h | 3 ++-
src/fuzzy/dzl-fuzzy-index-cursor.c | 31 +++++++++++++++----------------
src/fuzzy/dzl-fuzzy-index.c | 3 +++
tests/test-desktop-index.c | 15 ++++++++-------
tests/test-fuzzy-index.c | 24 ++++++++++++------------
6 files changed, 66 insertions(+), 43 deletions(-)
---
diff --git a/src/fuzzy/dzl-fuzzy-index-builder.c b/src/fuzzy/dzl-fuzzy-index-builder.c
index 55e5df1..e1eef15 100644
--- a/src/fuzzy/dzl-fuzzy-index-builder.c
+++ b/src/fuzzy/dzl-fuzzy-index-builder.c
@@ -16,7 +16,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#define G_LOG_DOMAIN "dzl-fuzzy-index-builder"
+#define G_LOG_DOMAIN "dzl-fuzzy-index-builder"
+#define MAX_KEY_ENTRIES (0x00FFFFFF)
#include <stdlib.h>
#include <string.h>
@@ -121,6 +122,12 @@ enum {
static GParamSpec *properties [N_PROPS];
+static guint
+mask_priority (guint key_id)
+{
+ return key_id & 0x00FFFFFF;
+}
+
static void
dzl_fuzzy_index_builder_finalize (GObject *object)
{
@@ -215,6 +222,7 @@ dzl_fuzzy_index_builder_new (void)
* @self: A #DzlFuzzyIndexBuilder
* @key: The UTF-8 encoded key for the document
* @document: The document to store
+ * @priority: An optional priority for the keyword.
*
* Inserts @document into the index using @key as the lookup key.
*
@@ -224,21 +232,32 @@ dzl_fuzzy_index_builder_new (void)
* If @document is floating, it's floating reference will be sunk using
* g_variant_ref_sink().
*
+ * @priority may be used to group results by priority. Priority must be
+ * less than 256.
+ *
* Returns: The document id registered for @document.
*/
guint64
dzl_fuzzy_index_builder_insert (DzlFuzzyIndexBuilder *self,
const gchar *key,
- GVariant *document)
+ GVariant *document,
+ guint priority)
{
GVariant *real_document = NULL;
gpointer document_id = NULL;
gpointer key_id = NULL;
KVPair pair;
- g_return_val_if_fail (DZL_IS_FUZZY_INDEX_BUILDER (self), 0);
- g_return_val_if_fail (key != NULL, 0);
- g_return_val_if_fail (document != NULL, 0);
+ g_return_val_if_fail (DZL_IS_FUZZY_INDEX_BUILDER (self), 0L);
+ g_return_val_if_fail (key != NULL, 0L);
+ g_return_val_if_fail (document != NULL, 0L);
+ g_return_val_if_fail (priority <= 0xFF, 0L);
+
+ if (self->keys->len > MAX_KEY_ENTRIES)
+ {
+ g_warning ("Index is full, cannot add more entries");
+ return 0L;
+ }
key = g_string_chunk_insert_const (self->strings, key);
@@ -255,7 +274,7 @@ dzl_fuzzy_index_builder_insert (DzlFuzzyIndexBuilder *self,
if (!g_hash_table_lookup_extended (self->key_ids, key, NULL, &key_id))
{
- key_id = GUINT_TO_POINTER (self->keys->len);
+ key_id = GUINT_TO_POINTER (self->keys->len | ((priority & 0xFF) << 24));
g_ptr_array_add (self->keys, (gchar *)key);
}
@@ -328,7 +347,7 @@ dzl_fuzzy_index_builder_build_index (DzlFuzzyIndexBuilder *self)
const gchar *tmp;
guint position = 0;
- key = g_ptr_array_index (self->keys, kvpair->key_id);
+ key = g_ptr_array_index (self->keys, mask_priority (kvpair->key_id));
if (!self->case_sensitive)
key = lower = g_utf8_casefold (key, -1);
diff --git a/src/fuzzy/dzl-fuzzy-index-builder.h b/src/fuzzy/dzl-fuzzy-index-builder.h
index 39e783f..7be63cf 100644
--- a/src/fuzzy/dzl-fuzzy-index-builder.h
+++ b/src/fuzzy/dzl-fuzzy-index-builder.h
@@ -33,7 +33,8 @@ void dzl_fuzzy_index_builder_set_case_sensitive (DzlFuzzyIndex
gboolean case_sensitive);
guint64 dzl_fuzzy_index_builder_insert (DzlFuzzyIndexBuilder *self,
const gchar *key,
- GVariant *document);
+ GVariant *document,
+ guint priority);
gboolean dzl_fuzzy_index_builder_write (DzlFuzzyIndexBuilder *self,
GFile *file,
gint io_priority,
diff --git a/src/fuzzy/dzl-fuzzy-index-cursor.c b/src/fuzzy/dzl-fuzzy-index-cursor.c
index a6388d9..85b57aa 100644
--- a/src/fuzzy/dzl-fuzzy-index-cursor.c
+++ b/src/fuzzy/dzl-fuzzy-index-cursor.c
@@ -26,14 +26,14 @@
struct _DzlFuzzyIndexCursor
{
- GObject object;
+ GObject object;
DzlFuzzyIndex *index;
- gchar *query;
- GVariantDict *tables;
- GArray *matches;
- guint max_matches;
- guint case_sensitive : 1;
+ gchar *query;
+ GVariantDict *tables;
+ GArray *matches;
+ guint max_matches;
+ guint case_sensitive : 1;
};
typedef struct
@@ -53,12 +53,12 @@ typedef struct
{
DzlFuzzyIndex *index;
const DzlFuzzyIndexItem * const *tables;
- const gsize *tables_n_elements;
- gint *tables_state;
- guint n_tables;
- guint max_matches;
- const gchar *needle;
- GHashTable *matches;
+ const gsize *tables_n_elements;
+ gint *tables_state;
+ guint n_tables;
+ guint max_matches;
+ const gchar *needle;
+ GHashTable *matches;
} DzlFuzzyLookup;
enum {
@@ -385,11 +385,9 @@ dzl_fuzzy_index_cursor_worker (GTask *task,
for (i = 0; i < lookup.tables_n_elements[0]; i++)
{
- const DzlFuzzyIndexItem *item;
+ const DzlFuzzyIndexItem *item = &lookup.tables[0][i];
DzlFuzzyMatch match;
- item = &lookup.tables[0][i];
-
if (item->lookaside_id != last_id)
{
last_id = item->lookaside_id;
@@ -425,6 +423,7 @@ dzl_fuzzy_index_cursor_worker (GTask *task,
gpointer ptr;
gfloat fval;
} other_score;
+ guint penalty = ((lookaside_id & 0xFF000000) >> 24) + 1;
if G_UNLIKELY (!_dzl_fuzzy_index_resolve (self->index,
lookaside_id,
@@ -432,7 +431,7 @@ dzl_fuzzy_index_cursor_worker (GTask *task,
&match.key))
continue;
- match.score = 1.0 / (strlen (match.key) + score);
+ match.score = 1.0 / ((strlen (match.key) + score) * penalty);
if (g_hash_table_lookup_extended (by_document,
GUINT_TO_POINTER (match.document_id),
diff --git a/src/fuzzy/dzl-fuzzy-index.c b/src/fuzzy/dzl-fuzzy-index.c
index c52738a..61ffabc 100644
--- a/src/fuzzy/dzl-fuzzy-index.c
+++ b/src/fuzzy/dzl-fuzzy-index.c
@@ -466,6 +466,9 @@ _dzl_fuzzy_index_resolve (DzlFuzzyIndex *self,
g_assert (DZL_IS_FUZZY_INDEX (self));
g_assert (document_id != NULL);
+ /* Mask off the key priority */
+ lookaside_id &= 0x00FFFFFF;
+
if G_UNLIKELY (lookaside_id >= self->lookaside_len)
return FALSE;
diff --git a/tests/test-desktop-index.c b/tests/test-desktop-index.c
index e8d418a..8fd34f6 100644
--- a/tests/test-desktop-index.c
+++ b/tests/test-desktop-index.c
@@ -158,7 +158,8 @@ test_desktop_index_key (DzlFuzzyIndexBuilder *builder,
const gchar *group,
const gchar *key,
GVariant *document,
- gboolean can_tokenize)
+ gboolean can_tokenize,
+ gint priority)
{
g_autofree gchar *str = NULL;
g_auto(GStrv) words = NULL;
@@ -173,7 +174,7 @@ test_desktop_index_key (DzlFuzzyIndexBuilder *builder,
if (!can_tokenize)
{
- dzl_fuzzy_index_builder_insert (builder, str, document);
+ dzl_fuzzy_index_builder_insert (builder, str, document, priority);
return;
}
@@ -182,7 +183,7 @@ test_desktop_index_key (DzlFuzzyIndexBuilder *builder,
for (guint i = 0; words[i] != NULL; i++)
{
if (*words[i] && !g_ascii_isspace (*words[i]))
- dzl_fuzzy_index_builder_insert (builder, words[i], document);
+ dzl_fuzzy_index_builder_insert (builder, words[i], document, priority);
}
}
@@ -225,10 +226,10 @@ test_desktop_index_file (DzlFuzzyIndexBuilder *builder,
g_variant_dict_insert (&dict, "title", "s", entry_name ?: "");
document = g_variant_take_ref (g_variant_dict_end (&dict));
- test_desktop_index_key (builder, key_file, "Desktop Entry", "Name", document, FALSE);
- test_desktop_index_key (builder, key_file, "Desktop Entry", "Comment", document, TRUE);
- test_desktop_index_key (builder, key_file, "Desktop Entry", "Categories", document, TRUE);
- test_desktop_index_key (builder, key_file, "Desktop Entry", "Keywords", document, TRUE);
+ test_desktop_index_key (builder, key_file, "Desktop Entry", "Name", document, FALSE, 0);
+ test_desktop_index_key (builder, key_file, "Desktop Entry", "Keywords", document, TRUE, 20);
+ test_desktop_index_key (builder, key_file, "Desktop Entry", "Comment", document, TRUE, 40);
+ test_desktop_index_key (builder, key_file, "Desktop Entry", "Categories", document, TRUE, 60);
return TRUE;
}
diff --git a/tests/test-fuzzy-index.c b/tests/test-fuzzy-index.c
index c02359e..a159f81 100644
--- a/tests/test-fuzzy-index.c
+++ b/tests/test-fuzzy-index.c
@@ -55,11 +55,11 @@ test_index_builder_basic (void)
builder = dzl_fuzzy_index_builder_new ();
g_object_add_weak_pointer (G_OBJECT (builder), (gpointer *)&builder);
- dzl_fuzzy_index_builder_insert (builder, "foo", g_variant_new_int32 (1));
- dzl_fuzzy_index_builder_insert (builder, "FOO", g_variant_new_int32 (2));
- dzl_fuzzy_index_builder_insert (builder, "Foo", g_variant_new_int32 (3));
- dzl_fuzzy_index_builder_insert (builder, "bar", g_variant_new_int32 (4));
- dzl_fuzzy_index_builder_insert (builder, "baz", g_variant_new_int32 (5));
+ dzl_fuzzy_index_builder_insert (builder, "foo", g_variant_new_int32 (1), 7);
+ dzl_fuzzy_index_builder_insert (builder, "FOO", g_variant_new_int32 (2), 7);
+ dzl_fuzzy_index_builder_insert (builder, "Foo", g_variant_new_int32 (3), 7);
+ dzl_fuzzy_index_builder_insert (builder, "bar", g_variant_new_int32 (4), 7);
+ dzl_fuzzy_index_builder_insert (builder, "baz", g_variant_new_int32 (5), 7);
dzl_fuzzy_index_builder_write_async (builder,
file,
@@ -233,13 +233,13 @@ test_index_basic (void)
* We want to ensure we only get the highest scoring item for a
* document (which are deduplicated in the index).
*/
- dzl_fuzzy_index_builder_insert (builder, "gtk_widget_show", g_variant_new_int32 (1));
- dzl_fuzzy_index_builder_insert (builder, "gtk_widget_show_all", g_variant_new_int32 (1));
- dzl_fuzzy_index_builder_insert (builder, "gtk_widget_hide", g_variant_new_int32 (2));
- dzl_fuzzy_index_builder_insert (builder, "gtk_widget_hide_all", g_variant_new_int32 (2));
- dzl_fuzzy_index_builder_insert (builder, "gtk_widget_get_parent", g_variant_new_int32 (3));
- dzl_fuzzy_index_builder_insert (builder, "gtk_widget_get_name", g_variant_new_int32 (4));
- dzl_fuzzy_index_builder_insert (builder, "gtk_widget_set_name", g_variant_new_int32 (5));
+ dzl_fuzzy_index_builder_insert (builder, "gtk_widget_show", g_variant_new_int32 (1), 7);
+ dzl_fuzzy_index_builder_insert (builder, "gtk_widget_show_all", g_variant_new_int32 (1), 7);
+ dzl_fuzzy_index_builder_insert (builder, "gtk_widget_hide", g_variant_new_int32 (2), 7);
+ dzl_fuzzy_index_builder_insert (builder, "gtk_widget_hide_all", g_variant_new_int32 (2), 7);
+ dzl_fuzzy_index_builder_insert (builder, "gtk_widget_get_parent", g_variant_new_int32 (3), 7);
+ dzl_fuzzy_index_builder_insert (builder, "gtk_widget_get_name", g_variant_new_int32 (4), 7);
+ dzl_fuzzy_index_builder_insert (builder, "gtk_widget_set_name", g_variant_new_int32 (5), 7);
dzl_fuzzy_index_builder_write_async (builder,
file,
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]