[gnome-builder/wip/gtk4-port: 470/495] search: improve (im)mutable fuzzy indexers
- From: Christian Hergert <chergert src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gnome-builder/wip/gtk4-port: 470/495] search: improve (im)mutable fuzzy indexers
- Date: Thu, 24 Mar 2022 23:51:40 +0000 (UTC)
commit b8a6efa014ce0409c779cc40eb101b24e239daf9
Author: Christian Hergert <chergert redhat com>
Date: Fri Sep 24 11:30:47 2021 -0700
search: improve (im)mutable fuzzy indexers
src/libide/search/ide-fuzzy-index-builder.c | 714 +++++++++++++++++++++++++++
src/libide/search/ide-fuzzy-index-builder.h | 79 +++
src/libide/search/ide-fuzzy-index-cursor.c | 631 ++++++++++++++++++++++++
src/libide/search/ide-fuzzy-index-cursor.h | 35 ++
src/libide/search/ide-fuzzy-index-match.c | 205 ++++++++
src/libide/search/ide-fuzzy-index-match.h | 39 ++
src/libide/search/ide-fuzzy-index-private.h | 36 ++
src/libide/search/ide-fuzzy-index.c | 511 ++++++++++++++++++++
src/libide/search/ide-fuzzy-index.h | 71 +++
src/libide/search/ide-fuzzy-mutable-index.c | 724 ++++++++++++++++++++++++++++
src/libide/search/ide-fuzzy-mutable-index.h | 76 +++
src/libide/search/ide-int-pair.h | 209 ++++++++
src/libide/search/libide-search.h | 5 +
src/libide/search/meson.build | 10 +
14 files changed, 3345 insertions(+)
---
diff --git a/src/libide/search/ide-fuzzy-index-builder.c b/src/libide/search/ide-fuzzy-index-builder.c
new file mode 100644
index 000000000..656ffd4c1
--- /dev/null
+++ b/src/libide/search/ide-fuzzy-index-builder.c
@@ -0,0 +1,714 @@
+/* ide-fuzzy-index-builder.c
+ *
+ * Copyright (C) 2016-2021 Christian Hergert <christian hergert me>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define G_LOG_DOMAIN "ide-fuzzy-index-builder"
+#define MAX_KEY_ENTRIES (0x00FFFFFF)
+
+#include "config.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "ide-fuzzy-index-builder.h"
+
+struct _IdeFuzzyIndexBuilder
+{
+ GObject object;
+
+ guint case_sensitive : 1;
+
+ /*
+ * This hash table contains a mapping of GVariants so that we
+ * deduplicate insertions of the same document. This helps when
+ * we have indexes that contain multiple strings to the same
+ * piece of data.
+ */
+ GHashTable *documents_hash;
+
+ /*
+ * This array contains a pointer to the individual GVariants
+ * while building the index. When writing the index to disk,
+ * we create a fixed array from this array of varians.
+ */
+ GPtrArray *documents;
+
+ /*
+ * Since we will need to keep a copy of a lot of strings, we
+ * use a GString chunk to reduce the presure on the allocator.
+ * It can certainly leave some gaps that are unused in the
+ * sequence of pages, but it is generally better than using
+ * a GByteArray or some other pow^2 growing array.
+ */
+ GStringChunk *strings;
+
+ /*
+ * This maps a pointer to a string that is found in the strings
+ * string chunk to a key id (stored as a pointer). The input
+ * string must exist within strings as we use a direct hash from
+ * the input pointer to map to the string to save on the cost
+ * of key equality checks.
+ */
+ GHashTable *key_ids;
+
+ /*
+ * An array of keys where the index of the key is the "key_id" used
+ * in other structures. The pointer points to a key within the
+ * strings GStringChunk.
+ */
+ GPtrArray *keys;
+
+ /*
+ * This array maps our document id to a key id. When building the
+ * search index we use this to disambiguate between multiple
+ * documents pointing to the same document.
+ */
+ GArray *kv_pairs;
+
+ /*
+ * Metadata for the search index, which is stored as the "metadata"
+ * key in the final search index. You can use fuzzy_index_get_metadata()
+ * to retrieve values stored here.
+ *
+ * This might be useful to store things like the mtime of the data
+ * you are indexes so that you know if you need to reindex. You might
+ * also store the version of your indexer here so that when you update
+ * your indexer code, you can force a rebuild of the index.
+ */
+ GHashTable *metadata;
+};
+
+typedef struct
+{
+ /* The position within the keys array of the key. */
+ guint key_id;
+
+ /* The position within the documents array of the document */
+ guint document_id;
+} KVPair;
+
+typedef struct
+{
+ /*
+ * The character position within the string in terms of unicode
+ * characters, not byte-position.
+ */
+ guint position;
+
+ /* The index into the kvpairs */
+ guint lookaside_id;
+} IndexItem;
+
+G_DEFINE_TYPE (IdeFuzzyIndexBuilder, ide_fuzzy_index_builder, G_TYPE_OBJECT)
+
+enum {
+ PROP_0,
+ PROP_CASE_SENSITIVE,
+ N_PROPS
+};
+
+static GParamSpec *properties [N_PROPS];
+
+static guint
+_g_variant_hash (gconstpointer data)
+{
+ GVariant *variant = (GVariant *)data;
+ GBytes *bytes;
+ guint ret;
+
+ if (!g_variant_is_container (variant))
+ return g_variant_hash (variant);
+
+ /* Generally we wouldn't want to create a bytes to hash
+ * during a hash call, since that'd be fairly expensive.
+ * But since GHashTable caches hash values, its not that
+ * big of a deal.
+ */
+ bytes = g_variant_get_data_as_bytes (variant);
+ ret = g_bytes_hash (bytes);
+ g_bytes_unref (bytes);
+
+ return ret;
+}
+
+static guint
+mask_priority (guint key_id)
+{
+ return key_id & 0x00FFFFFF;
+}
+
+static void
+ide_fuzzy_index_builder_finalize (GObject *object)
+{
+ IdeFuzzyIndexBuilder *self = (IdeFuzzyIndexBuilder *)object;
+
+ g_clear_pointer (&self->documents_hash, g_hash_table_unref);
+ g_clear_pointer (&self->documents, g_ptr_array_unref);
+ g_clear_pointer (&self->strings, g_string_chunk_free);
+ g_clear_pointer (&self->kv_pairs, g_array_unref);
+ g_clear_pointer (&self->metadata, g_hash_table_unref);
+ g_clear_pointer (&self->key_ids, g_hash_table_unref);
+ g_clear_pointer (&self->keys, g_ptr_array_unref);
+
+ G_OBJECT_CLASS (ide_fuzzy_index_builder_parent_class)->finalize (object);
+}
+
+static void
+ide_fuzzy_index_builder_get_property (GObject *object,
+ guint prop_id,
+ GValue *value,
+ GParamSpec *pspec)
+{
+ IdeFuzzyIndexBuilder *self = IDE_FUZZY_INDEX_BUILDER (object);
+
+ switch (prop_id)
+ {
+ case PROP_CASE_SENSITIVE:
+ g_value_set_boolean (value, ide_fuzzy_index_builder_get_case_sensitive (self));
+ break;
+
+ default:
+ G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+ }
+}
+
+static void
+ide_fuzzy_index_builder_set_property (GObject *object,
+ guint prop_id,
+ const GValue *value,
+ GParamSpec *pspec)
+{
+ IdeFuzzyIndexBuilder *self = IDE_FUZZY_INDEX_BUILDER (object);
+
+ switch (prop_id)
+ {
+ case PROP_CASE_SENSITIVE:
+ ide_fuzzy_index_builder_set_case_sensitive (self, g_value_get_boolean (value));
+ break;
+
+ default:
+ G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+ }
+}
+
+static void
+ide_fuzzy_index_builder_class_init (IdeFuzzyIndexBuilderClass *klass)
+{
+ GObjectClass *object_class = G_OBJECT_CLASS (klass);
+
+ object_class->finalize = ide_fuzzy_index_builder_finalize;
+ object_class->get_property = ide_fuzzy_index_builder_get_property;
+ object_class->set_property = ide_fuzzy_index_builder_set_property;
+
+ properties [PROP_CASE_SENSITIVE] =
+ g_param_spec_boolean ("case-sensitive",
+ "Case Sensitive",
+ "Case Sensitive",
+ FALSE,
+ (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
+
+ g_object_class_install_properties (object_class, N_PROPS, properties);
+}
+
+static void
+ide_fuzzy_index_builder_init (IdeFuzzyIndexBuilder *self)
+{
+ self->documents = g_ptr_array_new_with_free_func ((GDestroyNotify)g_variant_unref);
+ self->documents_hash = g_hash_table_new (_g_variant_hash, g_variant_equal);
+ self->kv_pairs = g_array_new (FALSE, FALSE, sizeof (KVPair));
+ self->strings = g_string_chunk_new (4096);
+ self->key_ids = g_hash_table_new (NULL, NULL);
+ self->keys = g_ptr_array_new ();
+}
+
+IdeFuzzyIndexBuilder *
+ide_fuzzy_index_builder_new (void)
+{
+ return g_object_new (IDE_TYPE_FUZZY_INDEX_BUILDER, NULL);
+}
+
+/**
+ * ide_fuzzy_index_builder_insert:
+ * @self: A #IdeFuzzyIndexBuilder
+ * @key: The UTF-8 encoded key for the document
+ * @document: The document to store
+ * @priority: An optional priority for the keyword.
+ *
+ * Inserts @document into the index using @key as the lookup key.
+ *
+ * If a matching document (checked by hashing @document) has already
+ * been inserted, only a single instance of the document will be stored.
+ *
+ * If @document is floating, it will be consumed.
+ *
+ * @priority may be used to group results by priority. Priority must be
+ * less than 256.
+ *
+ * Returns: The document id registered for @document.
+ */
+guint64
+ide_fuzzy_index_builder_insert (IdeFuzzyIndexBuilder *self,
+ const gchar *key,
+ GVariant *document,
+ guint priority)
+{
+ g_autoptr(GVariant) sunk_variant = NULL;
+ GVariant *real_document = NULL;
+ gpointer document_id = NULL;
+ gpointer key_id = NULL;
+ KVPair pair;
+
+ g_return_val_if_fail (IDE_IS_FUZZY_INDEX_BUILDER (self), 0L);
+ g_return_val_if_fail (key != NULL, 0L);
+ g_return_val_if_fail (document != NULL, 0L);
+ g_return_val_if_fail (priority <= 0xFF, 0L);
+
+ if (g_variant_is_floating (document))
+ sunk_variant = g_variant_ref_sink (document);
+
+ /* move the priority bits into the proper area */
+ priority = (priority & 0xFF) << 24;
+
+ if (self->keys->len > MAX_KEY_ENTRIES)
+ {
+ g_warning ("Index is full, cannot add more entries");
+ return 0L;
+ }
+
+ key = g_string_chunk_insert_const (self->strings, key);
+
+ /*
+ * We try to deduplicate document entries here by hashing the document and
+ * looking for another matching it. This way our generated index can stay
+ * relatively small when it comes to documents.
+ */
+ if (!g_hash_table_lookup_extended (self->documents_hash,
+ document,
+ (gpointer *)&real_document,
+ &document_id))
+ {
+ document_id = GUINT_TO_POINTER (self->documents->len);
+ real_document = g_variant_ref (document);
+ g_ptr_array_add (self->documents, real_document);
+ g_hash_table_insert (self->documents_hash, real_document, document_id);
+ }
+
+ /*
+ * If we already have the key then reuse its key index. If not, then add it.
+ */
+ if (!g_hash_table_lookup_extended (self->key_ids, key, NULL, &key_id))
+ {
+ key_id = GUINT_TO_POINTER (self->keys->len);
+ g_ptr_array_add (self->keys, (gchar *)key);
+ g_hash_table_insert (self->key_ids, (gpointer)key, key_id);
+ }
+
+ /*
+ * A bit of slight-of-hand here. We share keys between all key<->document
+ * pairs, but steal the high bits for the key priority in the kvpair entry.
+ * This allows for both deduplication and different priorities based on
+ * certain document pairs.
+ */
+ pair.key_id = GPOINTER_TO_UINT (key_id) | priority;
+ pair.document_id = GPOINTER_TO_UINT (document_id);
+
+ g_array_append_val (self->kv_pairs, pair);
+
+ return pair.document_id;
+}
+
+static gint
+pos_doc_pair_compare (gconstpointer a,
+ gconstpointer b)
+{
+ const IndexItem *paira = a;
+ const IndexItem *pairb = b;
+ gint ret;
+
+ ret = paira->lookaside_id - pairb->lookaside_id;
+
+ if (ret == 0)
+ ret = paira->position - pairb->position;
+
+ return ret;
+}
+
+static GVariant *
+ide_fuzzy_index_builder_build_keys (IdeFuzzyIndexBuilder *self)
+{
+ g_assert (IDE_IS_FUZZY_INDEX_BUILDER (self));
+
+ return g_variant_new_strv ((const gchar * const *)self->keys->pdata,
+ self->keys->len);
+}
+
+static GVariant *
+ide_fuzzy_index_builder_build_lookaside (IdeFuzzyIndexBuilder *self)
+{
+ g_assert (IDE_IS_FUZZY_INDEX_BUILDER (self));
+
+ return g_variant_new_fixed_array ((const GVariantType *)"(uu)",
+ self->kv_pairs->data,
+ self->kv_pairs->len,
+ sizeof (KVPair));
+}
+
+static GVariant *
+ide_fuzzy_index_builder_build_index (IdeFuzzyIndexBuilder *self)
+{
+ g_autoptr(GHashTable) rows = NULL;
+ GVariantDict dict;
+ GHashTableIter iter;
+ gpointer keyptr;
+ GArray *row;
+ guint i;
+
+ g_assert (IDE_IS_FUZZY_INDEX_BUILDER (self));
+
+ rows = g_hash_table_new_full (NULL, NULL, NULL, (GDestroyNotify)g_array_unref);
+
+ for (i = 0; i < self->kv_pairs->len; i++)
+ {
+ g_autofree gchar *lower = NULL;
+ const gchar *key;
+ const gchar *tmp;
+ KVPair *kvpair;
+ IndexItem item;
+ guint position = 0;
+
+ kvpair = &g_array_index (self->kv_pairs, KVPair, i);
+ key = g_ptr_array_index (self->keys, mask_priority (kvpair->key_id));
+
+ /* the priority for the key is stashed in the high 8 bits of
+ * the kvpair.key_id. So we need to propagate that to the
+ * entry in the index for resolution later.
+ */
+ item.lookaside_id = i | (kvpair->key_id & 0xFF000000);
+
+ if (!self->case_sensitive)
+ key = lower = g_utf8_casefold (key, -1);
+
+ for (tmp = key; *tmp; tmp = g_utf8_next_char (tmp))
+ {
+ gunichar ch = g_utf8_get_char (tmp);
+
+ row = g_hash_table_lookup (rows, GUINT_TO_POINTER (ch));
+
+ if G_UNLIKELY (row == NULL)
+ {
+ row = g_array_new (FALSE, FALSE, sizeof (IndexItem));
+ g_hash_table_insert (rows, GUINT_TO_POINTER (ch), row);
+ }
+
+ item.position = position++;
+ g_array_append_val (row, item);
+ }
+ }
+
+ g_variant_dict_init (&dict, NULL);
+
+ g_hash_table_iter_init (&iter, rows);
+
+ while (g_hash_table_iter_next (&iter, &keyptr, (gpointer *)&row))
+ {
+ gchar key[12];
+ GVariant *variant;
+ gunichar ch = GPOINTER_TO_UINT (keyptr);
+
+ key [g_unichar_to_utf8 (ch, key)] = 0;
+
+ g_array_sort (row, pos_doc_pair_compare);
+
+ variant = g_variant_new_fixed_array ((const GVariantType *)"(uu)",
+ row->data,
+ row->len,
+ sizeof (IndexItem));
+ g_variant_dict_insert_value (&dict, key, variant);
+ }
+
+ return g_variant_dict_end (&dict);
+}
+
+static GVariant *
+ide_fuzzy_index_builder_build_metadata (IdeFuzzyIndexBuilder *self)
+{
+ GVariantDict dict;
+ GHashTableIter iter;
+
+ g_assert (IDE_IS_FUZZY_INDEX_BUILDER (self));
+
+ g_variant_dict_init (&dict, NULL);
+
+ if (self->metadata != NULL)
+ {
+ const gchar *key;
+ GVariant *value;
+
+ g_hash_table_iter_init (&iter, self->metadata);
+ while (g_hash_table_iter_next (&iter, (gpointer *)&key, (gpointer *)&value))
+ g_variant_dict_insert_value (&dict, key, value);
+ }
+
+ g_variant_dict_insert (&dict, "case-sensitive", "b", self->case_sensitive);
+
+ return g_variant_dict_end (&dict);
+}
+
+static void
+ide_fuzzy_index_builder_write_worker (GTask *task,
+ gpointer source_object,
+ gpointer task_data,
+ GCancellable *cancellable)
+{
+ IdeFuzzyIndexBuilder *self = source_object;
+ g_autoptr(GVariant) variant = NULL;
+ g_autoptr(GVariant) documents = NULL;
+ g_autoptr(GError) error = NULL;
+ GVariantDict dict;
+ GFile *file = task_data;
+
+ g_assert (G_IS_TASK (task));
+ g_assert (IDE_IS_FUZZY_INDEX_BUILDER (self));
+ g_assert (G_IS_FILE (file));
+ g_assert (!cancellable || G_IS_CANCELLABLE (cancellable));
+
+ g_variant_dict_init (&dict, NULL);
+
+ /* Set our version number for the document */
+ g_variant_dict_insert (&dict, "version", "i", 1);
+
+ /* Build our dicitionary of metadata */
+ g_variant_dict_insert_value (&dict,
+ "metadata",
+ ide_fuzzy_index_builder_build_metadata (self));
+
+ /* Keys is an array of string keys where the index is the "key_id" */
+ g_variant_dict_insert_value (&dict,
+ "keys",
+ ide_fuzzy_index_builder_build_keys (self));
+
+ /* The lookaside is a mapping of kvpair to the repsective keys and
+ * documents. This allows the tables to use the kvpair id as the value
+ * in the index so we can have both document deduplication as well as
+ * the ability to disambiguate the keys which point to the same
+ * document. The contents are "a{uu}".
+ */
+ g_variant_dict_insert_value (&dict,
+ "lookaside",
+ ide_fuzzy_index_builder_build_lookaside (self));
+
+ /* Build our dicitionary of character → [(pos,lookaside_id),..] tuples.
+ * The position is the utf8 character position within the string.
+ * The lookaside_id is the index within the lookaside buffer to locate
+ * the document_id or key_id.
+ */
+ g_variant_dict_insert_value (&dict,
+ "tables",
+ ide_fuzzy_index_builder_build_index (self));
+
+ /*
+ * The documents are stored as an array where the document identifier is
+ * their index position. We then use a lookaside buffer to map the insertion
+ * id to the document id. Otherwise, we can't disambiguate between two
+ * keys that insert the same document (as we deduplicate documents inserted
+ * into the index).
+ */
+ documents = g_variant_new_array (NULL,
+ (GVariant * const *)self->documents->pdata,
+ self->documents->len);
+ g_variant_dict_insert_value (&dict, "documents", g_variant_ref_sink (documents));
+
+ /* Now write the variant to disk */
+ variant = g_variant_ref_sink (g_variant_dict_end (&dict));
+ if (!g_file_replace_contents (file,
+ g_variant_get_data (variant),
+ g_variant_get_size (variant),
+ NULL,
+ FALSE,
+ G_FILE_CREATE_NONE,
+ NULL,
+ cancellable,
+ &error))
+ g_task_return_error (task, g_steal_pointer (&error));
+ else
+ g_task_return_boolean (task, TRUE);
+}
+
+/**
+ * ide_fuzzy_index_builder_write_async:
+ * @self: A #IdeFuzzyIndexBuilder
+ * @file: A #GFile to write the index to
+ * @io_priority: The priority for IO operations
+ * @cancellable: (nullable): An optional #GCancellable or %NULL
+ * @callback: A callback for completion or %NULL
+ * @user_data: User data for @callback
+ *
+ * Builds and writes the index to @file. The file format is a
+ * GVariant on disk and can be loaded and searched using
+ * #FuzzyIndex.
+ */
+void
+ide_fuzzy_index_builder_write_async (IdeFuzzyIndexBuilder *self,
+ GFile *file,
+ gint io_priority,
+ GCancellable *cancellable,
+ GAsyncReadyCallback callback,
+ gpointer user_data)
+{
+ g_autoptr(GTask) task = NULL;
+
+ g_return_if_fail (IDE_IS_FUZZY_INDEX_BUILDER (self));
+ g_return_if_fail (G_IS_FILE (file));
+ g_return_if_fail (!cancellable || G_IS_CANCELLABLE (cancellable));
+
+ task = g_task_new (self, cancellable, callback, user_data);
+ g_task_set_source_tag (task, ide_fuzzy_index_builder_write_async);
+ g_task_set_priority (task, io_priority);
+ g_task_set_task_data (task, g_object_ref (file), g_object_unref);
+ g_task_run_in_thread (task, ide_fuzzy_index_builder_write_worker);
+}
+
+gboolean
+ide_fuzzy_index_builder_write_finish (IdeFuzzyIndexBuilder *self,
+ GAsyncResult *result,
+ GError **error)
+{
+ g_return_val_if_fail (IDE_IS_FUZZY_INDEX_BUILDER (self), FALSE);
+ g_return_val_if_fail (G_IS_TASK (result), FALSE);
+
+ return g_task_propagate_boolean (G_TASK (result), error);
+}
+
+gboolean
+ide_fuzzy_index_builder_write (IdeFuzzyIndexBuilder *self,
+ GFile *file,
+ gint io_priority,
+ GCancellable *cancellable,
+ GError **error)
+{
+ g_autoptr(GTask) task = NULL;
+
+ g_return_val_if_fail (IDE_IS_FUZZY_INDEX_BUILDER (self), FALSE);
+ g_return_val_if_fail (G_IS_FILE (file), FALSE);
+ g_return_val_if_fail (!cancellable || G_IS_CANCELLABLE (cancellable), FALSE);
+
+ task = g_task_new (self, cancellable, NULL, NULL);
+ g_task_set_source_tag (task, ide_fuzzy_index_builder_write);
+ g_task_set_priority (task, io_priority);
+ g_task_set_task_data (task, g_object_ref (file), g_object_unref);
+
+ ide_fuzzy_index_builder_write_worker (task, self, file, cancellable);
+
+ return g_task_propagate_boolean (task, error);
+}
+
+/**
+ * ide_fuzzy_index_builder_get_document:
+ *
+ * Returns the document that was inserted in a previous call to
+ * ide_fuzzy_index_builder_insert().
+ *
+ * Returns: (transfer none): A #GVariant
+ */
+const GVariant *
+ide_fuzzy_index_builder_get_document (IdeFuzzyIndexBuilder *self,
+ guint64 document_id)
+{
+ g_return_val_if_fail (IDE_IS_FUZZY_INDEX_BUILDER (self), NULL);
+ g_return_val_if_fail ((guint)document_id < self->documents->len, NULL);
+
+ return g_ptr_array_index (self->documents, (guint)document_id);
+}
+
+void
+ide_fuzzy_index_builder_set_metadata (IdeFuzzyIndexBuilder *self,
+ const gchar *key,
+ GVariant *value)
+{
+ g_return_if_fail (IDE_IS_FUZZY_INDEX_BUILDER (self));
+ g_return_if_fail (key != NULL);
+
+ if (self->metadata == NULL)
+ self->metadata = g_hash_table_new_full (g_str_hash,
+ g_str_equal,
+ g_free,
+ (GDestroyNotify)g_variant_unref);
+
+ if (value != NULL)
+ g_hash_table_insert (self->metadata,
+ g_strdup (key),
+ g_variant_ref_sink (value));
+ else
+ g_hash_table_remove (self->metadata, key);
+}
+
+void
+ide_fuzzy_index_builder_set_metadata_string (IdeFuzzyIndexBuilder *self,
+ const gchar *key,
+ const gchar *value)
+{
+ g_return_if_fail (IDE_IS_FUZZY_INDEX_BUILDER (self));
+ g_return_if_fail (key != NULL);
+ g_return_if_fail (value != NULL);
+
+ ide_fuzzy_index_builder_set_metadata (self, key, g_variant_new_string (value));
+}
+
+void
+ide_fuzzy_index_builder_set_metadata_uint32 (IdeFuzzyIndexBuilder *self,
+ const gchar *key,
+ guint32 value)
+{
+ g_return_if_fail (IDE_IS_FUZZY_INDEX_BUILDER (self));
+ g_return_if_fail (key != NULL);
+
+ ide_fuzzy_index_builder_set_metadata (self, key, g_variant_new_uint32 (value));
+}
+
+void
+ide_fuzzy_index_builder_set_metadata_uint64 (IdeFuzzyIndexBuilder *self,
+ const gchar *key,
+ guint64 value)
+{
+ g_return_if_fail (IDE_IS_FUZZY_INDEX_BUILDER (self));
+ g_return_if_fail (key != NULL);
+
+ ide_fuzzy_index_builder_set_metadata (self, key, g_variant_new_uint64 (value));
+}
+
+gboolean
+ide_fuzzy_index_builder_get_case_sensitive (IdeFuzzyIndexBuilder *self)
+{
+ g_return_val_if_fail (IDE_IS_FUZZY_INDEX_BUILDER (self), FALSE);
+
+ return self->case_sensitive;
+}
+
+void
+ide_fuzzy_index_builder_set_case_sensitive (IdeFuzzyIndexBuilder *self,
+ gboolean case_sensitive)
+{
+ g_return_if_fail (IDE_IS_FUZZY_INDEX_BUILDER (self));
+
+ case_sensitive = !!case_sensitive;
+
+ if (self->case_sensitive != case_sensitive)
+ {
+ self->case_sensitive = case_sensitive;
+ g_object_notify_by_pspec (G_OBJECT (self), properties [PROP_CASE_SENSITIVE]);
+ }
+}
diff --git a/src/libide/search/ide-fuzzy-index-builder.h b/src/libide/search/ide-fuzzy-index-builder.h
new file mode 100644
index 000000000..6c1f1c2de
--- /dev/null
+++ b/src/libide/search/ide-fuzzy-index-builder.h
@@ -0,0 +1,79 @@
+/* ide-fuzzy-index-builder.h
+ *
+ * Copyright (C) 2016 Christian Hergert <christian hergert me>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <libide-core.h>
+
+G_BEGIN_DECLS
+
+#define IDE_TYPE_FUZZY_INDEX_BUILDER (ide_fuzzy_index_builder_get_type())
+
+IDE_AVAILABLE_IN_ALL
+G_DECLARE_FINAL_TYPE (IdeFuzzyIndexBuilder, ide_fuzzy_index_builder, IDE, FUZZY_INDEX_BUILDER, GObject)
+
+IDE_AVAILABLE_IN_ALL
+IdeFuzzyIndexBuilder *ide_fuzzy_index_builder_new (void);
+IDE_AVAILABLE_IN_ALL
+gboolean ide_fuzzy_index_builder_get_case_sensitive (IdeFuzzyIndexBuilder *self);
+IDE_AVAILABLE_IN_ALL
+void ide_fuzzy_index_builder_set_case_sensitive (IdeFuzzyIndexBuilder *self,
+ gboolean case_sensitive);
+IDE_AVAILABLE_IN_ALL
+guint64 ide_fuzzy_index_builder_insert (IdeFuzzyIndexBuilder *self,
+ const gchar *key,
+ GVariant *document,
+ guint priority);
+IDE_AVAILABLE_IN_ALL
+gboolean ide_fuzzy_index_builder_write (IdeFuzzyIndexBuilder *self,
+ GFile *file,
+ gint io_priority,
+ GCancellable *cancellable,
+ GError **error);
+IDE_AVAILABLE_IN_ALL
+void ide_fuzzy_index_builder_write_async (IdeFuzzyIndexBuilder *self,
+ GFile *file,
+ gint io_priority,
+ GCancellable *cancellable,
+ GAsyncReadyCallback callback,
+ gpointer user_data);
+IDE_AVAILABLE_IN_ALL
+gboolean ide_fuzzy_index_builder_write_finish (IdeFuzzyIndexBuilder *self,
+ GAsyncResult *result,
+ GError **error);
+IDE_AVAILABLE_IN_ALL
+const GVariant *ide_fuzzy_index_builder_get_document (IdeFuzzyIndexBuilder *self,
+ guint64 document_id);
+IDE_AVAILABLE_IN_ALL
+void ide_fuzzy_index_builder_set_metadata (IdeFuzzyIndexBuilder *self,
+ const gchar *key,
+ GVariant *value);
+IDE_AVAILABLE_IN_ALL
+void ide_fuzzy_index_builder_set_metadata_string (IdeFuzzyIndexBuilder *self,
+ const gchar *key,
+ const gchar *value);
+IDE_AVAILABLE_IN_ALL
+void ide_fuzzy_index_builder_set_metadata_uint32 (IdeFuzzyIndexBuilder *self,
+ const gchar *key,
+ guint32 value);
+IDE_AVAILABLE_IN_ALL
+void ide_fuzzy_index_builder_set_metadata_uint64 (IdeFuzzyIndexBuilder *self,
+ const gchar *key,
+ guint64 value);
+
+G_END_DECLS
diff --git a/src/libide/search/ide-fuzzy-index-cursor.c b/src/libide/search/ide-fuzzy-index-cursor.c
new file mode 100644
index 000000000..2eb020650
--- /dev/null
+++ b/src/libide/search/ide-fuzzy-index-cursor.c
@@ -0,0 +1,631 @@
+/* ide-fuzzy-index-cursor.c
+ *
+ * Copyright (C) 2016 Christian Hergert <christian hergert me>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define G_LOG_DOMAIN "ide-fuzzy-index-cursor"
+
+#include "config.h"
+
+#include <string.h>
+
+#include "ide-fuzzy-index-cursor.h"
+#include "ide-fuzzy-index-match.h"
+#include "ide-fuzzy-index-private.h"
+#include "ide-int-pair.h"
+
+struct _IdeFuzzyIndexCursor
+{
+ GObject object;
+
+ IdeFuzzyIndex *index;
+ gchar *query;
+ GVariantDict *tables;
+ GArray *matches;
+ guint max_matches;
+ guint case_sensitive : 1;
+};
+
+typedef struct
+{
+ guint position;
+ guint lookaside_id;
+} IdeFuzzyIndexItem;
+
+typedef struct
+{
+ const gchar *key;
+ guint document_id;
+ gfloat score;
+ guint priority;
+} IdeFuzzyMatch;
+
+typedef struct
+{
+ IdeFuzzyIndex *index;
+ const IdeFuzzyIndexItem * const *tables;
+ const gsize *tables_n_elements;
+ gint *tables_state;
+ guint n_tables;
+ guint max_matches;
+ const gchar *needle;
+ GHashTable *matches;
+} IdeFuzzyLookup;
+
+enum {
+ PROP_0,
+ PROP_CASE_SENSITIVE,
+ PROP_INDEX,
+ PROP_TABLES,
+ PROP_MAX_MATCHES,
+ PROP_QUERY,
+ N_PROPS
+};
+
+static void async_initable_iface_init (GAsyncInitableIface *iface);
+static void list_model_iface_init (GListModelInterface *iface);
+
+static GParamSpec *properties [N_PROPS];
+
+G_DEFINE_TYPE_WITH_CODE (IdeFuzzyIndexCursor, ide_fuzzy_index_cursor, G_TYPE_OBJECT,
+ G_IMPLEMENT_INTERFACE (G_TYPE_ASYNC_INITABLE, async_initable_iface_init)
+ G_IMPLEMENT_INTERFACE (G_TYPE_LIST_MODEL, list_model_iface_init))
+
+static inline gfloat
+pointer_to_float (gpointer ptr)
+{
+ union {
+ gpointer ptr;
+#if GLIB_SIZEOF_VOID_P == 8
+ gdouble fval;
+#else
+ gfloat fval;
+#endif
+ } convert;
+ convert.ptr = ptr;
+ return (gfloat)convert.fval;
+}
+
+static inline gpointer
+float_to_pointer (gfloat fval)
+{
+ union {
+ gpointer ptr;
+#if GLIB_SIZEOF_VOID_P == 8
+ gdouble fval;
+#else
+ gfloat fval;
+#endif
+ } convert;
+ convert.fval = fval;
+ return convert.ptr;
+}
+
+/* Not guaranteed, so assert to be sure */
+G_STATIC_ASSERT (sizeof(gdouble) == 8);
+G_STATIC_ASSERT (sizeof(gfloat) == 4);
+
+static void
+ide_fuzzy_index_cursor_finalize (GObject *object)
+{
+ IdeFuzzyIndexCursor *self = (IdeFuzzyIndexCursor *)object;
+
+ g_clear_object (&self->index);
+ g_clear_pointer (&self->query, g_free);
+ g_clear_pointer (&self->matches, g_array_unref);
+ g_clear_pointer (&self->tables, g_variant_dict_unref);
+
+ G_OBJECT_CLASS (ide_fuzzy_index_cursor_parent_class)->finalize (object);
+}
+
+static void
+ide_fuzzy_index_cursor_get_property (GObject *object,
+ guint prop_id,
+ GValue *value,
+ GParamSpec *pspec)
+{
+ IdeFuzzyIndexCursor *self = IDE_FUZZY_INDEX_CURSOR(object);
+
+ switch (prop_id)
+ {
+ case PROP_CASE_SENSITIVE:
+ g_value_set_boolean (value, self->case_sensitive);
+ break;
+
+ case PROP_INDEX:
+ g_value_set_object (value, self->index);
+ break;
+
+ case PROP_MAX_MATCHES:
+ g_value_set_uint (value, self->max_matches);
+ break;
+
+ case PROP_QUERY:
+ g_value_set_string (value, self->query);
+ break;
+
+ default:
+ G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+ }
+}
+
+static void
+ide_fuzzy_index_cursor_set_property (GObject *object,
+ guint prop_id,
+ const GValue *value,
+ GParamSpec *pspec)
+{
+ IdeFuzzyIndexCursor *self = IDE_FUZZY_INDEX_CURSOR(object);
+
+ switch (prop_id)
+ {
+ case PROP_CASE_SENSITIVE:
+ self->case_sensitive = g_value_get_boolean (value);
+ break;
+
+ case PROP_INDEX:
+ self->index = g_value_dup_object (value);
+ break;
+
+ case PROP_TABLES:
+ self->tables = g_value_dup_boxed (value);
+ break;
+
+ case PROP_MAX_MATCHES:
+ self->max_matches = g_value_get_uint (value);
+ break;
+
+ case PROP_QUERY:
+ self->query = g_value_dup_string (value);
+ break;
+
+ default:
+ G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+ }
+}
+
+static void
+ide_fuzzy_index_cursor_class_init (IdeFuzzyIndexCursorClass *klass)
+{
+ GObjectClass *object_class = G_OBJECT_CLASS (klass);
+
+ object_class->finalize = ide_fuzzy_index_cursor_finalize;
+ object_class->get_property = ide_fuzzy_index_cursor_get_property;
+ object_class->set_property = ide_fuzzy_index_cursor_set_property;
+
+ properties [PROP_CASE_SENSITIVE] =
+ g_param_spec_boolean ("case-sensitive",
+ "Case Sensitive",
+ "Case Sensitive",
+ FALSE,
+ (G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY | G_PARAM_STATIC_STRINGS));
+
+ properties [PROP_INDEX] =
+ g_param_spec_object ("index",
+ "Index",
+ "The index this cursor is iterating",
+ IDE_TYPE_FUZZY_INDEX,
+ (G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY | G_PARAM_STATIC_STRINGS));
+
+ properties [PROP_TABLES] =
+ g_param_spec_boxed ("tables",
+ "Tables",
+ "The dictionary of character indexes",
+ G_TYPE_VARIANT_DICT,
+ (G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY | G_PARAM_STATIC_STRINGS));
+
+ properties [PROP_QUERY] =
+ g_param_spec_string ("query",
+ "Query",
+ "The query for the index",
+ NULL,
+ (G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY | G_PARAM_STATIC_STRINGS));
+
+ properties [PROP_MAX_MATCHES] =
+ g_param_spec_uint ("max-matches",
+ "Max Matches",
+ "The max number of matches to display",
+ 0,
+ G_MAXUINT,
+ 0,
+ (G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY | G_PARAM_STATIC_STRINGS));
+
+ g_object_class_install_properties (object_class, N_PROPS, properties);
+}
+
+static void
+ide_fuzzy_index_cursor_init (IdeFuzzyIndexCursor *self)
+{
+ self->matches = g_array_new (FALSE, FALSE, sizeof (IdeFuzzyMatch));
+}
+
+static gint
+fuzzy_match_compare (gconstpointer a,
+ gconstpointer b)
+{
+ const IdeFuzzyMatch *ma = a;
+ const IdeFuzzyMatch *mb = b;
+
+ if (ma->score < mb->score)
+ return 1;
+ else if (ma->score > mb->score)
+ return -1;
+
+ return strcmp (ma->key, mb->key);
+}
+
+static gboolean
+fuzzy_do_match (const IdeFuzzyLookup *lookup,
+ const IdeFuzzyIndexItem *item,
+ guint table_index,
+ gint score)
+{
+ const IdeFuzzyIndexItem *table;
+ const IdeFuzzyIndexItem *iter;
+ gssize n_elements;
+ gint *state;
+ gint iter_score;
+
+ g_assert (lookup != NULL);
+ g_assert (item != NULL);
+ g_assert (score >= 0);
+
+ table = lookup->tables [table_index];
+ n_elements = (gssize)lookup->tables_n_elements [table_index];
+ state = &lookup->tables_state [table_index];
+
+ for (; state [0] < n_elements; state [0]++)
+ {
+ IdeIntPair *lookup_pair;
+ gboolean contains_document;
+
+ iter = &table [state [0]];
+
+ if ((iter->lookaside_id < item->lookaside_id) ||
+ ((iter->lookaside_id == item->lookaside_id) &&
+ (iter->position <= item->position)))
+ continue;
+ else if (iter->lookaside_id > item->lookaside_id)
+ break;
+
+ iter_score = score + (iter->position - item->position);
+
+ if (table_index + 1 < lookup->n_tables)
+ {
+ if (fuzzy_do_match (lookup, iter, table_index + 1, iter_score))
+ return TRUE;
+ continue;
+ }
+
+ contains_document = g_hash_table_lookup_extended (lookup->matches,
+ GUINT_TO_POINTER (item->lookaside_id),
+ NULL,
+ (gpointer *)&lookup_pair);
+
+ if (!contains_document || iter_score < ide_int_pair_first (lookup_pair))
+ g_hash_table_insert (lookup->matches,
+ GUINT_TO_POINTER (item->lookaside_id),
+ ide_int_pair_new (iter_score, iter->position));
+
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static void
+ide_fuzzy_index_cursor_worker (GTask *task,
+ gpointer source_object,
+ gpointer task_data,
+ GCancellable *cancellable)
+{
+ IdeFuzzyIndexCursor *self = source_object;
+ g_autoptr(GHashTable) matches = NULL;
+ g_autoptr(GHashTable) by_document = NULL;
+ g_autoptr(GPtrArray) tables = NULL;
+ g_autoptr(GArray) tables_n_elements = NULL;
+ g_autofree gint *tables_state = NULL;
+ g_autofree gchar *freeme = NULL;
+ const gchar *query;
+ IdeFuzzyLookup lookup = { 0 };
+ GHashTableIter iter;
+ const gchar *str;
+ gpointer key, value;
+ guint i;
+
+ g_assert (IDE_IS_FUZZY_INDEX_CURSOR (self));
+ g_assert (G_IS_TASK (task));
+
+ if (g_task_return_error_if_cancelled (task))
+ return;
+
+ /* No matches with empty query */
+ if (self->query == NULL || *self->query == '\0')
+ goto cleanup;
+
+ /* If we are not case-sensitive, we need to downcase the query string */
+ query = self->query;
+ if (!self->case_sensitive)
+ query = freeme = g_utf8_casefold (query, -1);
+
+ tables = g_ptr_array_new ();
+ tables_n_elements = g_array_new (FALSE, FALSE, sizeof (gsize));
+ matches = g_hash_table_new_full (NULL, NULL, NULL, (GDestroyNotify)ide_int_pair_free);
+
+ for (str = query; *str; str = g_utf8_next_char (str))
+ {
+ gunichar ch = g_utf8_get_char (str);
+ g_autoptr(GVariant) table = NULL;
+ gconstpointer fixed;
+ gsize n_elements;
+ gchar char_key[8];
+
+ if (g_unichar_isspace (ch))
+ continue;
+
+ char_key [g_unichar_to_utf8 (ch, char_key)] = '\0';
+ table = g_variant_dict_lookup_value (self->tables,
+ char_key,
+ (const GVariantType *)"a(uu)");
+
+ /* No possible matches, missing table for character */
+ if (table == NULL)
+ goto cleanup;
+
+ fixed = g_variant_get_fixed_array (table, &n_elements, sizeof (IdeFuzzyIndexItem));
+ g_array_append_val (tables_n_elements, n_elements);
+ g_ptr_array_add (tables, (gpointer)fixed);
+ }
+
+ if (tables->len == 0)
+ goto cleanup;
+
+ g_assert (tables->len > 0);
+ g_assert (tables->len == tables_n_elements->len);
+
+ tables_state = g_new0 (gint, tables->len);
+
+ lookup.index = self->index;
+ lookup.matches = matches;
+ lookup.tables = (const IdeFuzzyIndexItem * const *)tables->pdata;
+ lookup.tables_n_elements = (const gsize *)(gpointer)tables_n_elements->data;
+ lookup.tables_state = tables_state;
+ lookup.n_tables = tables->len;
+ lookup.needle = query;
+ lookup.max_matches = self->max_matches;
+
+ if G_LIKELY (lookup.n_tables > 1)
+ {
+ for (i = 0; i < lookup.tables_n_elements[0]; i++)
+ {
+ const IdeFuzzyIndexItem *item = &lookup.tables[0][i];
+
+ fuzzy_do_match (&lookup, item, 1, MIN (16, item->position * 2));
+ }
+ }
+ else
+ {
+ guint last_id = G_MAXUINT;
+
+ for (i = 0; i < lookup.tables_n_elements[0]; i++)
+ {
+ const IdeFuzzyIndexItem *item = &lookup.tables[0][i];
+ IdeFuzzyMatch match;
+
+ if (item->lookaside_id != last_id)
+ {
+ last_id = item->lookaside_id;
+
+ if G_UNLIKELY (!_ide_fuzzy_index_resolve (self->index,
+ item->lookaside_id,
+ &match.document_id,
+ &match.key,
+ &match.priority,
+ item->position,
+ item->position,
+ &match.score))
+ continue;
+
+ g_array_append_val (self->matches, match);
+ }
+ }
+
+ goto cleanup;
+ }
+
+ if (g_task_return_error_if_cancelled (task))
+ return;
+
+ by_document = g_hash_table_new (NULL, NULL);
+
+ g_hash_table_iter_init (&iter, matches);
+
+ while (g_hash_table_iter_next (&iter, &key, &value))
+ {
+ IdeIntPair *pair = value;
+ guint score = ide_int_pair_first (pair);
+ guint last_offset = ide_int_pair_second (pair);
+ gpointer other_score;
+ IdeFuzzyMatch match = {0};
+ guint lookaside_id = GPOINTER_TO_UINT (key);
+
+ if G_UNLIKELY (!_ide_fuzzy_index_resolve (self->index,
+ lookaside_id,
+ &match.document_id,
+ &match.key,
+ &match.priority,
+ score,
+ last_offset,
+ &match.score))
+ continue;
+
+ if (g_hash_table_lookup_extended (by_document,
+ GUINT_TO_POINTER (match.document_id),
+ NULL,
+ &other_score) &&
+ match.score <= pointer_to_float (other_score))
+ continue;
+
+ g_hash_table_insert (by_document,
+ GUINT_TO_POINTER (match.document_id),
+ float_to_pointer (match.score));
+
+ g_array_append_val (self->matches, match);
+ }
+
+ /*
+ * Because we have to do the deduplication of documents after
+ * searching all the potential matches, we could have duplicates
+ * in the array. So we need to filter them out. Not that big
+ * of a deal, since we can do remove_fast on the index and
+ * we have to sort them afterwards anyway. Potentially, we could
+ * do both at once if we felt like doing our own sort.
+ */
+ for (i = 0; i < self->matches->len; i++)
+ {
+ IdeFuzzyMatch *match;
+ gpointer other_score;
+
+ next:
+ match = &g_array_index (self->matches, IdeFuzzyMatch, i);
+ other_score = g_hash_table_lookup (by_document, GUINT_TO_POINTER (match->document_id));
+
+ /*
+ * This item should have been discarded, but we didn't have enough
+ * information at the time we built the array.
+ */
+ if (pointer_to_float (other_score) < match->score)
+ {
+ g_array_remove_index_fast (self->matches, i);
+ if (i < self->matches->len - 1)
+ goto next;
+ }
+ }
+
+ if (g_task_return_error_if_cancelled (task))
+ return;
+
+cleanup:
+ if (self->matches != NULL)
+ {
+ g_array_sort (self->matches, fuzzy_match_compare);
+ if (lookup.max_matches > 0 && lookup.max_matches < self->matches->len)
+ g_array_set_size (self->matches, lookup.max_matches);
+ }
+
+ g_task_return_boolean (task, TRUE);
+}
+
+static void
+ide_fuzzy_index_cursor_init_async (GAsyncInitable *initable,
+ gint io_priority,
+ GCancellable *cancellable,
+ GAsyncReadyCallback callback,
+ gpointer user_data)
+{
+ IdeFuzzyIndexCursor *self = (IdeFuzzyIndexCursor *)initable;
+ g_autoptr(GTask) task = NULL;
+
+ g_assert (IDE_IS_FUZZY_INDEX_CURSOR (self));
+ g_assert (!cancellable || G_IS_CANCELLABLE (cancellable));
+
+ task = g_task_new (self, cancellable, callback, user_data);
+ g_task_set_source_tag (task, ide_fuzzy_index_cursor_init_async);
+ g_task_set_priority (task, io_priority);
+ g_task_set_check_cancellable (task, FALSE);
+ g_task_run_in_thread (task, ide_fuzzy_index_cursor_worker);
+}
+
+static gboolean
+ide_fuzzy_index_cursor_init_finish (GAsyncInitable *initiable,
+ GAsyncResult *result,
+ GError **error)
+{
+ g_assert (IDE_IS_FUZZY_INDEX_CURSOR (initiable));
+ g_assert (G_IS_TASK (result));
+
+ return g_task_propagate_boolean (G_TASK (result), error);
+}
+
+static void
+async_initable_iface_init (GAsyncInitableIface *iface)
+{
+ iface->init_async = ide_fuzzy_index_cursor_init_async;
+ iface->init_finish = ide_fuzzy_index_cursor_init_finish;
+}
+
+static GType
+ide_fuzzy_index_cursor_get_item_type (GListModel *model)
+{
+ return IDE_TYPE_FUZZY_INDEX_MATCH;
+}
+
+static guint
+ide_fuzzy_index_cursor_get_n_items (GListModel *model)
+{
+ IdeFuzzyIndexCursor *self = (IdeFuzzyIndexCursor *)model;
+
+ g_assert (IDE_IS_FUZZY_INDEX_CURSOR (self));
+
+ return self->matches->len;
+}
+
+static gpointer
+ide_fuzzy_index_cursor_get_item (GListModel *model,
+ guint position)
+{
+ IdeFuzzyIndexCursor *self = (IdeFuzzyIndexCursor *)model;
+ g_autoptr(GVariant) document = NULL;
+ IdeFuzzyMatch *match;
+
+ g_assert (IDE_IS_FUZZY_INDEX_CURSOR (self));
+ g_assert (position < self->matches->len);
+
+ match = &g_array_index (self->matches, IdeFuzzyMatch, position);
+
+ document = _ide_fuzzy_index_lookup_document (self->index, match->document_id);
+
+ return g_object_new (IDE_TYPE_FUZZY_INDEX_MATCH,
+ "document", document,
+ "key", match->key,
+ "score", match->score,
+ "priority", match->priority,
+ NULL);
+}
+
+static void
+list_model_iface_init (GListModelInterface *iface)
+{
+ iface->get_item_type = ide_fuzzy_index_cursor_get_item_type;
+ iface->get_n_items = ide_fuzzy_index_cursor_get_n_items;
+ iface->get_item = ide_fuzzy_index_cursor_get_item;
+}
+
+/**
+ * ide_fuzzy_index_cursor_get_index:
+ * @self: A #IdeFuzzyIndexCursor
+ *
+ * Gets the index the cursor is iterating.
+ *
+ * Returns: (transfer none): A #IdeFuzzyIndex.
+ */
+IdeFuzzyIndex *
+ide_fuzzy_index_cursor_get_index (IdeFuzzyIndexCursor *self)
+{
+ g_return_val_if_fail (IDE_IS_FUZZY_INDEX_CURSOR (self), NULL);
+
+ return self->index;
+}
diff --git a/src/libide/search/ide-fuzzy-index-cursor.h b/src/libide/search/ide-fuzzy-index-cursor.h
new file mode 100644
index 000000000..cb85a88e7
--- /dev/null
+++ b/src/libide/search/ide-fuzzy-index-cursor.h
@@ -0,0 +1,35 @@
+/* ide-fuzzy-index-cursor.h
+ *
+ * Copyright (C) 2016 Christian Hergert <christian hergert me>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <libide-core.h>
+
+#include "ide-fuzzy-index.h"
+
+G_BEGIN_DECLS
+
+#define IDE_TYPE_FUZZY_INDEX_CURSOR (ide_fuzzy_index_cursor_get_type())
+
+IDE_AVAILABLE_IN_ALL
+G_DECLARE_FINAL_TYPE (IdeFuzzyIndexCursor, ide_fuzzy_index_cursor, IDE, FUZZY_INDEX_CURSOR, GObject)
+
+IDE_AVAILABLE_IN_ALL
+IdeFuzzyIndex *ide_fuzzy_index_cursor_get_index (IdeFuzzyIndexCursor *self);
+
+G_END_DECLS
diff --git a/src/libide/search/ide-fuzzy-index-match.c b/src/libide/search/ide-fuzzy-index-match.c
new file mode 100644
index 000000000..20e14ef1c
--- /dev/null
+++ b/src/libide/search/ide-fuzzy-index-match.c
@@ -0,0 +1,205 @@
+/* ide-fuzzy-index-match.c
+ *
+ * Copyright (C) 2016 Christian Hergert <christian hergert me>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define G_LOG_DOMAIN "ide-fuzzy-index-match"
+
+#include "config.h"
+
+#include "ide-fuzzy-index-match.h"
+
+struct _IdeFuzzyIndexMatch
+{
+ GObject object;
+ GVariant *document;
+ gchar *key;
+ gfloat score;
+ guint priority;
+};
+
+enum {
+ PROP_0,
+ PROP_DOCUMENT,
+ PROP_KEY,
+ PROP_SCORE,
+ PROP_PRIORITY,
+ N_PROPS
+};
+
+G_DEFINE_TYPE (IdeFuzzyIndexMatch, ide_fuzzy_index_match, G_TYPE_OBJECT)
+
+static GParamSpec *properties [N_PROPS];
+
+static void
+ide_fuzzy_index_match_finalize (GObject *object)
+{
+ IdeFuzzyIndexMatch *self = (IdeFuzzyIndexMatch *)object;
+
+ g_clear_pointer (&self->document, g_variant_unref);
+ g_clear_pointer (&self->key, g_free);
+
+ G_OBJECT_CLASS (ide_fuzzy_index_match_parent_class)->finalize (object);
+}
+
+static void
+ide_fuzzy_index_match_get_property (GObject *object,
+ guint prop_id,
+ GValue *value,
+ GParamSpec *pspec)
+{
+ IdeFuzzyIndexMatch *self = IDE_FUZZY_INDEX_MATCH (object);
+
+ switch (prop_id)
+ {
+ case PROP_DOCUMENT:
+ g_value_set_variant (value, self->document);
+ break;
+
+ case PROP_SCORE:
+ g_value_set_float (value, self->score);
+ break;
+
+ case PROP_KEY:
+ g_value_set_string (value, self->key);
+ break;
+
+ case PROP_PRIORITY:
+ g_value_set_uint (value, self->priority);
+ break;
+
+ default:
+ G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+ }
+}
+
+static void
+ide_fuzzy_index_match_set_property (GObject *object,
+ guint prop_id,
+ const GValue *value,
+ GParamSpec *pspec)
+{
+ IdeFuzzyIndexMatch *self = IDE_FUZZY_INDEX_MATCH (object);
+
+ switch (prop_id)
+ {
+ case PROP_DOCUMENT:
+ self->document = g_value_dup_variant (value);
+ break;
+
+ case PROP_SCORE:
+ self->score = g_value_get_float (value);
+ break;
+
+ case PROP_KEY:
+ self->key = g_value_dup_string (value);
+ break;
+
+ case PROP_PRIORITY:
+ self->priority = g_value_get_uint (value);
+ break;
+
+ default:
+ G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+ }
+}
+
+static void
+ide_fuzzy_index_match_class_init (IdeFuzzyIndexMatchClass *klass)
+{
+ GObjectClass *object_class = G_OBJECT_CLASS (klass);
+
+ object_class->finalize = ide_fuzzy_index_match_finalize;
+ object_class->get_property = ide_fuzzy_index_match_get_property;
+ object_class->set_property = ide_fuzzy_index_match_set_property;
+
+ properties [PROP_DOCUMENT] =
+ g_param_spec_variant ("document",
+ "Document",
+ "Document",
+ G_VARIANT_TYPE_ANY,
+ NULL,
+ (G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY | G_PARAM_STATIC_STRINGS));
+
+ properties [PROP_KEY] =
+ g_param_spec_string ("key",
+ "Key",
+ "The string key that was inserted for the document",
+ NULL,
+ (G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY | G_PARAM_STATIC_STRINGS));
+
+ properties [PROP_PRIORITY] =
+ g_param_spec_uint ("priority",
+ "Priority",
+ "The priority used when creating the index",
+ 0,
+ 255,
+ 0,
+ (G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY | G_PARAM_STATIC_STRINGS));
+
+ properties [PROP_SCORE] =
+ g_param_spec_float ("score",
+ "Score",
+ "Score",
+ -G_MINFLOAT,
+ G_MAXFLOAT,
+ 0.0f,
+ (G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY | G_PARAM_STATIC_STRINGS));
+
+ g_object_class_install_properties (object_class, N_PROPS, properties);
+}
+
+static void
+ide_fuzzy_index_match_init (IdeFuzzyIndexMatch *match)
+{
+}
+
+/**
+ * ide_fuzzy_index_match_get_document:
+ *
+ * Returns: (transfer none): A #GVariant.
+ */
+GVariant *
+ide_fuzzy_index_match_get_document (IdeFuzzyIndexMatch *self)
+{
+ g_return_val_if_fail (IDE_IS_FUZZY_INDEX_MATCH (self), NULL);
+
+ return self->document;
+}
+
+gfloat
+ide_fuzzy_index_match_get_score (IdeFuzzyIndexMatch *self)
+{
+ g_return_val_if_fail (IDE_IS_FUZZY_INDEX_MATCH (self), 0.0f);
+
+ return self->score;
+}
+
+const gchar *
+ide_fuzzy_index_match_get_key (IdeFuzzyIndexMatch *self)
+{
+ g_return_val_if_fail (IDE_IS_FUZZY_INDEX_MATCH (self), NULL);
+
+ return self->key;
+}
+
+guint
+ide_fuzzy_index_match_get_priority (IdeFuzzyIndexMatch *self)
+{
+ g_return_val_if_fail (IDE_IS_FUZZY_INDEX_MATCH (self), 0);
+
+ return self->priority;
+}
diff --git a/src/libide/search/ide-fuzzy-index-match.h b/src/libide/search/ide-fuzzy-index-match.h
new file mode 100644
index 000000000..e35131df2
--- /dev/null
+++ b/src/libide/search/ide-fuzzy-index-match.h
@@ -0,0 +1,39 @@
+/* ide-fuzzy-index-match.h
+ *
+ * Copyright (C) 2016 Christian Hergert <christian hergert me>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <libide-core.h>
+
+G_BEGIN_DECLS
+
+#define IDE_TYPE_FUZZY_INDEX_MATCH (ide_fuzzy_index_match_get_type())
+
+IDE_AVAILABLE_IN_ALL
+G_DECLARE_FINAL_TYPE (IdeFuzzyIndexMatch, ide_fuzzy_index_match, IDE, FUZZY_INDEX_MATCH, GObject)
+
+IDE_AVAILABLE_IN_ALL
+const gchar *ide_fuzzy_index_match_get_key (IdeFuzzyIndexMatch *self);
+IDE_AVAILABLE_IN_ALL
+GVariant *ide_fuzzy_index_match_get_document (IdeFuzzyIndexMatch *self);
+IDE_AVAILABLE_IN_ALL
+gfloat ide_fuzzy_index_match_get_score (IdeFuzzyIndexMatch *self);
+IDE_AVAILABLE_IN_ALL
+guint ide_fuzzy_index_match_get_priority (IdeFuzzyIndexMatch *self);
+
+G_END_DECLS
diff --git a/src/libide/search/ide-fuzzy-index-private.h b/src/libide/search/ide-fuzzy-index-private.h
new file mode 100644
index 000000000..f2798e3d9
--- /dev/null
+++ b/src/libide/search/ide-fuzzy-index-private.h
@@ -0,0 +1,36 @@
+/* ide-fuzzy-index-private.h
+ *
+ * Copyright (C) 2016 Christian Hergert <chergert redhat com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "ide-fuzzy-index.h"
+
+G_BEGIN_DECLS
+
+GVariant *_ide_fuzzy_index_lookup_document (IdeFuzzyIndex *self,
+ guint document_id);
+gboolean _ide_fuzzy_index_resolve (IdeFuzzyIndex *self,
+ guint lookaside_id,
+ guint *document_id,
+ const char **key,
+ guint *priority,
+ guint in_score,
+ guint last_offset,
+ float *out_score);
+
+G_END_DECLS
diff --git a/src/libide/search/ide-fuzzy-index.c b/src/libide/search/ide-fuzzy-index.c
new file mode 100644
index 000000000..112bb446c
--- /dev/null
+++ b/src/libide/search/ide-fuzzy-index.c
@@ -0,0 +1,511 @@
+/* ide-fuzzy-index.c
+ *
+ * Copyright (C) 2016 Christian Hergert <chergert redhat com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define G_LOG_DOMAIN "ide-fuzzy-index"
+
+#include "config.h"
+
+#include <string.h>
+
+#include "ide-fuzzy-index.h"
+#include "ide-fuzzy-index-cursor.h"
+#include "ide-fuzzy-index-private.h"
+
+typedef struct
+{
+ guint key_id;
+ guint document_id;
+} LookasideEntry;
+
+struct _IdeFuzzyIndex
+{
+ GObject object;
+
+ guint loaded : 1;
+ guint case_sensitive : 1;
+
+ GMappedFile *mapped_file;
+
+ /*
+ * Toplevel variant for the whole document. This is loaded from the entire
+ * contents of @mapped_file. It contains a dictionary of "a{sv}"
+ * containing all of our index data tables.
+ */
+ GVariant *variant;
+
+ /*
+ * This is a variant containing the array of documents. The index of the
+ * document is the corresponding document_id used in other data structres.
+ * This maps to the "documents" field in @variant.
+ */
+ GVariant *documents;
+
+ /*
+ * The keys found within the index. The index of the key is the "key_id"
+ * used in other datastructures, such as the @lookaside array.
+ */
+ GVariant *keys;
+
+ /*
+ * The lookaside array is used to disambiguate between multiple keys
+ * pointing to the same document. Each element in the array is of type
+ * "(uu)" with the first field being the "key_id" and the second field
+ * being the "document_id". Each of these are indexes into the
+ * corresponding @documents and @keys arrays.
+ *
+ * This is a fixed array type and therefore can have the raw data
+ * accessed with g_variant_get_fixed_array() to save on lookup
+ * costs.
+ */
+ GVariant *lookaside;
+
+ /*
+ * Raw pointers for fast access to the lookaside buffer.
+ */
+ const LookasideEntry *lookaside_raw;
+ gsize lookaside_len;
+
+ /*
+ * This vardict is used to get the fixed array containing the
+ * (offset, lookaside_id) for each unicode character in the index.
+ * These are accessed by the cursors to layout the fulltext search
+ * index by each character in the input string. Doing so, is what
+ * gives us the O(mn) worst-case running time.
+ */
+ GVariantDict *tables;
+
+ /*
+ * The metadata located within the search index. This contains
+ * metadata set with ide_fuzzy_index_builder_set_metadata() or one
+ * of its typed variants.
+ */
+ GVariantDict *metadata;
+};
+
+G_DEFINE_TYPE (IdeFuzzyIndex, ide_fuzzy_index, G_TYPE_OBJECT)
+
+static void
+ide_fuzzy_index_finalize (GObject *object)
+{
+ IdeFuzzyIndex *self = (IdeFuzzyIndex *)object;
+
+ g_clear_pointer (&self->mapped_file, g_mapped_file_unref);
+ g_clear_pointer (&self->variant, g_variant_unref);
+ g_clear_pointer (&self->documents, g_variant_unref);
+ g_clear_pointer (&self->keys, g_variant_unref);
+ g_clear_pointer (&self->tables, g_variant_dict_unref);
+ g_clear_pointer (&self->lookaside, g_variant_unref);
+ g_clear_pointer (&self->metadata, g_variant_dict_unref);
+
+ G_OBJECT_CLASS (ide_fuzzy_index_parent_class)->finalize (object);
+}
+
+static void
+ide_fuzzy_index_class_init (IdeFuzzyIndexClass *klass)
+{
+ GObjectClass *object_class = G_OBJECT_CLASS (klass);
+
+ object_class->finalize = ide_fuzzy_index_finalize;
+}
+
+static void
+ide_fuzzy_index_init (IdeFuzzyIndex *self)
+{
+}
+
+IdeFuzzyIndex *
+ide_fuzzy_index_new (void)
+{
+ return g_object_new (IDE_TYPE_FUZZY_INDEX, NULL);
+}
+
+static void
+ide_fuzzy_index_load_file_worker (GTask *task,
+ gpointer source_object,
+ gpointer task_data,
+ GCancellable *cancellable)
+{
+ g_autofree gchar *path = NULL;
+ g_autoptr(GMappedFile) mapped_file = NULL;
+ g_autoptr(GVariant) variant = NULL;
+ g_autoptr(GVariant) documents = NULL;
+ g_autoptr(GVariant) lookaside = NULL;
+ g_autoptr(GVariant) keys = NULL;
+ g_autoptr(GVariant) tables = NULL;
+ g_autoptr(GVariant) metadata = NULL;
+ g_autoptr(GError) error = NULL;
+ IdeFuzzyIndex *self = source_object;
+ GFile *file = task_data;
+ GVariantDict dict;
+ gint version = 0;
+ gboolean case_sensitive = FALSE;
+
+ g_assert (IDE_IS_FUZZY_INDEX (self));
+ g_assert (G_IS_FILE (file));
+ g_assert (!cancellable || G_IS_CANCELLABLE (cancellable));
+
+ if (self->loaded)
+ {
+ g_task_return_new_error (task,
+ G_IO_ERROR,
+ G_IO_ERROR_INVAL,
+ "Cannot load index multiple times");
+ return;
+ }
+
+ self->loaded = TRUE;
+
+ if (!g_file_is_native (file) || NULL == (path = g_file_get_path (file)))
+ {
+ g_task_return_new_error (task,
+ G_IO_ERROR,
+ G_IO_ERROR_INVALID_FILENAME,
+ "Index must be a local file");
+ return;
+ }
+
+ if (NULL == (mapped_file = g_mapped_file_new (path, FALSE, &error)))
+ {
+ g_task_return_error (task, g_steal_pointer (&error));
+ return;
+ }
+
+ variant = g_variant_new_from_data (G_VARIANT_TYPE_VARDICT,
+ g_mapped_file_get_contents (mapped_file),
+ g_mapped_file_get_length (mapped_file),
+ FALSE, NULL, NULL);
+
+ if (variant == NULL)
+ {
+ g_task_return_new_error (task,
+ G_IO_ERROR,
+ G_IO_ERROR_INVAL,
+ "Failed to parse GVariant");
+ return;
+ }
+
+ g_variant_ref_sink (variant);
+
+ g_variant_dict_init (&dict, variant);
+
+ if (!g_variant_dict_lookup (&dict, "version", "i", &version) || version != 1)
+ {
+ g_variant_dict_clear (&dict);
+ g_task_return_new_error (task,
+ G_IO_ERROR,
+ G_IO_ERROR_INVAL,
+ "Version mismatch in gvariant. Got %d, expected 1",
+ version);
+ return;
+ }
+
+ documents = g_variant_dict_lookup_value (&dict, "documents", G_VARIANT_TYPE_ARRAY);
+ keys = g_variant_dict_lookup_value (&dict, "keys", G_VARIANT_TYPE_STRING_ARRAY);
+ lookaside = g_variant_dict_lookup_value (&dict, "lookaside", G_VARIANT_TYPE_ARRAY);
+ tables = g_variant_dict_lookup_value (&dict, "tables", G_VARIANT_TYPE_VARDICT);
+ metadata = g_variant_dict_lookup_value (&dict, "metadata", G_VARIANT_TYPE_VARDICT);
+ g_variant_dict_clear (&dict);
+
+ if (keys == NULL || documents == NULL || tables == NULL || metadata == NULL)
+ {
+ g_task_return_new_error (task,
+ G_IO_ERROR,
+ G_IO_ERROR_INVAL,
+ "Invalid gvariant index");
+ return;
+ }
+
+ self->mapped_file = g_steal_pointer (&mapped_file);
+ self->variant = g_steal_pointer (&variant);
+ self->documents = g_steal_pointer (&documents);
+ self->lookaside = g_steal_pointer (&lookaside);
+ self->keys = g_steal_pointer (&keys);
+ self->tables = g_variant_dict_new (tables);
+ self->metadata = g_variant_dict_new (metadata);
+
+ self->lookaside_raw = g_variant_get_fixed_array (self->lookaside,
+ &self->lookaside_len,
+ sizeof (LookasideEntry));
+
+ if (g_variant_dict_lookup (self->metadata, "case-sensitive", "b", &case_sensitive))
+ self->case_sensitive = !!case_sensitive;
+
+ g_task_return_boolean (task, TRUE);
+}
+
+void
+ide_fuzzy_index_load_file_async (IdeFuzzyIndex *self,
+ GFile *file,
+ GCancellable *cancellable,
+ GAsyncReadyCallback callback,
+ gpointer user_data)
+{
+ g_autoptr(GTask) task = NULL;
+
+ g_return_if_fail (IDE_IS_FUZZY_INDEX (self));
+ g_return_if_fail (G_IS_FILE (file));
+ g_return_if_fail (!cancellable || G_IS_CANCELLABLE (cancellable));
+
+ task = g_task_new (self, cancellable, callback, user_data);
+ g_task_set_source_tag (task, ide_fuzzy_index_load_file);
+ g_task_set_task_data (task, g_object_ref (file), g_object_unref);
+ g_task_set_check_cancellable (task, FALSE);
+ g_task_run_in_thread (task, ide_fuzzy_index_load_file_worker);
+}
+
+gboolean
+ide_fuzzy_index_load_file_finish (IdeFuzzyIndex *self,
+ GAsyncResult *result,
+ GError **error)
+{
+ g_return_val_if_fail (IDE_IS_FUZZY_INDEX (self), FALSE);
+ g_return_val_if_fail (G_IS_TASK (result), FALSE);
+
+ return g_task_propagate_boolean (G_TASK (result), error);
+}
+
+gboolean
+ide_fuzzy_index_load_file (IdeFuzzyIndex *self,
+ GFile *file,
+ GCancellable *cancellable,
+ GError **error)
+{
+ g_autoptr(GTask) task = NULL;
+
+ g_return_val_if_fail (IDE_IS_FUZZY_INDEX (self), FALSE);
+ g_return_val_if_fail (G_IS_FILE (file), FALSE);
+ g_return_val_if_fail (!cancellable || G_IS_CANCELLABLE (cancellable), FALSE);
+
+ task = g_task_new (self, cancellable, NULL, NULL);
+ g_task_set_source_tag (task, ide_fuzzy_index_load_file);
+ g_task_set_task_data (task, g_object_ref (file), g_object_unref);
+ g_task_set_check_cancellable (task, FALSE);
+
+ ide_fuzzy_index_load_file_worker (task, self, file, cancellable);
+
+ return g_task_propagate_boolean (task, error);
+}
+
+static void
+ide_fuzzy_index_query_cb (GObject *object,
+ GAsyncResult *result,
+ gpointer user_data)
+{
+ IdeFuzzyIndexCursor *cursor = (IdeFuzzyIndexCursor *)object;
+ g_autoptr(GTask) task = user_data;
+ g_autoptr(GError) error = NULL;
+
+ g_assert (IDE_IS_FUZZY_INDEX_CURSOR (cursor));
+ g_assert (G_IS_ASYNC_RESULT (result));
+ g_assert (G_IS_TASK (task));
+
+ if (!g_async_initable_init_finish (G_ASYNC_INITABLE (cursor), result, &error))
+ g_task_return_error (task, g_steal_pointer (&error));
+ else
+ g_task_return_pointer (task, g_object_ref (cursor), g_object_unref);
+}
+
+void
+ide_fuzzy_index_query_async (IdeFuzzyIndex *self,
+ const gchar *query,
+ guint max_matches,
+ GCancellable *cancellable,
+ GAsyncReadyCallback callback,
+ gpointer user_data)
+{
+ g_autoptr(GTask) task = NULL;
+ g_autoptr(IdeFuzzyIndexCursor) cursor = NULL;
+
+ g_return_if_fail (IDE_IS_FUZZY_INDEX (self));
+ g_return_if_fail (query != NULL);
+ g_return_if_fail (!cancellable || G_IS_CANCELLABLE (cancellable));
+
+ task = g_task_new (self, cancellable, callback, user_data);
+ g_task_set_priority (task, G_PRIORITY_LOW);
+ g_task_set_source_tag (task, ide_fuzzy_index_query_async);
+
+ cursor = g_object_new (IDE_TYPE_FUZZY_INDEX_CURSOR,
+ "case-sensitive", self->case_sensitive,
+ "index", self,
+ "query", query,
+ "max-matches", max_matches,
+ "tables", self->tables,
+ NULL);
+
+ g_async_initable_init_async (G_ASYNC_INITABLE (cursor),
+ G_PRIORITY_LOW,
+ cancellable,
+ ide_fuzzy_index_query_cb,
+ g_steal_pointer (&task));
+}
+
+/**
+ * ide_fuzzy_index_query_finish:
+ *
+ * Completes an asynchronous request to ide_fuzzy_index_query_async().
+ *
+ * Returns: (transfer full): A #GListModel of results.
+ */
+GListModel *
+ide_fuzzy_index_query_finish (IdeFuzzyIndex *self,
+ GAsyncResult *result,
+ GError **error)
+{
+ g_return_val_if_fail (IDE_IS_FUZZY_INDEX (self), NULL);
+ g_return_val_if_fail (G_IS_TASK (result), NULL);
+
+ return g_task_propagate_pointer (G_TASK (result), error);
+}
+
+/**
+ * ide_fuzzy_index_get_metadata:
+ *
+ * Looks up the metadata for @key.
+ *
+ * Returns: (transfer full) (nullable): A #GVariant or %NULL.
+ */
+GVariant *
+ide_fuzzy_index_get_metadata (IdeFuzzyIndex *self,
+ const gchar *key)
+{
+ g_return_val_if_fail (IDE_IS_FUZZY_INDEX (self), NULL);
+ g_return_val_if_fail (key != NULL, NULL);
+
+ if (self->metadata != NULL)
+ return g_variant_dict_lookup_value (self->metadata, key, NULL);
+
+ return NULL;
+}
+
+guint64
+ide_fuzzy_index_get_metadata_uint64 (IdeFuzzyIndex *self,
+ const gchar *key)
+{
+ g_autoptr(GVariant) ret = NULL;
+
+ ret = ide_fuzzy_index_get_metadata (self, key);
+
+ if (ret != NULL)
+ return g_variant_get_uint64 (ret);
+
+ return G_GUINT64_CONSTANT (0);
+}
+
+guint32
+ide_fuzzy_index_get_metadata_uint32 (IdeFuzzyIndex *self,
+ const gchar *key)
+{
+ g_autoptr(GVariant) ret = NULL;
+
+ ret = ide_fuzzy_index_get_metadata (self, key);
+
+ if (ret != NULL)
+ return g_variant_get_uint32 (ret);
+
+ return G_GUINT64_CONSTANT (0);
+}
+
+const gchar *
+ide_fuzzy_index_get_metadata_string (IdeFuzzyIndex *self,
+ const gchar *key)
+{
+ g_autoptr(GVariant) ret = NULL;
+
+ ret = ide_fuzzy_index_get_metadata (self, key);
+
+ /*
+ * This looks unsafe, but is safe because strings are \0 terminated
+ * inside the variants and the result is a pointer to internal data.
+ * Since that data exists on our mmap() region, we are all good.
+ */
+ if (ret != NULL)
+ return g_variant_get_string (ret, NULL);
+
+ return NULL;
+}
+
+/**
+ * _ide_fuzzy_index_lookup_document:
+ * @self: A #IdeFuzzyIndex
+ * @document_id: The identifier of the document
+ *
+ * This looks up the document found matching @document_id.
+ *
+ * This should be the document_id resolved through the lookaside
+ * using _ide_fuzzy_index_resolve().
+ *
+ * Returns: (transfer full) (nullable): A #GVariant if successful;
+ * otherwise %NULL.
+ */
+GVariant *
+_ide_fuzzy_index_lookup_document (IdeFuzzyIndex *self,
+ guint document_id)
+{
+ g_assert (IDE_IS_FUZZY_INDEX (self));
+
+ return g_variant_get_child_value (self->documents, document_id);
+}
+
+gboolean
+_ide_fuzzy_index_resolve (IdeFuzzyIndex *self,
+ guint lookaside_id,
+ guint *document_id,
+ const gchar **key,
+ guint *priority,
+ guint in_score,
+ guint last_offset,
+ gfloat *out_score)
+{
+ const LookasideEntry *entry;
+ const gchar *local_key = NULL;
+ guint key_id;
+
+ g_assert (IDE_IS_FUZZY_INDEX (self));
+ g_assert (document_id != NULL);
+ g_assert (out_score != NULL);
+ g_assert (priority != NULL);
+
+ if (self->keys == NULL || self->lookaside_raw == NULL)
+ return FALSE;
+
+ /* Mask off the key priority */
+ lookaside_id &= 0x00FFFFFF;
+
+ if G_UNLIKELY (lookaside_id >= self->lookaside_len)
+ return FALSE;
+
+ entry = &self->lookaside_raw [lookaside_id];
+
+ /* The key_id has a mask with the priority as well */
+ key_id = entry->key_id & 0x00FFFFFF;
+ if G_UNLIKELY (key_id >= g_variant_n_children (self->keys))
+ return FALSE;
+
+ g_variant_get_child (self->keys, key_id, "&s", &local_key);
+
+ if (key != NULL)
+ *key = local_key;
+
+ if (document_id != NULL)
+ *document_id = entry->document_id;
+
+ *priority = (entry->key_id & 0xFF000000) >> 24;
+ *out_score = ((1.0 / 256.0) / (1 + last_offset + in_score)) + ((255.0 - *priority) / 256.0);
+
+ return TRUE;
+}
diff --git a/src/libide/search/ide-fuzzy-index.h b/src/libide/search/ide-fuzzy-index.h
new file mode 100644
index 000000000..4997851d6
--- /dev/null
+++ b/src/libide/search/ide-fuzzy-index.h
@@ -0,0 +1,71 @@
+/* ide-fuzzy-index.h
+ *
+ * Copyright (C) 2016 Christian Hergert <chergert redhat com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <libide-core.h>
+
+G_BEGIN_DECLS
+
+#define IDE_TYPE_FUZZY_INDEX (ide_fuzzy_index_get_type())
+
+IDE_AVAILABLE_IN_ALL
+G_DECLARE_FINAL_TYPE (IdeFuzzyIndex, ide_fuzzy_index, IDE, FUZZY_INDEX, GObject)
+
+IDE_AVAILABLE_IN_ALL
+IdeFuzzyIndex *ide_fuzzy_index_new (void);
+IDE_AVAILABLE_IN_ALL
+gboolean ide_fuzzy_index_load_file (IdeFuzzyIndex *self,
+ GFile *file,
+ GCancellable *cancellable,
+ GError **error);
+IDE_AVAILABLE_IN_ALL
+void ide_fuzzy_index_load_file_async (IdeFuzzyIndex *self,
+ GFile *file,
+ GCancellable *cancellable,
+ GAsyncReadyCallback callback,
+ gpointer user_data);
+IDE_AVAILABLE_IN_ALL
+gboolean ide_fuzzy_index_load_file_finish (IdeFuzzyIndex *self,
+ GAsyncResult *result,
+ GError **error);
+IDE_AVAILABLE_IN_ALL
+void ide_fuzzy_index_query_async (IdeFuzzyIndex *self,
+ const gchar *query,
+ guint max_matches,
+ GCancellable *cancellable,
+ GAsyncReadyCallback callback,
+ gpointer user_data);
+IDE_AVAILABLE_IN_ALL
+GListModel *ide_fuzzy_index_query_finish (IdeFuzzyIndex *self,
+ GAsyncResult *result,
+ GError **error);
+IDE_AVAILABLE_IN_ALL
+GVariant *ide_fuzzy_index_get_metadata (IdeFuzzyIndex *self,
+ const gchar *key);
+IDE_AVAILABLE_IN_ALL
+guint32 ide_fuzzy_index_get_metadata_uint32 (IdeFuzzyIndex *self,
+ const gchar *key);
+IDE_AVAILABLE_IN_ALL
+guint64 ide_fuzzy_index_get_metadata_uint64 (IdeFuzzyIndex *self,
+ const gchar *key);
+IDE_AVAILABLE_IN_ALL
+const gchar *ide_fuzzy_index_get_metadata_string (IdeFuzzyIndex *self,
+ const gchar *key);
+
+G_END_DECLS
diff --git a/src/libide/search/ide-fuzzy-mutable-index.c b/src/libide/search/ide-fuzzy-mutable-index.c
new file mode 100644
index 000000000..394687a48
--- /dev/null
+++ b/src/libide/search/ide-fuzzy-mutable-index.c
@@ -0,0 +1,724 @@
+/* ide-fuzzy-mutable-index.c
+ *
+ * Copyright (C) 2014-2017 Christian Hergert <christian hergert me>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define G_LOG_DOMAIN "ide-fuzzy-mutable-index"
+
+#include "config.h"
+
+#include <ctype.h>
+#include <string.h>
+
+#include "ide-fuzzy-mutable-index.h"
+
+/**
+ * SECTION:ide-fuzzy-mutable-index
+ * @title: IdeFuzzyMutableIndex Matching
+ * @short_description: IdeFuzzyMutableIndex matching for GLib based programs.
+ *
+ * #IdeFuzzyMutableIndex provides a fulltext index that focuses around fuzzy
+ * matching words. This version of the datastructure is focused around
+ * in-memory storage. This makes mutability performance of adding or removing
+ * items from the corpus simpler.
+ *
+ * If you need mostly read-only indexes, you might consider using
+ * #IdeFuzzyIndex and #IdeFuzzyIndexBuilder which can create a disk-based file
+ * and mmap() a read-only version of the data set.
+ *
+ * It is a programming error to modify #Fuzzy while holding onto an array
+ * of #FuzzyMatch elements. The position of strings within the IdeFuzzyMutableIndexMatch
+ * may no longer be valid.
+ */
+
+G_DEFINE_BOXED_TYPE (IdeFuzzyMutableIndex, ide_fuzzy_mutable_index,
+ (GBoxedCopyFunc)ide_fuzzy_mutable_index_ref,
+ (GBoxedFreeFunc)ide_fuzzy_mutable_index_unref)
+
+struct _IdeFuzzyMutableIndex
+{
+ volatile gint ref_count;
+ GByteArray *heap;
+ GArray *id_to_text_offset;
+ GPtrArray *id_to_value;
+ GHashTable *char_tables;
+ GHashTable *removed;
+ guint in_bulk_insert : 1;
+ guint case_sensitive : 1;
+};
+
+#pragma pack(push, 1)
+typedef struct
+{
+#ifdef G_OS_WIN32
+ guint32 id;
+ guint16 pos;
+#else
+ guint64 id : 32;
+ guint64 pos : 16;
+#endif
+} IdeFuzzyMutableIndexItem;
+#pragma pack(pop)
+
+G_STATIC_ASSERT (sizeof(IdeFuzzyMutableIndexItem) == 6);
+
+typedef struct
+{
+ IdeFuzzyMutableIndex *fuzzy;
+ GArray **tables;
+ gint *state;
+ guint n_tables;
+ gsize max_matches;
+ const gchar *needle;
+ GHashTable *matches;
+} IdeFuzzyMutableIndexLookup;
+
+static gint
+ide_fuzzy_mutable_index_item_compare (gconstpointer a,
+ gconstpointer b)
+{
+ gint ret;
+
+ const IdeFuzzyMutableIndexItem *fa = a;
+ const IdeFuzzyMutableIndexItem *fb = b;
+
+ if ((ret = fa->id - fb->id) == 0)
+ ret = fa->pos - fb->pos;
+
+ return ret;
+}
+
+static gint
+ide_fuzzy_mutable_index_match_compare (gconstpointer a,
+ gconstpointer b)
+{
+ const IdeFuzzyMutableIndexMatch *ma = a;
+ const IdeFuzzyMutableIndexMatch *mb = b;
+
+ if (ma->score < mb->score) {
+ return 1;
+ } else if (ma->score > mb->score) {
+ return -1;
+ }
+
+ return strcmp (ma->key, mb->key);
+}
+
+IdeFuzzyMutableIndex *
+ide_fuzzy_mutable_index_ref (IdeFuzzyMutableIndex *fuzzy)
+{
+ g_return_val_if_fail (fuzzy, NULL);
+ g_return_val_if_fail (fuzzy->ref_count > 0, NULL);
+
+ g_atomic_int_inc (&fuzzy->ref_count);
+
+ return fuzzy;
+}
+
+/**
+ * ide_fuzzy_mutable_index_new:
+ * @case_sensitive: %TRUE if case should be preserved.
+ *
+ * Create a new #Fuzzy for fuzzy matching strings.
+ *
+ * Returns: A newly allocated #Fuzzy that should be freed with ide_fuzzy_mutable_index_unref().
+ */
+IdeFuzzyMutableIndex *
+ide_fuzzy_mutable_index_new (gboolean case_sensitive)
+{
+ IdeFuzzyMutableIndex *fuzzy;
+
+ fuzzy = g_slice_new0 (IdeFuzzyMutableIndex);
+ fuzzy->ref_count = 1;
+ fuzzy->heap = g_byte_array_new ();
+ fuzzy->id_to_value = g_ptr_array_new ();
+ fuzzy->id_to_text_offset = g_array_new (FALSE, FALSE, sizeof (gsize));
+ fuzzy->char_tables = g_hash_table_new_full (NULL, NULL, NULL, (GDestroyNotify)g_array_unref);
+ fuzzy->case_sensitive = case_sensitive;
+ fuzzy->removed = g_hash_table_new (g_direct_hash, g_direct_equal);
+
+ return fuzzy;
+}
+
+IdeFuzzyMutableIndex *
+ide_fuzzy_mutable_index_new_with_free_func (gboolean case_sensitive,
+ GDestroyNotify free_func)
+{
+ IdeFuzzyMutableIndex *fuzzy;
+
+ fuzzy = ide_fuzzy_mutable_index_new (case_sensitive);
+ ide_fuzzy_mutable_index_set_free_func (fuzzy, free_func);
+
+ return fuzzy;
+}
+
+void
+ide_fuzzy_mutable_index_set_free_func (IdeFuzzyMutableIndex *fuzzy,
+ GDestroyNotify free_func)
+{
+ g_return_if_fail (fuzzy);
+
+ g_ptr_array_set_free_func (fuzzy->id_to_value, free_func);
+}
+
+static gsize
+ide_fuzzy_mutable_index_heap_insert (IdeFuzzyMutableIndex *fuzzy,
+ const gchar *text)
+{
+ gsize ret;
+
+ g_assert (fuzzy != NULL);
+ g_assert (text != NULL);
+
+ ret = fuzzy->heap->len;
+
+ g_byte_array_append (fuzzy->heap, (guint8 *)text, strlen (text) + 1);
+
+ return ret;
+}
+
+/**
+ * ide_fuzzy_mutable_index_begin_bulk_insert:
+ * @fuzzy: (in): A #Fuzzy.
+ *
+ * Start a bulk insertion. @fuzzy is not ready for searching until
+ * ide_fuzzy_mutable_index_end_bulk_insert() has been called.
+ *
+ * This allows for inserting large numbers of strings and deferring
+ * the final sort until ide_fuzzy_mutable_index_end_bulk_insert().
+ */
+void
+ide_fuzzy_mutable_index_begin_bulk_insert (IdeFuzzyMutableIndex *fuzzy)
+{
+ g_return_if_fail (fuzzy);
+ g_return_if_fail (!fuzzy->in_bulk_insert);
+
+ fuzzy->in_bulk_insert = TRUE;
+}
+
+/**
+ * ide_fuzzy_mutable_index_end_bulk_insert:
+ * @fuzzy: (in): A #Fuzzy.
+ *
+ * Complete a bulk insert and resort the index.
+ */
+void
+ide_fuzzy_mutable_index_end_bulk_insert (IdeFuzzyMutableIndex *fuzzy)
+{
+ GHashTableIter iter;
+ gpointer key;
+ gpointer value;
+
+ g_return_if_fail(fuzzy);
+ g_return_if_fail(fuzzy->in_bulk_insert);
+
+ fuzzy->in_bulk_insert = FALSE;
+
+ g_hash_table_iter_init (&iter, fuzzy->char_tables);
+
+ while (g_hash_table_iter_next (&iter, &key, &value)) {
+ GArray *table = value;
+
+ g_array_sort (table, ide_fuzzy_mutable_index_item_compare);
+ }
+}
+
+/**
+ * ide_fuzzy_mutable_index_insert:
+ * @fuzzy: (in): A #Fuzzy.
+ * @key: (in): A UTF-8 encoded string.
+ * @value: (in): A value to associate with key.
+ *
+ * Inserts a string into the fuzzy matcher.
+ */
+void
+ide_fuzzy_mutable_index_insert (IdeFuzzyMutableIndex *fuzzy,
+ const gchar *key,
+ gpointer value)
+{
+ const gchar *tmp;
+ gchar *downcase = NULL;
+ gsize offset;
+ guint id;
+
+ if (G_UNLIKELY (!key || !*key || (fuzzy->id_to_text_offset->len == G_MAXUINT)))
+ return;
+
+ if (!fuzzy->case_sensitive)
+ downcase = g_utf8_casefold (key, -1);
+
+ offset = ide_fuzzy_mutable_index_heap_insert (fuzzy, key);
+ id = fuzzy->id_to_text_offset->len;
+ g_array_append_val (fuzzy->id_to_text_offset, offset);
+ g_ptr_array_add (fuzzy->id_to_value, value);
+
+ if (!fuzzy->case_sensitive)
+ key = downcase;
+
+ for (tmp = key; *tmp; tmp = g_utf8_next_char (tmp))
+ {
+ gunichar ch = g_utf8_get_char (tmp);
+ GArray *table;
+ IdeFuzzyMutableIndexItem item;
+
+ table = g_hash_table_lookup (fuzzy->char_tables, GINT_TO_POINTER (ch));
+
+ if (G_UNLIKELY (table == NULL))
+ {
+ table = g_array_new (FALSE, FALSE, sizeof (IdeFuzzyMutableIndexItem));
+ g_hash_table_insert (fuzzy->char_tables, GINT_TO_POINTER (ch), table);
+ }
+
+ item.id = id;
+ item.pos = (guint)(gsize)(tmp - key);
+
+ g_array_append_val (table, item);
+ }
+
+ if (G_UNLIKELY (!fuzzy->in_bulk_insert))
+ {
+ for (tmp = key; *tmp; tmp = g_utf8_next_char (tmp))
+ {
+ GArray *table;
+ gunichar ch;
+
+ ch = g_utf8_get_char (tmp);
+ table = g_hash_table_lookup (fuzzy->char_tables, GINT_TO_POINTER (ch));
+ g_array_sort (table, ide_fuzzy_mutable_index_item_compare);
+ }
+ }
+
+ g_free (downcase);
+}
+
+/**
+ * ide_fuzzy_mutable_index_unref:
+ * @fuzzy: A #Fuzzy.
+ *
+ * Decrements the reference count of fuzzy by one. When the reference count
+ * reaches zero, the structure will be freed.
+ */
+void
+ide_fuzzy_mutable_index_unref (IdeFuzzyMutableIndex *fuzzy)
+{
+ g_return_if_fail (fuzzy);
+ g_return_if_fail (fuzzy->ref_count > 0);
+
+ if (G_UNLIKELY (g_atomic_int_dec_and_test (&fuzzy->ref_count)))
+ {
+ g_byte_array_unref (fuzzy->heap);
+ fuzzy->heap = NULL;
+
+ g_array_unref (fuzzy->id_to_text_offset);
+ fuzzy->id_to_text_offset = NULL;
+
+ g_ptr_array_unref (fuzzy->id_to_value);
+ fuzzy->id_to_value = NULL;
+
+ g_hash_table_unref (fuzzy->char_tables);
+ fuzzy->char_tables = NULL;
+
+ g_hash_table_unref (fuzzy->removed);
+ fuzzy->removed = NULL;
+
+ g_slice_free (IdeFuzzyMutableIndex, fuzzy);
+ }
+}
+
+static void
+rollback_state_to_pos (GArray *table,
+ gint *state,
+ guint id,
+ guint pos)
+{
+ g_assert (table != NULL);
+ g_assert (state != NULL);
+ g_assert (pos > 0);
+
+ while (*state > 0 && *state <= table->len)
+ {
+ IdeFuzzyMutableIndexItem *iter;
+
+ (*state)--;
+
+ iter = &g_array_index (table, IdeFuzzyMutableIndexItem, *state);
+
+ if (iter->id > id || (iter->id == id && *state >= pos))
+ continue;
+
+ break;
+ }
+}
+
+static gboolean
+ide_fuzzy_mutable_index_do_match (IdeFuzzyMutableIndexLookup *lookup,
+ IdeFuzzyMutableIndexItem *item,
+ guint table_index,
+ gint score)
+{
+ gboolean ret = FALSE;
+ GArray *table;
+ gint *state;
+
+ table = lookup->tables [table_index];
+ state = &lookup->state [table_index];
+
+ for (; state [0] < (gint)table->len; state [0]++)
+ {
+ IdeFuzzyMutableIndexItem *iter;
+ gpointer key;
+ gint iter_score;
+
+ iter = &g_array_index (table, IdeFuzzyMutableIndexItem, state[0]);
+
+ if ((iter->id < item->id) || ((iter->id == item->id) && (iter->pos <= item->pos)))
+ continue;
+ else if (iter->id > item->id)
+ break;
+
+ iter_score = score + (iter->pos - item->pos - 1);
+
+ if ((table_index + 1) < lookup->n_tables)
+ {
+ if (ide_fuzzy_mutable_index_do_match (lookup, iter, table_index + 1, iter_score))
+ {
+ ret = TRUE;
+
+ /* We already found a match, but we could have a better match
+ * further in the word. We need to rollback all of our additional
+ * table state to the current position so that we can possibly
+ * advance again.
+ */
+ if ((state[0] + 1) < table->len &&
+ g_array_index (table, IdeFuzzyMutableIndexItem, state[0] + 1).id == item->id)
+ {
+ for (guint i = table_index + 1; i < lookup->n_tables; i++)
+ rollback_state_to_pos (lookup->tables[i], &lookup->state[i], iter->id, iter->pos + 1);
+ }
+ }
+ continue;
+ }
+
+ key = GINT_TO_POINTER (iter->id);
+
+ if (!g_hash_table_contains (lookup->matches, key) ||
+ (iter_score < GPOINTER_TO_INT (g_hash_table_lookup (lookup->matches, key))))
+ g_hash_table_insert (lookup->matches, key, GINT_TO_POINTER (iter_score));
+
+ ret = TRUE;
+ }
+
+ return ret;
+}
+
+static inline const gchar *
+ide_fuzzy_mutable_index_get_string (IdeFuzzyMutableIndex *fuzzy,
+ gint id)
+{
+ guint offset = g_array_index (fuzzy->id_to_text_offset, gsize, id);
+ return (const gchar *)&fuzzy->heap->data [offset];
+}
+
+/**
+ * ide_fuzzy_mutable_index_match:
+ * @fuzzy: (in): A #Fuzzy.
+ * @needle: (in): The needle to fuzzy search for.
+ * @max_matches: (in): The max number of matches to return.
+ *
+ * IdeFuzzyMutableIndex searches within @fuzzy for strings that fuzzy match @needle.
+ * Only up to @max_matches will be returned.
+ *
+ * TODO: max_matches is not yet respected.
+ *
+ * Returns: (transfer full) (element-type IdeFuzzyMutableIndexMatch): A newly allocated
+ * #GArray containing #FuzzyMatch elements. This should be freed when
+ * the caller is done with it using g_array_unref().
+ * It is a programming error to keep the structure around longer than
+ * the @fuzzy instance.
+ */
+GArray *
+ide_fuzzy_mutable_index_match (IdeFuzzyMutableIndex *fuzzy,
+ const gchar *needle,
+ gsize max_matches)
+{
+ IdeFuzzyMutableIndexLookup lookup = { 0 };
+ IdeFuzzyMutableIndexMatch match;
+ IdeFuzzyMutableIndexItem *item;
+ GHashTableIter iter;
+ gpointer key;
+ gpointer value;
+ const gchar *tmp;
+ GArray *matches = NULL;
+ GArray *root;
+ gchar *downcase = NULL;
+ guint i;
+
+ g_return_val_if_fail (fuzzy, NULL);
+ g_return_val_if_fail (!fuzzy->in_bulk_insert, NULL);
+ g_return_val_if_fail (needle, NULL);
+
+ matches = g_array_new (FALSE, FALSE, sizeof (IdeFuzzyMutableIndexMatch));
+
+ if (!*needle)
+ goto cleanup;
+
+ if (!fuzzy->case_sensitive)
+ {
+ downcase = g_utf8_casefold (needle, -1);
+ needle = downcase;
+ }
+
+ lookup.fuzzy = fuzzy;
+ lookup.n_tables = g_utf8_strlen (needle, -1);
+ lookup.state = g_new0 (gint, lookup.n_tables);
+ lookup.tables = g_new0 (GArray*, lookup.n_tables);
+ lookup.needle = needle;
+ lookup.max_matches = max_matches;
+ lookup.matches = g_hash_table_new (NULL, NULL);
+
+ for (i = 0, tmp = needle; *tmp; tmp = g_utf8_next_char (tmp))
+ {
+ gunichar ch;
+ GArray *table;
+
+ ch = g_utf8_get_char (tmp);
+ table = g_hash_table_lookup (fuzzy->char_tables, GINT_TO_POINTER (ch));
+
+ if (table == NULL)
+ goto cleanup;
+
+ lookup.tables [i++] = table;
+ }
+
+ g_assert (lookup.n_tables == i);
+ g_assert (lookup.tables [0] != NULL);
+
+ root = lookup.tables [0];
+
+ if (G_LIKELY (lookup.n_tables > 1))
+ {
+ for (i = 0; i < root->len; i++)
+ {
+ item = &g_array_index (root, IdeFuzzyMutableIndexItem, i);
+
+ if (ide_fuzzy_mutable_index_do_match (&lookup, item, 1, 0) &&
+ i + 1 < root->len &&
+ (item + 1)->id == item->id)
+ {
+ /* We found a match, but we might find another one with a higher
+ * score later on for the same item of the corpus. We need to
+ * roll state back to the position we're starting at so that we
+ * can match all the same characters again.
+ */
+ for (guint j = 1; j < lookup.n_tables; j++)
+ rollback_state_to_pos (lookup.tables[j], &lookup.state[j], item->id, item->pos + 1);
+ }
+ }
+ }
+ else
+ {
+ guint last_id = G_MAXUINT;
+
+ for (i = 0; i < root->len; i++)
+ {
+ item = &g_array_index (root, IdeFuzzyMutableIndexItem, i);
+ match.id = GPOINTER_TO_INT (item->id);
+ if (match.id != last_id)
+ {
+ match.key = ide_fuzzy_mutable_index_get_string (fuzzy, item->id);
+ match.value = g_ptr_array_index (fuzzy->id_to_value, item->id);
+ match.score = 1.0 / (strlen (match.key) + item->pos);
+ g_array_append_val (matches, match);
+ last_id = match.id;
+ }
+ }
+
+ goto cleanup;
+ }
+
+ g_hash_table_iter_init (&iter, lookup.matches);
+
+ while (g_hash_table_iter_next (&iter, &key, &value))
+ {
+ /* Ignore keys that have a tombstone record. */
+ if (g_hash_table_contains (fuzzy->removed, key))
+ continue;
+
+ match.id = GPOINTER_TO_INT (key);
+ match.key = ide_fuzzy_mutable_index_get_string (fuzzy, match.id);
+ match.value = g_ptr_array_index (fuzzy->id_to_value, match.id);
+
+ /* If we got a perfect substring match, then this is 1.0, and avoid
+ * perturbing further or we risk non-contiguous (but shorter strings)
+ * matching at higher value.
+ */
+ if (value == NULL)
+ match.score = 1.0;
+ else
+ match.score = 1.0 / (strlen (match.key) + GPOINTER_TO_INT (value));
+
+ g_array_append_val (matches, match);
+ }
+
+ /*
+ * TODO: We could be more clever here when inserting into the array
+ * only if it is a lower score than the end or < max items.
+ */
+
+ if (max_matches != 0)
+ {
+ g_array_sort (matches, ide_fuzzy_mutable_index_match_compare);
+
+ if (max_matches && (matches->len > max_matches))
+ g_array_set_size (matches, max_matches);
+ }
+
+cleanup:
+ g_free (downcase);
+ g_free (lookup.state);
+ g_free (lookup.tables);
+ g_clear_pointer (&lookup.matches, g_hash_table_unref);
+
+ return matches;
+}
+
+gboolean
+ide_fuzzy_mutable_index_contains (IdeFuzzyMutableIndex *fuzzy,
+ const gchar *key)
+{
+ GArray *ar;
+ gboolean ret;
+
+ g_return_val_if_fail (fuzzy != NULL, FALSE);
+
+ ar = ide_fuzzy_mutable_index_match (fuzzy, key, 1);
+ ret = (ar != NULL) && (ar->len > 0);
+ g_clear_pointer (&ar, g_array_unref);
+
+ return ret;
+}
+
+void
+ide_fuzzy_mutable_index_remove (IdeFuzzyMutableIndex *fuzzy,
+ const gchar *key)
+{
+ GArray *ar;
+
+ g_return_if_fail (fuzzy != NULL);
+
+ if (!key || !*key)
+ return;
+
+ ar = ide_fuzzy_mutable_index_match (fuzzy, key, 1);
+
+ if (ar != NULL && ar->len > 0)
+ {
+ for (guint i = 0; i < ar->len; i++)
+ {
+ const IdeFuzzyMutableIndexMatch *match = &g_array_index (ar, IdeFuzzyMutableIndexMatch, i);
+
+ if (g_strcmp0 (match->key, key) == 0)
+ g_hash_table_insert (fuzzy->removed, GINT_TO_POINTER (match->id), NULL);
+ }
+ }
+
+ g_clear_pointer (&ar, g_array_unref);
+}
+
+gchar *
+ide_fuzzy_highlight (const gchar *str,
+ const gchar *match,
+ gboolean case_sensitive)
+{
+ static const gchar *begin = "<b>";
+ static const gchar *end = "</b>";
+ GString *ret;
+ gunichar str_ch;
+ gunichar match_ch;
+ gboolean element_open = FALSE;
+
+ if (str == NULL || match == NULL)
+ return g_strdup (str);
+
+ ret = g_string_new (NULL);
+
+ for (; *str; str = g_utf8_next_char (str))
+ {
+ str_ch = g_utf8_get_char (str);
+ match_ch = g_utf8_get_char (match);
+
+ if (str_ch == '&')
+ {
+ const gchar *entity_end = strchr (str, ';');
+
+ if (entity_end != NULL)
+ {
+ gsize len = entity_end - str;
+
+ if (element_open)
+ {
+ g_string_append (ret, end);
+ element_open = FALSE;
+ }
+
+ g_string_append_len (ret, str, len + 1);
+ str += len;
+
+ continue;
+ }
+ }
+
+ if ((str_ch == match_ch) ||
+ (!case_sensitive && g_unichar_tolower (str_ch) == g_unichar_tolower (match_ch)))
+ {
+ if (!element_open)
+ {
+ g_string_append (ret, begin);
+ element_open = TRUE;
+ }
+
+ if (str_ch == '<')
+ g_string_append (ret, "<");
+ else if (str_ch == '>')
+ g_string_append (ret, ">");
+ else
+ g_string_append_unichar (ret, str_ch);
+
+ /* TODO: We could seek to the next char and append in a batch. */
+ match = g_utf8_next_char (match);
+ }
+ else
+ {
+ if (element_open)
+ {
+ g_string_append (ret, end);
+ element_open = FALSE;
+ }
+
+ if (str_ch == '<')
+ g_string_append (ret, "<");
+ else if (str_ch == '>')
+ g_string_append (ret, ">");
+ else
+ g_string_append_unichar (ret, str_ch);
+ }
+ }
+
+ if (element_open)
+ g_string_append (ret, end);
+
+ return g_string_free (ret, FALSE);
+}
diff --git a/src/libide/search/ide-fuzzy-mutable-index.h b/src/libide/search/ide-fuzzy-mutable-index.h
new file mode 100644
index 000000000..954145d85
--- /dev/null
+++ b/src/libide/search/ide-fuzzy-mutable-index.h
@@ -0,0 +1,76 @@
+/* ide-fuzzy-mutable-index.h
+ *
+ * Copyright (C) 2015 Christian Hergert <christian hergert me>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <libide-core.h>
+
+G_BEGIN_DECLS
+
+#define IDE_TYPE_FUZZY_MUTABLE_INDEX (ide_fuzzy_mutable_index_get_type())
+
+typedef struct _IdeFuzzyMutableIndex IdeFuzzyMutableIndex;
+
+typedef struct
+{
+ const char *key;
+ gpointer value;
+ float score;
+ guint id;
+} IdeFuzzyMutableIndexMatch;
+
+IDE_AVAILABLE_IN_ALL
+GType ide_fuzzy_mutable_index_get_type (void);
+IDE_AVAILABLE_IN_ALL
+IdeFuzzyMutableIndex *ide_fuzzy_mutable_index_new (gboolean case_sensitive);
+IDE_AVAILABLE_IN_ALL
+IdeFuzzyMutableIndex *ide_fuzzy_mutable_index_new_with_free_func (gboolean case_sensitive,
+ GDestroyNotify free_func);
+IDE_AVAILABLE_IN_ALL
+void ide_fuzzy_mutable_index_set_free_func (IdeFuzzyMutableIndex *fuzzy,
+ GDestroyNotify free_func);
+IDE_AVAILABLE_IN_ALL
+void ide_fuzzy_mutable_index_begin_bulk_insert (IdeFuzzyMutableIndex *fuzzy);
+IDE_AVAILABLE_IN_ALL
+void ide_fuzzy_mutable_index_end_bulk_insert (IdeFuzzyMutableIndex *fuzzy);
+IDE_AVAILABLE_IN_ALL
+gboolean ide_fuzzy_mutable_index_contains (IdeFuzzyMutableIndex *fuzzy,
+ const char *key);
+IDE_AVAILABLE_IN_ALL
+void ide_fuzzy_mutable_index_insert (IdeFuzzyMutableIndex *fuzzy,
+ const char *key,
+ gpointer value);
+IDE_AVAILABLE_IN_ALL
+GArray *ide_fuzzy_mutable_index_match (IdeFuzzyMutableIndex *fuzzy,
+ const char *needle,
+ gsize max_matches);
+IDE_AVAILABLE_IN_ALL
+void ide_fuzzy_mutable_index_remove (IdeFuzzyMutableIndex *fuzzy,
+ const char *key);
+IDE_AVAILABLE_IN_ALL
+IdeFuzzyMutableIndex *ide_fuzzy_mutable_index_ref (IdeFuzzyMutableIndex *fuzzy);
+IDE_AVAILABLE_IN_ALL
+void ide_fuzzy_mutable_index_unref (IdeFuzzyMutableIndex *fuzzy);
+IDE_AVAILABLE_IN_ALL
+char *ide_fuzzy_highlight (const char *str,
+ const char *query,
+ gboolean case_sensitive);
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC (IdeFuzzyMutableIndex, ide_fuzzy_mutable_index_unref)
+
+G_END_DECLS
diff --git a/src/libide/search/ide-int-pair.h b/src/libide/search/ide-int-pair.h
new file mode 100644
index 000000000..c52999474
--- /dev/null
+++ b/src/libide/search/ide-int-pair.h
@@ -0,0 +1,209 @@
+/* ide-int-pair.h
+ *
+ * Copyright (C) 2017 Christian Hergert <chergert redhat com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef IDE_INT_PAIR_H
+#define IDE_INT_PAIR_H
+
+#ifndef __GI_SCANNER__
+
+#include <glib.h>
+#include <stdlib.h>
+
+G_BEGIN_DECLS
+
+#if GLIB_SIZEOF_VOID_P == 8
+# define IDE_INT_PAIR_64
+#endif
+
+#ifdef IDE_INT_PAIR_64
+
+typedef union
+{
+ /*< private >*/
+ struct {
+ gint first;
+ gint second;
+ };
+ gpointer ptr;
+} IdeIntPair;
+
+typedef union
+{
+ /*< private >*/
+ struct {
+ guint first;
+ guint second;
+ };
+ gpointer ptr;
+} IdeUIntPair;
+
+#else
+
+typedef struct
+{
+ /*< private >*/
+ gint first;
+ gint second;
+} IdeIntPair;
+
+typedef struct
+{
+ /*< private >*/
+ guint first;
+ guint second;
+} IdeUIntPair;
+
+#endif
+
+/**
+ * ide_int_pair_new: (skip)
+ */
+static inline IdeIntPair *
+ide_int_pair_new (gint first, gint second)
+{
+ IdeIntPair pair;
+
+ /* Avoid tripping g-ir-scanner by putting this
+ * inside the inline function.
+ */
+ G_STATIC_ASSERT (sizeof (IdeIntPair) == 8);
+
+ pair.first = first;
+ pair.second = second;
+
+#ifdef IDE_INT_PAIR_64
+ return (IdeIntPair *)pair.ptr;
+#else
+ return g_slice_copy (sizeof (IdeIntPair), &pair);
+#endif
+}
+
+/**
+ * ide_uint_pair_new: (skip)
+ */
+static inline IdeUIntPair *
+ide_uint_pair_new (guint first, guint second)
+{
+ IdeUIntPair pair;
+
+ /* Avoid tripping g-ir-scanner by putting this
+ * inside the inline function.
+ */
+ G_STATIC_ASSERT (sizeof (IdeUIntPair) == 8);
+
+ pair.first = first;
+ pair.second = second;
+
+#ifdef IDE_INT_PAIR_64
+ return (IdeUIntPair *)pair.ptr;
+#else
+ return g_slice_copy (sizeof (IdeUIntPair), &pair);
+#endif
+}
+
+/**
+ * ide_int_pair_first: (skip)
+ */
+static inline gint
+ide_int_pair_first (IdeIntPair *pair)
+{
+ IdeIntPair p;
+#ifdef IDE_INT_PAIR_64
+ p.ptr = pair;
+#else
+ p = *pair;
+#endif
+ return p.first;
+}
+
+/**
+ * ide_int_pair_second: (skip)
+ */
+static inline gint
+ide_int_pair_second (IdeIntPair *pair)
+{
+ IdeIntPair p;
+#ifdef IDE_INT_PAIR_64
+ p.ptr = pair;
+#else
+ p = *pair;
+#endif
+ return p.second;
+}
+
+/**
+ * ide_uint_pair_first: (skip)
+ */
+static inline guint
+ide_uint_pair_first (IdeUIntPair *pair)
+{
+ IdeUIntPair p;
+#ifdef IDE_INT_PAIR_64
+ p.ptr = pair;
+#else
+ p = *pair;
+#endif
+ return p.first;
+}
+
+/**
+ * ide_uint_pair_second: (skip)
+ */
+static inline guint
+ide_uint_pair_second (IdeUIntPair *pair)
+{
+ IdeUIntPair p;
+#ifdef IDE_INT_PAIR_64
+ p.ptr = pair;
+#else
+ p = *pair;
+#endif
+ return p.second;
+}
+
+/**
+ * ide_int_pair_free: (skip)
+ */
+static inline void
+ide_int_pair_free (IdeIntPair *pair)
+{
+#ifdef IDE_INT_PAIR_64
+ /* Do Nothing */
+#else
+ g_slice_free (IdeIntPair, pair);
+#endif
+}
+
+/**
+ * ide_uint_pair_free: (skip)
+ */
+static inline void
+ide_uint_pair_free (IdeUIntPair *pair)
+{
+#ifdef IDE_INT_PAIR_64
+ /* Do Nothing */
+#else
+ g_slice_free (IdeUIntPair, pair);
+#endif
+}
+
+G_END_DECLS
+
+#endif /* __GI_SCANNER__ */
+
+#endif /* IDE_INT_PAIR_H */
diff --git a/src/libide/search/libide-search.h b/src/libide/search/libide-search.h
index 591358d5a..da0730aec 100644
--- a/src/libide/search/libide-search.h
+++ b/src/libide/search/libide-search.h
@@ -25,6 +25,11 @@
#define IDE_SEARCH_INSIDE
+#include "ide-fuzzy-index-builder.h"
+#include "ide-fuzzy-index-cursor.h"
+#include "ide-fuzzy-index.h"
+#include "ide-fuzzy-index-match.h"
+#include "ide-fuzzy-mutable-index.h"
#include "ide-pattern-spec.h"
#include "ide-search-engine.h"
#include "ide-search-provider.h"
diff --git a/src/libide/search/meson.build b/src/libide/search/meson.build
index 6cd6750ed..075b5725c 100644
--- a/src/libide/search/meson.build
+++ b/src/libide/search/meson.build
@@ -7,6 +7,11 @@ libide_include_directories += include_directories('.')
#
libide_search_public_headers = [
+ 'ide-fuzzy-index-builder.h',
+ 'ide-fuzzy-index-cursor.h',
+ 'ide-fuzzy-index.h',
+ 'ide-fuzzy-index-match.h',
+ 'ide-fuzzy-mutable-index.h',
'ide-pattern-spec.h',
'ide-search-engine.h',
'ide-search-provider.h',
@@ -22,6 +27,11 @@ install_headers(libide_search_public_headers, subdir: libide_search_header_subdi
#
libide_search_public_sources = [
+ 'ide-fuzzy-index-builder.c',
+ 'ide-fuzzy-index.c',
+ 'ide-fuzzy-index-cursor.c',
+ 'ide-fuzzy-index-match.c',
+ 'ide-fuzzy-mutable-index.c',
'ide-pattern-spec.c',
'ide-search-engine.c',
'ide-search-provider.c',
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]