tracker r2059 - in branches/indexer-split: . src/tracker-indexer
- From: ifrade svn gnome org
- To: svn-commits-list gnome org
- Subject: tracker r2059 - in branches/indexer-split: . src/tracker-indexer
- Date: Tue, 12 Aug 2008 15:21:23 +0000 (UTC)
Author: ifrade
Date: Tue Aug 12 15:21:23 2008
New Revision: 2059
URL: http://svn.gnome.org/viewvc/tracker?rev=2059&view=rev
Log:
Added differential indexing for full text contents
Modified:
branches/indexer-split/ChangeLog
branches/indexer-split/src/tracker-indexer/tracker-indexer.c
Modified: branches/indexer-split/src/tracker-indexer/tracker-indexer.c
==============================================================================
--- branches/indexer-split/src/tracker-indexer/tracker-indexer.c (original)
+++ branches/indexer-split/src/tracker-indexer/tracker-indexer.c Tue Aug 12 15:21:23 2008
@@ -93,6 +93,7 @@
typedef struct PathInfo PathInfo;
typedef struct MetadataForeachData MetadataForeachData;
typedef struct MetadataRequest MetadataRequest;
+typedef struct UpdateWordsForeachData UpdateWordsForeachData;
struct TrackerIndexerPrivate {
GQueue *dir_queue;
@@ -148,6 +149,13 @@
guint32 id;
};
+struct UpdateWordsForeachData {
+ TrackerDBIndex *index;
+
+ guint32 service_id;
+ guint32 service_type_id;
+};
+
enum {
PROP_0,
PROP_RUNNING,
@@ -925,6 +933,70 @@
}
static void
+update_word_foreach (gpointer key, gpointer value, gpointer user_data)
+{
+ gchar *word;
+ UpdateWordsForeachData *data;
+ gint score;
+
+ word = (gchar *)key;
+ score = GPOINTER_TO_INT (value);
+
+ data = (UpdateWordsForeachData *)user_data;
+
+ tracker_db_index_add_word (data->index,
+ word,
+ data->service_id,
+ data->service_type_id,
+ score);
+}
+
+static void
+update_words_no_parsing (TrackerIndexer *indexer, gint service_id, gint service_type_id, GHashTable *words)
+{
+
+ UpdateWordsForeachData user_data;
+
+ user_data.index = indexer->private->index;
+ user_data.service_id = service_id;
+ user_data.service_type_id = service_type_id;
+
+ g_hash_table_foreach (words, update_word_foreach, &user_data);
+
+
+}
+
+
+static void
+merge_word_table (gpointer key,
+ gpointer value,
+ gpointer user_data)
+{
+ char *word;
+ int score;
+ GHashTable *new_table;
+
+ gpointer k=0, v=0;
+
+ word = (char *) key;
+ score = GPOINTER_TO_INT (value);
+ new_table = user_data;
+
+ if (!g_hash_table_lookup_extended (new_table, word, &k, &v)) {
+ g_hash_table_insert (new_table, g_strdup (word), GINT_TO_POINTER (0 - score));
+ } else {
+ if ((GPOINTER_TO_INT (v) - score) != 0) {
+ g_hash_table_insert (new_table,
+ (gchar *) word,
+ GINT_TO_POINTER (GPOINTER_TO_INT (v) - score));
+ } else {
+ /* The word is the same in old and new text */
+ g_hash_table_remove (new_table, word);
+ }
+ }
+}
+
+static void
create_update_item (TrackerIndexer *indexer,
PathInfo *info,
TrackerMetadata *metadata)
@@ -977,6 +1049,56 @@
tracker_db_set_text (service_def, id, text);
g_free (text);
}
+
+ } else {
+
+ gchar *old_text = NULL, *new_text = NULL;
+ GHashTable *old_words = NULL, *new_words = NULL;
+
+ /* Update case */
+ g_debug ("Updating file '%s'", info->file->path);
+
+ /* TODO: Take the old metadata -> the new one, calculate difference and add the words
+ *
+ */
+
+ /* Take the old text -> the new one, calculate difference and add the words */
+ old_text = tracker_db_get_text (service_def, id);
+ new_text = tracker_indexer_module_file_get_text (info->module, info->file);
+
+ if (old_text || new_text) {
+ /* Service has/had full text */
+
+ old_words = tracker_parser_text (NULL,
+ old_text,
+ 1,
+ indexer->private->language,
+ tracker_config_get_max_words_to_index (indexer->private->config),
+ tracker_config_get_max_word_length (indexer->private->config),
+ tracker_config_get_min_word_length (indexer->private->config),
+ tracker_config_get_enable_stemmer (indexer->private->config),
+ FALSE);
+
+ new_words = tracker_parser_text (NULL,
+ new_text,
+ 1,
+ indexer->private->language,
+ tracker_config_get_max_words_to_index (indexer->private->config),
+ tracker_config_get_max_word_length (indexer->private->config),
+ tracker_config_get_min_word_length (indexer->private->config),
+ tracker_config_get_enable_stemmer (indexer->private->config),
+ FALSE);
+
+ /* Merge the score of the words from one and other file
+ * new_table contains the words with the updated scores
+ */
+ g_hash_table_foreach (old_words, merge_word_table, new_words);
+
+ update_words_no_parsing (indexer, id, tracker_service_get_id (service_def), new_words);
+
+ tracker_parser_text_free (old_words);
+ tracker_parser_text_free (new_words);
+ }
}
g_free (dirname);
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]