[tracker] tracker-extract: store as many words as the FTS config says.
- From: Carlos Garnacho <carlosg src gnome org>
- To: svn-commits-list gnome org
- Cc:
- Subject: [tracker] tracker-extract: store as many words as the FTS config says.
- Date: Thu, 22 Oct 2009 15:32:36 +0000 (UTC)
commit ee7c66892b3968400f622e2c31769b02e38d0b5f
Author: Carlos Garnacho <carlos lanedo com>
Date: Thu Oct 22 17:31:32 2009 +0200
tracker-extract: store as many words as the FTS config says.
src/tracker-extract/Makefile.am | 2 +
src/tracker-extract/tracker-extract-msoffice.c | 6 +-
src/tracker-extract/tracker-extract-oasis.c | 6 +-
src/tracker-extract/tracker-extract-pdf.c | 7 +-
src/tracker-extract/tracker-fts-config.c | 430 ++++++++++++++++++++++++
src/tracker-extract/tracker-fts-config.h | 65 ++++
src/tracker-extract/tracker-main.c | 11 +
src/tracker-extract/tracker-main.h | 3 +
8 files changed, 526 insertions(+), 4 deletions(-)
---
diff --git a/src/tracker-extract/Makefile.am b/src/tracker-extract/Makefile.am
index 0c41af7..c8fed93 100644
--- a/src/tracker-extract/Makefile.am
+++ b/src/tracker-extract/Makefile.am
@@ -253,6 +253,8 @@ tracker_extract_SOURCES = \
tracker-dbus.h \
tracker-extract.c \
tracker-extract.h \
+ tracker-fts-config.c \
+ tracker-fts-config.h \
tracker-main.c \
tracker-main.h
diff --git a/src/tracker-extract/tracker-extract-msoffice.c b/src/tracker-extract/tracker-extract-msoffice.c
index 1b1b2c8..ebcb799 100644
--- a/src/tracker-extract/tracker-extract-msoffice.c
+++ b/src/tracker-extract/tracker-extract-msoffice.c
@@ -250,6 +250,8 @@ extract_msoffice (const gchar *uri,
GsfInput *stream;
gchar *filename, *content;
gboolean rdf_type_added = FALSE;
+ TrackerFTSConfig *fts_config;
+ guint n_words;
gsf_init ();
@@ -327,7 +329,9 @@ extract_msoffice (const gchar *uri,
g_object_unref (stream);
}
- content = extract_content (uri, 1000);
+ fts_config = tracker_main_get_fts_config ();
+ n_words = tracker_fts_config_get_max_words_to_index (fts_config);
+ content = extract_content (uri, n_words);
if (content) {
tracker_sparql_builder_predicate (metadata, "nie:plainTextContent");
diff --git a/src/tracker-extract/tracker-extract-oasis.c b/src/tracker-extract/tracker-extract-oasis.c
index 51111a2..49fadcb 100644
--- a/src/tracker-extract/tracker-extract-oasis.c
+++ b/src/tracker-extract/tracker-extract-oasis.c
@@ -111,6 +111,8 @@ extract_oasis (const gchar *uri,
gchar *xml;
gchar *filename = g_filename_from_uri (uri, NULL, NULL);
gchar *content;
+ TrackerFTSConfig *fts_config;
+ guint n_words;
ODTParseInfo info = {
metadata,
-1,
@@ -144,7 +146,9 @@ extract_oasis (const gchar *uri,
g_free (xml);
}
- content = extract_content (filename, 1000);
+ fts_config = tracker_main_get_fts_config ();
+ n_words = tracker_fts_config_get_max_words_to_index (fts_config);
+ content = extract_content (filename, n_words);
if (content) {
tracker_sparql_builder_predicate (metadata, "nie:plainTextContent");
diff --git a/src/tracker-extract/tracker-extract-pdf.c b/src/tracker-extract/tracker-extract-pdf.c
index 8517b78..b6edfd6 100644
--- a/src/tracker-extract/tracker-extract-pdf.c
+++ b/src/tracker-extract/tracker-extract-pdf.c
@@ -134,6 +134,8 @@ extract_pdf (const gchar *uri,
gchar *metadata_xml = NULL;
GTime creation_date;
GError *error = NULL;
+ TrackerFTSConfig *fts_config;
+ guint n_words;
g_type_init ();
@@ -347,8 +349,9 @@ extract_pdf (const gchar *uri,
}
}
- /* FIXME: Fixed word limit at the moment */
- content = extract_content (document, 1000);
+ fts_config = tracker_main_get_fts_config ();
+ n_words = tracker_fts_config_get_max_words_to_index (fts_config);
+ content = extract_content (document, n_words);
if (content) {
tracker_sparql_builder_predicate (metadata, "nie:plainTextContent");
diff --git a/src/tracker-extract/tracker-fts-config.c b/src/tracker-extract/tracker-fts-config.c
new file mode 100644
index 0000000..b452fbf
--- /dev/null
+++ b/src/tracker-extract/tracker-fts-config.c
@@ -0,0 +1,430 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * Copyright (C) 2009, Nokia (urho konttori nokia com)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include "config.h"
+
+#include <string.h>
+#include <stdlib.h>
+
+#include <glib.h>
+#include <gio/gio.h>
+
+#include <libtracker-common/tracker-keyfile-object.h>
+
+#include "tracker-fts-config.h"
+
+#define TRACKER_FTS_CONFIG_GET_PRIVATE(obj) (G_TYPE_INSTANCE_GET_PRIVATE ((obj), TRACKER_TYPE_FTS_CONFIG, TrackerFTSConfigPrivate))
+
+/* GKeyFile defines */
+#define GROUP_INDEXING "Indexing"
+
+/* Default values */
+#define DEFAULT_MIN_WORD_LENGTH 3 /* 0->30 */
+#define DEFAULT_MAX_WORD_LENGTH 30 /* 0->200 */
+#define DEFAULT_MAX_WORDS_TO_INDEX 10000
+
+typedef struct {
+ /* Indexing */
+ gint min_word_length;
+ gint max_word_length;
+ gint max_words_to_index;
+} TrackerFTSConfigPrivate;
+
+typedef struct {
+ GType type;
+ const gchar *property;
+ const gchar *group;
+ const gchar *key;
+} ObjectToKeyFile;
+
+static void config_set_property (GObject *object,
+ guint param_id,
+ const GValue *value,
+ GParamSpec *pspec);
+static void config_get_property (GObject *object,
+ guint param_id,
+ GValue *value,
+ GParamSpec *pspec);
+static void config_finalize (GObject *object);
+static void config_constructed (GObject *object);
+static void config_create_with_defaults (TrackerFTSConfig *config,
+ GKeyFile *key_file,
+ gboolean overwrite);
+static void config_load (TrackerFTSConfig *config);
+
+enum {
+ PROP_0,
+
+ /* Indexing */
+ PROP_MIN_WORD_LENGTH,
+ PROP_MAX_WORD_LENGTH,
+
+ /* Performance */
+ PROP_MAX_WORDS_TO_INDEX,
+};
+
+static ObjectToKeyFile conversions[] = {
+ { G_TYPE_INT, "min-word-length", GROUP_INDEXING, "MinWordLength" },
+ { G_TYPE_INT, "max-word-length", GROUP_INDEXING, "MaxWordLength" },
+ { G_TYPE_INT, "max-words-to-index", GROUP_INDEXING, "MaxWordsToIndex" },
+};
+
+G_DEFINE_TYPE (TrackerFTSConfig, tracker_fts_config, TRACKER_TYPE_CONFIG_FILE);
+
+static void
+tracker_fts_config_class_init (TrackerFTSConfigClass *klass)
+{
+ GObjectClass *object_class = G_OBJECT_CLASS (klass);
+
+ object_class->set_property = config_set_property;
+ object_class->get_property = config_get_property;
+ object_class->finalize = config_finalize;
+ object_class->constructed = config_constructed;
+
+ /* Indexing */
+ g_object_class_install_property (object_class,
+ PROP_MIN_WORD_LENGTH,
+ g_param_spec_int ("min-word-length",
+ "Minimum word length",
+ " Set the minimum length of words to index (0->30, default=3)",
+ 0,
+ 30,
+ DEFAULT_MIN_WORD_LENGTH,
+ G_PARAM_READWRITE | G_PARAM_CONSTRUCT));
+ g_object_class_install_property (object_class,
+ PROP_MAX_WORD_LENGTH,
+ g_param_spec_int ("max-word-length",
+ "Maximum word length",
+ " Set the maximum length of words to index (0->200, default=30)",
+ 0,
+ 200, /* Is this a reasonable limit? */
+ DEFAULT_MAX_WORD_LENGTH,
+ G_PARAM_READWRITE | G_PARAM_CONSTRUCT));
+ g_object_class_install_property (object_class,
+ PROP_MAX_WORDS_TO_INDEX,
+ g_param_spec_int ("max-words-to-index",
+ "Maximum words to index",
+ " Maximum unique words to index from a file's content (default=10000)",
+ 0,
+ G_MAXINT,
+ DEFAULT_MAX_WORDS_TO_INDEX,
+ G_PARAM_READWRITE | G_PARAM_CONSTRUCT));
+
+ g_type_class_add_private (object_class, sizeof (TrackerFTSConfigPrivate));
+}
+
+static void
+tracker_fts_config_init (TrackerFTSConfig *object)
+{
+}
+
+static void
+config_set_property (GObject *object,
+ guint param_id,
+ const GValue *value,
+ GParamSpec *pspec)
+{
+ switch (param_id) {
+ /* Indexing */
+ case PROP_MIN_WORD_LENGTH:
+ tracker_fts_config_set_min_word_length (TRACKER_FTS_CONFIG (object),
+ g_value_get_int (value));
+ break;
+ case PROP_MAX_WORD_LENGTH:
+ tracker_fts_config_set_max_word_length (TRACKER_FTS_CONFIG (object),
+ g_value_get_int (value));
+ break;
+ case PROP_MAX_WORDS_TO_INDEX:
+ tracker_fts_config_set_max_words_to_index (TRACKER_FTS_CONFIG (object),
+ g_value_get_int (value));
+ break;
+
+ default:
+ G_OBJECT_WARN_INVALID_PROPERTY_ID (object, param_id, pspec);
+ break;
+ };
+}
+
+static void
+config_get_property (GObject *object,
+ guint param_id,
+ GValue *value,
+ GParamSpec *pspec)
+{
+ TrackerFTSConfigPrivate *priv;
+
+ priv = TRACKER_FTS_CONFIG_GET_PRIVATE (object);
+
+ switch (param_id) {
+ /* Indexing */
+ case PROP_MIN_WORD_LENGTH:
+ g_value_set_int (value, priv->min_word_length);
+ break;
+ case PROP_MAX_WORD_LENGTH:
+ g_value_set_int (value, priv->max_word_length);
+ break;
+ case PROP_MAX_WORDS_TO_INDEX:
+ g_value_set_int (value, priv->max_words_to_index);
+ break;
+
+ default:
+ G_OBJECT_WARN_INVALID_PROPERTY_ID (object, param_id, pspec);
+ break;
+ };
+}
+
+static void
+config_finalize (GObject *object)
+{
+ /* For now we do nothing here, we left this override in for
+ * future expansion.
+ */
+
+ (G_OBJECT_CLASS (tracker_fts_config_parent_class)->finalize) (object);
+}
+
+static void
+config_constructed (GObject *object)
+{
+ (G_OBJECT_CLASS (tracker_fts_config_parent_class)->constructed) (object);
+
+ config_load (TRACKER_FTS_CONFIG (object));
+}
+
+static void
+config_create_with_defaults (TrackerFTSConfig *config,
+ GKeyFile *key_file,
+ gboolean overwrite)
+{
+ gint i;
+
+ g_message ("Loading defaults into GKeyFile...");
+
+ for (i = 0; i < G_N_ELEMENTS (conversions); i++) {
+ gboolean has_key;
+
+ has_key = g_key_file_has_key (key_file,
+ conversions[i].group,
+ conversions[i].key,
+ NULL);
+ if (!overwrite && has_key) {
+ continue;
+ }
+
+ switch (conversions[i].type) {
+ case G_TYPE_INT:
+ g_key_file_set_integer (key_file,
+ conversions[i].group,
+ conversions[i].key,
+ tracker_keyfile_object_default_int (config,
+ conversions[i].property));
+ break;
+
+ default:
+ g_assert_not_reached ();
+ break;
+ }
+
+ g_key_file_set_comment (key_file,
+ conversions[i].group,
+ conversions[i].key,
+ tracker_keyfile_object_blurb (config,
+ conversions[i].property),
+ NULL);
+ }
+}
+
+static void
+config_load (TrackerFTSConfig *config)
+{
+ TrackerConfigFile *file;
+ gint i;
+
+ file = TRACKER_CONFIG_FILE (config);
+ config_create_with_defaults (config, file->key_file, FALSE);
+
+ if (!file->file_exists) {
+ tracker_config_file_save (file);
+ }
+
+ for (i = 0; i < G_N_ELEMENTS (conversions); i++) {
+ gboolean has_key;
+
+ has_key = g_key_file_has_key (file->key_file,
+ conversions[i].group,
+ conversions[i].key,
+ NULL);
+
+ switch (conversions[i].type) {
+ case G_TYPE_INT:
+ tracker_keyfile_object_load_int (G_OBJECT (file),
+ conversions[i].property,
+ file->key_file,
+ conversions[i].group,
+ conversions[i].key);
+ break;
+
+ default:
+ g_assert_not_reached ();
+ break;
+ }
+ }
+}
+
+static gboolean
+config_save (TrackerFTSConfig *config)
+{
+ TrackerConfigFile *file;
+ gint i;
+
+ file = TRACKER_CONFIG_FILE (config);
+
+ if (!file->key_file) {
+ g_critical ("Could not save config, GKeyFile was NULL, has the config been loaded?");
+
+ return FALSE;
+ }
+
+ g_message ("Setting details to GKeyFile object...");
+
+ for (i = 0; i < G_N_ELEMENTS (conversions); i++) {
+ switch (conversions[i].type) {
+ case G_TYPE_INT:
+ tracker_keyfile_object_save_int (file,
+ conversions[i].property,
+ file->key_file,
+ conversions[i].group,
+ conversions[i].key);
+ break;
+
+ default:
+ g_assert_not_reached ();
+ break;
+ }
+ }
+
+ return tracker_config_file_save (TRACKER_CONFIG_FILE (config));
+}
+
+TrackerFTSConfig *
+tracker_fts_config_new (void)
+{
+ return g_object_new (TRACKER_TYPE_FTS_CONFIG,
+ "domain", "tracker-fts",
+ NULL);
+}
+
+gboolean
+tracker_fts_config_save (TrackerFTSConfig *config)
+{
+ g_return_val_if_fail (TRACKER_IS_FTS_CONFIG (config), FALSE);
+
+ return config_save (config);
+}
+
+gint
+tracker_fts_config_get_min_word_length (TrackerFTSConfig *config)
+{
+ TrackerFTSConfigPrivate *priv;
+
+ g_return_val_if_fail (TRACKER_IS_FTS_CONFIG (config), DEFAULT_MIN_WORD_LENGTH);
+
+ priv = TRACKER_FTS_CONFIG_GET_PRIVATE (config);
+
+ return priv->min_word_length;
+}
+
+gint
+tracker_fts_config_get_max_word_length (TrackerFTSConfig *config)
+{
+ TrackerFTSConfigPrivate *priv;
+
+ g_return_val_if_fail (TRACKER_IS_FTS_CONFIG (config), DEFAULT_MAX_WORD_LENGTH);
+
+ priv = TRACKER_FTS_CONFIG_GET_PRIVATE (config);
+
+ return priv->max_word_length;
+}
+
+gint
+tracker_fts_config_get_max_words_to_index (TrackerFTSConfig *config)
+{
+ TrackerFTSConfigPrivate *priv;
+
+ g_return_val_if_fail (TRACKER_IS_FTS_CONFIG (config), DEFAULT_MAX_WORDS_TO_INDEX);
+
+ priv = TRACKER_FTS_CONFIG_GET_PRIVATE (config);
+
+ return priv->max_words_to_index;
+}
+
+void
+tracker_fts_config_set_min_word_length (TrackerFTSConfig *config,
+ gint value)
+{
+ TrackerFTSConfigPrivate *priv;
+
+ g_return_if_fail (TRACKER_IS_FTS_CONFIG (config));
+
+ if (!tracker_keyfile_object_validate_int (config, "min-word-length", value)) {
+ return;
+ }
+
+ priv = TRACKER_FTS_CONFIG_GET_PRIVATE (config);
+
+ priv->min_word_length = value;
+ g_object_notify (G_OBJECT (config), "min-word-length");
+}
+
+void
+tracker_fts_config_set_max_word_length (TrackerFTSConfig *config,
+ gint value)
+{
+ TrackerFTSConfigPrivate *priv;
+
+ g_return_if_fail (TRACKER_IS_FTS_CONFIG (config));
+
+ if (!tracker_keyfile_object_validate_int (config, "max-word-length", value)) {
+ return;
+ }
+
+ priv = TRACKER_FTS_CONFIG_GET_PRIVATE (config);
+
+ priv->max_word_length = value;
+ g_object_notify (G_OBJECT (config), "max-word-length");
+}
+
+void
+tracker_fts_config_set_max_words_to_index (TrackerFTSConfig *config,
+ gint value)
+{
+ TrackerFTSConfigPrivate *priv;
+
+ g_return_if_fail (TRACKER_IS_FTS_CONFIG (config));
+
+ if (!tracker_keyfile_object_validate_int (config, "max-words-to-index", value)) {
+ return;
+ }
+
+ priv = TRACKER_FTS_CONFIG_GET_PRIVATE (config);
+
+ priv->max_words_to_index = value;
+ g_object_notify (G_OBJECT (config), "max-words-to-index");
+}
diff --git a/src/tracker-extract/tracker-fts-config.h b/src/tracker-extract/tracker-fts-config.h
new file mode 100644
index 0000000..1919472
--- /dev/null
+++ b/src/tracker-extract/tracker-fts-config.h
@@ -0,0 +1,65 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * Copyright (C) 2009, Nokia (urho konttori nokia com)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __TRACKER_FTS_CONFIG_H__
+#define __TRACKER_FTS_CONFIG_H__
+
+#include <glib-object.h>
+
+#include <libtracker-common/tracker-config-file.h>
+
+G_BEGIN_DECLS
+
+#define TRACKER_TYPE_FTS_CONFIG (tracker_fts_config_get_type ())
+#define TRACKER_FTS_CONFIG(o) (G_TYPE_CHECK_INSTANCE_CAST ((o), TRACKER_TYPE_FTS_CONFIG, TrackerFTSConfig))
+#define TRACKER_FTS_CONFIG_CLASS(k) (G_TYPE_CHECK_CLASS_CAST ((k), TRACKER_TYPE_FTS_CONFIG, TrackerFTSConfigClass))
+#define TRACKER_IS_FTS_CONFIG(o) (G_TYPE_CHECK_INSTANCE_TYPE ((o), TRACKER_TYPE_FTS_CONFIG))
+#define TRACKER_IS_FTS_CONFIG_CLASS(k) (G_TYPE_CHECK_CLASS_TYPE ((k), TRACKER_TYPE_FTS_CONFIG))
+#define TRACKER_FTS_CONFIG_GET_CLASS(o) (G_TYPE_INSTANCE_GET_CLASS ((o), TRACKER_TYPE_FTS_CONFIG, TrackerFTSConfigClass))
+
+typedef struct TrackerFTSConfig TrackerFTSConfig;
+typedef struct TrackerFTSConfigClass TrackerFTSConfigClass;
+
+struct TrackerFTSConfig {
+ TrackerConfigFile parent;
+};
+
+struct TrackerFTSConfigClass {
+ TrackerConfigFileClass parent_class;
+};
+
+GType tracker_fts_config_get_type (void) G_GNUC_CONST;
+
+TrackerFTSConfig *tracker_fts_config_new (void);
+gboolean tracker_fts_config_save (TrackerFTSConfig *config);
+gint tracker_fts_config_get_min_word_length (TrackerFTSConfig *config);
+gint tracker_fts_config_get_max_word_length (TrackerFTSConfig *config);
+gint tracker_fts_config_get_max_words_to_index (TrackerFTSConfig *config);
+void tracker_fts_config_set_min_word_length (TrackerFTSConfig *config,
+ gint value);
+void tracker_fts_config_set_max_word_length (TrackerFTSConfig *config,
+ gint value);
+void tracker_fts_config_set_max_words_to_index (TrackerFTSConfig *config,
+ gint value);
+
+G_END_DECLS
+
+#endif /* __TRACKER_FTS_CONFIG_H__ */
+
diff --git a/src/tracker-extract/tracker-main.c b/src/tracker-extract/tracker-main.c
index fa9dbda..90946c9 100644
--- a/src/tracker-extract/tracker-main.c
+++ b/src/tracker-extract/tracker-main.c
@@ -74,6 +74,7 @@ static gboolean disable_shutdown;
static gint verbosity = -1;
static gchar *filename;
static gchar *mime_type;
+static TrackerFTSConfig *fts_config;
static GOptionEntry entries[] = {
{ "version", 'V', 0,
@@ -254,6 +255,16 @@ log_handler (const gchar *domain,
}
}
+TrackerFTSConfig *
+tracker_main_get_fts_config (void)
+{
+ if (G_UNLIKELY (!fts_config)) {
+ fts_config = tracker_fts_config_new ();
+ }
+
+ return fts_config;
+}
+
int
main (int argc, char *argv[])
{
diff --git a/src/tracker-extract/tracker-main.h b/src/tracker-extract/tracker-main.h
index 45c5405..a562fef 100644
--- a/src/tracker-extract/tracker-main.h
+++ b/src/tracker-extract/tracker-main.h
@@ -26,6 +26,7 @@
#include <libtracker-common/tracker-storage.h>
#include <libtracker-common/tracker-statement-list.h>
+#include "tracker-fts-config.h"
G_BEGIN_DECLS
@@ -48,6 +49,8 @@ TrackerExtractData *tracker_get_extract_data (void);
*/
void tracker_main_quit_timeout_reset (void);
+TrackerFTSConfig *tracker_main_get_fts_config (void);
+
G_END_DECLS
#endif /* __TRACKER_MAIN_H__ */
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]