tracker r2150 - in branches/indexer-split: . src/tracker-indexer src/tracker-indexer/modules
- From: carlosg svn gnome org
- To: svn-commits-list gnome org
- Subject: tracker r2150 - in branches/indexer-split: . src/tracker-indexer src/tracker-indexer/modules
- Date: Tue, 26 Aug 2008 10:17:02 +0000 (UTC)
Author: carlosg
Date: Tue Aug 26 10:17:02 2008
New Revision: 2150
URL: http://svn.gnome.org/viewvc/tracker?rev=2150&view=rev
Log:
2008-08-26 Carlos Garnacho <carlos imendio com>
* src/tracker-indexer/modules/files.c
(tracker_metadata_call_text_filter) (get_file_content): Moved to
tracker-metadata-utils.c
(tracker_module_file_get_text): Use these new functions.
* src/tracker-indexer/tracker-metadata-utils.[ch]: Refactor
MetadataContext so it can also spawn external text extractors.
(tracker_metadata_utils_get_text): New helper function.
Modified:
branches/indexer-split/ChangeLog
branches/indexer-split/src/tracker-indexer/modules/files.c
branches/indexer-split/src/tracker-indexer/tracker-metadata-utils.c
branches/indexer-split/src/tracker-indexer/tracker-metadata-utils.h
Modified: branches/indexer-split/src/tracker-indexer/modules/files.c
==============================================================================
--- branches/indexer-split/src/tracker-indexer/modules/files.c (original)
+++ branches/indexer-split/src/tracker-indexer/modules/files.c Tue Aug 26 10:17:02 2008
@@ -142,126 +142,16 @@
return tracker_metadata_utils_get_data (path);
}
-static gchar *
-tracker_metadata_call_text_filter (const gchar *path,
- const gchar *mime)
-{
- gchar *str, *text_filter_file;
- gchar *text = NULL;
-
-#ifdef OS_WIN32
- str = g_strconcat (mime, "_filter.bat", NULL);
-#else
- str = g_strconcat (mime, "_filter", NULL);
-#endif
-
- text_filter_file = g_build_filename (LIBDIR,
- "tracker",
- "filters",
- str,
- NULL);
-
- if (g_file_test (text_filter_file, G_FILE_TEST_EXISTS)) {
- gchar **argv;
-
- argv = g_new0 (gchar *, 3);
- argv[0] = g_strdup (text_filter_file);
- argv[1] = g_strdup (path);
-
- g_message ("Extracting text for:'%s' using filter:'%s'",
- argv[1], argv[0]);
-
- tracker_spawn (argv, 30, &text, NULL);
-
- g_strfreev (argv);
- }
-
- g_free (text_filter_file);
- g_free (str);
-
- return text;
-}
-
-static gchar *
-get_file_content (const gchar *path)
-{
- GFile *file;
- GFileInputStream *stream;
- GError *error = NULL;
- gssize bytes_read;
- gssize bytes_remaining;
- gchar buf[1048576];
-
- file = g_file_new_for_path (path);
- stream = g_file_read (file, NULL, &error);
-
- if (error) {
- g_message ("Couldn't get file file:'%s', %s",
- path,
- error->message);
- g_error_free (error);
- g_object_unref (file);
-
- return NULL;
- }
-
- /* bytes_max = tracker_config_get_max_text_to_index (config); */
- bytes_remaining = sizeof (buf);
- memset (buf, 0, bytes_remaining);
-
- /* NULL termination */
- bytes_remaining--;
-
- for (bytes_read = -1; bytes_read != 0 && !error; ) {
- bytes_read = g_input_stream_read (G_INPUT_STREAM (stream),
- buf,
- bytes_remaining,
- NULL,
- &error);
- bytes_remaining -= bytes_read;
- }
-
- if (error) {
- g_message ("Couldn't get read input stream for:'%s', %s",
- path,
- error->message);
- g_error_free (error);
- g_object_unref (file);
- g_object_unref (stream);
-
- return NULL;
- }
-
- g_object_unref (file);
- g_object_unref (stream);
-
- g_debug ("Read %d bytes from file:'%s'\n",
- sizeof (buf) - bytes_remaining,
- path);
-
- return g_strdup (buf);
-}
-
gchar *
tracker_module_file_get_text (TrackerFile *file)
{
- gchar *mimetype, *service_type;
- gchar *text = NULL;
-
- mimetype = tracker_file_get_mime_type (file->path);
- service_type = tracker_ontology_get_service_type_for_mime (mimetype);
+ const gchar *path;
- /* No need to filter text based files - index them directly */
- if (service_type &&
- (strcmp (service_type, "Text") == 0 ||
- strcmp (service_type, "Development") == 0)) {
- text = get_file_content (file->path);
- } else {
- text = tracker_metadata_call_text_filter (file->path, mimetype);
- }
+ path = file->path;
- g_free (mimetype);
- g_free (service_type);
+ if (check_exclude_file (path)) {
+ return NULL;
+ }
- return text;
+ return tracker_metadata_utils_get_text (path);
}
Modified: branches/indexer-split/src/tracker-indexer/tracker-metadata-utils.c
==============================================================================
--- branches/indexer-split/src/tracker-indexer/tracker-metadata-utils.c (original)
+++ branches/indexer-split/src/tracker-indexer/tracker-metadata-utils.c Tue Aug 26 10:17:02 2008
@@ -23,7 +23,9 @@
#include <libtracker-common/tracker-type-utils.h>
#include <libtracker-common/tracker-os-dependant.h>
#include <libtracker-common/tracker-ontology.h>
+#include <gio/gio.h>
#include <string.h>
+
#include "tracker-metadata-utils.h"
#define METADATA_FILE_NAME_DELIMITED "File:NameDelimited"
@@ -41,36 +43,54 @@
GIOChannel *stdin_channel;
GIOChannel *stdout_channel;
GMainLoop *data_incoming_loop;
-} MetadataContext;
+ gpointer data;
+} ProcessContext;
-static MetadataContext *context = NULL;
+static ProcessContext *metadata_context = NULL;
static void
-tracker_extract_watch_cb (GPid pid,
- gint status,
- gpointer data)
+destroy_process_context (ProcessContext *context)
{
- g_debug ("Metadata extractor exited with code: %d\n", status);
-
- if (!context) {
- return;
- }
-
g_io_channel_shutdown (context->stdin_channel, FALSE, NULL);
g_io_channel_unref (context->stdin_channel);
g_io_channel_shutdown (context->stdout_channel, FALSE, NULL);
g_io_channel_unref (context->stdout_channel);
- if (g_main_loop_is_running (context->data_incoming_loop))
+ if (g_main_loop_is_running (context->data_incoming_loop)) {
g_main_loop_quit (context->data_incoming_loop);
+ }
g_main_loop_unref (context->data_incoming_loop);
g_spawn_close_pid (context->pid);
g_free (context);
- context = NULL;
+}
+
+static ProcessContext *
+create_process_context (const gchar **argv)
+{
+ ProcessContext *context;
+ GIOChannel *stdin_channel, *stdout_channel;
+ GIOFlags flags;
+ GPid pid;
+
+ if (!tracker_spawn_async_with_channels (argv, 10, &pid, &stdin_channel, &stdout_channel, NULL))
+ return NULL;
+
+ context = g_new0 (ProcessContext, 1);
+ context->pid = pid;
+ context->stdin_channel = stdin_channel;
+ context->stdout_channel = stdout_channel;
+ context->data_incoming_loop = g_main_loop_new (NULL, FALSE);
+
+ flags = g_io_channel_get_flags (context->stdout_channel);
+ flags |= G_IO_FLAG_NONBLOCK;
+
+ g_io_channel_set_flags (context->stdout_channel, flags, NULL);
+
+ return context;
}
static gboolean
@@ -78,29 +98,29 @@
GIOCondition condition,
gpointer user_data)
{
+ ProcessContext *context;
GPtrArray *array;
GIOStatus status = G_IO_STATUS_NORMAL;
gchar *line;
- array = (GPtrArray *) user_data;
-
- if (!context) {
- return FALSE;
- }
+ context = user_data;
+ array = context->data;
if (condition & G_IO_IN || condition & G_IO_PRI) {
do {
status = g_io_channel_read_line (context->stdout_channel, &line, NULL, NULL, NULL);
- if (line && *line) {
+ if (status == G_IO_STATUS_NORMAL && line && *line) {
g_strstrip (line);
g_strdelimit (line, ";", '\0');
g_ptr_array_add (array, line);
}
} while (status == G_IO_STATUS_NORMAL && line && *line);
- if (status == G_IO_STATUS_NORMAL && !*line) {
- /* Empty line, all extractor output has been processed */
+ if (status == G_IO_STATUS_EOF ||
+ status == G_IO_STATUS_ERROR ||
+ (status == G_IO_STATUS_NORMAL && !*line)) {
+ /* all extractor output has been processed */
g_main_loop_quit (context->data_incoming_loop);
return FALSE;
}
@@ -113,37 +133,24 @@
return TRUE;
}
-static gboolean
-create_metadata_context (void)
+static void
+tracker_metadata_watch_cb (GPid pid,
+ gint status,
+ gpointer user_data)
{
- GIOChannel *stdin_channel, *stdout_channel;
- const gchar *argv[2] = { EXTRACTOR_PATH, NULL };
- GIOFlags flags;
- GPid pid;
-
- if (!tracker_spawn_async_with_channels (argv, 10, &pid, &stdin_channel, &stdout_channel, NULL))
- return FALSE;
-
- g_child_watch_add (pid, tracker_extract_watch_cb, NULL);
-
- context = g_new0 (MetadataContext, 1);
- context->pid = pid;
- context->stdin_channel = stdin_channel;
- context->stdout_channel = stdout_channel;
- context->data_incoming_loop = g_main_loop_new (NULL, FALSE);
-
- flags = g_io_channel_get_flags (context->stdout_channel);
- flags |= G_IO_FLAG_NONBLOCK;
-
- g_io_channel_set_flags (context->stdout_channel, flags, NULL);
+ g_debug ("Metadata extractor exited with code: %d\n", status);
- return TRUE;
+ if (metadata_context) {
+ destroy_process_context (metadata_context);
+ metadata_context = NULL;
+ }
}
static gchar **
tracker_metadata_query_file (const gchar *path,
const gchar *mimetype)
{
+ const gchar *argv[2] = { EXTRACTOR_PATH, NULL };
gchar *utf_path, *str;
GPtrArray *array;
GIOStatus status;
@@ -152,8 +159,14 @@
return NULL;
}
- if (!context && !create_metadata_context ()) {
- return NULL;
+ if (!metadata_context) {
+ metadata_context = create_process_context (argv);
+
+ if (!metadata_context) {
+ return NULL;
+ }
+
+ g_child_watch_add (metadata_context->pid, tracker_metadata_watch_cb, NULL);
}
utf_path = g_filename_from_utf8 (path, -1, NULL, NULL, NULL);
@@ -164,23 +177,25 @@
}
array = g_ptr_array_sized_new (10);
+ metadata_context->data = array;
- g_io_add_watch (context->stdout_channel,
+ g_io_add_watch (metadata_context->stdout_channel,
G_IO_IN | G_IO_PRI | G_IO_ERR | G_IO_HUP,
tracker_metadata_read,
- array);
+ metadata_context);
/* write path and mimetype */
str = g_strdup_printf ("%s\n%s\n", utf_path, mimetype);
- status = g_io_channel_write_chars (context->stdin_channel, str, -1, NULL, NULL);
- g_io_channel_flush (context->stdin_channel, NULL);
+ status = g_io_channel_write_chars (metadata_context->stdin_channel, str, -1, NULL, NULL);
+ g_io_channel_flush (metadata_context->stdin_channel, NULL);
/* It will block here until all incoming
* metadata has been processed
*/
- g_main_loop_run (context->data_incoming_loop);
+ g_main_loop_run (metadata_context->data_incoming_loop);
g_ptr_array_add (array, NULL);
+ metadata_context->data = NULL;
g_free (utf_path);
g_free (str);
@@ -301,6 +316,190 @@
return metadata;
}
+static gboolean
+tracker_text_read (GIOChannel *channel,
+ GIOCondition condition,
+ gpointer user_data)
+{
+ ProcessContext *context;
+ GString *text;
+ GIOStatus status;
+ gchar *line;
+
+ context = user_data;
+ text = context->data;;
+ status = G_IO_STATUS_NORMAL;
+
+ if (condition & G_IO_IN || condition & G_IO_PRI) {
+ do {
+ status = g_io_channel_read_line (channel, &line, NULL, NULL, NULL);
+
+ if (status == G_IO_STATUS_NORMAL) {
+ g_string_append (text, line);
+ g_free (line);
+ }
+ } while (status == G_IO_STATUS_NORMAL);
+
+ if (status == G_IO_STATUS_EOF ||
+ status == G_IO_STATUS_ERROR) {
+ g_main_loop_quit (context->data_incoming_loop);
+ return FALSE;
+ }
+ }
+
+ if (condition & G_IO_ERR || condition & G_IO_HUP) {
+ g_main_loop_quit (context->data_incoming_loop);
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static gchar *
+call_text_filter (const gchar *path,
+ const gchar *mime)
+{
+ ProcessContext *context;
+ gchar *str, *text_filter_file;
+ gchar **argv;
+ GString *text;
+
+#ifdef OS_WIN32
+ str = g_strconcat (mime, "_filter.bat", NULL);
+#else
+ str = g_strconcat (mime, "_filter", NULL);
+#endif
+
+ text_filter_file = g_build_filename (LIBDIR,
+ "tracker",
+ "filters",
+ str,
+ NULL);
+
+ g_free (str);
+
+ if (!g_file_test (text_filter_file, G_FILE_TEST_EXISTS)) {
+ g_free (text_filter_file);
+ return NULL;
+ }
+
+ argv = g_new0 (gchar *, 3);
+ argv[0] = text_filter_file;
+ argv[1] = path;
+
+ g_message ("Extracting text for:'%s' using filter:'%s'", argv[1], argv[0]);
+
+ context = create_process_context ((const gchar **) argv);
+
+ g_free (text_filter_file);
+ g_free (argv);
+
+ if (!context) {
+ return NULL;
+ }
+
+ text = g_string_new (NULL);
+ context->data = text;
+
+ g_io_add_watch (context->stdout_channel,
+ G_IO_IN | G_IO_PRI | G_IO_ERR | G_IO_HUP,
+ tracker_text_read,
+ context);
+
+ /* It will block here until all incoming
+ * text has been processed
+ */
+ g_main_loop_run (context->data_incoming_loop);
+
+ destroy_process_context (context);
+
+ return g_string_free (text, FALSE);
+}
+
+static gchar *
+get_file_content (const gchar *path)
+{
+ GFile *file;
+ GFileInputStream *stream;
+ GError *error = NULL;
+ gssize bytes_read;
+ gssize bytes_remaining;
+ gchar buf[1048576];
+
+ file = g_file_new_for_path (path);
+ stream = g_file_read (file, NULL, &error);
+
+ if (error) {
+ g_message ("Couldn't get file file:'%s', %s",
+ path,
+ error->message);
+ g_error_free (error);
+ g_object_unref (file);
+
+ return NULL;
+ }
+
+ /* bytes_max = tracker_config_get_max_text_to_index (config); */
+ bytes_remaining = sizeof (buf);
+ memset (buf, 0, bytes_remaining);
+
+ /* NULL termination */
+ bytes_remaining--;
+
+ for (bytes_read = -1; bytes_read != 0 && !error; ) {
+ bytes_read = g_input_stream_read (G_INPUT_STREAM (stream),
+ buf,
+ bytes_remaining,
+ NULL,
+ &error);
+ bytes_remaining -= bytes_read;
+ }
+
+ if (error) {
+ g_message ("Couldn't get read input stream for:'%s', %s",
+ path,
+ error->message);
+ g_error_free (error);
+ g_object_unref (file);
+ g_object_unref (stream);
+
+ return NULL;
+ }
+
+ g_object_unref (file);
+ g_object_unref (stream);
+
+ g_debug ("Read %d bytes from file:'%s'\n",
+ sizeof (buf) - bytes_remaining,
+ path);
+
+ return g_strdup (buf);
+}
+
+gchar *
+tracker_metadata_utils_get_text (const gchar *path)
+{
+ gchar *mimetype, *service_type;
+ gchar *text = NULL;
+
+ mimetype = tracker_file_get_mime_type (path);
+ service_type = tracker_ontology_get_service_type_for_mime (mimetype);
+
+ /* No need to filter text based files - index them directly */
+ if (service_type &&
+ (strcmp (service_type, "Text") == 0 ||
+ strcmp (service_type, "Development") == 0)) {
+ text = get_file_content (path);
+ } else {
+ text = call_text_filter (path, mimetype);
+ }
+
+ g_free (mimetype);
+ g_free (service_type);
+
+ return text;
+}
+
typedef struct {
TrackerMetadata *old_metadata;
TrackerMetadata *new_metadata;
Modified: branches/indexer-split/src/tracker-indexer/tracker-metadata-utils.h
==============================================================================
--- branches/indexer-split/src/tracker-indexer/tracker-metadata-utils.h (original)
+++ branches/indexer-split/src/tracker-indexer/tracker-metadata-utils.h Tue Aug 26 10:17:02 2008
@@ -46,7 +46,8 @@
void tracker_metadata_utils_action_item_free (MetadataActionItem *item,
gpointer user_data);
-TrackerMetadata * tracker_metadata_utils_get_data (const gchar *file);
+TrackerMetadata * tracker_metadata_utils_get_data (const gchar *path);
+gchar * tracker_metadata_utils_get_text (const gchar *path);
GSList * tracker_metadata_utils_calculate_merge (TrackerMetadata *old_metadata,
TrackerMetadata *new_metadata);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]