[tracker/extractor-rules-list: 2/5] extractor: Use module manager from libtracker-extract
- From: Carlos Garnacho <carlosg src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/extractor-rules-list: 2/5] extractor: Use module manager from libtracker-extract
- Date: Thu, 10 Mar 2011 16:15:49 +0000 (UTC)
commit 2ab79d24febe2ec1fe9215b199a0e3591ce9a66e
Author: Carlos Garnacho <carlosg gnome org>
Date: Thu Mar 10 13:12:55 2011 +0100
extractor: Use module manager from libtracker-extract
src/libtracker-extract/tracker-data.h | 73 ++------
src/tracker-extract/tracker-extract.c | 346 +++++----------------------------
2 files changed, 66 insertions(+), 353 deletions(-)
---
diff --git a/src/libtracker-extract/tracker-data.h b/src/libtracker-extract/tracker-data.h
index 7b0cabf..5809b2e 100644
--- a/src/libtracker-extract/tracker-data.h
+++ b/src/libtracker-extract/tracker-data.h
@@ -95,16 +95,21 @@ G_BEGIN_DECLS
*
*/
-
/**
- * TrackerExtractMimeFunc:
+ * tracker_extract_get_metadata:
* @uri: a string representing a URI.
+ * @mimetype: mimetype for the element contained in URI
* @preupdate: used to populate with data updates that
* are a prerequisite for the actual file
* metadata insertion.
* @metadata: used to populate with file metadata predicate/object(s).
*
- * Extracts metadata from a file, and inserts it into @metadata.
+ * This function must be provided by ALL extractors. This is merely
+ * the declaration of the function which must be written by each
+ * extractor.
+ *
+ * This is checked by tracker-extract by looking up the symbols for
+ * each started plugin and making sure this function exists.
*
* The @metadata parameter is a #TrackerSparqlBuilder constructed
* through tracker_sparql_builder_new_embedded_insert(), the subject
@@ -119,65 +124,15 @@ G_BEGIN_DECLS
* added to @preupdate, which is a #TrackerSparqlBuilder constructed.
* through tracker_sparql_builder_new_update().
*
- * Since: 0.8
- **/
-typedef void (*TrackerExtractMimeFunc) (const gchar *uri,
- TrackerSparqlBuilder *preupdate,
- TrackerSparqlBuilder *metadata);
-
-/**
- * TrackerExtractData:
- * @mime: a string pointer representing a mime type.
- * @func: a function to extract extract the data in.
- *
- * The @mime is usually in the format of "image/png" for example.
-
- * The @func is called by tracker-extract if an extractor plugin
- * matches the @mime.
- *
- * Since: 0.8
- **/
-typedef struct {
- const gchar *mime;
- TrackerExtractMimeFunc func;
-} TrackerExtractData;
-
-/**
- * TrackerExtractDataFunc:
- *
- * This function is used by by tracker-extract to call into each
- * extractor to get a list of mime type and TrackerExtractMimeFunc
- * combinations.
- *
- * Returns: an array of #TrackerExtractData which must be NULL
- * terminated and must NOT be freed.
- *
- * Since: 0.6
- **/
-typedef TrackerExtractData * (*TrackerExtractDataFunc) (void);
-
-/**
- * tracker_extract_get_data:
- *
- *
- * This function must be provided by ALL extractors. This is merely
- * the declaration of the function which must be written by each
- * extractor.
- *
- * This is checked by tracker-extract by looking up the symbols for
- * each plugin and making sure this function exists. This is only
- * called by tracker-extract if a mime type in any of the
- * #TrackerExtractData structures returned matches the mime type of
- * the file being handled.
- *
- * Returns: a #TrackerExtractData pointer which should not be freed.
- * This pointer can be an array of #TrackerExtractData structures
- * where multiple mime types are supported.
+ * Returns: %TRUE if the extraction succeeded, %FALSE otherwise.
*
* Since: 0.8
*/
-TrackerExtractData *tracker_extract_get_data (void);
-
+gboolean tracker_extract_get_metadata (const gchar *uri,
+ const gchar *mimetype,
+ TrackerSparqlBuilder *preupdate,
+ TrackerSparqlBuilder *metadata);
+
G_END_DECLS
#endif /* __LIBTRACKER_EXTRACT_DATA_H__ */
diff --git a/src/tracker-extract/tracker-extract.c b/src/tracker-extract/tracker-extract.c
index dd0cee3..328d471 100644
--- a/src/tracker-extract/tracker-extract.c
+++ b/src/tracker-extract/tracker-extract.c
@@ -32,6 +32,7 @@
#include <libtracker-common/tracker-common.h>
#include <libtracker-extract/tracker-extract.h>
+#include <libtracker-extract/tracker-extract-module-manager.h>
#include "tracker-extract.h"
#include "tracker-main.h"
@@ -41,8 +42,6 @@
#include "tracker-topanalyzer.h"
#endif /* HAVE_STREAMANALYZER */
-#define EXTRACT_FUNCTION "tracker_extract_get_data"
-
#define MAX_EXTRACT_TIME 10
#define UNKNOWN_METHOD_MESSAGE "Method \"%s\" with signature \"%s\" on " \
@@ -73,23 +72,22 @@ static const gchar introspection_xml[] =
extern gboolean debug;
typedef struct {
- GArray *specific_extractors;
- GArray *generic_extractors;
+ gint extracted_count;
+ gint failed_count;
+} StatisticsData;
+
+typedef struct {
+ GHashTable *statistics_data;
+
gboolean disable_shutdown;
gboolean force_internal_extractors;
gboolean disable_summary_on_finalize;
GDBusConnection *d_connection;
GDBusNodeInfo *introspection_data;
guint registration_id;
-} TrackerExtractPrivate;
-typedef struct {
- const GModule *module;
- const TrackerExtractData *edata;
- GPatternSpec *pattern; /* For a fast g_pattern_match() */
- gint extracted_count;
- gint failed_count;
-} ModuleData;
+ gint unhandled_count;
+} TrackerExtractPrivate;
static void tracker_extract_finalize (GObject *object);
static void report_statistics (GObject *object);
@@ -111,16 +109,20 @@ tracker_extract_class_init (TrackerExtractClass *klass)
static void
tracker_extract_init (TrackerExtract *object)
{
+ TrackerExtractPrivate *priv;
+
#ifdef HAVE_LIBSTREAMANALYZER
tracker_topanalyzer_init ();
#endif /* HAVE_STREAMANALYZER */
+
+ priv = TRACKER_EXTRACT_GET_PRIVATE (object);
+ priv->statistics_data = g_hash_table_new (NULL, NULL);
}
static void
tracker_extract_finalize (GObject *object)
{
TrackerExtractPrivate *priv;
- gint i;
priv = TRACKER_EXTRACT_GET_PRIVATE (object);
@@ -132,21 +134,7 @@ tracker_extract_finalize (GObject *object)
tracker_topanalyzer_shutdown ();
#endif /* HAVE_STREAMANALYZER */
- for (i = 0; i < priv->specific_extractors->len; i++) {
- ModuleData *mdata;
-
- mdata = &g_array_index (priv->specific_extractors, ModuleData, i);
- g_pattern_spec_free (mdata->pattern);
- }
- g_array_free (priv->specific_extractors, TRUE);
-
- for (i = 0; i < priv->generic_extractors->len; i++) {
- ModuleData *mdata;
-
- mdata = &g_array_index (priv->generic_extractors, ModuleData, i);
- g_pattern_spec_free (mdata->pattern);
- }
- g_array_free (priv->generic_extractors, TRUE);
+ g_hash_table_destroy (priv->statistics_data);
G_OBJECT_CLASS (tracker_extract_parent_class)->finalize (object);
}
@@ -155,205 +143,41 @@ static void
report_statistics (GObject *object)
{
TrackerExtractPrivate *priv;
- GHashTable *reported = NULL;
- gint i;
+ GHashTableIter iter;
+ gpointer key, value;
priv = TRACKER_EXTRACT_GET_PRIVATE (object);
g_message ("--------------------------------------------------");
g_message ("Statistics:");
- g_message (" Specific Extractors:");
-
- reported = g_hash_table_new (g_direct_hash, g_direct_equal);
- for (i = 0; i < priv->specific_extractors->len; i++) {
- ModuleData *mdata;
- const gchar *name;
+ g_hash_table_iter_init (&iter, priv->statistics_data);
- mdata = &g_array_index (priv->specific_extractors, ModuleData, i);
- name = g_module_name ((GModule*) mdata->module);
+ while (g_hash_table_iter_next (&iter, &key, &value)) {
+ GModule *module = key;
+ StatisticsData *data = value;
- if ((mdata->extracted_count > 0 || mdata->failed_count > 0) &&
- !g_hash_table_lookup (reported, name)) {
- const gchar *name_without_path;
+ if (data->extracted_count > 0 || data->failed_count > 0) {
+ const gchar *name, *name_without_path;
+ name = g_module_name (module);
name_without_path = strrchr (name, G_DIR_SEPARATOR) + 1;
g_message (" Module:'%s', extracted:%d, failures:%d",
name_without_path,
- mdata->extracted_count,
- mdata->failed_count);
- g_hash_table_insert (reported, (gpointer) name, GINT_TO_POINTER(1));
+ data->extracted_count,
+ data->failed_count);
}
}
- if (g_hash_table_size (reported) < 1) {
- g_message (" No files handled");
- }
-
- g_hash_table_remove_all (reported);
-
- g_message (" Generic Extractors:");
-
- for (i = 0; i < priv->generic_extractors->len; i++) {
- ModuleData *mdata;
- const gchar *name;
-
- mdata = &g_array_index (priv->generic_extractors, ModuleData, i);
- name = g_module_name ((GModule*) mdata->module);
-
- if ((mdata->extracted_count > 0 || mdata->failed_count > 0) &&
- !g_hash_table_lookup (reported, name)) {
- const gchar *name_without_path;
+ g_message ("Unhandled files: %d", priv->unhandled_count);
- name_without_path = strrchr (name, G_DIR_SEPARATOR) + 1;
-
- g_message (" Module:'%s', extracted:%d, failed:%d",
- name_without_path,
- mdata->extracted_count,
- mdata->failed_count);
- g_hash_table_insert (reported, (gpointer) name, GINT_TO_POINTER(1));
- }
- }
-
- if (g_hash_table_size (reported) < 1) {
+ if (priv->unhandled_count == 0 &&
+ g_hash_table_size (priv->statistics_data) < 1) {
g_message (" No files handled");
}
g_message ("--------------------------------------------------");
-
- g_hash_table_unref (reported);
-}
-
-static gboolean
-load_modules (const gchar *force_module,
- GArray **specific_extractors,
- GArray **generic_extractors)
-{
- GDir *dir;
- GError *error = NULL;
- const gchar *name;
- gchar *force_module_checked;
- gboolean success;
- const gchar *extractors_dir;
-
- extractors_dir = g_getenv ("TRACKER_EXTRACTORS_DIR");
- if (G_LIKELY (extractors_dir == NULL)) {
- extractors_dir = TRACKER_EXTRACTORS_DIR;
- } else {
- g_message ("Extractor modules directory is '%s' (set in env)", extractors_dir);
- }
-
- dir = g_dir_open (extractors_dir, 0, &error);
-
- if (!dir) {
- g_error ("Error opening modules directory: %s", error->message);
- g_error_free (error);
- return FALSE;
- }
-
- if (G_UNLIKELY (force_module)) {
- if (!g_str_has_suffix (force_module, "." G_MODULE_SUFFIX)) {
- force_module_checked = g_strdup_printf ("%s.%s",
- force_module,
- G_MODULE_SUFFIX);
- } else {
- force_module_checked = g_strdup (force_module);
- }
- } else {
- force_module_checked = NULL;
- }
-
- *specific_extractors = g_array_new (FALSE,
- TRUE,
- sizeof (ModuleData));
-
- *generic_extractors = g_array_new (FALSE,
- TRUE,
- sizeof (ModuleData));
-
-#ifdef HAVE_LIBSTREAMANALYZER
- if (!force_internal_extractors) {
- g_message ("Adding extractor for libstreamanalyzer");
- g_message (" Generic match for ALL (tried first before our module)");
- g_message (" Specific match for NONE (fallback to our modules)");
- } else {
- g_message ("Not using libstreamanalyzer");
- g_message (" It is available but disabled by command line");
- }
-#endif /* HAVE_STREAMANALYZER */
-
- while ((name = g_dir_read_name (dir)) != NULL) {
- TrackerExtractDataFunc func;
- GModule *module;
- gchar *module_path;
-
- if (!g_str_has_suffix (name, "." G_MODULE_SUFFIX)) {
- continue;
- }
-
- if (force_module_checked && strcmp (name, force_module_checked) != 0) {
- continue;
- }
-
- module_path = g_build_filename (extractors_dir, name, NULL);
-
- module = g_module_open (module_path, G_MODULE_BIND_LOCAL);
-
- if (!module) {
- g_warning ("Could not load module '%s': %s",
- name,
- g_module_error ());
- g_free (module_path);
- continue;
- }
-
- g_module_make_resident (module);
-
- if (g_module_symbol (module, EXTRACT_FUNCTION, (gpointer *) &func)) {
- ModuleData mdata = { 0 };
-
- mdata.module = module;
- mdata.edata = (func) ();
-
- g_message ("Adding extractor:'%s' with:",
- g_module_name ((GModule*) mdata.module));
-
- for (; mdata.edata->mime; mdata.edata++) {
- /* Compile pattern from mime */
- mdata.pattern = g_pattern_spec_new (mdata.edata->mime);
-
- if (G_UNLIKELY (strchr (mdata.edata->mime, '*') != NULL)) {
- g_message (" Generic match for mime:'%s'",
- mdata.edata->mime);
- g_array_append_val (*generic_extractors, mdata);
- } else {
- g_message (" Specific match for mime:'%s'",
- mdata.edata->mime);
- g_array_append_val (*specific_extractors, mdata);
- }
- }
- } else {
- g_warning ("Could not load module '%s': Function %s() was not found, is it exported?",
- name, EXTRACT_FUNCTION);
- }
-
- g_free (module_path);
- }
-
- if (G_UNLIKELY (force_module) &&
- (!*specific_extractors || (*specific_extractors)->len < 1) &&
- (!*generic_extractors || (*generic_extractors)->len < 1)) {
- g_warning ("Could not force module '%s', it was not found", force_module_checked);
- success = FALSE;
- } else {
- success = TRUE;
- }
-
- g_free (force_module_checked);
- g_dir_close (dir);
-
- return success;
}
TrackerExtract *
@@ -363,15 +187,8 @@ tracker_extract_new (gboolean disable_shutdown,
{
TrackerExtract *object;
TrackerExtractPrivate *priv;
- GArray *specific_extractors;
- GArray *generic_extractors;
- if (!g_module_supported ()) {
- g_error ("Modules are not supported for this platform");
- return NULL;
- }
-
- if (!load_modules (force_module, &specific_extractors, &generic_extractors)) {
+ if (!tracker_extract_module_manager_init ()) {
return NULL;
}
@@ -383,9 +200,6 @@ tracker_extract_new (gboolean disable_shutdown,
priv->disable_shutdown = disable_shutdown;
priv->force_internal_extractors = force_internal_extractors;
- priv->specific_extractors = specific_extractors;
- priv->generic_extractors = generic_extractors;
-
return object;
}
@@ -499,106 +313,50 @@ get_file_metadata (TrackerExtract *extract,
* data we need from the extractors.
*/
if (mime_used) {
- guint i;
- glong length;
- gchar *reversed;
-
- /* Using a reversed string while pattern matching is faster
- * if we have lots of patterns with wildcards.
- * We are assuming here that mime_used is ASCII always, so
- * we avoid g_utf8_strreverse() */
- reversed = g_strdup (mime_used);
- g_strreverse (reversed);
- length = strlen (mime_used);
-
- for (i = 0; i < priv->specific_extractors->len; i++) {
- const TrackerExtractData *edata;
- ModuleData *mdata;
-
- mdata = &g_array_index (priv->specific_extractors, ModuleData, i);
- edata = mdata->edata;
-
- if (g_pattern_match (mdata->pattern, length, mime_used, reversed)) {
- gint items;
-
- tracker_dbus_request_comment (request,
- " Extracting with module:'%s'",
- g_module_name ((GModule*) mdata->module));
-
- (*edata->func) (uri, preupdate, statements);
+ TrackerExtractMetadataFunc func;
+ GModule *module;
- items = tracker_sparql_builder_get_length (statements);
+ module = tracker_extract_module_manager_get_for_mimetype (mime_used, &func);
- tracker_dbus_request_comment (request,
- " Found %d metadata items",
- items);
+ if (module) {
+ StatisticsData *data;
+ gint items;
- mdata->extracted_count++;
+ (func) (uri, mime_used, preupdate, statements);
- if (items == 0) {
- mdata->failed_count++;
- continue;
- }
+ items = tracker_sparql_builder_get_length (statements);
- tracker_sparql_builder_insert_close (statements);
+ tracker_dbus_request_comment (request,
+ " Found %d metadata items",
+ items);
- g_free (mime_used);
- g_free (reversed);
+ data = g_hash_table_lookup (priv->statistics_data, module);
- *preupdate_out = preupdate;
- *statements_out = statements;
- return TRUE;
+ if (!data) {
+ data = g_slice_new0 (StatisticsData);
+ g_hash_table_insert (priv->statistics_data, module, data);
}
- }
-
- for (i = 0; i < priv->generic_extractors->len; i++) {
- const TrackerExtractData *edata;
- ModuleData *mdata;
-
- mdata = &g_array_index (priv->generic_extractors, ModuleData, i);
- edata = mdata->edata;
-
- if (g_pattern_match (mdata->pattern, length, mime_used, reversed)) {
- gint items;
-
- tracker_dbus_request_comment (request,
- " Extracting with module:'%s'",
- g_module_name ((GModule*) mdata->module));
- (*edata->func) (uri, preupdate, statements);
-
- items = tracker_sparql_builder_get_length (statements);
-
- tracker_dbus_request_comment (request,
- " Found %d metadata items",
- items);
-
- mdata->extracted_count++;
-
- if (items == 0) {
- mdata->failed_count++;
- continue;
- }
+ data->extracted_count++;
+ if (items > 0) {
tracker_sparql_builder_insert_close (statements);
- g_free (mime_used);
- g_free (reversed);
-
*preupdate_out = preupdate;
*statements_out = statements;
return TRUE;
+ } else {
+ data->failed_count++;
}
+ } else {
+ priv->unhandled_count++;
}
tracker_dbus_request_comment (request,
" Could not find any extractors to handle metadata type "
"(mime: %s)",
mime_used);
-
- g_free (mime_used);
- g_free (reversed);
} else {
tracker_dbus_request_comment (request,
" No mime available, not extracting data");
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]