[tracker/extract-sparql] Allow libtracker-extract users to get a complete SPARQL INSERT for files
- From: Philip Van Hoof <pvanhoof src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/extract-sparql] Allow libtracker-extract users to get a complete SPARQL INSERT for files
- Date: Mon, 17 Dec 2012 09:30:37 +0000 (UTC)
commit e4af9b34e22b60d37251426a29dfcd9b566f6e0e
Author: Philip Van Hoof <philip codeminded be>
Date: Mon Dec 17 10:22:53 2012 +0100
Allow libtracker-extract users to get a complete SPARQL INSERT for files
This feature makes it possible for, for example, an MTP daemon to query
for a file's SPARQL (with metadata extraction) through an API call in
libtracker-extract. Also added is a series of command-line switches to
use the feature from the tracker-sparql command line.
The API allows passing the destination URL, destination GRAPH, modified
time and last accessed time.
Sample usage:
/* Completion callback: prints the generated SPARQL, or the error message,
 * and then stops the main loop passed in as user_data. */
static void
on_finished (GObject *none, GAsyncResult *result, gpointer user_data)
{
	GMainLoop *loop = user_data;
	GError *error = NULL;
	gchar *sparql = tracker_extract_get_sparql_finish (result, &error);

	if (error == NULL) {
		g_print ("%s", sparql);
		g_free (sparql);
	} else {
		/* g_error() is fatal and would abort before the cleanup below runs */
		g_printerr ("%s\n", error->message);
	}

	g_clear_error (&error);
	g_main_loop_quit (loop);
}
int main (int argc, char **argv)
{
const gchar *file = "/tmp/file.png";
const gchar *dest = "file:///tmp/destination.png"
GMainLoop *loop;
g_type_init();
loop = g_main_loop_new (NULL, FALSE);
tracker_extract_get_sparql (file, dest, NULL, time(0),
time(0), on_finished, loop);
g_main_loop_run (loop);
g_object_unref (loop);
return 0;
}
src/libtracker-common/tracker-marshal.list | 1 +
.../Makefile-shared-sources.decl | 10 +
src/libtracker-extract/Makefile.am | 38 ++-
src/libtracker-extract/tracker-extract-sparql.c | 415 ++++++++++++++++++++
src/libtracker-extract/tracker-extract-sparql.h | 47 +++
src/libtracker-extract/tracker-extract.h | 2 +
src/libtracker-extract/tracker-marshal.list | 2 +
.../tracker-storage.c | 0
.../tracker-storage.h | 10 +-
src/libtracker-miner/Makefile.am | 6 +-
src/libtracker-miner/tracker-miner.h | 1 -
src/miners/fs/tracker-miner-files.c | 1 +
src/tracker-extract/tracker-controller.c | 1 -
src/tracker-extract/tracker-extract-gstreamer.c | 312 +++++++++++++++
src/tracker-extract/tracker-media-art.c | 2 +-
src/tracker-utils/Makefile.am | 1 +
src/tracker-utils/tracker-sparql.c | 47 +++-
src/tracker-writeback/tracker-writeback.c | 2 +-
18 files changed, 882 insertions(+), 16 deletions(-)
---
diff --git a/src/libtracker-common/tracker-marshal.list b/src/libtracker-common/tracker-marshal.list
index 72f9937..f7cef75 100644
--- a/src/libtracker-common/tracker-marshal.list
+++ b/src/libtracker-common/tracker-marshal.list
@@ -1 +1,2 @@
VOID:STRING,STRING
+VOID:STRING,STRING,STRING,BOOLEAN,BOOLEAN
diff --git a/src/libtracker-extract/Makefile-shared-sources.decl b/src/libtracker-extract/Makefile-shared-sources.decl
new file mode 100644
index 0000000..a714777
--- /dev/null
+++ b/src/libtracker-extract/Makefile-shared-sources.decl
@@ -0,0 +1,10 @@
+
+# Includes sources that will be shared with the
+# testers in test/libtracker-extract
+
+libtracker_extract_marshal_sources = \
+ $(top_builddir)/src/libtracker-extract/tracker-marshal.c
+
+libtracker_extract_marshal_headers = \
+ $(top_builddir)/src/libtracker-extract/tracker-marshal.h
+
diff --git a/src/libtracker-extract/Makefile.am b/src/libtracker-extract/Makefile.am
index f3be025..4627923 100644
--- a/src/libtracker-extract/Makefile.am
+++ b/src/libtracker-extract/Makefile.am
@@ -1,5 +1,16 @@
include $(top_srcdir)/Makefile.decl
+# Include list of shared sources:
+# Defines:
+# $(libtracker_extract_marshal_sources)
+# $(libtracker_extract_marshal_headers)
+#
+# Headers and sources are split for the tests to build
+# with make distcheck.
+#
+
+include Makefile-shared-sources.decl
+
AM_CPPFLAGS = \
$(BUILD_CFLAGS) \
-I$(top_srcdir)/src \
@@ -13,6 +24,8 @@ lib_LTLIBRARIES = libtracker-extract- TRACKER_API_VERSION@.la
libtracker_extractincludedir=$(includedir)/tracker-$(TRACKER_API_VERSION)/libtracker-extract/
libtracker_extract_ TRACKER_API_VERSION@_la_SOURCES = \
+ $(libtracker_extract_marshal_sources) \
+ $(libtracker_extract_marshal_headers) \
tracker-data.h \
tracker-encoding.c \
tracker-exif.c \
@@ -29,7 +42,11 @@ libtracker_extract_ TRACKER_API_VERSION@_la_SOURCES = \
tracker-module-manager.h \
tracker-utils.c \
tracker-xmp.c \
- tracker-xmp.h
+ tracker-xmp.h \
+ tracker-storage.c \
+ tracker-storage.h \
+ tracker-extract-sparql.c \
+ tracker-extract-sparql.h
noinst_HEADERS =
@@ -44,7 +61,22 @@ libtracker_extractinclude_HEADERS = \
tracker-iptc.h \
tracker-module-manager.h \
tracker-utils.h \
- tracker-xmp.h
+ tracker-xmp.h \
+ tracker-storage.h \
+ tracker-extract-sparql.h
+
+
+$(top_builddir)/src/libtracker-extract/tracker-marshal.h: tracker-marshal.list
+ $(AM_V_GEN)$(GLIB_GENMARSHAL) $< --prefix=tracker_marshal --header > $@
+
+$(top_builddir)/src/libtracker-extract/tracker-marshal.c: tracker-marshal.list
+ $(AM_V_GEN)(echo "#include \"tracker-marshal.h\""; \
+ $(GLIB_GENMARSHAL) $< --prefix=tracker_marshal --body) > $@
+
+
+BUILT_SOURCES = \
+ $(libtracker_extract_marshal_sources) \
+ $(libtracker_extract_marshal_headers)
if HAVE_ENCA
libtracker_extract_ TRACKER_API_VERSION@_la_SOURCES += \
@@ -98,5 +130,7 @@ gir_DATA = $(INTROSPECTION_GIRS)
typelibdir = $(libdir)/girepository-1.0
typelib_DATA = $(INTROSPECTION_GIRS:.gir=.typelib)
+EXTRA_DIST = tracker-marshal.list
+
CLEANFILES = $(gir_DATA) $(typelib_DATA)
endif
diff --git a/src/libtracker-extract/tracker-extract-sparql.c b/src/libtracker-extract/tracker-extract-sparql.c
new file mode 100644
index 0000000..50e02ac
--- /dev/null
+++ b/src/libtracker-extract/tracker-extract-sparql.c
@@ -0,0 +1,415 @@
+/*
+ * Copyright (C) 2012 Codeminded <philip codeminded be>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include "tracker-extract-sparql.h"
+#include "tracker-extract.h"
+
+#include <libtracker-sparql/tracker-sparql.h>
+#include <libtracker-common/tracker-ontologies.h>
+
+/*
+ * State carried through the chain of asynchronous callbacks that
+ * tracker_extract_get_sparql() starts.
+ */
+typedef struct {
+ /* Update builder accumulating the INSERT; created in on_file_exists_checked() */
+ TrackerSparqlBuilder *sparql;
+ /* Local file to inspect, built from the temp_file path */
+ GFile *file;
+ /* URN of a resource already found for this nie:url, or NULL (then "_:file" is used) */
+ gchar *urn;
+ /* Copy of the destination URL, written out as nie:url */
+ gchar *url;
+ /* Copy of the graph to insert into, or NULL when no graph was given */
+ gchar *graph_urn;
+ /* Async result that is completed once the SPARQL (or an error) is available */
+ GSimpleAsyncResult *simple;
+ /* Used to look up the removable-device UUID for the destination file */
+ TrackerStorage *storage;
+ /* Caller-supplied timestamps; only used when the matching *_set flag is TRUE */
+ time_t last_mod;
+ time_t last_access;
+ /* TRUE when a non-zero last_mod / last_access was passed by the caller */
+ gboolean last_mod_set;
+ gboolean last_access_set;
+} ExtractionData;
+
+/*
+ * Releases everything owned by @data, including @data itself, and returns
+ * the GSimpleAsyncResult it carried so the caller can complete it.
+ *
+ * The result object is not unreffed here. @data must not be used after
+ * this call.
+ */
+static GSimpleAsyncResult*
+extraction_data_free (ExtractionData *data)
+{
+	GSimpleAsyncResult *simple = data->simple;
+
+	g_free (data->graph_urn);
+	g_free (data->urn);
+	g_free (data->url);
+
+	if (data->file) {
+		g_object_unref (data->file);
+	}
+
+	if (data->sparql) {
+		g_object_unref (data->sparql);
+	}
+
+	if (data->storage) {
+		g_object_unref (data->storage);
+	}
+
+	/* The struct was allocated with g_new0() in tracker_extract_get_sparql() */
+	g_free (data);
+
+	return simple;
+}
+
+/*
+ * Closes the INSERT held in data->sparql and splices in the fragments
+ * produced by the extractor.
+ *
+ * @sparql is appended after the subject (the known URN, or the "_:file"
+ * blank node), @where becomes a WHERE block, @preupdate is prepended and
+ * @postupdate appended. NULL or empty fragments are skipped.
+ */
+static void
+sparql_builder_finish (ExtractionData *data,
+                       const gchar *preupdate,
+                       const gchar *postupdate,
+                       const gchar *sparql,
+                       const gchar *where)
+{
+	TrackerSparqlBuilder *builder = data->sparql;
+
+	if (sparql != NULL && sparql[0] != '\0') {
+		if (data->urn == NULL) {
+			tracker_sparql_builder_append (builder, "_:file");
+		} else {
+			gchar *subject = g_strconcat ("<", data->urn, ">", NULL);
+
+			tracker_sparql_builder_append (builder, subject);
+			g_free (subject);
+		}
+
+		tracker_sparql_builder_append (builder, sparql);
+	}
+
+	/* The graph block is only open when a graph was requested */
+	if (data->graph_urn != NULL) {
+		tracker_sparql_builder_graph_close (builder);
+	}
+
+	tracker_sparql_builder_insert_close (builder);
+
+	if (where != NULL && where[0] != '\0') {
+		tracker_sparql_builder_where_open (builder);
+		tracker_sparql_builder_append (builder, where);
+		tracker_sparql_builder_where_close (builder);
+	}
+
+	/* Pre-update statements go in front of the INSERT ... */
+	if (preupdate != NULL && preupdate[0] != '\0') {
+		tracker_sparql_builder_prepend (builder, preupdate);
+	}
+
+	/* ... and post-update statements after it */
+	if (postupdate != NULL && postupdate[0] != '\0') {
+		tracker_sparql_builder_append (builder, postupdate);
+	}
+}
+
+/*
+ * Last step for files handled by the extractor: merges the extractor's
+ * SPARQL fragments into the INSERT and completes the async result with
+ * the final string, or with the extractor's error.
+ */
+static void
+extractor_get_embedded_metadata_cb (GObject *object, GAsyncResult *result, gpointer user_data)
+{
+	ExtractionData *data = user_data;
+	GError *error = NULL;
+	TrackerExtractInfo *info;
+	const gchar *pre_s, *post_s, *meta_s, *where;
+	gchar *sparql_s;
+
+	/* NOTE(review): ownership of the returned info is not visible from here */
+	info = tracker_extract_client_get_metadata_finish (G_FILE(object), result, &error);
+
+	if (error != NULL) {
+		g_simple_async_result_set_from_error (data->simple, error);
+		g_simple_async_result_complete (extraction_data_free (data));
+		g_clear_error (&error);
+		return;
+	}
+
+	pre_s = tracker_sparql_builder_get_result (tracker_extract_info_get_preupdate_builder (info));
+	post_s = tracker_sparql_builder_get_result (tracker_extract_info_get_postupdate_builder (info));
+	meta_s = tracker_sparql_builder_get_result (tracker_extract_info_get_metadata_builder (info));
+	where = tracker_extract_info_get_where_clause (info);
+
+	sparql_builder_finish (data, pre_s, post_s, meta_s, where);
+
+	/* The result keeps its own copy, since the builder is released with data */
+	sparql_s = g_strdup (tracker_sparql_builder_get_result (data->sparql));
+	g_simple_async_result_set_op_res_gpointer (data->simple, sparql_s, g_free);
+	g_simple_async_result_complete (extraction_data_free (data));
+}
+
+/*
+ * Third step: receives the GFileInfo of the local file and writes the
+ * file-level properties (name, size, timestamps, URL, MIME type, data
+ * source) into the INSERT.
+ *
+ * If the MIME type is handled by an extractor module, embedded metadata
+ * is requested next; otherwise the INSERT is closed and the async result
+ * is completed here. On error the result is completed with that error.
+ */
+static void
+on_fileinfo_received (GObject *file, GAsyncResult *result, gpointer user_data)
+{
+	GError *error = NULL;
+	ExtractionData *data = user_data;
+	GFileInfo *file_info = g_file_query_info_finish (G_FILE(file), result, &error);
+
+	if (error == NULL) {
+		TrackerSparqlBuilder *sparql = data->sparql;
+		time_t time_;
+		const gchar *mime_type;
+		const gchar *removable_device_uuid;
+		gchar *removable_device_urn;
+		gchar *basename;
+		GFile *dest_file = g_file_new_for_uri (data->url);
+
+		if (g_file_info_get_file_type (file_info) == G_FILE_TYPE_DIRECTORY) {
+			tracker_sparql_builder_predicate (sparql, "a");
+			tracker_sparql_builder_object (sparql, "nfo:Folder");
+		}
+
+		/* The name comes from the destination, not from the temporary file.
+		 * g_file_get_basename() returns a new string that must be freed. */
+		basename = g_file_get_basename (dest_file);
+		tracker_sparql_builder_predicate (sparql, "nfo:fileName");
+		tracker_sparql_builder_object_string (sparql, basename);
+		g_free (basename);
+
+		tracker_sparql_builder_predicate (sparql, "nfo:fileSize");
+		tracker_sparql_builder_object_int64 (sparql, g_file_info_get_size (file_info));
+
+		/* Caller-supplied timestamps win over the ones read from disk */
+		if (data->last_mod_set == FALSE) {
+			time_ = g_file_info_get_attribute_uint64 (file_info, G_FILE_ATTRIBUTE_TIME_MODIFIED);
+		} else {
+			time_ = data->last_mod;
+		}
+
+		tracker_sparql_builder_predicate (sparql, "nfo:fileLastModified");
+		tracker_sparql_builder_object_date (sparql, &time_);
+
+		if (data->last_access_set == FALSE) {
+			time_ = g_file_info_get_attribute_uint64 (file_info, G_FILE_ATTRIBUTE_TIME_ACCESS);
+		} else {
+			time_ = data->last_access;
+		}
+
+		tracker_sparql_builder_predicate (sparql, "nfo:fileLastAccessed");
+		tracker_sparql_builder_object_date (sparql, &time_);
+
+		/* Laying the link between the IE and the DO. We use IE = DO */
+		tracker_sparql_builder_predicate (sparql, "nie:isStoredAs");
+		if (data->urn) {
+			tracker_sparql_builder_object_iri (sparql, data->urn);
+		} else {
+			tracker_sparql_builder_object (sparql, "_:file");
+		}
+
+		/* The URL of the DataObject (because IE = DO, this is correct) */
+		tracker_sparql_builder_predicate (sparql, "nie:url");
+		tracker_sparql_builder_object_string (sparql, data->url);
+
+		/* mime_type points into file_info and is only valid while it lives */
+		mime_type = g_file_info_get_content_type (file_info);
+
+		tracker_sparql_builder_predicate (sparql, "nie:mimeType");
+		tracker_sparql_builder_object_string (sparql, mime_type);
+
+		removable_device_uuid = tracker_storage_get_uuid_for_file (data->storage, dest_file);
+
+		if (removable_device_uuid) {
+			removable_device_urn = g_strdup_printf (TRACKER_DATASOURCE_URN_PREFIX "%s",
+			                                        removable_device_uuid);
+		} else {
+			removable_device_urn = g_strdup (TRACKER_NON_REMOVABLE_MEDIA_DATASOURCE_URN);
+		}
+
+		tracker_sparql_builder_predicate (sparql, "a");
+		tracker_sparql_builder_object (sparql, "nfo:FileDataObject");
+
+		tracker_sparql_builder_predicate (sparql, "nie:dataSource");
+		tracker_sparql_builder_object_iri (sparql, removable_device_urn);
+
+		tracker_sparql_builder_predicate (sparql, "tracker:available");
+		tracker_sparql_builder_object_boolean (sparql, TRUE);
+
+		g_free (removable_device_urn);
+		g_object_unref (dest_file);
+
+		if (tracker_extract_module_manager_mimetype_is_handled (mime_type)) {
+			/* Next step, if handled by the extractor, get embedded metadata.
+			 * NOTE(review): file_info is deliberately not unreffed on this
+			 * path because mime_type points into it and it is not visible
+			 * here whether the extract client copies the string before
+			 * returning. Unref it here once that is confirmed. */
+			tracker_extract_client_get_metadata (data->file, mime_type, data->graph_urn, NULL,
+			                                     extractor_get_embedded_metadata_cb,
+			                                     data);
+		} else {
+			gchar *sparql_s;
+
+			/* mime_type is no longer needed, so the info can go */
+			g_object_unref (file_info);
+
+			/* Otherwise, don't request embedded metadata extraction. We're done here */
+			sparql_builder_finish (data, NULL, NULL, NULL, NULL);
+
+			sparql_s = g_strdup (tracker_sparql_builder_get_result (data->sparql));
+			g_simple_async_result_set_op_res_gpointer (data->simple, sparql_s, g_free);
+			g_simple_async_result_complete (extraction_data_free (data));
+		}
+	} else {
+		g_simple_async_result_set_from_error (data->simple, error);
+		g_simple_async_result_complete (extraction_data_free (data));
+	}
+
+	g_clear_error (&error);
+}
+
+/*
+ * Receives the result of the parent-directory lookup. When a URN was
+ * found it is written as nfo:belongsToContainer; either way the file's
+ * GFileInfo is requested next. On error the async result is completed
+ * with that error.
+ */
+static void
+on_parent_received (GObject *con, GAsyncResult *result, gpointer user_data)
+{
+	ExtractionData *data = user_data;
+	GError *error = NULL;
+	TrackerSparqlCursor *cursor;
+	gchar *container_urn = NULL;
+
+	cursor = tracker_sparql_connection_query_finish (TRACKER_SPARQL_CONNECTION(con), result, &error);
+
+	if (error != NULL) {
+		g_simple_async_result_set_from_error (data->simple, error);
+		g_simple_async_result_complete (extraction_data_free (data));
+		g_clear_error (&error);
+		return;
+	}
+
+	/* Only the first row, if any, is used */
+	if (tracker_sparql_cursor_next (cursor, NULL, NULL)) {
+		container_urn = g_strdup (tracker_sparql_cursor_get_string (cursor, 0, NULL));
+	}
+
+	if (container_urn != NULL) {
+		tracker_sparql_builder_predicate (data->sparql, "nfo:belongsToContainer");
+		tracker_sparql_builder_object_iri (data->sparql, container_urn);
+	}
+
+	g_file_query_info_async (data->file,
+	                         G_FILE_ATTRIBUTE_STANDARD_TYPE ","
+	                         G_FILE_ATTRIBUTE_STANDARD_CONTENT_TYPE ","
+	                         G_FILE_ATTRIBUTE_STANDARD_SIZE ","
+	                         G_FILE_ATTRIBUTE_TIME_MODIFIED ","
+	                         G_FILE_ATTRIBUTE_TIME_ACCESS,
+	                         G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS,
+	                         G_PRIORITY_DEFAULT, NULL,
+	                         on_fileinfo_received, data);
+
+	g_free (container_urn);
+	g_object_unref (cursor);
+}
+
+/*
+ * Receives the result of the "is there already a resource with this
+ * nie:url" query. Remembers the URN when one was found, opens the INSERT
+ * (inside the requested graph, if any) with that URN or the "_:file"
+ * blank node as subject, and then looks up the URN of the file's parent
+ * directory. On error the async result is completed with that error.
+ */
+static void
+on_file_exists_checked (GObject *con, GAsyncResult *result, gpointer user_data)
+{
+	ExtractionData *data = user_data;
+	GError *error = NULL;
+	TrackerSparqlCursor *cursor = tracker_sparql_connection_query_finish (TRACKER_SPARQL_CONNECTION(con), result, &error);
+
+	if (error == NULL) {
+		TrackerSparqlBuilder *sparql = tracker_sparql_builder_new_update ();
+		GFile *parent;
+		gchar *url, *escaped_url, *qry;
+
+		/* Only the first row, if any, is used */
+		if (tracker_sparql_cursor_next (cursor, NULL, NULL)) {
+			data->urn = g_strdup (tracker_sparql_cursor_get_string (cursor, 0, NULL));
+		}
+
+		g_object_unref (cursor);
+
+		tracker_sparql_builder_insert_silent_open (sparql, NULL);
+		if (data->graph_urn) {
+			tracker_sparql_builder_graph_open (sparql, data->graph_urn);
+		}
+
+		if (data->urn != NULL) {
+			tracker_sparql_builder_subject_iri (sparql, data->urn);
+		} else {
+			tracker_sparql_builder_subject (sparql, "_:file");
+		}
+
+		tracker_sparql_builder_predicate (sparql, "a");
+		tracker_sparql_builder_object (sparql, "nfo:FileDataObject");
+		tracker_sparql_builder_object (sparql, "nie:InformationElement");
+
+		data->sparql = sparql;
+
+		/* NOTE(review): g_file_get_parent() returns NULL for a filesystem
+		 * root; that case is not handled here. */
+		parent = g_file_get_parent (data->file);
+
+		/* A URI may legally contain an apostrophe, so escape it and use a
+		 * double-quoted literal instead of pasting it between single quotes. */
+		url = g_file_get_uri (parent);
+		escaped_url = tracker_sparql_escape_string (url);
+		qry = g_strdup_printf ("select ?urn { ?urn nie:url \"%s\" }", escaped_url);
+
+		tracker_sparql_connection_query_async (TRACKER_SPARQL_CONNECTION(con), qry, NULL, on_parent_received, data);
+
+		g_free (qry);
+		g_free (escaped_url);
+		g_free (url);
+		g_object_unref (parent);
+	} else {
+		g_simple_async_result_set_from_error (data->simple, error);
+		g_simple_async_result_complete (extraction_data_free (data));
+	}
+
+	g_clear_error (&error);
+}
+
+/*
+ * First step: receives the SPARQL connection and asks the store whether a
+ * resource with the destination URL already exists. On error the async
+ * result is completed with that error.
+ */
+static void
+on_get_connection (GObject *none, GAsyncResult *result, gpointer user_data)
+{
+	ExtractionData *data = user_data;
+	GError *error = NULL;
+	TrackerSparqlConnection *con = tracker_sparql_connection_get_finish (result, &error);
+
+	if (error == NULL) {
+		gchar *escaped_url, *qry;
+
+		/* The URL comes straight from the caller: escape it and use a
+		 * double-quoted literal so quotes in it cannot break the query. */
+		escaped_url = tracker_sparql_escape_string (data->url);
+		qry = g_strdup_printf ("select ?urn { ?urn nie:url \"%s\" }", escaped_url);
+
+		tracker_sparql_connection_query_async (con, qry, NULL, on_file_exists_checked, data);
+
+		g_free (qry);
+		g_free (escaped_url);
+
+		/* NOTE(review): the reference returned by
+		 * tracker_sparql_connection_get_finish() is never dropped - confirm
+		 * whether the pending query keeps the connection alive before
+		 * adding an unref here. */
+	} else {
+		g_simple_async_result_set_from_error (data->simple, error);
+		g_simple_async_result_complete (extraction_data_free (data));
+	}
+
+	g_clear_error (&error);
+}
+
+/**
+ * tracker_extract_get_sparql:
+ * @temp_file: local path of the file to inspect
+ * @dest_url: URL to record as the file's nie:url
+ * @graph: graph to insert into, or %NULL for none
+ * @last_mod: modification time to record, or 0 to use the file's own
+ * @last_access: access time to record, or 0 to use the file's own
+ * @callback: called when the SPARQL is ready or the operation failed
+ * @user_data: data passed to @callback
+ *
+ * Asynchronously builds a SPARQL INSERT describing @temp_file as if it
+ * were stored at @dest_url. Retrieve the outcome from @callback with
+ * tracker_extract_get_sparql_finish().
+ */
+void
+tracker_extract_get_sparql (const gchar *temp_file,
+                            const gchar *dest_url,
+                            const gchar *graph,
+                            time_t last_mod,
+                            time_t last_access,
+                            GAsyncReadyCallback callback,
+                            gpointer user_data)
+{
+	ExtractionData *data;
+
+	g_return_if_fail (temp_file != NULL);
+	g_return_if_fail (dest_url != NULL);
+
+	/* g_new0() leaves every pointer NULL and every flag FALSE */
+	data = g_new0 (ExtractionData, 1);
+
+	/* g_strdup() maps a NULL graph to NULL */
+	data->graph_urn = g_strdup (graph);
+
+	/* 0 means "not supplied": the value read from the file is used instead */
+	if (last_mod != 0) {
+		data->last_mod = last_mod;
+		data->last_mod_set = TRUE;
+	}
+
+	if (last_access != 0) {
+		data->last_access = last_access;
+		data->last_access_set = TRUE;
+	}
+
+	data->storage = tracker_storage_new ();
+	data->file = g_file_new_for_path (temp_file);
+	data->url = g_strdup (dest_url);
+	data->simple = g_simple_async_result_new (NULL, callback, user_data, tracker_extract_get_sparql);
+
+	tracker_sparql_connection_get_async (NULL, on_get_connection, data);
+}
+
+/**
+ * tracker_extract_get_sparql_finish:
+ * @result: the #GAsyncResult handed to the callback
+ * @error: return location for an error, or %NULL
+ *
+ * Finishes an operation started with tracker_extract_get_sparql().
+ *
+ * Returns: a newly allocated copy of the SPARQL string, to be freed with
+ * g_free(), or %NULL with @error set when the operation failed.
+ */
+gchar*
+tracker_extract_get_sparql_finish (GAsyncResult *result, GError **error)
+{
+	GSimpleAsyncResult *simple;
+
+	g_return_val_if_fail (G_IS_SIMPLE_ASYNC_RESULT (result), NULL);
+
+	simple = G_SIMPLE_ASYNC_RESULT (result);
+
+	if (g_simple_async_result_propagate_error (simple, error)) {
+		return NULL;
+	}
+
+	/* The stored string belongs to the result (registered with g_free as
+	 * its destroy notify), so return a copy the caller can own. */
+	return g_strdup (g_simple_async_result_get_op_res_gpointer (simple));
+}
+
diff --git a/src/libtracker-extract/tracker-extract-sparql.h b/src/libtracker-extract/tracker-extract-sparql.h
new file mode 100644
index 0000000..1bad385
--- /dev/null
+++ b/src/libtracker-extract/tracker-extract-sparql.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2012 Codeminded <philip codeminded be>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __LIBTRACKER_EXTRACT_SPARQL_H__
+#define __LIBTRACKER_EXTRACT_SPARQL_H__
+
+#if !defined (__LIBTRACKER_EXTRACT_INSIDE__) && !defined (TRACKER_COMPILATION)
+#error "only <libtracker-extract/tracker-extract.h> must be included directly."
+#endif
+
+#include <glib.h>
+#include <gio/gio.h>
+#include <time.h>
+
+G_BEGIN_DECLS
+
+
+/*
+ * Starts an asynchronous operation that builds a SPARQL INSERT for the
+ * local file at path temp_file, described as if it were stored at
+ * dest_url. graph may be NULL; a last_mod or last_access of 0 means the
+ * value read from the file is used. callback is invoked with user_data
+ * when the operation completes.
+ */
+void tracker_extract_get_sparql (const gchar *temp_file,
+ const gchar *dest_url,
+ const gchar *graph,
+ time_t last_mod,
+ time_t last_access,
+ GAsyncReadyCallback callback,
+ gpointer user_data);
+
+/*
+ * Finishes the operation from within the callback: returns the SPARQL
+ * string, or NULL with error set when the operation failed.
+ */
+gchar* tracker_extract_get_sparql_finish (GAsyncResult *result,
+ GError **error);
+
+G_END_DECLS
+
+#endif /* __LIBTRACKER_EXTRACT_SPARQL_H__ */
diff --git a/src/libtracker-extract/tracker-extract.h b/src/libtracker-extract/tracker-extract.h
index 5b53a4c..03b24db 100644
--- a/src/libtracker-extract/tracker-extract.h
+++ b/src/libtracker-extract/tracker-extract.h
@@ -34,6 +34,8 @@
#include "tracker-iptc.h"
#include "tracker-utils.h"
#include "tracker-xmp.h"
+#include "tracker-storage.h"
+#include "tracker-extract-sparql.h"
#undef __LIBTRACKER_EXTRACT_INSIDE__
diff --git a/src/libtracker-extract/tracker-marshal.list b/src/libtracker-extract/tracker-marshal.list
new file mode 100644
index 0000000..68f4496
--- /dev/null
+++ b/src/libtracker-extract/tracker-marshal.list
@@ -0,0 +1,2 @@
+VOID:STRING,STRING,STRING,BOOLEAN,BOOLEAN
+VOID:STRING,STRING
diff --git a/src/libtracker-miner/tracker-storage.c b/src/libtracker-extract/tracker-storage.c
similarity index 100%
rename from src/libtracker-miner/tracker-storage.c
rename to src/libtracker-extract/tracker-storage.c
diff --git a/src/libtracker-miner/tracker-storage.h b/src/libtracker-extract/tracker-storage.h
similarity index 93%
rename from src/libtracker-miner/tracker-storage.h
rename to src/libtracker-extract/tracker-storage.h
index f3101c0..f63fea4 100644
--- a/src/libtracker-miner/tracker-storage.h
+++ b/src/libtracker-extract/tracker-storage.h
@@ -17,11 +17,11 @@
* Boston, MA 02110-1301, USA.
*/
-#ifndef __LIBTRACKER_MINER_STORAGE_H__
-#define __LIBTRACKER_MINER_STORAGE_H__
+#ifndef __LIBTRACKER_EXTRACT_STORAGE_H__
+#define __LIBTRACKER_EXTRACT_STORAGE_H__
-#if !defined (__LIBTRACKER_MINER_H_INSIDE__) && !defined (TRACKER_COMPILATION)
-#error "Only <libtracker-miner/tracker-miner.h> can be included directly."
+#if !defined (__LIBTRACKER_EXTRACT_INSIDE__) && !defined (TRACKER_COMPILATION)
+#error "Only <libtracker-extract/tracker-extract.h> can be included directly."
#endif
#include <glib-object.h>
@@ -115,4 +115,4 @@ const gchar * tracker_storage_get_uuid_for_file (TrackerStorage
G_END_DECLS
-#endif /* __LIBTRACKER_MINER_STORAGE_H__ */
+#endif /* __LIBTRACKER_EXTRACT_STORAGE_H__ */
diff --git a/src/libtracker-miner/Makefile.am b/src/libtracker-miner/Makefile.am
index f272a5e..2889187 100644
--- a/src/libtracker-miner/Makefile.am
+++ b/src/libtracker-miner/Makefile.am
@@ -52,9 +52,7 @@ private_sources = \
tracker-task-pool.h \
tracker-task-pool.c \
tracker-sparql-buffer.h \
- tracker-sparql-buffer.c \
- tracker-storage.c \
- tracker-storage.h
+ tracker-sparql-buffer.c
miner_sources = \
$(libtracker_miner_monitor_sources) \
@@ -103,7 +101,6 @@ libtracker_minerinclude_HEADERS = \
tracker-miner-web.h \
tracker-network-provider.h \
tracker-password-provider.h \
- tracker-storage.h \
tracker-thumbnailer.h
libtracker_miner_ TRACKER_API_VERSION@_la_LDFLAGS = \
@@ -119,6 +116,7 @@ libtracker_miner_ TRACKER_API_VERSION@_la_LIBADD = \
libtracker-miner-private.la \
$(top_builddir)/src/libtracker-common/libtracker-common.la \
$(top_builddir)/src/libtracker-sparql-backend/libtracker-sparql- TRACKER_API_VERSION@.la \
+ $(top_builddir)/src/libtracker-extract/libtracker-extract- TRACKER_API_VERSION@.la \
-lm \
$(BUILD_LIBS) \
$(LIBTRACKER_MINER_LIBS)
diff --git a/src/libtracker-miner/tracker-miner.h b/src/libtracker-miner/tracker-miner.h
index 0eab844..5acc763 100644
--- a/src/libtracker-miner/tracker-miner.h
+++ b/src/libtracker-miner/tracker-miner.h
@@ -23,7 +23,6 @@
#define __LIBTRACKER_MINER_H_INSIDE__
#include <libtracker-miner/tracker-crawler.h>
-#include <libtracker-miner/tracker-storage.h>
#include <libtracker-miner/tracker-thumbnailer.h>
#include <libtracker-miner/tracker-media-art.h>
#include <libtracker-miner/tracker-network-provider.h>
diff --git a/src/miners/fs/tracker-miner-files.c b/src/miners/fs/tracker-miner-files.c
index 33938c9..082d979 100644
--- a/src/miners/fs/tracker-miner-files.c
+++ b/src/miners/fs/tracker-miner-files.c
@@ -44,6 +44,7 @@
#include <libtracker-extract/tracker-module-manager.h>
#include <libtracker-extract/tracker-extract-client.h>
+#include <libtracker-extract/tracker-storage.h>
#include "tracker-power.h"
#include "tracker-miner-files.h"
diff --git a/src/tracker-extract/tracker-controller.c b/src/tracker-extract/tracker-controller.c
index 2d01246..7689298 100644
--- a/src/tracker-extract/tracker-controller.c
+++ b/src/tracker-extract/tracker-controller.c
@@ -26,7 +26,6 @@
#include <libtracker-common/tracker-common.h>
#include <libtracker-extract/tracker-extract.h>
-#include <libtracker-miner/tracker-miner.h>
#include <gio/gio.h>
#ifdef STAYALIVE_ENABLE_TRACE
diff --git a/src/tracker-extract/tracker-extract-gstreamer.c b/src/tracker-extract/tracker-extract-gstreamer.c
index 2a2fb7b..15da85a 100644
--- a/src/tracker-extract/tracker-extract-gstreamer.c
+++ b/src/tracker-extract/tracker-extract-gstreamer.c
@@ -89,6 +89,29 @@
#define GST_TAG_FRAMERATE "framerate"
#endif
+/* These are in newer GStreamer releases, but we define them here
+ so we don't need to depend on a new release */
+#ifndef GST_TAG_SHOW_NAME
+#define GST_TAG_SHOW_NAME "show-name"
+#endif
+
+#ifndef GST_TAG_SHOW_EPISODE_NUMBER
+#define GST_TAG_SHOW_EPISODE_NUMBER "show-episode-number"
+#endif
+
+#ifndef GST_TAG_SHOW_SEASON_NUMBER
+#define GST_TAG_SHOW_SEASON_NUMBER "show-season-number"
+#endif
+
+/* Patterns run by parse_uri() against the tidied file name.
+ * TV_REGEX defines the named groups showname, season, episode and name;
+ * MOVIE_REGEX defines name and a four-digit year enclosed in round or
+ * square brackets. */
+#define TV_REGEX "(?<showname>.*)\\.(?<season>(?:\\d{1,2})|(?:[sS]\\K\\d{1,2}))(?<episode>(?:\\d{2})|(?:[eE]\\K\\d{1,2}))\\.?(?<name>.*)?"
+#define MOVIE_REGEX "(?<name>.*)\\.?[\\(\\[](?<year>[12][90]\\d{2})[\\)\\]]"
+
+/* Classification returned by parse_uri() for a video file name */
+typedef enum {
+ VIDEO_TYPE_UNKNOWN, /* matched neither MOVIE_REGEX nor TV_REGEX */
+ VIDEO_TYPE_MOVIE, /* matched MOVIE_REGEX */
+ VIDEO_TYPE_SERIES /* matched TV_REGEX */
+} VideoType;
+
typedef enum {
EXTRACT_MIME_AUDIO,
EXTRACT_MIME_VIDEO,
@@ -1013,6 +1036,295 @@ delete_existing_tracks (TrackerSparqlBuilder *postupdate,
g_free (sparql);
}
+/* Prefixes that sanitise_string() strips from the start of a name.
+ * NULL-terminated; file-private and read-only. */
+static const gchar * const blacklisted_prefix[] = {
+	"tpz-", NULL
+};
+
+/* Words after which sanitise_string() ignores everything, the word itself
+ * included. NULL-terminated; file-private and read-only. */
+static const char * const blacklist[] = {
+	"720p", "1080p",
+	"ws", "WS", "proper", "PROPER",
+	"repack", "real.repack",
+	"hdtv", "HDTV", "pdtv", "PDTV", "notv", "NOTV",
+	"dsr", "DSR", "DVDRip", "divx", "DIVX", "xvid", "Xvid",
+	NULL
+};
+
+/*
+ * Returns a newly allocated copy of @str with a blacklisted prefix
+ * skipped and everything from the first blacklisted word found (searched
+ * in table order) cut off.
+ */
+static gchar *
+sanitise_string (const gchar *str)
+{
+	const gchar *start = str;
+	guint idx;
+
+	/* Every prefix is tested against the original string */
+	for (idx = 0; blacklisted_prefix[idx] != NULL; idx++) {
+		const gchar *prefix = blacklisted_prefix[idx];
+
+		if (g_str_has_prefix (str, prefix)) {
+			start = str + strlen (prefix);
+		}
+	}
+
+	/* The first table entry that occurs anywhere decides the cut */
+	for (idx = 0; blacklist[idx] != NULL; idx++) {
+		const gchar *cut = strstr (start, blacklist[idx]);
+
+		if (cut != NULL) {
+			return g_strndup (start, cut - start);
+		}
+	}
+
+	return g_strdup (start);
+}
+
+/*
+ * Tidies a URI before it is run through the regexes: takes the base
+ * name, drops the part from the last '.', turns '_' and ' ' into '.' and
+ * passes the outcome through sanitise_string(). Returns a newly allocated
+ * string.
+ */
+static gchar *
+uri_to_metadata (const gchar *uri)
+{
+	gchar *stem, *dot, *cleaned;
+
+	stem = g_path_get_basename (uri);
+	dot = strrchr (stem, '.');
+
+	if (dot != NULL) {
+		gchar *trimmed = g_strndup (stem, dot - stem);
+
+		g_free (stem);
+		stem = trimmed;
+	}
+
+	/* Replace _ <space> with . */
+	g_strdelimit (stem, "_ ", '.');
+
+	cleaned = sanitise_string (stem);
+	g_free (stem);
+
+	return cleaned;
+}
+
+/*
+ * Guesses video metadata from the file name in @uri.
+ *
+ * The tidied name is matched against MOVIE_REGEX first and TV_REGEX
+ * second. Each out parameter may be NULL when the caller is not
+ * interested; non-NULL ones are always written. Strings and the date
+ * handed back are newly allocated and owned by the caller.
+ *
+ * Returns VIDEO_TYPE_MOVIE, VIDEO_TYPE_SERIES, or VIDEO_TYPE_UNKNOWN when
+ * neither pattern matched (then @title is the tidied name with '.'
+ * replaced by spaces).
+ */
+static VideoType
+parse_uri (const gchar *uri,
+           gchar **title,
+           gchar **showname,
+           GDate **date,
+           gint *season,
+           gint *episode)
+{
+	gchar *metadata;
+	GRegex *regex;
+	GMatchInfo *info;
+
+	metadata = uri_to_metadata (uri);
+
+	regex = g_regex_new (MOVIE_REGEX, 0, 0, NULL);
+	g_regex_match (regex, metadata, 0, &info);
+
+	if (g_match_info_matches (info)) {
+		if (title) {
+			*title = g_match_info_fetch_named (info, "name");
+			/* Replace "." with <space> */
+			g_strdelimit (*title, ".", ' ');
+		}
+
+		if (date) {
+			gchar *year = g_match_info_fetch_named (info, "year");
+
+			/* A GDate with only its year set is not a valid date and
+			 * cannot be formatted with g_date_strftime(), so pin it to
+			 * January 1st of that year. */
+			*date = g_date_new_dmy (1, G_DATE_JANUARY, atoi (year));
+			g_free (year);
+		}
+
+		if (showname) {
+			*showname = NULL;
+		}
+
+		if (season) {
+			*season = 0;
+		}
+
+		if (episode) {
+			*episode = 0;
+		}
+
+		g_regex_unref (regex);
+		g_match_info_free (info);
+		g_free (metadata);
+
+		return VIDEO_TYPE_MOVIE;
+	}
+
+	g_regex_unref (regex);
+	g_match_info_free (info);
+
+	regex = g_regex_new (TV_REGEX, 0, 0, NULL);
+	g_regex_match (regex, metadata, 0, &info);
+
+	if (g_match_info_matches (info)) {
+		if (title) {
+			*title = g_match_info_fetch_named (info, "name");
+			g_strdelimit (*title, ".", ' ');
+		}
+
+		if (showname) {
+			*showname = g_match_info_fetch_named (info, "showname");
+			g_strdelimit (*showname, ".", ' ');
+		}
+
+		if (season) {
+			gchar *s = g_match_info_fetch_named (info, "season");
+
+			if (s) {
+				/* Skip a leading s/S marker if the capture carries one */
+				if (*s == 's' || *s == 'S') {
+					*season = atoi (s + 1);
+				} else {
+					*season = atoi (s);
+				}
+			} else {
+				*season = 0;
+			}
+
+			g_free (s);
+		}
+
+		if (episode) {
+			gchar *e = g_match_info_fetch_named (info, "episode");
+
+			if (e) {
+				/* Skip a leading e/E marker if the capture carries one */
+				if (*e == 'e' || *e == 'E') {
+					*episode = atoi (e + 1);
+				} else {
+					*episode = atoi (e);
+				}
+			} else {
+				*episode = 0;
+			}
+
+			g_free (e);
+		}
+
+		if (date) {
+			*date = NULL;
+		}
+
+		g_regex_unref (regex);
+		g_match_info_free (info);
+		g_free (metadata);
+
+		return VIDEO_TYPE_SERIES;
+	}
+
+	g_regex_unref (regex);
+	g_match_info_free (info);
+
+	/* The filename doesn't look like a movie or a TV show, just use the
+	   filename without extension as the title */
+	if (title) {
+		/* Ownership of metadata moves to the caller */
+		*title = g_strdelimit (metadata, ".", ' ');
+	} else {
+		/* Nobody takes the string over, so release it here */
+		g_free (metadata);
+	}
+
+	if (showname) {
+		*showname = NULL;
+	}
+
+	if (date) {
+		*date = NULL;
+	}
+
+	if (season) {
+		*season = 0;
+	}
+
+	if (episode) {
+		*episode = 0;
+	}
+
+	return VIDEO_TYPE_UNKNOWN;
+}
+
+/*
+ * Writes title, creation year, episode and season for a video into
+ * @metadata. Values present in the extractor's tag cache are used as
+ * they are; anything missing is guessed from the file name in @uri via
+ * parse_uri().
+ */
+static void
+sanity_check_video_metadata (MetadataExtractor *extractor,
+                             const gchar *uri,
+                             TrackerSparqlBuilder *metadata)
+{
+	VideoType type;
+	gchar *title, *showname;
+	gboolean ret;
+	GDate *date;
+	gint season, episode;
+
+	ret = gst_tag_list_get_string (extractor->tagcache,
+	                               GST_TAG_TITLE, &title);
+	if (ret == FALSE) {
+		title = NULL;
+	}
+
+	ret = gst_tag_list_get_string (extractor->tagcache,
+	                               GST_TAG_SHOW_NAME, &showname);
+	if (ret == FALSE) {
+		showname = NULL;
+	}
+
+	ret = gst_tag_list_get_int (extractor->tagcache,
+	                            GST_TAG_SHOW_EPISODE_NUMBER, &episode);
+	if (ret == FALSE) {
+		episode = 0;
+	}
+
+	ret = gst_tag_list_get_int (extractor->tagcache,
+	                            GST_TAG_SHOW_SEASON_NUMBER, &season);
+	if (ret == FALSE) {
+		season = 0;
+	}
+
+	ret = gst_tag_list_get_date (extractor->tagcache, GST_TAG_DATE, &date);
+	if (ret == FALSE) {
+		date = NULL;
+	}
+
+	/* Pass NULL for everything the tags already provided, so that
+	 * parse_uri() only fills in the gaps */
+	type = parse_uri (uri, title ? NULL : &title,
+	                  showname ? NULL : &showname,
+	                  date ? NULL : &date,
+	                  season != 0 ? NULL : &season,
+	                  episode != 0 ? NULL : &episode);
+
+	if (title && title[0] != '\0') {
+		tracker_sparql_builder_predicate (metadata, "nie:title");
+		tracker_sparql_builder_object_unvalidated (metadata, title);
+	}
+
+	/* FIXME: Tracker doesn't have (as far as I can see) anyway to store
+	   showname, so if title hasn't been set, then we set it to showname */
+	/* Test the first character, not the pointer, for an empty show name */
+	if (showname && showname[0] != '\0') {
+		if (title == NULL || title[0] == '\0') {
+			tracker_sparql_builder_predicate (metadata, "nie:title");
+			tracker_sparql_builder_object_unvalidated (metadata, showname);
+		}
+	}
+	g_free (showname);
+	g_free (title);
+
+	if (date) {
+		gchar buf[10];
+
+		if (g_date_strftime (buf, 10, "%Y", date)) {
+			tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
+			tracker_sparql_builder_object_unvalidated (metadata, buf);
+		}
+
+		g_date_free (date);
+	}
+
+	if (episode > 0) {
+		tracker_sparql_builder_predicate (metadata, "nmm:episodeNumber");
+		tracker_sparql_builder_object_int64 (metadata, episode);
+	}
+
+	if (season > 0) {
+		tracker_sparql_builder_predicate (metadata, "nmm:season");
+		tracker_sparql_builder_object_int64 (metadata, season);
+	}
+
+	if (type == VIDEO_TYPE_SERIES) {
+		/* FIXME: Set isSeries */
+	}
+}
+
static void
extract_metadata (MetadataExtractor *extractor,
const gchar *file_url,
diff --git a/src/tracker-extract/tracker-media-art.c b/src/tracker-extract/tracker-media-art.c
index 7aa5f44..c41ebab 100644
--- a/src/tracker-extract/tracker-media-art.c
+++ b/src/tracker-extract/tracker-media-art.c
@@ -35,7 +35,7 @@
#include <glib/gstdio.h>
#include <gio/gio.h>
-#include <libtracker-miner/tracker-miner.h>
+#include <libtracker-extract/tracker-extract.h>
#include <libtracker-common/tracker-file-utils.h>
#include <libtracker-common/tracker-date-time.h>
#include <libtracker-common/tracker-media-art.h>
diff --git a/src/tracker-utils/Makefile.am b/src/tracker-utils/Makefile.am
index 7795fb8..881ba9f 100644
--- a/src/tracker-utils/Makefile.am
+++ b/src/tracker-utils/Makefile.am
@@ -10,6 +10,7 @@ AM_CPPFLAGS = \
libs = \
$(top_builddir)/src/libtracker-sparql-backend/libtracker-sparql- TRACKER_API_VERSION@.la \
$(top_builddir)/src/libtracker-miner/libtracker-miner- TRACKER_API_VERSION@.la \
+ $(top_builddir)/src/libtracker-extract/libtracker-extract- TRACKER_API_VERSION@.la \
$(top_builddir)/src/libtracker-common/libtracker-common.la \
$(BUILD_LIBS) \
$(TRACKER_UTILS_LIBS)
diff --git a/src/tracker-utils/tracker-sparql.c b/src/tracker-utils/tracker-sparql.c
index 317468d..a46819f 100644
--- a/src/tracker-utils/tracker-sparql.c
+++ b/src/tracker-utils/tracker-sparql.c
@@ -28,6 +28,7 @@
#include <glib/gi18n.h>
#include <libtracker-sparql/tracker-sparql.h>
+#include <libtracker-extract/tracker-extract.h>
#define ABOUT \
"Tracker " PACKAGE_VERSION "\n"
@@ -49,6 +50,9 @@ static gboolean parse_list_indexes (const gchar *option_name,
GError **error);
static gchar *file;
+static gchar *metadata_file_path;
+static gchar *metadata_graph_urn;
+static gchar *metadata_dest_url;
static gchar *query;
static gboolean update;
static gboolean list_classes;
@@ -64,6 +68,18 @@ static GOptionEntry entries[] = {
N_("Path to use to run a query or update from file"),
N_("FILE"),
},
+ { "metadata-file-path", 'm', 0, G_OPTION_ARG_FILENAME, &metadata_file_path,
+ N_("Path to use to get metadata as a sparql insert query for (uses tracker-extract)"),
+ N_("FILE"),
+ },
+ { "metadata-graph-urn", 'g', 0, G_OPTION_ARG_FILENAME, &metadata_graph_urn,
+ N_("Graph to use to get metadata as a sparql insert query for (uses tracker-extract)"),
+ N_("URN"),
+ },
+ { "metadata-dest-url", 'd', 0, G_OPTION_ARG_FILENAME, &metadata_dest_url,
+ N_("Destination URL to use to get metadata as a sparql insert query for (uses tracker-extract)"),
+ N_("URL"),
+ },
{ "query", 'q', 0, G_OPTION_ARG_STRING, &query,
N_("SPARQL query"),
N_("SPARQL"),
@@ -227,6 +243,25 @@ print_cursor (TrackerSparqlCursor *cursor,
}
}
+/*
+ * Completion callback for tracker_extract_get_sparql().
+ *
+ * Fetches the outcome with tracker_extract_get_sparql_finish(), prints the
+ * resulting SPARQL with g_print() when no error was set, or prints the error
+ * message to stderr otherwise, and finally quits the main loop.
+ *
+ * @none:      source object; not used by this callback
+ * @result:    async result handed to tracker_extract_get_sparql_finish()
+ * @user_data: the GMainLoop to quit once the result has been handled
+ */
+static void
+on_metadata_get_sparql_finished (GObject *none, GAsyncResult *result, gpointer user_data)
+{
+	GMainLoop *loop = user_data;
+	GError *error = NULL;
+	gchar *sparql;
+
+	sparql = tracker_extract_get_sparql_finish (result, &error);
+
+	if (error == NULL) {
+		g_print ("%s", sparql);
+	} else {
+		/* g_error() is always fatal: it would abort the process here,
+		 * so the cleanup and g_main_loop_quit() below would never run.
+		 * Report the failure on stderr and carry on instead. */
+		g_printerr ("%s\n", error->message);
+	}
+
+	/* g_free() accepts NULL, so this is safe on both paths. */
+	g_free (sparql);
+	g_clear_error (&error);
+
+	g_main_loop_quit (loop);
+}
+
int
main (int argc, char **argv)
{
@@ -255,7 +290,8 @@ main (int argc, char **argv)
}
if (!list_classes && !list_class_prefixes && !list_properties &&
- !list_notifies && !list_indexes && !search && !file && !query) {
+ !list_notifies && !list_indexes && !search && !file && !query &&
+ !metadata_file_path) {
error_message = _("An argument must be supplied");
} else if (file && query) {
error_message = _("File and query can not be used together");
@@ -551,6 +587,15 @@ main (int argc, char **argv)
g_free (path_in_utf8);
}
+ if (metadata_file_path) {
+ GMainLoop *loop = g_main_loop_new (NULL, FALSE);
+ tracker_extract_get_sparql (metadata_file_path,
+ metadata_dest_url, metadata_graph_urn,
+ time(0), time(0),
+ on_metadata_get_sparql_finished, loop);
+ g_main_loop_run (loop);
+ }
+
if (query) {
if (G_UNLIKELY (update)) {
tracker_sparql_connection_update (connection, query, 0, NULL, &error);
diff --git a/src/tracker-writeback/tracker-writeback.c b/src/tracker-writeback/tracker-writeback.c
index c527197..942d0cd 100644
--- a/src/tracker-writeback/tracker-writeback.c
+++ b/src/tracker-writeback/tracker-writeback.c
@@ -21,7 +21,7 @@
#include "tracker-writeback-module.h"
#include <libtracker-common/tracker-common.h>
-#include <libtracker-miner/tracker-miner.h>
+#include <libtracker-extract/tracker-extract.h>
#include <libtracker-sparql/tracker-sparql.h>
#include <gio/gio.h>
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]