Re: [Tracker] [PATCH] Re: Extracting Embedded Licenses

On Sat, 2007-06-30 at 20:19 -0700, Jason Kivlighn wrote:
these look ok - will apply as soon as the other stuff is tidied up (need
tracker_read_xmp before I can apply these)
Here's a patch that just adds tracker_read_xmp.  It reads licenses and
dublin core metadata from XMP, and could easily be extended to other
schemas (like exif).

It's not correct, because DC is abstract in Tracker and must not be used
to store metadata - you must use a subclass of the DC type instead.


for Docs

Doc:Title is a subclass of DC:Title

For Images

Image:Title is a subclass of DC:Title

What you will need to do is use the service type to determine which
subclass to use.

For now you can just set it for Images if you like
Here's a patch that just uses Image:* for now and fixes the style issues
(I hope I got them all).  Hopefully this can make it into 0.6, along
with the extractor updates.

Regarding using the service type, that's going to take more extensive
updates to Tracker.  All extraction of XMP is done from tracker-extract,
which doesn't know the service type.  Maybe extend
tracker_metadata_get_embedded(...) to somehow post-process the extracted
fields.  Or maybe also pass along the service type to tracker-extract.
I'm not sure what the most elegant solution is here.

Also, there are still a number of lines which omit spacing, e.g.:
if (strcmp(qual_path,"xml:lang") == 0) {

I can tidy this up for you, but it will probably take a few days as I'm
busy getting 0.6 out of the door at the moment.

If you can fix it today, then I can include it in the next release.


Index: src/trackerd/tracker-metadata.c
--- src/trackerd/tracker-metadata.c     (revision 607)
+++ src/trackerd/tracker-metadata.c     (working copy)
@@ -83,7 +83,8 @@
-                         "application/x-wpg"
+                         "application/x-wpg",
+                         "application/rdf+xml"
Index: src/tracker-extract/tracker-extract.c
--- src/tracker-extract/tracker-extract.c       (revision 607)
+++ src/tracker-extract/tracker-extract.c       (working copy)
@@ -30,6 +30,11 @@
 #define MAX_MEM 128
+#include <exempi/xmp.h>
+#include <exempi/xmpconsts.h>
 typedef enum {
@@ -59,6 +64,9 @@
 void tracker_extract_msoffice  (gchar *, GHashTable *);
+void tracker_extract_xmp       (gchar *, GHashTable *);
 void tracker_extract_mp3       (gchar *, GHashTable *);
 void tracker_extract_vorbis    (gchar *, GHashTable *);
@@ -92,7 +100,10 @@
        { "application/msword",                         tracker_extract_msoffice        },
        { "application/*",                       tracker_extract_msoffice        },
+       { "application/rdf+xml",                        tracker_extract_xmp             },
        /* Video extractors */
        { "video/*",                                    tracker_extract_gstreamer       },
@@ -183,7 +194,193 @@
+tracker_append_string_to_hash_table( GHashTable *metadata, const gchar *key, const gchar *value, gboolean 
append )
+       char *new_value;
+       if (append) {
+               char *orig;
+               if (g_hash_table_lookup_extended (metadata, key, NULL, (gpointer)&orig )) {
+                       new_value = g_strconcat (orig, " ", value, NULL);
+               } else {
+                       new_value = g_strdup (value);
+               }
+       } else {
+               new_value = g_strdup (value);
+       }
+       g_hash_table_insert (metadata, g_strdup(key), new_value);
+void tracker_xmp_iter(XmpPtr xmp, XmpIteratorPtr iter, GHashTable *metadata, gboolean append);
+void tracker_xmp_iter_simple(GHashTable *metadata, const char *schema, const char *path, const char *value, 
gboolean append);
+/* We have an array, now recursively iterate over its children.  Set 'append' to true so that all values of 
the array are added
+   under one entry. */
+tracker_xmp_iter_array(XmpPtr xmp, GHashTable *metadata, const char *schema, const char *path)
+               XmpIteratorPtr iter = xmp_iterator_new (xmp, schema, path, XMP_ITER_JUSTCHILDREN);
+               tracker_xmp_iter (xmp, iter, metadata, TRUE);
+               xmp_iterator_free (iter);
+/* We have an array, now recursively iterate over its children.  Set 'append' to false so that only one 
item is used. */
+tracker_xmp_iter_alt_text(XmpPtr xmp, GHashTable *metadata, const char *schema, const char *path)
+               XmpIteratorPtr iter = xmp_iterator_new (xmp, schema, path, XMP_ITER_JUSTCHILDREN);
+               tracker_xmp_iter (xmp, iter, metadata, FALSE);
+               xmp_iterator_free (iter);
+/* We have a simple element, but need to iterate over the qualifiers */
+tracker_xmp_iter_simple_qual(XmpPtr xmp, GHashTable *metadata,
+   const char *schema, const char *path, const char *value, gboolean append)
+       XmpIteratorPtr iter = xmp_iterator_new(xmp, schema, path, XMP_ITER_JUSTCHILDREN | 
+       XmpStringPtr the_path = xmp_string_new ();
+       XmpStringPtr the_prop = xmp_string_new ();
+       char *locale = setlocale (LC_ALL, NULL);
+       char *sep = strchr (locale,'.');
+       if (sep) {
+               locale[sep-locale] = '\0';
+       }
+       sep = strchr(locale,'_');
+       if (sep) {
+               locale[sep-locale] = '-';
+       }
+       gboolean ignore_element = FALSE;
+       while(xmp_iterator_next (iter, NULL, the_path, the_prop, NULL))
+       {
+               const char *qual_path = xmp_string_cstr (the_path);
+               const char *qual_value = xmp_string_cstr (the_prop);
+               if (strcmp(qual_path,"xml:lang") == 0) {
+                       /* is this a language we should ignore? */
+                       if (strcmp (qual_value, "x-default") != 0 && strcmp (qual_value, "x-repair") != 0 && 
strcmp (qual_value, locale) != 0) {
+                               ignore_element = TRUE;
+                               break;
+                       }
+               }
+       }
+       if (!ignore_element) {
+               tracker_xmp_iter_simple (metadata, schema, path, value, append);
+       }
+       xmp_string_free (the_prop);
+       xmp_string_free (the_path);
+       xmp_iterator_free (iter);
+/* We have a simple element.  Add any metadata we know about to the hash table  */
+tracker_xmp_iter_simple(GHashTable *metadata,
+   const char *schema, const char *path, const char *value, gboolean append)
+       char *name = g_strdup (strchr (path, ':')+1);
+       const char *index = strrchr (name, '[');
+       if (index) {
+               name[index-name] = '\0';
+       }
+       /* Dublin Core */
+       if (strcmp(schema, NS_DC) == 0) {
+               if (strcmp (name, "title") == 0) {
+                       tracker_append_string_to_hash_table (metadata, "Image:Title", value, append);
+               }
+               else if (strcmp (name, "rights") == 0) {
+                       tracker_append_string_to_hash_table (metadata, "File:Copyright", value, append);
+               }
+               else if (strcmp (name, "creator") == 0) {
+                       tracker_append_string_to_hash_table (metadata, "Image:Creator", value, append);
+               }
+               else if (strcmp (name, "description") == 0) {
+                       tracker_append_string_to_hash_table (metadata, "Image:Description", value, append);
+               }
+               else if (strcmp (name, "date") == 0) {
+                       tracker_append_string_to_hash_table (metadata, "Image:Date", value, append);
+               }
+               else if (strcmp (name, "keywords") == 0) {
+                       tracker_append_string_to_hash_table (metadata, "Image:Keywords", value, append);
+               }
+       }
+       /* Creative Commons */
+       else if (strcmp (schema, NS_CC) == 0) {
+               if (strcmp (name, "license") == 0) {
+                       tracker_append_string_to_hash_table (metadata, "File:License", value, append);
+               }
+       }
+       free(name);
+/* Iterate over the XMP, dispatching to the appropriate element type (simple, simple w/qualifiers, or an 
array) handler */
+tracker_xmp_iter(XmpPtr xmp, XmpIteratorPtr iter, GHashTable *metadata, gboolean append)
+       XmpStringPtr the_schema = xmp_string_new ();
+       XmpStringPtr the_path = xmp_string_new ();
+       XmpStringPtr the_prop = xmp_string_new ();
+       uint32_t opt;
+       while(xmp_iterator_next (iter, the_schema, the_path, the_prop, &opt))
+       {
+               const char *schema = xmp_string_cstr (the_schema);
+               const char *path = xmp_string_cstr (the_path);
+               const char *value = xmp_string_cstr (the_prop);
+               if (XMP_IS_PROP_SIMPLE (opt)) {
+                       if (strcmp (path,"") != 0) {
+                               if (XMP_HAS_PROP_QUALIFIERS (opt)) {
+                                       tracker_xmp_iter_simple_qual (xmp, metadata, schema, path, value, 
+                               } else {
+                                       tracker_xmp_iter_simple (metadata, schema, path, value, append);
+                               }
+                       }       
+               }
+               else if (XMP_IS_PROP_ARRAY (opt)) {
+                       if (XMP_IS_ARRAY_ALTTEXT (opt)) {
+                               tracker_xmp_iter_alt_text (xmp, metadata, schema, path);
+                               xmp_iterator_skip (iter, XMP_ITER_SKIPSUBTREE);
+                       } else {
+                               tracker_xmp_iter_array (xmp, metadata, schema, path);
+                               xmp_iterator_skip (iter, XMP_ITER_SKIPSUBTREE);
+                       }
+               }
+       }
+       xmp_string_free (the_prop);
+       xmp_string_free (the_path);
+       xmp_string_free (the_schema);
+tracker_read_xmp (const gchar *buffer, size_t len, GHashTable *metadata)
+       #ifdef HAVE_EXEMPI
+       xmp_init ();
+       XmpPtr xmp = xmp_new_empty ();
+       xmp_parse (xmp, buffer, len);
+       if (xmp != NULL) {
+               XmpIteratorPtr iter = xmp_iterator_new (xmp, NULL, NULL, XMP_ITER_PROPERTIES);
+               tracker_xmp_iter (xmp, iter, metadata, FALSE);
+               xmp_iterator_free (iter);
+               xmp_free (xmp);
+       }
+       xmp_terminate ();
+       #endif
 static GHashTable *
 tracker_get_file_metadata (const char *uri, char *mime)
Index: src/tracker-extract/tracker-extract.h
--- src/tracker-extract/tracker-extract.h       (revision 607)
+++ src/tracker-extract/tracker-extract.h       (working copy)
@@ -17,6 +17,13 @@
  * Boston, MA  02110-1301, USA.
 #include <glib.h>
 gboolean       tracker_spawn (char **argv, int timeout, char **tmp_stdout, int *exit_status);
+void           tracker_read_xmp (gchar *buffer, size_t len, GHashTable *metadata);
Index: src/tracker-extract/tracker-extract-xmp.c
--- src/tracker-extract/tracker-extract-xmp.c   (revision 0)
+++ src/tracker-extract/tracker-extract-xmp.c   (revision 0)
@@ -0,0 +1,40 @@
+/* Tracker Extract - extracts embedded metadata from files
+ * Copyright (C) 2007, Jason Kivlighn (jkivlighn gmail com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ */
+#include "config.h"
+#include <glib.h>
+#include "tracker-extract.h"
+void tracker_extract_xmp (gchar* filename, GHashTable *metadata)
+       gchar *contents;
+       gsize length;
+       GError *error;
+       if ( g_file_get_contents( filename, &contents, &length, &error ) )
+               tracker_read_xmp(contents, length, metadata);
+#warning "Not building XMP metadata extractor."
+#endif  /* HAVE_EXEMPI */
Index: src/tracker-extract/
--- src/tracker-extract/     (revision 607)
+++ src/tracker-extract/     (working copy)
@@ -5,6 +5,7 @@
        $(LIBGSF_CFLAGS)                        \
        $(LIBGSF_CFLAGS)                        \
        $(GSTREAMER_CFLAGS)                     \
+       $(EXEMPI_CFLAGS)                        \
 bin_PROGRAMS = tracker-extract
@@ -33,6 +34,7 @@
        tracker-extract-imagemagick.c           \
        tracker-extract-mplayer.c               \
        tracker-extract-totem.c                 \
+       tracker-extract-xmp.c                   \
 tracker_extract_LDADD = $(GLIB2_LIBS)          \
@@ -41,4 +43,5 @@
        $(LIBEXIF_LIBS)                         \
        $(LIBGSF_LIBS)                          \
        $(GSTREAMER_LIBS)                       \
+       $(EXEMPI_LIBS)                          \
---        (revision 607)
+++        (working copy)
@@ -610,6 +610,25 @@
+# check for exempi
+AC_ARG_ENABLE(xmp, AC_HELP_STRING([--disable-xmp], [Disable XMP extraction]),,[enable_xmp=yes])
+if test "x$enable_xmp" = "xyes"; then
+               exempi-2.0 >= $EXEMPI_REQUIRED],
+               [have_exempi=yes] , [have_exempi=no])
+       have_exempi="no (disabled)"
+AM_CONDITIONAL(HAVE_EXEMPI, test "$have_exempi" = "yes")
+test "$have_exempi" = "yes" && AC_DEFINE(HAVE_EXEMPI, [], [Define if we have exempi])
@@ -677,6 +696,7 @@
        exif (jpeg):                            $have_libexif
        gsf:                                    $have_libgsf
        video files:                            $videos_are_handled ($videos_handler)
+       embedded xmp:                           $have_exempi
 if test "x$enable_external_sqlite" = "xyes"; then

[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]