[tracker/tracker-0.6] Bug NB#100123 - Image:Date is empty for some of the images
- From: Carlos Garnacho <carlosg src gnome org>
- To: svn-commits-list gnome org
- Subject: [tracker/tracker-0.6] Bug NB#100123 - Image:Date is empty for some of the images
- Date: Mon, 22 Jun 2009 10:27:25 -0400 (EDT)
commit 9b7bea1d3c5478eb7d5d6695a80cecef85dbe351
Author: Carlos Garnacho <carlosg gnome org>
Date: Mon Jun 22 16:23:28 2009 +0200
Bug NB#100123 - Image:Date is empty for some of the images
Reworked the date parser, based on a patch by Mikael Ottela. The parser now uses
sscanf() to parse the date part by part, and composes the ISO 8601 date from it.
src/libtracker-common/tracker-type-utils.c | 332 +++++++++++---------
src/tracker-extract/tracker-extract-jpeg.c | 2 +
tests/libtracker-common/tracker-type-utils-test.c | 92 ++++++-
3 files changed, 269 insertions(+), 157 deletions(-)
---
diff --git a/src/libtracker-common/tracker-type-utils.c b/src/libtracker-common/tracker-type-utils.c
index b12c8b8..bb095f1 100644
--- a/src/libtracker-common/tracker-type-utils.c
+++ b/src/libtracker-common/tracker-type-utils.c
@@ -27,6 +27,8 @@
#include <strings.h>
#include <string.h>
#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
#include <glib.h>
@@ -46,42 +48,158 @@ static const char imonths[] = {
'6', '7', '8', '9', '0', '1', '2'
};
-static gboolean
-is_int (const gchar *str)
+static gint
+parse_month (const gchar *month)
{
- gint i, len;
+ gint i;
- if (!str || str[0] == '\0') {
- return FALSE;
+ for (i = 0; i < 12; i++) {
+ if (!strncmp (month, months[i], 3)) {
+ return i;
+ }
}
- len = strlen (str);
+ return -1;
+}
- for (i = 0; i < len; i++) {
- if (!g_ascii_isdigit(str[i])) {
- return FALSE;
- }
+static gdouble
+get_remainder_multiplier (gint remainder)
+{
+ gdouble mult;
+
+ mult = (gdouble) remainder;
+
+ while (mult > 1) {
+ mult /= 10;
}
- return TRUE ;
+ return mult;
}
-static gint
-parse_month (const gchar *month)
+/* FIXME This function is crap and it is doing unnecessary
+ * allocations (splits) all over the place.
+ * FIXME We still don't handle large years or year-week
+ * or year-day formats (not that they would be common in
+ * file formats).
+ */
+static gboolean
+tracker_simplify_8601 (const gchar *date_string,
+ gchar *buf)
{
- gint i;
+ gchar *copy, *date, *time, *zone, *sep;
+ gint year, mon, day, hour, min, sec, remainder;
+ gint len;
- for (i = 0; i < 12; i++) {
- if (!strncmp (month, months[i], 3)) {
- return i;
+ if (!date_string) {
+ return FALSE;
+ }
+
+ date = copy = g_strdup (date_string);
+ year = mon = day = 1;
+ hour = min = sec = 0;
+ zone = NULL;
+
+ /* First try to split date and time, either by ' ' or 'T' */
+ sep = strchr (copy, 'T');
+
+ if (!sep) {
+ sep = strchr (copy, ' ');
+ }
+
+ if (sep) {
+ /* Separate date and time */
+ *sep = '\0';
+ time = sep + 1;
+ } else {
+ time = NULL;
+ }
+
+ if (time) {
+ zone = strchr (time, '+');
+
+ if (!zone) {
+ zone = strchr (time, '-');
}
}
- return -1;
+ if (!zone) {
+ zone = "+00:00";
+ }
+
+ if (date) {
+ len = strlen (date);
+
+ if (len == 10 && sscanf (date, "%4d-%2d-%2d", &year, &mon, &day) == 3) {
+ /* YYYY-MM-DD */
+ } else if (len == 8 && sscanf (date, "%2d-%2d-%2d", &year, &mon, &day) == 3) {
+ /* YY-MM-DD */
+ } else if (len == 8 && sscanf (date, "%4d%2d%2d", &year, &mon, &day) == 3) {
+ /* YYYYMMDD */
+ } else if (len == 6 && sscanf (date, "%2d%2d%2d", &year, &mon, &day) == 3) {
+ /* YYMMDD */
+ } else if (len == 7 && sscanf (date, "%4d-%2d", &year, &mon) == 2) {
+ /* YYYY-MM */
+ day = 1;
+ } else if (len == 4 && sscanf (date, "%4d", &year) == 1) {
+ /* Full year */
+ mon = day = 1;
+ } else if (len == 2 && sscanf (date, "%2d", &year) == 1) {
+ /* Only the century (this is a weird one) */
+ year *= 100;
+ mon = day = 1;
+ } else {
+ g_critical ("Could not parse date in '%s'", date);
+ g_free (copy);
+ return FALSE;
+ }
+ }
+
+ if (time) {
+ len = strlen (time);
+
+ if (len >= 8 && sscanf (time, "%2d:%2d:%2d", &hour, &min, &sec) == 3) {
+ /* hh:mm:ss */
+ } else if (len >= 7 && sscanf (time, "%2d:%2d.%d", &hour, &min, &remainder) == 3) {
+ gdouble mult;
+
+ mult = get_remainder_multiplier (remainder);
+ sec = 60 * mult;
+ } else if (len == 6 && sscanf (time, "%2d%2d%2d", &hour, &min, &sec) == 3) {
+ /* hhmmss */
+ } else if (len == 5 && sscanf (time, "%2d:%2d", &hour, &min) == 2) {
+ /* hh:mm */
+ sec = 0;
+ } else if (len >= 4 && sscanf (time, "%2d.%d", &hour, &remainder) == 2) {
+ gdouble mult;
+ gint secs_in_remainder;
+
+ /* hh.r */
+ mult = get_remainder_multiplier (remainder);
+ secs_in_remainder = 60 * 60 * mult;
+ min = secs_in_remainder / 60;
+ sec = secs_in_remainder % 60;
+ } else if (len == 2 && sscanf (time, "%2d", &hour) == 1) {
+ /* hh */
+ min = sec = 0;
+ } else {
+ g_critical ("Could not parse time in '%s'", time);
+ g_free (copy);
+ return FALSE;
+ }
+ }
+
+ sprintf (buf,
+ "%04d-%02d-%02dT%02d:%02d:%02d%s",
+ year, mon, day,
+ hour, min, sec,
+ zone);
+
+ g_free (copy);
+
+ return TRUE;
}
-/* Determine date format and convert to ISO 8601 format */
-/* FIXME We should handle all the fractions here (see ISO 8601), as well as YYYY:DDD etc */
+/* Determine date format and convert to simple ISO 8601 format */
gchar *
tracker_date_format (const gchar *date_string)
{
@@ -94,68 +212,15 @@ tracker_date_format (const gchar *date_string)
len = strlen (date_string);
- /* We cannot format a date without at least a four digit
+ /* We cannot format a date without at least a 2 digit
* year.
*/
- if (len < 4) {
+ if (len < 2) {
return NULL;
}
- /* Check for year only dates (EG ID3 music tags might have
- * Audio.ReleaseDate as 4 digit year)
- */
- if (len == 4) {
- if (is_int (date_string)) {
- buf[0] = date_string[0];
- buf[1] = date_string[1];
- buf[2] = date_string[2];
- buf[3] = date_string[3];
- buf[4] = '-';
- buf[5] = '0';
- buf[6] = '1';
- buf[7] = '-';
- buf[8] = '0';
- buf[9] = '1';
- buf[10] = 'T';
- buf[11] = '0';
- buf[12] = '0';
- buf[13] = ':';
- buf[14] = '0';
- buf[15] = '0';
- buf[16] = ':';
- buf[17] = '0';
- buf[18] = '0';
- buf[19] = '\0';
-
- return g_strdup (buf);
- } else {
- return NULL;
- }
- } else if (len == 10) {
- /* Check for date part only YYYY-MM-DD*/
- buf[0] = date_string[0];
- buf[1] = date_string[1];
- buf[2] = date_string[2];
- buf[3] = date_string[3];
- buf[4] = '-';
- buf[5] = date_string[5];
- buf[6] = date_string[6];
- buf[7] = '-';
- buf[8] = date_string[8];
- buf[9] = date_string[9];
- buf[10] = 'T';
- buf[11] = '0';
- buf[12] = '0';
- buf[13] = ':';
- buf[14] = '0';
- buf[15] = '0';
- buf[16] = ':';
- buf[17] = '0';
- buf[18] = '0';
- buf[19] = '\0';
-
- return g_strdup (buf);
- } else if (len == 14) {
+ /* First check for non-8601 formats (why do we even do this? Extractors should already)*/
+ if (len == 14) {
/* Check for pdf format EG 20050315113224-08'00' or
* 20050216111533Z
*/
@@ -178,62 +243,16 @@ tracker_date_format (const gchar *date_string)
buf[16] = ':';
buf[17] = date_string[12];
buf[18] = date_string[13];
- buf[19] = '\0';
-
- return g_strdup (buf);
- } else if (len == 15 && date_string[14] == 'Z') {
- buf[0] = date_string[0];
- buf[1] = date_string[1];
- buf[2] = date_string[2];
- buf[3] = date_string[3];
- buf[4] = '-';
- buf[5] = date_string[4];
- buf[6] = date_string[5];
- buf[7] = '-';
- buf[8] = date_string[6];
- buf[9] = date_string[7];
- buf[10] = 'T';
- buf[11] = date_string[8];
- buf[12] = date_string[9];
- buf[13] = ':';
- buf[14] = date_string[10];
- buf[15] = date_string[11];
- buf[16] = ':';
- buf[17] = date_string[12];
- buf[18] = date_string[13];
- buf[19] = 'Z';
- buf[20] = '\0';
-
- return g_strdup (buf);
- } else if (len == 21 && (date_string[14] == '-' || date_string[14] == '+' )) {
- buf[0] = date_string[0];
- buf[1] = date_string[1];
- buf[2] = date_string[2];
- buf[3] = date_string[3];
- buf[4] = '-';
- buf[5] = date_string[4];
- buf[6] = date_string[5];
- buf[7] = '-';
- buf[8] = date_string[6];
- buf[9] = date_string[7];
- buf[10] = 'T';
- buf[11] = date_string[8];
- buf[12] = date_string[9];
- buf[13] = ':';
- buf[14] = date_string[10];
- buf[15] = date_string[11];
- buf[16] = ':';
- buf[17] = date_string[12];
- buf[18] = date_string[13];
- buf[19] = date_string[14];
- buf[20] = date_string[15];
- buf[21] = date_string[16];
- buf[22] = ':';
- buf[23] = date_string[18];
- buf[24] = date_string[19];
+ buf[19] = '+';
+ buf[20] = '0';
+ buf[21] = '0';
+ buf[22] = ':';
+ buf[23] = '0';
+ buf[24] = '0';
buf[25] = '\0';
return g_strdup (buf);
+
} else if ((len == 24) && (date_string[3] == ' ')) {
/* Check for msoffice date format "Mon Feb 9 10:10:00 2004" */
gint num_month;
@@ -276,9 +295,16 @@ tracker_date_format (const gchar *date_string)
buf[16] = ':';
buf[17] = date_string[17];
buf[18] = date_string[18];
- buf[19] = '\0';
+ buf[19] = '+';
+ buf[20] = '0';
+ buf[21] = '0';
+ buf[22] = ':';
+ buf[23] = '0';
+ buf[24] = '0';
+ buf[25] = '\0';
return g_strdup (buf);
+
} else if ((len == 19) && (date_string[4] == ':') && (date_string[7] == ':')) {
/* Check for Exif date format "2005:04:29 14:56:54" */
buf[0] = date_string[0];
@@ -300,40 +326,48 @@ tracker_date_format (const gchar *date_string)
buf[16] = ':';
buf[17] = date_string[17];
buf[18] = date_string[18];
- buf[19] = '\0';
+ buf[19] = '+';
+ buf[20] = '0';
+ buf[21] = '0';
+ buf[22] = ':';
+ buf[23] = '0';
+ buf[24] = '0';
+ buf[25] = '\0';
return g_strdup (buf);
- } else if ((len == 28) && (date_string[4] == '-') && (date_string[10] == 'T')
- && (date_string[19] == '.') ) {
- /* The fraction of seconds ISO 8601 "YYYY-MM-DDThh:mm:ss.ff+zz:zz" */
+
+ } else if (len == 15 && date_string[14] == 'Z') {
buf[0] = date_string[0];
buf[1] = date_string[1];
buf[2] = date_string[2];
buf[3] = date_string[3];
buf[4] = '-';
- buf[5] = date_string[5];
- buf[6] = date_string[6];
+ buf[5] = date_string[4];
+ buf[6] = date_string[5];
buf[7] = '-';
- buf[8] = date_string[8];
- buf[9] = date_string[9];
+ buf[8] = date_string[6];
+ buf[9] = date_string[7];
buf[10] = 'T';
- buf[11] = date_string[11];
- buf[12] = date_string[12];
+ buf[11] = date_string[8];
+ buf[12] = date_string[9];
buf[13] = ':';
- buf[14] = date_string[14];
- buf[15] = date_string[15];
+ buf[14] = date_string[10];
+ buf[15] = date_string[11];
buf[16] = ':';
- buf[17] = date_string[17];
- buf[18] = date_string[18];
- buf[19] = date_string[22];
- buf[20] = date_string[23];
- buf[21] = date_string[24];
+ buf[17] = date_string[12];
+ buf[18] = date_string[13];
+ buf[19] = '+';
+ buf[20] = '0';
+ buf[21] = '0';
buf[22] = ':';
- buf[23] = date_string[26];
- buf[24] = date_string[27];
+ buf[23] = '0';
+ buf[24] = '0';
buf[25] = '\0';
return g_strdup (buf);
+
+ } else if (tracker_simplify_8601 (date_string, buf)) {
+ return g_strdup (buf);
}
return g_strdup (date_string);
diff --git a/src/tracker-extract/tracker-extract-jpeg.c b/src/tracker-extract/tracker-extract-jpeg.c
index 76ab808..ba88064 100644
--- a/src/tracker-extract/tracker-extract-jpeg.c
+++ b/src/tracker-extract/tracker-extract-jpeg.c
@@ -474,6 +474,8 @@ fail:
tracker_escape_metadata (date));
g_free (date);
}
+
+ g_debug ("Date: %s", (char *)g_hash_table_lookup (metadata, "Image:Date"));
}
TrackerExtractData *
diff --git a/tests/libtracker-common/tracker-type-utils-test.c b/tests/libtracker-common/tracker-type-utils-test.c
index 24a20ae..55d6f17 100644
--- a/tests/libtracker-common/tracker-type-utils-test.c
+++ b/tests/libtracker-common/tracker-type-utils-test.c
@@ -36,31 +36,107 @@ test_date_format (void)
result = tracker_date_format ("");
g_assert (result == NULL);
- /* Fails
result = tracker_date_format ("1978"); //Audio.ReleaseDate
- g_assert (tracker_test_helpers_cmpstr_equal (result, "1978-01-01T00:00:00"));
- */
+ g_assert (tracker_test_helpers_cmpstr_equal (result, "1978-01-01T00:00:00+00:00"));
result = tracker_date_format ("2008-06-14");
- g_assert (tracker_test_helpers_cmpstr_equal (result, "2008-06-14T00:00:00"));
+ g_assert (tracker_test_helpers_cmpstr_equal (result, "2008-06-14T00:00:00+00:00"));
g_free (result);
result = tracker_date_format ("20080614000000");
- g_assert (tracker_test_helpers_cmpstr_equal (result, "2008-06-14T00:00:00"));
+ g_assert (tracker_test_helpers_cmpstr_equal (result, "2008-06-14T00:00:00+00:00"));
g_free (result);
result = tracker_date_format ("20080614000000Z");
- g_assert (tracker_test_helpers_cmpstr_equal (result, "2008-06-14T00:00:00Z"));
+ g_assert (tracker_test_helpers_cmpstr_equal (result, "2008-06-14T00:00:00+00:00"));
g_free (result);
result = tracker_date_format ("Mon Jun 14 04:20:20 2008"); /* MS Office */
- g_assert (tracker_test_helpers_cmpstr_equal (result, "2008-06-14T04:20:20"));
+ g_assert (tracker_test_helpers_cmpstr_equal (result, "2008-06-14T04:20:20+00:00"));
g_free (result);
result = tracker_date_format ("2008:06:14 04:20:20"); /* Exif style */
- g_assert (tracker_test_helpers_cmpstr_equal (result, "2008-06-14T04:20:20"));
+ g_assert (tracker_test_helpers_cmpstr_equal (result, "2008-06-14T04:20:20+00:00"));
+ g_free (result);
+
+ /* Do everything allowed in ISO 8601 here */
+
+ /* Dates */
+ /* YYYY or YYYY-MM or YYYY-MM-DD or YYYYMMDD */
+ result = tracker_date_format ("2008");
+ g_assert (tracker_test_helpers_cmpstr_equal (result, "2008-01-01T00:00:00+00:00"));
+ g_free (result);
+
+ result = tracker_date_format ("1978-06");
+ g_assert (tracker_test_helpers_cmpstr_equal (result, "1978-06-01T00:00:00+00:00"));
+ g_free (result);
+
+ result = tracker_date_format ("1999-02-23");
+ g_assert (tracker_test_helpers_cmpstr_equal (result, "1999-02-23T00:00:00+00:00"));
+ g_free (result);
+
+ result = tracker_date_format ("19931223");
+ g_assert (tracker_test_helpers_cmpstr_equal (result, "1993-12-23T00:00:00+00:00"));
+ g_free (result);
+
+ /* Times */
+ /* hh:mm:ss or hhmmss, hh:mm or hhmm or hh , with T and without*/
+
+ result = tracker_date_format ("1999-02-23T23:12:01");
+ g_assert (tracker_test_helpers_cmpstr_equal (result, "1999-02-23T23:12:01+00:00"));
+ g_free (result);
+
+ result = tracker_date_format ("19990223T231201");
+ g_assert (tracker_test_helpers_cmpstr_equal (result, "1999-02-23T23:12:01+00:00"));
+ g_free (result);
+
+ result = tracker_date_format ("1979-03-04 16:03:03");
+ g_assert (tracker_test_helpers_cmpstr_equal (result, "1979-03-04T16:03:03+00:00"));
+ g_free (result);
+
+ result = tracker_date_format ("1979-03-04T16:03");
+ g_assert (tracker_test_helpers_cmpstr_equal (result, "1979-03-04T16:03:00+00:00"));
g_free (result);
+ result = tracker_date_format ("1979-03-04T16");
+ g_assert (tracker_test_helpers_cmpstr_equal (result, "1979-03-04T16:00:00+00:00"));
+ g_free (result);
+
+ /* Decimals */
+ /* We are only interested in accuracy of a second */
+
+ result = tracker_date_format ("1979-03-04 16:03:03.5");
+ g_assert (tracker_test_helpers_cmpstr_equal (result, "1979-03-04T16:03:03+00:00"));
+ g_free (result);
+
+ result = tracker_date_format ("1979-03-04T16:03.2");
+ g_assert (tracker_test_helpers_cmpstr_equal (result, "1979-03-04T16:03:12+00:00"));
+ g_free (result);
+
+ result = tracker_date_format ("1979-03-04T16.25");
+ g_assert (tracker_test_helpers_cmpstr_equal (result, "1979-03-04T16:15:00+00:00"));
+ g_free (result);
+
+ result = tracker_date_format ("1979-03-04 16.125");
+ g_assert (tracker_test_helpers_cmpstr_equal (result, "1979-03-04T16:07:30+00:00"));
+ g_free (result);
+
+ result = tracker_date_format ("2002-07-20T11:24:35.1Z");
+ g_assert (tracker_test_helpers_cmpstr_equal (result, "2002-07-20T11:24:35+00:00"));
+ g_free (result);
+
+ /* Timezone and UTC */
+
+ result = tracker_date_format ("1979-03-04 16:03:03.5Z");
+ g_assert (tracker_test_helpers_cmpstr_equal (result, "1979-03-04T16:03:03+00:00"));
+ g_free (result);
+
+ result = tracker_date_format ("1979-03-04T16:03:03.50+02:00");
+ g_assert (tracker_test_helpers_cmpstr_equal (result, "1979-03-04T16:03:03+02:00"));
+ g_free (result);
+
+ /* FIXME Missing week and day notations, truncated notation, extended years */
+
if (g_test_trap_fork (0, G_TEST_TRAP_SILENCE_STDERR)) {
result = tracker_date_format (NULL);
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]