[tracker/tracker-1.0] tracker-extract: MP3 id3v23, id3v24 functions documented and v23 fixed
- From: Martyn James Russell <mr src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/tracker-1.0] tracker-extract: MP3 id3v23, id3v24 functions documented and v23 fixed
- Date: Fri, 22 Aug 2014 11:54:03 +0000 (UTC)
commit 153afe6e89244bc7a6e9f30fe036c1ce68556b58
Author: Martyn Russell <martyn lanedo com>
Date: Mon Jun 9 12:38:36 2014 +0100
tracker-extract: MP3 id3v23, id3v24 functions documented and v23 fixed
- The padding logic in v23 was incorrect.
- Also the frame reading would continue while < total file size, which lead to
infinite loops and issues for some files, now we go only while < tsize (tag
size).
- Instead of +10 here and there, now we use a defined integer to know why
we're increasing the iteration through the file for maintainability
- Added debugging so we know when there is a failure and what frames we're
processing/ignoring
- Smaller improvements (no docs) for id3v20 function, same logic applies.
src/tracker-extract/tracker-extract-mp3.c | 261 +++++++++++++++++++++++------
1 files changed, 211 insertions(+), 50 deletions(-)
---
diff --git a/src/tracker-extract/tracker-extract-mp3.c b/src/tracker-extract/tracker-extract-mp3.c
index 554ffdd..4a2fe6e 100644
--- a/src/tracker-extract/tracker-extract-mp3.c
+++ b/src/tracker-extract/tracker-extract-mp3.c
@@ -1731,6 +1731,8 @@ parse_id3v24 (const gchar *data,
MP3Data *filedata,
size_t *offset_delta)
{
+ const gint header_size = 10;
+ const gint frame_size = 10;
gint unsync;
gint ext_header;
gint experimental;
@@ -1738,52 +1740,110 @@ parse_id3v24 (const gchar *data,
guint pos;
guint ext_header_size;
+ /* Check header, expecting (in hex), 10 bytes long:
+ *
+ * $ 49 44 33 yy yy xx zz zz zz zz
+ *
+ * Where yy is less than $FF, xx is the 'flags' byte and zz is
+ * less than $80.
+ *
+ * Here yy is the version, so v24 == 04 00.
+ *
+ * MP3's look like this:
+ *
+ * [Header][?External Header?][Tags][Content]
+ */
if ((size < 16) ||
(data[0] != 0x49) ||
(data[1] != 0x44) ||
(data[2] != 0x33) ||
(data[3] != 0x04) ||
- (data[4] != 0x00) ) {
+ (data[4] != 0x00)) {
+ /* It's not an error, we might try another function
+ * if we have the wrong version header here.
+ */
return;
}
+ /* Get the flags (xx) in the header */
unsync = (data[5] & 0x80) > 0;
ext_header = (data[5] & 0x40) > 0;
experimental = (data[5] & 0x20) > 0;
+
+ /* Get the complete tag size (zz) in the header:
+ * Tag size is size of the complete tag after
+ * unsychronisation, including padding, excluding the header
+ * but not excluding the extended header (total tag size - 10)
+ */
tsize = (((data[6] & 0x7F) << 21) |
((data[7] & 0x7F) << 14) |
((data[8] & 0x7F) << 7) |
((data[9] & 0x7F) << 0));
- if ((tsize + 10 > size) || (experimental)) {
+ /* We don't handle experimental cases */
+ if (experimental) {
+ g_message ("[v24] Experimental MP3s are not extracted, doing nothing");
return;
}
- pos = 10;
+ /* Check if we can read even the first frame, The complete
+ * tag size (tsize) does not include the header which is 10
+ * bytes, so we check that there is some content AFTER the
+ * headers. */
+ if (tsize + header_size > size) {
+ g_message ("[v24] Expected MP3 tag size and header size to be within file size boundaries");
+ return;
+ }
+ /* Start after the header (10 bytes long) */
+ pos = header_size;
+
+ /* Completely optional */
if (ext_header) {
+ /* Extended header is expected to be:
+ * Extended header size $xx xx xx xx (4 chars)
+ * Extended Flags $xx xx
+ * Size of padding $xx xx xx xx
+ */
ext_header_size = (((data[10] & 0x7F) << 21) |
((data[11] & 0x7F) << 14) |
((data[12] & 0x7F) << 7) |
((data[13] & 0x7F) << 0));
- pos += ext_header_size;
- if (pos + tsize > size) {
- /* invalid size: extended header longer than tag */
+ /* Where the 'Extended header size', currently 6 or 10
+ * bytes, excludes itself. The 'Size of padding' is
+ * simply the total tag size excluding the frames and
+ * the headers, in other words the padding.
+ */
+ if (tsize + ext_header_size > size) {
+ g_message ("[v24] Expected MP3 tag size and extended header size to be within file
size boundaries");
return;
}
+
+ pos += ext_header_size;
}
while (pos < size) {
+ const char *frame_name;
id3v24frame frame;
size_t csize;
unsigned short flags;
- if (pos + 10 > size) {
- return;
+ /* Frames are 10 bytes each and made up of:
+ * Frame ID $xx xx xx xx (4 chars)
+ * Size $xx xx xx xx
+ * Flags $xx xx
+ */
+ if (pos + frame_size > tsize) {
+ g_message ("[v24] Expected MP3 frame size (%d) to be within tag size (%d) boundaries,
position = %d",
+ frame_size,
+ tsize,
+ pos);
+ break;
}
- frame = id3v24_get_frame (&data[pos]);
+ frame_name = &data[pos];
+ frame = id3v24_get_frame (frame_name);
csize = (((data[pos+4] & 0x7F) << 21) |
((data[pos+5] & 0x7F) << 14) |
@@ -1793,22 +1853,41 @@ parse_id3v24 (const gchar *data,
flags = (((unsigned char) (data[pos + 8]) << 8) +
((unsigned char) (data[pos + 9])));
- pos += 10;
+ pos += frame_size;
if (frame == ID3V24_UNKNOWN) {
- /* ignore unknown frames */
+ /* Ignore unknown frames */
+ g_debug ("[v24] Ignoring unknown frame '%s' (pos:%d, size:%" G_GSIZE_FORMAT ")",
frame_name, pos, csize);
pos += csize;
continue;
+ } else {
+ g_debug ("[v24] Processing frame '%s'", frame_name);
}
- if (pos + csize > size) {
+ /* If content size is more than size of file, stop. If
+ * If content size is 0 then continue to next frame. */
+ if (pos + csize > tsize) {
+ g_debug ("[v24] Position (%d) + content size (%" G_GSIZE_FORMAT ") > tag size (%d),
not processing any more frames", pos, csize, tsize);
break;
} else if (csize == 0) {
+ g_debug ("[v24] Content size was 0, moving to next frame");
continue;
}
+ /* Frame flags expected are in format of:
+ *
+ * %abc00000 %ijk00000
+ *
+ * a - Tag alter preservation
+ * b - File alter preservation
+ * c - Read only
+ * i - Compression
+ * j - Encryption
+ * k - Grouping identity
+ */
if (((flags & 0x80) > 0) ||
((flags & 0x40) > 0)) {
+ g_debug ("[v23] Ignoring frame '%s', frame flags 0x80 or 0x40 found (compression /
encryption)", frame_name);
pos += csize;
continue;
}
@@ -1833,7 +1912,7 @@ parse_id3v24 (const gchar *data,
pos += csize;
}
- *offset_delta = tsize + 10;
+ *offset_delta = tsize + header_size;
}
static void
@@ -1845,73 +1924,119 @@ parse_id3v23 (const gchar *data,
MP3Data *filedata,
size_t *offset_delta)
{
+ const gint header_size = 10;
+ const gint frame_size = 10;
gint unsync;
gint ext_header;
gint experimental;
guint tsize;
guint pos;
guint ext_header_size;
- guint padding;
+ /* Check header, expecting (in hex), 10 bytes long:
+ *
+ * $ 49 44 33 yy yy xx zz zz zz zz
+ *
+ * Where yy is less than $FF, xx is the 'flags' byte and zz is
+ * less than $80.
+ *
+ * Here yy is the version, so v23 == 03 00.
+ *
+ * MP3's look like this:
+ *
+ * [Header][?External Header?][Tags][Content]
+ */
if ((size < 16) ||
(data[0] != 0x49) ||
(data[1] != 0x44) ||
(data[2] != 0x33) ||
(data[3] != 0x03) ||
(data[4] != 0x00)) {
+ /* It's not an error, we might try another function
+ * if we have the wrong version header here.
+ */
return;
}
+ /* Get the flags (xx) in the header */
unsync = (data[5] & 0x80) > 0;
ext_header = (data[5] & 0x40) > 0;
experimental = (data[5] & 0x20) > 0;
+
+ /* Get the complete tag size (zz) in the header:
+ * Tag size is size of the complete tag after
+ * unsychronisation, including padding, excluding the header
+ * but not excluding the extended header (total tag size - 10)
+ */
tsize = (((data[6] & 0x7F) << 21) |
((data[7] & 0x7F) << 14) |
((data[8] & 0x7F) << 7) |
((data[9] & 0x7F) << 0));
- if ((tsize + 10 > size) || (experimental)) {
+ /* We don't handle experimental cases */
+ if (experimental) {
+ g_message ("[v23] Experimental MP3s are not extracted, doing nothing");
+ return;
+ }
+
+ /* Check if we can read even the first frame, The complete
+ * tag size (tsize) does not include the header which is 10
+ * bytes, so we check that there is some content AFTER the
+ * headers. */
+ if (tsize + header_size > size) {
+ g_message ("[v23] Expected MP3 tag size and header size to be within file size boundaries");
return;
}
- pos = 10;
- padding = 0;
+ /* Start after the header (10 bytes long) */
+ pos = header_size;
+ /* Completely optional */
if (ext_header) {
+ /* Extended header is expected to be:
+ * Extended header size $xx xx xx xx (4 chars)
+ * Extended Flags $xx xx
+ * Size of padding $xx xx xx xx
+ */
ext_header_size = (((unsigned char)(data[10]) << 24) |
((unsigned char)(data[11]) << 16) |
((unsigned char)(data[12]) << 8) |
- ((unsigned char)(data[12]) << 0));
-
- padding = (((unsigned char)(data[15]) << 24) |
- ((unsigned char)(data[16]) << 16) |
- ((unsigned char)(data[17]) << 8) |
- ((unsigned char)(data[18]) << 0));
-
- pos += 4 + ext_header_size;
+ ((unsigned char)(data[13]) << 0));
- if (padding < tsize)
- tsize -= padding;
- else {
+ /* Where the 'Extended header size', currently 6 or 10
+ * bytes, excludes itself. The 'Size of padding' is
+ * simply the total tag size excluding the frames and
+ * the headers, in other words the padding.
+ */
+ if (tsize + ext_header_size > size) {
+ g_message ("[v23] Expected MP3 tag size and extended header size to be within file
size boundaries");
return;
}
- if (pos + tsize > size) {
- /* invalid size: extended header longer than tag */
- return;
- }
+ pos += ext_header_size;
}
while (pos < size) {
+ const char *frame_name;
id3v24frame frame;
size_t csize;
unsigned short flags;
- if (pos + 10 > size) {
- return;
+ /* Frames are 10 bytes each and made up of:
+ * Frame ID $xx xx xx xx (4 chars)
+ * Size $xx xx xx xx
+ * Flags $xx xx
+ */
+ if (pos + frame_size > tsize) {
+ g_message ("[v23] Expected MP3 frame size (%d) to be within tag size (%d) boundaries,
position = %d",
+ frame_size,
+ tsize,
+ pos);
+ break;
}
- frame = id3v24_get_frame (&data[pos]);
+ frame_name = &data[pos];
+ frame = id3v24_get_frame (frame_name);
csize = (((unsigned char)(data[pos + 4]) << 24) |
((unsigned char)(data[pos + 5]) << 16) |
@@ -1921,21 +2046,40 @@ parse_id3v23 (const gchar *data,
flags = (((unsigned char)(data[pos + 8]) << 8) +
((unsigned char)(data[pos + 9])));
- pos += 10;
+ pos += frame_size;
if (frame == ID3V24_UNKNOWN) {
- /* ignore unknown frames */
+ /* Ignore unknown frames */
+ g_debug ("[v23] Ignoring unknown frame '%s' (pos:%d, size:%" G_GSIZE_FORMAT ")",
frame_name, pos, csize);
pos += csize;
continue;
+ } else {
+ g_debug ("[v23] Processing frame '%s'", frame_name);
}
- if (pos + csize > size) {
+ /* If content size is more than size of file, stop. If
+ * If content size is 0 then continue to next frame. */
+ if (pos + csize > tsize) {
+ g_debug ("[v23] Position (%d) + content size (%" G_GSIZE_FORMAT ") > tag size (%d),
not processing any more frames", pos, csize, tsize);
break;
} else if (csize == 0) {
- continue;
- }
-
- if (((flags & 0x80) > 0) || ((flags & 0x40) > 0)) {
+ g_debug ("[v23] Content size was 0, moving to next frame");
+ }
+
+ /* Frame flags expected are in format of:
+ *
+ * %abc00000 %ijk00000
+ *
+ * a - Tag alter preservation
+ * b - File alter preservation
+ * c - Read only
+ * i - Compression
+ * j - Encryption
+ * k - Grouping identity
+ */
+ if (((flags & 0x80) > 0) ||
+ ((flags & 0x40) > 0)) {
+ g_debug ("[v23] Ignoring frame '%s', frame flags 0x80 or 0x40 found (compression /
encryption)", frame_name);
pos += csize;
continue;
}
@@ -1960,7 +2104,7 @@ parse_id3v23 (const gchar *data,
pos += csize;
}
- *offset_delta = tsize + 10;
+ *offset_delta = tsize + header_size;
}
static void
@@ -1972,6 +2116,8 @@ parse_id3v20 (const gchar *data,
MP3Data *filedata,
size_t *offset_delta)
{
+ const gint header_size = 10;
+ const gint frame_size = 6;
gint unsync;
guint tsize;
guint pos;
@@ -1982,6 +2128,9 @@ parse_id3v20 (const gchar *data,
(data[2] != 0x33) ||
(data[3] != 0x02) ||
(data[4] != 0x00)) {
+ /* It's not an error, we might try another function
+ * if we have the wrong version header here.
+ */
return;
}
@@ -1991,37 +2140,49 @@ parse_id3v20 (const gchar *data,
((data[8] & 0x7F) << 07) |
((data[9] & 0x7F) << 00));
- if (tsize + 10 > size) {
+ if (tsize + header_size > size) {
+ g_message ("[v20] Expected MP3 tag size and header size to be within file size boundaries");
return;
}
- pos = 10;
+
+ pos = header_size;
while (pos < size) {
+ const char *frame_name;
id3v2frame frame;
size_t csize;
- if (pos + 6 > size) {
- return;
+ if (pos + frame_size > tsize) {
+ g_message ("[v20] Expected MP3 frame size (%d) to be within tag size (%d) boundaries,
position = %d",
+ frame_size,
+ tsize,
+ pos);
+ break;
}
- frame = id3v2_get_frame (&data[pos]);
+ frame_name = &data[pos];
+ frame = id3v2_get_frame (frame_name);
csize = (((unsigned char)(data[pos + 3]) << 16) +
((unsigned char)(data[pos + 4]) << 8) +
((unsigned char)(data[pos + 5]) ) );
- pos += 6;
+ pos += frame_size;
if (frame == ID3V2_UNKNOWN) {
/* ignore unknown frames */
+ g_debug ("[v20] Ignoring unknown frame '%s' (pos:%d, size:%" G_GSIZE_FORMAT ")",
frame_name, pos, csize);
pos += csize;
continue;
}
- if (pos + csize > size) {
+ /* If content size is more than size of file, stop. If
+ * If content size is 0 then continue to next frame. */
+ if (pos + csize > tsize) {
+ g_debug ("[v20] Position (%d) + content size (%" G_GSIZE_FORMAT ") > tag size (%d),
not processing any more frames", pos, csize, tsize);
break;
} else if (csize == 0) {
- continue;
+ g_debug ("[v20] Content size was 0, moving to next frame");
}
/* Early versions do not have unsynch per frame */
@@ -2039,7 +2200,7 @@ parse_id3v20 (const gchar *data,
pos += csize;
}
- *offset_delta = tsize + 10;
+ *offset_delta = tsize + header_size;
}
static goffset
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]