Unfortunately, there exist a bountiful number of shitful mailers out there whose authors clearly couldn't be bothered to read or understand the MIME specifications and so just pulled an encoding scheme out of their proverbials. The attached patch tries to deal with the scenarios that I'm aware of, namely illegal characters in the encoded-text portion of the encoded-word token (which, sadly, even include SPACE and TAB). For the convenience of anyone reading this message who hasn't yet read RFC 2047, here's a good quote from the end of Section 2: IMPORTANT: 'encoded-word's are designed to be recognized as 'atom's by an RFC 822 parser. As a consequence, unencoded white space characters (such as SPACE and HTAB) are FORBIDDEN within an 'encoded-word'. For example, the character sequence =?iso-8859-1?q?this is some text?= would be parsed as four 'atom's, rather than as a single 'atom' (by an RFC 822 parser) or 'encoded-word' (by a parser which understands 'encoded-words'). The correct way to encode the string "this is some text" is to encode the SPACE characters as well, e.g. =?iso-8859-1?q?this=20is=20some=20text?= So yes, the behaviour of the broken mailers is explicitly FORBIDDEN, but that hasn't stopped them. Oh well. I've also attached a test mbox for everyone's convenience in testing this patch (feel free to add to it). With the patch applied, both Mozilla-Mail and Evolution render the subjects (and other headers) exactly the same, as far as I can tell. -- Jeffrey Stedfast Evolution Hacker - Novell, Inc. fejj ximian com - www.novell.com
? broken-rfc2047.patch ? camel-mime-tables.c ? providers/imap4/imap4-XGWMOVE.patch ? providers/imap4/imap4.patch Index: ChangeLog =================================================================== RCS file: /cvs/gnome/evolution-data-server/camel/ChangeLog,v retrieving revision 1.2431 diff -u -p -r1.2431 ChangeLog --- ChangeLog 15 Feb 2005 11:12:51 -0000 1.2431 +++ ChangeLog 25 Feb 2005 18:36:04 -0000 @@ -1,3 +1,13 @@ +2005-02-25 Jeffrey Stedfast <fejj novell com> + + * camel-mime-utils.c (quoted_decode): Allow spaces in the text we + are decoding. + (append_quoted_pair): Changed to take charset params and convert + un-quoted-pair'd strings to UTF-8. + (header_decode_text): Rewritten to work around broken rfc2047 + encoded-words sent by mailers who's authors couldn't be bothered + to read the specs. + 2005-02-11 Radek Doulik <rodo novell com> * camel-filter-search.c (junk_test): use camel debug Index: camel-mime-utils.c =================================================================== RCS file: /cvs/gnome/evolution-data-server/camel/camel-mime-utils.c,v retrieving revision 1.223 diff -u -p -r1.223 camel-mime-utils.c --- camel-mime-utils.c 31 Jan 2005 06:56:28 -0000 1.223 +++ camel-mime-utils.c 25 Feb 2005 18:36:05 -0000 @@ -814,8 +814,12 @@ quoted_decode(const unsigned char *in, s *outptr++ = 0x20; } else if (c==' ' || c==0x09) { /* FIXME: this is an error! ignore for now ... */ +#if ADHERE_TO_SPEC ret = -1; break; +#else + *outptr++ = c; +#endif } else { *outptr++ = c; } @@ -915,7 +919,7 @@ rfc2047_decode_word(const char *in, size /* quick check to see if this could possibly be a real encoded word */ if (len < 8 || !(in[0] == '=' && in[1] == '?' 
&& in[len-1] == '=' && in[len-2] == '?')) { - d(printf("invalid\n")); + d(printf("rfc2047_decode_word: invalid token\n")); return NULL; } @@ -1058,6 +1062,7 @@ append_8bit (GString *out, const char *i } +#ifdef ADHERE_TO_SPEC static GString * append_quoted_pair (GString *str, const char *in, gssize inlen) { @@ -1072,7 +1077,7 @@ append_quoted_pair (GString *str, const else g_string_append_c (str, c); } - + return str; } @@ -1140,6 +1145,115 @@ header_decode_text (const char *in, size return dword; } + +#else /* ! ADHERE_TO_SPEC */ + +static void +append_text (GString *str, const char *in, ssize_t inlen, const char *default_charset, const char *locale_charset) +{ + if ((default_charset == NULL || !append_8bit (str, in, inlen, default_charset)) + && (locale_charset == NULL || !append_8bit (str, in, inlen, locale_charset))) + append_latin1 (str, in, inlen); +} + +static void +append_quoted_pair (GString *str, const char *in, ssize_t inlen, const char *default_charset, const char *locale_charset) +{ + register const char *inptr = in; + const char *inend = in + inlen; + GString *unquoted; + char c; + + unquoted = g_string_new (""); + + while (inptr < inend) { + c = *inptr++; + if (c == '\\' && inptr < inend) + g_string_append_c (unquoted, *inptr++); + else + g_string_append_c (unquoted, c); + } + + append_text (str, unquoted->str, unquoted->len, default_charset, locale_charset); + g_string_free (unquoted, TRUE); +} + +static char * +header_decode_text (const char *in, size_t inlen, int ctext, const char *default_charset) +{ + void (* append) (GString *, const char *, ssize_t, const char *, const char *); + const char *inptr, *inend, *start, *encword, *locale_charset; + char *dword = NULL; + GString *out; + + locale_charset = e_iconv_locale_charset (); + + if (ctext) + append = append_quoted_pair; + else + append = append_text; + + out = g_string_new (""); + inptr = in; + inend = inptr + inlen; + + while (inptr < inend) { + start = inptr; + + while (inptr < (inend - 8) 
&& strncmp (inptr, "=?", 2) != 0) { + if (!camel_mime_is_lwsp (*inptr)) + dword = NULL; + inptr++; + } + + if (inptr == (inend - 8)) { + append (out, start, inend - start, default_charset, locale_charset); + break; + } + + /* could be an encoded word (or a broken encoded word which is why this code is so damn hairy) */ + encword = inptr; + + inptr += 2; + while (inptr < (inend - 5) && *inptr != '?') + inptr++; + + if (inptr[0] == '?' && (inptr[1] == 'B' || inptr[1] == 'b' || inptr[1] == 'Q' || inptr[1] == 'q') && inptr[2] == '?') { + /* looking more and more like an encoded word... */ + inptr += 3; + while (inptr < (inend - 2) && *inptr != '?') + inptr++; + + if (strncmp (inptr, "?=", 2) != 0) + goto not_encword; + + if (!dword) + append (out, start, encword - start, default_charset, locale_charset); + + inptr += 2; + + if ((dword = rfc2047_decode_word (encword, inptr - encword))) { + g_string_append (out, dword); + g_free (dword); + } else { + append (out, encword, inptr - encword, default_charset, locale_charset); + } + } else { + /* not an encoded word */ + not_encword: + dword = NULL; + inptr = encword + 2; + + append (out, start, inptr - start, default_charset, locale_charset); + } + } + + dword = out->str; + g_string_free (out, FALSE); + + return dword; +} +#endif /* ADHERE_TO_SPEC */ char * camel_header_decode_string (const char *in, const char *default_charset)
From fejj novell com Wed Feb 16 15:31:00 2005 From: Jeffrey Stedfast <fejj novell com> To: Jeffrey Stedfast <fejj novell com> Content-Type: text/plain Message-Id: <1007613014 1382 12 camel taz> Mime-Version: 1.0 Subject: foo=?UTF-8?Q?bar?=baz Date: 16 Feb 2005 15:30:13 -0500 X-Evolution: 00000005-0010 message body From fejj novell com Wed Feb 16 15:31:00 2005 From: Jeffrey Stedfast <fejj novell com> To: Jeffrey Stedfast <fejj novell com> Content-Type: text/plain Message-Id: <1007613014 1382 12 camel taz> Mime-Version: 1.0 Subject: foo =?UTF-8?Q?bar?=baz Date: 16 Feb 2005 15:30:13 -0500 X-Evolution: 00000007-0010 message body From fejj novell com Wed Feb 16 15:31:00 2005 From: Jeffrey Stedfast <fejj novell com> To: Jeffrey Stedfast <fejj novell com> Content-Type: text/plain Message-Id: <1007613014 1382 12 camel taz> Mime-Version: 1.0 Subject: foo=?UTF-8?Q?bar?= baz Date: 16 Feb 2005 15:30:13 -0500 X-Evolution: 00000009-0010 message body From fejj novell com Wed Feb 16 15:31:00 2005 From: Jeffrey Stedfast <fejj novell com> To: Jeffrey Stedfast <fejj novell com> Content-Type: text/plain Message-Id: <1007613014 1382 12 camel taz> Mime-Version: 1.0 Subject: =?UTF-8?Q?foo bar?=baz Date: 16 Feb 2005 15:30:13 -0500 X-Evolution: 0000000b-0010 message body From fejj novell com Wed Feb 16 15:31:00 2005 From: Jeffrey Stedfast <fejj novell com> To: Jeffrey Stedfast <fejj novell com> Content-Type: text/plain Message-Id: <1007613014 1382 12 camel taz> Mime-Version: 1.0 Subject: foo=?UTF-8?Q?bar baz?= Date: 16 Feb 2005 15:30:13 -0500 X-Evolution: 0000000d-0010 message body From fejj novell com Wed Feb 16 15:31:00 2005 From: Jeffrey Stedfast <fejj novell com> To: Jeffrey Stedfast <fejj novell com> Content-Type: text/plain Message-Id: <1007613014 1382 12 camel taz> Mime-Version: 1.0 Subject: =?UTF-8?Q?foo?==?UTF-8?Q?bar baz?= Date: 16 Feb 2005 15:30:13 -0500 X-Evolution: 0000000e-0010 message body From fejj novell com Wed Feb 16 15:31:00 2005 From: Jeffrey Stedfast <fejj novell com> 
To: Jeffrey Stedfast <fejj novell com> Content-Type: text/plain Message-Id: <1007613014 1382 12 camel taz> Mime-Version: 1.0 Subject: =?UTF-8?Q?foo?= =?UTF-8?Q?bar baz?= Date: 16 Feb 2005 15:30:13 -0500 X-Evolution: 0000000f-0010 message body From fejj novell com Wed Feb 16 15:31:00 2005 From: Jeffrey Stedfast <fejj novell com> To: Jeffrey Stedfast <fejj novell com> Content-Type: text/plain Message-Id: <1007613014 1382 12 camel taz> Mime-Version: 1.0 Subject: =?UTF-8?Q?foo?= bar =?UTF-8?Q?baz?= Date: 16 Feb 2005 15:30:13 -0500 X-Evolution: 00000010-0010 message body From fejj novell com Wed Feb 16 15:31:00 2005 From: Jeffrey Stedfast <fejj novell com> To: Jeffrey Stedfast <fejj novell com> Content-Type: text/plain Message-Id: <1007613014 1382 12 camel taz> Mime-Version: 1.0 Subject: =?foo=?UTF-8?Q?bar baz?= Date: 16 Feb 2005 15:30:13 -0500 X-Evolution: 00000011-0010 message body From fejj novell com Wed Feb 16 15:31:00 2005 From: Jeffrey Stedfast <fejj novell com> To: Jeffrey Stedfast <fejj novell com> Content-Type: text/plain Message-Id: <1007613014 1382 12 camel taz> Mime-Version: 1.0 Subject: =?foo?=?UTF-8?Q?bar baz?= Date: 16 Feb 2005 15:30:13 -0500 X-Evolution: 00000012-0010 message body From fejj novell com Wed Feb 16 15:31:00 2005 From: Jeffrey Stedfast <fejj novell com> To: Jeffrey Stedfast <fejj novell com> Content-Type: text/plain Message-Id: <1007613014 1382 12 camel taz> Mime-Version: 1.0 Subject: =?foo?Q=?UTF-8?Q?bar baz?= Date: 16 Feb 2005 15:30:13 -0500 X-Evolution: 00000013-0010 message body From fejj novell com Wed Feb 16 15:31:00 2005 From: Jeffrey Stedfast <fejj novell com> To: Jeffrey Stedfast <fejj novell com> Content-Type: text/plain Message-Id: <1007613014 1382 12 camel taz> Mime-Version: 1.0 Subject: =?foo?Q?=?UTF-8?Q?bar baz?= Date: 16 Feb 2005 15:30:13 -0500 X-Evolution: 00000014-0010 message body From fejj novell com Wed Feb 16 15:31:00 2005 From: Jeffrey Stedfast <fejj novell com> To: Jeffrey Stedfast <fejj novell com> Content-Type: 
text/plain Message-Id: <1007613014 1382 12 camel taz> Mime-Version: 1.0 Subject: foo =?UTF-8?Q?bar ? baz?= Date: 16 Feb 2005 15:30:13 -0500 X-Evolution: 0000001a-0010 message body From fejj novell com Wed Feb 16 15:31:00 2005 From: =?US-ASCII?Q?Keith_Moore?= <moore cs utk edu> To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld dkuug dk> Cc: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD vm1 ulg ac be> Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?= X-Mailer: Test of ctext (=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=) Content-Type: text/plain Message-Id: <1007613014 1382 12 camel taz> Mime-Version: 1.0 Date: 16 Feb 2005 15:30:13 -0500 X-Evolution: 0000001b-0010 message body (example headers taken exactly from rfc2047) From fejj novell com Wed Feb 16 15:31:00 2005 From: Jeffrey Stedfast <fejj novell com> To: Jeffrey Stedfast <fejj novell com> Subject: Test of ctext (bug #62771) User-Agent: Wanderlust/2.11.24 (Wonderwall) Emacs/21.3 Mule/5.0 (=?ISO-2022-JP?B?GyRCOC1MWhsoQg==?=) with some \"quoted pairs\" Content-Type: text/plain Message-Id: <1007613014 1382 12 camel taz> Mime-Version: 1.0 Date: 16 Feb 2005 15:30:13 -0500 X-Evolution: 0000001c-0010 message body
Attachment:
smime.p7s
Description: S/MIME cryptographic signature