On Mon, 2005-02-28 at 12:22 +0800, Not Zed wrote: > > So, how many valid mails does this break? > > e.g. > > =?iso-8859-1?b?foo this is a hidden message not for evolution users > bar?= probably would decode into garbage... or whatever "foo this is a hidden message not for evolution users bar" is base64 decoded. > > > And why did you copy the append_ functions? There should be only one > of each in the code. This is a messy enough hack as it is. fair enough > > Instead of a test folder, any test data must be added to the > regression test we already have for rfc2047 decoding. I'll add this stuff to the camel test-suite and send a new patch > > BTW, using if (p = memchr(inptr, '=', end-start-2) && p[1] == '?') is > highly likely to be much more efficient than (while (inptr < inend-2 > && !strcmp("=?")) inptr++)) fair enough. Jeff > > On Fri, 2005-02-25 at 13:47 -0500, Jeffrey Stedfast wrote: > > unfortunately, there exist a bountiful number of shitful mailers out > > there whose authors clearly couldn't be bothered to read or understand > > the MIME specifications and so just pulled an encoding scheme out of > > their proverbials. > > > > the attached patch tries to deal with the scenarios that I'm aware of, > > namely illegal characters in the encoded text portion of the > > encoded-word token (which, sadly, even includes SPACE and TAB) > > > > for the convenience of anyone reading this message who hasn't yet read > > rfc2047, here's a good quote from the end of section 2: > > > > IMPORTANT: 'encoded-word's are designed to be recognized as 'atom's > > by an RFC 822 parser. As a consequence, unencoded white space > > characters (such as SPACE and HTAB) are FORBIDDEN within an > > 'encoded-word'. For example, the character sequence > > > > =?iso-8859-1?q?this is some text?= > > > > would be parsed as four 'atom's, rather than as a single 'atom' (by > > an RFC 822 parser) or 'encoded-word' (by a parser which understands > > 'encoded-words'). 
The correct way to encode the string "this is some > > text" is to encode the SPACE characters as well, e.g. > > > > =?iso-8859-1?q?this=20is=20some=20text?= > > > > so yes, the behaviour of the broken mailers is explicitly FORBIDDEN but > > that hasn't stopped them. oh well. > > > > > > I've also attached a test mbox for everyone's convenience in testing > > this patch (feel free to add to it) > > > > With the patch applied, both Mozilla-Mail and Evolution render the > > subjects (and other headers) exactly the same afaict. > > > > > text/plain attachment (broken-rfc2047.patch) > > ? broken-rfc2047.patch > > ? camel-mime-tables.c > > ? providers/imap4/imap4-XGWMOVE.patch > > ? providers/imap4/imap4.patch > > Index: ChangeLog > > =================================================================== > > RCS file: /cvs/gnome/evolution-data-server/camel/ChangeLog,v > > retrieving revision 1.2431 > > diff -u -p -r1.2431 ChangeLog > > --- ChangeLog 15 Feb 2005 11:12:51 -0000 1.2431 > > +++ ChangeLog 25 Feb 2005 18:36:04 -0000 > > @@ -1,3 +1,13 @@ > > +2005-02-25 Jeffrey Stedfast <fejj novell com> > > + > > + * camel-mime-utils.c (quoted_decode): Allow spaces in the text we > > + are decoding. > > + (append_quoted_pair): Changed to take charset params and convert > > + un-quoted-pair'd strings to UTF-8. > > + (header_decode_text): Rewritten to work around broken rfc2047 > > + encoded-words sent by mailers who's authors couldn't be bothered > > + to read the specs. 
> > + > > 2005-02-11 Radek Doulik <rodo novell com> > > > > * camel-filter-search.c (junk_test): use camel debug > > Index: camel-mime-utils.c > > =================================================================== > > RCS file: /cvs/gnome/evolution-data-server/camel/camel-mime-utils.c,v > > retrieving revision 1.223 > > diff -u -p -r1.223 camel-mime-utils.c > > --- camel-mime-utils.c 31 Jan 2005 06:56:28 -0000 1.223 > > +++ camel-mime-utils.c 25 Feb 2005 18:36:05 -0000 > > @@ -814,8 +814,12 @@ quoted_decode(const unsigned char *in, s > > *outptr++ = 0x20; > > } else if (c==' ' || c==0x09) { > > /* FIXME: this is an error! ignore for now ... */ > > +#if ADHERE_TO_SPEC > > ret = -1; > > break; > > +#else > > + *outptr++ = c; > > +#endif > > } else { > > *outptr++ = c; > > } > > @@ -915,7 +919,7 @@ rfc2047_decode_word(const char *in, size > > > > /* quick check to see if this could possibly be a real encoded word */ > > if (len < 8 || !(in[0] == '=' && in[1] == '?' && in[len-1] == '=' && in[len-2] == '?')) { > > - d(printf("invalid\n")); > > + d(printf("rfc2047_decode_word: invalid token\n")); > > return NULL; > > } > > > > @@ -1058,6 +1062,7 @@ append_8bit (GString *out, const char *i > > > > } > > > > +#ifdef ADHERE_TO_SPEC > > static GString * > > append_quoted_pair (GString *str, const char *in, gssize inlen) > > { > > @@ -1072,7 +1077,7 @@ append_quoted_pair (GString *str, const > > else > > g_string_append_c (str, c); > > } > > - > > + > > return str; > > } > > > > @@ -1140,6 +1145,115 @@ header_decode_text (const char *in, size > > > > return dword; > > } > > + > > +#else /* ! 
ADHERE_TO_SPEC */ > > + > > +static void > > +append_text (GString *str, const char *in, ssize_t inlen, const char *default_charset, const char *locale_charset) > > +{ > > + if ((default_charset == NULL || !append_8bit (str, in, inlen, default_charset)) > > + && (locale_charset == NULL || !append_8bit (str, in, inlen, locale_charset))) > > + append_latin1 (str, in, inlen); > > +} > > + > > +static void > > +append_quoted_pair (GString *str, const char *in, ssize_t inlen, const char *default_charset, const char *locale_charset) > > +{ > > + register const char *inptr = in; > > + const char *inend = in + inlen; > > + GString *unquoted; > > + char c; > > + > > + unquoted = g_string_new (""); > > + > > + while (inptr < inend) { > > + c = *inptr++; > > + if (c == '\\' && inptr < inend) > > + g_string_append_c (unquoted, *inptr++); > > + else > > + g_string_append_c (unquoted, c); > > + } > > + > > + append_text (str, unquoted->str, unquoted->len, default_charset, locale_charset); > > + g_string_free (unquoted, TRUE); > > +} > > + > > +static char * > > +header_decode_text (const char *in, size_t inlen, int ctext, const char *default_charset) > > +{ > > + void (* append) (GString *, const char *, ssize_t, const char *, const char *); > > + const char *inptr, *inend, *start, *encword, *locale_charset; > > + char *dword = NULL; > > + GString *out; > > + > > + locale_charset = e_iconv_locale_charset (); > > + > > + if (ctext) > > + append = append_quoted_pair; > > + else > > + append = append_text; > > + > > + out = g_string_new (""); > > + inptr = in; > > + inend = inptr + inlen; > > + > > + while (inptr < inend) { > > + start = inptr; > > + > > + while (inptr < (inend - 8) && strncmp (inptr, "=?", 2) != 0) { > > + if (!camel_mime_is_lwsp (*inptr)) > > + dword = NULL; > > + inptr++; > > + } > > + > > + if (inptr == (inend - 8)) { > > + append (out, start, inend - start, default_charset, locale_charset); > > + break; > > + } > > + > > + /* could be an encoded word (or a 
broken encoded word which is why this code is so damn hairy) */ > > + encword = inptr; > > + > > + inptr += 2; > > + while (inptr < (inend - 5) && *inptr != '?') > > + inptr++; > > + > > + if (inptr[0] == '?' && (inptr[1] == 'B' || inptr[1] == 'b' || inptr[1] == 'Q' || inptr[1] == 'q') && inptr[2] == '?') { > > + /* looking more and more like an encoded word... */ > > + inptr += 3; > > + while (inptr < (inend - 2) && *inptr != '?') > > + inptr++; > > + > > + if (strncmp (inptr, "?=", 2) != 0) > > + goto not_encword; > > + > > + if (!dword) > > + append (out, start, encword - start, default_charset, locale_charset); > > + > > + inptr += 2; > > + > > + if ((dword = rfc2047_decode_word (encword, inptr - encword))) { > > + g_string_append (out, dword); > > + g_free (dword); > > + } else { > > + append (out, encword, inptr - encword, default_charset, locale_charset); > > + } > > + } else { > > + /* not an encoded word */ > > + not_encword: > > + dword = NULL; > > + inptr = encword + 2; > > + > > + append (out, start, inptr - start, default_charset, locale_charset); > > + } > > + } > > + > > + dword = out->str; > > + g_string_free (out, FALSE); > > + > > + return dword; > > +} > > +#endif /* ADHERE_TO_SPEC */ > > > > char * > > camel_header_decode_string (const char *in, const char *default_charset) -- Jeffrey Stedfast Evolution Hacker - Novell, Inc. fejj ximian com - www.novell.com
Attachment:
smime.p7s
Description: S/MIME cryptographic signature