http://bugzilla.ximian.com/show_bug.cgi?id=24026 I don't expect this to be accepted absed on NotZed's comments in the bug, but jp asked me to send in a patch so here it is. btw, I think at least the charset_mask_best() changes should go in even if the other changes don't. Jeff -- Jeffrey Stedfast Evolution Hacker - Novell, Inc. fejj ximian com - www.novell.com
Index: ChangeLog
===================================================================
RCS file: /cvs/gnome/evolution/camel/ChangeLog,v
retrieving revision 1.2223
diff -u -r1.2223 ChangeLog
--- ChangeLog 21 Jul 2004 15:55:00 -0000 1.2223
+++ ChangeLog 22 Jul 2004 17:29:34 -0000
@@ -1,3 +1,14 @@
+2004-07-22 Jeffrey Stedfast <fejj novell com>
+
+ * camel-charset-map.c (main): Add some multibyte charsets to the
+ table.
+ (camel_charset_best_mask): Changed the logic slightly to only
+ match certain charsets if the locale matches (Macedonians don't
+ want to use koi8-r for example). This logic also prevents the use
+ of a multibyte charset for mixed languages (such as DanW's feared
+ Greek and Japanese example) as long as the locale isn't Japanese
+ (in which case they probably want Japanese anyway).
+
2004-07-19 Jeffrey Stedfast <fejj novell com>
* providers/imap/camel-imap-store.c (get_subscribed_folders): Free
Index: camel-charset-map.c
===================================================================
RCS file: /cvs/gnome/evolution/camel/camel-charset-map.c,v
retrieving revision 1.38
diff -u -r1.38 camel-charset-map.c
--- camel-charset-map.c 9 Jul 2003 19:05:12 -0000 1.38
+++ camel-charset-map.c 22 Jul 2004 17:29:34 -0000
@@ -49,11 +49,13 @@
#ifdef BUILD_MAP
#include <iconv.h>
+#include <errno.h>
#include <glib.h>
static struct {
- char *name;
- unsigned int bit; /* assigned bit */
+ char *name; /* charset name */
+ int multibyte; /* charset type */
+ unsigned int bit; /* assigned bit */
} tables[] = {
/* These are the 8bit character sets (other than iso-8859-1,
* which is special-cased) which are supported by both other
@@ -61,20 +63,34 @@
* they're listed in is the order they'll be tried in, so put
* the more-popular ones first.
*/
- { "iso-8859-2", 0 }, /* Central/Eastern European */
- { "iso-8859-4", 0 }, /* Baltic */
- { "koi8-r", 0 }, /* Russian */
- { "koi8-u", 0 }, /* Ukranian */
- { "iso-8859-5", 0 }, /* Least-popular Russian encoding */
- { "iso-8859-7", 0 }, /* Greek */
- { "iso-8859-8", 0 }, /* Hebrew; Visual */
- { "iso-8859-9", 0 }, /* Turkish */
- { "iso-8859-13", 0 }, /* Baltic again */
- { "iso-8859-15", 0 }, /* New-and-improved iso-8859-1, but most
- * programs that support this support UTF8
- */
- { "windows-1251", 0 }, /* Russian */
- { 0, 0 }
+ { "iso-8859-2", 0, 0 }, /* Central/Eastern European */
+ { "iso-8859-4", 0, 0 }, /* Baltic */
+ { "koi8-r", 0, 0 }, /* Russian */
+ { "koi8-u", 0, 0 }, /* Ukranian */
+ { "iso-8859-5", 0, 0 }, /* Least-popular Russian encoding */
+ { "iso-8859-7", 0, 0 }, /* Greek */
+ { "iso-8859-8", 0, 0 }, /* Hebrew; Visual */
+ { "iso-8859-9", 0, 0 }, /* Turkish */
+ { "iso-8859-13", 0, 0 }, /* Baltic again */
+ { "iso-8859-15", 0, 0 }, /* New-and-improved iso-8859-1, but most
+ * programs that support this support UTF8
+ */
+ { "windows-1251", 0, 0 }, /* Russian */
+
+ /* These are the multibyte character sets which are commonly
+ * supported by other mail clients. Note: order for multibyte
+ * charsets does not affect priority unlike the 8bit charsets
+ * listed above.
+ */
+ { "iso-2022-jp", 1, 0 }, /* Japanese designed for use over the Net */
+ { "Shift-JIS", 1, 0 }, /* Japanese as used by Windows and MacOS systems */
+ { "euc-jp", 1, 0 }, /* Japanese traditionally used on Unix systems */
+ { "euc-kr", 1, 0 }, /* Korean */
+ { "iso-2022-kr", 1, 0 }, /* Korean (less popular than euc-kr) */
+ { "gb2312", 1, 0 }, /* Simplified Chinese */
+ { "Big5", 1, 0 }, /* Traditional Chinese */
+ { "euc-tw", 1, 0 },
+ { NULL, 0, 0 }
};
unsigned int encoding_map[256 * 256];
@@ -85,115 +101,181 @@
#define UCS "UCS-4LE"
#endif
-int main (void)
+int main (int argc, char **argv)
{
- int i, j;
- int max, min;
- int bit = 0x01;
- int k;
+ GHashTable *table_hash;
+ size_t inleft, outleft;
+ char *inbuf, *outbuf;
+ guint32 out[128], c;
+ unsigned int bit = 0x01;
+ char in[128];
+ int i, j, k;
int bytes;
iconv_t cd;
- char in[128];
- guint32 out[128];
- char *inptr, *outptr;
- size_t inlen, outlen;
-
+
/* dont count the terminator */
- bytes = ((sizeof(tables)/sizeof(tables[0]))+7-1)/8;
-
+ bytes = (G_N_ELEMENTS (tables) + 7 - 1) / 8;
+
for (i = 0; i < 128; i++)
in[i] = i + 128;
-
- for (j = 0; tables[j].name; j++) {
+
+ for (j = 0; tables[j].name && !tables[j].multibyte; j++) {
cd = iconv_open (UCS, tables[j].name);
- inptr = in;
- outptr = (char *)(out);
- inlen = sizeof (in);
- outlen = sizeof (out);
- while (iconv (cd, &inptr, &inlen, &outptr, &outlen) == -1) {
+ inbuf = in;
+ outbuf = (char *)(out);
+ inleft = sizeof (in);
+ outleft = sizeof (out);
+ while (iconv (cd, &inbuf, &inleft, &outbuf, &outleft) == -1) {
if (errno == EILSEQ) {
- inptr++;
- inlen--;
+ inbuf++;
+ inleft--;
} else {
- printf ("%s\n", strerror (errno));
+ fprintf (stderr, "iconv (%s->UCS4, ..., %d, ..., %d): %s",
+ tables[j].name, inleft, outleft,
+ strerror (errno));
exit (1);
}
}
iconv_close (cd);
-
- for (i = 0; i < 128 - outlen / 4; i++) {
+
+ for (i = 0; i < 128 - outleft / 4; i++) {
encoding_map[i] |= bit;
encoding_map[out[i]] |= bit;
}
-
+
tables[j].bit = bit;
bit <<= 1;
}
-
- printf("/* This file is automatically generated: DO NOT EDIT */\n\n");
-
- for (i=0;i<256;i++) {
+
+ /* Mutibyte tables */
+ for ( ; tables[j].name && tables[j].multibyte; j++) {
+ cd = iconv_open (tables[j].name, UCS);
+ if (cd == (iconv_t) -1)
+ continue;
+
+ for (c = 128, i = 0; c < 65535 && i < 65535; c++) {
+ inbuf = (char *) &c;
+ inleft = sizeof (c);
+ outbuf = in;
+ outleft = sizeof (in);
+
+ if (iconv (cd, &inbuf, &inleft, &outbuf, &outleft) != (size_t) -1) {
+ /* this is a legal character in charset table[j].name */
+ iconv (cd, NULL, NULL, &outbuf, &outleft);
+ encoding_map[i++] |= bit;
+ encoding_map[c] |= bit;
+ } else {
+ /* reset the iconv descriptor */
+ iconv (cd, NULL, NULL, NULL, NULL);
+ }
+ }
+
+ iconv_close (cd);
+
+ tables[j].bit = bit;
+ bit <<= 1;
+ }
+
+ printf ("/* This file is automatically generated: DO NOT EDIT */\n\n");
+
+ /* FIXME: we can condense better than what my quick hack does,
+ but it'd be more work and I'm not sure if it's worth it or
+ not. Currently I'm just making it so that tables that
+ contain all of the same values will only ever be
+ one-of-a-kind by making duplicates into macro aliases for
+ the original */
+
+ table_hash = g_hash_table_new (g_int_hash, g_int_equal);
+
+ for (i = 0; i < 256; i++) {
/* first, do we need this block? */
- for (k=0;k<bytes;k++) {
- for (j=0;j<256;j++) {
- if ((encoding_map[i*256 + j] & (0xff << (k*8))) != 0)
- break;
+ for (k = 0; k < bytes; k++) {
+ int first = encoding_map[i * 256] & (0xff << (k * 8));
+ int same = TRUE;
+ int dump = FALSE;
+
+ for (j = 0; j < 256; j++) {
+ same = same && (encoding_map[i * 256 + j] & (0xff << (k * 8))) == first;
+ if ((encoding_map[i * 256 + j] & (0xff << (k * 8))) != 0)
+ dump = TRUE;
}
- if (j < 256) {
+
+ if (dump) {
+ if (same) {
+ /* this table is aliasable */
+ char *table_name;
+
+ if ((table_name = g_hash_table_lookup (table_hash, &first))) {
+ /* we've already written out a table with the exact same
+ values so we can just alias it with a macro. */
+ printf ("#define m%02x%x %s\n\n", i, k, table_name);
+ continue;
+ } else {
+ table_name = g_strdup_printf ("m%02x%x", i, k);
+ g_hash_table_insert (table_hash, &first, table_name);
+ }
+ }
+
/* yes, dump it */
- printf("static unsigned char m%02x%x[256] = {\n\t", i, k);
- for (j=0;j<256;j++) {
- printf("0x%02x, ", (encoding_map[i*256+j] >> (k*8)) & 0xff );
- if (((j+1)&7) == 0 && j<255)
- printf("\n\t");
+ printf ("static unsigned char m%02x%x[256] = {\n\t", i, k);
+ for (j = 0; j < 256; j++) {
+ printf ("0x%02x, ", (encoding_map[i * 256 + j] >> (k * 8)) & 0xff);
+ if (((j + 1) & 7) == 0 && j < 255)
+ printf ("\n\t");
}
- printf("\n};\n\n");
+ printf ("\n};\n\n");
}
}
}
-
- printf("struct {\n");
- for (k=0;k<bytes;k++) {
- printf("\tunsigned char *bits%d;\n", k);
+
+ printf ("struct {\n");
+ for (k = 0; k < bytes; k++) {
+ printf ("\tunsigned char *bits%d;\n", k);
}
- printf("} camel_charmap[256] = {\n\t");
- for (i=0;i<256;i++) {
+
+ printf ("} camel_charmap[256] = {\n\t");
+ for (i = 0; i < 256; i++) {
/* first, do we need this block? */
- printf("{ ");
- for (k=0;k<bytes;k++) {
- for (j=0;j<256;j++) {
- if ((encoding_map[i*256 + j] & (0xff << (k*8))) != 0)
+ printf ("{ ");
+ for (k = 0; k < bytes; k++) {
+ for (j = 0; j < 256; j++) {
+ if ((encoding_map[i * 256 + j] & (0xff << (k * 8))) != 0)
break;
}
+
if (j < 256) {
- printf("m%02x%x, ", i, k);
+ printf ("m%02x%x, ", i, k);
} else {
- printf("0, ");
+ printf ("0, ");
}
}
- printf("}, ");
- if (((i+1)&7) == 0 && i<255)
- printf("\n\t");
+
+ printf ("}, ");
+ if (((i + 1) & 7) == 0 && i < 255)
+ printf ("\n\t");
}
- printf("\n};\n\n");
-
- printf("struct {\n\tconst char *name;\n\tunsigned int bit;\n} camel_charinfo[] = {\n");
- for (j=0;tables[j].name;j++) {
- printf("\t{ \"%s\", 0x%04x },\n", tables[j].name, tables[j].bit);
+ printf ("\n};\n\n");
+
+ printf ("struct {\n\tconst char *name;\n\tunsigned int bit;\n} camel_charinfo[] = {\n");
+ for (j = 0; tables[j].name; j++) {
+ printf ("\t{ \"%s\", 0x%04x },\n", tables[j].name, tables[j].bit);
}
- printf("};\n\n");
-
+ printf ("};\n\n");
+
printf("#define charset_mask(x) \\\n");
- for (k=0;k<bytes;k++) {
- if (k!=0)
- printf("\t| ");
+ for (k = 0; k < bytes; k++) {
+ if (k != 0)
+ printf ("\t| ");
else
- printf("\t");
- printf("(camel_charmap[(x)>>8].bits%d?camel_charmap[(x)>>8].bits%d[(x)&0xff]<<%d:0)", k, k, k*8);
- if (k<bytes-1)
- printf("\t\\\n");
+ printf ("\t");
+
+ printf ("(camel_charmap[(x) >> 8].bits%d ? camel_charmap[(x) >> 8].bits%d[(x) & 0xff] << %d : 0)",
+ k, k, k * 8);
+
+ if (k < bytes - 1)
+ printf ("\t\\\n");
}
- printf("\n\n");
+ printf ("\n\n");
return 0;
}
@@ -211,6 +293,8 @@
#include <langinfo.h>
#endif
+#include <gal/util/e-iconv.h>
+
void
camel_charset_init (CamelCharset *c)
{
@@ -261,12 +345,19 @@
static const char *
camel_charset_best_mask(unsigned int mask)
{
+ const char *locale_lang, *lang;
int i;
-
- for (i=0;i<sizeof(camel_charinfo)/sizeof(camel_charinfo[0]);i++) {
- if (camel_charinfo[i].bit & mask)
- return camel_charinfo[i].name;
+
+ locale_lang = e_iconv_locale_language ();
+ for (i = 0; i < G_N_ELEMENTS (camel_charinfo); i++) {
+ if (camel_charinfo[i].bit & mask) {
+ lang = e_iconv_charset_language (camel_charinfo[i].name);
+
+ if (!lang || (locale_lang && !strncmp (locale_lang, lang, 2)))
+ return camel_charinfo[i].name;
+ }
}
+
return "UTF-8";
}
Attachment:
smime.p7s
Description: S/MIME cryptographic signature