Patch: Better multipart handling + HTML-to-text conversion on reply
- From: Toralf Lund <toralf kscanners com>
- To: Balsa Mailing List <balsa-list gnome org>
- Subject: Patch: Better multipart handling + HTML-to-text conversion on reply
- Date: Tue, 13 Nov 2001 10:27:43 +0100
A rerun of some of my favourite updates. The attached patch
will:
1. Provide a better part selection routine.
2. Implement part selection when quoting a message.
3. Introduce an HTML-to-text conversion step for replies to messages that
are purely HTML.
--
Toralf Lund <toralf@kscanners.com> +47 66 85 51 22
Kongsberg Scanners AS +47 66 85 51 00 (switchboard)
http://www.kscanners.no/~toralf +47 66 85 51 01 (fax)
Index: libbalsa/mime.c
===================================================================
RCS file: /cvs/gnome/balsa/libbalsa/mime.c,v
retrieving revision 1.41
diff -u -b -r1.41 mime.c
--- libbalsa/mime.c 2001/09/23 18:04:48 1.41
+++ libbalsa/mime.c 2001/11/13 09:18:24
@@ -20,21 +20,216 @@
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
* 02111-1307, USA.
*/
+#include <stdlib.h>
#include <string.h>
+#include <ctype.h>
#include "config.h"
#include "libbalsa.h"
#include "mailbackend.h"
-/* FIXME: The content of this file could go to message.c */
+#include <sys/wait.h>
+#include <sys/types.h>
+#include <libgnome/libgnome.h>
+
+
+
+
+#define HTML_TO_TEXT "lynx -force_html -dump file:%s > %s" /* *** FIXME: Set via ./configure */
-static GString *process_mime_multipart(LibBalsaMessage * message,
+
+static GString *
+process_mime_multipart(LibBalsaMessage * message,
LibBalsaMessageBody * body,
- gchar * reply_prefix_str,
- gint llen, gboolean ignore_html,
- gboolean flow);
+ gchar * reply_prefix_str, gint llen,
+ gboolean ignore_html, gboolean flow);
+
+/* FIXME: The content of this file could go to message.c */
+
+static gchar char_ref_char(const gchar *char_ref)
+/* Map the body of an HTML character reference to a single character.
+
+   Pre    : char_ref is the text between '&' and ';', i.e. "lt" for "&lt;"
+            and "#65" (or "#x41") for a numeric reference.
+   Returns: the character the reference stands for. Numeric references
+            outside 1..255 (or malformed ones) yield '?', since they cannot
+            be stored in one gchar and a NUL would cut the output short.
+   Remarks: The non-ASCII results are written as ISO-8859-1 byte values so
+            that they do not depend on the encoding of this source file.
+            NOTE(review): assumes the reply text is Latin-1 - confirm. */
+{
+    if (char_ref[0] == '#') {
+        const gchar *digits = char_ref + 1;
+        int base = 10;
+        char *end;
+        long code;
+
+        if (*digits == 'x' || *digits == 'X') {
+            digits++;
+            base = 16;
+        }
+
+        code = strtol(digits, &end, base);
+        if (end == digits || *end != '\0' || code < 1 || code > 255)
+            return '?';
+        return (gchar) code;
+    }
+
+    /* These are essential: */
+    if (g_strcasecmp(char_ref, "lt") == 0)
+        return '<';
+    if (g_strcasecmp(char_ref, "gt") == 0)
+        return '>';
+    if (g_strcasecmp(char_ref, "amp") == 0)
+        return '&';
+    if (g_strcasecmp(char_ref, "nbsp") == 0)
+        return ' ';
+
+    /* Some special characters the author uses a lot. Names are case
+       sensitive here because case selects the upper/lower-case letter. */
+    if (strcmp(char_ref, "aelig") == 0)
+        return (gchar) 0xE6;
+    if (strcmp(char_ref, "AElig") == 0)
+        return (gchar) 0xC6;
+    if (strcmp(char_ref, "oslash") == 0)
+        return (gchar) 0xF8;
+    if (strcmp(char_ref, "Oslash") == 0)
+        return (gchar) 0xD8;
+    if (strcmp(char_ref, "aring") == 0)
+        return (gchar) 0xE5;
+    if (strcmp(char_ref, "Aring") == 0)
+        return (gchar) 0xC5;
+
+    /* Return first character for the rest. Often makes sense because the
+       reference is "<character base><info on accent etc.>" */
+    return char_ref[0];
+}
+
+static gchar *extract_tag(gchar **html, gchar end_sep)
+/* Cut out the text of a tag or character reference.
+
+   Pre    : *html is at start of tag, i.e. points to '<' or similar.
+   Post   : *html points to the position after end_sep, or to the
+            terminating NUL when end_sep does not occur. It never moves
+            past the end of the string.
+   Returns: newly allocated copy of the text between the start character
+            and end_sep (both excluded); release with g_free(). */
+{
+    gchar *start = *html;
+    gchar *end;
+
+    if (*start == '\0')         /* Nothing to extract, nothing to skip. */
+        return g_strdup("");
+
+    end = strchr(start + 1, end_sep);
+
+    if (end) {
+        *html = end + 1;
+    } else {
+        /* No terminator: the tag runs to the end of the string. Stop ON
+           the NUL so that the caller's loop terminates. */
+        end = start + strlen(start);
+        *html = end;
+    }
+
+    return g_strndup(start + 1, end - start - 1);
+}
+
+static size_t char_ref_length(const gchar *amp)
+/* Pre    : amp points to '&'.
+   Returns: length of the reference body when amp starts something that
+            looks like "&name;" or "&#123;" (1-10 alphanumerics/'#'
+            followed by ';'), 0 for a bare ampersand. */
+{
+    const gchar *p = amp + 1;
+    size_t len = 0;
+
+    while (len < 10 && (isalnum((unsigned char) *p) || *p == '#')) {
+        p++;
+        len++;
+    }
+
+    return (len > 0 && *p == ';') ? len : 0;
+}
+
+static gchar *convert_html_internal(const gchar *html)
+/* Very small HTML-to-text converter.
+
+   Remarks: Fall-back used when no real HTML converter was configured, or
+            call failed.
+            - Everything from <head...> up to </head> or <body...> is
+              dropped.
+            - br/tr give a newline; p and h<digit> a blank line; li gives
+              a "*<TAB>" bullet; td/th start a new indented paragraph.
+            - A tag containing href= contributes "[<target>]".
+            - Character references are decoded with char_ref_char(); an
+              '&' that does not start a reference is copied unchanged.
+            - Runs of white space collapse to one blank.
+            - An unterminated tag ends the conversion.
+   Returns: newly allocated string, to be released with g_free(). The
+            output is never longer than the input, so one buffer of
+            strlen(html)+1 zeroed bytes is enough and also provides the
+            terminating NUL. */
+{
+    gchar *txt = g_malloc0(strlen(html) + 1);
+    gchar *c_html = (gchar *) html, *c_txt = txt;
+    gboolean skip = FALSE;
+
+    while (*c_html) {
+        if (*c_html == '<') {
+            gchar *tag;
+
+            if (!strchr(c_html, '>'))   /* No '>', skip rest of string. */
+                break;
+
+            tag = extract_tag(&c_html, '>');
+
+            if (g_strncasecmp(tag, "head", 4) == 0) {
+                skip = TRUE;
+            } else if (g_strncasecmp(tag, "body", 4) == 0 ||
+                       g_strcasecmp(tag, "/head") == 0) {
+                skip = FALSE;
+            }
+
+            if (!skip) {
+                if (g_strcasecmp(tag, "br") == 0 ||
+                    g_strcasecmp(tag, "tr") == 0) {
+                    *c_txt++ = '\n';
+                } else if (g_strcasecmp(tag, "p") == 0 ||
+                           (tolower((unsigned char) tag[0]) == 'h' &&
+                            isdigit((unsigned char) tag[1]))) {
+                    *c_txt++ = '\n';
+                    *c_txt++ = '\n';
+                } else if (g_strcasecmp(tag, "li") == 0) {
+                    *c_txt++ = '\n';
+                    *c_txt++ = '*';
+                    *c_txt++ = '\t';
+                } else if (g_strcasecmp(tag, "td") == 0 ||
+                           g_strcasecmp(tag, "th") == 0) {
+                    /* Note: Can't do a lot more than starting a new
+                       paragraph (more or less), since we are not
+                       able to interleave text from different cells. */
+                    *c_txt++ = '\n';
+                    *c_txt++ = '\n';
+                    *c_txt++ = '\t';
+                } else {
+                    gchar *ref = strstr(tag, "href=");
+
+                    if (!ref) {         /* *** Want 'strcasestr' */
+                        ref = strstr(tag, "HREF=");
+                    }
+
+                    if (ref) {
+                        *c_txt++ = '[';
+                        ref += 5;
+
+                        while (*ref && *ref != ' ')
+                            *c_txt++ = *ref++;
+                        *c_txt++ = ']';
+                    }
+                }
+            }
+            g_free(tag);
+        } else if (skip) {
+            c_html++;
+        } else if (*c_html == '&' && char_ref_length(c_html) > 0) {
+            gchar *char_ref = extract_tag(&c_html, ';');
+
+            *c_txt++ = char_ref_char(char_ref);
+            g_free(char_ref);
+        } else if (*c_html == '\n' || *c_html == ' ' || *c_html == '\t') {
+            c_html++;
+
+            /* Only look at the previous output character once we know
+               there is one. */
+            if (c_txt != txt) {
+                gchar prev = c_txt[-1];
+
+                if (prev != ' ' && prev != '\n' && prev != '\t')
+                    *c_txt++ = ' ';
+            }
+        } else {
+            *c_txt++ = *c_html++;
+        }
+    }
+
+    return txt;
+}
+
+
+gchar *convert_html(const gchar *html)
+/* Convert HTML source to plain text.
+
+   When HTML_TO_TEXT is defined, the HTML is written to a temporary file
+   and the HTML_TO_TEXT shell command is run (through the user's shell)
+   to produce a second temporary file, whose contents are returned.
+   If the command cannot be built or started, does not exit with status
+   0, or yields nothing, the built-in convert_html_internal() is used
+   instead.
+
+   Returns: the converted text. The caller in this file releases it with
+            g_free().
+            NOTE(review): assumes libbalsa_readfile() stores a
+            g_free()-able buffer in *txt - confirm. */
+{
+#ifdef HTML_TO_TEXT
+    gchar html_file[PATH_MAX + 1], txt_file[PATH_MAX + 1];
+    gchar *txt = NULL;
+    FILE *fp;
+
+    libbalsa_lock_mutt();
+    mutt_mktemp(html_file);
+    mutt_mktemp(txt_file);
+    libbalsa_unlock_mutt();
+
+    fp = safe_fopen(html_file, "w");
+
+    if (fp) {
+        /* Room for the command template plus both file names. */
+        gchar cmd[2 * PATH_MAX + sizeof(HTML_TO_TEXT)];
+        gboolean written = (fputs(html, fp) != EOF);
+        int cmd_len;
+
+        if (fclose(fp) != 0)
+            written = FALSE;
+
+        cmd_len = snprintf(cmd, sizeof(cmd), HTML_TO_TEXT,
+                           html_file, txt_file);
+
+        /* Never run a truncated command or convert a partial file. */
+        if (written && cmd_len > 0 && (size_t) cmd_len < sizeof(cmd)) {
+            /* Note: Should probably find gnome_ alternative to fork() +
+               exec(), but this is at least better than system().
+               (gnome_execute_shell is not what we want as it will start
+               the process in the background.) */
+            pid_t cmd_pid = fork();
+
+            if (cmd_pid == 0) {
+                gchar *shell = gnome_util_user_shell();
+
+                execl(shell, shell, "-c", cmd, (char *) NULL);
+                _exit(1);       /* Only reached when exec fails */
+            } else if (cmd_pid > 0) {
+                int status = 0;
+
+                /* Only trust the output of a converter that succeeded;
+                   the shell redirection creates the output file even
+                   when the command itself could not be run. */
+                if (waitpid(cmd_pid, &status, 0) == cmd_pid &&
+                    WIFEXITED(status) && WEXITSTATUS(status) == 0) {
+                    fp = fopen(txt_file, "r");
+                    if (fp) {
+                        libbalsa_readfile(fp, &txt);
+                        fclose(fp);
+                    }
+                }
+            }
+        }
+    }
+    unlink(txt_file);
+    unlink(html_file);
+
+    if (txt)
+        return txt;
+
+#endif
+    return convert_html_internal(html);
+}
+
+
+
/* process_mime_part:
returns string representation of given message part.
NOTE: may return NULL(!).
@@ -48,6 +243,9 @@
size_t alloced;
gchar *res = NULL;
GString *reply = NULL;
+ gchar *content_type = libbalsa_message_body_get_content_type(body);
+ gboolean ishtml=(g_strcasecmp(content_type, "text/html") == 0);
+
switch (libbalsa_message_body_type(body)) {
case LIBBALSA_MESSAGE_BODY_TYPE_OTHER:
@@ -63,11 +261,6 @@
llen, ignore_html, flow);
break;
case LIBBALSA_MESSAGE_BODY_TYPE_TEXT:
- /* don't return text/html stuff... */
- if (ignore_html && body->mutt_body->subtype &&
- !strcmp("html", body->mutt_body->subtype))
- break;
-
libbalsa_message_body_save_temporary(body, NULL);
part = fopen(body->temp_filename, "r");
@@ -78,6 +271,16 @@
if (!res)
break;
+ if(ishtml) {
+ gchar *res_ascii=convert_html(res);
+
+ if(res_ascii) {
+ g_free(res);
+ res=res_ascii;
+ }
+ }
+
+
if (llen > 0) {
if (flow && libbalsa_flowed_rfc2646(body)) {
/* we're making a `format=flowed' message, and the
@@ -126,6 +329,8 @@
g_free(res);
break;
}
+ g_free(content_type);
+
return reply;
}
@@ -137,10 +342,22 @@
{
LibBalsaMessageBody *part;
GString *res = NULL, *s;
+ gchar *content_type = libbalsa_message_body_get_content_type(body);
+ fprintf(stderr, "process_mime_multipart\n");
+
+ if(g_strcasecmp(content_type, "multipart/alternative")==0) {
+ part = preferred_part(body->parts, ignore_html);
+ if(part)
+ res = process_mime_part(message, part, reply_prefix_str, llen,
+ ignore_html, flow);
+ } else {
for (part = body->parts; part; part = part->next) {
+ if(part->mutt_body->disposition==DISPINLINE)
s = process_mime_part(message, part, reply_prefix_str, llen,
ignore_html, flow);
+ else
+ s = NULL;
if (!s)
continue;
if (res) {
@@ -149,6 +366,9 @@
} else
res = s;
}
+ }
+
+ g_free(content_type);
return res;
}
@@ -173,4 +393,38 @@
}
return reply;
+}
+
+LibBalsaMessageBody*
+preferred_part(LibBalsaMessageBody *parts, gboolean ignore_html)
+/* Choose one part from a list of alternatives.
+
+   Remarks: The LAST suitable part wins, which should be considered the
+            best according to RFC 1341. A part is suitable when it is
+            - text/plain, or
+            - text/html or multipart/related, unless ignore_html is set, or
+            - any other text/ subtype, but only while nothing has been
+              chosen yet.
+   Returns: the chosen part; the last part of the list when none is
+            suitable; NULL for an empty list. */
+{
+    /* TODO: - Look for additional specific types, and have more flags
+               to control their selection?
+             - Let user preferences or previous selections override order
+               (e.g. choose text/html part even when text/plain is included
+               after it)? */
+    LibBalsaMessageBody *cur;
+    LibBalsaMessageBody *chosen = NULL, *final = NULL;
+
+    for (cur = parts; cur; cur = cur->next) {
+        gchar *type = libbalsa_message_body_get_content_type(cur);
+        gboolean suitable;
+
+        if (g_strcasecmp(type, "text/plain") == 0) {
+            suitable = TRUE;
+        } else if (!ignore_html &&
+                   (g_strcasecmp(type, "text/html") == 0 ||
+                    g_strcasecmp(type, "multipart/related") == 0)) {
+            suitable = TRUE;
+        } else {
+            suitable = (!chosen && g_strncasecmp(type, "text/", 5) == 0);
+        }
+
+        if (suitable)
+            chosen = cur;
+
+        final = cur;
+
+        g_free(type);
+    }
+
+    return chosen ? chosen : final;
}
Index: libbalsa/mime.h
===================================================================
RCS file: /cvs/gnome/balsa/libbalsa/mime.h,v
retrieving revision 1.16
diff -u -b -r1.16 mime.h
--- libbalsa/mime.h 2001/09/23 18:04:48 1.16
+++ libbalsa/mime.h 2001/11/13 09:18:24
@@ -31,8 +31,14 @@
LibBalsaMessageBody * body,
gchar * reply_prefix_str, gint llen,
gboolean ignore_html, gboolean flow);
+
GString *content2reply(LibBalsaMessage * message,
gchar * reply_prefix_str, gint llen,
gboolean ignore_html, gboolean flow); /* arp */
+
+gchar *convert_html(const gchar *html);
+
+LibBalsaMessageBody*
+preferred_part(LibBalsaMessageBody *parts, gboolean ignore_html);
#endif /* __LIBBALSA_MIME_H__ */
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]