[gmime] Fixed header fold logic to reuse the rfc2047 tokenizer
- From: Jeffrey Stedfast <fejj src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gmime] Fixed header fold logic to reuse the rfc2047 tokenizer
- Date: Sat, 6 Apr 2013 03:37:31 +0000 (UTC)
commit d39fd2a07f71fc9b5b5dd8d9edabf5f5c7234532
Author: Jeffrey Stedfast <fejj gnome org>
Date: Fri Apr 5 23:36:52 2013 -0400
Fixed header fold logic to reuse the rfc2047 tokenizer
2013-04-05 Jeffrey Stedfast <fejj gnome org>
* gmime/gmime-utils.c (header_fold_tokens): New internal function
that replaces the older header_fold() function. This new one uses
the rfc2047 tokenizer so that we share the same rfc2047 workaround
logic in the tokenizer.
Fixes bug #697407
ChangeLog | 9 +
gmime/gmime-message.c | 16 +-
gmime/gmime-utils.c | 519 +++++++++++++++++++++++++++++++------------------
tests/test-mime.c | 31 +++
4 files changed, 377 insertions(+), 198 deletions(-)
---
diff --git a/ChangeLog b/ChangeLog
index 0eeeed5..41c2040 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2013-04-05 Jeffrey Stedfast <fejj gnome org>
+
+ * gmime/gmime-utils.c (header_fold_tokens): New internal function
+ that replaces the older header_fold() function. This new one uses
+ the rfc2047 tokenizer so that we share the same rfc2047 workaround
+ logic in the tokenizer.
+
+ Fixes bug #697407
+
2013-02-23 Jeffrey Stedfast <fejj gnome org>
* gmime/gmime-filter-html.c: Disable g_warnings unless warnings
diff --git a/gmime/gmime-message.c b/gmime/gmime-message.c
index eae671e..ddf12d2 100644
--- a/gmime/gmime-message.c
+++ b/gmime/gmime-message.c
@@ -50,6 +50,8 @@
**/
extern GMimeEvent *_g_mime_header_list_get_changed_event (GMimeHeaderList *headers);
+extern char *_g_mime_utils_unstructured_header_fold (const char *field, const char *value);
+extern char *_g_mime_utils_structured_header_fold (const char *field, const char *value);
static void g_mime_message_class_init (GMimeMessageClass *klass);
static void g_mime_message_init (GMimeMessage *message, GMimeMessageClass *klass);
@@ -603,13 +605,10 @@ write_received (GMimeStream *stream, const char *name, const char *value)
static ssize_t
write_subject (GMimeStream *stream, const char *name, const char *value)
{
- char *unfolded, *folded;
+ char *folded;
ssize_t n;
- unfolded = g_strdup_printf ("%s: %s\n", name, value);
- folded = g_mime_utils_unstructured_header_fold (unfolded);
- g_free (unfolded);
-
+ folded = _g_mime_utils_unstructured_header_fold (name, value);
n = g_mime_stream_write_string (stream, folded);
g_free (folded);
@@ -627,13 +626,10 @@ write_msgid (GMimeStream *stream, const char *name, const char *value)
static ssize_t
write_structured (GMimeStream *stream, const char *name, const char *value)
{
- char *unfolded, *folded;
+ char *folded;
ssize_t n;
- unfolded = g_strdup_printf ("%s: %s\n", name, value);
- folded = g_mime_utils_structured_header_fold (unfolded);
- g_free (unfolded);
-
+ folded = _g_mime_utils_structured_header_fold (name, value);
n = g_mime_stream_write_string (stream, folded);
g_free (folded);
diff --git a/gmime/gmime-utils.c b/gmime/gmime-utils.c
index afd2b22..c7453e2 100644
--- a/gmime/gmime-utils.c
+++ b/gmime/gmime-utils.c
@@ -1127,193 +1127,6 @@ g_mime_references_get_message_id (const GMimeReferences *ref)
static gboolean
-is_rfc2047_token (const char *inptr, size_t len)
-{
- if (len < 8 || strncmp (inptr, "=?", 2) != 0 || strncmp (inptr + len - 2, "?=", 2) != 0)
- return FALSE;
-
- inptr += 2;
- len -= 2;
-
- /* skip past the charset */
- while (*inptr != '?' && len > 0) {
- inptr++;
- len--;
- }
-
- if (*inptr != '?' || len < 4)
- return FALSE;
-
- if (inptr[1] != 'q' && inptr[1] != 'Q' && inptr[1] != 'b' && inptr[1] != 'B')
- return FALSE;
-
- inptr += 2;
- len -= 2;
-
- if (*inptr != '?')
- return FALSE;
-
- return TRUE;
-}
-
-static char *
-header_fold (const char *in, gboolean structured)
-{
- gboolean last_was_lwsp = FALSE;
- register const char *inptr;
- size_t len, outlen, i;
- size_t fieldlen;
- GString *out;
- char *ret;
-
- inptr = in;
- len = strlen (in);
- if (len <= GMIME_FOLD_LEN + 1)
- return g_strdup (in);
-
- out = g_string_new ("");
- fieldlen = strcspn (inptr, ": \t\n");
- g_string_append_len (out, inptr, fieldlen);
- outlen = fieldlen;
- inptr += fieldlen;
-
- while (*inptr && *inptr != '\n') {
- len = strcspn (inptr, " \t\n");
-
- if (len > 1 && outlen + len > GMIME_FOLD_LEN) {
- if (outlen > 1 && out->len >= fieldlen + 2) {
- if (last_was_lwsp) {
- if (structured)
- out->str[out->len - 1] = '\t';
-
- g_string_insert_c (out, out->len - 1, '\n');
- } else
- g_string_append (out, "\n\t");
-
- outlen = 1;
- }
-
- if (!structured && !is_rfc2047_token (inptr, len)) {
- /* check for very long words, just cut them up */
- while (outlen + len > GMIME_FOLD_LEN) {
- for (i = 0; i < GMIME_FOLD_LEN - outlen; i++)
- g_string_append_c (out, inptr[i]);
- inptr += GMIME_FOLD_LEN - outlen;
- len -= GMIME_FOLD_LEN - outlen;
- g_string_append (out, "\n\t");
- outlen = 1;
- }
- } else {
- g_string_append_len (out, inptr, len);
- outlen += len;
- inptr += len;
- }
- last_was_lwsp = FALSE;
- } else if (len > 0) {
- g_string_append_len (out, inptr, len);
- outlen += len;
- inptr += len;
- last_was_lwsp = FALSE;
- } else {
- last_was_lwsp = TRUE;
- if (*inptr == '\t') {
- /* tabs are a good place to fold, odds
- are that this is where the previous
- mailer folded it */
- g_string_append (out, "\n\t");
- outlen = 1;
- while (is_blank (*inptr))
- inptr++;
- } else {
- g_string_append_c (out, *inptr++);
- outlen++;
- }
- }
- }
-
- if (*inptr == '\n' && out->str[out->len - 1] != '\n')
- g_string_append_c (out, '\n');
-
- ret = out->str;
- g_string_free (out, FALSE);
-
- return ret;
-}
-
-
-/**
- * g_mime_utils_structured_header_fold:
- * @str: input string
- *
- * Folds a structured header according to the rules in rfc822.
- *
- * Returns: an allocated string containing the folded header.
- **/
-char *
-g_mime_utils_structured_header_fold (const char *str)
-{
- return header_fold (str, TRUE);
-}
-
-
-/**
- * g_mime_utils_unstructured_header_fold:
- * @str: input string
- *
- * Folds an unstructured header according to the rules in rfc822.
- *
- * Returns: an allocated string containing the folded header.
- **/
-char *
-g_mime_utils_unstructured_header_fold (const char *str)
-{
- return header_fold (str, FALSE);
-}
-
-
-/**
- * g_mime_utils_header_fold:
- * @str: input string
- *
- * Folds a structured header according to the rules in rfc822.
- *
- * Returns: an allocated string containing the folded header.
- **/
-char *
-g_mime_utils_header_fold (const char *str)
-{
- return header_fold (str, TRUE);
-}
-
-
-/**
- * g_mime_utils_header_printf:
- * @format: string format
- * @Varargs: arguments
- *
- * Allocates a buffer containing a formatted header specified by the
- * @Varargs.
- *
- * Returns: an allocated string containing the folded header specified
- * by @format and the following arguments.
- **/
-char *
-g_mime_utils_header_printf (const char *format, ...)
-{
- char *buf, *ret;
- va_list ap;
-
- va_start (ap, format);
- buf = g_strdup_vprintf (format, ap);
- va_end (ap);
-
- ret = header_fold (buf, TRUE);
- g_free (buf);
-
- return ret;
-}
-
-static gboolean
need_quotes (const char *string)
{
gboolean quoted = FALSE;
@@ -1810,7 +1623,39 @@ quoted_decode (const unsigned char *in, size_t len, unsigned char *out, int *sta
return (size_t) (outptr - out);
}
-#define is_rfc2047_encoded_word(atom, len) (len >= 7 && !strncmp (atom, "=?", 2) && !strncmp (atom + len - 2, "?=", 2))
+
+#if 0
+static gboolean
+is_rfc2047_token (const char *inptr, size_t len)
+{
+ if (len < 8 || strncmp (inptr, "=?", 2) != 0 || strncmp (inptr + len - 2, "?=", 2) != 0)
+ return FALSE;
+
+ inptr += 2;
+ len -= 2;
+
+ /* skip past the charset */
+ while (*inptr != '?' && len > 0) {
+ inptr++;
+ len--;
+ }
+
+ if (*inptr != '?' || len < 4)
+ return FALSE;
+
+ if (inptr[1] != 'q' && inptr[1] != 'Q' && inptr[1] != 'b' && inptr[1] != 'B')
+ return FALSE;
+
+ inptr += 2;
+ len -= 2;
+
+ if (*inptr != '?')
+ return FALSE;
+
+ return TRUE;
+}
+#endif
+
typedef struct _rfc2047_token {
struct _rfc2047_token *next;
@@ -2843,3 +2688,301 @@ g_mime_utils_header_encode_text (const char *text)
return rfc2047_encode (text, IS_ESAFE);
}
+
+
+static char *
+header_fold_tokens (const char *field, const char *value, size_t vlen, rfc2047_token *tokens, gboolean structured)
+{
+ rfc2047_token *token, *next;
+ size_t lwsp, tab, len, n;
+ GString *output;
+
+ len = strlen (field) + 2;
+ output = g_string_sized_new (len + vlen + 1);
+ g_string_append (output, field);
+ g_string_append (output, ": ");
+ lwsp = 0;
+ tab = 0;
+
+ token = tokens;
+ while (token != NULL) {
+ if (is_lwsp (token->text[0])) {
+ for (n = 0; n < token->length; n++) {
+ if (token->text[n] == '\r')
+ continue;
+
+ lwsp = output->len;
+ if (token->text[n] == '\t')
+ tab = output->len;
+
+ g_string_append_c (output, token->text[n]);
+ if (token->text[n] == '\n') {
+ lwsp = tab = 0;
+ len = 0;
+ } else {
+ len++;
+ }
+ }
+
+ if (len == 0 && token->next) {
+ g_string_append_c (output, structured ? '\t' : ' ');
+ len = 1;
+ }
+ } else if (token->encoding != 0) {
+ n = strlen (token->charset) + 7;
+
+ if (len + token->length + n > GMIME_FOLD_LEN) {
+ if (tab != 0) {
+ /* tabs are the perfect breaking opportunity... */
+ g_string_insert_c (output, tab, '\n');
+ len = (lwsp - tab) + 1;
+ } else if (lwsp != 0) {
+ /* break just before the last lwsp character i*/
+ g_string_insert_c (output, lwsp, '\n');
+ len = 1;
+ } else if (len > 1) {
+ /* force a line break... */
+ g_string_append (output, structured ? "\n\t" : "\n ");
+ len = 1;
+ }
+ }
+
+ /* Note: if the encoded-word token is longer than the fold length, oh well...
+ * it probably just means that we are folding a header written by a user-agent
+ * with a different max line length than ours. */
+
+ g_string_append_printf (output, "=?%s?%c?", token->charset, token->encoding);
+ g_string_append_len (output, token->text, token->length);
+ g_string_append (output, "?=");
+ len += token->length + n;
+ lwsp = 0;
+ tab = 0;
+ } else if (len + token->length > GMIME_FOLD_LEN) {
+ if (tab != 0) {
+ /* tabs are the perfect breaking opportunity... */
+ g_string_insert_c (output, tab, '\n');
+ len = (lwsp - tab) + 1;
+ } else if (lwsp != 0) {
+ /* break just before the last lwsp character i*/
+ g_string_insert_c (output, lwsp, '\n');
+ len = 1;
+ } else if (len > 1) {
+ /* force a line break... */
+ g_string_append (output, structured ? "\n\t" : "\n ");
+ len = 1;
+ }
+
+ if (token->length >= GMIME_FOLD_LEN) {
+ /* the token is longer than the allowable line length,
+ * so we'll have to break it apart... */
+ n = GMIME_FOLD_LEN - len;
+ g_string_append_len (output, token->text, n);
+ g_string_append (output, "\n\t");
+ g_string_append_len (output, token->text + n, token->length - n);
+ len = (token->length - n) + 1;
+ } else {
+ g_string_append_len (output, token->text, token->length);
+ len += token->length;
+ }
+
+ lwsp = 0;
+ tab = 0;
+ } else {
+ g_string_append_len (output, token->text, token->length);
+ len += token->length;
+ lwsp = 0;
+ tab = 0;
+ }
+
+ next = token->next;
+ rfc2047_token_free (token);
+ token = next;
+ }
+
+ if (output->str[output->len - 1] != '\n')
+ g_string_append_c (output, '\n');
+
+ return g_string_free (output, FALSE);
+}
+
+
+/**
+ * g_mime_utils_structured_header_fold:
+ * @header: header field and value string
+ *
+ * Folds a structured header according to the rules in rfc822.
+ *
+ * Returns: an allocated string containing the folded header.
+ **/
+char *
+g_mime_utils_structured_header_fold (const char *header)
+{
+ rfc2047_token *tokens;
+ const char *value;
+ char *folded;
+ char *field;
+ size_t len;
+
+ if (header == NULL)
+ return NULL;
+
+ value = header;
+ while (*value && *value != ':')
+ value++;
+
+ if (*value == '\0')
+ return NULL;
+
+ field = g_strndup (header, value - header);
+
+ value++;
+ while (*value && is_lwsp (*value))
+ value++;
+
+ tokens = tokenize_rfc2047_phrase (value, &len);
+ folded = header_fold_tokens (field, value, len, tokens, TRUE);
+ g_free (field);
+
+ return folded;
+}
+
+
+/**
+ * _g_mime_utils_structured_header_fold:
+ * @field: header field
+ * @value: header value
+ *
+ * Folds a structured header according to the rules in rfc822.
+ *
+ * Returns: an allocated string containing the folded header.
+ **/
+char *
+_g_mime_utils_structured_header_fold (const char *field, const char *value)
+{
+ rfc2047_token *tokens;
+ size_t len;
+
+ if (field == NULL)
+ return NULL;
+
+ if (value == NULL)
+ return g_strdup_printf ("%s: \n", field);
+
+ tokens = tokenize_rfc2047_phrase (value, &len);
+
+ return header_fold_tokens (field, value, len, tokens, TRUE);
+}
+
+
+/**
+ * g_mime_utils_unstructured_header_fold:
+ * @header: header field and value string
+ *
+ * Folds an unstructured header according to the rules in rfc822.
+ *
+ * Returns: an allocated string containing the folded header.
+ **/
+char *
+g_mime_utils_unstructured_header_fold (const char *header)
+{
+ rfc2047_token *tokens;
+ const char *value;
+ char *folded;
+ char *field;
+ size_t len;
+
+ if (header == NULL)
+ return NULL;
+
+ value = header;
+ while (*value && *value != ':')
+ value++;
+
+ if (*value == '\0')
+ return NULL;
+
+ field = g_strndup (header, value - header);
+
+ value++;
+ while (*value && is_lwsp (*value))
+ value++;
+
+ tokens = tokenize_rfc2047_text (value, &len);
+ folded = header_fold_tokens (field, value, len, tokens, FALSE);
+ g_free (field);
+
+ return folded;
+}
+
+
+/**
+ * _g_mime_utils_unstructured_header_fold:
+ * @field: header field
+ * @value: header value
+ *
+ * Folds an unstructured header according to the rules in rfc822.
+ *
+ * Returns: an allocated string containing the folded header.
+ **/
+char *
+_g_mime_utils_unstructured_header_fold (const char *field, const char *value)
+{
+ rfc2047_token *tokens;
+ size_t len;
+
+ if (field == NULL)
+ return NULL;
+
+ if (value == NULL)
+ return g_strdup_printf ("%s: \n", field);
+
+ tokens = tokenize_rfc2047_text (value, &len);
+
+ return header_fold_tokens (field, value, len, tokens, FALSE);
+}
+
+
+/**
+ * g_mime_utils_header_fold:
+ * @header: header field and value string
+ *
+ * Folds a structured header according to the rules in rfc822.
+ *
+ * Returns: an allocated string containing the folded header.
+ *
+ * WARNING: This function is obsolete. Use
+ * g_mime_utils_structured_header_fold() instead.
+ **/
+char *
+g_mime_utils_header_fold (const char *header)
+{
+ return g_mime_utils_structured_header_fold (header);
+}
+
+
+/**
+ * g_mime_utils_header_printf:
+ * @format: string format
+ * @Varargs: arguments
+ *
+ * Allocates a buffer containing a formatted header specified by the
+ * @Varargs.
+ *
+ * Returns: an allocated string containing the folded header specified
+ * by @format and the following arguments.
+ **/
+char *
+g_mime_utils_header_printf (const char *format, ...)
+{
+ char *buf, *ret;
+ va_list ap;
+
+ va_start (ap, format);
+ buf = g_strdup_vprintf (format, ap);
+ va_end (ap);
+
+ ret = g_mime_utils_unstructured_header_fold (buf);
+ g_free (buf);
+
+ return ret;
+}
diff --git a/tests/test-mime.c b/tests/test-mime.c
index eaf7f3d..b711c62 100644
--- a/tests/test-mime.c
+++ b/tests/test-mime.c
@@ -490,6 +490,36 @@ test_rfc2047 (gboolean test_broken)
#endif
}
+static struct {
+ const char *input;
+ const char *folded;
+} header_folding[] = {
+ { "Subject: qqqq wwwwwww [eee 1234]=?UTF-8?Q?=20=D0=95=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=20=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=20=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=20=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC?=",
+ "Subject: qqqq wwwwwww [eee 1234]\n =?UTF-8?Q?=20=D0=95=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=20=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=20=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=20=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC=D0=BC?=\n" },
+};
+
+static void
+test_header_folding (void)
+{
+ char *folded;
+ guint i;
+
+ for (i = 0; i < G_N_ELEMENTS (header_folding); i++) {
+ folded = NULL;
+ testsuite_check ("header_folding[%u]", i);
+ try {
+ folded = g_mime_utils_unstructured_header_fold (header_folding[i].input);
+ if (strcmp (header_folding[i].folded, folded) != 0)
+ throw (exception_new ("folded text does not match: -->%s<-- vs -->%s<--", header_folding[i].folded, folded));
+
+ testsuite_check_passed ();
+ } catch (ex) {
+ testsuite_check_failed ("header_folding[%u]: %s", i, ex->message);
+ } finally;
+
+ g_free (folded);
+ }
+}
static struct {
const char *input;
@@ -628,6 +658,7 @@ int main (int argc, char **argv)
g_mime_init (GMIME_ENABLE_RFC2047_WORKAROUNDS);
testsuite_start ("broken rfc2047 encoding/decoding");
+ test_header_folding ();
test_addrspec (TRUE);
test_rfc2047 (TRUE);
testsuite_end ();
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]