[gmime] Cleaned up previous patch a bit...
- From: Jeffrey Stedfast <fejj src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gmime] Cleaned up previous patch a bit...
- Date: Wed, 10 Jan 2018 15:28:34 +0000 (UTC)
commit 8a01b0b0a15dd10745ad889f65bd92c14fea6a5d
Author: Jeffrey Stedfast <jestedfa microsoft com>
Date: Fri Dec 22 13:08:50 2017 -0500
Cleaned up previous patch a bit...
gmime/gmime-parser.c | 60 +++++++++++++++++++++++++--------------------
gmime/gmime-utils.c | 66 ++++++++++++++++++++++++++++---------------------
2 files changed, 71 insertions(+), 55 deletions(-)
---
diff --git a/gmime/gmime-parser.c b/gmime/gmime-parser.c
index 857f097..ef65d7d 100644
--- a/gmime/gmime-parser.c
+++ b/gmime/gmime-parser.c
@@ -962,21 +962,25 @@ has_content_headers (GPtrArray *headers)
return FALSE;
}
-#define warn_bad_header G_STMT_START {
\
- if (can_warn) {
\
- gchar *eol;
\
- gchar *bad_header;
\
-
\
- for (eol = inptr; eol < inend && eol[0] != '\r' && eol[0] != '\n'; eol++);
\
- bad_header = g_strndup (start, eol - start);
\
- _g_mime_parser_options_warn (options, priv->header_offset, GMIME_CRIT_INVALID_HEADER_NAME,
bad_header); \
- g_free (bad_header);
\
- }
\
-} G_STMT_END
+static void
+warn_invalid_header (GMimeParser *parser, GMimeParserOptions *options, const char *start, const char *inptr,
const char *inend)
+{
+ struct _GMimeParserPrivate *priv = parser->priv;
+ const char *eoln = inptr;
+ char *header;
+
+ while (eoln < inend && *eoln != '\r' && *eoln != '\n')
+ eoln++;
+
+ header = g_strndup (start, eoln - start);
+ _g_mime_parser_options_warn (options, priv->header_offset, GMIME_CRIT_INVALID_HEADER_NAME, header);
+ g_free (header);
+}
static int
parser_step_headers (GMimeParser *parser, GMimeParserOptions *options)
{
+ gboolean can_warn = g_mime_parser_options_get_warning_callback (options) != NULL;
struct _GMimeParserPrivate *priv = parser->priv;
gboolean eoln, valid = TRUE, fieldname = TRUE;
gboolean continuation = FALSE;
@@ -1057,11 +1061,9 @@ parser_step_headers (GMimeParser *parser, GMimeParserOptions *options)
}
if (!valid) {
- gboolean can_warn = g_mime_parser_options_get_warning_callback
(options) != NULL;
-
- if (priv->format == GMIME_FORMAT_MBOX &&
- is_mbox_marker (start, (size_t) (inptr - start), FALSE)) {
- warn_bad_header;
+ if (priv->format == GMIME_FORMAT_MBOX && is_mbox_marker (start,
(size_t) (inptr - start), FALSE)) {
+ if (can_warn)
+ warn_invalid_header (parser, options, start, inptr,
inend);
goto next_message;
}
@@ -1071,14 +1073,16 @@ parser_step_headers (GMimeParser *parser, GMimeParserOptions *options)
/* probably the start of the content,
* a broken mailer didn't terminate the
* headers with an empty line. *sigh* */
- warn_bad_header;
+ if (can_warn)
+ warn_invalid_header (parser, options,
start, inptr, inend);
goto content_start;
}
} else if (has_content_headers (priv->headers)) {
/* probably the start of the content,
* a broken mailer didn't terminate the
* headers with an empty line. *sigh* */
- warn_bad_header;
+ if (can_warn)
+ warn_invalid_header (parser, options, start,
inptr, inend);
goto content_start;
}
} else if (priv->state == GMIME_PARSER_STATE_MESSAGE_HEADERS) {
@@ -1086,8 +1090,9 @@ parser_step_headers (GMimeParser *parser, GMimeParserOptions *options)
* headers, but remain lenient with lines starting with
* "From " or ">From ". */
if (!is_mbox_marker (start, (size_t) (inptr - start), TRUE)) {
+ if (can_warn)
+ warn_invalid_header (parser, options, start,
inptr, inend);
priv->state = GMIME_PARSER_STATE_ERROR;
- warn_bad_header;
return -1;
}
}
@@ -1647,19 +1652,20 @@ check_header_conflict (GMimeParserOptions *options, GMimeObject *object, const H
}
static int
-compare_header(const void *a, const void *b)
+compare_header (const void *a, const void *b)
{
return g_ascii_strcasecmp ((const gchar *) a, * (const gchar **) b);
}
+/* headers which may exist only once according to RFC 5322, Sect. 3.6 (keep the list sorted) */
+static const char *rfc5322_single_hdr[] = {
+ "bcc", "cc", "date", "from", "in-reply-to", "message-id", "references", "reply-to", "sender",
"subject", "to"
+};
+
static void
check_repeated_header (GMimeParserOptions *options, GMimeObject *object, const Header *header)
{
- /* headers which may exist only once according to RFC 5322, Sect. 3.6 (keep the list sorted) */
- static const gchar *rfc5322_single_hdr[] =
- { "bcc", "cc", "date", "from", "in-reply-to", "message-id", "references", "reply-to",
"sender", "subject", "to" };
-
- if (bsearch(header->name, rfc5322_single_hdr, G_N_ELEMENTS (rfc5322_single_hdr), sizeof (gchar *),
compare_header))
+ if (bsearch (header->name, rfc5322_single_hdr, G_N_ELEMENTS (rfc5322_single_hdr), sizeof (char *),
compare_header))
check_header_conflict (options, object, header);
}
@@ -1670,9 +1676,9 @@ parser_scan_message_part (GMimeParser *parser, GMimeParserOptions *options, GMim
ContentType *content_type;
GMimeMessage *message;
GMimeObject *object;
+ gboolean can_warn;
Header *header;
guint i;
- gboolean can_warn;
g_assert (priv->state == GMIME_PARSER_STATE_CONTENT);
@@ -2041,10 +2047,10 @@ parser_construct_message (GMimeParser *parser, GMimeParserOptions *options)
GMimeObject *object;
BoundaryType found;
const char *inptr;
+ gboolean can_warn;
Header *header;
char *endptr;
guint i;
- gboolean can_warn;
/* scan the from-line if we are parsing an mbox */
while (priv->state != GMIME_PARSER_STATE_MESSAGE_HEADERS) {
diff --git a/gmime/gmime-utils.c b/gmime/gmime-utils.c
index 56973d6..b5ccb37 100644
--- a/gmime/gmime-utils.c
+++ b/gmime/gmime-utils.c
@@ -1374,7 +1374,6 @@ typedef struct _rfc2047_token {
size_t length;
char encoding;
char is_8bit;
- char sp_in_encword;
} rfc2047_token;
#define rfc2047_token_list_free(tokens) g_slice_free_chain (rfc2047_token, tokens, next)
@@ -1469,17 +1468,17 @@ rfc2047_token_new_encoded_word (const char *word, size_t len)
token = rfc2047_token_new (payload, inptr - payload);
token->charset = g_mime_charset_iconv_name (charset);
token->encoding = encoding;
- /* RFC 2047 forbids SP in the encoded-word */
- token->sp_in_encword = memchr(token->text, ' ', token->length) ? 1 : 0;
-
+
return token;
}
static rfc2047_token *
-tokenize_rfc2047_phrase (GMimeParserOptions *options, const char *in, size_t *len)
+tokenize_rfc2047_phrase (GMimeParserOptions *options, const char *in, size_t *len, gint64 offset)
{
+ gboolean can_warn = g_mime_parser_options_get_warning_callback (options) != NULL;
rfc2047_token list, *lwsp, *token, *tail;
register const char *inptr = in;
+ gboolean has_specials = FALSE;
GMimeRfcComplianceMode mode;
gboolean encoded = FALSE;
const char *text, *word;
@@ -1510,12 +1509,17 @@ tokenize_rfc2047_phrase (GMimeParserOptions *options, const char *in, size_t *le
* have been merged with other
* words. */
+ has_specials = FALSE;
+
if (!strncmp (inptr, "=?", 2)) {
inptr += 2;
/* skip past the charset (if one is even declared, sigh) */
while (*inptr && *inptr != '?') {
- ascii = ascii && is_ascii (*inptr);
+ if (!is_atom (*inptr)) {
+ ascii = ascii && is_ascii (*inptr);
+ has_specials = TRUE;
+ }
inptr++;
}
@@ -1527,12 +1531,16 @@ tokenize_rfc2047_phrase (GMimeParserOptions *options, const char *in, size_t *le
/* find the end of the rfc2047 encoded word token */
while (*inptr && strncmp (inptr, "?=", 2) != 0) {
- ascii = ascii && is_ascii (*inptr);
+ if (!is_atom (*inptr)) {
+ ascii = ascii && is_ascii (*inptr);
+ has_specials = TRUE;
+ }
inptr++;
}
if (*inptr == '\0') {
/* didn't find an end marker... */
+ has_specials = FALSE;
inptr = word + 2;
ascii = TRUE;
@@ -1554,6 +1562,9 @@ tokenize_rfc2047_phrase (GMimeParserOptions *options, const char *in, size_t *le
n = (size_t) (inptr - word);
if ((token = rfc2047_token_new_encoded_word (word, n))) {
+ if (can_warn && has_specials)
+ _g_mime_parser_options_warn (options, offset,
GMIME_WARN_INVALID_RFC2047_HEADER_VALUE, in);
+
/* rfc2047 states that you must ignore all
* whitespace between encoded words */
if (!encoded && lwsp != NULL) {
@@ -1611,10 +1622,12 @@ tokenize_rfc2047_phrase (GMimeParserOptions *options, const char *in, size_t *le
}
static rfc2047_token *
-tokenize_rfc2047_text (GMimeParserOptions *options, const char *in, size_t *len)
+tokenize_rfc2047_text (GMimeParserOptions *options, const char *in, size_t *len, gint64 offset)
{
+ gboolean can_warn = g_mime_parser_options_get_warning_callback (options) != NULL;
rfc2047_token list, *lwsp, *token, *tail;
register const char *inptr = in;
+ gboolean has_specials = FALSE;
GMimeRfcComplianceMode mode;
gboolean encoded = FALSE;
const char *text, *word;
@@ -1641,12 +1654,16 @@ tokenize_rfc2047_text (GMimeParserOptions *options, const char *in, size_t *len)
ascii = TRUE;
if (G_LIKELY (mode == GMIME_RFC_COMPLIANCE_LOOSE)) {
+ has_specials = FALSE;
+
if (!strncmp (inptr, "=?", 2)) {
inptr += 2;
/* skip past the charset (if one is even declared, sigh) */
while (*inptr && *inptr != '?') {
ascii = ascii && is_ascii (*inptr);
+ if (is_lwsp (*inptr))
+ has_specials = TRUE;
inptr++;
}
@@ -1659,11 +1676,14 @@ tokenize_rfc2047_text (GMimeParserOptions *options, const char *in, size_t *len)
/* find the end of the rfc2047 encoded word token */
while (*inptr && strncmp (inptr, "?=", 2) != 0) {
ascii = ascii && is_ascii (*inptr);
+ if (is_lwsp (*inptr))
+ has_specials = TRUE;
inptr++;
}
if (*inptr == '\0') {
/* didn't find an end marker... */
+ has_specials = FALSE;
inptr = word + 2;
ascii = TRUE;
@@ -1675,8 +1695,7 @@ tokenize_rfc2047_text (GMimeParserOptions *options, const char *in, size_t *len)
non_rfc2047:
/* stop if we encounter a possible rfc2047 encoded
* token even if it's inside another word, sigh. */
- while (*inptr && !is_lwsp (*inptr) &&
- strncmp (inptr, "=?", 2) != 0) {
+ while (*inptr && !is_lwsp (*inptr) && strncmp (inptr, "=?", 2) != 0) {
ascii = ascii && is_ascii (*inptr);
inptr++;
}
@@ -1690,6 +1709,9 @@ tokenize_rfc2047_text (GMimeParserOptions *options, const char *in, size_t *len)
n = (size_t) (inptr - word);
if ((token = rfc2047_token_new_encoded_word (word, n))) {
+ if (can_warn && has_specials)
+ _g_mime_parser_options_warn (options, offset,
GMIME_WARN_INVALID_RFC2047_HEADER_VALUE, in);
+
/* rfc2047 states that you must ignore all
* whitespace between encoded words */
if (!encoded && lwsp != NULL) {
@@ -1865,14 +1887,6 @@ rfc2047_decode_tokens (GMimeParserOptions *options, rfc2047_token *tokens, size_
return g_string_free (decoded, FALSE);
}
-static inline gboolean
-has_sp_in_encword(rfc2047_token *tokens)
-{
- for (; tokens; tokens = tokens->next)
- if (tokens->encoding != 0 && tokens->sp_in_encword != 0)
- return TRUE;
- return FALSE;
-}
/**
* _g_mime_utils_header_decode_text:
@@ -1900,10 +1914,8 @@ _g_mime_utils_header_decode_text (GMimeParserOptions *options, const char *text,
return g_strdup ("");
}
- tokens = tokenize_rfc2047_text (options, text, &len);
+ tokens = tokenize_rfc2047_text (options, text, &len, offset);
decoded = rfc2047_decode_tokens (options, tokens, len, charset);
- if (g_mime_parser_options_get_warning_callback (options) != NULL && has_sp_in_encword (tokens))
- _g_mime_parser_options_warn (options, offset, GMIME_WARN_INVALID_RFC2047_HEADER_VALUE, text);
rfc2047_token_list_free (tokens);
return decoded;
@@ -1953,10 +1965,8 @@ _g_mime_utils_header_decode_phrase (GMimeParserOptions *options, const char *phr
return g_strdup ("");
}
- tokens = tokenize_rfc2047_phrase (options, phrase, &len);
+ tokens = tokenize_rfc2047_phrase (options, phrase, &len, offset);
decoded = rfc2047_decode_tokens (options, tokens, len, charset);
- if (g_mime_parser_options_get_warning_callback (options) != NULL && has_sp_in_encword (tokens))
- _g_mime_parser_options_warn (options, offset, GMIME_WARN_INVALID_RFC2047_HEADER_VALUE,
phrase);
rfc2047_token_list_free (tokens);
return decoded;
@@ -2633,7 +2643,7 @@ g_mime_utils_structured_header_fold (GMimeParserOptions *options, GMimeFormatOpt
while (*value && is_lwsp (*value))
value++;
- tokens = tokenize_rfc2047_phrase (options, value, &len);
+ tokens = tokenize_rfc2047_phrase (options, value, &len, -1);
folded = header_fold_tokens (format, field, value, len, tokens, TRUE, TRUE);
g_free (field);
@@ -2665,7 +2675,7 @@ _g_mime_utils_structured_header_fold (GMimeParserOptions *options, GMimeFormatOp
if (value == NULL)
return g_strdup ("\n");
- tokens = tokenize_rfc2047_phrase (options, value, &len);
+ tokens = tokenize_rfc2047_phrase (options, value, &len, -1);
return header_fold_tokens (format, field, value, len, tokens, TRUE, FALSE);
}
@@ -2706,7 +2716,7 @@ g_mime_utils_unstructured_header_fold (GMimeParserOptions *options, GMimeFormatO
while (*value && is_lwsp (*value))
value++;
- tokens = tokenize_rfc2047_text (options, value, &len);
+ tokens = tokenize_rfc2047_text (options, value, &len, -1);
folded = header_fold_tokens (format, field, value, len, tokens, FALSE, TRUE);
g_free (field);
@@ -2737,7 +2747,7 @@ _g_mime_utils_unstructured_header_fold (GMimeParserOptions *options, GMimeFormat
if (value == NULL)
return g_strdup ("\n");
- tokens = tokenize_rfc2047_text (options, value, &len);
+ tokens = tokenize_rfc2047_text (options, value, &len, -1);
return header_fold_tokens (format, field, value, len, tokens, FALSE, FALSE);
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]