[gmime: 6/27] Implement basic error reporting for the GMime parser (#26)



commit edf84ba66fa59b1bae12e16a4650dacbd07ee99b
Author: albrechtd <albrecht dress arcor de>
Date:   Wed Nov 8 21:29:00 2017 +0100

    Implement basic error reporting for the GMime parser (#26)
    
    Add an error reporting facility for the GMime parser.  Basically, it extends GMimeParserOptions
    by a callback which is called whenever the parser detects a “fishy” construct.  The callback
    receives the offset within the message, an error code and (if applicable) an additional string,
    plus a user-defined data pointer.
    
    The errors reported are at the moment:
    - conflicting Content-* headers (i.e. different Content-* headers for the same part)
    - conflicting header parameters (ditto)
    - non-conflicting Content-* headers (i.e. exactly the same header is repeated)
    - non-conflicting header parameters (ditto)
    - unencoded 8-bit characters in headers
    - invalid content-type headers and completely broken headers
    - malformed multipart/* and malformed messages
    - truncated messages
    - multipart without boundary

 docs/reference/gmime-sections.txt |    4 +
 examples/Makefile.am              |    7 ++-
 examples/msgcheck.c               |  155 +++++++++++++++++++++++++++++++++++++
 gmime/gmime-content-type.c        |   13 +++-
 gmime/gmime-disposition.c         |    9 ++-
 gmime/gmime-internal.h            |   12 +++
 gmime/gmime-object.c              |    4 +-
 gmime/gmime-param.c               |   21 +++++-
 gmime/gmime-parser-options.c      |   48 +++++++++++
 gmime/gmime-parser-options.h      |   46 +++++++++++
 gmime/gmime-parser.c              |   90 +++++++++++++++++-----
 11 files changed, 382 insertions(+), 27 deletions(-)
---
diff --git a/docs/reference/gmime-sections.txt b/docs/reference/gmime-sections.txt
index e79a28e..952fb33 100644
--- a/docs/reference/gmime-sections.txt
+++ b/docs/reference/gmime-sections.txt
@@ -1284,6 +1284,8 @@ GMIME_TYPE_FORMAT_OPTIONS
 <FILE>gmime-parser-options</FILE>
 GMimeParserOptions
 GMimeRfcComplianceMode
+GMimeParserWarning
+GMimeParserWarningFunc
 g_mime_parser_options_new
 g_mime_parser_options_free
 g_mime_parser_options_clone
@@ -1298,6 +1300,8 @@ g_mime_parser_options_get_rfc2047_compliance_mode
 g_mime_parser_options_set_rfc2047_compliance_mode
 g_mime_parser_options_get_fallback_charsets
 g_mime_parser_options_set_fallback_charsets
+g_mime_parser_options_get_warning_callback
+g_mime_parser_options_set_warning_callback
 
 <SUBSECTION Private>
 g_mime_parser_options_get_type
diff --git a/examples/Makefile.am b/examples/Makefile.am
index 42da44b..f74c3cb 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -6,7 +6,7 @@ extra_DIST = README
 
 AM_CPPFLAGS = -I$(top_srcdir) $(GMIME_CFLAGS) $(GLIB_CFLAGS)
 
-noinst_PROGRAMS = basic-example imap-example uuencode uudecode
+noinst_PROGRAMS = basic-example imap-example uuencode uudecode msgcheck
 
 DEPS =                                                 \
        $(top_builddir)/util/libutil.la         \
@@ -42,3 +42,8 @@ uudecode_SOURCES = $(GETOPT_SOURCES) uudecode.c
 uudecode_LDFLAGS = 
 uudecode_DEPENDENCIES = $(DEPS)
 uudecode_LDADD = $(LDADDS)
+
+msgcheck_SOURCES = $(GETOPT_SOURCES) msgcheck.c
+msgcheck_LDFLAGS = 
+msgcheck_DEPENDENCIES = $(DEPS)
+msgcheck_LDADD = $(LDADDS)
diff --git a/examples/msgcheck.c b/examples/msgcheck.c
new file mode 100644
index 0000000..446b262
--- /dev/null
+++ b/examples/msgcheck.c
@@ -0,0 +1,155 @@
+/*
+ *  Example application demonstrating the GMime parser's feature for
+ *  detecting and reporting RFC violations in messages.
+ *
+ *  Written by (C) Albrecht Dreß <albrecht dress arcor de> 2017
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <stdlib.h>
+#include <gmime/gmime.h>
+
+
+typedef struct {
+       gint64 offset;                  /* offset reported to the warning callback */
+       GMimeParserWarning errcode;     /* warning code reported to the callback */
+       gchar *message;                 /* allocated text; g_free()'d in check_msg_file() */
+} issue_log_elem_t;
+
+
+/* Translate a parser warning code into a short, statically allocated
+ * description.  Codes that have no entry in the table below yield
+ * "unknown". */
+static const gchar *
+errcode2str(GMimeParserWarning errcode)
+{
+       static const struct {
+               GMimeParserWarning code;
+               const gchar *text;
+       } table[] = {
+               { GMIME_WARN_DUPLICATED_CONTENT_HDR, "duplicated content header" },
+               { GMIME_WARN_DUPLICATED_PARAMETER, "duplicated header parameter" },
+               { GMIME_WARN_UNENCODED_8BIT_HEADER, "unencoded 8-bit characters in header" },
+               { GMIME_WARN_INVALID_CONTENT_TYPE, "invalid Content-Type" },
+               { GMIME_WARN_INVALID_HEADER, "invalid header" },
+               { GMIME_WARN_MALFORMED_MULTIPART, "malformed multipart" },
+               { GMIME_WARN_TRUNCATED_MESSAGE, "truncated message" },
+               { GMIME_WARN_MALFORMED_MESSAGE, "malformed message" },
+               { GMIME_CRIT_CONFLICTING_CONTENT_HDR, "conflicting content header" },
+               { GMIME_CRIT_CONFLICTING_PARAMETER, "conflicting header parameter" },
+               { GMIME_CRIT_MULTIPART_WITHOUT_BOUNDARY, "multipart without boundary" }
+       };
+       guint i;
+
+       for (i = 0U; i < G_N_ELEMENTS (table); i++) {
+               if (table[i].code == errcode)
+                       return table[i].text;
+       }
+       return "unknown";
+}
+
+
+/* GMimeParserWarningFunc implementation: records one reported issue as a new
+ * issue_log_elem_t appended to the GList whose address is passed in @user_data. */
+static void
+parser_issue (gint64 offset, GMimeParserWarning errcode, const gchar *item, gpointer user_data)
+{
+       GList **log = (GList **) user_data;
+       issue_log_elem_t *entry = g_new (issue_log_elem_t, 1U);
+       const gchar *what = errcode2str (errcode);
+
+       entry->offset = offset;
+       entry->errcode = errcode;
+       if (item != NULL) {
+               /* strip a private copy: g_strstrip() edits its argument in place */
+               gchar *trimmed = g_strstrip (g_strdup (item));
+               entry->message = g_strdup_printf ("%s: '%s'", what, trimmed);
+               g_free (trimmed);
+       } else {
+               entry->message = g_strdup (what);
+       }
+       *log = g_list_append (*log, entry);
+}
+
+
+/* Parse the message in @filename with a warning callback installed and print every issue it
+ * reported.  Output uses g_print(), which <glib.h> declares; g_printf() needs <glib/gprintf.h>. */
+static void
+check_msg_file (const gchar *filename)
+{
+       GMimeStream *stream;
+       GError *error = NULL;
+
+       stream = g_mime_stream_file_open (filename, "r", &error);
+       if (stream == NULL) {
+               g_warning ("failed to open %s: %s", filename, error->message);
+               g_error_free (error);
+       } else {
+               GMimeParser *parser;
+               GMimeParserOptions *options;
+               GMimeMessage *message;
+               GList *issues = NULL;
+
+               parser = g_mime_parser_new ();
+               g_mime_parser_init_with_stream (parser, stream);
+               options = g_mime_parser_options_new ();
+               g_mime_parser_options_set_warning_callback (options, parser_issue, &issues);
+               message = g_mime_parser_construct_message (parser, options);
+               g_mime_parser_options_free (options);
+               g_object_unref (parser);
+               g_object_unref (stream);
+               if (message != NULL) {
+                       g_object_unref (message);
+               }
+
+               if (issues == NULL) {
+                       g_print ("%s: message looks benign\n", filename);
+               } else {
+                       GList *this_issue;
+
+                       g_print ("%s: message contains %u RFC violations:\n", filename, g_list_length (issues));
+                       for (this_issue = issues; this_issue != NULL; this_issue = this_issue->next) {
+                               issue_log_elem_t *issue_data = (issue_log_elem_t *) this_issue->data;
+                               g_print ("offset %" G_GINT64_FORMAT ": [%u] %s\n",
+                                       issue_data->offset, issue_data->errcode, issue_data->message);
+                               g_free (issue_data->message);
+                               g_free (issue_data);
+                       }
+                       g_list_free (issues);
+               }
+       }
+}
+
+
+/* Entry point: runs check_msg_file() on every file named on the command line;
+ * prints a usage message and returns 1 when no file name is given. */
+int
+main (int argc, char **argv)
+{
+       char **arg;
+
+       if (argc < 2) {
+               g_message ("usage: %s <filename> [<filename> ...]", argv[0]);
+               return 1;
+       }
+
+       g_mime_init ();
+       for (arg = argv + 1; arg < argv + argc; arg++)
+               check_msg_file (*arg);
+       g_mime_shutdown ();
+
+       return 0;
+}
diff --git a/gmime/gmime-content-type.c b/gmime/gmime-content-type.c
index 3891ad3..8ea2c1d 100644
--- a/gmime/gmime-content-type.c
+++ b/gmime/gmime-content-type.c
@@ -31,6 +31,7 @@
 #include "gmime-content-type.h"
 #include "gmime-parse-utils.h"
 #include "gmime-events.h"
+#include "gmime-internal.h"
 
 
 #ifdef ENABLE_WARNINGS
@@ -187,6 +188,12 @@ g_mime_content_type_new (const char *type, const char *subtype)
 GMimeContentType *
 g_mime_content_type_parse (GMimeParserOptions *options, const char *str)
 {
+       return _g_mime_content_type_parse (options, str, -1);
+}
+
+GMimeContentType *
+_g_mime_content_type_parse (GMimeParserOptions *options, const char *str, gint64 offset)
+{
        GMimeContentType *content_type;
        const char *inptr = str;
        GMimeParamList *params;
@@ -194,8 +201,10 @@ g_mime_content_type_parse (GMimeParserOptions *options, const char *str)
        
        g_return_val_if_fail (str != NULL, NULL);
        
-       if (!g_mime_parse_content_type (&inptr, &type, &subtype))
+       if (!g_mime_parse_content_type (&inptr, &type, &subtype)) {
+               _g_mime_parser_options_warn(options, offset, GMIME_WARN_INVALID_CONTENT_TYPE, str);
                return g_mime_content_type_new ("application", "octet-stream");
+       }
        
        content_type = g_object_new (GMIME_TYPE_CONTENT_TYPE, NULL);
        content_type->subtype = subtype;
@@ -206,7 +215,7 @@ g_mime_content_type_parse (GMimeParserOptions *options, const char *str)
        while (*inptr && *inptr != ';')
                inptr++;
        
-       if (*inptr++ == ';' && *inptr && (params = g_mime_param_list_parse (options, inptr))) {
+       if (*inptr++ == ';' && *inptr && (params = _g_mime_param_list_parse (options, inptr, offset))) {
                g_mime_event_add (params->changed, (GMimeEventCallback) param_list_changed, content_type);
                g_object_unref (content_type->params);
                content_type->params = params;
diff --git a/gmime/gmime-disposition.c b/gmime/gmime-disposition.c
index 1c3c1e6..e03a692 100644
--- a/gmime/gmime-disposition.c
+++ b/gmime/gmime-disposition.c
@@ -29,6 +29,7 @@
 #include "gmime-common.h"
 #include "gmime-disposition.h"
 #include "gmime-events.h"
+#include "gmime-internal.h"
 
 
 /**
@@ -148,6 +149,12 @@ g_mime_content_disposition_new (void)
 GMimeContentDisposition *
 g_mime_content_disposition_parse (GMimeParserOptions *options, const char *str)
 {
+       return _g_mime_content_disposition_parse (options, str, -1);
+}
+
+GMimeContentDisposition *
+_g_mime_content_disposition_parse (GMimeParserOptions *options, const char *str, gint64 offset)
+{
        GMimeContentDisposition *disposition;
        const char *inptr = str;
        GMimeParamList *params;
@@ -168,7 +175,7 @@ g_mime_content_disposition_parse (GMimeParserOptions *options, const char *str)
        disposition->disposition = g_strstrip (value);
        
        /* parse the parameters, if any */
-       if (*inptr++ == ';' && *inptr && (params = g_mime_param_list_parse (options, inptr))) {
+       if (*inptr++ == ';' && *inptr && (params = _g_mime_param_list_parse (options, inptr, offset))) {
                g_mime_event_add (params->changed, (GMimeEventCallback) param_list_changed, disposition);
                g_object_unref (disposition->params);
                disposition->params = params;
diff --git a/gmime/gmime-internal.h b/gmime/gmime-internal.h
index eff1bc6..7902593 100644
--- a/gmime/gmime-internal.h
+++ b/gmime/gmime-internal.h
@@ -50,6 +50,8 @@ G_GNUC_INTERNAL GMimeFormatOptions *_g_mime_format_options_clone (GMimeFormatOpt
 /* GMimeParserOptions */
 G_GNUC_INTERNAL void g_mime_parser_options_init (void);
 G_GNUC_INTERNAL void g_mime_parser_options_shutdown (void);
+G_GNUC_INTERNAL void _g_mime_parser_options_warn (GMimeParserOptions *options, gint64 offset, GMimeParserWarning errcode,
+                                                 const gchar *item);
 
 /* GMimeHeader */
 //G_GNUC_INTERNAL void _g_mime_header_set_raw_value (GMimeHeader *header, const char *raw_value);
@@ -69,6 +71,16 @@ G_GNUC_INTERNAL void _g_mime_object_set_content_type (GMimeObject *object, GMime
 G_GNUC_INTERNAL void _g_mime_object_append_header (GMimeObject *object, const char *name, const char *raw_name,
                                                   const char *raw_value, gint64 offset);
 
+/* GMimeContentType */
+G_GNUC_INTERNAL GMimeContentType *_g_mime_content_type_parse (GMimeParserOptions *options, const char *str, gint64 offset);
+
+/* GMimeParamList */
+G_GNUC_INTERNAL GMimeParamList *_g_mime_param_list_parse (GMimeParserOptions *options, const char *str, gint64 offset);
+
+/* GMimeContentDisposition */
+G_GNUC_INTERNAL GMimeContentDisposition *_g_mime_content_disposition_parse (GMimeParserOptions *options, const char *str,
+                                                                           gint64 offset);
+
 /* utils */
 G_GNUC_INTERNAL char *_g_mime_utils_unstructured_header_fold (GMimeParserOptions *options, GMimeFormatOptions *format,
                                                              const char *field, const char *value);
diff --git a/gmime/gmime-object.c b/gmime/gmime-object.c
index 2ea5b94..3678f7d 100644
--- a/gmime/gmime-object.c
+++ b/gmime/gmime-object.c
@@ -213,13 +213,13 @@ object_header_changed (GMimeObject *object, GMimeHeader *header)
        switch (i) {
        case HEADER_CONTENT_DISPOSITION:
                value = g_mime_header_get_value (header);
-               disposition = g_mime_content_disposition_parse (options, value);
+               disposition = _g_mime_content_disposition_parse (options, value, header->offset);
                _g_mime_object_set_content_disposition (object, disposition);
                g_object_unref (disposition);
                break;
        case HEADER_CONTENT_TYPE:
                value = g_mime_header_get_value (header);
-               content_type = g_mime_content_type_parse (options, value);
+               content_type = _g_mime_content_type_parse (options, value, header->offset);
                _g_mime_object_set_content_type (object, content_type);
                g_object_unref (content_type);
                break;
diff --git a/gmime/gmime-param.c b/gmime/gmime-param.c
index e1d8478..6094d7e 100644
--- a/gmime/gmime-param.c
+++ b/gmime/gmime-param.c
@@ -1311,7 +1311,7 @@ rfc2184_param_new (char *name, char *value, int id, gboolean encoded)
 }
 
 static GMimeParamList *
-decode_param_list (GMimeParserOptions *options, const char *in)
+decode_param_list (GMimeParserOptions *options, const char *in, gint64 offset)
 {
        struct _rfc2184_param *rfc2184, *list, *t;
        char *name, *value, *charset, *lang;
@@ -1361,6 +1361,8 @@ decode_param_list (GMimeParserOptions *options, const char *in)
                                g_free (name);
                        }
                } else {
+                       const GMimeParam *exist_param;
+
                        param = g_mime_param_new ();
                        param->name = name;
                        
@@ -1378,6 +1380,15 @@ decode_param_list (GMimeParserOptions *options, const char *in)
                                param->value = value;
                        }
                        
+                       exist_param = g_mime_param_list_get_parameter (params, name);
+                       if (exist_param != NULL) {
+                               if (strcmp (exist_param->value, param->value) == 0) {
+                                       _g_mime_parser_options_warn (options, offset, GMIME_WARN_DUPLICATED_PARAMETER, name);
+                               } else {
+                                       _g_mime_parser_options_warn (options, offset, GMIME_CRIT_CONFLICTING_PARAMETER, name);
+                               }
+                       }
+
                        g_mime_param_list_add (params, param);
                }
                
@@ -1428,7 +1439,13 @@ decode_param_list (GMimeParserOptions *options, const char *in)
 GMimeParamList *
 g_mime_param_list_parse (GMimeParserOptions *options, const char *str)
 {
+       return _g_mime_param_list_parse (options, str, -1);
+}
+
+GMimeParamList *
+_g_mime_param_list_parse (GMimeParserOptions *options, const char *str, gint64 offset)
+{
        g_return_val_if_fail (str != NULL, NULL);
        
-       return decode_param_list (options, str);
+       return decode_param_list (options, str, offset);
 }
diff --git a/gmime/gmime-parser-options.c b/gmime/gmime-parser-options.c
index 7eb05e8..79c4ebc 100644
--- a/gmime/gmime-parser-options.c
+++ b/gmime/gmime-parser-options.c
@@ -48,6 +48,8 @@ struct _GMimeParserOptions {
        GMimeRfcComplianceMode rfc2047;
        gboolean allow_no_domain;
        char **charsets;
+       GMimeParserWarningFunc warning_cb;
+       gpointer warning_user_data;
 };
 
 static GMimeParserOptions *default_options = NULL;
@@ -72,6 +74,13 @@ g_mime_parser_options_shutdown (void)
        default_options = NULL;
 }
 
+/* Invoke the warning callback registered in @options, if there is one; a %NULL @options is ignored. */
+void
+_g_mime_parser_options_warn (GMimeParserOptions *options, gint64 offset, GMimeParserWarning errcode, const gchar *item)
+{
+       if ((options != NULL) && (options->warning_cb != NULL))
+               options->warning_cb (offset, errcode, item, options->warning_user_data);
+}
 
 /**
  * g_mime_parser_options_get_default:
@@ -110,6 +119,9 @@ g_mime_parser_options_new (void)
        options->charsets[1] = g_strdup ("iso-8859-1");
        options->charsets[2] = NULL;
        
+       options->warning_cb = NULL;
+       options->warning_user_data = NULL;
+
        return options;
 }
 
@@ -145,6 +157,9 @@ g_mime_parser_options_clone (GMimeParserOptions *options)
                clone->charsets[i] = g_strdup (options->charsets[i]);
        clone->charsets[i] = NULL;
        
+       clone->warning_cb = options->warning_cb;
+       clone->warning_user_data = options->warning_user_data;
+
        return clone;
 }
 
@@ -393,3 +408,36 @@ g_mime_parser_options_set_fallback_charsets (GMimeParserOptions *options, const
                options->charsets[i] = g_strdup (charsets[i]);
        options->charsets[n] = NULL;
 }
+
+
+/**
+ * g_mime_parser_options_get_warning_callback:
+ * @options: (nullable): a #GMimeParserOptions or %NULL
+ *
+ * Gets the warning callback of @options, or of the default options if @options is %NULL.
+ *
+ * Returns: the currently registered warning callback function
+ **/
+GMimeParserWarningFunc
+g_mime_parser_options_get_warning_callback (GMimeParserOptions *options)
+{
+       return (options != NULL ? options : default_options)->warning_cb;
+}
+
+
+/**
+ * g_mime_parser_options_set_warning_callback:
+ * @options: a #GMimeParserOptions
+ * @warning_cb: a #GMimeParserWarningFunc, or %NULL to clear the callback
+ * @user_data: data handed to @warning_cb on every invocation
+ *
+ * Registers the callback function that is called when the parser detects an issue.
+ **/
+void
+g_mime_parser_options_set_warning_callback (GMimeParserOptions *options, GMimeParserWarningFunc warning_cb, gpointer user_data)
+{
+       g_return_if_fail (options != NULL);
+
+       options->warning_user_data = user_data;
+       options->warning_cb = warning_cb;
+}
diff --git a/gmime/gmime-parser-options.h b/gmime/gmime-parser-options.h
index b5c9af7..b4523be 100644
--- a/gmime/gmime-parser-options.h
+++ b/gmime/gmime-parser-options.h
@@ -42,12 +42,54 @@ typedef enum {
 } GMimeRfcComplianceMode;
 
 /**
+ * GMimeParserWarning:
+ * @GMIME_WARN_DUPLICATED_CONTENT_HDR: repeated exactly the same `Content-*` header
+ * @GMIME_WARN_DUPLICATED_PARAMETER: repeated exactly the same header parameter
+ * @GMIME_WARN_UNENCODED_8BIT_HEADER: a header contains unencoded 8-bit characters
+ * @GMIME_WARN_INVALID_CONTENT_TYPE: invalid content type, assume `application/octet-stream`
+ * @GMIME_WARN_INVALID_HEADER: invalid header, ignored
+ * @GMIME_WARN_MALFORMED_MULTIPART: no items in a `multipart/...`
+ * @GMIME_WARN_TRUNCATED_MESSAGE: the message is truncated
+ * @GMIME_WARN_MALFORMED_MESSAGE: the message is malformed
+ * @GMIME_CRIT_CONFLICTING_CONTENT_HDR: conflicting `Content-*` header
+ * @GMIME_CRIT_CONFLICTING_PARAMETER: conflicting header parameter
+ * @GMIME_CRIT_MULTIPART_WITHOUT_BOUNDARY: a `multipart/...` part lacks the required boundary parameter
+ *
+ * Issues the #GMimeParser detects. Note that the `GMIME_CRIT_*` issues indicate that some parts of the #GMimeParser input may
+ * be ignored or will be interpreted differently by other software products.
+ **/
+typedef enum {
+       GMIME_WARN_DUPLICATED_CONTENT_HDR = 1U,
+       GMIME_WARN_DUPLICATED_PARAMETER,
+       GMIME_WARN_UNENCODED_8BIT_HEADER,
+       GMIME_WARN_INVALID_CONTENT_TYPE,
+       GMIME_WARN_INVALID_HEADER,
+       GMIME_WARN_MALFORMED_MULTIPART,
+       GMIME_WARN_TRUNCATED_MESSAGE,
+       GMIME_WARN_MALFORMED_MESSAGE,
+       GMIME_CRIT_CONFLICTING_CONTENT_HDR,
+       GMIME_CRIT_CONFLICTING_PARAMETER,
+       GMIME_CRIT_MULTIPART_WITHOUT_BOUNDARY
+} GMimeParserWarning;
+
+/**
  * GMimeParserOptions:
  *
  * A set of parser options used by #GMimeParser and various other parsing functions.
  **/
 typedef struct _GMimeParserOptions GMimeParserOptions;
 
+/**
+ * GMimeParserWarningFunc:
+ * @offset: parser offset where the issue has been detected, or -1 if it is unknown
+ * @errcode: the #GMimeParserWarning code identifying the issue
+ * @item: a NUL-terminated string containing the value causing the issue, may be %NULL
+ * @user_data: User-supplied callback data.
+ *
+ * The function signature for a callback to g_mime_parser_options_set_warning_callback().
+ **/
+typedef void (*GMimeParserWarningFunc) (gint64 offset, GMimeParserWarning errcode, const gchar *item, gpointer user_data);
+
 
 GType g_mime_parser_options_get_type (void);
 
@@ -73,6 +115,10 @@ void g_mime_parser_options_set_rfc2047_compliance_mode (GMimeParserOptions *opti
 const char **g_mime_parser_options_get_fallback_charsets (GMimeParserOptions *options);
 void g_mime_parser_options_set_fallback_charsets (GMimeParserOptions *options, const char **charsets);
 
+GMimeParserWarningFunc g_mime_parser_options_get_warning_callback (GMimeParserOptions *options);
+void g_mime_parser_options_set_warning_callback (GMimeParserOptions *options, GMimeParserWarningFunc warning_cb,
+                                                gpointer user_data);
+
 G_END_DECLS
 
 #endif /* __GMIME_PARSER_OPTIONS_H__ */
diff --git a/gmime/gmime-parser.c b/gmime/gmime-parser.c
index 1a17b23..eaa61c7 100644
--- a/gmime/gmime-parser.c
+++ b/gmime/gmime-parser.c
@@ -263,8 +263,8 @@ parser_find_header (GMimeParser *parser, const char *name, gint64 *offset)
        Header *header;
        guint i;
        
-       for (i = 0; i < priv->headers->len; i++) {
-               header = priv->headers->pdata[i];
+       for (i = priv->headers->len; i > 0U; --i) {
+               header = priv->headers->pdata[i - 1U];
                
                if (g_ascii_strcasecmp (header->name, name) != 0)
                        continue;
@@ -869,8 +869,19 @@ next_alloc_size (size_t n)
        priv->headerleft -= len;                                          \
 } G_STMT_END
 
+/* Returns TRUE when no byte of the NUL-terminated @str has its high bit (0x80) set. */
+static inline gboolean
+is_7bit_clean (const gchar *str)
+{
+       const gchar *p = str;
+
+       while (*p != '\0' && (*p & 0x80) == 0)
+               p++;
+       return *p == '\0';
+}
+
 static void
-header_parse (GMimeParser *parser)
+header_parse (GMimeParser *parser, GMimeParserOptions *options)
 {
        struct _GMimeParserPrivate *priv = parser->priv;
        gboolean blank = FALSE;
@@ -897,9 +908,12 @@ header_parse (GMimeParser *parser)
        
        if (*inptr != ':') {
                /* ignore invalid headers */
-               w(g_warning ("Invalid header at %lld: '%s'",
-                            (long long) priv->header_offset,
-                            priv->headerbuf));
+               if (strcmp(priv->headerbuf, "\r") != 0) {
+                       _g_mime_parser_options_warn (options, priv->header_offset, GMIME_WARN_INVALID_HEADER, priv->headerbuf);
+                       w(g_warning ("Invalid header at %lld: '%s'",
+                                    (long long) priv->header_offset,
+                                    priv->headerbuf));
+               }
                
                if (priv->preheader == NULL)
                        priv->preheader = g_strdup (priv->headerbuf);
@@ -928,6 +942,9 @@ header_parse (GMimeParser *parser)
        if (priv->regex && g_regex_match (priv->regex, header->name, 0, NULL))
                priv->header_cb (parser, header->name, header->raw_value,
                                 header->offset, priv->user_data);
+       if (!is_7bit_clean (header->raw_name) || !is_7bit_clean (header->raw_value)) {
+               _g_mime_parser_options_warn (options, header->offset, GMIME_WARN_UNENCODED_8BIT_HEADER, header->name);
+       }
 }
 
 enum {
@@ -980,7 +997,7 @@ has_content_headers (GPtrArray *headers)
 }
 
 static int
-parser_step_headers (GMimeParser *parser)
+parser_step_headers (GMimeParser *parser, GMimeParserOptions *options)
 {
        struct _GMimeParserPrivate *priv = parser->priv;
        gboolean eoln, valid = TRUE, fieldname = TRUE;
@@ -1019,7 +1036,7 @@ parser_step_headers (GMimeParser *parser)
                        
                        /* if we are scanning a new line, check for a folded header */
                        if (!priv->midline && continuation && (*inptr != ' ' && *inptr != '\t')) {
-                               header_parse (parser);
+                               header_parse (parser, options);
                                priv->header_offset = parser_offset (priv, inptr);
                                continuation = FALSE;
                                fieldname = TRUE;
@@ -1149,7 +1166,7 @@ parser_step_headers (GMimeParser *parser)
  headers_end:
        
        if (priv->headerptr > priv->headerbuf)
-               header_parse (parser);
+               header_parse (parser, options);
        
        priv->headers_end = parser_offset (priv, start);
        priv->state = GMIME_PARSER_STATE_HEADERS_END;
@@ -1259,7 +1276,7 @@ parser_skip_line (GMimeParser *parser)
 }
 
 static int
-parser_step (GMimeParser *parser)
+parser_step (GMimeParser *parser, GMimeParserOptions *options)
 {
        struct _GMimeParserPrivate *priv = parser->priv;
        
@@ -1288,7 +1305,7 @@ parser_step (GMimeParser *parser)
                break;
        case GMIME_PARSER_STATE_MESSAGE_HEADERS:
        case GMIME_PARSER_STATE_HEADERS:
-               parser_step_headers (parser);
+               parser_step_headers (parser, options);
                
                if (priv->message_headers_begin == -1) {
                        priv->message_headers_begin = priv->headers_begin;
@@ -1660,7 +1677,7 @@ parser_scan_message_part (GMimeParser *parser, GMimeParserOptions *options, GMim
        
        /* get the headers */
        priv->state = GMIME_PARSER_STATE_HEADERS;
-       if (parser_step (parser) == GMIME_PARSER_STATE_ERROR) {
+       if (parser_step (parser, options) == GMIME_PARSER_STATE_ERROR) {
                /* Note: currently cannot happen because
                 * parser_step_headers() never returns error */
                *found = BOUNDARY_EOS;
@@ -1694,6 +1711,21 @@ parser_scan_message_part (GMimeParser *parser, GMimeParserOptions *options, GMim
        g_object_unref (message);
 }
 
+/* Warn through @options when @object's header list already holds a header named like
+ * @header: an identical raw value is reported as a duplicate, a different one as a conflict. */
+static void
+check_content_header_conflict (GMimeParserOptions *options, GMimeObject *object, const Header *header)
+{
+       const GMimeHeader *prev = g_mime_header_list_get_header (object->headers, header->name);
+       GMimeParserWarning code = GMIME_CRIT_CONFLICTING_CONTENT_HDR;
+
+       if (prev == NULL)
+               return;
+       if (strcmp (prev->raw_value, header->raw_value) == 0)
+               code = GMIME_WARN_DUPLICATED_CONTENT_HDR;
+       _g_mime_parser_options_warn (options, header->offset, code, header->name);
+}
+
 static GMimeObject *
 parser_construct_leaf_part (GMimeParser *parser, GMimeParserOptions *options, ContentType *content_type, gboolean toplevel, BoundaryType *found)
 {
@@ -1718,6 +1750,7 @@ parser_construct_leaf_part (GMimeParser *parser, GMimeParserOptions *options, Co
                header = priv->headers->pdata[i];
                
                if (!toplevel || !g_ascii_strncasecmp (header->name, "Content-", 8)) {
+                       check_content_header_conflict (options, object, header);
                        _g_mime_object_append_header (object, header->name, header->raw_name,
                                                      header->raw_value, header->offset);
                }
@@ -1727,7 +1760,7 @@ parser_construct_leaf_part (GMimeParser *parser, GMimeParserOptions *options, Co
        
        if (priv->state == GMIME_PARSER_STATE_HEADERS_END) {
                /* skip empty line after headers */
-               if (parser_step (parser) == GMIME_PARSER_STATE_ERROR) {
+               if (parser_step (parser, options) == GMIME_PARSER_STATE_ERROR) {
                        *found = BOUNDARY_EOS;
                        return object;
                }
@@ -1737,7 +1770,7 @@ parser_construct_leaf_part (GMimeParser *parser, GMimeParserOptions *options, Co
                parser_scan_message_part (parser, options, (GMimeMessagePart *) object, found);
        else
                parser_scan_mime_part_content (parser, (GMimePart *) object, found);
-       
+
        return object;
 }
 
@@ -1818,7 +1851,7 @@ parser_scan_multipart_subparts (GMimeParser *parser, GMimeParserOptions *options
                
                /* get the headers */
                priv->state = GMIME_PARSER_STATE_HEADERS;
-               if (parser_step (parser) == GMIME_PARSER_STATE_ERROR) {
+               if (parser_step (parser, options) == GMIME_PARSER_STATE_ERROR) {
                        found = BOUNDARY_EOS;
                        break;
                }
@@ -1851,6 +1884,7 @@ parser_construct_multipart (GMimeParser *parser, GMimeParserOptions *options, Co
        GMimeObject *object;
        Header *header;
        guint i;
+       gint64 cont_type_offs = -1;
        
        g_assert (priv->state >= GMIME_PARSER_STATE_HEADERS_END);
        
@@ -1860,6 +1894,10 @@ parser_construct_multipart (GMimeParser *parser, GMimeParserOptions *options, Co
                header = priv->headers->pdata[i];
                
                if (!toplevel || !g_ascii_strncasecmp (header->name, "Content-", 8)) {
+                       check_content_header_conflict (options, object, header);
+                       if (g_ascii_strcasecmp (header->name, "Content-Type") == 0) {
+                               cont_type_offs = header->offset;
+                       }
                        _g_mime_object_append_header (object, header->name, header->raw_name,
                                                      header->raw_value, header->offset);
                }
@@ -1871,7 +1909,7 @@ parser_construct_multipart (GMimeParser *parser, GMimeParserOptions *options, Co
        
        if (priv->state == GMIME_PARSER_STATE_HEADERS_END) {
                /* skip empty line after headers */
-               if (parser_step (parser) == GMIME_PARSER_STATE_ERROR) {
+               if (parser_step (parser, options) == GMIME_PARSER_STATE_ERROR) {
                        *found = BOUNDARY_EOS;
                        return object;
                }
@@ -1894,6 +1932,14 @@ parser_construct_multipart (GMimeParser *parser, GMimeParserOptions *options, Co
                        return object;
                }
                
+               if ((*found == BOUNDARY_PARENT) || (*found == BOUNDARY_PARENT_END)) {
+                       _g_mime_parser_options_warn (options, cont_type_offs, GMIME_WARN_MALFORMED_MULTIPART, content_type->subtype);
+               }
+
+               if (*found == BOUNDARY_EOS) {
+                       _g_mime_parser_options_warn (options, -1, GMIME_WARN_TRUNCATED_MESSAGE, NULL);
+               }
+
                multipart->write_end_boundary = FALSE;
                parser_pop_boundary (parser);
                
@@ -1902,6 +1948,7 @@ parser_construct_multipart (GMimeParser *parser, GMimeParserOptions *options, Co
                else if (*found == BOUNDARY_PARENT && found_immediate_boundary (priv, FALSE))
                        *found = BOUNDARY_IMMEDIATE;
        } else {
+               _g_mime_parser_options_warn (options, cont_type_offs, GMIME_CRIT_MULTIPART_WITHOUT_BOUNDARY, content_type->subtype);
                w(g_warning ("multipart without boundary encountered"));
                /* this will scan everything into the prologue */
                *found = parser_scan_multipart_prologue (parser, multipart);
@@ -1921,7 +1968,7 @@ parser_construct_part (GMimeParser *parser, GMimeParserOptions *options)
        /* get the headers */
        priv->state = GMIME_PARSER_STATE_HEADERS;
        while (priv->state < GMIME_PARSER_STATE_HEADERS_END) {
-               if (parser_step (parser) == GMIME_PARSER_STATE_ERROR)
+               if (parser_step (parser, options) == GMIME_PARSER_STATE_ERROR)
                        return NULL;
        }
        
@@ -1972,18 +2019,19 @@ parser_construct_message (GMimeParser *parser, GMimeParserOptions *options)
        
        /* scan the from-line if we are parsing an mbox */
        while (priv->state != GMIME_PARSER_STATE_MESSAGE_HEADERS) {
-               if (parser_step (parser) == GMIME_PARSER_STATE_ERROR)
+               if (parser_step (parser, options) == GMIME_PARSER_STATE_ERROR)
                        return NULL;
        }
        
        /* parse the headers */
        while (priv->state < GMIME_PARSER_STATE_HEADERS_END) {
-               if (parser_step (parser) == GMIME_PARSER_STATE_ERROR)
+               if (parser_step (parser, options) == GMIME_PARSER_STATE_ERROR)
                        return NULL;
        }
        
        message = g_mime_message_new (FALSE);
        ((GMimeObject *) message)->ensure_newline = FALSE;
+       _g_mime_header_list_set_options(g_mime_object_get_header_list(GMIME_OBJECT(message)), options);
        
        for (i = 0; i < priv->headers->len; i++) {
                header = priv->headers->pdata[i];
@@ -2021,6 +2069,10 @@ parser_construct_message (GMimeParser *parser, GMimeParserOptions *options)
        content_type_destroy (content_type);
        message->mime_part = object;
        
+       if (priv->state == GMIME_PARSER_STATE_ERROR) {
+               _g_mime_parser_options_warn (options, -1, GMIME_WARN_MALFORMED_MESSAGE, NULL);
+       }
+
        if (priv->format == GMIME_FORMAT_MBOX) {
                priv->state = GMIME_PARSER_STATE_FROM;
                parser_pop_boundary (parser);


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]