[totem-pl-parser/wip/hadess/more-encoding: 4/6] plparser: Validate UTF-8 before returning it
- From: Bastien Nocera <hadess src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [totem-pl-parser/wip/hadess/more-encoding: 4/6] plparser: Validate UTF-8 before returning it
- Date: Thu, 4 Mar 2021 14:55:04 +0000 (UTC)
commit 27950578cd1c34c95dcded313030403d9e53ad37
Author: Bastien Nocera <hadess hadess net>
Date: Thu Mar 4 15:25:04 2021 +0100
plparser: Validate UTF-8 before returning it
We shouldn't blindly return data as UTF-8 simply because the XML header
says that it is UTF-8.
plparse/totem-pl-parser.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
---
diff --git a/plparse/totem-pl-parser.c b/plparse/totem-pl-parser.c
index a1bec2b..e8a6b61 100644
--- a/plparse/totem-pl-parser.c
+++ b/plparse/totem-pl-parser.c
@@ -1874,8 +1874,13 @@ totem_pl_parser_parse_xml_relaxed (char *contents,
break;
}
- if (encoding == NULL || g_ascii_strcasecmp (encoding, "UTF-8") == 0)
- return doc;
+ if (encoding == NULL || g_ascii_strcasecmp (encoding, "UTF-8") == 0) {
+ if (g_utf8_validate (contents, -1, NULL))
+ return doc;
+ g_debug ("Document pretended to be in UTF-8 but didn't validate");
+ /* FIXME detect encoding using uchardet */
+ return NULL;
+ }
xml_parser_free_tree (doc);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]