Index: HTMLparser.c
===================================================================
--- HTMLparser.c (revision 3797)
+++ HTMLparser.c (working copy)
@@ -4120,6 +4120,8 @@ htmlParseElement(htmlParserCtxtPtr ctxt)
int
htmlParseDocument(htmlParserCtxtPtr ctxt) {
+ xmlChar start[4];
+ xmlCharEncoding enc;
xmlDtdPtr dtd;
xmlInitParser();
@@ -4139,6 +4141,23 @@ htmlParseDocument(htmlParserCtxtPtr ctxt
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
+ if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
+ ((ctxt->input->end - ctxt->input->cur) >= 4)) {
+ /*
+ * Get the 4 first bytes and decode the charset
+ * if enc != XML_CHAR_ENCODING_NONE
+ * plug some encoding conversion routines.
+ */
+ start[0] = RAW;
+ start[1] = NXT(1);
+ start[2] = NXT(2);
+ start[3] = NXT(3);
+ enc = xmlDetectCharEncoding(&start[0], 4);
+ if (enc != XML_CHAR_ENCODING_NONE) {
+ xmlSwitchEncoding(ctxt, enc);
+ }
+ }
+
/*
* Wipe out everything which is before the first '<'
*/