[libxml2] Clean up encoding switching code
- From: Nick Wellnhofer <nwellnhof src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [libxml2] Clean up encoding switching code
- Date: Sat, 2 Apr 2022 18:48:36 +0000 (UTC)
commit aab584dc3127edf383c3d1c6acda2b83353a7150
Author: Nick Wellnhofer <wellnhofer aevum de>
Date: Sun Mar 6 23:23:43 2022 +0100
Clean up encoding switching code
- Remove xmlSwitchToEncodingInt which was basically just a wrapper
around xmlSwitchInputEncodingInt.
- Simplify xmlSwitchEncoding.
- Improve error handling in xmlSwitchInputEncodingInt.
- Deprecate xmlSwitchInputEncoding.
include/libxml/parserInternals.h | 1 +
parserInternals.c | 150 ++++++---------------------------------
2 files changed, 24 insertions(+), 127 deletions(-)
---
diff --git a/include/libxml/parserInternals.h b/include/libxml/parserInternals.h
index 0615f084..656ee462 100644
--- a/include/libxml/parserInternals.h
+++ b/include/libxml/parserInternals.h
@@ -339,6 +339,7 @@ XMLPUBFUN int XMLCALL
XMLPUBFUN int XMLCALL
xmlSwitchToEncoding (xmlParserCtxtPtr ctxt,
xmlCharEncodingHandlerPtr handler);
+XML_DEPRECATED
XMLPUBFUN int XMLCALL
xmlSwitchInputEncoding (xmlParserCtxtPtr ctxt,
xmlParserInputPtr input,
diff --git a/parserInternals.c b/parserInternals.c
index a68a32fa..31d94680 100644
--- a/parserInternals.c
+++ b/parserInternals.c
@@ -866,9 +866,6 @@ xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
************************************************************************/
static int
-xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
- xmlCharEncodingHandlerPtr handler, int len);
-static int
xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
xmlCharEncodingHandlerPtr handler, int len);
/**
@@ -968,55 +965,7 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
/* default encoding, no conversion should be needed */
ctxt->charset = XML_CHAR_ENCODING_UTF8;
return(0);
- case XML_CHAR_ENCODING_UTF16LE:
- break;
- case XML_CHAR_ENCODING_UTF16BE:
- break;
- case XML_CHAR_ENCODING_UCS4LE:
- __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
- "encoding not supported %s\n",
- BAD_CAST "USC4 little endian", NULL);
- break;
- case XML_CHAR_ENCODING_UCS4BE:
- __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
- "encoding not supported %s\n",
- BAD_CAST "USC4 big endian", NULL);
- break;
- case XML_CHAR_ENCODING_EBCDIC:
- __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
- "encoding not supported %s\n",
- BAD_CAST "EBCDIC", NULL);
- break;
- case XML_CHAR_ENCODING_UCS4_2143:
- __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
- "encoding not supported %s\n",
- BAD_CAST "UCS4 2143", NULL);
- break;
- case XML_CHAR_ENCODING_UCS4_3412:
- __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
- "encoding not supported %s\n",
- BAD_CAST "UCS4 3412", NULL);
- break;
- case XML_CHAR_ENCODING_UCS2:
- __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
- "encoding not supported %s\n",
- BAD_CAST "UCS2", NULL);
- break;
case XML_CHAR_ENCODING_8859_1:
- case XML_CHAR_ENCODING_8859_2:
- case XML_CHAR_ENCODING_8859_3:
- case XML_CHAR_ENCODING_8859_4:
- case XML_CHAR_ENCODING_8859_5:
- case XML_CHAR_ENCODING_8859_6:
- case XML_CHAR_ENCODING_8859_7:
- case XML_CHAR_ENCODING_8859_8:
- case XML_CHAR_ENCODING_8859_9:
- /*
- * We used to keep the internal content in the
- * document encoding however this turns being unmaintainable
- * So xmlGetCharEncodingHandler() will return non-null
- * values for this now.
- */
if ((ctxt->inputNr == 1) &&
(ctxt->encoding == NULL) &&
(ctxt->input != NULL) &&
@@ -1025,36 +974,20 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
}
ctxt->charset = enc;
return(0);
- case XML_CHAR_ENCODING_2022_JP:
- __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
- "encoding not supported %s\n",
- BAD_CAST "ISO-2022-JP", NULL);
- break;
- case XML_CHAR_ENCODING_SHIFT_JIS:
- __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
- "encoding not supported %s\n",
- BAD_CAST "Shift_JIS", NULL);
- break;
- case XML_CHAR_ENCODING_EUC_JP:
- __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
- "encoding not supported %s\n",
- BAD_CAST "EUC-JP", NULL);
- break;
default:
- break;
- }
- }
- /*
- * TODO: We could recover from errors in external entities if we
- * didn't stop the parser. But most callers of this function don't
- * check the return value.
- */
- if (handler == NULL) {
- xmlStopParser(ctxt);
- return(-1);
+ __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
+ "encoding not supported: %s\n",
+ BAD_CAST xmlGetCharEncodingName(enc), NULL);
+ /*
+ * TODO: We could recover from errors in external entities
+ * if we didn't stop the parser. But most callers of this
+ * function don't check the return value.
+ */
+ xmlStopParser(ctxt);
+ return(-1);
+ }
}
- ctxt->charset = XML_CHAR_ENCODING_UTF8;
- ret = xmlSwitchToEncodingInt(ctxt, handler, len);
+ ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);
if ((ret < 0) || (ctxt->errNo == XML_I18N_CONV_FAILED)) {
/*
* on encoding conversion errors, stop the parser
@@ -1066,7 +999,7 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
}
/**
- * xmlSwitchInputEncoding:
+ * xmlSwitchInputEncodingInt:
* @ctxt: the parser context
* @input: the input stream
* @handler: the encoding handler
@@ -1088,6 +1021,8 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
if (input == NULL)
return (-1);
if (input->buf != NULL) {
+ ctxt->charset = XML_CHAR_ENCODING_UTF8;
+
if (input->buf->encoder != NULL) {
/*
* Check in case the auto encoding detection triggered
@@ -1191,12 +1126,9 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
input->buf->rawconsumed += use - xmlBufUse(input->buf->raw);
}
return (0);
- } else if (input->length == 0) {
- /*
- * When parsing a static memory array one must know the
- * size to be able to convert the buffer.
- */
- xmlErrInternal(ctxt, "switching encoding : no input\n", NULL);
+ } else {
+ xmlErrInternal(ctxt,
+ "static memory buffer doesn't support encoding\n", NULL);
/*
* Callers assume that the input buffer takes ownership of the
* encoding handler. xmlCharEncCloseFunc frees unregistered
@@ -1205,11 +1137,6 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
xmlCharEncCloseFunc(handler);
return (-1);
}
- /*
- * We should actually raise an error here, see issue #34.
- */
- xmlCharEncCloseFunc(handler);
- return (0);
}
/**
@@ -1218,6 +1145,8 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
* @input: the input stream
* @handler: the encoding handler
*
+ * DEPRECATED: Use xmlSwitchToEncoding
+ *
* change the input functions when discovering the character encoding
* of a given entity.
*
@@ -1229,41 +1158,6 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1));
}
-/**
- * xmlSwitchToEncodingInt:
- * @ctxt: the parser context
- * @handler: the encoding handler
- * @len: the length to convert or -1
- *
- * change the input functions when discovering the character encoding
- * of a given entity, and convert only @len bytes of the output, this
- * is needed on auto detect to allows any declared encoding later to
- * convert the actual content after the xmlDecl
- *
- * Returns 0 in case of success, -1 otherwise
- */
-static int
-xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
- xmlCharEncodingHandlerPtr handler, int len) {
- int ret = 0;
-
- if (handler != NULL) {
- if (ctxt->input != NULL) {
- ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);
- } else {
- xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
- NULL);
- return(-1);
- }
- /*
- * The parsing is now done in UTF8 natively
- */
- ctxt->charset = XML_CHAR_ENCODING_UTF8;
- } else
- return(-1);
- return(ret);
-}
-
/**
* xmlSwitchToEncoding:
* @ctxt: the parser context
@@ -1277,7 +1171,9 @@ xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
int
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
{
- return (xmlSwitchToEncodingInt(ctxt, handler, -1));
+ if (ctxt == NULL)
+ return(-1);
+ return(xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, -1));
}
/************************************************************************
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]