[libxml2] Fix pathological performance when outputting charrefs
- From: Nick Wellnhofer <nwellnhof src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [libxml2] Fix pathological performance when outputting charrefs
- Date: Tue, 20 Jun 2017 13:55:19 +0000 (UTC)
commit e5107772ff317875bc08b859900254de058de368
Author: Nick Wellnhofer <wellnhofer aevum de>
Date: Mon Jun 19 15:32:56 2017 +0200
Fix pathological performance when outputting charrefs
If a character can't be represented in the output encoding, it is
converted to a character reference. This used to to replace the
character in the input stream by calling xmlBufAddHead or
xmlBufferAddHead. These functions shifted the entire input array
around, leading to quadratic performance when converting a run of
non-representable characters. This is most pronounced when dumping to
memory.
Output the charref directly instead.
Found with libFuzzer.
encoding.c | 129 +++++++++++++++++++++++++++--------------------------------
1 files changed, 59 insertions(+), 70 deletions(-)
---
diff --git a/encoding.c b/encoding.c
index 9b8309d..cd019c5 100644
--- a/encoding.c
+++ b/encoding.c
@@ -2395,7 +2395,6 @@ xmlCharEncOutput(xmlOutputBufferPtr output, int init)
int c_out;
xmlBufPtr in;
xmlBufPtr out;
- int charref_len = 0;
if ((output == NULL) || (output->encoder == NULL) ||
(output->buffer == NULL) || (output->conv == NULL))
@@ -2451,7 +2450,6 @@ retry:
if (ret == -1) {
if (c_out > 0) {
/* Can be a limitation of iconv or uconv */
- charref_len = 0;
goto retry;
}
ret = -3;
@@ -2488,46 +2486,38 @@ retry:
ret = -1;
break;
case -2: {
+ xmlChar charref[20];
int len = (int) xmlBufUse(in);
xmlChar *content = xmlBufContent(in);
- int cur;
+ int cur, charrefLen;
cur = xmlGetUTF8Char(content, &len);
- if ((charref_len != 0) && (c_out < charref_len)) {
- /*
- * We attempted to insert a character reference and failed.
- * Undo what was written and skip the remaining charref.
- */
- xmlBufErase(out, c_out);
- writtentot -= c_out;
- xmlBufShrink(in, charref_len - c_out);
- charref_len = 0;
-
- ret = -1;
+ if (cur <= 0)
break;
- } else if (cur > 0) {
- xmlChar charref[20];
#ifdef DEBUG_ENCODING
- xmlGenericError(xmlGenericErrorContext,
- "handling output conversion error\n");
- xmlGenericError(xmlGenericErrorContext,
- "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
- content[0], content[1],
- content[2], content[3]);
+ xmlGenericError(xmlGenericErrorContext,
+ "handling output conversion error\n");
+ xmlGenericError(xmlGenericErrorContext,
+ "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
+ content[0], content[1],
+ content[2], content[3]);
#endif
- /*
- * Removes the UTF8 sequence, and replace it by a charref
- * and continue the transcoding phase, hoping the error
- * did not mangle the encoder state.
- */
- charref_len = snprintf((char *) &charref[0], sizeof(charref),
- "&#%d;", cur);
- xmlBufShrink(in, len);
- xmlBufAddHead(in, charref, -1);
-
- goto retry;
- } else {
+ /*
+ * Removes the UTF8 sequence, and replace it by a charref
+ * and continue the transcoding phase, hoping the error
+ * did not mangle the encoder state.
+ */
+ charrefLen = snprintf((char *) &charref[0], sizeof(charref),
+ "&#%d;", cur);
+ xmlBufShrink(in, len);
+ xmlBufGrow(out, charrefLen * 4);
+ c_out = xmlBufAvail(out) - 1;
+ c_in = charrefLen;
+ ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
+ charref, &c_in);
+
+ if ((ret < 0) || (c_in != charrefLen)) {
char buf[50];
snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
@@ -2539,8 +2529,12 @@ retry:
buf);
if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
content[0] = ' ';
+ break;
}
- break;
+
+ xmlBufAddLen(out, c_out);
+ writtentot += c_out;
+ goto retry;
}
}
return(ret);
@@ -2573,7 +2567,6 @@ xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
int writtentot = 0;
int toconv;
int output = 0;
- int charref_len = 0;
if (handler == NULL) return(-1);
if (out == NULL) return(-1);
@@ -2621,7 +2614,6 @@ retry:
if (ret == -1) {
if (written > 0) {
/* Can be a limitation of iconv or uconv */
- charref_len = 0;
goto retry;
}
ret = -3;
@@ -2658,46 +2650,38 @@ retry:
ret = -1;
break;
case -2: {
+ xmlChar charref[20];
int len = in->use;
const xmlChar *utf = (const xmlChar *) in->content;
- int cur;
+ int cur, charrefLen;
cur = xmlGetUTF8Char(utf, &len);
- if ((charref_len != 0) && (written < charref_len)) {
- /*
- * We attempted to insert a character reference and failed.
- * Undo what was written and skip the remaining charref.
- */
- out->use -= written;
- writtentot -= written;
- xmlBufferShrink(in, charref_len - written);
- charref_len = 0;
-
- ret = -1;
+ if (cur <= 0)
break;
- } else if (cur > 0) {
- xmlChar charref[20];
#ifdef DEBUG_ENCODING
- xmlGenericError(xmlGenericErrorContext,
- "handling output conversion error\n");
- xmlGenericError(xmlGenericErrorContext,
- "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
- in->content[0], in->content[1],
- in->content[2], in->content[3]);
+ xmlGenericError(xmlGenericErrorContext,
+ "handling output conversion error\n");
+ xmlGenericError(xmlGenericErrorContext,
+ "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
+ in->content[0], in->content[1],
+ in->content[2], in->content[3]);
#endif
- /*
- * Removes the UTF8 sequence, and replace it by a charref
- * and continue the transcoding phase, hoping the error
- * did not mangle the encoder state.
- */
- charref_len = snprintf((char *) &charref[0], sizeof(charref),
- "&#%d;", cur);
- xmlBufferShrink(in, len);
- xmlBufferAddHead(in, charref, -1);
-
- goto retry;
- } else {
+ /*
+ * Removes the UTF8 sequence, and replace it by a charref
+ * and continue the transcoding phase, hoping the error
+ * did not mangle the encoder state.
+ */
+ charrefLen = snprintf((char *) &charref[0], sizeof(charref),
+ "&#%d;", cur);
+ xmlBufferShrink(in, len);
+ xmlBufferGrow(out, charrefLen * 4);
+ written = out->size - out->use - 1;
+ toconv = charrefLen;
+ ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
+ charref, &toconv);
+
+ if ((ret < 0) || (toconv != charrefLen)) {
char buf[50];
snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
@@ -2709,8 +2693,13 @@ retry:
buf);
if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
in->content[0] = ' ';
+ break;
}
- break;
+
+ out->use += written;
+ writtentot += written;
+ out->content[out->use] = 0;
+ goto retry;
}
}
return(ret);
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]