Hi Daniel,
Hum, can you tell where this occurred?
It seems beneficial to test for ctxt->token in front of a large list of other "else if" conditionals. I've attached a step-2 patch which removes all remaining ctxt->token uses in parser.c and parserInternals.c (except the initialization to 0). htmlparser.c and docbookparser.c will be treated in the next patch. This second patch eats about 60% of the performance gains of the first, but I fully agree that it's nonsense to leave voodoo statements in the code. To proceed further in performance enhancement I need to do better profiling and perhaps look for more intrusive changes in the parser. Regards, Peter Jacobi
*** after-step1\parser.c Fri Jun 28 12:48:06 2002
--- parser.c Fri Jun 28 16:29:41 2002
***************
*** 316,322 ****
if (*(ctxt->input->cur) == '\n') { \
ctxt->input->line++; ctxt->input->col = 1; \
} else ctxt->input->col++; \
! ctxt->token = 0; ctxt->input->cur += l; \
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
} while (0)
--- 316,322 ----
if (*(ctxt->input->cur) == '\n') { \
ctxt->input->line++; ctxt->input->col = 1; \
} else ctxt->input->col++; \
! ctxt->input->cur += l; \
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
} while (0)
***************
*** 341,352 ****
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
int res = 0;
- if (ctxt->token != 0) {
- if (!IS_BLANK(ctxt->token))
- return(0);
- ctxt->token = 0;
- res++;
- }
/*
* It's Okay to use CUR/NEXT here since all the blanks are on
* the ASCII range.
--- 341,346 ----
***************
*** 465,475 ****
unsigned int val = 0;
int count = 0;
- if (ctxt->token != 0) {
- val = ctxt->token;
- ctxt->token = 0;
- return(val);
- }
/*
* Using RAW/CUR/NEXT is okay since we are working on ASCII range here
*/
--- 459,464 ----
***************
*** 754,762 ****
xmlEntityPtr entity = NULL;
xmlParserInputPtr input;
- if (ctxt->token != 0) {
- return;
- }
if (RAW != '%') return;
switch(ctxt->instate) {
case XML_PARSER_CDATA_SECTION:
--- 743,748 ----
***************
*** 2363,2370 ****
* OK loop until we reach one of the ending char or a size limit.
*/
c = CUR_CHAR(l);
! while (((NXT(0) != limit) && /* checked */
! (c != '<')) || (ctxt->token != 0)) {
if (c == 0) break;
if (c == '&') {
if (NXT(1) == '#') {
--- 2349,2356 ----
* OK loop until we reach one of the ending char or a size limit.
*/
c = CUR_CHAR(l);
! while ((NXT(0) != limit) && /* checked */
! (c != '<')) {
if (c == 0) break;
if (c == '&') {
if (NXT(1) == '#') {
***************
*** 2685,2691 ****
* Accelerated common case where input don't need to be
* modified before passing it to the handler.
*/
! if ((ctxt->token == 0) && (!cdata)) {
in = ctxt->input->cur;
do {
get_more:
--- 2671,2677 ----
* Accelerated common case where input don't need to be
* modified before passing it to the handler.
*/
! if (!cdata) {
in = ctxt->input->cur;
do {
get_more:
***************
*** 2777,2784 ****
SHRINK;
GROW;
cur = CUR_CHAR(l);
! while (((cur != '<') || (ctxt->token == '<')) && /* checked */
! ((cur != '&') || (ctxt->token == '&')) &&
(IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
if ((cur == ']') && (NXT(1) == ']') &&
(NXT(2) == '>')) {
--- 2763,2770 ----
SHRINK;
GROW;
cur = CUR_CHAR(l);
! while ((cur != '<') && /* checked */
! (cur != '&') &&
(IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
if ((cur == ']') && (NXT(1) == ']') &&
(NXT(2) == '>')) {
***************
*** 4938,4944 ****
(NXT(2) != '>'))) {
const xmlChar *check = CUR_PTR;
int cons = ctxt->input->consumed;
- int tok = ctxt->token;
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
xmlParseConditionalSections(ctxt);
--- 4924,4929 ----
***************
*** 4955,4962 ****
while ((RAW == 0) && (ctxt->inputNr > 1))
xmlPopInput(ctxt);
! if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
! (tok == ctxt->token)) {
ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
--- 4940,4946 ----
while ((RAW == 0) && (ctxt->inputNr > 1))
xmlPopInput(ctxt);
! if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
***************
*** 5248,5254 ****
(RAW == '%') || IS_BLANK(CUR)) {
const xmlChar *check = CUR_PTR;
int cons = ctxt->input->consumed;
- int tok = ctxt->token;
GROW;
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
--- 5232,5237 ----
***************
*** 5266,5273 ****
while ((RAW == 0) && (ctxt->inputNr > 1))
xmlPopInput(ctxt);
! if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
! (tok == ctxt->token)) {
ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
--- 5249,5255 ----
while ((RAW == 0) && (ctxt->inputNr > 1))
xmlPopInput(ctxt);
! if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
***************
*** 6869,6883 ****
const xmlChar *cur = ctxt->input->cur;
/*
- * Handle possible processed charrefs.
- */
- if (ctxt->token != 0) {
- xmlParseCharData(ctxt, 0);
- }
- /*
* First case : a Processing Instruction.
*/
! else if ((*cur == '<') && (cur[1] == '?')) {
xmlParsePI(ctxt);
}
--- 6851,6859 ----
const xmlChar *cur = ctxt->input->cur;
/*
* First case : a Processing Instruction.
*/
! if ((*cur == '<') && (cur[1] == '?')) {
xmlParsePI(ctxt);
}
***************
*** 8549,8568 ****
case XML_PARSER_CONTENT: {
const xmlChar *test;
int cons;
- int tok;
-
- /*
- * Handle preparsed entities and charRef
- */
- if (ctxt->token != 0) {
- xmlChar current[2] = { 0 , 0 } ;
-
- current[0] = (xmlChar) ctxt->token;
- if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
- (ctxt->sax->characters != NULL))
- ctxt->sax->characters(ctxt->userData, current, 1);
- ctxt->token = 0;
- }
if ((avail < 2) && (ctxt->inputNr == 1))
goto done;
cur = ctxt->input->cur[0];
--- 8525,8530 ----
***************
*** 8570,8576 ****
test = CUR_PTR;
cons = ctxt->input->consumed;
- tok = ctxt->token;
if ((cur == '<') && (next == '?')) {
if ((!terminate) &&
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
--- 8532,8537 ----
***************
*** 8660,8667 ****
*/
while ((RAW == 0) && (ctxt->inputNr > 1))
xmlPopInput(ctxt);
! if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
! (tok == ctxt->token)) {
ctxt->errNo = XML_ERR_INTERNAL_ERROR;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
--- 8621,8627 ----
*/
while ((RAW == 0) && (ctxt->inputNr > 1))
xmlPopInput(ctxt);
! if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
ctxt->errNo = XML_ERR_INTERNAL_ERROR;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
*** after-step1\parserInternals.c Fri Jun 28 10:23:45 2002
--- parserInternals.c Fri Jun 28 16:29:41 2002
***************
*** 1110,1117 ****
* literal #xD, an XML processor must pass to the application
* the single character #xA.
*/
! if (ctxt->token != 0) ctxt->token = 0;
! else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
if ((*ctxt->input->cur == 0) &&
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
(ctxt->instate != XML_PARSER_COMMENT)) {
--- 1110,1116 ----
* literal #xD, an XML processor must pass to the application
* the single character #xA.
*/
! if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
if ((*ctxt->input->cur == 0) &&
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
(ctxt->instate != XML_PARSER_COMMENT)) {
***************
*** 2781,2791 ****
(c != end2) && (c != end3)) {
GROW;
if (c == 0) break;
! if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
int val = xmlParseCharRef(ctxt);
COPY_BUF(0,buffer,nbchars,val);
NEXTL(l);
! } else if ((c == '&') && (ctxt->token != '&') &&
(what & XML_SUBSTITUTE_REF)) {
if (xmlParserDebugEntities)
xmlGenericError(xmlGenericErrorContext,
--- 2780,2790 ----
(c != end2) && (c != end3)) {
GROW;
if (c == 0) break;
! if ((c == '&') && (NXT(1) == '#')) {
int val = xmlParseCharRef(ctxt);
COPY_BUF(0,buffer,nbchars,val);
NEXTL(l);
! } else if ((c == '&') &&
(what & XML_SUBSTITUTE_REF)) {
if (xmlParserDebugEntities)
xmlGenericError(xmlGenericErrorContext,
***************
*** 3317,3545 ****
deprecated = 1;
}
- #if 0
- xmlParserInputPtr input;
- xmlChar *name;
- xmlEntityPtr ent = NULL;
-
- if (ctxt->token != 0) {
- return;
- }
- if (RAW != '&') return;
- GROW;
- if ((RAW == '&') && (NXT(1) == '#')) {
- switch(ctxt->instate) {
- case XML_PARSER_ENTITY_DECL:
- case XML_PARSER_PI:
- case XML_PARSER_CDATA_SECTION:
- case XML_PARSER_COMMENT:
- case XML_PARSER_SYSTEM_LITERAL:
- /* we just ignore it there */
- return;
- case XML_PARSER_START_TAG:
- return;
- case XML_PARSER_END_TAG:
- return;
- case XML_PARSER_EOF:
- ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- return;
- case XML_PARSER_PROLOG:
- case XML_PARSER_START:
- case XML_PARSER_MISC:
- ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- return;
- case XML_PARSER_EPILOG:
- ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- return;
- case XML_PARSER_DTD:
- ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "CharRef are forbidden in DTDs!\n");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- return;
- case XML_PARSER_ENTITY_VALUE:
- /*
- * NOTE: in the case of entity values, we don't do the
- * substitution here since we need the literal
- * entity value to be able to save the internal
- * subset of the document.
- * This will be handled by xmlStringDecodeEntities
- */
- return;
- case XML_PARSER_CONTENT:
- return;
- case XML_PARSER_ATTRIBUTE_VALUE:
- /* ctxt->token = xmlParseCharRef(ctxt); */
- return;
- case XML_PARSER_IGNORE:
- return;
- }
- return;
- }
-
- switch(ctxt->instate) {
- case XML_PARSER_CDATA_SECTION:
- return;
- case XML_PARSER_PI:
- case XML_PARSER_COMMENT:
- case XML_PARSER_SYSTEM_LITERAL:
- case XML_PARSER_CONTENT:
- return;
- case XML_PARSER_START_TAG:
- return;
- case XML_PARSER_END_TAG:
- return;
- case XML_PARSER_EOF:
- ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- return;
- case XML_PARSER_PROLOG:
- case XML_PARSER_START:
- case XML_PARSER_MISC:
- ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- return;
- case XML_PARSER_EPILOG:
- ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- return;
- case XML_PARSER_ENTITY_VALUE:
- /*
- * NOTE: in the case of entity values, we don't do the
- * substitution here since we need the literal
- * entity value to be able to save the internal
- * subset of the document.
- * This will be handled by xmlStringDecodeEntities
- */
- return;
- case XML_PARSER_ATTRIBUTE_VALUE:
- /*
- * NOTE: in the case of attributes values, we don't do the
- * substitution here unless we are in a mode where
- * the parser is explicitly asked to substitute
- * entities. The SAX callback is called with values
- * without entity substitution.
- * This will then be handled by xmlStringDecodeEntities
- */
- return;
- case XML_PARSER_ENTITY_DECL:
- /*
- * we just ignore it there
- * the substitution will be done once the entity is referenced
- */
- return;
- case XML_PARSER_DTD:
- ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Entity references are forbidden in DTDs!\n");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- return;
- case XML_PARSER_IGNORE:
- return;
- }
-
- /* TODO: this seems not reached anymore .... Verify ... */
- xmlGenericError(xmlGenericErrorContext,
- "Reached deprecated section in xmlParserHandleReference()\n");
- xmlGenericError(xmlGenericErrorContext,
- "Please forward the document to daniel veillard com\n");
- xmlGenericError(xmlGenericErrorContext,
- "indicating the version: %s, thanks !\n", xmlParserVersion);
- NEXT;
- name = xmlScanName(ctxt);
- if (name == NULL) {
- ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->token = '&';
- return;
- }
- if (NXT(xmlStrlen(name)) != ';') {
- ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Entity reference: ';' expected\n");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->token = '&';
- xmlFree(name);
- return;
- }
- SKIP(xmlStrlen(name) + 1);
- if (ctxt->sax != NULL) {
- if (ctxt->sax->getEntity != NULL)
- ent = ctxt->sax->getEntity(ctxt->userData, name);
- }
-
- /*
- * [ WFC: Entity Declared ]
- * the Name given in the entity reference must match that in an entity
- * declaration, except that well-formed documents need not declare any
- * of the following entities: amp, lt, gt, apos, quot.
- */
- if (ent == NULL)
- ent = xmlGetPredefinedEntity(name);
- if (ent == NULL) {
- ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Entity reference: entity %s not declared\n",
- name);
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- xmlFree(name);
- return;
- }
-
- /*
- * [ WFC: Parsed Entity ]
- * An entity reference must not contain the name of an unparsed entity
- */
- if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
- ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Entity reference to unparsed entity %s\n", name);
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- }
-
- if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
- ctxt->token = ent->content[0];
- xmlFree(name);
- return;
- }
- input = xmlNewEntityInputStream(ctxt, ent);
- xmlPushInput(ctxt, input);
- xmlFree(name);
- #endif
return;
}
--- 3316,3321 ----
Attachment:
ctxt-token-patch-2.zip
Description: Zip archive