[libxml2] 492317 Fix Relax-NG validation problems
- From: Daniel Veillard <veillard src gnome org>
- To: svn-commits-list gnome org
- Cc:
- Subject: [libxml2] 492317 Fix Relax-NG validation problems
- Date: Mon, 31 Aug 2009 15:11:34 +0000 (UTC)
commit 1ba2aca3ebc3b47653a86849746b168a4e0bd8c6
Author: Daniel Veillard <veillard redhat com>
Date: Mon Aug 31 16:47:39 2009 +0200
492317 Fix Relax-NG validation problems
* relaxng.c xmlregexp.c: fix a subtle problem when checking for a compilable
content model when the same element names are used in the branches of a choice.
Handled by adding a special flag to the regexp compilation to detect
transitions with different atoms using the same strings.
* test/relaxng/492317* result/relaxng/492317*: add the test to the
regression suite
relaxng.c | 21 +++++++++++++
result/relaxng/492317_0.err | 1 +
result/relaxng/492317_1.err | 1 +
result/relaxng/492317_2.err | 3 ++
result/relaxng/492317_err | 1 +
test/relaxng/492317.rng | 16 ++++++++++
test/relaxng/492317_0.xml | 4 ++
test/relaxng/492317_1.xml | 4 ++
test/relaxng/492317_2.xml | 4 ++
xmlregexp.c | 68 ++++++++++++++++++++++++++++++++++++------
10 files changed, 113 insertions(+), 10 deletions(-)
---
diff --git a/relaxng.c b/relaxng.c
index 6b83cfd..ea739de 100644
--- a/relaxng.c
+++ b/relaxng.c
@@ -2854,6 +2854,10 @@ xmlRelaxNGCleanupTypes(void)
* *
************************************************************************/
+/* from xmlregexp.c but not exported */
+void xmlAutomataSetFlags(xmlAutomataPtr am, int flags);
+
+
static int xmlRelaxNGTryCompile(xmlRelaxNGParserCtxtPtr ctxt,
xmlRelaxNGDefinePtr def);
@@ -3037,6 +3041,17 @@ xmlRelaxNGCompile(xmlRelaxNGParserCtxtPtr ctxt, xmlRelaxNGDefinePtr def)
ctxt->am = xmlNewAutomata();
if (ctxt->am == NULL)
return (-1);
+
+ /*
+ * assume atoms holding identical strings at different pointers
+ * are different atoms; needed for non-determinism detection.
+ * That way if 2 elements with the same name are in a choice
+ * branch the automaton is found non-deterministic and
+ * we fall back to the normal validation which does the right
+ * thing of exploring both choices.
+ */
+ xmlAutomataSetFlags(ctxt->am, 1);
+
ctxt->state = xmlAutomataGetInitState(ctxt->am);
while (list != NULL) {
xmlRelaxNGCompile(ctxt, list);
@@ -3068,6 +3083,7 @@ xmlRelaxNGCompile(xmlRelaxNGParserCtxtPtr ctxt, xmlRelaxNGDefinePtr def)
ctxt->am = xmlNewAutomata();
if (ctxt->am == NULL)
return (-1);
+ xmlAutomataSetFlags(ctxt->am, 1);
ctxt->state = xmlAutomataGetInitState(ctxt->am);
while (list != NULL) {
xmlRelaxNGCompile(ctxt, list);
@@ -3076,6 +3092,11 @@ xmlRelaxNGCompile(xmlRelaxNGParserCtxtPtr ctxt, xmlRelaxNGDefinePtr def)
xmlAutomataSetFinalState(ctxt->am, ctxt->state);
def->contModel = xmlAutomataCompile(ctxt->am);
if (!xmlRegexpIsDeterminist(def->contModel)) {
+#ifdef DEBUG_COMPILE
+ xmlGenericError(xmlGenericErrorContext,
+ "Content model not determinist %s\n",
+ def->name);
+#endif
/*
* we can only use the automata if it is determinist
*/
diff --git a/result/relaxng/492317_0 b/result/relaxng/492317_0
new file mode 100644
index 0000000..e69de29
diff --git a/result/relaxng/492317_0.err b/result/relaxng/492317_0.err
new file mode 100644
index 0000000..9b8db15
--- /dev/null
+++ b/result/relaxng/492317_0.err
@@ -0,0 +1 @@
+./test/relaxng/492317_0.xml validates
diff --git a/result/relaxng/492317_1 b/result/relaxng/492317_1
new file mode 100644
index 0000000..e69de29
diff --git a/result/relaxng/492317_1.err b/result/relaxng/492317_1.err
new file mode 100644
index 0000000..177ee7b
--- /dev/null
+++ b/result/relaxng/492317_1.err
@@ -0,0 +1 @@
+./test/relaxng/492317_1.xml validates
diff --git a/result/relaxng/492317_2 b/result/relaxng/492317_2
new file mode 100644
index 0000000..e69de29
diff --git a/result/relaxng/492317_2.err b/result/relaxng/492317_2.err
new file mode 100644
index 0000000..e8b22e7
--- /dev/null
+++ b/result/relaxng/492317_2.err
@@ -0,0 +1,3 @@
+./test/relaxng/492317_2.xml:2: element child: Relax-NG validity error : Element child failed to validate attributes
+./test/relaxng/492317_2.xml:1: element root: Relax-NG validity error : Element root failed to validate content
+./test/relaxng/492317_2.xml fails to validate
diff --git a/result/relaxng/492317_err b/result/relaxng/492317_err
new file mode 100644
index 0000000..1f07539
--- /dev/null
+++ b/result/relaxng/492317_err
@@ -0,0 +1 @@
+./test/relaxng/492317.rng validates
diff --git a/result/relaxng/492317_valid b/result/relaxng/492317_valid
new file mode 100644
index 0000000..e69de29
diff --git a/test/relaxng/492317.rng b/test/relaxng/492317.rng
new file mode 100644
index 0000000..343f294
--- /dev/null
+++ b/test/relaxng/492317.rng
@@ -0,0 +1,16 @@
+<element name="root" xmlns="http://relaxng.org/ns/structure/1.0">
+ <choice>
+ <element name="child">
+ <attribute name="type">
+ <value>Foo</value>
+ </attribute>
+ <!-- Define stuff that's only valid when type is "Foo" -->
+ </element>
+ <element name="child">
+ <attribute name="type">
+ <value>Bar</value>
+ </attribute>
+ <!-- Define stuff that's only valid when type is "Bar" -->
+ </element>
+ </choice>
+</element>
diff --git a/test/relaxng/492317_0.xml b/test/relaxng/492317_0.xml
new file mode 100644
index 0000000..6ab9d80
--- /dev/null
+++ b/test/relaxng/492317_0.xml
@@ -0,0 +1,4 @@
+<root>
+ <child type="Foo">
+ </child>
+</root>
diff --git a/test/relaxng/492317_1.xml b/test/relaxng/492317_1.xml
new file mode 100644
index 0000000..d325ac2
--- /dev/null
+++ b/test/relaxng/492317_1.xml
@@ -0,0 +1,4 @@
+<root>
+ <child type="Bar">
+ </child>
+</root>
diff --git a/test/relaxng/492317_2.xml b/test/relaxng/492317_2.xml
new file mode 100644
index 0000000..33bbc5d
--- /dev/null
+++ b/test/relaxng/492317_2.xml
@@ -0,0 +1,4 @@
+<root>
+ <child type="">
+ </child>
+</root>
diff --git a/xmlregexp.c b/xmlregexp.c
index 0644d0b..ac6d8bc 100644
--- a/xmlregexp.c
+++ b/xmlregexp.c
@@ -233,6 +233,8 @@ struct _xmlAutomataState {
typedef struct _xmlAutomata xmlRegParserCtxt;
typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;
+#define AM_AUTOMATA_RNG 1
+
struct _xmlAutomata {
xmlChar *string;
xmlChar *cur;
@@ -260,6 +262,7 @@ struct _xmlAutomata {
int determinist;
int negs;
+ int flags;
};
struct _xmlRegexp {
@@ -271,6 +274,7 @@ struct _xmlRegexp {
int nbCounters;
xmlRegCounter *counters;
int determinist;
+ int flags;
/*
* That's the compact form for determinists automatas
*/
@@ -353,6 +357,8 @@ static int xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint);
static int xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint,
int neg, int start, int end, const xmlChar *blockName);
+void xmlAutomataSetFlags(xmlAutomataPtr am, int flags);
+
/************************************************************************
* *
* Regexp memory error handler *
@@ -434,6 +440,7 @@ xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
ret->nbCounters = ctxt->nbCounters;
ret->counters = ctxt->counters;
ret->determinist = ctxt->determinist;
+ ret->flags = ctxt->flags;
if (ret->determinist == -1) {
xmlRegexpIsDeterminist(ret);
}
@@ -2428,6 +2435,7 @@ xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) {
* xmlFAEqualAtoms:
* @atom1: an atom
* @atom2: an atom
+ * @deep: if not set only compare string pointers
*
* Compares two atoms to check whether they are the same exactly
* this is used to remove equivalent transitions
@@ -2435,7 +2443,7 @@ xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) {
* Returns 1 if same and 0 otherwise
*/
static int
-xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
+xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
int ret = 0;
if (atom1 == atom2)
@@ -2450,8 +2458,11 @@ xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
ret = 0;
break;
case XML_REGEXP_STRING:
- ret = xmlStrEqual((xmlChar *)atom1->valuep,
- (xmlChar *)atom2->valuep);
+ if (!deep)
+ ret = (atom1->valuep == atom2->valuep);
+ else
+ ret = xmlStrEqual((xmlChar *)atom1->valuep,
+ (xmlChar *)atom2->valuep);
break;
case XML_REGEXP_CHARVAL:
ret = (atom1->codepoint == atom2->codepoint);
@@ -2469,6 +2480,7 @@ xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
* xmlFACompareAtoms:
* @atom1: an atom
* @atom2: an atom
+ * @deep: if not set only compare string pointers
*
* Compares two atoms to check whether they intersect in some ways,
* this is used by xmlFAComputesDeterminism and xmlFARecurseDeterminism only
@@ -2476,7 +2488,7 @@ xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
* Returns 1 if yes and 0 otherwise
*/
static int
-xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
+xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
int ret = 1;
if (atom1 == atom2)
@@ -2502,8 +2514,11 @@ xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
}
switch (atom1->type) {
case XML_REGEXP_STRING:
- ret = xmlRegStrEqualWildcard((xmlChar *)atom1->valuep,
- (xmlChar *)atom2->valuep);
+ if (!deep)
+ ret = (atom1->valuep != atom2->valuep);
+ else
+ ret = xmlRegStrEqualWildcard((xmlChar *)atom1->valuep,
+ (xmlChar *)atom2->valuep);
break;
case XML_REGEXP_EPSILON:
goto not_determinist;
@@ -2566,9 +2581,14 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
int res;
int transnr, nbTrans;
xmlRegTransPtr t1;
+ int deep = 1;
if (state == NULL)
return(ret);
+
+ if (ctxt->flags & AM_AUTOMATA_RNG)
+ deep = 0;
+
/*
* don't recurse on transitions potentially added in the course of
* the elimination.
@@ -2592,7 +2612,7 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
}
if (t1->to != to)
continue;
- if (xmlFACompareAtoms(t1->atom, atom)) {
+ if (xmlFACompareAtoms(t1->atom, atom, deep)) {
ret = 0;
/* mark the transition as non-deterministic */
t1->nd = 1;
@@ -2616,6 +2636,7 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
xmlRegTransPtr t1, t2, last;
int i;
int ret = 1;
+ int deep = 1;
#ifdef DEBUG_REGEXP_GRAPH
printf("xmlFAComputesDeterminism\n");
@@ -2624,6 +2645,9 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
if (ctxt->determinist != -1)
return(ctxt->determinist);
+ if (ctxt->flags & AM_AUTOMATA_RNG)
+ deep = 0;
+
/*
* First cleanup the automata removing cancelled transitions
*/
@@ -2651,7 +2675,11 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
continue;
if (t2->atom != NULL) {
if (t1->to == t2->to) {
- if (xmlFAEqualAtoms(t1->atom, t2->atom) &&
+ /*
+ * Pass deep (0 when AM_AUTOMATA_RNG is set, i.e. compare string
+ * pointers only) to keep the transitions which indicate a conflict
+ */
+ if (xmlFAEqualAtoms(t1->atom, t2->atom, deep) &&
(t1->counter == t2->counter) &&
(t1->count == t2->count))
t2->to = -1; /* eliminated */
@@ -2688,8 +2716,11 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
if (t2->to == -1) /* eliminated */
continue;
if (t2->atom != NULL) {
- /* not determinist ! */
- if (xmlFACompareAtoms(t1->atom, t2->atom)) {
+ /*
+ * But here always compare the strings themselves (deep is 1) to
+ * find transitions which indicate a conflict
+ */
+ if (xmlFACompareAtoms(t1->atom, t2->atom, 1)) {
ret = 0;
/* mark the transitions as non-deterministic ones */
t1->nd = 1;
@@ -5477,10 +5508,12 @@ xmlRegexpIsDeterminist(xmlRegexpPtr comp) {
am->nbStates = comp->nbStates;
am->states = comp->states;
am->determinist = -1;
+ am->flags = comp->flags;
ret = xmlFAComputesDeterminism(am);
am->atoms = NULL;
am->states = NULL;
xmlFreeAutomata(am);
+ comp->determinist = ret;
return(ret);
}
@@ -5558,6 +5591,7 @@ xmlNewAutomata(void) {
xmlFreeAutomata(ctxt);
return(NULL);
}
+ ctxt->flags = 0;
return(ctxt);
}
@@ -5576,6 +5610,20 @@ xmlFreeAutomata(xmlAutomataPtr am) {
}
/**
+ * xmlAutomataSetFlags:
+ * @am: an automata
+ * @flags: a set of internal flags
+ *
+ * OR the given flags into the automata's flags; no-op if @am is NULL
+ */
+void
+xmlAutomataSetFlags(xmlAutomataPtr am, int flags) {
+ if (am == NULL)
+ return;
+ am->flags |= flags;
+}
+
+/**
* xmlAutomataGetInitState:
* @am: an automata
*
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]