[libxml++/libxml++-2-42] Document, DomParser: Improve XInclude processing



commit a77edc748227f7773c110dfe39253a0d737b4dc9
Author: Kjell Ahlstedt <kjellahlstedt gmail com>
Date:   Thu May 4 15:18:47 2017 +0200

    Document, DomParser: Improve XInclude processing
    
    * examples/Makefile.am:
    * examples/dom_xinclude/example.xml: Changed due to moved include files.
    * examples/dom_xinclude/include1.txt:
    * examples/dom_xinclude/include2.xml: Moved to examples/dom_xinclude/xinclude/
    * examples/dom_xinclude/main.cc: Test both Document::process_xinclude() and
    XInclude processing with DomParser::parse_file().
    * libxml++/document.[cc|h]: Add fixup_base_uris parameter to process_xinclude().
    * libxml++/parsers/domparser.[cc|h]: Add set/get_xinclude_options().
    Add optional XInclude processing to the parse methods.
    * libxml++/parsers/parser.[cc|h]: Add set/get_xinclude_options_internal().
    Bug 781566

 examples/Makefile.am                              |    4 +-
 examples/dom_xinclude/example.xml                 |    6 +-
 examples/dom_xinclude/include1.txt                |    1 -
 examples/dom_xinclude/main.cc                     |   41 +++++++++----
 examples/dom_xinclude/xinclude/include1.txt       |    1 +
 examples/dom_xinclude/{ => xinclude}/include2.xml |    2 +-
 libxml++/document.cc                              |   16 ++++-
 libxml++/document.h                               |   26 ++++++++
 libxml++/parsers/domparser.cc                     |   66 +++++++++++++++++----
 libxml++/parsers/domparser.h                      |   26 ++++++++
 libxml++/parsers/parser.cc                        |   19 ++++++-
 libxml++/parsers/parser.h                         |    5 ++
 12 files changed, 179 insertions(+), 34 deletions(-)
---
diff --git a/examples/Makefile.am b/examples/Makefile.am
index 4fc3d73..5a6dfe1 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -124,8 +124,8 @@ dist_noinst_DATA = \
   dom_update_namespace/example1.xml \
   dom_update_namespace/example2.xml \
   dom_xinclude/example.xml \
-  dom_xinclude/include1.txt \
-  dom_xinclude/include2.xml \
+  dom_xinclude/xinclude/include1.txt \
+  dom_xinclude/xinclude/include2.xml \
   dom_xpath/example.xml \
   dtdvalidation/example.dtd \
   import_node/example1.xml \
diff --git a/examples/dom_xinclude/example.xml b/examples/dom_xinclude/example.xml
index e99a4a4..b4d8bd1 100644
--- a/examples/dom_xinclude/example.xml
+++ b/examples/dom_xinclude/example.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0"?>
 <document xmlns:xi="http://www.w3.org/2001/XInclude">
-  <p><xi:include href="include1.txt" parse="text">
-    <xi:fallback>Did not find include1.txt.</xi:fallback>
+  <p><xi:include href="xinclude/include1.txt" parse="text">
+    <xi:fallback>Did not find xinclude/include1.txt.</xi:fallback>
   </xi:include></p>
-  <xi:include href="include2.xml"/>
+  <xi:include href="xinclude/include2.xml"/>
 </document>
diff --git a/examples/dom_xinclude/main.cc b/examples/dom_xinclude/main.cc
index 0031287..dbf8724 100644
--- a/examples/dom_xinclude/main.cc
+++ b/examples/dom_xinclude/main.cc
@@ -71,10 +71,8 @@ void print_node(const xmlpp::Node* node, unsigned int indentation = 0)
     std::cout << indent << "     Element line = " << node->get_line() << std::endl;
 
     //Print attributes:
-    const auto attributes = nodeElement->get_attributes();
-    for (xmlpp::Element::AttributeList::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter)
+    for (const auto& attribute : nodeElement->get_attributes())
     {
-      const auto attribute = *iter;
       const auto namespace_prefix = attribute->get_namespace_prefix();
 
       std::cout << indent << "  Attribute ";
@@ -119,6 +117,7 @@ int main(int argc, char* argv[])
   bool throw_messages = false;
   bool substitute_entities = true;
   bool generate_xinclude_nodes = true;
+  bool fixup_base_uris = true;
 
   int argi = 1;
   while (argc > argi && *argv[argi] == '-') // option
@@ -142,13 +141,17 @@ int main(int argc, char* argv[])
       case 'X':
         generate_xinclude_nodes = false;
         break;
+      case 'B':
+        fixup_base_uris = false;
+        break;
      default:
-       std::cout << "Usage: " << argv[0] << " [-v] [-t] [-e] [-x] [filename]" << std::endl
+       std::cout << "Usage: " << argv[0] << " [options]... [filename]" << std::endl
                  << "       -v  Validate" << std::endl
                  << "       -t  Throw messages in an exception" << std::endl
                  << "       -e  Write messages to stderr" << std::endl
                  << "       -E  Do not substitute entities" << std::endl
-                 << "       -X  Do not generate XInclude nodes" << std::endl;
+                 << "       -X  Do not generate XInclude nodes" << std::endl
+                 << "       -B  Do not fix up base URIs" << std::endl;
        return EXIT_FAILURE;
      }
      argi++;
@@ -158,12 +161,11 @@ int main(int argc, char* argv[])
     filepath = argv[argi]; //Allow the user to specify a different XML file to parse.
   else
     filepath = "example.xml";
- 
+
   try
   {
     xmlpp::DomParser parser;
-    if (validate)
-      parser.set_validate();
+    parser.set_validate(validate);
     if (set_throw_messages)
       parser.set_throw_messages(throw_messages);
     //We can have the text resolved/unescaped automatically.
@@ -176,14 +178,31 @@ int main(int argc, char* argv[])
       print_node(pNode);
 
       std::cout << std::endl << ">>>>> Number of XInclude substitutions: "
-                << parser.get_document()->process_xinclude(generate_xinclude_nodes)
+                << parser.get_document()->process_xinclude(
+                     generate_xinclude_nodes, fixup_base_uris)
+                << std::endl << std::endl;
+
+      std::cout << ">>>>> After XInclude processing with xmlpp::Document::process_xinclude(): "
                 << std::endl << std::endl;
       pNode = parser.get_document()->get_root_node();
       print_node(pNode);
 
+      // xmlpp::Document::write_to_string() does not write XIncludeStart and
+      // XIncludeEnd nodes.
+      const auto whole = parser.get_document()->write_to_string();
+      std::cout << std::endl << whole << std::endl;
+    }
+
+    parser.set_xinclude_options(true, generate_xinclude_nodes, fixup_base_uris);
+    parser.parse_file(filepath);
+    if (parser)
+    {
+      std::cout << ">>>>> After XInclude processing with xmlpp::DomParser::parse_file(): "
+                << std::endl << std::endl;
+      print_node(parser.get_document()->get_root_node());
+
       const auto whole = parser.get_document()->write_to_string();
-      std::cout << std::endl << ">>>>> XML after XInclude processing: " << std::endl
-                << whole << std::endl;
+      std::cout << std::endl << whole << std::endl;
     }
   }
   catch (const std::exception& ex)
diff --git a/examples/dom_xinclude/xinclude/include1.txt b/examples/dom_xinclude/xinclude/include1.txt
new file mode 100644
index 0000000..8484d7c
--- /dev/null
+++ b/examples/dom_xinclude/xinclude/include1.txt
@@ -0,0 +1 @@
+This is the contents of file xinclude/include1.txt.
diff --git a/examples/dom_xinclude/include2.xml b/examples/dom_xinclude/xinclude/include2.xml
similarity index 53%
rename from examples/dom_xinclude/include2.xml
rename to examples/dom_xinclude/xinclude/include2.xml
index 19b2c9d..aaf8db1 100644
--- a/examples/dom_xinclude/include2.xml
+++ b/examples/dom_xinclude/xinclude/include2.xml
@@ -1,4 +1,4 @@
 <?xml version="1.0"?>
 <chapter id="chapter-introduction">
-  <p>This is the contents of file include2.xml.</p>
+  <p>This is the contents of file xinclude/include2.xml.</p>
 </chapter>
diff --git a/libxml++/document.cc b/libxml++/document.cc
index a5dfb17..e3e8790 100644
--- a/libxml++/document.cc
+++ b/libxml++/document.cc
@@ -17,7 +17,7 @@
 
 #include <libxml/tree.h>
 #include <libxml/xinclude.h>
-#include <libxml/parser.h> // XML_PARSE_NOXINCNODE
+#include <libxml/parser.h> // XML_PARSE_NOXINCNODE, XML_PARSE_NOBASEFIX
 
 #include <iostream>
 #include <map>
@@ -421,6 +421,11 @@ void Document::set_entity_declaration(const Glib::ustring& name, XmlEntityType t
 
 int Document::process_xinclude(bool generate_xinclude_nodes)
 {
+  return process_xinclude(generate_xinclude_nodes, true);
+}
+
+int Document::process_xinclude(bool generate_xinclude_nodes, bool fixup_base_uris)
+{
   NodeMap node_map;
 
   auto root = xmlDocGetRootElement(impl_);
@@ -428,8 +433,13 @@ int Document::process_xinclude(bool generate_xinclude_nodes)
   find_wrappers(root, node_map);
 
   xmlResetLastError();
-  const int n_substitutions = xmlXIncludeProcessTreeFlags(root,
-    generate_xinclude_nodes ? 0 : XML_PARSE_NOXINCNODE);
+
+  int flags = 0;
+  if (!generate_xinclude_nodes)
+    flags |= XML_PARSE_NOXINCNODE;
+  if (!fixup_base_uris)
+    flags |= XML_PARSE_NOBASEFIX;
+  const int n_substitutions = xmlXIncludeProcessTreeFlags(root, flags);
 
   remove_found_wrappers(reinterpret_cast<xmlNode*>(impl_), node_map);
 
diff --git a/libxml++/document.h b/libxml++/document.h
index 630584f..916aef8 100644
--- a/libxml++/document.h
+++ b/libxml++/document.h
@@ -218,6 +218,8 @@ public:
                                       const Glib::ustring& publicId, const Glib::ustring& systemId,
                                       const Glib::ustring& content);
 
+  //TODO: When we can break ABI, remove the process_xinclude() with one parameter,
+  // and add default values = true in the other process_xinclude()
   /** Perform XInclude substitution on the XML document.
    * XInclude substitution may both add and delete nodes in the document,
    * as well as change the type of some nodes. All pointers to deleted nodes
@@ -226,6 +228,9 @@ public:
    * The type of a C++ wrapper can't change. The old wrapper is deleted, and a
    * new one is created if and when it's required.)
    *
+   * Parser::set_parser_options() and DomParser::set_xinclude_options() do not
+   * affect %Document::process_xinclude().
+   *
    * @newin{2,36}
    *
    * @param generate_xinclude_nodes Generate XIncludeStart and XIncludeEnd nodes.
@@ -234,6 +239,27 @@ public:
    */
   int process_xinclude(bool generate_xinclude_nodes = true);
 
+  /** Perform XInclude substitution on the XML document.
+   * XInclude substitution may both add and delete nodes in the document,
+   * as well as change the type of some nodes. All pointers to deleted nodes
+   * and nodes whose type is changed become invalid.
+   * (The node type represented by an underlying xmlNode struct can change.
+   * The type of a C++ wrapper can't change. The old wrapper is deleted, and a
+   * new one is created if and when it's required.)
+   *
+   * Parser::set_parser_options() and DomParser::set_xinclude_options() do not
+   * affect %Document::process_xinclude().
+   *
+   * @newin{2,42}
+   *
+   * @param generate_xinclude_nodes Generate XIncludeStart and XIncludeEnd nodes.
+   * @param fixup_base_uris Add or replace xml:base attributes in included element
+   *        nodes, if necessary to preserve the target of relative URIs.
+   * @returns The number of substitutions.
+   * @throws xmlpp::exception
+   */
+  int process_xinclude(bool generate_xinclude_nodes, bool fixup_base_uris);
+
   ///Access the underlying libxml implementation.
   _xmlDoc* cobj();
 
diff --git a/libxml++/parsers/domparser.cc b/libxml++/parsers/domparser.cc
index ebb0135..7aa3e1c 100644
--- a/libxml++/parsers/domparser.cc
+++ b/libxml++/parsers/domparser.cc
@@ -12,6 +12,7 @@
 #include "libxml++/keepblanks.h"
 #include "libxml++/exceptions/internal_error.h"
 #include <libxml/parserInternals.h>//For xmlCreateFileParserCtxt().
+#include <libxml/xinclude.h>
 
 #include <sstream>
 #include <iostream>
@@ -38,6 +39,32 @@ DomParser::~DomParser()
   release_underlying();
 }
 
+//TODO: When we can break ABI, remove set/get_xinclude_options_internal() in
+// Parser and move all XInclude stuff to DomParser.
+void DomParser::set_xinclude_options(bool process_xinclude,
+  bool generate_xinclude_nodes, bool fixup_base_uris) noexcept
+{
+  int xinclude_options = 0;
+  if (process_xinclude)
+    xinclude_options |= XML_PARSE_XINCLUDE;
+  if (!generate_xinclude_nodes)
+    xinclude_options |= XML_PARSE_NOXINCNODE;
+  if (!fixup_base_uris)
+    xinclude_options |= XML_PARSE_NOBASEFIX;
+
+  set_xinclude_options_internal(xinclude_options);
+}
+
+void DomParser::get_xinclude_options(bool& process_xinclude,
+  bool& generate_xinclude_nodes, bool& fixup_base_uris) const noexcept
+{
+  const int xinclude_options = get_xinclude_options_internal();
+
+  process_xinclude = (xinclude_options & XML_PARSE_XINCLUDE) != 0;
+  generate_xinclude_nodes = (xinclude_options & XML_PARSE_NOXINCNODE) == 0;
+  fixup_base_uris = (xinclude_options & XML_PARSE_NOBASEFIX) == 0;
+}
+
 void DomParser::parse_file(const Glib::ustring& filename)
 {
   release_underlying(); //Free any existing document.
@@ -120,13 +147,36 @@ void DomParser::parse_context()
     throw parse_error(error_str);
   }
 
+  check_xinclude_and_finish_parsing();
+}
+
+void DomParser::check_xinclude_and_finish_parsing()
+{
+  int set_options = 0;
+  int clear_options = 0;
+  get_parser_options(set_options, clear_options);
+
+  int options = get_xinclude_options_internal();
+  // Turn on/off any xinclude options.
+  options |= set_options;
+  options &= ~clear_options;
+
+  if (options & XML_PARSE_XINCLUDE)
+  {
+    const int n_substitutions = xmlXIncludeProcessFlags(context_->myDoc, options);
+    if (n_substitutions < 0)
+    {
+      throw parse_error("Couldn't process XInclude\n" + format_xml_error());
+    }
+  }
+
   doc_ = new Document(context_->myDoc);
-  // This is to indicate to release_underlying that we took the
+  // This is to indicate to release_underlying() that we took the
   // ownership on the doc.
   context_->myDoc = nullptr;
 
-  //Free the parse context, but keep the document alive so people can navigate the DOM tree:
-  //TODO: Why not keep the context alive too?
+  // Free the parser context because it's not needed anymore,
+  // but keep the document alive so people can navigate the DOM tree:
   Parser::release_underlying();
 }
 
@@ -193,15 +243,7 @@ void DomParser::parse_stream(std::istream& in)
     throw parse_error(error_str);
   }
 
-  doc_ = new Document(context_->myDoc);
-  // This is to indicate to release_underlying that we took the
-  // ownership on the doc.
-  context_->myDoc = nullptr;
-
-
-  //Free the parse context, but keep the document alive so people can navigate the DOM tree:
-  //TODO: Why not keep the context alive too?
-  Parser::release_underlying();
+  check_xinclude_and_finish_parsing();
 }
 
 void DomParser::release_underlying()
diff --git a/libxml++/parsers/domparser.h b/libxml++/parsers/domparser.h
index da262be..be6faa1 100644
--- a/libxml++/parsers/domparser.h
+++ b/libxml++/parsers/domparser.h
@@ -34,6 +34,31 @@ public:
   explicit DomParser(const Glib::ustring& filename, bool validate = false);
   ~DomParser() override;
 
+  /** Set whether and how the parser will perform XInclude substitution.
+   *
+   * @newin{2,42}
+   *
+   * @param process_xinclude Do XInclude substitution on the XML document.
+   *        If <tt>false</tt>, the other parameters have no effect.
+   * @param generate_xinclude_nodes Generate XIncludeStart and XIncludeEnd nodes.
+   * @param fixup_base_uris Add or replace xml:base attributes in included element
+   *        nodes, if necessary to preserve the target of relative URIs.
+   */
+  void set_xinclude_options(bool process_xinclude = true,
+    bool generate_xinclude_nodes = true, bool fixup_base_uris = true) noexcept;
+
+  /** Get whether and how the parser will perform XInclude substitution.
+   *
+   * @newin{2,42}
+   *
+   * @param[out] process_xinclude Do XInclude substitution on the XML document.
+   * @param[out] generate_xinclude_nodes Generate XIncludeStart and XIncludeEnd nodes.
+   * @param[out] fixup_base_uris Add or replace xml:base attributes in included element
+   *        nodes, if necessary to preserve the target of relative URIs.
+   */
+  void get_xinclude_options(bool& process_xinclude,
+    bool& generate_xinclude_nodes, bool& fixup_base_uris) const noexcept;
+
   /** Parse an XML document from a file.
    * If the parser already contains a document, that document and all its nodes
    * are deleted.
@@ -92,6 +117,7 @@ public:
 protected:
   //TODO: Remove the virtual when we can break ABI?
   virtual void parse_context();
+  void check_xinclude_and_finish_parsing();
 
   void release_underlying() override;
   
diff --git a/libxml++/parsers/parser.cc b/libxml++/parsers/parser.cc
index b96fe90..7653d68 100644
--- a/libxml++/parsers/parser.cc
+++ b/libxml++/parsers/parser.cc
@@ -20,6 +20,9 @@
 
 namespace // anonymous
 {
+//TODO: When we can break ABI, remove ExtraParserData::xinclude_options_
+// and move all XInclude stuff to DomParser.
+
 // These are new data members that can't be added to xmlpp::Parser now,
 // because it would break ABI.
 struct ExtraParserData
@@ -27,7 +30,8 @@ struct ExtraParserData
   // Strange default values for throw_*_messages chosen for backward compatibility.
   ExtraParserData()
   : throw_parser_messages_(false), throw_validity_messages_(true),
-  include_default_attributes_(false), set_options_(0), clear_options_(0)
+  include_default_attributes_(false), set_options_(0), clear_options_(0),
+  xinclude_options_(0)
   {}
 
   Glib::ustring parser_error_;
@@ -37,6 +41,7 @@ struct ExtraParserData
   bool include_default_attributes_;
   int set_options_;
   int clear_options_;
+  int xinclude_options_;
 };
 
 std::map<const xmlpp::Parser*, ExtraParserData> extra_parser_data;
@@ -134,6 +139,18 @@ void Parser::get_parser_options(int& set_options, int& clear_options)
   clear_options = extra_parser_data[this].clear_options_;
 }
 
+void Parser::set_xinclude_options_internal(int xinclude_options) noexcept
+{
+  Glib::Threads::Mutex::Lock lock(extra_parser_data_mutex);
+  extra_parser_data[this].xinclude_options_ = xinclude_options;
+}
+
+int Parser::get_xinclude_options_internal() const noexcept
+{
+  Glib::Threads::Mutex::Lock lock(extra_parser_data_mutex);
+  return extra_parser_data[this].xinclude_options_;
+}
+
 void Parser::initialize_context()
 {
   Glib::Threads::Mutex::Lock lock(extra_parser_data_mutex);
diff --git a/libxml++/parsers/parser.h b/libxml++/parsers/parser.h
index 332f6bf..45f9edd 100644
--- a/libxml++/parsers/parser.h
+++ b/libxml++/parsers/parser.h
@@ -189,6 +189,11 @@ protected:
   static void callback_error_or_warning(MsgType msg_type, void* ctx,
                                         const char* msg, va_list var_args);
 
+  //TODO: When we can break ABI, remove set/get_xinclude_options_internal()
+  // and move all XInclude stuff to DomParser.
+  void set_xinclude_options_internal(int xinclude_options) noexcept;
+  int get_xinclude_options_internal() const noexcept;
+
   _xmlParserCtxt* context_;
   exception* exception_;
   //TODO: In a future ABI-break, add these members.


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]