[gxml] Added new methods for HTML parsing/dump
- From: Daniel Espinosa Ortiz <despinosa src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gxml] Added new methods for HTML parsing/dump
- Date: Fri, 1 Sep 2017 21:39:08 +0000 (UTC)
commit 9960fafdfb373578ab2f43f0c552c96b6ce3b1bb
Author: Daniel Espinosa <esodan gmail com>
Date: Fri Sep 1 16:30:23 2017 -0500
Added new methods for HTML parsing/dump
When parsing documents, you should use the HtmlDocument constructor
best suited to the type of document you use/get.
If you want correct HTML output, you should use the new to_html() method.
Fixes Bug:
https://bugzilla.gnome.org/show_bug.cgi?id=786812
gxml/GHtml.vala | 38 +++++++++++++++++++++++++++++++++++++-
test/HtmlDocumentTest.vala | 41 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 78 insertions(+), 1 deletions(-)
---
diff --git a/gxml/GHtml.vala b/gxml/GHtml.vala
index 2e593f4..d315ea7 100644
--- a/gxml/GHtml.vala
+++ b/gxml/GHtml.vala
@@ -43,18 +43,54 @@ namespace GXml {
this.from_file (File.new_for_uri (uri), options);
}
+ /**
+ * Parses the content of a {@link GLib.File} using the {@link Xml.Html.Doc.read_memory} method.
+ * Refer to libxml2 documentation about limitations on parsing.
+ *
+ * The file is read completely into an in-memory stream and the resulting
+ * buffer is handed to the from_string constructor together with //options//.
+ * NOTE(review): the buffer is cast directly to a string; confirm it is NUL-terminated.
+ *
+ * To use a different parser, you may want to load your file into memory yourself
+ * and then create a new {@link HtmlDocument} using the constructor that best fits
+ * your document content or source.
+ */
public HtmlDocument.from_file (File file, int options = 0, Cancellable? cancel = null) throws
GLib.Error {
var ostream = new MemoryOutputStream.resizable ();
ostream.splice (file.read (), GLib.OutputStreamSpliceFlags.CLOSE_SOURCE, cancel);
this.from_string ((string) ostream.data, options);
}
-
+ /**
+ * Parses an HTML string using the {@link Xml.Html.Doc.read_memory} method.
+ * Refer to libxml2 documentation about limitations on parsing.
+ *
+ * @param html the HTML text to parse
+ * @param options parser options, passed unchanged to read_memory
+ */
public HtmlDocument.from_string (string html, int options = 0) {
+ base.from_doc (Html.Doc.read_memory ((char[]) html, html.length, "", null, options));
+ }
+ /**
+ * Parses an HTML string using a newly created {@link Xml.Html.ParserCtxt}.
+ * Refer to libxml2 documentation about limitations on parsing.
+ *
+ * @param html the HTML text to parse
+ * @param options parser options, passed unchanged to the context's read_memory
+ */
+ public HtmlDocument.from_string_context (string html, int options = 0) {
Html.ParserCtxt ctx = new Html.ParserCtxt ();
Xml.Doc *doc = ctx.read_memory ((char[]) html, html.length, "", null, options);
base.from_doc (doc);
}
/**
+ * Parses an HTML string using the {@link Xml.Html.Doc.read_doc} method.
+ * Refer to libxml2 documentation about limitations on parsing.
+ *
+ * @param html the HTML text to parse
+ * @param options parser options, passed unchanged to read_doc
+ */
+ public HtmlDocument.from_string_doc (string html, int options = 0) {
+ base.from_doc (Html.Doc.read_doc (html, "", null, options));
+ }
+ /**
+ * Dumps this document to an HTML string using the {@link Xml.Html.Doc.dump_memory} method.
+ * Refer to libxml2 documentation about output.
+ *
+ * @return a copy of the string produced by dump_memory
+ */
+ public new string to_html () {
+ string buffer;
+ int len = 0;
+ ((Html.Doc*) doc)->dump_memory (out buffer, out len);
+ message (len.to_string ()); // NOTE(review): logs the dumped length on every call; looks like leftover debug output
+ return buffer.dup (); // NOTE(review): returns a duplicate of buffer; confirm the extra copy is needed
+ }
+ /**
* Search all {@link GXml.Element} with a property called "class" and with a
* value as a class apply to a node.
*//*
diff --git a/test/HtmlDocumentTest.vala b/test/HtmlDocumentTest.vala
index 6a26422..630b421 100644
--- a/test/HtmlDocumentTest.vala
+++ b/test/HtmlDocumentTest.vala
@@ -70,6 +70,47 @@ class HtmlDocumentTest : GXmlTest {
assert_not_reached ();
}
});
+ Test.add_func ("/gxml/htmldocument/fom_string_doc", () => { // NOTE(review): "fom" in the test path looks like a typo of "from"
+ try {
+ var sdoc = "<!doctype html>
+<html>
+<head>
+ <style>
+ * { color: red; }
+ </style>
+</head>
+<body>
+ <script type=\"text/javascript\">
+ </script>
+</body>
+</html>
+";
+ var doc = new HtmlDocument.from_string_doc (sdoc);
+ assert (doc.root != null);
+ assert (doc.root.name.down () == "html".down ());
+ var ln = doc.root.get_elements_by_property_value ("type","text/javascript"); // expect exactly the one <script> element
+ assert (ln != null);
+ assert (ln.size == 1);
+ var np = ln.item (0);
+ assert (np != null);
+ assert (np.node_name == "script");
+ var l = doc.get_elements_by_tag_name ("style"); // expect exactly the one <style> element
+ assert (l != null);
+ assert (l.size == 1);
+ var sn = l.item (0);
+ assert (sn != null);
+ assert (sn.node_name == "style");
+ message (sn.child_nodes.length.to_string ());
+ assert (sn.child_nodes.length == 1);
+ message (doc.to_html ()); // NOTE(review): same output is logged again two lines below
+ var s = doc.to_html ();
+ message (s);
+ assert ("style>\n * { color: red; }\n </style>" in s); // the style content must appear unchanged in the HTML dump
+ } catch (GLib.Error e){
+ Test.message ("ERROR: "+e.message);
+ assert_not_reached ();
+ }
+ });
// Test.add_func ("/gxml/htmldocument/uri", () => {
// try {
// var f = GLib.File.new_for_uri
("http://www.omgubuntu.co.uk/2017/05/kde-neon-5-10-available-download-comes-plasma-5-10");
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]