New Upstream Snapshot - golang-github-antchfx-xmlquery
Ready changes
Summary
Merged new upstream version: 1.3.13+git20221208.1.9dbfa11 (was: 1.3.3).
Resulting package
Built on 2023-01-01T05:06 (took 3m25s)
The resulting binary packages can be installed (if you have the apt repository enabled) by running one of:
apt install -t fresh-snapshots golang-github-antchfx-xmlquery-dev
Lintian Result
- golang-github-antchfx-xmlquery-dev_1.3.13+git20221208.1.9dbfa11-1~jan+nus1_all.deb
- golang-github-antchfx-xmlquery_1.3.13+git20221208.1.9dbfa11-1~jan+nus1.dsc
- golang-github-antchfx-xmlquery_1.3.13+git20221208.1.9dbfa11-1~jan+nus1_amd64.buildinfo
- golang-github-antchfx-xmlquery_1.3.13+git20221208.1.9dbfa11-1~jan+nus1_amd64.changes
Diff
diff --git a/.gitignore b/.gitignore
deleted file mode 100644
index 4d5d27b..0000000
--- a/.gitignore
+++ /dev/null
@@ -1,32 +0,0 @@
-# vscode
-.vscode
-debug
-*.test
-
-./build
-
-# Compiled Object files, Static and Dynamic libs (Shared Objects)
-*.o
-*.a
-*.so
-
-
-# Folders
-_obj
-_test
-
-# Architecture specific extensions/prefixes
-*.[568vq]
-[568vq].out
-
-*.cgo1.go
-*.cgo2.c
-_cgo_defun.c
-_cgo_gotypes.go
-_cgo_export.*
-
-_testmain.go
-
-*.exe
-*.test
-*.prof
\ No newline at end of file
diff --git a/README.md b/README.md
index bae7fc3..ac65ddc 100644
--- a/README.md
+++ b/README.md
@@ -15,26 +15,13 @@ data or evaluate from XML documents with an XPath expression.
XPATH query strings. Enabling caching can avoid recompile XPath expression for
each query.
-Change Logs
-===
-
-2020-08-??
-- Add XML stream loading and parsing support.
-
-2019-11-11
-- Add XPath query caching.
+You can visit this page to learn about the supported XPath(1.0/2.0) syntax. https://github.com/antchfx/xpath
-2019-10-05
-- Add new methods compatible with invalid XPath expression error: `QueryAll` and `Query`.
-- Add `QuerySelector` and `QuerySelectorAll` methods, support for reused query objects.
-- PR [#12](https://github.com/antchfx/xmlquery/pull/12) (Thanks @FrancescoIlario)
-- PR [#11](https://github.com/antchfx/xmlquery/pull/11) (Thanks @gjvnq)
+[htmlquery](https://github.com/antchfx/htmlquery) - Package for the HTML document query.
-2018-12-23
-- Added XML output including comment nodes. [#9](https://github.com/antchfx/xmlquery/issues/9)
+[xmlquery](https://github.com/antchfx/xmlquery) - Package for the XML document query.
-2018-12-03
-- Added support to attribute name with namespace prefix and XML output. [#6](https://github.com/antchfx/xmlquery/issues/6)
+[jsonquery](https://github.com/antchfx/jsonquery) - Package for the JSON document query.
Installation
====
@@ -42,6 +29,52 @@ Installation
$ go get github.com/antchfx/xmlquery
```
+
+Quick Starts
+===
+
+```go
+import (
+ "github.com/antchfx/xmlquery"
+)
+
+func main(){
+ s := `<?xml version="1.0" encoding="UTF-8" ?>
+<rss version="2.0">
+<channel>
+ <title>W3Schools Home Page</title>
+ <link>https://www.w3schools.com</link>
+ <description>Free web building tutorials</description>
+ <item>
+ <title>RSS Tutorial</title>
+ <link>https://www.w3schools.com/xml/xml_rss.asp</link>
+ <description>New RSS tutorial on W3Schools</description>
+ </item>
+ <item>
+ <title>XML Tutorial</title>
+ <link>https://www.w3schools.com/xml</link>
+ <description>New XML tutorial on W3Schools</description>
+ </item>
+</channel>
+</rss>`
+
+ doc, err := xmlquery.Parse(strings.NewReader(s))
+ if err != nil {
+ panic(err)
+ }
+ channel := xmlquery.FindOne(doc, "//channel")
+ if n := channel.SelectElement("title"); n != nil {
+ fmt.Printf("title: %s\n", n.InnerText())
+ }
+ if n := channel.SelectElement("link"); n != nil {
+ fmt.Printf("link: %s\n", n.InnerText())
+ }
+ for i, n := range xmlquery.Find(doc, "//item/title") {
+ fmt.Printf("#%d %s\n", i, n.InnerText())
+ }
+}
+```
+
Getting Started
===
@@ -202,61 +235,11 @@ title.FirstChild = title_text
channel.FirstChild = title
fmt.Println(doc.OutputXML(true))
// <?xml version="1.0"?><rss><channel><title>W3Schools Home Page</title></channel></rss>
-```
-
-Quick Tutorial
-===
-
-```go
-import (
- "github.com/antchfx/xmlquery"
-)
-func main(){
- s := `<?xml version="1.0" encoding="UTF-8" ?>
-<rss version="2.0">
-<channel>
- <title>W3Schools Home Page</title>
- <link>https://www.w3schools.com</link>
- <description>Free web building tutorials</description>
- <item>
- <title>RSS Tutorial</title>
- <link>https://www.w3schools.com/xml/xml_rss.asp</link>
- <description>New RSS tutorial on W3Schools</description>
- </item>
- <item>
- <title>XML Tutorial</title>
- <link>https://www.w3schools.com/xml</link>
- <description>New XML tutorial on W3Schools</description>
- </item>
-</channel>
-</rss>`
-
- doc, err := xmlquery.Parse(strings.NewReader(s))
- if err != nil {
- panic(err)
- }
- channel := xmlquery.FindOne(doc, "//channel")
- if n := channel.SelectElement("title"); n != nil {
- fmt.Printf("title: %s\n", n.InnerText())
- }
- if n := channel.SelectElement("link"); n != nil {
- fmt.Printf("link: %s\n", n.InnerText())
- }
- for i, n := range xmlquery.Find(doc, "//item/title") {
- fmt.Printf("#%d %s\n", i, n.InnerText())
- }
-}
+fmt.Println(doc.OutputXMLWithOptions(WithOutputSelf()))
+// <?xml version="1.0"?><rss><channel><title>W3Schools Home Page</title></channel></rss>
```
-List of supported XPath query packages
-===
-| Name | Description |
-| ------------------------------------------------- | ----------------------------------------- |
-| [htmlquery](https://github.com/antchfx/htmlquery) | XPath query package for HTML documents |
-| [xmlquery](https://github.com/antchfx/xmlquery) | XPath query package for XML documents |
-| [jsonquery](https://github.com/antchfx/jsonquery) | XPath query package for JSON documents |
-
- Questions
+Questions
===
Please let me know if you have any questions
diff --git a/debian/changelog b/debian/changelog
index e4084a7..a6e75f8 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+golang-github-antchfx-xmlquery (1.3.13+git20221208.1.9dbfa11-1) UNRELEASED; urgency=low
+
+ * New upstream snapshot.
+
+ -- Debian Janitor <janitor@jelmer.uk> Sun, 01 Jan 2023 05:04:08 -0000
+
golang-github-antchfx-xmlquery (1.3.3-1) unstable; urgency=medium
[ Dawid Dziurla ]
diff --git a/go.mod b/go.mod
index b6f453e..dbc1647 100644
--- a/go.mod
+++ b/go.mod
@@ -3,7 +3,7 @@ module github.com/antchfx/xmlquery
go 1.14
require (
- github.com/antchfx/xpath v1.1.10
+ github.com/antchfx/xpath v1.2.1
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e
- golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc
+ golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd
)
diff --git a/go.sum b/go.sum
index 9f54294..1bdaaf9 100644
--- a/go.sum
+++ b/go.sum
@@ -1,14 +1,12 @@
-github.com/antchfx/xpath v1.1.10 h1:cJ0pOvEdN/WvYXxvRrzQH9x5QWKpzHacYO8qzCcDYAg=
-github.com/antchfx/xpath v1.1.10/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
+github.com/antchfx/xpath v1.2.1 h1:qhp4EW6aCOVr5XIkT+l6LJ9ck/JsUH/yyauNgTQkBF8=
+github.com/antchfx/xpath v1.2.1/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY=
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
-golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
-golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
-golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
-golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc h1:zK/HqS5bZxDptfPJNq8v7vJfXtkU7r9TLIoSr1bXaP4=
-golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
-golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
-golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
-golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd h1:O7DYs+zxREGLKzKoMQrtrEacpb0ZVXA5rIwylE2Xchk=
+golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
diff --git a/node.go b/node.go
index e6b893c..4c77ed6 100644
--- a/node.go
+++ b/node.go
@@ -1,9 +1,9 @@
package xmlquery
import (
- "bytes"
"encoding/xml"
"fmt"
+ "html"
"strings"
)
@@ -29,6 +29,12 @@ const (
AttributeNode
)
+type Attr struct {
+ Name xml.Name
+ Value string
+ NamespaceURI string
+}
+
// A Node consists of a NodeType and some Data (tag name for
// element nodes, content for text) and are part of a tree of Nodes.
type Node struct {
@@ -38,34 +44,65 @@ type Node struct {
Data string
Prefix string
NamespaceURI string
- Attr []xml.Attr
+ Attr []Attr
level int // node level in the tree
}
+type outputConfiguration struct {
+ printSelf bool
+ preserveSpaces bool
+ emptyElementTagSupport bool
+ skipComments bool
+}
+
+type OutputOption func(*outputConfiguration)
+
+// WithOutputSelf configures the Node to print the root node itself
+func WithOutputSelf() OutputOption {
+ return func(oc *outputConfiguration) {
+ oc.printSelf = true
+ }
+}
+
+// WithEmptyTagSupport empty tags should be written as <empty/> and
+// not as <empty></empty>
+func WithEmptyTagSupport() OutputOption {
+ return func(oc *outputConfiguration) {
+ oc.emptyElementTagSupport = true
+ }
+}
+
+// WithoutComments will skip comments in output
+func WithoutComments() OutputOption {
+ return func(oc *outputConfiguration) {
+ oc.skipComments = true
+ }
+}
+
// InnerText returns the text between the start and end tags of the object.
func (n *Node) InnerText() string {
- var output func(*bytes.Buffer, *Node)
- output = func(buf *bytes.Buffer, n *Node) {
+ var output func(*strings.Builder, *Node)
+ output = func(b *strings.Builder, n *Node) {
switch n.Type {
case TextNode, CharDataNode:
- buf.WriteString(n.Data)
+ b.WriteString(n.Data)
case CommentNode:
default:
for child := n.FirstChild; child != nil; child = child.NextSibling {
- output(buf, child)
+ output(b, child)
}
}
}
- var buf bytes.Buffer
- output(&buf, n)
- return buf.String()
+ var b strings.Builder
+ output(&b, n)
+ return b.String()
}
func (n *Node) sanitizedData(preserveSpaces bool) string {
if preserveSpaces {
- return strings.Trim(n.Data, "\n\t")
+ return n.Data
}
return strings.TrimSpace(n.Data)
}
@@ -79,83 +116,118 @@ func calculatePreserveSpaces(n *Node, pastValue bool) bool {
return pastValue
}
-func outputXML(buf *bytes.Buffer, n *Node, preserveSpaces bool) {
+func outputXML(b *strings.Builder, n *Node, preserveSpaces bool, config *outputConfiguration) {
preserveSpaces = calculatePreserveSpaces(n, preserveSpaces)
switch n.Type {
case TextNode:
- xml.EscapeText(buf, []byte(n.sanitizedData(preserveSpaces)))
+ b.WriteString(html.EscapeString(n.sanitizedData(preserveSpaces)))
return
case CharDataNode:
- buf.WriteString("<![CDATA[")
- xml.EscapeText(buf, []byte(n.sanitizedData(preserveSpaces)))
- buf.WriteString("]]>")
+ b.WriteString("<![CDATA[")
+ b.WriteString(n.Data)
+ b.WriteString("]]>")
return
case CommentNode:
- buf.WriteString("<!--")
- buf.WriteString(n.Data)
- buf.WriteString("-->")
+ if !config.skipComments {
+ b.WriteString("<!--")
+ b.WriteString(n.Data)
+ b.WriteString("-->")
+ }
return
case DeclarationNode:
- buf.WriteString("<?" + n.Data)
+ b.WriteString("<?" + n.Data)
default:
if n.Prefix == "" {
- buf.WriteString("<" + n.Data)
+ b.WriteString("<" + n.Data)
} else {
- buf.WriteString("<" + n.Prefix + ":" + n.Data)
+ b.WriteString("<" + n.Prefix + ":" + n.Data)
}
}
for _, attr := range n.Attr {
if attr.Name.Space != "" {
- buf.WriteString(fmt.Sprintf(` %s:%s=`, attr.Name.Space, attr.Name.Local))
+ b.WriteString(fmt.Sprintf(` %s:%s=`, attr.Name.Space, attr.Name.Local))
} else {
- buf.WriteString(fmt.Sprintf(` %s=`, attr.Name.Local))
+ b.WriteString(fmt.Sprintf(` %s=`, attr.Name.Local))
}
- buf.WriteByte('"')
- xml.EscapeText(buf, []byte(attr.Value))
- buf.WriteByte('"')
+ b.WriteByte('"')
+ b.WriteString(html.EscapeString(attr.Value))
+ b.WriteByte('"')
}
if n.Type == DeclarationNode {
- buf.WriteString("?>")
+ b.WriteString("?>")
} else {
- buf.WriteString(">")
+ if n.FirstChild != nil || !config.emptyElementTagSupport {
+ b.WriteString(">")
+ } else {
+ b.WriteString("/>")
+ return
+ }
}
for child := n.FirstChild; child != nil; child = child.NextSibling {
- outputXML(buf, child, preserveSpaces)
+ outputXML(b, child, preserveSpaces, config)
}
if n.Type != DeclarationNode {
if n.Prefix == "" {
- buf.WriteString(fmt.Sprintf("</%s>", n.Data))
+ b.WriteString(fmt.Sprintf("</%s>", n.Data))
} else {
- buf.WriteString(fmt.Sprintf("</%s:%s>", n.Prefix, n.Data))
+ b.WriteString(fmt.Sprintf("</%s:%s>", n.Prefix, n.Data))
}
}
}
// OutputXML returns the text that including tags name.
func (n *Node) OutputXML(self bool) string {
- var buf bytes.Buffer
- if self {
- outputXML(&buf, n, false)
+
+ config := &outputConfiguration{
+ printSelf: true,
+ emptyElementTagSupport: false,
+ }
+ preserveSpaces := calculatePreserveSpaces(n, false)
+ var b strings.Builder
+ if self && n.Type != DocumentNode {
+ outputXML(&b, n, preserveSpaces, config)
} else {
for n := n.FirstChild; n != nil; n = n.NextSibling {
- outputXML(&buf, n, false)
+ outputXML(&b, n, preserveSpaces, config)
}
}
- return buf.String()
+ return b.String()
+}
+
+// OutputXMLWithOptions returns the text that including tags name.
+func (n *Node) OutputXMLWithOptions(opts ...OutputOption) string {
+
+ config := &outputConfiguration{}
+ // Set the options
+ for _, opt := range opts {
+ opt(config)
+ }
+
+ preserveSpaces := calculatePreserveSpaces(n, false)
+ var b strings.Builder
+ if config.printSelf && n.Type != DocumentNode {
+ outputXML(&b, n, preserveSpaces, config)
+ } else {
+ for n := n.FirstChild; n != nil; n = n.NextSibling {
+ outputXML(&b, n, preserveSpaces, config)
+ }
+ }
+
+ return b.String()
}
// AddAttr adds a new attribute specified by 'key' and 'val' to a node 'n'.
func AddAttr(n *Node, key, val string) {
- var attr xml.Attr
+ var attr Attr
if i := strings.Index(key, ":"); i > 0 {
- attr = xml.Attr{
+ attr = Attr{
Name: xml.Name{Space: key[:i], Local: key[i+1:]},
Value: val,
}
} else {
- attr = xml.Attr{
+ attr = Attr{
Name: xml.Name{Local: key},
Value: val,
}
@@ -164,6 +236,55 @@ func AddAttr(n *Node, key, val string) {
n.Attr = append(n.Attr, attr)
}
+// SetAttr allows an attribute value with the specified name to be changed.
+// If the attribute did not previously exist, it will be created.
+func (n *Node) SetAttr(key, value string) {
+ if i := strings.Index(key, ":"); i > 0 {
+ space := key[:i]
+ local := key[i+1:]
+ for idx := 0; idx < len(n.Attr); idx++ {
+ if n.Attr[idx].Name.Space == space && n.Attr[idx].Name.Local == local {
+ n.Attr[idx].Value = value
+ return
+ }
+ }
+
+ AddAttr(n, key, value)
+ } else {
+ for idx := 0; idx < len(n.Attr); idx++ {
+ if n.Attr[idx].Name.Local == key {
+ n.Attr[idx].Value = value
+ return
+ }
+ }
+
+ AddAttr(n, key, value)
+ }
+}
+
+// RemoveAttr removes the attribute with the specified name.
+func (n *Node) RemoveAttr(key string) {
+ removeIdx := -1
+ if i := strings.Index(key, ":"); i > 0 {
+ space := key[:i]
+ local := key[i+1:]
+ for idx := 0; idx < len(n.Attr); idx++ {
+ if n.Attr[idx].Name.Space == space && n.Attr[idx].Name.Local == local {
+ removeIdx = idx
+ }
+ }
+ } else {
+ for idx := 0; idx < len(n.Attr); idx++ {
+ if n.Attr[idx].Name.Local == key {
+ removeIdx = idx
+ }
+ }
+ }
+ if removeIdx != -1 {
+ n.Attr = append(n.Attr[:removeIdx], n.Attr[removeIdx+1:]...)
+ }
+}
+
// AddChild adds a new node 'n' to a node 'parent' as its last child.
func AddChild(parent, n *Node) {
n.Parent = parent
diff --git a/node_test.go b/node_test.go
index a96a8e0..b8c1448 100644
--- a/node_test.go
+++ b/node_test.go
@@ -134,7 +134,7 @@ func TestAddAttr(t *testing.T) {
},
{
name: "node has existing attrs",
- n: &Node{Type: AttributeNode, Attr: []xml.Attr{{Name: xml.Name{Local: "k1"}, Value: "v1"}}},
+ n: &Node{Type: AttributeNode, Attr: []Attr{{Name: xml.Name{Local: "k1"}, Value: "v1"}}},
key: "k2",
val: "v2",
expected: `< k1="v1" k2="v2"></>`,
@@ -147,6 +147,91 @@ func TestAddAttr(t *testing.T) {
}
}
+func TestSetAttr(t *testing.T) {
+ for _, test := range []struct {
+ name string
+ n *Node
+ key string
+ val string
+ expected string
+ }{
+ {
+ name: "node has no existing attr",
+ n: &Node{Type: AttributeNode},
+ key: "ns:k1",
+ val: "v1",
+ expected: `< ns:k1="v1"></>`,
+ },
+ {
+ name: "node has an existing attr, overwriting",
+ n: &Node{Type: AttributeNode, Attr: []Attr{{Name: xml.Name{Space: "ns", Local: "k1"}, Value: "v1"}}},
+ key: "ns:k1",
+ val: "v2",
+ expected: `< ns:k1="v2"></>`,
+ },
+ {
+ name: "node has no existing attr, no ns",
+ n: &Node{Type: AttributeNode},
+ key: "k1",
+ val: "v1",
+ expected: `< k1="v1"></>`,
+ },
+ {
+ name: "node has an existing attr, no ns, overwriting",
+ n: &Node{Type: AttributeNode, Attr: []Attr{{Name: xml.Name{Local: "k1"}, Value: "v1"}}},
+ key: "k1",
+ val: "v2",
+ expected: `< k1="v2"></>`,
+ },
+ } {
+
+ t.Run(test.name, func(t *testing.T) {
+ test.n.SetAttr(test.key, test.val)
+ testValue(t, test.n.OutputXML(true), test.expected)
+ })
+ }
+}
+
+func TestRemoveAttr(t *testing.T) {
+ for _, test := range []struct {
+ name string
+ n *Node
+ key string
+ expected string
+ }{
+ {
+ name: "node has no existing attr",
+ n: &Node{Type: AttributeNode},
+ key: "ns:k1",
+ expected: `<></>`,
+ },
+ {
+ name: "node has an existing attr, overwriting",
+ n: &Node{Type: AttributeNode, Attr: []Attr{{Name: xml.Name{Space: "ns", Local: "k1"}, Value: "v1"}}},
+ key: "ns:k1",
+ expected: `<></>`,
+ },
+ {
+ name: "node has no existing attr, no ns",
+ n: &Node{Type: AttributeNode},
+ key: "k1",
+ expected: `<></>`,
+ },
+ {
+ name: "node has an existing attr, no ns, overwriting",
+ n: &Node{Type: AttributeNode, Attr: []Attr{{Name: xml.Name{Local: "k1"}, Value: "v1"}}},
+ key: "k1",
+ expected: `<></>`,
+ },
+ } {
+
+ t.Run(test.name, func(t *testing.T) {
+ test.n.RemoveAttr(test.key)
+ testValue(t, test.n.OutputXML(true), test.expected)
+ })
+ }
+}
+
func TestRemoveFromTree(t *testing.T) {
xml := `<?procinst?>
<!--comment-->
@@ -280,6 +365,53 @@ func TestEscapeOutputValue(t *testing.T) {
}
+func TestUnnecessaryEscapeOutputValue(t *testing.T) {
+ data := `<?xml version="1.0" encoding="utf-8"?>
+ <class_list xml:space="preserve">
+ <student>
+ <name> Robert </name>
+ <grade>A+</grade>
+
+ </student>
+ </class_list>`
+
+ root, err := Parse(strings.NewReader(data))
+ if err != nil {
+ t.Error(err)
+ }
+
+ escapedInnerText := root.OutputXML(true)
+ if strings.Contains(escapedInnerText, "&#x9;") {
+ t.Fatal("\\n has been escaped unnecessarily")
+ }
+
+ if strings.Contains(escapedInnerText, "&#xA;") {
+ t.Fatal("\\t has been escaped unnecessarily")
+ }
+
+}
+
+func TestHtmlUnescapeStringOriginString(t *testing.T) {
+ // has escape html character and \t
+ data := `<?xml version="1.0" encoding="utf-8"?>
+ <example xml:space="preserve"><word>&amp;#48;		</word></example>`
+
+ root, err := Parse(strings.NewReader(data))
+ if err != nil {
+ t.Error(err)
+ }
+
+ escapedInnerText := root.OutputXML(false)
+ unescapeString := html.UnescapeString(escapedInnerText)
+ if strings.Contains(unescapeString, "&amp;") {
+ t.Fatal("&amp; need unescape")
+ }
+ if !strings.Contains(escapedInnerText, "&amp;#48;\t\t") {
+ t.Fatal("Inner Text should keep plain text")
+ }
+
+}
+
func TestOutputXMLWithNamespacePrefix(t *testing.T) {
s := `<?xml version="1.0" encoding="UTF-8"?><S:Envelope xmlns:S="http://schemas.xmlsoap.org/soap/envelope/"><S:Body></S:Body></S:Envelope>`
doc, _ := Parse(strings.NewReader(s))
@@ -305,12 +437,12 @@ func TestOutputXMLWithCommentNode(t *testing.T) {
</class_list>`
doc, _ := Parse(strings.NewReader(s))
t.Log(doc.OutputXML(true))
- if e, g := "<!-- Students grades are updated bi-monthly -->", doc.OutputXML(true); strings.Index(g, e) == -1 {
+ if e, g := "<!-- Students grades are updated bi-monthly -->", doc.OutputXML(true); !strings.Contains(g, e) {
t.Fatal("missing some comment-node.")
}
n := FindOne(doc, "//class_list")
t.Log(n.OutputXML(false))
- if e, g := "<name>Lenard</name>", n.OutputXML(false); strings.Index(g, e) == -1 {
+ if e, g := "<name>Lenard</name>", n.OutputXML(false); !strings.Contains(g, e) {
t.Fatal("missing some comment-node")
}
}
@@ -326,15 +458,16 @@ func TestOutputXMLWithSpaceParent(t *testing.T) {
doc, _ := Parse(strings.NewReader(s))
t.Log(doc.OutputXML(true))
- n := FindOne(doc, "/class_list/student/name")
expected := "<name> Robert </name>"
- if g := doc.OutputXML(true); strings.Index(g, expected) == -1 {
+ if g := doc.OutputXML(true); !strings.Contains(g, expected) {
t.Errorf(`expected "%s", obtained "%s"`, expected, g)
}
- output := html.UnescapeString(doc.OutputXML(true))
- if strings.Contains(output, "\n") {
- t.Errorf("the outputted xml contains newlines")
+ n := FindOne(doc, "/class_list/student")
+ output := html.UnescapeString(n.OutputXML(false))
+ expected = "\n\t\t\t<name> Robert </name>\n\t\t\t<grade>A+</grade>\n\t\t"
+ if !(output == expected) {
+ t.Errorf(`expected "%s", obtained "%s"`, expected, output)
}
t.Log(n.OutputXML(false))
}
@@ -352,7 +485,7 @@ func TestOutputXMLWithSpaceDirect(t *testing.T) {
n := FindOne(doc, "/class_list/student/name")
expected := `<name xml:space="preserve"> Robert </name>`
- if g := doc.OutputXML(false); strings.Index(g, expected) == -1 {
+ if g := doc.OutputXML(false); !strings.Contains(g, expected) {
t.Errorf(`expected "%s", obtained "%s"`, expected, g)
}
@@ -376,7 +509,7 @@ func TestOutputXMLWithSpaceOverwrittenToPreserve(t *testing.T) {
n := FindOne(doc, "/class_list/student")
expected := `<name xml:space="preserve"> Robert </name>`
- if g := n.OutputXML(false); strings.Index(g, expected) == -1 {
+ if g := n.OutputXML(false); !strings.Contains(g, expected) {
t.Errorf(`expected "%s", obtained "%s"`, expected, g)
}
@@ -398,15 +531,49 @@ func TestOutputXMLWithSpaceOverwrittenToDefault(t *testing.T) {
doc, _ := Parse(strings.NewReader(s))
t.Log(doc.OutputXML(true))
- n := FindOne(doc, "/class_list/student")
expected := `<name xml:space="default">Robert</name>`
- if g := doc.OutputXML(false); strings.Index(g, expected) == -1 {
+ if g := doc.OutputXML(false); !strings.Contains(g, expected) {
t.Errorf(`expected "%s", obtained "%s"`, expected, g)
}
- output := html.UnescapeString(doc.OutputXML(true))
- if strings.Contains(output, "\n") {
- t.Errorf("the outputted xml contains newlines")
+ n := FindOne(doc, "/class_list/student")
+ output := html.UnescapeString(n.OutputXML(false))
+ expected = "\n\t\t\t<name xml:space=\"default\">Robert</name>\n\t\t\t<grade>A+</grade>\n\t\t"
+ if !(output == expected) {
+ t.Errorf(`expected "%s", obtained "%s"`, expected, output)
}
t.Log(n.OutputXML(false))
}
+
+func TestOutputXMLWithXMLInCDATA(t *testing.T) {
+ s := `<?xml version="1.0" encoding="utf-8"?><node><![CDATA[<greeting>Hello, world!</greeting>]]></node>`
+ doc, _ := Parse(strings.NewReader(s))
+ t.Log(doc.OutputXML(false))
+ if doc.OutputXML(false) != s {
+ t.Errorf("the outputted xml escaped CDATA section")
+ }
+}
+
+func TestOutputXMLWithDefaultOptions(t *testing.T) {
+ s := `<?xml version="1.0" encoding="utf-8"?><node><empty></empty></node>`
+ expected := `<?xml version="1.0" encoding="utf-8"?><node><empty></empty></node>`
+
+ doc, _ := Parse(strings.NewReader(s))
+ result := doc.OutputXMLWithOptions()
+ t.Log(result)
+ if result != expected {
+ t.Errorf("output was not expected. expected %v but got %v", expected, result)
+ }
+}
+
+func TestOutputXMLWithOptions(t *testing.T) {
+ s := `<?xml version="1.0" encoding="utf-8"?><node><empty></empty></node>`
+ expected := `<?xml version="1.0" encoding="utf-8"?><node><empty/></node>`
+
+ doc, _ := Parse(strings.NewReader(s))
+ result := doc.OutputXMLWithOptions(WithEmptyTagSupport())
+ t.Log(result)
+ if result != expected {
+ t.Errorf("output was not expected. expected %v but got %v", expected, result)
+ }
+}
diff --git a/options.go b/options.go
new file mode 100644
index 0000000..f3e2f99
--- /dev/null
+++ b/options.go
@@ -0,0 +1,30 @@
+package xmlquery
+
+import (
+ "encoding/xml"
+)
+
+type ParserOptions struct{
+ Decoder *DecoderOptions
+}
+
+func (options ParserOptions) apply(parser *parser) {
+ if options.Decoder != nil {
+ (*options.Decoder).apply(parser.decoder)
+ }
+}
+
+// DecoderOptions implement the very same options than the standard
+// encoding/xml package. Please refer to this documentation:
+// https://golang.org/pkg/encoding/xml/#Decoder
+type DecoderOptions struct{
+ Strict bool
+ AutoClose []string
+ Entity map[string]string
+}
+
+func (options DecoderOptions) apply(decoder *xml.Decoder) {
+ decoder.Strict = options.Strict
+ decoder.AutoClose = options.AutoClose
+ decoder.Entity = options.Entity
+}
diff --git a/options_test.go b/options_test.go
new file mode 100644
index 0000000..a13c17a
--- /dev/null
+++ b/options_test.go
@@ -0,0 +1,46 @@
+package xmlquery
+
+import (
+ "bytes"
+ "encoding/xml"
+ "testing"
+)
+
+func TestApplyOptions(t *testing.T) {
+ parser := &parser{
+ decoder: xml.NewDecoder(bytes.NewReader(make([]byte, 0))),
+ }
+ options := ParserOptions{
+ Decoder: &DecoderOptions{
+ Strict: false,
+ AutoClose: []string{"foo"},
+ Entity: map[string]string{
+ "bar": "baz",
+ },
+ },
+ }
+
+ options.apply(parser)
+ if parser.decoder.Strict != options.Decoder.Strict {
+ t.Fatalf("Expected Strict attribute of %v, got %v instead", options.Decoder.Strict, parser.decoder.Strict)
+ }
+ if parser.decoder.AutoClose[0] != options.Decoder.AutoClose[0] {
+ t.Fatalf("Expected AutoClose attribute with %v, got %v instead", options.Decoder.AutoClose, parser.decoder.AutoClose)
+ }
+ if parser.decoder.Entity["bar"] != options.Decoder.Entity["bar"] {
+ t.Fatalf("Expected Entity mode of %v, got %v instead", options.Decoder.Entity, parser.decoder.Entity)
+ }
+}
+
+func TestApplyEmptyOptions(t *testing.T) {
+ parser := &parser{
+ decoder: xml.NewDecoder(bytes.NewReader(make([]byte, 0))),
+ }
+ options := ParserOptions{
+ Decoder: nil,
+ }
+
+ // Only testing for the absence of errors since we are not
+ // expecting this call to do anything
+ options.apply(parser)
+}
diff --git a/parse.go b/parse.go
index 623f06d..76f49aa 100644
--- a/parse.go
+++ b/parse.go
@@ -3,7 +3,6 @@ package xmlquery
import (
"bufio"
"encoding/xml"
- "errors"
"fmt"
"io"
"net/http"
@@ -32,7 +31,13 @@ func LoadURL(url string) (*Node, error) {
// Parse returns the parse tree for the XML from the given Reader.
func Parse(r io.Reader) (*Node, error) {
+ return ParseWithOptions(r, ParserOptions{})
+}
+
+// ParseWithOptions is like parse, but with custom options
+func ParseWithOptions(r io.Reader, options ParserOptions) (*Node, error) {
p := createParser(r)
+ options.apply(p)
for {
_, err := p.parse()
if err == io.EOF {
@@ -86,7 +91,15 @@ func (p *parser) parse() (*Node, error) {
case xml.StartElement:
if p.level == 0 {
// mising XML declaration
- node := &Node{Type: DeclarationNode, Data: "xml", level: 1}
+ attributes := make([]Attr, 1)
+ attributes[0].Name = xml.Name{Local: "version"}
+ attributes[0].Value = "1.0"
+ node := &Node{
+ Type: DeclarationNode,
+ Data: "xml",
+ Attr: attributes,
+ level: 1,
+ }
AddChild(p.prev, node)
p.level = 1
p.prev = node
@@ -100,16 +113,22 @@ func (p *parser) parse() (*Node, error) {
}
}
- if tok.Name.Space != "" {
- if _, found := p.space2prefix[tok.Name.Space]; !found {
- return nil, errors.New("xmlquery: invalid XML document, namespace is missing")
+ if space := tok.Name.Space; space != "" {
+ if _, found := p.space2prefix[space]; !found && p.decoder.Strict {
+ return nil, fmt.Errorf("xmlquery: invalid XML document, namespace %s is missing", space)
}
}
- for i := 0; i < len(tok.Attr); i++ {
- att := &tok.Attr[i]
- if prefix, ok := p.space2prefix[att.Name.Space]; ok {
- att.Name.Space = prefix
+ attributes := make([]Attr, len(tok.Attr))
+ for i, att := range tok.Attr {
+ name := att.Name
+ if prefix, ok := p.space2prefix[name.Space]; ok {
+ name.Space = prefix
+ }
+ attributes[i] = Attr{
+ Name: name,
+ Value: att.Value,
+ NamespaceURI: att.Name.Space,
}
}
@@ -118,7 +137,7 @@ func (p *parser) parse() (*Node, error) {
Data: tok.Name.Local,
Prefix: p.space2prefix[tok.Name.Space],
NamespaceURI: tok.Name.Space,
- Attr: tok.Attr,
+ Attr: attributes,
level: p.level,
}
@@ -190,7 +209,7 @@ func (p *parser) parse() (*Node, error) {
// First, normalize the cache...
cached := strings.ToUpper(string(p.reader.Cache()))
nodeType := TextNode
- if strings.HasPrefix(cached, "<![CDATA[") {
+ if strings.HasPrefix(cached, "<![CDATA[") || strings.HasPrefix(cached, "![CDATA[") {
nodeType = CharDataNode
}
@@ -234,6 +253,11 @@ func (p *parser) parse() (*Node, error) {
AddSibling(p.prev, node)
} else if p.level > p.prev.level {
AddChild(p.prev, node)
+ } else if p.level < p.prev.level {
+ for i := p.prev.level - p.level; i > 1; i-- {
+ p.prev = p.prev.Parent
+ }
+ AddSibling(p.prev.Parent, node)
}
p.prev = node
case xml.Directive:
@@ -295,6 +319,16 @@ type StreamParser struct {
// streamElementFilter, if provided, cannot be successfully parsed and compiled
// into a valid xpath query.
func CreateStreamParser(r io.Reader, streamElementXPath string, streamElementFilter ...string) (*StreamParser, error) {
+ return CreateStreamParserWithOptions(r, ParserOptions{}, streamElementXPath, streamElementFilter...)
+}
+
+// CreateStreamParserWithOptions is like CreateStreamParser, but with custom options
+func CreateStreamParserWithOptions(
+ r io.Reader,
+ options ParserOptions,
+ streamElementXPath string,
+ streamElementFilter ...string,
+) (*StreamParser, error) {
elemXPath, err := getQuery(streamElementXPath)
if err != nil {
return nil, fmt.Errorf("invalid streamElementXPath '%s', err: %s", streamElementXPath, err.Error())
@@ -306,8 +340,10 @@ func CreateStreamParser(r io.Reader, streamElementXPath string, streamElementFil
return nil, fmt.Errorf("invalid streamElementFilter '%s', err: %s", streamElementFilter[0], err.Error())
}
}
+ parser := createParser(r)
+ options.apply(parser)
sp := &StreamParser{
- p: createParser(r),
+ p: parser,
}
sp.p.streamElementXPath = elemXPath
sp.p.streamElementFilter = elemFilter
@@ -325,8 +361,15 @@ func (sp *StreamParser) Read() (*Node, error) {
// Because this is a streaming read, we need to release/remove last
// target node from the node tree to free up memory.
if sp.p.streamNode != nil {
+ // We need to remove all siblings before the current stream node,
+ // because the document may contain unwanted nodes between the target
+ // ones (for example new line text node), which would otherwise
+ // accumulate as first childs, and slow down the stream over time
+ for sp.p.streamNode.PrevSibling != nil {
+ RemoveFromTree(sp.p.streamNode.PrevSibling)
+ }
+ sp.p.prev = sp.p.streamNode.Parent
RemoveFromTree(sp.p.streamNode)
- sp.p.prev = sp.p.streamNodePrev
sp.p.streamNode = nil
sp.p.streamNodePrev = nil
}
diff --git a/parse_test.go b/parse_test.go
index f78553c..2b2d7d5 100644
--- a/parse_test.go
+++ b/parse_test.go
@@ -116,7 +116,7 @@ func TestNamespaceURL(t *testing.T) {
if strings.Index(top.InnerText(), "author") > 0 {
t.Fatalf("InnerText() include comment node text")
}
- if strings.Index(top.OutputXML(true), "author") == -1 {
+ if !strings.Contains(top.OutputXML(true), "author") {
t.Fatal("OutputXML shoud include comment node,but not")
}
}
@@ -370,7 +370,7 @@ func TestStreamParser_Success1(t *testing.T) {
}
testOutputXML(t, "first call result", `<BBB>b1</BBB>`, n)
testOutputXML(t, "doc after first call",
- `<><?xml?><ROOT><AAA><CCC>c1</CCC><BBB>b1</BBB></AAA></ROOT></>`, findRoot(n))
+ `<?xml version="1.0"?><ROOT><AAA><CCC>c1</CCC><BBB>b1</BBB></AAA></ROOT>`, findRoot(n))
// Second `<BBB>` read
n, err = sp.Read()
@@ -379,7 +379,7 @@ func TestStreamParser_Success1(t *testing.T) {
}
testOutputXML(t, "second call result", `<BBB>b2<ZZZ z="1">z1</ZZZ></BBB>`, n)
testOutputXML(t, "doc after second call",
- `<><?xml?><ROOT><AAA><CCC>c1</CCC><DDD>d1</DDD><BBB>b2<ZZZ z="1">z1</ZZZ></BBB></AAA></ROOT></>`, findRoot(n))
+ `<?xml version="1.0"?><ROOT><AAA><DDD>d1</DDD><BBB>b2<ZZZ z="1">z1</ZZZ></BBB></AAA></ROOT>`, findRoot(n))
// Third `<BBB>` read (Note we will skip 'b3' since the streamElementFilter excludes it)
n, err = sp.Read()
@@ -391,7 +391,7 @@ func TestStreamParser_Success1(t *testing.T) {
// been filtered out and is not our target node, thus it is considered just like any other
// non target nodes such as `<CCC>`` or `<DDD>`
testOutputXML(t, "doc after third call",
- `<><?xml?><ROOT><AAA><CCC>c1</CCC><DDD>d1</DDD></AAA><ZZZ><BBB>b4</BBB></ZZZ></ROOT></>`,
+ `<?xml version="1.0"?><ROOT><AAA></AAA><ZZZ><BBB>b4</BBB></ZZZ></ROOT>`,
findRoot(n))
// Fourth `<BBB>` read
@@ -401,7 +401,7 @@ func TestStreamParser_Success1(t *testing.T) {
}
testOutputXML(t, "fourth call result", `<BBB>b5</BBB>`, n)
testOutputXML(t, "doc after fourth call",
- `<><?xml?><ROOT><AAA><CCC>c1</CCC><DDD>d1</DDD></AAA><ZZZ><BBB>b5</BBB></ZZZ></ROOT></>`,
+ `<?xml version="1.0"?><ROOT><AAA></AAA><ZZZ><BBB>b5</BBB></ZZZ></ROOT>`,
findRoot(n))
_, err = sp.Read()
@@ -431,7 +431,7 @@ func TestStreamParser_Success2(t *testing.T) {
t.Fatal(err.Error())
}
testOutputXML(t, "first call result", `<CCC>c1</CCC>`, n)
- testOutputXML(t, "doc after first call", `<><?xml?><AAA><CCC>c1</CCC></AAA></>`, findRoot(n))
+ testOutputXML(t, "doc after first call", `<?xml version="1.0"?><AAA><CCC>c1</CCC></AAA>`, findRoot(n))
// Second Read() should return d1
n, err = sp.Read()
@@ -440,7 +440,7 @@ func TestStreamParser_Success2(t *testing.T) {
}
testOutputXML(t, "second call result", `<DDD>d1</DDD>`, n)
testOutputXML(t, "doc after second call",
- `<><?xml?><AAA><BBB>b1</BBB><DDD>d1</DDD></AAA></>`, findRoot(n))
+ `<?xml version="1.0"?><AAA><BBB>b1</BBB><DDD>d1</DDD></AAA>`, findRoot(n))
// Third call should return c2
n, err = sp.Read()
@@ -449,7 +449,7 @@ func TestStreamParser_Success2(t *testing.T) {
}
testOutputXML(t, "third call result", `<CCC>c2</CCC>`, n)
testOutputXML(t, "doc after third call",
- `<><?xml?><AAA><BBB>b1</BBB><BBB>b2</BBB><CCC>c2</CCC></AAA></>`, findRoot(n))
+ `<?xml version="1.0"?><AAA><BBB>b2</BBB><CCC>c2</CCC></AAA>`, findRoot(n))
_, err = sp.Read()
if err != io.EOF {
@@ -474,3 +474,19 @@ func TestCDATA(t *testing.T) {
}
testOutputXML(t, "first call result", `<CCC><![CDATA[c1]]></CCC>`, n)
}
+
+func TestXMLPreservation(t *testing.T) {
+ s := `
+ <?xml version="1.0" encoding="UTF-8"?>
+ <AAA>
+ <CCC><![CDATA[c1]]></CCC>
+ </AAA>`
+
+ doc, err := Parse(strings.NewReader(s))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ testOutputXML(t, "first call result",
+ `<?xml version="1.0" encoding="UTF-8"?><AAA><CCC><![CDATA[c1]]></CCC></AAA>`, doc)
+}
diff --git a/query.go b/query.go
index 7544b7e..0bd45dd 100644
--- a/query.go
+++ b/query.go
@@ -193,6 +193,9 @@ func (x *NodeNavigator) Prefix() string {
}
func (x *NodeNavigator) NamespaceURL() string {
+ if x.attr != -1 {
+ return x.curr.Attr[x.attr].NamespaceURI
+ }
return x.curr.NamespaceURI
}
@@ -272,9 +275,11 @@ func (x *NodeNavigator) MoveToNext() bool {
if x.attr != -1 {
return false
}
- if node := x.curr.NextSibling; node != nil {
+ for node := x.curr.NextSibling; node != nil; node = x.curr.NextSibling {
x.curr = node
- return true
+ if x.curr.Type != TextNode || strings.TrimSpace(x.curr.Data) != "" {
+ return true
+ }
}
return false
}
@@ -283,9 +288,11 @@ func (x *NodeNavigator) MoveToPrevious() bool {
if x.attr != -1 {
return false
}
- if node := x.curr.PrevSibling; node != nil {
+ for node := x.curr.PrevSibling; node != nil; node = x.curr.PrevSibling {
x.curr = node
- return true
+ if x.curr.Type != TextNode || strings.TrimSpace(x.curr.Data) != "" {
+ return true
+ }
}
return false
}
diff --git a/query_test.go b/query_test.go
index 6c634e7..b4158be 100644
--- a/query_test.go
+++ b/query_test.go
@@ -1,6 +1,7 @@
package xmlquery
import (
+ "fmt"
"strings"
"testing"
)
@@ -16,7 +17,7 @@ const xmlDoc = `
<genre>Computer</genre>
<price>44.95</price>
<publish_date>2000-10-01</publish_date>
- <description>An in-depth look at creating applications
+ <description>An in-depth look at creating applications
with XML.</description>
</book>
<book id="bk102">
@@ -25,8 +26,8 @@ const xmlDoc = `
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2000-12-16</publish_date>
- <description>A former architect battles corporate zombies,
- an evil sorceress, and her own childhood to become queen
+ <description>A former architect battles corporate zombies,
+ an evil sorceress, and her own childhood to become queen
of the world.</description>
</book>
<book id="bk103">
@@ -35,8 +36,8 @@ const xmlDoc = `
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2000-11-17</publish_date>
- <description>After the collapse of a nanotechnology
- society in England, the young survivors lay the
+ <description>After the collapse of a nanotechnology
+ society in England, the young survivors lay the
foundation for a new society.</description>
</book>
</catalog>`
@@ -105,8 +106,7 @@ func TestInvalidXPathExpression(t *testing.T) {
func TestNavigator(t *testing.T) {
nav := &NodeNavigator{curr: doc, root: doc, attr: -1}
nav.MoveToChild() // New Line
- nav.MoveToNext()
- nav.MoveToNext() // catalog
+ nav.MoveToNext() // catalog
if nav.curr.Data != "catalog" {
t.Fatal("current node name != `catalog`")
}
@@ -116,7 +116,6 @@ func TestNavigator(t *testing.T) {
t.Fatal("node type not CommentNode")
}
nav.Value()
- nav.MoveToNext() // New Line
nav.MoveToNext() //book
nav.MoveToChild()
nav.MoveToNext() // book/author
@@ -125,12 +124,34 @@ func TestNavigator(t *testing.T) {
}
nav.MoveToParent() // book
nav.MoveToNext() // next book
- nav.MoveToNext() // skip some whitespace
if nav.curr.SelectAttr("id") != "bk102" {
t.Fatal("node error")
}
}
+func TestAttributesNamespaces(t *testing.T) {
+ doc := loadXML(`
+ <root xmlns="ns://root" xmlns:nested="ns://nested" xmlns:other="ns://other">
+ <tag id="1" attr="v"></tag>
+ <tag id="2" nested:attr="v"></tag>
+ <nested:tag id="3" nested:attr="v"></nested:tag>
+ <nested:tag id="4" other:attr="v"></nested:tag>
+ <nested:tag id="5" attr="v"></nested:tag>
+ </root>
+ `)
+ results := Find(doc, "//*[@*[namespace-uri()='ns://nested' and local-name()='attr']]")
+ parsed := make([]string, 0, 5)
+ for _, tag := range results {
+ parsed = append(parsed, tag.SelectAttr("id"))
+ }
+ got := fmt.Sprintf("%v", parsed)
+
+ // unsure if 5 should be selected here
+ if got != "[2 3]" {
+ t.Fatalf("Expected tags [2 3], got %v", got)
+ }
+}
+
func loadXML(s string) *Node {
node, err := Parse(strings.NewReader(s))
if err != nil {
@@ -138,3 +159,14 @@ func loadXML(s string) *Node {
}
return node
}
+
+func TestMissingTextNodes(t *testing.T) {
+ doc := loadXML(`
+ <?xml version="1.0" encoding="utf-8"?>
+ <corpus><p>Lorem <a>ipsum</a> dolor</p></corpus>
+ `)
+ results := Find(doc, "//text()")
+ if len(results) != 3 {
+ t.Fatalf("Expected text nodes 3, got %d", len(results))
+ }
+}
Debdiff
[The following lists of changes regard files as different if they have different names, permissions or owners.]
Files in second set of .debs but not in first
-rw-r--r-- root/root /usr/share/gocode/src/github.com/antchfx/xmlquery/options.go
-rw-r--r-- root/root /usr/share/gocode/src/github.com/antchfx/xmlquery/options_test.go
No differences were encountered in the control files