Merge pull request #212 from aubm/fix-xml-charset
Fixes XML decoding for non-UTF-8 XML files
Onsi Fakhouri authored 7 years ago
GitHub committed 7 years ago
2 | 2 |
import (
|
3 | 3 |
"encoding/xml"
|
4 | 4 |
"fmt"
|
|
5 |
"io"
|
5 | 6 |
"reflect"
|
|
7 |
"strings"
|
6 | 8 |
|
7 | 9 |
"github.com/onsi/gomega/format"
|
|
10 |
"golang.org/x/net/html/charset"
|
8 | 11 |
)
|
9 | 12 |
|
10 | 13 |
type MatchXMLMatcher struct {
|
|
20 | 23 |
aval := &xmlNode{}
|
21 | 24 |
eval := &xmlNode{}
|
22 | 25 |
|
23 | |
if err := xml.Unmarshal([]byte(actualString), aval); err != nil {
|
|
26 |
if err := newXmlDecoder(strings.NewReader(actualString)).Decode(aval); err != nil {
|
24 | 27 |
return false, fmt.Errorf("Actual '%s' should be valid XML, but it is not.\nUnderlying error:%s", actualString, err)
|
25 | 28 |
}
|
26 | |
if err := xml.Unmarshal([]byte(expectedString), eval); err != nil {
|
|
29 |
if err := newXmlDecoder(strings.NewReader(expectedString)).Decode(eval); err != nil {
|
27 | 30 |
return false, fmt.Errorf("Expected '%s' should be valid XML, but it is not.\nUnderlying error:%s", expectedString, err)
|
28 | 31 |
}
|
29 | 32 |
|
|
55 | 58 |
}
|
56 | 59 |
return actualString, expectedString, nil
|
57 | 60 |
}
|
|
61 |
|
|
62 |
func newXmlDecoder(reader io.Reader) *xml.Decoder {
|
|
63 |
dec := xml.NewDecoder(reader)
|
|
64 |
dec.CharsetReader = charset.NewReaderLabel
|
|
65 |
return dec
|
|
66 |
}
|
16 | 16 |
sample_06 = readFileContents("test_data/xml/sample_06.xml")
|
17 | 17 |
sample_07 = readFileContents("test_data/xml/sample_07.xml")
|
18 | 18 |
sample_08 = readFileContents("test_data/xml/sample_08.xml")
|
|
19 |
|
|
20 |
sample_11 = readFileContents("test_data/xml/sample_11.xml")
|
19 | 21 |
)
|
20 | 22 |
|
21 | 23 |
Context("When passed stringifiables", func() {
|
|
27 | 29 |
Ω(sample_01).ShouldNot(MatchXML(sample_05)) // different structures
|
28 | 30 |
Ω(sample_06).ShouldNot(MatchXML(sample_07)) // same xml names with different namespaces
|
29 | 31 |
Ω(sample_07).ShouldNot(MatchXML(sample_08)) // same structures with different values
|
|
32 |
Ω(sample_11).Should(MatchXML(sample_11)) // with non UTF-8 encoding
|
30 | 33 |
})
|
31 | 34 |
|
32 | 35 |
It("should work with byte arrays", func() {
|
|
0 |
<?xml version="1.0" encoding="ISO-8859-1" standalone="no"?>
|
|
1 |
<note>
|
|
2 |
<to>Tove</to>
|
|
3 |
<from>Jani</from>
|
|
4 |
<heading>Reminder</heading>
|
|
5 |
<body>Don't forget me this weekend!</body>
|
|
6 |
</note>
|