Previous optimisation in findStringLen has broken unescaping of \\\" sequences (#284)
* tests: don't ignore errors, verify them carefully
* fix unescaping of \\\\\" and similar sequences
* tests: add Unmarshal test cases for escaped sequences
kirillx authored 4 years ago
GitHub committed 4 years ago
24 | 24 | ./tests/members_unescaped.go \ |
25 | 25 | ./tests/intern.go \ |
26 | 26 | ./tests/nocopy.go \ |
27 | ./tests/escaping.go \ | |
27 | 28 | |
28 | 29 | bin/easyjson -all ./tests/data.go |
29 | 30 | bin/easyjson -all ./tests/nothing.go |
45 | 46 | bin/easyjson -disable_members_unescape ./tests/members_unescaped.go |
46 | 47 | bin/easyjson ./tests/intern.go |
47 | 48 | bin/easyjson ./tests/nocopy.go |
49 | bin/easyjson ./tests/escaping.go | |
48 | 50 | |
49 | 51 | test: generate |
50 | 52 | go test \ |
252 | 252 | if idx == 0 || (idx > 0 && data[idx-1] != '\\') { |
253 | 253 | return true, length + idx |
254 | 254 | } |
255 | ||
256 | // Count the run of consecutive backslashes directly before the quote. An even count means the backslashes escape each other, so the quote itself is not escaped. | |
257 | cnt := 1 | |
258 | for idx-cnt-1 >= 0 && data[idx-cnt-1] == '\\' { | |
259 | cnt++ | |
260 | } | |
261 | if cnt%2 == 0 { | |
262 | return true, length + idx | |
263 | } | |
264 | ||
255 | 265 | length += idx + 1 |
256 | 266 | data = data[idx+1:] |
257 | 267 | } |
324 | 334 | // decodeEscape processes a single escape sequence and returns number of bytes processed. |
325 | 335 | func decodeEscape(data []byte) (decoded rune, bytesProcessed int, err error) { |
326 | 336 | if len(data) < 2 { |
327 | return 0, 0, fmt.Errorf("syntax error at %v", string(data)) | |
337 | return 0, 0, errors.New("incorrect escape symbol \\ at the end of token") | |
328 | 338 | } |
329 | 339 | |
330 | 340 | c := data[1] |
344 | 354 | case 'u': |
345 | 355 | rr := getu4(data) |
346 | 356 | if rr < 0 { |
347 | return 0, 0, errors.New("syntax error") | |
357 | return 0, 0, errors.New("incorrectly escaped \\uXXXX sequence") | |
348 | 358 | } |
349 | 359 | |
350 | 360 | read := 6 |
360 | 370 | return rr, read, nil |
361 | 371 | } |
362 | 372 | |
363 | return 0, 0, errors.New("syntax error") | |
373 | return 0, 0, errors.New("incorrectly escaped bytes") | |
364 | 374 | } |
365 | 375 | |
366 | 376 | // fetchString scans a string literal token. |
143 | 143 | {`"绿\ufffd茶"`, "绿\xc5茶"}, |
144 | 144 | {`"тест\u2028"`, "тест\xE2\x80\xA8"}, |
145 | 145 | {`"\\\r\n\t\""`, "\\\r\n\t\""}, |
146 | {`"text\\\""`, "text\\\""}, | |
146 | 147 | {`"ü"`, "ü"}, |
147 | 148 | } |
148 | 149 |
0 | package tests | |
1 | ||
2 | //easyjson:json | |
3 | type EscStringStruct struct { | |
4 | A string `json:"a"` | |
5 | } | |
6 | ||
7 | //easyjson:json | |
8 | type EscIntStruct struct { | |
9 | A int `json:"a,string"` | |
10 | } |
0 | package tests | |
1 | ||
2 | import ( | |
3 | "reflect" | |
4 | "testing" | |
5 | ||
6 | "github.com/mailru/easyjson" | |
7 | ) | |
8 | ||
9 | func TestStrFieldsUnescaping(t *testing.T) { | |
10 | cases := []struct { | |
11 | data string | |
12 | exp EscStringStruct | |
13 | }{ | |
14 | { | |
15 | data: `{}`, | |
16 | exp: EscStringStruct{}, | |
17 | }, | |
18 | { | |
19 | data: `{"a": "\""}`, | |
20 | exp: EscStringStruct{A: `"`}, | |
21 | }, | |
22 | { | |
23 | data: `{"a": "\\"}`, | |
24 | exp: EscStringStruct{A: `\`}, | |
25 | }, | |
26 | { | |
27 | data: `{"a": "\\\""}`, | |
28 | exp: EscStringStruct{A: `\"`}, | |
29 | }, | |
30 | { | |
31 | data: `{"a": "\\\\'"}`, | |
32 | exp: EscStringStruct{A: `\\'`}, | |
33 | }, | |
34 | { | |
35 | data: `{"a": "\t\\\nx\\\""}`, | |
36 | exp: EscStringStruct{A: "\t\\\nx\\\""}, | |
37 | }, | |
38 | { | |
39 | data: `{"a": "\r\n"}`, | |
40 | exp: EscStringStruct{A: "\r\n"}, | |
41 | }, | |
42 | { | |
43 | data: `{"a": "\r\n\u4e2D\u56fD\\\""}`, | |
44 | exp: EscStringStruct{A: "\r\n中国\\\""}, | |
45 | }, | |
46 | } | |
47 | ||
48 | for i, c := range cases { | |
49 | var val EscStringStruct | |
50 | err := easyjson.Unmarshal([]byte(c.data), &val) | |
51 | if err != nil { | |
52 | t.Error(err) | |
53 | } | |
54 | if !reflect.DeepEqual(val, c.exp) { | |
55 | t.Errorf("[%d] TestStrFieldsUnescaping(): got=%q, exp=%q", i, val, c.exp) | |
56 | } | |
57 | } | |
58 | } | |
59 | ||
60 | func TestIntFieldsUnescaping(t *testing.T) { | |
61 | cases := []struct { | |
62 | data string | |
63 | exp EscIntStruct | |
64 | }{ | |
65 | { | |
66 | data: `{}`, | |
67 | exp: EscIntStruct{A: 0}, | |
68 | }, | |
69 | { | |
70 | data: `{"a": "1"}`, | |
71 | exp: EscIntStruct{A: 1}, | |
72 | }, | |
73 | { | |
74 | data: `{"a": "\u0032"}`, | |
75 | exp: EscIntStruct{A: 2}, | |
76 | }, | |
77 | } | |
78 | ||
79 | for i, c := range cases { | |
80 | var val EscIntStruct | |
81 | err := easyjson.Unmarshal([]byte(c.data), &val) | |
82 | if err != nil { | |
83 | t.Error(err) | |
84 | } | |
85 | if !reflect.DeepEqual(val, c.exp) { | |
86 | t.Errorf("[%d] TestIntFieldsUnescaping(): got=%v, exp=%v", i, val, c.exp) | |
87 | } | |
88 | } | |
89 | } |
11 | 11 | var i Intern |
12 | 12 | allocsPerRun := testing.AllocsPerRun(1000, func() { |
13 | 13 | i = Intern{} |
14 | easyjson.Unmarshal(data, &i) | |
14 | err := easyjson.Unmarshal(data, &i) | |
15 | if err != nil { | |
16 | t.Error(err) | |
17 | } | |
15 | 18 | if i.Field != "string interning test" { |
16 | 19 | t.Fatalf("wrong value: %q", i.Field) |
17 | 20 | } |
23 | 26 | var n NoIntern |
24 | 27 | allocsPerRun = testing.AllocsPerRun(1000, func() { |
25 | 28 | n = NoIntern{} |
26 | easyjson.Unmarshal(data, &n) | |
29 | err := easyjson.Unmarshal(data, &n) | |
30 | if err != nil { | |
31 | t.Error(err) | |
32 | } | |
27 | 33 | if n.Field != "string interning test" { |
28 | 34 | t.Fatalf("wrong value: %q", n.Field) |
29 | 35 | } |
36 | 36 | |
37 | 37 | for i, c := range cases { |
38 | 38 | var esc MembersEscaped |
39 | easyjson.Unmarshal([]byte(c.data), &esc) | |
39 | err := easyjson.Unmarshal([]byte(c.data), &esc) | |
40 | if err != nil { | |
41 | t.Error(err) | |
42 | } | |
40 | 43 | if !reflect.DeepEqual(esc, c.esc) { |
41 | 44 | t.Errorf("[%d] TestMembersEscaping(): got=%+v, exp=%+v", i, esc, c.esc) |
42 | 45 | } |
43 | 46 | |
44 | 47 | var unesc MembersUnescaped |
45 | easyjson.Unmarshal([]byte(c.data), &unesc) | |
48 | err = easyjson.Unmarshal([]byte(c.data), &unesc) | |
49 | if err != nil { | |
50 | t.Error(err) | |
51 | } | |
46 | 52 | if !reflect.DeepEqual(unesc, c.unesc) { |
47 | 53 | t.Errorf("[%d] TestMembersEscaping(): no-unescaping case: got=%+v, exp=%+v", i, esc, c.esc) |
48 | 54 | } |
26 | 26 | } |
27 | 27 | res := NocopyStruct{} |
28 | 28 | |
29 | easyjson.Unmarshal(data, &res) | |
29 | err := easyjson.Unmarshal(data, &res) | |
30 | if err != nil { | |
31 | t.Error(err) | |
32 | } | |
30 | 33 | if !reflect.DeepEqual(exp, res) { |
31 | 34 | t.Errorf("TestNocopy(): got=%+v, exp=%+v", res, exp) |
32 | 35 | } |
41 | 44 | data = []byte(`{"b": "valueNoCopy"}`) |
42 | 45 | res = NocopyStruct{} |
43 | 46 | allocsPerRun := testing.AllocsPerRun(1000, func() { |
44 | easyjson.Unmarshal(data, &res) | |
47 | err := easyjson.Unmarshal(data, &res) | |
48 | if err != nil { | |
49 | t.Error(err) | |
50 | } | |
45 | 51 | if res.B != "valueNoCopy" { |
46 | 52 | t.Fatalf("wrong value: %q", res.B) |
47 | 53 | } |
52 | 58 | |
53 | 59 | data = []byte(`{"a": "valueNoCopy"}`) |
54 | 60 | allocsPerRun = testing.AllocsPerRun(1000, func() { |
55 | easyjson.Unmarshal(data, &res) | |
61 | err := easyjson.Unmarshal(data, &res) | |
62 | if err != nil { | |
63 | t.Error(err) | |
64 | } | |
56 | 65 | if res.A != "valueNoCopy" { |
57 | 66 | t.Fatalf("wrong value: %q", res.A) |
58 | 67 | } |