Codebase list golang-github-anmitsu-go-shlex / 22ac6a6
New upstream version 0.0~git20161002.648efa6 Dawid Dziurla 7 years ago
6 changed file(s) with 649 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
0 shlex.test
0 Copyright (c) anmitsu <anmitsu.s@gmail.com>
1
2 Permission is hereby granted, free of charge, to any person obtaining
3 a copy of this software and associated documentation files (the
4 "Software"), to deal in the Software without restriction, including
5 without limitation the rights to use, copy, modify, merge, publish,
6 distribute, sublicense, and/or sell copies of the Software, and to
7 permit persons to whom the Software is furnished to do so, subject to
8 the following conditions:
9
10 The above copyright notice and this permission notice shall be
11 included in all copies or substantial portions of the Software.
12
13 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
15 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
17 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
18 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
19 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
0 # go-shlex
1
2 go-shlex is a Go library that provides a lexical analyzer similar to
3 that of the Unix shell.
4
5 ## Install
6
7 go get -u "github.com/anmitsu/go-shlex"
8
9 ## Usage
10
11 ```go
12 package main
13
14 import (
15 "fmt"
16 "log"
17
18 "github.com/anmitsu/go-shlex"
19 )
20
21 func main() {
22 cmd := `cp -Rdp "file name" 'file name2' dir\ name`
23 words, err := shlex.Split(cmd, true)
24 if err != nil {
25 log.Fatal(err)
26 }
27
28 for _, w := range words {
29 fmt.Println(w)
30 }
31 }
32 ```
33
34 ## Documentation
35
36 http://godoc.org/github.com/anmitsu/go-shlex
37
0 package shlex_test
1
2 import (
3 "fmt"
4 "log"
5
6 "github.com/anmitsu/go-shlex"
7 flynn_shlex "github.com/flynn/go-shlex"
8 )
9
10 func ExampleSplit() {
11 cmd := `cp -Rdp "file name" 'file name2' dir\ name`
12
13 // Split of cmd with POSIX mode.
14 words1, err := shlex.Split(cmd, true)
15 if err != nil {
16 log.Fatal(err)
17 }
18 // Split of cmd with Non-POSIX mode.
19 words2, err := shlex.Split(cmd, false)
20 if err != nil {
21 log.Fatal(err)
22 }
23
24 fmt.Println("Source command:")
25 fmt.Println(`cp -Rdp "file name" 'file name2' dir\ name`)
26 fmt.Println()
27
28 fmt.Println("POSIX mode:")
29 for _, word := range words1 {
30 fmt.Println(word)
31 }
32 fmt.Println()
33 fmt.Println("Non-POSIX mode:")
34 for _, word := range words2 {
35 fmt.Println(word)
36 }
37
38 // Output:
39 // Source command:
40 // cp -Rdp "file name" 'file name2' dir\ name
41 //
42 // POSIX mode:
43 // cp
44 // -Rdp
45 // file name
46 // file name2
47 // dir name
48 //
49 // Non-POSIX mode:
50 // cp
51 // -Rdp
52 // "file name"
53 // 'file name2'
54 // dir\
55 // name
56 }
57
58 func ExampleSplit_compareFlynn() {
59 cmd := `English and 日本語`
60
61 // Split for github.com/flynn/go-shlex imported as flynn_shlex
62 words_flynn, err1 := flynn_shlex.Split(cmd)
63
64 // Split for github.com/anmitsu/go-shlex
65 words_anmitsu, err2 := shlex.Split(cmd, true)
66
67 fmt.Println("Source string:")
68 fmt.Println(cmd)
69 fmt.Println()
70
71 fmt.Println("Result of github.com/flynn/go-shlex:")
72 for _, word := range words_flynn {
73 fmt.Println(word)
74 }
75 fmt.Println(err1.Error())
76
77 fmt.Println()
78 fmt.Println("Result of github.com/anmitsu/go-shlex:")
79 for _, word := range words_anmitsu {
80 fmt.Println(word)
81 }
82 if err2 != nil {
83 fmt.Println(err2.Error())
84 }
85
86 // Output:
87 // Source string:
88 // English and 日本語
89 //
90 // Result of github.com/flynn/go-shlex:
91 // English
92 // and
93 // Unknown rune: 26085
94 //
95 // Result of github.com/anmitsu/go-shlex:
96 // English
97 // and
98 // 日本語
99 }
0 // Package shlex provides simple lexical analysis similar to that of the Unix shell.
1 package shlex
2
3 import (
4 "bufio"
5 "errors"
6 "io"
7 "strings"
8 "unicode"
9 )
10
// Errors reported by the Lexer when reading stops in the middle of a
// construct.
var (
	// ErrNoClosing is returned when reading stops while the lexer is
	// still inside a quotation.
	ErrNoClosing = errors.New("No closing quotation")
	// ErrNoEscaped is returned when reading stops directly after an
	// escape character, before the escaped character was read.
	ErrNoEscaped = errors.New("No escaped character")
)
15
// Tokenizer is the interface that classifies a token according to
// words, whitespaces, quotations, escapes and escaped quotations.
//
// The Lexer applies these predicates both to the rune it has just read
// and to the rune it uses to remember its current scanning state.
type Tokenizer interface {
	// IsWord reports whether the rune belongs to a word.
	IsWord(rune) bool
	// IsWhitespace reports whether the rune separates tokens.
	IsWhitespace(rune) bool
	// IsQuote reports whether the rune opens or closes a quotation.
	IsQuote(rune) bool
	// IsEscape reports whether the rune is an escape character.
	IsEscape(rune) bool
	// IsEscapedQuote reports whether the rune is a quote character
	// inside which the Lexer (in POSIX mode) honors escape characters.
	IsEscapedQuote(rune) bool
}
25
// DefaultTokenizer is a stateless rune classifier modelled on the Unix
// shell.
type DefaultTokenizer struct{}

// IsWord reports whether r is an underscore, a Unicode letter or a
// Unicode number.
func (t *DefaultTokenizer) IsWord(r rune) bool {
	if r == '_' {
		return true
	}
	return unicode.IsLetter(r) || unicode.IsNumber(r)
}

// IsQuote reports whether r is a single or a double quotation mark.
func (t *DefaultTokenizer) IsQuote(r rune) bool {
	return r == '\'' || r == '"'
}

// IsWhitespace reports whether r is Unicode white space.
func (t *DefaultTokenizer) IsWhitespace(r rune) bool {
	space := unicode.IsSpace(r)
	return space
}

// IsEscape reports whether r is the backslash character.
func (t *DefaultTokenizer) IsEscape(r rune) bool {
	const backslash = '\\'
	return r == backslash
}

// IsEscapedQuote reports whether r is the double quotation mark.
func (t *DefaultTokenizer) IsEscapedQuote(r rune) bool {
	const doubleQuote = '"'
	return r == doubleQuote
}
49
// Lexer represents a lexical analyzer.
type Lexer struct {
	// reader supplies the input one rune at a time.
	reader *bufio.Reader
	// tokenizer classifies runes; replaceable through SetTokenizer.
	tokenizer Tokenizer
	// posix enables the POSIX-mode branches of the scanner: escape
	// characters are processed and quote characters are not copied
	// into the token.
	posix bool
	// whitespacesplit, when true, makes runes that are neither word,
	// quote nor whitespace part of the current token instead of ending
	// it.
	whitespacesplit bool
}
57
58 // NewLexer creates a new Lexer reading from io.Reader. This Lexer
59 // has a DefaultTokenizer according to posix and whitespacesplit
60 // rules.
61 func NewLexer(r io.Reader, posix, whitespacesplit bool) *Lexer {
62 return &Lexer{
63 reader: bufio.NewReader(r),
64 tokenizer: &DefaultTokenizer{},
65 posix: posix,
66 whitespacesplit: whitespacesplit,
67 }
68 }
69
70 // NewLexerString creates a new Lexer reading from a string. This
71 // Lexer has a DefaultTokenizer according to posix and whitespacesplit
72 // rules.
73 func NewLexerString(s string, posix, whitespacesplit bool) *Lexer {
74 return NewLexer(strings.NewReader(s), posix, whitespacesplit)
75 }
76
77 // Split splits a string according to posix or non-posix rules.
78 func Split(s string, posix bool) ([]string, error) {
79 return NewLexerString(s, posix, true).Split()
80 }
81
82 // SetTokenizer sets a Tokenizer.
83 func (l *Lexer) SetTokenizer(t Tokenizer) {
84 l.tokenizer = t
85 }
86
87 func (l *Lexer) Split() ([]string, error) {
88 result := make([]string, 0)
89 for {
90 token, err := l.readToken()
91 if token != "" {
92 result = append(result, token)
93 }
94
95 if err == io.EOF {
96 break
97 } else if err != nil {
98 return result, err
99 }
100 }
101 return result, nil
102 }
103
// readToken reads runes from the reader until one token is complete and
// returns it.
//
// The scanner keeps its mode in state, a rune that is classified through
// the tokenizer: a whitespace rune (' ') before any token content has been
// seen, the opening quote rune while inside a quotation, the escape rune
// directly after an escape character, and 'a' while inside a word.
// escapedstate remembers the mode to return to once the escaped rune has
// been consumed.
//
// The returned token may be empty with a nil error (for example when a
// whitespace rune is read before any content); the caller decides what to
// do with empty tokens. When reading fails, the token collected so far is
// returned together with ErrNoClosing if the scanner was inside a
// quotation, ErrNoEscaped if it was directly after an escape character,
// or the read error itself (io.EOF at end of input) otherwise.
func (l *Lexer) readToken() (string, error) {
	t := l.tokenizer
	token := ""
	// quoted records that a quotation was entered while building this
	// token.
	quoted := false
	state := ' '
	escapedstate := ' '
scanning:
	for {
		next, _, err := l.reader.ReadRune()
		if err != nil {
			// Reading stopped: report an unfinished quotation or escape
			// in preference to the underlying read error.
			if t.IsQuote(state) {
				return token, ErrNoClosing
			} else if t.IsEscape(state) {
				return token, ErrNoEscaped
			}
			return token, err
		}

		switch {
		case t.IsWhitespace(state):
			// No token content has been collected yet.
			switch {
			case t.IsWhitespace(next):
				// Whitespace before any content ends this call with an
				// empty token.
				break scanning
			case l.posix && t.IsEscape(next):
				// Return to word mode after the escaped rune.
				escapedstate = 'a'
				state = next
			case t.IsWord(next):
				token += string(next)
				state = 'a'
			case t.IsQuote(next):
				// Only non-POSIX mode keeps the opening quote in the token.
				if !l.posix {
					token += string(next)
				}
				state = next
			default:
				// Any other rune starts the token. With whitespacesplit
				// the token continues in word mode; otherwise the token
				// is now non-empty, so the rune is returned on its own.
				token = string(next)
				if l.whitespacesplit {
					state = 'a'
				} else if token != "" || (l.posix && quoted) {
					break scanning
				}
			}
		case t.IsQuote(state):
			// Inside a quotation; state holds the opening quote rune.
			quoted = true
			switch {
			case next == state:
				// Matching closing quote: non-POSIX mode keeps it and
				// ends the token, POSIX mode drops it and continues in
				// word mode.
				if !l.posix {
					token += string(next)
					break scanning
				} else {
					state = 'a'
				}
			case l.posix && t.IsEscape(next) && t.IsEscapedQuote(state):
				// Escapes are honored only in POSIX mode and only inside
				// quotes accepted by IsEscapedQuote; remember the quote
				// so scanning resumes inside it.
				escapedstate = state
				state = next
			default:
				token += string(next)
			}
		case t.IsEscape(state):
			// next is the escaped rune. Inside a quotation the escape
			// rune itself is kept unless it escapes another escape rune
			// or the enclosing quote.
			if t.IsQuote(escapedstate) && next != state && next != escapedstate {
				token += string(state)
			}
			token += string(next)
			state = escapedstate
		case t.IsWord(state):
			// Inside a word.
			switch {
			case t.IsWhitespace(next):
				// Whitespace ends the token once it has content, or, in
				// POSIX mode, once a quotation was seen.
				if token != "" || (l.posix && quoted) {
					break scanning
				}
			case l.posix && t.IsQuote(next):
				// POSIX mode enters the quotation without copying the
				// quote rune.
				state = next
			case l.posix && t.IsEscape(next):
				escapedstate = 'a'
				state = next
			case t.IsWord(next) || t.IsQuote(next):
				// Reached for quotes only in non-POSIX mode, where a
				// quote inside a word is an ordinary rune.
				token += string(next)
			default:
				if l.whitespacesplit {
					token += string(next)
				} else if token != "" {
					// Push the delimiter back so the next call sees it
					// again; the error returned by UnreadRune is ignored.
					l.reader.UnreadRune()
					break scanning
				}
			}
		}
	}
	return token, nil
}
0 package shlex
1
2 import (
3 "fmt"
4 "testing"
5 )
6
// datanonposix is the test table used by testSplit when posix is false.
// Each entry holds the input string (in), the tokens expected from Split
// (out) and the error expected from Split (err).
var datanonposix = []struct {
	in  string
	out []string
	err error
}{
	{`This string has an embedded apostrophe, doesn't it?`,
		[]string{
			"This",
			"string",
			"has",
			"an",
			"embedded",
			"apostrophe",
			",",
			"doesn't",
			"it",
			"?",
		},
		nil,
	},
	{"This string has embedded \"double quotes\" and 'single quotes' in it,\nand even \"a 'nested example'\".\n",
		[]string{
			"This",
			"string",
			"has",
			"embedded",
			`"double quotes"`,
			"and",
			`'single quotes'`,
			"in",
			"it",
			",",
			"and",
			"even",
			`"a 'nested example'"`,
			".",
		},
		nil,
	},
	{`Hello world!, こんにちは 世界!`,
		[]string{
			"Hello",
			"world",
			"!",
			",",
			"こんにちは",
			"世界",
			"!",
		},
		nil,
	},
	{`Do"Not"Separate`,
		[]string{`Do"Not"Separate`},
		nil,
	},
	{`"Do"Separate`,
		[]string{`"Do"`, "Separate"},
		nil,
	},
	{`Escaped \e Character not in quotes`,
		[]string{
			"Escaped",
			`\`,
			"e",
			"Character",
			"not",
			"in",
			"quotes",
		},
		nil,
	},
	{`Escaped "\e" Character in double quotes`,
		[]string{
			"Escaped",
			`"\e"`,
			"Character",
			"in",
			"double",
			"quotes",
		},
		nil,
	},
	{`Escaped '\e' Character in single quotes`,
		[]string{
			"Escaped",
			`'\e'`,
			"Character",
			"in",
			"single",
			"quotes",
		},
		nil,
	},
	{`Escaped '\'' \"\'\" single quote`,
		[]string{
			"Escaped",
			`'\'`,
			`' \"\'`,
			`\`,
			`" single quote`,
		},
		ErrNoClosing,
	},
	{`Escaped "\"" \'\"\' double quote`,
		[]string{
			"Escaped",
			`"\"`,
			`" \'\"`,
			`\`,
			`' double quote`,
		},
		ErrNoClosing,
	},
	{`"'Strip extra layer of quotes'"`,
		[]string{`"'Strip extra layer of quotes'"`},
		nil,
	},
}
125
// dataposix is the test table used by testSplit when posix is true. Each
// entry holds the input string (in), the tokens expected from Split (out)
// and the error expected from Split (err).
var dataposix = []struct {
	in  string
	out []string
	err error
}{
	{`This string has an embedded apostrophe, doesn't it?`,
		[]string{
			"This",
			"string",
			"has",
			"an",
			"embedded",
			"apostrophe",
			",",
			"doesnt it?",
		},
		ErrNoClosing,
	},
	{"This string has embedded \"double quotes\" and 'single quotes' in it,\nand even \"a 'nested example'\".\n",
		[]string{
			"This",
			"string",
			"has",
			"embedded",
			`double quotes`,
			"and",
			`single quotes`,
			"in",
			"it",
			",",
			"and",
			"even",
			`a 'nested example'`,
			".",
		},
		nil,
	},
	{`Hello world!, こんにちは 世界!`,
		[]string{
			"Hello",
			"world",
			"!",
			",",
			"こんにちは",
			"世界",
			"!",
		},
		nil,
	},
	{`Do"Not"Separate`,
		[]string{`DoNotSeparate`},
		nil,
	},
	{`"Do"Separate`,
		[]string{"DoSeparate"},
		nil,
	},
	{`Escaped \e Character not in quotes`,
		[]string{
			"Escaped",
			"e",
			"Character",
			"not",
			"in",
			"quotes",
		},
		nil,
	},
	{`Escaped "\e" Character in double quotes`,
		[]string{
			"Escaped",
			`\e`,
			"Character",
			"in",
			"double",
			"quotes",
		},
		nil,
	},
	{`Escaped '\e' Character in single quotes`,
		[]string{
			"Escaped",
			`\e`,
			"Character",
			"in",
			"single",
			"quotes",
		},
		nil,
	},
	{`Escaped '\'' \"\'\" single quote`,
		[]string{
			"Escaped",
			`\ \"\"`,
			"single",
			"quote",
		},
		nil,
	},
	{`Escaped "\"" \'\"\' double quote`,
		[]string{
			"Escaped",
			`"`,
			`'"'`,
			"double",
			"quote",
		},
		nil,
	},
	{`"'Strip extra layer of quotes'"`,
		[]string{`'Strip extra layer of quotes'`},
		nil,
	},
}
240
241 func TestSplitNonPOSIX(t *testing.T) {
242 testSplit(t, false)
243 }
244
245 func TestSplitPOSIX(t *testing.T) {
246 testSplit(t, true)
247 }
248
249 func testSplit(t *testing.T, posix bool) {
250 var data []struct {
251 in string
252 out []string
253 err error
254 }
255 if posix {
256 data = dataposix
257 } else {
258 data = datanonposix
259 }
260
261 for _, d := range data {
262 t.Logf("Spliting: `%s'", d.in)
263
264 result, err := NewLexerString(d.in, posix, false).Split()
265
266 // check closing and escaped error
267 if err != d.err {
268 printToken(result)
269 t.Fatalf("Error expected: `%v', but result catched: `%v'",
270 d.err, err)
271 }
272
273 // check splited number
274 if len(result) != len(d.out) {
275 printToken(result)
276 t.Fatalf("Split expeced: `%d', but result founds: `%d'",
277 len(d.out), len(result))
278 }
279
280 // check words
281 for j, out := range d.out {
282 if result[j] != out {
283 printToken(result)
284 t.Fatalf("Word expeced: `%s', but result founds: `%s' in %d",
285 out, result[j], j)
286 }
287 }
288 t.Log("ok")
289 }
290 }
291
// printToken writes each element of s to standard output on its own line.
func printToken(s []string) {
	for i := 0; i < len(s); i++ {
		fmt.Println(s[i])
	}
}