New upstream version 0.0~git20161002.648efa6
Dawid Dziurla
7 years ago
| 0 | shlex.test |
| 0 | Copyright (c) anmitsu <anmitsu.s@gmail.com> | |
| 1 | ||
| 2 | Permission is hereby granted, free of charge, to any person obtaining | |
| 3 | a copy of this software and associated documentation files (the | |
| 4 | "Software"), to deal in the Software without restriction, including | |
| 5 | without limitation the rights to use, copy, modify, merge, publish, | |
| 6 | distribute, sublicense, and/or sell copies of the Software, and to | |
| 7 | permit persons to whom the Software is furnished to do so, subject to | |
| 8 | the following conditions: | |
| 9 | ||
| 10 | The above copyright notice and this permission notice shall be | |
| 11 | included in all copies or substantial portions of the Software. | |
| 12 | ||
| 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
| 14 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
| 15 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
| 16 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE | |
| 17 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION | |
| 18 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION | |
| 19 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| 0 | # go-shlex | |
| 1 | ||
| 2 | go-shlex is a Go library that provides a simple lexical analyzer | |
| 3 | modeled on the Unix shell. | |
| 4 | ||
| 5 | ## Install | |
| 6 | ||
| 7 | go get -u "github.com/anmitsu/go-shlex" | |
| 8 | ||
| 9 | ## Usage | |
| 10 | ||
| 11 | ```go | |
| 12 | package main | |
| 13 | ||
| 14 | import ( | |
| 15 | "fmt" | |
| 16 | "log" | |
| 17 | ||
| 18 | "github.com/anmitsu/go-shlex" | |
| 19 | ) | |
| 20 | ||
| 21 | func main() { | |
| 22 | cmd := `cp -Rdp "file name" 'file name2' dir\ name` | |
| 23 | words, err := shlex.Split(cmd, true) | |
| 24 | if err != nil { | |
| 25 | log.Fatal(err) | |
| 26 | } | |
| 27 | ||
| 28 | for _, w := range words { | |
| 29 | fmt.Println(w) | |
| 30 | } | |
| 31 | } | |
| 32 | ``` | |
| 33 | ||
| 34 | ## Documentation | |
| 35 | ||
| 36 | http://godoc.org/github.com/anmitsu/go-shlex | |
| 37 |
| 0 | package shlex_test | |
| 1 | ||
| 2 | import ( | |
| 3 | "fmt" | |
| 4 | "log" | |
| 5 | ||
| 6 | "github.com/anmitsu/go-shlex" | |
| 7 | flynn_shlex "github.com/flynn/go-shlex" | |
| 8 | ) | |
| 9 | ||
| 10 | func ExampleSplit() { | |
| 11 | cmd := `cp -Rdp "file name" 'file name2' dir\ name` | |
| 12 | ||
| 13 | // Split of cmd with POSIX mode. | |
| 14 | words1, err := shlex.Split(cmd, true) | |
| 15 | if err != nil { | |
| 16 | log.Fatal(err) | |
| 17 | } | |
| 18 | // Split of cmd with Non-POSIX mode. | |
| 19 | words2, err := shlex.Split(cmd, false) | |
| 20 | if err != nil { | |
| 21 | log.Fatal(err) | |
| 22 | } | |
| 23 | ||
| 24 | fmt.Println("Source command:") | |
| 25 | fmt.Println(`cp -Rdp "file name" 'file name2' dir\ name`) | |
| 26 | fmt.Println() | |
| 27 | ||
| 28 | fmt.Println("POSIX mode:") | |
| 29 | for _, word := range words1 { | |
| 30 | fmt.Println(word) | |
| 31 | } | |
| 32 | fmt.Println() | |
| 33 | fmt.Println("Non-POSIX mode:") | |
| 34 | for _, word := range words2 { | |
| 35 | fmt.Println(word) | |
| 36 | } | |
| 37 | ||
| 38 | // Output: | |
| 39 | // Source command: | |
| 40 | // cp -Rdp "file name" 'file name2' dir\ name | |
| 41 | // | |
| 42 | // POSIX mode: | |
| 43 | // cp | |
| 44 | // -Rdp | |
| 45 | // file name | |
| 46 | // file name2 | |
| 47 | // dir name | |
| 48 | // | |
| 49 | // Non-POSIX mode: | |
| 50 | // cp | |
| 51 | // -Rdp | |
| 52 | // "file name" | |
| 53 | // 'file name2' | |
| 54 | // dir\ | |
| 55 | // name | |
| 56 | } | |
| 57 | ||
| 58 | func ExampleSplit_compareFlynn() { | |
| 59 | cmd := `English and 日本語` | |
| 60 | ||
| 61 | // Split for github.com/flynn/go-shlex imported as flynn_shlex | |
| 62 | words_flynn, err1 := flynn_shlex.Split(cmd) | |
| 63 | ||
| 64 | // Split for github.com/anmitsu/go-shlex | |
| 65 | words_anmitsu, err2 := shlex.Split(cmd, true) | |
| 66 | ||
| 67 | fmt.Println("Source string:") | |
| 68 | fmt.Println(cmd) | |
| 69 | fmt.Println() | |
| 70 | ||
| 71 | fmt.Println("Result of github.com/flynn/go-shlex:") | |
| 72 | for _, word := range words_flynn { | |
| 73 | fmt.Println(word) | |
| 74 | } | |
| 75 | fmt.Println(err1.Error()) | |
| 76 | ||
| 77 | fmt.Println() | |
| 78 | fmt.Println("Result of github.com/anmitsu/go-shlex:") | |
| 79 | for _, word := range words_anmitsu { | |
| 80 | fmt.Println(word) | |
| 81 | } | |
| 82 | if err2 != nil { | |
| 83 | fmt.Println(err2.Error()) | |
| 84 | } | |
| 85 | ||
| 86 | // Output: | |
| 87 | // Source string: | |
| 88 | // English and 日本語 | |
| 89 | // | |
| 90 | // Result of github.com/flynn/go-shlex: | |
| 91 | // English | |
| 92 | // and | |
| 93 | // Unknown rune: 26085 | |
| 94 | // | |
| 95 | // Result of github.com/anmitsu/go-shlex: | |
| 96 | // English | |
| 97 | // and | |
| 98 | // 日本語 | |
| 99 | } |
| 0 | // Package shlex provides simple lexical analysis modeled on the Unix shell. | |
| 1 | package shlex | |
| 2 | ||
| 3 | import ( | |
| 4 | "bufio" | |
| 5 | "errors" | |
| 6 | "io" | |
| 7 | "strings" | |
| 8 | "unicode" | |
| 9 | ) | |
| 10 | ||
// Sentinel errors reported by the Lexer. Compare against them to tell
// malformed input apart from other failures.
var (
	// ErrNoClosing is reported when the input ends while a quotation
	// is still open.
	ErrNoClosing = errors.New("No closing quotation")
	// ErrNoEscaped is reported when the input ends directly after an
	// escape character, leaving nothing to escape.
	ErrNoEscaped = errors.New("No escaped character")
)
| 15 | ||
// Tokenizer classifies single runes for a Lexer. Each method reports
// whether a rune belongs to one character class: word characters,
// whitespace, quotation marks, escape characters, and quotation marks
// inside which escapes are processed.
type Tokenizer interface {
	// IsWord reports whether r is a word character.
	IsWord(r rune) bool
	// IsWhitespace reports whether r separates tokens.
	IsWhitespace(r rune) bool
	// IsQuote reports whether r opens or closes a quotation.
	IsQuote(r rune) bool
	// IsEscape reports whether r is an escape character.
	IsEscape(r rune) bool
	// IsEscapedQuote reports whether r is a quotation mark inside
	// which escape characters are processed.
	IsEscapedQuote(r rune) bool
}
| 25 | ||
// DefaultTokenizer is a Tokenizer whose character classes resemble
// those of a Unix shell.
type DefaultTokenizer struct{}

// IsWord reports whether r is an underscore, a Unicode letter or a
// Unicode number.
func (t *DefaultTokenizer) IsWord(r rune) bool {
	switch {
	case r == '_':
		return true
	case unicode.IsLetter(r):
		return true
	default:
		return unicode.IsNumber(r)
	}
}

// IsQuote reports whether r is a single or a double quote.
func (t *DefaultTokenizer) IsQuote(r rune) bool {
	return r == '\'' || r == '"'
}

// IsWhitespace reports whether r is Unicode white space.
func (t *DefaultTokenizer) IsWhitespace(r rune) bool {
	isSpace := unicode.IsSpace(r)
	return isSpace
}

// IsEscape reports whether r is a backslash.
func (t *DefaultTokenizer) IsEscape(r rune) bool {
	const backslash = '\\'
	return r == backslash
}

// IsEscapedQuote reports whether r is a double quote, the only
// quotation mark inside which escapes are processed.
func (t *DefaultTokenizer) IsEscapedQuote(r rune) bool {
	const doubleQuote = '"'
	return r == doubleQuote
}
| 49 | ||
| 50 | // Lexer represents a lexical analyzer. | |
| 51 | type Lexer struct { | |
| 52 | reader *bufio.Reader | |
| 53 | tokenizer Tokenizer | |
| 54 | posix bool | |
| 55 | whitespacesplit bool | |
| 56 | } | |
| 57 | ||
| 58 | // NewLexer creates a new Lexer reading from io.Reader. This Lexer | |
| 59 | // has a DefaultTokenizer according to posix and whitespacesplit | |
| 60 | // rules. | |
| 61 | func NewLexer(r io.Reader, posix, whitespacesplit bool) *Lexer { | |
| 62 | return &Lexer{ | |
| 63 | reader: bufio.NewReader(r), | |
| 64 | tokenizer: &DefaultTokenizer{}, | |
| 65 | posix: posix, | |
| 66 | whitespacesplit: whitespacesplit, | |
| 67 | } | |
| 68 | } | |
| 69 | ||
| 70 | // NewLexerString creates a new Lexer reading from a string. This | |
| 71 | // Lexer has a DefaultTokenizer according to posix and whitespacesplit | |
| 72 | // rules. | |
| 73 | func NewLexerString(s string, posix, whitespacesplit bool) *Lexer { | |
| 74 | return NewLexer(strings.NewReader(s), posix, whitespacesplit) | |
| 75 | } | |
| 76 | ||
| 77 | // Split splits a string according to posix or non-posix rules. | |
| 78 | func Split(s string, posix bool) ([]string, error) { | |
| 79 | return NewLexerString(s, posix, true).Split() | |
| 80 | } | |
| 81 | ||
| 82 | // SetTokenizer sets a Tokenizer. | |
| 83 | func (l *Lexer) SetTokenizer(t Tokenizer) { | |
| 84 | l.tokenizer = t | |
| 85 | } | |
| 86 | ||
| 87 | func (l *Lexer) Split() ([]string, error) { | |
| 88 | result := make([]string, 0) | |
| 89 | for { | |
| 90 | token, err := l.readToken() | |
| 91 | if token != "" { | |
| 92 | result = append(result, token) | |
| 93 | } | |
| 94 | ||
| 95 | if err == io.EOF { | |
| 96 | break | |
| 97 | } else if err != nil { | |
| 98 | return result, err | |
| 99 | } | |
| 100 | } | |
| 101 | return result, nil | |
| 102 | } | |
| 103 | ||
| 104 | func (l *Lexer) readToken() (string, error) { | |
| 105 | t := l.tokenizer | |
| 106 | token := "" | |
| 107 | quoted := false | |
| 108 | state := ' ' | |
| 109 | escapedstate := ' ' | |
| 110 | scanning: | |
| 111 | for { | |
| 112 | next, _, err := l.reader.ReadRune() | |
| 113 | if err != nil { | |
| 114 | if t.IsQuote(state) { | |
| 115 | return token, ErrNoClosing | |
| 116 | } else if t.IsEscape(state) { | |
| 117 | return token, ErrNoEscaped | |
| 118 | } | |
| 119 | return token, err | |
| 120 | } | |
| 121 | ||
| 122 | switch { | |
| 123 | case t.IsWhitespace(state): | |
| 124 | switch { | |
| 125 | case t.IsWhitespace(next): | |
| 126 | break scanning | |
| 127 | case l.posix && t.IsEscape(next): | |
| 128 | escapedstate = 'a' | |
| 129 | state = next | |
| 130 | case t.IsWord(next): | |
| 131 | token += string(next) | |
| 132 | state = 'a' | |
| 133 | case t.IsQuote(next): | |
| 134 | if !l.posix { | |
| 135 | token += string(next) | |
| 136 | } | |
| 137 | state = next | |
| 138 | default: | |
| 139 | token = string(next) | |
| 140 | if l.whitespacesplit { | |
| 141 | state = 'a' | |
| 142 | } else if token != "" || (l.posix && quoted) { | |
| 143 | break scanning | |
| 144 | } | |
| 145 | } | |
| 146 | case t.IsQuote(state): | |
| 147 | quoted = true | |
| 148 | switch { | |
| 149 | case next == state: | |
| 150 | if !l.posix { | |
| 151 | token += string(next) | |
| 152 | break scanning | |
| 153 | } else { | |
| 154 | state = 'a' | |
| 155 | } | |
| 156 | case l.posix && t.IsEscape(next) && t.IsEscapedQuote(state): | |
| 157 | escapedstate = state | |
| 158 | state = next | |
| 159 | default: | |
| 160 | token += string(next) | |
| 161 | } | |
| 162 | case t.IsEscape(state): | |
| 163 | if t.IsQuote(escapedstate) && next != state && next != escapedstate { | |
| 164 | token += string(state) | |
| 165 | } | |
| 166 | token += string(next) | |
| 167 | state = escapedstate | |
| 168 | case t.IsWord(state): | |
| 169 | switch { | |
| 170 | case t.IsWhitespace(next): | |
| 171 | if token != "" || (l.posix && quoted) { | |
| 172 | break scanning | |
| 173 | } | |
| 174 | case l.posix && t.IsQuote(next): | |
| 175 | state = next | |
| 176 | case l.posix && t.IsEscape(next): | |
| 177 | escapedstate = 'a' | |
| 178 | state = next | |
| 179 | case t.IsWord(next) || t.IsQuote(next): | |
| 180 | token += string(next) | |
| 181 | default: | |
| 182 | if l.whitespacesplit { | |
| 183 | token += string(next) | |
| 184 | } else if token != "" { | |
| 185 | l.reader.UnreadRune() | |
| 186 | break scanning | |
| 187 | } | |
| 188 | } | |
| 189 | } | |
| 190 | } | |
| 191 | return token, nil | |
| 192 | } |
| 0 | package shlex | |
| 1 | ||
| 2 | import ( | |
| 3 | "fmt" | |
| 4 | "testing" | |
| 5 | ) | |
| 6 | ||
| 7 | var datanonposix = []struct { | |
| 8 | in string | |
| 9 | out []string | |
| 10 | err error | |
| 11 | }{ | |
| 12 | {`This string has an embedded apostrophe, doesn't it?`, | |
| 13 | []string{ | |
| 14 | "This", | |
| 15 | "string", | |
| 16 | "has", | |
| 17 | "an", | |
| 18 | "embedded", | |
| 19 | "apostrophe", | |
| 20 | ",", | |
| 21 | "doesn't", | |
| 22 | "it", | |
| 23 | "?", | |
| 24 | }, | |
| 25 | nil, | |
| 26 | }, | |
| 27 | {"This string has embedded \"double quotes\" and 'single quotes' in it,\nand even \"a 'nested example'\".\n", | |
| 28 | []string{ | |
| 29 | "This", | |
| 30 | "string", | |
| 31 | "has", | |
| 32 | "embedded", | |
| 33 | `"double quotes"`, | |
| 34 | "and", | |
| 35 | `'single quotes'`, | |
| 36 | "in", | |
| 37 | "it", | |
| 38 | ",", | |
| 39 | "and", | |
| 40 | "even", | |
| 41 | `"a 'nested example'"`, | |
| 42 | ".", | |
| 43 | }, | |
| 44 | nil, | |
| 45 | }, | |
| 46 | {`Hello world!, こんにちは 世界!`, | |
| 47 | []string{ | |
| 48 | "Hello", | |
| 49 | "world", | |
| 50 | "!", | |
| 51 | ",", | |
| 52 | "こんにちは", | |
| 53 | "世界", | |
| 54 | "!", | |
| 55 | }, | |
| 56 | nil, | |
| 57 | }, | |
| 58 | {`Do"Not"Separate`, | |
| 59 | []string{`Do"Not"Separate`}, | |
| 60 | nil, | |
| 61 | }, | |
| 62 | {`"Do"Separate`, | |
| 63 | []string{`"Do"`, "Separate"}, | |
| 64 | nil, | |
| 65 | }, | |
| 66 | {`Escaped \e Character not in quotes`, | |
| 67 | []string{ | |
| 68 | "Escaped", | |
| 69 | `\`, | |
| 70 | "e", | |
| 71 | "Character", | |
| 72 | "not", | |
| 73 | "in", | |
| 74 | "quotes", | |
| 75 | }, | |
| 76 | nil, | |
| 77 | }, | |
| 78 | {`Escaped "\e" Character in double quotes`, | |
| 79 | []string{ | |
| 80 | "Escaped", | |
| 81 | `"\e"`, | |
| 82 | "Character", | |
| 83 | "in", | |
| 84 | "double", | |
| 85 | "quotes", | |
| 86 | }, | |
| 87 | nil, | |
| 88 | }, | |
| 89 | {`Escaped '\e' Character in single quotes`, | |
| 90 | []string{ | |
| 91 | "Escaped", | |
| 92 | `'\e'`, | |
| 93 | "Character", | |
| 94 | "in", | |
| 95 | "single", | |
| 96 | "quotes", | |
| 97 | }, | |
| 98 | nil, | |
| 99 | }, | |
| 100 | {`Escaped '\'' \"\'\" single quote`, | |
| 101 | []string{ | |
| 102 | "Escaped", | |
| 103 | `'\'`, | |
| 104 | `' \"\'`, | |
| 105 | `\`, | |
| 106 | `" single quote`, | |
| 107 | }, | |
| 108 | ErrNoClosing, | |
| 109 | }, | |
| 110 | {`Escaped "\"" \'\"\' double quote`, | |
| 111 | []string{ | |
| 112 | "Escaped", | |
| 113 | `"\"`, | |
| 114 | `" \'\"`, | |
| 115 | `\`, | |
| 116 | `' double quote`, | |
| 117 | }, | |
| 118 | ErrNoClosing, | |
| 119 | }, | |
| 120 | {`"'Strip extra layer of quotes'"`, | |
| 121 | []string{`"'Strip extra layer of quotes'"`}, | |
| 122 | nil, | |
| 123 | }, | |
| 124 | } | |
| 125 | ||
| 126 | var dataposix = []struct { | |
| 127 | in string | |
| 128 | out []string | |
| 129 | err error | |
| 130 | }{ | |
| 131 | {`This string has an embedded apostrophe, doesn't it?`, | |
| 132 | []string{ | |
| 133 | "This", | |
| 134 | "string", | |
| 135 | "has", | |
| 136 | "an", | |
| 137 | "embedded", | |
| 138 | "apostrophe", | |
| 139 | ",", | |
| 140 | "doesnt it?", | |
| 141 | }, | |
| 142 | ErrNoClosing, | |
| 143 | }, | |
| 144 | {"This string has embedded \"double quotes\" and 'single quotes' in it,\nand even \"a 'nested example'\".\n", | |
| 145 | []string{ | |
| 146 | "This", | |
| 147 | "string", | |
| 148 | "has", | |
| 149 | "embedded", | |
| 150 | `double quotes`, | |
| 151 | "and", | |
| 152 | `single quotes`, | |
| 153 | "in", | |
| 154 | "it", | |
| 155 | ",", | |
| 156 | "and", | |
| 157 | "even", | |
| 158 | `a 'nested example'`, | |
| 159 | ".", | |
| 160 | }, | |
| 161 | nil, | |
| 162 | }, | |
| 163 | {`Hello world!, こんにちは 世界!`, | |
| 164 | []string{ | |
| 165 | "Hello", | |
| 166 | "world", | |
| 167 | "!", | |
| 168 | ",", | |
| 169 | "こんにちは", | |
| 170 | "世界", | |
| 171 | "!", | |
| 172 | }, | |
| 173 | nil, | |
| 174 | }, | |
| 175 | {`Do"Not"Separate`, | |
| 176 | []string{`DoNotSeparate`}, | |
| 177 | nil, | |
| 178 | }, | |
| 179 | {`"Do"Separate`, | |
| 180 | []string{"DoSeparate"}, | |
| 181 | nil, | |
| 182 | }, | |
| 183 | {`Escaped \e Character not in quotes`, | |
| 184 | []string{ | |
| 185 | "Escaped", | |
| 186 | "e", | |
| 187 | "Character", | |
| 188 | "not", | |
| 189 | "in", | |
| 190 | "quotes", | |
| 191 | }, | |
| 192 | nil, | |
| 193 | }, | |
| 194 | {`Escaped "\e" Character in double quotes`, | |
| 195 | []string{ | |
| 196 | "Escaped", | |
| 197 | `\e`, | |
| 198 | "Character", | |
| 199 | "in", | |
| 200 | "double", | |
| 201 | "quotes", | |
| 202 | }, | |
| 203 | nil, | |
| 204 | }, | |
| 205 | {`Escaped '\e' Character in single quotes`, | |
| 206 | []string{ | |
| 207 | "Escaped", | |
| 208 | `\e`, | |
| 209 | "Character", | |
| 210 | "in", | |
| 211 | "single", | |
| 212 | "quotes", | |
| 213 | }, | |
| 214 | nil, | |
| 215 | }, | |
| 216 | {`Escaped '\'' \"\'\" single quote`, | |
| 217 | []string{ | |
| 218 | "Escaped", | |
| 219 | `\ \"\"`, | |
| 220 | "single", | |
| 221 | "quote", | |
| 222 | }, | |
| 223 | nil, | |
| 224 | }, | |
| 225 | {`Escaped "\"" \'\"\' double quote`, | |
| 226 | []string{ | |
| 227 | "Escaped", | |
| 228 | `"`, | |
| 229 | `'"'`, | |
| 230 | "double", | |
| 231 | "quote", | |
| 232 | }, | |
| 233 | nil, | |
| 234 | }, | |
| 235 | {`"'Strip extra layer of quotes'"`, | |
| 236 | []string{`'Strip extra layer of quotes'`}, | |
| 237 | nil, | |
| 238 | }, | |
| 239 | } | |
| 240 | ||
| 241 | func TestSplitNonPOSIX(t *testing.T) { | |
| 242 | testSplit(t, false) | |
| 243 | } | |
| 244 | ||
| 245 | func TestSplitPOSIX(t *testing.T) { | |
| 246 | testSplit(t, true) | |
| 247 | } | |
| 248 | ||
| 249 | func testSplit(t *testing.T, posix bool) { | |
| 250 | var data []struct { | |
| 251 | in string | |
| 252 | out []string | |
| 253 | err error | |
| 254 | } | |
| 255 | if posix { | |
| 256 | data = dataposix | |
| 257 | } else { | |
| 258 | data = datanonposix | |
| 259 | } | |
| 260 | ||
| 261 | for _, d := range data { | |
| 262 | t.Logf("Spliting: `%s'", d.in) | |
| 263 | ||
| 264 | result, err := NewLexerString(d.in, posix, false).Split() | |
| 265 | ||
| 266 | // check closing and escaped error | |
| 267 | if err != d.err { | |
| 268 | printToken(result) | |
| 269 | t.Fatalf("Error expected: `%v', but result catched: `%v'", | |
| 270 | d.err, err) | |
| 271 | } | |
| 272 | ||
| 273 | // check splited number | |
| 274 | if len(result) != len(d.out) { | |
| 275 | printToken(result) | |
| 276 | t.Fatalf("Split expeced: `%d', but result founds: `%d'", | |
| 277 | len(d.out), len(result)) | |
| 278 | } | |
| 279 | ||
| 280 | // check words | |
| 281 | for j, out := range d.out { | |
| 282 | if result[j] != out { | |
| 283 | printToken(result) | |
| 284 | t.Fatalf("Word expeced: `%s', but result founds: `%s' in %d", | |
| 285 | out, result[j], j) | |
| 286 | } | |
| 287 | } | |
| 288 | t.Log("ok") | |
| 289 | } | |
| 290 | } | |
| 291 | ||
// printToken writes each token of s to standard output on its own line.
func printToken(s []string) {
	for i := 0; i < len(s); i++ {
		fmt.Println(s[i])
	}
}