Commit 22ac6a6417dc4bcf3c5ccbee14fb55c1e1019aa6 - golang-github-anmitsu-go-shlex

New upstream version 0.0~git20161002.648efa6 Dawid Dziurla 7 years ago

6 changed file(s) with 649 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all

-0

.gitignore less more

shlex.test

+20

-0

LICENSE less more

	0	Copyright (c) anmitsu <anmitsu.s@gmail.com>
	1
	2	Permission is hereby granted, free of charge, to any person obtaining
	3	a copy of this software and associated documentation files (the
	4	"Software"), to deal in the Software without restriction, including
	5	without limitation the rights to use, copy, modify, merge, publish,
	6	distribute, sublicense, and/or sell copies of the Software, and to
	7	permit persons to whom the Software is furnished to do so, subject to
	8	the following conditions:
	9
	10	The above copyright notice and this permission notice shall be
	11	included in all copies or substantial portions of the Software.
	12
	13	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
	14	EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
	15	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
	16	NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
	17	LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
	18	OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
	19	WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

+38

-0

README.md less more

	0	# go-shlex
	1
	2	go-shlex is a Go library for building a lexical analyzer that splits
	3	text into words the way a Unix shell does.
	4
	5	## Install
	6
	7	go get -u "github.com/anmitsu/go-shlex"
	8
	9	## Usage
	10
	11	```go
	12	package main
	13
	14	import (
	15	"fmt"
	16	"log"
	17
	18	"github.com/anmitsu/go-shlex"
	19	)
	20
	21	func main() {
	22	cmd := `cp -Rdp "file name" 'file name2' dir\ name`
	23	words, err := shlex.Split(cmd, true)
	24	if err != nil {
	25	log.Fatal(err)
	26	}
	27
	28	for _, w := range words {
	29	fmt.Println(w)
	30	}
	31	}
	32	```
	33
	34	## Documentation
	35
	36	http://godoc.org/github.com/anmitsu/go-shlex
	37

+100

-0

example_test.go less more

	0	package shlex_test
	1
	2	import (
	3	"fmt"
	4	"log"
	5
	6	"github.com/anmitsu/go-shlex"
	7	flynn_shlex "github.com/flynn/go-shlex"
	8	)
	9
	10	func ExampleSplit() {
	11	cmd := `cp -Rdp "file name" 'file name2' dir\ name`
	12
	13	// Split of cmd with POSIX mode.
	14	words1, err := shlex.Split(cmd, true)
	15	if err != nil {
	16	log.Fatal(err)
	17	}
	18	// Split of cmd with Non-POSIX mode.
	19	words2, err := shlex.Split(cmd, false)
	20	if err != nil {
	21	log.Fatal(err)
	22	}
	23
	24	fmt.Println("Source command:")
	25	fmt.Println(`cp -Rdp "file name" 'file name2' dir\ name`)
	26	fmt.Println()
	27
	28	fmt.Println("POSIX mode:")
	29	for _, word := range words1 {
	30	fmt.Println(word)
	31	}
	32	fmt.Println()
	33	fmt.Println("Non-POSIX mode:")
	34	for _, word := range words2 {
	35	fmt.Println(word)
	36	}
	37
	38	// Output:
	39	// Source command:
	40	// cp -Rdp "file name" 'file name2' dir\ name
	41	//
	42	// POSIX mode:
	43	// cp
	44	// -Rdp
	45	// file name
	46	// file name2
	47	// dir name
	48	//
	49	// Non-POSIX mode:
	50	// cp
	51	// -Rdp
	52	// "file name"
	53	// 'file name2'
	54	// dir\
	55	// name
	56	}
	57
	58	func ExampleSplit_compareFlynn() {
	59	cmd := `English and 日本語`
	60
	61	// Split for github.com/flynn/go-shlex imported as flynn_shlex
	62	words_flynn, err1 := flynn_shlex.Split(cmd)
	63
	64	// Split for github.com/anmitsu/go-shlex
	65	words_anmitsu, err2 := shlex.Split(cmd, true)
	66
	67	fmt.Println("Source string:")
	68	fmt.Println(cmd)
	69	fmt.Println()
	70
	71	fmt.Println("Result of github.com/flynn/go-shlex:")
	72	for _, word := range words_flynn {
	73	fmt.Println(word)
	74	}
	75	fmt.Println(err1.Error())
	76
	77	fmt.Println()
	78	fmt.Println("Result of github.com/anmitsu/go-shlex:")
	79	for _, word := range words_anmitsu {
	80	fmt.Println(word)
	81	}
	82	if err2 != nil {
	83	fmt.Println(err2.Error())
	84	}
	85
	86	// Output:
	87	// Source string:
	88	// English and 日本語
	89	//
	90	// Result of github.com/flynn/go-shlex:
	91	// English
	92	// and
	93	// Unknown rune: 26085
	94	//
	95	// Result of github.com/anmitsu/go-shlex:
	96	// English
	97	// and
	98	// 日本語
	99	}

+193

-0

shlex.go less more

	0	// Package shlex provides simple lexical analysis in the style of a Unix shell.
	1	package shlex
	2
	3	import (
	4	"bufio"
	5	"errors"
	6	"io"
	7	"strings"
	8	"unicode"
	9	)
	10
	// Sentinel errors reported when reading stops in the middle of a construct.
	var (
		// ErrNoClosing is returned when reading stops while a quotation is
		// still open.
		ErrNoClosing = errors.New("No closing quotation")
		// ErrNoEscaped is returned when reading stops right after an escape
		// character, before the character it escapes.
		ErrNoEscaped = errors.New("No escaped character")
	)
	15
	// Tokenizer is the interface that classifies a rune as a word character,
	// whitespace, a quotation mark, an escape character, or a quotation mark
	// inside which escapes are honored.
	type Tokenizer interface {
		// IsWord reports whether r is a word character.
		IsWord(r rune) bool
		// IsWhitespace reports whether r separates tokens.
		IsWhitespace(r rune) bool
		// IsQuote reports whether r opens (and closes) a quotation.
		IsQuote(r rune) bool
		// IsEscape reports whether r is the escape character.
		IsEscape(r rune) bool
		// IsEscapedQuote reports whether r is a quotation mark inside which
		// the escape character is still interpreted in POSIX mode.
		IsEscapedQuote(r rune) bool
	}
	25
	// DefaultTokenizer implements a simple tokenizer like Unix shell.
	type DefaultTokenizer struct{}

	// IsWord reports whether r is an underscore, a Unicode letter or a Unicode
	// number.
	func (t *DefaultTokenizer) IsWord(r rune) bool {
		return r == '_' || unicode.IsLetter(r) || unicode.IsNumber(r)
	}

	// IsQuote reports whether r is a single or a double quotation mark.
	func (t *DefaultTokenizer) IsQuote(r rune) bool {
		return r == '\'' || r == '"'
	}

	// IsWhitespace reports whether r is a Unicode white space character.
	func (t *DefaultTokenizer) IsWhitespace(r rune) bool {
		return unicode.IsSpace(r)
	}

	// IsEscape reports whether r is a backslash.
	func (t *DefaultTokenizer) IsEscape(r rune) bool {
		return r == '\\'
	}

	// IsEscapedQuote reports whether r is a double quotation mark, the only
	// quote inside which this tokenizer lets escapes be interpreted.
	func (t *DefaultTokenizer) IsEscapedQuote(r rune) bool {
		return r == '"'
	}
	49
	50	// Lexer represents a lexical analyzer.
	51	type Lexer struct {
	52	reader *bufio.Reader
	53	tokenizer Tokenizer
	54	posix bool
	55	whitespacesplit bool
	56	}
	57
	58	// NewLexer creates a new Lexer reading from io.Reader. This Lexer
	59	// has a DefaultTokenizer according to posix and whitespacesplit
	60	// rules.
	61	func NewLexer(r io.Reader, posix, whitespacesplit bool) *Lexer {
	62	return &Lexer{
	63	reader: bufio.NewReader(r),
	64	tokenizer: &DefaultTokenizer{},
	65	posix: posix,
	66	whitespacesplit: whitespacesplit,
	67	}
	68	}
	69
	70	// NewLexerString creates a new Lexer reading from a string. This
	71	// Lexer has a DefaultTokenizer according to posix and whitespacesplit
	72	// rules.
	73	func NewLexerString(s string, posix, whitespacesplit bool) *Lexer {
	74	return NewLexer(strings.NewReader(s), posix, whitespacesplit)
	75	}
	76
	77	// Split splits a string according to posix or non-posix rules.
	78	func Split(s string, posix bool) ([]string, error) {
	79	return NewLexerString(s, posix, true).Split()
	80	}
	81
	82	// SetTokenizer replaces the Tokenizer that l uses to classify runes with t.
	83	func (l *Lexer) SetTokenizer(t Tokenizer) {
	84	l.tokenizer = t
	85	}
	86
	87	func (l *Lexer) Split() ([]string, error) {
	88	result := make([]string, 0)
	89	for {
	90	token, err := l.readToken()
	91	if token != "" {
	92	result = append(result, token)
	93	}
	94
	95	if err == io.EOF {
	96	break
	97	} else if err != nil {
	98	return result, err
	99	}
	100	}
	101	return result, nil
	102	}
	103
	104	func (l *Lexer) readToken() (string, error) {
	105	t := l.tokenizer
	106	token := ""
	107	quoted := false
	108	state := ' '
	109	escapedstate := ' '
	110	scanning:
	111	for {
	112	next, _, err := l.reader.ReadRune()
	113	if err != nil {
	114	if t.IsQuote(state) {
	115	return token, ErrNoClosing
	116	} else if t.IsEscape(state) {
	117	return token, ErrNoEscaped
	118	}
	119	return token, err
	120	}
	121
	122	switch {
	123	case t.IsWhitespace(state):
	124	switch {
	125	case t.IsWhitespace(next):
	126	break scanning
	127	case l.posix && t.IsEscape(next):
	128	escapedstate = 'a'
	129	state = next
	130	case t.IsWord(next):
	131	token += string(next)
	132	state = 'a'
	133	case t.IsQuote(next):
	134	if !l.posix {
	135	token += string(next)
	136	}
	137	state = next
	138	default:
	139	token = string(next)
	140	if l.whitespacesplit {
	141	state = 'a'
	142	} else if token != "" \|\| (l.posix && quoted) {
	143	break scanning
	144	}
	145	}
	146	case t.IsQuote(state):
	147	quoted = true
	148	switch {
	149	case next == state:
	150	if !l.posix {
	151	token += string(next)
	152	break scanning
	153	} else {
	154	state = 'a'
	155	}
	156	case l.posix && t.IsEscape(next) && t.IsEscapedQuote(state):
	157	escapedstate = state
	158	state = next
	159	default:
	160	token += string(next)
	161	}
	162	case t.IsEscape(state):
	163	if t.IsQuote(escapedstate) && next != state && next != escapedstate {
	164	token += string(state)
	165	}
	166	token += string(next)
	167	state = escapedstate
	168	case t.IsWord(state):
	169	switch {
	170	case t.IsWhitespace(next):
	171	if token != "" \|\| (l.posix && quoted) {
	172	break scanning
	173	}
	174	case l.posix && t.IsQuote(next):
	175	state = next
	176	case l.posix && t.IsEscape(next):
	177	escapedstate = 'a'
	178	state = next
	179	case t.IsWord(next) \|\| t.IsQuote(next):
	180	token += string(next)
	181	default:
	182	if l.whitespacesplit {
	183	token += string(next)
	184	} else if token != "" {
	185	l.reader.UnreadRune()
	186	break scanning
	187	}
	188	}
	189	}
	190	}
	191	return token, nil
	192	}

+297

-0

shlex_test.go less more

	0	package shlex
	1
	2	import (
	3	"fmt"
	4	"testing"
	5	)
	6
	7	var datanonposix = []struct {
	8	in string
	9	out []string
	10	err error
	11	}{
	12	{`This string has an embedded apostrophe, doesn't it?`,
	13	[]string{
	14	"This",
	15	"string",
	16	"has",
	17	"an",
	18	"embedded",
	19	"apostrophe",
	20	",",
	21	"doesn't",
	22	"it",
	23	"?",
	24	},
	25	nil,
	26	},
	27	{"This string has embedded \"double quotes\" and 'single quotes' in it,\nand even \"a 'nested example'\".\n",
	28	[]string{
	29	"This",
	30	"string",
	31	"has",
	32	"embedded",
	33	`"double quotes"`,
	34	"and",
	35	`'single quotes'`,
	36	"in",
	37	"it",
	38	",",
	39	"and",
	40	"even",
	41	`"a 'nested example'"`,
	42	".",
	43	},
	44	nil,
	45	},
	46	{`Hello world!, こんにちは　世界！`,
	47	[]string{
	48	"Hello",
	49	"world",
	50	"!",
	51	",",
	52	"こんにちは",
	53	"世界",
	54	"！",
	55	},
	56	nil,
	57	},
	58	{`Do"Not"Separate`,
	59	[]string{`Do"Not"Separate`},
	60	nil,
	61	},
	62	{`"Do"Separate`,
	63	[]string{`"Do"`, "Separate"},
	64	nil,
	65	},
	66	{`Escaped \e Character not in quotes`,
	67	[]string{
	68	"Escaped",
	69	`\`,
	70	"e",
	71	"Character",
	72	"not",
	73	"in",
	74	"quotes",
	75	},
	76	nil,
	77	},
	78	{`Escaped "\e" Character in double quotes`,
	79	[]string{
	80	"Escaped",
	81	`"\e"`,
	82	"Character",
	83	"in",
	84	"double",
	85	"quotes",
	86	},
	87	nil,
	88	},
	89	{`Escaped '\e' Character in single quotes`,
	90	[]string{
	91	"Escaped",
	92	`'\e'`,
	93	"Character",
	94	"in",
	95	"single",
	96	"quotes",
	97	},
	98	nil,
	99	},
	100	{`Escaped '\'' \"\'\" single quote`,
	101	[]string{
	102	"Escaped",
	103	`'\'`,
	104	`' \"\'`,
	105	`\`,
	106	`" single quote`,
	107	},
	108	ErrNoClosing,
	109	},
	110	{`Escaped "\"" \'\"\' double quote`,
	111	[]string{
	112	"Escaped",
	113	`"\"`,
	114	`" \'\"`,
	115	`\`,
	116	`' double quote`,
	117	},
	118	ErrNoClosing,
	119	},
	120	{`"'Strip extra layer of quotes'"`,
	121	[]string{`"'Strip extra layer of quotes'"`},
	122	nil,
	123	},
	124	}
	125
	126	var dataposix = []struct {
	127	in string
	128	out []string
	129	err error
	130	}{
	131	{`This string has an embedded apostrophe, doesn't it?`,
	132	[]string{
	133	"This",
	134	"string",
	135	"has",
	136	"an",
	137	"embedded",
	138	"apostrophe",
	139	",",
	140	"doesnt it?",
	141	},
	142	ErrNoClosing,
	143	},
	144	{"This string has embedded \"double quotes\" and 'single quotes' in it,\nand even \"a 'nested example'\".\n",
	145	[]string{
	146	"This",
	147	"string",
	148	"has",
	149	"embedded",
	150	`double quotes`,
	151	"and",
	152	`single quotes`,
	153	"in",
	154	"it",
	155	",",
	156	"and",
	157	"even",
	158	`a 'nested example'`,
	159	".",
	160	},
	161	nil,
	162	},
	163	{`Hello world!, こんにちは　世界！`,
	164	[]string{
	165	"Hello",
	166	"world",
	167	"!",
	168	",",
	169	"こんにちは",
	170	"世界",
	171	"！",
	172	},
	173	nil,
	174	},
	175	{`Do"Not"Separate`,
	176	[]string{`DoNotSeparate`},
	177	nil,
	178	},
	179	{`"Do"Separate`,
	180	[]string{"DoSeparate"},
	181	nil,
	182	},
	183	{`Escaped \e Character not in quotes`,
	184	[]string{
	185	"Escaped",
	186	"e",
	187	"Character",
	188	"not",
	189	"in",
	190	"quotes",
	191	},
	192	nil,
	193	},
	194	{`Escaped "\e" Character in double quotes`,
	195	[]string{
	196	"Escaped",
	197	`\e`,
	198	"Character",
	199	"in",
	200	"double",
	201	"quotes",
	202	},
	203	nil,
	204	},
	205	{`Escaped '\e' Character in single quotes`,
	206	[]string{
	207	"Escaped",
	208	`\e`,
	209	"Character",
	210	"in",
	211	"single",
	212	"quotes",
	213	},
	214	nil,
	215	},
	216	{`Escaped '\'' \"\'\" single quote`,
	217	[]string{
	218	"Escaped",
	219	`\ \"\"`,
	220	"single",
	221	"quote",
	222	},
	223	nil,
	224	},
	225	{`Escaped "\"" \'\"\' double quote`,
	226	[]string{
	227	"Escaped",
	228	`"`,
	229	`'"'`,
	230	"double",
	231	"quote",
	232	},
	233	nil,
	234	},
	235	{`"'Strip extra layer of quotes'"`,
	236	[]string{`'Strip extra layer of quotes'`},
	237	nil,
	238	},
	239	}
	240
	241	func TestSplitNonPOSIX(t *testing.T) {
	242	testSplit(t, false)
	243	}
	244
	245	func TestSplitPOSIX(t *testing.T) {
	246	testSplit(t, true)
	247	}
	248
	249	func testSplit(t *testing.T, posix bool) {
	250	var data []struct {
	251	in string
	252	out []string
	253	err error
	254	}
	255	if posix {
	256	data = dataposix
	257	} else {
	258	data = datanonposix
	259	}
	260
	261	for _, d := range data {
	262	t.Logf("Spliting: `%s'", d.in)
	263
	264	result, err := NewLexerString(d.in, posix, false).Split()
	265
	266	// check closing and escaped error
	267	if err != d.err {
	268	printToken(result)
	269	t.Fatalf("Error expected: `%v', but result catched: `%v'",
	270	d.err, err)
	271	}
	272
	273	// check splited number
	274	if len(result) != len(d.out) {
	275	printToken(result)
	276	t.Fatalf("Split expeced: `%d', but result founds: `%d'",
	277	len(d.out), len(result))
	278	}
	279
	280	// check words
	281	for j, out := range d.out {
	282	if result[j] != out {
	283	printToken(result)
	284	t.Fatalf("Word expeced: `%s', but result founds: `%s' in %d",
	285	out, result[j], j)
	286	}
	287	}
	288	t.Log("ok")
	289	}
	290	}
	291
	// printToken writes every element of s to standard output, one per line.
	func printToken(s []string) {
		for i := range s {
			fmt.Println(s[i])
		}
	}
	296	}