Initial commit
Arran Walker
4 years ago
0 | MIT License | |
1 | ||
2 | Copyright (c) 2019 Arran Walker | |
3 | ||
4 | Permission is hereby granted, free of charge, to any person obtaining a copy | |
5 | of this software and associated documentation files (the "Software"), to deal | |
6 | in the Software without restriction, including without limitation the rights | |
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
8 | copies of the Software, and to permit persons to whom the Software is | |
9 | furnished to do so, subject to the following conditions: | |
10 | ||
11 | The above copyright notice and this permission notice shall be included in all | |
12 | copies or substantial portions of the Software. | |
13 | ||
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
20 | SOFTWARE.⏎ |
0 | # walker | |
1 | ||
2 | [![](https://godoc.org/github.com/saracen/walker?status.svg)](http://godoc.org/github.com/saracen/walker) | |
3 | ||
`walker` is a faster, parallel version of `filepath.Walk`.
5 | ||
6 | ``` | |
walker.Walk("/tmp", func(pathname string, fi os.FileInfo) error {
	fmt.Printf("%s: %d bytes\n", pathname, fi.Size())
	return nil
})
10 | ``` | |
11 | ||
12 | ## Benchmarks | |
13 | ||
14 | - Standard library (`filepath.Walk`) is `FilepathWalk`. | |
- This library is `WalkerWalk`.
- `FastwalkWalk` is [fastwalk](https://github.com/golang/tools/tree/master/internal/fastwalk).
- `GodirwalkWalk` is [godirwalk](https://github.com/karrick/godirwalk).
18 | ||
19 | `Fastwalk` and `Godirwalk` reduce the syscall count by leaving `os.Lstat` up to the user, should they require a full `os.FileInfo`. This library instead performs the `os.Lstat` call, for better compatibility with `filepath.Walk`, and attempts to reduce the time taken through other means. | |
20 | ||
21 | These benchmarks were performed with a warm cache. | |
22 | ||
23 | ``` | |
24 | goos: linux | |
25 | goarch: amd64 | |
26 | pkg: github.com/saracen/walker | |
27 | BenchmarkFilepathWalk-24 1 1437479938 ns/op 330704912 B/op 758715 allocs/op | |
28 | BenchmarkWalkerWalk-24 20 100948844 ns/op 71853010 B/op 593451 allocs/op | |
29 | BenchmarkFastwalkWalk-24 5 233001916 ns/op 72442246 B/op 581916 allocs/op | |
30 | BenchmarkGodirwalkWalk-24 2 705022087 ns/op 141308672 B/op 707996 allocs/op | |
31 | ``` | |
32 | ||
33 | ``` | |
34 | goos: windows | |
35 | goarch: amd64 | |
36 | pkg: github.com/saracen/walker | |
37 | BenchmarkFilepathWalk-16 1 3100710700 ns/op 269683440 B/op 1467916 allocs/op | |
38 | BenchmarkWalkerWalk-16 4 285985675 ns/op 137157000 B/op 877448 allocs/op | |
39 | BenchmarkFastwalkWalk-16 2 988358100 ns/op 268348560 B/op 1474482 allocs/op | |
40 | BenchmarkGodirwalkWalk-16 1 1200790300 ns/op 111854272 B/op 1310532 allocs/op | |
41 | ``` | |
42 | ||
Performing benchmarks without having the OS cache the directory information isn't straightforward, but to get a sense of the performance, we can flush the cache and roughly time how long it takes to walk a directory:
44 | ||
45 | #### filepath.Walk | |
46 | ``` | |
47 | $ sudo su -c 'sync; echo 3 > /proc/sys/vm/drop_caches'; go test -v -run TestFilepathWalkDir -benchdir $GOPATH | |
48 | ok github.com/saracen/walker 5.790s | |
49 | ``` | |
50 | ||
51 | #### walker | |
52 | ``` | |
53 | $ sudo su -c 'sync; echo 3 > /proc/sys/vm/drop_caches'; go test -v -run TestWalkerDir -benchdir $GOPATH | |
54 | ok github.com/saracen/walker 0.593s | |
55 | ``` | |
56 | ||
57 | #### fastwalk | |
58 | ``` | |
59 | $ sudo su -c 'sync; echo 3 > /proc/sys/vm/drop_caches'; go test -v -run TestFastwalkDir -benchdir $GOPATH | |
60 | ok github.com/saracen/walker 0.551s | |
61 | ``` | |
62 | ||
63 | #### godirwalk | |
64 | ``` | |
65 | $ sudo su -c 'sync; echo 3 > /proc/sys/vm/drop_caches'; go test -v -run TestGodirwalkDir -benchdir $GOPATH | |
66 | ok github.com/saracen/walker 3.879s | |
67 | ``` | |
68 | ||
69 | In this case, `fastwalk` is faster. This is due to it not having to perform an additional `lstat`. The time is almost identical to `walker` if you perform the `lstat` call yourself. |
0 | module github.com/saracen/walker | |
1 | ||
2 | go 1.12 | |
3 | ||
4 | require ( | |
5 | golang.org/x/sys v0.0.0-20191010194322-b09406accb47 | |
6 | golang.org/x/tools v0.0.0-20191011211836-4c025a95b26e // indirect | |
7 | ) |
0 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= | |
1 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= | |
2 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= | |
3 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= | |
4 | golang.org/x/sys v0.0.0-20191010194322-b09406accb47 h1:/XfQ9z7ib8eEJX2hdgFTZJ/ntt0swNk5oYBziWeTCvY= | |
5 | golang.org/x/sys v0.0.0-20191010194322-b09406accb47/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= | |
6 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= | |
7 | golang.org/x/tools v0.0.0-20191011211836-4c025a95b26e h1:1o2bDs9pCd2xFhdwqJTrCIswAeEsn4h/PCNelWpfcsI= | |
8 | golang.org/x/tools v0.0.0-20191011211836-4c025a95b26e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= | |
9 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= |
0 | // Copyright 2016 The Go Authors. All rights reserved. | |
1 | // Use of this source code is governed by a BSD-style | |
2 | // license that can be found in the LICENSE file. | |
3 | ||
4 | // Package fastwalk provides a faster version of filepath.Walk for file system | |
5 | // scanning tools. | |
6 | package fastwalk | |
7 | ||
8 | import ( | |
9 | "errors" | |
10 | "os" | |
11 | "path/filepath" | |
12 | "runtime" | |
13 | "sync" | |
14 | ) | |
15 | ||
// Sentinel errors a walk callback can return to steer the traversal.
var (
	// TraverseLink is returned by a WalkFunc to indicate that the symlink
	// named in the call may be traversed.
	TraverseLink = errors.New("fastwalk: traverse symlink, assuming target is a directory")

	// SkipFiles is returned by a WalkFunc to indicate that the callback
	// should not be called for any other files in the current directory.
	// Child directories will still be traversed.
	SkipFiles = errors.New("fastwalk: skip remaining files in directory")
)
24 | ||
25 | // Walk is a faster implementation of filepath.Walk. | |
26 | // | |
27 | // filepath.Walk's design necessarily calls os.Lstat on each file, | |
28 | // even if the caller needs less info. | |
29 | // Many tools need only the type of each file. | |
30 | // On some platforms, this information is provided directly by the readdir | |
31 | // system call, avoiding the need to stat each file individually. | |
32 | // fastwalk_unix.go contains a fork of the syscall routines. | |
33 | // | |
34 | // See golang.org/issue/16399 | |
35 | // | |
36 | // Walk walks the file tree rooted at root, calling walkFn for | |
37 | // each file or directory in the tree, including root. | |
38 | // | |
39 | // If fastWalk returns filepath.SkipDir, the directory is skipped. | |
40 | // | |
41 | // Unlike filepath.Walk: | |
42 | // * file stat calls must be done by the user. | |
43 | // The only provided metadata is the file type, which does not include | |
44 | // any permission bits. | |
45 | // * multiple goroutines stat the filesystem concurrently. The provided | |
46 | // walkFn must be safe for concurrent use. | |
47 | // * fastWalk can follow symlinks if walkFn returns the TraverseLink | |
48 | // sentinel error. It is the walkFn's responsibility to prevent | |
49 | // fastWalk from going into symlink cycles. | |
50 | func Walk(root string, walkFn func(path string, typ os.FileMode) error) error { | |
51 | // TODO(bradfitz): make numWorkers configurable? We used a | |
52 | // minimum of 4 to give the kernel more info about multiple | |
53 | // things we want, in hopes its I/O scheduling can take | |
54 | // advantage of that. Hopefully most are in cache. Maybe 4 is | |
55 | // even too low of a minimum. Profile more. | |
56 | numWorkers := 4 | |
57 | if n := runtime.NumCPU(); n > numWorkers { | |
58 | numWorkers = n | |
59 | } | |
60 | ||
61 | // Make sure to wait for all workers to finish, otherwise | |
62 | // walkFn could still be called after returning. This Wait call | |
63 | // runs after close(e.donec) below. | |
64 | var wg sync.WaitGroup | |
65 | defer wg.Wait() | |
66 | ||
67 | w := &walker{ | |
68 | fn: walkFn, | |
69 | enqueuec: make(chan walkItem, numWorkers), // buffered for performance | |
70 | workc: make(chan walkItem, numWorkers), // buffered for performance | |
71 | donec: make(chan struct{}), | |
72 | ||
73 | // buffered for correctness & not leaking goroutines: | |
74 | resc: make(chan error, numWorkers), | |
75 | } | |
76 | defer close(w.donec) | |
77 | ||
78 | for i := 0; i < numWorkers; i++ { | |
79 | wg.Add(1) | |
80 | go w.doWork(&wg) | |
81 | } | |
82 | todo := []walkItem{{dir: root}} | |
83 | out := 0 | |
84 | for { | |
85 | workc := w.workc | |
86 | var workItem walkItem | |
87 | if len(todo) == 0 { | |
88 | workc = nil | |
89 | } else { | |
90 | workItem = todo[len(todo)-1] | |
91 | } | |
92 | select { | |
93 | case workc <- workItem: | |
94 | todo = todo[:len(todo)-1] | |
95 | out++ | |
96 | case it := <-w.enqueuec: | |
97 | todo = append(todo, it) | |
98 | case err := <-w.resc: | |
99 | out-- | |
100 | if err != nil { | |
101 | return err | |
102 | } | |
103 | if out == 0 && len(todo) == 0 { | |
104 | // It's safe to quit here, as long as the buffered | |
105 | // enqueue channel isn't also readable, which might | |
106 | // happen if the worker sends both another unit of | |
107 | // work and its result before the other select was | |
108 | // scheduled and both w.resc and w.enqueuec were | |
109 | // readable. | |
110 | select { | |
111 | case it := <-w.enqueuec: | |
112 | todo = append(todo, it) | |
113 | default: | |
114 | return nil | |
115 | } | |
116 | } | |
117 | } | |
118 | } | |
119 | } | |
120 | ||
121 | // doWork reads directories as instructed (via workc) and runs the | |
122 | // user's callback function. | |
123 | func (w *walker) doWork(wg *sync.WaitGroup) { | |
124 | defer wg.Done() | |
125 | for { | |
126 | select { | |
127 | case <-w.donec: | |
128 | return | |
129 | case it := <-w.workc: | |
130 | select { | |
131 | case <-w.donec: | |
132 | return | |
133 | case w.resc <- w.walk(it.dir, !it.callbackDone): | |
134 | } | |
135 | } | |
136 | } | |
137 | } | |
138 | ||
type (
	// walker carries the user callback and the channels that connect the
	// coordinating loop with its worker goroutines.
	walker struct {
		fn func(path string, typ os.FileMode) error

		donec    chan struct{} // closed on fastWalk's return
		workc    chan walkItem // to workers
		enqueuec chan walkItem // from workers
		resc     chan error    // from workers
	}

	// walkItem is one directory queued for walking.
	walkItem struct {
		dir          string
		callbackDone bool // callback already called; don't do it again
	}
)
152 | ||
153 | func (w *walker) enqueue(it walkItem) { | |
154 | select { | |
155 | case w.enqueuec <- it: | |
156 | case <-w.donec: | |
157 | } | |
158 | } | |
159 | ||
160 | func (w *walker) onDirEnt(dirName, baseName string, typ os.FileMode) error { | |
161 | joined := dirName + string(os.PathSeparator) + baseName | |
162 | if typ == os.ModeDir { | |
163 | w.enqueue(walkItem{dir: joined}) | |
164 | return nil | |
165 | } | |
166 | ||
167 | err := w.fn(joined, typ) | |
168 | if typ == os.ModeSymlink { | |
169 | if err == TraverseLink { | |
170 | // Set callbackDone so we don't call it twice for both the | |
171 | // symlink-as-symlink and the symlink-as-directory later: | |
172 | w.enqueue(walkItem{dir: joined, callbackDone: true}) | |
173 | return nil | |
174 | } | |
175 | if err == filepath.SkipDir { | |
176 | // Permit SkipDir on symlinks too. | |
177 | return nil | |
178 | } | |
179 | } | |
180 | return err | |
181 | } | |
182 | ||
183 | func (w *walker) walk(root string, runUserCallback bool) error { | |
184 | if runUserCallback { | |
185 | err := w.fn(root, os.ModeDir) | |
186 | if err == filepath.SkipDir { | |
187 | return nil | |
188 | } | |
189 | if err != nil { | |
190 | return err | |
191 | } | |
192 | } | |
193 | ||
194 | return readDir(root, w.onDirEnt) | |
195 | } |
0 | // Copyright 2016 The Go Authors. All rights reserved. | |
1 | // Use of this source code is governed by a BSD-style | |
2 | // license that can be found in the LICENSE file. | |
3 | ||
4 | // +build freebsd openbsd netbsd | |
5 | ||
6 | package fastwalk | |
7 | ||
8 | import "syscall" | |
9 | ||
10 | func direntInode(dirent *syscall.Dirent) uint64 { | |
11 | return uint64(dirent.Fileno) | |
12 | } |
0 | // Copyright 2016 The Go Authors. All rights reserved. | |
1 | // Use of this source code is governed by a BSD-style | |
2 | // license that can be found in the LICENSE file. | |
3 | ||
4 | // +build linux darwin | |
5 | // +build !appengine | |
6 | ||
7 | package fastwalk | |
8 | ||
9 | import "syscall" | |
10 | ||
// direntInode returns the Ino field of dirent widened to uint64.
func direntInode(dirent *syscall.Dirent) uint64 {
	ino := dirent.Ino
	return uint64(ino)
}
0 | // Copyright 2018 The Go Authors. All rights reserved. | |
1 | // Use of this source code is governed by a BSD-style | |
2 | // license that can be found in the LICENSE file. | |
3 | ||
4 | // +build darwin freebsd openbsd netbsd | |
5 | ||
6 | package fastwalk | |
7 | ||
8 | import "syscall" | |
9 | ||
10 | func direntNamlen(dirent *syscall.Dirent) uint64 { | |
11 | return uint64(dirent.Namlen) | |
12 | } |
0 | // Copyright 2018 The Go Authors. All rights reserved. | |
1 | // Use of this source code is governed by a BSD-style | |
2 | // license that can be found in the LICENSE file. | |
3 | ||
4 | // +build linux | |
5 | // +build !appengine | |
6 | ||
7 | package fastwalk | |
8 | ||
9 | import ( | |
10 | "bytes" | |
11 | "syscall" | |
12 | "unsafe" | |
13 | ) | |
14 | ||
// direntNamlen returns the length of the NUL-terminated name stored in
// dirent. The search for the terminator is limited to the bytes that follow
// the fixed header within the record (per Reclen) and never extends past the
// Name array. It panics if no terminating zero byte is found in that range.
func direntNamlen(dirent *syscall.Dirent) uint64 {
	const (
		headerLen = uint16(unsafe.Offsetof(syscall.Dirent{}.Name))
		nameCap   = uint16(unsafe.Sizeof(syscall.Dirent{}.Name))
	)
	name := (*[unsafe.Sizeof(dirent.Name)]byte)(unsafe.Pointer(&dirent.Name[0]))

	searchLen := dirent.Reclen - headerLen
	if searchLen > nameCap {
		searchLen = nameCap
	}

	end := bytes.IndexByte(name[:searchLen], 0)
	if end < 0 {
		panic("failed to find terminating 0 byte in dirent")
	}
	return uint64(end)
}
0 | // Copyright 2016 The Go Authors. All rights reserved. | |
1 | // Use of this source code is governed by a BSD-style | |
2 | // license that can be found in the LICENSE file. | |
3 | ||
4 | // +build appengine !linux,!darwin,!freebsd,!openbsd,!netbsd | |
5 | ||
6 | package fastwalk | |
7 | ||
8 | import ( | |
9 | "io/ioutil" | |
10 | "os" | |
11 | ) | |
12 | ||
13 | // readDir calls fn for each directory entry in dirName. | |
14 | // It does not descend into directories or follow symlinks. | |
15 | // If fn returns a non-nil error, readDir returns with that error | |
16 | // immediately. | |
17 | func readDir(dirName string, fn func(dirName, entName string, typ os.FileMode) error) error { | |
18 | fis, err := ioutil.ReadDir(dirName) | |
19 | if err != nil { | |
20 | return err | |
21 | } | |
22 | skipFiles := false | |
23 | for _, fi := range fis { | |
24 | if fi.Mode().IsRegular() && skipFiles { | |
25 | continue | |
26 | } | |
27 | if err := fn(dirName, fi.Name(), fi.Mode()&os.ModeType); err != nil { | |
28 | if err == SkipFiles { | |
29 | skipFiles = true | |
30 | continue | |
31 | } | |
32 | return err | |
33 | } | |
34 | } | |
35 | return nil | |
36 | } |
0 | // Copyright 2016 The Go Authors. All rights reserved. | |
1 | // Use of this source code is governed by a BSD-style | |
2 | // license that can be found in the LICENSE file. | |
3 | ||
4 | package fastwalk_test | |
5 | ||
6 | import ( | |
7 | "bytes" | |
8 | "flag" | |
9 | "fmt" | |
10 | "io/ioutil" | |
11 | "os" | |
12 | "path/filepath" | |
13 | "reflect" | |
14 | "runtime" | |
15 | "sort" | |
16 | "strings" | |
17 | "sync" | |
18 | "testing" | |
19 | ||
20 | "golang.org/x/tools/internal/fastwalk" | |
21 | ) | |
22 | ||
// formatFileModes renders m as one "path: mode" line per entry, with the
// paths sorted and left-aligned in a 20-character column, so that two maps
// can be compared by eye in a failure message.
func formatFileModes(m map[string]os.FileMode) string {
	paths := make([]string, 0, len(m))
	for p := range m {
		paths = append(paths, p)
	}
	sort.Strings(paths)

	var out bytes.Buffer
	for i := range paths {
		p := paths[i]
		fmt.Fprintf(&out, "%-20s: %v\n", p, m[p])
	}
	return out.String()
}
35 | ||
36 | func testFastWalk(t *testing.T, files map[string]string, callback func(path string, typ os.FileMode) error, want map[string]os.FileMode) { | |
37 | tempdir, err := ioutil.TempDir("", "test-fast-walk") | |
38 | if err != nil { | |
39 | t.Fatal(err) | |
40 | } | |
41 | defer os.RemoveAll(tempdir) | |
42 | for path, contents := range files { | |
43 | file := filepath.Join(tempdir, "/src", path) | |
44 | if err := os.MkdirAll(filepath.Dir(file), 0755); err != nil { | |
45 | t.Fatal(err) | |
46 | } | |
47 | var err error | |
48 | if strings.HasPrefix(contents, "LINK:") { | |
49 | err = os.Symlink(strings.TrimPrefix(contents, "LINK:"), file) | |
50 | } else { | |
51 | err = ioutil.WriteFile(file, []byte(contents), 0644) | |
52 | } | |
53 | if err != nil { | |
54 | t.Fatal(err) | |
55 | } | |
56 | } | |
57 | got := map[string]os.FileMode{} | |
58 | var mu sync.Mutex | |
59 | if err := fastwalk.Walk(tempdir, func(path string, typ os.FileMode) error { | |
60 | mu.Lock() | |
61 | defer mu.Unlock() | |
62 | if !strings.HasPrefix(path, tempdir) { | |
63 | t.Fatalf("bogus prefix on %q, expect %q", path, tempdir) | |
64 | } | |
65 | key := filepath.ToSlash(strings.TrimPrefix(path, tempdir)) | |
66 | if old, dup := got[key]; dup { | |
67 | t.Fatalf("callback called twice for key %q: %v -> %v", key, old, typ) | |
68 | } | |
69 | got[key] = typ | |
70 | return callback(path, typ) | |
71 | }); err != nil { | |
72 | t.Fatalf("callback returned: %v", err) | |
73 | } | |
74 | if !reflect.DeepEqual(got, want) { | |
75 | t.Errorf("walk mismatch.\n got:\n%v\nwant:\n%v", formatFileModes(got), formatFileModes(want)) | |
76 | } | |
77 | } | |
78 | ||
79 | func TestFastWalk_Basic(t *testing.T) { | |
80 | testFastWalk(t, map[string]string{ | |
81 | "foo/foo.go": "one", | |
82 | "bar/bar.go": "two", | |
83 | "skip/skip.go": "skip", | |
84 | }, | |
85 | func(path string, typ os.FileMode) error { | |
86 | return nil | |
87 | }, | |
88 | map[string]os.FileMode{ | |
89 | "": os.ModeDir, | |
90 | "/src": os.ModeDir, | |
91 | "/src/bar": os.ModeDir, | |
92 | "/src/bar/bar.go": 0, | |
93 | "/src/foo": os.ModeDir, | |
94 | "/src/foo/foo.go": 0, | |
95 | "/src/skip": os.ModeDir, | |
96 | "/src/skip/skip.go": 0, | |
97 | }) | |
98 | } | |
99 | ||
100 | func TestFastWalk_LongFileName(t *testing.T) { | |
101 | longFileName := strings.Repeat("x", 255) | |
102 | ||
103 | testFastWalk(t, map[string]string{ | |
104 | longFileName: "one", | |
105 | }, | |
106 | func(path string, typ os.FileMode) error { | |
107 | return nil | |
108 | }, | |
109 | map[string]os.FileMode{ | |
110 | "": os.ModeDir, | |
111 | "/src": os.ModeDir, | |
112 | "/src/" + longFileName: 0, | |
113 | }, | |
114 | ) | |
115 | } | |
116 | ||
117 | func TestFastWalk_Symlink(t *testing.T) { | |
118 | switch runtime.GOOS { | |
119 | case "windows", "plan9": | |
120 | t.Skipf("skipping on %s", runtime.GOOS) | |
121 | } | |
122 | testFastWalk(t, map[string]string{ | |
123 | "foo/foo.go": "one", | |
124 | "bar/bar.go": "LINK:../foo.go", | |
125 | "symdir": "LINK:foo", | |
126 | }, | |
127 | func(path string, typ os.FileMode) error { | |
128 | return nil | |
129 | }, | |
130 | map[string]os.FileMode{ | |
131 | "": os.ModeDir, | |
132 | "/src": os.ModeDir, | |
133 | "/src/bar": os.ModeDir, | |
134 | "/src/bar/bar.go": os.ModeSymlink, | |
135 | "/src/foo": os.ModeDir, | |
136 | "/src/foo/foo.go": 0, | |
137 | "/src/symdir": os.ModeSymlink, | |
138 | }) | |
139 | } | |
140 | ||
141 | func TestFastWalk_SkipDir(t *testing.T) { | |
142 | testFastWalk(t, map[string]string{ | |
143 | "foo/foo.go": "one", | |
144 | "bar/bar.go": "two", | |
145 | "skip/skip.go": "skip", | |
146 | }, | |
147 | func(path string, typ os.FileMode) error { | |
148 | if typ == os.ModeDir && strings.HasSuffix(path, "skip") { | |
149 | return filepath.SkipDir | |
150 | } | |
151 | return nil | |
152 | }, | |
153 | map[string]os.FileMode{ | |
154 | "": os.ModeDir, | |
155 | "/src": os.ModeDir, | |
156 | "/src/bar": os.ModeDir, | |
157 | "/src/bar/bar.go": 0, | |
158 | "/src/foo": os.ModeDir, | |
159 | "/src/foo/foo.go": 0, | |
160 | "/src/skip": os.ModeDir, | |
161 | }) | |
162 | } | |
163 | ||
164 | func TestFastWalk_SkipFiles(t *testing.T) { | |
165 | // Directory iteration order is undefined, so there's no way to know | |
166 | // which file to expect until the walk happens. Rather than mess | |
167 | // with the test infrastructure, just mutate want. | |
168 | var mu sync.Mutex | |
169 | want := map[string]os.FileMode{ | |
170 | "": os.ModeDir, | |
171 | "/src": os.ModeDir, | |
172 | "/src/zzz": os.ModeDir, | |
173 | "/src/zzz/c.go": 0, | |
174 | } | |
175 | ||
176 | testFastWalk(t, map[string]string{ | |
177 | "a_skipfiles.go": "a", | |
178 | "b_skipfiles.go": "b", | |
179 | "zzz/c.go": "c", | |
180 | }, | |
181 | func(path string, typ os.FileMode) error { | |
182 | if strings.HasSuffix(path, "_skipfiles.go") { | |
183 | mu.Lock() | |
184 | defer mu.Unlock() | |
185 | want["/src/"+filepath.Base(path)] = 0 | |
186 | return fastwalk.SkipFiles | |
187 | } | |
188 | return nil | |
189 | }, | |
190 | want) | |
191 | if len(want) != 5 { | |
192 | t.Errorf("saw too many files: wanted 5, got %v (%v)", len(want), want) | |
193 | } | |
194 | } | |
195 | ||
196 | func TestFastWalk_TraverseSymlink(t *testing.T) { | |
197 | switch runtime.GOOS { | |
198 | case "windows", "plan9": | |
199 | t.Skipf("skipping on %s", runtime.GOOS) | |
200 | } | |
201 | ||
202 | testFastWalk(t, map[string]string{ | |
203 | "foo/foo.go": "one", | |
204 | "bar/bar.go": "two", | |
205 | "skip/skip.go": "skip", | |
206 | "symdir": "LINK:foo", | |
207 | }, | |
208 | func(path string, typ os.FileMode) error { | |
209 | if typ == os.ModeSymlink { | |
210 | return fastwalk.TraverseLink | |
211 | } | |
212 | return nil | |
213 | }, | |
214 | map[string]os.FileMode{ | |
215 | "": os.ModeDir, | |
216 | "/src": os.ModeDir, | |
217 | "/src/bar": os.ModeDir, | |
218 | "/src/bar/bar.go": 0, | |
219 | "/src/foo": os.ModeDir, | |
220 | "/src/foo/foo.go": 0, | |
221 | "/src/skip": os.ModeDir, | |
222 | "/src/skip/skip.go": 0, | |
223 | "/src/symdir": os.ModeSymlink, | |
224 | "/src/symdir/foo.go": 0, | |
225 | }) | |
226 | } | |
227 | ||
228 | var benchDir = flag.String("benchdir", runtime.GOROOT(), "The directory to scan for BenchmarkFastWalk") | |
229 | ||
230 | func BenchmarkFastWalk(b *testing.B) { | |
231 | b.ReportAllocs() | |
232 | for i := 0; i < b.N; i++ { | |
233 | err := fastwalk.Walk(*benchDir, func(path string, typ os.FileMode) error { return nil }) | |
234 | if err != nil { | |
235 | b.Fatal(err) | |
236 | } | |
237 | } | |
238 | } |
0 | // Copyright 2016 The Go Authors. All rights reserved. | |
1 | // Use of this source code is governed by a BSD-style | |
2 | // license that can be found in the LICENSE file. | |
3 | ||
4 | // +build linux darwin freebsd openbsd netbsd | |
5 | // +build !appengine | |
6 | ||
7 | package fastwalk | |
8 | ||
9 | import ( | |
10 | "fmt" | |
11 | "os" | |
12 | "syscall" | |
13 | "unsafe" | |
14 | ) | |
15 | ||
16 | const blockSize = 8 << 10 | |
17 | ||
18 | // unknownFileMode is a sentinel (and bogus) os.FileMode | |
19 | // value used to represent a syscall.DT_UNKNOWN Dirent.Type. | |
20 | const unknownFileMode os.FileMode = os.ModeNamedPipe | os.ModeSocket | os.ModeDevice | |
21 | ||
22 | func readDir(dirName string, fn func(dirName, entName string, typ os.FileMode) error) error { | |
23 | fd, err := syscall.Open(dirName, 0, 0) | |
24 | if err != nil { | |
25 | return &os.PathError{Op: "open", Path: dirName, Err: err} | |
26 | } | |
27 | defer syscall.Close(fd) | |
28 | ||
29 | // The buffer must be at least a block long. | |
30 | buf := make([]byte, blockSize) // stack-allocated; doesn't escape | |
31 | bufp := 0 // starting read position in buf | |
32 | nbuf := 0 // end valid data in buf | |
33 | skipFiles := false | |
34 | for { | |
35 | if bufp >= nbuf { | |
36 | bufp = 0 | |
37 | nbuf, err = syscall.ReadDirent(fd, buf) | |
38 | if err != nil { | |
39 | return os.NewSyscallError("readdirent", err) | |
40 | } | |
41 | if nbuf <= 0 { | |
42 | return nil | |
43 | } | |
44 | } | |
45 | consumed, name, typ := parseDirEnt(buf[bufp:nbuf]) | |
46 | bufp += consumed | |
47 | if name == "" || name == "." || name == ".." { | |
48 | continue | |
49 | } | |
50 | // Fallback for filesystems (like old XFS) that don't | |
51 | // support Dirent.Type and have DT_UNKNOWN (0) there | |
52 | // instead. | |
53 | if typ == unknownFileMode { | |
54 | fi, err := os.Lstat(dirName + "/" + name) | |
55 | if err != nil { | |
56 | // It got deleted in the meantime. | |
57 | if os.IsNotExist(err) { | |
58 | continue | |
59 | } | |
60 | return err | |
61 | } | |
62 | typ = fi.Mode() & os.ModeType | |
63 | } | |
64 | if skipFiles && typ.IsRegular() { | |
65 | continue | |
66 | } | |
67 | if err := fn(dirName, name, typ); err != nil { | |
68 | if err == SkipFiles { | |
69 | skipFiles = true | |
70 | continue | |
71 | } | |
72 | return err | |
73 | } | |
74 | } | |
75 | } | |
76 | ||
77 | func parseDirEnt(buf []byte) (consumed int, name string, typ os.FileMode) { | |
78 | // golang.org/issue/15653 | |
79 | dirent := (*syscall.Dirent)(unsafe.Pointer(&buf[0])) | |
80 | if v := unsafe.Offsetof(dirent.Reclen) + unsafe.Sizeof(dirent.Reclen); uintptr(len(buf)) < v { | |
81 | panic(fmt.Sprintf("buf size of %d smaller than dirent header size %d", len(buf), v)) | |
82 | } | |
83 | if len(buf) < int(dirent.Reclen) { | |
84 | panic(fmt.Sprintf("buf size %d < record length %d", len(buf), dirent.Reclen)) | |
85 | } | |
86 | consumed = int(dirent.Reclen) | |
87 | if direntInode(dirent) == 0 { // File absent in directory. | |
88 | return | |
89 | } | |
90 | switch dirent.Type { | |
91 | case syscall.DT_REG: | |
92 | typ = 0 | |
93 | case syscall.DT_DIR: | |
94 | typ = os.ModeDir | |
95 | case syscall.DT_LNK: | |
96 | typ = os.ModeSymlink | |
97 | case syscall.DT_BLK: | |
98 | typ = os.ModeDevice | |
99 | case syscall.DT_FIFO: | |
100 | typ = os.ModeNamedPipe | |
101 | case syscall.DT_SOCK: | |
102 | typ = os.ModeSocket | |
103 | case syscall.DT_UNKNOWN: | |
104 | typ = unknownFileMode | |
105 | default: | |
106 | // Skip weird things. | |
107 | // It's probably a DT_WHT (http://lwn.net/Articles/325369/) | |
108 | // or something. Revisit if/when this package is moved outside | |
109 | // of goimports. goimports only cares about regular files, | |
110 | // symlinks, and directories. | |
111 | return | |
112 | } | |
113 | ||
114 | nameBuf := (*[unsafe.Sizeof(dirent.Name)]byte)(unsafe.Pointer(&dirent.Name[0])) | |
115 | nameLen := direntNamlen(dirent) | |
116 | ||
117 | // Special cases for common things: | |
118 | if nameLen == 1 && nameBuf[0] == '.' { | |
119 | name = "." | |
120 | } else if nameLen == 2 && nameBuf[0] == '.' && nameBuf[1] == '.' { | |
121 | name = ".." | |
122 | } else { | |
123 | name = string(nameBuf[:nameLen]) | |
124 | } | |
125 | return | |
126 | } |
0 | package walker | |
1 | ||
2 | import ( | |
3 | "os" | |
4 | "path/filepath" | |
5 | "runtime" | |
6 | "sync" | |
7 | "sync/atomic" | |
8 | ) | |
9 | ||
10 | // Walk walks the file tree rooted at root, calling walkFn for each | |
11 | // file or directory in the tree, including root. | |
12 | // | |
13 | // If fastWalk returns filepath.SkipDir, the directory is skipped. | |
14 | // | |
15 | // Multiple goroutines stat the filesystem concurrently. The provided | |
16 | // walkFn must be safe for concurrent use. | |
17 | func Walk(root string, walkFn func(pathname string, fi os.FileInfo) error) error { | |
18 | fi, err := os.Lstat(root) | |
19 | if err != nil { | |
20 | return err | |
21 | } | |
22 | if err = walkFn(root, fi); err == filepath.SkipDir { | |
23 | return nil | |
24 | } | |
25 | if err != nil || !fi.IsDir() { | |
26 | return err | |
27 | } | |
28 | ||
29 | w := walker{limit: runtime.NumCPU(), fn: walkFn} | |
30 | if w.limit < 4 { | |
31 | w.limit = 4 | |
32 | } | |
33 | ||
34 | err = w.walk(root) | |
35 | ||
36 | w.wg.Wait() | |
37 | if err := w.err.Load(); err != nil { | |
38 | return err.(error) | |
39 | } | |
40 | ||
41 | return err | |
42 | } | |
43 | ||
44 | type walker struct { | |
45 | counter uint32 | |
46 | limit int | |
47 | wg sync.WaitGroup | |
48 | err atomic.Value | |
49 | fn func(pathname string, fi os.FileInfo) error | |
50 | } | |
51 | ||
52 | func (w *walker) do(dirname string, fi os.FileInfo) error { | |
53 | pathname := dirname + string(filepath.Separator) + fi.Name() | |
54 | ||
55 | err := w.fn(pathname, fi) | |
56 | if err == filepath.SkipDir { | |
57 | return nil | |
58 | } | |
59 | if err != nil { | |
60 | return err | |
61 | } | |
62 | ||
63 | // don't follow symbolic links | |
64 | if fi.Mode()&os.ModeSymlink != 0 { | |
65 | return nil | |
66 | } | |
67 | ||
68 | if fi.IsDir() { | |
69 | current := atomic.LoadUint32(&w.counter) | |
70 | ||
71 | // if we haven't reached our goroutine limit, spawn a new one | |
72 | if current < uint32(w.limit) { | |
73 | if atomic.CompareAndSwapUint32(&w.counter, current, current+1) { | |
74 | if err := w.err.Load(); err != nil { | |
75 | return err.(error) | |
76 | } | |
77 | ||
78 | w.wg.Add(1) | |
79 | go func() { | |
80 | if err := w.walk(pathname); err != nil { | |
81 | w.err.Store(err) | |
82 | } | |
83 | ||
84 | w.wg.Done() | |
85 | atomic.AddUint32(&w.counter, ^uint32(0)) | |
86 | }() | |
87 | ||
88 | return nil | |
89 | } | |
90 | } | |
91 | ||
92 | // if we've reached our limit, continue with this goroutine | |
93 | if err := w.walk(pathname); err != nil { | |
94 | return err | |
95 | } | |
96 | } | |
97 | ||
98 | return nil | |
99 | } |
0 | // +build appengine !linux,!darwin,!freebsd,!openbsd,!netbsd | |
1 | ||
2 | package walker | |
3 | ||
4 | import "os" | |
5 | ||
6 | func (w *walker) walk(dirname string) error { | |
7 | f, err := os.Open(dirname) | |
8 | if err != nil { | |
9 | return err | |
10 | } | |
11 | ||
12 | list, err := f.Readdir(-1) | |
13 | f.Close() | |
14 | if err != nil { | |
15 | return err | |
16 | } | |
17 | ||
18 | for _, fi := range list { | |
19 | if err = w.do(dirname, fi); err != nil { | |
20 | return err | |
21 | } | |
22 | } | |
23 | return nil | |
24 | } |
0 | package walker | |
1 | ||
2 | import ( | |
3 | "flag" | |
4 | "io/ioutil" | |
5 | "os" | |
6 | "path/filepath" | |
7 | "runtime" | |
8 | "strings" | |
9 | "sync" | |
10 | "testing" | |
11 | ||
12 | //"github.com/karrick/godirwalk" | |
13 | "github.com/saracen/walker/testdata/fastwalk" | |
14 | ) | |
15 | ||
16 | func testWalk(t *testing.T, files map[string]os.FileMode) { | |
17 | dir, err := ioutil.TempDir("", "walker-test") | |
18 | if err != nil { | |
19 | t.Error(err) | |
20 | return | |
21 | } | |
22 | defer os.RemoveAll(dir) | |
23 | ||
24 | for path, mode := range files { | |
25 | path = filepath.Join(dir, path) | |
26 | if err := os.MkdirAll(filepath.Dir(path), 0777); err != nil { | |
27 | t.Fatal(err) | |
28 | } | |
29 | ||
30 | switch { | |
31 | case mode&os.ModeSymlink != 0 && mode&os.ModeDir != 0: | |
32 | if err := os.Symlink(filepath.Dir(path), path); err != nil { | |
33 | t.Fatal(err) | |
34 | } | |
35 | ||
36 | case mode&os.ModeSymlink != 0: | |
37 | if err := os.Symlink("foo/foo.go", path); err != nil { | |
38 | t.Fatal(err) | |
39 | } | |
40 | ||
41 | default: | |
42 | if err := ioutil.WriteFile(path, []byte(path), mode); err != nil { | |
43 | t.Fatal(err) | |
44 | } | |
45 | } | |
46 | } | |
47 | ||
48 | filepathResults := make(map[string]os.FileInfo) | |
49 | err = filepath.Walk(dir, func(pathname string, fi os.FileInfo, err error) error { | |
50 | if strings.Contains(pathname, "skip") { | |
51 | return filepath.SkipDir | |
52 | } | |
53 | ||
54 | filepathResults[pathname] = fi | |
55 | return nil | |
56 | }) | |
57 | if err != nil { | |
58 | t.Fatal(err) | |
59 | } | |
60 | ||
61 | var l sync.Mutex | |
62 | walkerResults := make(map[string]os.FileInfo) | |
63 | err = Walk(dir, func(pathname string, fi os.FileInfo) error { | |
64 | if strings.Contains(pathname, "skip") { | |
65 | return filepath.SkipDir | |
66 | } | |
67 | ||
68 | l.Lock() | |
69 | walkerResults[pathname] = fi | |
70 | l.Unlock() | |
71 | ||
72 | return nil | |
73 | }) | |
74 | if err != nil { | |
75 | t.Fatal(err) | |
76 | } | |
77 | ||
78 | for path, info := range filepathResults { | |
79 | info2, ok := walkerResults[path] | |
80 | if !ok { | |
81 | t.Fatalf("walk mismatch, path %q doesn't exist", path) | |
82 | } | |
83 | ||
84 | if info.IsDir() != info2.IsDir() || | |
85 | info.ModTime() != info2.ModTime() || | |
86 | info.Mode() != info2.Mode() || | |
87 | info.Name() != info2.Name() || | |
88 | info.Size() != info2.Size() { | |
89 | t.Fatalf("walk mismatch, got %v, wanted %v", info2, info) | |
90 | } | |
91 | } | |
92 | } | |
93 | ||
94 | func TestWalker(t *testing.T) { | |
95 | testWalk(t, map[string]os.FileMode{ | |
96 | "foo/foo.go": 0644, | |
97 | "bar/bar.go": 0777, | |
98 | "bar/foo/bar/foo/bar": 0600, | |
99 | "skip/file": 0700, | |
100 | "bar/symlink": os.ModeDir | os.ModeSymlink | 0777, | |
101 | "bar/symlink.go": os.ModeSymlink | 0777, | |
102 | }) | |
103 | } | |
104 | ||
// benchDir is the directory walked by the *Dir tests and the benchmarks in
// this file. It defaults to runtime.GOROOT() and is set with -benchdir.
var benchDir = flag.String(
	"benchdir",
	runtime.GOROOT(),
	"The directory to scan for BenchmarkFilepathWalk and BenchmarkWalkerWalk",
)
106 | ||
107 | func TestFilepathWalkDir(t *testing.T) { | |
108 | err := filepath.Walk(*benchDir, func(pathname string, fi os.FileInfo, err error) error { return nil }) | |
109 | if err != nil { | |
110 | t.Fatal(err) | |
111 | } | |
112 | } | |
113 | ||
114 | func BenchmarkFilepathWalk(b *testing.B) { | |
115 | b.ReportAllocs() | |
116 | for i := 0; i < b.N; i++ { | |
117 | err := filepath.Walk(*benchDir, func(pathname string, fi os.FileInfo, err error) error { return nil }) | |
118 | if err != nil { | |
119 | b.Fatal(err) | |
120 | } | |
121 | } | |
122 | } | |
123 | ||
124 | func TestWalkerDir(t *testing.T) { | |
125 | err := Walk(*benchDir, func(pathname string, fi os.FileInfo) error { return nil }) | |
126 | if err != nil { | |
127 | t.Fatal(err) | |
128 | } | |
129 | } | |
130 | ||
131 | func BenchmarkWalkerWalk(b *testing.B) { | |
132 | b.ReportAllocs() | |
133 | for i := 0; i < b.N; i++ { | |
134 | err := Walk(*benchDir, func(pathname string, fi os.FileInfo) error { return nil }) | |
135 | if err != nil { | |
136 | b.Fatal(err) | |
137 | } | |
138 | } | |
139 | } | |
140 | ||
141 | func TestFastwalkDir(t *testing.T) { | |
142 | err := fastwalk.Walk(*benchDir, func(pathname string, mode os.FileMode) error { | |
143 | return nil | |
144 | }) | |
145 | if err != nil { | |
146 | t.Fatal(err) | |
147 | } | |
148 | } | |
149 | ||
150 | func BenchmarkFastwalkWalk(b *testing.B) { | |
151 | b.ReportAllocs() | |
152 | for i := 0; i < b.N; i++ { | |
153 | err := fastwalk.Walk(*benchDir, func(pathname string, mode os.FileMode) error { | |
154 | _, err := os.Lstat(pathname) | |
155 | return err | |
156 | }) | |
157 | if err != nil { | |
158 | b.Fatal(err) | |
159 | } | |
160 | } | |
161 | } | |
162 | ||
163 | /*func TestGodirwalkDir(t *testing.T) { | |
164 | err := godirwalk.Walk(*benchDir, &godirwalk.Options{ | |
165 | Callback: func(osPathname string, dirent *godirwalk.Dirent) error { | |
166 | return nil | |
167 | }, | |
168 | Unsorted: true, | |
169 | }) | |
170 | if err != nil { | |
171 | t.Fatal(err) | |
172 | } | |
173 | } | |
174 | ||
175 | func BenchmarkGodirwalkWalk(b *testing.B) { | |
176 | b.ReportAllocs() | |
177 | for i := 0; i < b.N; i++ { | |
178 | err := godirwalk.Walk(*benchDir, &godirwalk.Options{ | |
179 | Callback: func(osPathname string, dirent *godirwalk.Dirent) error { | |
180 | return nil | |
181 | }, | |
182 | Unsorted: true, | |
183 | }) | |
184 | if err != nil { | |
185 | b.Fatal(err) | |
186 | } | |
187 | } | |
188 | }*/ |
0 | // +build linux darwin freebsd openbsd netbsd | |
1 | // +build !appengine | |
2 | ||
3 | package walker | |
4 | ||
5 | import ( | |
6 | "os" | |
7 | "syscall" | |
8 | ||
9 | "golang.org/x/sys/unix" | |
10 | ) | |
11 | ||
12 | func (w *walker) walk(dirname string) error { | |
13 | fd, err := syscall.Open(dirname, 0, 0) | |
14 | if err != nil { | |
15 | return &os.PathError{Op: "open", Path: dirname, Err: err} | |
16 | } | |
17 | defer syscall.Close(fd) | |
18 | ||
19 | buf := make([]byte, 8<<10) | |
20 | n, err := unix.ReadDirent(fd, buf) | |
21 | if err != nil { | |
22 | return err | |
23 | } | |
24 | ||
25 | names := make([]string, 0, 100) | |
26 | offset := 0 | |
27 | for { | |
28 | consumed, count, names := unix.ParseDirent(buf[offset:n], 100, names[0:]) | |
29 | offset += consumed | |
30 | ||
31 | if count <= 0 { | |
32 | return nil | |
33 | } | |
34 | ||
35 | for _, name := range names[:count] { | |
36 | fi, err := os.Lstat(dirname + "/" + name) | |
37 | if os.IsNotExist(err) { | |
38 | continue | |
39 | } | |
40 | if err != nil { | |
41 | return err | |
42 | } | |
43 | if err = w.do(dirname, fi); err != nil { | |
44 | return err | |
45 | } | |
46 | } | |
47 | } | |
48 | return nil | |
49 | } |