Codebase list golang-github-saracen-walker / 7b0a8b5
Initial commit Arran Walker 4 years ago
16 changed file(s) with 1141 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
0 MIT License
1
2 Copyright (c) 2019 Arran Walker
3
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10
11 The above copyright notice and this permission notice shall be included in all
12 copies or substantial portions of the Software.
13
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 SOFTWARE.
0 # walker
1
2 [![](https://godoc.org/github.com/saracen/walker?status.svg)](http://godoc.org/github.com/saracen/walker)
3
`walker` is a faster, parallel version of `filepath.Walk`.
5
6 ```
walker.Walk("/tmp", func(pathname string, fi os.FileInfo) error {
	fmt.Printf("%s: %d bytes\n", pathname, fi.Size())
	return nil
})
10 ```
11
12 ## Benchmarks
13
14 - Standard library (`filepath.Walk`) is `FilepathWalk`.
- This library is `WalkerWalk`.
- `FastwalkWalk` is [fastwalk](https://github.com/golang/tools/tree/master/internal/fastwalk).
- `GodirwalkWalk` is [godirwalk](https://github.com/karrick/godirwalk).
18
19 `Fastwalk` and `Godirwalk` reduce the syscall count by leaving `os.Lstat` up to the user, should they require a full `os.FileInfo`. This library instead performs the `os.Lstat` call, for better compatibility with `filepath.Walk`, and attempts to reduce the time taken through other means.
20
21 These benchmarks were performed with a warm cache.
22
23 ```
24 goos: linux
25 goarch: amd64
26 pkg: github.com/saracen/walker
27 BenchmarkFilepathWalk-24 1 1437479938 ns/op 330704912 B/op 758715 allocs/op
28 BenchmarkWalkerWalk-24 20 100948844 ns/op 71853010 B/op 593451 allocs/op
29 BenchmarkFastwalkWalk-24 5 233001916 ns/op 72442246 B/op 581916 allocs/op
30 BenchmarkGodirwalkWalk-24 2 705022087 ns/op 141308672 B/op 707996 allocs/op
31 ```
32
33 ```
34 goos: windows
35 goarch: amd64
36 pkg: github.com/saracen/walker
37 BenchmarkFilepathWalk-16 1 3100710700 ns/op 269683440 B/op 1467916 allocs/op
38 BenchmarkWalkerWalk-16 4 285985675 ns/op 137157000 B/op 877448 allocs/op
39 BenchmarkFastwalkWalk-16 2 988358100 ns/op 268348560 B/op 1474482 allocs/op
40 BenchmarkGodirwalkWalk-16 1 1200790300 ns/op 111854272 B/op 1310532 allocs/op
41 ```
42
Performing benchmarks without having the OS cache the directory information isn't straightforward, but to get a sense of the performance, we can flush the cache and roughly time how long it takes to walk a directory:
44
45 #### filepath.Walk
46 ```
47 $ sudo su -c 'sync; echo 3 > /proc/sys/vm/drop_caches'; go test -v -run TestFilepathWalkDir -benchdir $GOPATH
48 ok github.com/saracen/walker 5.790s
49 ```
50
51 #### walker
52 ```
53 $ sudo su -c 'sync; echo 3 > /proc/sys/vm/drop_caches'; go test -v -run TestWalkerDir -benchdir $GOPATH
54 ok github.com/saracen/walker 0.593s
55 ```
56
57 #### fastwalk
58 ```
59 $ sudo su -c 'sync; echo 3 > /proc/sys/vm/drop_caches'; go test -v -run TestFastwalkDir -benchdir $GOPATH
60 ok github.com/saracen/walker 0.551s
61 ```
62
63 #### godirwalk
64 ```
65 $ sudo su -c 'sync; echo 3 > /proc/sys/vm/drop_caches'; go test -v -run TestGodirwalkDir -benchdir $GOPATH
66 ok github.com/saracen/walker 3.879s
67 ```
68
69 In this case, `fastwalk` is faster. This is due to it not having to perform an additional `lstat`. The time is almost identical to `walker` if you perform the `lstat` call yourself.
0 module github.com/saracen/walker
1
2 go 1.12
3
4 require (
5 golang.org/x/sys v0.0.0-20191010194322-b09406accb47
6 golang.org/x/tools v0.0.0-20191011211836-4c025a95b26e // indirect
7 )
0 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
1 golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
2 golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
3 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
4 golang.org/x/sys v0.0.0-20191010194322-b09406accb47 h1:/XfQ9z7ib8eEJX2hdgFTZJ/ntt0swNk5oYBziWeTCvY=
5 golang.org/x/sys v0.0.0-20191010194322-b09406accb47/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
6 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
7 golang.org/x/tools v0.0.0-20191011211836-4c025a95b26e h1:1o2bDs9pCd2xFhdwqJTrCIswAeEsn4h/PCNelWpfcsI=
8 golang.org/x/tools v0.0.0-20191011211836-4c025a95b26e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
9 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
0 // Copyright 2016 The Go Authors. All rights reserved.
1 // Use of this source code is governed by a BSD-style
2 // license that can be found in the LICENSE file.
3
4 // Package fastwalk provides a faster version of filepath.Walk for file system
5 // scanning tools.
6 package fastwalk
7
8 import (
9 "errors"
10 "os"
11 "path/filepath"
12 "runtime"
13 "sync"
14 )
15
// TraverseLink is used as a return value from WalkFuncs to indicate that the
// symlink named in the call may be traversed.
var TraverseLink = errors.New("fastwalk: traverse symlink, assuming target is a directory")

// SkipFiles is used as a return value from WalkFuncs to indicate that the
// callback should not be called for any other files in the current directory.
// Child directories will still be traversed.
var SkipFiles = errors.New("fastwalk: skip remaining files in directory")
24
// Walk is a faster implementation of filepath.Walk.
//
// filepath.Walk's design necessarily calls os.Lstat on each file,
// even if the caller needs less info.
// Many tools need only the type of each file.
// On some platforms, this information is provided directly by the readdir
// system call, avoiding the need to stat each file individually.
// fastwalk_unix.go contains a fork of the syscall routines.
//
// See golang.org/issue/16399
//
// Walk walks the file tree rooted at root, calling walkFn for
// each file or directory in the tree, including root.
//
// If walkFn returns filepath.SkipDir, the directory is skipped.
//
// Unlike filepath.Walk:
//   * file stat calls must be done by the user.
//     The only provided metadata is the file type, which does not include
//     any permission bits.
//   * multiple goroutines stat the filesystem concurrently. The provided
//     walkFn must be safe for concurrent use.
//   * Walk can follow symlinks if walkFn returns the TraverseLink
//     sentinel error. It is the walkFn's responsibility to prevent
//     Walk from going into symlink cycles.
func Walk(root string, walkFn func(path string, typ os.FileMode) error) error {
	// TODO(bradfitz): make numWorkers configurable? We used a
	// minimum of 4 to give the kernel more info about multiple
	// things we want, in hopes its I/O scheduling can take
	// advantage of that. Hopefully most are in cache. Maybe 4 is
	// even too low of a minimum. Profile more.
	numWorkers := 4
	if n := runtime.NumCPU(); n > numWorkers {
		numWorkers = n
	}

	// Make sure to wait for all workers to finish, otherwise
	// walkFn could still be called after returning. This Wait call
	// runs after close(w.donec) below (deferred calls run in LIFO order).
	var wg sync.WaitGroup
	defer wg.Wait()

	w := &walker{
		fn:       walkFn,
		enqueuec: make(chan walkItem, numWorkers), // buffered for performance
		workc:    make(chan walkItem, numWorkers), // buffered for performance
		donec:    make(chan struct{}),

		// buffered for correctness & not leaking goroutines:
		resc: make(chan error, numWorkers),
	}
	defer close(w.donec)

	for i := 0; i < numWorkers; i++ {
		wg.Add(1)
		go w.doWork(&wg)
	}
	// todo is a LIFO stack of directories still to be handed to a worker;
	// out counts the work items handed out whose result has not come back yet.
	todo := []walkItem{{dir: root}}
	out := 0
	for {
		// A nil channel never becomes ready in a select, so the send case
		// is disabled whenever there is nothing to hand out.
		workc := w.workc
		var workItem walkItem
		if len(todo) == 0 {
			workc = nil
		} else {
			workItem = todo[len(todo)-1]
		}
		select {
		case workc <- workItem:
			todo = todo[:len(todo)-1]
			out++
		case it := <-w.enqueuec:
			todo = append(todo, it)
		case err := <-w.resc:
			out--
			if err != nil {
				return err
			}
			if out == 0 && len(todo) == 0 {
				// It's safe to quit here, as long as the buffered
				// enqueue channel isn't also readable, which might
				// happen if the worker sends both another unit of
				// work and its result before the other select was
				// scheduled and both w.resc and w.enqueuec were
				// readable.
				select {
				case it := <-w.enqueuec:
					todo = append(todo, it)
				default:
					return nil
				}
			}
		}
	}
}
120
121 // doWork reads directories as instructed (via workc) and runs the
122 // user's callback function.
123 func (w *walker) doWork(wg *sync.WaitGroup) {
124 defer wg.Done()
125 for {
126 select {
127 case <-w.donec:
128 return
129 case it := <-w.workc:
130 select {
131 case <-w.donec:
132 return
133 case w.resc <- w.walk(it.dir, !it.callbackDone):
134 }
135 }
136 }
137 }
138
// walker holds the state shared between Walk's coordinating loop and the
// worker goroutines it starts.
type walker struct {
	// fn is the user callback passed to Walk.
	fn func(path string, typ os.FileMode) error

	donec    chan struct{} // closed on fastWalk's return
	workc    chan walkItem // to workers
	enqueuec chan walkItem // from workers
	resc     chan error    // from workers
}
147
// walkItem is one unit of work: a directory to be read by a worker.
type walkItem struct {
	dir          string
	callbackDone bool // callback already called; don't do it again
}
152
153 func (w *walker) enqueue(it walkItem) {
154 select {
155 case w.enqueuec <- it:
156 case <-w.donec:
157 }
158 }
159
160 func (w *walker) onDirEnt(dirName, baseName string, typ os.FileMode) error {
161 joined := dirName + string(os.PathSeparator) + baseName
162 if typ == os.ModeDir {
163 w.enqueue(walkItem{dir: joined})
164 return nil
165 }
166
167 err := w.fn(joined, typ)
168 if typ == os.ModeSymlink {
169 if err == TraverseLink {
170 // Set callbackDone so we don't call it twice for both the
171 // symlink-as-symlink and the symlink-as-directory later:
172 w.enqueue(walkItem{dir: joined, callbackDone: true})
173 return nil
174 }
175 if err == filepath.SkipDir {
176 // Permit SkipDir on symlinks too.
177 return nil
178 }
179 }
180 return err
181 }
182
183 func (w *walker) walk(root string, runUserCallback bool) error {
184 if runUserCallback {
185 err := w.fn(root, os.ModeDir)
186 if err == filepath.SkipDir {
187 return nil
188 }
189 if err != nil {
190 return err
191 }
192 }
193
194 return readDir(root, w.onDirEnt)
195 }
0 // Copyright 2016 The Go Authors. All rights reserved.
1 // Use of this source code is governed by a BSD-style
2 // license that can be found in the LICENSE file.
3
4 // +build freebsd openbsd netbsd
5
6 package fastwalk
7
8 import "syscall"
9
// direntInode returns the value of dirent's Fileno field widened to uint64.
// This variant is selected by the freebsd/openbsd/netbsd build tag above.
func direntInode(dirent *syscall.Dirent) uint64 {
	return uint64(dirent.Fileno)
}
0 // Copyright 2016 The Go Authors. All rights reserved.
1 // Use of this source code is governed by a BSD-style
2 // license that can be found in the LICENSE file.
3
4 // +build linux darwin
5 // +build !appengine
6
7 package fastwalk
8
9 import "syscall"
10
// direntInode returns the value of dirent's Ino field widened to uint64.
// This variant is selected by the linux/darwin build tag above.
func direntInode(dirent *syscall.Dirent) uint64 {
	ino := dirent.Ino
	return uint64(ino)
}
0 // Copyright 2018 The Go Authors. All rights reserved.
1 // Use of this source code is governed by a BSD-style
2 // license that can be found in the LICENSE file.
3
4 // +build darwin freebsd openbsd netbsd
5
6 package fastwalk
7
8 import "syscall"
9
// direntNamlen returns the value of dirent's Namlen field widened to uint64.
// This variant is selected by the darwin/freebsd/openbsd/netbsd build tag
// above.
func direntNamlen(dirent *syscall.Dirent) uint64 {
	return uint64(dirent.Namlen)
}
0 // Copyright 2018 The Go Authors. All rights reserved.
1 // Use of this source code is governed by a BSD-style
2 // license that can be found in the LICENSE file.
3
4 // +build linux
5 // +build !appengine
6
7 package fastwalk
8
9 import (
10 "bytes"
11 "syscall"
12 "unsafe"
13 )
14
// direntNamlen returns the length of the NUL-terminated name stored in
// dirent. The search for the terminating zero byte is limited to the part
// of the record that follows the fixed header (Reclen minus the offset of
// Name), capped at the size of the Name array. It panics if no
// terminator is found within that range.
func direntNamlen(dirent *syscall.Dirent) uint64 {
	const hdrSize = uint16(unsafe.Offsetof(syscall.Dirent{}.Name))
	raw := (*[unsafe.Sizeof(dirent.Name)]byte)(unsafe.Pointer(&dirent.Name[0]))
	const maxName = uint16(len(raw))

	avail := dirent.Reclen - hdrSize
	if avail > maxName {
		avail = maxName
	}

	if n := bytes.IndexByte(raw[:avail], 0); n >= 0 {
		return uint64(n)
	}
	panic("failed to find terminating 0 byte in dirent")
}
0 // Copyright 2016 The Go Authors. All rights reserved.
1 // Use of this source code is governed by a BSD-style
2 // license that can be found in the LICENSE file.
3
4 // +build appengine !linux,!darwin,!freebsd,!openbsd,!netbsd
5
6 package fastwalk
7
8 import (
9 "io/ioutil"
10 "os"
11 )
12
13 // readDir calls fn for each directory entry in dirName.
14 // It does not descend into directories or follow symlinks.
15 // If fn returns a non-nil error, readDir returns with that error
16 // immediately.
17 func readDir(dirName string, fn func(dirName, entName string, typ os.FileMode) error) error {
18 fis, err := ioutil.ReadDir(dirName)
19 if err != nil {
20 return err
21 }
22 skipFiles := false
23 for _, fi := range fis {
24 if fi.Mode().IsRegular() && skipFiles {
25 continue
26 }
27 if err := fn(dirName, fi.Name(), fi.Mode()&os.ModeType); err != nil {
28 if err == SkipFiles {
29 skipFiles = true
30 continue
31 }
32 return err
33 }
34 }
35 return nil
36 }
0 // Copyright 2016 The Go Authors. All rights reserved.
1 // Use of this source code is governed by a BSD-style
2 // license that can be found in the LICENSE file.
3
4 package fastwalk_test
5
6 import (
7 "bytes"
8 "flag"
9 "fmt"
10 "io/ioutil"
11 "os"
12 "path/filepath"
13 "reflect"
14 "runtime"
15 "sort"
16 "strings"
17 "sync"
18 "testing"
19
20 "golang.org/x/tools/internal/fastwalk"
21 )
22
// formatFileModes renders m as one "path: mode" line per entry, sorted by
// path, so that mismatches are easy to read in test failure output.
func formatFileModes(m map[string]os.FileMode) string {
	paths := make([]string, 0, len(m))
	for p := range m {
		paths = append(paths, p)
	}
	sort.Strings(paths)

	var out bytes.Buffer
	for i := range paths {
		p := paths[i]
		fmt.Fprintf(&out, "%-20s: %v\n", p, m[p])
	}
	return out.String()
}
35
36 func testFastWalk(t *testing.T, files map[string]string, callback func(path string, typ os.FileMode) error, want map[string]os.FileMode) {
37 tempdir, err := ioutil.TempDir("", "test-fast-walk")
38 if err != nil {
39 t.Fatal(err)
40 }
41 defer os.RemoveAll(tempdir)
42 for path, contents := range files {
43 file := filepath.Join(tempdir, "/src", path)
44 if err := os.MkdirAll(filepath.Dir(file), 0755); err != nil {
45 t.Fatal(err)
46 }
47 var err error
48 if strings.HasPrefix(contents, "LINK:") {
49 err = os.Symlink(strings.TrimPrefix(contents, "LINK:"), file)
50 } else {
51 err = ioutil.WriteFile(file, []byte(contents), 0644)
52 }
53 if err != nil {
54 t.Fatal(err)
55 }
56 }
57 got := map[string]os.FileMode{}
58 var mu sync.Mutex
59 if err := fastwalk.Walk(tempdir, func(path string, typ os.FileMode) error {
60 mu.Lock()
61 defer mu.Unlock()
62 if !strings.HasPrefix(path, tempdir) {
63 t.Fatalf("bogus prefix on %q, expect %q", path, tempdir)
64 }
65 key := filepath.ToSlash(strings.TrimPrefix(path, tempdir))
66 if old, dup := got[key]; dup {
67 t.Fatalf("callback called twice for key %q: %v -> %v", key, old, typ)
68 }
69 got[key] = typ
70 return callback(path, typ)
71 }); err != nil {
72 t.Fatalf("callback returned: %v", err)
73 }
74 if !reflect.DeepEqual(got, want) {
75 t.Errorf("walk mismatch.\n got:\n%v\nwant:\n%v", formatFileModes(got), formatFileModes(want))
76 }
77 }
78
79 func TestFastWalk_Basic(t *testing.T) {
80 testFastWalk(t, map[string]string{
81 "foo/foo.go": "one",
82 "bar/bar.go": "two",
83 "skip/skip.go": "skip",
84 },
85 func(path string, typ os.FileMode) error {
86 return nil
87 },
88 map[string]os.FileMode{
89 "": os.ModeDir,
90 "/src": os.ModeDir,
91 "/src/bar": os.ModeDir,
92 "/src/bar/bar.go": 0,
93 "/src/foo": os.ModeDir,
94 "/src/foo/foo.go": 0,
95 "/src/skip": os.ModeDir,
96 "/src/skip/skip.go": 0,
97 })
98 }
99
100 func TestFastWalk_LongFileName(t *testing.T) {
101 longFileName := strings.Repeat("x", 255)
102
103 testFastWalk(t, map[string]string{
104 longFileName: "one",
105 },
106 func(path string, typ os.FileMode) error {
107 return nil
108 },
109 map[string]os.FileMode{
110 "": os.ModeDir,
111 "/src": os.ModeDir,
112 "/src/" + longFileName: 0,
113 },
114 )
115 }
116
117 func TestFastWalk_Symlink(t *testing.T) {
118 switch runtime.GOOS {
119 case "windows", "plan9":
120 t.Skipf("skipping on %s", runtime.GOOS)
121 }
122 testFastWalk(t, map[string]string{
123 "foo/foo.go": "one",
124 "bar/bar.go": "LINK:../foo.go",
125 "symdir": "LINK:foo",
126 },
127 func(path string, typ os.FileMode) error {
128 return nil
129 },
130 map[string]os.FileMode{
131 "": os.ModeDir,
132 "/src": os.ModeDir,
133 "/src/bar": os.ModeDir,
134 "/src/bar/bar.go": os.ModeSymlink,
135 "/src/foo": os.ModeDir,
136 "/src/foo/foo.go": 0,
137 "/src/symdir": os.ModeSymlink,
138 })
139 }
140
141 func TestFastWalk_SkipDir(t *testing.T) {
142 testFastWalk(t, map[string]string{
143 "foo/foo.go": "one",
144 "bar/bar.go": "two",
145 "skip/skip.go": "skip",
146 },
147 func(path string, typ os.FileMode) error {
148 if typ == os.ModeDir && strings.HasSuffix(path, "skip") {
149 return filepath.SkipDir
150 }
151 return nil
152 },
153 map[string]os.FileMode{
154 "": os.ModeDir,
155 "/src": os.ModeDir,
156 "/src/bar": os.ModeDir,
157 "/src/bar/bar.go": 0,
158 "/src/foo": os.ModeDir,
159 "/src/foo/foo.go": 0,
160 "/src/skip": os.ModeDir,
161 })
162 }
163
164 func TestFastWalk_SkipFiles(t *testing.T) {
165 // Directory iteration order is undefined, so there's no way to know
166 // which file to expect until the walk happens. Rather than mess
167 // with the test infrastructure, just mutate want.
168 var mu sync.Mutex
169 want := map[string]os.FileMode{
170 "": os.ModeDir,
171 "/src": os.ModeDir,
172 "/src/zzz": os.ModeDir,
173 "/src/zzz/c.go": 0,
174 }
175
176 testFastWalk(t, map[string]string{
177 "a_skipfiles.go": "a",
178 "b_skipfiles.go": "b",
179 "zzz/c.go": "c",
180 },
181 func(path string, typ os.FileMode) error {
182 if strings.HasSuffix(path, "_skipfiles.go") {
183 mu.Lock()
184 defer mu.Unlock()
185 want["/src/"+filepath.Base(path)] = 0
186 return fastwalk.SkipFiles
187 }
188 return nil
189 },
190 want)
191 if len(want) != 5 {
192 t.Errorf("saw too many files: wanted 5, got %v (%v)", len(want), want)
193 }
194 }
195
196 func TestFastWalk_TraverseSymlink(t *testing.T) {
197 switch runtime.GOOS {
198 case "windows", "plan9":
199 t.Skipf("skipping on %s", runtime.GOOS)
200 }
201
202 testFastWalk(t, map[string]string{
203 "foo/foo.go": "one",
204 "bar/bar.go": "two",
205 "skip/skip.go": "skip",
206 "symdir": "LINK:foo",
207 },
208 func(path string, typ os.FileMode) error {
209 if typ == os.ModeSymlink {
210 return fastwalk.TraverseLink
211 }
212 return nil
213 },
214 map[string]os.FileMode{
215 "": os.ModeDir,
216 "/src": os.ModeDir,
217 "/src/bar": os.ModeDir,
218 "/src/bar/bar.go": 0,
219 "/src/foo": os.ModeDir,
220 "/src/foo/foo.go": 0,
221 "/src/skip": os.ModeDir,
222 "/src/skip/skip.go": 0,
223 "/src/symdir": os.ModeSymlink,
224 "/src/symdir/foo.go": 0,
225 })
226 }
227
228 var benchDir = flag.String("benchdir", runtime.GOROOT(), "The directory to scan for BenchmarkFastWalk")
229
230 func BenchmarkFastWalk(b *testing.B) {
231 b.ReportAllocs()
232 for i := 0; i < b.N; i++ {
233 err := fastwalk.Walk(*benchDir, func(path string, typ os.FileMode) error { return nil })
234 if err != nil {
235 b.Fatal(err)
236 }
237 }
238 }
0 // Copyright 2016 The Go Authors. All rights reserved.
1 // Use of this source code is governed by a BSD-style
2 // license that can be found in the LICENSE file.
3
4 // +build linux darwin freebsd openbsd netbsd
5 // +build !appengine
6
7 package fastwalk
8
9 import (
10 "fmt"
11 "os"
12 "syscall"
13 "unsafe"
14 )
15
// blockSize is the size in bytes (8 KiB) of the buffer readDir passes to
// syscall.ReadDirent.
const blockSize = 8 << 10

// unknownFileMode is a sentinel (and bogus) os.FileMode
// value used to represent a syscall.DT_UNKNOWN Dirent.Type.
const unknownFileMode os.FileMode = os.ModeNamedPipe | os.ModeSocket | os.ModeDevice
21
22 func readDir(dirName string, fn func(dirName, entName string, typ os.FileMode) error) error {
23 fd, err := syscall.Open(dirName, 0, 0)
24 if err != nil {
25 return &os.PathError{Op: "open", Path: dirName, Err: err}
26 }
27 defer syscall.Close(fd)
28
29 // The buffer must be at least a block long.
30 buf := make([]byte, blockSize) // stack-allocated; doesn't escape
31 bufp := 0 // starting read position in buf
32 nbuf := 0 // end valid data in buf
33 skipFiles := false
34 for {
35 if bufp >= nbuf {
36 bufp = 0
37 nbuf, err = syscall.ReadDirent(fd, buf)
38 if err != nil {
39 return os.NewSyscallError("readdirent", err)
40 }
41 if nbuf <= 0 {
42 return nil
43 }
44 }
45 consumed, name, typ := parseDirEnt(buf[bufp:nbuf])
46 bufp += consumed
47 if name == "" || name == "." || name == ".." {
48 continue
49 }
50 // Fallback for filesystems (like old XFS) that don't
51 // support Dirent.Type and have DT_UNKNOWN (0) there
52 // instead.
53 if typ == unknownFileMode {
54 fi, err := os.Lstat(dirName + "/" + name)
55 if err != nil {
56 // It got deleted in the meantime.
57 if os.IsNotExist(err) {
58 continue
59 }
60 return err
61 }
62 typ = fi.Mode() & os.ModeType
63 }
64 if skipFiles && typ.IsRegular() {
65 continue
66 }
67 if err := fn(dirName, name, typ); err != nil {
68 if err == SkipFiles {
69 skipFiles = true
70 continue
71 }
72 return err
73 }
74 }
75 }
76
// parseDirEnt decodes the directory entry at the start of buf.
//
// consumed is the entry's record length (Reclen), i.e. how far the caller
// must advance to reach the next entry. name is the entry's file name and
// typ its type translated to an os.FileMode (unknownFileMode for
// DT_UNKNOWN). name is left empty when the entry's inode is zero or when
// its type is not one of the handled DT_* values.
//
// It panics if buf is too short to hold the record header or the whole
// record.
func parseDirEnt(buf []byte) (consumed int, name string, typ os.FileMode) {
	// golang.org/issue/15653
	dirent := (*syscall.Dirent)(unsafe.Pointer(&buf[0]))
	if v := unsafe.Offsetof(dirent.Reclen) + unsafe.Sizeof(dirent.Reclen); uintptr(len(buf)) < v {
		panic(fmt.Sprintf("buf size of %d smaller than dirent header size %d", len(buf), v))
	}
	if len(buf) < int(dirent.Reclen) {
		panic(fmt.Sprintf("buf size %d < record length %d", len(buf), dirent.Reclen))
	}
	consumed = int(dirent.Reclen)
	if direntInode(dirent) == 0 { // File absent in directory.
		return
	}
	switch dirent.Type {
	case syscall.DT_REG:
		typ = 0
	case syscall.DT_DIR:
		typ = os.ModeDir
	case syscall.DT_LNK:
		typ = os.ModeSymlink
	case syscall.DT_BLK:
		typ = os.ModeDevice
	case syscall.DT_FIFO:
		typ = os.ModeNamedPipe
	case syscall.DT_SOCK:
		typ = os.ModeSocket
	case syscall.DT_UNKNOWN:
		typ = unknownFileMode
	default:
		// Skip weird things.
		// It's probably a DT_WHT (http://lwn.net/Articles/325369/)
		// or something. Revisit if/when this package is moved outside
		// of goimports. goimports only cares about regular files,
		// symlinks, and directories.
		return
	}

	// View the Name array as bytes regardless of its element type.
	nameBuf := (*[unsafe.Sizeof(dirent.Name)]byte)(unsafe.Pointer(&dirent.Name[0]))
	nameLen := direntNamlen(dirent)

	// Special cases for common things:
	if nameLen == 1 && nameBuf[0] == '.' {
		name = "."
	} else if nameLen == 2 && nameBuf[0] == '.' && nameBuf[1] == '.' {
		name = ".."
	} else {
		name = string(nameBuf[:nameLen])
	}
	return
}
0 package walker
1
2 import (
3 "os"
4 "path/filepath"
5 "runtime"
6 "sync"
7 "sync/atomic"
8 )
9
10 // Walk walks the file tree rooted at root, calling walkFn for each
11 // file or directory in the tree, including root.
12 //
13 // If fastWalk returns filepath.SkipDir, the directory is skipped.
14 //
15 // Multiple goroutines stat the filesystem concurrently. The provided
16 // walkFn must be safe for concurrent use.
17 func Walk(root string, walkFn func(pathname string, fi os.FileInfo) error) error {
18 fi, err := os.Lstat(root)
19 if err != nil {
20 return err
21 }
22 if err = walkFn(root, fi); err == filepath.SkipDir {
23 return nil
24 }
25 if err != nil || !fi.IsDir() {
26 return err
27 }
28
29 w := walker{limit: runtime.NumCPU(), fn: walkFn}
30 if w.limit < 4 {
31 w.limit = 4
32 }
33
34 err = w.walk(root)
35
36 w.wg.Wait()
37 if err := w.err.Load(); err != nil {
38 return err.(error)
39 }
40
41 return err
42 }
43
44 type walker struct {
45 counter uint32
46 limit int
47 wg sync.WaitGroup
48 err atomic.Value
49 fn func(pathname string, fi os.FileInfo) error
50 }
51
52 func (w *walker) do(dirname string, fi os.FileInfo) error {
53 pathname := dirname + string(filepath.Separator) + fi.Name()
54
55 err := w.fn(pathname, fi)
56 if err == filepath.SkipDir {
57 return nil
58 }
59 if err != nil {
60 return err
61 }
62
63 // don't follow symbolic links
64 if fi.Mode()&os.ModeSymlink != 0 {
65 return nil
66 }
67
68 if fi.IsDir() {
69 current := atomic.LoadUint32(&w.counter)
70
71 // if we haven't reached our goroutine limit, spawn a new one
72 if current < uint32(w.limit) {
73 if atomic.CompareAndSwapUint32(&w.counter, current, current+1) {
74 if err := w.err.Load(); err != nil {
75 return err.(error)
76 }
77
78 w.wg.Add(1)
79 go func() {
80 if err := w.walk(pathname); err != nil {
81 w.err.Store(err)
82 }
83
84 w.wg.Done()
85 atomic.AddUint32(&w.counter, ^uint32(0))
86 }()
87
88 return nil
89 }
90 }
91
92 // if we've reached our limit, continue with this goroutine
93 if err := w.walk(pathname); err != nil {
94 return err
95 }
96 }
97
98 return nil
99 }
0 // +build appengine !linux,!darwin,!freebsd,!openbsd,!netbsd
1
2 package walker
3
4 import "os"
5
6 func (w *walker) walk(dirname string) error {
7 f, err := os.Open(dirname)
8 if err != nil {
9 return err
10 }
11
12 list, err := f.Readdir(-1)
13 f.Close()
14 if err != nil {
15 return err
16 }
17
18 for _, fi := range list {
19 if err = w.do(dirname, fi); err != nil {
20 return err
21 }
22 }
23 return nil
24 }
0 package walker
1
2 import (
3 "flag"
4 "io/ioutil"
5 "os"
6 "path/filepath"
7 "runtime"
8 "strings"
9 "sync"
10 "testing"
11
12 //"github.com/karrick/godirwalk"
13 "github.com/saracen/walker/testdata/fastwalk"
14 )
15
16 func testWalk(t *testing.T, files map[string]os.FileMode) {
17 dir, err := ioutil.TempDir("", "walker-test")
18 if err != nil {
19 t.Error(err)
20 return
21 }
22 defer os.RemoveAll(dir)
23
24 for path, mode := range files {
25 path = filepath.Join(dir, path)
26 if err := os.MkdirAll(filepath.Dir(path), 0777); err != nil {
27 t.Fatal(err)
28 }
29
30 switch {
31 case mode&os.ModeSymlink != 0 && mode&os.ModeDir != 0:
32 if err := os.Symlink(filepath.Dir(path), path); err != nil {
33 t.Fatal(err)
34 }
35
36 case mode&os.ModeSymlink != 0:
37 if err := os.Symlink("foo/foo.go", path); err != nil {
38 t.Fatal(err)
39 }
40
41 default:
42 if err := ioutil.WriteFile(path, []byte(path), mode); err != nil {
43 t.Fatal(err)
44 }
45 }
46 }
47
48 filepathResults := make(map[string]os.FileInfo)
49 err = filepath.Walk(dir, func(pathname string, fi os.FileInfo, err error) error {
50 if strings.Contains(pathname, "skip") {
51 return filepath.SkipDir
52 }
53
54 filepathResults[pathname] = fi
55 return nil
56 })
57 if err != nil {
58 t.Fatal(err)
59 }
60
61 var l sync.Mutex
62 walkerResults := make(map[string]os.FileInfo)
63 err = Walk(dir, func(pathname string, fi os.FileInfo) error {
64 if strings.Contains(pathname, "skip") {
65 return filepath.SkipDir
66 }
67
68 l.Lock()
69 walkerResults[pathname] = fi
70 l.Unlock()
71
72 return nil
73 })
74 if err != nil {
75 t.Fatal(err)
76 }
77
78 for path, info := range filepathResults {
79 info2, ok := walkerResults[path]
80 if !ok {
81 t.Fatalf("walk mismatch, path %q doesn't exist", path)
82 }
83
84 if info.IsDir() != info2.IsDir() ||
85 info.ModTime() != info2.ModTime() ||
86 info.Mode() != info2.Mode() ||
87 info.Name() != info2.Name() ||
88 info.Size() != info2.Size() {
89 t.Fatalf("walk mismatch, got %v, wanted %v", info2, info)
90 }
91 }
92 }
93
94 func TestWalker(t *testing.T) {
95 testWalk(t, map[string]os.FileMode{
96 "foo/foo.go": 0644,
97 "bar/bar.go": 0777,
98 "bar/foo/bar/foo/bar": 0600,
99 "skip/file": 0700,
100 "bar/symlink": os.ModeDir | os.ModeSymlink | 0777,
101 "bar/symlink.go": os.ModeSymlink | 0777,
102 })
103 }
104
// benchDir is the directory tree walked by the directory tests and
// benchmarks below; it defaults to GOROOT and can be changed with the
// -benchdir flag.
var benchDir = flag.String("benchdir", runtime.GOROOT(), "The directory to scan for BenchmarkFilepathWalk and BenchmarkWalkerWalk")

// TestFilepathWalkDir walks *benchDir once with filepath.Walk and a
// callback that does nothing. It exists so a single walk can be timed
// with "go test -run".
func TestFilepathWalkDir(t *testing.T) {
	noop := func(pathname string, fi os.FileInfo, err error) error { return nil }

	if err := filepath.Walk(*benchDir, noop); err != nil {
		t.Fatal(err)
	}
}
113
114 func BenchmarkFilepathWalk(b *testing.B) {
115 b.ReportAllocs()
116 for i := 0; i < b.N; i++ {
117 err := filepath.Walk(*benchDir, func(pathname string, fi os.FileInfo, err error) error { return nil })
118 if err != nil {
119 b.Fatal(err)
120 }
121 }
122 }
123
124 func TestWalkerDir(t *testing.T) {
125 err := Walk(*benchDir, func(pathname string, fi os.FileInfo) error { return nil })
126 if err != nil {
127 t.Fatal(err)
128 }
129 }
130
131 func BenchmarkWalkerWalk(b *testing.B) {
132 b.ReportAllocs()
133 for i := 0; i < b.N; i++ {
134 err := Walk(*benchDir, func(pathname string, fi os.FileInfo) error { return nil })
135 if err != nil {
136 b.Fatal(err)
137 }
138 }
139 }
140
141 func TestFastwalkDir(t *testing.T) {
142 err := fastwalk.Walk(*benchDir, func(pathname string, mode os.FileMode) error {
143 return nil
144 })
145 if err != nil {
146 t.Fatal(err)
147 }
148 }
149
150 func BenchmarkFastwalkWalk(b *testing.B) {
151 b.ReportAllocs()
152 for i := 0; i < b.N; i++ {
153 err := fastwalk.Walk(*benchDir, func(pathname string, mode os.FileMode) error {
154 _, err := os.Lstat(pathname)
155 return err
156 })
157 if err != nil {
158 b.Fatal(err)
159 }
160 }
161 }
162
163 /*func TestGodirwalkDir(t *testing.T) {
164 err := godirwalk.Walk(*benchDir, &godirwalk.Options{
165 Callback: func(osPathname string, dirent *godirwalk.Dirent) error {
166 return nil
167 },
168 Unsorted: true,
169 })
170 if err != nil {
171 t.Fatal(err)
172 }
173 }
174
175 func BenchmarkGodirwalkWalk(b *testing.B) {
176 b.ReportAllocs()
177 for i := 0; i < b.N; i++ {
178 err := godirwalk.Walk(*benchDir, &godirwalk.Options{
179 Callback: func(osPathname string, dirent *godirwalk.Dirent) error {
180 return nil
181 },
182 Unsorted: true,
183 })
184 if err != nil {
185 b.Fatal(err)
186 }
187 }
188 }*/
0 // +build linux darwin freebsd openbsd netbsd
1 // +build !appengine
2
3 package walker
4
5 import (
6 "os"
7 "syscall"
8
9 "golang.org/x/sys/unix"
10 )
11
12 func (w *walker) walk(dirname string) error {
13 fd, err := syscall.Open(dirname, 0, 0)
14 if err != nil {
15 return &os.PathError{Op: "open", Path: dirname, Err: err}
16 }
17 defer syscall.Close(fd)
18
19 buf := make([]byte, 8<<10)
20 n, err := unix.ReadDirent(fd, buf)
21 if err != nil {
22 return err
23 }
24
25 names := make([]string, 0, 100)
26 offset := 0
27 for {
28 consumed, count, names := unix.ParseDirent(buf[offset:n], 100, names[0:])
29 offset += consumed
30
31 if count <= 0 {
32 return nil
33 }
34
35 for _, name := range names[:count] {
36 fi, err := os.Lstat(dirname + "/" + name)
37 if os.IsNotExist(err) {
38 continue
39 }
40 if err != nil {
41 return err
42 }
43 if err = w.do(dirname, fi); err != nil {
44 return err
45 }
46 }
47 }
48 return nil
49 }