Codebase list golang-github-vbatts-go-mtree / d2997578-1c5d-4af9-b5d3-d3fbb82190d0/main tar.go
d2997578-1c5d-4af9-b5d3-d3fbb82190d0/main

Tree @d2997578-1c5d-4af9-b5d3-d3fbb82190d0/main (Download .tar.gz)

tar.go @d2997578-1c5d-4af9-b5d3-d3fbb82190d0/mainraw · history · blame

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
package mtree

import (
	"archive/tar"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"path/filepath"
	"strings"

	"github.com/sirupsen/logrus"
	"github.com/vbatts/go-mtree/pkg/govis"
)

// Streamer creates a file hierarchy out of a tar stream
type Streamer interface {
	io.ReadCloser
	Hierarchy() (*DirectoryHierarchy, error)
}

var tarDefaultSetKeywords = []KeyVal{
	"type=file",
	"flags=none",
	"mode=0664",
}

// NewTarStreamer streams a tar archive and creates a file hierarchy based off
// of the tar metadata headers
func NewTarStreamer(r io.Reader, excludes []ExcludeFunc, keywords []Keyword) Streamer {
	pR, pW := io.Pipe()
	ts := &tarStream{
		pipeReader: pR,
		pipeWriter: pW,
		creator:    dhCreator{DH: &DirectoryHierarchy{}},
		teeReader:  io.TeeReader(r, pW),
		tarReader:  tar.NewReader(pR),
		keywords:   keywords,
		hardlinks:  map[string][]string{},
		excludes:   excludes,
	}

	go ts.readHeaders()
	return ts
}

type tarStream struct {
	root       *Entry
	hardlinks  map[string][]string
	creator    dhCreator
	pipeReader *io.PipeReader
	pipeWriter *io.PipeWriter
	teeReader  io.Reader
	tarReader  *tar.Reader
	keywords   []Keyword
	excludes   []ExcludeFunc
	err        error
}

func (ts *tarStream) readHeaders() {
	// remove "time" keyword
	notimekws := []Keyword{}
	for _, kw := range ts.keywords {
		if !InKeywordSlice(kw, notimekws) {
			if kw == "time" {
				if !InKeywordSlice("tar_time", ts.keywords) {
					notimekws = append(notimekws, "tar_time")
				}
			} else {
				notimekws = append(notimekws, kw)
			}
		}
	}
	ts.keywords = notimekws
	// We have to start with the directory we're in, and anything beyond these
	// items is determined at the time a tar is extracted.
	ts.root = &Entry{
		Name: ".",
		Type: RelativeType,
		Prev: &Entry{
			Raw:  "# .",
			Type: CommentType,
		},
		Set:      nil,
		Keywords: []KeyVal{"type=dir"},
	}
	// insert signature and metadata comments first (user, machine, tree, date)
	for _, e := range signatureEntries("<user specified tar archive>") {
		e.Pos = len(ts.creator.DH.Entries)
		ts.creator.DH.Entries = append(ts.creator.DH.Entries, e)
	}
	// insert keyword metadata next
	for _, e := range keywordEntries(ts.keywords) {
		e.Pos = len(ts.creator.DH.Entries)
		ts.creator.DH.Entries = append(ts.creator.DH.Entries, e)
	}
hdrloop:
	for {
		hdr, err := ts.tarReader.Next()
		if err != nil {
			ts.pipeReader.CloseWithError(err)
			return
		}

		for _, ex := range ts.excludes {
			if ex(hdr.Name, hdr.FileInfo()) {
				continue hdrloop
			}
		}

		// Because the content of the file may need to be read by several
		// KeywordFuncs, it needs to be an io.Seeker as well. So, just reading from
		// ts.tarReader is not enough.
		tmpFile, err := ioutil.TempFile("", "ts.payload.")
		if err != nil {
			ts.pipeReader.CloseWithError(err)
			return
		}
		// for good measure
		if err := tmpFile.Chmod(0600); err != nil {
			tmpFile.Close()
			os.Remove(tmpFile.Name())
			ts.pipeReader.CloseWithError(err)
			return
		}
		if _, err := io.Copy(tmpFile, ts.tarReader); err != nil {
			tmpFile.Close()
			os.Remove(tmpFile.Name())
			ts.pipeReader.CloseWithError(err)
			return
		}
		// Alright, it's either file or directory
		encodedName, err := govis.Vis(filepath.Base(hdr.Name), DefaultVisFlags)
		if err != nil {
			tmpFile.Close()
			os.Remove(tmpFile.Name())
			ts.pipeReader.CloseWithError(err)
			return
		}
		e := Entry{
			Name: encodedName,
			Type: RelativeType,
		}

		// Keep track of which files are hardlinks so we can resolve them later
		if hdr.Typeflag == tar.TypeLink {
			keyFunc := KeywordFuncs["link"]
			kvs, err := keyFunc(hdr.Name, hdr.FileInfo(), nil)
			if err != nil {
				logrus.Warn(err)
				break // XXX is breaking an okay thing to do here?
			}
			linkname, err := govis.Unvis(KeyVal(kvs[0]).Value(), DefaultVisFlags)
			if err != nil {
				logrus.Warn(err)
				break // XXX is breaking an okay thing to do here?
			}
			if _, ok := ts.hardlinks[linkname]; !ok {
				ts.hardlinks[linkname] = []string{hdr.Name}
			} else {
				ts.hardlinks[linkname] = append(ts.hardlinks[linkname], hdr.Name)
			}
		}

		// now collect keywords on the file
		for _, keyword := range ts.keywords {
			if keyFunc, ok := KeywordFuncs[keyword.Prefix()]; ok {
				// We can't extract directories on to disk, so "size" keyword
				// is irrelevant for now
				if hdr.FileInfo().IsDir() && keyword == "size" {
					continue
				}
				kvs, err := keyFunc(hdr.Name, hdr.FileInfo(), tmpFile)
				if err != nil {
					ts.setErr(err)
				}
				// for good measure, check that we actually get a value for a keyword
				if len(kvs) > 0 && kvs[0] != "" {
					e.Keywords = append(e.Keywords, kvs[0])
				}

				// don't forget to reset the reader
				if _, err := tmpFile.Seek(0, 0); err != nil {
					tmpFile.Close()
					os.Remove(tmpFile.Name())
					ts.pipeReader.CloseWithError(err)
					return
				}
			}
		}
		// collect meta-set keywords for a directory so that we can build the
		// actual sets in `flatten`
		if hdr.FileInfo().IsDir() {
			s := Entry{
				Name: "meta-set",
				Type: SpecialType,
			}
			for _, setKW := range SetKeywords {
				if keyFunc, ok := KeywordFuncs[setKW.Prefix()]; ok {
					kvs, err := keyFunc(hdr.Name, hdr.FileInfo(), tmpFile)
					if err != nil {
						ts.setErr(err)
					}
					for _, kv := range kvs {
						if kv != "" {
							s.Keywords = append(s.Keywords, kv)
						}
					}
					if _, err := tmpFile.Seek(0, 0); err != nil {
						tmpFile.Close()
						os.Remove(tmpFile.Name())
						ts.pipeReader.CloseWithError(err)
					}
				}
			}
			e.Set = &s
		}
		err = populateTree(ts.root, &e, hdr)
		if err != nil {
			ts.setErr(err)
		}
		tmpFile.Close()
		os.Remove(tmpFile.Name())
	}
}

// populateTree creates a pseudo file tree hierarchy using an Entry's Parent and
// Children fields. When examining the Entry e to insert in the tree, we
// determine if the path to that Entry exists yet. If it does, insert it in the
// appropriate position in the tree. If not, create a path up until the Entry's
// directory that it is contained in. Then, insert the Entry.
// root: the "." Entry
//    e: the Entry we are looking to insert
//  hdr: the tar header struct associated with e
func populateTree(root, e *Entry, hdr *tar.Header) error {
	if root == nil || e == nil {
		return fmt.Errorf("cannot populate or insert nil Entry's")
	} else if root.Prev == nil {
		return fmt.Errorf("root needs to be an Entry associated with a directory")
	}
	isDir := hdr.FileInfo().IsDir()
	wd := filepath.Clean(hdr.Name)
	if !isDir {
		// directory up until the actual file
		wd = filepath.Dir(wd)
		if wd == "." {
			root.Children = append([]*Entry{e}, root.Children...)
			e.Parent = root
			return nil
		}
	}
	dirNames := strings.Split(wd, "/")
	parent := root
	for _, name := range dirNames[:] {
		encoded, err := govis.Vis(name, DefaultVisFlags)
		if err != nil {
			return err
		}
		if node := parent.Descend(encoded); node == nil {
			// Entry for directory doesn't exist in tree relative to root.
			// We don't know if this directory is an actual tar header (because a
			// user could have just specified a path to a deep file), so we must
			// specify this placeholder directory as a "type=dir", and Set=nil.
			newEntry := Entry{
				Name:     encoded,
				Type:     RelativeType,
				Parent:   parent,
				Keywords: []KeyVal{"type=dir"}, // temp data
				Set:      nil,                  // temp data
			}
			pathname, err := newEntry.Path()
			if err != nil {
				return err
			}
			newEntry.Prev = &Entry{
				Type: CommentType,
				Raw:  "# " + pathname,
			}
			parent.Children = append(parent.Children, &newEntry)
			parent = &newEntry
		} else {
			// Entry for directory exists in tree, just keep going
			parent = node
		}
	}
	if !isDir {
		parent.Children = append([]*Entry{e}, parent.Children...)
		e.Parent = parent
	} else {
		// fill in the actual data from e
		parent.Keywords = e.Keywords
		parent.Set = e.Set
	}
	return nil
}

// After constructing a pseudo file hierarchy tree, we want to "flatten" this
// tree by putting the Entries into a slice with appropriate positioning.
//     root: the "head" of the sub-tree to flatten
//  creator: a dhCreator that helps with the '/set' keyword
// keywords: keywords specified by the user that should be evaluated
func flatten(root *Entry, creator *dhCreator, keywords []Keyword) {
	if root == nil || creator == nil {
		return
	}
	if root.Prev != nil {
		// root.Prev != nil implies root is a directory
		creator.DH.Entries = append(creator.DH.Entries,
			Entry{
				Type: BlankType,
				Pos:  len(creator.DH.Entries),
			})
		root.Prev.Pos = len(creator.DH.Entries)
		creator.DH.Entries = append(creator.DH.Entries, *root.Prev)

		if root.Set != nil {
			// Check if we need a new set
			consolidatedKeys := keyvalSelector(append(tarDefaultSetKeywords, root.Set.Keywords...), keywords)
			if creator.curSet == nil {
				creator.curSet = &Entry{
					Type:     SpecialType,
					Name:     "/set",
					Keywords: consolidatedKeys,
					Pos:      len(creator.DH.Entries),
				}
				creator.DH.Entries = append(creator.DH.Entries, *creator.curSet)
			} else {
				needNewSet := false
				for _, k := range root.Set.Keywords {
					if !inKeyValSlice(k, creator.curSet.Keywords) {
						needNewSet = true
						break
					}
				}
				if needNewSet {
					creator.curSet = &Entry{
						Name:     "/set",
						Type:     SpecialType,
						Pos:      len(creator.DH.Entries),
						Keywords: consolidatedKeys,
					}
					creator.DH.Entries = append(creator.DH.Entries, *creator.curSet)
				}
			}
		} else if creator.curSet != nil {
			// Getting into here implies that the Entry's set has not and
			// was not supposed to be evaluated, thus, we need to reset curSet
			creator.DH.Entries = append(creator.DH.Entries, Entry{
				Name: "/unset",
				Type: SpecialType,
				Pos:  len(creator.DH.Entries),
			})
			creator.curSet = nil
		}
	}
	root.Set = creator.curSet
	if creator.curSet != nil {
		root.Keywords = keyValDifference(root.Keywords, creator.curSet.Keywords)
	}
	root.Pos = len(creator.DH.Entries)
	creator.DH.Entries = append(creator.DH.Entries, *root)
	for _, c := range root.Children {
		flatten(c, creator, keywords)
	}
	if root.Prev != nil {
		// Show a comment when stepping out
		root.Prev.Pos = len(creator.DH.Entries)
		creator.DH.Entries = append(creator.DH.Entries, *root.Prev)
		dotEntry := Entry{
			Type: DotDotType,
			Name: "..",
			Pos:  len(creator.DH.Entries),
		}
		creator.DH.Entries = append(creator.DH.Entries, dotEntry)
	}
	return
}

// resolveHardlinks goes through an Entry tree, and finds the Entry's associated
// with hardlinks and fills them in with the actual data from the base file.
func resolveHardlinks(root *Entry, hardlinks map[string][]string, countlinks bool) {
	originals := make(map[string]*Entry)
	for base, links := range hardlinks {
		var basefile *Entry
		if seen, ok := originals[base]; !ok {
			basefile = root.Find(base)
			if basefile == nil {
				logrus.Printf("%s does not exist in this tree\n", base)
				continue
			}
			originals[base] = basefile
		} else {
			basefile = seen
		}
		for _, link := range links {
			linkfile := root.Find(link)
			if linkfile == nil {
				logrus.Printf("%s does not exist in this tree\n", link)
				continue
			}
			linkfile.Keywords = basefile.Keywords
			if countlinks {
				linkfile.Keywords = append(linkfile.Keywords, KeyVal(fmt.Sprintf("nlink=%d", len(links)+1)))
			}
		}
		if countlinks {
			basefile.Keywords = append(basefile.Keywords, KeyVal(fmt.Sprintf("nlink=%d", len(links)+1)))
		}
	}
}

// filter takes in a pointer to an Entry, and returns a slice of Entry's that
// satisfy the predicate p
func filter(root *Entry, p func(*Entry) bool) []Entry {
	if root != nil {
		var validEntrys []Entry
		if len(root.Children) > 0 || root.Prev != nil {
			for _, c := range root.Children {
				// filter the sub-directory
				if c.Prev != nil {
					validEntrys = append(validEntrys, filter(c, p)...)
				}
				if p(c) {
					if c.Prev == nil {
						validEntrys = append([]Entry{*c}, validEntrys...)
					} else {
						validEntrys = append(validEntrys, *c)
					}
				}
			}
			return validEntrys
		}
	}
	return nil
}

func (ts *tarStream) setErr(err error) {
	ts.err = err
}

func (ts *tarStream) Read(p []byte) (n int, err error) {
	return ts.teeReader.Read(p)
}

func (ts *tarStream) Close() error {
	return ts.pipeReader.Close()
}

// Hierarchy returns the DirectoryHierarchy of the archive. It flattens the
// Entry tree before returning the DirectoryHierarchy
func (ts *tarStream) Hierarchy() (*DirectoryHierarchy, error) {
	if ts.err != nil && ts.err != io.EOF {
		return nil, ts.err
	}
	if ts.root == nil {
		return nil, fmt.Errorf("root Entry not found, nothing to flatten")
	}
	resolveHardlinks(ts.root, ts.hardlinks, InKeywordSlice(Keyword("nlink"), ts.keywords))
	flatten(ts.root, &ts.creator, ts.keywords)
	return ts.creator.DH, nil
}