New Upstream Release - golang-github-shenwei356-xopen

Ready changes

Summary

Merged new upstream version: 0.0~git20230314.34c819a (was: 0.0~git20181203.f4f16dd).

Diff

diff --git a/README.md b/README.md
index 77e36f0..65261f6 100644
--- a/README.md
+++ b/README.md
@@ -1,125 +1,49 @@
-[![GoDoc] (https://godoc.org/github.com/brentp/xopen?status.png)](https://godoc.org/github.com/brentp/xopen)
-[![Build Status](https://travis-ci.org/brentp/xopen.svg)](https://travis-ci.org/brentp/xopen)
-[![Coverage Status](https://coveralls.io/repos/brentp/xopen/badge.svg?branch=master)](https://coveralls.io/r/brentp/xopen?branch=master)
+[![GoDoc](https://godoc.org/github.com/shenwei356/xopen?status.png)](https://godoc.org/github.com/shenwei356/xopen)
+[![Build Status](https://travis-ci.org/shenwei356/xopen.svg)](https://travis-ci.org/shenwei356/xopen)
+[![Coverage Status](https://coveralls.io/repos/shenwei356/xopen/badge.svg?branch=master)](https://coveralls.io/r/shenwei356/xopen?branch=master)
 
 # xopen
---
-    import "github.com/brentp/xopen"
 
-xopen makes it easy to get buffered (possibly gzipped) readers and writers. and
-close all of the associated files. Ropen opens a file for reading. Wopen opens a
-file for writing. Both will use gzip when appropriate and will use buffered IO.
+    import "github.com/shenwei356/xopen"
+
+xopen makes it easy to get buffered (possibly `gzip`-, `xz`-, or `zstd`-compressed) readers and writers, and
+to close all of the associated files. `Ropen` opens a file for reading. `Wopen` opens a
+file for writing.
+
+> This package is forked from https://github.com/brentp/xopen ,
+> but I have modified it heavily :(
 
 ## Usage
 
 Here's how to get a buffered reader:
+
 ```go
 // gzipped
 rdr, err := xopen.Ropen("some.gz")
+// xz compressed
+rdr, err := xopen.Ropen("some.xz")
+// zstd compressed
+rdr, err := xopen.Ropen("some.zst")
 // normal
 rdr, err := xopen.Ropen("some.txt")
-// stdin (possibly gzipped)
+// stdin (possibly gzip-, xz-, or zstd-compressed)
 rdr, err := xopen.Ropen("-")
 // https://
 rdr, err := xopen.Ropen("http://example.com/some-file.txt")
 // Cmd
-rdr, err := xopen.Ropen("|ls -lh somefile.gz")
+rdr, err := xopen.Ropen("| ls -lh somefile.gz")
 // User directory:
-rdr, err := xopen.Ropen("~/brentp/somefile")
-
-```
-Get a buffered writer with `xopen.Wopen`.
-
-
-#### func  CheckBytes
-
-```go
-func CheckBytes(b *bufio.Reader, buf []byte) (bool, error)
-```
-CheckBytes peeks at a buffered stream and checks if the first read bytes match.
-
-#### func  IsGzip
-
-```go
-func IsGzip(b *bufio.Reader) (bool, error)
-```
-IsGzip returns true buffered Reader has the gzip magic.
-
-#### func  IsStdin
+rdr, err := xopen.Ropen("~/shenwei356/somefile")
 
-```go
-func IsStdin() bool
+checkError(err)
+defer checkError(rdr.Close())
 ```
-IsStdin checks if we are getting data from stdin.
 
-#### func  XReader
+Writer
 
 ```go
-func XReader(f string) (io.Reader, error)
-```
-XReader returns a reader from a url string or a file.
+wtr, err := xopen.Wopen("some.gz")
+defer checkError(wtr.Close())
 
-
-#### type Reader
-
-```go
-type Reader struct {
-	*bufio.Reader
-}
-```
-
-Reader is returned by Ropen
-
-#### func  Buf
-
-```go
-func Buf(r io.Reader) *Reader
-```
-Return a buffered reader from an io.Reader If f == "-", then it will attempt to
-read from os.Stdin. If the file is gzipped, it will be read as such.
-
-#### func  Ropen
-
-```go
-func Ropen(f string) (*Reader, error)
-```
-Ropen opens a buffered reader.
-
-#### func (*Reader) Close
-
-```go
-func (r *Reader) Close() error
-```
-Close the associated files.
-
-#### type Writer
-
-```go
-type Writer struct {
-	*bufio.Writer
-}
-```
-
-Writer is returned by Wopen
-
-#### func  Wopen
-
-```go
-func Wopen(f string) (*Writer, error)
-```
-Wopen opens a buffered reader. If f == "-", then stdout will be used. If f
-endswith ".gz", then the output will be gzipped.
-
-#### func (*Writer) Close
-
-```go
-func (w *Writer) Close() error
-```
-Close the associated files.
-
-#### func (*Writer) Flush
-
-```go
-func (w *Writer) Flush()
-```
-Flush the writer.
+outfh.Flush()
+```
\ No newline at end of file
diff --git a/debian/changelog b/debian/changelog
index 7bd2b72..d0dad26 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+golang-github-shenwei356-xopen (0.0~git20230314.34c819a-1) UNRELEASED; urgency=low
+
+  * New upstream snapshot.
+
+ -- Debian Janitor <janitor@jelmer.uk>  Thu, 01 Jun 2023 12:55:50 -0000
+
 golang-github-shenwei356-xopen (0.0~git20181203.f4f16dd-2) unstable; urgency=medium
 
   * Source only upload
diff --git a/fastgzreader.go b/fastgzreader.go
deleted file mode 100644
index 28b75ca..0000000
--- a/fastgzreader.go
+++ /dev/null
@@ -1,80 +0,0 @@
-package xopen
-
-import (
-	"compress/gzip"
-	"io"
-	"os/exec"
-)
-
-// from: https://gist.github.com/rasky/d42a52c16683f1a2f4dccdef80e2712d
-
-// fastGzReader is an API-compatible drop-in replacement
-// for gzip.Reader, that achieves a higher decoding speed
-// by spawning an external gzip instance and pipeing data
-// through it.
-// Go's native gzip implementation is about 2x slower at
-// decompressing data compared to zlib (mostly due to Go compiler
-// inefficiencies). So for tasks where the gzip decoding
-// speed is important, this is a quick workaround that doesn't
-// require cgo.
-// gzip is part of the gzip package and comes preinstalled on
-// most Linux distributions and on OSX.
-type fastGzReader struct {
-	io.ReadCloser
-}
-
-func hasProg(prog ...string) bool {
-	var cmd *exec.Cmd
-	if len(prog) > 1 {
-		cmd = exec.Command(prog[0], prog[1:]...)
-	} else {
-		cmd = exec.Command(prog[0])
-	}
-	err := cmd.Start()
-	has := err == nil
-	cmd.Wait()
-	return has
-}
-
-var hasZlib = hasProg("gzip", "-d")
-var hasPigz = hasProg("pigz", "-d")
-
-func newFastGzReader(r io.Reader) (io.ReadCloser, error) {
-
-	if hasZlib || hasPigz {
-		var gz fastGzReader
-		if err := gz.Reset(r); err != nil {
-			return nil, err
-		}
-		return gz, nil
-	}
-	return gzip.NewReader(r)
-
-}
-
-func (gz *fastGzReader) Reset(r io.Reader) error {
-	if gz.ReadCloser != nil {
-		gz.Close()
-	}
-	var cmd *exec.Cmd
-	if hasPigz {
-		cmd = exec.Command("pigz", "-d")
-	} else {
-		cmd = exec.Command("gzip", "-d")
-	}
-	cmd.Stdin = r
-
-	rpipe, err := cmd.StdoutPipe()
-	if err != nil {
-		return err
-	}
-
-	err = cmd.Start()
-	if err != nil {
-		rpipe.Close()
-		return err
-	}
-
-	gz.ReadCloser = rpipe
-	return nil
-}
diff --git a/xopen.go b/xopen.go
index 47542a4..221fdc9 100644
--- a/xopen.go
+++ b/xopen.go
@@ -16,11 +16,16 @@ import (
 	"path/filepath"
 	"strings"
 
+	"github.com/dsnet/compress/bzip2"
+	"github.com/klauspost/compress/zstd"
 	gzip "github.com/klauspost/pgzip"
-	//"github.com/klauspost/compress/gzip"
-	// "compress/gzip"
+	"github.com/ulikunitz/xz"
 )
 
+// Level is the compression level used by the gzip, zstd, and bzip2 writers; it defaults to gzip's default level.
+// If left at that default, the zstd and bzip2 writers substitute their own default levels.
+var Level = gzip.DefaultCompression
+
 // ErrNoContent means nothing in the stream/file.
 var ErrNoContent = errors.New("xopen: no content")
 
@@ -32,6 +37,21 @@ func IsGzip(b *bufio.Reader) (bool, error) {
 	return CheckBytes(b, []byte{0x1f, 0x8b})
 }
 
+// IsXz returns true if the buffered Reader has the xz magic.
+func IsXz(b *bufio.Reader) (bool, error) {
+	return CheckBytes(b, []byte{0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00})
+}
+
+// IsZst returns true if the buffered Reader has the zstd magic.
+func IsZst(b *bufio.Reader) (bool, error) {
+	return CheckBytes(b, []byte{0x28, 0xB5, 0x2f, 0xfd})
+}
+
+// IsBzip2 returns true if the buffered Reader has the bzip2 magic.
+func IsBzip2(b *bufio.Reader) (bool, error) {
+	return CheckBytes(b, []byte{0x42, 0x5a, 0x68})
+}
+
 // IsStdin checks if we are getting data from stdin.
 func IsStdin() bool {
 	// http://stackoverflow.com/a/26567513
@@ -44,7 +64,7 @@ func IsStdin() bool {
 
 // ExpandUser expands ~/path and ~otheruser/path appropriately
 func ExpandUser(path string) (string, error) {
-	if path[0] != '~' {
+	if len(path) == 0 || path[0] != '~' {
 		return path, nil
 	}
 	var u *user.User
@@ -78,7 +98,8 @@ func CheckBytes(b *bufio.Reader, buf []byte) (bool, error) {
 
 	m, err := b.Peek(len(buf))
 	if err != nil {
-		return false, ErrNoContent
+		// return false, ErrNoContent
+		return false, err // EOF
 	}
 	for i := range buf {
 		if m[i] != buf[i] {
@@ -97,11 +118,18 @@ type Reader struct {
 
 // Close the associated files.
 func (r *Reader) Close() error {
+	var err error
 	if r.gz != nil {
-		r.gz.Close()
+		err = r.gz.Close()
+		if err != nil {
+			return err
+		}
 	}
 	if c, ok := r.rdr.(io.ReadCloser); ok {
-		c.Close()
+		err = c.Close()
+		if err != nil {
+			return err
+		}
 	}
 	return nil
 }
@@ -111,45 +139,147 @@ type Writer struct {
 	*bufio.Writer
 	wtr *os.File
 	gz  *gzip.Writer
+	xw  *xz.Writer
+	zw  *zstd.Encoder
+	bz2 *bzip2.Writer
 }
 
 // Close the associated files.
 func (w *Writer) Close() error {
-	w.Flush()
+	var err error
+	err = w.Flush()
+	if err != nil {
+		return err
+	}
+
 	if w.gz != nil {
-		w.gz.Close()
+		err = w.gz.Close()
+		if err != nil {
+			return err
+		}
 	}
-	w.wtr.Close()
-	return nil
+	if w.xw != nil {
+		err = w.xw.Close()
+		if err != nil {
+			return err
+		}
+	}
+	if w.zw != nil {
+		err = w.zw.Close()
+		if err != nil {
+			return err
+		}
+	}
+	if w.bz2 != nil {
+		err = w.bz2.Close()
+		if err != nil {
+			return err
+		}
+	}
+	return w.wtr.Close()
 }
 
 // Flush the writer.
-func (w *Writer) Flush() {
-	w.Writer.Flush()
+func (w *Writer) Flush() error {
+	var err error
+	err = w.Writer.Flush()
+	if err != nil {
+		return err
+	}
+
 	if w.gz != nil {
-		w.gz.Flush()
+		err = w.gz.Flush()
+		if err != nil {
+			return err
+		}
+	}
+	if w.zw != nil {
+		err = w.zw.Flush()
+		if err != nil {
+			return err
+		}
 	}
+	return nil
 }
 
-var pageSize = os.Getpagesize() * 2
+var bufSize = 65536
 
 // Buf returns a buffered reader from an io.Reader
 // If f == "-", then it will attempt to read from os.Stdin.
 // If the file is gzipped, it will be read as such.
 func Buf(r io.Reader) (*Reader, error) {
-	b := bufio.NewReaderSize(r, pageSize)
+	b := bufio.NewReaderSize(r, bufSize)
+	var rd io.Reader
 	var rdr io.ReadCloser
-	if is, err := IsGzip(b); err != nil && err != io.EOF {
-		return nil, err
+
+	if is, err := IsGzip(b); err != nil {
+		// check BOM
+		t, _, err := b.ReadRune() // no content
+		if err != nil {
+			return nil, ErrNoContent
+		}
+		if t != '\uFEFF' {
+			b.UnreadRune()
+		}
+		return &Reader{b, r, rdr}, nil // non-gzip file with content less than 2 bytes
 	} else if is {
-		// rdr, err = newFastGzReader(b)
 		rdr, err = gzip.NewReader(b)
 		if err != nil {
 			return nil, err
 		}
-		b = bufio.NewReaderSize(rdr, pageSize)
+		b = bufio.NewReaderSize(rdr, bufSize)
+	} else if is, err := IsZst(b); err != nil {
+		// check BOM
+		t, _, err := b.ReadRune() // no content
+		if err != nil {
+			return nil, ErrNoContent
+		}
+		if t != '\uFEFF' {
+			b.UnreadRune()
+		}
+		return &Reader{b, r, rdr}, nil // non-gzip/zst file with content less than 4 bytes
+	} else if is {
+		rd, err = zstd.NewReader(b)
+		if err != nil {
+			return nil, err
+		}
+		b = bufio.NewReaderSize(rd, bufSize)
+	} else if is, err := IsXz(b); err != nil {
+		// check BOM
+		t, _, err := b.ReadRune() // no content
+		if err != nil {
+			return nil, ErrNoContent
+		}
+		if t != '\uFEFF' {
+			b.UnreadRune()
+		}
+		return &Reader{b, r, rdr}, nil // non-gzip/zst/xz file with content less than 6 bytes
+	} else if is {
+		rd, err = xz.NewReader(b)
+		if err != nil {
+			return nil, err
+		}
+		b = bufio.NewReaderSize(rd, bufSize)
+	} else if is, err := IsBzip2(b); err != nil {
+		// check BOM
+		t, _, err := b.ReadRune() // no content
+		if err != nil {
+			return nil, ErrNoContent
+		}
+		if t != '\uFEFF' {
+			b.UnreadRune()
+		}
+		return &Reader{b, r, rdr}, nil // non-gzip/zst/xz file with content less than 6 bytes
+	} else if is {
+		rd, err = bzip2.NewReader(b, &bzip2.ReaderConfig{})
+		if err != nil {
+			return nil, err
+		}
+		b = bufio.NewReaderSize(rd, bufSize)
 	}
 
+	// other files with content >= 6 bytes
+
 	// check BOM
 	t, _, err := b.ReadRune()
 	if err != nil {
@@ -231,35 +361,19 @@ func Ropen(f string) (*Reader, error) {
 // Wopen opens a buffered reader.
 // If f == "-", then stdout will be used.
 // If f endswith ".gz", then the output will be gzipped.
+// If f endswith ".xz", then the output will be xz-compressed.
+// If f endswith ".zst", then the output will be zstd-compressed.
+// If f endswith ".bz2", then the output will be bzip2-compressed.
 func Wopen(f string) (*Writer, error) {
-	var wtr *os.File
-	if f == "-" {
-		wtr = os.Stdout
-	} else {
-		dir := filepath.Dir(f)
-		fi, err := os.Stat(dir)
-		if err == nil && !fi.IsDir() {
-			return nil, fmt.Errorf("can not write file into a non-directory path: %s", dir)
-		}
-		if os.IsNotExist(err) {
-			os.MkdirAll(dir, 0755)
-		}
-
-		wtr, err = os.Create(f)
-		if err != nil {
-			return nil, err
-		}
-	}
-	if !strings.HasSuffix(f, ".gz") {
-		return &Writer{bufio.NewWriterSize(wtr, pageSize), wtr, nil}, nil
-	}
-	gz := gzip.NewWriter(wtr)
-	return &Writer{bufio.NewWriterSize(gz, pageSize), wtr, gz}, nil
+	return WopenFile(f, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666)
 }
 
-// WopenGzip opens a buffered gzipped reader.
+// WopenFile opens a buffered writer.
 // If f == "-", then stdout will be used.
-func WopenGzip(f string) (*Writer, error) {
+// If f endswith ".gz", then the output will be gzipped.
+// If f endswith ".xz", then the output will be xz-compressed.
+// If f endswith ".bz2", then the output will be bzip2-compressed.
+func WopenFile(f string, flag int, perm os.FileMode) (*Writer, error) {
 	var wtr *os.File
 	if f == "-" {
 		wtr = os.Stdout
@@ -272,39 +386,45 @@ func WopenGzip(f string) (*Writer, error) {
 		if os.IsNotExist(err) {
 			os.MkdirAll(dir, 0755)
 		}
-		wtr, err = os.Create(f)
+		wtr, err = os.OpenFile(f, flag, perm)
 		if err != nil {
 			return nil, err
 		}
 	}
-	gz := gzip.NewWriter(wtr)
-	return &Writer{bufio.NewWriterSize(gz, pageSize), wtr, gz}, nil
-}
 
-// WopenFile opens a buffered reader.
-// If f == "-", then stdout will be used.
-// If f endswith ".gz", then the output will be gzipped.
-func WopenFile(f string, flag int, perm os.FileMode) (*Writer, error) {
-	var wtr *os.File
-	if f == "-" {
-		wtr = os.Stdout
-	} else {
-		dir := filepath.Dir(f)
-		fi, err := os.Stat(dir)
-		if err == nil && !fi.IsDir() {
-			return nil, fmt.Errorf("can not write file into a non-directory path: %s", dir)
+	f2 := strings.ToLower(f)
+	if strings.HasSuffix(f2, ".gz") {
+		gz, err := gzip.NewWriterLevel(wtr, Level)
+		if err != nil {
+			err = errors.New(fmt.Sprintf("xopen: %s", err))
 		}
-		if os.IsNotExist(err) {
-			os.MkdirAll(dir, 0755)
+		return &Writer{bufio.NewWriterSize(gz, bufSize), wtr, gz, nil, nil, nil}, err
+	}
+	if strings.HasSuffix(f2, ".xz") {
+		xw, err := xz.NewWriter(wtr)
+		return &Writer{bufio.NewWriterSize(xw, bufSize), wtr, nil, xw, nil, nil}, err
+	}
+	if strings.HasSuffix(f2, ".zst") {
+		level := Level
+		if level == gzip.DefaultCompression {
+			level = 2
 		}
-		wtr, err = os.OpenFile(f, flag, perm)
+		zw, err := zstd.NewWriter(wtr, zstd.WithEncoderLevel(zstd.EncoderLevel(level)))
 		if err != nil {
-			return nil, err
+			err = errors.New(fmt.Sprintf("xopen: zstd: %s", err))
 		}
+		return &Writer{bufio.NewWriterSize(zw, bufSize), wtr, nil, nil, zw, nil}, err
 	}
-	if !strings.HasSuffix(f, ".gz") {
-		return &Writer{bufio.NewWriterSize(wtr, pageSize), wtr, nil}, nil
+	if strings.HasSuffix(f2, ".bz2") {
+		level := Level
+		if level == gzip.DefaultCompression {
+			level = 6
+		}
+		bz2, err := bzip2.NewWriter(wtr, &bzip2.WriterConfig{Level: level})
+		if err != nil {
+			err = errors.New(fmt.Sprintf("xopen: %s", err))
+		}
+		return &Writer{bufio.NewWriterSize(bz2, bufSize), wtr, nil, nil, nil, bz2}, err
 	}
-	gz := gzip.NewWriter(wtr)
-	return &Writer{bufio.NewWriterSize(gz, pageSize), wtr, gz}, nil
+	return &Writer{bufio.NewWriterSize(wtr, bufSize), wtr, nil, nil, nil, nil}, nil
 }
diff --git a/xopen_test.go b/xopen_test.go
index e30046c..7879b8e 100644
--- a/xopen_test.go
+++ b/xopen_test.go
@@ -49,7 +49,7 @@ func (s *XopenTest) TestIsStdin(c *C) {
 
 func (s *XopenTest) TestRopen(c *C) {
 	rdr, err := Ropen("-")
-	c.Assert(err, ErrorMatches, ".* stdin not detected")
+	c.Assert(err, ErrorMatches, "stdin not detected")
 	c.Assert(rdr, IsNil)
 }
 
@@ -130,13 +130,7 @@ func (s *XopenTest) TestOpenStdout(c *C) {
 func (s *XopenTest) TestOpenBadFile(c *C) {
 	r, err := Ropen("XXXXXXXXXXXXXXXXXXXXXXX")
 	c.Assert(r, IsNil)
-	c.Assert(err, ErrorMatches, ".* no such file .*")
-}
-
-func (s *XopenTest) TestWOpenBadFile(c *C) {
-	w, err := Wopen("XX/XXX/XXX/XXX/XXX/XXXXXXXXX")
-	c.Assert(w, IsNil)
-	c.Assert(err, ErrorMatches, ".* no such file .*")
+	c.Assert(err, ErrorMatches, ".*no such file.*")
 }
 
 func (s *XopenTest) TestExists(c *C) {

More details

Full run details

Historical runs