main.go 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  1. package main
  2. import (
  3. "bufio"
  4. "errors"
  5. "flag"
  6. "fmt"
  7. "io"
  8. "io/ioutil"
  9. "log"
  10. "os"
  11. "path/filepath"
  12. "runtime"
  13. "runtime/pprof"
  14. "runtime/trace"
  15. "strconv"
  16. "strings"
  17. "sync"
  18. "time"
  19. "unicode"
  20. "github.com/klauspost/compress/s2"
  21. "github.com/klauspost/compress/s2/cmd/internal/readahead"
  22. )
  23. var (
  24. faster = flag.Bool("faster", false, "Compress faster, but with a minor compression loss")
  25. cpu = flag.Int("cpu", runtime.GOMAXPROCS(0), "Compress using this amount of threads")
  26. blockSize = flag.String("blocksize", "4M", "Max block size. Examples: 64K, 256K, 1M, 4M. Must be power of two and <= 4MB")
  27. safe = flag.Bool("safe", false, "Do not overwrite output files")
  28. padding = flag.String("pad", "1", "Pad size to a multiple of this value, Examples: 500, 64K, 256K, 1M, 4M, etc")
  29. stdout = flag.Bool("c", false, "Write all output to stdout. Multiple input files will be concatenated")
  30. remove = flag.Bool("rm", false, "Delete source file(s) after successful compression")
  31. quiet = flag.Bool("q", false, "Don't write any output to terminal, except errors")
  32. bench = flag.Int("bench", 0, "Run benchmark n times. No output will be written")
  33. help = flag.Bool("help", false, "Display help")
  34. cpuprofile, memprofile, traceprofile string
  35. version = "(dev)"
  36. date = "(unknown)"
  37. )
  38. func main() {
  39. if false {
  40. flag.StringVar(&cpuprofile, "cpuprofile", "", "write cpu profile to file")
  41. flag.StringVar(&memprofile, "memprofile", "", "write mem profile to file")
  42. flag.StringVar(&traceprofile, "traceprofile", "", "write trace profile to file")
  43. }
  44. flag.Parse()
  45. sz, err := toSize(*blockSize)
  46. exitErr(err)
  47. pad, err := toSize(*padding)
  48. exitErr(err)
  49. args := flag.Args()
  50. if len(args) == 0 || *help {
  51. _, _ = fmt.Fprintf(os.Stderr, "s2 compress v%v, built at %v.\n\n", version, date)
  52. _, _ = fmt.Fprintf(os.Stderr, "Copyright (c) 2011 The Snappy-Go Authors. All rights reserved.\n"+
  53. "Copyright (c) 2019 Klaus Post. All rights reserved.\n\n")
  54. _, _ = fmt.Fprintln(os.Stderr, `Usage: s2c [options] file1 file2
  55. Compresses all files supplied as input separately.
  56. Output files are written as 'filename.ext.s2'.
  57. By default output files will be overwritten.
  58. Use - as the only file name to read from stdin and write to stdout.
  59. Wildcards are accepted: testdir/*.txt will compress all files in testdir ending with .txt
  60. Directories can be wildcards as well. testdir/*/*.txt will match testdir/subdir/b.txt
  61. Options:`)
  62. flag.PrintDefaults()
  63. }
  64. opts := []s2.WriterOption{s2.WriterBlockSize(int(sz)), s2.WriterConcurrency(*cpu), s2.WriterPadding(int(pad))}
  65. if !*faster {
  66. opts = append(opts, s2.WriterBetterCompression())
  67. }
  68. wr := s2.NewWriter(nil, opts...)
  69. // No args, use stdin/stdout
  70. if len(args) == 1 && args[0] == "-" {
  71. wr.Reset(os.Stdout)
  72. _, err := io.Copy(wr, os.Stdin)
  73. exitErr(err)
  74. exitErr(wr.Close())
  75. return
  76. }
  77. var files []string
  78. for _, pattern := range args {
  79. found, err := filepath.Glob(pattern)
  80. exitErr(err)
  81. if len(found) == 0 {
  82. exitErr(fmt.Errorf("unable to find file %v", pattern))
  83. }
  84. files = append(files, found...)
  85. }
  86. if cpuprofile != "" {
  87. f, err := os.Create(cpuprofile)
  88. if err != nil {
  89. log.Fatal(err)
  90. }
  91. pprof.StartCPUProfile(f)
  92. defer pprof.StopCPUProfile()
  93. }
  94. if memprofile != "" {
  95. f, err := os.Create(memprofile)
  96. if err != nil {
  97. log.Fatal(err)
  98. }
  99. defer f.Close()
  100. defer pprof.WriteHeapProfile(f)
  101. }
  102. if traceprofile != "" {
  103. f, err := os.Create(traceprofile)
  104. if err != nil {
  105. log.Fatal(err)
  106. }
  107. defer f.Close()
  108. err = trace.Start(f)
  109. if err != nil {
  110. log.Fatal(err)
  111. }
  112. defer trace.Stop()
  113. }
  114. *quiet = *quiet || *stdout
  115. allFiles := files
  116. for i := 0; i < *bench; i++ {
  117. files = append(files, allFiles...)
  118. }
  119. for _, filename := range files {
  120. func() {
  121. var closeOnce sync.Once
  122. dstFilename := fmt.Sprintf("%s%s", filename, ".s2")
  123. if *bench > 0 {
  124. dstFilename = "(discarded)"
  125. }
  126. if !*quiet {
  127. fmt.Println("Compressing", filename, "->", dstFilename)
  128. }
  129. // Input file.
  130. file, err := os.Open(filename)
  131. exitErr(err)
  132. defer closeOnce.Do(func() { file.Close() })
  133. src, err := readahead.NewReaderSize(file, *cpu+1, 1<<20)
  134. exitErr(err)
  135. defer src.Close()
  136. finfo, err := file.Stat()
  137. exitErr(err)
  138. var out io.Writer
  139. switch {
  140. case *bench > 0:
  141. out = ioutil.Discard
  142. case *stdout:
  143. out = os.Stdout
  144. default:
  145. mode := finfo.Mode() // use the same mode for the output file
  146. if *safe {
  147. _, err := os.Stat(dstFilename)
  148. if !os.IsNotExist(err) {
  149. exitErr(errors.New("destination file exists"))
  150. }
  151. }
  152. dstFile, err := os.OpenFile(dstFilename, os.O_CREATE|os.O_WRONLY, mode)
  153. exitErr(err)
  154. defer dstFile.Close()
  155. bw := bufio.NewWriterSize(dstFile, int(sz)*2)
  156. defer bw.Flush()
  157. out = bw
  158. }
  159. wc := wCounter{out: out}
  160. wr.Reset(&wc)
  161. defer wr.Close()
  162. start := time.Now()
  163. input, err := wr.ReadFrom(src)
  164. exitErr(err)
  165. err = wr.Close()
  166. exitErr(err)
  167. if !*quiet {
  168. elapsed := time.Since(start)
  169. mbpersec := (float64(input) / (1024 * 1024)) / (float64(elapsed) / (float64(time.Second)))
  170. pct := float64(wc.n) * 100 / float64(input)
  171. fmt.Printf("%d -> %d [%.02f%%]; %.01fMB/s\n", input, wc.n, pct, mbpersec)
  172. }
  173. if *remove {
  174. closeOnce.Do(func() {
  175. file.Close()
  176. err := os.Remove(filename)
  177. exitErr(err)
  178. })
  179. }
  180. }()
  181. }
  182. }
  183. func exitErr(err error) {
  184. if err != nil {
  185. fmt.Fprintln(os.Stderr, "ERROR:", err.Error())
  186. os.Exit(2)
  187. }
  188. }
  189. // toSize converts a size indication to bytes.
  190. func toSize(size string) (uint64, error) {
  191. size = strings.ToUpper(strings.TrimSpace(size))
  192. firstLetter := strings.IndexFunc(size, unicode.IsLetter)
  193. if firstLetter == -1 {
  194. firstLetter = len(size)
  195. }
  196. bytesString, multiple := size[:firstLetter], size[firstLetter:]
  197. bytes, err := strconv.ParseUint(bytesString, 10, 64)
  198. if err != nil {
  199. return 0, fmt.Errorf("unable to parse size: %v", err)
  200. }
  201. switch multiple {
  202. case "M", "MB", "MIB":
  203. return bytes * 1 << 20, nil
  204. case "K", "KB", "KIB":
  205. return bytes * 1 << 10, nil
  206. case "B", "":
  207. return bytes, nil
  208. default:
  209. return 0, fmt.Errorf("unknown size suffix: %v", multiple)
  210. }
  211. }
  212. type wCounter struct {
  213. n int
  214. out io.Writer
  215. }
  216. func (w *wCounter) Write(p []byte) (n int, err error) {
  217. n, err = w.out.Write(p)
  218. w.n += n
  219. return n, err
  220. }