| // Copyright 2019+ Klaus Post. All rights reserved. |
| // License information can be found in the LICENSE file. |
| // Based on work by Yann Collet, released under BSD License. |
| |
| package zstd |
| |
| import ( |
| "bytes" |
| "fmt" |
| "io" |
| "math/rand" |
| "os" |
| "runtime" |
| "strings" |
| "sync" |
| "testing" |
| "time" |
| |
| "github.com/klauspost/compress/zip" |
| "github.com/klauspost/compress/zstd/internal/xxhash" |
| ) |
| |
// testWindowSizes lists the encoder window sizes, in bytes, that the tests in
// this file iterate over.
var testWindowSizes = []int{MinWindowSize, 1 << 16, 1 << 22, 1 << 24}
| |
// testEncOpt pairs a set of encoder options with the name used for the
// subtest that exercises them.
type testEncOpt struct {
	name string    // subtest name
	o    []EOption // options handed to NewWriter
}
| |
| func getEncOpts(cMax int) []testEncOpt { |
| var o []testEncOpt |
| for level := speedNotSet + 1; level < speedLast; level++ { |
| if isRaceTest && level >= SpeedBestCompression { |
| break |
| } |
| for conc := 1; conc <= 4; conc *= 2 { |
| for _, wind := range testWindowSizes { |
| addOpt := func(name string, options ...EOption) { |
| opts := append([]EOption(nil), WithEncoderLevel(level), WithEncoderConcurrency(conc), WithWindowSize(wind)) |
| name = fmt.Sprintf("%s-c%d-w%dk-%s", level.String(), conc, wind/1024, name) |
| o = append(o, testEncOpt{name: name, o: append(opts, options...)}) |
| } |
| addOpt("default") |
| if testing.Short() { |
| break |
| } |
| addOpt("nocrc", WithEncoderCRC(false)) |
| addOpt("lowmem", WithLowerEncoderMem(true)) |
| addOpt("alllit", WithAllLitEntropyCompression(true)) |
| addOpt("nolit", WithNoEntropyCompression(true)) |
| addOpt("pad1k", WithEncoderPadding(1024)) |
| addOpt("zerof", WithZeroFrames(true)) |
| addOpt("1seg", WithSingleSegment(true)) |
| } |
| if testing.Short() && conc == 2 { |
| break |
| } |
| if conc >= cMax { |
| break |
| } |
| } |
| } |
| return o |
| } |
| |
| func TestEncoder_EncodeAllSimple(t *testing.T) { |
| in, err := os.ReadFile("testdata/z000028") |
| if err != nil { |
| t.Fatal(err) |
| } |
| dec, err := NewReader(nil) |
| if err != nil { |
| t.Fatal(err) |
| } |
| defer dec.Close() |
| |
| in = append(in, in...) |
| for _, opts := range getEncOpts(4) { |
| t.Run(opts.name, func(t *testing.T) { |
| runtime.GC() |
| e, err := NewWriter(nil, opts.o...) |
| if err != nil { |
| t.Fatal(err) |
| } |
| defer e.Close() |
| start := time.Now() |
| dst := e.EncodeAll(in, nil) |
| //t.Log("Simple Encoder len", len(in), "-> zstd len", len(dst)) |
| mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second))) |
| t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec) |
| |
| decoded, err := dec.DecodeAll(dst, nil) |
| if err != nil { |
| t.Error(err, len(decoded)) |
| } |
| if !bytes.Equal(decoded, in) { |
| os.WriteFile("testdata/"+t.Name()+"-z000028.got", decoded, os.ModePerm) |
| os.WriteFile("testdata/"+t.Name()+"-z000028.want", in, os.ModePerm) |
| t.Fatal("Decoded does not match") |
| } |
| //t.Log("Encoded content matched") |
| }) |
| } |
| } |
| |
| func TestEncoder_EncodeAllConcurrent(t *testing.T) { |
| in, err := os.ReadFile("testdata/z000028") |
| if err != nil { |
| t.Fatal(err) |
| } |
| in = append(in, in...) |
| |
| // When running race no more than 8k goroutines allowed. |
| n := 400 / runtime.GOMAXPROCS(0) |
| if testing.Short() { |
| n = 20 / runtime.GOMAXPROCS(0) |
| } |
| dec, err := NewReader(nil) |
| if err != nil { |
| t.Fatal(err) |
| } |
| defer dec.Close() |
| for _, opts := range getEncOpts(2) { |
| t.Run(opts.name, func(t *testing.T) { |
| rng := rand.New(rand.NewSource(0x1337)) |
| e, err := NewWriter(nil, opts.o...) |
| if err != nil { |
| t.Fatal(err) |
| } |
| defer e.Close() |
| var wg sync.WaitGroup |
| wg.Add(n) |
| for i := 0; i < n; i++ { |
| in := in[rng.Int()&1023:] |
| in = in[:rng.Intn(len(in))] |
| go func() { |
| defer wg.Done() |
| dst := e.EncodeAll(in, nil) |
| if len(dst) > e.MaxEncodedSize(len(in)) { |
| t.Errorf("max encoded size for %v: got: %d, want max: %d", len(in), len(dst), e.MaxEncodedSize(len(in))) |
| } |
| //t.Log("Simple Encoder len", len(in), "-> zstd len", len(dst)) |
| decoded, err := dec.DecodeAll(dst, nil) |
| if err != nil { |
| t.Error(err, len(decoded)) |
| } |
| if !bytes.Equal(decoded, in) { |
| //os.WriteFile("testdata/"+t.Name()+"-z000028.got", decoded, os.ModePerm) |
| //os.WriteFile("testdata/"+t.Name()+"-z000028.want", in, os.ModePerm) |
| t.Error("Decoded does not match") |
| return |
| } |
| }() |
| } |
| wg.Wait() |
| //t.Log("Encoded content matched.", n, "goroutines") |
| }) |
| } |
| } |
| |
| func TestEncoder_EncodeAllEncodeXML(t *testing.T) { |
| f, err := os.Open("testdata/xml.zst") |
| if err != nil { |
| t.Fatal(err) |
| } |
| defer f.Close() |
| |
| dec, err := NewReader(f) |
| if err != nil { |
| t.Fatal(err) |
| } |
| defer dec.Close() |
| in, err := io.ReadAll(dec) |
| if err != nil { |
| t.Fatal(err) |
| } |
| if testing.Short() { |
| in = in[:10000] |
| } |
| |
| for level := speedNotSet + 1; level < speedLast; level++ { |
| t.Run(level.String(), func(t *testing.T) { |
| if isRaceTest && level >= SpeedBestCompression { |
| t.SkipNow() |
| } |
| e, err := NewWriter(nil, WithEncoderLevel(level)) |
| if err != nil { |
| t.Fatal(err) |
| } |
| defer e.Close() |
| start := time.Now() |
| dst := e.EncodeAll(in, nil) |
| if len(dst) > e.MaxEncodedSize(len(in)) { |
| t.Errorf("max encoded size for %v: got: %d, want max: %d", len(in), len(dst), e.MaxEncodedSize(len(in))) |
| } |
| //t.Log("Simple Encoder len", len(in), "-> zstd len", len(dst)) |
| mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second))) |
| t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec) |
| |
| decoded, err := dec.DecodeAll(dst, nil) |
| if err != nil { |
| t.Error(err, len(decoded)) |
| } |
| if !bytes.Equal(decoded, in) { |
| os.WriteFile("testdata/"+t.Name()+"-xml.got", decoded, os.ModePerm) |
| t.Error("Decoded does not match") |
| return |
| } |
| //t.Log("Encoded content matched") |
| }) |
| } |
| } |
| |
| func TestEncoderRegression(t *testing.T) { |
| defer timeout(4 * time.Minute)() |
| data, err := os.ReadFile("testdata/comp-crashers.zip") |
| if err != nil { |
| t.Fatal(err) |
| } |
| // We can't close the decoder. |
| dec, err := NewReader(nil) |
| if err != nil { |
| t.Error(err) |
| return |
| } |
| defer dec.Close() |
| for _, opts := range getEncOpts(2) { |
| t.Run(opts.name, func(t *testing.T) { |
| zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) |
| if err != nil { |
| t.Fatal(err) |
| } |
| enc, err := NewWriter( |
| nil, |
| opts.o..., |
| ) |
| if err != nil { |
| t.Fatal(err) |
| } |
| defer enc.Close() |
| |
| for i, tt := range zr.File { |
| if !strings.HasSuffix(t.Name(), "") { |
| continue |
| } |
| if testing.Short() && i > 10 { |
| break |
| } |
| |
| t.Run(tt.Name, func(t *testing.T) { |
| r, err := tt.Open() |
| if err != nil { |
| t.Error(err) |
| return |
| } |
| in, err := io.ReadAll(r) |
| if err != nil { |
| t.Error(err) |
| } |
| encoded := enc.EncodeAll(in, nil) |
| if len(encoded) > enc.MaxEncodedSize(len(in)) { |
| t.Errorf("max encoded size for %v: got: %d, want max: %d", len(in), len(encoded), enc.MaxEncodedSize(len(in))) |
| } |
| // Usually too small... |
| got, err := dec.DecodeAll(encoded, make([]byte, 0, len(in))) |
| if err != nil { |
| t.Logf("error: %v\nwant: %v\ngot: %v", err, len(in), len(got)) |
| t.Fatal(err) |
| } |
| // Use the Writer |
| var dst bytes.Buffer |
| enc.ResetContentSize(&dst, int64(len(in))) |
| _, err = enc.Write(in) |
| if err != nil { |
| t.Error(err) |
| } |
| err = enc.Close() |
| if err != nil { |
| t.Error(err) |
| } |
| encoded = dst.Bytes() |
| if len(encoded) > enc.MaxEncodedSize(len(in)) { |
| t.Errorf("max encoded size for %v: got: %d, want max: %d", len(in), len(encoded), enc.MaxEncodedSize(len(in))) |
| } |
| got, err = dec.DecodeAll(encoded, make([]byte, 0, len(in)/2)) |
| if err != nil { |
| t.Logf("error: %v\nwant: %v\ngot: %v", err, in, got) |
| t.Error(err) |
| } |
| }) |
| } |
| }) |
| } |
| } |
| |
| func TestEncoder_EncodeAllTwain(t *testing.T) { |
| in, err := os.ReadFile("../testdata/Mark.Twain-Tom.Sawyer.txt") |
| if err != nil { |
| t.Fatal(err) |
| } |
| testWindowSizes := testWindowSizes |
| if testing.Short() { |
| testWindowSizes = []int{1 << 20} |
| } |
| |
| dec, err := NewReader(nil) |
| if err != nil { |
| t.Fatal(err) |
| } |
| defer dec.Close() |
| |
| for level := speedNotSet + 1; level < speedLast; level++ { |
| t.Run(level.String(), func(t *testing.T) { |
| if isRaceTest && level >= SpeedBestCompression { |
| t.SkipNow() |
| } |
| for _, windowSize := range testWindowSizes { |
| t.Run(fmt.Sprintf("window:%d", windowSize), func(t *testing.T) { |
| e, err := NewWriter(nil, WithEncoderLevel(level), WithWindowSize(windowSize)) |
| if err != nil { |
| t.Fatal(err) |
| } |
| defer e.Close() |
| start := time.Now() |
| dst := e.EncodeAll(in, nil) |
| t.Log("Simple Encoder len", len(in), "-> zstd len", len(dst)) |
| mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second))) |
| t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec) |
| |
| decoded, err := dec.DecodeAll(dst, nil) |
| if err != nil { |
| t.Error(err, len(decoded)) |
| } |
| if !bytes.Equal(decoded, in) { |
| os.WriteFile("testdata/"+t.Name()+"-Mark.Twain-Tom.Sawyer.txt.got", decoded, os.ModePerm) |
| t.Fatal("Decoded does not match") |
| } |
| t.Log("Encoded content matched") |
| }) |
| } |
| }) |
| } |
| } |
| |
| func TestEncoder_EncodeAllPi(t *testing.T) { |
| in, err := os.ReadFile("../testdata/pi.txt") |
| if err != nil { |
| t.Fatal(err) |
| } |
| testWindowSizes := testWindowSizes |
| if testing.Short() { |
| testWindowSizes = []int{1 << 20} |
| } |
| |
| dec, err := NewReader(nil) |
| if err != nil { |
| t.Fatal(err) |
| } |
| defer dec.Close() |
| |
| for level := speedNotSet + 1; level < speedLast; level++ { |
| t.Run(level.String(), func(t *testing.T) { |
| if isRaceTest && level >= SpeedBestCompression { |
| t.SkipNow() |
| } |
| for _, windowSize := range testWindowSizes { |
| t.Run(fmt.Sprintf("window:%d", windowSize), func(t *testing.T) { |
| e, err := NewWriter(nil, WithEncoderLevel(level), WithWindowSize(windowSize)) |
| if err != nil { |
| t.Fatal(err) |
| } |
| defer e.Close() |
| start := time.Now() |
| dst := e.EncodeAll(in, nil) |
| t.Log("Simple Encoder len", len(in), "-> zstd len", len(dst)) |
| mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second))) |
| t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec) |
| |
| decoded, err := dec.DecodeAll(dst, nil) |
| if err != nil { |
| t.Error(err, len(decoded)) |
| } |
| if !bytes.Equal(decoded, in) { |
| os.WriteFile("testdata/"+t.Name()+"-pi.txt.got", decoded, os.ModePerm) |
| t.Fatal("Decoded does not match") |
| } |
| t.Log("Encoded content matched") |
| }) |
| } |
| }) |
| } |
| } |
| |
| func TestWithEncoderPadding(t *testing.T) { |
| n := 100 |
| if testing.Short() { |
| n = 2 |
| } |
| rng := rand.New(rand.NewSource(0x1337)) |
| d, err := NewReader(nil) |
| if err != nil { |
| t.Fatal(err) |
| } |
| defer d.Close() |
| |
| for i := 0; i < n; i++ { |
| padding := (rng.Int() & 0xfff) + 1 |
| src := make([]byte, (rng.Int()&0xfffff)+1) |
| for i := range src { |
| src[i] = uint8(rng.Uint32()) & 7 |
| } |
| e, err := NewWriter(nil, WithEncoderPadding(padding), WithEncoderCRC(rng.Uint32()&1 == 0)) |
| if err != nil { |
| t.Fatal(err) |
| } |
| // Test the added padding is invisible. |
| dst := e.EncodeAll(src, nil) |
| if len(dst)%padding != 0 { |
| t.Fatalf("wanted size to be mutiple of %d, got size %d with remainder %d", padding, len(dst), len(dst)%padding) |
| } |
| got, err := d.DecodeAll(dst, nil) |
| if err != nil { |
| t.Fatal(err) |
| } |
| if !bytes.Equal(src, got) { |
| t.Fatal("output mismatch") |
| } |
| // Test when we supply data as well. |
| dst = e.EncodeAll(src, make([]byte, rng.Int()&255)) |
| if len(dst)%padding != 0 { |
| t.Fatalf("wanted size to be mutiple of %d, got size %d with remainder %d", padding, len(dst), len(dst)%padding) |
| } |
| |
| // Test using the writer. |
| var buf bytes.Buffer |
| e.ResetContentSize(&buf, int64(len(src))) |
| _, err = io.Copy(e, bytes.NewBuffer(src)) |
| if err != nil { |
| t.Fatal(err) |
| } |
| err = e.Close() |
| if err != nil { |
| t.Fatal(err) |
| } |
| dst = buf.Bytes() |
| if len(dst)%padding != 0 { |
| t.Fatalf("wanted size to be mutiple of %d, got size %d with remainder %d", padding, len(dst), len(dst)%padding) |
| } |
| // Test the added padding is invisible. |
| got, err = d.DecodeAll(dst, nil) |
| if err != nil { |
| t.Fatal(err) |
| } |
| if !bytes.Equal(src, got) { |
| t.Fatal("output mismatch") |
| } |
| // Try after reset |
| buf.Reset() |
| e.Reset(&buf) |
| _, err = io.Copy(e, bytes.NewBuffer(src)) |
| if err != nil { |
| t.Fatal(err) |
| } |
| err = e.Close() |
| if err != nil { |
| t.Fatal(err) |
| } |
| dst = buf.Bytes() |
| if len(dst)%padding != 0 { |
| t.Fatalf("wanted size to be mutiple of %d, got size %d with remainder %d", padding, len(dst), len(dst)%padding) |
| } |
| // Test the added padding is invisible. |
| got, err = d.DecodeAll(dst, nil) |
| if err != nil { |
| t.Fatal(err) |
| } |
| if !bytes.Equal(src, got) { |
| t.Fatal("output mismatch") |
| } |
| } |
| } |
| func TestEncoder_EncoderXML(t *testing.T) { |
| testEncoderRoundtrip(t, "./testdata/xml.zst", []byte{0x56, 0x54, 0x69, 0x8e, 0x40, 0x50, 0x11, 0xe}) |
| testEncoderRoundtripWriter(t, "./testdata/xml.zst", []byte{0x56, 0x54, 0x69, 0x8e, 0x40, 0x50, 0x11, 0xe}) |
| } |
| |
| func TestEncoder_EncoderTwain(t *testing.T) { |
| testEncoderRoundtrip(t, "../testdata/Mark.Twain-Tom.Sawyer.txt", []byte{0x12, 0x1f, 0x12, 0x70, 0x79, 0x37, 0x1f, 0xc6}) |
| testEncoderRoundtripWriter(t, "../testdata/Mark.Twain-Tom.Sawyer.txt", []byte{0x12, 0x1f, 0x12, 0x70, 0x79, 0x37, 0x1f, 0xc6}) |
| } |
| |
| func TestEncoder_EncoderPi(t *testing.T) { |
| testEncoderRoundtrip(t, "../testdata/pi.txt", []byte{0xe7, 0xe5, 0x25, 0x39, 0x92, 0xc7, 0x4a, 0xfb}) |
| testEncoderRoundtripWriter(t, "../testdata/pi.txt", []byte{0xe7, 0xe5, 0x25, 0x39, 0x92, 0xc7, 0x4a, 0xfb}) |
| } |
| |
| func TestEncoder_EncoderSilesia(t *testing.T) { |
| testEncoderRoundtrip(t, "testdata/silesia.tar", []byte{0xa5, 0x5b, 0x5e, 0xe, 0x5e, 0xea, 0x51, 0x6b}) |
| testEncoderRoundtripWriter(t, "testdata/silesia.tar", []byte{0xa5, 0x5b, 0x5e, 0xe, 0x5e, 0xea, 0x51, 0x6b}) |
| } |
| |
| func TestEncoder_EncoderSimple(t *testing.T) { |
| testEncoderRoundtrip(t, "testdata/z000028", []byte{0x8b, 0x2, 0x37, 0x70, 0x92, 0xb, 0x98, 0x95}) |
| testEncoderRoundtripWriter(t, "testdata/z000028", []byte{0x8b, 0x2, 0x37, 0x70, 0x92, 0xb, 0x98, 0x95}) |
| } |
| |
| func TestEncoder_EncoderHTML(t *testing.T) { |
| testEncoderRoundtrip(t, "../testdata/html.txt", []byte{0x35, 0xa9, 0x5c, 0x37, 0x20, 0x9e, 0xc3, 0x37}) |
| testEncoderRoundtripWriter(t, "../testdata/html.txt", []byte{0x35, 0xa9, 0x5c, 0x37, 0x20, 0x9e, 0xc3, 0x37}) |
| } |
| |
// TestEncoder_EncoderEnwik9 is currently a no-op: both round-trip calls below
// are commented out. Uncomment them to run the enwik9 round trip.
func TestEncoder_EncoderEnwik9(t *testing.T) {
	//testEncoderRoundtrip(t, "./testdata/enwik9.zst", []byte{0x28, 0xfa, 0xf4, 0x30, 0xca, 0x4b, 0x64, 0x12})
	//testEncoderRoundtripWriter(t, "./testdata/enwik9.zst", []byte{0x28, 0xfa, 0xf4, 0x30, 0xca, 0x4b, 0x64, 0x12})
}
| |
| // test roundtrip using io.ReaderFrom interface. |
| func testEncoderRoundtrip(t *testing.T, file string, wantCRC []byte) { |
| for _, opt := range getEncOpts(1) { |
| t.Run(opt.name, func(t *testing.T) { |
| opt := opt |
| //t.Parallel() |
| f, err := os.Open(file) |
| if err != nil { |
| if os.IsNotExist(err) { |
| t.Skip("No input file:", file) |
| return |
| } |
| t.Fatal(err) |
| } |
| defer f.Close() |
| if stat, err := f.Stat(); testing.Short() && err == nil { |
| if stat.Size() > 10000 { |
| t.SkipNow() |
| } |
| } |
| input := io.Reader(f) |
| if strings.HasSuffix(file, ".zst") { |
| dec, err := NewReader(f) |
| if err != nil { |
| t.Fatal(err) |
| } |
| input = dec |
| defer dec.Close() |
| } |
| |
| pr, pw := io.Pipe() |
| dec2, err := NewReader(pr) |
| if err != nil { |
| t.Fatal(err) |
| } |
| defer dec2.Close() |
| |
| enc, err := NewWriter(pw, opt.o...) |
| if err != nil { |
| t.Fatal(err) |
| } |
| defer enc.Close() |
| var wantSize int64 |
| start := time.Now() |
| go func() { |
| n, err := enc.ReadFrom(input) |
| if err != nil { |
| t.Error(err) |
| return |
| } |
| wantSize = n |
| err = enc.Close() |
| if err != nil { |
| t.Error(err) |
| return |
| } |
| pw.Close() |
| }() |
| var gotSize int64 |
| |
| // Check CRC |
| d := xxhash.New() |
| if true { |
| gotSize, err = io.Copy(d, dec2) |
| } else { |
| fout, err := os.Create(file + ".got") |
| if err != nil { |
| t.Fatal(err) |
| } |
| gotSize, err = io.Copy(io.MultiWriter(fout, d), dec2) |
| if err != nil { |
| t.Fatal(err) |
| } |
| } |
| if wantSize != gotSize { |
| t.Errorf("want size (%d) != got size (%d)", wantSize, gotSize) |
| } |
| if err != nil { |
| t.Fatal(err) |
| } |
| if gotCRC := d.Sum(nil); len(wantCRC) > 0 && !bytes.Equal(gotCRC, wantCRC) { |
| t.Errorf("crc mismatch %#v (want) != %#v (got)", wantCRC, gotCRC) |
| } else if len(wantCRC) != 8 { |
| t.Logf("Unable to verify CRC: %#v", gotCRC) |
| } else { |
| t.Logf("CRC Verified: %#v", gotCRC) |
| } |
| t.Log("Encoder len", wantSize) |
| mbpersec := (float64(wantSize) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second))) |
| t.Logf("Encoded+Decoded %d bytes with %.2f MB/s", wantSize, mbpersec) |
| }) |
| } |
| } |
| |
// writerWrapper exposes only the Write method of the wrapped writer, so any
// other interfaces the underlying value implements are hidden from callers.
type writerWrapper struct {
	w io.Writer
}

// Write forwards p to the wrapped writer and returns its result unchanged.
func (ww writerWrapper) Write(p []byte) (n int, err error) {
	n, err = ww.w.Write(p)
	return n, err
}
| |
| // test roundtrip using plain io.Writer interface. |
| func testEncoderRoundtripWriter(t *testing.T, file string, wantCRC []byte) { |
| f, err := os.Open(file) |
| if err != nil { |
| if os.IsNotExist(err) { |
| t.Skip("No input file:", file) |
| return |
| } |
| t.Fatal(err) |
| } |
| defer f.Close() |
| if stat, err := f.Stat(); testing.Short() && err == nil { |
| if stat.Size() > 10000 { |
| t.SkipNow() |
| } |
| } |
| input := io.Reader(f) |
| if strings.HasSuffix(file, ".zst") { |
| dec, err := NewReader(f) |
| if err != nil { |
| t.Fatal(err) |
| } |
| input = dec |
| defer dec.Close() |
| } |
| |
| pr, pw := io.Pipe() |
| dec2, err := NewReader(pr) |
| if err != nil { |
| t.Fatal(err) |
| } |
| defer dec2.Close() |
| |
| enc, err := NewWriter(pw, WithEncoderCRC(true)) |
| if err != nil { |
| t.Fatal(err) |
| } |
| defer enc.Close() |
| encW := writerWrapper{w: enc} |
| var wantSize int64 |
| start := time.Now() |
| go func() { |
| n, err := io.CopyBuffer(encW, input, make([]byte, 1337)) |
| if err != nil { |
| t.Error(err) |
| return |
| } |
| wantSize = n |
| err = enc.Close() |
| if err != nil { |
| t.Error(err) |
| return |
| } |
| pw.Close() |
| }() |
| var gotSize int64 |
| |
| // Check CRC |
| d := xxhash.New() |
| if true { |
| gotSize, err = io.Copy(d, dec2) |
| } else { |
| fout, err := os.Create(file + ".got") |
| if err != nil { |
| t.Fatal(err) |
| } |
| gotSize, err = io.Copy(io.MultiWriter(fout, d), dec2) |
| if err != nil { |
| t.Fatal(err) |
| } |
| } |
| if wantSize != gotSize { |
| t.Errorf("want size (%d) != got size (%d)", wantSize, gotSize) |
| } |
| if err != nil { |
| t.Fatal(err) |
| } |
| if gotCRC := d.Sum(nil); len(wantCRC) > 0 && !bytes.Equal(gotCRC, wantCRC) { |
| t.Errorf("crc mismatch %#v (want) != %#v (got)", wantCRC, gotCRC) |
| } else if len(wantCRC) != 8 { |
| t.Logf("Unable to verify CRC: %#v", gotCRC) |
| } else { |
| t.Logf("CRC Verified: %#v", gotCRC) |
| } |
| t.Log("Fast Encoder len", wantSize) |
| mbpersec := (float64(wantSize) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second))) |
| t.Logf("Encoded+Decoded %d bytes with %.2f MB/s", wantSize, mbpersec) |
| } |
| |
| func TestEncoder_EncodeAllSilesia(t *testing.T) { |
| if testing.Short() { |
| t.SkipNow() |
| } |
| in, err := os.ReadFile("testdata/silesia.tar") |
| if err != nil { |
| if os.IsNotExist(err) { |
| t.Skip("Missing testdata/silesia.tar") |
| return |
| } |
| t.Fatal(err) |
| } |
| |
| var e Encoder |
| start := time.Now() |
| dst := e.EncodeAll(in, nil) |
| t.Log("Fast Encoder len", len(in), "-> zstd len", len(dst)) |
| mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second))) |
| t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec) |
| |
| dec, err := NewReader(nil, WithDecoderMaxMemory(220<<20)) |
| if err != nil { |
| t.Fatal(err) |
| } |
| defer dec.Close() |
| decoded, err := dec.DecodeAll(dst, nil) |
| if err != nil { |
| t.Error(err, len(decoded)) |
| } |
| if !bytes.Equal(decoded, in) { |
| os.WriteFile("testdata/"+t.Name()+"-silesia.tar.got", decoded, os.ModePerm) |
| t.Fatal("Decoded does not match") |
| } |
| t.Log("Encoded content matched") |
| } |
| |
| func TestEncoderReadFrom(t *testing.T) { |
| buffer := bytes.NewBuffer(nil) |
| encoder, err := NewWriter(buffer) |
| if err != nil { |
| t.Fatal(err) |
| } |
| if _, err := encoder.ReadFrom(strings.NewReader("0")); err != nil { |
| t.Fatal(err) |
| } |
| if err := encoder.Close(); err != nil { |
| t.Fatal(err) |
| } |
| |
| dec, _ := NewReader(nil) |
| toDec := buffer.Bytes() |
| toDec = append(toDec, toDec...) |
| decoded, err := dec.DecodeAll(toDec, nil) |
| if err != nil { |
| t.Fatal(err) |
| } |
| |
| if !bytes.Equal([]byte("00"), decoded) { |
| t.Logf("encoded: % x\n", buffer.Bytes()) |
| t.Fatalf("output mismatch, got %s", string(decoded)) |
| } |
| dec.Close() |
| } |
| |
| func TestInterleavedWriteReadFrom(t *testing.T) { |
| var encoded bytes.Buffer |
| |
| enc, err := NewWriter(&encoded) |
| if err != nil { |
| t.Fatal(err) |
| } |
| |
| if _, err := enc.Write([]byte("write1")); err != nil { |
| t.Fatal(err) |
| } |
| if _, err := enc.Write([]byte("write2")); err != nil { |
| t.Fatal(err) |
| } |
| if _, err := enc.ReadFrom(strings.NewReader("readfrom1")); err != nil { |
| t.Fatal(err) |
| } |
| if _, err := enc.Write([]byte("write3")); err != nil { |
| t.Fatal(err) |
| } |
| |
| if err := enc.Close(); err != nil { |
| t.Fatal(err) |
| } |
| |
| dec, err := NewReader(&encoded) |
| if err != nil { |
| t.Fatal(err) |
| } |
| defer dec.Close() |
| |
| gotb, err := io.ReadAll(dec) |
| if err != nil { |
| t.Fatal(err) |
| } |
| got := string(gotb) |
| |
| if want := "write1write2readfrom1write3"; got != want { |
| t.Errorf("got decoded %q, want %q", got, want) |
| } |
| } |
| |
| func TestEncoder_EncodeAllEmpty(t *testing.T) { |
| if testing.Short() { |
| t.SkipNow() |
| } |
| var in []byte |
| |
| for _, opt := range getEncOpts(1) { |
| t.Run(opt.name, func(t *testing.T) { |
| e, err := NewWriter(nil, opt.o...) |
| if err != nil { |
| t.Fatal(err) |
| } |
| defer e.Close() |
| dst := e.EncodeAll(in, nil) |
| t.Log("Block Encoder len", len(in), "-> zstd len", len(dst), dst) |
| |
| dec, err := NewReader(nil, WithDecoderMaxMemory(220<<20)) |
| if err != nil { |
| t.Fatal(err) |
| } |
| defer dec.Close() |
| decoded, err := dec.DecodeAll(dst, nil) |
| if err != nil { |
| t.Error(err, len(decoded)) |
| } |
| if !bytes.Equal(decoded, in) { |
| t.Fatal("Decoded does not match") |
| } |
| |
| // Test buffer writer. |
| var buf bytes.Buffer |
| e.Reset(&buf) |
| err = e.Close() |
| if err != nil { |
| t.Fatal(err) |
| } |
| dst = buf.Bytes() |
| t.Log("Buffer Encoder len", len(in), "-> zstd len", len(dst)) |
| |
| decoded, err = dec.DecodeAll(dst, nil) |
| if err != nil { |
| t.Error(err, len(decoded)) |
| } |
| if !bytes.Equal(decoded, in) { |
| t.Fatal("Decoded does not match") |
| } |
| |
| t.Log("Encoded content matched") |
| }) |
| } |
| } |
| |
| func TestEncoder_EncodeAllEnwik9(t *testing.T) { |
| if testing.Short() { |
| t.SkipNow() |
| } |
| file := "testdata/enwik9.zst" |
| f, err := os.Open(file) |
| if err != nil { |
| if os.IsNotExist(err) { |
| t.Skip("To run extended tests, download http://mattmahoney.net/dc/enwik9.zip unzip it \n" + |
| "compress it with 'zstd -15 -T0 enwik9' and place it in " + file) |
| } |
| } |
| defer f.Close() |
| |
| dec, err := NewReader(f) |
| if err != nil { |
| t.Fatal(err) |
| } |
| defer dec.Close() |
| in, err := io.ReadAll(dec) |
| if err != nil { |
| t.Fatal(err) |
| } |
| |
| start := time.Now() |
| e, err := NewWriter(nil) |
| dst := e.EncodeAll(in, nil) |
| if err != nil { |
| t.Fatal(err) |
| } |
| t.Log("Simple Encoder len", len(in), "-> zstd len", len(dst)) |
| mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second))) |
| t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec) |
| decoded, err := dec.DecodeAll(dst, nil) |
| if err != nil { |
| t.Error(err, len(decoded)) |
| } |
| if !bytes.Equal(decoded, in) { |
| os.WriteFile("testdata/"+t.Name()+"-enwik9.got", decoded, os.ModePerm) |
| t.Fatal("Decoded does not match") |
| } |
| t.Log("Encoded content matched") |
| } |
| |
| func TestEncoder_EncoderStreamEnwik9(t *testing.T) { |
| if testing.Short() { |
| t.SkipNow() |
| } |
| file := "testdata/enwik9.zst" |
| f, err := os.Open(file) |
| if err != nil { |
| if os.IsNotExist(err) { |
| t.Skip("To run extended tests, download http://mattmahoney.net/dc/enwik9.zip unzip it \n" + |
| "compress it with 'zstd -15 -T0 enwik9' and place it in " + file) |
| } |
| } |
| defer f.Close() |
| |
| dec, err := NewReader(f) |
| if err != nil { |
| t.Fatal(err) |
| } |
| defer dec.Close() |
| in, err := io.ReadAll(dec) |
| if err != nil { |
| t.Fatal(err) |
| } |
| |
| start := time.Now() |
| var dst bytes.Buffer |
| e, err := NewWriter(&dst) |
| if err != nil { |
| t.Fatal(err) |
| } |
| _, err = io.Copy(e, bytes.NewBuffer(in)) |
| if err != nil { |
| t.Fatal(err) |
| } |
| e.Close() |
| t.Log("Full Encoder len", len(in), "-> zstd len", dst.Len()) |
| mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second))) |
| t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec) |
| if false { |
| decoded, err := dec.DecodeAll(dst.Bytes(), nil) |
| if err != nil { |
| t.Error(err, len(decoded)) |
| } |
| if !bytes.Equal(decoded, in) { |
| os.WriteFile("testdata/"+t.Name()+"-enwik9.got", decoded, os.ModePerm) |
| t.Fatal("Decoded does not match") |
| } |
| t.Log("Encoded content matched") |
| } |
| } |
| |
| func BenchmarkEncoder_EncodeAllXML(b *testing.B) { |
| f, err := os.Open("testdata/xml.zst") |
| if err != nil { |
| b.Fatal(err) |
| } |
| defer f.Close() |
| |
| dec, err := NewReader(f) |
| if err != nil { |
| b.Fatal(err) |
| } |
| in, err := io.ReadAll(dec) |
| if err != nil { |
| b.Fatal(err) |
| } |
| dec.Close() |
| |
| enc, _ := NewWriter(nil, WithEncoderConcurrency(1)) |
| dst := enc.EncodeAll(in, nil) |
| wantSize := len(dst) |
| //b.Log("Output size:", len(dst)) |
| b.ResetTimer() |
| b.ReportAllocs() |
| b.SetBytes(int64(len(in))) |
| for i := 0; i < b.N; i++ { |
| dst := enc.EncodeAll(in, dst[:0]) |
| if len(dst) != wantSize { |
| b.Fatal(len(dst), "!=", wantSize) |
| } |
| } |
| } |
| |
| func BenchmarkEncoder_EncodeAllSimple(b *testing.B) { |
| in, err := os.ReadFile("testdata/z000028") |
| if err != nil { |
| b.Fatal(err) |
| } |
| |
| for level := speedNotSet + 1; level < speedLast; level++ { |
| b.Run(level.String(), func(b *testing.B) { |
| enc, err := NewWriter(nil, WithEncoderConcurrency(1), WithEncoderLevel(level)) |
| if err != nil { |
| b.Fatal(err) |
| } |
| defer enc.Close() |
| dst := enc.EncodeAll(in, nil) |
| wantSize := len(dst) |
| b.ResetTimer() |
| b.ReportAllocs() |
| b.SetBytes(int64(len(in))) |
| for i := 0; i < b.N; i++ { |
| dst := enc.EncodeAll(in, dst[:0]) |
| if len(dst) != wantSize { |
| b.Fatal(len(dst), "!=", wantSize) |
| } |
| } |
| }) |
| } |
| } |
| |
| func BenchmarkEncoder_EncodeAllSimple4K(b *testing.B) { |
| in, err := os.ReadFile("testdata/z000028") |
| if err != nil { |
| b.Fatal(err) |
| } |
| in = in[:4096] |
| |
| for level := speedNotSet + 1; level < speedLast; level++ { |
| b.Run(level.String(), func(b *testing.B) { |
| enc, err := NewWriter(nil, WithEncoderConcurrency(1), WithEncoderLevel(level)) |
| if err != nil { |
| b.Fatal(err) |
| } |
| defer enc.Close() |
| dst := enc.EncodeAll(in, nil) |
| wantSize := len(dst) |
| b.ResetTimer() |
| b.ReportAllocs() |
| b.SetBytes(int64(len(in))) |
| for i := 0; i < b.N; i++ { |
| dst := enc.EncodeAll(in, dst[:0]) |
| if len(dst) != wantSize { |
| b.Fatal(len(dst), "!=", wantSize) |
| } |
| } |
| }) |
| } |
| } |
| |
| func BenchmarkEncoder_EncodeAllHTML(b *testing.B) { |
| in, err := os.ReadFile("../testdata/html.txt") |
| if err != nil { |
| b.Fatal(err) |
| } |
| |
| enc, _ := NewWriter(nil, WithEncoderConcurrency(1)) |
| dst := enc.EncodeAll(in, nil) |
| wantSize := len(dst) |
| b.ResetTimer() |
| b.ReportAllocs() |
| b.SetBytes(int64(len(in))) |
| for i := 0; i < b.N; i++ { |
| dst := enc.EncodeAll(in, dst[:0]) |
| if len(dst) != wantSize { |
| b.Fatal(len(dst), "!=", wantSize) |
| } |
| } |
| } |
| |
| func BenchmarkEncoder_EncodeAllTwain(b *testing.B) { |
| in, err := os.ReadFile("../testdata/Mark.Twain-Tom.Sawyer.txt") |
| if err != nil { |
| b.Fatal(err) |
| } |
| |
| enc, _ := NewWriter(nil, WithEncoderConcurrency(1)) |
| dst := enc.EncodeAll(in, nil) |
| wantSize := len(dst) |
| b.ResetTimer() |
| b.ReportAllocs() |
| b.SetBytes(int64(len(in))) |
| for i := 0; i < b.N; i++ { |
| dst := enc.EncodeAll(in, dst[:0]) |
| if len(dst) != wantSize { |
| b.Fatal(len(dst), "!=", wantSize) |
| } |
| } |
| } |
| |
| func BenchmarkEncoder_EncodeAllPi(b *testing.B) { |
| in, err := os.ReadFile("../testdata/pi.txt") |
| if err != nil { |
| b.Fatal(err) |
| } |
| |
| enc, _ := NewWriter(nil, WithEncoderConcurrency(1)) |
| dst := enc.EncodeAll(in, nil) |
| wantSize := len(dst) |
| b.ResetTimer() |
| b.ReportAllocs() |
| b.SetBytes(int64(len(in))) |
| for i := 0; i < b.N; i++ { |
| dst := enc.EncodeAll(in, dst[:0]) |
| if len(dst) != wantSize { |
| b.Fatal(len(dst), "!=", wantSize) |
| } |
| } |
| } |
| |
| func BenchmarkRandom4KEncodeAllFastest(b *testing.B) { |
| rng := rand.New(rand.NewSource(1)) |
| data := make([]byte, 4<<10) |
| for i := range data { |
| data[i] = uint8(rng.Intn(256)) |
| } |
| enc, _ := NewWriter(nil, WithEncoderLevel(SpeedFastest), WithEncoderConcurrency(1)) |
| defer enc.Close() |
| dst := enc.EncodeAll(data, nil) |
| wantSize := len(dst) |
| b.ResetTimer() |
| b.ReportAllocs() |
| b.SetBytes(int64(len(data))) |
| for i := 0; i < b.N; i++ { |
| dst := enc.EncodeAll(data, dst[:0]) |
| if len(dst) != wantSize { |
| b.Fatal(len(dst), "!=", wantSize) |
| } |
| } |
| } |
| |
| func BenchmarkRandom10MBEncodeAllFastest(b *testing.B) { |
| rng := rand.New(rand.NewSource(1)) |
| data := make([]byte, 10<<20) |
| rng.Read(data) |
| enc, _ := NewWriter(nil, WithEncoderLevel(SpeedFastest), WithEncoderConcurrency(2)) |
| defer enc.Close() |
| dst := enc.EncodeAll(data, nil) |
| wantSize := len(dst) |
| b.ResetTimer() |
| b.ReportAllocs() |
| b.SetBytes(int64(len(data))) |
| for i := 0; i < b.N; i++ { |
| dst := enc.EncodeAll(data, dst[:0]) |
| if len(dst) != wantSize { |
| b.Fatal(len(dst), "!=", wantSize) |
| } |
| } |
| } |
| |
| func BenchmarkRandom4KEncodeAllDefault(b *testing.B) { |
| rng := rand.New(rand.NewSource(1)) |
| data := make([]byte, 4<<10) |
| rng.Read(data) |
| enc, _ := NewWriter(nil, WithEncoderLevel(SpeedDefault), WithEncoderConcurrency(1)) |
| defer enc.Close() |
| dst := enc.EncodeAll(data, nil) |
| wantSize := len(dst) |
| b.ResetTimer() |
| b.ReportAllocs() |
| b.SetBytes(int64(len(data))) |
| for i := 0; i < b.N; i++ { |
| dst := enc.EncodeAll(data, dst[:0]) |
| if len(dst) != wantSize { |
| b.Fatal(len(dst), "!=", wantSize) |
| } |
| } |
| } |
| |
| func BenchmarkRandomEncodeAllDefault(b *testing.B) { |
| rng := rand.New(rand.NewSource(1)) |
| data := make([]byte, 10<<20) |
| rng.Read(data) |
| enc, _ := NewWriter(nil, WithEncoderLevel(SpeedDefault), WithEncoderConcurrency(1)) |
| defer enc.Close() |
| dst := enc.EncodeAll(data, nil) |
| wantSize := len(dst) |
| b.ResetTimer() |
| b.ReportAllocs() |
| b.SetBytes(int64(len(data))) |
| for i := 0; i < b.N; i++ { |
| dst := enc.EncodeAll(data, dst[:0]) |
| if len(dst) != wantSize { |
| b.Fatal(len(dst), "!=", wantSize) |
| } |
| } |
| } |
| |
| func BenchmarkRandom10MBEncoderFastest(b *testing.B) { |
| rng := rand.New(rand.NewSource(1)) |
| data := make([]byte, 10<<20) |
| rng.Read(data) |
| wantSize := int64(len(data)) |
| enc, _ := NewWriter(io.Discard, WithEncoderLevel(SpeedFastest)) |
| defer enc.Close() |
| n, err := io.Copy(enc, bytes.NewBuffer(data)) |
| if err != nil { |
| b.Fatal(err) |
| } |
| if n != wantSize { |
| b.Fatal(n, "!=", wantSize) |
| } |
| b.ResetTimer() |
| b.ReportAllocs() |
| b.SetBytes(wantSize) |
| for i := 0; i < b.N; i++ { |
| enc.Reset(io.Discard) |
| n, err := io.Copy(enc, bytes.NewBuffer(data)) |
| if err != nil { |
| b.Fatal(err) |
| } |
| if n != wantSize { |
| b.Fatal(n, "!=", wantSize) |
| } |
| } |
| } |
| |
| func BenchmarkRandomEncoderDefault(b *testing.B) { |
| rng := rand.New(rand.NewSource(1)) |
| data := make([]byte, 10<<20) |
| rng.Read(data) |
| wantSize := int64(len(data)) |
| enc, _ := NewWriter(io.Discard, WithEncoderLevel(SpeedDefault)) |
| defer enc.Close() |
| n, err := io.Copy(enc, bytes.NewBuffer(data)) |
| if err != nil { |
| b.Fatal(err) |
| } |
| if n != wantSize { |
| b.Fatal(n, "!=", wantSize) |
| } |
| b.ResetTimer() |
| b.ReportAllocs() |
| b.SetBytes(wantSize) |
| for i := 0; i < b.N; i++ { |
| enc.Reset(io.Discard) |
| n, err := io.Copy(enc, bytes.NewBuffer(data)) |
| if err != nil { |
| b.Fatal(err) |
| } |
| if n != wantSize { |
| b.Fatal(n, "!=", wantSize) |
| } |
| } |
| } |