Skip to content

Commit

Permalink
Add options (#46)
Browse files Browse the repository at this point in the history
* Add options

Make constants changeable as options.

The API remains backwards compatible.

* Update documentation.

* Fix line endings

* fmt

* fmt

* Use functions for parameters.

Much neater.
  • Loading branch information
klauspost committed Feb 19, 2017
1 parent c056598 commit 5abf0ee
Show file tree
Hide file tree
Showing 9 changed files with 210 additions and 46 deletions.
11 changes: 8 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ os:
- osx

go:
- 1.3
- 1.4
- 1.5
- 1.6
- tip
- 1.7
- 1.8
- master

install:
- go get ./...
Expand All @@ -26,3 +26,8 @@ script:
- go build examples/stream-decoder.go
- go build examples/stream-encoder.go
- diff <(gofmt -d .) <("")

matrix:
allow_failures:
- go: 'master'
fast_finish: true
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,18 @@ There is no buffering or timeouts/retry specified. If you want to add that, you

For complete examples of a streaming encoder and decoder see the [examples folder](https://github.com/klauspost/reedsolomon/tree/master/examples).

# Advanced Options

You can modify internal options which affect how jobs are split between and processed by goroutines.

To create options, use the WithXXX functions. You can supply options to `New`, `NewStream` and `NewStreamC`. If no Options are supplied, default options are used.

Example of how to supply options:

```Go
enc, err := reedsolomon.New(10, 3, reedsolomon.WithMaxGoroutines(25))
```


# Performance
Performance depends mainly on the number of parity shards. In rough terms, doubling the number of parity shards will double the encoding time.
Expand Down
16 changes: 6 additions & 10 deletions galois_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,6 @@

package reedsolomon

import (
"github.com/klauspost/cpuid"
)

//go:noescape
func galMulSSSE3(low, high, in, out []byte)

Expand Down Expand Up @@ -40,12 +36,12 @@ func galMulSSSE3Xor(low, high, in, out []byte) {
}
*/

func galMulSlice(c byte, in, out []byte) {
func galMulSlice(c byte, in, out []byte, ssse3, avx2 bool) {
var done int
if cpuid.CPU.AVX2() {
if avx2 {
galMulAVX2(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done = (len(in) >> 5) << 5
} else if cpuid.CPU.SSSE3() {
} else if ssse3 {
galMulSSSE3(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done = (len(in) >> 4) << 4
}
Expand All @@ -58,12 +54,12 @@ func galMulSlice(c byte, in, out []byte) {
}
}

func galMulSliceXor(c byte, in, out []byte) {
func galMulSliceXor(c byte, in, out []byte, ssse3, avx2 bool) {
var done int
if cpuid.CPU.AVX2() {
if avx2 {
galMulAVX2Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done = (len(in) >> 5) << 5
} else if cpuid.CPU.SSSE3() {
} else if ssse3 {
galMulSSSE3Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done = (len(in) >> 4) << 4
}
Expand Down
4 changes: 2 additions & 2 deletions galois_noasm.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@

package reedsolomon

func galMulSlice(c byte, in, out []byte) {
func galMulSlice(c byte, in, out []byte, ssse3, avx2 bool) {
mt := mulTable[c]
for n, input := range in {
out[n] = mt[input]
}
}

func galMulSliceXor(c byte, in, out []byte) {
func galMulSliceXor(c byte, in, out []byte, ssse3, avx2 bool) {
mt := mulTable[c]
for n, input := range in {
out[n] ^= mt[input]
Expand Down
4 changes: 2 additions & 2 deletions galois_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,13 +131,13 @@ func TestGalois(t *testing.T) {
// Test slices (>16 entries to test assembler)
in := []byte{0, 1, 2, 3, 4, 5, 6, 10, 50, 100, 150, 174, 201, 255, 99, 32, 67, 85}
out := make([]byte, len(in))
galMulSlice(25, in, out)
galMulSlice(25, in, out, false, false)
expect := []byte{0x0, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0xfa, 0xb8, 0x6d, 0xc7, 0x85, 0xc3, 0x1f, 0x22, 0x7, 0x25, 0xfe}
if 0 != bytes.Compare(out, expect) {
t.Errorf("got %#v, expected %#v", out, expect)
}

galMulSlice(177, in, out)
galMulSlice(177, in, out, false, false)
expect = []byte{0x0, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x9e, 0x3, 0x6, 0xe8, 0x75, 0xbd, 0x40, 0x36, 0xa3, 0x95, 0xcb}
if 0 != bytes.Compare(out, expect) {
t.Errorf("got %#v, expected %#v", out, expect)
Expand Down
67 changes: 67 additions & 0 deletions options.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package reedsolomon

import (
"runtime"

"github.com/klauspost/cpuid"
)

// Option is a functional option that overrides a single processing
// parameter. Options are applied, in the order given, on top of the
// default options when an encoder is created.
type Option func(*options)

// options holds the tunable processing parameters of an encoder.
type options struct {
	// maxGoroutines is the upper bound on the number of goroutines a
	// single job is split across.
	maxGoroutines int
	// minSplitSize is the minimum number of bytes each goroutine is
	// given when a job is split.
	minSplitSize int
	// useAVX2 and useSSSE3 are passed to the Galois multiplication
	// routines to select the assembler code path.
	useAVX2  bool
	useSSSE3 bool
}

// defaultOptions is the baseline configuration copied into every new
// encoder before user supplied options are applied.
var defaultOptions = options{
	minSplitSize:  512, // min split size per goroutine
	maxGoroutines: 50,  // max goroutines number for encoding & decoding
}

func init() {
if runtime.GOMAXPROCS(0) <= 1 {
defaultOptions.maxGoroutines = 1
}
// Detect CPU capabilities.
defaultOptions.useSSSE3 = cpuid.CPU.SSSE3()
defaultOptions.useAVX2 = cpuid.CPU.AVX2()
}

// WithMaxGoroutines sets the maximum number of goroutines used for
// encoding & decoding.
// Jobs will be split into this many parts, unless each goroutine would have
// to process less than minSplitSize bytes (set with WithMinSplitSize).
// For the best speed, keep this well above the GOMAXPROCS number for more
// fine grained scheduling.
// If n <= 0, it is ignored.
func WithMaxGoroutines(n int) Option {
	return func(o *options) {
		if n <= 0 {
			// Non-positive values leave the current setting untouched.
			return
		}
		o.maxGoroutines = n
	}
}

// WithMinSplitSize sets the minimum encoding size in bytes per goroutine.
// See WithMaxGoroutines on how jobs are split.
// If n <= 0, it is ignored.
func WithMinSplitSize(n int) Option {
	return func(o *options) {
		if n > 0 {
			// Must target minSplitSize; writing maxGoroutines here would
			// leave the split size unchanged and clobber the goroutine limit.
			o.minSplitSize = n
		}
	}
}

// withSSE3 returns an Option that forces the SSSE3 flag to enabled.
// NOTE(review): despite the name, this toggles useSSSE3 (SSSE3, not SSE3);
// the name is kept so existing callers continue to compile.
func withSSE3(enabled bool) Option {
	return func(o *options) { o.useSSSE3 = enabled }
}

// withAVX2 returns an Option that forces the AVX2 flag to enabled.
func withAVX2(enabled bool) Option {
	return func(o *options) { o.useAVX2 = enabled }
}
61 changes: 42 additions & 19 deletions reedsolomon.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ import (
"bytes"
"errors"
"io"
"runtime"
"sync"
)

Expand Down Expand Up @@ -83,6 +82,7 @@ type reedSolomon struct {
m matrix
tree inversionTree
parity [][]byte
o options
}

// ErrInvShardNum will be returned by New, if you attempt to create
Expand All @@ -98,13 +98,18 @@ var ErrMaxShardNum = errors.New("cannot create Encoder with 255 or more data+par
// the number of data shards and parity shards that
// you want to use. You can reuse this encoder.
// Note that the maximum number of data shards is 256.
func New(dataShards, parityShards int) (Encoder, error) {
// If no options are supplied, default options are used.
func New(dataShards, parityShards int, opts ...Option) (Encoder, error) {
r := reedSolomon{
DataShards: dataShards,
ParityShards: parityShards,
Shards: dataShards + parityShards,
o: defaultOptions,
}

for _, opt := range opts {
opt(&r.o)
}
if dataShards <= 0 || parityShards <= 0 {
return nil, ErrInvShardNum
}
Expand Down Expand Up @@ -201,34 +206,29 @@ func (r reedSolomon) Verify(shards [][]byte) (bool, error) {
// number of matrix rows used, is determined by
// outputCount, which is the number of outputs to compute.
func (r reedSolomon) codeSomeShards(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
if runtime.GOMAXPROCS(0) > 1 && len(inputs[0]) > minSplitSize {
if r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize {
r.codeSomeShardsP(matrixRows, inputs, outputs, outputCount, byteCount)
return
}
for c := 0; c < r.DataShards; c++ {
in := inputs[c]
for iRow := 0; iRow < outputCount; iRow++ {
if c == 0 {
galMulSlice(matrixRows[iRow][c], in, outputs[iRow])
galMulSlice(matrixRows[iRow][c], in, outputs[iRow], r.o.useSSSE3, r.o.useAVX2)
} else {
galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow])
galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow], r.o.useSSSE3, r.o.useAVX2)
}
}
}
}

const (
minSplitSize = 512 // min split size per goroutine
maxGoroutines = 50 // max goroutines number for encoding & decoding
)

// Perform the same as codeSomeShards, but split the workload into
// several goroutines.
func (r reedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
var wg sync.WaitGroup
do := byteCount / maxGoroutines
if do < minSplitSize {
do = minSplitSize
do := byteCount / r.o.maxGoroutines
if do < r.o.minSplitSize {
do = r.o.minSplitSize
}
start := 0
for start < byteCount {
Expand All @@ -241,9 +241,9 @@ func (r reedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, outpu
in := inputs[c]
for iRow := 0; iRow < outputCount; iRow++ {
if c == 0 {
galMulSlice(matrixRows[iRow][c], in[start:stop], outputs[iRow][start:stop])
galMulSlice(matrixRows[iRow][c], in[start:stop], outputs[iRow][start:stop], r.o.useSSSE3, r.o.useAVX2)
} else {
galMulSliceXor(matrixRows[iRow][c], in[start:stop], outputs[iRow][start:stop])
galMulSliceXor(matrixRows[iRow][c], in[start:stop], outputs[iRow][start:stop], r.o.useSSSE3, r.o.useAVX2)
}
}
}
Expand All @@ -258,13 +258,36 @@ func (r reedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, outpu
// except this will check values and return
// as soon as a difference is found.
func (r reedSolomon) checkSomeShards(matrixRows, inputs, toCheck [][]byte, outputCount, byteCount int) bool {
	// Hand off to the goroutine-splitting variant when more than one
	// goroutine is allowed and the shard is larger than the split threshold.
	if r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize {
		return r.checkSomeShardsP(matrixRows, inputs, toCheck, outputCount, byteCount)
	}

	// Zero-filled scratch buffers, one per shard to verify. Because they
	// start at zero, XOR-accumulating every input (including the first)
	// yields the full product.
	computed := make([][]byte, len(toCheck))
	for i := range computed {
		computed[i] = make([]byte, byteCount)
	}

	for c := 0; c < r.DataShards; c++ {
		src := inputs[c]
		for row := 0; row < outputCount; row++ {
			galMulSliceXor(matrixRows[row][c], src, computed[row], r.o.useSSSE3, r.o.useAVX2)
		}
	}

	// Any mismatch between a computed shard and the supplied one fails the check.
	for i := range computed {
		if !bytes.Equal(computed[i], toCheck[i]) {
			return false
		}
	}
	return true
}

func (r reedSolomon) checkSomeShardsP(matrixRows, inputs, toCheck [][]byte, outputCount, byteCount int) bool {
same := true
var mu sync.RWMutex // For above

var wg sync.WaitGroup
do := byteCount / maxGoroutines
if do < minSplitSize {
do = minSplitSize
do := byteCount / r.o.maxGoroutines
if do < r.o.minSplitSize {
do = r.o.minSplitSize
}
start := 0
for start < byteCount {
Expand All @@ -287,7 +310,7 @@ func (r reedSolomon) checkSomeShards(matrixRows, inputs, toCheck [][]byte, outpu
mu.RUnlock()
in := inputs[c][start : start+do]
for iRow := 0; iRow < outputCount; iRow++ {
galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow])
galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow], r.o.useSSSE3, r.o.useAVX2)
}
}

Expand Down
Loading

0 comments on commit 5abf0ee

Please sign in to comment.