From 6a9df697dce81ba20ee4aa223ef76dacd172cb4d Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Sat, 31 Aug 2024 07:15:04 -0700 Subject: [PATCH] Test asm slice reads/writes in race tests (#286) * Test asm slice reads/writes in race tests * Update CI and go.mod When calling asm functions in race mode, check inputs and outputs for races. --- .github/workflows/go.yml | 2 +- _gen/gen.go | 40 +++++++++- galois_amd64.go | 117 ++++++++++++++++++++++++---- galois_arm64.go | 13 +++- galois_gen_switch_amd64.go | 40 +++++++++- galois_gen_switch_arm64.go | 42 +++++++--- galois_gen_switch_nopshufb_amd64.go | 20 +++++ go.mod | 6 +- go.sum | 7 +- race.go | 61 +++++++++++++++ race_none.go | 17 ++++ race_none_test.go | 8 -- race_test.go | 8 -- reedsolomon_test.go | 5 +- xor_arm64.go | 6 +- 15 files changed, 329 insertions(+), 63 deletions(-) create mode 100644 race.go create mode 100644 race_none.go delete mode 100644 race_none_test.go delete mode 100644 race_test.go diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 5c9ead3a..39196c2c 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -11,7 +11,7 @@ jobs: build: strategy: matrix: - go-version: [1.20.x, 1.21.x, 1.22.x] + go-version: [1.21.x, 1.22.x, 1.23.x] os: [ubuntu-latest, macos-latest, windows-latest] env: CGO_ENABLED: 0 diff --git a/_gen/gen.go b/_gen/gen.go index 0fb751d0..b3d9e98a 100644 --- a/_gen/gen.go +++ b/_gen/gen.go @@ -178,8 +178,14 @@ func (r *reedSolomon) canGFNI(byteCount int, inputs, outputs int) (_, _ *func(ma if pshufb { w.WriteString(` -func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int { - n := stop-start +func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) (n int) { + n = stop - start + if raceEnabled { + defer func() { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + }() + } `) @@ -197,8 +203,14 @@ func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int { panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out))) } -func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int { - n := (stop-start) +func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) (n int) { + n = stop - start + if raceEnabled { + defer func() { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + }() + } `) @@ -223,6 +235,11 @@ func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int { func galMulSlicesGFNI(matrix []uint64, in, out [][]byte, start, stop int) int { n := (stop-start) & (maxInt - (64 - 1)) + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + `) w.WriteString(`switch len(in) { @@ -242,6 +259,11 @@ func galMulSlicesGFNI(matrix []uint64, in, out [][]byte, start, stop int) int { func galMulSlicesGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int { n := (stop-start) & (maxInt - (64 - 1)) + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + `) w.WriteString(`switch len(in) { @@ -264,6 +286,11 @@ func galMulSlicesGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int func galMulSlicesAvxGFNI(matrix []uint64, in, out [][]byte, start, stop int) int { n := (stop-start) & (maxInt - (32 - 1)) + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + `) w.WriteString(`switch len(in) { @@ -283,6 +310,11 @@ func galMulSlicesAvxGFNI(matrix []uint64, in, out [][]byte, start, stop int) int func galMulSlicesAvxGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int { n := (stop-start) & (maxInt - (32 - 1)) + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + `) w.WriteString(`switch len(in) { diff --git a/galois_amd64.go b/galois_amd64.go index 8099f166..8025560f 100644 --- a/galois_amd64.go +++ b/galois_amd64.go @@ -53,20 +53,32 @@ func galMulSlice(c byte, in, out []byte, o *options) { } if o.useAVX2 { if len(in) >= bigSwitchover { - galMulAVX2_64(mulTableLow[c][:], mulTableHigh[c][:], in, out) done := (len(in) >> 6) << 6 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + galMulAVX2_64(mulTableLow[c][:], mulTableHigh[c][:], in, out) in = in[done:] out = out[done:] } if len(in) > 32 { - galMulAVX2(mulTableLow[c][:], mulTableHigh[c][:], in, out) done := (len(in) >> 5) << 5 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + galMulAVX2(mulTableLow[c][:], mulTableHigh[c][:], in, out) in = in[done:] out = out[done:] } } else if o.useSSSE3 { - galMulSSSE3(mulTableLow[c][:], mulTableHigh[c][:], in, out) done := (len(in) >> 4) << 4 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + galMulSSSE3(mulTableLow[c][:], mulTableHigh[c][:], in, out) in = in[done:] out = out[done:] } @@ -85,20 +97,32 @@ func galMulSliceXor(c byte, in, out []byte, o *options) { if o.useAVX2 { if len(in) >= bigSwitchover { - galMulAVX2Xor_64(mulTableLow[c][:], mulTableHigh[c][:], in, out) done := (len(in) >> 6) << 6 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + galMulAVX2Xor_64(mulTableLow[c][:], mulTableHigh[c][:], in, out) in = in[done:] out = out[done:] } if len(in) >= 32 { - galMulAVX2Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out) done := (len(in) >> 5) << 5 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + galMulAVX2Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out) in = in[done:] out = out[done:] } } else if o.useSSSE3 { - galMulSSSE3Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out) done := (len(in) >> 4) << 4 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + galMulSSSE3Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out) in = in[done:] out = out[done:] } @@ -117,20 +141,32 @@ func sliceXor(in, out []byte, o *options) { if o.useSSE2 { if len(in) >= bigSwitchover { if o.useAVX2 { - avx2XorSlice_64(in, out) done := (len(in) >> 6) << 6 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + avx2XorSlice_64(in, out) in = in[done:] out = out[done:] } else { - sSE2XorSlice_64(in, out) done := (len(in) >> 6) << 6 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + sSE2XorSlice_64(in, out) in = in[done:] out = out[done:] } } if len(in) >= 16 { - sSE2XorSlice(in, out) done := (len(in) >> 4) << 4 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + sSE2XorSlice(in, out) in = in[done:] out = out[done:] } @@ -462,9 +498,17 @@ func fftDIT2(x, y []byte, log_m ffe, o *options) { } if o.useAVX2 { tmp := &multiply256LUT[log_m] + if raceEnabled { + raceReadSlice(y) + raceWriteSlice(x) + } fftDIT2_avx2(x, y, tmp) } else if o.useSSSE3 { tmp := &multiply256LUT[log_m] + if raceEnabled { + raceReadSlice(y) + raceWriteSlice(x) + } fftDIT2_ssse3(x, y, tmp) } else { // Reference version: @@ -480,11 +524,15 @@ func fftDIT28(x, y []byte, log_m ffe8, o *options) { } if o.useAVX2 { + done := (len(y) >> 6) << 6 + if raceEnabled { + raceReadSlice(y[:done]) + raceWriteSlice(x[:done]) + } fftDIT28_avx2(x, y, &multiply256LUT8[log_m]) if len(x)&63 == 0 { return } - done := (len(y) >> 6) << 6 y = y[done:] x = x[done:] } @@ -499,11 +547,15 @@ func ifftDIT28(x, y []byte, log_m ffe8, o *options) { } if o.useAVX2 { + done := (len(y) >> 6) << 6 + if raceEnabled { + raceReadSlice(y[:done]) + raceWriteSlice(x[:done]) + } ifftDIT28_avx2(x, y, &multiply256LUT8[log_m]) if len(x)&63 == 0 { return } - done := (len(y) >> 6) << 6 y = y[done:] x = x[done:] } @@ -514,14 +566,22 @@ func ifftDIT28(x, y []byte, log_m ffe8, o *options) { func mulAdd8(x, y []byte, log_m ffe8, o *options) { if o.useAVX2 { t := &multiply256LUT8[log_m] - galMulAVX2Xor_64(t[:16], t[16:32], y, x) done := (len(y) >> 6) << 6 + if raceEnabled { + raceReadSlice(y[:done]) + raceWriteSlice(x[:done]) + } + galMulAVX2Xor_64(t[:16], t[16:32], y, x) y = y[done:] x = x[done:] } else if o.useSSSE3 { t := &multiply256LUT8[log_m] - galMulSSSE3Xor(t[:16], t[16:32], y, x) done := (len(y) >> 4) << 4 + if raceEnabled { + raceReadSlice(y[:done]) + raceWriteSlice(x[:done]) + } + galMulSSSE3Xor(t[:16], t[16:32], y, x) y = y[done:] x = x[done:] } @@ -535,9 +595,19 @@ func ifftDIT2(x, y []byte, log_m ffe, o *options) { } if o.useAVX2 { tmp := &multiply256LUT[log_m] + if raceEnabled { + raceReadSlice(y) + raceWriteSlice(x) + } + ifftDIT2_avx2(x, y, tmp) } else if o.useSSSE3 { tmp := &multiply256LUT[log_m] + if raceEnabled { + raceReadSlice(y) + raceWriteSlice(x) + } + ifftDIT2_ssse3(x, y, tmp) } else { // Reference version: @@ -552,9 +622,17 @@ func mulgf16(x, y []byte, log_m ffe, o *options) { } if o.useAVX2 { tmp := &multiply256LUT[log_m] + if raceEnabled { + raceReadSlice(y) + raceWriteSlice(x) + } mulgf16_avx2(x, y, tmp) } else if o.useSSSE3 { tmp := &multiply256LUT[log_m] + if raceEnabled { + raceReadSlice(y) + raceWriteSlice(x) + } mulgf16_ssse3(x, y, tmp) } else { refMul(x, y, log_m) @@ -564,14 +642,23 @@ func mulgf16(x, y []byte, log_m ffe, o *options) { func mulgf8(out, in []byte, log_m ffe8, o *options) { if o.useAVX2 { t := &multiply256LUT8[log_m] - galMulAVX2_64(t[:16], t[16:32], in, out) done := (len(in) >> 6) << 6 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + + galMulAVX2_64(t[:16], t[16:32], in, out) in = in[done:] out = out[done:] } else if o.useSSSE3 { t := &multiply256LUT8[log_m] - galMulSSSE3(t[:16], t[16:32], in, out) done := (len(in) >> 4) << 4 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + galMulSSSE3(t[:16], t[16:32], in, out) in = in[done:] out = out[done:] } diff --git a/galois_arm64.go b/galois_arm64.go index e34f39a5..d860525c 100644 --- a/galois_arm64.go +++ b/galois_arm64.go @@ -33,8 +33,12 @@ func galMulSlice(c byte, in, out []byte, o *options) { return } var done int - galMulNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out) done = (len(in) >> 5) << 5 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + galMulNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out) remain := len(in) - done if remain > 0 { @@ -50,9 +54,12 @@ func galMulSliceXor(c byte, in, out []byte, o *options) { sliceXor(in, out, o) return } - var done int + done := (len(in) >> 5) << 5 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } galMulXorNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out) - done = (len(in) >> 5) << 5 remain := len(in) - done if remain > 0 { diff --git a/galois_gen_switch_amd64.go b/galois_gen_switch_amd64.go index d4f46ea2..f9c36e29 100644 --- a/galois_gen_switch_amd64.go +++ b/galois_gen_switch_amd64.go @@ -43,8 +43,14 @@ func (r *reedSolomon) canGFNI(byteCount int, inputs, outputs int) (_, _ *func(ma inputs <= codeGenMaxInputs && outputs <= codeGenMaxOutputs } -func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int { - n := stop - start +func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) (n int) { + n = stop - start + if raceEnabled { + defer func() { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + }() + } switch len(in) { case 1: @@ -381,8 +387,14 @@ func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int { panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out))) } -func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int { - n := (stop - start) +func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) (n int) { + n = stop - start + if raceEnabled { + defer func() { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + }() + } switch len(in) { case 1: @@ -722,6 +734,11 @@ func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int { func galMulSlicesGFNI(matrix []uint64, in, out [][]byte, start, stop int) int { n := (stop - start) & (maxInt - (64 - 1)) + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + switch len(in) { case 1: switch len(out) { @@ -1060,6 +1077,11 @@ func galMulSlicesGFNI(matrix []uint64, in, out [][]byte, start, stop int) int { func galMulSlicesGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int { n := (stop - start) & (maxInt - (64 - 1)) + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + switch len(in) { case 1: switch len(out) { @@ -1398,6 +1420,11 @@ func galMulSlicesGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int func galMulSlicesAvxGFNI(matrix []uint64, in, out [][]byte, start, stop int) int { n := (stop - start) & (maxInt - (32 - 1)) + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + switch len(in) { case 1: switch len(out) { @@ -1736,6 +1763,11 @@ func galMulSlicesAvxGFNI(matrix []uint64, in, out [][]byte, start, stop int) int func galMulSlicesAvxGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int { n := (stop - start) & (maxInt - (32 - 1)) + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + switch len(in) { case 1: switch len(out) { diff --git a/galois_gen_switch_arm64.go b/galois_gen_switch_arm64.go index ff2541b8..656e0621 100644 --- a/galois_gen_switch_arm64.go +++ b/galois_gen_switch_arm64.go @@ -38,9 +38,15 @@ func (r *reedSolomon) canGFNI(byteCount int, inputs, outputs int) (_, _ *func(ma } // galMulSlicesSve -func galMulSlicesSve(matrix []byte, in, out [][]byte, start, stop int) int { - n := stop - start +func galMulSlicesSve(matrix []byte, in, out [][]byte, start, stop int) (n int) { + n = stop - start + if raceEnabled { + defer func() { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + }() + } // fmt.Println(len(in), len(out)) switch len(out) { case 1: @@ -78,8 +84,15 @@ func galMulSlicesSve(matrix []byte, in, out [][]byte, start, stop int) int { } // galMulSlicesSveXor -func galMulSlicesSveXor(matrix []byte, in, out [][]byte, start, stop int) int { - n := (stop - start) +func galMulSlicesSveXor(matrix []byte, in, out [][]byte, start, stop int) (n int) { + n = (stop - start) + + if raceEnabled { + defer func() { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + }() + } switch len(out) { case 1: @@ -117,8 +130,14 @@ func galMulSlicesSveXor(matrix []byte, in, out [][]byte, start, stop int) int { } // galMulSlicesNeon -func galMulSlicesNeon(matrix []byte, in, out [][]byte, start, stop int) int { - n := stop - start +func galMulSlicesNeon(matrix []byte, in, out [][]byte, start, stop int) (n int) { + n = stop - start + if raceEnabled { + defer func() { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + }() + } switch len(out) { case 1: @@ -156,9 +175,14 @@ func galMulSlicesNeon(matrix []byte, in, out [][]byte, start, stop int) int { } // galMulSlicesNeonXor -func galMulSlicesNeonXor(matrix []byte, in, out [][]byte, start, stop int) int { - n := (stop - start) - +func galMulSlicesNeonXor(matrix []byte, in, out [][]byte, start, stop int) (n int) { + n = (stop - start) + if raceEnabled { + defer func() { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + }() + } switch len(out) { case 1: mulNeon_10x1_64Xor(matrix, in, out, start, n) diff --git a/galois_gen_switch_nopshufb_amd64.go b/galois_gen_switch_nopshufb_amd64.go index 66bab8a0..3ac349d3 100644 --- a/galois_gen_switch_nopshufb_amd64.go +++ b/galois_gen_switch_nopshufb_amd64.go @@ -45,6 +45,11 @@ func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int { func galMulSlicesGFNI(matrix []uint64, in, out [][]byte, start, stop int) int { n := (stop - start) & (maxInt - (64 - 1)) + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + switch len(in) { case 1: switch len(out) { @@ -383,6 +388,11 @@ func galMulSlicesGFNI(matrix []uint64, in, out [][]byte, start, stop int) int { func galMulSlicesGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int { n := (stop - start) & (maxInt - (64 - 1)) + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + switch len(in) { case 1: switch len(out) { @@ -721,6 +731,11 @@ func galMulSlicesGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int func galMulSlicesAvxGFNI(matrix []uint64, in, out [][]byte, start, stop int) int { n := (stop - start) & (maxInt - (32 - 1)) + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + switch len(in) { case 1: switch len(out) { @@ -1059,6 +1074,11 @@ func galMulSlicesAvxGFNI(matrix []uint64, in, out [][]byte, start, stop int) int func galMulSlicesAvxGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int { n := (stop - start) & (maxInt - (32 - 1)) + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + switch len(in) { case 1: switch len(out) { diff --git a/go.mod b/go.mod index 680b7a6a..19096fb9 100644 --- a/go.mod +++ b/go.mod @@ -1,10 +1,10 @@ module github.com/klauspost/reedsolomon -go 1.18 +go 1.21 -require github.com/klauspost/cpuid/v2 v2.2.6 +require github.com/klauspost/cpuid/v2 v2.2.8 -require golang.org/x/sys v0.5.0 // indirect +require golang.org/x/sys v0.24.0 // indirect retract ( v1.12.2 // https://github.com/klauspost/reedsolomon/pull/283 diff --git a/go.sum b/go.sum index 77781b67..6b4a7794 100644 --- a/go.sum +++ b/go.sum @@ -1,4 +1,5 @@ -github.com/klauspost/cpuid/v2 v2.2.6 h1:ndNyv040zDGIDh8thGkXYjnFtiN02M1PVVF+JE/48xc= -github.com/klauspost/cpuid/v2 v2.2.6/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= -golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU= +github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM= +github.com/klauspost/cpuid/v2 v2.2.8/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg= +golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= diff --git a/race.go b/race.go new file mode 100644 index 00000000..4f2c0b69 --- /dev/null +++ b/race.go @@ -0,0 +1,61 @@ +// Copyright (c) 2024+ Klaus Post. See LICENSE for license + +//go:build race + +package reedsolomon + +import ( + "runtime" + "unsafe" +) + +const raceEnabled = true + +func raceReadSlice[T any](s []T) { + if len(s) == 0 { + return + } + runtime.RaceReadRange(unsafe.Pointer(&s[0]), len(s)*int(unsafe.Sizeof(s[0]))) +} + +func raceWriteSlice[T any](s []T) { + if len(s) == 0 { + return + } + runtime.RaceWriteRange(unsafe.Pointer(&s[0]), len(s)*int(unsafe.Sizeof(s[0]))) +} + +func raceReadSlices[T any](s [][]T, start, n int) { + if len(s) == 0 { + return + } + runtime.RaceReadRange(unsafe.Pointer(&s[0]), len(s)*int(unsafe.Sizeof(s[0]))) + for _, v := range s { + if len(v) == 0 { + continue + } + n := n + if n < 0 { + n = len(v) - start + } + runtime.RaceReadRange(unsafe.Pointer(&v[start]), n*int(unsafe.Sizeof(v[0]))) + } +} + +func raceWriteSlices[T any](s [][]T, start, n int) { + if len(s) == 0 { + return + } + runtime.RaceReadRange(unsafe.Pointer(&s[0]), len(s)*int(unsafe.Sizeof(s[0]))) + + for _, v := range s { + if len(v) == 0 { + continue + } + n := n + if n < 0 { + n = len(v) - start + } + runtime.RaceWriteRange(unsafe.Pointer(&v[start]), n*int(unsafe.Sizeof(v[0]))) + } +} diff --git a/race_none.go b/race_none.go new file mode 100644 index 00000000..c7d05f28 --- /dev/null +++ b/race_none.go @@ -0,0 +1,17 @@ +// Copyright (c) 2024+ Klaus Post. See LICENSE for license + +//go:build !race + +package reedsolomon + +const raceEnabled = false + +func raceReadSlice[T any](s []T) { +} + +func raceWriteSlice[T any](s []T) { +} + +func raceReadSlices[T any](s [][]T, start, n int) {} + +func raceWriteSlices[T any](s [][]T, start, n int) {} diff --git a/race_none_test.go b/race_none_test.go deleted file mode 100644 index 3c0d24ba..00000000 --- a/race_none_test.go +++ /dev/null @@ -1,8 +0,0 @@ -// Copyright 2022, Klaus Post, see LICENSE for details. - -//go:build !race -// +build !race - -package reedsolomon - -const raceEnabled = false diff --git a/race_test.go b/race_test.go deleted file mode 100644 index 417a0e55..00000000 --- a/race_test.go +++ /dev/null @@ -1,8 +0,0 @@ -// Copyright 2022, Klaus Post, see LICENSE for details. - -//go:build race -// +build race - -package reedsolomon - -const raceEnabled = true diff --git a/reedsolomon_test.go b/reedsolomon_test.go index a7f7ab25..76b46584 100644 --- a/reedsolomon_test.go +++ b/reedsolomon_test.go @@ -441,10 +441,7 @@ func testEncodingIdx(t *testing.T, o ...Option) { t.Run(fmt.Sprint(perShard), func(t *testing.T) { - shards := make([][]byte, data+parity) - for s := range shards { - shards[s] = make([]byte, perShard) - } + shards := AllocAligned(data+parity, perShard) shuffle := make([]int, data) for i := range shuffle { shuffle[i] = i diff --git a/xor_arm64.go b/xor_arm64.go index 6f0522f8..ffda8884 100644 --- a/xor_arm64.go +++ b/xor_arm64.go @@ -7,8 +7,12 @@ func xorSliceNEON(in, out []byte) // simple slice xor func sliceXor(in, out []byte, o *options) { - xorSliceNEON(in, out) done := (len(in) >> 5) << 5 + if raceEnabled { + raceWriteSlice(out[:done]) + raceReadSlice(in[:done]) + } + xorSliceNEON(in, out) remain := len(in) - done if remain > 0 {