diff --git a/_gen/gen.go b/_gen/gen.go index 0fb751d0..b3d9e98a 100644 --- a/_gen/gen.go +++ b/_gen/gen.go @@ -178,8 +178,14 @@ func (r *reedSolomon) canGFNI(byteCount int, inputs, outputs int) (_, _ *func(ma if pshufb { w.WriteString(` -func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int { - n := stop-start +func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) (n int) { + n = stop - start + if raceEnabled { + defer func() { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + }() + } `) @@ -197,8 +203,14 @@ func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int { panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out))) } -func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int { - n := (stop-start) +func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) (n int) { + n = stop - start + if raceEnabled { + defer func() { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + }() + } `) @@ -223,6 +235,11 @@ func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int { func galMulSlicesGFNI(matrix []uint64, in, out [][]byte, start, stop int) int { n := (stop-start) & (maxInt - (64 - 1)) + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + `) w.WriteString(`switch len(in) { @@ -242,6 +259,11 @@ func galMulSlicesGFNI(matrix []uint64, in, out [][]byte, start, stop int) int { func galMulSlicesGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int { n := (stop-start) & (maxInt - (64 - 1)) + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + `) w.WriteString(`switch len(in) { @@ -264,6 +286,11 @@ func galMulSlicesGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int func galMulSlicesAvxGFNI(matrix []uint64, in, out [][]byte, start, stop int) int { n := (stop-start) & (maxInt - (32 - 1)) + if raceEnabled { + raceReadSlices(in, start, 
n) + raceWriteSlices(out, start, n) + } + `) w.WriteString(`switch len(in) { @@ -283,6 +310,11 @@ func galMulSlicesAvxGFNI(matrix []uint64, in, out [][]byte, start, stop int) int func galMulSlicesAvxGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int { n := (stop-start) & (maxInt - (32 - 1)) + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + `) w.WriteString(`switch len(in) { diff --git a/galois_amd64.go b/galois_amd64.go index 8099f166..8025560f 100644 --- a/galois_amd64.go +++ b/galois_amd64.go @@ -53,20 +53,32 @@ func galMulSlice(c byte, in, out []byte, o *options) { } if o.useAVX2 { if len(in) >= bigSwitchover { - galMulAVX2_64(mulTableLow[c][:], mulTableHigh[c][:], in, out) done := (len(in) >> 6) << 6 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + galMulAVX2_64(mulTableLow[c][:], mulTableHigh[c][:], in, out) in = in[done:] out = out[done:] } if len(in) > 32 { - galMulAVX2(mulTableLow[c][:], mulTableHigh[c][:], in, out) done := (len(in) >> 5) << 5 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + galMulAVX2(mulTableLow[c][:], mulTableHigh[c][:], in, out) in = in[done:] out = out[done:] } } else if o.useSSSE3 { - galMulSSSE3(mulTableLow[c][:], mulTableHigh[c][:], in, out) done := (len(in) >> 4) << 4 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + galMulSSSE3(mulTableLow[c][:], mulTableHigh[c][:], in, out) in = in[done:] out = out[done:] } @@ -85,20 +97,32 @@ func galMulSliceXor(c byte, in, out []byte, o *options) { if o.useAVX2 { if len(in) >= bigSwitchover { - galMulAVX2Xor_64(mulTableLow[c][:], mulTableHigh[c][:], in, out) done := (len(in) >> 6) << 6 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + galMulAVX2Xor_64(mulTableLow[c][:], mulTableHigh[c][:], in, out) in = in[done:] out = out[done:] } if len(in) >= 32 { - galMulAVX2Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out) done 
:= (len(in) >> 5) << 5 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + galMulAVX2Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out) in = in[done:] out = out[done:] } } else if o.useSSSE3 { - galMulSSSE3Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out) done := (len(in) >> 4) << 4 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + galMulSSSE3Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out) in = in[done:] out = out[done:] } @@ -117,20 +141,32 @@ func sliceXor(in, out []byte, o *options) { if o.useSSE2 { if len(in) >= bigSwitchover { if o.useAVX2 { - avx2XorSlice_64(in, out) done := (len(in) >> 6) << 6 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + avx2XorSlice_64(in, out) in = in[done:] out = out[done:] } else { - sSE2XorSlice_64(in, out) done := (len(in) >> 6) << 6 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + sSE2XorSlice_64(in, out) in = in[done:] out = out[done:] } } if len(in) >= 16 { - sSE2XorSlice(in, out) done := (len(in) >> 4) << 4 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + sSE2XorSlice(in, out) in = in[done:] out = out[done:] } @@ -462,9 +498,17 @@ func fftDIT2(x, y []byte, log_m ffe, o *options) { } if o.useAVX2 { tmp := &multiply256LUT[log_m] + if raceEnabled { + raceReadSlice(y) + raceWriteSlice(x) + } fftDIT2_avx2(x, y, tmp) } else if o.useSSSE3 { tmp := &multiply256LUT[log_m] + if raceEnabled { + raceReadSlice(y) + raceWriteSlice(x) + } fftDIT2_ssse3(x, y, tmp) } else { // Reference version: @@ -480,11 +524,15 @@ func fftDIT28(x, y []byte, log_m ffe8, o *options) { } if o.useAVX2 { + done := (len(y) >> 6) << 6 + if raceEnabled { + raceReadSlice(y[:done]) + raceWriteSlice(x[:done]) + } fftDIT28_avx2(x, y, &multiply256LUT8[log_m]) if len(x)&63 == 0 { return } - done := (len(y) >> 6) << 6 y = y[done:] x = x[done:] } @@ -499,11 +547,15 @@ func ifftDIT28(x, y []byte, log_m ffe8, o 
*options) { } if o.useAVX2 { + done := (len(y) >> 6) << 6 + if raceEnabled { + raceReadSlice(y[:done]) + raceWriteSlice(x[:done]) + } ifftDIT28_avx2(x, y, &multiply256LUT8[log_m]) if len(x)&63 == 0 { return } - done := (len(y) >> 6) << 6 y = y[done:] x = x[done:] } @@ -514,14 +566,22 @@ func ifftDIT28(x, y []byte, log_m ffe8, o *options) { func mulAdd8(x, y []byte, log_m ffe8, o *options) { if o.useAVX2 { t := &multiply256LUT8[log_m] - galMulAVX2Xor_64(t[:16], t[16:32], y, x) done := (len(y) >> 6) << 6 + if raceEnabled { + raceReadSlice(y[:done]) + raceWriteSlice(x[:done]) + } + galMulAVX2Xor_64(t[:16], t[16:32], y, x) y = y[done:] x = x[done:] } else if o.useSSSE3 { t := &multiply256LUT8[log_m] - galMulSSSE3Xor(t[:16], t[16:32], y, x) done := (len(y) >> 4) << 4 + if raceEnabled { + raceReadSlice(y[:done]) + raceWriteSlice(x[:done]) + } + galMulSSSE3Xor(t[:16], t[16:32], y, x) y = y[done:] x = x[done:] } @@ -535,9 +595,19 @@ func ifftDIT2(x, y []byte, log_m ffe, o *options) { } if o.useAVX2 { tmp := &multiply256LUT[log_m] + if raceEnabled { + raceReadSlice(y) + raceWriteSlice(x) + } + ifftDIT2_avx2(x, y, tmp) } else if o.useSSSE3 { tmp := &multiply256LUT[log_m] + if raceEnabled { + raceReadSlice(y) + raceWriteSlice(x) + } + ifftDIT2_ssse3(x, y, tmp) } else { // Reference version: @@ -552,9 +622,17 @@ func mulgf16(x, y []byte, log_m ffe, o *options) { } if o.useAVX2 { tmp := &multiply256LUT[log_m] + if raceEnabled { + raceReadSlice(y) + raceWriteSlice(x) + } mulgf16_avx2(x, y, tmp) } else if o.useSSSE3 { tmp := &multiply256LUT[log_m] + if raceEnabled { + raceReadSlice(y) + raceWriteSlice(x) + } mulgf16_ssse3(x, y, tmp) } else { refMul(x, y, log_m) @@ -564,14 +642,23 @@ func mulgf16(x, y []byte, log_m ffe, o *options) { func mulgf8(out, in []byte, log_m ffe8, o *options) { if o.useAVX2 { t := &multiply256LUT8[log_m] - galMulAVX2_64(t[:16], t[16:32], in, out) done := (len(in) >> 6) << 6 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + + 
galMulAVX2_64(t[:16], t[16:32], in, out) in = in[done:] out = out[done:] } else if o.useSSSE3 { t := &multiply256LUT8[log_m] - galMulSSSE3(t[:16], t[16:32], in, out) done := (len(in) >> 4) << 4 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + galMulSSSE3(t[:16], t[16:32], in, out) in = in[done:] out = out[done:] } diff --git a/galois_gen_switch_amd64.go b/galois_gen_switch_amd64.go index d4f46ea2..f9c36e29 100644 --- a/galois_gen_switch_amd64.go +++ b/galois_gen_switch_amd64.go @@ -43,8 +43,14 @@ func (r *reedSolomon) canGFNI(byteCount int, inputs, outputs int) (_, _ *func(ma inputs <= codeGenMaxInputs && outputs <= codeGenMaxOutputs } -func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int { - n := stop - start +func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) (n int) { + n = stop - start + if raceEnabled { + defer func() { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + }() + } switch len(in) { case 1: @@ -381,8 +387,14 @@ func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int { panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out))) } -func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int { - n := (stop - start) +func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) (n int) { + n = stop - start + if raceEnabled { + defer func() { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + }() + } switch len(in) { case 1: @@ -722,6 +734,11 @@ func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int { func galMulSlicesGFNI(matrix []uint64, in, out [][]byte, start, stop int) int { n := (stop - start) & (maxInt - (64 - 1)) + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + switch len(in) { case 1: switch len(out) { @@ -1060,6 +1077,11 @@ func galMulSlicesGFNI(matrix []uint64, in, out [][]byte, start, stop int) int { func 
galMulSlicesGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int { n := (stop - start) & (maxInt - (64 - 1)) + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + switch len(in) { case 1: switch len(out) { @@ -1398,6 +1420,11 @@ func galMulSlicesGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int func galMulSlicesAvxGFNI(matrix []uint64, in, out [][]byte, start, stop int) int { n := (stop - start) & (maxInt - (32 - 1)) + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + switch len(in) { case 1: switch len(out) { @@ -1736,6 +1763,11 @@ func galMulSlicesAvxGFNI(matrix []uint64, in, out [][]byte, start, stop int) int func galMulSlicesAvxGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int { n := (stop - start) & (maxInt - (32 - 1)) + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + switch len(in) { case 1: switch len(out) { diff --git a/galois_gen_switch_arm64.go b/galois_gen_switch_arm64.go index ff2541b8..656e0621 100644 --- a/galois_gen_switch_arm64.go +++ b/galois_gen_switch_arm64.go @@ -38,9 +38,15 @@ func (r *reedSolomon) canGFNI(byteCount int, inputs, outputs int) (_, _ *func(ma } // galMulSlicesSve -func galMulSlicesSve(matrix []byte, in, out [][]byte, start, stop int) int { - n := stop - start +func galMulSlicesSve(matrix []byte, in, out [][]byte, start, stop int) (n int) { + n = stop - start + if raceEnabled { + defer func() { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + }() + } // fmt.Println(len(in), len(out)) switch len(out) { case 1: @@ -78,8 +84,15 @@ func galMulSlicesSve(matrix []byte, in, out [][]byte, start, stop int) int { } // galMulSlicesSveXor -func galMulSlicesSveXor(matrix []byte, in, out [][]byte, start, stop int) int { - n := (stop - start) +func galMulSlicesSveXor(matrix []byte, in, out [][]byte, start, stop int) (n int) { + n = (stop - start) + + if raceEnabled { + defer func() 
{ + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + }() + } switch len(out) { case 1: @@ -117,8 +130,14 @@ func galMulSlicesSveXor(matrix []byte, in, out [][]byte, start, stop int) int { } // galMulSlicesNeon -func galMulSlicesNeon(matrix []byte, in, out [][]byte, start, stop int) int { - n := stop - start +func galMulSlicesNeon(matrix []byte, in, out [][]byte, start, stop int) (n int) { + n = stop - start + if raceEnabled { + defer func() { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + }() + } switch len(out) { case 1: @@ -156,9 +175,14 @@ func galMulSlicesNeon(matrix []byte, in, out [][]byte, start, stop int) int { } // galMulSlicesNeonXor -func galMulSlicesNeonXor(matrix []byte, in, out [][]byte, start, stop int) int { - n := (stop - start) - +func galMulSlicesNeonXor(matrix []byte, in, out [][]byte, start, stop int) (n int) { + n = (stop - start) + if raceEnabled { + defer func() { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + }() + } switch len(out) { case 1: mulNeon_10x1_64Xor(matrix, in, out, start, n) diff --git a/galois_gen_switch_nopshufb_amd64.go b/galois_gen_switch_nopshufb_amd64.go index 66bab8a0..3ac349d3 100644 --- a/galois_gen_switch_nopshufb_amd64.go +++ b/galois_gen_switch_nopshufb_amd64.go @@ -45,6 +45,11 @@ func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int { func galMulSlicesGFNI(matrix []uint64, in, out [][]byte, start, stop int) int { n := (stop - start) & (maxInt - (64 - 1)) + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + switch len(in) { case 1: switch len(out) { @@ -383,6 +388,11 @@ func galMulSlicesGFNI(matrix []uint64, in, out [][]byte, start, stop int) int { func galMulSlicesGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int { n := (stop - start) & (maxInt - (64 - 1)) + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + switch len(in) { case 1: switch len(out) { @@ 
-721,6 +731,11 @@ func galMulSlicesGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int func galMulSlicesAvxGFNI(matrix []uint64, in, out [][]byte, start, stop int) int { n := (stop - start) & (maxInt - (32 - 1)) + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + switch len(in) { case 1: switch len(out) { @@ -1059,6 +1074,11 @@ func galMulSlicesAvxGFNI(matrix []uint64, in, out [][]byte, start, stop int) int func galMulSlicesAvxGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int { n := (stop - start) & (maxInt - (32 - 1)) + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + switch len(in) { case 1: switch len(out) { diff --git a/race.go b/race.go new file mode 100644 index 00000000..4f2c0b69 --- /dev/null +++ b/race.go @@ -0,0 +1,61 @@ +// Copyright (c) 2024+ Klaus Post. See LICENSE for license + +//go:build race + +package reedsolomon + +import ( + "runtime" + "unsafe" +) + +const raceEnabled = true + +func raceReadSlice[T any](s []T) { + if len(s) == 0 { + return + } + runtime.RaceReadRange(unsafe.Pointer(&s[0]), len(s)*int(unsafe.Sizeof(s[0]))) +} + +func raceWriteSlice[T any](s []T) { + if len(s) == 0 { + return + } + runtime.RaceWriteRange(unsafe.Pointer(&s[0]), len(s)*int(unsafe.Sizeof(s[0]))) +} + +func raceReadSlices[T any](s [][]T, start, n int) { + if len(s) == 0 { + return + } + runtime.RaceReadRange(unsafe.Pointer(&s[0]), len(s)*int(unsafe.Sizeof(s[0]))) + for _, v := range s { + if len(v) == 0 { + continue + } + n := n + if n < 0 { + n = len(v) - start + } + runtime.RaceReadRange(unsafe.Pointer(&v[start]), n*int(unsafe.Sizeof(v[0]))) + } +} + +func raceWriteSlices[T any](s [][]T, start, n int) { + if len(s) == 0 { + return + } + runtime.RaceReadRange(unsafe.Pointer(&s[0]), len(s)*int(unsafe.Sizeof(s[0]))) + + for _, v := range s { + if len(v) == 0 { + continue + } + n := n + if n < 0 { + n = len(v) - start + } + 
runtime.RaceWriteRange(unsafe.Pointer(&v[start]), n*int(unsafe.Sizeof(v[0]))) + } +} diff --git a/race_none.go b/race_none.go new file mode 100644 index 00000000..c7d05f28 --- /dev/null +++ b/race_none.go @@ -0,0 +1,17 @@ +// Copyright (c) 2024+ Klaus Post. See LICENSE for license + +//go:build !race + +package reedsolomon + +const raceEnabled = false + +func raceReadSlice[T any](s []T) { +} + +func raceWriteSlice[T any](s []T) { +} + +func raceReadSlices[T any](s [][]T, start, n int) {} + +func raceWriteSlices[T any](s [][]T, start, n int) {} diff --git a/race_none_test.go b/race_none_test.go deleted file mode 100644 index 3c0d24ba..00000000 --- a/race_none_test.go +++ /dev/null @@ -1,8 +0,0 @@ -// Copyright 2022, Klaus Post, see LICENSE for details. - -//go:build !race -// +build !race - -package reedsolomon - -const raceEnabled = false diff --git a/race_test.go b/race_test.go deleted file mode 100644 index 417a0e55..00000000 --- a/race_test.go +++ /dev/null @@ -1,8 +0,0 @@ -// Copyright 2022, Klaus Post, see LICENSE for details. 
- -//go:build race -// +build race - -package reedsolomon - -const raceEnabled = true diff --git a/reedsolomon_test.go b/reedsolomon_test.go index a7f7ab25..76b46584 100644 --- a/reedsolomon_test.go +++ b/reedsolomon_test.go @@ -441,10 +441,7 @@ func testEncodingIdx(t *testing.T, o ...Option) { t.Run(fmt.Sprint(perShard), func(t *testing.T) { - shards := make([][]byte, data+parity) - for s := range shards { - shards[s] = make([]byte, perShard) - } + shards := AllocAligned(data+parity, perShard) shuffle := make([]int, data) for i := range shuffle { shuffle[i] = i diff --git a/xor_arm64.go b/xor_arm64.go index 6f0522f8..b3b579dd 100644 --- a/xor_arm64.go +++ b/xor_arm64.go @@ -7,8 +7,12 @@ func xorSliceNEON(in, out []byte) // simple slice xor func sliceXor(in, out []byte, o *options) { - xorSliceNEON(in, out) done := (len(in) >> 5) << 5 + if raceEnabled { + raceWriteSlice(out[:done]) + raceReadSlice(in[:done]) + } + xorSliceNEON(in, out) remain := len(in) - done if remain > 0 {