diff --git a/src/Sep/Internals/SepParserAvx256To128CmpOrMoveMaskTzcnt.cs b/src/Sep/Internals/SepParserAvx256To128CmpOrMoveMaskTzcnt.cs
new file mode 100644
index 00000000..8b6dce01
--- /dev/null
+++ b/src/Sep/Internals/SepParserAvx256To128CmpOrMoveMaskTzcnt.cs
@@ -0,0 +1,191 @@
+using System;
+using System.Diagnostics.CodeAnalysis;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
+using static System.Runtime.CompilerServices.Unsafe;
+using static nietras.SeparatedValues.SepDefaults;
+using static nietras.SeparatedValues.SepParseMask;
+using ISA = System.Runtime.Intrinsics.X86.Sse2;
+using Vec = System.Runtime.Intrinsics.Vector128;
+using VecUI16 = System.Runtime.Intrinsics.Vector256;
+using VecUI8 = System.Runtime.Intrinsics.Vector128;
+// NOTE(review): Avx512BW is used below without a NET8_0_OR_GREATER guard, while SepParserFactory guards its Avx512BW checks - confirm all target frameworks provide it.
+namespace nietras.SeparatedValues;
+/// <summary>Separated-values parser: narrows chars to bytes via Avx512BW.VL saturation, then finds separators, line endings and quotes with Vector128 compares and Sse2 MoveMask.</summary>
+[ExcludeFromCodeCoverage]
+#if SEPUSESTRUCTFORPARSERSFORDISASMO
+struct
+#else
+sealed class
+#endif
+SepParserAvx256To128CmpOrMoveMaskTzcnt : ISepParser
+{
+    readonly char _separator;
+    readonly VecUI8 _nls = Vec.Create(LineFeedByte);
+    readonly VecUI8 _crs = Vec.Create(CarriageReturnByte);
+    readonly VecUI8 _qts = Vec.Create(QuoteByte);
+    readonly VecUI8 _sps;
+    nuint _quoteCount = 0;
+
+    public unsafe SepParserAvx256To128CmpOrMoveMaskTzcnt(SepParserOptions options)
+    {
+        _separator = options.Separator;
+        _sps = Vec.Create((byte)_separator);
+        _qts = Vec.Create((byte)options.QuotesOrSeparatorIfDisabled); // Replaces the QuoteByte value from the field initializer
+    }
+
+    // Parses 1 x char vector (VecUI16) narrowed to 1 x byte vector (VecUI8) per step
+    public int PaddingLength => VecUI8.Count;
+    public int QuoteCount => (int)_quoteCount;
+
+    [SkipLocalsInit]
+    [MethodImpl(MethodImplOptions.AggressiveOptimization)]
+    public void ParseColEnds(SepReaderState s)
+    {
+        Parse(s);
+    }
+
+    [SkipLocalsInit]
+    [MethodImpl(MethodImplOptions.AggressiveOptimization)]
+    public void ParseColInfos(SepReaderState s)
+    {
+        Parse(s);
+    }
+    // Shared vectorized loop: scans VecUI8.Count chars per step, records col ends/infos and completed rows in s, and writes the resume position back to s on exit.
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    void Parse(SepReaderState s)
+        where TColInfo : unmanaged
+        where TColInfoMethods : ISepColInfoMethods
+    {
+        // Method should **not** call other non-inlined methods, since this
+        // impacts code-generation severely.
+
+        // Unpack instance fields
+        var separator = _separator;
+        var quoteCount = _quoteCount;
+        // Use instance fields to force values into registers
+        var nls = _nls; // Vec.Create(LineFeedByte)
+        var crs = _crs; // Vec.Create(CarriageReturnByte)
+        var qts = _qts; // Vec.Create((byte)options.QuotesOrSeparatorIfDisabled), set in constructor
+        var sps = _sps; // Vec.Create((byte)_separator)
+
+        // Unpack state fields
+        var chars = s._chars;
+        var charsIndex = s._charsParseStart;
+        var charsEnd = s._charsDataEnd;
+        var lineNumber = s._parsingLineNumber;
+        var colInfos = s._colEndsOrColInfos;
+
+        var colInfosLength = TColInfoMethods.IntsLengthToColInfosLength(colInfos.Length);
+
+        chars.CheckPaddingAndIsZero(charsEnd, PaddingLength);
+        SepArrayExtensions.CheckPadding(colInfosLength, s._parsingRowColCount + s._parsingRowColEndsOrInfosStartIndex, PaddingLength);
+        A.Assert(charsIndex <= charsEnd);
+        A.Assert(charsEnd <= (chars.Length - PaddingLength));
+
+        ref var charsOriginRef = ref MemoryMarshal.GetArrayDataReference(chars);
+
+        ref var colInfosRefOrigin = ref As(ref MemoryMarshal.GetArrayDataReference(colInfos));
+        ref var colInfosRef = ref Add(ref colInfosRefOrigin, s._parsingRowColEndsOrInfosStartIndex);
+        ref var colInfosRefCurrent = ref Add(ref colInfosRefOrigin, s._parsingRowColCount + s._parsingRowColEndsOrInfosStartIndex);
+        ref var colInfosRefEnd = ref Add(ref colInfosRefOrigin, colInfosLength);
+        var colInfosStopLength = colInfosLength - VecUI8.Count - SepReaderState.ColEndsOrInfosExtraEndCount;
+        ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosStopLength);
+        // Pre-decrement so the first pass through LOOPSTEP starts at the original charsIndex
+        charsIndex -= VecUI8.Count;
+    LOOPSTEP:
+        charsIndex += VecUI8.Count;
+    LOOPNOSTEP:
+        if (charsIndex < charsEnd &&
+            // If current is greater than "stop", then there is no longer
+            // guaranteed enough space for next VecUI8.Count + next row start.
+            !IsAddressLessThan(ref colInfosRefStop, ref colInfosRefCurrent))
+        {
+            ref var charsRef = ref Add(ref charsOriginRef, (uint)charsIndex);
+            ref var byteRef = ref As(ref charsRef);
+            var v = ReadUnaligned(ref byteRef);
+            var bytes = Avx512BW.VL.ConvertToVector128ByteWithSaturation(v);
+
+            var nlsEq = Vec.Equals(bytes, nls);
+            var crsEq = Vec.Equals(bytes, crs);
+            var qtsEq = Vec.Equals(bytes, qts);
+            var spsEq = Vec.Equals(bytes, sps);
+
+            var lineEndings = nlsEq | crsEq;
+            var lineEndingsSeparators = spsEq | lineEndings;
+            var specialChars = lineEndingsSeparators | qtsEq;
+
+            // Optimize for the case of no special character
+            var specialCharMask = MoveMask(specialChars);
+            if (specialCharMask != 0u)
+            {
+                var separatorsMask = MoveMask(spsEq);
+                // Optimize for case of only separators i.e. no endings or quotes.
+                // Add quote count to mask as hack to skip if quoting: any non-zero quoteCount makes testMask exceed both masks compared below.
+                var testMask = specialCharMask + quoteCount;
+                if (separatorsMask == testMask)
+                {
+                    colInfosRefCurrent = ref ParseSeparatorsMask(
+                        separatorsMask, charsIndex, ref colInfosRefCurrent);
+                }
+                else
+                {
+                    var separatorLineEndingsMask = MoveMask(lineEndingsSeparators);
+                    if (separatorLineEndingsMask == testMask)
+                    {
+                        colInfosRefCurrent = ref ParseSeparatorsLineEndingsMasks(
+                            separatorsMask, separatorLineEndingsMask,
+                            ref charsRef, ref charsIndex, separator,
+                            ref colInfosRefCurrent, ref lineNumber);
+                        goto NEWROW;
+                    }
+                    else
+                    {
+                        var rowLineEndingOffset = 0;
+                        colInfosRefCurrent = ref ParseAnyCharsMask(specialCharMask,
+                            separator, ref charsRef, charsIndex,
+                            ref rowLineEndingOffset, ref quoteCount,
+                            ref colInfosRefCurrent, ref lineNumber);
+                        // Used both to indicate row ended and if need to step +2 due to '\r\n'
+                        if (rowLineEndingOffset != 0)
+                        {
+                            // Must be a col end and last is then dataIndex
+                            charsIndex = TColInfoMethods.GetColEnd(colInfosRefCurrent) + rowLineEndingOffset;
+                            goto NEWROW;
+                        }
+                    }
+                }
+            }
+            goto LOOPSTEP;
+        NEWROW:
+            var colCount = TColInfoMethods.CountOffset(ref colInfosRef, ref colInfosRefCurrent);
+            // Add new parsed row
+            ref var parsedRowRef = ref MemoryMarshal.GetArrayDataReference(s._parsedRows);
+            Add(ref parsedRowRef, s._parsedRowsCount) = new(lineNumber, colCount);
+            ++s._parsedRowsCount;
+            // Next row start (one before)
+            colInfosRefCurrent = ref Add(ref colInfosRefCurrent, 1);
+            A.Assert(IsAddressLessThan(ref colInfosRefCurrent, ref colInfosRefEnd));
+            colInfosRefCurrent = TColInfoMethods.Create(charsIndex - 1, 0);
+            // Update for next row
+            colInfosRef = ref colInfosRefCurrent;
+            s._parsingRowColEndsOrInfosStartIndex += colCount + 1;
+            s._parsingRowCharsStartIndex = charsIndex;
+            // Space for more rows?
+            if (s._parsedRowsCount < s._parsedRows.Length)
+            {
+                goto LOOPNOSTEP;
+            }
+        }
+        // Update instance state from enregistered
+        _quoteCount = quoteCount;
+        s._parsingRowColCount = TColInfoMethods.CountOffset(ref colInfosRef, ref colInfosRefCurrent);
+        s._parsingLineNumber = lineNumber;
+        // Step is VecUI8.Count so may go past end, ensure limited
+        s._charsParseStart = Math.Min(charsEnd, charsIndex);
+    }
+    // Sse2 MoveMask of the byte vector, returned as nuint so it can be added to and compared with quoteCount.
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    static nuint MoveMask(VecUI8 v) => (uint)ISA.MoveMask(v);
+}
diff --git a/src/Sep/Internals/SepParserFactory.cs b/src/Sep/Internals/SepParserFactory.cs
index c3c63550..f261da2d 100644
--- a/src/Sep/Internals/SepParserFactory.cs
+++ b/src/Sep/Internals/SepParserFactory.cs
@@ -47,21 +47,26 @@ static IReadOnlyDictionary> CreateFac
     static void AddFactories(TCollection parsers, bool createUnaccelerated)
         where TCollection : ICollection>>
     {
+#if NET8_0_OR_GREATER
+        // AVX-512 to 256 avoids mask register code gen issues
+        if (Avx512BW.IsSupported)
+        { Add(parsers, nameof(SepParserAvx512To256CmpOrMoveMaskTzcnt), static sep => new SepParserAvx512To256CmpOrMoveMaskTzcnt(sep)); }
+#endif
+        // Avx2 and Vector256 are faster than most AVX-512 due to mask register code gen issues
+        if (Avx2.IsSupported)
+        { Add(parsers, nameof(SepParserAvx2PackCmpOrMoveMaskTzcnt), static sep => new SepParserAvx2PackCmpOrMoveMaskTzcnt(sep)); }
+        if (createUnaccelerated || Vector256.IsHardwareAccelerated)
+        { Add(parsers, nameof(SepParserVector256NrwCmpExtMsbTzcnt), static sep => new SepParserVector256NrwCmpExtMsbTzcnt(sep)); }
 #if NET8_0_OR_GREATER
         if (Environment.Is64BitProcess && Avx512BW.IsSupported)
-        {
-            Add(parsers, nameof(SepParserAvx512To256CmpOrMoveMaskTzcnt), static sep => new SepParserAvx512To256CmpOrMoveMaskTzcnt(sep));
-            Add(parsers, nameof(SepParserAvx512PackCmpOrMoveMaskTzcnt), static sep => new SepParserAvx512PackCmpOrMoveMaskTzcnt(sep));
-        }
+        { Add(parsers, nameof(SepParserAvx512PackCmpOrMoveMaskTzcnt), static sep => new SepParserAvx512PackCmpOrMoveMaskTzcnt(sep)); }
+        if (Avx512BW.VL.IsSupported) // Needed for Avx512BW.VL.ConvertToVector128ByteWithSaturation; NOTE(review): no Is64BitProcess gate unlike the entry above - confirm intended
+        { Add(parsers, nameof(SepParserAvx256To128CmpOrMoveMaskTzcnt), static sep => new SepParserAvx256To128CmpOrMoveMaskTzcnt(sep)); }
         if (Environment.Is64BitProcess && (createUnaccelerated || Vector512.IsHardwareAccelerated))
         { Add(parsers, nameof(SepParserVector512NrwCmpExtMsbTzcnt), static sep => new SepParserVector512NrwCmpExtMsbTzcnt(sep)); }
 #endif
-        if (Avx2.IsSupported)
-        { Add(parsers, nameof(SepParserAvx2PackCmpOrMoveMaskTzcnt), static sep => new SepParserAvx2PackCmpOrMoveMaskTzcnt(sep)); }
         if (Sse2.IsSupported)
         { Add(parsers, nameof(SepParserSse2PackCmpOrMoveMaskTzcnt), static sep => new SepParserSse2PackCmpOrMoveMaskTzcnt(sep)); }
-        if (createUnaccelerated || Vector256.IsHardwareAccelerated)
-        { Add(parsers, nameof(SepParserVector256NrwCmpExtMsbTzcnt), static sep => new SepParserVector256NrwCmpExtMsbTzcnt(sep)); }
         if (createUnaccelerated || Vector128.IsHardwareAccelerated)
         { Add(parsers, nameof(SepParserVector128NrwCmpExtMsbTzcnt), static sep => new SepParserVector128NrwCmpExtMsbTzcnt(sep)); }
         if (createUnaccelerated || Vector64.IsHardwareAccelerated)
diff --git a/test-parsers.ps1 b/test-parsers.ps1
index f75fccd6..db9b1520 100644
--- a/test-parsers.ps1
+++ b/test-parsers.ps1
@@ -4,6 +4,7 @@ Try {
         "SepParserAvx512To256CmpOrMoveMaskTzcnt",
         "SepParserAvx512PackCmpOrMoveMaskTzcnt",
         "SepParserAvx2PackCmpOrMoveMaskTzcnt",
+        "SepParserAvx256To128CmpOrMoveMaskTzcnt",
         "SepParserSse2PackCmpOrMoveMaskTzcnt",
         "SepParserVector512NrwCmpExtMsbTzcnt",
         "SepParserVector256NrwCmpExtMsbTzcnt",