Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
191 changes: 191 additions & 0 deletions src/Sep/Internals/SepParserAvx256To128CmpOrMoveMaskTzcnt.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
using System;
using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics.X86;
using static System.Runtime.CompilerServices.Unsafe;
using static nietras.SeparatedValues.SepDefaults;
using static nietras.SeparatedValues.SepParseMask;
using ISA = System.Runtime.Intrinsics.X86.Sse2;
using Vec = System.Runtime.Intrinsics.Vector128;
using VecUI16 = System.Runtime.Intrinsics.Vector256<ushort>;
using VecUI8 = System.Runtime.Intrinsics.Vector128<byte>;

namespace nietras.SeparatedValues;

[ExcludeFromCodeCoverage]
#if SEPUSESTRUCTFORPARSERSFORDISASMO
struct
#else
sealed class
#endif
SepParserAvx256To128CmpOrMoveMaskTzcnt : ISepParser
{
readonly char _separator;
readonly VecUI8 _nls = Vec.Create(LineFeedByte);
readonly VecUI8 _crs = Vec.Create(CarriageReturnByte);
readonly VecUI8 _qts = Vec.Create(QuoteByte);
readonly VecUI8 _sps;
nuint _quoteCount = 0;

public unsafe SepParserAvx256To128CmpOrMoveMaskTzcnt(SepParserOptions options)
{
_separator = options.Separator;
_sps = Vec.Create((byte)_separator);
_qts = Vec.Create((byte)options.QuotesOrSeparatorIfDisabled);
}

// Parses 2 x char vectors e.g. 1 byte vector
public int PaddingLength => VecUI8.Count;
public int QuoteCount => (int)_quoteCount;

[SkipLocalsInit]
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
public void ParseColEnds(SepReaderState s)
{
Parse<int, SepColEndMethods>(s);
}

[SkipLocalsInit]
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
public void ParseColInfos(SepReaderState s)
{
Parse<SepColInfo, SepColInfoMethods>(s);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
void Parse<TColInfo, TColInfoMethods>(SepReaderState s)
where TColInfo : unmanaged
where TColInfoMethods : ISepColInfoMethods<TColInfo>
{
// Method should **not** call other non-inlined methods, since this
// impacts code-generation severely.

// Unpack instance fields
var separator = _separator;
var quoteCount = _quoteCount;
// Use instance fields to force values into registers
var nls = _nls; //Vec.Create(LineFeedByte);
var crs = _crs; //Vec.Create(CarriageReturnByte);
var qts = _qts; //Vec.Create(QuoteByte);
var sps = _sps; //Vec.Create(_separator);

// Unpack state fields
var chars = s._chars;
var charsIndex = s._charsParseStart;
var charsEnd = s._charsDataEnd;
var lineNumber = s._parsingLineNumber;
var colInfos = s._colEndsOrColInfos;

var colInfosLength = TColInfoMethods.IntsLengthToColInfosLength(colInfos.Length);

chars.CheckPaddingAndIsZero(charsEnd, PaddingLength);
SepArrayExtensions.CheckPadding(colInfosLength, s._parsingRowColCount + s._parsingRowColEndsOrInfosStartIndex, PaddingLength);
A.Assert(charsIndex <= charsEnd);
A.Assert(charsEnd <= (chars.Length - PaddingLength));

ref var charsOriginRef = ref MemoryMarshal.GetArrayDataReference(chars);

ref var colInfosRefOrigin = ref As<int, TColInfo>(ref MemoryMarshal.GetArrayDataReference(colInfos));
ref var colInfosRef = ref Add(ref colInfosRefOrigin, s._parsingRowColEndsOrInfosStartIndex);
ref var colInfosRefCurrent = ref Add(ref colInfosRefOrigin, s._parsingRowColCount + s._parsingRowColEndsOrInfosStartIndex);
ref var colInfosRefEnd = ref Add(ref colInfosRefOrigin, colInfosLength);
var colInfosStopLength = colInfosLength - VecUI8.Count - SepReaderState.ColEndsOrInfosExtraEndCount;
ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosStopLength);

charsIndex -= VecUI8.Count;
LOOPSTEP:
charsIndex += VecUI8.Count;
LOOPNOSTEP:
if (charsIndex < charsEnd &&
// If current is greater than or equal than "stop", then there is no
// longer guaranteed space enough for next VecUI8.Count + next row start.
!IsAddressLessThan(ref colInfosRefStop, ref colInfosRefCurrent))
{
ref var charsRef = ref Add(ref charsOriginRef, (uint)charsIndex);
ref var byteRef = ref As<char, byte>(ref charsRef);
var v = ReadUnaligned<VecUI16>(ref byteRef);
var bytes = Avx512BW.VL.ConvertToVector128ByteWithSaturation(v);

var nlsEq = Vec.Equals(bytes, nls);
var crsEq = Vec.Equals(bytes, crs);
var qtsEq = Vec.Equals(bytes, qts);
var spsEq = Vec.Equals(bytes, sps);

var lineEndings = nlsEq | crsEq;
var lineEndingsSeparators = spsEq | lineEndings;
var specialChars = lineEndingsSeparators | qtsEq;

// Optimize for the case of no special character
var specialCharMask = MoveMask(specialChars);
if (specialCharMask != 0u)
{
var separatorsMask = MoveMask(spsEq);
// Optimize for case of only separators i.e. no endings or quotes.
// Add quote count to mask as hack to skip if quoting.
var testMask = specialCharMask + quoteCount;
if (separatorsMask == testMask)
{
colInfosRefCurrent = ref ParseSeparatorsMask<TColInfo, TColInfoMethods>(
separatorsMask, charsIndex, ref colInfosRefCurrent);
}
else
{
var separatorLineEndingsMask = MoveMask(lineEndingsSeparators);
if (separatorLineEndingsMask == testMask)
{
colInfosRefCurrent = ref ParseSeparatorsLineEndingsMasks<TColInfo, TColInfoMethods>(
separatorsMask, separatorLineEndingsMask,
ref charsRef, ref charsIndex, separator,
ref colInfosRefCurrent, ref lineNumber);
goto NEWROW;
}
else
{
var rowLineEndingOffset = 0;
colInfosRefCurrent = ref ParseAnyCharsMask<TColInfo, TColInfoMethods>(specialCharMask,
separator, ref charsRef, charsIndex,
ref rowLineEndingOffset, ref quoteCount,
ref colInfosRefCurrent, ref lineNumber);
// Used both to indicate row ended and if need to step +2 due to '\r\n'
if (rowLineEndingOffset != 0)
{
// Must be a col end and last is then dataIndex
charsIndex = TColInfoMethods.GetColEnd(colInfosRefCurrent) + rowLineEndingOffset;
goto NEWROW;
}
}
}
}
goto LOOPSTEP;
NEWROW:
var colCount = TColInfoMethods.CountOffset(ref colInfosRef, ref colInfosRefCurrent);
// Add new parsed row
ref var parsedRowRef = ref MemoryMarshal.GetArrayDataReference(s._parsedRows);
Add(ref parsedRowRef, s._parsedRowsCount) = new(lineNumber, colCount);
++s._parsedRowsCount;
// Next row start (one before)
colInfosRefCurrent = ref Add(ref colInfosRefCurrent, 1);
A.Assert(IsAddressLessThan(ref colInfosRefCurrent, ref colInfosRefEnd));
colInfosRefCurrent = TColInfoMethods.Create(charsIndex - 1, 0);
// Update for next row
colInfosRef = ref colInfosRefCurrent;
s._parsingRowColEndsOrInfosStartIndex += colCount + 1;
s._parsingRowCharsStartIndex = charsIndex;
// Space for more rows?
if (s._parsedRowsCount < s._parsedRows.Length)
{
goto LOOPNOSTEP;
}
}
// Update instance state from enregistered
_quoteCount = quoteCount;
s._parsingRowColCount = TColInfoMethods.CountOffset(ref colInfosRef, ref colInfosRefCurrent);
s._parsingLineNumber = lineNumber;
// Step is VecUI8.Count so may go past end, ensure limited
s._charsParseStart = Math.Min(charsEnd, charsIndex);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
static nuint MoveMask(VecUI8 v) => (uint)ISA.MoveMask(v);
}
21 changes: 13 additions & 8 deletions src/Sep/Internals/SepParserFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -47,21 +47,26 @@ static IReadOnlyDictionary<string, Func<SepParserOptions, ISepParser>> CreateFac
static void AddFactories<TCollection>(TCollection parsers, bool createUnaccelerated)
where TCollection : ICollection<KeyValuePair<string, Func<SepParserOptions, ISepParser>>>
{
#if NET8_0_OR_GREATER
// AVX-512 to 256 avoids mask register code gen issues
if (Avx512BW.IsSupported)
{ Add(parsers, nameof(SepParserAvx512To256CmpOrMoveMaskTzcnt), static sep => new SepParserAvx512To256CmpOrMoveMaskTzcnt(sep)); }
#endif
// Avx2 and Vector256 are faster than most AVX-512 due to mask register code gen issues
if (Avx2.IsSupported)
{ Add(parsers, nameof(SepParserAvx2PackCmpOrMoveMaskTzcnt), static sep => new SepParserAvx2PackCmpOrMoveMaskTzcnt(sep)); }
if (createUnaccelerated || Vector256.IsHardwareAccelerated)
{ Add(parsers, nameof(SepParserVector256NrwCmpExtMsbTzcnt), static sep => new SepParserVector256NrwCmpExtMsbTzcnt(sep)); }
#if NET8_0_OR_GREATER
if (Environment.Is64BitProcess && Avx512BW.IsSupported)
{
Add(parsers, nameof(SepParserAvx512To256CmpOrMoveMaskTzcnt), static sep => new SepParserAvx512To256CmpOrMoveMaskTzcnt(sep));
Add(parsers, nameof(SepParserAvx512PackCmpOrMoveMaskTzcnt), static sep => new SepParserAvx512PackCmpOrMoveMaskTzcnt(sep));
}
{ Add(parsers, nameof(SepParserAvx512PackCmpOrMoveMaskTzcnt), static sep => new SepParserAvx512PackCmpOrMoveMaskTzcnt(sep)); }
if (Avx512BW.VL.IsSupported)
{ Add(parsers, nameof(SepParserAvx256To128CmpOrMoveMaskTzcnt), static sep => new SepParserAvx256To128CmpOrMoveMaskTzcnt(sep)); }
if (Environment.Is64BitProcess && (createUnaccelerated || Vector512.IsHardwareAccelerated))
{ Add(parsers, nameof(SepParserVector512NrwCmpExtMsbTzcnt), static sep => new SepParserVector512NrwCmpExtMsbTzcnt(sep)); }
#endif
if (Avx2.IsSupported)
{ Add(parsers, nameof(SepParserAvx2PackCmpOrMoveMaskTzcnt), static sep => new SepParserAvx2PackCmpOrMoveMaskTzcnt(sep)); }
if (Sse2.IsSupported)
{ Add(parsers, nameof(SepParserSse2PackCmpOrMoveMaskTzcnt), static sep => new SepParserSse2PackCmpOrMoveMaskTzcnt(sep)); }
if (createUnaccelerated || Vector256.IsHardwareAccelerated)
{ Add(parsers, nameof(SepParserVector256NrwCmpExtMsbTzcnt), static sep => new SepParserVector256NrwCmpExtMsbTzcnt(sep)); }
if (createUnaccelerated || Vector128.IsHardwareAccelerated)
{ Add(parsers, nameof(SepParserVector128NrwCmpExtMsbTzcnt), static sep => new SepParserVector128NrwCmpExtMsbTzcnt(sep)); }
if (createUnaccelerated || Vector64.IsHardwareAccelerated)
Expand Down
1 change: 1 addition & 0 deletions test-parsers.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Try {
"SepParserAvx512To256CmpOrMoveMaskTzcnt",
"SepParserAvx512PackCmpOrMoveMaskTzcnt",
"SepParserAvx2PackCmpOrMoveMaskTzcnt",
"SepParserAvx256To128CmpOrMoveMaskTzcnt",
"SepParserSse2PackCmpOrMoveMaskTzcnt",
"SepParserVector512NrwCmpExtMsbTzcnt",
"SepParserVector256NrwCmpExtMsbTzcnt",
Expand Down