diff --git a/scintilla/include/VectorISA.h b/scintilla/include/VectorISA.h index ef25da9573..79824f465c 100644 --- a/scintilla/include/VectorISA.h +++ b/scintilla/include/VectorISA.h @@ -37,7 +37,6 @@ #define NP2_USE_SSE2 1 // Clang and GCC use -march=x86-64-v3, https://clang.llvm.org/docs/UsersManual.html#x86 - // or -mavx2 -mpopcnt -mbmi -mbmi2 -mlzcnt -mmovbe // MSVC use /arch:AVX2 #if defined(_WIN64) && defined(__AVX2__) #define NP2_USE_AVX2 1 @@ -161,6 +160,7 @@ // https://stackoverflow.com/questions/32945410/sse2-intrinsics-comparing-unsigned-integers #if NP2_USE_AVX2 +#define mm256_set1_epi8(ch) _mm256_broadcastb_epi8(_mm_cvtsi32_si128(ch)) #define mm256_movemask_epi8(a) static_cast(_mm256_movemask_epi8(a)) #define mm256_cmpge_epu8(a, b) \ _mm256_cmpeq_epi8(_mm256_max_epu8((a), (b)), (a)) @@ -240,8 +240,8 @@ inline uint32_t loadbe_u32(const void *ptr) noexcept { #define andn_u32(a, b) _andn_u32((a), (b)) #endif -#define bit_zero_high_u32(x, index) _bzhi_u32((x), (index)) // BMI2 -//#define bit_zero_high_u32(x, index) _bextr_u32((x), 0, (index)) // BMI1 +#define bit_zero_high_u32(x, index) _bzhi_u32((x), (index)) +#define bit_zero_high_u64(x, index) _bzhi_u64((x), (index)) #else inline uint32_t loadbe_u32(const void *ptr) noexcept { @@ -255,6 +255,9 @@ constexpr uint32_t andn_u32(uint32_t a, uint32_t b) noexcept { constexpr uint32_t bit_zero_high_u32(uint32_t x, uint32_t index) noexcept { return x & ((1U << index) - 1); } +constexpr uint64_t bit_zero_high_u64(uint64_t x, uint32_t index) noexcept { + return x & ((UINT64_C(1) << index) - 1); +} #endif #if NP2_TARGET_ARM diff --git a/scintilla/src/Document.cxx b/scintilla/src/Document.cxx index 068c1cccfc..c447363693 100644 --- a/scintilla/src/Document.cxx +++ b/scintilla/src/Document.cxx @@ -38,7 +38,7 @@ #include "ILexer.h" #include "Debugging.h" -//#include "VectorISA.h" +#include "VectorISA.h" #include "CharacterSet.h" //#include "CharacterCategory.h" @@ -2930,7 +2930,9 @@ Sci::Position Document::ExtendStyleRange(Sci::Position pos, int delta, bool sing return pos; } -static constexpr char BraceOpposite(char ch) noexcept { +namespace { + +constexpr char BraceOpposite(char ch) noexcept { if (AnyOf<'(', ')'>(ch)) { return '(' + ')' - ch; } @@ -2943,18 +2945,113 @@ static constexpr char BraceOpposite(char ch) noexcept { return '\0'; } +} + // TODO: should be able to extend styled region to find matching brace Sci::Position Document::BraceMatch(Sci::Position position, Sci::Position /*maxReStyle*/, Sci::Position startPos, bool useStartPos) const noexcept { const unsigned char chBrace = CharAt(position); const unsigned char chSeek = BraceOpposite(chBrace); - if (chSeek == '\0') + if (chSeek == '\0') { return -1; + } const int styBrace = StyleIndexAt(position); const int direction = (chBrace < chSeek) ? 1 : -1; const unsigned char safeChar = (direction >= 0) ? asciiForwardSafeChar : asciiBackwardSafeChar; position = useStartPos ? startPos : NextPosition(position, direction); const Sci::Position length = LengthNoExcept(); int depth = 1; + if (chBrace <= asciiBackwardSafeChar && IsValidIndex(position + 32*direction, length)) { +#if NP2_USE_AVX2 + if (direction >= 0) { + const SplitView cbView = cb.AllView(); + const __m256i mmBrace = mm256_set1_epi8(chBrace); + const __m256i mmSeek = mm256_set1_epi8(chSeek); + do { + const bool scanFirst = IsValidIndex(position, cbView.length1); + const Sci::Position segmentLength = scanFirst ? cbView.length1 : length; + const char * const segment = scanFirst ? cbView.segment1 : cbView.segment2; + const __m256i *ptr = reinterpret_cast(segment + position); + uint32_t mask = 0; + do { + const __m256i chunk1 = _mm256_loadu_si256(ptr); + mask = mm256_movemask_epi8(_mm256_or_si256(_mm256_cmpeq_epi8(chunk1, mmBrace), _mm256_cmpeq_epi8(chunk1, mmSeek))); + if (mask != 0) { + break; + } + ptr++; + position += sizeof(mmBrace); + } while (position < segmentLength); + Sci::Position index = position; + position += sizeof(mmBrace); + if (position >= segmentLength && index <= segmentLength) { + position = segmentLength; + const uint32_t offset = static_cast(position - index); + mask = bit_zero_high_u32(mask, offset); + } + while (mask) { + const uint32_t trailing = np2::ctz(mask); + index += trailing; + mask >>= trailing; + if (index > GetEndStyled() || StyleIndexAt(index) == styBrace) { + const unsigned char chAtPos = segment[index]; + depth += (chAtPos == chBrace) ? 1 : -1; + if (depth == 0) { + return index; + } + } + index++; + mask >>= 1; + } + } while (position < length); + } + // end NP2_USE_AVX2 +#elif NP2_USE_SSE2 + if (direction >= 0) { + const SplitView cbView = cb.AllView(); + const __m128i mmBrace = _mm_set1_epi8(chBrace); + const __m128i mmSeek = _mm_set1_epi8(chSeek); + do { + const bool scanFirst = IsValidIndex(position, cbView.length1); + const Sci::Position segmentLength = scanFirst ? cbView.length1 : length; + const char * const segment = scanFirst ? cbView.segment1 : cbView.segment2; + const __m128i *ptr = reinterpret_cast(segment + position); + uint32_t mask = 0; + do { + const __m128i chunk1 = _mm_loadu_si128(ptr); + mask = mm_movemask_epi8(_mm_or_si128(_mm_cmpeq_epi8(chunk1, mmBrace), _mm_cmpeq_epi8(chunk1, mmSeek))); + if (mask != 0) { + break; + } + ptr++; + position += sizeof(mmBrace); + } while (position < segmentLength); + Sci::Position index = position; + position += sizeof(mmBrace); + if (position >= segmentLength && index <= segmentLength) { + position = segmentLength; + const uint32_t offset = static_cast(position - index); + mask = bit_zero_high_u32(mask, offset); + } + while (mask) { + const uint32_t trailing = np2::ctz(mask); + index += trailing; + mask >>= trailing; + if (index > GetEndStyled() || StyleIndexAt(index) == styBrace) { + const unsigned char chAtPos = segment[index]; + depth += (chAtPos == chBrace) ? 1 : -1; + if (depth == 0) { + return index; + } + } + index++; + mask >>= 1; + } + } while (position < length); + } + // end NP2_USE_SSE2 +#endif + } + while (IsValidIndex(position, length)) { const unsigned char chAtPos = CharAt(position); if (chAtPos == chBrace || chAtPos == chSeek) { diff --git a/scintilla/test/BraceMatchTest.cpp b/scintilla/test/BraceMatchTest.cpp new file mode 100644 index 0000000000..15378279bc --- /dev/null +++ b/scintilla/test/BraceMatchTest.cpp @@ -0,0 +1,234 @@ +// This file is part of Notepad4. +// See License.txt for details about distribution and modification. +#define _CRT_SECURE_NO_WARNINGS +#include +#include +#include +#include +#include "../include/VectorISA.h" + +// cl /EHsc /std:c++20 /DNDEBUG /O2 /FAcs /GS- /GR- /Gv /W4 /arch:AVX2 BraceMatchTest.cpp +// clang-cl /EHsc /std:c++20 /DNDEBUG /O2 /FA /GS- /GR- /Gv /W4 -march=x86-64-v3 BraceMatchTest.cpp +// g++ -S -std=gnu++20 -DNDEBUG -O3 -fno-rtti -Wall -Wextra -march=x86-64-v3 BraceMatchTest.cpp +constexpr bool IsValidIndex(size_t index, size_t length) noexcept { + return index < length; +} +struct SplitView { + const char *segment1 = nullptr; + size_t length1 = 0; + const char *segment2 = nullptr; + size_t length = 0; + + char CharAt(size_t position) const noexcept { + if (position < length1) { + return segment1[position]; + } + if (position < length) { + return segment2[position]; + } + return '\0'; + } +}; +constexpr char chBrace = '{'; +constexpr char chSeek = '}'; +constexpr uint32_t maxLength = 256; + +void FindAllBraceForward(const SplitView &cbView, ptrdiff_t position, const ptrdiff_t length, uint32_t (&result)[maxLength]) noexcept { + unsigned j = 0; +#if NP2_USE_AVX2 + const __m256i mmBrace = _mm256_set1_epi8(chBrace); + const __m256i mmSeek = _mm256_set1_epi8(chSeek); + while (position < length) { + const bool scanFirst = IsValidIndex(position, cbView.length1); + const ptrdiff_t segmentLength = scanFirst ? cbView.length1 : length; + const char * const segment = scanFirst ? cbView.segment1 : cbView.segment2; + const __m256i *ptr = reinterpret_cast(segment + position); + uint32_t mask = 0; + do { + const __m256i chunk1 = _mm256_loadu_si256(ptr); + mask = _mm256_movemask_epi8(_mm256_or_si256(_mm256_cmpeq_epi8(chunk1, mmBrace), _mm256_cmpeq_epi8(chunk1, mmSeek))); + if (mask != 0) { + break; + } + ptr++; + position += sizeof(__m256i); + } while (position < segmentLength); + ptrdiff_t index = position; + position += sizeof(__m256i); + if (position >= segmentLength && index <= segmentLength) { + position = segmentLength; + const uint32_t offset = static_cast(position - index); + mask = bit_zero_high_u32(mask, offset); + } + while (mask) { + const uint32_t trailing = np2::ctz(mask); + index += trailing; + mask >>= trailing; + result[j++] = static_cast(index + 1); + index++; + mask >>= 1; + } + } + +#elif NP2_USE_SSE2 + const __m128i mmBrace = _mm_set1_epi8(chBrace); + const __m128i mmSeek = _mm_set1_epi8(chSeek); + while (position < length) { + const bool scanFirst = IsValidIndex(position, cbView.length1); + const ptrdiff_t segmentLength = scanFirst ? cbView.length1 : length; + const char * const segment = scanFirst ? cbView.segment1 : cbView.segment2; + const __m128i *ptr = reinterpret_cast(segment + position); + uint32_t mask = 0; + do { + const __m128i chunk1 = _mm_loadu_si128(ptr); + mask = _mm_movemask_epi8(_mm_or_si128(_mm_cmpeq_epi8(chunk1, mmBrace), _mm_cmpeq_epi8(chunk1, mmSeek))); + if (mask != 0) { + break; + } + ptr++; + position += sizeof(__m128i); + } while (position < segmentLength); + ptrdiff_t index = position; + position += sizeof(__m128i); + if (position >= segmentLength && index <= segmentLength) { + position = segmentLength; + const uint32_t offset = static_cast(position - index); + mask = bit_zero_high_u32(mask, offset); + } + while (mask) { + const uint32_t trailing = np2::ctz(mask); + index += trailing; + mask >>= trailing; + result[j++] = static_cast(index + 1); + index++; + mask >>= 1; + } + } +#endif + + while (position < length) { + const char chAtPos = cbView.CharAt(position); + if (chAtPos == chBrace || chAtPos == chSeek) { + result[j++] = static_cast(position + 1); + } + ++position; + } +} + +void FindAllBraceBackward(const SplitView &cbView, ptrdiff_t position, uint32_t (&result)[maxLength]) noexcept { + unsigned j = 0; + + while (position >= 0) { + const char chAtPos = cbView.CharAt(position); + if (chAtPos == chBrace || chAtPos == chSeek) { + result[j++] = static_cast(position + 1); + } + --position; + } +} + +bool TestFindBrace(const SplitView &cbView, ptrdiff_t position, ptrdiff_t length, bool forward) noexcept { + uint32_t result[maxLength]{}; + uint32_t naive[maxLength]{}; + unsigned j = 0; + if (forward) { + FindAllBraceForward(cbView, position, length, result); + while (position < length) { + const char chAtPos = cbView.CharAt(position); + if (chAtPos == chBrace || chAtPos == chSeek) { + naive[j++] = static_cast(position + 1); + } + ++position; + } + } else { + FindAllBraceBackward(cbView, position, result); + while (position >= 0) { + const char chAtPos = cbView.CharAt(position); + if (chAtPos == chBrace || chAtPos == chSeek) { + naive[j++] = static_cast(position + 1); + } + --position; + } + } + + const char *tag = forward ? "forward" : "backward"; + bool same = true; + for (j = 0; j < maxLength; j++) { + const uint32_t lhs = naive[j]; + const uint32_t rhs = result[j]; + if (lhs != rhs) { + same = false; + printf("%s fail %u: (%u, '%c'), (%u, '%c')\n", tag, j, lhs, cbView.CharAt(lhs - 1), rhs, cbView.CharAt(rhs - 1)); + } + } + return same; +} + +int __cdecl main(int argc, char *argv[]) { + if (argc > 1) { + argc = atoi(argv[1]); + } + + srand(static_cast(reinterpret_cast(argv))); + constexpr uint32_t padding = 32; + char buffer[padding + maxLength + padding + 1]{}; + memset(buffer, chBrace, padding); + memset(buffer + padding + maxLength, chSeek, padding); + +#if 0 + { + strcpy(buffer, ""); + constexpr uint32_t gapPosition = 0; + constexpr uint32_t gapLength = 0; + constexpr uint32_t position = 0; + constexpr uint32_t length = 0; + const SplitView cbView { + buffer + padding, + (gapPosition != 0 && gapLength != 0) ? gapPosition : length, + buffer + padding + gapLength, + length, + }; + printf("doc: (%u, %u), gap: (%u, %u)\n", position, length, gapPosition, gapLength); + //TestFindBrace(cbView, position, length, true); + //TestFindBrace(cbView, position, length, false); + argc = 0; + } +#endif + + for (int j = 0; j < argc; j++) { + for (uint32_t i = 0; i < maxLength; i += 4) { + const uint32_t value = rand(); + buffer[i + padding + 0] = "0{12[3(45)6]78}9"[value & 15]; + buffer[i + padding + 1] = "0{12[3(45)6]78}9"[(value >> 4) & 15]; + buffer[i + padding + 2] = "0{12[3(45)6]78}9"[(value >> 8) & 15]; + buffer[i + padding + 3] = "0{12[3(45)6]78}9"[(value >> 12) & 15]; + } + + const uint32_t value = rand(); + const uint32_t gapPosition = value & 127; + const uint32_t gapLength = (value >> 4) & 127; + uint32_t position = (value >> 8) & (maxLength - 1); + const uint32_t length = maxLength - gapLength; + if (position >= length) { + position = length - 1; + } + memset(buffer + padding + gapPosition, chBrace, gapLength); + const SplitView cbView { + buffer + padding, + (gapPosition != 0 && gapLength != 0) ? gapPosition : length, + buffer + padding + gapLength, + length, + }; + + if (!TestFindBrace(cbView, position, length, true)) { + printf("%4d: (%u, %u), gap: (%u, %u)\n%s\n", j, position, length, gapPosition, gapLength, buffer); + break; + } + if (!TestFindBrace(cbView, position, length, false)) { + printf("%4d: (%u, %u), gap: (%u, %u)\n%s\n", j, position, length, gapPosition, gapLength, buffer); + break; + } + } + printf("done: %d\n", argc); + return 0; +}