Skip to content

Commit

Permalink
Optimize forward brace match with SSE2/AVX2, issue #911.
Browse files Browse the repository at this point in the history
  • Loading branch information
zufuliu committed Nov 23, 2024
1 parent 4a8489a commit 633c1aa
Show file tree
Hide file tree
Showing 3 changed files with 340 additions and 6 deletions.
9 changes: 6 additions & 3 deletions scintilla/include/VectorISA.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
#define NP2_USE_SSE2 1

// Clang and GCC use -march=x86-64-v3, https://clang.llvm.org/docs/UsersManual.html#x86
// or -mavx2 -mpopcnt -mbmi -mbmi2 -mlzcnt -mmovbe
// MSVC use /arch:AVX2
#if defined(_WIN64) && defined(__AVX2__)
#define NP2_USE_AVX2 1
Expand Down Expand Up @@ -161,6 +160,7 @@

// https://stackoverflow.com/questions/32945410/sse2-intrinsics-comparing-unsigned-integers
#if NP2_USE_AVX2
// Broadcast the low byte of `ch` to all 32 byte lanes of a 256-bit vector:
// `_mm_cvtsi32_si128` places `ch` in the low 32 bits of an XMM register and
// `_mm256_broadcastb_epi8` replicates that register's lowest byte.
#define mm256_set1_epi8(ch) _mm256_broadcastb_epi8(_mm_cvtsi32_si128(ch))
// `_mm256_movemask_epi8` returns a signed `int`; cast it to `uint32_t` so the
// 32 lane bits can be shifted and tested as an unsigned mask.
#define mm256_movemask_epi8(a) static_cast<uint32_t>(_mm256_movemask_epi8(a))
// Per-byte unsigned `a >= b`: max(a, b) equals a exactly when a >= b, so the
// equality compare yields 0xFF in lanes where the relation holds and 0 elsewhere.
#define mm256_cmpge_epu8(a, b) \
_mm256_cmpeq_epi8(_mm256_max_epu8((a), (b)), (a))
Expand Down Expand Up @@ -240,8 +240,8 @@ inline uint32_t loadbe_u32(const void *ptr) noexcept {
#define andn_u32(a, b) _andn_u32((a), (b))
#endif

#define bit_zero_high_u32(x, index) _bzhi_u32((x), (index)) // BMI2
//#define bit_zero_high_u32(x, index) _bextr_u32((x), 0, (index)) // BMI1
#define bit_zero_high_u32(x, index) _bzhi_u32((x), (index))
#define bit_zero_high_u64(x, index) _bzhi_u64((x), (index))
#else

inline uint32_t loadbe_u32(const void *ptr) noexcept {
Expand All @@ -255,6 +255,9 @@ constexpr uint32_t andn_u32(uint32_t a, uint32_t b) noexcept {
// Portable counterpart of the `_bzhi_u32`-based macro of the same name:
// keeps the low `index` bits of `x` and clears every bit from position
// `index` upwards.
//
// An `index` of 32 (the full width) or more returns `x` unchanged, which is
// the result `_bzhi_u32` gives for index 32. Without the guard the expression
// `1U << index` would shift by at least the width of the type, which is
// undefined behavior.
constexpr uint32_t bit_zero_high_u32(uint32_t x, uint32_t index) noexcept {
	return (index < 32) ? (x & ((1U << index) - 1)) : x;
}
// Portable counterpart of the `_bzhi_u64`-based macro of the same name:
// keeps the low `index` bits of `x` and clears every bit from position
// `index` upwards.
//
// An `index` of 64 (the full width) or more returns `x` unchanged, which is
// the result `_bzhi_u64` gives for index 64. Without the guard the expression
// `UINT64_C(1) << index` would shift by at least the width of the type, which
// is undefined behavior.
constexpr uint64_t bit_zero_high_u64(uint64_t x, uint32_t index) noexcept {
	return (index < 64) ? (x & ((UINT64_C(1) << index) - 1)) : x;
}
#endif

#if NP2_TARGET_ARM
Expand Down
103 changes: 100 additions & 3 deletions scintilla/src/Document.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
#include "ILexer.h"

#include "Debugging.h"
//#include "VectorISA.h"
#include "VectorISA.h"

#include "CharacterSet.h"
//#include "CharacterCategory.h"
Expand Down Expand Up @@ -2930,7 +2930,9 @@ Sci::Position Document::ExtendStyleRange(Sci::Position pos, int delta, bool sing
return pos;
}

static constexpr char BraceOpposite(char ch) noexcept {
namespace {

constexpr char BraceOpposite(char ch) noexcept {
if (AnyOf<'(', ')'>(ch)) {
return '(' + ')' - ch;
}
Expand All @@ -2943,18 +2945,113 @@ static constexpr char BraceOpposite(char ch) noexcept {
return '\0';
}

}

// TODO: should be able to extend styled region to find matching brace
Sci::Position Document::BraceMatch(Sci::Position position, Sci::Position /*maxReStyle*/, Sci::Position startPos, bool useStartPos) const noexcept {
const unsigned char chBrace = CharAt(position);
const unsigned char chSeek = BraceOpposite(chBrace);
if (chSeek == '\0')
if (chSeek == '\0') {
return -1;
}
const int styBrace = StyleIndexAt(position);
const int direction = (chBrace < chSeek) ? 1 : -1;
const unsigned char safeChar = (direction >= 0) ? asciiForwardSafeChar : asciiBackwardSafeChar;
position = useStartPos ? startPos : NextPosition(position, direction);
const Sci::Position length = LengthNoExcept();
int depth = 1;
if (chBrace <= asciiBackwardSafeChar && IsValidIndex(position + 32*direction, length)) {
#if NP2_USE_AVX2
if (direction >= 0) {
const SplitView cbView = cb.AllView();
const __m256i mmBrace = mm256_set1_epi8(chBrace);
const __m256i mmSeek = mm256_set1_epi8(chSeek);
do {
const bool scanFirst = IsValidIndex(position, cbView.length1);
const Sci::Position segmentLength = scanFirst ? cbView.length1 : length;
const char * const segment = scanFirst ? cbView.segment1 : cbView.segment2;
const __m256i *ptr = reinterpret_cast<const __m256i *>(segment + position);
uint32_t mask = 0;
do {
const __m256i chunk1 = _mm256_loadu_si256(ptr);
mask = mm256_movemask_epi8(_mm256_or_si256(_mm256_cmpeq_epi8(chunk1, mmBrace), _mm256_cmpeq_epi8(chunk1, mmSeek)));
if (mask != 0) {
break;
}
ptr++;
position += sizeof(mmBrace);
} while (position < segmentLength);
Sci::Position index = position;
position += sizeof(mmBrace);
if (position >= segmentLength && index <= segmentLength) {
position = segmentLength;
const uint32_t offset = static_cast<uint32_t>(position - index);
mask = bit_zero_high_u32(mask, offset);
}
while (mask) {
const uint32_t trailing = np2::ctz(mask);
index += trailing;
mask >>= trailing;
if (index > GetEndStyled() || StyleIndexAt(index) == styBrace) {
const unsigned char chAtPos = segment[index];
depth += (chAtPos == chBrace) ? 1 : -1;
if (depth == 0) {
return index;
}
}
index++;
mask >>= 1;
}
} while (position < length);
}
// end NP2_USE_AVX2
#elif NP2_USE_SSE2
if (direction >= 0) {
const SplitView cbView = cb.AllView();
const __m128i mmBrace = _mm_set1_epi8(chBrace);
const __m128i mmSeek = _mm_set1_epi8(chSeek);
do {
const bool scanFirst = IsValidIndex(position, cbView.length1);
const Sci::Position segmentLength = scanFirst ? cbView.length1 : length;
const char * const segment = scanFirst ? cbView.segment1 : cbView.segment2;
const __m128i *ptr = reinterpret_cast<const __m128i *>(segment + position);
uint32_t mask = 0;
do {
const __m128i chunk1 = _mm_loadu_si128(ptr);
mask = mm_movemask_epi8(_mm_or_si128(_mm_cmpeq_epi8(chunk1, mmBrace), _mm_cmpeq_epi8(chunk1, mmSeek)));
if (mask != 0) {
break;
}
ptr++;
position += sizeof(mmBrace);
} while (position < segmentLength);
Sci::Position index = position;
position += sizeof(mmBrace);
if (position >= segmentLength && index <= segmentLength) {
position = segmentLength;
const uint32_t offset = static_cast<uint32_t>(position - index);
mask = bit_zero_high_u32(mask, offset);
}
while (mask) {
const uint32_t trailing = np2::ctz(mask);
index += trailing;
mask >>= trailing;
if (index > GetEndStyled() || StyleIndexAt(index) == styBrace) {
const unsigned char chAtPos = segment[index];
depth += (chAtPos == chBrace) ? 1 : -1;
if (depth == 0) {
return index;
}
}
index++;
mask >>= 1;
}
} while (position < length);
}
// end NP2_USE_SSE2
#endif
}

while (IsValidIndex(position, length)) {
const unsigned char chAtPos = CharAt(position);
if (chAtPos == chBrace || chAtPos == chSeek) {
Expand Down
Loading

0 comments on commit 633c1aa

Please sign in to comment.