-
Notifications
You must be signed in to change notification settings - Fork 89
Open
Labels
enhancementNew feature or requestNew feature or request
Description
Describe what you are looking for
The sz_tolower
function requires copying to another buffer. Within find
and find_byte
routines a lowercasing step can be done quickly in a few extra cycles. I'm happy to add the feature, just gauging interest here.
For avx512
SZ_INTERNAL __m512i sz_lower_avx512(__m512i in)
{
__m512i A = _mm512_set1_epi8('A');
__m512i Z = _mm512_set1_epi8('Z');
__m512i to_lower = _mm512_set1_epi8('a' - 'A');
__mmask64 ge_A = _mm512_cmpge_epi8_mask(in, A);
__mmask64 le_Z = _mm512_cmple_epi8_mask(in, Z);
__mmask64 is_upper = _kand_mask64(ge_A, le_Z);
return _mm512_mask_add_epi8(in, is_upper, in, to_lower);
}
SZ_PUBLIC sz_cptr_t sz_find_byte_case_insensitive_avx512(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n)
{
__mmask64 mask;
sz_u512_vec_t h_vec, n_vec;
/// PATCH!!!
n_vec.zmm = _mm512_set1_epi8(sz_u8_tolower(n[0]));
/// PATCH!!!
while (h_length >= 64) {
/// PATCH!!!
h_vec.zmm = sz_lower_avx512(_mm512_loadu_si512(h));
/// PATCH!!!
mask = _mm512_cmpeq_epi8_mask(h_vec.zmm, n_vec.zmm);
if (mask)
return h + sz_u64_ctz(mask);
h += 64, h_length -= 64;
}
if (h_length) {
mask = _sz_u64_mask_until(h_length);
/// PATCH!!!
h_vec.zmm = sz_lower_avx512(_mm512_maskz_loadu_epi8(mask, h));
/// PATCH!!!
// Reuse the same `mask` variable to find the bit that doesn't match
mask = _mm512_mask_cmpeq_epu8_mask(mask, h_vec.zmm, n_vec.zmm);
if (mask)
return h + sz_u64_ctz(mask);
}
return SZ_NULL_CHAR;
}
Can you contribute to the implementation?
- I can contribute
Is your feature request specific to a certain interface?
It applies to everything
Contact Details
Is there an existing issue for this?
- I have searched the existing issues
Code of Conduct
- I agree to follow this project's Code of Conduct
Metadata
Metadata
Assignees
Labels
enhancementNew feature or requestNew feature or request