|
26 | 26 |
|
27 | 27 | namespace bq { |
28 | 28 | namespace string_tools { |
| 29 | + template <typename T, typename = void> |
| 30 | + struct __has_value_type : bq::false_type {}; |
29 | 31 |
|
30 | | - /** |
31 | | - * @brief Fallback loop for compile-time constant evaluation. |
32 | | - * Compilers like GCC and Clang can unroll and constant-fold this |
33 | | - * when the input is a string literal. |
34 | | - */ |
35 | 32 | template <typename T> |
36 | | - static BQ_FUNC_RETURN_CONSTEXPR size_t constexpr_len(const T* str) { |
37 | | - size_t i = 0; |
38 | | - while (str[i] != 0) { |
39 | | - i++; |
40 | | - } |
41 | | - return i; |
42 | | - } |
43 | | - |
44 | | - /** |
45 | | - * @brief SWAR (SIMD Within A Register) implementation for UTF-16. |
46 | | - * Processes 4 characters (8 bytes) per iteration using bit manipulation |
47 | | - * to detect the null terminator. |
48 | | - */ |
49 | | - static bq_forceinline size_t u16_len_rt(const char16_t* s) { |
50 | | - const char16_t* p = s; |
51 | | - // Align to 8-byte boundary |
52 | | - while ((reinterpret_cast<uintptr_t>(p) & 7) != 0) { |
53 | | - if (*p == 0) return static_cast<size_t>(p - s); |
54 | | - p++; |
55 | | - } |
56 | | - |
57 | | - const uint64_t* chunk = reinterpret_cast<const uint64_t*>(p); |
58 | | - const uint64_t mask = 0x8000800080008000ULL; |
59 | | - const uint64_t low_bit = 0x0001000100010001ULL; |
60 | | - |
61 | | - while (true) { |
62 | | - uint64_t v = *chunk; |
63 | | - // Zero detection logic: (v - 0x0001) & ~v & 0x8000 |
64 | | - if ((v - low_bit) & ~v & mask) { |
65 | | - p = reinterpret_cast<const char16_t*>(chunk); |
66 | | - if (p[0] == 0) return static_cast<size_t>(p - s); |
67 | | - if (p[1] == 0) return static_cast<size_t>(p + 1 - s); |
68 | | - if (p[2] == 0) return static_cast<size_t>(p + 2 - s); |
69 | | - if (p[3] == 0) return static_cast<size_t>(p + 3 - s); |
70 | | - } |
71 | | - chunk++; |
72 | | - } |
73 | | - } |
| 33 | + struct __has_value_type<T, bq::void_t<typename T::value_type>> : bq::true_type {}; |
74 | 34 |
|
75 | | - /** |
76 | | - * @brief SWAR implementation for UTF-32. |
77 | | - * Processes 2 characters (8 bytes) per iteration. |
78 | | - */ |
79 | | - static bq_forceinline size_t u32_len_rt(const char32_t* s) { |
80 | | - const char32_t* p = s; |
81 | | - while ((reinterpret_cast<uintptr_t>(p) & 7) != 0) { |
82 | | - if (*p == 0) return static_cast<size_t>(p - s); |
83 | | - p++; |
84 | | - } |
| 35 | + template <typename T> |
| 36 | + struct is_c_str_compatible : bq::bool_type<bq::string::template is_std_string_compatible<T>::value || |
| 37 | + bq::u16string::template is_std_string_compatible<T>::value || |
| 38 | + bq::u32string::template is_std_string_compatible<T>::value> {}; |
85 | 39 |
|
86 | | - const uint64_t* chunk = reinterpret_cast<const uint64_t*>(p); |
87 | | - const uint64_t mask = 0x8000000080000000ULL; |
88 | | - const uint64_t low_bit = 0x0000000100000001ULL; |
| 40 | + template <typename T> |
| 41 | + struct is_data_compatible : bq::bool_type<bq::string::template is_std_string_view_compatible<T>::value || |
| 42 | + bq::u16string::template is_std_string_view_compatible<T>::value || |
| 43 | + bq::u32string::template is_std_string_view_compatible<T>::value> {}; |
89 | 44 |
|
90 | | - while (true) { |
91 | | - uint64_t v = *chunk; |
92 | | - if ((v - low_bit) & ~v & mask) { |
93 | | - p = reinterpret_cast<const char32_t*>(chunk); |
94 | | - if (p[0] == 0) return static_cast<size_t>(p - s); |
95 | | - if (p[1] == 0) return static_cast<size_t>(p + 1 - s); |
96 | | - } |
97 | | - chunk++; |
98 | | - } |
| 45 | + template <typename T> |
| 46 | + inline auto __bq_string_compatible_class_get_data(const T& str) -> bq::enable_if_t<is_c_str_compatible<T>::value, const typename T::value_type*> |
| 47 | + { |
| 48 | + return str.c_str(); |
99 | 49 | } |
100 | 50 |
|
101 | | - /* --- Dispatcher Templates --- */ |
102 | | - |
103 | | - template <typename T, bool WCHAR_SIZE_IS_16> |
104 | | - struct string_len_dispatch { |
105 | | - static BQ_FUNC_RETURN_CONSTEXPR size_t exec(const T* s) { |
106 | | - return constexpr_len(s); |
107 | | - } |
108 | | - }; |
109 | | - |
110 | | - template <bool WCHAR_SIZE_IS_16> |
111 | | - struct string_len_dispatch<char, WCHAR_SIZE_IS_16> { |
112 | | - static bq_forceinline size_t exec(const char* s) { |
113 | | -#if BQ_GCC_CLANG_BUILTIN(__builtin_wcslen) |
114 | | - return __builtin_strlen(s); |
115 | | -#else |
116 | | - return strlen(s); |
117 | | -#endif |
118 | | - } |
119 | | - }; |
120 | | - |
121 | | - template <bool WCHAR_SIZE_IS_16> |
122 | | - struct string_len_dispatch<wchar_t, WCHAR_SIZE_IS_16> { |
123 | | - static bq_forceinline size_t exec(const wchar_t* s) { |
124 | | -#if BQ_GCC_CLANG_BUILTIN(__builtin_wcslen) |
125 | | - return __builtin_wcslen(s); |
126 | | -#else |
127 | | - return wcslen(s); |
128 | | -#endif |
129 | | - } |
130 | | - }; |
131 | | - |
132 | | - template <> |
133 | | - struct string_len_dispatch<char16_t, true> { |
134 | | - static bq_forceinline size_t exec(const char16_t* s) { |
135 | | - return string_len_dispatch<wchar_t, true>::exec(reinterpret_cast<const wchar_t*>(s)); |
136 | | - } |
137 | | - }; |
138 | | - template <> |
139 | | - struct string_len_dispatch<char16_t, false> { |
140 | | - static bq_forceinline size_t exec(const char16_t* s) { |
141 | | - return u16_len_rt(s); |
142 | | - } |
143 | | - }; |
144 | | - |
145 | | - template <> |
146 | | - struct string_len_dispatch<char32_t, true> { |
147 | | - static bq_forceinline size_t exec(const char32_t* s) { |
148 | | - return u32_len_rt(s); |
149 | | - } |
150 | | - }; |
151 | | - template <> |
152 | | - struct string_len_dispatch<char32_t, false> { |
153 | | - static bq_forceinline size_t exec(const char32_t* s) { |
154 | | - return string_len_dispatch<wchar_t, false>::exec(reinterpret_cast<const wchar_t*>(s)); |
155 | | - } |
156 | | - }; |
157 | | - |
158 | | - /** |
159 | | - * @brief Unified entry point for string length calculation. |
160 | | - * * Performance characteristics: |
161 | | - * 1. Compile-time: Literal strings result in a zero-cost constant. |
162 | | - * 2. Runtime (char/wchar_t): Dispatches to compiler built-ins (SIMD optimized). |
163 | | - * 3. Runtime (u16/u32): Uses 64-bit SWAR to process multiple units per cycle. |
164 | | - */ |
165 | | - template <typename CHAR_TYPE> |
166 | | - BQ_FUNC_RETURN_CONSTEXPR size_t string_len_ptr(const CHAR_TYPE* str) { |
167 | | - if (!str) { |
168 | | - return 0; |
169 | | - } |
170 | | -#if BQ_GCC_CLANG_BUILTIN(__builtin_constant_p) |
171 | | - BQ_CONSTEXPR_IF(__builtin_constant_p(*str)) |
172 | | - { |
173 | | - return constexpr_len(str); |
174 | | - } |
175 | | - else { |
176 | | -#endif |
177 | | - return string_len_dispatch<CHAR_TYPE, sizeof(char16_t) == sizeof(wchar_t)>::exec(str); |
178 | | -#if defined(BQ_GCC) || defined(BQ_CLANG) |
179 | | - } |
180 | | -#endif |
181 | | - } |
182 | | - template <typename CHAR_TYPE, size_t N> |
183 | | - BQ_FUNC_RETURN_CONSTEXPR size_t string_len_array(const CHAR_TYPE(&str)[N]) { |
184 | | - BQ_LIKELY_IF(str[N - 1] == '\0') { |
185 | | - return N - 1; //constexpr_len(str); |
186 | | - } |
187 | | - else { |
188 | | - return N; |
189 | | - } |
190 | | - } |
191 | | - template <typename STR_TYPE> |
192 | | - bq_forceinline bq::enable_if_t<bq::is_pointer<STR_TYPE>::value, size_t> string_len(const STR_TYPE& str) { |
193 | | - return string_len_ptr(str); |
| 51 | + template <typename T> |
| 52 | + inline auto __bq_string_compatible_class_get_data(const T& str) -> bq::enable_if_t<is_data_compatible<T>::value, const typename T::value_type*> |
| 53 | + { |
| 54 | + return str.data(); |
194 | 55 | } |
195 | 56 |
|
| 57 | + |
196 | 58 | template <typename STR_TYPE> |
197 | | - bq_forceinline bq::enable_if_t<bq::is_array<STR_TYPE>::value, size_t> string_len(const STR_TYPE& str) { |
198 | | - return string_len_array(str); |
| 59 | + bq_forceinline size_t string_len(const STR_TYPE& str) { |
| 60 | + (void)str; |
| 61 | + return 0; |
199 | 62 | } |
200 | 63 |
|
201 | | - |
202 | 64 | template <typename CHAR_TYPE> |
203 | 65 | inline const CHAR_TYPE* find_char(const CHAR_TYPE* str, CHAR_TYPE c) |
204 | 66 | { |
|
0 commit comments