Skip to content

Commit 245c85a

Browse files
committed
Fix: Add null terminators to character arrays to include final characters
Appends '\0' to carray definitions such as digits(), ensuring that every intended character is included in the bitset. Previously the last character of each array was excluded, causing methods like is_digit() to fail for that final character.
1 parent 86f53d9 commit 245c85a

File tree

1 file changed

+53
-45
lines changed

1 file changed

+53
-45
lines changed

include/stringzilla/stringzilla.hpp

+53-45
Original file line numberDiff line numberDiff line change
@@ -119,12 +119,13 @@ inline void memcpy(void *target, void const *source, std::size_t n) noexcept {
119119
* @brief The concatenation of the `ascii_lowercase` and `ascii_uppercase` constants. This value is not locale-dependent.
120120
* https://docs.python.org/3/library/string.html#string.ascii_letters
121121
*/
122-
inline carray<52> const &ascii_letters() noexcept {
123-
static carray<52> const all = {
122+
inline carray<53> const &ascii_letters() noexcept {
123+
static carray<53> const all = {
124124
//
125-
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r',
126-
's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
127-
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
125+
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r',
126+
's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
127+
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', //
128+
'\0',
128129
};
129130
return all;
130131
}
@@ -133,11 +134,12 @@ inline carray<52> const &ascii_letters() noexcept {
133134
* @brief The lowercase letters "abcdefghijklmnopqrstuvwxyz". This value is not locale-dependent.
134135
* https://docs.python.org/3/library/string.html#string.ascii_lowercase
135136
*/
136-
inline carray<26> const &ascii_lowercase() noexcept {
137-
static carray<26> const all = {
137+
inline carray<27> const &ascii_lowercase() noexcept {
138+
static carray<27> const all = {
138139
//
139-
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
140-
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
140+
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
141+
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', //
142+
'\0',
141143
};
142144
return all;
143145
}
@@ -146,11 +148,12 @@ inline carray<26> const &ascii_lowercase() noexcept {
146148
* @brief The uppercase letters "ABCDEFGHIJKLMNOPQRSTUVWXYZ". This value is not locale-dependent.
147149
* https://docs.python.org/3/library/string.html#string.ascii_uppercase
148150
*/
149-
inline carray<26> const &ascii_uppercase() noexcept {
150-
static carray<26> const all = {
151+
inline carray<27> const &ascii_uppercase() noexcept {
152+
static carray<27> const all = {
151153
//
152-
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
153-
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
154+
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
155+
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', //
156+
'\0',
154157
};
155158
return all;
156159
}
@@ -160,14 +163,15 @@ inline carray<26> const &ascii_uppercase() noexcept {
160163
* A combination of `digits`, `ascii_letters`, `punctuation`, and `whitespace`.
161164
* https://docs.python.org/3/library/string.html#string.printable
162165
*/
163-
inline carray<100> const &ascii_printables() noexcept {
164-
static carray<100> const all = {
166+
inline carray<101> const &ascii_printables() noexcept {
167+
static carray<101> const all = {
165168
//
166-
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
167-
'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D',
168-
'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
169-
'Y', 'Z', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<',
170-
'=', '>', '?', '@', '[', '\\', ']', '^', '_', '`', '{', '|', '}', '~', ' ', '\t', '\n', '\r', '\f', '\v',
169+
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
170+
'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D',
171+
'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
172+
'Y', 'Z', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<',
173+
'=', '>', '?', '@', '[', '\\', ']', '^', '_', '`', '{', '|', '}', '~', ' ', '\t', '\n', '\r', '\f', '\v', //
174+
'\0',
171175
};
172176
return all;
173177
}
@@ -176,11 +180,12 @@ inline carray<100> const &ascii_printables() noexcept {
176180
* @brief Non-printable ASCII control characters.
177181
* Includes all codes from 0 to 31 and 127.
178182
*/
179-
inline carray<33> const &ascii_controls() noexcept {
180-
static carray<33> const all = {
183+
inline carray<34> const &ascii_controls() noexcept {
184+
static carray<34> const all = {
181185
//
182-
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
183-
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 127,
186+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
187+
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 127, //
188+
'\0',
184189
};
185190
return all;
186191
}
@@ -189,20 +194,21 @@ inline carray<33> const &ascii_controls() noexcept {
189194
* @brief The digits "0123456789".
190195
* https://docs.python.org/3/library/string.html#string.digits
191196
*/
192-
inline carray<10> const &digits() noexcept {
193-
static carray<10> const all = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'};
197+
inline carray<11> const &digits() noexcept {
198+
static carray<11> const all = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '\0'};
194199
return all;
195200
}
196201

197202
/**
198203
* @brief The hexadecimal digits "0123456789abcdefABCDEF".
199204
* https://docs.python.org/3/library/string.html#string.hexdigits
200205
*/
201-
inline carray<22> const &hexdigits() noexcept {
202-
static carray<22> const all = {
206+
inline carray<23> const &hexdigits() noexcept {
207+
static carray<23> const all = {
203208
//
204-
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', //
205-
'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', 'F',
209+
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', //
210+
'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', 'F', //
211+
'\0',
206212
};
207213
return all;
208214
}
@@ -211,8 +217,8 @@ inline carray<22> const &hexdigits() noexcept {
211217
* @brief The octal digits "01234567".
212218
* https://docs.python.org/3/library/string.html#string.octdigits
213219
*/
214-
inline carray<8> const &octdigits() noexcept {
215-
static carray<8> const all = {'0', '1', '2', '3', '4', '5', '6', '7'};
220+
inline carray<9> const &octdigits() noexcept {
221+
static carray<9> const all = {'0', '1', '2', '3', '4', '5', '6', '7', '\0'};
216222
return all;
217223
}
218224

@@ -221,11 +227,12 @@ inline carray<8> const &octdigits() noexcept {
221227
* !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~.
222228
* https://docs.python.org/3/library/string.html#string.punctuation
223229
*/
224-
inline carray<32> const &punctuation() noexcept {
225-
static carray<32> const all = {
230+
inline carray<33> const &punctuation() noexcept {
231+
static carray<33> const all = {
226232
//
227-
'!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', ':',
228-
';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_', '`', '{', '|', '}', '~',
233+
'!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', ':',
234+
';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_', '`', '{', '|', '}', '~', //
235+
'\0',
229236
};
230237
return all;
231238
}
@@ -235,29 +242,30 @@ inline carray<32> const &punctuation() noexcept {
235242
* This includes space, tab, linefeed, return, formfeed, and vertical tab.
236243
* https://docs.python.org/3/library/string.html#string.whitespace
237244
*/
238-
inline carray<6> const &whitespaces() noexcept {
239-
static carray<6> const all = {' ', '\t', '\n', '\r', '\f', '\v'};
245+
inline carray<7> const &whitespaces() noexcept {
246+
static carray<7> const all = {' ', '\t', '\n', '\r', '\f', '\v', '\0'};
240247
return all;
241248
}
242249

243250
/**
244251
* @brief ASCII characters that are considered line delimiters.
245252
* https://docs.python.org/3/library/stdtypes.html#str.splitlines
246253
*/
247-
inline carray<8> const &newlines() noexcept {
248-
static carray<8> const all = {'\n', '\r', '\f', '\v', '\x1C', '\x1D', '\x1E', '\x85'};
254+
inline carray<9> const &newlines() noexcept {
255+
static carray<9> const all = {'\n', '\r', '\f', '\v', '\x1C', '\x1D', '\x1E', '\x85', '\0'};
249256
return all;
250257
}
251258

252259
/**
253260
* @brief ASCII characters forming the BASE64 encoding alphabet.
254261
*/
255-
inline carray<64> const &base64() noexcept {
256-
static carray<64> const all = {
262+
inline carray<65> const &base64() noexcept {
263+
static carray<65> const all = {
257264
//
258-
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
259-
'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R',
260-
'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/',
265+
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
266+
'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R',
267+
'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', //
268+
'\0',
261269
};
262270
return all;
263271
}

0 commit comments

Comments
 (0)