From 5741200c0d9171719eecaa45ce261899b474a475 Mon Sep 17 00:00:00 2001 From: Pavel Artsishevsky Date: Mon, 16 Dec 2024 23:42:51 +0100 Subject: [PATCH] Fix multibyte string processing Signed-off-by: Pavel Artsishevsky --- include/slimlog/pattern-inl.h | 45 ++++++++++++++++------------------ include/slimlog/pattern.h | 28 ++++++++++----------- include/slimlog/util/unicode.h | 36 +++++++++++++++++---------- 3 files changed, 57 insertions(+), 52 deletions(-) diff --git a/include/slimlog/pattern-inl.h b/include/slimlog/pattern-inl.h index bf6ebf5..9e78fca 100644 --- a/include/slimlog/pattern-inl.h +++ b/include/slimlog/pattern-inl.h @@ -268,19 +268,9 @@ void Pattern::format_string(auto& out, const auto& item, StringView&& data if (auto& specs = std::get(item); specs.width > 0) [[unlikely]] { - write_padded(out, std::forward(data), specs, codepoints); + write_string_padded(out, std::forward(data), specs, codepoints); } else { - using DataChar = typename std::remove_cvref_t::value_type; - if constexpr (std::is_same_v && !std::is_same_v) { - out.resize(out.size() + codepoints + 1); - const std::size_t written = Util::Unicode::from_multibyte( - std::prev(out.end()), - std::forward(data), // NOLINT(cppcoreguidelines-slicing) - codepoints); - out.resize(out.size() + codepoints - written); - } else { - out.append(std::forward(data)); - } + write_string(out, std::forward(data), codepoints); } } @@ -439,7 +429,24 @@ auto Pattern::get_string_specs(StringViewType value) -> Placeholder::Strin template template -constexpr void Pattern::write_padded( +constexpr void Pattern::write_string(auto& dst, StringView&& src, std::size_t codepoints) +{ + using DataChar = typename std::remove_cvref_t::value_type; + if constexpr (std::is_same_v && !std::is_same_v) { + dst.reserve(dst.size() + codepoints + 1); // Take into account null terminator + const std::size_t written = Util::Unicode::from_multibyte( + dst.end(), + std::forward(src), // NOLINT(cppcoreguidelines-slicing) + codepoints + 1); + dst.resize(dst.size() + written - 1); // Trim null terminator + } else { + dst.append(std::forward(src)); + } +} + +template +template +constexpr void Pattern::write_string_padded( auto& dst, StringView&& src, const Placeholder::StringSpecs& specs, std::size_t codepoints) { const auto spec_width = Util::Types::to_unsigned(specs.width); @@ -490,17 +497,7 @@ constexpr void Pattern::write_padded( } // Fill data - using DataChar = typename std::remove_cvref_t::value_type; - if constexpr (std::is_same_v && !std::is_same_v) { - dst.resize(dst.size() + codepoints + 1); - const std::size_t written = Util::Unicode::from_multibyte( - std::prev(dst.end()), - std::forward(src), // NOLINT(cppcoreguidelines-slicing) - codepoints); - dst.resize(dst.size() + codepoints - written); - } else { - dst.append(std::forward(src)); - } + write_string(dst, src, codepoints); // Fill right padding if (right_padding != 0) { diff --git a/include/slimlog/pattern.h b/include/slimlog/pattern.h index 294a5fe..86621d1 100644 --- a/include/slimlog/pattern.h +++ b/include/slimlog/pattern.h @@ -333,35 +333,33 @@ class Pattern { */ static auto get_string_specs(StringViewType value) -> Placeholder::StringSpecs; + /** + * @brief Writes the source string to the destination buffer. + * + * @tparam StringView String view type, convertible to `std::basic_string_view`. + * @param dst Destination buffer where the string will be written. + * @param src Source string view to be written. + * @param codepoints Number of codepoints the source string contains. + */ + template + constexpr static void write_string(auto& dst, StringView&& src, std::size_t codepoints); + /** * @brief Writes the source string to the destination buffer with specific alignment. * * This function writes the source string to the destination buffer, applying the specified * alignment and fill character. * - * @tparam T Character type for the string view. + * @tparam StringView String view type, convertible to `std::basic_string_view`. * @param dst Destination buffer where the string will be written. * @param src Source string view to be written. * @param specs String specifications, including alignment and fill character. * @param codepoints Number of codepoints the source string contains. */ template - constexpr static void write_padded( + constexpr static void write_string_padded( auto& dst, StringView&& src, const Placeholder::StringSpecs& specs, std::size_t codepoints); - /** - * @brief Converts a multi-byte string to a single-byte string. - * - * This function converts a multi-byte string to a single-byte string and appends the result to - * the provided destination stream buffer. - * - * @tparam T Character type of the source string. - * @param out Destination stream buffer where the converted string will be appended. - * @param data Source multi-byte string to be converted. - * @param codepoints Number of codepoints the data string contains. - */ - static void from_multibyte(auto& out, std::string_view data, std::size_t codepoints); - std::basic_string m_pattern; std::vector m_placeholders; Levels m_levels; diff --git a/include/slimlog/util/unicode.h b/include/slimlog/util/unicode.h index 647028a..5fd492c 100644 --- a/include/slimlog/util/unicode.h +++ b/include/slimlog/util/unicode.h @@ -18,7 +18,6 @@ #include #include #include -#include namespace SlimLog::Util::Unicode { @@ -29,21 +28,21 @@ namespace Detail { namespace Fallback { #ifdef __cpp_char8_t template -inline auto mbrtoc8(Args... /*unused*/) +inline auto mbrtoc8(Args... /*unused*/) -> std::nullptr_t { - return std::monostate{}; + return nullptr; }; #endif #ifdef __cpp_unicode_characters template -inline auto mbrtoc16(Args... /*unused*/) +inline auto mbrtoc16(Args... /*unused*/) -> std::nullptr_t { - return std::monostate{}; + return nullptr; }; template -inline auto mbrtoc32(Args... /*unused*/) +inline auto mbrtoc32(Args... /*unused*/) -> std::nullptr_t { - return std::monostate{}; + return nullptr; }; #endif } // namespace Fallback @@ -78,7 +77,7 @@ struct FromMultibyte { return static_cast(res); } - template + template static auto handle(T /*unused*/) -> int { static_assert( @@ -262,6 +261,18 @@ constexpr auto to_ascii(Char chr) -> char return chr <= std::numeric_limits::max() ? static_cast(chr) : '\0'; } +/** + * @brief Converts a null-terminated multibyte string to a singlebyte character sequence. + * + * Destination buffer has to be capable of storing at least @p codepoints + 1 characters + * including null terminator. + * + * @tparam Char Character type of the destination string. + * @param dest Pointer to destination buffer for the converted string. + * @param data Source multi-byte string to be converted. + * @param codepoints Number of codepoints to be written to the destination string. + * @return Number of characters written including null terminator. + */ template constexpr auto from_multibyte(Char* dest, std::string_view data, std::size_t codepoints) { @@ -271,7 +282,7 @@ constexpr auto from_multibyte(Char* dest, std::string_view data, std::size_t cod if constexpr (std::is_same_v) { std::mbstate_t state = {}; #if defined(_WIN32) and defined(__STDC_WANT_SECURE_LIB__) - if (mbsrtowcs_s(&written, dest, codepoints + 1, &source, _TRUNCATE, &state) != 0) { + if (mbsrtowcs_s(&written, dest, codepoints, &source, codepoints - 1, &state) != 0) { throw std::runtime_error("mbsrtowcs_s(): conversion error"); } #else @@ -280,8 +291,7 @@ constexpr auto from_multibyte(Char* dest, std::string_view data, std::size_t cod if (written == static_cast(-1)) { throw std::runtime_error("std::mbsrtowcs(): conversion error"); } - *std::next(dest, codepoints) = '\0'; - ++written; + *std::next(dest, written++) = '\0'; #endif } else { Char wchr; @@ -298,7 +308,7 @@ constexpr auto from_multibyte(Char* dest, std::string_view data, std::size_t cod throw std::runtime_error("std::mbrtocN(): conversion error"); break; case -2: - // Incomplete but valid character, skip it + // Incomplete but valid character, go further break; case -3: // Next character from surrogate pair was processed @@ -316,7 +326,7 @@ constexpr auto from_multibyte(Char* dest, std::string_view data, std::size_t cod break; } } - *std::next(dest, codepoints) = '\0'; + *dest = '\0'; ++written; } return written;