Skip to content

Commit

Permalink
Fix multibyte string processing
Browse files Browse the repository at this point in the history
Signed-off-by: Pavel Artsishevsky <[email protected]>
  • Loading branch information
polter-rnd committed Dec 16, 2024
1 parent c083041 commit 5741200
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 52 deletions.
45 changes: 21 additions & 24 deletions include/slimlog/pattern-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -268,19 +268,9 @@ void Pattern<Char>::format_string(auto& out, const auto& item, StringView&& data

if (auto& specs = std::get<typename Placeholder::StringSpecs>(item); specs.width > 0)
[[unlikely]] {
write_padded(out, std::forward<StringView>(data), specs, codepoints);
write_string_padded(out, std::forward<StringView>(data), specs, codepoints);
} else {
using DataChar = typename std::remove_cvref_t<StringView>::value_type;
if constexpr (std::is_same_v<DataChar, char> && !std::is_same_v<Char, char>) {
out.resize(out.size() + codepoints + 1);
const std::size_t written = Util::Unicode::from_multibyte(
std::prev(out.end()),
std::forward<StringView>(data), // NOLINT(cppcoreguidelines-slicing)
codepoints);
out.resize(out.size() + codepoints - written);
} else {
out.append(std::forward<StringView>(data));
}
write_string(out, std::forward<StringView>(data), codepoints);
}
}

Expand Down Expand Up @@ -439,7 +429,24 @@ auto Pattern<Char>::get_string_specs(StringViewType value) -> Placeholder::Strin

template<typename Char>
template<typename StringView>
constexpr void Pattern<Char>::write_padded(
constexpr void Pattern<Char>::write_string(auto& dst, StringView&& src, std::size_t codepoints)
{
    // Appends `src` to `dst`. Narrow `char` input headed for a non-`char` pattern is
    // converted through Util::Unicode::from_multibyte(); anything else is appended as-is.
    using SourceChar = typename std::remove_cvref_t<StringView>::value_type;
    constexpr bool NeedsConversion
        = std::is_same_v<SourceChar, char> && !std::is_same_v<Char, char>;

    if constexpr (!NeedsConversion) {
        dst.append(std::forward<StringView>(src));
    } else {
        // One extra slot for the null terminator emitted by the converter.
        const std::size_t capacity = codepoints + 1;
        dst.reserve(dst.size() + capacity);

        // Conversion writes directly past the current end of the buffer.
        // NOTE(review): relies on `dst.end()` being usable as a `Char*` and on
        // `resize()` keeping the freshly written characters - confirm for the buffer type.
        const std::size_t written = Util::Unicode::from_multibyte(
            dst.end(),
            std::forward<StringView>(src), // NOLINT(cppcoreguidelines-slicing)
            capacity);

        // The reported count includes the terminator, which must not become part of `dst`.
        dst.resize(dst.size() + written - 1);
    }
}

template<typename Char>
template<typename StringView>
constexpr void Pattern<Char>::write_string_padded(
auto& dst, StringView&& src, const Placeholder::StringSpecs& specs, std::size_t codepoints)
{
const auto spec_width = Util::Types::to_unsigned(specs.width);
Expand Down Expand Up @@ -490,17 +497,7 @@ constexpr void Pattern<Char>::write_padded(
}

// Fill data
using DataChar = typename std::remove_cvref_t<StringView>::value_type;
if constexpr (std::is_same_v<DataChar, char> && !std::is_same_v<Char, char>) {
dst.resize(dst.size() + codepoints + 1);
const std::size_t written = Util::Unicode::from_multibyte(
std::prev(dst.end()),
std::forward<StringView>(src), // NOLINT(cppcoreguidelines-slicing)
codepoints);
dst.resize(dst.size() + codepoints - written);
} else {
dst.append(std::forward<StringView>(src));
}
write_string(dst, src, codepoints);

// Fill right padding
if (right_padding != 0) {
Expand Down
28 changes: 13 additions & 15 deletions include/slimlog/pattern.h
Original file line number Diff line number Diff line change
Expand Up @@ -333,35 +333,33 @@ class Pattern {
*/
static auto get_string_specs(StringViewType value) -> Placeholder::StringSpecs;

/**
* @brief Writes the source string to the destination buffer.
*
* @tparam StringView String view type, convertible to `std::basic_string_view`.
* @param dst Destination buffer where the string will be written.
* @param src Source string view to be written.
* @param codepoints Number of codepoints the source string contains.
*/
template<typename StringView>
constexpr static void write_string(auto& dst, StringView&& src, std::size_t codepoints);

/**
* @brief Writes the source string to the destination buffer with specific alignment.
*
* This function writes the source string to the destination buffer, applying the specified
* alignment and fill character.
*
* @tparam T Character type for the string view.
* @tparam StringView String view type, convertible to `std::basic_string_view`.
* @param dst Destination buffer where the string will be written.
* @param src Source string view to be written.
* @param specs String specifications, including alignment and fill character.
* @param codepoints Number of codepoints the source string contains.
*/
template<typename StringView>
constexpr static void write_padded(
constexpr static void write_string_padded(
auto& dst, StringView&& src, const Placeholder::StringSpecs& specs, std::size_t codepoints);

/**
* @brief Converts a multi-byte string to a single-byte string.
*
* This function converts a multi-byte string to a single-byte string and appends the result to
* the provided destination stream buffer.
*
* @tparam T Character type of the source string.
* @param out Destination stream buffer where the converted string will be appended.
* @param data Source multi-byte string to be converted.
* @param codepoints Number of codepoints the data string contains.
*/
static void from_multibyte(auto& out, std::string_view data, std::size_t codepoints);

std::basic_string<Char> m_pattern;
std::vector<Placeholder> m_placeholders;
Levels m_levels;
Expand Down
36 changes: 23 additions & 13 deletions include/slimlog/util/unicode.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
#include <limits>
#include <stdexcept>
#include <string_view>
#include <variant>

namespace SlimLog::Util::Unicode {

Expand All @@ -29,21 +28,21 @@ namespace Detail {
namespace Fallback {
#ifdef __cpp_char8_t
template<typename... Args>
inline auto mbrtoc8(Args... /*unused*/)
inline auto mbrtoc8(Args... /*unused*/) -> std::nullptr_t
{
return std::monostate{};
return nullptr;
};
#endif
#ifdef __cpp_unicode_characters
template<typename... Args>
inline auto mbrtoc16(Args... /*unused*/)
inline auto mbrtoc16(Args... /*unused*/) -> std::nullptr_t
{
return std::monostate{};
return nullptr;
};
template<typename... Args>
inline auto mbrtoc32(Args... /*unused*/)
inline auto mbrtoc32(Args... /*unused*/) -> std::nullptr_t
{
return std::monostate{};
return nullptr;
};
#endif
} // namespace Fallback
Expand Down Expand Up @@ -78,7 +77,7 @@ struct FromMultibyte {
return static_cast<int>(res);
}

template<typename T = std::monostate>
template<typename T = std::nullptr_t>
static auto handle(T /*unused*/) -> int
{
static_assert(
Expand Down Expand Up @@ -262,6 +261,18 @@ constexpr auto to_ascii(Char chr) -> char
return chr <= std::numeric_limits<unsigned char>::max() ? static_cast<char>(chr) : '\0';
}

/**
* @brief Converts a null-terminated multibyte string to a sequence of wide or Unicode characters of type @p Char.
*
* Destination buffer has to be capable of storing at least @p codepoints + 1 characters
* including null terminator.
*
* @tparam Char Character type of the destination string.
* @param dest Pointer to destination buffer for the converted string.
* @param data Source multi-byte string to be converted.
* @param codepoints Number of codepoints to be written to the destination string.
* @return Number of characters written including null terminator.
*/
template<typename Char>
constexpr auto from_multibyte(Char* dest, std::string_view data, std::size_t codepoints)
{
Expand All @@ -271,7 +282,7 @@ constexpr auto from_multibyte(Char* dest, std::string_view data, std::size_t cod
if constexpr (std::is_same_v<Char, wchar_t>) {
std::mbstate_t state = {};
#if defined(_WIN32) and defined(__STDC_WANT_SECURE_LIB__)
if (mbsrtowcs_s(&written, dest, codepoints + 1, &source, _TRUNCATE, &state) != 0) {
if (mbsrtowcs_s(&written, dest, codepoints, &source, codepoints - 1, &state) != 0) {
throw std::runtime_error("mbsrtowcs_s(): conversion error");
}
#else
Expand All @@ -280,8 +291,7 @@ constexpr auto from_multibyte(Char* dest, std::string_view data, std::size_t cod
if (written == static_cast<std::size_t>(-1)) {
throw std::runtime_error("std::mbsrtowcs(): conversion error");
}
*std::next(dest, codepoints) = '\0';
++written;
*std::next(dest, written++) = '\0';
#endif
} else {
Char wchr;
Expand All @@ -298,7 +308,7 @@ constexpr auto from_multibyte(Char* dest, std::string_view data, std::size_t cod
throw std::runtime_error("std::mbrtocN(): conversion error");
break;
case -2:
// Incomplete but valid character, skip it
// Incomplete but valid character, go further
break;
case -3:
// Next character from surrogate pair was processed
Expand All @@ -316,7 +326,7 @@ constexpr auto from_multibyte(Char* dest, std::string_view data, std::size_t cod
break;
}
}
*std::next(dest, codepoints) = '\0';
*dest = '\0';
++written;
}
return written;
Expand Down

0 comments on commit 5741200

Please sign in to comment.