Skip to content

Commit

Permalink
🚧 WIP for #4552
Browse files Browse the repository at this point in the history
  • Loading branch information
nlohmann committed Dec 18, 2024
1 parent 30cd44d commit 4d67e12
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 1 deletion.
6 changes: 6 additions & 0 deletions include/nlohmann/detail/output/serializer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,12 @@ class serializer
// thus removing/ignoring the invalid characters
bytes = bytes_after_last_accept;

// fix for #4552 - discussion pending
if (error_handler == error_handler_t::ignore)
{
bytes += undumped_chars;
}

if (error_handler == error_handler_t::replace)
{
// add a replacement character
Expand Down
6 changes: 6 additions & 0 deletions single_include/nlohmann/json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18816,6 +18816,12 @@ class serializer
// thus removing/ignoring the invalid characters
bytes = bytes_after_last_accept;

// fix for #4552 - discussion pending
if (error_handler == error_handler_t::ignore)
{
bytes += undumped_chars;
}

if (error_handler == error_handler_t::replace)
{
// add a replacement character
Expand Down
8 changes: 8 additions & 0 deletions tests/src/unit-regression2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -995,6 +995,14 @@ TEST_CASE("regression tests 2")
CHECK(p.x == 1);
CHECK(p.y == 2);
}

SECTION("issue #4552 - UTF-8 invalid characters are not always ignored when dumping with error_handler_t::ignore")
{
    // Input string ends with byte 0xDC ("\334", a UTF-8 two-byte lead byte)
    // directly followed by 0x05 ("\005"), which is not a continuation byte,
    // so the 0xDC byte forms an invalid UTF-8 sequence.
    nlohmann::json doc;
    doc["test"] = "test\334\005";

    // Expected serialization: the 0xDC byte is emitted verbatim and the
    // control character 0x05 is escaped as \u0005.
    const char* const expected = "{\"test\":\"test\334\\u0005\"}";

    // Dump with indent -1, indent character ' ', ensure_ascii disabled, and
    // the `ignore` error handler for invalid UTF-8.
    const auto dumped = doc.dump(-1, ' ', false, nlohmann::json::error_handler_t::ignore);
    CHECK(dumped == expected);
}
}

DOCTEST_CLANG_SUPPRESS_WARNING_POP
6 changes: 5 additions & 1 deletion tests/src/unit-serialization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,11 @@ TEST_CASE("serialization")

CHECK_THROWS_WITH_AS(j.dump(), "[json.exception.type_error.316] invalid UTF-8 byte at index 5: 0x34", json::type_error&);
CHECK_THROWS_AS(j.dump(1, ' ', false, json::error_handler_t::strict), json::type_error&);
CHECK(j.dump(-1, ' ', false, json::error_handler_t::ignore) == "\"123456\"");

// see pending discussion at #4552
// CHECK(j.dump(-1, ' ', false, json::error_handler_t::ignore) == "\"123456\"");
CHECK(j.dump(-1, ' ', false, json::error_handler_t::ignore) == "\"123\xF1\xB0\x34\x35\x36\"");

CHECK(j.dump(-1, ' ', false, json::error_handler_t::replace) == "\"123\xEF\xBF\xBD\x34\x35\x36\"");
CHECK(j.dump(-1, ' ', true, json::error_handler_t::replace) == "\"123\\ufffd456\"");
}
Expand Down

0 comments on commit 4d67e12

Please sign in to comment.