diff --git a/include/nlohmann/detail/output/serializer.hpp b/include/nlohmann/detail/output/serializer.hpp index b42a310448..3509de5fc4 100644 --- a/include/nlohmann/detail/output/serializer.hpp +++ b/include/nlohmann/detail/output/serializer.hpp @@ -529,6 +529,12 @@ class serializer // thus removing/ignoring the invalid characters bytes = bytes_after_last_accept; + // fix for #4552 - discussion pending: re-advance past the undumped bytes so they are not dropped + if (error_handler == error_handler_t::ignore) + { + bytes += undumped_chars; + } + if (error_handler == error_handler_t::replace) { // add a replacement character diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 00a467a99c..d1ba1cb75b 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -18816,6 +18816,12 @@ class serializer // thus removing/ignoring the invalid characters bytes = bytes_after_last_accept; + // fix for #4552 - discussion pending: re-advance past the undumped bytes so they are not dropped + if (error_handler == error_handler_t::ignore) + { + bytes += undumped_chars; + } + if (error_handler == error_handler_t::replace) { // add a replacement character diff --git a/tests/src/unit-regression2.cpp b/tests/src/unit-regression2.cpp index 38d01059d1..8f3a8b4126 100644 --- a/tests/src/unit-regression2.cpp +++ b/tests/src/unit-regression2.cpp @@ -995,6 +995,14 @@ TEST_CASE("regression tests 2") CHECK(p.x == 1); CHECK(p.y == 2); } + + SECTION("issue #4552 - UTF-8 invalid characters are not always ignored when dumping with error_handler_t::ignore") + { + nlohmann::json node; + node["test"] = "test\334\005"; + const auto test_dump = node.dump(-1, ' ', false, nlohmann::json::error_handler_t::ignore); + CHECK(test_dump == "{\"test\":\"test\334\\u0005\"}"); + } } DOCTEST_CLANG_SUPPRESS_WARNING_POP diff --git a/tests/src/unit-serialization.cpp b/tests/src/unit-serialization.cpp index 201e5724cc..bb7f1bfaf8 100644 --- a/tests/src/unit-serialization.cpp +++ b/tests/src/unit-serialization.cpp @@ -107,7 +107,11 @@ TEST_CASE("serialization") CHECK_THROWS_WITH_AS(j.dump(), 
"[json.exception.type_error.316] invalid UTF-8 byte at index 5: 0x34", json::type_error&); CHECK_THROWS_AS(j.dump(1, ' ', false, json::error_handler_t::strict), json::type_error&); - CHECK(j.dump(-1, ' ', false, json::error_handler_t::ignore) == "\"123456\""); + + // see pending discussion at #4552 + // CHECK(j.dump(-1, ' ', false, json::error_handler_t::ignore) == "\"123456\""); + CHECK(j.dump(-1, ' ', false, json::error_handler_t::ignore) == "\"123\xF1\xB0\x34\x35\x36\""); + CHECK(j.dump(-1, ' ', false, json::error_handler_t::replace) == "\"123\xEF\xBF\xBD\x34\x35\x36\""); CHECK(j.dump(-1, ' ', true, json::error_handler_t::replace) == "\"123\\ufffd456\""); }