Skip to content

Commit d99b77b

Browse files
Added edge cases to Suggestion.spellingSuggestions
1 parent 34c4578 commit d99b77b

File tree

1 file changed

+45
-0
lines changed

1 file changed

+45
-0
lines changed

test/suggestion.cpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -811,6 +811,11 @@ TEST(Suggestion, spellingSuggestions) {
811811
"seit",
812812
"vorgestern",
813813
"wahrscheinlich",
814+
815+
// Entries for demonstrating shortcomings of the PoC implementation
816+
"Lorem ipsum",
817+
"King",
818+
"Kong",
814819
});
815820

816821
EXPECT_SPELLING_CORRECTION(a, "", 1, ({}));
@@ -891,6 +896,46 @@ TEST(Suggestion, spellingSuggestions) {
891896
EXPECT_SPELLING_CORRECTION(a, "Farradschluss", 1, ({"Fahrradschloss"}));
892897
EXPECT_SPELLING_CORRECTION(a, "Konkorenz", 1, ({"Konkurrenz"}));
893898
EXPECT_SPELLING_CORRECTION(a, "Hirachie", 1, ({"Hierarchie"}));
899+
900+
//////////////////////////////////////////////////////////////////////////////
901+
// Edge cases
902+
//////////////////////////////////////////////////////////////////////////////
903+
904+
// Exact match is not considered a spelling correction
905+
EXPECT_SPELLING_CORRECTION(a, "Führerschein", 1, ({}));
906+
907+
// Max edit distance is 3
908+
EXPECT_SPELLING_CORRECTION(a, "Führersch", 1, ({"Führerschein"}));
909+
EXPECT_SPELLING_CORRECTION(a, "Führersc", 1, ({}));
910+
// Case matters in edit distance
911+
EXPECT_SPELLING_CORRECTION(a, "führersch", 1, ({}));
912+
// Diacritics matter in edit distance
913+
EXPECT_SPELLING_CORRECTION(a, "Fuhrersch", 1, ({}));
914+
// Mismatch in diacritics counts as 1 in edit distance (this is not trivial,
915+
// because from the UTF-8 perspective it is a one-byte vs two-byte encoding
916+
// of a Unicode codepoint).
917+
EXPECT_SPELLING_CORRECTION(a, "Führersche", 1, ({"Führerschein"}));
918+
919+
EXPECT_SPELLING_CORRECTION(a, "Führershine", 1, ({"Führerschein"}));
920+
EXPECT_SPELLING_CORRECTION(a, "Führershyne", 1, ({}));
921+
EXPECT_SPELLING_CORRECTION(a, "führershine", 1, ({}));
922+
923+
EXPECT_SPELLING_CORRECTION(a, "Führerschrom", 1, ({"Führerschein"}));
924+
EXPECT_SPELLING_CORRECTION(a, "Führerscdrom", 1, ({}));
925+
926+
//////////////////////////////////////////////////////////////////////////////
927+
// Shortcomings of the proof-of-concept implementation
928+
//////////////////////////////////////////////////////////////////////////////
929+
930+
// Multiword titles are treated as a single entity
931+
EXPECT_SPELLING_CORRECTION(a, "Laurem", 1, ({}));
932+
EXPECT_SPELLING_CORRECTION(a, "ibsum", 1, ({}));
933+
EXPECT_SPELLING_CORRECTION(a, "Loremipsum", 1, ({"Lorem ipsum"}));
934+
935+
// Only one spelling correction can be requested
936+
// EXPECT_SPELLING_CORRECTION(a, "Kung", 2, ({"King", "Kong"}));
937+
EXPECT_THROW(getSpellingSuggestions(a, "Kung", 2), std::runtime_error);
938+
894939
}
895940

896941
zim::Entry getTitleIndexEntry(const zim::Archive& a)

0 commit comments

Comments
 (0)