@@ -811,6 +811,11 @@ TEST(Suggestion, spellingSuggestions) {
811811 " seit" ,
812812 " vorgestern" ,
813813 " wahrscheinlich" ,
814+
815+ // Entries for demonstrating shortcomings of the PoC implementation
816+ " Lorem ipsum" ,
817+ " King" ,
818+ " Kong" ,
814819 });
815820
816821 EXPECT_SPELLING_CORRECTION (a, " " , 1 , ({}));
@@ -891,6 +896,46 @@ TEST(Suggestion, spellingSuggestions) {
891896 EXPECT_SPELLING_CORRECTION (a, " Farradschluss" , 1 , ({" Fahrradschloss" }));
892897 EXPECT_SPELLING_CORRECTION (a, " Konkorenz" , 1 , ({" Konkurrenz" }));
893898 EXPECT_SPELLING_CORRECTION (a, " Hirachie" , 1 , ({" Hierarchie" }));
899+
900+ // ////////////////////////////////////////////////////////////////////////////
901+ // Edge cases
902+ // ////////////////////////////////////////////////////////////////////////////
903+
904+ // Exact match is not considered a spelling correction
905+ EXPECT_SPELLING_CORRECTION (a, " Führerschein" , 1 , ({}));
906+
907+ // Max edit distance is 3
908+ EXPECT_SPELLING_CORRECTION (a, " Führersch" , 1 , ({" Führerschein" }));
909+ EXPECT_SPELLING_CORRECTION (a, " Führersc" , 1 , ({}));
910+ // Case matters in edit distance
911+ EXPECT_SPELLING_CORRECTION (a, " führersch" , 1 , ({}));
912+ // Diacritics matter in edit distance
913+ EXPECT_SPELLING_CORRECTION (a, " Fuhrersch" , 1 , ({}));
914+ // Mismatch in diacritics counts as 1 in edit distance (this is not trivial,
915+ // because from the UTF-8 perspective it is a one-byte vs two-byte encoding
916+ // of a Unicode codepoint).
917+ EXPECT_SPELLING_CORRECTION (a, " Führersche" , 1 , ({" Führerschein" }));
918+
919+ EXPECT_SPELLING_CORRECTION (a, " Führershine" , 1 , ({" Führerschein" }));
920+ EXPECT_SPELLING_CORRECTION (a, " Führershyne" , 1 , ({}));
921+ EXPECT_SPELLING_CORRECTION (a, " führershine" , 1 , ({}));
922+
923+ EXPECT_SPELLING_CORRECTION (a, " Führerschrom" , 1 , ({" Führerschein" }));
924+ EXPECT_SPELLING_CORRECTION (a, " Führerscdrom" , 1 , ({}));
925+
926+ // ////////////////////////////////////////////////////////////////////////////
927+ // Shortcomings of the proof-of-concept implementation
928+ // ////////////////////////////////////////////////////////////////////////////
929+
930+ // Multiword titles are treated as a single entity
931+ EXPECT_SPELLING_CORRECTION (a, " Laurem" , 1 , ({}));
932+ EXPECT_SPELLING_CORRECTION (a, " ibsum" , 1 , ({}));
933+ EXPECT_SPELLING_CORRECTION (a, " Loremipsum" , 1 , ({" Lorem ipsum" }));
934+
935+ // Only one spelling correction can be requested
936+ // EXPECT_SPELLING_CORRECTION(a, "Kung", 2, ({"King", "Kong"}));
937+ EXPECT_THROW (getSpellingSuggestions (a, " Kung" , 2 ), std::runtime_error);
938+
894939}
895940
896941zim::Entry getTitleIndexEntry (const zim::Archive& a)
0 commit comments