Skip to content

Commit 4669964

Browse files
Increased the max word length for title indexing
1 parent b48f6fe commit 4669964

File tree

2 files changed

+18
-18
lines changed

2 files changed

+18
-18
lines changed

src/writer/xapianIndexer.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -130,7 +130,7 @@ size_t getTermCount(const Xapian::Document& d)
130130

131131
void XapianIndexer::indexTitle(const std::string& path, const std::string& title, const std::string& targetPath)
132132
{
133-
const size_t MAX_WORD_LENGTH = 64;
133+
const size_t MAX_WORD_LENGTH = 240; // Xapian's hard limit is 245
134134

135135
assert(indexingMode == IndexingMode::TITLE);
136136
Xapian::Stem stemmer;

test/suggestion.cpp

Lines changed: 17 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -706,8 +706,8 @@ std::string makeLongWord(size_t n) {
706706
}
707707

708708
TEST(Suggestion, titleEdgeCases) {
709-
const std::string w64 = makeLongWord(64);
710-
const std::string w65 = makeLongWord(65);
709+
const std::string shortOfBeingTooLong = makeLongWord(240);
710+
const std::string tooLong = makeLongWord(241);
711711

712712
TempZimArchiveMadeOfEmptyHtmlArticles tza("en", {
713713
// { path , title }
@@ -717,17 +717,17 @@ TEST(Suggestion, titleEdgeCases) {
717717
{ "Without", "" }, // No title
718718
//
719719
// Titles containing long words
720-
{ "toolongword1", "Is " + w64 + " too long?" },
721-
{ "toolongword2", "Is " + w65 + " too long?" },
722-
{ "toolongsingleword1", w64 },
723-
{ "toolongsingleword2", w65 },
720+
{ "toolongword1", "Is " + shortOfBeingTooLong + " too long?" },
721+
{ "toolongword2", "Is " + tooLong + " too long?" },
722+
{ "toolongsingleword1", shortOfBeingTooLong },
723+
{ "toolongsingleword2", tooLong },
724724

725725
// Handling of pseudo-words consisting exclusively of punctuation
726726
{ "winknsmilewithouttext", ";-)" }, // A punctuation-only title
727727
{ "winknsmilebothways", ";-) wink'n'smile" },
728728
{ "winknsmiletheotherwayaround", "wink'n'smile ;-)" },
729-
{ "smilinglongword", ";-) " + w65 },
730-
{ "winknsmilewithothernonwords", "~~ ;-) ~~" },
729+
{ "smilinglongword", ";-) " + tooLong },
730+
{ "winknsmilewithothernonwords", "~~ ;-) ~~" },
731731

732732
// Handling of stopwords
733733
{ "hasisastopword", "Kiwix has our support" },
@@ -759,18 +759,18 @@ TEST(Suggestion, titleEdgeCases) {
759759
);
760760

761761
EXPECT_SUGGESTED_TITLES(archive, "long",
762-
"Is " + w65 + " too long?",
763-
"Is " + w64 + " too long?"
762+
"Is " + tooLong + " too long?",
763+
"Is " + shortOfBeingTooLong + " too long?"
764764
);
765765

766766
EXPECT_SUGGESTED_TITLES(archive, "awordthatis",
767-
w64,
768-
"Is " + w64 + " too long?"
769-
// The following results aren't included because w65 has been ignored
767+
shortOfBeingTooLong,
768+
"Is " + shortOfBeingTooLong + " too long?"
769+
// The following results aren't included because tooLong has been ignored
770770
// during indexing:
771-
// - w65
772-
// - "Is " + w65 + " too long?"
773-
// - ";-) " + w65
771+
// - tooLong
772+
// - "Is " + tooLong + " too long?"
773+
// - ";-) " + tooLong
774774
);
775775

776776
EXPECT_SUGGESTED_TITLES(archive, ";-",
@@ -779,7 +779,7 @@ TEST(Suggestion, titleEdgeCases) {
779779
// term in the presence of anything else:
780780
// - ";-) wink'n'smile"
781781
// - "wink'n'smile ;-)"
782-
// - ";-) " + w65
782+
// - ";-) " + tooLong
783783
// - "~~ ;-) ~~"
784784
);
785785

0 commit comments

Comments
 (0)