File tree: 1 file changed, +11 -1 lines changed
llama_index/text_splitter: 1 file changed, +11 -1 lines changed
Original file line number | Diff line number | Diff line change
1+ import logging
12from typing import Callable , List
23
34from llama_index .text_splitter .types import TextSplitter
45
6+ logger = logging .getLogger (__name__ )
7+
58
69def truncate_text (text : str , text_splitter : TextSplitter ) -> str :
710 """Truncate text to fit within the chunk size."""
@@ -46,7 +49,14 @@ def split_by_sentence_tokenizer() -> Callable[[str], List[str]]:
4649 try :
4750 nltk .data .find ("tokenizers/punkt" )
4851 except LookupError :
49- nltk .download ("punkt" , download_dir = nltk_data_dir )
52+ try :
53+ nltk .download ("punkt" , download_dir = nltk_data_dir )
54+ except FileExistsError :
55+ logger .info (
56+ "Tried to re-download NLTK files but already exists. "
57+ "This could happen in multi-theaded deployments, "
58+ "should be benign"
59+ )
5060
5161 tokenizer = nltk .tokenize .PunktSentenceTokenizer ()
5262
You can’t perform that action at this time.
0 commit comments