Use AutoProcessor with use_fast=False, since there's a bug with use_fast=True where whitespace is removed on single-token decodes
exo-explore · Jul 30, 2024 · 2d20000 · 2d20000
1 parent 0ec77e1
commit 2d20000
Showing 1 changed file with 2 additions and 2 deletions.
diff --git a/exo/api/chatgpt_api.py b/exo/api/chatgpt_api.py
@@ -73,8 +73,8 @@ def resolve_tinygrad_tokenizer(model_id: str):
 
 async def resolve_tokenizer(model_id: str):
   try:
-    if DEBUG >= 2: print(f"Trying to AutoProcessor for {model_id}")
-    processor = AutoProcessor.from_pretrained(model_id)
+    if DEBUG >= 2: print(f"Trying AutoProcessor for {model_id}")
+    processor = AutoProcessor.from_pretrained(model_id, use_fast=False)
     processor.eos_token_id = processor.tokenizer.eos_token_id
     processor.encode = processor.tokenizer.encode
     return processor