diff --git a/src/PreTokenizers/WhitespaceSplit.php b/src/PreTokenizers/WhitespaceSplit.php
index d8087d4..a3e8f0f 100644
--- a/src/PreTokenizers/WhitespaceSplit.php
+++ b/src/PreTokenizers/WhitespaceSplit.php
@@ -14,6 +14,33 @@ public function __construct(protected array $config)
 
+    /**
+     * Splits text on runs of whitespace and returns the non-empty tokens.
+     *
+     * @param string|array $text    A string to split, or an array of strings that are
+     *                              split one by one with their tokens concatenated in order.
+     * @param array        $options Passed through unchanged; not consulted when splitting.
+     *
+     * @return string[] Whitespace-free tokens in order of appearance.
+     */
     public function preTokenizeText(string|array $text, array $options): array
     {
-        return explode(' ', $text);
+        if (is_array($text)) {
+            // Split each element on its own and concatenate the tokens in order.
+            $tokens = [];
+            foreach ($text as $segment) {
+                array_push($tokens, ...$this->preTokenizeText($segment, $options));
+            }
+
+            return $tokens;
+        }
+
+        // The `u` modifier makes \S Unicode-aware, so whitespace such as U+00A0 or
+        // U+3000 separates tokens just like an ASCII space does.
+        if (preg_match_all('/\S+/u', $text, $matches) === false) {
+            // A `u` pattern fails on malformed UTF-8; fall back to byte-wise matching
+            // instead of silently returning no tokens.
+            preg_match_all('/\S+/', $text, $matches);
+        }
+
+        return $matches[0] ?? [];
     }
 }
\ No newline at end of file