Raise ValueError (instead of a bare Exception) from create_tokenizer when the
model path has an unrecognized format, and parametrize test_invalid_tokenizer
with the exception type expected for each invalid input.

diff --git a/src/instructlab/sdg/utils/chunkers.py b/src/instructlab/sdg/utils/chunkers.py
index 0e200e45..b9675ee4 100644
--- a/src/instructlab/sdg/utils/chunkers.py
+++ b/src/instructlab/sdg/utils/chunkers.py
@@ -325,7 +325,7 @@ def create_tokenizer(model_name: Optional[str]):
                 )
 
             else:
-                raise Exception(f"Received path to invalid model format {model_path}")
+                raise ValueError(f"Received path to invalid model format {model_path}")
 
             logger.info(f"Successfully loaded tokenizer from: {model_path}")
             return tokenizer
diff --git a/tests/test_chunkers.py b/tests/test_chunkers.py
index 940f5c7b..bb3c5abb 100644
--- a/tests/test_chunkers.py
+++ b/tests/test_chunkers.py
@@ -81,13 +81,14 @@ def test_create_tokenizer(tokenizer_model_name):
 
 
 @pytest.mark.parametrize(
-    "model_name",
+    "model_name, expected_exception",
     [
-        os.path.join(TEST_DATA_DIR, "models/invalid_gguf.gguf"),
-        os.path.join(TEST_DATA_DIR, "models/invalid_safetensors_dir/"),
-        os.path.join(TEST_DATA_DIR, "bad_path)"),
+        (os.path.join(TEST_DATA_DIR, "models/invalid_gguf.gguf"), ValueError),
+        (os.path.join(TEST_DATA_DIR, "models/invalid_safetensors_dir/"), ValueError),
+        (os.path.join(TEST_DATA_DIR, "bad_path)"), FileNotFoundError),
     ]
 )
-def test_invalid_tokenizer(model_name):
-    with pytest.raises(Exception):
+def test_invalid_tokenizer(model_name, expected_exception):
+    # Each case pairs an invalid model path with the exception type it must raise.
+    with pytest.raises(expected_exception):
         ContextAwareChunker.create_tokenizer(model_name)