From 7ebb8d39507716f5f234a76da214e0ea1a4dd3a2 Mon Sep 17 00:00:00 2001
From: Khaled Sulayman
Date: Wed, 13 Nov 2024 16:25:52 -0500
Subject: [PATCH] Increase Exception specificity for invalid model paths

Signed-off-by: Khaled Sulayman
---
 src/instructlab/sdg/utils/chunkers.py |  2 +-
 tests/test_chunkers.py                | 13 +++++++------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/instructlab/sdg/utils/chunkers.py b/src/instructlab/sdg/utils/chunkers.py
index 0e200e45..b9675ee4 100644
--- a/src/instructlab/sdg/utils/chunkers.py
+++ b/src/instructlab/sdg/utils/chunkers.py
@@ -325,7 +325,7 @@ def create_tokenizer(model_name: Optional[str]):
                 )
 
             else:
-                raise Exception(f"Received path to invalid model format {model_path}")
+                raise ValueError(f"Received path to invalid model format {model_path}")
 
             logger.info(f"Successfully loaded tokenizer from: {model_path}")
             return tokenizer
diff --git a/tests/test_chunkers.py b/tests/test_chunkers.py
index 940f5c7b..bb3c5abb 100644
--- a/tests/test_chunkers.py
+++ b/tests/test_chunkers.py
@@ -81,13 +81,14 @@ def test_create_tokenizer(tokenizer_model_name):
 
 
 @pytest.mark.parametrize(
-    "model_name",
+    "model_name, expected_exception",
     [
-        os.path.join(TEST_DATA_DIR, "models/invalid_gguf.gguf"),
-        os.path.join(TEST_DATA_DIR, "models/invalid_safetensors_dir/"),
-        os.path.join(TEST_DATA_DIR, "bad_path)"),
+        (os.path.join(TEST_DATA_DIR, "models/invalid_gguf.gguf"), ValueError),
+        (os.path.join(TEST_DATA_DIR, "models/invalid_safetensors_dir/"), ValueError),
+        (os.path.join(TEST_DATA_DIR, "bad_path)"), FileNotFoundError),
     ]
 )
-def test_invalid_tokenizer(model_name):
-    with pytest.raises(Exception):
+def test_invalid_tokenizer(model_name, expected_exception):
+    # Each case pairs a bad model path with the exception type it must raise.
+    with pytest.raises(expected_exception):
         ContextAwareChunker.create_tokenizer(model_name)