Skip to content

Commit

Permalink
Change the Hugging Face (HF) model path to a local model path in the functional tests
Browse files: browse the repository at this point in the history.
Signed-off-by: Khaled Sulayman <[email protected]>
  • Loading branch information
khaledsulayman committed Nov 14, 2024
1 parent 36cc0d2 commit fa555c2
Showing 1 changed file with 12 additions and 4 deletions.
16 changes: 12 additions & 4 deletions tests/functional/test_chunkers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,21 @@
from pathlib import Path
import os

# Third Party
import pytest

# First Party
from instructlab.sdg.utils.chunkers import DocumentChunker

TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "..", "testdata")


def test_chunk_pdf(tmp_path):
@pytest.fixture
def tokenizer_model_name():
    """Return the tokenizer model location used by the chunker tests.

    The value is a path string built by joining ``TEST_DATA_DIR`` with the
    relative location ``models/instructlab/granite-7b-lab``.
    """
    relative_model_dir = "models/instructlab/granite-7b-lab"
    model_path = os.path.join(TEST_DATA_DIR, relative_model_dir)
    return model_path


def test_chunk_pdf(tmp_path, tokenizer_model_name):
pdf_path = Path(os.path.join(TEST_DATA_DIR, "sample_documents", "phoenix.pdf"))
leaf_node = [
{
Expand All @@ -23,7 +31,7 @@ def test_chunk_pdf(tmp_path):
output_dir=tmp_path,
server_ctx_size=4096,
chunk_word_count=500,
tokenizer_model_name="instructlab/merlinite-7b-lab",
tokenizer_model_name=tokenizer_model_name,
)
chunks = chunker.chunk_documents()
assert len(chunks) > 9
Expand All @@ -33,7 +41,7 @@ def test_chunk_pdf(tmp_path):
assert len(chunk) < 2500


def test_chunk_md(tmp_path):
def test_chunk_md(tmp_path, tokenizer_model_name):
markdown_path = Path(os.path.join(TEST_DATA_DIR, "sample_documents", "phoenix.md"))
leaf_node = [
{
Expand All @@ -48,7 +56,7 @@ def test_chunk_md(tmp_path):
output_dir=tmp_path,
server_ctx_size=4096,
chunk_word_count=500,
tokenizer_model_name="instructlab/merlinite-7b-lab",
tokenizer_model_name=tokenizer_model_name,
)
chunks = chunker.chunk_documents()
assert len(chunks) > 7
Expand Down

0 comments on commit fa555c2

Please sign in to comment.