From ab545d9f22ea0ef3c688dcfb45825a62451ee3b6 Mon Sep 17 00:00:00 2001 From: Yong723 <50616781+Yongtae723@users.noreply.github.com> Date: Mon, 23 Oct 2023 21:11:51 +0900 Subject: [PATCH] Refactor testset_generator.py for newbies to understand easily (#207) Hi, I fully understand that the original code is correct, but I think my suggestion is much easier to understand. If you feel this change is too much, feel free to ignore it. --- src/ragas/testset/testset_generator.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/ragas/testset/testset_generator.py b/src/ragas/testset/testset_generator.py index 77a7a0d08..fceed662a 100644 --- a/src/ragas/testset/testset_generator.py +++ b/src/ragas/testset/testset_generator.py @@ -281,16 +281,17 @@ def generate( documents: list[LlamaindexDocument] | list[LangchainDocument], test_size: int, ) -> TestDataset: + if not isinstance(documents[0], (LlamaindexDocument, LangchainDocument)): + raise ValueError( + "Testset Generatation only supports LlamaindexDocuments or LangchainDocuments" # noqa + ) + if isinstance(documents[0], LangchainDocument): # cast to LangchainDocument since its the only case here documents = t.cast(list[LangchainDocument], documents) documents = [ LlamaindexDocument.from_langchain_format(doc) for doc in documents ] - elif not isinstance(documents[0], LlamaindexDocument): - raise ValueError( - "Testset Generatation only supports LlamaindexDocuments or LangchainDocuments" # noqa - ) # Convert documents into nodes node_parser = SimpleNodeParser.from_defaults( chunk_size=self.chunk_size, chunk_overlap=0, include_metadata=True