feat: add testset generation upload (#1647)

explodinggradients · Nov 11, 2024 · b990d68 · b990d68
1 parent fd44b4c
commit b990d68
Show file tree

Hide file tree

Showing 7 changed files with 51 additions and 40 deletions.
diff --git a/docs/getstarted/rag_evaluation.md b/docs/getstarted/rag_evaluation.md
@@ -8,7 +8,11 @@ The dataset used here is from [Amnesty QA RAG](https://huggingface.co/datasets/e
 
 ```python
 from datasets import load_dataset
-dataset = load_dataset("explodinggradients/amnesty_qa","english_v3")
+dataset = load_dataset(
+    "explodinggradients/amnesty_qa",
+    "english_v3",
+    trust_remote_code=True
+)
 ```
 
 Load the dataset into Ragas EvaluationDataset object. 

diff --git a/src/ragas/evaluation.py b/src/ragas/evaluation.py
@@ -7,8 +7,6 @@
 from langchain_core.callbacks import BaseCallbackHandler, BaseCallbackManager
 from langchain_core.embeddings import Embeddings as LangchainEmbeddings
 from langchain_core.language_models import BaseLanguageModel as LangchainLLM
-from llama_index.core.base.embeddings.base import BaseEmbedding as LlamaIndexEmbedding
-from llama_index.core.base.llms.base import BaseLLM as LlamaIndexLLM
 
 from ragas._analytics import EvaluationEvent, track, track_was_completed
 from ragas.callbacks import ChainType, RagasTracer, new_group
@@ -21,14 +19,13 @@
 from ragas.embeddings.base import (
     BaseRagasEmbeddings,
     LangchainEmbeddingsWrapper,
-    LlamaIndexEmbeddingsWrapper,
     embedding_factory,
 )
 from ragas.exceptions import ExceptionInRunner
 from ragas.executor import Executor
 from ragas.integrations.helicone import helicone_config
 from ragas.llms import llm_factory
-from ragas.llms.base import BaseRagasLLM, LangchainLLMWrapper, LlamaIndexLLMWrapper
+from ragas.llms.base import BaseRagasLLM, LangchainLLMWrapper
 from ragas.metrics import AspectCritic
 from ragas.metrics._answer_correctness import AnswerCorrectness
 from ragas.metrics.base import (
@@ -59,10 +56,8 @@
 def evaluate(
     dataset: t.Union[Dataset, EvaluationDataset],
     metrics: t.Optional[t.Sequence[Metric]] = None,
-    llm: t.Optional[BaseRagasLLM | LangchainLLM | LlamaIndexLLM] = None,
-    embeddings: t.Optional[
-        BaseRagasEmbeddings | LangchainEmbeddings | LlamaIndexEmbedding
-    ] = None,
+    llm: t.Optional[BaseRagasLLM | LangchainLLM] = None,
+    embeddings: t.Optional[BaseRagasEmbeddings | LangchainEmbeddings] = None,
     callbacks: Callbacks = None,
     in_ci: bool = False,
     run_config: RunConfig = RunConfig(),
@@ -187,12 +182,8 @@ def evaluate(
     # set the llm and embeddings
     if isinstance(llm, LangchainLLM):
         llm = LangchainLLMWrapper(llm, run_config=run_config)
-    elif isinstance(llm, LlamaIndexLLM):
-        llm = LlamaIndexLLMWrapper(llm, run_config=run_config)
     if isinstance(embeddings, LangchainEmbeddings):
         embeddings = LangchainEmbeddingsWrapper(embeddings)
-    elif isinstance(embeddings, LlamaIndexEmbedding):
-        embeddings = LlamaIndexEmbeddingsWrapper(embeddings)
 
     # init llms and embeddings
     binary_metrics = []

diff --git a/src/ragas/metrics/__init__.py b/src/ragas/metrics/__init__.py
@@ -79,6 +79,7 @@
     "ContextRecall",
     "context_recall",
     "AspectCritic",
+    "AspectCriticWithReference",
     "AnswerRelevancy",
     "answer_relevancy",
     "ContextEntityRecall",

diff --git a/src/ragas/testset/synthesizers/generate.py b/src/ragas/testset/synthesizers/generate.py
@@ -12,7 +12,6 @@
 from ragas.cost import TokenUsageParser
 from ragas.embeddings.base import (
     BaseRagasEmbeddings,
-    LangchainEmbeddingsWrapper,
     LlamaIndexEmbeddingsWrapper,
 )
 from ragas.executor import Executor
@@ -28,7 +27,6 @@
 if t.TYPE_CHECKING:
     from langchain_core.callbacks import Callbacks
     from langchain_core.documents import Document as LCDocument
-    from langchain_core.embeddings.embeddings import Embeddings as LangchainEmbeddings
     from langchain_core.language_models import BaseLanguageModel as LangchainLLM
     from llama_index.core.base.embeddings.base import (
         BaseEmbedding as LlamaIndexEmbedding,
@@ -55,22 +53,18 @@ class TestsetGenerator:
     ----------
     llm : BaseRagasLLM
         The language model to use for the generation process.
-    embedding_model: BaseRagasEmbeddings
-        Embedding model for generation process.
     knowledge_graph : KnowledgeGraph, default empty
         The knowledge graph to use for the generation process.
     """
 
     llm: BaseRagasLLM
-    embedding_model: BaseRagasEmbeddings
     knowledge_graph: KnowledgeGraph = field(default_factory=KnowledgeGraph)
     persona_list: t.Optional[t.List[Persona]] = None
 
     @classmethod
     def from_langchain(
         cls,
         llm: LangchainLLM,
-        embedding_model: LangchainEmbeddings,
         knowledge_graph: t.Optional[KnowledgeGraph] = None,
     ) -> TestsetGenerator:
         """
@@ -79,15 +73,13 @@ def from_langchain(
         knowledge_graph = knowledge_graph or KnowledgeGraph()
         return cls(
             LangchainLLMWrapper(llm),
-            LangchainEmbeddingsWrapper(embedding_model),
             knowledge_graph,
         )
 
     @classmethod
     def from_llama_index(
         cls,
         llm: LlamaIndexLLM,
-        embedding_model: LlamaIndexEmbedding,
         knowledge_graph: t.Optional[KnowledgeGraph] = None,
     ) -> TestsetGenerator:
         """
@@ -96,7 +88,6 @@ def from_llama_index(
         knowledge_graph = knowledge_graph or KnowledgeGraph()
         return cls(
             LlamaIndexLLMWrapper(llm),
-            LlamaIndexEmbeddingsWrapper(embedding_model),
             knowledge_graph,
         )
 
@@ -157,17 +148,15 @@ def generate_with_langchain_docs(
                        Provide an LLM on TestsetGenerator instantiation or as an argument for transforms_llm parameter.
                        Alternatively you can provide your own transforms through the `transforms` parameter."""
             )
-        if not self.embedding_model and not transforms_embedding_model:
+        if not transforms_embedding_model:
             raise ValueError(
-                """An embedding client was not provided.
-                       Provide an embedding model on TestsetGenerator instantiation or as an argument for transforms_llm parameter.
-                       Alternatively you can provide your own transforms through the `transforms` parameter."""
+                """An embedding client was not provided. Provide an embedding through the transforms_embedding_model parameter. Alternatively you can provide your own transforms through the `transforms` parameter."""
             )
 
         if not transforms:
             transforms = default_transforms(
                 llm=transforms_llm or self.llm,
-                embedding_model=transforms_embedding_model or self.embedding_model,
+                embedding_model=transforms_embedding_model,
             )
 
         # convert the documents to Ragas nodes
@@ -221,22 +210,19 @@ def generate_with_llamaindex_docs(
             raise ValueError(
                 "An llm client was not provided. Provide an LLM on TestsetGenerator instantiation or as an argument for transforms_llm parameter. Alternatively you can provide your own transforms through the `transforms` parameter."
             )
-        if not self.embedding_model and not transforms_embedding_model:
+        if not transforms_embedding_model:
             raise ValueError(
-                "An embedding client was not provided. Provide an embedding model on TestsetGenerator instantiation or as an argument for transforms_llm parameter. Alternatively you can provide your own transforms through the `transforms` parameter."
+                "An embedding client was not provided. Provide an embedding through the transforms_embedding_model parameter. Alternatively you can provide your own transforms through the `transforms` parameter."
             )
 
         if not transforms:
             if transforms_llm is None:
                 llm_for_transforms = self.llm
             else:
                 llm_for_transforms = LlamaIndexLLMWrapper(transforms_llm)
-            if transforms_embedding_model is None:
-                embedding_model_for_transforms = self.embedding_model
-            else:
-                embedding_model_for_transforms = LlamaIndexEmbeddingsWrapper(
-                    transforms_embedding_model
-                )
+            embedding_model_for_transforms = LlamaIndexEmbeddingsWrapper(
+                transforms_embedding_model
+            )
             transforms = default_transforms(
                 llm=llm_for_transforms,
                 embedding_model=embedding_model_for_transforms,

diff --git a/src/ragas/testset/synthesizers/single_hop/specific.py b/src/ragas/testset/synthesizers/single_hop/specific.py
@@ -38,7 +38,6 @@ class SingleHopScenario(BaseScenario):
 
 @dataclass
 class SingleHopSpecificQuerySynthesizer(SingleHopQuerySynthesizer):
-
     name: str = "single_hop_specifc_query_synthesizer"
     theme_persona_matching_prompt: PydanticPrompt = ThemesPersonasMatchingPrompt()
 
@@ -71,6 +70,8 @@ async def _generate_scenarios(
             ):
                 nodes.append(node)
 
+        if len(nodes) == 0:
+            raise ValueError("No nodes found with the `entities` property.")
         samples_per_node = int(np.ceil(n / len(nodes)))
 
         scenarios = []

diff --git a/src/ragas/testset/synthesizers/testset_schema.py b/src/ragas/testset/synthesizers/testset_schema.py
@@ -2,6 +2,10 @@
 
 import typing as t
 from dataclasses import dataclass, field
+from datetime import datetime
+from uuid import uuid4
+
+from pydantic import BaseModel, Field
 
 from ragas.cost import CostCallbackHandler, TokenUsage
 from ragas.dataset_schema import (
@@ -11,6 +15,7 @@
     RagasDataset,
     SingleTurnSample,
 )
+from ragas.utils import RAGAS_API_URL
 
 
 class TestsetSample(BaseSample):
@@ -29,6 +34,16 @@ class TestsetSample(BaseSample):
     synthesizer_name: str
 
 
+class TestsetPacket(BaseModel):
+    """
+    A packet of testset samples to be uploaded to the server.
+    """
+
+    samples: t.List[TestsetSample]
+    run_id: str = Field(default_factory=lambda: str(uuid4()))
+    created_at: str = Field(default_factory=lambda: datetime.now().isoformat())
+
+
 @dataclass
 class Testset(RagasDataset[TestsetSample]):
     """
@@ -118,3 +133,18 @@ def total_cost(
             cost_per_input_token=cost_per_input_token,
             cost_per_output_token=cost_per_output_token,
         )
+
+    def upload(self, base_url: str = RAGAS_API_URL, verbose: bool = True) -> str:
+        import requests
+
+        packet = TestsetPacket(samples=self.samples)
+        response = requests.post(
+            f"{base_url}/alignment/testset", json=packet.model_dump()
+        )
+        if response.status_code != 200:
+            raise Exception(f"Failed to upload results: {response.text}")
+
+        testset_endpoint = f"https://app.ragas.io/alignment/testset/{packet.run_id}"
+        if verbose:
+            print(f"Testset uploaded! View at {testset_endpoint}")
+        return testset_endpoint
diff --git a/src/ragas/testset/transforms/extractors/llm_based.py b/src/ragas/testset/transforms/extractors/llm_based.py
@@ -263,7 +263,7 @@ class NERExtractor(LLMBasedExtractor):
     """
 
     property_name: str = "entities"
-    prompt: NERPrompt = NERPrompt()
+    prompt: PydanticPrompt[TextWithExtractionLimit, NEROutput] = NERPrompt()
     max_num_entities: int = 10
 
     async def extract(self, node: Node) -> t.Tuple[str, t.List[str]]:
@@ -282,9 +282,7 @@ class TopicDescription(BaseModel):
 
 
 class TopicDescriptionPrompt(PydanticPrompt[StringIO, TopicDescription]):
-    instruction: str = (
-        "Provide a concise description of the main topic(s) discussed in the following text."
-    )
+    instruction: str = "Provide a concise description of the main topic(s) discussed in the following text."
     input_model: t.Type[StringIO] = StringIO
     output_model: t.Type[TopicDescription] = TopicDescription
     examples: t.List[t.Tuple[StringIO, TopicDescription]] = [