diff --git a/docs/getstarted/rag_evaluation.md b/docs/getstarted/rag_evaluation.md index f1fc512dd..6e163113e 100644 --- a/docs/getstarted/rag_evaluation.md +++ b/docs/getstarted/rag_evaluation.md @@ -8,7 +8,11 @@ The dataset used here is from [Amnesty QA RAG](https://huggingface.co/datasets/e ```python from datasets import load_dataset -dataset = load_dataset("explodinggradients/amnesty_qa","english_v3") +dataset = load_dataset( + "explodinggradients/amnesty_qa", + "english_v3", + trust_remote_code=True +) ``` Load the dataset into Ragas EvaluationDataset object. diff --git a/src/ragas/evaluation.py b/src/ragas/evaluation.py index e9e5b5bb8..fd53925b3 100644 --- a/src/ragas/evaluation.py +++ b/src/ragas/evaluation.py @@ -7,8 +7,6 @@ from langchain_core.callbacks import BaseCallbackHandler, BaseCallbackManager from langchain_core.embeddings import Embeddings as LangchainEmbeddings from langchain_core.language_models import BaseLanguageModel as LangchainLLM -from llama_index.core.base.embeddings.base import BaseEmbedding as LlamaIndexEmbedding -from llama_index.core.base.llms.base import BaseLLM as LlamaIndexLLM from ragas._analytics import EvaluationEvent, track, track_was_completed from ragas.callbacks import ChainType, RagasTracer, new_group @@ -21,14 +19,13 @@ from ragas.embeddings.base import ( BaseRagasEmbeddings, LangchainEmbeddingsWrapper, - LlamaIndexEmbeddingsWrapper, embedding_factory, ) from ragas.exceptions import ExceptionInRunner from ragas.executor import Executor from ragas.integrations.helicone import helicone_config from ragas.llms import llm_factory -from ragas.llms.base import BaseRagasLLM, LangchainLLMWrapper, LlamaIndexLLMWrapper +from ragas.llms.base import BaseRagasLLM, LangchainLLMWrapper from ragas.metrics import AspectCritic from ragas.metrics._answer_correctness import AnswerCorrectness from ragas.metrics.base import ( @@ -59,10 +56,8 @@ def evaluate( dataset: t.Union[Dataset, EvaluationDataset], metrics: t.Optional[t.Sequence[Metric]] = None, - llm: t.Optional[BaseRagasLLM | LangchainLLM | LlamaIndexLLM] = None, - embeddings: t.Optional[ - BaseRagasEmbeddings | LangchainEmbeddings | LlamaIndexEmbedding - ] = None, + llm: t.Optional[BaseRagasLLM | LangchainLLM] = None, + embeddings: t.Optional[BaseRagasEmbeddings | LangchainEmbeddings] = None, callbacks: Callbacks = None, in_ci: bool = False, run_config: RunConfig = RunConfig(), @@ -187,12 +182,8 @@ def evaluate( # set the llm and embeddings if isinstance(llm, LangchainLLM): llm = LangchainLLMWrapper(llm, run_config=run_config) - elif isinstance(llm, LlamaIndexLLM): - llm = LlamaIndexLLMWrapper(llm, run_config=run_config) if isinstance(embeddings, LangchainEmbeddings): embeddings = LangchainEmbeddingsWrapper(embeddings) - elif isinstance(embeddings, LlamaIndexEmbedding): - embeddings = LlamaIndexEmbeddingsWrapper(embeddings) # init llms and embeddings binary_metrics = [] diff --git a/src/ragas/metrics/__init__.py b/src/ragas/metrics/__init__.py index dabdb58c4..ebf92ebbc 100644 --- a/src/ragas/metrics/__init__.py +++ b/src/ragas/metrics/__init__.py @@ -79,6 +79,7 @@ "ContextRecall", "context_recall", "AspectCritic", + "AspectCriticWithReference", "AnswerRelevancy", "answer_relevancy", "ContextEntityRecall", diff --git a/src/ragas/testset/synthesizers/generate.py b/src/ragas/testset/synthesizers/generate.py index 6db8039a1..c006e6c6a 100644 --- a/src/ragas/testset/synthesizers/generate.py +++ b/src/ragas/testset/synthesizers/generate.py @@ -12,7 +12,6 @@ from ragas.cost import TokenUsageParser from ragas.embeddings.base import ( BaseRagasEmbeddings, - LangchainEmbeddingsWrapper, LlamaIndexEmbeddingsWrapper, ) from ragas.executor import Executor @@ -28,7 +27,6 @@ if t.TYPE_CHECKING: from langchain_core.callbacks import Callbacks from langchain_core.documents import Document as LCDocument - from langchain_core.embeddings.embeddings import Embeddings as LangchainEmbeddings from langchain_core.language_models import BaseLanguageModel as LangchainLLM from llama_index.core.base.embeddings.base import ( BaseEmbedding as LlamaIndexEmbedding, @@ -55,14 +53,11 @@ class TestsetGenerator: ---------- llm : BaseRagasLLM The language model to use for the generation process. - embedding_model: BaseRagasEmbeddings - Embedding model for generation process. knowledge_graph : KnowledgeGraph, default empty The knowledge graph to use for the generation process. """ llm: BaseRagasLLM - embedding_model: BaseRagasEmbeddings knowledge_graph: KnowledgeGraph = field(default_factory=KnowledgeGraph) persona_list: t.Optional[t.List[Persona]] = None @@ -70,7 +65,6 @@ class TestsetGenerator: def from_langchain( cls, llm: LangchainLLM, - embedding_model: LangchainEmbeddings, knowledge_graph: t.Optional[KnowledgeGraph] = None, ) -> TestsetGenerator: """ @@ -79,7 +73,6 @@ def from_langchain( knowledge_graph = knowledge_graph or KnowledgeGraph() return cls( LangchainLLMWrapper(llm), - LangchainEmbeddingsWrapper(embedding_model), knowledge_graph, ) @@ -87,7 +80,6 @@ def from_langchain( def from_llama_index( cls, llm: LlamaIndexLLM, - embedding_model: LlamaIndexEmbedding, knowledge_graph: t.Optional[KnowledgeGraph] = None, ) -> TestsetGenerator: """ @@ -96,7 +88,6 @@ def from_llama_index( knowledge_graph = knowledge_graph or KnowledgeGraph() return cls( LlamaIndexLLMWrapper(llm), - LlamaIndexEmbeddingsWrapper(embedding_model), knowledge_graph, ) @@ -157,17 +148,15 @@ def generate_with_langchain_docs( Provide an LLM on TestsetGenerator instantiation or as an argument for transforms_llm parameter. Alternatively you can provide your own transforms through the `transforms` parameter.""" ) - if not self.embedding_model and not transforms_embedding_model: + if not transforms_embedding_model: raise ValueError( - """An embedding client was not provided. - Provide an embedding model on TestsetGenerator instantiation or as an argument for transforms_llm parameter. - Alternatively you can provide your own transforms through the `transforms` parameter.""" + """An embedding client was not provided. Provide an embedding through the transforms_embedding_model parameter. Alternatively you can provide your own transforms through the `transforms` parameter.""" ) if not transforms: transforms = default_transforms( llm=transforms_llm or self.llm, - embedding_model=transforms_embedding_model or self.embedding_model, + embedding_model=transforms_embedding_model, ) # convert the documents to Ragas nodes @@ -221,9 +210,9 @@ def generate_with_llamaindex_docs( raise ValueError( "An llm client was not provided. Provide an LLM on TestsetGenerator instantiation or as an argument for transforms_llm parameter. Alternatively you can provide your own transforms through the `transforms` parameter." ) - if not self.embedding_model and not transforms_embedding_model: + if not transforms_embedding_model: raise ValueError( - "An embedding client was not provided. Provide an embedding model on TestsetGenerator instantiation or as an argument for transforms_llm parameter. Alternatively you can provide your own transforms through the `transforms` parameter." + "An embedding client was not provided. Provide an embedding through the transforms_embedding_model parameter. Alternatively you can provide your own transforms through the `transforms` parameter." ) if not transforms: @@ -231,12 +220,9 @@ def generate_with_llamaindex_docs( llm_for_transforms = self.llm else: llm_for_transforms = LlamaIndexLLMWrapper(transforms_llm) - if transforms_embedding_model is None: - embedding_model_for_transforms = self.embedding_model - else: - embedding_model_for_transforms = LlamaIndexEmbeddingsWrapper( - transforms_embedding_model - ) + embedding_model_for_transforms = LlamaIndexEmbeddingsWrapper( + transforms_embedding_model + ) transforms = default_transforms( llm=llm_for_transforms, embedding_model=embedding_model_for_transforms, diff --git a/src/ragas/testset/synthesizers/single_hop/specific.py b/src/ragas/testset/synthesizers/single_hop/specific.py index e3a795501..283bca8d7 100644 --- a/src/ragas/testset/synthesizers/single_hop/specific.py +++ b/src/ragas/testset/synthesizers/single_hop/specific.py @@ -38,7 +38,6 @@ class SingleHopScenario(BaseScenario): @dataclass class SingleHopSpecificQuerySynthesizer(SingleHopQuerySynthesizer): - name: str = "single_hop_specifc_query_synthesizer" theme_persona_matching_prompt: PydanticPrompt = ThemesPersonasMatchingPrompt() @@ -71,6 +70,8 @@ async def _generate_scenarios( ): nodes.append(node) + if len(nodes) == 0: + raise ValueError("No nodes found with the `entities` property.") samples_per_node = int(np.ceil(n / len(nodes))) scenarios = [] diff --git a/src/ragas/testset/synthesizers/testset_schema.py b/src/ragas/testset/synthesizers/testset_schema.py index ac80478c2..608ada9c4 100644 --- a/src/ragas/testset/synthesizers/testset_schema.py +++ b/src/ragas/testset/synthesizers/testset_schema.py @@ -2,6 +2,10 @@ import typing as t from dataclasses import dataclass, field +from datetime import datetime +from uuid import uuid4 + +from pydantic import BaseModel, Field from ragas.cost import CostCallbackHandler, TokenUsage from ragas.dataset_schema import ( @@ -11,6 +15,7 @@ RagasDataset, SingleTurnSample, ) +from ragas.utils import RAGAS_API_URL class TestsetSample(BaseSample): @@ -29,6 +34,16 @@ class TestsetSample(BaseSample): synthesizer_name: str +class TestsetPacket(BaseModel): + """ + A packet of testset samples to be uploaded to the server. + """ + + samples: t.List[TestsetSample] + run_id: str = Field(default_factory=lambda: str(uuid4())) + created_at: str = Field(default_factory=lambda: datetime.now().isoformat()) + + @dataclass class Testset(RagasDataset[TestsetSample]): """ @@ -118,3 +133,18 @@ def total_cost( cost_per_input_token=cost_per_input_token, cost_per_output_token=cost_per_output_token, ) + + def upload(self, base_url: str = RAGAS_API_URL, verbose: bool = True) -> str: + import requests + + packet = TestsetPacket(samples=self.samples) + response = requests.post( + f"{base_url}/alignment/testset", json=packet.model_dump() + ) + if response.status_code != 200: + raise Exception(f"Failed to upload results: {response.text}") + + testset_endpoint = f"https://app.ragas.io/alignment/testset/{packet.run_id}" + if verbose: + print(f"Testset uploaded! View at {testset_endpoint}") + return testset_endpoint diff --git a/src/ragas/testset/transforms/extractors/llm_based.py b/src/ragas/testset/transforms/extractors/llm_based.py index 83e29c3f6..dca9c66f4 100644 --- a/src/ragas/testset/transforms/extractors/llm_based.py +++ b/src/ragas/testset/transforms/extractors/llm_based.py @@ -263,7 +263,7 @@ class NERExtractor(LLMBasedExtractor): """ property_name: str = "entities" - prompt: NERPrompt = NERPrompt() + prompt: PydanticPrompt[TextWithExtractionLimit, NEROutput] = NERPrompt() max_num_entities: int = 10 async def extract(self, node: Node) -> t.Tuple[str, t.List[str]]: @@ -282,9 +282,7 @@ class TopicDescription(BaseModel): class TopicDescriptionPrompt(PydanticPrompt[StringIO, TopicDescription]): - instruction: str = ( - "Provide a concise description of the main topic(s) discussed in the following text." - ) + instruction: str = "Provide a concise description of the main topic(s) discussed in the following text." input_model: t.Type[StringIO] = StringIO output_model: t.Type[TopicDescription] = TopicDescription examples: t.List[t.Tuple[StringIO, TopicDescription]] = [