Skip to content

Commit

Permalink
feat: upload testset generation upload (#1647)
Browse files Browse the repository at this point in the history
  • Loading branch information
jjmachan authored Nov 11, 2024
1 parent fd44b4c commit b990d68
Show file tree
Hide file tree
Showing 7 changed files with 51 additions and 40 deletions.
6 changes: 5 additions & 1 deletion docs/getstarted/rag_evaluation.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@ The dataset used here is from [Amnesty QA RAG](https://huggingface.co/datasets/e

```python
from datasets import load_dataset
dataset = load_dataset("explodinggradients/amnesty_qa","english_v3")
dataset = load_dataset(
"explodinggradients/amnesty_qa",
"english_v3",
trust_remote_code=True
)
```

Load the dataset into a Ragas `EvaluationDataset` object.
Expand Down
15 changes: 3 additions & 12 deletions src/ragas/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
from langchain_core.callbacks import BaseCallbackHandler, BaseCallbackManager
from langchain_core.embeddings import Embeddings as LangchainEmbeddings
from langchain_core.language_models import BaseLanguageModel as LangchainLLM
from llama_index.core.base.embeddings.base import BaseEmbedding as LlamaIndexEmbedding
from llama_index.core.base.llms.base import BaseLLM as LlamaIndexLLM

from ragas._analytics import EvaluationEvent, track, track_was_completed
from ragas.callbacks import ChainType, RagasTracer, new_group
Expand All @@ -21,14 +19,13 @@
from ragas.embeddings.base import (
BaseRagasEmbeddings,
LangchainEmbeddingsWrapper,
LlamaIndexEmbeddingsWrapper,
embedding_factory,
)
from ragas.exceptions import ExceptionInRunner
from ragas.executor import Executor
from ragas.integrations.helicone import helicone_config
from ragas.llms import llm_factory
from ragas.llms.base import BaseRagasLLM, LangchainLLMWrapper, LlamaIndexLLMWrapper
from ragas.llms.base import BaseRagasLLM, LangchainLLMWrapper
from ragas.metrics import AspectCritic
from ragas.metrics._answer_correctness import AnswerCorrectness
from ragas.metrics.base import (
Expand Down Expand Up @@ -59,10 +56,8 @@
def evaluate(
dataset: t.Union[Dataset, EvaluationDataset],
metrics: t.Optional[t.Sequence[Metric]] = None,
llm: t.Optional[BaseRagasLLM | LangchainLLM | LlamaIndexLLM] = None,
embeddings: t.Optional[
BaseRagasEmbeddings | LangchainEmbeddings | LlamaIndexEmbedding
] = None,
llm: t.Optional[BaseRagasLLM | LangchainLLM] = None,
embeddings: t.Optional[BaseRagasEmbeddings | LangchainEmbeddings] = None,
callbacks: Callbacks = None,
in_ci: bool = False,
run_config: RunConfig = RunConfig(),
Expand Down Expand Up @@ -187,12 +182,8 @@ def evaluate(
# set the llm and embeddings
if isinstance(llm, LangchainLLM):
llm = LangchainLLMWrapper(llm, run_config=run_config)
elif isinstance(llm, LlamaIndexLLM):
llm = LlamaIndexLLMWrapper(llm, run_config=run_config)
if isinstance(embeddings, LangchainEmbeddings):
embeddings = LangchainEmbeddingsWrapper(embeddings)
elif isinstance(embeddings, LlamaIndexEmbedding):
embeddings = LlamaIndexEmbeddingsWrapper(embeddings)

# init llms and embeddings
binary_metrics = []
Expand Down
1 change: 1 addition & 0 deletions src/ragas/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
"ContextRecall",
"context_recall",
"AspectCritic",
"AspectCriticWithReference",
"AnswerRelevancy",
"answer_relevancy",
"ContextEntityRecall",
Expand Down
30 changes: 8 additions & 22 deletions src/ragas/testset/synthesizers/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from ragas.cost import TokenUsageParser
from ragas.embeddings.base import (
BaseRagasEmbeddings,
LangchainEmbeddingsWrapper,
LlamaIndexEmbeddingsWrapper,
)
from ragas.executor import Executor
Expand All @@ -28,7 +27,6 @@
if t.TYPE_CHECKING:
from langchain_core.callbacks import Callbacks
from langchain_core.documents import Document as LCDocument
from langchain_core.embeddings.embeddings import Embeddings as LangchainEmbeddings
from langchain_core.language_models import BaseLanguageModel as LangchainLLM
from llama_index.core.base.embeddings.base import (
BaseEmbedding as LlamaIndexEmbedding,
Expand All @@ -55,22 +53,18 @@ class TestsetGenerator:
----------
llm : BaseRagasLLM
The language model to use for the generation process.
embedding_model: BaseRagasEmbeddings
Embedding model for generation process.
knowledge_graph : KnowledgeGraph, default empty
The knowledge graph to use for the generation process.
"""

llm: BaseRagasLLM
embedding_model: BaseRagasEmbeddings
knowledge_graph: KnowledgeGraph = field(default_factory=KnowledgeGraph)
persona_list: t.Optional[t.List[Persona]] = None

@classmethod
def from_langchain(
cls,
llm: LangchainLLM,
embedding_model: LangchainEmbeddings,
knowledge_graph: t.Optional[KnowledgeGraph] = None,
) -> TestsetGenerator:
"""
Expand All @@ -79,15 +73,13 @@ def from_langchain(
knowledge_graph = knowledge_graph or KnowledgeGraph()
return cls(
LangchainLLMWrapper(llm),
LangchainEmbeddingsWrapper(embedding_model),
knowledge_graph,
)

@classmethod
def from_llama_index(
cls,
llm: LlamaIndexLLM,
embedding_model: LlamaIndexEmbedding,
knowledge_graph: t.Optional[KnowledgeGraph] = None,
) -> TestsetGenerator:
"""
Expand All @@ -96,7 +88,6 @@ def from_llama_index(
knowledge_graph = knowledge_graph or KnowledgeGraph()
return cls(
LlamaIndexLLMWrapper(llm),
LlamaIndexEmbeddingsWrapper(embedding_model),
knowledge_graph,
)

Expand Down Expand Up @@ -157,17 +148,15 @@ def generate_with_langchain_docs(
Provide an LLM on TestsetGenerator instantiation or as an argument for transforms_llm parameter.
Alternatively you can provide your own transforms through the `transforms` parameter."""
)
if not self.embedding_model and not transforms_embedding_model:
if not transforms_embedding_model:
raise ValueError(
"""An embedding client was not provided.
Provide an embedding model on TestsetGenerator instantiation or as an argument for transforms_llm parameter.
Alternatively you can provide your own transforms through the `transforms` parameter."""
"""An embedding client was not provided. Provide an embedding through the transforms_embedding_model parameter. Alternatively you can provide your own transforms through the `transforms` parameter."""
)

if not transforms:
transforms = default_transforms(
llm=transforms_llm or self.llm,
embedding_model=transforms_embedding_model or self.embedding_model,
embedding_model=transforms_embedding_model,
)

# convert the documents to Ragas nodes
Expand Down Expand Up @@ -221,22 +210,19 @@ def generate_with_llamaindex_docs(
raise ValueError(
"An llm client was not provided. Provide an LLM on TestsetGenerator instantiation or as an argument for transforms_llm parameter. Alternatively you can provide your own transforms through the `transforms` parameter."
)
if not self.embedding_model and not transforms_embedding_model:
if not transforms_embedding_model:
raise ValueError(
"An embedding client was not provided. Provide an embedding model on TestsetGenerator instantiation or as an argument for transforms_llm parameter. Alternatively you can provide your own transforms through the `transforms` parameter."
"An embedding client was not provided. Provide an embedding through the transforms_embedding_model parameter. Alternatively you can provide your own transforms through the `transforms` parameter."
)

if not transforms:
if transforms_llm is None:
llm_for_transforms = self.llm
else:
llm_for_transforms = LlamaIndexLLMWrapper(transforms_llm)
if transforms_embedding_model is None:
embedding_model_for_transforms = self.embedding_model
else:
embedding_model_for_transforms = LlamaIndexEmbeddingsWrapper(
transforms_embedding_model
)
embedding_model_for_transforms = LlamaIndexEmbeddingsWrapper(
transforms_embedding_model
)
transforms = default_transforms(
llm=llm_for_transforms,
embedding_model=embedding_model_for_transforms,
Expand Down
3 changes: 2 additions & 1 deletion src/ragas/testset/synthesizers/single_hop/specific.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ class SingleHopScenario(BaseScenario):

@dataclass
class SingleHopSpecificQuerySynthesizer(SingleHopQuerySynthesizer):

name: str = "single_hop_specifc_query_synthesizer"
theme_persona_matching_prompt: PydanticPrompt = ThemesPersonasMatchingPrompt()

Expand Down Expand Up @@ -71,6 +70,8 @@ async def _generate_scenarios(
):
nodes.append(node)

if len(nodes) == 0:
raise ValueError("No nodes found with the `entities` property.")
samples_per_node = int(np.ceil(n / len(nodes)))

scenarios = []
Expand Down
30 changes: 30 additions & 0 deletions src/ragas/testset/synthesizers/testset_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

import typing as t
from dataclasses import dataclass, field
from datetime import datetime
from uuid import uuid4

from pydantic import BaseModel, Field

from ragas.cost import CostCallbackHandler, TokenUsage
from ragas.dataset_schema import (
Expand All @@ -11,6 +15,7 @@
RagasDataset,
SingleTurnSample,
)
from ragas.utils import RAGAS_API_URL


class TestsetSample(BaseSample):
Expand All @@ -29,6 +34,16 @@ class TestsetSample(BaseSample):
synthesizer_name: str


def _new_run_id() -> str:
    """Return a fresh random UUID4 rendered as a string."""
    return str(uuid4())


def _current_timestamp() -> str:
    """Return the current time as an ISO-8601 string."""
    # NOTE(review): datetime.now() without a tz yields a naive local-time
    # value, so the string carries no UTC offset — confirm the server expects that.
    return datetime.now().isoformat()


class TestsetPacket(BaseModel):
    """
    Payload that bundles testset samples for upload to the server.

    Both ``run_id`` and ``created_at`` are filled in automatically when the
    packet is created unless the caller supplies them explicitly.
    """

    # the samples being shipped
    samples: t.List[TestsetSample]
    # unique identifier for this upload; defaults to a new UUID4 string
    run_id: str = Field(default_factory=_new_run_id)
    # creation time as an ISO-8601 string; defaults to "now"
    created_at: str = Field(default_factory=_current_timestamp)


@dataclass
class Testset(RagasDataset[TestsetSample]):
"""
Expand Down Expand Up @@ -118,3 +133,18 @@ def total_cost(
cost_per_input_token=cost_per_input_token,
cost_per_output_token=cost_per_output_token,
)

def upload(
    self,
    base_url: str = RAGAS_API_URL,
    verbose: bool = True,
    timeout: t.Optional[float] = 60.0,
) -> str:
    """
    Upload the samples of this testset to the server.

    The samples are wrapped in a ``TestsetPacket`` and sent with an HTTP POST
    to ``{base_url}/alignment/testset``.

    Parameters
    ----------
    base_url : str
        Base URL of the API that receives the packet.
    verbose : bool
        When True, print the URL at which the uploaded testset can be viewed.
    timeout : float, optional
        Seconds to wait for the server before giving up. Pass ``None`` to
        wait indefinitely (the behaviour before this parameter existed).

    Returns
    -------
    str
        URL at which the uploaded testset can be viewed.

    Raises
    ------
    Exception
        If the server answers with a status code other than 200.
    requests.exceptions.RequestException
        If the connection fails or the timeout expires.
    """
    import requests

    packet = TestsetPacket(samples=self.samples)
    response = requests.post(
        f"{base_url}/alignment/testset",
        json=packet.model_dump(),
        # requests never times out on its own; without this an unresponsive
        # server would block the caller forever.
        timeout=timeout,
    )
    if response.status_code != 200:
        raise Exception(f"Failed to upload results: {response.text}")

    # NOTE: the view URL always points at the hosted app, whatever base_url was.
    testset_endpoint = f"https://app.ragas.io/alignment/testset/{packet.run_id}"
    if verbose:
        print(f"Testset uploaded! View at {testset_endpoint}")
    return testset_endpoint
6 changes: 2 additions & 4 deletions src/ragas/testset/transforms/extractors/llm_based.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ class NERExtractor(LLMBasedExtractor):
"""

property_name: str = "entities"
prompt: NERPrompt = NERPrompt()
prompt: PydanticPrompt[TextWithExtractionLimit, NEROutput] = NERPrompt()
max_num_entities: int = 10

async def extract(self, node: Node) -> t.Tuple[str, t.List[str]]:
Expand All @@ -282,9 +282,7 @@ class TopicDescription(BaseModel):


class TopicDescriptionPrompt(PydanticPrompt[StringIO, TopicDescription]):
instruction: str = (
"Provide a concise description of the main topic(s) discussed in the following text."
)
instruction: str = "Provide a concise description of the main topic(s) discussed in the following text."
input_model: t.Type[StringIO] = StringIO
output_model: t.Type[TopicDescription] = TopicDescription
examples: t.List[t.Tuple[StringIO, TopicDescription]] = [
Expand Down

0 comments on commit b990d68

Please sign in to comment.