diff --git a/Makefile b/Makefile
index 1997c1249..0e9b7d4fe 100644
--- a/Makefile
+++ b/Makefile
@@ -36,10 +36,11 @@ test-e2e: ## Run end2end tests
 run-ci: format lint type test ## Running all CI checks
 
 # Docs
-rewrite-docs: ## Use GPT4 to rewrite the documentation
-	@echo "Rewriting the documentation in directory $(DIR)..."
-	$(Q)python $(GIT_ROOT)/docs/python alphred.py --directory $(DIR)
-docsite: ## Build and serve documentation
+build-docsite: ## Build the documentation site
+	@echo "Converting ipynb notebooks to md files..."
+	$(Q)python $(GIT_ROOT)/docs/ipynb_to_md.py
+	$(Q)mkdocs build
+serve-docsite: ## Build and serve documentation
 	$(Q)mkdocs serve --dirtyreload
 
 # Benchmarks
diff --git a/docs/howtos/customizations/_caching.md b/docs/howtos/customizations/_caching.md
new file mode 100644
index 000000000..ec71c6390
--- /dev/null
+++ b/docs/howtos/customizations/_caching.md
@@ -0,0 +1,100 @@
+# Caching in Ragas
+
+You can use caching to speed up your evaluations and testset generation by avoiding redundant computations. We use Exact Match Caching to cache the responses from the LLM and Embedding models.
+
+You can use the [DiskCacheBackend][ragas.cache.DiskCacheBackend], which uses a local disk cache to store the cached responses. You can also implement your own custom cacher by implementing the [CacheInterface][ragas.cache.CacheInterface].
+
+
+## Using DefaultCacher
+
+Let's see how you can use the [DiskCacheBackend][ragas.cache.DiskCacheBackend] with LLM and Embedding models.
+
+
+```python
+from ragas.cache import DiskCacheBackend
+
+cacher = DiskCacheBackend()
+
+# check if the cache is empty and clear it
+print(len(cacher.cache))
+cacher.cache.clear()
+print(len(cacher.cache))
+```
+
+
+    DiskCacheBackend(cache_dir=.cache)
+
+
+Create an LLM and Embedding model with the cacher. Here we use `ChatOpenAI` from [langchain-openai](https://github.com/langchain-ai/langchain-openai) as an example.
+
+
+```python
+from langchain_openai import ChatOpenAI
+from ragas.llms import LangchainLLMWrapper
+
+cached_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o"), cache=cacher)
+```
+
+
+```python
+# if you want to see the cache in action, set the logging level to debug
+import logging
+from ragas.utils import set_logging_level
+
+set_logging_level("ragas.cache", logging.DEBUG)
+```
+
+Now let's run a simple evaluation.
+
+
+```python
+from ragas import evaluate
+from ragas import EvaluationDataset
+
+from ragas.metrics import FactualCorrectness, AspectCritic
+from datasets import load_dataset
+
+# Define Answer Correctness with AspectCritic
+answer_correctness = AspectCritic(
+    name="answer_correctness",
+    definition="Is the answer correct? Does it match the reference answer?",
+    llm=cached_llm,
+)
+
+metrics = [answer_correctness, FactualCorrectness(llm=cached_llm)]
+
+# load the dataset
+dataset = load_dataset(
+    "explodinggradients/amnesty_qa", "english_v3", trust_remote_code=True
+)
+eval_dataset = EvaluationDataset.from_hf_dataset(dataset["eval"])
+
+# evaluate the dataset
+results = evaluate(
+    dataset=eval_dataset,
+    metrics=metrics,
+)
+
+results
+```
+
+This took almost 2 minutes to run on our local machine. Now let's run it again to see the cache in action.
+
+
+```python
+results = evaluate(
+    dataset=eval_dataset,
+    metrics=metrics,
+)
+
+results
+```
+
+This time it runs almost instantaneously.
+
+You can also use this with testset generation by replacing the `generator_llm` with a cached version of it.
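+
+A minimal sketch of that wrapping step (it reuses the `cacher` created above; the model name is only an example):
+
+```python
+from langchain_openai import ChatOpenAI
+from ragas.llms import LangchainLLMWrapper
+
+# Reuse the same cacher so testset generation and evaluation share one disk cache.
+generator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o"), cache=cacher)
+
+# Pass this generator_llm to the testset generator as described in the guide below.
+```
+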
Refer to the [testset generation](../../getstarted/rag_testset_generation.md) section for more details. diff --git a/docs/howtos/customizations/caching.ipynb b/docs/howtos/customizations/caching.ipynb new file mode 100644 index 000000000..7972b2215 --- /dev/null +++ b/docs/howtos/customizations/caching.ipynb @@ -0,0 +1,173 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Caching in Ragas\n", + "\n", + "You can use caching to speed up your evaluations and testset generation by avoiding redundant computations. We use Exact Match Caching to cache the responses from the LLM and Embedding models.\n", + "\n", + "You can use the [DiskCacheBackend][ragas.cache.DiskCacheBackend] which uses a local disk cache to store the cached responses. You can also implement your own custom cacher by implementing the [CacheInterface][ragas.cache.CacheInterface].\n", + "\n", + "\n", + "## Using DefaultCacher\n", + "\n", + "Let's see how you can use the [DiskCacheBackend][ragas.cache.DiskCacheBackend] LLM and Embedding models.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "DiskCacheBackend(cache_dir=.cache)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from ragas.cache import DiskCacheBackend\n", + "\n", + "cacher = DiskCacheBackend()\n", + "\n", + "# check if the cache is empty and clear it\n", + "print(len(cacher.cache))\n", + "cacher.cache.clear()\n", + "print(len(cacher.cache))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create an LLM and Embedding model with the cacher, here I'm using the `ChatOpenAI` from [langchain-openai](https://github.com/langchain-ai/langchain-openai) as an example.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_openai import ChatOpenAI\n", + "from ragas.llms import LangchainLLMWrapper\n", + "\n", + "cached_llm = LangchainLLMWrapper(ChatOpenAI(model=\"gpt-4o\"), cache=cacher)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# if you want to see the cache in action, set the logging level to debug\n", + "import logging\n", + "from ragas.utils import set_logging_level\n", + "\n", + "set_logging_level(\"ragas.cache\", logging.DEBUG)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's run a simple evaluation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from ragas import evaluate\n", + "from ragas import EvaluationDataset\n", + "\n", + "from ragas.metrics import FactualCorrectness, AspectCritic\n", + "from datasets import load_dataset\n", + "\n", + "# Define Answer Correctness with AspectCritic\n", + "answer_correctness = AspectCritic(\n", + " name=\"answer_correctness\",\n", + " definition=\"Is the answer correct? 
Does it match the reference answer?\",\n", + " llm=cached_llm,\n", + ")\n", + "\n", + "metrics = [answer_correctness, FactualCorrectness(llm=cached_llm)]\n", + "\n", + "# load the dataset\n", + "dataset = load_dataset(\n", + " \"explodinggradients/amnesty_qa\", \"english_v3\", trust_remote_code=True\n", + ")\n", + "eval_dataset = EvaluationDataset.from_hf_dataset(dataset[\"eval\"])\n", + "\n", + "# evaluate the dataset\n", + "results = evaluate(\n", + " dataset=eval_dataset,\n", + " metrics=metrics,\n", + ")\n", + "\n", + "results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This took almost 2mins to run in our local machine. Now let's run it again to see the cache in action." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "results = evaluate(\n", + " dataset=eval_dataset,\n", + " metrics=metrics,\n", + ")\n", + "\n", + "results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Runs almost instantaneously.\n", + "\n", + "You can also use this with testset generation also by replacing the `generator_llm` with a cached version of it. Refer to the [testset generation](../../getstarted/rag_testset_generation.md) section for more details." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.15" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/references/cache.md b/docs/references/cache.md new file mode 100644 index 000000000..288f548f9 --- /dev/null +++ b/docs/references/cache.md @@ -0,0 +1,3 @@ +::: ragas.cache + options: + members_order: "source" diff --git a/mkdocs.yml b/mkdocs.yml index 22f35c55e..c7bccea0c 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -77,6 +77,7 @@ nav: - General: - Customise models: howtos/customizations/customize_models.md - Run Config: howtos/customizations/_run_config.md + - Caching: howtos/customizations/_caching.md - Metrics: - Modify Prompts: howtos/customizations/metrics/_modifying-prompts-metrics.md - Adapt Metrics to Languages: howtos/customizations/metrics/_metrics_language_adaptation.md @@ -88,6 +89,7 @@ nav: - Persona Generation: howtos/customizations/testgenerator/_persona_generator.md - Custom Single-hop Query: howtos/customizations/testgenerator/_testgen-custom-single-hop.md - Custom Multi-hop Query: howtos/customizations/testgenerator/_testgen-customisation.md + - Applications: - howtos/applications/index.md - Metrics: @@ -107,6 +109,7 @@ nav: - Embeddings: references/embeddings.md - RunConfig: references/run_config.md - Executor: references/executor.md + - Cache: references/cache.md - Evaluation: - Schemas: references/evaluation_schema.md - Metrics: references/metrics.md @@ -237,3 +240,4 @@ extra_javascript: - _static/js/header_border.js - https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js - _static/js/toggle.js + - https://cdn.octolane.com/tag.js?pk=c7c9b2b863bf7eaf4e2a # octolane for analytics diff --git a/src/ragas/cache.py b/src/ragas/cache.py index 47c42a3d1..da40e4584 100644 --- a/src/ragas/cache.py +++ b/src/ragas/cache.py @@ -2,24 +2,55 @@ import hashlib import inspect import json +import logging from abc import ABC, abstractmethod from typing import Any, Optional from pydantic import BaseModel, 
GetCoreSchemaHandler from pydantic_core import CoreSchema, core_schema +logger = logging.getLogger(__name__) + class CacheInterface(ABC): + """Abstract base class defining the interface for cache implementations. + + This class provides a standard interface that all cache implementations must follow. + It supports basic cache operations like get, set and key checking. + """ + @abstractmethod def get(self, key: str) -> Any: + """Retrieve a value from the cache by key. + + Args: + key: The key to look up in the cache. + + Returns: + The cached value associated with the key. + """ pass @abstractmethod def set(self, key: str, value) -> None: + """Store a value in the cache with the given key. + + Args: + key: The key to store the value under. + value: The value to cache. + """ pass @abstractmethod def has_key(self, key: str) -> bool: + """Check if a key exists in the cache. + + Args: + key: The key to check for. + + Returns: + True if the key exists in the cache, False otherwise. + """ pass @classmethod @@ -30,11 +61,21 @@ def __get_pydantic_core_schema__( Define how Pydantic generates a schema for BaseRagasEmbeddings. """ return core_schema.no_info_after_validator_function( - cls, core_schema.is_instance_schema(cls) # The validator function + cls, + core_schema.is_instance_schema(cls), # The validator function ) class DiskCacheBackend(CacheInterface): + """A cache implementation that stores data on disk using the diskcache library. + + This cache backend persists data to disk, allowing it to survive between program runs. + It implements the CacheInterface for use with Ragas caching functionality. + + Args: + cache_dir (str, optional): Directory where cache files will be stored. Defaults to ".cache". + """ + def __init__(self, cache_dir: str = ".cache"): try: from diskcache import Cache @@ -46,18 +87,49 @@ def __init__(self, cache_dir: str = ".cache"): self.cache = Cache(cache_dir) def get(self, key: str) -> Any: + """Retrieve a value from the disk cache by key. + + Args: + key: The key to look up in the cache. + + Returns: + The cached value associated with the key, or None if not found. + """ return self.cache.get(key) def set(self, key: str, value) -> None: + """Store a value in the disk cache with the given key. + + Args: + key: The key to store the value under. + value: The value to cache. + """ self.cache.set(key, value) def has_key(self, key: str) -> bool: + """Check if a key exists in the disk cache. + + Args: + key: The key to check for. + + Returns: + True if the key exists in the cache, False otherwise. + """ return key in self.cache def __del__(self): + """Cleanup method to properly close the cache when the object is destroyed.""" if hasattr(self, "cache"): self.cache.close() + def __repr__(self): + """Return string representation of the cache object. + + Returns: + String showing the cache directory location. + """ + return f"DiskCacheBackend(cache_dir={self.cache.directory})" + def _make_hashable(o): if isinstance(o, (tuple, list)): @@ -93,6 +165,19 @@ def _generate_cache_key(func, args, kwargs): def cacher(cache_backend: Optional[CacheInterface] = None): + """Decorator that adds caching functionality to a function. + + This decorator can be applied to both synchronous and asynchronous functions to cache their results. + If no cache backend is provided, the original function is returned unchanged. + + Args: + cache_backend (Optional[CacheInterface]): The cache backend to use for storing results. + If None, caching is disabled. 
+ + Returns: + Callable: A decorated function that implements caching behavior. + """ + def decorator(func): if cache_backend is None: return func @@ -107,6 +192,7 @@ async def async_wrapper(*args, **kwargs): cache_key = _generate_cache_key(func, args, kwargs) if backend.has_key(cache_key): + logger.debug(f"Cache hit for {cache_key}") return backend.get(cache_key) result = await func(*args, **kwargs) @@ -118,6 +204,7 @@ def sync_wrapper(*args, **kwargs): cache_key = _generate_cache_key(func, args, kwargs) if backend.has_key(cache_key): + logger.debug(f"Cache hit for {cache_key}") return backend.get(cache_key) result = func(*args, **kwargs) diff --git a/src/ragas/utils.py b/src/ragas/utils.py index c6577fadf..06eb2ee36 100644 --- a/src/ragas/utils.py +++ b/src/ragas/utils.py @@ -240,3 +240,22 @@ def batched(iterable: t.Iterable, n: int) -> t.Iterator[t.Tuple]: iterator = iter(iterable) while batch := tuple(itertools.islice(iterator, n)): yield batch + + +def set_logging_level(logger_name: str, level: int): + """ + Set the logging level for a logger. Useful for debugging. + """ + logger = logging.getLogger(logger_name) + logger.setLevel(level) + + # Create a console handler and set its level + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.DEBUG) + + # Create a formatter and add it to the handler + formatter = logging.Formatter("%(name)s - %(levelname)s - %(message)s") + console_handler.setFormatter(formatter) + + # Add the handler to the logger + logger.addHandler(console_handler)
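The `CacheInterface` added in `src/ragas/cache.py` only requires `get`, `set`, and `has_key`, so the custom cacher mentioned in the caching guide can stay small. A minimal in-memory sketch (non-persistent; the class name is illustrative):

```python
from typing import Any

from ragas.cache import CacheInterface


class InMemoryCacheBackend(CacheInterface):
    """Keep cached responses in a plain dict for the lifetime of the process."""

    def __init__(self):
        self._store: dict[str, Any] = {}

    def get(self, key: str) -> Any:
        # Return None when the key is missing, mirroring DiskCacheBackend.get
        return self._store.get(key)

    def set(self, key: str, value) -> None:
        self._store[key] = value

    def has_key(self, key: str) -> bool:
        return key in self._store
```

It can then be passed anywhere a `DiskCacheBackend` is accepted, for example `LangchainLLMWrapper(ChatOpenAI(model="gpt-4o"), cache=InMemoryCacheBackend())`.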
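The `cacher` decorator and the `set_logging_level` helper can also be exercised directly, outside the LLM and Embedding wrappers. A small sketch (the `slow_square` function is made up purely for illustration; any plain function with simple, serializable arguments should behave the same way):

```python
import logging

from ragas.cache import DiskCacheBackend, cacher
from ragas.utils import set_logging_level

# Surface the "Cache hit for ..." debug messages emitted by the cached wrappers.
set_logging_level("ragas.cache", logging.DEBUG)


@cacher(cache_backend=DiskCacheBackend(cache_dir=".cache"))
def slow_square(x: int) -> int:
    # Stand-in for an expensive call; only used to illustrate the decorator.
    return x * x


slow_square(4)  # computed and stored under a key derived from the function and its arguments
slow_square(4)  # served from the cache; with DEBUG logging enabled, a cache-hit line is printed
```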