diff --git a/docs/integrations/langchain.ipynb b/docs/integrations/langchain.ipynb
index 090db119a..145fff7ee 100644
--- a/docs/integrations/langchain.ipynb
+++ b/docs/integrations/langchain.ipynb
@@ -51,7 +51,7 @@
     "index = VectorstoreIndexCreator().from_loaders([loader])\n",
     "\n",
     "\n",
-    "llm = ChatOpenAI(temperature= 0)\n",
+    "llm = ChatOpenAI(temperature=0)\n",
     "qa_chain = RetrievalQA.from_chain_type(\n",
     "    llm,\n",
     "    retriever=index.vectorstore.as_retriever(),\n",
@@ -373,6 +373,7 @@
    ],
    "source": [
     "from langchain.schema import Document\n",
+    "\n",
     "fake_result = result.copy()\n",
     "fake_result[\"source_documents\"] = [Document(page_content=\"I love christmas\")]\n",
     "eval_result = context_recall_chain(fake_result)\n",
diff --git a/src/ragas/metrics/answer_relevance.py b/src/ragas/metrics/answer_relevance.py
index 7d75c9c9c..44d4132be 100644
--- a/src/ragas/metrics/answer_relevance.py
+++ b/src/ragas/metrics/answer_relevance.py
@@ -7,8 +7,9 @@
 from datasets import Dataset
 from langchain.callbacks.manager import trace_as_chain_group
 from langchain.embeddings import OpenAIEmbeddings
-from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
 from langchain.embeddings.base import Embeddings
+from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
+
 from ragas.metrics.base import EvaluationMode, MetricWithLLM
 from ragas.metrics.llms import generate
 
@@ -45,7 +46,7 @@ class AnswerRelevancy(MetricWithLLM):
         Here indicates the number questions generated per answer.
         Ideal range between 3 to 5.
     embeddings: Embedding
-        The langchain wrapper of Embedding object.
+        The langchain wrapper of Embedding object. E.g. HuggingFaceEmbeddings('BAAI/bge-base-en')
     """
 
diff --git a/tests/benchmarks/benchmark_eval.py b/tests/benchmarks/benchmark_eval.py
index 4d7ec425d..c6f585796 100644
--- a/tests/benchmarks/benchmark_eval.py
+++ b/tests/benchmarks/benchmark_eval.py
@@ -2,7 +2,12 @@
 from torch.cuda import is_available
 
 from ragas import evaluate
-from ragas.metrics import answer_relevancy, context_relevancy, faithfulness
+from ragas.metrics import (
+    answer_relevancy,
+    context_recall,
+    context_relevancy,
+    faithfulness,
+)
 from ragas.metrics.critique import harmfulness
 
 DEVICE = "cuda" if is_available() else "cpu"
@@ -13,6 +18,12 @@ if __name__ == "__main__":
 
     result = evaluate(
         ds.select(range(5)),
-        metrics=[answer_relevancy, context_relevancy, faithfulness, harmfulness],
+        metrics=[
+            answer_relevancy,
+            context_relevancy,
+            faithfulness,
+            harmfulness,
+            context_recall,
+        ],
     )
     print(result)
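
A minimal usage sketch of what this patch enables: evaluating with the expanded metric list from tests/benchmarks/benchmark_eval.py (including context_recall) while overriding the embeddings attribute that the updated AnswerRelevancy docstring documents. The dataset name/config, the column layout, and the HuggingFaceEmbeddings keyword are illustrative assumptions, not taken from this diff; it also assumes AnswerRelevancy's dataclass fields all have defaults so keyword-only construction works.

# Sketch only; dataset and embedding model are assumptions for illustration.
from datasets import load_dataset
from langchain.embeddings import HuggingFaceEmbeddings

from ragas import evaluate
from ragas.metrics import context_recall, context_relevancy, faithfulness
from ragas.metrics.answer_relevance import AnswerRelevancy
from ragas.metrics.critique import harmfulness

# Assumed evaluation split with question/answer/contexts/ground_truths columns.
ds = load_dataset("explodinggradients/fiqa", "ragas_eval")["baseline"]

# Swap the default OpenAI embeddings for a local HuggingFace model,
# as suggested by the docstring example added in this diff.
answer_relevancy = AnswerRelevancy(
    embeddings=HuggingFaceEmbeddings(model_name="BAAI/bge-base-en")
)

result = evaluate(
    ds.select(range(5)),
    metrics=[
        answer_relevancy,
        context_relevancy,
        faithfulness,
        harmfulness,
        context_recall,
    ],
)
print(result)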