From fc093727b9cb226853fe7b29d057155db9caf069 Mon Sep 17 00:00:00 2001
From: Jithin James
Date: Mon, 10 Jul 2023 12:46:11 +0530
Subject: [PATCH] docs: moved quickstart (#54)

---
 Makefile                                |  6 +-
 README.md                               |  4 +-
 {examples => docs/guides}/data_prep.py  |  0
 {examples => docs}/quickstart.ipynb     | 16 ++++
 .../assesments/metrics_assesments.ipynb | 57 ++++++++++---------
 5 files changed, 52 insertions(+), 31 deletions(-)
 rename {examples => docs/guides}/data_prep.py (100%)
 rename {examples => docs}/quickstart.ipynb (96%)

diff --git a/Makefile b/Makefile
index a56b9a93d..e5f60bf91 100644
--- a/Makefile
+++ b/Makefile
@@ -8,14 +8,14 @@ format: ## Running code formatter: black and isort
 	@echo "(isort) Ordering imports..."
 	@isort .
 	@echo "(black) Formatting codebase..."
-	@black --config pyproject.toml src tests examples experiments
+	@black --config pyproject.toml src tests docs experiments
 	@echo "(black) Formatting stubs..."
 	@find src -name "*.pyi" ! -name "*_pb2*" -exec black --pyi --config pyproject.toml {} \;
 	@echo "(ruff) Running fix only..."
-	@ruff check src examples tests --fix-only
+	@ruff check src docs tests --fix-only
 lint: ## Running lint checker: ruff
 	@echo "(ruff) Linting development project..."
-	@ruff check src examples tests
+	@ruff check src docs tests
 type: ## Running type checker: pyright
 	@echo "(pyright) Typechecking codebase..."
 	@pyright src
diff --git a/README.md b/README.md
index d4d00598c..9bf95b516 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@
 License
-
+
 Open In Colab
@@ -77,7 +77,7 @@ results = evaluate(dataset)
 # {'ragas_score': 0.860, 'context_relavency': 0.817,
 # 'faithfulness': 0.892, 'answer_relevancy': 0.874}
 ```
-If you want a more in-depth explanation of core components, check out our [quick-start notebook](./examples/quickstart.ipynb)
+If you want a more in-depth explanation of core components, check out our [quick-start notebook](./docs/quickstart.ipynb)

 ## :luggage: Metrics
 Ragas measures your pipeline's performance against two dimensions
diff --git a/examples/data_prep.py b/docs/guides/data_prep.py
similarity index 100%
rename from examples/data_prep.py
rename to docs/guides/data_prep.py
diff --git a/examples/quickstart.ipynb b/docs/quickstart.ipynb
similarity index 96%
rename from examples/quickstart.ipynb
rename to docs/quickstart.ipynb
index f01be8446..205837316 100644
--- a/examples/quickstart.ipynb
+++ b/docs/quickstart.ipynb
@@ -5,6 +5,11 @@
    "id": "2e63f667",
    "metadata": {},
    "source": [
+    "\n",
+    " \"Open\n",
+    "\n",
     "# Quickstart\n",
     "\n",
     "Welcome to the ragas quickstart. We're going to get you up and running with ragas as quickly as possible, so that you can go back to improving your Retrieval Augmented Generation pipelines while this library makes sure your changes are improving your entire pipeline.\n",
@@ -12,6 +17,17 @@
     "To kick things off, let's start with the data"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "18274e1f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# if using colab uncomment this\n",
+    "#!pip install ragas"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 1,
diff --git a/experiments/assesments/metrics_assesments.ipynb b/experiments/assesments/metrics_assesments.ipynb
index ab79696d8..4746ea274 100644
--- a/experiments/assesments/metrics_assesments.ipynb
+++ b/experiments/assesments/metrics_assesments.ipynb
@@ -177,8 +177,8 @@
    "outputs": [],
    "source": [
     "def get_corr(targets, predictions):\n",
-    "    scores = [kendalltau(x, y).correlation for x, y in zip(targets, predictions)]\n",
-    "    return [score if not np.isnan(score) else 0 for score in scores ]"
+    "    scores = [kendalltau(x, y).correlation for x, y in zip(targets, predictions)]\n",
+    "    return [score if not np.isnan(score) else 0 for score in scores]"
    ]
   },
   {
@@ -355,18 +355,25 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def gpt_faithfulness(question:list, context:list, answer:list):\n",
-    "    prompt = [faithfulness.format(c,q, a) for c,q,a in zip(question,context,answer)]\n",
-    "    output = [output for output in llm(prompt)['choices']]\n",
-    "    scores = [(out[\"text\"].strip()) for out in output ]\n",
-    "    scores = [int(score) if score in ['1','2','3','4','5'] else 1 for score in scores]\n",
+    "def gpt_faithfulness(question: list, context: list, answer: list):\n",
+    "    prompt = [\n",
+    "        faithfulness.format(c, q, a) for c, q, a in zip(question, context, answer)\n",
+    "    ]\n",
+    "    output = [output for output in llm(prompt)[\"choices\"]]\n",
+    "    scores = [(out[\"text\"].strip()) for out in output]\n",
+    "    scores = [\n",
+    "        int(score) if score in [\"1\", \"2\", \"3\", \"4\", \"5\"] else 1 for score in scores\n",
+    "    ]\n",
     "    return scores\n",
     "\n",
-    "def gpt_relevance(question:list, answer:list):\n",
-    "    prompt = [relevence.format(q,a) for q,a in zip(question,answer)]\n",
-    "    output = [output for output in llm(prompt)['choices']]\n",
-    "    scores = [(out[\"text\"].strip()) for out in output ]\n",
-    "    scores = [int(score) if score in ['1','2','3','4','5'] else 1 for score in scores]\n",
+    "\n",
+    "def gpt_relevance(question: list, answer: list):\n",
+    "    prompt = [relevence.format(q, a) for q, a in zip(question, answer)]\n",
+    "    output = [output for output in llm(prompt)[\"choices\"]]\n",
+    "    scores = [(out[\"text\"].strip()) for out in output]\n",
+    "    scores = [\n",
+    "        int(score) if score in [\"1\", \"2\", \"3\", \"4\", \"5\"] else 1 for score in scores\n",
+    "    ]\n",
     "    return scores"
    ]
   },
@@ -425,7 +432,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "q,a,c = wikiqa_ragas['train'][0]['question'],wikiqa_ragas['train'][0]['generated_without_rag'],wikiqa_ragas['train'][0]['context']"
+    "q, a, c = (\n",
+    "    wikiqa_ragas[\"train\"][0][\"question\"],\n",
+    "    wikiqa_ragas[\"train\"][0][\"generated_without_rag\"],\n",
+    "    wikiqa_ragas[\"train\"][0][\"context\"],\n",
+    ")"
    ]
   },
   {
@@ -446,7 +457,7 @@
     }
    ],
    "source": [
-    "gpt_faithfulness([q],[c], [a])"
+    "gpt_faithfulness([q], [c], [a])"
    ]
   },
   {
@@ -517,12 +528,12 @@
     "def predict_(examples):\n",
     "    scores = {}\n",
     "    questions = examples[\"question\"]\n",
-    "    context = examples['context']\n",
+    "    context = examples[\"context\"]\n",
" for col in COLUMNS:\n", " passage = examples[col]\n", " inputs = list(zip(questions, passage))\n", - " #scores[f\"{col}_relevance\"] = t5_qgen.predict(inputs, show_progress=False)\n", - " scores[f\"{col}_relevance\"] = gpt_faithfulness(questions,context,passage)\n", + " # scores[f\"{col}_relevance\"] = t5_qgen.predict(inputs, show_progress=False)\n", + " scores[f\"{col}_relevance\"] = gpt_faithfulness(questions, context, passage)\n", " return scores" ] }, @@ -553,10 +564,7 @@ }, "outputs": [], "source": [ - "output = (\n", - " wikiqa_ragas[\"train\"]\n", - " .map(predict_relevance, batched=True, batch_size=10)\n", - ")" + "output = wikiqa_ragas[\"train\"].map(predict_relevance, batched=True, batch_size=10)" ] }, { @@ -622,10 +630,7 @@ } ], "source": [ - "output = (\n", - " wikiqa_ragas[\"train\"]\n", - " .map(predict_relevance, batched=True, batch_size=10)\n", - ")" + "output = wikiqa_ragas[\"train\"].map(predict_relevance, batched=True, batch_size=10)" ] }, { @@ -877,7 +882,7 @@ "metadata": {}, "outputs": [], "source": [ - "def predict_faithfulness(examples,scoring_fun=NLI.score):\n", + "def predict_faithfulness(examples, scoring_fun=NLI.score):\n", " scores = {}\n", " questions = examples[\"question\"]\n", " contexts = examples[\"answer_context\"]\n",