Minor changes (#38)
* added readme

* added how to use

* added metrics to docs

* update readme

* set model_max_length

* fix import paths
shahules786 authored Jun 9, 2023
1 parent ee6a295 commit 48ae599
Showing 2 changed files with 35 additions and 39 deletions.
72 changes: 34 additions & 38 deletions experiments/assesments/metrics_assesments.ipynb
@@ -32,10 +32,19 @@
},
{
"cell_type": "code",
"execution_count": 62,
"execution_count": 1,
"id": "7bfb2480",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/anaconda3/envs/alerts/lib/python3.8/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"import json\n",
"from datasets import load_dataset\n",
@@ -55,7 +64,7 @@
"metadata": {},
"outputs": [],
"source": [
"os.chdir(\"/Users/shahules/belar/\")"
"os.chdir('/Users/shahules/belar/src/')"
]
},
{
@@ -135,7 +144,7 @@
},
{
"cell_type": "code",
"execution_count": 129,
"execution_count": 7,
"id": "f9f4280e",
"metadata": {},
"outputs": [
@@ -144,7 +153,7 @@
"output_type": "stream",
"text": [
"Found cached dataset parquet (/Users/shahules/.cache/huggingface/datasets/explodinggradients___parquet/explodinggradients--ragas-wikiqa-5b5116e5cb909aca/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n",
"100%|█| 1/1 [00:00<00:00, 58.\n"
"100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 242.78it/s]\n"
]
}
],
@@ -162,7 +171,7 @@
},
{
"cell_type": "code",
"execution_count": 153,
"execution_count": 8,
"id": "eca20daf",
"metadata": {},
"outputs": [],
@@ -184,7 +193,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"id": "f3e35532",
"metadata": {},
"outputs": [],
@@ -216,7 +225,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"id": "335081e3",
"metadata": {},
"outputs": [],
@@ -252,7 +261,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 11,
"id": "b2642e5b",
"metadata": {},
"outputs": [],
@@ -267,7 +276,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 13,
"id": "26ca4af4",
"metadata": {},
"outputs": [
@@ -284,7 +293,7 @@
"0"
]
},
"execution_count": 19,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@@ -305,7 +314,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 14,
"id": "ca1c56d6",
"metadata": {},
"outputs": [],
@@ -327,7 +336,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 15,
"id": "cd7fed9c",
"metadata": {},
"outputs": [],
@@ -343,7 +352,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 16,
"id": "35113558",
"metadata": {},
"outputs": [],
@@ -354,7 +363,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 17,
"id": "4e82d0df",
"metadata": {},
"outputs": [
@@ -368,10 +377,10 @@
{
"data": {
"text/plain": [
"3.514920235612768"
"3.5533440372846865"
]
},
"execution_count": 16,
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
@@ -399,40 +408,27 @@
},
{
"cell_type": "code",
"execution_count": 124,
"execution_count": 13,
"id": "cc263805",
"metadata": {},
"outputs": [],
"source": [
"from experimental.relevance import QGen"
"from ragas.metrics.answer_relevance import QGen"
]
},
{
"cell_type": "code",
"execution_count": 125,
"execution_count": 14,
"id": "38deaf06",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/anaconda3/envs/alerts/lib/python3.8/site-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n"
]
}
],
"outputs": [],
"source": [
"t5_qgen = QGen(\"t5-base\", \"cpu\")"
]
},
{
"cell_type": "code",
"execution_count": 126,
"execution_count": 15,
"id": "45942810",
"metadata": {},
"outputs": [],
Expand All @@ -457,7 +453,7 @@
},
{
"cell_type": "code",
"execution_count": 127,
"execution_count": 16,
"id": "ab00e4fe",
"metadata": {},
"outputs": [],
@@ -522,12 +518,12 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 17,
"id": "b6d76ae2",
"metadata": {},
"outputs": [],
"source": [
"## import cross encoder"
"from ragas.metrics.context_relevance import context_relavancy"
]
},
{
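The notebook hunks above swap the old experimental.relevance import for the packaged ragas.metrics paths and point the working directory at the repository's src/ folder. A minimal sketch of the corrected setup, assuming a local checkout at the path shown in the diff and that the working directory is importable (as it is inside a notebook session):

import os

# Path taken from the chdir cell in the diff; adjust to your own checkout.
os.chdir("/Users/shahules/belar/src/")

# Corrected import paths introduced by this commit.
from ragas.metrics.answer_relevance import QGen
from ragas.metrics.context_relevance import context_relavancy

# Instantiated later in the notebook: a t5-base question generator on CPU.
t5_qgen = QGen("t5-base", "cpu")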
2 changes: 1 addition & 1 deletion src/ragas/metrics/answer_relevance.py
@@ -24,7 +24,7 @@
class QGen:
    def __init__(self, model_name: str, device: str) -> None:
        config = AutoConfig.from_pretrained(model_name)
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length=512)
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = "[PAD]"
        architecture = np.intersect1d(
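The one-line change above is what clears the transformers FutureWarning seen in the notebook's earlier output: t5-base warns at instantiation when its maximum length is inferred from legacy defaults rather than set explicitly. A small standalone sketch of the same idea, assuming the transformers library and the public t5-base checkpoint are available:

from transformers import AutoTokenizer

# Passing model_max_length explicitly mirrors the patched QGen constructor and
# silences the FutureWarning that t5-base otherwise emits when loaded.
tokenizer = AutoTokenizer.from_pretrained("t5-base", model_max_length=512)

# With the limit set, long inputs can be truncated predictably.
encoded = tokenizer(
    "Albert Einstein developed the theory of general relativity.",
    truncation=True,
    max_length=512,
    return_tensors="pt",
)
print(encoded["input_ids"].shape)  # torch.Size([1, sequence_length]), capped at 512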
