From f2d1ce137238d2e6fd2ebc83e65905f0a6db5ceb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Messias=20Lima=20Pereira?= Date: Tue, 14 Jan 2025 10:46:12 -0300 Subject: [PATCH] =?UTF-8?q?Adjustment=20when=20calculating=20hash=20|=20Ad?= =?UTF-8?q?justment=20of=20the=20hash=20calculation=E2=80=A6=20(#1837)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit … method When trying to load the saved models after adaptation, alerts like these were always triggered: Loaded prompt hash does not match the saved hash. Loaded prompt hash does not match the saved hash. Furthermore, in Python, the default hash() function may yield different results for the same string across different sessions. To achieve consistent hash values, I am using the hashlib module, which provides stable hashing algorithms, to calculate the hash of the prompt. --- src/ragas/prompt/pydantic_prompt.py | 43 +++++++++++++++-------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/src/ragas/prompt/pydantic_prompt.py b/src/ragas/prompt/pydantic_prompt.py index 6443d5347..fbf28010f 100644 --- a/src/ragas/prompt/pydantic_prompt.py +++ b/src/ragas/prompt/pydantic_prompt.py @@ -4,6 +4,8 @@ import json import logging import os +import hashlib + import typing as t from langchain_core.exceptions import OutputParserException @@ -226,12 +228,7 @@ async def adapt( """ Adapt the prompt to a new language. 
""" - - # set the original hash, this is used to - # identify the original prompt object when loading from file - if self.original_hash is None: - self.original_hash = hash(self) - + strings = get_all_strings(self.examples) translated_strings = await translate_statements_prompt.generate( llm=llm, @@ -257,6 +254,8 @@ async def adapt( ) new_prompt.instruction = translated_instruction.statements[0] + new_prompt.original_hash = hash(new_prompt) + return new_prompt def __repr__(self): @@ -276,7 +275,7 @@ def __str__(self): ensure_ascii=False, )[1:-1] return f"{self.__class__.__name__}({json_str})" - + def __hash__(self): # convert examples to json string for hashing examples = [] @@ -285,19 +284,23 @@ def __hash__(self): examples.append( (input_model.model_dump_json(), output_model.model_dump_json()) ) - - # not sure if input_model and output_model should be included - return hash( - ( - self.name, - self.input_model, - self.output_model, - self.instruction, - *examples, - self.language, - ) - ) - + + # create a SHA-256 hash object + hasher = hashlib.sha256() + + # update the hash object with the bytes of each attribute + hasher.update(self.name.encode('utf-8')) + hasher.update(self.input_model.__name__.encode('utf-8')) + hasher.update(self.output_model.__name__.encode('utf-8')) + hasher.update(self.instruction.encode('utf-8')) + for example in examples: + hasher.update(example[0].encode('utf-8')) + hasher.update(example[1].encode('utf-8')) + hasher.update(self.language.encode('utf-8')) + + # return the integer value of the hash + return int(hasher.hexdigest(), 16) + def __eq__(self, other): if not isinstance(other, PydanticPrompt): return False