From f2d1ce137238d2e6fd2ebc83e65905f0a6db5ceb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Messias=20Lima=20Pereira?=
 <jmessiaslp856@gmail.com>
Date: Tue, 14 Jan 2025 10:46:12 -0300
Subject: [PATCH] =?UTF-8?q?Adjustment=20when=20calculating=20hash=20|=20Ad?=
 =?UTF-8?q?justment=20of=20the=20hash=20calculation=E2=80=A6=20(#1837)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

… method

When trying to load the saved models after adaptation, alerts like these
were always triggered:

Loaded prompt hash does not match the saved hash.
Loaded prompt hash does not match the saved hash.

Furthermore, in Python, the built-in hash() function may yield different
results for the same string across different sessions. To achieve
consistent hash values, this change uses the hashlib module to calculate
the hash of the prompt, which provides stable hashing algorithms.
---
 src/ragas/prompt/pydantic_prompt.py | 43 +++++++++++++++--------------
 1 file changed, 23 insertions(+), 20 deletions(-)

diff --git a/src/ragas/prompt/pydantic_prompt.py b/src/ragas/prompt/pydantic_prompt.py
index 6443d5347..fbf28010f 100644
--- a/src/ragas/prompt/pydantic_prompt.py
+++ b/src/ragas/prompt/pydantic_prompt.py
@@ -4,6 +4,8 @@
 import json
 import logging
 import os
+import hashlib
+
 import typing as t
 
 from langchain_core.exceptions import OutputParserException
@@ -226,12 +228,7 @@ async def adapt(
         """
         Adapt the prompt to a new language.
         """
-
-        # set the original hash, this is used to
-        # identify the original prompt object when loading from file
-        if self.original_hash is None:
-            self.original_hash = hash(self)
-
+        
         strings = get_all_strings(self.examples)
         translated_strings = await translate_statements_prompt.generate(
             llm=llm,
@@ -257,6 +254,8 @@ async def adapt(
             )
             new_prompt.instruction = translated_instruction.statements[0]
 
+        new_prompt.original_hash = hash(new_prompt)
+
         return new_prompt
 
     def __repr__(self):
@@ -276,7 +275,7 @@ def __str__(self):
             ensure_ascii=False,
         )[1:-1]
         return f"{self.__class__.__name__}({json_str})"
-
+        
     def __hash__(self):
         # convert examples to json string for hashing
         examples = []
@@ -285,19 +284,23 @@ def __hash__(self):
             examples.append(
                 (input_model.model_dump_json(), output_model.model_dump_json())
             )
-
-        # not sure if input_model and output_model should be included
-        return hash(
-            (
-                self.name,
-                self.input_model,
-                self.output_model,
-                self.instruction,
-                *examples,
-                self.language,
-            )
-        )
-
+    
+        # create a SHA-256 hash object
+        hasher = hashlib.sha256()
+    
+        # update the hash object with the bytes of each attribute
+        hasher.update(self.name.encode('utf-8'))
+        hasher.update(self.input_model.__name__.encode('utf-8'))
+        hasher.update(self.output_model.__name__.encode('utf-8'))
+        hasher.update(self.instruction.encode('utf-8'))
+        for example in examples:
+            hasher.update(example[0].encode('utf-8'))
+            hasher.update(example[1].encode('utf-8'))
+        hasher.update(self.language.encode('utf-8'))
+    
+        # return the integer value of the hash
+        return int(hasher.hexdigest(), 16)
+    
     def __eq__(self, other):
         if not isinstance(other, PydanticPrompt):
             return False