Adjustment when calculating hash | Adjustment of the hash calculation method (#1837)

When trying to load saved models after adaptation, alerts like these were always triggered: Loaded prompt hash does not match the saved hash. Loaded prompt hash does not match the saved hash. Furthermore, in Python, the default hash() function may yield different results for the same string across different sessions. To achieve consistent hash values, this change uses the hashlib module to calculate the hash of the prompt, which provides stable hashing algorithms.
explodinggradients · Jan 14, 2025 · f2d1ce1 · f2d1ce1
1 parent 433d84f
commit f2d1ce1
Showing 1 changed file with 23 additions and 20 deletions.
diff --git a/src/ragas/prompt/pydantic_prompt.py b/src/ragas/prompt/pydantic_prompt.py
@@ -4,6 +4,8 @@
 import json
 import logging
 import os
+import hashlib
+
 import typing as t
 
 from langchain_core.exceptions import OutputParserException
@@ -226,12 +228,7 @@ async def adapt(
         """
         Adapt the prompt to a new language.
         """
-
-        # set the original hash, this is used to
-        # identify the original prompt object when loading from file
-        if self.original_hash is None:
-            self.original_hash = hash(self)
-
+
         strings = get_all_strings(self.examples)
         translated_strings = await translate_statements_prompt.generate(
             llm=llm,
@@ -257,6 +254,8 @@ async def adapt(
             )
             new_prompt.instruction = translated_instruction.statements[0]
 
+        new_prompt.original_hash = hash(new_prompt)
+
         return new_prompt
 
     def __repr__(self):
@@ -276,7 +275,7 @@ def __str__(self):
             ensure_ascii=False,
         )[1:-1]
         return f"{self.__class__.__name__}({json_str})"
-
+        
     def __hash__(self):
         # convert examples to json string for hashing
         examples = []
@@ -285,19 +284,23 @@ def __hash__(self):
             examples.append(
                 (input_model.model_dump_json(), output_model.model_dump_json())
             )
-
-        # not sure if input_model and output_model should be included
-        return hash(
-            (
-                self.name,
-                self.input_model,
-                self.output_model,
-                self.instruction,
-                *examples,
-                self.language,
-            )
-        )
-
+
+        # create a SHA-256 hash object
+        hasher = hashlib.sha256()
+
+        # update the hash object with the bytes of each attribute
+        hasher.update(self.name.encode('utf-8'))
+        hasher.update(self.input_model.__name__.encode('utf-8'))
+        hasher.update(self.output_model.__name__.encode('utf-8'))
+        hasher.update(self.instruction.encode('utf-8'))
+        for example in examples:
+            hasher.update(example[0].encode('utf-8'))
+            hasher.update(example[1].encode('utf-8'))
+        hasher.update(self.language.encode('utf-8'))
+
+        # return the integer value of the hash
+        return int(hasher.hexdigest(), 16)
+
     def __eq__(self, other):
         if not isinstance(other, PydanticPrompt):
             return False