Skip to content

Commit

Permalink
Adjustment when calculating hash | Adjustment of the hash calculation… (
Browse files Browse the repository at this point in the history
#1837)

… method

When trying to load the saved models after adaptation, alerts like these
were always triggered:

Loaded prompt hash does not match the saved hash.
Loaded prompt hash does not match the saved hash.

Furthermore, in Python, the default hash() function may yield different
results for the same string across different sessions. To achieve
consistent hash values, I am now using the hashlib module, which provides
stable hashing algorithms, to calculate the hash of the prompt.
  • Loading branch information
joaorura authored Jan 14, 2025
1 parent 433d84f commit f2d1ce1
Showing 1 changed file with 23 additions and 20 deletions.
43 changes: 23 additions & 20 deletions src/ragas/prompt/pydantic_prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import json
import logging
import os
import hashlib

import typing as t

from langchain_core.exceptions import OutputParserException
Expand Down Expand Up @@ -226,12 +228,7 @@ async def adapt(
"""
Adapt the prompt to a new language.
"""

# set the original hash, this is used to
# identify the original prompt object when loading from file
if self.original_hash is None:
self.original_hash = hash(self)


strings = get_all_strings(self.examples)
translated_strings = await translate_statements_prompt.generate(
llm=llm,
Expand All @@ -257,6 +254,8 @@ async def adapt(
)
new_prompt.instruction = translated_instruction.statements[0]

new_prompt.original_hash = hash(new_prompt)

return new_prompt

def __repr__(self):
Expand All @@ -276,7 +275,7 @@ def __str__(self):
ensure_ascii=False,
)[1:-1]
return f"{self.__class__.__name__}({json_str})"

def __hash__(self):
# convert examples to json string for hashing
examples = []
Expand All @@ -285,19 +284,23 @@ def __hash__(self):
examples.append(
(input_model.model_dump_json(), output_model.model_dump_json())
)

# not sure if input_model and output_model should be included
return hash(
(
self.name,
self.input_model,
self.output_model,
self.instruction,
*examples,
self.language,
)
)


# create a SHA-256 hash object
hasher = hashlib.sha256()

# update the hash object with the bytes of each attribute
hasher.update(self.name.encode('utf-8'))
hasher.update(self.input_model.__name__.encode('utf-8'))
hasher.update(self.output_model.__name__.encode('utf-8'))
hasher.update(self.instruction.encode('utf-8'))
for example in examples:
hasher.update(example[0].encode('utf-8'))
hasher.update(example[1].encode('utf-8'))
hasher.update(self.language.encode('utf-8'))

# return the integer value of the hash
return int(hasher.hexdigest(), 16)

def __eq__(self, other):
if not isinstance(other, PydanticPrompt):
return False
Expand Down

0 comments on commit f2d1ce1

Please sign in to comment.