fix: remove reproducibility arg (#1790)
Remove the reproducibility arg in light of the alignment feature, which better
solves the same problem.
fixes: #1711
shahules786 authored Dec 24, 2024
1 parent 9403320 commit f2ec778
Showing 7 changed files with 0 additions and 99 deletions.
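
Migration note: the only caller-visible change is that `evaluate()` no longer accepts the `in_ci` keyword, and the metrics touched below no longer expose a `reproducibility` property. A minimal before/after sketch; the `dataset`, `metrics`, and `llm` values are assumed to be set up as usual and are elided here:

```python
from ragas import evaluate

# Before this commit: in_ci=True bumped metric.reproducibility from 1 to 3
# on metrics that supported it, trading extra runtime and cost for stability.
# scores = evaluate(dataset=dataset, metrics=metrics, llm=llm, in_ci=True)

# After this commit: drop the keyword (it is now an unexpected argument);
# the alignment feature is the supported way to get consistent scores.
scores = evaluate(dataset=dataset, metrics=metrics, llm=llm)
```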
16 changes: 0 additions & 16 deletions src/ragas/evaluation.py
@@ -35,7 +35,6 @@
     MetricWithLLM,
     MultiTurnMetric,
     SingleTurnMetric,
-    is_reproducable,
 )
 from ragas.run_config import RunConfig
 from ragas.utils import convert_v1_to_v2_dataset
@@ -60,7 +59,6 @@ def evaluate(
     llm: t.Optional[BaseRagasLLM | LangchainLLM] = None,
     embeddings: t.Optional[BaseRagasEmbeddings | LangchainEmbeddings] = None,
     callbacks: Callbacks = None,
-    in_ci: bool = False,
     run_config: t.Optional[RunConfig] = None,
     token_usage_parser: t.Optional[TokenUsageParser] = None,
     raise_exceptions: bool = False,
@@ -93,10 +91,6 @@ def evaluate(
         Lifecycle Langchain Callbacks to run during evaluation. Check the
         [langchain documentation](https://python.langchain.com/docs/modules/callbacks/)
         for more information.
-    in_ci: bool
-        Whether the evaluation is running in CI or not. If set to True then some
-        metrics will be run to increase the reproducability of the evaluations. This
-        will increase the runtime and cost of evaluations. Default is False.
     run_config: RunConfig, optional
         Configuration for runtime settings like timeout and retries. If not provided,
         default values are used.
@@ -193,7 +187,6 @@ def evaluate(
     binary_metrics = []
     llm_changed: t.List[int] = []
     embeddings_changed: t.List[int] = []
-    reproducable_metrics: t.List[int] = []
     answer_correctness_is_set = -1
 
     # loop through the metrics and perform initializations
@@ -214,12 +207,6 @@ def evaluate(
         if isinstance(metric, AnswerCorrectness):
             if metric.answer_similarity is None:
                 answer_correctness_is_set = i
-        # set reproducibility for metrics if in CI
-        if in_ci and is_reproducable(metric):
-            if metric.reproducibility == 1:  # type: ignore
-                # only set a value if not already set
-                metric.reproducibility = 3  # type: ignore
-            reproducable_metrics.append(i)
 
         # init all the models
         metric.init(run_config)
@@ -354,9 +341,6 @@ def evaluate(
                 AnswerCorrectness, metrics[answer_correctness_is_set]
             ).answer_similarity = None
 
-        for i in reproducable_metrics:
-            metrics[i].reproducibility = 1  # type: ignore
-
         # flush the analytics batcher
         from ragas._analytics import _analytics_batcher
 
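For reference, the deletions above removed a bump-then-restore lifecycle around scoring. A condensed sketch of the removed behavior, reconstructed from the hunks above (the helper names are mine, for illustration; the original inlined this logic in `evaluate()`):

```python
import typing as t


def _bump_reproducibility_for_ci(metrics: t.List[t.Any]) -> t.List[int]:
    """Removed behavior: before scoring in CI, raise reproducibility to 3."""
    bumped: t.List[int] = []
    for i, metric in enumerate(metrics):
        # is_reproducable() was a plain hasattr check (see the base.py hunk below)
        if hasattr(metric, "_reproducibility"):
            if metric.reproducibility == 1:
                # only set a value if not already set
                metric.reproducibility = 3
            bumped.append(i)
    return bumped


def _restore_reproducibility(metrics: t.List[t.Any], bumped: t.List[int]) -> None:
    """Removed behavior: after scoring, reset the bumped metrics back to 1."""
    for i in bumped:
        metrics[i].reproducibility = 1
```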
1 change: 0 additions & 1 deletion src/ragas/integrations/llama_index.py
@@ -97,7 +97,6 @@ def evaluate(
         callbacks=callbacks,
         show_progress=show_progress,
         run_config=run_config or RunConfig(),
-        in_ci=in_ci,
         token_usage_parser=token_usage_parser,
     )
 
18 changes: 0 additions & 18 deletions src/ragas/metrics/_context_precision.py
@@ -109,27 +109,10 @@ class LLMContextPrecisionWithReference(MetricWithLLM, SingleTurnMetric):
         default_factory=ContextPrecisionPrompt
     )
     max_retries: int = 1
-    _reproducibility: int = 1
 
     def _get_row_attributes(self, row: t.Dict) -> t.Tuple[str, t.List[str], t.Any]:
         return row["user_input"], row["retrieved_contexts"], row["reference"]
 
-    @property
-    def reproducibility(self):
-        return self._reproducibility
-
-    @reproducibility.setter
-    def reproducibility(self, value):
-        if value < 1:
-            logger.warning("reproducibility cannot be less than 1, setting to 1")
-            value = 1
-        elif value % 2 == 0:
-            logger.warning(
-                "reproducibility level cannot be set to even number, setting to odd"
-            )
-            value += 1
-        self._reproducibility = value
-
     def _calculate_average_precision(
         self, verifications: t.List[Verification]
     ) -> float:
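
The property/setter pair deleted here (and duplicated verbatim in `_context_recall.py`, `_faithfulness.py`, and `_noise_sensitivity.py` below) coerced every assignment to an odd integer of at least 1, plausibly because the value was forwarded as `n` to the prompt (next hunk) and an odd sample count avoids ties when the generations are ensembled. A standalone, runnable restatement of the deleted coercion rules:

```python
import logging

logger = logging.getLogger(__name__)


def coerce_reproducibility(value: int) -> int:
    """Mirrors the deleted setter: floor at 1, round even values up to odd."""
    if value < 1:
        logger.warning("reproducibility cannot be less than 1, setting to 1")
        return 1
    if value % 2 == 0:
        logger.warning(
            "reproducibility level cannot be set to even number, setting to odd"
        )
        return value + 1
    return value


assert coerce_reproducibility(0) == 1  # floored at 1
assert coerce_reproducibility(4) == 5  # even rounded up to odd
assert coerce_reproducibility(3) == 3  # odd values pass through
```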
@@ -173,7 +156,6 @@ async def _ascore(
                 context=context,
                 answer=reference,
             ),
-            n=self.reproducibility,
             llm=self.llm,
             callbacks=callbacks,
         )
23 changes: 0 additions & 23 deletions src/ragas/metrics/_context_recall.py
@@ -113,28 +113,6 @@ class LLMContextRecall(MetricWithLLM, SingleTurnMetric):
         default_factory=ContextRecallClassificationPrompt
     )
     max_retries: int = 1
-    _reproducibility: int = 1
-
-    @property
-    def reproducibility(self):
-        return self._reproducibility
-
-    @reproducibility.setter
-    def reproducibility(self, value):
-        if value < 1:
-            logger.warning("reproducibility cannot be less than 1, setting to 1")
-            value = 1
-        elif value % 2 == 0:
-            logger.warning(
-                "reproducibility level cannot be set to even number, setting to odd"
-            )
-            value += 1
-        self._reproducibility = value
-
-    def __post_init__(self) -> None:
-        if self.reproducibility < 1:
-            logger.warning("reproducibility cannot be less than 1, setting to 1")
-            self.reproducibility = 1
 
     def _compute_score(self, responses: t.List[ContextRecallClassification]) -> float:
         response = [1 if item.attributed else 0 for item in responses]
@@ -166,7 +144,6 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
                 ),
                 llm=self.llm,
                 callbacks=callbacks,
-                n=self.reproducibility,
             )
         )
         classification_dicts = []
17 changes: 0 additions & 17 deletions src/ragas/metrics/_faithfulness.py
@@ -178,23 +178,6 @@ class Faithfulness(MetricWithLLM, SingleTurnMetric):
     statement_prompt: PydanticPrompt = field(default_factory=LongFormAnswerPrompt)
     sentence_segmenter: t.Optional[HasSegmentMethod] = None
     max_retries: int = 1
-    _reproducibility: int = 1
-
-    @property
-    def reproducibility(self):
-        return self._reproducibility
-
-    @reproducibility.setter
-    def reproducibility(self, value):
-        if value < 1:
-            logger.warning("reproducibility cannot be less than 1, setting to 1")
-            value = 1
-        elif value % 2 == 0:
-            logger.warning(
-                "reproducibility level cannot be set to even number, setting to odd"
-            )
-            value += 1
-        self._reproducibility = value
 
     def __post_init__(self):
         if self.sentence_segmenter is None:
17 changes: 0 additions & 17 deletions src/ragas/metrics/_noise_sensitivity.py
@@ -49,23 +49,6 @@ class NoiseSensitivity(MetricWithLLM, SingleTurnMetric):
     statement_prompt: PydanticPrompt = field(default_factory=LongFormAnswerPrompt)
     sentence_segmenter: t.Optional[HasSegmentMethod] = None
     max_retries: int = 1
-    _reproducibility: int = 1
-
-    @property
-    def reproducibility(self):
-        return self._reproducibility
-
-    @reproducibility.setter
-    def reproducibility(self, value):
-        if value < 1:
-            logger.warning("reproducibility cannot be less than 1, setting to 1")
-            value = 1
-        elif value % 2 == 0:
-            logger.warning(
-                "reproducibility level cannot be set to even number, setting to odd"
-            )
-            value += 1
-        self._reproducibility = value
 
     def __post_init__(self):
         if self.sentence_segmenter is None:
7 changes: 0 additions & 7 deletions src/ragas/metrics/base.py
@@ -712,11 +712,4 @@ def get_segmenter(
     )
 
 
-def is_reproducable(metric: Metric) -> bool:
-    """
-    Check if a metric is reproducible by checking if it has a `_reproducibility` attribute.
-    """
-    return hasattr(metric, "_reproducibility")
-
-
 ensembler = Ensember()
