diff --git a/pyproject.toml b/pyproject.toml
index 86d0d59..3f50e73 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,8 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "draive"
-version = "0.24.1"
+description = "Framework designed to simplify and accelerate the development of LLM-based applications."
+version = "0.25.0"
 readme = "README.md"
 maintainers = [
   { name = "Kacper Kaliński", email = "kacper.kalinski@miquido.com" },
@@ -20,21 +21,42 @@ classifiers = [
   "Topic :: Software Development :: Libraries :: Application Frameworks",
 ]
 license = { file = "LICENSE" }
-dependencies = ["numpy~=1.26"]
+dependencies = [
+  "numpy~=1.26",
+]
 
 [project.urls]
 Homepage = "https://miquido.com"
 Repository = "https://github.com/miquido/draive.git"
 
 [project.optional-dependencies]
-sentencepiece = ["sentencepiece~=0.2"]
-fastembed = ["fastembed~=0.3.0"]
-openai = ["openai~=1.32", "tiktoken~=0.7"]
-anthropic = ["anthropic~=0.29.0"]
-mistral = ["httpx~=0.27", "draive[sentencepiece]"]
-gemini = ["httpx~=0.27", "draive[sentencepiece]"]
-ollama = ["httpx~=0.27"]
-mistralrs = ["mistralrs~=0.1.19"]
+sentencepiece = [
+  "sentencepiece~=0.2",
+]
+fastembed = [
+  "fastembed~=0.3.0",
+]
+openai = [
+  "openai~=1.32",
+  "tiktoken~=0.7",
+]
+anthropic = [
+  "anthropic~=0.29.0",
+]
+mistral = [
+  "httpx~=0.27",
+  "draive[sentencepiece]",
+]
+gemini = [
+  "httpx~=0.27",
+  "draive[sentencepiece]",
+]
+ollama = [
+  "httpx~=0.27",
+]
+mistralrs = [
+  "mistralrs~=0.1.19",
+]
 
 dev = [
   "draive[sentencepiece]",
diff --git a/src/draive/anthropic/lmm.py b/src/draive/anthropic/lmm.py
index a2db894..82c653c 100644
--- a/src/draive/anthropic/lmm.py
+++ b/src/draive/anthropic/lmm.py
@@ -360,7 +360,7 @@ async def _completion(  # noqa: PLR0913, PLR0912, C901
             else:
                 raise AnthropicException("Invalid Anthropic completion", completion)
 
-        case "end_turn":
+        case "end_turn" | "stop_sequence":
             if (tool_calls := tool_calls) and (tools := tools):
                 ctx.record(ResultTrace.of(tool_calls))
                 return LMMToolRequests(
diff --git a/src/draive/evaluation/scenario.py b/src/draive/evaluation/scenario.py
index 6e1c02a..84ae8b7 100644
--- a/src/draive/evaluation/scenario.py
+++ b/src/draive/evaluation/scenario.py
@@ -1,8 +1,7 @@
-from asyncio import gather
 from collections.abc import Callable, Sequence
 from typing import Protocol, overload, runtime_checkable
 
-from draive.evaluation.evaluator import EvaluatorResult, PreparedEvaluator
+from draive.evaluation.evaluator import EvaluatorResult
 from draive.parameters import DataModel, Field
 from draive.types import frozenlist
 from draive.utils import freeze
@@ -33,6 +32,7 @@ class PreparedScenarioEvaluator[Value](Protocol):
     async def __call__(
         self,
         value: Value,
+        /,
     ) -> ScenarioEvaluatorResult: ...
 
 
@@ -41,11 +41,13 @@ class ScenarioEvaluatorDefinition[Value, **Args](Protocol):
     @property
     def __name__(self) -> str: ...
 
-    def __call__(
+    async def __call__(
         self,
+        value: Value,
+        /,
         *args: Args.args,
         **kwargs: Args.kwargs,
-    ) -> Sequence[PreparedEvaluator[Value]] | PreparedEvaluator[Value]: ...
+    ) -> Sequence[EvaluatorResult]: ...
 
 
 class ScenarioEvaluator[Value, **Args]:
@@ -64,25 +66,13 @@ def prepared(
         *args: Args.args,
         **kwargs: Args.kwargs,
     ) -> PreparedScenarioEvaluator[Value]:
-        prepared_evaluators: Sequence[PreparedEvaluator[Value]]
-        match self._definition(*args, **kwargs):
-            case [*evaluators]:
-                prepared_evaluators = evaluators
-
-            case evaluator:
-                prepared_evaluators = (evaluator,)
-
         async def evaluate(
             value: Value,
         ) -> ScenarioEvaluatorResult:
-            return ScenarioEvaluatorResult(
-                name=self.name,
-                evaluations=tuple(
-                    await gather(
-                        *[evaluator(value) for evaluator in prepared_evaluators],
-                        return_exceptions=False,
-                    ),
-                ),
+            return await self(
+                value,
+                *args,
+                **kwargs,
             )
 
         return evaluate
@@ -94,21 +84,14 @@ async def __call__(
         *args: Args.args,
         **kwargs: Args.kwargs,
     ) -> ScenarioEvaluatorResult:
-        prepared_evaluators: Sequence[PreparedEvaluator[Value]]
-        match self._definition(*args, **kwargs):
-            case [*evaluators]:
-                prepared_evaluators = evaluators
-
-            case evaluator:
-                prepared_evaluators = (evaluator,)
-
         return ScenarioEvaluatorResult(
             name=self.name,
             evaluations=tuple(
-                await gather(
-                    *[evaluator(value) for evaluator in prepared_evaluators],
-                    return_exceptions=False,
-                ),
+                await self._definition(
+                    value,
+                    *args,
+                    **kwargs,
+                )
             ),
         )
 
diff --git a/src/draive/evaluation/suite.py b/src/draive/evaluation/suite.py
index b1afbe2..bbe592d 100644
--- a/src/draive/evaluation/suite.py
+++ b/src/draive/evaluation/suite.py
@@ -1,10 +1,11 @@
 from asyncio import Lock, gather
 from collections.abc import Callable
 from pathlib import Path
-from typing import Protocol, overload, runtime_checkable
+from typing import Protocol, Self, overload, runtime_checkable
 from uuid import UUID, uuid4
 
-from draive.evaluation.scenario import ScenarioEvaluatorResult
+from draive.evaluation.evaluator import EvaluatorResult, PreparedEvaluator
+from draive.evaluation.scenario import PreparedScenarioEvaluator, ScenarioEvaluatorResult
 from draive.parameters import DataModel, Field
 from draive.scope import ctx
 from draive.types import frozenlist
@@ -33,7 +34,7 @@ class EvaluationSuiteCaseResult[CaseParameters: DataModel, Value: DataModel | st
     value: Value = Field(
         description="Evaluated value",
     )
-    results: frozenlist[ScenarioEvaluatorResult] = Field(
+    results: frozenlist[ScenarioEvaluatorResult | EvaluatorResult] = Field(
         description="Evaluation results",
     )
 
@@ -43,10 +44,40 @@ def passed(self) -> bool:
 
 
 class EvaluationCaseResult[Value: DataModel | str](DataModel):
+    @classmethod
+    def of(
+        cls,
+        results: ScenarioEvaluatorResult | EvaluatorResult,
+        *_results: ScenarioEvaluatorResult | EvaluatorResult,
+        value: Value,
+    ) -> Self:
+        return cls(
+            value=value,
+            results=(results, *_results),
+        )
+
+    @classmethod
+    async def evaluating(
+        cls,
+        value: Value,
+        /,
+        evaluators: PreparedScenarioEvaluator[Value] | PreparedEvaluator[Value],
+        *_evaluators: PreparedScenarioEvaluator[Value] | PreparedEvaluator[Value],
+    ) -> Self:
+        return cls(
+            value=value,
+            results=tuple(
+                await gather(
+                    *[evaluator(value) for evaluator in [evaluators, *_evaluators]],
+                    return_exceptions=False,
+                ),
+            ),
+        )
+
     value: Value = Field(
         description="Evaluated value",
     )
-    results: frozenlist[ScenarioEvaluatorResult] = Field(
+    results: frozenlist[ScenarioEvaluatorResult | EvaluatorResult] = Field(
         description="Evaluation results",
     )
 
@@ -55,7 +86,7 @@ class EvaluationCaseResult[Value: DataModel | str](DataModel):
 class EvaluationSuiteDefinition[CaseParameters: DataModel, Value: DataModel | str](Protocol):
     async def __call__(
         self,
-        evaluation_case: CaseParameters,
+        parameters: CaseParameters,
     ) -> EvaluationCaseResult[Value]: ...
 
 
@@ -89,33 +120,36 @@ def __init__(
     @overload
     async def __call__(
         self,
+        parameters: CaseParameters | UUID | None,
+        /,
         *,
-        evaluated_case: CaseParameters | UUID | None,
         reload: bool = False,
     ) -> EvaluationSuiteCaseResult[CaseParameters, Value]: ...
 
     @overload
     async def __call__(
         self,
+        /,
         *,
         reload: bool = False,
     ) -> list[EvaluationSuiteCaseResult[CaseParameters, Value]]: ...
 
     async def __call__(
         self,
+        parameters: CaseParameters | UUID | None = None,
+        /,
         *,
-        evaluated_case: CaseParameters | UUID | None = None,
         reload: bool = False,
     ) -> (
         list[EvaluationSuiteCaseResult[CaseParameters, Value]]
         | EvaluationSuiteCaseResult[CaseParameters, Value]
     ):
         async with self._lock:
-            match evaluated_case:
+            match parameters:
                 case None:
                     return await gather(
                         *[
-                            self._evaluate(evaluated_case=case)
+                            self._evaluate(case=case)
                             for case in (await self._data(reload=reload)).cases
                         ],
                         return_exceptions=False,
@@ -130,14 +164,14 @@ async def __call__(
                         iter([case for case in available_cases if case.identifier == identifier]),
                         None,
                     ):
-                        return await self._evaluate(evaluated_case=evaluation_case)
+                        return await self._evaluate(case=evaluation_case)
 
                     else:
                         raise ValueError(f"Evaluation case with ID {identifier} does not exists.")
 
                 case case_parameters:
                     return await self._evaluate(
-                        evaluated_case=EvaluationSuiteCase[CaseParameters](
+                        case=EvaluationSuiteCase[CaseParameters](
                             parameters=case_parameters,
                         )
                     )
@@ -145,27 +179,25 @@ async def __call__(
     async def _evaluate(
         self,
         *,
-        evaluated_case: EvaluationSuiteCase[CaseParameters],
+        case: EvaluationSuiteCase[CaseParameters],
     ) -> EvaluationSuiteCaseResult[CaseParameters, Value]:
-        case_result: EvaluationCaseResult[Value] = await self._definition(
-            evaluation_case=evaluated_case.parameters
-        )
+        result: EvaluationCaseResult[Value] = await self._definition(parameters=case.parameters)
 
         return EvaluationSuiteCaseResult[CaseParameters, Value](
-            case=evaluated_case,
-            value=case_result.value,
-            results=case_result.results,
+            case=case,
+            value=result.value,
+            results=result.results,
         )
 
     async def _data(
         self,
         reload: bool = False,
     ) -> EvaluationSuiteData[CaseParameters]:
-        if (data := self._data_cache) and not reload:
-            return data
+        if reload or self._data_cache is None:
+            self._data_cache = await self._storage.load()
+            return self._data_cache
 
         else:
-            self._data_cache = await self._storage.load()
             return self._data_cache
 
     async def cases(