
Commit

Finalize examples
KaQuMiQ committed Jul 19, 2024
1 parent f6f90e7 commit 689e4bd
Showing 10 changed files with 357 additions and 170 deletions.
58 changes: 58 additions & 0 deletions src/draive/evaluators/score.py
@@ -0,0 +1,58 @@
from typing import Any

from draive.evaluation import EvaluationScore
from draive.parameters import DataModel, Field, ParameterValidationContext, ParameterValidationError

__all__ = [
"CommonScoreModel",
]


def _score_validator(
value: Any,
context: ParameterValidationContext,
) -> float:
match value:
case float() as float_value:
return float_value

case int() as int_value:
return float(int_value)

case str() as str_value:
try:
return float(str_value)

except Exception as exc:
raise ParameterValidationError.invalid_type(
context=context,
expected=float,
received=str,
) from exc

case _:
raise ParameterValidationError.invalid_type(
context=context,
expected=float,
received=type(value),
)


class CommonScoreModel(DataModel):
score: float = Field(
description="Decimal score value",
validator=_score_validator,
)
comment: str | None = Field(
description="Explanation of the score",
default=None,
)

def normalized(
self,
divider: float | None = None,
) -> EvaluationScore:
return EvaluationScore(
value=self.score / divider if divider else self.score,
comment=self.comment,
)
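
For orientation, here is a minimal sketch of how the new shared score model is meant to be used downstream. It relies only on what this diff shows (keyword construction, as in the evaluator examples below, plus the normalized helper) and assumes EvaluationScore exposes the value and comment it is constructed with; the concrete numbers are illustrative.

from draive.evaluators.score import CommonScoreModel

# The field validator accepts a float, an int, or a numeric string and
# stores the value as a float; anything else raises a validation error.
score = CommonScoreModel(score=3.0, comment="mostly coherent")

# normalized(divider=4) maps the 0-4 rubric onto the 0-1 range used by
# EvaluationScore: 3.0 / 4 -> 0.75; with no divider the raw value is kept.
normalized = score.normalized(divider=4)
print(normalized.value, normalized.comment)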
56 changes: 35 additions & 21 deletions src/draive/evaluators/text_coherence.py
@@ -1,17 +1,12 @@
from draive.evaluation import evaluator
from draive.evaluation import EvaluationScore, evaluator
from draive.evaluators.score import CommonScoreModel
from draive.generation import generate_model
from draive.parameters import DataModel

__all__ = [
"text_coherence_evaluator",
]


class CoherenceScore(DataModel):
score: float
comment: str | None = None


INSTRUCTION: str = """\
You will be given a reference text and a compared text based on the reference text.
Your task is to rate the compared text using only the Coherence metric, \
@@ -45,10 +40,14 @@ class CoherenceScore(DataModel):
do not exceed this value.
"""

INPUT: str = """
Reference text: {reference}
Compered text: {compared}
"""
INPUT_TEMPLATE: str = """
<REFERENCE_TEXT>
{reference}
</REFERENCE_TEXT>
<COMPARED_TEXT>
{compared}
</COMPARED_TEXT>
"""


@@ -57,14 +56,29 @@ async def text_coherence_evaluator(
compared: str,
/,
reference: str,
) -> float:
model: CoherenceScore = await generate_model(
CoherenceScore,
) -> EvaluationScore:
if not compared:
return EvaluationScore(
value=0,
comment="Input text was empty!",
)

if not reference:
return EvaluationScore(
value=0,
comment="Reference text was empty!",
)

score: CommonScoreModel = await generate_model(
CommonScoreModel,
instruction=INSTRUCTION,
input=INPUT.format(reference=reference, compared=compared),
input=INPUT_TEMPLATE.format(
reference=reference,
compared=compared,
),
examples=[
(
INPUT.format(
INPUT_TEMPLATE.format(
reference=(
"Solar energy is a renewable energy source that is gaining popularity. "
"Solar panels convert sunlight into electricity. "
@@ -78,10 +92,10 @@ async def text_coherence_evaluator(
"Technology is developing fast. People like to save money."
),
),
CoherenceScore(score=0.0),
CommonScoreModel(score=0.0),
),
(
INPUT.format(
INPUT_TEMPLATE.format(
reference=(
"Coffee is a popular beverage worldwide. "
"It's made from roasted coffee beans. Caffeine in coffee "
@@ -95,10 +109,10 @@ async def text_coherence_evaluator(
"Some people add milk or sugar to their coffee."
),
),
CoherenceScore(score=2.0),
CommonScoreModel(score=2.0),
),
(
INPUT.format(
INPUT_TEMPLATE.format(
reference=(
"Honey is a natural sweetener produced by bees. "
"It has antibacterial properties and is rich in antioxidants. "
@@ -113,8 +127,8 @@ async def text_coherence_evaluator(
"honey's high caloric content necessitates mindful consumption."
),
),
CoherenceScore(score=4.0),
CommonScoreModel(score=4.0),
),
],
)
return model.score / 4
return score.normalized(divider=4)
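
A sketch of how the rewritten evaluator could be called. It assumes the @evaluator-decorated coroutine stays awaitable with the signature shown above and that whatever model backend generate_model needs is already configured in the surrounding scope; the texts are hypothetical.

import asyncio

from draive.evaluators.text_coherence import text_coherence_evaluator


async def main() -> None:
    # The evaluator renders both texts into the new tag-delimited
    # INPUT_TEMPLATE and returns an EvaluationScore whose value is the
    # model's 0-4 rubric score divided by 4.
    result = await text_coherence_evaluator(
        "Solar panels turn sunlight into electricity and lower energy bills.",
        reference=(
            "Solar energy is a renewable energy source that is gaining popularity. "
            "Solar panels convert sunlight into electricity and can reduce energy bills."
        ),
    )
    print(result.value, result.comment)


asyncio.run(main())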
57 changes: 36 additions & 21 deletions src/draive/evaluators/text_conciseness.py
@@ -1,17 +1,12 @@
from draive.evaluation import evaluator
from draive.evaluation import EvaluationScore, evaluator
from draive.evaluators.score import CommonScoreModel
from draive.generation import generate_model
from draive.parameters import DataModel

__all__ = [
"text_conciseness_evaluator",
]


class ConcisenessScore(DataModel):
score: float
comment: str | None = None


INSTRUCTION: str = """\
You will be given a reference text and a compared text based on the reference text.
Your task is to rate the compared text using only the Conciseness metric, \
@@ -44,10 +39,15 @@ class ConcisenessScore(DataModel):
do not exceed this value.
"""

INPUT: str = """
Reference text: {reference}

Compered text: {compared}
"""
INPUT_TEMPLATE: str = """
<REFERENCE_TEXT>
{reference}
</REFERENCE_TEXT>
<COMPARED_TEXT>
{compared}
</COMPARED_TEXT>
"""


@@ -56,14 +56,29 @@ async def text_conciseness_evaluator(
compared: str,
/,
reference: str,
) -> float:
model: ConcisenessScore = await generate_model(
ConcisenessScore,
) -> EvaluationScore:
if not compared:
return EvaluationScore(
value=0,
comment="Input text was empty!",
)

if not reference:
return EvaluationScore(
value=0,
comment="Reference text was empty!",
)

score: CommonScoreModel = await generate_model(
CommonScoreModel,
instruction=INSTRUCTION,
input=f"Reference text: {reference}\n\nCompered text: {compared}",
input=INPUT_TEMPLATE.format(
reference=reference,
compared=compared,
),
examples=[
(
INPUT.format(
INPUT_TEMPLATE.format(
reference=(
"Solar energy is a renewable energy source that is gaining popularity. "
"Solar panels convert sunlight into electricity. "
@@ -85,10 +100,10 @@ async def text_conciseness_evaluator(
"but then you save on all those coffee shop visits."
),
),
ConcisenessScore(score=0.0),
CommonScoreModel(score=0.0),
),
(
INPUT.format(
INPUT_TEMPLATE.format(
reference=(
"Coffee is a popular beverage worldwide. "
"It's made from roasted coffee beans. Caffeine in coffee "
@@ -103,10 +118,10 @@ async def text_conciseness_evaluator(
"important to consume it in moderation."
),
),
ConcisenessScore(score=2.0),
CommonScoreModel(score=2.0),
),
(
INPUT.format(
INPUT_TEMPLATE.format(
reference=(
"The water cycle, also known as the hydrologic cycle, "
"describes the continuous movement of water within the Earth and "
@@ -118,8 +133,8 @@ async def text_conciseness_evaluator(
"It includes evaporation, condensation, precipitation, and runoff."
),
),
ConcisenessScore(score=4.0),
CommonScoreModel(score=4.0),
),
],
)
return model.score / 4
return score.normalized(divider=4)
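
One behavioural detail shared by these rewrites: an empty compared or reference text now short-circuits before any model call. A small sketch under the same assumptions as the previous example:

import asyncio

from draive.evaluators.text_conciseness import text_conciseness_evaluator


async def main() -> None:
    # An empty compared text never reaches generate_model; the guard
    # returns EvaluationScore(value=0, comment="Input text was empty!").
    empty = await text_conciseness_evaluator(
        "",
        reference="Coffee is a popular beverage worldwide.",
    )
    print(empty.value, empty.comment)


asyncio.run(main())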
57 changes: 36 additions & 21 deletions src/draive/evaluators/text_consistency.py
@@ -1,17 +1,12 @@
from draive.evaluation import evaluator
from draive.evaluation import EvaluationScore, evaluator
from draive.evaluators.score import CommonScoreModel
from draive.generation import generate_model
from draive.parameters import DataModel

__all__ = [
"text_consistency_evaluator",
]


class ConsistencyScore(DataModel):
score: float
comment: str | None = None


INSTRUCTION: str = """\
You will be given a reference text and a compared text based on the reference text.
Your task is to rate the compared text using only the Consistency metric, \
@@ -47,10 +42,15 @@ class ConsistencyScore(DataModel):
do not exceed this value.
"""

INPUT: str = """
Reference text: {reference}

Compered text: {compared}
"""
INPUT_TEMPLATE: str = """
<REFERENCE_TEXT>
{reference}
</REFERENCE_TEXT>
<COMPARED_TEXT>
{compared}
</COMPARED_TEXT>
"""


@@ -59,14 +59,29 @@ async def text_consistency_evaluator(
compared: str,
/,
reference: str,
) -> float:
model: ConsistencyScore = await generate_model(
ConsistencyScore,
) -> EvaluationScore:
if not compared:
return EvaluationScore(
value=0,
comment="Input text was empty!",
)

if not reference:
return EvaluationScore(
value=0,
comment="Reference text was empty!",
)

score: CommonScoreModel = await generate_model(
CommonScoreModel,
instruction=INSTRUCTION,
input=f"Reference text: {reference}\n\nCompered text: {compared}",
input=INPUT_TEMPLATE.format(
reference=reference,
compared=compared,
),
examples=[
(
INPUT.format(
INPUT_TEMPLATE.format(
reference=(
"Dolphins are intelligent marine mammals. They use echolocation "
"to navigate and hunt. Dolphins live in social groups called pods."
@@ -77,10 +92,10 @@ async def text_consistency_evaluator(
"to learn hunting techniques."
),
),
ConsistencyScore(score=0.0),
CommonScoreModel(score=0.0),
),
(
INPUT.format(
INPUT_TEMPLATE.format(
reference=(
"Coffee is a popular beverage worldwide. "
"It's made from roasted coffee beans. Caffeine in coffee "
@@ -95,10 +110,10 @@ async def text_consistency_evaluator(
"the risk of certain diseases."
),
),
ConsistencyScore(score=2.0),
CommonScoreModel(score=2.0),
),
(
INPUT.format(
INPUT_TEMPLATE.format(
reference=(
"Photosynthesis is the process by which plants use sunlight to "
"produce energy. It requires water, carbon dioxide, and chlorophyll. "
@@ -111,8 +126,8 @@ async def text_consistency_evaluator(
"they release oxygen into the environment."
),
),
ConsistencyScore(score=4.0),
CommonScoreModel(score=4.0),
),
],
)
return model.score / 4
return score.normalized(divider=4)
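
Since the three rewritten evaluators now share one signature and one score model, they can be fanned out over a single text pair. This is a sketch only: the evaluator names and call style come from this diff, and the model/scope setup required by generate_model is assumed to be in place.

import asyncio

from draive.evaluators.text_coherence import text_coherence_evaluator
from draive.evaluators.text_conciseness import text_conciseness_evaluator
from draive.evaluators.text_consistency import text_consistency_evaluator


async def evaluate_pair(compared: str, reference: str) -> None:
    # Run the three rubric evaluators concurrently on the same text pair;
    # each one returns an EvaluationScore already normalized to 0-1.
    coherence, conciseness, consistency = await asyncio.gather(
        text_coherence_evaluator(compared, reference=reference),
        text_conciseness_evaluator(compared, reference=reference),
        text_consistency_evaluator(compared, reference=reference),
    )
    for name, score in (
        ("coherence", coherence),
        ("conciseness", conciseness),
        ("consistency", consistency),
    ):
        print(name, score.value, score.comment)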
