Skip to content

Commit

Permalink
Evals
Browse files Browse the repository at this point in the history
  • Loading branch information
ddebowczyk committed Oct 20, 2024
1 parent bc97f2e commit 720de6f
Show file tree
Hide file tree
Showing 76 changed files with 576 additions and 226 deletions.
5 changes: 1 addition & 4 deletions docs/cookbook/examples/extras/image_car_damage.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,7 @@ Here's the image we're going to extract data from.
$loader = require 'vendor/autoload.php';
$loader->add('Cognesy\\Instructor\\', __DIR__ . '../../src/');

use Cognesy\Instructor\Extras\Image\Image;
use Cognesy\Instructor\Features\Schema\Attributes\Description;
use Cognesy\Instructor\Instructor;
use Cognesy\Instructor\Utils\Str;
use Cognesy\Instructor\Extras\Image\Image;use Cognesy\Instructor\Features\Schema\Attributes\Description;use Cognesy\Instructor\Instructor;use Cognesy\Instructor\Utils\Str;

enum DamageSeverity : string {
case Minor = 'minor';
Expand Down
3 changes: 1 addition & 2 deletions docs/cookbook/examples/extras/image_to_data.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@ Here's the image we're going to extract data from.
$loader = require 'vendor/autoload.php';
$loader->add('Cognesy\\Instructor\\', __DIR__ . '../../src/');

use Cognesy\Instructor\Extras\Image\Image;
use Cognesy\Instructor\Instructor;
use Cognesy\Instructor\Extras\Image\Image;use Cognesy\Instructor\Instructor;

class Vendor {
public ?string $name = '';
Expand Down
4 changes: 1 addition & 3 deletions docs/cookbook/examples/extras/image_to_data_anthropic.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,7 @@ Here's the image we're going to extract data from.
$loader = require 'vendor/autoload.php';
$loader->add('Cognesy\\Instructor\\', __DIR__ . '../../src/');

use Cognesy\Instructor\Enums\Mode;
use Cognesy\Instructor\Extras\Image\Image;
use Cognesy\Instructor\Instructor;
use Cognesy\Instructor\Enums\Mode;use Cognesy\Instructor\Extras\Image\Image;use Cognesy\Instructor\Instructor;

class Vendor {
public ?string $name = '';
Expand Down
4 changes: 1 addition & 3 deletions docs/cookbook/examples/extras/image_to_data_gemini.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,7 @@ Here's the image we're going to extract data from.
$loader = require 'vendor/autoload.php';
$loader->add('Cognesy\\Instructor\\', __DIR__ . '../../src/');

use Cognesy\Instructor\Enums\Mode;
use Cognesy\Instructor\Extras\Image\Image;
use Cognesy\Instructor\Instructor;
use Cognesy\Instructor\Enums\Mode;use Cognesy\Instructor\Extras\Image\Image;use Cognesy\Instructor\Instructor;

class Vendor {
public ?string $name = '';
Expand Down
2 changes: 1 addition & 1 deletion docs/cookbook/examples/extras/web_to_objects.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ $loader = require 'vendor/autoload.php';
$loader->add('Cognesy\\Instructor\\', __DIR__ . '../../src/');

use Cognesy\Instructor\Enums\Mode;
use Cognesy\Instructor\Extras\Web\Webpage;
use Cognesy\Instructor\Features\Schema\Attributes\Instructions;
use Cognesy\Instructor\Instructor;
use Cognesy\Instructor\Utils\Web\Webpage;

class Company {
public string $name = '';
Expand Down
7 changes: 5 additions & 2 deletions docs/internals/script.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ The Script class serves as a flexible and dynamic container for managing, hydrat

```php
<?php
use Cognesy\Instructor\Utils\Messages\Script;use Cognesy\Instructor\Utils\Messages\Section;
use Cognesy\Instructor\Utils\Messages\Script;
use Cognesy\Instructor\Utils\Messages\Section;

// Create sections
$section1 = new Section('introduction');
Expand Down Expand Up @@ -154,7 +155,9 @@ The Script class uses several internal traits to handle various aspects of its f

```php
<?php
use Cognesy\Instructor\Utils\Messages\Script;use Cognesy\Instructor\Utils\Messages\ScriptParameters;use Cognesy\Instructor\Utils\Messages\Section;
use Cognesy\Instructor\Utils\Messages\Script;
use Cognesy\Instructor\Utils\Messages\ScriptParameters;
use Cognesy\Instructor\Utils\Messages\Section;

// Define sections
$intro = (new Section('introduction'))
Expand Down
2 changes: 1 addition & 1 deletion evals/ComplexExtraction/run.php
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public function __construct(array $expectations) {
public function evaluate(Execution $execution) : Evaluation {
$expectedEvents = $this->expectations['events'];
/** @var Sequence $events */
$events = $execution->data()->get('response')?->value();
$events = $execution->get('response')?->value();
$result = ($expectedEvents - count($events->list)) / $expectedEvents;
return new Evaluation(
metric: new PercentageCorrectness('found', $result),
Expand Down
10 changes: 6 additions & 4 deletions evals/LLMModes/CompanyEval.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ public function __construct(array $expectations) {
}

public function evaluate(Execution $execution) : Evaluation {
$isCorrect = match ($execution->data()->get('mode')) {
$mode = $execution->get('case.mode');
$isCorrect = match ($mode) {
Mode::Text => $this->validateText($execution),
Mode::Tools => $this->validateToolsData($execution),
default => $this->validateDefault($execution),
Expand All @@ -35,21 +36,22 @@ public function evaluate(Execution $execution) : Evaluation {
// INTERNAL /////////////////////////////////////////////////

private function validateToolsData(Execution $execution) : bool {
$data = $execution->data()->get('response')->toolsData[0];
$data = $execution->get('response')->toolsData[0];
return 'store_company' === ($data['name'] ?? '')
&& 'ACME' === ($data['arguments']['name'] ?? '')
&& 2020 === (int) ($data['arguments']['year'] ?? 0);
}

private function validateDefault(Execution $execution) : bool {
$decoded = $execution->data()->get('response')?->json()->toArray();
$decoded = $execution->get('response')?->json()->toArray();
return $this->expectations['name'] === ($decoded['name'] ?? '')
&& $this->expectations['year'] === ($decoded['year'] ?? 0);
}

private function validateText(Execution $execution) : bool {
$content = $execution->get('response')?->content();
return Str::contains(
$execution->data()->get('response')?->content(),
$content,
[
$this->expectations['name'],
(string) $this->expectations['year']
Expand Down
18 changes: 9 additions & 9 deletions examples/A05_Extras/ImageCarDamage/run.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

Here's the image we're going to extract data from.

![Receipt](/images/car-damage.jpg)
![Car Photo](/images/car-damage.jpg)


## Example
Expand All @@ -29,7 +29,6 @@

use Cognesy\Instructor\Extras\Image\Image;
use Cognesy\Instructor\Features\Schema\Attributes\Description;
use Cognesy\Instructor\Instructor;
use Cognesy\Instructor\Utils\Str;

enum DamageSeverity : string {
Expand Down Expand Up @@ -66,13 +65,14 @@ class DamageAssessment {
public string $summary;
}

$assessment = (new Instructor)->respond(
input: Image::fromFile(__DIR__ . '/car-damage.jpg'),
responseModel: DamageAssessment::class,
prompt: 'Identify and assess each car damage location and severity separately.',
model: 'gpt-4o',
options: ['max_tokens' => 4096]
);
$assessment = Image::fromFile(__DIR__ . '/car-damage.jpg')
->toData(
responseModel: DamageAssessment::class,
prompt: 'Identify and assess each car damage location and severity separately.',
connection: 'openai',
model: 'gpt-4o',
options: ['max_tokens' => 4096]
);

dump($assessment);
assert(Str::contains($assessment->make, 'Toyota', false));
Expand Down
2 changes: 1 addition & 1 deletion examples/A05_Extras/WebToObjects/run.php
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@
$loader->add('Cognesy\\Instructor\\', __DIR__ . '../../src/');

use Cognesy\Instructor\Enums\Mode;
use Cognesy\Instructor\Extras\Web\Webpage;
use Cognesy\Instructor\Features\Schema\Attributes\Instructions;
use Cognesy\Instructor\Instructor;
use Cognesy\Instructor\Utils\Web\Webpage;

class Company {
public string $name = '';
Expand Down
8 changes: 4 additions & 4 deletions src/Extras/Evals/Console/Display.php
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ public function footer(Experiment $experiment) {
}

public function before(Execution $execution) : void {
$connection = $execution->data()->get('connection');
$mode = $execution->data()->get('mode')->value;
$streamed = $execution->data()->get('isStreamed');
$connection = $execution->get('case.connection');
$mode = $execution->get('case.mode')->value;
$streamed = $execution->get('case.isStreamed');

Console::printColumns([
[10, $connection, STR_PAD_RIGHT, Color::WHITE],
Expand Down Expand Up @@ -85,7 +85,7 @@ public function displayExceptions(array $exceptions) : void {
// INTERNAL /////////////////////////////////////////////////

private function displayResult(Execution $execution) : void {
$answer = $execution->data()->get('notes');
$answer = $execution->get('output.notes');
$answerLine = str_replace("\n", '\n', $answer);
$timeElapsed = $execution->timeElapsed();
$tokensPerSec = $execution->outputTps();
Expand Down
18 changes: 7 additions & 11 deletions src/Extras/Evals/Data/Evaluation.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

use Cognesy\Instructor\Extras\Evals\Contracts\Metric;
use Cognesy\Instructor\Features\LLM\Data\Usage;
use Cognesy\Instructor\Utils\DataMap;
use Cognesy\Instructor\Utils\Uuid;
use DateTime;

Expand All @@ -13,7 +14,7 @@ class Evaluation
private ?DateTime $startedAt;
private float $timeElapsed = 0.0;
private ?Usage $usage;
private array $metadata;
private DataMap $data;

public ?Metric $metric = null;
public ?Feedback $feedback = null;
Expand All @@ -29,7 +30,7 @@ public function __construct(
$this->metric = $metric;
$this->feedback = $feedback;
$this->usage = $usage;
$this->metadata = $metadata;
$this->data = new DataMap($metadata);
}

public function id() : string {
Expand All @@ -53,21 +54,16 @@ public function usage() : Usage {
return $this->usage;
}

public function metric() : Metric {
return $this->metric;
public function data() : DataMap {
return $this->data;
}

/**
 * Returns the feedback attached to this evaluation.
 *
 * The backing property is declared nullable with a null default, while this
 * method's return type is non-nullable. When no feedback has been set, an
 * empty Feedback (Feedback::none()) is returned instead of letting the
 * null value trigger a TypeError on return.
 */
public function feedback() : Feedback {
    return $this->feedback ?? Feedback::none();
}

public function metadata(string $key, mixed $default = null) : mixed {
return $this->metadata[$key] ?? $default;
}

public function withMetadata(string $key, mixed $value) : self {
$this->metadata[$key] = $value;
return $this;
/**
 * Returns the metric recorded for this evaluation.
 *
 * NOTE(review): the $metric property is declared as ?Metric with a null
 * default, but this return type is non-nullable - confirm a metric is always
 * set before this accessor is called.
 */
public function metric() : Metric {
return $this->metric;
}

public function hasMetric(string $metricName) : bool {
Expand Down
15 changes: 9 additions & 6 deletions src/Extras/Evals/Data/Feedback.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@

class Feedback
{
/** @var ParameterFeedback[] $items */
/** @var FeedbackItem[] $items */
private array $items;

/**
* @param string|FeedbackItem[] $items
*/
public function __construct(
string|array $items = []
) {
Expand All @@ -17,12 +20,12 @@ public static function none() : static {
return new static();
}

/** @return ParameterFeedback[] */
/** @return FeedbackItem[] */
public function items() : array {
return $this->items;
}

public function add(?ParameterFeedback $item) : static {
public function add(?FeedbackItem $item) : static {
if (is_null($item)) {
return $this;
}
Expand Down Expand Up @@ -50,7 +53,7 @@ public function __toString() : string {
return implode(
separator: "\n",
array: array_map(
callback: fn(ParameterFeedback $item) => $item->parameterName . ': ' . $item->feedback,
callback: fn(FeedbackItem $item) => $item->context . ': ' . $item->feedback,
array: $this->items
));
}
Expand All @@ -59,7 +62,7 @@ public function __toString() : string {

/**
* @param array|string $items
* @return ParameterFeedback[]
* @return FeedbackItem[]
*/
private function toFeedbackItems(array|string $items) : array {
$feedbackItems = [];
Expand All @@ -70,7 +73,7 @@ private function toFeedbackItems(array|string $items) : array {
}
$param = $item['parameterName'] ?? '';
$feedback = $item['feedback'] ?? '';
$feedbackItems[] = new ParameterFeedback($param, $feedback);
$feedbackItems[] = new FeedbackItem($param, $feedback);
}
return $feedbackItems;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,25 @@

namespace Cognesy\Instructor\Extras\Evals\Data;

use Cognesy\Instructor\Extras\Evals\Enums\FeedbackCategory;
use Cognesy\Instructor\Features\Schema\Attributes\Description;

class ParameterFeedback
class FeedbackItem
{
#[Description('The name of the parameter that the feedback is about.')]
public string $parameterName = '';
public string $context = '';
#[Description('The feedback on the parameters correctness or the issues with its value.')]
public string $feedback = '';
#[Description('The category of the feedback.')]
public FeedbackCategory $category;

public function __construct(
string $parameterName = '',
string $context = '',
string $feedback = '',
FeedbackCategory $category = FeedbackCategory::Other,
) {
$this->parameterName = $parameterName;
$this->context = $context;
$this->feedback = $feedback;
$this->category = $category;
}
}
10 changes: 10 additions & 0 deletions src/Extras/Evals/Enums/FeedbackCategory.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?php

namespace Cognesy\Instructor\Extras\Evals\Enums;

/**
 * Category attached to a single feedback item.
 *
 * String-backed enum; the backing value of every case is identical to the
 * case name, so from()/tryFrom() lookups are case-sensitive.
 */
enum FeedbackCategory: string
{
    case Error = 'Error';
    case Improvement = 'Improvement';
    case Other = 'Other';
}
21 changes: 12 additions & 9 deletions src/Extras/Evals/Evaluators/ArrayMatchEval.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
use Cognesy\Instructor\Extras\Evals\Contracts\CanEvaluateExecution;
use Cognesy\Instructor\Extras\Evals\Data\Evaluation;
use Cognesy\Instructor\Extras\Evals\Data\Feedback;
use Cognesy\Instructor\Extras\Evals\Data\ParameterFeedback;
use Cognesy\Instructor\Extras\Evals\Data\FeedbackItem;
use Cognesy\Instructor\Extras\Evals\Enums\FeedbackCategory;
use Cognesy\Instructor\Extras\Evals\Execution;
use Cognesy\Instructor\Extras\Evals\Metrics\Generic\MatchCount;
use Cognesy\Instructor\Extras\Evals\Utils\CompareNestedArrays;
Expand All @@ -20,7 +21,7 @@ public function __construct(
) {}

public function evaluate(Execution $execution): Evaluation {
$data = $execution->data()->get('response')?->json()->toArray();
$data = $execution->get('response')?->json()->toArray();
$differences = (new CompareNestedArrays)->compare($this->expected, $data);
$total = count((new Dot($data))->flatten());
$matches = $total - count($differences);
Expand All @@ -44,15 +45,17 @@ private function makeFeedback(array $differences) : Feedback {
return $feedback;
}

private function getFeedback(string $key, mixed $expectedVal, mixed $actualVal) : ?ParameterFeedback {
private function getFeedback(string $key, mixed $expectedVal, mixed $actualVal) : ?FeedbackItem {
return match(true) {
($expectedVal !== null) && ($actualVal === null) => new ParameterFeedback(
parameterName: $key,
feedback: "Expected `$key`, but param not found in result"
($expectedVal !== null) && ($actualVal === null) => new FeedbackItem(
context: $key,
feedback: "Expected `$key`, but param not found in result",
category: FeedbackCategory::Error
),
($actualVal !== $expectedVal) => new ParameterFeedback(
parameterName: $key,
feedback: "Expected `$key` value `$expectedVal`, but actual is `$actualVal`"
($actualVal !== $expectedVal) => new FeedbackItem(
context: $key,
feedback: "Expected `$key` value `$expectedVal`, but actual is `$actualVal`",
category: FeedbackCategory::Error
),
default => null,
};
Expand Down
Loading

0 comments on commit 720de6f

Please sign in to comment.