Skip to content

Commit

Permalink
Evals - cont
Browse files Browse the repository at this point in the history
  • Loading branch information
ddebowczyk committed Oct 11, 2024
1 parent 066a205 commit bed43c8
Show file tree
Hide file tree
Showing 24 changed files with 458 additions and 339 deletions.
98 changes: 52 additions & 46 deletions evals/LLMModes/run.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,16 @@
// __DIR__ never ends with a directory separator (except at the filesystem root),
// so the relative part must start with '/' to form a valid path.
$loader->add('Cognesy\\Evals\\', __DIR__ . '/../../evals/');

use Cognesy\Instructor\Enums\Mode;
use Cognesy\Instructor\Extras\Evals\Combination;
use Cognesy\Instructor\Extras\Evals\Data\EvalInput;
use Cognesy\Instructor\Extras\Evals\Data\EvalSchema;
use Cognesy\Instructor\Extras\Evals\Evaluator;
use Cognesy\Instructor\Extras\Evals\Contracts\CanEvaluateExperiment;
use Cognesy\Instructor\Extras\Evals\Contracts\Metric;
use Cognesy\Instructor\Extras\Evals\Data\Experiment;
use Cognesy\Instructor\Extras\Evals\Data\ExperimentData;
use Cognesy\Instructor\Extras\Evals\Data\InferenceSchema;
use Cognesy\Instructor\Extras\Evals\Inference\InferenceParams;
use Cognesy\Instructor\Extras\Evals\Inference\RunInference;
use Cognesy\Instructor\Extras\Evals\Mappings\ConnectionModes;
use Cognesy\Instructor\Extras\Evals\Metrics\BooleanCorrectness;
use Cognesy\Instructor\Extras\Evals\Runner;
use Cognesy\Instructor\Extras\Evals\Utils\Combination;
use Cognesy\Instructor\Utils\Str;

$connections = [
Expand Down Expand Up @@ -48,69 +52,71 @@
//

$combinations = Combination::generator(
mapping: ConnectionModes::class,
mapping: InferenceParams::class,
sources: [
'isStreaming' => $streamingModes,
'mode' => $modes,
'connection' => $connections,
],
);

function evalFn(EvalInput $er) {
$decoded = json_decode($er->response->json(), true);
$isCorrect = match($er->mode) {
Mode::Text => Str::contains($er->response->content(), ['ACME', '2020']),
Mode::Tools => validateToolsData($er->response->toolsData),
default => ('ACME' === ($decoded['name'] ?? '') && 2020 === ($decoded['year'] ?? 0)),
};
return $isCorrect;
}
class CompanyEval implements CanEvaluateExperiment
{
/**
 * Scores one experiment run against the facts given in the prompt
 * (company name 'ACME', founding year 2020).
 *
 * Text mode checks the raw response content via Str::contains(), Tools mode
 * checks the tool call data, and every other mode checks the decoded JSON payload.
 *
 * @param Experiment $experiment experiment whose mode and response are inspected
 * @return Metric BooleanCorrectness wrapping the outcome of the check
 */
public function evaluate(Experiment $experiment) : Metric {
    // Decoded up front for every mode, although only the fallback branch reads it.
    $payload = json_decode($experiment->response->json(), true);
    $mode = $experiment->mode;

    if ($mode === Mode::Text) {
        return new BooleanCorrectness(
            Str::contains($experiment->response->content(), ['ACME', '2020'])
        );
    }

    if ($mode === Mode::Tools) {
        return new BooleanCorrectness(
            $this->validateToolsData($experiment->response->toolsData)
        );
    }

    // Remaining modes: the response is expected to decode into a name/year structure.
    $matches = 'ACME' === ($payload['name'] ?? '')
        && 2020 === ($payload['year'] ?? 0);
    return new BooleanCorrectness($matches);
}

function validateToolsData(array $data) : bool {
return 'store_company' === ($data[0]['name'] ?? '')
&& 'ACME' === ($data[0]['arguments']['name'] ?? '')
&& 2020 === (int) ($data[0]['arguments']['year'] ?? 0);
/**
 * Verifies that the first tool call is 'store_company' with name 'ACME' and year 2020.
 *
 * Missing keys are treated as a mismatch; the year is cast to int before comparison,
 * so a numeric string such as '2020' is accepted.
 *
 * @param array $data tool call entries; only index 0 is inspected
 * @return bool true when all three values match
 */
private function validateToolsData(array $data) : bool {
    $call = $data[0] ?? null;
    if ('store_company' !== ($call['name'] ?? '')) {
        return false;
    }

    $arguments = $call['arguments'] ?? null;
    if ('ACME' !== ($arguments['name'] ?? '')) {
        return false;
    }

    return 2020 === (int) ($arguments['year'] ?? 0);
}
}

//Debug::enable();

$schema = new EvalSchema(
toolName: 'store_company',
toolDescription: 'Store company information',
schema: [
'type' => 'object',
'description' => 'Company information',
'properties' => [
'year' => [
'type' => 'integer',
'description' => 'Founding year',
],
'name' => [
'type' => 'string',
'description' => 'Company name',
],
],
'required' => ['name', 'year'],
'additionalProperties' => false,
]
);

$evaluator = new Evaluator(
$data = (new ExperimentData)->withInferenceConfig(
messages: [
['role' => 'user', 'content' => 'YOUR GOAL: Use tools to store the information from context based on user questions.'],
['role' => 'user', 'content' => 'CONTEXT: Our company ACME was founded in 2020.'],
//['role' => 'user', 'content' => 'EXAMPLE CONTEXT: Sony was established in 1946 by Akio Morita.'],
//['role' => 'user', 'content' => 'EXAMPLE RESPONSE: ```json{"name":"Sony","year":1899}```'],
['role' => 'user', 'content' => 'What is the name and founding year of our company?'],
],
schema: $schema,
schema: new InferenceSchema(
toolName: 'store_company',
toolDescription: 'Store company information',
schema: [
'type' => 'object',
'description' => 'Company information',
'properties' => [
'year' => [
'type' => 'integer',
'description' => 'Founding year',
],
'name' => [
'type' => 'string',
'description' => 'Company name',
],
],
'required' => ['name', 'year'],
'additionalProperties' => false,
]
),
);

$evaluator = new Runner(
data: $data,
runner: new RunInference(),
evalFn: fn(EvalInput $evalInput) => evalFn($evalInput),
evaluation: new CompanyEval(),
);

$outputs = $evaluator->execute(
// connections: $connections,
// modes: $modes,
// streamingModes: $streamingModes
combinations: $combinations
);
55 changes: 33 additions & 22 deletions evals/SimpleExtraction/run.php
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
<?php

use Cognesy\Instructor\Enums\Mode;
use Cognesy\Instructor\Extras\Evals\Combination;
use Cognesy\Instructor\Extras\Evals\Data\EvalInput;
use Cognesy\Instructor\Extras\Evals\Evaluator;
use Cognesy\Instructor\Extras\Evals\Instructor\RunInstructor;
use Cognesy\Instructor\Extras\Evals\Mappings\ConnectionModes;
use Cognesy\Instructor\Utils\Debug\Debug;
use Cognesy\Instructor\Extras\Evals\Contracts\CanEvaluateExperiment;
use Cognesy\Instructor\Extras\Evals\Contracts\Metric;
use Cognesy\Instructor\Extras\Evals\Data\Experiment;
use Cognesy\Instructor\Extras\Evals\Data\ExperimentData;
use Cognesy\Instructor\Extras\Evals\Inference\InferenceParams;
use Cognesy\Instructor\Extras\Evals\Inference\RunInstructor;
use Cognesy\Instructor\Extras\Evals\Metrics\BooleanCorrectness;
use Cognesy\Instructor\Extras\Evals\Runner;
use Cognesy\Instructor\Extras\Evals\Utils\Combination;

$loader = require 'vendor/autoload.php';
// __DIR__ never ends with a directory separator (except at the filesystem root),
// so the relative part must start with '/' to form a valid path.
$loader->add('Cognesy\\Instructor\\', __DIR__ . '/../../src/');
Expand Down Expand Up @@ -38,7 +41,7 @@
];

$combinations = Combination::generator(
mapping: ConnectionModes::class,
mapping: InferenceParams::class,
sources: [
'isStreaming' => $streamingModes,
'mode' => $modes,
Expand All @@ -51,31 +54,39 @@ class Company {
public int $foundingYear;
}

function evalFn(EvalInput $er) {
/** @var Person $decoded */
$person = $er->response->value();
return $person->name === 'ACME'
&& $person->foundingYear === 2020;
/**
 * Checks whether the extracted Company matches the facts stated in the prompt
 * context (name 'ACME', founding year 2020).
 */
class CompanyEval implements CanEvaluateExperiment
{
    /**
     * Scores a single experiment run.
     *
     * @param Experiment $experiment experiment whose response value is inspected
     * @return Metric BooleanCorrectness wrapping true only when the extracted
     *                name is 'ACME' and foundingYear is 2020
     */
    public function evaluate(Experiment $experiment) : Metric {
        $company = $experiment->response->value();

        // Anything other than a Company (e.g. null) counts as an incorrect answer
        // rather than triggering property reads on a non-object.
        if (!($company instanceof Company)) {
            return new BooleanCorrectness(false);
        }

        $result = $company->name === 'ACME'
            && $company->foundingYear === 2020;
        return new BooleanCorrectness($result);
    }
}

//Debug::enable();

//$report = file_get_contents(__DIR__ . '/report.txt');
//$examples = require 'examples.php';
//$prompt = 'Extract a list of project events with all the details from the provided input in JSON format using schema: <|json_schema|>';
//$responseModel = Sequence::of(ProjectEvent::class);

$outputs = (new Evaluator(
$data = (new ExperimentData)->withInstructorConfig(
messages: [
['role' => 'user', 'content' => 'YOUR GOAL: Use tools to store the information from context based on user questions.'],
['role' => 'user', 'content' => 'CONTEXT: Our company ACME was founded in 2020.'],
//['role' => 'user', 'content' => 'EXAMPLE CONTEXT: Sony was established in 1946 by Akio Morita.'],
//['role' => 'user', 'content' => 'EXAMPLE RESPONSE: ```json{"name":"Sony","year":1899}```'],
['role' => 'user', 'content' => 'What is the name and founding year of our company?'],
],
schema: Company::class,
responseModel: Company::class,
);

//Debug::enable();

//$report = file_get_contents(__DIR__ . '/report.txt');
//$examples = require 'examples.php';
//$prompt = 'Extract a list of project events with all the details from the provided input in JSON format using schema: <|json_schema|>';
//$responseModel = Sequence::of(ProjectEvent::class);

$outputs = (new Runner(
data: $data,
runner: new RunInstructor(),
evalFn: fn(EvalInput $er) => evalFn($er),
evaluation: new CompanyEval(),
))->execute(
combinations: $combinations
);
18 changes: 9 additions & 9 deletions src/Extras/Evals/Console/Display.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
namespace Cognesy\Instructor\Extras\Evals\Console;

use Cognesy\Instructor\Enums\Mode;
use Cognesy\Instructor\Extras\Evals\Data\EvalOutput;
use Cognesy\Instructor\Extras\Evals\Data\Experiment;
use Cognesy\Instructor\Utils\Cli\Color;
use Cognesy\Instructor\Utils\Cli\Console;
use Cognesy\Instructor\Utils\Debug\Debug;
Expand All @@ -20,13 +20,13 @@ public function before(Mode $mode, string $connection, bool $isStreamed) : void
Console::print('', [Color::GRAY, Color::BG_BLACK]);
}

public function after(EvalOutput $evalResponse) : void {
$answer = $evalResponse->notes;
public function after(Experiment $eval) : void {
$answer = $eval->notes;
$answerLine = str_replace("\n", '\n', $answer);
$metric = $evalResponse->metric;
$timeElapsed = $evalResponse->timeElapsed;
$tokensPerSec = $evalResponse->outputTps();
$exception = $evalResponse->exception;
$metric = $eval->metric;
$timeElapsed = $eval->timeElapsed;
$tokensPerSec = $eval->outputTps();
$exception = $eval->exception;

if ($exception) {
//Console::print(' ');
Expand All @@ -36,14 +36,14 @@ public function after(EvalOutput $evalResponse) : void {
[9, '', STR_PAD_LEFT, [Color::DARK_YELLOW]],
[10, '', STR_PAD_LEFT, [Color::CYAN]],
[6, '!!!', STR_PAD_BOTH, [Color::WHITE, COLOR::BOLD, Color::BG_MAGENTA]],
[60, ' ' . $this->exc2txt($exception, 80), STR_PAD_RIGHT, [Color::RED, Color::BG_BLACK]],
[60, $this->exc2txt($exception, 80), STR_PAD_RIGHT, [Color::RED, Color::BG_BLACK]],
], 120);
} else {
echo Console::columns([
[9, $this->timeFormat($timeElapsed), STR_PAD_LEFT, [Color::DARK_YELLOW]],
[10, $this->tokensPerSecFormat($tokensPerSec), STR_PAD_LEFT, [Color::CYAN]],
[6, $metric->toString(), STR_PAD_BOTH, $metric->toCliColor()],
[60, ' ' . $answerLine, STR_PAD_RIGHT, [Color::WHITE, Color::BG_BLACK]],
[60, $answerLine, STR_PAD_RIGHT, [Color::WHITE, Color::BG_BLACK]],
], 120);
}
echo "\n";
Expand Down
11 changes: 0 additions & 11 deletions src/Extras/Evals/Contracts/CanEvaluate.php

This file was deleted.

10 changes: 10 additions & 0 deletions src/Extras/Evals/Contracts/CanEvaluateExperiment.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?php

namespace Cognesy\Instructor\Extras\Evals\Contracts;

use Cognesy\Instructor\Extras\Evals\Data\Experiment;

/**
 * Contract for objects that score a single experiment.
 */
interface CanEvaluateExperiment
{
/**
 * Evaluates the given experiment and returns the resulting metric.
 *
 * @param Experiment $experiment experiment to evaluate
 * @return Metric metric produced for this experiment
 */
public function evaluate(Experiment $experiment) : Metric;
}
5 changes: 2 additions & 3 deletions src/Extras/Evals/Contracts/CanExecuteExperiment.php
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
<?php
namespace Cognesy\Instructor\Extras\Evals\Contracts;

use Cognesy\Instructor\Extras\Evals\Data\EvalInput;
use Cognesy\Instructor\Extras\Evals\Data\Experiment;
use Cognesy\Instructor\Features\LLM\Data\LLMResponse;

interface CanExecuteExperiment
{
public function withEvalInput(EvalInput $input): self;
public function execute(): void;
public function execute(Experiment $experiment): void;
public function getLLMResponse(): LLMResponse;
public function getAnswer(): mixed;
}
37 changes: 0 additions & 37 deletions src/Extras/Evals/Data/EvalInput.php

This file was deleted.

33 changes: 0 additions & 33 deletions src/Extras/Evals/Data/EvalOutput.php

This file was deleted.

Loading

0 comments on commit bed43c8

Please sign in to comment.