Skip to content

Commit

Permalink
Evals - cont
Browse files Browse the repository at this point in the history
  • Loading branch information
ddebowczyk committed Oct 11, 2024
1 parent bed43c8 commit 97e22f0
Show file tree
Hide file tree
Showing 21 changed files with 513 additions and 449 deletions.
60 changes: 60 additions & 0 deletions evals/ComplexExtraction/run.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
<?php

use Cognesy\Instructor\Extras\Evals\Contracts\CanEvaluateExperiment;
use Cognesy\Instructor\Extras\Evals\Contracts\Metric;
use Cognesy\Instructor\Extras\Evals\Data\InferenceCases;
use Cognesy\Instructor\Extras\Evals\Data\InstructorData;
use Cognesy\Instructor\Extras\Evals\Experiment;
use Cognesy\Instructor\Extras\Evals\Inference\RunInstructor;
use Cognesy\Instructor\Extras\Evals\Metrics\BooleanCorrectness;
use Cognesy\Instructor\Extras\Evals\Runner;

// Composer's autoloader; the relative path means the script is expected to be
// started from the directory that contains vendor/.
$loader = require 'vendor/autoload.php';
// __DIR__ never ends with a slash (unless it is the filesystem root), so the
// separator has to be written explicitly. The namespace maps directly onto
// the src/ directory (PSR-4 layout), hence addPsr4() instead of the PSR-0 add().
$loader->addPsr4('Cognesy\\Instructor\\', __DIR__ . '/../../src/');

// Inference cases to execute; every filter list is passed empty here.
$cases = InferenceCases::get(
    connections: [],
    modes: [],
    stream: []
);

/**
 * Plain data holder used as the response model for the extraction run.
 */
class Company
{
    /** Name of the company. */
    public string $name;

    /** Year in which the company was founded. */
    public int $foundingYear;
}

// Input handed to the executor: the chat messages (goal, context, question)
// and the class passed as the response model.
$data = new InstructorData(
messages: [
['role' => 'user', 'content' => 'YOUR GOAL: Use tools to store the information from context based on user questions.'],
['role' => 'user', 'content' => 'CONTEXT: Our company ACME was founded in 2020.'],
//['role' => 'user', 'content' => 'EXAMPLE CONTEXT: Sony was established in 1946 by Akio Morita.'],
//['role' => 'user', 'content' => 'EXAMPLE RESPONSE: ```json{"name":"Sony","year":1899}```'],
['role' => 'user', 'content' => 'What is the name and founding year of our company?'],
],
responseModel: Company::class,
);

/**
 * Evaluator that marks an experiment as correct when the extracted company
 * is named 'ACME' and was founded in 2020.
 */
class CompanyEval implements CanEvaluateExperiment
{
    /**
     * Compares the value extracted by the experiment with the expected data.
     *
     * @param Experiment $experiment experiment whose response value is checked
     * @return Metric boolean correctness metric wrapping the comparison result
     */
    public function evaluate(Experiment $experiment) : Metric {
        /** @var Company $company */
        $company = $experiment->response->value();
        $isCorrect = $company->name === 'ACME'
            && $company->foundingYear === 2020;
        return new BooleanCorrectness($isCorrect);
    }
}

//Debug::enable();

//$report = file_get_contents(__DIR__ . '/report.txt');
//$examples = require 'examples.php';
//$prompt = 'Extract a list of project events with all the details from the provided input in JSON format using schema: <|json_schema|>';
//$responseModel = Sequence::of(ProjectEvent::class);

// Pair the Instructor-based executor (constructed with $data) with the evaluator.
$runner = new Runner(
executor: new RunInstructor($data),
evaluator: new CompanyEval(),
);

// Execute the runner against the prepared cases and keep whatever it returns.
$outputs = $runner->execute($cases);
51 changes: 51 additions & 0 deletions evals/LLMModes/CompanyEval.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
<?php

namespace Cognesy\Evals\LLMModes;

use Cognesy\Instructor\Enums\Mode;
use Cognesy\Instructor\Extras\Evals\Contracts\CanEvaluateExperiment;
use Cognesy\Instructor\Extras\Evals\Experiment;
use Cognesy\Instructor\Extras\Evals\Metrics\BooleanCorrectness;
use Cognesy\Instructor\Extras\Evals\Contracts\Metric;
use Cognesy\Instructor\Utils\Str;

/**
 * Evaluates whether an experiment's response carries the expected company
 * data. The validation strategy is selected by the experiment's mode.
 */
class CompanyEval implements CanEvaluateExperiment
{
    /** @var array expected values; the keys 'name' and 'foundingYear' are read */
    private array $expectations;

    /**
     * @param array $expectations expected values, keyed by 'name' and 'foundingYear'
     */
    public function __construct(array $expectations) {
        $this->expectations = $expectations;
    }

    /**
     * Picks the validator matching the experiment's mode and wraps its verdict.
     *
     * @param Experiment $experiment experiment to check
     * @return Metric boolean correctness metric
     */
    public function evaluate(Experiment $experiment) : Metric {
        $isCorrect = match ($experiment->mode) {
            Mode::Text => $this->validateText($experiment),
            Mode::Tools => $this->validateToolsData($experiment),
            default => $this->validateDefault($experiment),
        };
        return new BooleanCorrectness($isCorrect);
    }

    /**
     * Tools mode: the first tool call must be 'store_company' and its
     * arguments must match the configured expectations (previously the
     * expected name and year were hard-coded here, ignoring the constructor
     * argument).
     */
    private function validateToolsData(Experiment $experiment) : bool {
        $data = $experiment->response->toolsData;
        return 'store_company' === ($data[0]['name'] ?? '')
            && $this->expectations['name'] === ($data[0]['arguments']['name'] ?? '')
            // the year argument is cast to int before the strict comparison
            && $this->expectations['foundingYear'] === (int) ($data[0]['arguments']['year'] ?? 0);
    }

    /**
     * Any other mode: decode the response JSON and compare its 'name' and
     * 'year' entries with the expectations.
     */
    private function validateDefault(Experiment $experiment) : bool {
        $decoded = json_decode($experiment->response->json(), true);
        return $this->expectations['name'] === ($decoded['name'] ?? '')
            && $this->expectations['foundingYear'] === ($decoded['year'] ?? 0);
    }

    /**
     * Text mode: check the raw response content for the expected name and
     * the founding year rendered as a string.
     */
    private function validateText(Experiment $experiment) : bool {
        return Str::contains(
            $experiment->response->content(),
            [
                $this->expectations['name'],
                (string) $this->expectations['foundingYear']
            ]
        );
    }
}
87 changes: 16 additions & 71 deletions evals/LLMModes/run.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,45 +3,18 @@
// __DIR__ never ends with a slash (unless it is the filesystem root), so the
// separator has to be written explicitly. Both namespaces map directly onto
// their directories (PSR-4 layout), hence addPsr4() instead of the PSR-0 add().
$loader->addPsr4('Cognesy\\Instructor\\', __DIR__ . '/../../src/');
$loader->addPsr4('Cognesy\\Evals\\', __DIR__ . '/../../evals/');

use Cognesy\Instructor\Enums\Mode;
use Cognesy\Instructor\Extras\Evals\Contracts\CanEvaluateExperiment;
use Cognesy\Instructor\Extras\Evals\Contracts\Metric;
use Cognesy\Instructor\Extras\Evals\Data\Experiment;
use Cognesy\Instructor\Extras\Evals\Data\ExperimentData;
use Cognesy\Instructor\Extras\Evals\Data\InferenceCases;
use Cognesy\Instructor\Extras\Evals\Data\InferenceData;
use Cognesy\Instructor\Extras\Evals\Data\InferenceSchema;
use Cognesy\Instructor\Extras\Evals\Inference\InferenceParams;
use Cognesy\Instructor\Extras\Evals\Inference\RunInference;
use Cognesy\Instructor\Extras\Evals\Metrics\BooleanCorrectness;
use Cognesy\Instructor\Extras\Evals\Runner;
use Cognesy\Instructor\Extras\Evals\Utils\Combination;
use Cognesy\Instructor\Utils\Str;
use Cognesy\Evals\LLMModes\CompanyEval;

$connections = [
'azure',
'cohere1',
'cohere2',
'fireworks',
'gemini',
'groq',
'mistral',
'ollama',
'openai',
'openrouter',
'together',
];

$streamingModes = [
true,
false,
];

$modes = [
Mode::Text,
Mode::MdJson,
Mode::Json,
Mode::JsonSchema,
Mode::Tools,
];
$cases = InferenceCases::except(
connections: [],
modes: [],
stream: []
);

//
// NOT SUPPORTED BY PROVIDERS
Expand All @@ -51,37 +24,9 @@
// azure, Mode::JsonSchema, sync|stream
//

$combinations = Combination::generator(
mapping: InferenceParams::class,
sources: [
'isStreaming' => $streamingModes,
'mode' => $modes,
'connection' => $connections,
],
);

class CompanyEval implements CanEvaluateExperiment
{
public function evaluate(Experiment $experiment) : Metric {
$decoded = json_decode($experiment->response->json(), true);
$isCorrect = match ($experiment->mode) {
Mode::Text => Str::contains($experiment->response->content(), ['ACME', '2020']),
Mode::Tools => $this->validateToolsData($experiment->response->toolsData),
default => ('ACME' === ($decoded['name'] ?? '') && 2020 === ($decoded['year'] ?? 0)),
};
return new BooleanCorrectness($isCorrect);
}

private function validateToolsData(array $data) : bool {
return 'store_company' === ($data[0]['name'] ?? '')
&& 'ACME' === ($data[0]['arguments']['name'] ?? '')
&& 2020 === (int) ($data[0]['arguments']['year'] ?? 0);
}
}

//Debug::enable();

$data = (new ExperimentData)->withInferenceConfig(
$data = new InferenceData(
messages: [
['role' => 'user', 'content' => 'YOUR GOAL: Use tools to store the information from context based on user questions.'],
['role' => 'user', 'content' => 'CONTEXT: Our company ACME was founded in 2020.'],
Expand Down Expand Up @@ -111,12 +56,12 @@ private function validateToolsData(array $data) : bool {
),
);

$evaluator = new Runner(
data: $data,
runner: new RunInference(),
evaluation: new CompanyEval(),
$runner = new Runner(
executor: new RunInference($data),
evaluator: new CompanyEval(expectations: [
'name' => 'ACME',
'foundingYear' => 2020
]),
);

$outputs = $evaluator->execute(
combinations: $combinations
);
$outputs = $runner->execute(cases: $cases);
8 changes: 8 additions & 0 deletions evals/SimpleExtraction/Company.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<?php

namespace Cognesy\Evals\SimpleExtraction;

/**
 * Plain data holder describing a company: its name and founding year.
 */
class Company
{
    /** Name of the company. */
    public string $name;

    /** Year in which the company was founded. */
    public int $foundingYear;
}
24 changes: 24 additions & 0 deletions evals/SimpleExtraction/CompanyEval.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<?php

namespace Cognesy\Evals\SimpleExtraction;

use Cognesy\Instructor\Extras\Evals\Contracts\CanEvaluateExperiment;
use Cognesy\Instructor\Extras\Evals\Experiment;
use Cognesy\Instructor\Extras\Evals\Metrics\BooleanCorrectness;
use Cognesy\Instructor\Extras\Evals\Contracts\Metric;

/**
 * Evaluator comparing the value extracted in an experiment against the
 * expected company name and founding year.
 */
class CompanyEval implements CanEvaluateExperiment
{
    /**
     * @param array $expectations expected values, keyed by 'name' and 'foundingYear'
     */
    public function __construct(private array $expectations) {}

    /**
     * Reports whether the extracted object matches both expected values.
     *
     * @param Experiment $experiment experiment whose response value is checked
     * @return Metric boolean correctness metric
     */
    public function evaluate(Experiment $experiment) : Metric {
        $extracted = $experiment->response->value();
        // Name mismatch settles the verdict; the year is not inspected then.
        if ($extracted->name !== $this->expectations['name']) {
            return new BooleanCorrectness(false);
        }
        return new BooleanCorrectness(
            $extracted->foundingYear === $this->expectations['foundingYear']
        );
    }
}
87 changes: 17 additions & 70 deletions evals/SimpleExtraction/run.php
Original file line number Diff line number Diff line change
@@ -1,71 +1,23 @@
<?php

use Cognesy\Instructor\Enums\Mode;
use Cognesy\Instructor\Extras\Evals\Contracts\CanEvaluateExperiment;
use Cognesy\Instructor\Extras\Evals\Contracts\Metric;
use Cognesy\Instructor\Extras\Evals\Data\Experiment;
use Cognesy\Instructor\Extras\Evals\Data\ExperimentData;
use Cognesy\Instructor\Extras\Evals\Inference\InferenceParams;
use Cognesy\Instructor\Extras\Evals\Data\InferenceCases;
use Cognesy\Instructor\Extras\Evals\Data\InstructorData;
use Cognesy\Instructor\Extras\Evals\Inference\RunInstructor;
use Cognesy\Instructor\Extras\Evals\Metrics\BooleanCorrectness;
use Cognesy\Instructor\Extras\Evals\Runner;
use Cognesy\Instructor\Extras\Evals\Utils\Combination;
use Cognesy\Evals\SimpleExtraction\CompanyEval;
use Cognesy\Evals\SimpleExtraction\Company;

// Composer's autoloader; the relative path means the script is expected to be
// started from the directory that contains vendor/.
$loader = require 'vendor/autoload.php';
// __DIR__ never ends with a slash (unless it is the filesystem root), so the
// separator has to be written explicitly. The namespace maps directly onto
// the src/ directory (PSR-4 layout), hence addPsr4() instead of the PSR-0 add().
$loader->addPsr4('Cognesy\\Instructor\\', __DIR__ . '/../../src/');

$connections = [
// 'azure',
// 'cohere1',
'cohere2',
// 'fireworks',
// 'gemini',
// 'groq',
// 'mistral',
// 'ollama',
// 'openai',
// 'openrouter',
// 'together',
];

$streamingModes = [
false,
true,
];

$modes = [
Mode::MdJson,
Mode::Json,
Mode::JsonSchema,
Mode::Tools,
];

$combinations = Combination::generator(
mapping: InferenceParams::class,
sources: [
'isStreaming' => $streamingModes,
'mode' => $modes,
'connection' => $connections,
],
$cases = InferenceCases::except(
connections: [],
modes: [Mode::Text],
stream: []
);

class Company {
public string $name;
public int $foundingYear;
}

class CompanyEval implements CanEvaluateExperiment
{
public function evaluate(Experiment $experiment) : Metric {
/** @var Person $decoded */
$person = $experiment->response->value();
$result = $person->name === 'ACME'
&& $person->foundingYear === 2020;
return new BooleanCorrectness($result);
}
}

$data = (new ExperimentData)->withInstructorConfig(
$data = new InstructorData(
messages: [
['role' => 'user', 'content' => 'YOUR GOAL: Use tools to store the information from context based on user questions.'],
['role' => 'user', 'content' => 'CONTEXT: Our company ACME was founded in 2020.'],
Expand All @@ -76,17 +28,12 @@ public function evaluate(Experiment $experiment) : Metric {
responseModel: Company::class,
);

//Debug::enable();

//$report = file_get_contents(__DIR__ . '/report.txt');
//$examples = require 'examples.php';
//$prompt = 'Extract a list of project events with all the details from the provided input in JSON format using schema: <|json_schema|>';
//$responseModel = Sequence::of(ProjectEvent::class);

$outputs = (new Runner(
data: $data,
runner: new RunInstructor(),
evaluation: new CompanyEval(),
))->execute(
combinations: $combinations
$runner = new Runner(
executor: new RunInstructor($data),
evaluator: new CompanyEval(expectations: [
'name' => 'ACME',
'foundingYear' => 2020
]),
);

$outputs = $runner->execute($cases);
12 changes: 8 additions & 4 deletions src/Enums/Mode.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,13 @@ enum Mode : string
case Text = 'text'; // unstructured text response

/**
 * Tells whether this mode equals the given mode or, when an array is
 * passed, whether it is one of the listed modes.
 *
 * @param array|Mode $mode a single mode or a list of modes
 */
public function is(array|Mode $mode) : bool {
    // Single implementation; the statements that used to follow the first
    // unconditional return could never run and were dropped.
    return match(true) {
        is_array($mode) => $this->isIn($mode),
        default => $this->value === $mode->value,
    };
}

/**
 * Tells whether this mode is contained in the given list.
 *
 * @param array $modes list of modes to search
 */
public function isIn(array $modes) : bool {
    // Strict search: only the identical enum case counts as a match, and no
    // loose comparison is attempted against unrelated array entries.
    return in_array($this, $modes, true);
}
}
Loading

0 comments on commit 97e22f0

Please sign in to comment.