-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
15c8020
commit 60e6989
Showing
36 changed files
with
918 additions
and
116 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,60 +1,70 @@ | ||
<?php | ||
|
||
use Cognesy\Instructor\Enums\Mode; | ||
use Cognesy\Instructor\Extras\Evals\Contracts\CanEvaluateExperiment; | ||
use Cognesy\Instructor\Extras\Evals\Contracts\Metric; | ||
use Cognesy\Instructor\Extras\Evals\Data\Evaluation; | ||
use Cognesy\Instructor\Extras\Evals\Data\Feedback; | ||
use Cognesy\Instructor\Extras\Evals\Data\InferenceCases; | ||
use Cognesy\Instructor\Extras\Evals\Data\InstructorData; | ||
use Cognesy\Instructor\Extras\Evals\Experiment; | ||
use Cognesy\Instructor\Extras\Evals\Inference\RunInstructor; | ||
use Cognesy\Instructor\Extras\Evals\Metrics\BooleanCorrectness; | ||
use Cognesy\Instructor\Extras\Evals\ExperimentSuite; | ||
use Cognesy\Instructor\Extras\Evals\Metrics\PercentageCorrectness; | ||
use Cognesy\Instructor\Extras\Sequence\Sequence; | ||
use Cognesy\Instructor\Features\LLM\Data\Usage; | ||
|
||
// Load Composer's autoloader; the require returns the loader instance.
$loader = require 'vendor/autoload.php';
// __DIR__ never ends with a directory separator, so the relative part has to
// start with '/'. Without it the path became "<dir>../../src/".
$loader->add('Cognesy\\Instructor\\', __DIR__ . '/../../src/');
|
||
$cases = InferenceCases::get( | ||
connections: [], | ||
modes: [], | ||
stream: [] | ||
); | ||
|
||
/**
 * Plain data holder used as the response model for the extraction example.
 */
class Company
{
    /** Company name. */
    public string $name;

    /** Year the company was founded. */
    public int $foundingYear;
}
|
||
/**
 * Evaluates an experiment by comparing the number of items in the returned
 * sequence against the expected count stored under the 'events' expectation.
 */
class CompanyEval implements CanEvaluateExperiment
{
    public array $expectations;

    /**
     * @param array $expectations Expectation values; evaluate() reads the 'events' key.
     */
    public function __construct(array $expectations) {
        $this->expectations = $expectations;
    }

    /**
     * Builds an Evaluation holding a PercentageCorrectness metric named 'found'.
     *
     * @param Experiment $experiment Experiment whose response value is read.
     * @return Evaluation Evaluation with the metric, no feedback and no usage.
     * @throws \InvalidArgumentException When the 'events' expectation is missing,
     *                                   non-numeric or not greater than zero.
     */
    public function evaluate(Experiment $experiment) : Evaluation {
        $expectedEvents = $this->expectations['events'] ?? null;
        // The expected count is used as a divisor below; reject values that
        // would otherwise end in an opaque DivisionByZeroError / TypeError.
        if (!is_numeric($expectedEvents) || $expectedEvents <= 0) {
            throw new \InvalidArgumentException(
                "CompanyEval requires a positive numeric 'events' expectation."
            );
        }
        /** @var Sequence $events */
        $events = $experiment->response->value();
        // NOTE(review): this ratio is the share of expected events that were NOT
        // returned, while the metric is named 'found' - confirm the intended direction.
        $result = ($expectedEvents - count($events->list)) / $expectedEvents;
        return new Evaluation(
            metric: new PercentageCorrectness('found', $result),
            feedback: Feedback::none(),
            usage: Usage::none(),
        );
    }
}
|
||
$report = file_get_contents(__DIR__ . '/report.txt'); | ||
$examples = require 'examples.php'; | ||
$prompt = 'Extract a list of project events with all the details from the provided input in JSON format using schema: <|json_schema|>'; | ||
$responseModel = Sequence::of(ProjectEvent::class); | ||
|
||
// Input for the experiment: three user messages (goal, context, question) and
// the Company class as the response model.
// Two example messages were left disabled in the original script:
//   'EXAMPLE CONTEXT: Sony was established in 1946 by Akio Morita.'
//   'EXAMPLE RESPONSE: ```json{"name":"Sony","year":1899}```'
$data = new InstructorData(
    messages: array_map(
        static fn(string $content): array => ['role' => 'user', 'content' => $content],
        [
            'YOUR GOAL: Use tools to store the information from context based on user questions.',
            'CONTEXT: Our company ACME was founded in 2020.',
            'What is the name and founding year of our company?',
        ],
    ),
    responseModel: Company::class,
);
|
||
/**
 * Checks whether the experiment's response value is a Company named 'ACME'
 * with founding year 2020.
 */
class CompanyEval implements CanEvaluateExperiment
{
    /**
     * @param Experiment $experiment Experiment whose response value is inspected.
     * @return Metric BooleanCorrectness wrapping the outcome of the comparison.
     */
    public function evaluate(Experiment $experiment) : Metric {
        /** @var Company|null $company */
        $company = $experiment->response->value();
        // Anything that is not a Company cannot match; report it as incorrect
        // instead of reading properties of an unexpected value.
        if (!$company instanceof Company) {
            return new BooleanCorrectness(false);
        }
        $result = $company->name === 'ACME'
            && $company->foundingYear === 2020;
        return new BooleanCorrectness($result);
    }
}
|
||
//Debug::enable(); | ||
|
||
//$report = file_get_contents(__DIR__ . '/report.txt'); | ||
//$examples = require 'examples.php'; | ||
//$prompt = 'Extract a list of project events with all the details from the provided input in JSON format using schema: <|json_schema|>'; | ||
//$responseModel = Sequence::of(ProjectEvent::class); | ||
|
||
$runner = new ExperimentSuite( | ||
cases: InferenceCases::only( | ||
connections: ['openai', 'anthropic', 'gemini', 'cohere'], | ||
modes: [Mode::Tools], | ||
stream: [true, false] | ||
), | ||
executor: new RunInstructor($data), | ||
evaluator: new CompanyEval(), | ||
evaluators: new CompanyEval(expectations: ['events' => 12]), | ||
); | ||
|
||
$outputs = $runner->execute($cases); | ||
$outputs = $runner->execute(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
<?php | ||
|
||
namespace Cognesy\Instructor\Extras\Evals\Aggregators; | ||
|
||
use Cognesy\Instructor\Extras\Evals\Contracts\CanAggregateValues; | ||
use Cognesy\Instructor\Extras\Evals\Contracts\Metric; | ||
use Cognesy\Instructor\Extras\Evals\Experiment; | ||
use Cognesy\Instructor\Extras\Evals\Metrics\NullMetric; | ||
|
||
/**
 * Aggregator that picks the metric of the evaluation stored at index 0.
 */
class FirstMetric implements CanAggregateValues
{
    /**
     * @param Experiment $experiment Experiment whose evaluations are inspected.
     * @return Metric Metric of the evaluation at index 0, or a NullMetric when
     *                that entry is absent or falsy.
     */
    public function aggregate(Experiment $experiment): Metric {
        $head = $experiment->evaluations[0] ?? null;
        if (!$head) {
            return new NullMetric();
        }
        return $head->metric;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
<?php | ||
|
||
namespace Cognesy\Instructor\Extras\Evals\Aggregators; | ||
|
||
use Cognesy\Instructor\Extras\Evals\Contracts\CanAggregateValues; | ||
use Cognesy\Instructor\Extras\Evals\Contracts\Metric; | ||
use Cognesy\Instructor\Extras\Evals\Experiment; | ||
use Cognesy\Instructor\Extras\Evals\Metrics\NullMetric; | ||
|
||
/**
 * Aggregator that picks the first evaluation metric whose name matches the
 * name given at construction time.
 */
class SelectedMetric implements CanAggregateValues
{
    /**
     * @param string $name Metric name to look for.
     */
    public function __construct(
        private string $name
    ) {}

    /**
     * @param Experiment $experiment Experiment whose evaluations are scanned in order.
     * @return Metric First metric with a matching name, or a NullMetric when none matches.
     */
    public function aggregate(Experiment $experiment): Metric {
        foreach ($experiment->evaluations as $candidate) {
            $metric = $candidate->metric;
            if ($metric->name() !== $this->name) {
                continue;
            }
            return $metric;
        }
        return new NullMetric();
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
<?php | ||
|
||
namespace Cognesy\Instructor\Extras\Evals\Contracts; | ||
|
||
use Cognesy\Instructor\Extras\Evals\Experiment; | ||
|
||
/**
 * Contract for objects that reduce an experiment to one metric.
 */
interface CanAggregateValues
{
    /**
     * Produce a single metric for the given experiment.
     *
     * @param Experiment $experiment Experiment to aggregate.
     * @return Metric The resulting metric.
     */
    public function aggregate(Experiment $experiment): Metric;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
<?php | ||
|
||
namespace Cognesy\Instructor\Extras\Evals\Data; | ||
|
||
use Cognesy\Instructor\Features\Schema\Attributes\Description; | ||
|
||
/**
 * Structured result of a correctness evaluation: a written assessment,
 * a yes/no decision and a list of feedback items.
 */
#[Description("The result of correctness evaluation.")]
class BooleanCorrectnessAnalysis
{
    // Free-text reasoning comparing expected and actual results.
    #[Description("Step by step assessment of the expected versus actual results.")]
    public string $assessment;

    // Final decision.
    #[Description("Decision if the actual result is correct.")]
    public bool $isCorrect;

    // Individual issues; per the description, empty when the result is correct.
    #[Description("If the result is incorrect - list of individual issues found in the actual result considering the expected values. Otherwise empty.")]
    /** @var ParameterFeedback[] */
    public array $feedback;
}
Oops, something went wrong.