From 15c8020289adedf66b15004aec583b46e21bf4cf Mon Sep 17 00:00:00 2001 From: ddebowczyk Date: Fri, 11 Oct 2024 12:09:25 +0200 Subject: [PATCH] Evals - cont --- evals/ComplexExtraction/run.php | 4 +-- evals/LLMModes/run.php | 17 +++-------- evals/SimpleExtraction/run.php | 7 +++-- .../Evals/{Runner.php => ExperimentSuite.php} | 20 +++++++------ tests/Feature/Utils/ConsoleTest.php | 28 +++++++++++++------ 5 files changed, 41 insertions(+), 35 deletions(-) rename src/Extras/Evals/{Runner.php => ExperimentSuite.php} (87%) diff --git a/evals/ComplexExtraction/run.php b/evals/ComplexExtraction/run.php index fbfc6465..8e6612d9 100644 --- a/evals/ComplexExtraction/run.php +++ b/evals/ComplexExtraction/run.php @@ -7,7 +7,7 @@ use Cognesy\Instructor\Extras\Evals\Experiment; use Cognesy\Instructor\Extras\Evals\Inference\RunInstructor; use Cognesy\Instructor\Extras\Evals\Metrics\BooleanCorrectness; -use Cognesy\Instructor\Extras\Evals\Runner; +use Cognesy\Instructor\Extras\Evals\ExperimentSuite; $loader = require 'vendor/autoload.php'; $loader->add('Cognesy\\Instructor\\', __DIR__ . '../../src/'); @@ -52,7 +52,7 @@ public function evaluate(Experiment $experiment) : Metric { //$prompt = 'Extract a list of project events with all the details from the provided input in JSON format using schema: <|json_schema|>'; //$responseModel = Sequence::of(ProjectEvent::class); -$runner = new Runner( +$runner = new ExperimentSuite( executor: new RunInstructor($data), evaluator: new CompanyEval(), ); diff --git a/evals/LLMModes/run.php b/evals/LLMModes/run.php index b2192013..d5cf5e8f 100644 --- a/evals/LLMModes/run.php +++ b/evals/LLMModes/run.php @@ -7,7 +7,7 @@ use Cognesy\Instructor\Extras\Evals\Data\InferenceData; use Cognesy\Instructor\Extras\Evals\Data\InferenceSchema; use Cognesy\Instructor\Extras\Evals\Inference\RunInference; -use Cognesy\Instructor\Extras\Evals\Runner; +use Cognesy\Instructor\Extras\Evals\ExperimentSuite; use Cognesy\Evals\LLMModes\CompanyEval; $cases = InferenceCases::except( @@ -16,16 +16,6 @@ stream: [] ); -// -// NOT SUPPORTED BY PROVIDERS -// -// groq, Mode::JsonSchema, stream -// groq, Mode::Json, stream -// azure, Mode::JsonSchema, sync|stream -// - -//Debug::enable(); - $data = new InferenceData( messages: [ ['role' => 'user', 'content' => 'YOUR GOAL: Use tools to store the information from context based on user questions.'], @@ -56,12 +46,13 @@ ), ); -$runner = new Runner( +$runner = new ExperimentSuite( executor: new RunInference($data), evaluator: new CompanyEval(expectations: [ 'name' => 'ACME', 'foundingYear' => 2020 ]), + cases: $cases, ); -$outputs = $runner->execute(cases: $cases); +$outputs = $runner->execute(); diff --git a/evals/SimpleExtraction/run.php b/evals/SimpleExtraction/run.php index f922a944..b75e8557 100644 --- a/evals/SimpleExtraction/run.php +++ b/evals/SimpleExtraction/run.php @@ -4,7 +4,7 @@ use Cognesy\Instructor\Extras\Evals\Data\InferenceCases; use Cognesy\Instructor\Extras\Evals\Data\InstructorData; use Cognesy\Instructor\Extras\Evals\Inference\RunInstructor; -use Cognesy\Instructor\Extras\Evals\Runner; +use Cognesy\Instructor\Extras\Evals\ExperimentSuite; use Cognesy\Evals\SimpleExtraction\CompanyEval; use Cognesy\Evals\SimpleExtraction\Company; @@ -28,12 +28,13 @@ responseModel: Company::class, ); -$runner = new Runner( +$runner = new ExperimentSuite( executor: new RunInstructor($data), evaluator: new CompanyEval(expectations: [ 'name' => 'ACME', 'foundingYear' => 2020 ]), + cases: $cases, ); -$outputs = $runner->execute($cases); +$outputs = $runner->execute(); diff --git a/src/Extras/Evals/Runner.php b/src/Extras/Evals/ExperimentSuite.php similarity index 87% rename from src/Extras/Evals/Runner.php rename to src/Extras/Evals/ExperimentSuite.php index f0d8a5f4..a77bfbc0 100644 --- a/src/Extras/Evals/Runner.php +++ b/src/Extras/Evals/ExperimentSuite.php @@ -8,20 +8,26 @@ use Exception; use Generator; -class Runner { - private array $exceptions = []; - private array $experiments = []; +class ExperimentSuite { private Display $display; + private CanExecuteExperiment $executor; private CanEvaluateExperiment $evaluator; + private Generator $cases; + + private array $exceptions = []; + private array $experiments = []; public function __construct( CanExecuteExperiment $executor, CanEvaluateExperiment $evaluator, + Generator $cases, ) { + $this->display = new Display(); + $this->executor = $executor; $this->evaluator = $evaluator; - $this->display = new Display(); + $this->cases = $cases; } // PUBLIC ////////////////////////////////////////////////// @@ -30,10 +36,8 @@ public function __construct( * @param Generator $cases * @return array */ - public function execute( - Generator $cases - ) : array { - foreach ($cases as $case) { + public function execute() : array { + foreach ($this->cases as $case) { $experiment = (new Experiment( id: (string) $case, connection: $case->connection, diff --git a/tests/Feature/Utils/ConsoleTest.php b/tests/Feature/Utils/ConsoleTest.php index c2a9c4fe..c35dff7b 100644 --- a/tests/Feature/Utils/ConsoleTest.php +++ b/tests/Feature/Utils/ConsoleTest.php @@ -1,36 +1,46 @@ toBe('Sample text '); + expect($output)->toBe('Sample text' . Color::RESET . ' '); }); it('aligns single column text to the left by default', function () { $output = Console::columns([[-1, 'Left aligned', STR_PAD_LEFT]], 80); - expect($output)->toBe(str_pad('Left aligned', 80, ' ', STR_PAD_LEFT)); -})->skip(); + expect($output)->toBe( + str_pad('Left aligned', 80, ' ', STR_PAD_LEFT) + . Color::RESET . ' ' + ); +}); it('aligns single column text to the right', function () { $output = Console::columns([[-1, 'Right aligned', STR_PAD_RIGHT]], 80); - expect($output)->toBe(str_pad('Right aligned', 80, ' ', STR_PAD_RIGHT)); -})->skip(); + expect($output)->toBe( + str_pad('Right aligned', 80, ' ', STR_PAD_RIGHT) + . Color::RESET . ' ' + ); +}); it('truncates and appends ellipsis to long text based on maxWidth', function () { $longText = str_repeat('A', 100); $output = Console::columns([[-1, $longText]], 80); - $expected = str_pad(substr($longText, 0, 79) . '… ', 80); + $expected = str_pad(substr($longText, 0, 79) . '…' . Color::RESET . ' ', 80); expect($output)->toBe($expected); }); it('handles mixed array of strings and column specifications', function () { $output = Console::columns(['Static text', [-1, 'Dynamic text', STR_PAD_RIGHT]], 80); - expect($output)->toBe('Static text Dynamic text '); + expect($output)->toBe( + 'Static text' . Color::RESET . ' ' + . 'Dynamic text' + . ' ' . Color::RESET . ' '); }); it('ensures minWidth of 80 if maxWidth is less', function () { $output = Console::columns([[-1, 'Min width enforced', STR_PAD_RIGHT]], 10); - expect(strlen($output))->toBeGreaterThanOrEqual(80); -})->skip(); \ No newline at end of file + expect(strlen($output))->toBe(80 + strlen(Color::RESET . ' ')); +}); \ No newline at end of file