Skip to content

Commit

Permalink
Evals - cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
ddebowczyk committed Oct 24, 2024
1 parent d19cc4e commit f1f18d4
Show file tree
Hide file tree
Showing 33 changed files with 602 additions and 260 deletions.
8 changes: 5 additions & 3 deletions evals/ComplexExtraction/ProjectsEval.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@

class ProjectsEval implements CanObserveExecution
{
public array $expectations;
private string $key;
private array $expectations;

public function __construct(array $expectations) {
/**
* Stores the observation key and the expectations for this evaluator.
*
* @param string $key Key passed to the observation created in observe().
* @param array $expectations Expected values for the evaluated execution
*     (NOTE(review): presumably read by observe(), e.g. the expected number
*     of events - confirm against the full method body).
*/
public function __construct(string $key, array $expectations) {
$this->key = $key;
$this->expectations = $expectations;
}

Expand All @@ -24,7 +26,7 @@ public function observe(Execution $execution): Observation {
$result = ($expectedEvents - count($events->events)) / $expectedEvents;
return Observation::make(
type: 'metric',
key: 'execution.fractionFound',
key: $this->key,
value: $result,
metadata: [
'executionId' => $execution->id(),
Expand Down
3 changes: 1 addition & 2 deletions evals/ComplexExtraction/run.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,10 @@
use Cognesy\Instructor\Enums\Mode;
use Cognesy\Instructor\Extras\Evals\Aggregators\AggregateExperimentObservation;
use Cognesy\Instructor\Extras\Evals\Enums\NumberAggregationMethod;
use Cognesy\Instructor\Extras\Evals\Evaluators\ArrayMatchEval;
use Cognesy\Instructor\Extras\Evals\Executors\Data\InferenceCases;
use Cognesy\Instructor\Extras\Evals\Executors\Data\InstructorData;
use Cognesy\Instructor\Extras\Evals\Executors\RunInstructor;
use Cognesy\Instructor\Extras\Evals\Experiment;
use Cognesy\Instructor\Utils\Debug\Debug;

// Load Composer's autoloader and register the library sources for the
// Cognesy\Instructor\ namespace prefix.
$loader = require 'vendor/autoload.php';
// __DIR__ has no trailing slash (unless it is the root directory), so the
// relative part must start with a separator; without it the directory name
// is fused with ".." and the resulting path does not exist.
$loader->add('Cognesy\\Instructor\\', __DIR__ . '/../../src/');
Expand All @@ -32,6 +30,7 @@
executor: new RunInstructor($data),
processors: [
new ProjectsEval(
key: 'execution.fractionFound',
expectations: ['events' => 12]
),
],
Expand Down
10 changes: 7 additions & 3 deletions evals/LLMModes/CompanyEval.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
namespace Cognesy\Evals\LLMModes;

use Cognesy\Instructor\Enums\Mode;
use Cognesy\Instructor\Extras\Evals\Contracts\CanProvideExecutionObservations;
use Cognesy\Instructor\Extras\Evals\Contracts\CanGenerateObservations;
use Cognesy\Instructor\Extras\Evals\Execution;
use Cognesy\Instructor\Extras\Evals\Observation;
use Cognesy\Instructor\Utils\Str;

class CompanyEval implements CanProvideExecutionObservations
class CompanyEval implements CanGenerateObservations
{
private string $key;
private array $expectations;
Expand All @@ -21,7 +21,11 @@ public function __construct(
$this->expectations = $expectations;
}

public function observations(Execution $subject): iterable {
/**
* Tells whether this evaluator can handle the given subject.
*
* @param mixed $subject The candidate subject.
* @return bool True only when the subject is an Execution instance.
*/
public function accepts(mixed $subject): bool {
return $subject instanceof Execution;
}

/**
* Yields a single item: the result of correctness() for the subject.
*
* @param mixed $subject The subject to observe; accepts() reports true
*     only for Execution instances.
* @return iterable Generator producing the correctness result.
*/
public function observations(mixed $subject): iterable {
yield $this->correctness($subject);
}

Expand Down
6 changes: 4 additions & 2 deletions notes/NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@

## High priority

- Evals!!!
- Evals / eval framework
* execution level correctness metric
* add input, output, etc. tokens default metrics
* simplify contracts - currently 5 (!) contracts for observations
- Add 'Output' section to each example, generate it and include in docs, so reader can see what they can expect
- Logging via PSR-3
- Schema abstraction layer - decouple names and descriptions from the model
- Prompt optimization via TextGrad
- Eval framework
- Agents
- Indexing to vector DB
- CLI app
Expand Down
10 changes: 2 additions & 8 deletions src/Extras/Embeddings/EmbeddingsResponse.php
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,11 @@ public function split(int $index) : array {
return [
new EmbeddingsResponse(
vectors: array_slice($this->vectors, 0, $index),
usage: new Usage(
inputTokens: $this->usage()->inputTokens,
outputTokens: $this->usage()->outputTokens,
),
usage: Usage::copy($this->usage()), // TODO: token split is arbitrary
),
new EmbeddingsResponse(
vectors: array_slice($this->vectors, $index),
usage: new Usage(
inputTokens: 0,
outputTokens: 0,
),
usage: new Usage(), // TODO: token split is arbitrary
),
];
}
Expand Down
28 changes: 28 additions & 0 deletions src/Extras/Evals/Contracts/CanGenerateObservations.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<?php

namespace Cognesy\Instructor\Extras\Evals\Contracts;

use Cognesy\Instructor\Extras\Evals\Observation;

/**
* Contract for components that generate observations for a given subject.
*
* The subject is typed as `mixed` at the language level; the T template
* parameter narrows it for static analysis. An implementation reports which
* subjects it supports through accepts() and produces the observations for
* a subject through observations().
*
* NOTE(review): callers are presumably expected to check accepts() before
* calling observations() - confirm against the code that consumes this
* contract.
*
* @template T
*/
interface CanGenerateObservations
{
/**
* Tells whether this generator can handle the given subject.
*
* @param T $subject The candidate subject.
* @return bool True if the subject is acceptable, false otherwise.
*/
public function accepts(mixed $subject) : bool;

/**
* Generates the observations for the given subject.
*
* The return type is `iterable`, so an implementation may return an array
* or any Traversable, including a generator.
*
* @param T $subject The subject to observe.
* @return iterable<Observation> The observations for the subject.
*/
public function observations(mixed $subject) : iterable;
}
2 changes: 1 addition & 1 deletion src/Extras/Evals/Contracts/CanObserveExecution.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
interface CanObserveExecution
{
/**
* Summarize the experiment.
* Observe the execution.
*
* @param Execution $execution
* @return Observation
Expand Down
2 changes: 1 addition & 1 deletion src/Extras/Evals/Contracts/CanObserveExperiment.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
interface CanObserveExperiment
{
/**
* Summarize the experiment.
* Observe the experiment.
*
* @param Experiment $experiment
* @return Observation
Expand Down
16 changes: 0 additions & 16 deletions src/Extras/Evals/Contracts/CanProvideExecutionObservations.php

This file was deleted.

17 changes: 0 additions & 17 deletions src/Extras/Evals/Contracts/CanSummarizeExecution.php

This file was deleted.

17 changes: 0 additions & 17 deletions src/Extras/Evals/Contracts/CanSummarizeExperiment.php

This file was deleted.

38 changes: 30 additions & 8 deletions src/Extras/Evals/Evaluators/ArrayMatchEval.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,48 @@
namespace Cognesy\Instructor\Extras\Evals\Evaluators;

use Adbar\Dot;
use Cognesy\Instructor\Extras\Evals\Contracts\CanProvideExecutionObservations;
use Cognesy\Instructor\Extras\Evals\Contracts\CanGenerateObservations;
use Cognesy\Instructor\Extras\Evals\Enums\FeedbackType;
use Cognesy\Instructor\Extras\Evals\Execution;
use Cognesy\Instructor\Extras\Evals\Feedback\Feedback;
use Cognesy\Instructor\Extras\Evals\Feedback\FeedbackItem;
use Cognesy\Instructor\Extras\Evals\Observation;
use Cognesy\Instructor\Extras\Evals\Utils\CompareNestedArrays;

class ArrayMatchEval implements CanProvideExecutionObservations
/**
* This class evaluates the match between expected and actual arrays
* and generates observations for precision, recall, and detailed feedback.
*
* @template T of Execution
*/
class ArrayMatchEval implements CanGenerateObservations
{
/**
* @param array $expected The expected array that this evaluator is
*     constructed with (promoted to a private property).
*/
public function __construct(
private array $expected,
) {}

public function observations(Execution $subject): iterable {
return [
$this->precision($subject, $this->analyse($subject)),
$this->recall($subject, $this->analyse($subject)),
...$this->critique($subject),
];
/**
* Tells whether this evaluator can handle the given subject.
*
* @param T $subject The subject to be checked.
* @return bool True if the subject is an instance of Execution, false otherwise.
*/
public function accepts(mixed $subject): bool {
return $subject instanceof Execution;
}

/**
* Yields the observations for the given subject, in a fixed order: the
* result of precision(), then the result of recall(), then everything
* produced by critique().
*
* This method is a generator, so nothing is computed until the result is
* iterated.
*
* @param mixed $subject The subject to analyze.
* @return iterable<Observation> An iterable collection of observational metrics.
*/
public function observations(mixed $subject): iterable {
// Run the analysis once and share the result between both metrics.
$analysis = $this->analyse($subject);

yield $this->precision($subject, $analysis);
yield $this->recall($subject, $analysis);
// Delegate to critique() so that its items are yielded one by one.
yield from $this->critique($subject);
}

// INTERNAL /////////////////////////////////////////////////
Expand Down
22 changes: 15 additions & 7 deletions src/Extras/Evals/Evaluators/LLMBooleanCorrectnessEval.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
namespace Cognesy\Instructor\Extras\Evals\Evaluators;

use Cognesy\Instructor\Enums\Mode;
use Cognesy\Instructor\Extras\Evals\Contracts\CanProvideExecutionObservations;
use Cognesy\Instructor\Extras\Evals\Contracts\CanGenerateObservations;
use Cognesy\Instructor\Extras\Evals\Evaluators\Data\BooleanCorrectnessAnalysis;
use Cognesy\Instructor\Extras\Evals\Execution;
use Cognesy\Instructor\Extras\Evals\Feedback\Feedback;
use Cognesy\Instructor\Extras\Evals\Observation;
use Cognesy\Instructor\Instructor;

class LLMBooleanCorrectnessEval implements CanProvideExecutionObservations
class LLMBooleanCorrectnessEval implements CanGenerateObservations
{
private BooleanCorrectnessAnalysis $result;

Expand All @@ -23,11 +23,19 @@ public function __construct(
$this->instructor = $instructor ?? new Instructor();
}

public function observations(Execution $subject): iterable {
return array_filter([
$this->measure($subject),
...$this->critique($subject),
]);
/**
* Tells whether this evaluator can handle the given subject.
*
* @param mixed $subject The candidate subject.
* @return bool True only when the subject is an Execution instance.
*/
public function accepts(mixed $subject): bool {
return $subject instanceof Execution;
}

/**
* Yields the observations for the given subject: first the result of
* measure(), then every item produced by critique().
*
* This method is a generator, so nothing is computed until the result is
* iterated.
*
* NOTE(review): the result of measure() is yielded as-is, with no filtering
* of empty values - confirm that measure() always returns an Observation.
*
* @param mixed $subject The subject to be observed.
* @return iterable<Observation> The set of observations gathered from measurement and critique.
*/
public function observations(mixed $subject): iterable {
yield $this->measure($subject);
yield from $this->critique($subject);
}

// INTERNAL /////////////////////////////////////////////////
Expand Down
16 changes: 9 additions & 7 deletions src/Extras/Evals/Evaluators/LLMGradedCorrectnessEval.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
namespace Cognesy\Instructor\Extras\Evals\Evaluators;

use Cognesy\Instructor\Enums\Mode;
use Cognesy\Instructor\Extras\Evals\Contracts\CanProvideExecutionObservations;
use Cognesy\Instructor\Extras\Evals\Contracts\CanGenerateObservations;
use Cognesy\Instructor\Extras\Evals\Evaluators\Data\GradedCorrectnessAnalysis;
use Cognesy\Instructor\Extras\Evals\Execution;
use Cognesy\Instructor\Extras\Evals\Feedback\Feedback;
use Cognesy\Instructor\Extras\Evals\Observation;
use Cognesy\Instructor\Instructor;

class LLMGradedCorrectnessEval implements CanProvideExecutionObservations
class LLMGradedCorrectnessEval implements CanGenerateObservations
{
private GradedCorrectnessAnalysis $result;

Expand All @@ -23,11 +23,13 @@ public function __construct(
$this->instructor = $instructor ?? new Instructor();
}

public function observations(Execution $subject): iterable {
return array_filter([
$this->measure($subject),
...$this->critique($subject),
]);
/**
* Tells whether this evaluator can handle the given subject.
*
* @param mixed $subject The candidate subject.
* @return bool True only when the subject is an Execution instance.
*/
public function accepts(mixed $subject): bool {
return $subject instanceof Execution;
}

/**
* Yields the observations for the given subject: first the result of
* measure(), then every item produced by critique().
*
* This method is a generator, so nothing is computed until the result is
* iterated.
*
* NOTE(review): the result of measure() is yielded as-is, with no filtering
* of empty values - confirm that measure() always returns an Observation.
*
* @param mixed $subject The subject to be observed.
* @return iterable Generator producing the measurement followed by the critique items.
*/
public function observations(mixed $subject): iterable {
yield $this->measure($subject);
yield from $this->critique($subject);
}

// INTERNAL /////////////////////////////////////////////////
Expand Down
Loading

0 comments on commit f1f18d4

Please sign in to comment.