diff --git a/evals/ComplexExtraction/ProjectsEval.php b/evals/ComplexExtraction/ProjectsEval.php index f421a34d..e42f13d5 100644 --- a/evals/ComplexExtraction/ProjectsEval.php +++ b/evals/ComplexExtraction/ProjectsEval.php @@ -8,9 +8,11 @@ class ProjectsEval implements CanObserveExecution { - public array $expectations; + private string $key; + private array $expectations; - public function __construct(array $expectations) { + public function __construct(string $key, array $expectations) { + $this->key = $key; $this->expectations = $expectations; } @@ -24,7 +26,7 @@ public function observe(Execution $execution): Observation { $result = ($expectedEvents - count($events->events)) / $expectedEvents; return Observation::make( type: 'metric', - key: 'execution.fractionFound', + key: $this->key, value: $result, metadata: [ 'executionId' => $execution->id(), diff --git a/evals/ComplexExtraction/run.php b/evals/ComplexExtraction/run.php index ee9bd95e..bb955bd0 100644 --- a/evals/ComplexExtraction/run.php +++ b/evals/ComplexExtraction/run.php @@ -5,12 +5,10 @@ use Cognesy\Instructor\Enums\Mode; use Cognesy\Instructor\Extras\Evals\Aggregators\AggregateExperimentObservation; use Cognesy\Instructor\Extras\Evals\Enums\NumberAggregationMethod; -use Cognesy\Instructor\Extras\Evals\Evaluators\ArrayMatchEval; use Cognesy\Instructor\Extras\Evals\Executors\Data\InferenceCases; use Cognesy\Instructor\Extras\Evals\Executors\Data\InstructorData; use Cognesy\Instructor\Extras\Evals\Executors\RunInstructor; use Cognesy\Instructor\Extras\Evals\Experiment; -use Cognesy\Instructor\Utils\Debug\Debug; $loader = require 'vendor/autoload.php'; $loader->add('Cognesy\\Instructor\\', __DIR__ . 
'../../src/'); @@ -32,6 +30,7 @@ executor: new RunInstructor($data), processors: [ new ProjectsEval( + key: 'execution.fractionFound', expectations: ['events' => 12] ), ], diff --git a/evals/LLMModes/CompanyEval.php b/evals/LLMModes/CompanyEval.php index db853705..cce0a26c 100644 --- a/evals/LLMModes/CompanyEval.php +++ b/evals/LLMModes/CompanyEval.php @@ -3,12 +3,12 @@ namespace Cognesy\Evals\LLMModes; use Cognesy\Instructor\Enums\Mode; -use Cognesy\Instructor\Extras\Evals\Contracts\CanProvideExecutionObservations; +use Cognesy\Instructor\Extras\Evals\Contracts\CanGenerateObservations; use Cognesy\Instructor\Extras\Evals\Execution; use Cognesy\Instructor\Extras\Evals\Observation; use Cognesy\Instructor\Utils\Str; -class CompanyEval implements CanProvideExecutionObservations +class CompanyEval implements CanGenerateObservations { private string $key; private array $expectations; @@ -21,7 +21,11 @@ public function __construct( $this->expectations = $expectations; } - public function observations(Execution $subject): iterable { + public function accepts(mixed $subject): bool { + return $subject instanceof Execution; + } + + public function observations(mixed $subject): iterable { yield $this->correctness($subject); } diff --git a/notes/NOTES.md b/notes/NOTES.md index 5aa5cd49..43df5271 100644 --- a/notes/NOTES.md +++ b/notes/NOTES.md @@ -2,12 +2,14 @@ ## High priority -- Evals!!! +- Evals / eval framework + * execution level correctness metric + * add input, output, etc. tokens default metrics + * simplify contracts - currently 5 (!) 
contracts for observations - Add 'Output' section to each example, generate it and include in docs, so reader can see what they can expect - Logging via PSR-3 - Schema abstraction layer - decouple names and descriptions from the model - Prompt optimization via TextGrad -- Eval framework - Agents - Indexing to vector DB - CLI app diff --git a/src/Extras/Embeddings/EmbeddingsResponse.php b/src/Extras/Embeddings/EmbeddingsResponse.php index 57e073b7..440e554c 100644 --- a/src/Extras/Embeddings/EmbeddingsResponse.php +++ b/src/Extras/Embeddings/EmbeddingsResponse.php @@ -37,17 +37,11 @@ public function split(int $index) : array { return [ new EmbeddingsResponse( vectors: array_slice($this->vectors, 0, $index), - usage: new Usage( - inputTokens: $this->usage()->inputTokens, - outputTokens: $this->usage()->outputTokens, - ), + usage: Usage::copy($this->usage()), // TODO: token split is arbitrary ), new EmbeddingsResponse( vectors: array_slice($this->vectors, $index), - usage: new Usage( - inputTokens: 0, - outputTokens: 0, - ), + usage: new Usage(), // TODO: token split is arbitrary ), ]; } diff --git a/src/Extras/Evals/Contracts/CanGenerateObservations.php b/src/Extras/Evals/Contracts/CanGenerateObservations.php new file mode 100644 index 00000000..672f7824 --- /dev/null +++ b/src/Extras/Evals/Contracts/CanGenerateObservations.php @@ -0,0 +1,28 @@ + A collection of observations. + */ + public function observations(mixed $subject) : iterable; +} diff --git a/src/Extras/Evals/Contracts/CanObserveExecution.php b/src/Extras/Evals/Contracts/CanObserveExecution.php index 32f423d1..918450ed 100644 --- a/src/Extras/Evals/Contracts/CanObserveExecution.php +++ b/src/Extras/Evals/Contracts/CanObserveExecution.php @@ -8,7 +8,7 @@ interface CanObserveExecution { /** - * Summarize the experiment. + * Observe the experiment. 
* * @param Execution $execution * @return Observation diff --git a/src/Extras/Evals/Contracts/CanObserveExperiment.php b/src/Extras/Evals/Contracts/CanObserveExperiment.php index 3f9eead0..d86024e3 100644 --- a/src/Extras/Evals/Contracts/CanObserveExperiment.php +++ b/src/Extras/Evals/Contracts/CanObserveExperiment.php @@ -8,7 +8,7 @@ interface CanObserveExperiment { /** - * Summarize the experiment. + * Observe the experiment. * * @param Experiment $experiment * @return Observation diff --git a/src/Extras/Evals/Contracts/CanProvideExecutionObservations.php b/src/Extras/Evals/Contracts/CanProvideExecutionObservations.php deleted file mode 100644 index 678f6642..00000000 --- a/src/Extras/Evals/Contracts/CanProvideExecutionObservations.php +++ /dev/null @@ -1,16 +0,0 @@ - - */ - public function observations(Execution $subject): iterable; -} diff --git a/src/Extras/Evals/Contracts/CanSummarizeExecution.php b/src/Extras/Evals/Contracts/CanSummarizeExecution.php deleted file mode 100644 index d97b69a7..00000000 --- a/src/Extras/Evals/Contracts/CanSummarizeExecution.php +++ /dev/null @@ -1,17 +0,0 @@ -precision($subject, $this->analyse($subject)), - $this->recall($subject, $this->analyse($subject)), - ...$this->critique($subject), - ]; + /** + * Checks if the provided subject is an instance of Execution. + * + * @param T $subject The subject to be checked. + * @return bool True if the subject is an instance of Execution, false otherwise. + */ + public function accepts(mixed $subject): bool { + return $subject instanceof Execution; + } + + /** + * Generates a series of observational metrics for the given subject. + * + * @param mixed $subject The subject to analyze. + * @return iterable An iterable collection of observational metrics. 
+ */ + public function observations(mixed $subject): iterable { + $analysis = $this->analyse($subject); + + yield $this->precision($subject, $analysis); + yield $this->recall($subject, $analysis); + yield from $this->critique($subject); } // INTERNAL ///////////////////////////////////////////////// diff --git a/src/Extras/Evals/Evaluators/LLMBooleanCorrectnessEval.php b/src/Extras/Evals/Evaluators/LLMBooleanCorrectnessEval.php index 280e3b61..8bc52ccf 100644 --- a/src/Extras/Evals/Evaluators/LLMBooleanCorrectnessEval.php +++ b/src/Extras/Evals/Evaluators/LLMBooleanCorrectnessEval.php @@ -3,14 +3,14 @@ namespace Cognesy\Instructor\Extras\Evals\Evaluators; use Cognesy\Instructor\Enums\Mode; -use Cognesy\Instructor\Extras\Evals\Contracts\CanProvideExecutionObservations; +use Cognesy\Instructor\Extras\Evals\Contracts\CanGenerateObservations; use Cognesy\Instructor\Extras\Evals\Evaluators\Data\BooleanCorrectnessAnalysis; use Cognesy\Instructor\Extras\Evals\Execution; use Cognesy\Instructor\Extras\Evals\Feedback\Feedback; use Cognesy\Instructor\Extras\Evals\Observation; use Cognesy\Instructor\Instructor; -class LLMBooleanCorrectnessEval implements CanProvideExecutionObservations +class LLMBooleanCorrectnessEval implements CanGenerateObservations { private BooleanCorrectnessAnalysis $result; @@ -23,11 +23,19 @@ public function __construct( $this->instructor = $instructor ?? new Instructor(); } - public function observations(Execution $subject): iterable { - return array_filter([ - $this->measure($subject), - ...$this->critique($subject), - ]); + public function accepts(mixed $subject): bool { + return $subject instanceof Execution; + } + + /** + * Compiles an array of observations for the given subject by measuring and critiquing it. + * + * @param mixed $subject The subject to be observed. + * @return iterable The set of observations gathered from measurement and critique. 
+ */ + public function observations(mixed $subject): iterable { + yield $this->measure($subject); + yield from $this->critique($subject); } // INTERNAL ///////////////////////////////////////////////// diff --git a/src/Extras/Evals/Evaluators/LLMGradedCorrectnessEval.php b/src/Extras/Evals/Evaluators/LLMGradedCorrectnessEval.php index 05448c6e..c0049174 100644 --- a/src/Extras/Evals/Evaluators/LLMGradedCorrectnessEval.php +++ b/src/Extras/Evals/Evaluators/LLMGradedCorrectnessEval.php @@ -3,14 +3,14 @@ namespace Cognesy\Instructor\Extras\Evals\Evaluators; use Cognesy\Instructor\Enums\Mode; -use Cognesy\Instructor\Extras\Evals\Contracts\CanProvideExecutionObservations; +use Cognesy\Instructor\Extras\Evals\Contracts\CanGenerateObservations; use Cognesy\Instructor\Extras\Evals\Evaluators\Data\GradedCorrectnessAnalysis; use Cognesy\Instructor\Extras\Evals\Execution; use Cognesy\Instructor\Extras\Evals\Feedback\Feedback; use Cognesy\Instructor\Extras\Evals\Observation; use Cognesy\Instructor\Instructor; -class LLMGradedCorrectnessEval implements CanProvideExecutionObservations +class LLMGradedCorrectnessEval implements CanGenerateObservations { private GradedCorrectnessAnalysis $result; @@ -23,11 +23,13 @@ public function __construct( $this->instructor = $instructor ?? 
new Instructor(); } - public function observations(Execution $subject): iterable { - return array_filter([ - $this->measure($subject), - ...$this->critique($subject), - ]); + public function accepts(mixed $subject): bool { + return $subject instanceof Execution; + } + + public function observations(mixed $subject): iterable { + yield $this->measure($subject); + yield from $this->critique($subject); } // INTERNAL ///////////////////////////////////////////////// diff --git a/src/Extras/Evals/Execution.php b/src/Extras/Evals/Execution.php index 8ec309d2..fb80e78b 100644 --- a/src/Extras/Evals/Execution.php +++ b/src/Extras/Evals/Execution.php @@ -2,14 +2,13 @@ namespace Cognesy\Instructor\Extras\Evals; +use Cognesy\Instructor\Extras\Evals\Contracts\CanGenerateObservations; use Cognesy\Instructor\Extras\Evals\Contracts\CanObserveExecution; -use Cognesy\Instructor\Extras\Evals\Contracts\CanProvideExecutionObservations; use Cognesy\Instructor\Extras\Evals\Contracts\CanRunExecution; -use Cognesy\Instructor\Extras\Evals\Contracts\CanSummarizeExecution; use Cognesy\Instructor\Extras\Evals\Observation\MakeObservations; use Cognesy\Instructor\Extras\Evals\Observation\SelectObservations; -use Cognesy\Instructor\Extras\Evals\Observers\ExecutionDuration; -use Cognesy\Instructor\Extras\Evals\Observers\ExecutionTotalTokens; +use Cognesy\Instructor\Extras\Evals\Observers\DurationObserver; +use Cognesy\Instructor\Extras\Evals\Observers\TokenUsageObserver; use Cognesy\Instructor\Features\LLM\Data\Usage; use Cognesy\Instructor\Utils\DataMap; use Cognesy\Instructor\Utils\Uuid; @@ -20,8 +19,8 @@ class Execution { /** @var CanObserveExecution[] */ private array $defaultObservers = [ - ExecutionDuration::class, - ExecutionTotalTokens::class, + DurationObserver::class, + TokenUsageObserver::class, ]; private CanRunExecution $action; @@ -202,22 +201,22 @@ public function hasSummaries() : bool { private function makeObservations() : array { $observations = MakeObservations::for($this) - 
->withSources([ + ->withObservers([ $this->processors, $this->defaultObservers, ]) ->only([ CanObserveExecution::class, - CanProvideExecutionObservations::class, + CanGenerateObservations::class, ]); $summaries = MakeObservations::for($this) - ->withSources([ + ->withObservers([ $this->postprocessors ]) ->only([ - CanSummarizeExecution::class, - CanProvideExecutionObservations::class, + CanObserveExecution::class, + CanGenerateObservations::class, ]); return array_filter(array_merge($observations, $summaries)); diff --git a/src/Extras/Evals/Experiment.php b/src/Extras/Evals/Experiment.php index ebfa8ed6..0e4ee3f7 100644 --- a/src/Extras/Evals/Experiment.php +++ b/src/Extras/Evals/Experiment.php @@ -2,15 +2,15 @@ namespace Cognesy\Instructor\Extras\Evals; use Cognesy\Instructor\Extras\Evals\Console\Display; +use Cognesy\Instructor\Extras\Evals\Contracts\CanGenerateObservations; use Cognesy\Instructor\Extras\Evals\Contracts\CanObserveExperiment; use Cognesy\Instructor\Extras\Evals\Contracts\CanRunExecution; -use Cognesy\Instructor\Extras\Evals\Contracts\CanSummarizeExperiment; use Cognesy\Instructor\Extras\Evals\Observation\MakeObservations; use Cognesy\Instructor\Extras\Evals\Observation\SelectObservations; -use Cognesy\Instructor\Extras\Evals\Observers\ExperimentDuration; +use Cognesy\Instructor\Extras\Evals\Observers\DurationObserver; use Cognesy\Instructor\Extras\Evals\Observers\ExperimentFailureRate; use Cognesy\Instructor\Extras\Evals\Observers\ExperimentLatency; -use Cognesy\Instructor\Extras\Evals\Observers\ExperimentTotalTokens; +use Cognesy\Instructor\Extras\Evals\Observers\TokenUsageObserver; use Cognesy\Instructor\Features\LLM\Data\Usage; use Cognesy\Instructor\Utils\DataMap; use Cognesy\Instructor\Utils\Uuid; @@ -20,8 +20,8 @@ class Experiment { private array $defaultProcessors = [ - ExperimentDuration::class, - ExperimentTotalTokens::class, + DurationObserver::class, + TokenUsageObserver::class, ExperimentLatency::class, ExperimentFailureRate::class, ]; 
@@ -203,22 +203,23 @@ private function accumulateUsage() : Usage { private function makeObservations() : array { // execute observers $observations = MakeObservations::for($this) - ->withSources([ + ->withObservers([ $this->processors, $this->defaultProcessors, ]) ->only([ CanObserveExperiment::class, + CanGenerateObservations::class, ]); // execute summarizers $summaries = MakeObservations::for($this) - ->withSources([ + ->withObservers([ $this->postprocessors, ]) ->only([ - CanSummarizeExperiment::class, CanObserveExperiment::class, + CanGenerateObservations::class, ]); return array_filter(array_merge($observations, $summaries)); diff --git a/src/Extras/Evals/Observation/MakeObservations.php b/src/Extras/Evals/Observation/MakeObservations.php index 939fedd7..2ec964df 100644 --- a/src/Extras/Evals/Observation/MakeObservations.php +++ b/src/Extras/Evals/Observation/MakeObservations.php @@ -2,43 +2,63 @@ namespace Cognesy\Instructor\Extras\Evals\Observation; +use Cognesy\Instructor\Extras\Evals\Contracts\CanGenerateObservations; use Cognesy\Instructor\Extras\Evals\Contracts\CanObserveExecution; use Cognesy\Instructor\Extras\Evals\Contracts\CanObserveExperiment; -use Cognesy\Instructor\Extras\Evals\Contracts\CanProvideExecutionObservations; -use Cognesy\Instructor\Extras\Evals\Contracts\CanSummarizeExecution; -use Cognesy\Instructor\Extras\Evals\Contracts\CanSummarizeExperiment; -use Cognesy\Instructor\Extras\Evals\Execution; -use Cognesy\Instructor\Extras\Evals\Experiment; use Cognesy\Instructor\Extras\Evals\Observation; use Exception; +/** + * Makes observations based on an observed subject and a set of observers. 
+ */ class MakeObservations { public function __construct( - private $sources = [], - private ?Experiment $experiment = null, - private ?Execution $execution = null, + private mixed $subject, + private array $observers = [], ) {} - public static function for(Experiment|Execution $subject) : self { - return new self( - experiment: $subject instanceof Experiment ? $subject : null, - execution: $subject instanceof Execution ? $subject : null, - ); + /** + * Creates a new instance of the class with the given subject. + * + * @param mixed $subject The subject to be assigned to the new instance. + * + * @return self Returns a new instance of the class. + */ + public static function for(mixed $subject) : self { + return new self(subject: $subject); } - public function withSources(array $sources) : self { - if (is_array($sources[0] ?? null)) { - $sources = array_merge(...$sources); + /** + * Sets the observers for the current instance. + * + * @param array $observers An array of observers to be assigned. + * + * @return self Returns the current instance with the updated observers. + */ + public function withObservers(array $observers) : self { + if (is_array($observers[0] ?? null)) { + $observers = array_merge(...$observers); } - $this->sources = $sources; + $this->observers = $observers; return $this; } + /** + * Retrieves all observations from the current context. + * + * @return array List of all observations. + */ public function all() : array { return $this->observations(); } + /** + * Retrieves observations for the given types of observers. + * + * @param array $types List of observer types to generate the observations for. + * @return array List of observations. 
+ */ public function only(array $types) : array { return $this->observations($types); } @@ -46,28 +66,24 @@ public function only(array $types) : array { // INTERNAL //////////////////////////////////////////////// private function observations(array $types = null) : array { - $observations = []; - foreach ($this->sources($this->sources, $types) as $source) { - $observations[] = match(true) { - $source instanceof CanProvideExecutionObservations => $source->observations($this->execution), - $source instanceof CanObserveExperiment => $this->wrapObservation($source->observe(...), $this->experiment), - $source instanceof CanSummarizeExperiment => $this->wrapObservation($source->summarize(...), $this->experiment), - $source instanceof CanObserveExecution => $this->wrapObservation($source->observe(...), $this->execution), - $source instanceof CanSummarizeExecution => $this->wrapObservation($source->summarize(...), $this->execution), - default => throw new Exception('Invalid observation source: ' . get_class($source)), + $sources = []; + foreach ($this->observers($this->observers, $types) as $observer) { + $sources[] = match(true) { + $observer instanceof CanGenerateObservations => $this->wrapGenerator($observer, $this->subject), + $observer instanceof CanObserveExperiment => $this->wrapObservation($observer->observe(...), $this->subject), + $observer instanceof CanObserveExecution => $this->wrapObservation($observer->observe(...), $this->subject), + default => throw new Exception('Invalid observation source: ' . 
get_class($observer)), }; } - return $this->getObservations($observations); + return $this->getObservations($sources); } private function getObservations(iterable $sources) : array { // filter out empty items and turn array to Observation[] $result = []; - foreach ($sources as $source) { - foreach ($source as $observation) { - if ($observation instanceof Observation) { - $result[] = $observation; - } + foreach ($sources as $observer) { + foreach ($observer as $observation) { + $result[] = $observation; } } return $result; @@ -76,39 +92,49 @@ private function getObservations(iterable $sources) : array { /** * @param callable $callback * @param object $subject - * @return Observation + * @return iterable + */ + private function wrapObservation(callable $callback, mixed $subject) : iterable { + if ($subject !== null) { + yield $callback($subject); + } + } + + /** + * @param CanGenerateObservations $generator + * @param object $subject + * @return iterable */ - private function wrapObservation(callable $callback, ?object $subject) : array { - if ($subject === null) { - return []; + private function wrapGenerator(CanGenerateObservations $generator, mixed $subject) : iterable { + if ($generator->accepts($subject)) { + yield from $generator->observations($subject); } - return [$callback($subject)]; } - private function sources(array $sources, array $types = null) : iterable { - $instances = $this->makeInstances($sources); + private function observers(array $observers, array $types = null) : iterable { + $instances = $this->makeInstances($observers); return match(true) { empty($types) => $instances, default => array_filter($instances, fn($instance) => $this->isOneOf($instance, $types)), }; } - private function makeInstances(array $sources) : array { + private function makeInstances(array $observers) : array { $instances = []; - foreach ($sources as $source) { + foreach ($observers as $observer) { $instances[] = match(true) { - is_string($source) => new $source, - 
is_object($source) => $source, - default => throw new Exception('Invalid observation source type: ' . gettype($source)), + is_string($observer) => new $observer, + is_object($observer) => $observer, + default => throw new Exception('Invalid observation source type: ' . gettype($observer)), }; } return $instances; } - private function isOneOf(object $source, array $types) : bool { + private function isOneOf(object $observer, array $types) : bool { return array_reduce( array: $types, - callback: fn($carry, $type) => $carry || is_a($source, $type, true), + callback: fn($carry, $type) => $carry || is_a($observer, $type, true), initial: false ); } diff --git a/src/Extras/Evals/Observers/DurationObserver.php b/src/Extras/Evals/Observers/DurationObserver.php new file mode 100644 index 00000000..db871208 --- /dev/null +++ b/src/Extras/Evals/Observers/DurationObserver.php @@ -0,0 +1,53 @@ + true, + $subject instanceof Execution => true, + }; + } + + public function observations(mixed $subject): iterable { + yield match(true) { + $subject instanceof Experiment => $this->experimentDuration($subject), + $subject instanceof Execution => $this->executionDuration($subject), + }; + } + + private function experimentDuration(Experiment $experiment): Observation { + return Observation::make( + type: 'metric', + key: 'experiment.timeElapsed', + value: $experiment->timeElapsed(), + metadata: [ + 'experimentId' => $experiment->id(), + 'unit' => 'seconds', + 'format' => '%.2f', + 'aggregationMethod' => 'sum', + ], + ); + } + + private function executionDuration(Execution $execution): Observation { + return Observation::make( + type: 'metric', + key: 'execution.timeElapsed', + value: $execution->timeElapsed(), + metadata: [ + 'executionId' => $execution->id(), + 'unit' => 'seconds', + 'format' => '%.2f', + 'aggregationMethod' => 'sum', + ], + ); + } +} diff --git a/src/Extras/Evals/Observers/ExecutionDuration.php b/src/Extras/Evals/Observers/ExecutionDuration.php deleted file mode 100644 
index 7ba07fca..00000000 --- a/src/Extras/Evals/Observers/ExecutionDuration.php +++ /dev/null @@ -1,23 +0,0 @@ -timeElapsed(), - metadata: [ - 'executionId' => $execution->id(), - 'unit' => 'seconds', - 'format' => '%.2f sec', - ], - ); - } -} \ No newline at end of file diff --git a/src/Extras/Evals/Observers/ExecutionTotalTokens.php b/src/Extras/Evals/Observers/ExecutionTotalTokens.php deleted file mode 100644 index 61539e98..00000000 --- a/src/Extras/Evals/Observers/ExecutionTotalTokens.php +++ /dev/null @@ -1,23 +0,0 @@ -usage()->total(), - metadata: [ - 'executionId' => $execution->id(), - 'unit' => 'tokens', - 'format' => '%d tokens', - ], - ); - } -} diff --git a/src/Extras/Evals/Observers/ExperimentDuration.php b/src/Extras/Evals/Observers/ExperimentDuration.php deleted file mode 100644 index d2538d7f..00000000 --- a/src/Extras/Evals/Observers/ExperimentDuration.php +++ /dev/null @@ -1,24 +0,0 @@ -timeElapsed(), - metadata: [ - 'experimentId' => $experiment->id(), - 'unit' => 'seconds', - 'format' => '%.2f', - 'aggregationMethod' => 'sum', - ], - ); - } -} diff --git a/src/Extras/Evals/Observers/ExperimentFailureRate.php b/src/Extras/Evals/Observers/ExperimentFailureRate.php index 22f839a9..7dce3d0b 100644 --- a/src/Extras/Evals/Observers/ExperimentFailureRate.php +++ b/src/Extras/Evals/Observers/ExperimentFailureRate.php @@ -8,6 +8,12 @@ class ExperimentFailureRate implements CanObserveExperiment { + /** + * Observes the given experiment to record its failure rate and other related metrics. + * + * @param Experiment $experiment The experiment to observe. + * @return Observation The observation containing the experiment's failure rate and metadata. 
+ */ public function observe(Experiment $experiment): Observation { return Observation::make( type: 'metric', @@ -16,7 +22,7 @@ public function observe(Experiment $experiment): Observation { metadata: [ 'experimentId' => $experiment->id(), 'unit' => 'fraction', - 'format' => '%d.2', + 'format' => '%.2f', 'failed' => $this->metrics($experiment)->failed, 'total' => $this->metrics($experiment)->total, 'aggregationMethod' => 'mean', @@ -24,6 +30,14 @@ public function observe(Experiment $experiment): Observation { ); } + /** + * Calculates and returns the metrics for the given experiment, including + * failure rate, total executions, and failed executions. + * + * @param Experiment $experiment The experiment instance from which metrics are calculated. + * + * @return object An anonymous object containing failureRate, total, and failed properties. + */ private function metrics(Experiment $experiment) : object { $executionCount = count($experiment->executions()); $executionsFailed = array_reduce($experiment->executions(), function ($carry, $execution) { diff --git a/src/Extras/Evals/Observers/ExperimentTotalTokens.php b/src/Extras/Evals/Observers/ExperimentTotalTokens.php deleted file mode 100644 index 2fe0ebe5..00000000 --- a/src/Extras/Evals/Observers/ExperimentTotalTokens.php +++ /dev/null @@ -1,24 +0,0 @@ -usage()->total(), - metadata: [ - 'experimentId' => $experiment->id(), - 'unit' => 'tokens', - 'format' => '%d', - 'aggregationMethod' => 'sum', - ], - ); - } -} diff --git a/src/Extras/Evals/Observers/TokenUsageObserver.php b/src/Extras/Evals/Observers/TokenUsageObserver.php new file mode 100644 index 00000000..ccd2e98a --- /dev/null +++ b/src/Extras/Evals/Observers/TokenUsageObserver.php @@ -0,0 +1,106 @@ + + */ +class TokenUsageObserver implements CanGenerateObservations +{ + /** + * Checks if the given subject is an instance of Experiment or Execution. + * + * @param mixed $subject The subject to be checked. 
+ * @return bool Returns true if the subject is of accepted type. + */ + public function accepts(mixed $subject): bool { + return match(true) { + $subject instanceof Experiment, $subject instanceof Execution => true, + default => false, + }; + } + + /** + * Generates observations for the subject. + * + * @param T $subject The subject for which observations need to be generated. + * @return iterable Yields a series of Observation objects. + */ + public function observations(mixed $subject): iterable { + yield from match(true) { + $subject instanceof Experiment => $this->experimentUsage($subject), + $subject instanceof Execution => $this->executionUsage($subject), + }; + } + + // INTERNAL //////////////////////////////////////////////// + + /** + * Generate observations from an Execution + * + * @param Execution $execution Observation subject. + * @return iterable Yields Observation objects with token usage. + */ + private function executionUsage(Execution $execution): iterable { + $observations = [ + 'execution.tokens.total' => $execution->usage()->total(), + 'execution.tokens.output' => $execution->usage()->output(), + 'execution.tokens.input' => $execution->usage()->input(), + 'execution.tokens.cache' => $execution->usage()->cache(), + ]; + foreach ($observations as $key => $value) { + yield $this->makeObservation('executionId', $execution->id(), $key, $value); + } + } + + /** + * Generate observations from an Experiment + * + * @param Experiment $experiment Observation subject. + * @return iterable Yields Observation objects with token usage. 
+ */ + private function experimentUsage(Experiment $experiment): iterable { + $observations = [ + 'experiment.tokens.total' => $experiment->usage()->total(), + 'experiment.tokens.output' => $experiment->usage()->output(), + 'experiment.tokens.input' => $experiment->usage()->input(), + 'experiment.tokens.cache' => $experiment->usage()->cache(), + ]; + foreach ($observations as $key => $value) { + yield $this->makeObservation('experimentId', $experiment->id(), $key, $value); + } + } + + /** + * Create an Observation. + * + * @param string $id Object identifier. + * @param string $key The key for the observation metric. + * @param mixed $value The value for the observation metric. + * @return Observation The created Observation object. + */ + private function makeObservation(string $idName, string $id, string $key, mixed $value): Observation { + return Observation::make( + type: 'metric', + key: $key, + value: $value, + metadata: [ + $idName => $id, + 'unit' => 'tokens', + 'format' => '%d', + 'aggregationMethod' => 'sum', + ], + ); + } +} diff --git a/src/Features/Http/HttpClient.php b/src/Features/Http/HttpClient.php index 6111d79e..38a3191d 100644 --- a/src/Features/Http/HttpClient.php +++ b/src/Features/Http/HttpClient.php @@ -11,41 +11,94 @@ use Cognesy\Instructor\Utils\Settings; use InvalidArgumentException; +/** + * The HttpClient class is responsible for managing HTTP client configurations and instantiating + * appropriate HTTP driver implementations based on the provided configuration. + * + * @property EventDispatcher $events Instance for dispatching events. + * @property CanHandleHttp $driver Instance that handles HTTP requests. + */ class HttpClient { protected EventDispatcher $events; protected CanHandleHttp $driver; + /** + * Constructor method for initializing the HTTP client. + * + * @param string $client The client configuration name to load. + * @param EventDispatcher|null $events The event dispatcher instance to use. 
+ * @return void + */ public function __construct(string $client = '', EventDispatcher $events = null) { $this->events = $events ?? new EventDispatcher(); $config = HttpClientConfig::load($client ?: Settings::get('http', "defaultClient")); $this->driver = $this->makeDriver($config); } + /** + * Static factory method to create an instance of the HTTP handler. + * + * @param string $client The client configuration name to load. + * @param EventDispatcher|null $events The event dispatcher instance to use. + * @return CanHandleHttp Returns an instance that can handle HTTP operations. + */ public static function make(string $client = '', ?EventDispatcher $events = null) : CanHandleHttp { return (new self($client, $events))->get(); } + /** + * Configures the HttpClient instance with the given client name. + * + * @param string $name The name of the client to load the configuration for. + * @return self Returns the instance of the class for method chaining. + */ public function withClient(string $name) : self { $config = HttpClientConfig::load($name); $this->driver = $this->makeDriver($config); return $this; } + /** + * Configures the HttpClient instance with the given configuration. + * + * @param HttpClientConfig $config The configuration object to set up the HttpClient. + * @return self Returns the instance of the class for method chaining. + */ public function withConfig(HttpClientConfig $config) : self { $this->driver = $this->makeDriver($config); return $this; } + /** + * Sets the HTTP handler driver for the instance. + * + * @param CanHandleHttp $driver The driver capable of handling HTTP requests. + * @return self Returns the instance of the class for method chaining. + */ public function withDriver(CanHandleHttp $driver) : self { $this->driver = $driver; return $this; } + /** + * Retrieves the current HTTP handler instance. + * + * @return CanHandleHttp The HTTP handler associated with the current context. 
+ */ public function get() : CanHandleHttp { return $this->driver; } + // INTERNAL /////////////////////////////////////////////////////// + + /** + * Creates an HTTP driver instance based on the specified configuration. + * + * @param HttpClientConfig $config The configuration object defining the type of HTTP client and its settings. + * @return CanHandleHttp The instantiated HTTP driver corresponding to the specified client type. + * @throws InvalidArgumentException If the specified client type is not supported. + */ private function makeDriver(HttpClientConfig $config) : CanHandleHttp { return match ($config->httpClientType) { HttpClientType::Guzzle => new GuzzleDriver(config: $config, events: $this->events), diff --git a/src/Features/LLM/Data/LLMResponse.php b/src/Features/LLM/Data/LLMResponse.php index 4f0cffd1..e60ed294 100644 --- a/src/Features/LLM/Data/LLMResponse.php +++ b/src/Features/LLM/Data/LLMResponse.php @@ -82,11 +82,7 @@ private function makeFromPartialResponses(array $partialResponses = []) : self { } $content .= $partialResponse->contentDelta; $this->responseData[] = $partialResponse->responseData; - $this->usage()->inputTokens += $partialResponse->usage()->inputTokens; - $this->usage()->outputTokens += $partialResponse->usage()->outputTokens; - $this->usage()->cacheWriteTokens += $partialResponse->usage()->cacheWriteTokens; - $this->usage()->cacheReadTokens += $partialResponse->usage()->cacheReadTokens; - $this->usage()->reasoningTokens += $partialResponse->usage()->reasoningTokens; + $this->usage()->accumulate($partialResponse->usage()); $this->finishReason = $partialResponse->finishReason; } $this->content = $content; diff --git a/src/Features/LLM/Data/Usage.php b/src/Features/LLM/Data/Usage.php index cbf6dae0..090041de 100644 --- a/src/Features/LLM/Data/Usage.php +++ b/src/Features/LLM/Data/Usage.php @@ -26,6 +26,16 @@ public static function fromArray(array $value) : static { ); } + public static function copy(Usage $usage) : static { + return 
new static( + inputTokens: $usage->inputTokens, + outputTokens: $usage->outputTokens, + cacheWriteTokens: $usage->cacheWriteTokens, + cacheReadTokens: $usage->cacheReadTokens, + reasoningTokens: $usage->reasoningTokens, + ); + } + public function total() : int { return $this->inputTokens + $this->outputTokens diff --git a/src/Features/LLM/Drivers/OpenAIDriver.php b/src/Features/LLM/Drivers/OpenAIDriver.php index dcb46c3f..1f956cfb 100644 --- a/src/Features/LLM/Drivers/OpenAIDriver.php +++ b/src/Features/LLM/Drivers/OpenAIDriver.php @@ -218,7 +218,7 @@ private function makeUsage(array $data): Usage { ?? 0, cacheWriteTokens: 0, cacheReadTokens: $data['usage']['prompt_tokens_details']['cached_tokens'] ?? 0, - reasoningTokens: 0, + reasoningTokens: $data['usage']['completion_tokens_details']['reasoning_tokens'] ?? 0, ); } } diff --git a/src/Features/LLM/Inference.php b/src/Features/LLM/Inference.php index 93df6e31..7c0757b2 100644 --- a/src/Features/LLM/Inference.php +++ b/src/Features/LLM/Inference.php @@ -22,6 +22,11 @@ use Cognesy\Instructor\Utils\Settings; use InvalidArgumentException; +/** + * Class Inference + * + * Handles LLM inference operations including configuration management, HTTP client handling, and event dispatching. + */ class Inference { protected LLMConfig $config; @@ -31,6 +36,17 @@ class Inference protected CanHandleHttp $httpClient; protected CachedContext $cachedContext; + /** + * Constructor for initializing dependencies and configurations. + * + * @param string $connection The connection string. + * @param LLMConfig|null $config Configuration object. + * @param CanHandleHttp|null $httpClient HTTP client handler. + * @param CanHandleInference|null $driver Inference handler. + * @param EventDispatcher|null $events Event dispatcher. 
+ * + * @return void + */ public function __construct( string $connection = '', LLMConfig $config = null, @@ -48,6 +64,16 @@ public function __construct( // STATIC ////////////////////////////////////////////////////////////////// + /** + * Generates a text response based on the provided messages and configuration. + * + * @param string|array $messages The input messages to process. + * @param string $connection The connection string. + * @param string $model The model identifier. + * @param array $options Additional options for the inference. + * + * @return string The generated text response. + */ public static function text( string|array $messages, string $connection = '', @@ -67,12 +93,26 @@ public static function text( // PUBLIC ////////////////////////////////////////////////////////////////// + /** + * Updates the configuration and reinitializes the driver. + * + * @param LLMConfig $config The configuration object to set. + * + * @return self + */ public function withConfig(LLMConfig $config): self { $this->config = $config; $this->driver = $this->makeDriver($this->config, $this->httpClient); return $this; } + /** + * Sets the connection and updates the configuration and driver. + * + * @param string $connection The connection string to be used. + * + * @return self Returns the current instance with the updated connection. + */ public function withConnection(string $connection): self { if (empty($connection)) { return $this; @@ -82,22 +122,53 @@ public function withConnection(string $connection): self { return $this; } + /** + * Sets a custom HTTP client and updates the inference driver accordingly. + * + * @param CanHandleHttp $httpClient The custom HTTP client handler. + * + * @return self Returns the current instance for method chaining. 
+ */ public function withHttpClient(CanHandleHttp $httpClient): self { $this->httpClient = $httpClient; $this->driver = $this->makeDriver($this->config, $this->httpClient); return $this; } + /** + * Sets the driver for inference handling and returns the current instance. + * + * @param CanHandleInference $driver The inference handler to be set. + * + * @return self + */ public function withDriver(CanHandleInference $driver): self { $this->driver = $driver; return $this; } + /** + * Enable or disable debugging for the current instance. + * + * @param bool $debug Whether to enable debug mode. Default is true. + * + * @return self + */ public function withDebug(bool $debug = true) : self { Debug::setEnabled($debug); // TODO: fix me - debug should not be global, should be request specific return $this; } + /** + * Sets a cached context with provided messages, tools, tool choices, and response format. + * + * @param string|array $messages Messages to be cached in the context. + * @param array $tools Tools to be included in the cached context. + * @param string|array $toolChoice Tool choices for the cached context. + * @param array $responseFormat Format for responses in the cached context. + * + * @return self + */ public function withCachedContext( string|array $messages = [], array $tools = [], @@ -108,6 +179,19 @@ public function withCachedContext( return $this; } + /** + * Creates an inference request and returns the inference response. + * + * @param string|array $messages The input messages for the inference. + * @param string $model The model to be used for the inference. + * @param array $tools The tools to be used for the inference. + * @param string|array $toolChoice The choice of tools for the inference. + * @param array $responseFormat The format of the response. + * @param array $options Additional options for the inference. + * @param Mode $mode The mode of operation for the inference. 
+ * + * @return InferenceResponse The response from the inference request. + */ public function create( string|array $messages = [], string $model = '', @@ -132,6 +216,15 @@ public function create( // INTERNAL //////////////////////////////////////////////////////////////// + /** + * Creates and returns an appropriate driver instance based on the given configuration. + * + * @param LLMConfig $config Configuration object specifying the provider type and other necessary settings. + * @param CanHandleHttp $httpClient An HTTP client instance to handle HTTP requests. + * + * @return CanHandleInference A driver instance matching the specified provider type. + * @throws InvalidArgumentException If the provider type is not supported. + */ protected function makeDriver(LLMConfig $config, CanHandleHttp $httpClient): CanHandleInference { return match ($config->providerType) { LLMProviderType::Anthropic => new AnthropicDriver($config, $httpClient, $this->events), diff --git a/src/Features/Schema/Visitors/SchemaToJsonSchema.php b/src/Features/Schema/Visitors/SchemaToJsonSchema.php index 9080c373..1427b545 100644 --- a/src/Features/Schema/Visitors/SchemaToJsonSchema.php +++ b/src/Features/Schema/Visitors/SchemaToJsonSchema.php @@ -14,6 +14,10 @@ use DateTime; use DateTimeImmutable; +/** + * Responsible for converting different schema types to their corresponding JSON schema representations. + * Provides methods to visit and convert various schema objects. + */ class SchemaToJsonSchema implements CanVisitSchema { private array $result = []; diff --git a/src/Instructor.php b/src/Instructor.php index 7d6dde13..4f6b2903 100644 --- a/src/Instructor.php +++ b/src/Instructor.php @@ -12,9 +12,19 @@ use Cognesy\Instructor\Utils\Debug\Debug; /** - * Main access point to Instructor. + * The Instructor class manages the lifecycle and functionalities of Instructor instance. * - * Use respond() method to generate structured responses from LLM calls. 
+ * It uses various traits including event management, environment settings, and request handling. + * + * @uses Events\Traits\HandlesEvents + * @uses Events\Traits\HandlesEventListeners + * @uses Traits\HandlesEnv + * @uses Traits\HandlesInvocation + * @uses Traits\HandlesOverrides + * @uses Traits\HandlesPartialUpdates + * @uses Traits\HandlesQueuedEvents + * @uses Traits\HandlesRequest + * @uses Traits\HandlesSequenceUpdates */ class Instructor { use Events\Traits\HandlesEvents; @@ -29,6 +39,10 @@ class Instructor { use Traits\HandlesRequest; use Traits\HandlesSequenceUpdates; + /** + * @param EventDispatcher|null $events An optional EventDispatcher instance for managing events. + * @return void + */ public function __construct( EventDispatcher $events = null, ) { @@ -46,10 +60,22 @@ public function __construct( $this->queueEvent(new InstructorReady()); } + /** + * Initializes an Instructor instance with a specified connection. + * + * @param string $connection The connection string to be used. + * @return Instructor An instance of Instructor with the specified connection. + */ public static function using(string $connection) : Instructor { return (new static)->withConnection($connection); } + /** + * Enables or disables debug mode for the current instance. + * + * @param bool $debug Optional. If true, enables debug mode; otherwise, disables it. Defaults to true. + * @return static The current instance with the updated debug state. + */ public function withDebug(bool $debug = true) : static { Debug::setEnabled($debug); // TODO: fix me - debug should not be global, should be request specific return $this; diff --git a/src/Utils/Env.php b/src/Utils/Env.php index e15da738..fae7b623 100644 --- a/src/Utils/Env.php +++ b/src/Utils/Env.php @@ -3,12 +3,22 @@ use Dotenv\Dotenv; +/** + * Class responsible for managing environment variables. 
+ */ class Env { static private array $paths = [__DIR__.'/../..']; static private array $names = ['.env']; static private Dotenv $dotenv; + /** + * Sets the paths and names for the class. + * + * @param string|array $paths An array or a single string of paths to set. + * @param string|array $names An array or a single string of names to set (optional). + * @return void + */ public static function set(string|array $paths, string|array $names = '') : void { if (is_string($paths)) { $paths = [$paths]; @@ -25,6 +35,15 @@ public static function set(string|array $paths, string|array $names = '') : void self::load(); } + /** + * Retrieves the value of an environment variable. First, attempts to get + * the value from the system's environment variables. If not found, + * checks the manually loaded environment variables. + * + * @param mixed $key The name of the environment variable. + * @param mixed $default The default value to return if the environment variable is not found. + * @return mixed The value of the environment variable or the default value. + */ public static function get(mixed $key, mixed $default = null) : mixed { $value = getenv($key); @@ -37,6 +56,15 @@ public static function get(mixed $key, mixed $default = null) : mixed return $_ENV[$key] ?? $default; } + /** + * Loads environment variables from the specified paths and names. + * + * This method checks if both the paths and names arrays are empty. + * If they are not, it initializes the Dotenv instance with the given paths + * and names, and loads the environment variables safely. + * + * @return void + */ public static function load() : void { if ([] === self::$paths && [] === self::$names) { return; diff --git a/src/Utils/Uuid.php b/src/Utils/Uuid.php index 7981f4d6..8b6e8f76 100644 --- a/src/Utils/Uuid.php +++ b/src/Utils/Uuid.php @@ -2,11 +2,27 @@ namespace Cognesy\Instructor\Utils; +/** + * A class for generating Universally Unique Identifiers (UUID). 
+ * + * Goal: decouple Instructor main code from depending on specific UUID provider libraries + * and make it easier to switch providers. + */ class Uuid { + /** + * Generates a random UUID (version 4) string. + * + * @return string A randomly generated UUID (version 4) string. + */ public static function uuid4() : string { return self::fromRandomBytes(); } + /** + * Generates a UUID using random bytes. + * + * @return string Generated UUID in the format xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + */ private static function fromRandomBytes() : string { // generate uuid using random bytes $data = random_bytes(16);