Skip to content

Commit

Permalink
Evals - cont
Browse files Browse the repository at this point in the history
  • Loading branch information
ddebowczyk committed Oct 9, 2024
1 parent 5191339 commit 25c7d2c
Show file tree
Hide file tree
Showing 14 changed files with 193 additions and 151 deletions.
70 changes: 0 additions & 70 deletions evals/Evals/Inference/TaskSchema.php

This file was deleted.

58 changes: 41 additions & 17 deletions evals/LLMModes/run.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,25 @@
// Register the project source trees with the autoloader.
// __DIR__ never ends with a directory separator (except for the filesystem root),
// so the relative part has to start with '/' - otherwise the directory name gets
// glued to '..' (e.g. ".../LLMModes../../src/") and the path does not exist.
$loader->add('Cognesy\\Instructor\\', __DIR__ . '/../../src/');
$loader->add('Cognesy\\Evals\\', __DIR__ . '/../../evals/');

use Cognesy\Evals\Evals\CompareModes;
use Cognesy\Evals\Evals\Data\EvalInput;
use Cognesy\Evals\Evals\Inference\RunInference;
use Cognesy\Instructor\Enums\Mode;
use Cognesy\Instructor\Extras\Evals\Data\EvalInput;
use Cognesy\Instructor\Extras\Evals\Data\EvalSchema;
use Cognesy\Instructor\Extras\Evals\Evaluator;
use Cognesy\Instructor\Extras\Evals\Inference\RunInference;
use Cognesy\Instructor\Utils\Str;

$connections = [
'azure',
'cohere1',
'cohere2',
'fireworks',
'gemini',
// 'azure',
// 'cohere1',
// 'cohere2',
// 'fireworks',
// 'gemini',
'groq',
'mistral',
'ollama',
'openai',
'openrouter',
'together',
// 'mistral',
// 'ollama',
// 'openai',
// 'openrouter',
// 'together',
];

$streamingModes = [
Expand Down Expand Up @@ -62,18 +63,41 @@ function validateToolsData(array $data) : bool {

//Debug::enable();

$outputs = (new CompareModes(
$schema = new EvalSchema(
toolName: 'store_company',
toolDescription: 'Store company information',
schema: [
'type' => 'object',
'description' => 'Company information',
'properties' => [
'year' => [
'type' => 'integer',
'description' => 'Founding year',
],
'name' => [
'type' => 'string',
'description' => 'Company name',
],
],
'required' => ['name', 'year'],
'additionalProperties' => false,
]
);

$evaluator = new Evaluator(
messages: [
['role' => 'user', 'content' => 'YOUR GOAL: Use tools to store the information from context based on user questions.'],
['role' => 'user', 'content' => 'CONTEXT: Our company ACME was founded in 2020.'],
//['role' => 'user', 'content' => 'EXAMPLE CONTEXT: Sony was established in 1946 by Akio Morita.'],
//['role' => 'user', 'content' => 'EXAMPLE RESPONSE: ```json{"name":"Sony","year":1899}```'],
['role' => 'user', 'content' => 'What is the name and founding year of our company?'],
],
schema: [],
schema: $schema,
executorClass: RunInference::class,
evalFn: fn(EvalInput $er) => evalFn($er),
))->executeAll(
evalFn: fn(EvalInput $evalInput) => evalFn($evalInput),
);

$outputs = $evaluator->execute(
connections: $connections,
modes: $modes,
streamingModes: $streamingModes
Expand Down
11 changes: 5 additions & 6 deletions evals/SimpleExtraction/run.php
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
<?php

use Cognesy\Evals\Evals\CompareModes;
use Cognesy\Evals\Evals\Data\EvalInput;
use Cognesy\Evals\Evals\Instructor\RunInstructor;
use Cognesy\Instructor\Enums\Mode;
use Cognesy\Instructor\Utils\Debug\Debug;
use Cognesy\Instructor\Extras\Evals\Data\EvalInput;
use Cognesy\Instructor\Extras\Evals\Evaluator;
use Cognesy\Instructor\Extras\Evals\Instructor\RunInstructor;

// Load the autoloader and keep the returned loader so extra paths can be registered on it.
$loader = require 'vendor/autoload.php';
// __DIR__ has no trailing directory separator, so the relative part must begin with '/';
// without it the path becomes ".../SimpleExtraction../../src/", which does not exist.
$loader->add('Cognesy\\Instructor\\', __DIR__ . '/../../src/');
Expand Down Expand Up @@ -54,7 +53,7 @@ function evalFn(EvalInput $er) {
&& $person->foundingYear === 2020;
}

$outputs = (new CompareModes(
$outputs = (new Evaluator(
messages: [
['role' => 'user', 'content' => 'YOUR GOAL: Use tools to store the information from context based on user questions.'],
['role' => 'user', 'content' => 'CONTEXT: Our company ACME was founded in 2020.'],
Expand All @@ -65,7 +64,7 @@ function evalFn(EvalInput $er) {
schema: Company::class,
executorClass: RunInstructor::class,
evalFn: fn(EvalInput $er) => evalFn($er),
))->executeAll(
))->execute(
connections: $connections,
modes: $modes,
streamingModes: $streamingModes
Expand Down
23 changes: 10 additions & 13 deletions notes/NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## High priority

- Evals!!!
- Logging via PSR-3
- Schema abstraction layer - decouple names and descriptions from the model
- Prompt optimization via TextGrad
Expand All @@ -11,24 +12,19 @@
- CLI app
- Fast/simple REST API server - compatible with OpenAI?
- Revise examples debugging - not sure if it works as expected (what does it demonstrate?)

# Partially done

- Multiple tools with tool selection
- Parallel tool calls
- Export configuration to user folder / use external configuration
- Validators / Deserializers / Transformers - chain of objects, not a single object
- API Client: Clean up predefined models, prices, etc.
- Full control over generated prompt (access to Script object processing)

# TODOs
## Low priority

- Multiple tools with tool selection
- Parallel tool calls
- Batch API support (Gemini, OpenAI, Anthropic)
- Gemini context caching
- Generate unstructured, then format to structured - to improve reasoning
- Full control over generated prompt (access to Script object processing)
- Evals!!!

## API Client

- Move to raw Guzzle or Httplug?
# BACKLOG

## Addon: Modules

Expand Down Expand Up @@ -74,7 +70,8 @@
- How to track API rate limits across multiple requests / parallel executions
- Make using DocBlocks optional - it may not always be desirable to pass this info to the LLM
- Add super detailed tests of Module core functionality - esp. around input/output mappings

- Validators / Deserializers / Transformers - chain of objects, not a single object
- API Client: Clean up predefined models, prices, etc.



Expand Down
6 changes: 5 additions & 1 deletion notes/done/client_customization.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,8 @@ Not documented, for sure unclear to users.

## Solution

Added examples and docs.
Added examples and docs.

## API Client

- Move to raw Guzzle or Httplug?
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
<?php

namespace Cognesy\Evals\Evals;
namespace Cognesy\Instructor\Extras\Evals\Console;

use Cognesy\Evals\Evals\Data\EvalOutput;
use Cognesy\Instructor\Enums\Mode;
use Cognesy\Instructor\Extras\Evals\Data\EvalOutput;
use Cognesy\Instructor\Utils\Cli\Color;
use Cognesy\Instructor\Utils\Cli\Console;
use Cognesy\Instructor\Utils\Debug\Debug;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
<?php
namespace Cognesy\Evals\Evals\Contracts;
namespace Cognesy\Instructor\Extras\Evals\Contracts;

use Cognesy\Evals\Evals\Data\EvalInput;
use Cognesy\Instructor\Extras\Evals\Data\EvalInput;
use Cognesy\Instructor\Features\LLM\Data\LLMResponse;

interface CanExecuteExperiment
{
public static function executeFor(EvalInput $input): self;
public function executeFor(EvalInput $input): self;
public function getLLMResponse(): LLMResponse;
public function getAnswer(): mixed;
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?php

namespace Cognesy\Evals\Evals\Data;
namespace Cognesy\Instructor\Extras\Evals\Data;

use Cognesy\Instructor\Enums\Mode;
use Cognesy\Instructor\Features\LLM\Data\LLMResponse;
Expand All @@ -14,10 +14,18 @@ public function __construct(
public string $connection = '',
public bool $isStreamed = false,
public ?LLMResponse $response = null,
public int $maxTokens = 512,
) {}

/**
 * Stores the given LLM response on this object and returns the same instance.
 *
 * Note: this mutates the current object in place (no copy is made), so the
 * returned value is $this, which allows call chaining.
 *
 * @param LLMResponse $response Response to attach.
 * @return self The same instance, with the response property set.
 */
public function withResponse(LLMResponse $response) : self {
$this->response = $response;
return $this;
}

/**
 * Returns the schema held by this object as an EvalSchema.
 *
 * @return EvalSchema The stored schema.
 * @throws \Exception When the stored schema is not an EvalSchema instance.
 */
public function evalSchema() : EvalSchema {
    $candidate = $this->schema;

    // Happy path first: hand the schema back only when it has the expected type.
    if ($candidate instanceof EvalSchema) {
        return $candidate;
    }

    throw new \Exception('Schema is not an instance of EvalSchema.');
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?php

namespace Cognesy\Evals\Evals\Data;
namespace Cognesy\Instructor\Extras\Evals\Data;

use Exception;

Expand Down
55 changes: 55 additions & 0 deletions src/Extras/Evals/Data/EvalSchema.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
<?php

namespace Cognesy\Instructor\Extras\Evals\Data;

/**
 * Bundles a tool name, a tool description and a schema array, and renders them
 * into the array shapes used for response formats, tool definitions and tool choice.
 */
class EvalSchema
{
    /**
     * @param string $toolName        Name used for the tool / named JSON schema.
     * @param string $toolDescription Description attached to the tool / JSON schema format.
     * @param array  $schema          Schema definition; defaults to an empty array.
     */
    public function __construct(
        private string $toolName,
        private string $toolDescription,
        private array $schema = [],
    ) {}

    /**
     * Returns the schema array exactly as it was passed to the constructor.
     */
    public function schema() : array {
        return $this->schema;
    }

    /**
     * Builds a response format of type 'json_object' carrying the schema.
     */
    public function responseFormatJson() : array {
        $format = ['type' => 'json_object'];
        $format['schema'] = $this->schema;
        return $format;
    }

    /**
     * Builds a response format of type 'json_schema': the description sits at the
     * top level, while name, schema and the strict flag are nested under 'json_schema'.
     */
    public function responseFormatJsonSchema() : array {
        $namedSchema = [
            'name' => $this->toolName,
            'schema' => $this->schema,
            'strict' => true,
        ];

        return [
            'type' => 'json_schema',
            'description' => $this->toolDescription,
            'json_schema' => $namedSchema,
        ];
    }

    /**
     * Builds a one-element list containing a single function tool definition
     * (name, description, and the schema as its parameters).
     */
    public function tools() : array {
        $definition = [
            'name' => $this->toolName,
            'description' => $this->toolDescription,
            'parameters' => $this->schema,
        ];

        $tool = [
            'type' => 'function',
            'function' => $definition,
        ];

        return [$tool];
    }

    /**
     * Builds a tool choice entry that refers to the function tool by its name.
     */
    public function toolChoice() : array {
        $target = ['name' => $this->toolName];

        return [
            'type' => 'function',
            'function' => $target,
        ];
    }
}
Loading

0 comments on commit 25c7d2c

Please sign in to comment.