Skip to content

Commit

Permalink
Merge branch 'object-input-support'
Browse files Browse the repository at this point in the history
  • Loading branch information
ddebowczyk committed Jun 10, 2024
2 parents 31b182d + 06857bd commit ea4acb5
Show file tree
Hide file tree
Showing 190 changed files with 2,958 additions and 1,518 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ archived/
vendor/
php_errors.log
composer.lock
xdebug.log
39 changes: 38 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Here's a simple CLI demo app using Instructor to extract structured data from te
## Feature highlights

- Get structured responses from LLM inference
- 'Structured-to-structured' processing - provide an object or array as input and get back an object with the results of inference
- Customize prompts and retry prompts
- Process various types of input data: text, series of chat messages or images
- Receive synchronous or streaming responses
Expand Down Expand Up @@ -125,7 +126,43 @@ var_dump($person);
// }
```
> **NOTE:** Instructor supports classes / objects as response models. In case you want to extract simple types or enums, you need to wrap them in Scalar adapter - see section below: Extracting Scalar Values.
>
>

### Structured-to-structured processing

Instructor offers a way to use structured data as an input. This is
useful when you want to use object data as input and get another object
with the result of LLM inference.

The `input` field of Instructor's `respond()` and `request()` methods
can be an object, an array, or just a string.

```php
<?php
use Cognesy\Instructor\Instructor;

class Email {
public function __construct(
public string $address = '',
public string $subject = '',
public string $body = '',
) {}
}

$email = new Email(
address: 'joe@gmail',
subject: 'Status update',
body: 'Your account has been updated.'
);

$translation = (new Instructor)->respond(
input: $email,
responseModel: Email::class,
prompt: 'Translate the text fields of email to Spanish. Keep other fields unchanged.',
);
?>
```


### Validation
Expand Down
22 changes: 18 additions & 4 deletions config/autowire.php
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
<?php
namespace Cognesy\config;

use Cognesy\Instructor\ApiClient\CacheConfig;
use Cognesy\Instructor\ApiClient\Context\ApiRequestContext;
use Cognesy\Instructor\ApiClient\Factories\ApiClientFactory;
use Cognesy\Instructor\ApiClient\Factories\ApiRequestFactory;
use Cognesy\Instructor\ApiClient\RequestConfig\ApiRequestConfig;
use Cognesy\Instructor\ApiClient\RequestConfig\CacheConfig;
use Cognesy\Instructor\ApiClient\RequestConfig\DebugConfig;
use Cognesy\Instructor\Configuration\Configuration;
use Cognesy\Instructor\Contracts\CanGeneratePartials;
use Cognesy\Instructor\Contracts\CanGenerateResponse;
Expand Down Expand Up @@ -86,9 +87,11 @@ class: EventLogger::class,
/// CONTEXT //////////////////////////////////////////////////////////////////////////////

$config->declare(
class: ApiRequestContext::class,
class: ApiRequestConfig::class,
context: [
'cacheConfig' => $config->reference(CacheConfig::class),
'debugConfig' => $config->reference(DebugConfig::class),
'events' => $config->reference(EventDispatcher::class),
],
);

Expand All @@ -101,6 +104,15 @@ class: CacheConfig::class,
]
);

$config->declare(
class: DebugConfig::class,
context: [
'debug' => false,
'stopOnDebug' => false,
'forceDebug' => false,
]
);

$config->declare(
class: ModelFactory::class,
context: [
Expand All @@ -115,6 +127,7 @@ class: ModelFactory::class,
'cohere:command' => $config->reference('cohere:command'),
'cohere:command-light' => $config->reference('cohere:command-light'),
'fireworks:mixtral-8x7b' => $config->reference('fireworks:mixtral-8x7b'),
'google:gemini-1.5-flash' => $config->reference('google:gemini-1.5-flash'),
'groq:llama3-8b' => $config->reference('groq:llama3-8b'),
'groq:llama3-70b' => $config->reference('groq:llama3-70b'),
'groq:mixtral-8x7b' => $config->reference('groq:mixtral-8x7b'),
Expand Down Expand Up @@ -149,14 +162,15 @@ class: RequestFactory::class,
'responseModelFactory' => $config->reference(ResponseModelFactory::class),
'modelFactory' => $config->reference(ModelFactory::class),
'apiRequestFactory' => $config->reference(ApiRequestFactory::class),
'requestConfig' => $config->reference(ApiRequestConfig::class),
'events' => $config->reference(EventDispatcher::class),
],
);

$config->declare(
class: ApiRequestFactory::class,
context: [
'context' => $config->reference(ApiRequestContext::class),
'requestConfig' => $config->reference(ApiRequestConfig::class),
],
);

Expand Down
27 changes: 27 additions & 0 deletions config/models/google.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<?php

use Cognesy\Instructor\ApiClient\ModelParams;
use Cognesy\Instructor\Configuration\Configuration;

return function(Configuration $config) : Configuration {
$config->declare(
class: ModelParams::class,
name: 'google:gemini-1.5-flash',
context: [
'label' => 'Google Gemini 1.5 Flash',
'type' => 'gemini',
'name' => 'gemini-1.5-flash',
'maxTokens' => 4096,
'contextSize' => 128_000,
'inputCost' => 1,
'outputCost' => 1,
'roleMap' => [
'user' => 'user',
'assistant' => 'model',
'system' => 'user'
],
],
);

return $config;
};
34 changes: 20 additions & 14 deletions docs/hub/advanced/caching.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@

> This feature is experimental.
You can set `cache` option to `true` to enable caching for your requests.
Instructor will store the response in cache and return it on subsequent requests
with the same parameters (for given API client).
You can enable/disable caching for your requests with `withCache()` method of
`Instructor` class.

When caching is enabled, Instructor will store the response in cache and return
it on subsequent requests with the same parameters (for given API client).

This option is available for all clients. By default, caching is turned off.

Expand All @@ -18,6 +20,7 @@ $loader->add('Cognesy\\Instructor\\', __DIR__ . '../../src/');
use Cognesy\Instructor\Clients\OpenAI\OpenAIClient;
use Cognesy\Instructor\Instructor;
use Cognesy\Instructor\Utils\Env;
use Cognesy\Instructor\Utils\Profiler\Profiler;

class User {
public int $age;
Expand All @@ -27,46 +30,49 @@ class User {
// OpenAI auth params
$yourApiKey = Env::get('OPENAI_API_KEY'); // use your own API key

// Create instance of OpenAI client in debug mode
// Create instance of OpenAI client
$client = (new OpenAIClient($yourApiKey));

/// Get Instructor with the default client component overridden with your own
$instructor = (new Instructor)->withClient($client);

Profiler::mark('start');

$user = $instructor->request(
messages: "Jason is 25 years old.",
responseModel: User::class,
)->get();

$delta = Profiler::mark('no cache')->mili();
dump($user);
echo "Time elapsed (no cache, default): ".$instructor->elapsedTime()." seconds\n\n";
echo "Time elapsed (no cache, default): $delta msec\n\n";

$user2 = $instructor->request(
messages: "Jason is 25 years old.",
responseModel: User::class,
options: ['cache' => true],
)->get();
)->withCache()->get();

$delta = Profiler::mark('cache 1st call')->mili();
dump($user2);
echo "Time elapsed (cache on, 1st call): ".$instructor->elapsedTime()." seconds\n\n";
echo "Time elapsed (cache on, 1st call): $delta msec\n\n";

$user3 = $instructor->request(
messages: "Jason is 25 years old.",
responseModel: User::class,
options: ['cache' => true],
)->get();
)->withCache()->get();

$delta = Profiler::mark('cache 2nd call')->mili();
dump($user3);
echo "Time elapsed (cache on, 2nd call): ".$instructor->elapsedTime()." seconds\n\n";
echo "Time elapsed (cache on, 2nd call): $delta msec\n\n";

$user4 = $instructor->request(
messages: "Jason is 25 years old.",
responseModel: User::class,
options: ['cache' => false],
)->get();
)->withCache(false)->get();

$delta = Profiler::mark('cache 3rd call')->mili();
dump($user4);
echo "Time elapsed (cache turned off again): ".$instructor->elapsedTime()." seconds\n\n";
echo "Time elapsed (cache turned off again): $delta msec\n\n";

?>
```
4 changes: 2 additions & 2 deletions docs/hub/advanced/custom_prompts.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,15 @@ $user = $instructor
->respond(
messages: "Our user Jason is 25 years old.",
responseModel: User::class,
prompt: "\nYour task is to respond correctly with JSON object. Response must follow JSONSchema:\n{json_schema}\n",
prompt: "\nYour task is to respond correctly with JSON object. Response must follow JSONSchema:\n<|json_schema|>\n",
mode: Mode::Json);

print("\n# Request for Mode::MdJson:\n\n");
$user = $instructor
->respond(
messages: "Our user Jason is 25 years old.",
responseModel: User::class,
prompt: "\nYour task is to respond correctly with strict JSON object containing extracted data within a ```json {} ``` codeblock. Object must validate against this JSONSchema:\n{json_schema}\n",
prompt: "\nYour task is to respond correctly with strict JSON object containing extracted data within a ```json {} ``` codeblock. Object must validate against this JSONSchema:\n<|json_schema|>\n",
mode: Mode::MdJson);

?>
Expand Down
43 changes: 43 additions & 0 deletions docs/hub/advanced/data_inputs.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Using structured data as an input

Instructor offers a way to use structured data as an input. This is
useful when you want to use object data as input and get another object
with the result of LLM inference.

The `input` field of Instructor's `respond()` and `request()` methods
can be an object, an array, or just a string.

```php
<?php
$loader = require 'vendor/autoload.php';
$loader->add('Cognesy\\Instructor\\', __DIR__ . '../../src/');

use Cognesy\Instructor\Instructor;

class Email {
public function __construct(
public string $address = '',
public string $subject = '',
public string $body = '',
) {}
}

$email = new Email(
address: 'joe@gmail',
subject: 'Status update',
body: 'Your account has been updated.'
);

$translatedEmail = (new Instructor)->respond(
input: $email,
responseModel: Email::class,
prompt: 'Translate the text fields of email to Spanish. Keep other fields unchanged.',
);

dump($translatedEmail);

assert($translatedEmail->address === $email->address);
assert($translatedEmail->subject !== $email->subject);
assert($translatedEmail->body !== $email->body);
?>
```
2 changes: 1 addition & 1 deletion docs/hub/advanced/language_programs.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Language programs
# Language programs with Modules

Instructor provides an addon that allows implementing complex processing flows
using LLM in a modular way. This addon to Instructor has been inspired by DSPy
Expand Down
21 changes: 14 additions & 7 deletions docs/hub/advanced/language_programs2.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Language programs
# 'Structure to structure' LLM processing

Instructor provides an addon that allows implementing complex processing flows
using LLM in a modular way. This addon to Instructor has been inspired by DSPy
Expand All @@ -25,13 +25,15 @@ The outputs and flow can be arbitrarily shaped to the needs of specific use case
```php
<?php

use Cognesy\Instructor\Contracts\CanProvideSchema;
use Cognesy\Instructor\Extras\Module\Addons\Predict\Predict;
use Cognesy\Instructor\Extras\Module\CallData\Contracts\HasInputOutputData;
use Cognesy\Instructor\Extras\Module\CallData\Traits\AutoSignature;
use Cognesy\Instructor\Extras\Module\Core\Module;
use Cognesy\Instructor\Extras\Module\Signature\Attributes\InputField;
use Cognesy\Instructor\Extras\Module\Signature\Attributes\OutputField;
use Cognesy\Instructor\Extras\Module\CallData\SignatureData;
use Cognesy\Instructor\Instructor;
use Cognesy\Instructor\Schema\Attributes\Description;

$loader = require 'vendor/autoload.php';
$loader->add('Cognesy\\Instructor\\', __DIR__ . '../../src/');
Expand All @@ -40,9 +42,10 @@ $loader->add('Cognesy\\Instructor\\', __DIR__ . '../../src/');

//#[Description('extract email details from text')]
class ParsedEmail extends SignatureData {
// INPUTS
#[InputField('text containing email')]
public string $text;

// OUTPUTS
#[OutputField('email address of sender')]
public string $senderEmail;
#[OutputField('subject of the email')]
Expand All @@ -52,25 +55,29 @@ class ParsedEmail extends SignatureData {
}

class FixedEmail extends SignatureData {
// INPUTS
#[InputField('subject of the email')]
public string $subject;
#[InputField('body of the email')]
public string $body;

// OUTPUTS
#[OutputField('subject of the email with fixed spelling mistakes')]
public string $fixedSubject;
#[OutputField('body of the email with fixed spelling mistakes')]
public string $fixedBody;
}

class EmailTranslation extends SignatureData {
// Alternative way to define the class signature data without extending a class
class EmailTranslation implements HasInputOutputData, CanProvideSchema {
use AutoSignature;
// INPUTS
#[InputField('subject of email')]
public string $subject;
#[InputField('body of email')]
public string $body;
#[InputField('language to translate to')]
public string $language;

// OUTPUTS
#[OutputField('translated subject of email')]
public string $translatedSubject;
#[OutputField('translated body of email')]
Expand Down Expand Up @@ -101,7 +108,7 @@ class ProcessEmail extends Module {
private Predict $translate;

public function __construct() {
$instructor = new Instructor();
$instructor = (new Instructor);//->withClient(new AnthropicClient(Env::get('ANTHROPIC_API_KEY')));//->wiretap(fn($e) => $e->printDump());

$this->parse = new Predict(signature: ParsedEmail::class, instructor: $instructor);
$this->fix = new Predict(signature: FixedEmail::class, instructor: $instructor);
Expand Down
5 changes: 4 additions & 1 deletion docs/hub/api_support/llm_support_anthropic.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,10 @@ $user = $instructor->respond(
responseModel: User::class,
model: 'claude-3-haiku-20240307',
mode: Mode::Tools,
//options: ['stream' => true ]
examples: [[
'input' => 'Ive got email Frank - their developer. He asked to come back to him [email protected]. Btw, he plays on drums!',
'output' => ['age' => null, 'name' => 'Frank', 'role' => 'developer', 'hobbies' => ['playing drums'],],
]],
);

print("Completed response model:\n\n");
Expand Down
Loading

0 comments on commit ea4acb5

Please sign in to comment.