Added unified context caching to direct LLM inference
1 parent c75edf5 · commit 7ea47ed
Showing 13 changed files with 301 additions and 14 deletions.
New example document (`context_cache_llm`):

@@ -0,0 +1,76 @@
---
title: 'Context caching'
docname: 'context_cache_llm'
---

## Overview

Instructor offers a simplified way to work with LLM providers' APIs that support caching
(currently the Anthropic API and the OpenAI API), so you can focus on your business logic
while still taking advantage of lower latency and costs.

> **Note 1:** Instructor supports context caching for the Anthropic API and the OpenAI API.

> **Note 2:** Context caching is automatic for all OpenAI API calls. Read more
> in the [OpenAI API documentation](https://platform.openai.com/docs/guides/prompt-caching).

## Example

When you need to process multiple requests that share the same context, you can use context
caching to improve performance and reduce costs.

In this example we analyze the README.md file of this GitHub project and generate its
summary for two target audiences.

```php
<?php
$loader = require 'vendor/autoload.php';
$loader->add('Cognesy\\Instructor\\', __DIR__ . '/../../src/');

use Cognesy\Instructor\Extras\LLM\Inference;
use Cognesy\Instructor\Utils\Str;

$content = file_get_contents(__DIR__ . '/../../../README.md');

$inference = (new Inference)->withConnection('anthropic')->withCachedContext(
    messages: [
        ['role' => 'user', 'content' => 'Here is content of README.md file'],
        ['role' => 'user', 'content' => $content],
        ['role' => 'user', 'content' => 'Generate short, very domain specific pitch of the project described in README.md'],
        ['role' => 'assistant', 'content' => 'For whom do you want to generate the pitch?'],
    ],
);

$response = $inference->create(
    messages: [['role' => 'user', 'content' => 'CTO of lead gen software vendor']],
    options: ['max_tokens' => 256],
)->toApiResponse();

print("----------------------------------------\n");
print("\n# Summary for CTO of lead gen vendor\n");
print(" ($response->cacheReadTokens tokens read from cache)\n\n");
print("----------------------------------------\n");
print($response->content . "\n");

assert(!empty($response->content));
assert(Str::contains($response->content, 'Instructor'));
assert(Str::contains($response->content, 'lead', false));

$response2 = $inference->create(
    messages: [['role' => 'user', 'content' => 'CIO of insurance company']],
    options: ['max_tokens' => 256],
)->toApiResponse();

print("----------------------------------------\n");
print("\n# Summary for CIO of insurance company\n");
print(" ($response2->cacheReadTokens tokens read from cache)\n\n");
print("----------------------------------------\n");
print($response2->content . "\n");

assert(!empty($response2->content));
assert(Str::contains($response2->content, 'Instructor'));
assert(Str::contains($response2->content, 'insurance', false));
//assert($response2->cacheReadTokens > 0);
?>
```
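Since the cached context is attached to the `Inference` instance, the same shared prefix can be reused across any number of follow-up calls. Below is a minimal reuse sketch (not part of this commit) that loops over several audiences using only the calls shown above; the third audience string is illustrative.

```php
<?php
// Hypothetical reuse sketch: the $inference instance from the example above already
// carries the cached README context; only the short audience message changes per call.
$audiences = [
    'CTO of lead gen software vendor',
    'CIO of insurance company',
    'Head of marketing at an e-commerce startup', // illustrative, not from the original example
];

foreach ($audiences as $audience) {
    $response = $inference->create(
        messages: [['role' => 'user', 'content' => $audience]],
        options: ['max_tokens' => 256],
    )->toApiResponse();

    // cacheReadTokens reports how much of the shared prefix was served from the provider cache.
    print("\n# Summary for {$audience}\n");
    print(" ({$response->cacheReadTokens} tokens read from cache)\n\n");
    print($response->content . "\n");
}
?>
```

Only the per-audience message varies between calls; the README content and instructions passed to `withCachedContext()` form the prefix the provider can cache.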
New `CachedContext` class (`Cognesy\Instructor\Extras\LLM\Data\CachedContext`):

@@ -0,0 +1,36 @@
<?php

namespace Cognesy\Instructor\Extras\LLM\Data;

use Cognesy\Instructor\Enums\Mode;

class CachedContext
{
    public function __construct(
        public string|array $messages = [],
        public array $tools = [],
        public string|array $toolChoice = [],
        public array $responseFormat = [],
    ) {
        if (is_string($messages)) {
            $this->messages = [['role' => 'user', 'content' => $messages]]; // wrap a bare string as a single user message
        }
    }

    public function merged(
        string|array $messages = [],
        array $tools = [],
        string|array $toolChoice = [],
        array $responseFormat = [],
    ) {
        if (is_string($messages) && !empty($messages)) {
            $messages = [['role' => 'user', 'content' => $messages]]; // same wrapping for per-request string prompts
        }
        return new CachedContext(
            array_merge($this->messages, $messages),
            empty($tools) ? $this->tools : $tools,
            empty($toolChoice) ? $this->toolChoice : $toolChoice,
            empty($responseFormat) ? $this->responseFormat : $responseFormat,
        );
    }
}
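For reference, here is a minimal usage sketch (not part of this diff) of how `CachedContext::merged()` combines the cached prefix with per-request messages; `$readme` and the message strings are illustrative.

```php
<?php
// Hypothetical usage of CachedContext (variable names are illustrative).
use Cognesy\Instructor\Extras\LLM\Data\CachedContext;

$readme = file_get_contents(__DIR__ . '/README.md'); // assumed path; large, cacheable payload

// Shared prefix, defined once.
$cached = new CachedContext(
    messages: [
        ['role' => 'user', 'content' => 'Here is content of README.md file'],
        ['role' => 'user', 'content' => $readme],
    ],
);

// merged() returns a new CachedContext whose messages are the cached prefix followed
// by the request-specific messages; tools, tool choice and response format fall back
// to the cached values when not overridden.
$merged = $cached->merged(
    messages: [['role' => 'user', 'content' => 'CTO of lead gen software vendor']],
);

// $merged->messages now holds the full message list to send to the provider,
// with the shared prefix eligible for provider-side caching.
```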