Skip to content

Commit

Permalink
Cleanup of Json class - fixed defects in parsing complex cases
Browse files — browse the repository at this point in the history
  • Loading branch information
ddebowczyk committed Oct 3, 2024
1 parent 7ea47ed commit 7a94fdc
Show file tree
Hide file tree
Showing 16 changed files with 505 additions and 93 deletions.
2 changes: 2 additions & 0 deletions .env-dist
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
# INSTRUCTOR SECRETS
#########################################################

INSTRUCTOR_CONFIG_PATH='/../../config/'

#########################################################
# LLMS
#########################################################
Expand Down
2 changes: 1 addition & 1 deletion config/llm.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
'apiKey' => Env::get('AZURE_OPENAI_API_KEY', ''),
'endpoint' => '/chat/completions',
'metadata' => [
'apiVersion' => '2023-03-15-preview',
'apiVersion' => '2024-08-01-preview',
'resourceName' => 'instructor-dev',
'deploymentId' => 'gpt-4o-mini',
],
Expand Down
1 change: 1 addition & 0 deletions evals/LLMModes/Modes.php
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ public function forModeJsonSchema(string|array $query, string $connection, array
->withDebug($this->debug)
->create(
messages: array_merge($query, [
['role' => 'user', 'content' => 'Use JSON Schema: ' . json_encode($schema)],
['role' => 'user', 'content' => 'Respond with correct JSON.'],
]),
responseFormat: $this->model->responseFormatJsonSchema(),
Expand Down
37 changes: 20 additions & 17 deletions evals/LLMModes/run.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,33 +6,33 @@
use Cognesy\Evals\LLMModes\CompareModes;
use Cognesy\Evals\LLMModes\EvalRequest;
use Cognesy\Instructor\Enums\Mode;
use Cognesy\Instructor\Extras\Debug\Debug;
use Cognesy\Instructor\Utils\Json\Json;
use Cognesy\Instructor\Utils\Str;

$connections = [
// 'anthropic',
// 'azure',
'azure',
'cohere1',
// 'fireworks',
// 'gemini',
// 'groq',
// 'mistral',
// 'ollama',
// 'openai',
// 'openrouter',
// 'together'
'fireworks',
'gemini',
'groq',
'mistral',
'ollama',
'openai',
'openrouter',
'together',
];

$streamingModes = [
false,
true,
// false
];

$modes = [
// Mode::Text,
// Mode::MdJson,
// Mode::Json,
// Mode::JsonSchema,
Mode::Text,
Mode::MdJson,
Mode::Json,
Mode::JsonSchema,
Mode::Tools,
];

Expand All @@ -44,9 +44,10 @@
// azure, Mode::JsonSchema, sync|stream
//

//Debug::enable();

function evalFn(EvalRequest $er) {
$json = Json::find($er->answer) ?: '[]';
$decoded = json_decode($json, true) ?: [];
$decoded = Json::from($er->answer)->toArray();
$isCorrect = match($er->mode) {
Mode::Text => Str::contains($er->answer, ['ACME', '2020']),
Mode::Tools => validateToolsData($er->response->toolsData),
Expand All @@ -65,6 +66,8 @@ function validateToolsData(array $data) : bool {
query: [
['role' => 'user', 'content' => 'YOUR GOAL: Use tools to store the information from context based on user questions.'],
['role' => 'user', 'content' => 'CONTEXT: Our company ACME was founded in 2020.'],
//['role' => 'user', 'content' => 'EXAMPLE CONTEXT: Sony was established in 1946 by Akio Morita.'],
//['role' => 'user', 'content' => 'EXAMPLE RESPONSE: ```json{"name":"Sony","year":1899}```'],
['role' => 'user', 'content' => 'What is the name and founding year of our company?'],
],
evalFn: fn(EvalRequest $er) => evalFn($er),
Expand Down
2 changes: 1 addition & 1 deletion examples/A05_Extras/ImageToDataAnthropic/run.php
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class Receipt {
public float $total;
}

$receipt = (new Instructor)->withDebug()->withConnection('anthropic')->respond(
$receipt = (new Instructor)->withConnection('anthropic')->respond(
input: Image::fromFile(__DIR__ . '/receipt.png'),
responseModel: Receipt::class,
prompt: 'Extract structured data from the receipt. Return result as JSON following this schema: <|json_schema|>',
Expand Down
2 changes: 1 addition & 1 deletion src/Core/RequestHandler.php
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ protected function getApiResponse(Request $request) : ApiResponse {
$apiResponse = $this->makeInference($request)->toApiResponse();
$apiResponse->content = match($request->mode()) {
Mode::Text => $apiResponse->content,
default => Json::find($apiResponse->content),
default => Json::from($apiResponse->content)->toString(),
};
} catch (Exception $e) {
$this->events->dispatch(new RequestToLLMFailed($request, $e->getMessage()));
Expand Down
14 changes: 4 additions & 10 deletions src/Core/StreamResponse/PartialsGenerator.php
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ public function getPartialResponses(Generator $stream, ResponseModel $responseMo
}
$this->events->dispatch(new ChunkReceived($maybeArgumentChunk));
$this->responseText .= $maybeArgumentChunk;
$this->responseJson = Json::findPartial($this->responseText);
$this->responseJson = Json::fromPartial($this->responseText)->toString();
if (empty($this->responseJson)) {
continue;
}
Expand All @@ -120,7 +120,7 @@ public function getPartialResponses(Generator $stream, ResponseModel $responseMo
}
// finalize last function call
if ($this->toolCalls->count() > 0) {
$this->finalizeToolCall(Json::find($this->responseText), $responseModel->toolName());
$this->finalizeToolCall(Json::from($this->responseText)->toString(), $responseModel->toolName());
}
// finalize sequenceable
$this->sequenceableHandler->finalize();
Expand All @@ -146,14 +146,8 @@ protected function tryGetPartialObject(
string $partialJsonData,
ResponseModel $responseModel,
) : Result {
// dump('raw:', $partialJsonData);
// $found = Json::findPartial($partialJsonData);
// dump('found:', $found);
// $json = Json::fix($found);
// dump('fixed:', $json);

return Chain::from(fn() => Json::fix(Json::findPartial($partialJsonData)))
->through(fn($jsonData) => $this->responseDeserializer->deserialize($jsonData, $responseModel, $this?->toolCalls->last()->name))
return Chain::from(fn() => Json::fromPartial($partialJsonData)->toString())
->through(fn($json) => $this->responseDeserializer->deserialize($json, $responseModel, $this?->toolCalls->last()->name))
->through(fn($object) => $this->responseTransformer->transform($object))
->result();
}
Expand Down
6 changes: 2 additions & 4 deletions src/Core/StreamResponse/Traits/ValidatesPartialResponse.php
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,7 @@ private function preventJsonSchemaResponse(bool $check, string $partialResponseT

private function isJsonSchemaResponse(string $responseText) : bool {
try {
$jsonFragment = Json::findPartial($responseText);
$decoded = Json::parsePartial($jsonFragment);
$decoded = Json::fromPartial($responseText)->toArray();
} catch (Exception $e) {
// also covers no JSON at all - which is fine, as some models will respond with text
return false;
Expand Down Expand Up @@ -78,8 +77,7 @@ private function isMatchingResponseModel(
}
// ...detect matching response model
try {
$jsonFragment = Json::findPartial($partialResponseText);
$decoded = Json::parsePartial($jsonFragment);
$decoded = Json::fromPartial($partialResponseText)->toArray();
// we can try removing last item as it is likely to be still incomplete
$decoded = Arrays::removeTail($decoded, 1);
} catch (Exception $e) {
Expand Down
2 changes: 1 addition & 1 deletion src/Extras/LLM/Data/ApiResponse.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ public function __construct(
) {}

public function getJson(): string {
return Json::find($this->content);
return Json::from($this->content)->toString();
}

public static function fromPartialResponses(array $partialResponses) : ApiResponse {
Expand Down
12 changes: 6 additions & 6 deletions src/Extras/LLM/Drivers/CohereV1Driver.php
Original file line number Diff line number Diff line change
Expand Up @@ -204,29 +204,29 @@ private function makeContent(array $data) : string {
}

private function makeDelta(array $data) : string {
if ($this->isStreamEnd($data)) {
if (!$this->isStreamChunk($data)) {
return '';
}
return $data['text'] ?? $data['tool_calls'][0]['parameters'] ?? '';
return $data['tool_call_delta']['parameters'] ?? $data['text'] ?? '';
}

private function makeToolArgsDelta(array $data) : string {
if ($this->isStreamEnd($data)) {
if (!$this->isStreamChunk($data)) {
return '';
}
$toolArgs = $data['tool_calls'][0]['parameters'] ?? '';
return ('' === $toolArgs) ? '' : Json::encode($toolArgs);
}

private function makeToolNameDelta(array $data) : string {
if ($this->isStreamEnd($data)) {
if (!$this->isStreamChunk($data)) {
return '';
}
return $data['tool_calls'][0]['name'] ?? '';
}

private function isStreamEnd(array $data) : bool {
return 'stream_end' === ($data['event_type'] ?? '');
private function isStreamChunk(array $data) : bool {
return in_array(($data['event_type'] ?? ''), ['text-generation', 'tool-calls-chunk']);
}

private function withCachedContext(InferenceRequest $request): InferenceRequest {
Expand Down
4 changes: 2 additions & 2 deletions src/Extras/LLM/Drivers/OpenAIDriver.php
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@ public function getEndpointUrl(InferenceRequest $request): string {
}

public function getRequestHeaders() : array {
$extras = [
$extras = array_filter([
"OpenAI-Organization" => $this->config->metadata['organization'] ?? '',
"OpenAI-Project" => $this->config->metadata['project'] ?? '',
];
]);
return array_merge([
'Authorization' => "Bearer {$this->config->apiKey}",
'Content-Type' => 'application/json',
Expand Down
5 changes: 4 additions & 1 deletion src/Extras/LLM/InferenceResponse.php
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,10 @@ public function toApiResponse() : ApiResponse {
*/
public function toPartialApiResponses() : Generator {
foreach ($this->streamReader->stream($this->psrStream()) as $partialData) {
$response = $this->driver->toPartialApiResponse(Json::parse($partialData, default: []));
if ($partialData === false) {
continue;
}
$response = $this->driver->toPartialApiResponse(Json::fromPartial($partialData)->toArray());
if ($response === null) {
continue;
}
Expand Down
Loading

0 comments on commit 7a94fdc

Please sign in to comment.