From 3d768864c4018b88743f10a75bd9efd6001c6b55 Mon Sep 17 00:00:00 2001 From: ddebowczyk Date: Tue, 12 Nov 2024 21:15:36 +0100 Subject: [PATCH] Better XML markup for templating chat message sequences --- composer.json | 3 +- docs/advanced/prompts.mdx | 44 ++- prompts/system/mode_json.twig | 0 prompts/system/mode_json_schema.twig | 0 prompts/system/mode_mdjson.twig | 0 prompts/system/mode_text.twig | 0 prompts/system/mode_tools.twig | 1 + src/Extras/Prompt/Prompt.php | 135 ++++++-- src/Features/Core/Data/ChatTemplate.php | 134 ++++---- .../Traits/ChatTemplate/HandlesScript.php | 178 +++++----- src/Traits/HandlesInvocation.php | 315 ++++++++---------- .../Traits/Message/HandlesCreation.php | 102 +++--- .../Messages/Traits/Script/HandlesAccess.php | 133 ++++---- .../Traits/Script/HandlesMutation.php | 165 ++++----- src/Utils/Xml/SelectiveXmlParser.php | 97 ++++++ .../{Xml.php => Xml/SimpleXmlParser.php} | 272 +++++++-------- src/Utils/Xml/Xml.php | 78 +++++ src/Utils/Xml/XmlElement.php | 109 ++++++ src/Utils/Xml/XmlValidator.php | 41 +++ tests/Feature/Extras/PromptTest.php | 16 +- tests/Feature/Utils/SimpleXmlParserTest.php | 120 +++++++ tests/Feature/Utils/XmlParserTest.php | 151 +++++++++ tests/Feature/Utils/XmlTest.php | 221 ++++++------ 23 files changed, 1478 insertions(+), 837 deletions(-) create mode 100644 prompts/system/mode_json.twig create mode 100644 prompts/system/mode_json_schema.twig create mode 100644 prompts/system/mode_mdjson.twig create mode 100644 prompts/system/mode_text.twig create mode 100644 prompts/system/mode_tools.twig create mode 100644 src/Utils/Xml/SelectiveXmlParser.php rename src/Utils/{Xml.php => Xml/SimpleXmlParser.php} (63%) create mode 100644 src/Utils/Xml/Xml.php create mode 100644 src/Utils/Xml/XmlElement.php create mode 100644 src/Utils/Xml/XmlValidator.php create mode 100644 tests/Feature/Utils/SimpleXmlParserTest.php create mode 100644 tests/Feature/Utils/XmlParserTest.php diff --git a/composer.json b/composer.json index 047644d6..8a43c474 100644 --- a/composer.json +++ b/composer.json @@ -75,6 +75,7 @@ "php": "^8.2", "ext-fileinfo": "*", "ext-simplexml": "*", + "ext-xmlreader": "*", "adbario/php-dot-notation": "^3.3", "aimeos/map": "^3.8", "guzzlehttp/guzzle": "^7.8", @@ -89,7 +90,7 @@ "symfony/serializer": "^6.4 || ^7.0", "symfony/type-info": "^7.1", "symfony/validator": "^6.4 || ^7.0", - "vlucas/phpdotenv": "^5.6" + "vlucas/phpdotenv": "^5.6", }, "scripts": { "tests": "@php vendor/bin/pest", diff --git a/docs/advanced/prompts.mdx b/docs/advanced/prompts.mdx index 20d1b693..88e15ada 100644 --- a/docs/advanced/prompts.mdx +++ b/docs/advanced/prompts.mdx @@ -48,8 +48,8 @@ for LLM chat APIs. ```twig - You are a helpful assistant. - What is the capital of {{ country }}? + You are a helpful assistant. + What is the capital of {{ country }}? ``` @@ -76,8 +76,8 @@ schema: required: [name] ---#} - You are a helpful assistant. - What is the capital of {{ country }}? + You are a helpful assistant. + What is the capital of {{ country }}? ``` @@ -176,6 +176,30 @@ echo $prompt->toText(); // Outputs: "Hello, World!" ?> ``` +### In Memory Prompts + +If you need to create an inline prompt (without saving it to a library), you can use following syntax: + +```php +withTemplateContent('Hello, {{ name }}!') + ->withValues(['name' => 'World']) + ->toText(); +?> +``` + +There's shorter syntax for creating in-memory prompts: + +```php +from('Hello, {{ name }}!') + ->with(['name' => 'World']) + ->toText(); +?> +``` + ### Handling Template Variables To check which variables are available in a prompt template: @@ -212,12 +236,22 @@ echo $prompt->toText(); // Outputs: "Hello, World!" The Prompt class also supports converting templates containing chat-specific markup into structured messages: +Here is an example XML that can be used to generate a sequence of chat messages: +```xml + + You are a helpful assistant. + Hello, {{ name }} + +``` + +And here is how you use `Prompt` class to convert XML template into a sequence of messages: + ```php withTemplateContent('You are a helpful assistant.Hello, {{ $name }}') + ->withTemplateContent('You are a helpful assistant.Hello, {{ $name }}') ->withValues(['name' => 'assistant']); $messages = $prompt->toMessages(); diff --git a/prompts/system/mode_json.twig b/prompts/system/mode_json.twig new file mode 100644 index 00000000..e69de29b diff --git a/prompts/system/mode_json_schema.twig b/prompts/system/mode_json_schema.twig new file mode 100644 index 00000000..e69de29b diff --git a/prompts/system/mode_mdjson.twig b/prompts/system/mode_mdjson.twig new file mode 100644 index 00000000..e69de29b diff --git a/prompts/system/mode_text.twig b/prompts/system/mode_text.twig new file mode 100644 index 00000000..e69de29b diff --git a/prompts/system/mode_tools.twig b/prompts/system/mode_tools.twig new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/prompts/system/mode_tools.twig @@ -0,0 +1 @@ + diff --git a/src/Extras/Prompt/Prompt.php b/src/Extras/Prompt/Prompt.php index 3ded3acd..7e904578 100644 --- a/src/Extras/Prompt/Prompt.php +++ b/src/Extras/Prompt/Prompt.php @@ -6,8 +6,10 @@ use Cognesy\Instructor\Extras\Prompt\Data\PromptEngineConfig; use Cognesy\Instructor\Utils\Messages\Message; use Cognesy\Instructor\Utils\Messages\Messages; +use Cognesy\Instructor\Utils\Messages\Script; use Cognesy\Instructor\Utils\Str; -use Cognesy\Instructor\Utils\Xml; +use Cognesy\Instructor\Utils\Xml\Xml; +use Cognesy\Instructor\Utils\Xml\XmlElement; use InvalidArgumentException; class Prompt @@ -20,6 +22,7 @@ class Prompt private string $templateContent; private array $variableValues; private string $rendered; + private $tags = ['chat', 'message', 'content', 'section']; public function __construct( string $path = '', @@ -121,6 +124,10 @@ public function toMessages() : Messages { return $this->makeMessages($this->rendered()); } + public function toScript() : Script { + return $this->makeScript($this->rendered()); + } + public function toArray() : array { return $this->toMessages()->toArray(); } @@ -149,33 +156,7 @@ public function validationErrors() : array { $infoVars = $this->info()->variableNames(); $templateVars = $this->variables(); $valueKeys = array_keys($this->variableValues); - - $messages = []; - foreach($infoVars as $var) { - if (!in_array($var, $valueKeys)) { - $messages[] = "$var: variable defined in template info, but value not provided"; - } - if (!in_array($var, $templateVars)) { - $messages[] = "$var: variable defined in template info, but not used"; - } - } - foreach($valueKeys as $var) { - if (!in_array($var, $infoVars)) { - $messages[] = "$var: value provided, but not defined in template info"; - } - if (!in_array($var, $templateVars)) { - $messages[] = "$var: value provided, but not used in template content"; - } - } - foreach($templateVars as $var) { - if (!in_array($var, $infoVars)) { - $messages[] = "$var: variable used in template, but not defined in template info"; - } - if (!in_array($var, $valueKeys)) { - $messages[] = "$var: variable used in template, but value not provided"; - } - } - return $messages; + return $this->validateVariables($infoVars, $templateVars, $valueKeys); } // INTERNAL /////////////////////////////////////////////////// @@ -195,9 +176,16 @@ private function makeMessages(string $text) : Messages { }; } + private function makeScript(string $text) : Script { + return match(true) { + $this->containsXml($text) && $this->hasChatRoles($text) => $this->makeScriptFromXml($text), + default => Messages::fromString($text), + }; + } + private function hasChatRoles(string $text) : bool { $roleStrings = [ - '', '', '', '', '
', '' + '', '', '
' ]; if (Str::containsAny($text, $roleStrings)) { return true; @@ -209,15 +197,90 @@ private function containsXml(string $text) : bool { return preg_match('/<[^>]+>/', $text) === 1; } + private function makeScriptFromXml(string $text) : Script { + $xml = Xml::from($text)->withTags($this->tags)->toXmlElement(); + $script = new Script(); + $section = $script->section('messages'); + foreach ($xml->children() as $element) { + if ($element->tag() === 'section') { + $section = $script->section($element->attribute('name') ?? 'messages'); + continue; + } + if ($element->tag() !== 'message') { + continue; + } + $section->appendMessage(Message::make( + role: $element->attribute('role', 'user'), + content: match(true) { + $element->hasChildren() => $this->getMessageContent($element), + default => $element->content(), + } + )); + } + return $script; + } + private function makeMessagesFromXml(string $text) : Messages { + $xml = Xml::from($text)->withTags($this->tags)->toXmlElement(); $messages = new Messages(); - $xml = match(Str::contains($text, '')) { - true => Xml::from($text)->toArray(), - default => Xml::from($text)->wrapped('chat')->toArray(), - }; - // TODO: validate - foreach ($xml as $key => $message) { - $messages->appendMessage(Message::make($key, $message)); + foreach ($xml->children() as $element) { + if ($element->tag() !== 'message') { + continue; + } + $messages->appendMessage(Message::make( + role: $element->attribute('role', 'user'), + content: match(true) { + $element->hasChildren() => $this->getMessageContent($element), + default => $element->content(), + } + )); + } + return $messages; + } + + private function getMessageContent(XmlElement $element) : array { + $content = []; + foreach ($element->children() as $child) { + if ($child->tag() !== 'content') { + continue; + } + // check if content type is text, image or audio + $type = $child->attribute('type', 'text'); + $content[] = match($type) { + 'image' => ['type' => 'image_url', 'image_url' => ['url' => $child->content()]], + 'audio' => ['type' => 'input_audio', 'input_audio' => ['data' => $child->content(), 'format' => $child->attribute('format', 'mp3')]], + 'text' => ['type' => 'text', 'text' => $child->content()], + default => throw new InvalidArgumentException("Invalid content type: $type"), + }; + } + return $content; + } + + private function validateVariables(array $infoVars, array $templateVars, array $valueKeys) : array { + $messages = []; + foreach($infoVars as $var) { + if (!in_array($var, $valueKeys)) { + $messages[] = "$var: variable defined in template info, but value not provided"; + } + if (!in_array($var, $templateVars)) { + $messages[] = "$var: variable defined in template info, but not used"; + } + } + foreach($valueKeys as $var) { + if (!in_array($var, $infoVars)) { + $messages[] = "$var: value provided, but not defined in template info"; + } + if (!in_array($var, $templateVars)) { + $messages[] = "$var: value provided, but not used in template content"; + } + } + foreach($templateVars as $var) { + if (!in_array($var, $infoVars)) { + $messages[] = "$var: variable used in template, but not defined in template info"; + } + if (!in_array($var, $valueKeys)) { + $messages[] = "$var: variable used in template, but value not provided"; + } } return $messages; } diff --git a/src/Features/Core/Data/ChatTemplate.php b/src/Features/Core/Data/ChatTemplate.php index dbad4a04..8fd7c0ae 100644 --- a/src/Features/Core/Data/ChatTemplate.php +++ b/src/Features/Core/Data/ChatTemplate.php @@ -1,69 +1,67 @@ -request = $request; - $this->defaultRetryPrompt = Settings::get('llm', 'defaultRetryPrompt'); - $this->defaultPrompts[Mode::MdJson->value] = Settings::get('llm', 'defaultMdJsonPrompt'); - $this->defaultPrompts[Mode::Json->value] = Settings::get('llm', 'defaultJsonPrompt'); - $this->defaultPrompts[Mode::Tools->value] = Settings::get('llm', 'defaultToolsPrompt'); - } - - public static function fromRequest(Request $request) : static { - return new self($request); - } - - public function toMessages() : array { - $this->script = $this - ->makeScript($this->request) - ->mergeScript( - $this->makeCachedScript($this->request->cachedContext()) - ); - - // Add retry messages if needed - $this->addRetryMessages(); - - // Add meta sections - $output = $this - ->withCacheMetaSections($this->withSections($this->script)) - ->select([ - // potentially cached - predefined sections used to construct the script - 'system', - 'pre-cached', - 'pre-cached-prompt', 'cached-prompt', 'post-cached-prompt', - 'pre-cached-examples', 'cached-examples', 'post-cached-examples', - 'pre-cached-input', 'cached-input', 'post-cached-input', - 'cached-messages', - 'post-cached', - // never cached - 'pre-prompt', 'prompt', 'post-prompt', - 'pre-examples', 'examples', 'post-examples', - 'pre-input', 'input', 'post-input', - 'messages', - 'pre-retries', 'retries', 'post-retries' - ]) - ->toArray( - parameters: ['json_schema' => $this->makeJsonSchema() ?? []], - ); - - return $output; - } +request = $request; + $this->defaultRetryPrompt = Settings::get('llm', 'defaultRetryPrompt'); + $this->defaultPrompts[Mode::MdJson->value] = Settings::get('llm', 'defaultMdJsonPrompt'); + $this->defaultPrompts[Mode::Json->value] = Settings::get('llm', 'defaultJsonPrompt'); + $this->defaultPrompts[Mode::Tools->value] = Settings::get('llm', 'defaultToolsPrompt'); + } + + public static function fromRequest(Request $request) : static { + return new self($request); + } + + public function toMessages() : array { + $this->script = $this + ->makeScript($this->request) + ->mergeScript($this->makeCachedScript($this->request->cachedContext())); + + // Add retry messages if needed + $this->addRetryMessages(); + + // Add meta sections + $output = $this + ->withCacheMetaSections($this->withSections($this->script)) + ->select([ + // potentially cached - predefined sections used to construct the script + 'system', + 'pre-cached', + 'pre-cached-prompt', 'cached-prompt', 'post-cached-prompt', + 'pre-cached-examples', 'cached-examples', 'post-cached-examples', + 'pre-cached-input', 'cached-input', 'post-cached-input', + 'cached-messages', + 'post-cached', + // never cached + 'pre-prompt', 'prompt', 'post-prompt', + 'pre-examples', 'examples', 'post-examples', + 'pre-input', 'input', 'post-input', + 'messages', + 'pre-retries', 'retries', 'post-retries' + ]) + ->toArray( + parameters: ['json_schema' => $this->makeJsonSchema() ?? []], + ); + + return $output; + } } \ No newline at end of file diff --git a/src/Features/Core/Data/Traits/ChatTemplate/HandlesScript.php b/src/Features/Core/Data/Traits/ChatTemplate/HandlesScript.php index 7fdf7ac3..d26886ad 100644 --- a/src/Features/Core/Data/Traits/ChatTemplate/HandlesScript.php +++ b/src/Features/Core/Data/Traits/ChatTemplate/HandlesScript.php @@ -1,90 +1,90 @@ -isRequestEmpty($request)) { - throw new Exception('Request cannot be empty - you have to provide content for processing.'); - } - - $script = new Script(); - - // GET DATA - $messages = $this->normalizeMessages($request->messages()); - - // SYSTEM SECTION - $script->section('system')->appendMessages( - $this->makeSystem($messages, $request->system()) - ); - $script->section('messages')->appendMessages( - $this->makeMessages($messages) - ); - $script->section('input')->appendMessages( - $this->makeInput($request->input()) - ); - $script->section('prompt')->appendMessage( - $this->makePrompt($this->request->prompt()) - ); - $script->section('examples')->appendMessages( - $this->makeExamples($request->examples()) - ); - - return $this->filterEmptySections($script); - } - - protected function withSections(Script $script) : Script { - if ($script->section('prompt')->notEmpty()) { - $script->section('pre-prompt')->appendMessageIfEmpty([ - 'role' => 'user', - 'content' => "TASK:", - ]); - } - - if ($script->section('examples')->notEmpty()) { - $script->section('pre-examples')->appendMessageIfEmpty([ - 'role' => 'user', - 'content' => "EXAMPLES:", - ]); - } - - if ($script->section('input')->notEmpty()) { - $script->section('pre-input')->appendMessageIfEmpty([ - 'role' => 'user', - 'content' => "INPUT:", - ]); - $script->section('post-input')->appendMessageIfEmpty([ - 'role' => 'user', - 'content' => "RESPONSE:", - ]); - } - - if ($script->section('retries')->notEmpty()) { - $script->section('pre-retries')->appendMessageIfEmpty([ - 'role' => 'user', - 'content' => "FEEDBACK:", - ]); - $script->section('post-retries')->appendMessageIfEmpty([ - 'role' => 'user', - 'content' => "CORRECTED RESPONSE:", - ]); - } - - return $script; - } - - private function isRequestEmpty(Request $request) : bool { - return match(true) { - !empty($request->messages()) => false, - !empty($request->input()) => false, - !empty($request->prompt()) => false, - !empty($request->system()) => false, // ? - !empty($request->examples()) => false, // ? - default => true, - }; - } +isRequestEmpty($request)) { + throw new Exception('Request cannot be empty - you have to provide content for processing.'); + } + + $script = new Script(); + + // GET DATA + $messages = $this->normalizeMessages($request->messages()); + + // SYSTEM SECTION + $script->section('system')->appendMessages( + $this->makeSystem($messages, $request->system()) + ); + $script->section('messages')->appendMessages( + $this->makeMessages($messages) + ); + $script->section('input')->appendMessages( + $this->makeInput($request->input()) + ); + $script->section('prompt')->appendMessage( + $this->makePrompt($request->prompt()) + ); + $script->section('examples')->appendMessages( + $this->makeExamples($request->examples()) + ); + + return $this->filterEmptySections($script); + } + + protected function withSections(Script $script) : Script { + if ($script->section('prompt')->notEmpty()) { + $script->section('pre-prompt')->appendMessageIfEmpty([ + 'role' => 'user', + 'content' => "TASK:", + ]); + } + + if ($script->section('examples')->notEmpty()) { + $script->section('pre-examples')->appendMessageIfEmpty([ + 'role' => 'user', + 'content' => "EXAMPLES:", + ]); + } + + if ($script->section('input')->notEmpty()) { + $script->section('pre-input')->appendMessageIfEmpty([ + 'role' => 'user', + 'content' => "INPUT:", + ]); + $script->section('post-input')->appendMessageIfEmpty([ + 'role' => 'user', + 'content' => "RESPONSE:", + ]); + } + + if ($script->section('retries')->notEmpty()) { + $script->section('pre-retries')->appendMessageIfEmpty([ + 'role' => 'user', + 'content' => "FEEDBACK:", + ]); + $script->section('post-retries')->appendMessageIfEmpty([ + 'role' => 'user', + 'content' => "CORRECTED RESPONSE:", + ]); + } + + return $script; + } + + private function isRequestEmpty(Request $request) : bool { + return match(true) { + !empty($request->messages()) => false, + !empty($request->input()) => false, + !empty($request->prompt()) => false, + !empty($request->system()) => false, // ? + !empty($request->examples()) => false, // ? + default => true, + }; + } } \ No newline at end of file diff --git a/src/Traits/HandlesInvocation.php b/src/Traits/HandlesInvocation.php index 58cf93bd..2eb67bc6 100644 --- a/src/Traits/HandlesInvocation.php +++ b/src/Traits/HandlesInvocation.php @@ -1,168 +1,147 @@ -request( - messages: $messages, - input: $input, - responseModel: $responseModel, - system: $system, - prompt: $prompt, - examples: $examples, - model: $model, - maxRetries: $maxRetries, - options: $options, - toolName: $toolName, - toolDescription: $toolDescription, - retryPrompt: $retryPrompt, - mode: $mode, - )->get(); - } - - /** - * Creates the request to be executed - */ - public function request( - string|array $messages = '', - string|array|object $input = '', - string|array|object $responseModel = [], - string $system = '', - string $prompt = '', - array $examples = [], - string $model = '', - int $maxRetries = 0, - array $options = [], - string $toolName = '', - string $toolDescription = '', - string $retryPrompt = '', - Mode $mode = Mode::Tools, - ) : InstructorResponse { - $this->queueEvent(new RequestReceived()); - $this->dispatchQueuedEvents(); - - if (empty($responseModel)) { - throw new Exception('Response model cannot be empty. Provide a class name, instance, or schema array.'); - } - - $requestedSchema = $responseModel; - $responseModel = $this->makeResponseModel($requestedSchema, $toolName, $toolDescription); - - $request = new Request( - messages: $messages ?? [], - input: $input ?? [], - requestedSchema: $requestedSchema ?? [], - responseModel: $responseModel, - system: $system ?? '', - prompt: $prompt ?? '', - examples: $examples ?? [], - model: $model ?? '', - maxRetries: $maxRetries ?? 0, - options: $options ?? [], - toolName: $toolName ?? '', - toolDescription: $toolDescription ?? '', - retryPrompt: $retryPrompt ?? '', - mode: $mode ?? Mode::Tools, - cachedContext: $this->cachedContext ?? [], - ); - - $requestHandler = new RequestHandler( - request: $request, - responseGenerator: new ResponseGenerator( - $this->responseDeserializer, - $this->responseValidator, - $this->responseTransformer, - $this->events, - ), - partialsGenerator: new PartialsGenerator( - $this->responseDeserializer, - $this->responseTransformer, - $this->events, - ), - connection: $this->connection, - driver: $this->driver, - httpClient: $this->httpClient, - events: $this->events, - ); - - return new InstructorResponse( - request: $request, - requestHandler: $requestHandler, - events: $this->events, - ); - } - - // INTERNAL ///////////////////////////////////////////////// - - protected function getInference(Request $request) : InferenceResponse { - $inference = new Inference( - connection: $this->connection, - httpClient: $this->httpClient, - driver: $this->driver, - events: $this->events, - ); - return $inference - ->create( - $request->toMessages(), - $request->model(), - $request->toolCallSchema(), - $request->toolChoice(), - $request->responseFormat(), - $request->options(), - $request->mode() - ); - } - - private function makeResponseModel( - string|array|object $requestedSchema, - string $toolName, - string $toolDescription, - ) : ResponseModel { - $toolName = $toolName ?: Settings::get('llm', 'defaultToolName', 'extracted_data'); - $toolDescription = $toolDescription ?: Settings::get('llm', 'defaultToolDescription', 'Function call based on user instructions.'); - $schemaFactory = new SchemaFactory( - Settings::get('llm', 'useObjectReferences', false) - ); - $responseModelFactory = new ResponseModelFactory( - new ToolCallBuilder($schemaFactory, new ReferenceQueue()), - $schemaFactory, - $this->events - ); - return $responseModelFactory->fromAny($requestedSchema, $toolName, $toolDescription); - } -} \ No newline at end of file +request( + messages: $messages, + input: $input, + responseModel: $responseModel, + system: $system, + prompt: $prompt, + examples: $examples, + model: $model, + maxRetries: $maxRetries, + options: $options, + toolName: $toolName, + toolDescription: $toolDescription, + retryPrompt: $retryPrompt, + mode: $mode, + )->get(); + } + + /** + * Creates the request to be executed + */ + public function request( + string|array $messages = '', + string|array|object $input = '', + string|array|object $responseModel = [], + string $system = '', + string $prompt = '', + array $examples = [], + string $model = '', + int $maxRetries = 0, + array $options = [], + string $toolName = '', + string $toolDescription = '', + string $retryPrompt = '', + Mode $mode = Mode::Tools, + ) : InstructorResponse { + $this->queueEvent(new RequestReceived()); + $this->dispatchQueuedEvents(); + + if (empty($responseModel)) { + throw new Exception('Response model cannot be empty. Provide a class name, instance, or schema array.'); + } + + $requestedSchema = $responseModel; + $responseModel = $this->makeResponseModel($requestedSchema, $toolName, $toolDescription); + + $request = new Request( + messages: $messages ?? [], + input: $input ?? [], + requestedSchema: $requestedSchema ?? [], + responseModel: $responseModel, + system: $system ?? '', + prompt: $prompt ?? '', + examples: $examples ?? [], + model: $model ?? '', + maxRetries: $maxRetries ?? 0, + options: $options ?? [], + toolName: $toolName ?? '', + toolDescription: $toolDescription ?? '', + retryPrompt: $retryPrompt ?? '', + mode: $mode ?? Mode::Tools, + cachedContext: $this->cachedContext ?? [], + ); + + $requestHandler = new RequestHandler( + request: $request, + responseGenerator: new ResponseGenerator( + $this->responseDeserializer, + $this->responseValidator, + $this->responseTransformer, + $this->events, + ), + partialsGenerator: new PartialsGenerator( + $this->responseDeserializer, + $this->responseTransformer, + $this->events, + ), + connection: $this->connection, + driver: $this->driver, + httpClient: $this->httpClient, + events: $this->events, + ); + + return new InstructorResponse( + request: $request, + requestHandler: $requestHandler, + events: $this->events, + ); + } + + // INTERNAL ///////////////////////////////////////////////// + + private function makeResponseModel( + string|array|object $requestedSchema, + string $toolName, + string $toolDescription, + ) : ResponseModel { + $toolName = $toolName ?: Settings::get('llm', 'defaultToolName', 'extracted_data'); + $toolDescription = $toolDescription ?: Settings::get('llm', 'defaultToolDescription', 'Function call based on user instructions.'); + $schemaFactory = new SchemaFactory( + Settings::get('llm', 'useObjectReferences', false) + ); + $responseModelFactory = new ResponseModelFactory( + new ToolCallBuilder($schemaFactory, new ReferenceQueue()), + $schemaFactory, + $this->events + ); + return $responseModelFactory->fromAny($requestedSchema, $toolName, $toolDescription); + } +} diff --git a/src/Utils/Messages/Traits/Message/HandlesCreation.php b/src/Utils/Messages/Traits/Message/HandlesCreation.php index ad3522ed..c1ea12a6 100644 --- a/src/Utils/Messages/Traits/Message/HandlesCreation.php +++ b/src/Utils/Messages/Traits/Message/HandlesCreation.php @@ -1,52 +1,52 @@ - static::fromString($message), - is_array($message) => static::fromArray($message), - $message instanceof static => $message->clone(), - default => throw new Exception('Invalid message type'), - }; - } - - public static function fromInput(string|array|object $input, string $role = '') : static { - return match(true) { - $input instanceof Message => $input, - $input instanceof CanProvideMessage => $input->toMessage(), - default => new Message($role, Text::fromAny($input)), - }; - } - - public function clone() : static { - return new static($this->role, $this->content); - } + static::fromString($message), + is_array($message) => static::fromArray($message), + $message instanceof static => $message->clone(), + default => throw new Exception('Invalid message type'), + }; + } + + public static function fromInput(string|array|object $input, string $role = '') : static { + return match(true) { + $input instanceof Message => $input, + $input instanceof CanProvideMessage => $input->toMessage(), + default => new Message($role, Text::fromAny($input)), + }; + } + + public function clone() : static { + return new static($this->role, $this->content); + } } \ No newline at end of file diff --git a/src/Utils/Messages/Traits/Script/HandlesAccess.php b/src/Utils/Messages/Traits/Script/HandlesAccess.php index e0aa3030..a0f0bc9a 100644 --- a/src/Utils/Messages/Traits/Script/HandlesAccess.php +++ b/src/Utils/Messages/Traits/Script/HandlesAccess.php @@ -1,66 +1,67 @@ -sections; - } - - public function section(string $name) : Section { - $index = $this->sectionIndex($name); - if ($index === -1) { - $this->createSection(new Section($name)); - $index = $this->sectionIndex($name); - } - return $this->sections[$index]; - } - - public function sectionNames() : array { - return $this->map(fn(Section $section) => $section->name); - } - - public function hasSection(string $name) : bool { - return $this->sectionIndex($name) !== -1; - } - - public function reduce(callable $callback, mixed $initial = null) : mixed { - return array_reduce($this->sections, $callback, $initial); - } - - public function map(callable $callback) : array { - return array_map($callback, $this->sections); - } - - public function isEmpty() : bool { - return match (true) { - empty($this->sections) => true, - default => $this->reduce(fn(mixed $carry, Section $section) => $carry && $section->isEmpty(), true), - }; - } - - public function notEmpty() : bool { - return !$this->isEmpty(); - } - - public function hasComposites() : bool { - return $this->reduce(fn(bool $carry, Section $section) => $carry || $section->hasComposites(), false); - } - - // INTERNAL //////////////////////////////////////////////////// - - private function sectionIndex(string $name) : int { - $index = -1; - foreach ($this->sections as $i => $section) { - if ($section->name === $name) { - $index = $i; - break; - } - } - return $index; - } -} +sections; + } + + public function section(string $name) : Section { + $index = $this->sectionIndex($name); + if ($index === -1) { + $section = $this->createSection($name); + } else { + $section = $this->sections[$index]; + } + return $section; + } + + public function sectionNames() : array { + return $this->map(fn(Section $section) => $section->name); + } + + public function hasSection(string $name) : bool { + return $this->sectionIndex($name) !== -1; + } + + public function reduce(callable $callback, mixed $initial = null) : mixed { + return array_reduce($this->sections, $callback, $initial); + } + + public function map(callable $callback) : array { + return array_map($callback, $this->sections); + } + + public function isEmpty() : bool { + return match (true) { + empty($this->sections) => true, + default => $this->reduce(fn(mixed $carry, Section $section) => $carry && $section->isEmpty(), true), + }; + } + + public function notEmpty() : bool { + return !$this->isEmpty(); + } + + public function hasComposites() : bool { + return $this->reduce(fn(bool $carry, Section $section) => $carry || $section->hasComposites(), false); + } + + // INTERNAL //////////////////////////////////////////////////// + + private function sectionIndex(string $name) : int { + $index = -1; + foreach ($this->sections as $i => $section) { + if ($section->name === $name) { + $index = $i; + break; + } + } + return $index; + } +} diff --git a/src/Utils/Messages/Traits/Script/HandlesMutation.php b/src/Utils/Messages/Traits/Script/HandlesMutation.php index 08b91d42..205bbd82 100644 --- a/src/Utils/Messages/Traits/Script/HandlesMutation.php +++ b/src/Utils/Messages/Traits/Script/HandlesMutation.php @@ -1,83 +1,84 @@ -hasSection($section->name())) { - throw new Exception("Section with name '{$section->name()}' already exists - use mergeSection() instead."); - } - $this->appendSection($section); - return $this; - } - - public function appendSection(Section $section) : static { - if ($this->hasSection($section->name())) { - throw new Exception("Section with name '{$section->name()}' already exists - use mergeSection() instead."); - } - $this->sections = $this->appendSections([$section]); - return $this; - } - - public function mergeSection(Section $section) : static { - if ($this->hasSection($section->name())) { - $this->section($section->name())->mergeSection($section); - } else { - $this->appendSection($section); - } - return $this; - } - - public function overrideScript(Script $script) : static { - foreach($script->sections as $section) { - if ($this->hasSection($section->name())) { - $this->removeSection($section->name()); - } - $this->appendSection($section); - } - $this->mergeParameters($script->parameters()); - return $this; - } - - public function mergeScript(Script $script) : static { - foreach($script->sections as $section) { - $this->mergeSection($section); - } - $this->mergeParameters($script->parameters()); - return $this; - } - - public function mergeParameters(array|ScriptParameters $parameters) : static { - $this->parameters = $this->parameters()->merge($parameters); - return $this; - } - - public function removeSection(string $name) : static { - $this->sections = array_filter($this->sections, fn($section) => $section->name() !== $name); - return $this; - } - - // INTERNAL //////////////////////////////////////////////////// - - private function insert(array $array, int $index, array $new) : array { - return array_merge( - array_slice($array, 0, $index), - $new, - array_slice($array, $index) - ); - } - - private function appendSections(array $array) : array { - return array_merge($this->sections, $array); - } - - private function prependSections(array $array) { - return array_merge($array, $this->sections); - } +hasSection($name)) { + throw new Exception("Section with name '{$name()}' already exists - use mergeSection() instead."); + } + $section = new Section($name); + $this->appendSection($section); + return $section; + } + + public function appendSection(Section $section) : static { + if ($this->hasSection($section->name())) { + throw new Exception("Section with name '{$section->name()}' already exists - use mergeSection() instead."); + } + $this->sections = $this->appendSections([$section]); + return $this; + } + + public function mergeSection(Section $section) : static { + if ($this->hasSection($section->name())) { + $this->section($section->name())->mergeSection($section); + } else { + $this->appendSection($section); + } + return $this; + } + + public function overrideScript(Script $script) : static { + foreach($script->sections as $section) { + if ($this->hasSection($section->name())) { + $this->removeSection($section->name()); + } + $this->appendSection($section); + } + $this->mergeParameters($script->parameters()); + return $this; + } + + public function mergeScript(Script $script) : static { + foreach($script->sections as $section) { + $this->mergeSection($section); + } + $this->mergeParameters($script->parameters()); + return $this; + } + + public function mergeParameters(array|ScriptParameters $parameters) : static { + $this->parameters = $this->parameters()->merge($parameters); + return $this; + } + + public function removeSection(string $name) : static { + $this->sections = array_filter($this->sections, fn($section) => $section->name() !== $name); + return $this; + } + + // INTERNAL //////////////////////////////////////////////////// + + private function insert(array $array, int $index, array $new) : array { + return array_merge( + array_slice($array, 0, $index), + $new, + array_slice($array, $index) + ); + } + + private function appendSections(array $array) : array { + return array_merge($this->sections, $array); + } + + private function prependSections(array $array) { + return array_merge($array, $this->sections); + } } \ No newline at end of file diff --git a/src/Utils/Xml/SelectiveXmlParser.php b/src/Utils/Xml/SelectiveXmlParser.php new file mode 100644 index 00000000..f5ef402e --- /dev/null +++ b/src/Utils/Xml/SelectiveXmlParser.php @@ -0,0 +1,97 @@ +validator = new XmlValidator(); + } + + public function parse(string $xmlContent): array { + $this->validator->validate($xmlContent); + $reader = XMLReader::xml($xmlContent); + return $this->parseNodes($reader); + } + + // INTERNAL /////////////////////////////////////////////////// + + private function parseNodes(XMLReader $reader): array { + $nodes = []; + while ($reader->read()) { + if ($reader->nodeType === XMLReader::ELEMENT && $this->canParseTag($reader->localName)) { + $nodes[] = $this->parseNode($reader); + } + } + return $nodes; + } + + private function parseNode(XMLReader $reader): array { + $node = [ + 'tag' => $reader->localName, + 'content' => '', + 'attributes' => $this->getAttributes($reader), + 'children' => [], + ]; + + if ($reader->isEmptyElement) { + return $node; + } + + while ($reader->read()) { + $result = match ($reader->nodeType) { + XMLReader::ELEMENT => $this->handleElement($reader, $node), + XMLReader::TEXT, XMLReader::CDATA => $this->handleText($reader, $node), + XMLReader::END_ELEMENT => ['return' => $node], + default => ['continue' => $node], + }; + + if (isset($result['return'])) { + return $result['return']; + } + $node = $result['continue'] ?? $result['node']; + } + + return $node; + } + + private function handleElement(XMLReader $reader, array $node): array { + if ($this->canParseTag($reader->localName)) { + $node['children'][] = $this->parseNode($reader); + return ['continue' => $node]; + } + + $node['content'] .= $reader->readOuterXML(); + $reader->next(); + return ['continue' => $node]; + } + + private function handleText(XMLReader $reader, array $node): array { + $node['content'] .= $reader->value; + return ['continue' => $node]; + } + + private function getAttributes(XMLReader $reader): array { + $attributes = []; + if ($reader->hasAttributes) { + while ($reader->moveToNextAttribute()) { + $attributes[$reader->name] = $reader->value; + } + $reader->moveToElement(); + } + return $attributes; + } + + private function canParseTag(string $localName) : bool { + if (empty($this->parsedTags)) { + return true; + } + return in_array($localName, $this->parsedTags); + } +} \ No newline at end of file diff --git a/src/Utils/Xml.php b/src/Utils/Xml/SimpleXmlParser.php similarity index 63% rename from src/Utils/Xml.php rename to src/Utils/Xml/SimpleXmlParser.php index e121add4..df273f4f 100644 --- a/src/Utils/Xml.php +++ b/src/Utils/Xml/SimpleXmlParser.php @@ -1,150 +1,122 @@ -xmlString = $xmlString; - } - - /** - * Create a new instance from XML string - * @param string $xmlString - * @return self - */ - public static function from(string $xmlString): self { - return new self($xmlString); - } - - /** - * Include attributes in the resulting array - * @return self - */ - public function withAttributes(): self { - $this->includeAttributes = true; - return $this; - } - - /** - * Include root element in the resulting array - * @return self - */ - public function withRoot(): self { - $this->includeRoot = true; - return $this; - } - - public function wrapped(string $root = 'root'): self { - $this->xmlString = "<$root>{$this->xmlString}"; - return $this; - } - - /** - * Set the naming convention for the resulting array - * @param string $convention - * @return self - */ - public function withNaming(string $convention): self { - $this->namingConvention = $convention; - return $this; - } - - /** - * Return the array representation of the XML - * @return array - */ - public function toArray(): array { - if ($this->parsedArray === null) { - $this->parsedArray = $this->convertXmlToArray(); - } - return $this->parsedArray; - } - - // INTERNAL /////////////////////////////////////////////////// - - private function convertXmlToArray(): array { - if ($this->xmlString === '') { - return []; - } - - $xmlElement = new SimpleXMLElement($this->xmlString, LIBXML_NOCDATA); - $array = $this->xmlToArray($xmlElement); - - if ($this->includeRoot) { - return [$xmlElement->getName() => $array]; - } - - return $array; - } - - private function xmlToArray(SimpleXMLElement $element): array|string { - $result = []; - - // Handle attributes if required - if ($this->includeAttributes) { - foreach ($element->attributes() as $attrKey => $attrValue) { - $result['_attributes'][$attrKey] = (string) $attrValue; - } - } - - // Handle child elements - foreach ($element->children() as $child) { - $childName = $this->sanitizeName($child->getName()); - $childValue = $this->xmlToArray($child); - - if (isset($result[$childName])) { - if (!is_array($result[$childName]) || !isset($result[$childName][0])) { - $result[$childName] = [$result[$childName]]; - } - $result[$childName][] = $childValue; - } else { - $result[$childName] = $childValue; - } - } - - // Handle text content or CDATA - $textContent = trim((string) $element); - if (strlen($textContent) > 0) { - if ($this->includeAttributes && count($result) > 0) { - $result['_value'] = $textContent; - } else { - return $textContent; - } - } - - return $result; - } - - /** - * TODO: allow conversion of names to snake_case or camelCase - * @param string $name - * @return string - */ - private function sanitizeName(string $name): string { - return match($this->namingConvention) { - 'camel' => Str::camel($name), - 'snake' => Str::snake($name), - default => $name, - }; - } -} +includeAttributes = true; + return $this; + } + + public function withRoot(): self { + $this->includeRoot = true; + return $this; + } + + public function wrapped(string $root = 'root'): self { + $this->xmlString = "<$root>{$this->xmlString}"; + return $this; + } + + public function asCamelCase(): self { + $this->namingConvention = 'camel'; + return $this; + } + + public function asSnakeCase(): self { + $this->namingConvention = 'snake'; + return $this; + } + + public function withNaming(string $namingConvention): self { + $this->namingConvention = $namingConvention; + return $this; + } + + public function toArray(): array { + if ($this->parsedData === null) { + $this->parsedData = $this->convertXmlToArray(); + } + return $this->parsedData; + } + + private function convertXmlToArray(): array { + if ($this->xmlString === '') { + return []; + } + $xmlElement = new SimpleXMLElement($this->xmlString, LIBXML_NOCDATA); + $array = $this->xmlToArray($xmlElement); + if ($this->includeRoot) { + return [$xmlElement->getName() => $array]; + } + return $array; + } + + private function xmlToArray(SimpleXMLElement $element): array|string { + $result = []; + + // Handle attributes if required + if ($this->includeAttributes) { + foreach ($element->attributes() as $attrKey => $attrValue) { + $result['_attributes'][$attrKey] = (string) $attrValue; + } + } + + // Handle child elements + foreach ($element->children() as $child) { + $childName = $this->sanitizeName($child->getName()); + $childValue = $this->xmlToArray($child); + + if (isset($result[$childName])) { + if (!is_array($result[$childName]) || !isset($result[$childName][0])) { + $result[$childName] = [$result[$childName]]; + } + $result[$childName][] = $childValue; + } else { + $result[$childName] = $childValue; + } + } + + // Handle text content or CDATA + $textContent = trim((string) $element); + if (strlen($textContent) > 0) { + if ($this->includeAttributes && count($result) > 0) { + $result['_value'] = $textContent; + } else { + return $textContent; + } + } + + return $result; + } + + /** + * Conversion of names to snake_case or camelCase + * @param string $name + * @return string + */ + private function sanitizeName(string $name): string { + return match($this->namingConvention) { + 'camel' => Str::camel($name), + 'snake' => Str::snake($name), + default => $name, + }; + } +} \ No newline at end of file diff --git a/src/Utils/Xml/Xml.php b/src/Utils/Xml/Xml.php new file mode 100644 index 00000000..7f28a9f7 --- /dev/null +++ b/src/Utils/Xml/Xml.php @@ -0,0 +1,78 @@ +xmlString = $xmlString; + } + + /** + * Create a new instance from XML string + * @param string $xmlString + * @return self + */ + public static function from(string $xmlString): self { + return new self($xmlString); + } + + public function withTags(array $parsedTags): self { + $this->parsedTags = $parsedTags; + return $this; + } + + public function wrapped(string $root = 'root'): self { + $this->xmlString = "<$root>{$this->xmlString}"; + return $this; + } + + /** + * Return the array representation of the XML + * @return array + */ + public function toArray(): array { + return $this->parsedData(); + } + + public function toXmlElement(): XmlElement { + return XmlElement::fromArray($this->parsedData()); + } + + // INTERNAL /////////////////////////////////////////////////// + + private function parsedData(): array { + if ($this->parsedData === null) { + $array = match(true) { + ($this->xmlString === '') => [], + default => (new SelectiveXmlParser($this->parsedTags))->parse($this->xmlString), + }; + $this->parsedData = $array[0] ?? self::empty(); + } + return $this->parsedData; + } + + private static function empty(): array { + return [ + 'tag' => '', + 'content' => '', + 'attributes' => [], + 'children' => [], + ]; + } +} diff --git a/src/Utils/Xml/XmlElement.php b/src/Utils/Xml/XmlElement.php new file mode 100644 index 00000000..85a8997b --- /dev/null +++ b/src/Utils/Xml/XmlElement.php @@ -0,0 +1,109 @@ +tag = $tag; + $this->content = $content; + $this->attributes = $attributes; + foreach ($children as $child) { + $this->children[] = self::fromArray($child); + } + } + + public static function fromArray(array $data): self { + return new self( + tag: $data['tag'] ?? '', + content: $data['content'] ?? '', + attributes: $data['attributes'] ?? [], + children: $data['children'] ?? [], + ); + } + + public function tag(): string { + return $this->tag; + } + + public function content(): string { + return $this->content; + } + + public function attributes(): array { + return $this->attributes; + } + + /** + * @return XmlElement[] + */ + public function children(): array { + return $this->children; + } + + public function get(string $path) : XmlElement { + $parts = explode('.', $path); + $current = $this; + foreach ($parts as $part) { + $current = $current->children[$part]; + } + return $current; + } + + public function first(string $tag): ?XmlElement { + foreach ($this->children as $child) { + if ($child->tag() === $tag) { + return $child; + } + } + return null; + } + + /** + * @return XmlElement[] + */ + public function all(string $tag): array { + $result = []; + foreach ($this->children as $child) { + if ($child->tag() === $tag) { + $result[] = $child; + } + } + return $result; + } + + public function attribute(string $name, mixed $default = null): ?string { + return $this->attributes[$name] ?? $default; + } + + public function toArray(): array { + $children = []; + foreach ($this->children as $child) { + $children[] = $child->toArray(); + } + return [ + 'tag' => $this->tag, + 'content' => $this->content, + 'attributes' => $this->attributes, + 'children' => $children, + ]; + } + + public function hasChildren() : bool { + return count($this->children) > 0; + } + + public function hasContent() : bool { + return $this->content !== ''; + } +} \ No newline at end of file diff --git a/src/Utils/Xml/XmlValidator.php b/src/Utils/Xml/XmlValidator.php new file mode 100644 index 00000000..4af47c4c --- /dev/null +++ b/src/Utils/Xml/XmlValidator.php @@ -0,0 +1,41 @@ +loadXML($xml, LIBXML_NONET); + + if (!$result) { + $errors = libxml_get_errors(); + libxml_clear_errors(); + libxml_use_internal_errors($previous); + + throw new RuntimeException( + 'Invalid XML: ' . $this->formatLibXmlError($errors[0]) + ); + } + + libxml_clear_errors(); + libxml_use_internal_errors($previous); + } + + private function formatLibXmlError(\LibXMLError $error): string + { + $message = $error->message; + if ($error->line !== 0) { + $message .= " on line {$error->line}"; + } + if ($error->column !== 0) { + $message .= " column {$error->column}"; + } + return trim($message); + } +} diff --git a/tests/Feature/Extras/PromptTest.php b/tests/Feature/Extras/PromptTest.php index e24fec56..f528dc80 100644 --- a/tests/Feature/Extras/PromptTest.php +++ b/tests/Feature/Extras/PromptTest.php @@ -4,6 +4,7 @@ use Cognesy\Instructor\Extras\Prompt\Prompt; use Cognesy\Instructor\Extras\Prompt\Data\PromptEngineConfig; use Cognesy\Instructor\Utils\Messages\Messages; +use Cognesy\Instructor\Utils\Messages\Script; // RECOMMENDED, READING FRIENDLY SYNTAX @@ -78,8 +79,8 @@ }); it('can convert template with chat markup to messages', function () { - $prompt = Prompt::using('demo-blade') - ->withTemplateContent('You are helpful assistant.Hello, {{ $name }}') + $prompt = Prompt::blade() + ->withTemplateContent('You are helpful assistant.Hello, {{ $name }}') ->withValues(['name' => 'assistant']); $messages = $prompt->toMessages(); expect($messages)->toBeInstanceOf(Messages::class); @@ -87,6 +88,17 @@ expect($messages->toArray())->toHaveCount(2); }); +it('can convert template with chat markup to script', function () { + $prompt = Prompt::blade() + ->withTemplateContent('
You are helpful assistant.
Hello, {{ $name }}') + ->withValues(['name' => 'assistant']); + $script = $prompt->toScript(); + expect($script)->toBeInstanceOf(Script::class) + ->and($script->toString())->toContain('Hello, assistant') + ->and($script->hasSection('system'))->toBeTrue() + ->and($script->hasSection('messages'))->toBeTrue(); +}); + it('can load a template by name - Twig', function () { $prompt = Prompt::using('demo-twig')->withTemplate('hello'); expect($prompt->template())->toContain('Hello'); diff --git a/tests/Feature/Utils/SimpleXmlParserTest.php b/tests/Feature/Utils/SimpleXmlParserTest.php new file mode 100644 index 00000000..118df644 --- /dev/null +++ b/tests/Feature/Utils/SimpleXmlParserTest.php @@ -0,0 +1,120 @@ +content'; + $xml = SimpleXmlParser::from($xmlString)->withAttributes(); + $expected = [ + '_attributes' => ['attr' => 'value'], + 'child' => 'content' + ]; + expect($xml->toArray())->toEqual($expected); +}); + +it('converts XML to array without attributes', function () { + $xmlString = 'content'; + $xml = SimpleXmlParser::from($xmlString); + $expected = ['child' => 'content']; + expect($xml->toArray())->toEqual($expected); +}); + +it('includes root element in array', function () { + $xmlString = 'content'; + $xml = SimpleXmlParser::from($xmlString)->withRoot(); + $expected = ['root' => ['child' => 'content']]; + expect($xml->toArray())->toEqual($expected); +}); + +it('converts names to snake_case', function () { + $xmlString = 'content'; + $xml = SimpleXmlParser::from($xmlString)->withNaming('snake'); + $expected = ['child_element' => 'content']; + expect($xml->toArray())->toEqual($expected); +}); + +it('converts names to camelCase', function () { + $xmlString = 'content'; + $xml = SimpleXmlParser::from($xmlString)->withNaming('camel'); + $expected = ['childElement' => 'content']; + expect($xml->toArray())->toEqual($expected); +}); + +it('handles empty XML string', function () { + $xmlString = ''; + $xml = SimpleXmlParser::from($xmlString); + $expected = []; + expect($xml->toArray())->toEqual($expected); +}); + +it('handles XML with multiple children', function () { + $xmlString = 'content1content2'; + $xml = SimpleXmlParser::from($xmlString); + $expected = ['child' => ['content1', 'content2']]; + expect($xml->toArray())->toEqual($expected); +}); + +it('handles XML with multiple children with the same name', function () { + $xmlString = 'content1content2'; + $xml = SimpleXmlParser::from($xmlString)->withNaming('snake'); + $expected = ['child' => ['content1', 'content2']]; + expect($xml->toArray())->toEqual($expected); +}); + +it('handles CDATA in XML', function () { + $xmlString = 'content]]>'; + $xml = SimpleXmlParser::from($xmlString)->withRoot(); + $expected = ['root' => ['child' => 'content']]; + expect($xml->toArray())->toEqual($expected); +}); + +it('throws exception for invalid XML', function () { + $xmlString = 'content'; + expect(fn() => SimpleXmlParser::from($xmlString)->toArray())->toThrow(Exception::class); +}); + +it('handles attributes with special characters in XML', function () { + $xmlString = 'content'; + $xml = SimpleXmlParser::from($xmlString)->withAttributes(); + $expected = [ + '_attributes' => ['attr' => 'value & more'], + 'child' => 'content' + ]; + expect($xml->toArray())->toEqual($expected); +}); + +it('wraps XML string with specified root element', function () { + $xmlString = 'content'; + $xml = SimpleXmlParser::from($xmlString)->withRoot()->wrapped('wrapper'); + $expected = ['wrapper' => ['child' => 'content']]; + expect($xml->toArray())->toEqual($expected); +}); + +it('handles special characters in XML', function () { + $xmlString = 'content & more content'; + $xml = SimpleXmlParser::from($xmlString); + $expected = ['child' => 'content & more content']; + expect($xml->toArray())->toEqual($expected); +}); + +it('handles empty elements in XML', function () { + $xmlString = ''; + $xml = SimpleXmlParser::from($xmlString); + $expected = ['child' => []]; + expect($xml->toArray())->toEqual($expected); +}); + +it('handles nested elements in XML', function () { + $xmlString = 'content'; + $xml = SimpleXmlParser::from($xmlString); + $expected = ['parent' => ['child' => 'content']]; + expect($xml->toArray())->toEqual($expected); +}); + +it('handles mixed content in XML', function () { + $xmlString = 'textcontentmore text'; + $xml = SimpleXmlParser::from($xmlString)->withRoot()->toArray(); + $expected = ['root' => ['textmore text', ['child' => 'content']]]; + expect($xml)->toEqual($expected); +})->skip(); + diff --git a/tests/Feature/Utils/XmlParserTest.php b/tests/Feature/Utils/XmlParserTest.php new file mode 100644 index 00000000..980efb37 --- /dev/null +++ b/tests/Feature/Utils/XmlParserTest.php @@ -0,0 +1,151 @@ +Hello!'; + + $expected = [[ + 'tag' => 'chat', + 'content' => '', + 'attributes' => [], + 'children' => [ + [ + 'tag' => 'message', + 'content' => 'Hello!', + 'attributes' => ['role' => 'system'], + 'children' => [], + ] + ], + ]]; + + $result = $parser->parse($xml); + expect($result)->toEqual($expected); +}); + +it('handles attributes correctly', function () { + $parser = new SelectiveXmlParser(['user']); + $xml = 'Hello'; + + $expected = [[ + 'tag' => 'user', + 'content' => 'Hello', + 'attributes' => ['attribute' => 'value1'], + 'children' => [], + ]]; + + $result = $parser->parse($xml); + expect($result)->toEqual($expected); +}); + +it('handles nested tags correctly', function () { + $parser = new SelectiveXmlParser(['chat', 'message', 'user']); + $xml = 'Outer message Inner message'; + + $expected = [[ + 'tag' => 'chat', + 'content' => '', + 'attributes' => [], + 'children' => [ + [ + 'tag' => 'message', + 'content' => 'Outer message ', + 'attributes' => [], + 'children' => [ + [ + 'tag' => 'user', + 'content' => 'Inner message', + 'attributes' => ['attribute' => 'value'], + 'children' => [], + ] + ], + ] + ], + ]]; + + $result = $parser->parse($xml); + expect($result)->toEqual($expected); +}); + +it('handles empty elements correctly', function () { + $parser = new SelectiveXmlParser(['chat', 'message']); + $xml = ''; + + $expected = [[ + 'tag' => 'chat', + 'content' => '', + 'attributes' => [], + 'children' => [ + [ + 'tag' => 'message', + 'content' => '', + 'attributes' => [], + 'children' => [], + ] + ], + ]]; + + $result = $parser->parse($xml); + expect($result)->toEqual($expected); +}); + +it('handles unknown tags correctly', function () { + $parser = new SelectiveXmlParser(['chat']); + $xml = 'Some unknown content'; + + $expected = [[ + 'tag' => 'chat', + 'content' => 'Some unknown content', + 'attributes' => [], + 'children' => [], + ]]; + + $result = $parser->parse($xml); + expect($result)->toEqual($expected); +}); + +it('handles mixed content and children correctly', function () { + $parser = new SelectiveXmlParser(['chat', 'message', 'user']); + $xml = 'Text beforeInside userText after'; + + $expected = [[ + 'tag' => 'chat', + 'content' => '', + 'attributes' => [], + 'children' => [ + [ + 'tag' => 'message', + 'content' => 'Text beforeText after', + 'attributes' => [], + 'children' => [ + [ + 'tag' => 'user', + 'content' => 'Inside user', + 'attributes' => [], + 'children' => [], + ] + ], + ] + ], + ]]; + + $result = $parser->parse($xml); + expect($result)->toEqual($expected); +}); + +it('throws exception for multiple root elements', function () { + $parser = new SelectiveXmlParser(['root']); + $xml = 'FirstSecond'; + expect(fn() => $parser->parse($xml)) + ->toThrow(RuntimeException::class, 'Invalid XML'); +}); + +it('allows single root with multiple children', function () { + $parser = new SelectiveXmlParser(['root', 'child']); + $xml = 'FirstSecond'; + + $result = $parser->parse($xml); + expect($result)->toHaveCount(1); + expect($result[0]['children'])->toHaveCount(2); +}); \ No newline at end of file diff --git a/tests/Feature/Utils/XmlTest.php b/tests/Feature/Utils/XmlTest.php index 413298bb..db3a5e8b 100644 --- a/tests/Feature/Utils/XmlTest.php +++ b/tests/Feature/Utils/XmlTest.php @@ -1,119 +1,102 @@ -content'; - $xml = Xml::from($xmlString)->withAttributes(); - $expected = [ - '_attributes' => ['attr' => 'value'], - 'child' => 'content' - ]; - expect($xml->toArray())->toEqual($expected); -}); - -it('converts XML to array without attributes', function () { - $xmlString = 'content'; - $xml = Xml::from($xmlString); - $expected = ['child' => 'content']; - expect($xml->toArray())->toEqual($expected); -}); - -it('includes root element in array', function () { - $xmlString = 'content'; - $xml = Xml::from($xmlString)->withRoot(); - $expected = ['root' => ['child' => 'content']]; - expect($xml->toArray())->toEqual($expected); -}); - -it('converts names to snake_case', function () { - $xmlString = 'content'; - $xml = Xml::from($xmlString)->withNaming('snake'); - $expected = ['child_element' => 'content']; - expect($xml->toArray())->toEqual($expected); -}); - -it('converts names to camelCase', function () { - $xmlString = 'content'; - $xml = Xml::from($xmlString)->withNaming('camel'); - $expected = ['childElement' => 'content']; - expect($xml->toArray())->toEqual($expected); -}); - -it('handles empty XML string', function () { - $xmlString = ''; - $xml = Xml::from($xmlString); - $expected = []; - expect($xml->toArray())->toEqual($expected); -}); - -it('handles XML with multiple children', function () { - $xmlString = 'content1content2'; - $xml = Xml::from($xmlString); - $expected = ['child' => ['content1', 'content2']]; - expect($xml->toArray())->toEqual($expected); -}); - -it('handles XML with multiple children with the same name', function () { - $xmlString = 'content1content2'; - $xml = Xml::from($xmlString)->withNaming('snake'); - $expected = ['child' => ['content1', 'content2']]; - expect($xml->toArray())->toEqual($expected); -}); - -it('handles CDATA in XML', function () { - $xmlString = 'content]]>'; - $xml = Xml::from($xmlString)->withRoot(); - $expected = ['root' => ['child' => 'content']]; - expect($xml->toArray())->toEqual($expected); -}); - -it('throws exception for invalid XML', function () { - $xmlString = 'content'; - expect(fn() => Xml::from($xmlString)->toArray())->toThrow(Exception::class); -}); - -it('handles attributes with special characters in XML', function () { - $xmlString = 'content'; - $xml = Xml::from($xmlString)->withAttributes(); - $expected = [ - '_attributes' => ['attr' => 'value & more'], - 'child' => 'content' - ]; - expect($xml->toArray())->toEqual($expected); -}); - -it('wraps XML string with specified root element', function () { - $xmlString = 'content'; - $xml = Xml::from($xmlString)->withRoot()->wrapped('wrapper'); - $expected = ['wrapper' => ['child' => 'content']]; - expect($xml->toArray())->toEqual($expected); -}); - -it('handles special characters in XML', function () { - $xmlString = 'content & more content'; - $xml = Xml::from($xmlString); - $expected = ['child' => 'content & more content']; - expect($xml->toArray())->toEqual($expected); -}); - -it('handles empty elements in XML', function () { - $xmlString = ''; - $xml = Xml::from($xmlString); - $expected = ['child' => []]; - expect($xml->toArray())->toEqual($expected); -}); - -it('handles nested elements in XML', function () { - $xmlString = 'content'; - $xml = Xml::from($xmlString); - $expected = ['parent' => ['child' => 'content']]; - expect($xml->toArray())->toEqual($expected); -}); - -it('handles mixed content in XML', function () { - $xmlString = 'textcontentmore text'; - $xml = Xml::from($xmlString)->withRoot()->toArray(); - $expected = ['root' => ['textmore text', ['child' => 'content']]]; - expect($xml)->toEqual($expected); -})->skip(); \ No newline at end of file +sample content'; + $xml = Xml::from($xmlString)->withTags(['root','child'])->toXmlElement(); + expect($xml->attribute('attr'))->toEqual('value'); + expect($xml->first('child')?->content())->toEqual('sample content'); +}); + +it('converts XML to array without attributes', function () { + $xmlString = 'content'; + $xml = Xml::from($xmlString)->withTags(['root','child'])->toXmlElement(); + expect($xml->first('child')?->content())->toEqual('content'); +}); + +it('includes root element in array', function () { + $xmlString = 'content'; + $xml = Xml::from($xmlString)->withTags(['root','child'])->toXmlElement(); + expect($xml->tag())->toEqual('root'); +}); + +it('handles empty XML string', function () { + $xmlString = ''; + $xml = Xml::from($xmlString)->toArray(); + $expected = [ + 'tag' => '', + 'content' => '', + 'attributes' => [], + 'children' => [], + ]; + expect($xml)->toEqual($expected); +}); + +it('handles XML with multiple children', function () { + $xmlString = 'content1content2'; + $xml = Xml::from($xmlString)->withTags(['root','child'])->toXmlElement(); + expect($xml->all('child'))->toHaveLength(2); +}); + +it('handles CDATA in XML', function () { + $xmlString = 'content]]>'; + $xml = Xml::from($xmlString)->withTags(['root','child'])->toXmlElement(); + expect($xml->first('child')->content())->toEqual('content'); +}); + +it('throws exception for invalid XML', function () { + $xmlString = 'content'; + $xml = Xml::from($xmlString)->withTags(['root','child']); + expect(fn() => $xml->toArray())->toThrow(Exception::class); +}); + +it('handles attributes with special characters in XML', function () { + $xmlString = 'content'; + $xml = Xml::from($xmlString)->withTags(['root','child'])->toXmlElement(); + expect($xml->attribute('attr'))->toEqual('value & more'); +}); + +it('wraps XML string with specified root element', function () { + $xmlString = 'content'; + $xml = Xml::from($xmlString)->withTags(['wrapper', 'child'])->wrapped('wrapper')->toXmlElement(); + expect($xml->tag())->toEqual('wrapper'); + expect($xml->first('child')->content())->toEqual('content'); +}); + +it('handles special characters in XML', function () { + $xmlString = 'content & more content'; + $xml = Xml::from($xmlString)->withTags(['root','child'])->toXmlElement(); + expect($xml->first('child')->content())->toEqual('content & more content'); +}); + +it('handles empty elements in XML', function () { + $xmlString = ''; + $xml = Xml::from($xmlString)->toXmlElement(); + expect($xml->first('child')->content())->toEqual(''); +}); + +it('handles selective parsing', function () { + $xmlString = ''; + $xml = Xml::from($xmlString)->withTags(['root'])->toXmlElement(); + expect($xml->content())->toEqual(''); +}); + +it('handles selective parsing with nested tags', function () { + $xmlString = 'xxx'; + $xml = Xml::from($xmlString)->withTags(['root', 'child'])->toXmlElement(); + expect($xml->first('child')->content())->toEqual('xxx'); +}); + +it('handles nested elements in XML', function () { + $xmlString = 'content'; + $xml = Xml::from($xmlString)->toXmlElement(); + expect($xml->first('parent')->first('child')->content())->toEqual('content'); +}); + +it('handles mixed content in XML', function () { + $xmlString = 'textcontentmore text'; + $xml = Xml::from($xmlString)->toXmlElement(); + expect($xml->content())->toEqual('textmore text'); +}); +