diff --git a/docs/hub/advanced/language_programs.md b/docs/hub/advanced/language_programs.md index 912ae3ab..728a8b5e 100644 --- a/docs/hub/advanced/language_programs.md +++ b/docs/hub/advanced/language_programs.md @@ -38,10 +38,8 @@ $loader->add('Cognesy\\Instructor\\', __DIR__ . '../../src/'); class EmailAnalysis extends SignatureData { #[InputField('content of email')] public string $text; - #[OutputField('identify most relevant email topic: sales, support, other, spam')] public string $topic; - #[OutputField('one word sentiment: positive, neutral, negative')] public string $sentiment; @@ -62,19 +60,14 @@ class CategoryCount { class EmailStats extends SignatureData { #[InputField('directory containing emails')] public string $directory; - #[OutputField('number of emails')] public int $emails; - #[OutputField('number of spam emails')] public int $spam; - #[OutputField('average sentiment ratio')] public float $sentimentRatio; - #[OutputField('spam ratio')] public float $spamRatio; - #[OutputField('category counts')] public CategoryCount $categories; @@ -83,56 +76,56 @@ class EmailStats extends SignatureData { } } +// MODULE DECLARATIONS //////////////////////////////////////////////////////////////////// + class ReadEmails extends Module { public function __construct( private array $directoryContents = [] ) {} public function signature() : string|Signature { - return 'directory -> emails'; + return 'directory -> emails : string[]'; } - public function forward(string $directory) : array { + protected function forward(string $directory) : array { return $this->directoryContents[$directory]; } } class ParseEmail extends Module { public function signature() : string|Signature { - return 'email -> sender, body'; + return 'email -> sender, subject, body'; } protected function forward(string $email) : array { $parts = explode(',', $email); return [ 'sender' => trim(explode(':', $parts[0])[1]), - 'body' => trim(explode(':', $parts[1])[1]), + 'subject' => trim(explode(':', 
$parts[1])[1]), + 'body' => trim(explode(':', $parts[2])[1]), ]; } } class GetStats extends Module { - private ReadEmails $readEmails; - private ParseEmail $parseEmail; - private Predict $analyseEmail; - - public function __construct(Instructor $instructor, array $directoryContents = []) { - $this->readEmails = new ReadEmails($directoryContents); - $this->parseEmail = new ParseEmail(); - $this->analyseEmail = new Predict(signature: EmailAnalysis::class, instructor: $instructor); - } + public function __construct( + private ReadEmails $readEmails, + private ParseEmail $parseEmail, + private Predict $analyseEmail, + ) {} public function signature() : string|Signature { return EmailStats::class; } - public function forward(string $directory) : EmailStats { + protected function forward(string $directory) : EmailStats { $emails = $this->readEmails->withArgs(directory: $directory)->get('emails'); $aggregateSentiment = 0; $categories = new CategoryCount; foreach ($emails as $email) { $parsedEmail = $this->parseEmail->withArgs(email: $email); - $emailAnalysis = $this->analyseEmail->with(EmailAnalysis::for($parsedEmail->get('body'))); + $emailData = EmailAnalysis::for(text: $parsedEmail->get('body')); + $emailAnalysis = $this->analyseEmail->with($emailData); $topic = $emailAnalysis->get('topic'); $sentiment = $emailAnalysis->get('sentiment'); $topic = (in_array($topic, ['sales', 'support', 'spam'])) ? 
$topic : 'other'; @@ -160,19 +153,26 @@ class GetStats extends Module { } $directoryContents['inbox'] = [ - 'sender: jl@gmail.com, body: I am happy about the discount you offered and accept contract renewal', - 'sender: xxx, body: Get Viagra and Ozempic for free', - 'sender: joe@wp.pl, body: My internet connection keeps failing', - 'sender: paul@x.io, body: How long do I have to wait for the pricing of custom support service?!?', - 'sender: joe@wp.pl, body: 2 weeks of waiting and still no improvement of my connection', + 'sender: jl@gmail.com, subject: Offer, body: I am happy about the discount you offered and accept contract renewal', + 'sender: xxx, subject: Free!!!, body: Get Ozempic for free', + 'sender: joe@wp.pl, subject: Problem, body: My internet connection keeps failing', + 'sender: paul@x.io, subject: Still no pricing, body: How long do I have to wait for the pricing of custom support service?!?', + 'sender: joe@wp.pl, subject: Slow connection, body: 2 weeks of waiting and still no improvement of my connection', ]; +// PREPARE DEPENDENCIES + $instructor = (new Instructor); -$getStats = new GetStats($instructor, $directoryContents); +$readEmails = new ReadEmails($directoryContents); +$parseEmail = new ParseEmail(); +$analyseEmail = new Predict(signature: EmailAnalysis::class, instructor: $instructor); +$getStats = new GetStats($readEmails, $parseEmail, $analyseEmail); + +// EXECUTE LANGUAGE PROGRAM + $emailStats = $getStats->with(EmailStats::for('inbox')); echo "Results:\n"; dump($emailStats->get()); ?> ``` - diff --git a/docs/hub/advanced/language_programs2.md b/docs/hub/advanced/language_programs2.md new file mode 100644 index 00000000..2b4f8f30 --- /dev/null +++ b/docs/hub/advanced/language_programs2.md @@ -0,0 +1,167 @@ +# Language programs + +Instructor provides an addon allowing to implement complex processing flows +using LLM in a modular way. 
This addon to Instructor was inspired by the DSPy +library for Python (https://github.com/stanfordnlp/dspy). + +This example demonstrates multistep processing with LLMs: + - parse text to extract email data (sender, subject and content) -> result is an object containing parsed email data + - fix spelling mistakes in the subject and content fields -> result is an object containing fixed email subject and content + - translate subject and content into the specified language -> result is an object containing translated data + +All the steps are packaged into a single, reusable module, which is easy to call via: + +``` +(new ProcessEmail)->withArgs( + text: $text, + language: $language, +); +``` + +`ProcessEmail` inherits from `Module`, which is a base class for Instructor modules. It returns a predefined object containing, in this case, the data from all steps of processing. + +The outputs and flow can be arbitrarily shaped to the needs of a specific use case (within the bounds of how Module & Predict components work). + +```php +add('Cognesy\\Instructor\\', __DIR__ . 
'/../../src/'); + +// DATA MODEL DECLARATIONS //////////////////////////////////////////////////////////////// + +//#[Description('extract email details from text')] +class ParsedEmail extends SignatureData { + #[InputField('text containing email')] + public string $text; + + #[OutputField('email address of sender')] + public string $senderEmail; + #[OutputField('subject of the email')] + public string $subject; + #[OutputField('body of the email')] + public string $body; +} + +class FixedEmail extends SignatureData { + #[InputField('subject of the email')] + public string $subject; + #[InputField('body of the email')] + public string $body; + + #[OutputField('subject of the email with fixed spelling mistakes')] + public string $fixedSubject; + #[OutputField('body of the email with fixed spelling mistakes')] + public string $fixedBody; +} + +class EmailTranslation extends SignatureData { + #[InputField('subject of email')] + public string $subject; + #[InputField('body of email')] + public string $body; + #[InputField('language to translate to')] + public string $language; + + #[OutputField('translated subject of email')] + public string $translatedSubject; + #[OutputField('translated body of email')] + public string $translatedBody; +} + +class Email { + public function __construct( + public string $senderEmail, + public string $subject, + public string $body + ) {} +} + +class EmailProcessingResults { + public function __construct( + public Email $original, + public Email $fixed, + public Email $translated + ) {} +} + +// MODULE DECLARATIONS //////////////////////////////////////////////////////////////////// + +class ProcessEmail extends Module { + private Predict $parse; + private Predict $fix; + private Predict $translate; + + public function __construct() { + $instructor = new Instructor(); + + $this->parse = new Predict(signature: ParsedEmail::class, instructor: $instructor); + $this->fix = new Predict(signature: FixedEmail::class, instructor: $instructor); + 
$this->translate = new Predict(signature: EmailTranslation::class, instructor: $instructor); + } + + public function signature(): string { + return 'text: string, language: string -> result: EmailProcessingResults'; + } + + protected function forward(string $text, string $language): EmailProcessingResults { + $parsedEmail = $this->parse->with( // step 1: extract sender, subject and body from the raw text + ParsedEmail::fromArgs( + text: $text + ) + )->result(); + + $fixedEmail = $this->fix->with( // step 2: correct spelling in the parsed subject and body + FixedEmail::fromArgs( + subject: $parsedEmail->subject, + body: $parsedEmail->body + ) + )->result(); + + $translatedEmail = $this->translate->with( // step 3: translate the corrected subject and body + EmailTranslation::fromArgs( + subject: $fixedEmail->fixedSubject, + body: $fixedEmail->fixedBody, + language: $language + ) + )->result(); + + return new EmailProcessingResults( // all three variants reuse the parsed sender address + new Email( + $parsedEmail->senderEmail, + $parsedEmail->subject, + $parsedEmail->body + ), + new Email( + $parsedEmail->senderEmail, + $fixedEmail->fixedSubject, + $fixedEmail->fixedBody + ), + new Email( + $parsedEmail->senderEmail, + $translatedEmail->translatedSubject, + $translatedEmail->translatedBody + ) + ); + } +} + +// EXECUTE LANGUAGE PROGRAM /////////////////////////////////////////////////////////////// + +$text = 'sender: jl@gmail.com, subject: Ofer, body: Im hapy abut the discount you offered and accept contrac renewal'; +$language = 'French'; + +$result = (new ProcessEmail)->withArgs(text: $text, language: $language)->result(); + +echo "Results:\n"; +dump($result); +?> +``` diff --git a/mkdocs.yml b/mkdocs.yml index 9339d2b2..82bebee7 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -211,6 +211,7 @@ nav: - Custom validation using Symfony Validator: 'hub/advanced/custom_validator.md' - Extracting arguments of function or method: 'hub/advanced/function_arguments.md' - Language programs: 'hub/advanced/language_programs.md' + - Language programs 2: 'hub/advanced/language_programs2.md' - Streaming partial updates during inference: 'hub/advanced/partial_updates.md' - Providing example inputs and outputs: 
'hub/advanced/providing_examples.md' - Extracting scalar values: 'hub/advanced/scalars.md'