Add PSR-3 compliant logging interface and default StreamLogger (#75)
* feat: implement configurable logging interface with PSR-3 support

* feat: rename StdoutLogger to StreamLogger and add tests for the StreamLogger

* fix: Possible null reference for logger in helpers

* feat: Update docs on logger

* feat: Update the StreamLogger to work with multiple streams
CodeWithKyrian authored Jan 11, 2025
1 parent 920ae45 commit 6547841
Showing 14 changed files with 460 additions and 36 deletions.
README.md (4 changes: 2 additions & 2 deletions)
@@ -152,7 +152,7 @@ Transformers::setup()
->setAuthToken('...') // Set the auth token for downloading models. Defaults to `null`
->setUserAgent('...') // Set the user agent for downloading models. Defaults to `transformers-php/{version}`
->setImageDriver('...') // Set the image driver for processing images. Defaults to `IMAGICK`
->apply(); // Apply the configuration
->setLogger('...'); // Set the logger for TransformersPHP. Defaults to `null`
```
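For instance, a minimal setup might configure only a couple of options and leave the rest at their defaults (an illustrative sketch; the cache path and user agent below are made-up values):

```php
use Codewithkyrian\Transformers\Transformers;

// Any subset of the `set` methods, in any order; omitted options keep their defaults.
Transformers::setup()
    ->setCacheDir('/tmp/transformers')  // hypothetical cache location
    ->setUserAgent('my-app/0.1')        // hypothetical user agent string
    ->apply();                          // apply the configuration
```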

You can call the `set` methods in any order, or leave any out entirely, in which case it uses the default values. For …
@@ -399,4 +399,4 @@ This package is a WIP, but here's a list of tasks and architectures currently te…
1. **[YOLOS](https://huggingface.co/docs/transformers/model_doc/yolos)** (from Huazhong University of Science &
Technology) released with the
paper [You Only Look at One Sequence: Rethinking Transformer in Vision through Object Detection](https://arxiv.org/abs/2106.00666)
by Yuxin Fang, Bencheng Liao, Xinggang Wang, Jiemin Fang, Jiyang Qi, Rui Wu, Jianwei Niu, Wenyu Liu.
composer.json (1 change: 1 addition & 0 deletions)
@@ -15,6 +15,7 @@
"require": {
"php": "^8.1",
"ext-ffi": "*",
"psr/log": "^1.1.3|^2.0|^3.0",
"codewithkyrian/jinja-php": "^1.0",
"codewithkyrian/transformers-libsloader": "^2.0",
"imagine/imagine": "^1.3",
docs/configuration.md (11 changes: 10 additions & 1 deletion)
@@ -22,7 +22,8 @@ Transformers::setup()
->setRemotePathTemplate('custom/path/{model}/{file}')
->setAuthToken('your-token')
->setUserAgent('your-user-agent')
->setImageDriver(ImageDriver::IMAGICK);
->setImageDriver(ImageDriver::IMAGICK)
->setLogger(new StreamLogger('transformers-php'));
```

::: tip
@@ -105,6 +106,14 @@ Transformers::setup()
->apply();
```

### `setLogger(LoggerInterface $logger)`

This setting allows you to set a custom logger for TransformersPHP. No logger is set by default, but you can provide one
to debug TransformersPHP's internal behavior. The logger must implement the `Psr\Log\LoggerInterface` interface. TransformersPHP
ships with a `StreamLogger` class, similar to Monolog's `StreamHandler`, which logs to a stream (STDOUT, STDERR,
or a file) and can be configured to log at different levels (debug, info, warning, error, critical). You can also pass in a
logger that is already configured and ready to use, e.g. a Laravel logger.
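As a quick sketch of both approaches (the Monolog wiring below is purely illustrative; any `LoggerInterface` implementation can be passed):

```php
use Codewithkyrian\Transformers\Transformers;
use Codewithkyrian\Transformers\Utils\StreamLogger;
use Monolog\Handler\StreamHandler;
use Monolog\Logger;

// Option 1: the bundled StreamLogger, writing to STDOUT.
Transformers::setup()
    ->setLogger(new StreamLogger(STDOUT))
    ->apply();

// Option 2: a pre-configured PSR-3 logger (illustrative Monolog setup;
// assumes monolog/monolog is installed).
$monolog = new Logger('transformers-php');
$monolog->pushHandler(new StreamHandler('php://stderr', Logger::DEBUG));

Transformers::setup()
    ->setLogger($monolog)
    ->apply();
```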

## Standalone PHP Projects

In a standalone PHP project, the best place to add global configuration is in your project's bootstrap or initialization …
examples/bootstrap.php (4 changes: 3 additions & 1 deletion)
@@ -4,9 +4,11 @@

use Codewithkyrian\Transformers\Transformers;
use Codewithkyrian\Transformers\Utils\ImageDriver;
use Codewithkyrian\Transformers\Utils\StreamLogger;

require_once './vendor/autoload.php';

Transformers::setup()
->setCacheDir('/Users/Kyrian/.transformers')
->setImageDriver(ImageDriver::VIPS);
->setImageDriver(ImageDriver::VIPS)
->setLogger(new StreamLogger(STDOUT));
examples/pipelines/asr.php (2 changes: 1 addition & 1 deletion)
@@ -26,9 +26,9 @@
//$audioUrl = __DIR__ . '/../sounds/sample-1.mp3';

$streamer = WhisperTextStreamer::make()
//->onTimestampStart(fn($timestamp) => dump($timestamp));
->onStream(fn($text) => print($text));


$output = $transcriber($audioUrl,
maxNewTokens: 256,
chunkLengthSecs: 24,
src/Models/Auto/PretrainedMixin.php (20 changes: 11 additions & 9 deletions)
@@ -8,6 +8,7 @@
use Codewithkyrian\Transformers\Exceptions\UnsupportedModelTypeException;
use Codewithkyrian\Transformers\Models\ModelArchitecture;
use Codewithkyrian\Transformers\Models\Pretrained\PretrainedModel;
use Codewithkyrian\Transformers\Transformers;
use Codewithkyrian\Transformers\Utils\AutoConfig;

/**
@@ -18,6 +19,7 @@ abstract class PretrainedMixin
{
/**
* Mapping from model type to model class.
*
* @var array<string, array<string, string>> The model class mappings.
*/
const MODEL_CLASS_MAPPINGS = [];
@@ -37,15 +39,16 @@ abstract class PretrainedMixin
* @param string|null $cacheDir The cache directory to save the model in.
* @param string $revision The revision of the model.
* @param string|null $modelFilename The filename of the model.
*
* @return PretrainedModel The instantiated pretrained model.
*/
public static function fromPretrained(
string $modelNameOrPath,
bool $quantized = true,
?array $config = null,
?string $cacheDir = null,
string $revision = 'main',
?string $modelFilename = null,
string $modelNameOrPath,
bool $quantized = true,
?array $config = null,
?string $cacheDir = null,
string $revision = 'main',
?string $modelFilename = null,
?callable $onProgress = null
): PretrainedModel
{
@@ -54,7 +57,6 @@ public static function fromPretrained(
foreach (static::MODEL_CLASS_MAPPINGS as $modelClassMapping) {
$modelClass = $modelClassMapping[$config->modelType] ?? null;


if ($modelClass === null) continue;

$modelArchitecture = self::getModelArchitecture($modelClass);
@@ -72,7 +74,7 @@
}

if (static::BASE_IF_FAIL) {
// echo "Unknown model class for model type {$config->modelType}. Using base class PreTrainedModel.";
Transformers::getLogger()->warning("Unknown model class for model type {$config->modelType}. Using base class PreTrainedModel.");

return PretrainedModel::fromPretrained(
modelNameOrPath: $modelNameOrPath,
@@ -109,4 +111,4 @@ protected static function getModelArchitecture($modelClass): ModelArchitecture
default => ModelArchitecture::EncoderOnly,
};
}
}
src/Models/Pretrained/PretrainedModel.php (53 changes: 38 additions & 15 deletions)
@@ -25,6 +25,7 @@
use Codewithkyrian\Transformers\Models\ModelArchitecture;
use Codewithkyrian\Transformers\Models\Output\ModelOutput;
use Codewithkyrian\Transformers\Tensor\Tensor;
use Codewithkyrian\Transformers\Transformers;
use Codewithkyrian\Transformers\Utils\AutoConfig;
use Codewithkyrian\Transformers\Utils\GenerationConfig;
use Codewithkyrian\Transformers\Utils\Hub;
@@ -49,9 +50,7 @@ public function __construct(
public InferenceSession $session,
public ModelArchitecture $modelArchitecture = ModelArchitecture::EncoderOnly,
...$args
)
{
}
) {}


/**
@@ -72,6 +71,7 @@ public function __construct(
* @param string $revision The specific model version to use. It can be a branch name, a tag name,
* @param string|null $modelFilename The name of the model file to load. If not provided, will default to the
* @param ModelArchitecture $modelArchitecture
*
* @return self The model instantiated from the configuration.
* @throws HubException
*/
@@ -217,7 +217,7 @@ public static function fromPretrained(
default:
{
if ($modelArchitecture != ModelArchitecture::EncoderOnly) {
echo "WARNING: {$modelArchitecture->value} is not a valid model group. Defaulting to EncoderOnly.";
Transformers::getLogger()?->warning("{$modelArchitecture->value} is not a valid model group. Defaulting to EncoderOnly.");
}


@@ -251,6 +251,7 @@ public static function fromPretrained(
* @param bool $fatal Whether to raise an error if the file could not be loaded.
* @param callable|null $onProgress
* @param mixed ...$sessionOptions
*
* @return InferenceSession|null
* @throws HubException
*/
@@ -283,7 +284,9 @@ public function __invoke(array $modelInputs): array|ModelOutput
/**
* Forward method for a pretrained model. If not overridden by a subclass, the correct forward method
* will be chosen based on the model type.
*
* @param array $modelInputs The input data to the model in the format specified in the ONNX model.
*
* @return array{logits: Tensor, hidden_states: Tensor, attentions: Tensor} The output data from the model in the format specified in the ONNX model.
*/
public function forward(array $modelInputs): array
@@ -315,6 +318,7 @@ public function runSession(InferenceSession $session, array $inputs): array
/**
* @param InferenceSession $session
* @param Tensor[] $inputs
*
* @return Tensor[]
* @throws MissingModelInputException
*/
Expand Down Expand Up @@ -345,20 +349,27 @@ public function validateInputs(array $inputNames, array $inputs): array


if ($numInputsProvided > $numInputsNeeded) {
// No missing inputs, but too many inputs were provided.
// Warn the user and ignore the extra inputs.
// No missing inputs, but too many inputs were provided so we warn the user and ignore the extra inputs.
$ignored = array_diff(array_keys($inputs), $inputNames);
echo 'WARNING: Too many inputs were provided (' . $numInputsProvided . ' > ' . $numInputsNeeded . ').
The following inputs will be ignored: "' . implode(', ', $ignored) . '".';

$warning = sprintf(
'Too many inputs were provided (%d > %d). The following inputs will be ignored: "%s".',
$numInputsProvided,
$numInputsNeeded,
implode(', ', $ignored)
);

Transformers::getLogger()->warning($warning);
}

// return array_map(fn($i) => $i->toArray(), $inputs);
return $inputs;
}

/**
* Prepares an attention mask for a sequence of tokens based on configuration options.
*
* @param Tensor $tokens The input tokens.
*
* @return Tensor The attention mask tensor.
* @private
*/
@@ -379,7 +390,7 @@ public function prepareAttentionMask(Tensor $tokens): Tensor
if ($isPadTokenInInputs && $isPadTokenNotEqualToEosTokenId) {
$mo = Tensor::mo();

$data = $mo->f(fn($x) => $x != $padTokenId, $tokens);
$data = $mo->f(fn ($x) => $x != $padTokenId, $tokens);

return new Tensor($data, $tokens->dtype(), $tokens->shape());
} else {
Expand All @@ -389,9 +400,11 @@ public function prepareAttentionMask(Tensor $tokens): Tensor

/**
* Add position IDs to the feeds object.
*
* @param array $inputNames The names of the inputs to the model.
* @param array $feeds The input to the model.
* @param bool $useCacheBranch Whether to use the cache branch of the model.
*
* @return void
*/
public function preparePositionIds(array $inputNames, array &$feeds, bool $useCacheBranch): void
@@ -430,6 +443,7 @@ public function preparePositionIds(array $inputNames, array &$feeds, bool $useCa…
*
* @param array $decoderResults The decoder results object.
* @param ?array $pastKeyValues The previous past key values.
*
* @return array An object containing past key values.
*/
public function getPastKeyValues(array $decoderResults, ?array $pastKeyValues): array
@@ -458,6 +472,7 @@ public function getPastKeyValues(array $decoderResults, ?array $pastKeyValues):…
* Returns an object containing attentions from the given decoder results object.
*
* @param array $decoderResults The decoder results object.
*
* @return array An object containing attentions.
*/
public function getAttentions(array $decoderResults): array
Expand Down Expand Up @@ -540,11 +555,13 @@ public function addPastKeyValues(array &$decoderFeeds, ?array $pastKeyValues): v
}

/** Generates text based on the given inputs and generation configuration using the model.
*
* @param Tensor $inputs The input token ids.
* @param GenerationConfig|null $generationConfig The generation configuration to use. If null, default configuration will be used.
* @param LogitsProcessorList|null $logitsProcessor An optional logits processor to use. If null, a new LogitsProcessorList instance will be created.
* @param Tensor|null $inputsAttentionMask An optional attention mask for the inputs.
* @param Streamer|null $streamer
*
* @return array An array of generated output sequences, where each sequence is an array of token IDs.
* @throws Exception
*/
@@ -615,7 +632,7 @@ public function generate(

$beams = $this->getStartBeams($inputs, $generationConfig, $numOutputTokens, $inputsAttentionMask);

while (array_some($beams, fn($beam) => !$beam['done']) && $numOutputTokens < $maxOutputTokens) {
while (array_some($beams, fn ($beam) => !$beam['done']) && $numOutputTokens < $maxOutputTokens) {
$newestBeams = [];
foreach ($beams as $beam) {
if ($beam['done']) {
Expand Down Expand Up @@ -676,7 +693,7 @@ public function generate(
// Group and select best beams
$newestBeams = array_merge(...array_map(
function ($group) use ($generationConfig) {
usort($group, fn($a, $b) => $b['score'] <=> $a['score']);
usort($group, fn ($a, $b) => $b['score'] <=> $a['score']);
return array_slice(
$group,
0,
@@ -702,7 +719,7 @@ function ($group) use ($generationConfig) {
function ($batch) use ($key, $generationConfig) {
if ($generationConfig->num_return_sequences > 1) {
return array_slice(
array_map(fn($beam) => $beam[$key], $batch),
array_map(fn ($beam) => $beam[$key], $batch),
0,
$generationConfig->num_return_sequences
);
@@ -752,7 +769,9 @@ function ($batch) use ($key, $generationConfig) {
/**
* This function merges multiple generation configs together to form a final generation config to be used by the model for text generation.
* It first creates an empty `GenerationConfig` object, then it applies the model's own `generation_config` property to it. Finally, if a `generation_config` object was passed in the arguments, it overwrites the corresponding properties in the final config with those of the passed config object.
*
* @param ?GenerationConfig $generationConfig A `GenerationConfig` object containing generation parameters.
*
* @return GenerationConfig The final generation config object to be used by the model for text generation.
*/
protected function getGenerationConfig(?GenerationConfig $generationConfig): GenerationConfig
@@ -854,6 +873,7 @@ protected function getLogitsProcessor(
* @param GenerationConfig $generationConfig The generation config.
* @param int $numOutputTokens The number of tokens to generate.
* @param Tensor|null $inputsAttentionMask The attention mask for the input token ids.
*
* @return array{ inputs: Tensor, output_token_ids: Tensor, score: float, done: bool, id: int } The initial beam for text generation.
*
*/
@@ -877,6 +897,7 @@
* Runs the beam for text generation task
*
* @param array $beam The current beam being generated.
*
* @return array The updated beam after a single generation step.
*
*/
@@ -890,14 +911,15 @@ public function runBeam(array &$beam): array
*
* @param array $beam
* @param array $output
*
* @throws Exception
*/
public function addAttentionsToBeam(array &$beam, array $output): void
{
if ($this->config->isEncoderDecoder) {
if (empty($output['cross_attentions'])) {
throw new Exception(
"`output_attentions` is true, but the model did not produce cross-attentions. " .
"`output_attentions` is true, but the model did not produce cross-attentions. ".
"This is most likely because the model was not exported with `output_attentions=True`."
);
}
@@ -909,7 +931,7 @@ public function addAttentionsToBeam(array &$beam, array $output): void

if (empty($output['decoder_attentions'])) {
throw new Exception(
"`output_attentions` is true, but the model did not produce decoder-attentions. " .
"`output_attentions` is true, but the model did not produce decoder-attentions. ".
"This is most likely because the model was not exported with `output_attentions=True`."
);
}
@@ -935,6 +957,7 @@ public function updateBeam(array &$beam, int $newTokenId): void
* Groups an array of beam objects by their ids.
*
* @param array $beams The array of beam objects to group.
*
* @return array An array of arrays, where each inner array contains beam objects with the same id.
*/
public function groupBeams(array $beams): array
src/Models/Pretrained/VisionEncoderDecoderModel.php (4 changes: 3 additions & 1 deletion)
@@ -9,6 +9,7 @@
use Codewithkyrian\Transformers\Models\Auto\AutoModel;
use Codewithkyrian\Transformers\Models\Auto\AutoModelForCausalLM;
use Codewithkyrian\Transformers\Models\ModelArchitecture;
use Codewithkyrian\Transformers\Transformers;
use Codewithkyrian\Transformers\Utils\AutoConfig;
use Codewithkyrian\Transformers\Utils\GenerationConfig;
use Codewithkyrian\Transformers\Utils\InferenceSession;
@@ -32,6 +33,7 @@ class VisionEncoderDecoderModel extends PretrainedModel

/**
* Creates a new instance of the `VisionEncoderDecoderModel` class.
*
* @param AutoConfig $config The configuration array specifying the hyperparameters and other model settings.
* @param mixed $session The ONNX session containing the encoder model.
* @param InferenceSession $decoderMergedSession The ONNX session containing the merged decoder model.
@@ -60,7 +62,7 @@ public function __construct(
?? AutoModel::ENCODER_DECODER_MODEL_MAPPING[$encoderModelType];

if (!$encoderModel) {
echo "Model type for encoder '{$encoderModelType}' not found, assuming encoder-only architecture. Please report this at https://github.com/CodeWithKyrian/transformers-php/issues/new/choose.";
Transformers::getLogger()?->warning("Model type for encoder '{$encoderModelType}' not found, assuming encoder-only architecture. Please report this at https://github.com/CodeWithKyrian/transformers-php/issues/new/choose.");
}

// Validate decoder
(Diffs for the remaining changed files were not loaded in this view.)
