diff --git a/README.md b/README.md index 1848e0a..028e62e 100644 --- a/README.md +++ b/README.md @@ -152,7 +152,7 @@ Transformers::setup() ->setAuthToken('...') // Set the auth token for downloading models. Defaults to `null` ->setUserAgent('...') // Set the user agent for downloading models. Defaults to `transformers-php/{version}` ->setImageDriver('...') // Set the image driver for processing images. Defaults to `IMAGICK' - ->apply(); // Apply the configuration + ->setLogger('...'); // Set the logger for TransformersPHP. Defaults to `null` ``` You can call the `set` methods in any order, or leave any out entirely, in which case, it uses the default values. For @@ -399,4 +399,4 @@ This package is a WIP, but here's a list of tasks and architectures currently te 1. **[YOLOS](https://huggingface.co/docs/transformers/model_doc/yolos)** (from Huazhong University of Science & Technology) released with the paper [You Only Look at One Sequence: Rethinking Transformer in Vision through Object Detection](https://arxiv.org/abs/2106.00666) - by Yuxin Fang, Bencheng Liao, Xinggang Wang, Jiemin Fang, Jiyang Qi, Rui Wu, Jianwei Niu, Wenyu Liu. \ No newline at end of file + by Yuxin Fang, Bencheng Liao, Xinggang Wang, Jiemin Fang, Jiyang Qi, Rui Wu, Jianwei Niu, Wenyu Liu. 
diff --git a/composer.json b/composer.json index 1b3c443..3e179e2 100644 --- a/composer.json +++ b/composer.json @@ -15,6 +15,7 @@ "require": { "php": "^8.1", "ext-ffi": "*", + "psr/log": "^1.1.3|^2.0|^3.0", "codewithkyrian/jinja-php": "^1.0", "codewithkyrian/transformers-libsloader": "^2.0", "imagine/imagine": "^1.3", diff --git a/docs/configuration.md b/docs/configuration.md index 36b0183..2376def 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -22,7 +22,8 @@ Transformers::setup() ->setRemotePathTemplate('custom/path/{model}/{file}') ->setAuthToken('your-token') ->setUserAgent('your-user-agent') - ->setImageDriver(ImageDriver::IMAGICK); + ->setImageDriver(ImageDriver::IMAGICK) + ->setLogger(new StreamLogger('transformers-php')); ``` ::: tip @@ -105,6 +106,14 @@ Transformers::setup() ->apply(); ``` +### `setLogger(LoggerInterface $logger)` + +This setting allows you to set a custom logger for TransformersPHP. No logger is set by default, but you can set a +logger to debug TransformersPHP's internal behavior. The logger should implement the `Psr\Log\LoggerInterface` interface. TransformersPHP +comes with a `StreamLogger` class, similar to Monolog's `StreamHandler`, which can be used to log to a stream (STDOUT, STDERR, +or a file) and can be customized to log at different levels (debug, info, warning, error, critical). You can also pass in a +logger that is already configured and ready to use e.g. a Laravel logger. 
+ ## Standalone PHP Projects In a standalone PHP project, the best place to add global configuration is in your project's bootstrap or initialization diff --git a/examples/bootstrap.php b/examples/bootstrap.php index c04afb6..eeebf98 100644 --- a/examples/bootstrap.php +++ b/examples/bootstrap.php @@ -4,9 +4,11 @@ use Codewithkyrian\Transformers\Transformers; use Codewithkyrian\Transformers\Utils\ImageDriver; +use Codewithkyrian\Transformers\Utils\StreamLogger; require_once './vendor/autoload.php'; Transformers::setup() ->setCacheDir('/Users/Kyrian/.transformers') - ->setImageDriver(ImageDriver::VIPS); + ->setImageDriver(ImageDriver::VIPS) + ->setLogger(new StreamLogger(STDOUT)); diff --git a/examples/pipelines/asr.php b/examples/pipelines/asr.php index 2aa16ee..e50edcc 100644 --- a/examples/pipelines/asr.php +++ b/examples/pipelines/asr.php @@ -26,9 +26,9 @@ //$audioUrl = __DIR__ . '/../sounds/sample-1.mp3'; $streamer = WhisperTextStreamer::make() -//->onTimestampStart(fn($timestamp) => dump($timestamp)); ->onStream(fn($text) => print($text)); + $output = $transcriber($audioUrl, maxNewTokens: 256, chunkLengthSecs: 24, diff --git a/src/Models/Auto/PretrainedMixin.php b/src/Models/Auto/PretrainedMixin.php index 88d9fe6..b43683b 100644 --- a/src/Models/Auto/PretrainedMixin.php +++ b/src/Models/Auto/PretrainedMixin.php @@ -8,6 +8,7 @@ use Codewithkyrian\Transformers\Exceptions\UnsupportedModelTypeException; use Codewithkyrian\Transformers\Models\ModelArchitecture; use Codewithkyrian\Transformers\Models\Pretrained\PretrainedModel; +use Codewithkyrian\Transformers\Transformers; use Codewithkyrian\Transformers\Utils\AutoConfig; /** @@ -18,6 +19,7 @@ abstract class PretrainedMixin { /** * Mapping from model type to model class. + * * @var array> The model class mappings. */ const MODEL_CLASS_MAPPINGS = []; @@ -37,15 +39,16 @@ abstract class PretrainedMixin * @param string|null $cacheDir The cache directory to save the model in. 
* @param string $revision The revision of the model. * @param string|null $modelFilename The filename of the model. + * * @return PretrainedModel The instantiated pretrained model. */ public static function fromPretrained( - string $modelNameOrPath, - bool $quantized = true, - ?array $config = null, - ?string $cacheDir = null, - string $revision = 'main', - ?string $modelFilename = null, + string $modelNameOrPath, + bool $quantized = true, + ?array $config = null, + ?string $cacheDir = null, + string $revision = 'main', + ?string $modelFilename = null, ?callable $onProgress = null ): PretrainedModel { @@ -54,7 +57,6 @@ public static function fromPretrained( foreach (static::MODEL_CLASS_MAPPINGS as $modelClassMapping) { $modelClass = $modelClassMapping[$config->modelType] ?? null; - if ($modelClass === null) continue; $modelArchitecture = self::getModelArchitecture($modelClass); @@ -72,7 +74,7 @@ public static function fromPretrained( } if (static::BASE_IF_FAIL) { -// echo "Unknown model class for model type {$config->modelType}. Using base class PreTrainedModel."; + Transformers::getLogger()?->warning("Unknown model class for model type {$config->modelType}. 
Using base class PreTrainedModel."); return PretrainedModel::fromPretrained( modelNameOrPath: $modelNameOrPath, @@ -109,4 +111,4 @@ protected static function getModelArchitecture($modelClass): ModelArchitecture default => ModelArchitecture::EncoderOnly, }; } -} \ No newline at end of file +} diff --git a/src/Models/Pretrained/PretrainedModel.php b/src/Models/Pretrained/PretrainedModel.php index 2dee292..a96d480 100644 --- a/src/Models/Pretrained/PretrainedModel.php +++ b/src/Models/Pretrained/PretrainedModel.php @@ -25,6 +25,7 @@ use Codewithkyrian\Transformers\Models\ModelArchitecture; use Codewithkyrian\Transformers\Models\Output\ModelOutput; use Codewithkyrian\Transformers\Tensor\Tensor; +use Codewithkyrian\Transformers\Transformers; use Codewithkyrian\Transformers\Utils\AutoConfig; use Codewithkyrian\Transformers\Utils\GenerationConfig; use Codewithkyrian\Transformers\Utils\Hub; @@ -49,9 +50,7 @@ public function __construct( public InferenceSession $session, public ModelArchitecture $modelArchitecture = ModelArchitecture::EncoderOnly, ...$args - ) - { - } + ) {} /** @@ -72,6 +71,7 @@ public function __construct( * @param string $revision The specific model version to use. It can be a branch name, a tag name, * @param string|null $modelFilename The name of the model file to load. If not provided, will default to the * @param ModelArchitecture $modelArchitecture + * * @return self The model instantiated from the configuration. * @throws HubException */ @@ -217,7 +217,7 @@ public static function fromPretrained( default: { if ($modelArchitecture != ModelArchitecture::EncoderOnly) { - echo "WARNING: {$modelArchitecture->value} is not a valid model group. Defaulting to EncoderOnly."; + Transformers::getLogger()?->warning("{$modelArchitecture->value} is not a valid model group. Defaulting to EncoderOnly."); } @@ -251,6 +251,7 @@ public static function fromPretrained( * @param bool $fatal Whether to raise an error if the file could not be loaded. 
* @param callable|null $onProgress * @param mixed ...$sessionOptions + * * @return InferenceSession|null * @throws HubException */ @@ -283,7 +284,9 @@ public function __invoke(array $modelInputs): array|ModelOutput /** * Forward method for a pretrained model. If not overridden by a subclass, the correct forward method * will be chosen based on the model type. + * * @param array $modelInputs The input data to the model in the format specified in the ONNX model. + * * @return array{logits: Tensor, hidden_states: Tensor, attentions: Tensor} The output data from the model in the format specified in the ONNX model. */ public function forward(array $modelInputs): array @@ -315,6 +318,7 @@ public function runSession(InferenceSession $session, array $inputs): array /** * @param InferenceSession $session * @param Tensor[] $inputs + * * @return Tensor[] * @throws MissingModelInputException */ @@ -345,20 +349,27 @@ public function validateInputs(array $inputNames, array $inputs): array if ($numInputsProvided > $numInputsNeeded) { - // No missing inputs, but too many inputs were provided. - // Warn the user and ignore the extra inputs. + // No missing inputs, but too many inputs were provided so we warn the user and ignore the extra inputs. $ignored = array_diff(array_keys($inputs), $inputNames); - echo 'WARNING: Too many inputs were provided (' . $numInputsProvided . ' > ' . $numInputsNeeded . '). - The following inputs will be ignored: "' . implode(', ', $ignored) . '".'; + + $warning = sprintf( + 'Too many inputs were provided (%d > %d). The following inputs will be ignored: "%s".', + $numInputsProvided, + $numInputsNeeded, + implode(', ', $ignored) + ); + + Transformers::getLogger()?->warning($warning); } -// return array_map(fn($i) => $i->toArray(), $inputs); return $inputs; } /** * Prepares an attention mask for a sequence of tokens based on configuration options. + * * @param Tensor $tokens The input tokens. + * * @return Tensor The attention mask tensor. 
* @private */ @@ -379,7 +390,7 @@ public function prepareAttentionMask(Tensor $tokens): Tensor if ($isPadTokenInInputs && $isPadTokenNotEqualToEosTokenId) { $mo = Tensor::mo(); - $data = $mo->f(fn($x) => $x != $padTokenId, $tokens); + $data = $mo->f(fn ($x) => $x != $padTokenId, $tokens); return new Tensor($data, $tokens->dtype(), $tokens->shape()); } else { @@ -389,9 +400,11 @@ public function prepareAttentionMask(Tensor $tokens): Tensor /** * Add position IDs to the feeds object. + * * @param array $inputNames The names of the inputs to the model. * @param array $feeds The input to the model. * @param bool $useCacheBranch Whether to use the cache branch of the model. + * * @return void */ public function preparePositionIds(array $inputNames, array &$feeds, bool $useCacheBranch): void @@ -430,6 +443,7 @@ public function preparePositionIds(array $inputNames, array &$feeds, bool $useCa * * @param array $decoderResults The decoder results object. * @param ?array $pastKeyValues The previous past key values. + * * @return array An object containing past key values. */ public function getPastKeyValues(array $decoderResults, ?array $pastKeyValues): array @@ -458,6 +472,7 @@ public function getPastKeyValues(array $decoderResults, ?array $pastKeyValues): * Returns an object containing attentions from the given decoder results object. * * @param array $decoderResults The decoder results object. + * * @return array An object containing attentions. */ public function getAttentions(array $decoderResults): array @@ -540,11 +555,13 @@ public function addPastKeyValues(array &$decoderFeeds, ?array $pastKeyValues): v } /** Generates text based on the given inputs and generation configuration using the model. + * * @param Tensor $inputs The input token ids. * @param GenerationConfig|null $generationConfig The generation configuration to use. If null, default configuration will be used. * @param LogitsProcessorList|null $logitsProcessor An optional logits processor to use. 
If null, a new LogitsProcessorList instance will be created. * @param Tensor|null $inputsAttentionMask An optional attention mask for the inputs. * @param Streamer|null $streamer + * * @return array An array of generated output sequences, where each sequence is an array of token IDs. * @throws Exception */ @@ -615,7 +632,7 @@ public function generate( $beams = $this->getStartBeams($inputs, $generationConfig, $numOutputTokens, $inputsAttentionMask); - while (array_some($beams, fn($beam) => !$beam['done']) && $numOutputTokens < $maxOutputTokens) { + while (array_some($beams, fn ($beam) => !$beam['done']) && $numOutputTokens < $maxOutputTokens) { $newestBeams = []; foreach ($beams as $beam) { if ($beam['done']) { @@ -676,7 +693,7 @@ public function generate( // Group and select best beams $newestBeams = array_merge(...array_map( function ($group) use ($generationConfig) { - usort($group, fn($a, $b) => $b['score'] <=> $a['score']); + usort($group, fn ($a, $b) => $b['score'] <=> $a['score']); return array_slice( $group, 0, @@ -702,7 +719,7 @@ function ($group) use ($generationConfig) { function ($batch) use ($key, $generationConfig) { if ($generationConfig->num_return_sequences > 1) { return array_slice( - array_map(fn($beam) => $beam[$key], $batch), + array_map(fn ($beam) => $beam[$key], $batch), 0, $generationConfig->num_return_sequences ); @@ -752,7 +769,9 @@ function ($batch) use ($key, $generationConfig) { /** * This function merges multiple generation configs together to form a final generation config to be used by the model for text generation. * It first creates an empty `GenerationConfig` object, then it applies the model's own `generation_config` property to it. Finally, if a `generation_config` object was passed in the arguments, it overwrites the corresponding properties in the final config with those of the passed config object. + * * @param ?GenerationConfig $generationConfig A `GenerationConfig` object containing generation parameters. 
+ * * @return GenerationConfig The final generation config object to be used by the model for text generation. */ protected function getGenerationConfig(?GenerationConfig $generationConfig): GenerationConfig @@ -854,6 +873,7 @@ protected function getLogitsProcessor( * @param GenerationConfig $generationConfig The generation config. * @param int $numOutputTokens The number of tokens to generate. * @param Tensor|null $inputsAttentionMask The attention mask for the input token ids. + * * @return array{ inputs: Tensor, output_token_ids: Tensor, score: float, done: bool, id: int } The initial beam for text generation. * */ @@ -877,6 +897,7 @@ public function getStartBeams( * Runs the beam for text generation task * * @param array $beam The current beam being generated. + * * @return array The updated beam after a single generation step. * */ @@ -890,6 +911,7 @@ public function runBeam(array &$beam): array * * @param array $beam * @param array $output + * * @throws Exception */ public function addAttentionsToBeam(array &$beam, array $output): void @@ -897,7 +919,7 @@ public function addAttentionsToBeam(array &$beam, array $output): void if ($this->config->isEncoderDecoder) { if (empty($output['cross_attentions'])) { throw new Exception( - "`output_attentions` is true, but the model did not produce cross-attentions. " . + "`output_attentions` is true, but the model did not produce cross-attentions. ". "This is most likely because the model was not exported with `output_attentions=True`." ); } @@ -909,7 +931,7 @@ public function addAttentionsToBeam(array &$beam, array $output): void if (empty($output['decoder_attentions'])) { throw new Exception( - "`output_attentions` is true, but the model did not produce decoder-attentions. " . + "`output_attentions` is true, but the model did not produce decoder-attentions. ". "This is most likely because the model was not exported with `output_attentions=True`." 
); } @@ -935,6 +957,7 @@ public function updateBeam(array &$beam, int $newTokenId): void * Groups an array of beam objects by their ids. * * @param array $beams The array of beam objects to group. + * * @return array An array of arrays, where each inner array contains beam objects with the same id. */ public function groupBeams(array $beams): array diff --git a/src/Models/Pretrained/VisionEncoderDecoderModel.php b/src/Models/Pretrained/VisionEncoderDecoderModel.php index e0f2900..3c888cd 100644 --- a/src/Models/Pretrained/VisionEncoderDecoderModel.php +++ b/src/Models/Pretrained/VisionEncoderDecoderModel.php @@ -9,6 +9,7 @@ use Codewithkyrian\Transformers\Models\Auto\AutoModel; use Codewithkyrian\Transformers\Models\Auto\AutoModelForCausalLM; use Codewithkyrian\Transformers\Models\ModelArchitecture; +use Codewithkyrian\Transformers\Transformers; use Codewithkyrian\Transformers\Utils\AutoConfig; use Codewithkyrian\Transformers\Utils\GenerationConfig; use Codewithkyrian\Transformers\Utils\InferenceSession; @@ -32,6 +33,7 @@ class VisionEncoderDecoderModel extends PretrainedModel /** * Creates a new instance of the `VisionEncoderDecoderModel` class. + * * @param AutoConfig $config The configuration array specifying the hyperparameters and other model settings. * @param mixed $session The ONNX session containing the encoder model. * @param InferenceSession $decoderMergedSession The ONNX session containing the merged decoder model. @@ -60,7 +62,7 @@ public function __construct( ?? AutoModel::ENCODER_DECODER_MODEL_MAPPING[$encoderModelType]; if (!$encoderModel) { - echo "Model type for encoder '{$encoderModelType}' not found, assuming encoder-only architecture. Please report this at https://github.com/CodeWithKyrian/transformers-php/issues/new/choose."; + Transformers::getLogger()?->warning("Model type for encoder '{$encoderModelType}' not found, assuming encoder-only architecture. 
Please report this at https://github.com/CodeWithKyrian/transformers-php/issues/new/choose."); } // Validate decoder diff --git a/src/Pipelines/ZeroShotClassificationPipeline.php b/src/Pipelines/ZeroShotClassificationPipeline.php index 9a52fcf..f3e8fde 100644 --- a/src/Pipelines/ZeroShotClassificationPipeline.php +++ b/src/Pipelines/ZeroShotClassificationPipeline.php @@ -8,6 +8,7 @@ use Codewithkyrian\Transformers\Models\Output\SequenceClassifierOutput; use Codewithkyrian\Transformers\Models\Pretrained\PretrainedModel; use Codewithkyrian\Transformers\PreTrainedTokenizers\PreTrainedTokenizer; +use Codewithkyrian\Transformers\Transformers; use Codewithkyrian\Transformers\Utils\Math; use function Codewithkyrian\Transformers\Utils\timeUsage; @@ -68,13 +69,13 @@ public function __construct(Task|string $task, PretrainedModel $model, ?PreTrain $this->entailmentId = $this->label2id['entailment'] ?? null; if ($this->entailmentId === null) { - echo "Could not find 'entailment' in label2id mapping. Using 2 as entailment_id.\n"; + Transformers::getLogger()?->warning("Could not find 'entailment' in label2id mapping. Using 2 as entailment_id."); $this->entailmentId = 2; } $this->contradictionId = $this->label2id['contradiction'] ?? $this->label2id['not_entailment'] ?? null; if ($this->contradictionId === null) { - echo "Could not find 'contradiction' in label2id mapping. Using 0 as contradiction_id.\n"; + Transformers::getLogger()?->warning("Could not find 'contradiction' in label2id mapping. 
Using 0 as contradiction_id."); $this->contradictionId = 0; } } diff --git a/src/PreTrainedTokenizers/AutoTokenizer.php b/src/PreTrainedTokenizers/AutoTokenizer.php index 99da502..e40fe85 100644 --- a/src/PreTrainedTokenizers/AutoTokenizer.php +++ b/src/PreTrainedTokenizers/AutoTokenizer.php @@ -6,6 +6,7 @@ namespace Codewithkyrian\Transformers\PreTrainedTokenizers; use Codewithkyrian\Transformers\Tokenizers\TokenizerModel; +use Codewithkyrian\Transformers\Transformers; use Symfony\Component\Console\Output\OutputInterface; /** @@ -41,7 +42,7 @@ class AutoTokenizer 'WhisperTokenizer' => WhisperTokenizer::class, 'CodeGenTokenizer' => CodeGenTokenizer::class, 'CLIPTokenizer' => CLIPTokenizer::class, - 'SiglipTokenizer' => SiglipTokenizer::class, + 'SiglipTokenizer' => SiglipTokenizer::class, // 'MarianTokenizer' => MarianTokenizer::class, 'BloomTokenizer' => BloomTokenizer::class, 'NllbTokenizer' => NllbTokenizer::class, @@ -82,6 +83,7 @@ class AutoTokenizer * @param string $revision * @param mixed $legacy * @param OutputInterface|null $output + * * @return PreTrainedTokenizer|null */ public static function fromPretrained( @@ -104,7 +106,7 @@ public static function fromPretrained( $cls = self::TOKENIZER_CLASS_MAPPING[$tokenizerClassName] ?? null; if ($cls == null) { - echo "Unknown tokenizer class $tokenizerClassName. Using PreTrainedTokenizer. \n"; + Transformers::getLogger()?->warning("Unknown tokenizer class $tokenizerClassName. 
Using PreTrainedTokenizer."); $cls = PreTrainedTokenizer::class; } diff --git a/src/Transformers.php b/src/Transformers.php index 25441f3..d2c6d69 100644 --- a/src/Transformers.php +++ b/src/Transformers.php @@ -5,6 +5,7 @@ namespace Codewithkyrian\Transformers; use Codewithkyrian\Transformers\Utils\ImageDriver; +use Psr\Log\LoggerInterface; use RuntimeException; class Transformers @@ -21,6 +22,7 @@ class Transformers protected static ImageDriver $imageDriver; + protected static ?LoggerInterface $logger = null; /** * Returns a new instance of the static class. @@ -122,6 +124,20 @@ public function setImageDriver(ImageDriver $imageDriver): static return $this; } + /** + * Set the logger for debugging. + * + * @param LoggerInterface $logger + * + * @return $this + */ + public function setLogger(LoggerInterface $logger) : static + { + self::$logger = $logger; + + return $this; + } + public static function getCacheDir(): string { return self::$cacheDir; @@ -155,4 +171,9 @@ public static function getImageDriver(): ?ImageDriver return self::$imageDriver; } -} \ No newline at end of file + + public static function getLogger(): ?LoggerInterface + { + return self::$logger; + } +} diff --git a/src/Utils/Helpers.php b/src/Utils/Helpers.php index 91cfdbb..21fa416 100644 --- a/src/Utils/Helpers.php +++ b/src/Utils/Helpers.php @@ -4,6 +4,8 @@ namespace Codewithkyrian\Transformers\Utils; +use Codewithkyrian\Transformers\Transformers; + function memoryUsage(): string { $mem = memory_get_usage(true); @@ -188,7 +190,7 @@ function createPattern(array $pattern, bool $invert = true): ?string // NOTE: if invert is true, we wrap the pattern in a group so that it is kept when performing split return $invert ? 
$escaped : "($escaped)"; } else { - echo 'Unknown pattern type: '.print_r($pattern, true); + Transformers::getLogger()?->error('Unknown pattern type: '.print_r($pattern, true)); return null; } } diff --git a/src/Utils/StreamLogger.php b/src/Utils/StreamLogger.php new file mode 100644 index 0000000..2b05cef --- /dev/null +++ b/src/Utils/StreamLogger.php @@ -0,0 +1,276 @@ + 0) { + // use max 10% of allowed memory for the chunk size, and at least 100KB + $this->streamChunkSize = min(static::MAX_CHUNK_SIZE, max((int) ($phpMemoryLimit / 10), 100 * 1024)); + } else { + // memory is unlimited, set to the default 10MB + $this->streamChunkSize = static::DEFAULT_CHUNK_SIZE; + } + } else { + // no memory limit information, set to the default 10MB + $this->streamChunkSize = static::DEFAULT_CHUNK_SIZE; + } + + if (is_resource($stream)) { + $this->stream = $stream; + + stream_set_chunk_size($this->stream, $this->streamChunkSize); + } elseif (is_string($stream)) { + $this->url = self::canonicalizePath($stream); + } else { + throw new InvalidArgumentException('A stream must either be a resource or a string.'); + } + + $this->fileOpenMode = $fileOpenMode; + $this->filePermission = $filePermission; + $this->useLocking = $useLocking; + } + + public function log($level, Stringable|string $message, array $context = []): void + { + if (! is_resource($this->stream)) { + $url = $this->url; + if ($url === null || $url === '') { + throw new LogicException('Missing stream url, the stream can not be opened. This may be caused by a premature call to close()'); + } + $this->createDir($url); + $this->errorMessage = null; + set_error_handler($this->customErrorHandler(...)); + + try { + $stream = fopen($url, $this->fileOpenMode); + if ($this->filePermission !== null) { + @chmod($url, $this->filePermission); + } + } finally { + restore_error_handler(); + } + if (! 
is_resource($stream)) { + $this->stream = null; + + throw new UnexpectedValueException('The stream could not be opened in append mode'); + } + stream_set_chunk_size($stream, $this->streamChunkSize); + $this->stream = $stream; + } + + $stream = $this->stream; + if ($this->useLocking) { + // ignoring errors here, there's not much we can do about them + flock($stream, LOCK_EX); + } + + $this->errorMessage = null; + set_error_handler($this->customErrorHandler(...)); + try { + + $params = [ + '%datetime%' => date(static::DATE_FORMAT), + '%level_name%' => $level, + '%message%' => trim((string) $message), + '%context%' => json_encode( + $context, + JSON_UNESCAPED_SLASHES | + JSON_UNESCAPED_UNICODE | + JSON_PRESERVE_ZERO_FRACTION + ), + ]; + fwrite($stream, strtr(static::LOG_FORMAT, $params)); + } finally { + restore_error_handler(); + } + if ($this->errorMessage !== null) { + // close the resource if possible to reopen it, and retry the failed write + if (! $this->retrying && $this->url !== null && $this->url !== 'php://memory') { + $this->retrying = true; + $this->close(); + $this->log($level, $message, $context); + + return; + } + + throw new UnexpectedValueException('Writing to the log file failed'); + } + + $this->retrying = false; + if ($this->useLocking) { + flock($stream, LOCK_UN); + } + } + + public function close(): void + { + if ($this->url !== null && is_resource($this->stream)) { + fclose($this->stream); + } + $this->stream = null; + $this->dirCreated = null; + } + + private function getDirFromStream(string $stream): ?string + { + $pos = strpos($stream, '://'); + if ($pos === false) { + return dirname($stream); + } + + if (str_starts_with($stream, 'file://')) { + return dirname(substr($stream, 7)); + } + + return null; + } + + private function customErrorHandler(int $code, string $msg): bool + { + $this->errorMessage = preg_replace('{^(fopen|mkdir|fwrite)\(.*?\): }', '', $msg); + + return true; + } + + private function createDir(string $url): void + { + // Do not try 
to create dir if it has already been tried. + if ($this->dirCreated === true) { + return; + } + + $dir = $this->getDirFromStream($url); + if ($dir !== null && ! is_dir($dir)) { + $this->errorMessage = null; + set_error_handler(function (...$args) { + return $this->customErrorHandler(...$args); + }); + $status = mkdir($dir, 0777, true); + restore_error_handler(); + if ($status === false && ! is_dir($dir) && ! str_contains((string) $this->errorMessage, 'File exists')) { + throw new UnexpectedValueException(sprintf('There is no existing directory at "%s" and it could not be created: %s', $dir, (string) $this->errorMessage)); + } + } + $this->dirCreated = true; + } + + protected static function getMemoryLimitInBytes(): false|int + { + $limit = ini_get('memory_limit'); + if (! is_string($limit)) { + return false; + } + + // support -1 + if ((int) $limit < 0) { + return (int) $limit; + } + + if (!preg_match('/^\s*(?<limit>\d+)(?:\.\d+)?\s*(?<unit>[gmk]?)\s*$/i', $limit, $match)) { + return false; + } + + $limit = (int) $match['limit']; + switch (strtolower($match['unit'])) { + case 'g': + $limit *= 1024; + // no break + case 'm': + $limit *= 1024; + // no break + case 'k': + $limit *= 1024; + } + + return $limit; + } + + /** + * Makes sure if a relative path is passed in it is turned into an absolute path + * + * @param string $streamUrl stream URL or path without protocol + */ + public static function canonicalizePath(string $streamUrl): string + { + $prefix = ''; + if (str_starts_with($streamUrl, 'file://')) { + $streamUrl = substr($streamUrl, 7); + $prefix = 'file://'; + } + + // other type of stream, not supported + if (str_contains($streamUrl, '://')) { + return $streamUrl; + } + + // already absolute + if (str_starts_with($streamUrl, '/') || substr($streamUrl, 1, 1) === ':' || str_starts_with($streamUrl, '\\\\')) { + return $prefix.$streamUrl; + } + + $streamUrl = getcwd().'/'.$streamUrl; + + return $prefix.$streamUrl; + } + + public function __destruct() + { + $this->close(); + } +} diff 
--git a/tests/Utils/StreamLoggerTest.php b/tests/Utils/StreamLoggerTest.php new file mode 100644 index 0000000..1d6c0ad --- /dev/null +++ b/tests/Utils/StreamLoggerTest.php @@ -0,0 +1,83 @@ +outputBuffer = fopen('php://memory', 'rw'); + $this->logger = new StreamLogger($this->outputBuffer); +}); + +afterEach(function () { + fclose($this->outputBuffer); +}); + +it('logs messages with the correct format', function () { + $this->logger->log('info', 'This is a test message'); + + rewind($this->outputBuffer); + $output = stream_get_contents($this->outputBuffer); + + expect($output)->toMatch('/\[\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}[\+\-]\d{2}:\d{2}\] info: This is a test message \[\]\n/'); +}); + +it('handles context correctly in log messages', function () { + $context = ['user_id' => 123, 'action' => 'login']; + $this->logger->log('warning', 'User action recorded', $context); + + rewind($this->outputBuffer); + $output = stream_get_contents($this->outputBuffer); + + $expectedContext = json_encode($context, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE | JSON_PRESERVE_ZERO_FRACTION); + + expect($output)->toContain('warning: User action recorded') + ->and($output)->toContain($expectedContext); +}); + +it('handles different log levels correctly', function () { + $levels = ['debug', 'info', 'notice', 'warning', 'error', 'critical', 'alert', 'emergency']; + + foreach ($levels as $level) { + $this->logger->log($level, "Message at $level level"); + + rewind($this->outputBuffer); + $output = stream_get_contents($this->outputBuffer); + expect($output)->toContain("$level: Message at $level level"); + + ftruncate($this->outputBuffer, 0); // Clear buffer + rewind($this->outputBuffer); + } +}); + +it('handles empty context gracefully', function () { + $this->logger->log('info', 'Message with no context'); + + rewind($this->outputBuffer); + $output = stream_get_contents($this->outputBuffer); + + expect($output)->toContain('info: Message with no context []'); +}); + +it('handles 
stringable objects in the message', function () { + $stringable = new class { + public function __toString(): string + { + return 'Stringable message content'; + } + }; + + $this->logger->log('info', $stringable); + + rewind($this->outputBuffer); + $output = stream_get_contents($this->outputBuffer); + + expect($output)->toContain('info: Stringable message content'); +}); + +it('outputs log messages to STDOUT', function () { + $this->logger->log('info', 'Check output redirection'); + + rewind($this->outputBuffer); + $output = stream_get_contents($this->outputBuffer); + + expect($output)->toContain('info: Check output redirection'); +});