fix: Vips RGBA -> RGB conversion error

CodeWithKyrian · Aug 27, 2024 · 54bdee0 · 54bdee0
1 parent 75f5d9c
commit 54bdee0
Show file tree

Hide file tree

Showing 8 changed files with 35 additions and 32 deletions.
diff --git a/examples/misc/background-removal.php b/examples/misc/background-removal.php
@@ -8,8 +8,7 @@
 use function Codewithkyrian\Transformers\Utils\{memoryPeak, memoryUsage, timeUsage};
 
 require_once './bootstrap.php';
-
-$modelConfig = ['model_type' => 'custom'];
+$modelConfig = ['model_type' => 'vit'];
 $processorConfig = [
     'do_normalize' => true,
     'do_pad' => false,
@@ -24,9 +23,9 @@
 ];
 
 $model = AutoModel::fromPretrained(modelNameOrPath: 'briaai/RMBG-1.4', config: $modelConfig);
-$processor = AutoProcessor::fromPretrained(modelNameOrPath: 'briaai/RMBG-1.4', config: $processorConfig);
+$processor = AutoProcessor::fromPretrained(modelNameOrPath: 'briaai/RMBG-1.4');
 
-$url = __DIR__ . '/../images/woman-w-bag.jpeg';
+$url = __DIR__ . '/../images/multitask.png';
 
 $image = Image::read($url);
 
@@ -42,7 +41,4 @@
 
 $maskedImage = $image->applyMask($mask);
 
-$maskedImage->save($fileName . '-masked.png');
-
-dd('Done Processing!', timeUsage(), memoryUsage(), memoryPeak());
-
+$maskedImage->save($fileName . '-masked.png');
diff --git a/examples/pipelines/asr.php b/examples/pipelines/asr.php
@@ -16,14 +16,14 @@
 //$transcriber = pipeline('automatic-speech-recognition', 'Xenova/whisper-base');
 //$transcriber = pipeline('automatic-speech-recognition', 'Xenova/wav2vec2-large-xlsr-53-english');
 
-$audioUrl = __DIR__ . '/../sounds/kyrian-dev.wav';
-$audioUrl = __DIR__ . '/../sounds/jfk.wav';
-$audioUrl = __DIR__ . '/../sounds/preamble.wav';
+//$audioUrl = __DIR__ . '/../sounds/kyrian-dev.wav';
+//$audioUrl = __DIR__ . '/../sounds/jfk.wav';
+//$audioUrl = __DIR__ . '/../sounds/preamble.wav';
 //$audioUrl = __DIR__ . '/../sounds/taunt.wav';
 //$audioUrl = __DIR__ . '/../sounds/gettysburg.wav';
 //$audioUrl = __DIR__ . '/../sounds/kyrian-speaking.wav';
 //$audioUrl = __DIR__ . '/../sounds/ted_60.wav';
-//$audioUrl = __DIR__ . '/../sounds/sample-1.mp3';
+$audioUrl = __DIR__ . '/../sounds/sample-1.mp3';
 
 
 $output = $transcriber($audioUrl,

diff --git a/src/Generation/Samplers/Sampler.php b/src/Generation/Samplers/Sampler.php
@@ -55,7 +55,6 @@ public function getLogits(Tensor $logits, int $index): Tensor
 //        array_splice($size, -2, replacement: [1, $vocabSize]);
 //
 //        $logs = $logits->sliceWithBounds($start, $size);
-
         $logits = $logits->slice($index);
 
         if ($this->generationConfig->temperature > 0) {

diff --git a/src/Models/Auto/PretrainedMixin.php b/src/Models/Auto/PretrainedMixin.php
@@ -72,7 +72,7 @@ public static function fromPretrained(
         }
 
         if (static::BASE_IF_FAIL) {
-            trigger_error("Unknown model class for model type {$config->modelType}. Using base class PreTrainedModel.", E_USER_WARNING);
+//            echo "Unknown model class for model type {$config->modelType}. Using base class PreTrainedModel.";
 
             return PretrainedModel::fromPretrained(
                 modelNameOrPath: $modelNameOrPath,

diff --git a/src/Models/Pretrained/PretrainedModel.php b/src/Models/Pretrained/PretrainedModel.php
@@ -41,8 +41,8 @@ class PretrainedModel
     public string $mainInputName = 'input_ids';
 
     /**
-     * @param array $config The model configuration.
-     * @param mixed $session The ONNX session.
+     * @param AutoConfig $config The model configuration.
+     * @param InferenceSession $session The ONNX session.
      */
     public function __construct(
         public AutoConfig        $config,

diff --git a/src/Utils/Audio.php b/src/Utils/Audio.php
@@ -106,7 +106,7 @@ public function toTensor(int $samplerate = 41000, int $chunkSize = 2048): Tensor
         $audioTensor = Tensor::fromString($tensorData, Tensor::float32, [$totalOutputFrames, $this->channels()]);
 
         if ($this->channels() > 1) {
-            $audioTensor = $audioTensor->mean(1);
+            $audioTensor = $audioTensor->mean(1)->multiply(sqrt(2));
         }
 
         return $audioTensor->squeeze();

diff --git a/src/Utils/Image.php b/src/Utils/Image.php
@@ -224,8 +224,11 @@ public function rgb(bool $force = false): static
 
         // If it's a Vips image, we can extract the RGB channels
         if ($this->image instanceof \Imagine\Vips\Image) {
-            $vipImage = $this->image->copy()->getVips()->extract_band(0, ['n' => 3]);
-            return new self($vipImage, 3);
+            /** @var \Imagine\Vips\Image $image */
+            $image = $this->image->copy();
+            $vipImage = $image->getVips()->extract_band(0, ['n' => 3]);
+            $image->setVips($vipImage);
+            return new self($image, 3);
         }
 
         return new self($this->image->copy(), 3);
@@ -400,16 +403,21 @@ public function applyMask(Image $mask): static
             $this->image instanceof \Imagine\Vips\Image => $this->image->copy()->applyMask($mask->image),
 
             $this->image instanceof \Imagine\Imagick\Image => (function () use ($mask) {
-                $maskImagick = $mask->image->copy()->mask()->getImagick();
-                $imageImagick = clone $this->image->getImagick();
-
-                $maskImagick->compositeImage($imageImagick, Imagick::COMPOSITE_DSTIN, 0, 0);
-                $imageImagick->compositeImage($maskImagick, Imagick::COMPOSITE_COPYOPACITY, 0, 0);
-
-                $maskImagick->clear();
-                $maskImagick->destroy();
-
-                return new \Imagine\Imagick\Image($imageImagick, $this->image->palette(), $this->image->metadata());
+//                $maskImagick = $mask->image->copy()->mask()->getImagick();
+//                $imageImagick = clone $this->image->getImagick();
+//
+//                $maskImagick->compositeImage($imageImagick, Imagick::COMPOSITE_DSTIN, 0, 0);
+//                $imageImagick->compositeImage($maskImagick, Imagick::COMPOSITE_COPYOPACITY, 0, 0);
+//
+//                $maskImagick->clear();
+//                $maskImagick->destroy();
+//
+//                return new \Imagine\Imagick\Image($imageImagick, $this->image->palette(), $this->image->metadata());
+                $image = $this->image->copy();
+                $maskImage = $mask->image->copy();
+                $maskImage->effects()->negative();
+                $image->applyMask($maskImage);
+                return $image;
             })(),
 
             $this->image instanceof \Imagine\Gd\Image => (function () use ($mask) {

diff --git a/src/Utils/InferenceSession.php b/src/Utils/InferenceSession.php
@@ -43,6 +43,7 @@ public function __construct(
         $providers = []
     )
     {
+//        $providers = ['CoreMLExecutionProvider', 'CPUExecutionProvider'];
         // session options
         $sessionOptions = OnnxRuntime::CreateSessionOptions();
 
@@ -100,6 +101,7 @@ public function __construct(
                 OnnxRuntime::AddSessionConfigEntry($sessionOptions, $k, $v);
             }
         }
+
         foreach ($providers as $provider) {
             if (!in_array($provider, $this->providers())) {
                 trigger_error('Provider not available: ' . $provider, E_USER_WARNING);
@@ -111,14 +113,13 @@ public function __construct(
                 OnnxRuntime::SessionOptionsAppendExecutionProvider_CUDA_V2($sessionOptions, $cudaOptions);
                 OnnxRuntime::ReleaseCUDAProviderOptions($cudaOptions);
             } elseif ($provider == 'CoreMLExecutionProvider') {
-                OnnxRuntime::OrtSessionOptionsAppendExecutionProvider_CoreML($sessionOptions, 0);
+                OnnxRuntime::OrtSessionOptionsAppendExecutionProvider_CoreML($sessionOptions, 1);
             } elseif ($provider == 'CPUExecutionProvider') {
                 break;
             } else {
                 throw new \InvalidArgumentException('Provider not supported: ' . $provider);
             }
         }
-
         $this->session = $this->loadSession($path, $sessionOptions);
         $this->allocator = OnnxRuntime::GetAllocatorWithDefaultOptions();
         $this->inputs = $this->loadInputs();
@@ -260,7 +261,6 @@ private function loadSession($path, $sessionOptions): ?CData
         } else {
             $session = OnnxRuntime::CreateSession(self::env(), $this->ortString($path), $sessionOptions);
         }
-
         return $session;
     }