Skip to content

Commit

Permalink
Merge pull request #17 from CodeWithKyrian/add-image-to-image-pipeline
Browse files Browse the repository at this point in the history
Add image to image pipeline
  • Loading branch information
CodeWithKyrian authored Apr 8, 2024
2 parents bc3ef74 + 7ea5560 commit d0b1f0d
Show file tree
Hide file tree
Showing 14 changed files with 199 additions and 27 deletions.
22 changes: 22 additions & 0 deletions examples/pipelines/image-to-image.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<?php

declare(strict_types=1);

use Codewithkyrian\Transformers\Generation\Streamers\StdOutStreamer;
use function Codewithkyrian\Transformers\Pipelines\pipeline;
use function Codewithkyrian\Transformers\Utils\memoryUsage;
use function Codewithkyrian\Transformers\Utils\timeUsage;

// Load the example bootstrap. NOTE(review): this path is not anchored to __DIR__
// like the image paths below, so the script presumably has to be started from the
// directory that contains bootstrap.php — confirm.
require_once './bootstrap.php';

// Raise PHP's memory limit to 2048M for this run.
ini_set('memory_limit', '2048M');

// Build an image-to-image pipeline backed by the Swin2SR x2 checkpoint.
$superResolution = pipeline('image-to-image', 'Xenova/swin2SR-classical-sr-x2-64');

// Source image, resolved relative to this script.
$sourcePath = __DIR__. '/../images/butterfly.jpg';

$upscaled = $superResolution($sourcePath);

// Write the result next to the source image.
$upscaled->save(__DIR__. '/../images/butterfly-super-resolution.jpg');

// Dump the output size together with the time and memory usage helpers.
dd($upscaled->size(), timeUsage(), memoryUsage());
13 changes: 6 additions & 7 deletions src/FeatureExtractors/ImageFeatureExtractor.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ class ImageFeatureExtractor extends FeatureExtractor
* The mean values for image normalization.
* @var int|int[]|null
*/
protected int|array $imageMean;
protected int|array|null $imageMean;

/**
* The standard deviation values for image normalization.
* @var int|int[]|null
*/
protected int|array $imageStd;
protected int|array|null $imageStd;

/*
* What method to use for resampling.
Expand Down Expand Up @@ -65,14 +65,13 @@ class ImageFeatureExtractor extends FeatureExtractor
protected array|int|null $cropSize;
protected ?bool $doConvertRGB;
protected ?bool $doCropMargin;
protected ?array $padSize;
protected array|int|null $padSize;
protected ?bool $doPad;

public function __construct(public array $config)
{

$this->imageMean = $config['image_mean'] ?? $config['mean'];
$this->imageStd = $config['image_std'] ?? $config['std'];
$this->imageMean = $config['image_mean'] ?? $config['mean'] ?? null;
$this->imageStd = $config['image_std'] ?? $config['std'] ?? null;

$this->resample = $config['resample'] ?? 2; // 2 => bilinear
$this->doRescale = $config['do_rescale'] ?? true;
Expand Down Expand Up @@ -493,7 +492,7 @@ public function preprocess(
// Perform padding after rescaling/normalizing
if ($doPad ?? $this->doPad) {
if ($this->padSize !== null) {
$pixelData = $this->padImage($pixelData, $imgShape, $this->padSize);
[$pixelData, $imgShape] = $this->padImage($pixelData, $imgShape, $this->padSize);
} elseif ($this->sizeDivisibility !== null) {
[$paddedWidth, $paddedHeight] = $this->enforceSizeDivisibility([$imgShape[1], $imgShape[0]], $this->sizeDivisibility);
[$pixelData, $imgShape] = $this->padImage($pixelData, $imgShape, ['width' => $paddedWidth, 'height' => $paddedHeight]);
Expand Down
33 changes: 33 additions & 0 deletions src/FeatureExtractors/Swin2SRImageProcessor.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<?php

declare(strict_types=1);


namespace Codewithkyrian\Transformers\FeatureExtractors;

/**
 * Image processor for Swin2SR models.
 *
 * Overrides padImage() so that `$padSize` is treated as a window size: the image
 * is padded until its width and height are multiples of that value.
 */
class Swin2SRImageProcessor extends ImageFeatureExtractor
{
    /**
     * Pads the image so that its width and height are multiples of `$padSize`.
     *
     * The `$mode`, `$center` and `$constantValues` arguments are accepted but NOT
     * forwarded: the parent implementation is always called with `'symmetric'`,
     * `false` and `-1`.
     *
     * @param array $pixelData Pixel data, passed through to the parent unchanged.
     * @param array $imgShape Image shape as [height, width, channels].
     * @param int|array $padSize Window size. Must be a positive integer; the `array`
     *                           part of the type is rejected at runtime.
     * @param string $mode Ignored (see above).
     * @param bool $center Ignored (see above).
     * @param int $constantValues Ignored (see above).
     * @return array The parent's padImage() result.
     * @throws \InvalidArgumentException When `$padSize` is not a positive integer.
     */
    public function padImage(
        array $pixelData,
        array $imgShape,
        int|array $padSize,
        string $mode = 'constant',
        bool $center = false,
        int $constantValues = 0
    ): array
    {
        // The arithmetic below needs a positive integer: an array would raise a bare
        // TypeError, zero a DivisionByZeroError, and a negative value would compute a
        // target size smaller than the image.
        if (!is_int($padSize) || $padSize <= 0) {
            throw new \InvalidArgumentException(
                'Swin2SRImageProcessor::padImage() expects $padSize to be a positive integer.'
            );
        }

        // NOTE: In this case, `padSize` represents the size of the sliding window for the local attention.
        // In other words, the image is padded so that its width and height are multiples of `padSize`.
        [$imageHeight, $imageWidth] = $imgShape;

        // NOTE: For Swin2SR models, the original python implementation adds padding even when the image's width/height is already
        // a multiple of `pad_size`. However, this is most likely a bug (PR: https://github.com/mv-lab/swin2sr/pull/19).
        // For this reason, we only add padding when the image's width/height is not a multiple of `pad_size`.
        $paddedSize = [
            'width' => $imageWidth + ($padSize - $imageWidth % $padSize) % $padSize,
            'height' => $imageHeight + ($padSize - $imageHeight % $padSize) % $padSize,
        ];

        return parent::padImage($pixelData, $imgShape, $paddedSize, 'symmetric', false, -1);
    }
}
1 change: 1 addition & 0 deletions src/Models/Auto/AutoModel.php
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class AutoModel extends PretrainedMixin
'yolos' => \Codewithkyrian\Transformers\Models\Pretrained\YOLOSModel::class,
'owlvit' => \Codewithkyrian\Transformers\Models\Pretrained\OwlVitModel::class,
'owlv2' => \Codewithkyrian\Transformers\Models\Pretrained\OwlV2Model::class,
'swin2sr' => \Codewithkyrian\Transformers\Models\Pretrained\Swin2SRModel::class,
];

const ENCODER_DECODER_MODEL_MAPPING = [
Expand Down
17 changes: 17 additions & 0 deletions src/Models/Auto/AutoModelForImageToImage.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<?php

declare(strict_types=1);


namespace Codewithkyrian\Transformers\Models\Auto;

/**
 * Auto-model entry point for image-to-image models.
 *
 * Only declares the lookup tables; everything else is inherited from PretrainedMixin.
 */
class AutoModelForImageToImage extends PretrainedMixin
{
    /**
     * Model type key => model class.
     */
    public const MODEL_CLASS_MAPPING = [
        'swin2sr' => \Codewithkyrian\Transformers\Models\Pretrained\Swin2SRForImageSuperResolution::class,
    ];

    /**
     * List of mapping tables that PretrainedMixin::fromPretrained() iterates over.
     */
    public const MODEL_CLASS_MAPPINGS = [self::MODEL_CLASS_MAPPING];
}
2 changes: 2 additions & 0 deletions src/Models/Auto/PretrainedMixin.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
use Codewithkyrian\Transformers\Models\Pretrained\PretrainedModel;
use Codewithkyrian\Transformers\Utils\AutoConfig;
use Symfony\Component\Console\Output\OutputInterface;
use function Codewithkyrian\Transformers\Utils\timeUsage;

/**
* Base class of all AutoModels. Contains the `from_pretrained` function
Expand Down Expand Up @@ -50,6 +51,7 @@ public static function fromPretrained(
?OutputInterface $output = null
): PretrainedModel
{

$config = AutoConfig::fromPretrained($modelNameOrPath, $config, $cacheDir, $revision, $output);

foreach (static::MODEL_CLASS_MAPPINGS as $modelClassMapping) {
Expand Down
4 changes: 3 additions & 1 deletion src/Models/Pretrained/PretrainedModel.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
use OnnxRuntime\InferenceSession;
use Symfony\Component\Console\Output\OutputInterface;
use function Codewithkyrian\Transformers\Utils\array_some;
use function Codewithkyrian\Transformers\Utils\timeUsage;

/**
* A base class for pre-trained models that provides the model configuration and an ONNX session.
Expand Down Expand Up @@ -92,7 +93,6 @@ public static function fromPretrained(
$config = AutoConfig::fromPretrained($modelNameOrPath, $config, $cacheDir, $revision, $output);
}


switch ($modelArchitecture) {
case ModelArchitecture::DecoderOnly:
{
Expand Down Expand Up @@ -153,9 +153,11 @@ public static function fromPretrained(
echo "WARNING: {$modelArchitecture->value} is not a valid model group. Defaulting to EncoderOnly.";
}


$session = self::constructSession(modelNameOrPath: $modelNameOrPath,
fileName: 'model', cacheDir: $cacheDir, revision: $revision, output: $output);


return new static($config, $session, $modelArchitecture);
}
}
Expand Down
14 changes: 14 additions & 0 deletions src/Models/Pretrained/Swin2SRForImageSuperResolution.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?php

declare(strict_types=1);


namespace Codewithkyrian\Transformers\Models\Pretrained;

/**
 * Swin2SR transformer with an upsampler head on top, used for image super resolution
 * and restoration.
 *
 * Declares no members of its own; all behavior comes from Swin2SRPretrainedModel.
 */
class Swin2SRForImageSuperResolution extends Swin2SRPretrainedModel
{
}
11 changes: 11 additions & 0 deletions src/Models/Pretrained/Swin2SRModel.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?php

declare(strict_types=1);


namespace Codewithkyrian\Transformers\Models\Pretrained;

/**
 * Concrete Swin2SR model class.
 *
 * Declares no members of its own; all behavior comes from Swin2SRPretrainedModel.
 */
class Swin2SRModel extends Swin2SRPretrainedModel
{
}
11 changes: 11 additions & 0 deletions src/Models/Pretrained/Swin2SRPretrainedModel.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?php

declare(strict_types=1);


namespace Codewithkyrian\Transformers\Models\Pretrained;

/**
 * Common parent for the Swin2SR model classes.
 *
 * Declares no members of its own; all behavior comes from PretrainedModel.
 */
class Swin2SRPretrainedModel extends PretrainedModel
{
}
62 changes: 62 additions & 0 deletions src/Pipelines/ImageToImagePipeline.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
<?php

declare(strict_types=1);


namespace Codewithkyrian\Transformers\Pipelines;

use Codewithkyrian\Transformers\Utils\Image;
use Codewithkyrian\Transformers\Utils\Tensor;
use Interop\Polite\Math\Matrix\NDArray;
use function Codewithkyrian\Transformers\Utils\prepareImages;
use function Codewithkyrian\Transformers\Utils\timeUsage;

/**
 * Image to Image pipeline using any `AutoModelForImageToImage`. Given an input image,
 * this pipeline produces a new image from it.
 *
 * **Example:** Super-resolution w/ `Xenova/swin2SR-classical-sr-x2-64`
 * ```php
 * $upscaler = pipeline('image-to-image', 'Xenova/swin2SR-classical-sr-x2-64');
 * $url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/butterfly.jpg';
 * $output = $upscaler($url);
 * // Image {
 * //   data: array(786432) [ 41, 31, 24, 43, ... ],
 * //   width: 512,
 * //   height: 512,
 * //   channels: 3
 * // }
 * ```
 */
class ImageToImagePipeline extends Pipeline
{
    /**
     * Runs the model on the given image(s) and turns every entry of the model's
     * `reconstruction` output into an Image.
     *
     * @param array|string $inputs Image input(s), handed to prepareImages().
     * @param mixed ...$args Not used by this pipeline.
     * @return Image|Image[] The list of images when more than one was produced,
     *                       otherwise the first image on its own.
     * @throws \Exception
     */
    public function __invoke(array|string $inputs, ...$args): array|Image
    {
        $images = prepareImages($inputs);
        $modelInputs = ($this->processor)($images);
        $modelOutputs = $this->model->__invoke($modelInputs);

        $results = [];

        /** @var Tensor $reconstruction */
        foreach ($modelOutputs['reconstruction'] as $reconstruction) {
            // Squeeze the tensor and clamp its values to the [0, 1] range.
            $normalized = $reconstruction->squeeze()->clamp(0, 1);

            // Scale to 0..255 and round before converting to uint8.
            $pixels = $normalized
                ->multiplyScalar(255)
                ->round()
                ->to(NDArray::uint8);

            $results[] = Image::fromTensor($pixels);
        }

        if (count($results) > 1) {
            return $results;
        }

        return $results[0];
    }
}
4 changes: 3 additions & 1 deletion src/Pipelines/Pipeline.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
use Codewithkyrian\Transformers\PretrainedTokenizers\PretrainedTokenizer;
use Codewithkyrian\Transformers\Processors\AutoProcessor;
use Codewithkyrian\Transformers\Processors\Processor;
use Codewithkyrian\Transformers\Utils\Image;
use Codewithkyrian\Transformers\Utils\Tensor;
use Symfony\Component\Console\Output\OutputInterface;
use function Codewithkyrian\Transformers\Utils\timeUsage;

Expand All @@ -29,7 +31,7 @@ public function __construct(
* @param ...$args
* @return array|Tensor|Image
*/
public function __invoke(array|string $inputs, ...$args): array
public function __invoke(array|string $inputs, ...$args): array|Tensor|Image
{
return [];
}
Expand Down
10 changes: 10 additions & 0 deletions src/Pipelines/Task.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
use Codewithkyrian\Transformers\Models\Auto\AutoModel;
use Codewithkyrian\Transformers\Models\Auto\AutoModelForCausalLM;
use Codewithkyrian\Transformers\Models\Auto\AutoModelForImageClassification;
use Codewithkyrian\Transformers\Models\Auto\AutoModelForImageToImage;
use Codewithkyrian\Transformers\Models\Auto\AutoModelForImageFeatureExtraction;
use Codewithkyrian\Transformers\Models\Auto\AutoModelForMaskedLM;
use Codewithkyrian\Transformers\Models\Auto\AutoModelForObjectDetection;
Expand Down Expand Up @@ -44,6 +45,7 @@ enum Task: string
case ImageClassification = 'image-classification';
case ImageFeatureExtraction = 'image-feature-extraction';
case ZeroShotImageClassification = 'zero-shot-image-classification';
case ImageToImage = 'image-to-image';

case ObjectDetection = 'object-detection';
case ZeroShotObjectDetection = 'zero-shot-object-detection';
Expand Down Expand Up @@ -83,6 +85,8 @@ public function pipeline(PretrainedModel $model, ?PretrainedTokenizer $tokenizer

self::ZeroShotImageClassification => new ZeroShotImageClassificationPipeline($this, $model, $tokenizer, $processor),

self::ImageToImage => new ImageToImagePipeline($this, $model, processor: $processor),

self::ObjectDetection => new ObjectDetectionPipeline($this, $model, $tokenizer, $processor),

self::ZeroShotObjectDetection => new ZeroShotObjectDetectionPipeline($this, $model, $tokenizer, $processor),
Expand Down Expand Up @@ -121,6 +125,8 @@ public function defaultModelName(): string

self::ZeroShotImageClassification => 'Xenova/clip-vit-base-patch32', // Original: 'openai/clip-vit-base-patch32'

self::ImageToImage => 'Xenova/swin2SR-classical-sr-x2-64', // Original: 'caidas/swin2SR-classical-sr-x2-64'

self::ObjectDetection => 'Xenova/detr-resnet-50', // Original: 'facebook/detr-resnet-50',

self::ZeroShotObjectDetection => 'Xenova/owlvit-base-patch32', // Original: 'google/owlvit-base-patch32',
Expand Down Expand Up @@ -166,6 +172,8 @@ public function autoModel(

self::ZeroShotImageClassification => AutoModel::fromPretrained($modelNameOrPath, $quantized, $config, $cacheDir, $revision, $modelFilename, $output),

self::ImageToImage => AutoModelForImageToImage::fromPretrained($modelNameOrPath, $quantized, $config, $cacheDir, $revision, $modelFilename, $output),

self::ObjectDetection => AutoModelForObjectDetection::fromPretrained($modelNameOrPath, $quantized, $config, $cacheDir, $revision, $modelFilename, $output),

self::ZeroShotObjectDetection => AutoModelForZeroShotObjectDetection::fromPretrained($modelNameOrPath, $quantized, $config, $cacheDir, $revision, $modelFilename, $output),
Expand All @@ -184,6 +192,7 @@ public function autoTokenizer(
return match ($this) {

self::ImageClassification,
self::ImageToImage,
self::ImageFeatureExtraction,
self::ObjectDetection => null,

Expand Down Expand Up @@ -221,6 +230,7 @@ public function autoProcessor(
self::ImageClassification,
self::ImageFeatureExtraction,
self::ZeroShotImageClassification,
self::ImageToImage,
self::ObjectDetection,
self::ZeroShotObjectDetection => AutoProcessor::fromPretrained($modelNameOrPath, $config, $cacheDir, $revision, $output),

Expand Down
22 changes: 4 additions & 18 deletions src/Utils/Tensor.php
Original file line number Diff line number Diff line change
Expand Up @@ -598,23 +598,9 @@ public function squeeze(?int $dim = null): static
{
$mo = self::getMo();

$result = clone $this;

if ($dim === null) {
$result->buffer = array_filter($result->buffer, fn($value) => $value !== 1);
$result->shape = array_filter($result->shape, fn($value) => $value !== 1);
} else {
$dim = $result->safeIndex($dim, $result->ndim());

if ($result->shape[$dim] !== 1) {
throw new Exception("DimensionError: cannot select an axis to squeeze out which has size not equal to one");
}

array_splice($result->buffer, $dim, 1);
array_splice($result->shape, $dim, 1);
}
$ndArray = $mo->la()->squeeze($this, $dim);

return $result;
return new static($ndArray->buffer(), $ndArray->dtype(), $ndArray->shape(), $ndArray->offset());
}

/**
Expand Down Expand Up @@ -684,10 +670,10 @@ public function round(): static
/**
* Performs Tensor dtype conversion.
*
* @param string $dtype The target data type.
* @param int $dtype The target data type.
* @return static The converted tensor.
*/
public function to(string $dtype): static
public function to(int $dtype): static
{
if ($this->dtype() === $dtype) {
return $this;
Expand Down

0 comments on commit d0b1f0d

Please sign in to comment.