-
Notifications
You must be signed in to change notification settings - Fork 34
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #17 from CodeWithKyrian/add-image-to-image-pipeline
Add image to image pipeline
- Loading branch information
Showing
14 changed files
with
199 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
use Codewithkyrian\Transformers\Generation\Streamers\StdOutStreamer; | ||
use function Codewithkyrian\Transformers\Pipelines\pipeline; | ||
use function Codewithkyrian\Transformers\Utils\memoryUsage; | ||
use function Codewithkyrian\Transformers\Utils\timeUsage; | ||
|
||
// Load the example bootstrap file; the path is resolved against the current working directory.
require_once './bootstrap.php';

// Raise the memory ceiling before the pipeline is created.
ini_set('memory_limit', '2048M');

// Build the image-to-image pipeline backed by the Swin2SR x2 checkpoint.
$superResolution = pipeline('image-to-image', 'Xenova/swin2SR-classical-sr-x2-64');

// Source image on disk and the location the result is written to.
$sourceImage = __DIR__. '/../images/butterfly.jpg';
$upscaledImage = $superResolution($sourceImage);
$upscaledImage->save(__DIR__. '/../images/butterfly-super-resolution.jpg');

// Dump the resulting size together with time and memory usage, then stop.
dd($upscaledImage->size(), timeUsage(), memoryUsage());
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
|
||
namespace Codewithkyrian\Transformers\FeatureExtractors; | ||
|
||
class Swin2SRImageProcessor extends ImageFeatureExtractor
{
    /**
     * Pads an image so that its width and height become multiples of a window size.
     *
     * NOTE: In this case, `$padSize` represents the size of the sliding window for the local
     * attention, not a target size. The target size handed to the parent is computed from it.
     *
     * The parent is always called with mode 'symmetric', no centering and a constant value of -1.
     * `$mode`, `$center` and `$constantValues` are accepted but never consulted
     * (presumably they exist only to keep the signature compatible with the parent — confirm).
     *
     * @param array $pixelData Pixel data, forwarded untouched to the parent implementation.
     * @param array $imgShape Image shape, read as [height, width, ...].
     * @param int|array $padSize Window size; must be a positive integer. Arrays are rejected because
     *                           the window arithmetic below is only defined for a single integer.
     * @param string $mode Ignored.
     * @param bool $center Ignored.
     * @param int $constantValues Ignored.
     *
     * @return array The result of the parent's `padImage()`.
     *
     * @throws \InvalidArgumentException When `$padSize` is not a positive integer.
     */
    public function padImage(
        array     $pixelData,
        array     $imgShape,
        int|array $padSize,
        string    $mode = 'constant',
        bool      $center = false,
        int       $constantValues = 0
    ): array
    {
        // Fail early with a readable message instead of an opaque TypeError (array operand)
        // or DivisionByZeroError (modulo by zero) from the arithmetic below.
        if (!is_int($padSize) || $padSize <= 0) {
            throw new \InvalidArgumentException(sprintf(
                'Swin2SR expects $padSize to be a positive integer window size, %s given.',
                is_int($padSize) ? (string)$padSize : get_debug_type($padSize)
            ));
        }

        [$imageHeight, $imageWidth] = $imgShape;

        // NOTE: For Swin2SR models, the original python implementation adds padding even when the image's
        // width/height is already a multiple of `pad_size`. However, this is most likely a bug
        // (PR: https://github.com/mv-lab/swin2sr/pull/19).
        // For this reason, we only add padding when the image's width/height is not a multiple of `pad_size`.
        $targetSize = [
            'width' => self::roundUpToMultiple($imageWidth, $padSize),
            'height' => self::roundUpToMultiple($imageHeight, $padSize),
        ];

        return parent::padImage($pixelData, $imgShape, $targetSize, 'symmetric', false, -1);
    }

    /**
     * Returns the smallest multiple of `$multiple` that is greater than or equal to `$length`.
     */
    private static function roundUpToMultiple(int $length, int $multiple): int
    {
        return $length + ($multiple - $length % $multiple) % $multiple;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
|
||
namespace Codewithkyrian\Transformers\Models\Auto; | ||
|
||
/**
 * Auto-model entry for image-to-image models.
 *
 * Declares which concrete model class is used for each supported model type key.
 */
class AutoModelForImageToImage extends PretrainedMixin
{
    /**
     * Model type key => concrete model class.
     */
    public const MODEL_CLASS_MAPPING = [
        'swin2sr' => \Codewithkyrian\Transformers\Models\Pretrained\Swin2SRForImageSuperResolution::class,
    ];

    /**
     * List of mapping tables for this auto-model; here it holds only MODEL_CLASS_MAPPING.
     */
    public const MODEL_CLASS_MAPPINGS = [
        self::MODEL_CLASS_MAPPING,
    ];
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
|
||
namespace Codewithkyrian\Transformers\Models\Pretrained; | ||
|
||
/**
 * Swin2SR transformer with an upsampler head on top, used for image super resolution and restoration.
 *
 * The class declares no members of its own; everything is inherited from Swin2SRPretrainedModel.
 */
class Swin2SRForImageSuperResolution extends Swin2SRPretrainedModel
{
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
|
||
namespace Codewithkyrian\Transformers\Models\Pretrained; | ||
|
||
/**
 * Swin2SR model class.
 *
 * Declares no members of its own; everything is inherited from Swin2SRPretrainedModel.
 */
class Swin2SRModel extends Swin2SRPretrainedModel
{
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
|
||
namespace Codewithkyrian\Transformers\Models\Pretrained; | ||
|
||
/**
 * Common parent of the Swin2SR model classes.
 *
 * Declares no members of its own; everything is inherited from PretrainedModel.
 */
class Swin2SRPretrainedModel extends PretrainedModel
{
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
|
||
namespace Codewithkyrian\Transformers\Pipelines; | ||
|
||
use Codewithkyrian\Transformers\Utils\Image; | ||
use Codewithkyrian\Transformers\Utils\Tensor; | ||
use Interop\Polite\Math\Matrix\NDArray; | ||
use function Codewithkyrian\Transformers\Utils\prepareImages; | ||
use function Codewithkyrian\Transformers\Utils\timeUsage; | ||
|
||
/**
 * Image to Image pipeline using any `AutoModelForImageToImage`. This pipeline generates an image based on a previous image input.
 *
 * **Example:** Super-resolution w/ `Xenova/swin2SR-classical-sr-x2-64`
 * ```php
 * $upscaler = pipeline('image-to-image', 'Xenova/swin2SR-classical-sr-x2-64');
 * $url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/butterfly.jpg';
 * $output = $upscaler($url);
 * // Image {
 * //   data: array(786432) [ 41, 31, 24, 43, ... ],
 * //   width: 512,
 * //   height: 512,
 * //   channels: 3
 * // }
 * ```
 */
class ImageToImagePipeline extends Pipeline
{
    /**
     * Runs the model on one or more images and wraps every reconstruction in an Image.
     *
     * @param array|string $inputs A single image reference or a list of them, handed to `prepareImages()`.
     * @param mixed ...$args Accepted but not used by this pipeline.
     *
     * @return Image|Image[] The Image itself when exactly one reconstruction was produced, the list of
     *                       Images when there were several, and an empty array for an empty input list.
     *
     * @throws \RuntimeException When the model produced no reconstruction for a non-empty input.
     * @throws \Exception
     */
    public function __invoke(array|string $inputs, ...$args): array|Image
    {
        // Nothing to process: answer with an empty list instead of running the model and
        // later failing on a missing first element.
        if ($inputs === []) {
            return [];
        }

        $images = prepareImages($inputs);

        $modelInputs = ($this->processor)($images);

        $outputs = ($this->model)($modelInputs);

        $results = [];

        /** @var Tensor $reconstruction */
        foreach ($outputs['reconstruction'] as $reconstruction) {
            // Clamp to [0, 1], scale by 255, round and cast to uint8 before building the Image.
            $pixels = $reconstruction->squeeze()
                ->clamp(0, 1)
                ->multiplyScalar(255)
                ->round()
                ->to(NDArray::uint8);

            $results[] = Image::fromTensor($pixels);
        }

        // A single result is unwrapped; several results are returned as a list.
        return match (count($results)) {
            0 => throw new \RuntimeException('The image-to-image model did not produce any reconstruction.'),
            1 => $results[0],
            default => $results,
        };
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters