Skip to content

Commit

Permalink
Merge pull request #11 from CodeWithKyrian/add-object-detection-pipeline
Browse files Browse the repository at this point in the history
Add Object Detection Pipeline
  • Loading branch information
CodeWithKyrian authored Apr 3, 2024
2 parents ae63823 + 3e130e2 commit eb27c1e
Show file tree
Hide file tree
Showing 21 changed files with 686 additions and 40 deletions.
4 changes: 3 additions & 1 deletion examples/bootstrap.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,7 @@

require_once './vendor/autoload.php';

Transformers::setup()->apply();
Transformers::setup()
// ->setImageDriver(\Codewithkyrian\Transformers\Utils\ImageDriver::GD)
->apply();

51 changes: 44 additions & 7 deletions examples/image-test.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,55 @@

declare(strict_types=1);

use Codewithkyrian\Transformers\Processors\AutoProcessor;
use Codewithkyrian\Transformers\Utils\Image1;
use Codewithkyrian\Transformers\Transformers;
use Codewithkyrian\Transformers\Utils\Image;
use function Codewithkyrian\Transformers\Utils\memoryUsage;
use Codewithkyrian\Transformers\Utils\ImageDriver;
use Codewithkyrian\Transformers\Utils\Tensor;
use function Codewithkyrian\Transformers\Utils\timeUsage;

require_once './bootstrap.php';

$processor = AutoProcessor::fromPretrained('Xenova/vit-base-patch16-224');
/**
 * Reads the sample image with the given image driver, converts it to a tensor
 * and dumps the value reported by timeUsage() for the run.
 *
 * @param ImageDriver $imageDriver The driver to configure before reading the image.
 * @return Tensor The tensor produced from the image.
 */
function toTensorTest(ImageDriver $imageDriver): Tensor
{
    // NOTE(review): the return value is discarded here; presumably this call
    // (re)starts the timer that the later timeUsage() call reports — confirm.
    timeUsage();

    // The driver has to be configured before the image is read.
    Transformers::setup()
        ->setImageDriver($imageDriver)
        ->apply();

    $image = Image::read('images/butterfly.jpg');

    $image->rgb();

    $tensor = $image->toTensor();

    dump("$imageDriver->name (toTensor) : ". timeUsage());

    return $tensor;
}

/**
 * Rebuilds an image from a tensor with the given image driver and dumps the
 * value reported by timeUsage() for the run.
 *
 * @param ImageDriver $imageDriver The driver to configure before the conversion.
 * @param Tensor $tensor The tensor to convert back into an image.
 * @return Image The image created from the tensor.
 */
function fromTensorTest(ImageDriver $imageDriver, Tensor $tensor) : Image
{
    // Select the driver first so the conversion below runs on it.
    $configuration = Transformers::setup()->setImageDriver($imageDriver);
    $configuration->apply();

    $rebuilt = Image::fromTensor($tensor);

    $label = "{$imageDriver->name} (fromTensor) : ";
    dump($label . timeUsage());

    return $rebuilt;
}


// Run both conversions once per image driver, in a fixed order.
$drivers = [ImageDriver::IMAGICK, ImageDriver::GD, ImageDriver::VIPS];

dump("------------ toTensor ------------");
foreach ($drivers as $driver) {
    // Only the tensor from the last driver is kept for the second stage.
    $tensor = toTensorTest($driver);
}


dump("------------ fromTensor ------------");
foreach ($drivers as $driver) {
    $image = fromTensorTest($driver, $tensor);
}
21 changes: 21 additions & 0 deletions examples/pipelines/object-detection.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<?php

declare(strict_types=1);

namespace Codewithkyrian\Transformers\Pipelines;

use function Codewithkyrian\Transformers\Utils\memoryUsage;
use function Codewithkyrian\Transformers\Utils\timeUsage;

require_once './bootstrap.php';

// Remove PHP's memory cap for this example run.
ini_set('memory_limit', '-1');

// Build an object-detection pipeline backed by the DETR ResNet-50 checkpoint.
$detector = pipeline('object-detection', 'Xenova/detr-resnet-50');

// Resolve the sample image relative to this script rather than the working directory.
$imagePath = __DIR__ . '/../images/cats.jpg';

// Pass 0.9 as the detection threshold.
$detections = $detector($imagePath, threshold: 0.9);

// Dump the detections together with the time and memory figures, then stop.
dd($detections, timeUsage(), memoryUsage());

53 changes: 53 additions & 0 deletions src/FeatureExtractors/DetrFeatureExtractor.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
<?php

declare(strict_types=1);


namespace Codewithkyrian\Transformers\FeatureExtractors;

use Codewithkyrian\Transformers\Models\Output\ObjectDetectionOutput;
use Codewithkyrian\Transformers\Models\Output\ModelOutput;
use Codewithkyrian\Transformers\Processors\Processor;
use Codewithkyrian\Transformers\Utils\Image;
use Codewithkyrian\Transformers\Utils\Tensor;
use Interop\Polite\Math\Matrix\NDArray;

/**
 * Feature extractor for DETR models: delegates preprocessing to
 * ImageFeatureExtractor and adds a pixel mask to the result.
 */
class DetrFeatureExtractor extends ImageFeatureExtractor
{
    /**
     * Preprocesses the image(s) through the parent extractor and attaches a
     * pixel mask to the returned features.
     *
     * The mask is a placeholder for now: an int64 tensor filled with 1s whose
     * shape is [N, 64, 64], where N is the first dimension of the parent's
     * `pixel_values` (presumably the batch size — confirm against the parent).
     *
     * @param Image|array $images The image(s) to extract features from.
     * @param mixed ...$args Extra arguments, forwarded unchanged to the parent extractor.
     * @return array An array with the parent's `pixel_values` and the generated `pixel_mask`.
     */
    public function __invoke(Image|array $images, ...$args): array
    {
        // Unpack the variadic arguments so the parent receives them exactly as
        // the caller passed them, instead of as a single nested array.
        $result = parent::__invoke($images, ...$args);

        $pixelValues = $result['pixel_values'];

        // TODO support differently-sized images, for now assume all images are the same size.
        // TODO support different mask sizes (not just 64x64)
        // Currently, just fill pixel mask with 1s
        $maskSize = [$pixelValues->shape()[0], 64, 64];

        $pixelMaskData = array_fill(0, array_product($maskSize), 1);

        $pixelMask = new Tensor($pixelMaskData, NDArray::int64, $maskSize);

        return ['pixel_values' => $pixelValues, 'pixel_mask' => $pixelMask];
    }


    /**
     * Post-processes the outputs of the model (for object detection) by
     * delegating to Processor::postProcessObjectDetection().
     *
     * @param ObjectDetectionOutput $outputs The outputs of the model that must be post-processed.
     * @param float $threshold The threshold to use for the scores.
     * @param array|null $targetSizes The sizes of the original images.
     * @param bool $isZeroShot Whether zero-shot object detection was performed.
     * @return array An array containing the post-processed outputs.
     */
    public function postProcessObjectDetection(ObjectDetectionOutput $outputs, float $threshold = 0.5, ?array $targetSizes = null, bool $isZeroShot = false): array
    {
        return Processor::postProcessObjectDetection($outputs, $threshold, $targetSizes, $isZeroShot);
    }
}
14 changes: 9 additions & 5 deletions src/FeatureExtractors/ImageFeatureExtractor.php
Original file line number Diff line number Diff line change
Expand Up @@ -294,8 +294,8 @@ private function calculateReflectOffset(int $val, int $max): int
*/
public function rescale(array &$pixelData): void
{
    // Multiplies every entry of $pixelData by $this->rescaleFactor, in place.
    // The indexed loop assumes $pixelData is a list keyed 0..count-1.
    // The length is read once up front rather than on every iteration.
    $length = count($pixelData);

    for ($i = 0; $i < $length; ++$i) {
        $pixelData[$i] *= $this->rescaleFactor;
    }
}

Expand Down Expand Up @@ -337,14 +337,15 @@ public function getResizeOutputImageSize(Image $image, int|array|null $size): ar
$newWidth = $srcWidth * $shortResizeFactor;
$newHeight = $srcHeight * $shortResizeFactor;

// Downscale to ensure the largest dimension is longestEdge
// The new width and height might be greater than `longest_edge`, so
// we downscale to ensure the largest dimension is longestEdge
$longResizeFactor = $longestEdge !== null
? min($longestEdge / $newWidth, $longestEdge / $newHeight)
: 1;

// Round to avoid floating point precision issues
$finalWidth = (int)floor($newWidth * $longResizeFactor);
$finalHeight = (int)floor($newHeight * $longResizeFactor);
$finalWidth = (int)floor(round($srcWidth * $longResizeFactor, 2));
$finalHeight = (int)floor(round($srcHeight * $longResizeFactor, 2));

if ($this->sizeDivisibility !== null) {
[$finalWidth, $finalHeight] = $this->enforceSizeDivisibility([$finalWidth, $finalHeight], $this->sizeDivisibility);
Expand Down Expand Up @@ -453,11 +454,14 @@ public function preprocess(

$reshapedInputSize = [$image->height(), $image->width()];


// All pixel-level manipulation occurs with data in the hwc format (height, width, channels),
// to emulate the behavior of the original Python code (w/ numpy).
$pixelData = $image->pixelData();

$imgShape = [$image->height(), $image->width(), $image->channels];


if ($this->doRescale) {
$this->rescale($pixelData);
}
Expand Down
3 changes: 3 additions & 0 deletions src/Models/Auto/AutoModel.php
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ class AutoModel extends PretrainedMixin
"clip" => \Codewithkyrian\Transformers\Models\Pretrained\CLIPModel::class,
"vit" => \Codewithkyrian\Transformers\Models\Pretrained\ViTModel::class,
"deit" => \Codewithkyrian\Transformers\Models\Pretrained\DeiTModel::class,

// Class names are spelled exactly like their declarations (DetrModel, YolosModel):
// an autoloader that derives the file path from the class name cannot find
// differently-cased names on a case-sensitive filesystem.
'detr' => \Codewithkyrian\Transformers\Models\Pretrained\DetrModel::class,
'yolos' => \Codewithkyrian\Transformers\Models\Pretrained\YolosModel::class,
];

const ENCODER_DECODER_MODEL_MAPPING = [
Expand Down
19 changes: 19 additions & 0 deletions src/Models/Auto/AutoModelForObjectDetection.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<?php

declare(strict_types=1);


namespace Codewithkyrian\Transformers\Models\Auto;

/**
 * Auto class that maps a model type key to its object-detection model class.
 */
class AutoModelForObjectDetection extends PretrainedMixin
{
    /**
     * Model type key => object-detection model class.
     */
    public const MODEL_CLASS_MAPPING = [
        'detr' => \Codewithkyrian\Transformers\Models\Pretrained\DetrForObjectDetection::class,
        'yolos' => \Codewithkyrian\Transformers\Models\Pretrained\YolosForObjectDetection::class,
    ];

    /**
     * The list of mappings exposed by this class (a single mapping here).
     */
    public const MODEL_CLASS_MAPPINGS = [self::MODEL_CLASS_MAPPING];
}
28 changes: 28 additions & 0 deletions src/Models/Output/DetrSegmentationOutput.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<?php

declare(strict_types=1);


namespace Codewithkyrian\Transformers\Models\Output;

use Codewithkyrian\Transformers\Utils\Tensor;

/**
 * Immutable holder for the logits, predicted boxes and predicted masks of a
 * DETR segmentation model.
 */
class DetrSegmentationOutput implements ModelOutput
{
    /**
     * @param Tensor $logits Classification logits (including no-object) for all queries.
     * @param Tensor $predBoxes Normalized boxes coordinates for all queries, represented as
     *                          (center_x, center_y, width, height). These values are normalized in [0, 1],
     *                          relative to the size of each individual image in the batch
     *                          (disregarding possible padding).
     * @param Tensor $predMasks Segmentation masks for all queries.
     */
    public function __construct(
        public readonly Tensor $logits,
        public readonly Tensor $predBoxes,
        public readonly Tensor $predMasks,
    ) {
    }

    /**
     * Builds the output object from an array of named model outputs.
     *
     * @param array $array Must contain the keys `logits`, `pred_boxes` and `pred_masks`.
     * @return self
     */
    public static function fromOutput(array $array): self
    {
        return new self(
            logits: $array['logits'],
            predBoxes: $array['pred_boxes'],
            predMasks: $array['pred_masks'],
        );
    }
}
27 changes: 27 additions & 0 deletions src/Models/Output/ObjectDetectionOutput.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<?php

declare(strict_types=1);


namespace Codewithkyrian\Transformers\Models\Output;

use Codewithkyrian\Transformers\Utils\Tensor;

/**
 * Immutable holder for the logits and predicted boxes of an object-detection model.
 */
class ObjectDetectionOutput implements ModelOutput
{
    /**
     * @param Tensor $logits Classification logits (including no-object) for all queries.
     * @param Tensor $predBoxes Normalized boxes coordinates for all queries, represented as
     *                          (center_x, center_y, width, height). These values are normalized in [0, 1],
     *                          relative to the size of each individual image in the batch
     *                          (disregarding possible padding).
     */
    public function __construct(
        public readonly Tensor $logits,
        public readonly Tensor $predBoxes,
    ) {
    }

    /**
     * Builds the output object from an array of named model outputs.
     *
     * @param array $array Must contain the keys `logits` and `pred_boxes`.
     * @return self
     */
    public static function fromOutput(array $array): self
    {
        return new self(
            logits: $array['logits'],
            predBoxes: $array['pred_boxes'],
        );
    }
}
16 changes: 16 additions & 0 deletions src/Models/Pretrained/DetrForObjectDetection.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?php

declare(strict_types=1);


namespace Codewithkyrian\Transformers\Models\Pretrained;

use Codewithkyrian\Transformers\Models\Output\ObjectDetectionOutput;

/**
 * DETR model whose invocation result is wrapped in an ObjectDetectionOutput.
 */
class DetrForObjectDetection extends DetrPretrainedModel
{
    /**
     * Runs the parent model and wraps its result.
     *
     * @param array $modelInputs The inputs passed through to the parent model.
     * @return ObjectDetectionOutput The parent's output converted via ObjectDetectionOutput::fromOutput().
     */
    public function __invoke(array $modelInputs): ObjectDetectionOutput
    {
        $rawOutput = parent::__invoke($modelInputs);

        return ObjectDetectionOutput::fromOutput($rawOutput);
    }
}
16 changes: 16 additions & 0 deletions src/Models/Pretrained/DetrForSegmentation.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?php

declare(strict_types=1);


namespace Codewithkyrian\Transformers\Models\Pretrained;

use Codewithkyrian\Transformers\Models\Output\DetrSegmentationOutput;

/**
 * DETR model whose invocation result is wrapped in a DetrSegmentationOutput.
 */
class DetrForSegmentation extends DetrPretrainedModel
{
    /**
     * Runs the parent model and wraps its result.
     *
     * @param array $modelInputs The inputs passed through to the parent model.
     * @return DetrSegmentationOutput The parent's output converted via DetrSegmentationOutput::fromOutput().
     */
    public function __invoke(array $modelInputs): DetrSegmentationOutput
    {
        $rawOutput = parent::__invoke($modelInputs);

        return DetrSegmentationOutput::fromOutput($rawOutput);
    }
}
11 changes: 11 additions & 0 deletions src/Models/Pretrained/DetrModel.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?php

declare(strict_types=1);


namespace Codewithkyrian\Transformers\Models\Pretrained;

/**
 * Plain DETR model class. It declares no members of its own; all behaviour
 * is inherited from DetrPretrainedModel.
 */
class DetrModel extends DetrPretrainedModel
{

}
11 changes: 11 additions & 0 deletions src/Models/Pretrained/DetrPretrainedModel.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?php

declare(strict_types=1);


namespace Codewithkyrian\Transformers\Models\Pretrained;

/**
 * Common base class for the DETR model classes. It declares no members of
 * its own; all behaviour is inherited from PretrainedModel.
 */
class DetrPretrainedModel extends PretrainedModel
{

}
16 changes: 16 additions & 0 deletions src/Models/Pretrained/YolosForObjectDetection.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?php

declare(strict_types=1);


namespace Codewithkyrian\Transformers\Models\Pretrained;

use Codewithkyrian\Transformers\Models\Output\ObjectDetectionOutput;

/**
 * YOLOS model whose invocation result is wrapped in an ObjectDetectionOutput.
 */
class YolosForObjectDetection extends YolosPretrainedModel
{
    /**
     * Runs the parent model and wraps its result.
     *
     * @param array $modelInputs The inputs passed through to the parent model.
     * @return ObjectDetectionOutput The parent's output converted via ObjectDetectionOutput::fromOutput().
     */
    public function __invoke(array $modelInputs): ObjectDetectionOutput
    {
        $rawOutput = parent::__invoke($modelInputs);

        return ObjectDetectionOutput::fromOutput($rawOutput);
    }
}
11 changes: 11 additions & 0 deletions src/Models/Pretrained/YolosModel.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?php

declare(strict_types=1);


namespace Codewithkyrian\Transformers\Models\Pretrained;

/**
 * Plain YOLOS model class. It declares no members of its own; all behaviour
 * is inherited from YolosPretrainedModel.
 */
class YolosModel extends YolosPretrainedModel
{

}
11 changes: 11 additions & 0 deletions src/Models/Pretrained/YolosPretrainedModel.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?php

declare(strict_types=1);


namespace Codewithkyrian\Transformers\Models\Pretrained;

/**
 * Common base class for the YOLOS model classes. It declares no members of
 * its own; all behaviour is inherited from PretrainedModel.
 */
class YolosPretrainedModel extends PretrainedModel
{

}
Loading

0 comments on commit eb27c1e

Please sign in to comment.