Skip to content

Commit 74f80be

Browse files
feat: implement enforce size divisibility for image feature extractor
1 parent 901a049 commit 74f80be

File tree

1 file changed

+65
-5
lines changed

1 file changed

+65
-5
lines changed

src/FeatureExtractors/ImageFeatureExtractor.php

+65-5
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,14 @@ class ImageFeatureExtractor extends FeatureExtractor
1313
{
1414
/**
1515
* The mean values for image normalization.
16+
*
1617
* @var int|int[]|null
1718
*/
1819
protected int|array|null $imageMean;
1920

2021
/**
2122
* The standard deviation values for image normalization.
23+
*
2224
* @var int|int[]|null
2325
*/
2426
protected int|array|null $imageStd;
@@ -30,24 +32,28 @@ class ImageFeatureExtractor extends FeatureExtractor
3032

3133
/**
3234
* Whether to rescale the image pixel values to the [0,1] range.
35+
*
3336
* @var bool
3437
*/
3538
protected bool $doRescale;
3639

3740
/**
3841
* The factor to use for rescaling the image pixel values.
42+
*
3943
* @var float
4044
*/
4145
protected float $rescaleFactor;
4246

4347
/**
4448
* Whether to normalize the image pixel values.
49+
*
4550
* @var ?bool
4651
*/
4752
protected ?bool $doNormalize;
4853

4954
/**
5055
* Whether to resize the image.
56+
*
5157
* @var ?bool
5258
*/
5359
protected ?bool $doResize;
@@ -56,6 +62,7 @@ class ImageFeatureExtractor extends FeatureExtractor
5662

5763
/**
5864
* The size to resize the image to.
65+
*
5966
* @var ?array
6067
*/
6168
protected ?array $size;
@@ -100,7 +107,9 @@ public function __construct(public array $config)
100107

101108
/**
102109
* Crops the margin of the image. Gray pixels are considered margin (i.e., pixels with a value below the threshold).
110+
*
103111
* @param int $grayThreshold Value below which pixels are considered to be gray.
112+
*
104113
* @return static The cropped image.
105114
*/
106115
public function cropMargin(Image $image, int $grayThreshold = 200): static
@@ -152,18 +161,20 @@ public function cropMargin(Image $image, int $grayThreshold = 200): static
152161

153162
/**
154163
* Pad the image by a certain amount.
164+
*
155165
* @param Tensor $imageTensor The pixel data to pad.
156166
* @param int[]|int $padSize The dimensions of the padded image.
157167
* @param string $mode The type of padding to add.
158168
* @param bool $center Whether to center the image.
159169
* @param int $constantValues The constant value to use for padding.
170+
*
160171
* @return Tensor The padded pixel data and image dimensions.
161172
* @throws \Exception
162173
*/
163174
public function padImage(
164175
Tensor $imageTensor,
165176
int|array $padSize,
166-
string $tensorFormat = 'CHW', // 'HWC' or 'CHW
177+
string $tensorFormat = 'CHW', // 'HWC' or 'CHW'
167178
string $mode = 'constant',
168179
bool $center = false,
169180
int $constantValues = 0
@@ -260,8 +271,10 @@ private function calculateReflectOffset(int $val, int $max): int
260271
/**
261272
* Find the target (width, height) dimension of the output image after
262273
* resizing given the input image and the desired size.
274+
*
263275
* @param Image $image The image to be resized.
264276
* @param int|array|null $size The size to use for resizing the image.
277+
*
265278
* @return array The target (width, height) dimension of the output image after resizing.
266279
*/
267280
public function getResizeOutputImageSize(Image $image, int|array|null $size): array
@@ -336,7 +349,7 @@ public function getResizeOutputImageSize(Image $image, int|array|null $size): ar
336349
} elseif ($this->sizeDivisibility != null) {
337350
return $this->enforceSizeDivisibility([$srcWidth, $srcHeight], $this->sizeDivisibility);
338351
} else {
339-
throw new \Exception("Could not resize image due to unsupported 'size' parameter passed: " . json_encode($size));
352+
throw new \Exception("Could not resize image due to unsupported 'size' parameter passed: ".json_encode($size));
340353
}
341354
}
342355

@@ -349,6 +362,7 @@ public function getResizeOutputImageSize(Image $image, int|array|null $size): ar
349362
* @param ?bool $doPad
350363
* @param ?bool $doConvertRGB
351364
* @param ?bool $doConvertGrayscale
365+
*
352366
* @return array The preprocessed image.
353367
* @throws \Exception
354368
*/
@@ -412,7 +426,7 @@ public function preprocess(
412426
if ($doNormalize ?? $this->doNormalize) {
413427
if (is_array($this->imageMean)) {
414428
// Negate the mean values to add instead of subtract
415-
$negatedMean = array_map(fn($mean) => -$mean, $this->imageMean);
429+
$negatedMean = array_map(fn ($mean) => -$mean, $this->imageMean);
416430
$imageMean = Tensor::repeat($negatedMean, $image->height() * $image->width(), 1);
417431
} else {
418432
$imageMean = Tensor::fill([$image->channels * $image->height() * $image->width()], -$this->imageMean);
@@ -421,7 +435,7 @@ public function preprocess(
421435

422436
if (is_array($this->imageStd)) {
423437
// Invert the standard deviation values to multiply instead of divide
424-
$inversedStd = array_map(fn($std) => 1 / $std, $this->imageStd);
438+
$inversedStd = array_map(fn ($std) => 1 / $std, $this->imageStd);
425439
$imageStd = Tensor::repeat($inversedStd, $image->height() * $image->width(), 1);
426440
} else {
427441
$imageStd = Tensor::fill([$image->channels * $image->height() * $image->width()], 1 / $this->imageStd);
@@ -433,7 +447,7 @@ public function preprocess(
433447
$imageStd = $imageStd->reshape($imageTensor->shape());
434448

435449
if (count($imageMean) !== $image->channels || count($imageStd) !== $image->channels) {
436-
throw new \Exception("When set to arrays, the length of `imageMean` (" . count($imageMean) . ") and `imageStd` (" . count($imageStd) . ") must match the number of channels in the image ({$image->channels}).");
450+
throw new \Exception("When set to arrays, the length of `imageMean` (".count($imageMean).") and `imageStd` (".count($imageStd).") must match the number of channels in the image ({$image->channels}).");
437451
}
438452

439453
// Normalize pixel data
@@ -461,8 +475,10 @@ public function preprocess(
461475
* Calls the feature extraction process on an array of images,
462476
* preprocesses each image, and concatenates the resulting
463477
* features into a single Tensor.
478+
*
464479
* @param Image|Image[] $images The image(s) to extract features from.
465480
* @param mixed ...$args Additional arguments.
481+
*
466482
* @return array An object containing the concatenated pixel values (and other metadata) of the preprocessed images.
467483
*/
468484
public function __invoke(Image|array $images, ...$args): array
@@ -491,4 +507,48 @@ public function __invoke(Image|array $images, ...$args): array
491507
];
492508
}
493509

510+
/**
 * Snaps a (width, height) pair onto multiples of the given divisor.
 *
 * Each side is rounded down to the nearest multiple of the divisor, but is
 * never allowed to drop below one full multiple: a side smaller than the
 * divisor therefore comes back as the divisor itself.
 *
 * @param array{int, int} $size The (width, height) of the image.
 * @param int $divisor The number both sides must be divisible by.
 *
 * @return array{int, int} The adjusted (width, height).
 */
private function enforceSizeDivisibility(array $size, int $divisor): array
{
    // Number of whole divisor-sized steps that fit in each side, at least one.
    $widthSteps = max(intdiv($size[0], $divisor), 1);
    $heightSteps = max(intdiv($size[1], $divisor), 1);

    return [$widthSteps * $divisor, $heightSteps * $divisor];
}
527+
528+
/**
 * Constrain a value to be a multiple of a number.
 *
 * The value is first rounded to the nearest multiple of $multiple, with ties
 * going to the even quotient. If that result exceeds $maxVal, the quotient is
 * floored instead. If the result is then below $minVal, the quotient is
 * ceiled instead; this check runs last, so it takes precedence over $maxVal.
 *
 * @param int $val The value to constrain.
 * @param int $multiple The number the result must be a multiple of.
 * @param int $minVal The minimum value to constrain to.
 * @param int|null $maxVal The maximum value to constrain to, or null for no upper bound.
 *
 * @return int The constrained value.
 * @throws \DivisionByZeroError If $multiple is 0.
 */
private function constraintToMultipleOf(int $val, int $multiple, int $minVal = 0, ?int $maxVal = null): int
{
    $ratio = $val / $multiple;

    // round(), floor() and ceil() return floats. Cast each quotient explicitly
    // so the declared int return type holds even under strict_types.
    $x = ((int) round($ratio, 0, PHP_ROUND_HALF_EVEN)) * $multiple;

    if ($maxVal !== null && $x > $maxVal) {
        $x = ((int) floor($ratio)) * $multiple;
    }

    if ($x < $minVal) {
        $x = ((int) ceil($ratio)) * $multiple;
    }

    return $x;
}
494554
}

0 commit comments

Comments
 (0)