Skip to content

Commit

Permalink
Merge pull request #53 from CodeWithKyrian/52-is-size_divisibility-su…
Browse files Browse the repository at this point in the history
…pported

feat: implement enforce size divisibility for image feature extractor
  • Loading branch information
CodeWithKyrian authored Jul 31, 2024
2 parents 901a049 + 74f80be commit d812db1
Showing 1 changed file with 65 additions and 5 deletions.
70 changes: 65 additions & 5 deletions src/FeatureExtractors/ImageFeatureExtractor.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@ class ImageFeatureExtractor extends FeatureExtractor
{
/**
* The mean values for image normalization.
*
* @var int|int[]
*/
protected int|array|null $imageMean;

/**
* The standard deviation values for image normalization.
*
* @var int|int[]
*/
protected int|array|null $imageStd;
Expand All @@ -30,24 +32,28 @@ class ImageFeatureExtractor extends FeatureExtractor

/**
* Whether to rescale the image pixel values to the [0,1] range.
*
* @var bool
*/
protected bool $doRescale;

/**
* The factor to use for rescaling the image pixel values.
*
* @var float
*/
protected float $rescaleFactor;

/**
* Whether to normalize the image pixel values.
*
* @var ?bool
*/
protected ?bool $doNormalize;

/**
* Whether to resize the image.
*
* @var ?bool
*/
protected ?bool $doResize;
Expand All @@ -56,6 +62,7 @@ class ImageFeatureExtractor extends FeatureExtractor

/**
* The size to resize the image to.
*
* @var ?array
*/
protected ?array $size;
Expand Down Expand Up @@ -100,7 +107,9 @@ public function __construct(public array $config)

/**
* Crops the margin of the image. Gray pixels are considered margin (i.e., pixels with a value below the threshold).
*
* @param int $grayThreshold Value below which pixels are considered to be gray.
*
* @return static The cropped image.
*/
public function cropMargin(Image $image, int $grayThreshold = 200): static
Expand Down Expand Up @@ -152,18 +161,20 @@ public function cropMargin(Image $image, int $grayThreshold = 200): static

/**
* Pad the image by a certain amount.
*
* @param Tensor $imageTensor The pixel data to pad.
* @param int[]|int $padSize The dimensions of the padded image.
* @param string $mode The type of padding to add.
* @param bool $center Whether to center the image.
* @param int $constantValues The constant value to use for padding.
*
* @return Tensor The padded pixel data and image dimensions.
* @throws \Exception
*/
public function padImage(
Tensor $imageTensor,
int|array $padSize,
string $tensorFormat = 'CHW', // 'HWC' or 'CHW
string $tensorFormat = 'CHW', // 'HWC' or 'CHW
string $mode = 'constant',
bool $center = false,
int $constantValues = 0
Expand Down Expand Up @@ -260,8 +271,10 @@ private function calculateReflectOffset(int $val, int $max): int
/**
* Find the target (width, height) dimension of the output image after
* resizing given the input image and the desired size.
*
* @param Image $image The image to be resized.
* @param int|array|null $size The size to use for resizing the image.
*
* @return array The target (width, height) dimension of the output image after resizing.
*/
public function getResizeOutputImageSize(Image $image, int|array|null $size): array
Expand Down Expand Up @@ -336,7 +349,7 @@ public function getResizeOutputImageSize(Image $image, int|array|null $size): ar
} elseif ($this->sizeDivisibility != null) {
return $this->enforceSizeDivisibility([$srcWidth, $srcHeight], $this->sizeDivisibility);
} else {
throw new \Exception("Could not resize image due to unsupported 'size' parameter passed: " . json_encode($size));
throw new \Exception("Could not resize image due to unsupported 'size' parameter passed: ".json_encode($size));
}
}

Expand All @@ -349,6 +362,7 @@ public function getResizeOutputImageSize(Image $image, int|array|null $size): ar
* @param ?bool $doPad
* @param ?bool $doConvertRGB
* @param ?bool $doConvertGrayscale
*
* @return array The preprocessed image.
* @throws \Exception
*/
Expand Down Expand Up @@ -412,7 +426,7 @@ public function preprocess(
if ($doNormalize ?? $this->doNormalize) {
if (is_array($this->imageMean)) {
// Negate the mean values to add instead of subtract
$negatedMean = array_map(fn($mean) => -$mean, $this->imageMean);
$negatedMean = array_map(fn ($mean) => -$mean, $this->imageMean);
$imageMean = Tensor::repeat($negatedMean, $image->height() * $image->width(), 1);
} else {
$imageMean = Tensor::fill([$image->channels * $image->height() * $image->width()], -$this->imageMean);
Expand All @@ -421,7 +435,7 @@ public function preprocess(

if (is_array($this->imageStd)) {
// Invert the standard deviation values so we can multiply instead of divide
$inversedStd = array_map(fn($std) => 1 / $std, $this->imageStd);
$inversedStd = array_map(fn ($std) => 1 / $std, $this->imageStd);
$imageStd = Tensor::repeat($inversedStd, $image->height() * $image->width(), 1);
} else {
$imageStd = Tensor::fill([$image->channels * $image->height() * $image->width()], 1 / $this->imageStd);
Expand All @@ -433,7 +447,7 @@ public function preprocess(
$imageStd = $imageStd->reshape($imageTensor->shape());

if (count($imageMean) !== $image->channels || count($imageStd) !== $image->channels) {
throw new \Exception("When set to arrays, the length of `imageMean` (" . count($imageMean) . ") and `imageStd` (" . count($imageStd) . ") must match the number of channels in the image ({$image->channels}).");
throw new \Exception("When set to arrays, the length of `imageMean` (".count($imageMean).") and `imageStd` (".count($imageStd).") must match the number of channels in the image ({$image->channels}).");
}

// Normalize pixel data
Expand Down Expand Up @@ -461,8 +475,10 @@ public function preprocess(
* Calls the feature extraction process on an array of images,
* preprocesses each image, and concatenates the resulting
* features into a single Tensor.
*
* @param Image|Image[] $images The image(s) to extract features from.
* @param mixed ...$args Additional arguments.
*
* @return array An object containing the concatenated pixel values (and other metadata) of the preprocessed images.
*/
public function __invoke(Image|array $images, ...$args): array
Expand Down Expand Up @@ -491,4 +507,48 @@ public function __invoke(Image|array $images, ...$args): array
];
}

/**
 * Snaps a (width, height) pair down to the nearest multiples of a divisor.
 *
 * Each dimension is floored to a whole number of divisor-sized steps, but is
 * never allowed to drop below a single step, so neither side collapses to 0.
 *
 * @param array{int, int} $size The (width, height) of the image.
 * @param int $divisor The value both dimensions must be divisible by.
 *
 * @return array{int, int} The adjusted (width, height).
 */
private function enforceSizeDivisibility(array $size, int $divisor): array
{
    // Floor one dimension to a multiple of the divisor, keeping at least one full step.
    $snap = static function ($length) use ($divisor) {
        $steps = intdiv($length, $divisor);

        return max($steps, 1) * $divisor;
    };

    [$srcWidth, $srcHeight] = $size;

    return [$snap($srcWidth), $snap($srcHeight)];
}

/**
 * Constrain a value to be a multiple of a number.
 *
 * The value is first rounded to the nearest multiple, with ties resolved
 * towards the even quotient. If that result exceeds $maxVal, the quotient is
 * floored instead; if the result is then below $minVal, the quotient is
 * ceiled instead.
 *
 * @param int $val The value to constrain.
 * @param int $multiple The number the result must be a multiple of. Must be non-zero.
 * @param int $minVal The minimum value to constrain to.
 * @param int|null $maxVal The maximum value to constrain to, or null for no upper bound.
 *
 * @return int The constrained value.
 * @throws \DivisionByZeroError If $multiple is 0.
 */
private function constraintToMultipleOf(int $val, int $multiple, int $minVal = 0, ?int $maxVal = null): int
{
    $ratio = $val / $multiple;

    // round(), floor() and ceil() all return floats. Each quotient is cast back
    // to int before multiplying so the method really returns an int: returning
    // a float from an `int` function is a TypeError under strict_types.
    $result = ((int) round($ratio, 0, PHP_ROUND_HALF_EVEN)) * $multiple;

    if ($maxVal !== null && $result > $maxVal) {
        $result = ((int) floor($ratio)) * $multiple;
    }

    if ($result < $minVal) {
        $result = ((int) ceil($ratio)) * $multiple;
    }

    return $result;
}
}

0 comments on commit d812db1

Please sign in to comment.