@@ -13,12 +13,14 @@ class ImageFeatureExtractor extends FeatureExtractor
13
13
{
14
14
/**
15
15
* The mean values for image normalization.
16
+ *
16
17
* @var int|int[]
17
18
*/
18
19
protected int |array |null $ imageMean ;
19
20
20
21
/**
21
22
* The standard deviation values for image normalization.
23
+ *
22
24
* @var int|int[]
23
25
*/
24
26
protected int |array |null $ imageStd ;
@@ -30,24 +32,28 @@ class ImageFeatureExtractor extends FeatureExtractor
30
32
31
33
/**
32
34
* Whether to rescale the image pixel values to the [0,1] range.
35
+ *
33
36
* @var bool
34
37
*/
35
38
protected bool $ doRescale ;
36
39
37
40
/**
38
41
* The factor to use for rescaling the image pixel values.
42
+ *
39
43
* @var float
40
44
*/
41
45
protected float $ rescaleFactor ;
42
46
43
47
/**
44
48
* Whether to normalize the image pixel values.
49
+ *
45
50
* @var ?bool
46
51
*/
47
52
protected ?bool $ doNormalize ;
48
53
49
54
/**
50
55
* Whether to resize the image.
56
+ *
51
57
* @var ?bool
52
58
*/
53
59
protected ?bool $ doResize ;
@@ -56,6 +62,7 @@ class ImageFeatureExtractor extends FeatureExtractor
56
62
57
63
/**
58
64
* The size to resize the image to.
65
+ *
59
66
* @var ?array
60
67
*/
61
68
protected ?array $ size ;
@@ -100,7 +107,9 @@ public function __construct(public array $config)
100
107
101
108
/**
102
109
* Crops the margin of the image. Gray pixels are considered margin (i.e., pixels with a value below the threshold).
110
+ *
103
111
* @param int $grayThreshold Value below which pixels are considered to be gray.
112
+ *
104
113
* @return static The cropped image.
105
114
*/
106
115
public function cropMargin (Image $ image , int $ grayThreshold = 200 ): static
@@ -152,18 +161,20 @@ public function cropMargin(Image $image, int $grayThreshold = 200): static
152
161
153
162
/**
154
163
* Pad the image by a certain amount.
164
+ *
155
165
* @param Tensor $imageTensor The pixel data to pad.
156
166
* @param int[]|int $padSize The dimensions of the padded image.
157
167
* @param string $mode The type of padding to add.
158
168
* @param bool $center Whether to center the image.
159
169
* @param int $constantValues The constant value to use for padding.
170
+ *
160
171
* @return Tensor The padded pixel data and image dimensions.
161
172
* @throws \Exception
162
173
*/
163
174
public function padImage (
164
175
Tensor $ imageTensor ,
165
176
int |array $ padSize ,
166
- string $ tensorFormat = 'CHW ' , // 'HWC' or 'CHW
177
+ string $ tensorFormat = 'CHW ' , // 'HWC' or 'CHW
167
178
string $ mode = 'constant ' ,
168
179
bool $ center = false ,
169
180
int $ constantValues = 0
@@ -260,8 +271,10 @@ private function calculateReflectOffset(int $val, int $max): int
260
271
/**
261
272
* Find the target (width, height) dimension of the output image after
262
273
* resizing given the input image and the desired size.
274
+ *
263
275
* @param Image $image The image to be resized.
264
276
* @param int|array|null $size The size to use for resizing the image.
277
+ *
265
278
* @return array The target (width, height) dimension of the output image after resizing.
266
279
*/
267
280
public function getResizeOutputImageSize (Image $ image , int |array |null $ size ): array
@@ -336,7 +349,7 @@ public function getResizeOutputImageSize(Image $image, int|array|null $size): ar
336
349
} elseif ($ this ->sizeDivisibility != null ) {
337
350
return $ this ->enforceSizeDivisibility ([$ srcWidth , $ srcHeight ], $ this ->sizeDivisibility );
338
351
} else {
339
- throw new \Exception ("Could not resize image due to unsupported 'size' parameter passed: " . json_encode ($ size ));
352
+ throw new \Exception ("Could not resize image due to unsupported 'size' parameter passed: " . json_encode ($ size ));
340
353
}
341
354
}
342
355
@@ -349,6 +362,7 @@ public function getResizeOutputImageSize(Image $image, int|array|null $size): ar
349
362
* @param ?bool $doPad
350
363
* @param ?bool $doConvertRGB
351
364
* @param ?bool $doConvertGrayscale
365
+ *
352
366
* @return array The preprocessed image.
353
367
* @throws \Exception
354
368
*/
@@ -412,7 +426,7 @@ public function preprocess(
412
426
if ($ doNormalize ?? $ this ->doNormalize ) {
413
427
if (is_array ($ this ->imageMean )) {
414
428
// Negate the mean values to add instead of subtract
415
- $ negatedMean = array_map (fn ($ mean ) => -$ mean , $ this ->imageMean );
429
+ $ negatedMean = array_map (fn ($ mean ) => -$ mean , $ this ->imageMean );
416
430
$ imageMean = Tensor::repeat ($ negatedMean , $ image ->height () * $ image ->width (), 1 );
417
431
} else {
418
432
$ imageMean = Tensor::fill ([$ image ->channels * $ image ->height () * $ image ->width ()], -$ this ->imageMean );
@@ -421,7 +435,7 @@ public function preprocess(
421
435
422
436
if (is_array ($ this ->imageStd )) {
423
437
// Invert the standard deviation values to multiply instead of divide
424
- $ inversedStd = array_map (fn ($ std ) => 1 / $ std , $ this ->imageStd );
438
+ $ inversedStd = array_map (fn ($ std ) => 1 / $ std , $ this ->imageStd );
425
439
$ imageStd = Tensor::repeat ($ inversedStd , $ image ->height () * $ image ->width (), 1 );
426
440
} else {
427
441
$ imageStd = Tensor::fill ([$ image ->channels * $ image ->height () * $ image ->width ()], 1 / $ this ->imageStd );
@@ -433,7 +447,7 @@ public function preprocess(
433
447
$ imageStd = $ imageStd ->reshape ($ imageTensor ->shape ());
434
448
435
449
if (count ($ imageMean ) !== $ image ->channels || count ($ imageStd ) !== $ image ->channels ) {
436
- throw new \Exception ("When set to arrays, the length of `imageMean` ( " . count ($ imageMean ) . ") and `imageStd` ( " . count ($ imageStd ) . ") must match the number of channels in the image ( {$ image ->channels }). " );
450
+ throw new \Exception ("When set to arrays, the length of `imageMean` ( " . count ($ imageMean ). ") and `imageStd` ( " . count ($ imageStd ). ") must match the number of channels in the image ( {$ image ->channels }). " );
437
451
}
438
452
439
453
// Normalize pixel data
@@ -461,8 +475,10 @@ public function preprocess(
461
475
* Calls the feature extraction process on an array of images,
462
476
* preprocesses each image, and concatenates the resulting
463
477
* features into a single Tensor.
478
+ *
464
479
* @param Image|Image[] $images The image(s) to extract features from.
465
480
* @param mixed ...$args Additional arguments.
481
+ *
466
482
* @return array An object containing the concatenated pixel values (and other metadata) of the preprocessed images.
467
483
*/
468
484
public function __invoke (Image |array $ images , ...$ args ): array
@@ -491,4 +507,48 @@ public function __invoke(Image|array $images, ...$args): array
491
507
];
492
508
}
493
509
510
/**
 * Snaps a (width, height) pair down to multiples of the given divisor.
 *
 * Each dimension is integer-divided by the divisor and multiplied back,
 * so it is rounded down to a multiple of the divisor. The quotient is
 * clamped to at least 1, which means a dimension smaller than the divisor
 * comes back as exactly one divisor rather than zero.
 *
 * @param array{int, int} $size The image size as [width, height].
 * @param int $divisor The divisor both dimensions must be a multiple of.
 *
 * @return array{int, int} The adjusted [width, height].
 */
private function enforceSizeDivisibility(array $size, int $divisor): array
{
    $aligned = [];

    // Index 0 is the width, index 1 is the height; both get the same treatment.
    foreach ([0, 1] as $axis) {
        $aligned[] = max(1, intdiv($size[$axis], $divisor)) * $divisor;
    }

    return $aligned;
}
527
+
528
/**
 * Constrain a value to be a multiple of a number.
 *
 * The value is first rounded to the nearest multiple, with exact ties
 * going to the even quotient. If that result exceeds $maxVal, the quotient
 * is rounded down instead. If the result is then below $minVal, the
 * quotient is rounded up instead. The minimum check runs last and does not
 * re-check the maximum, so it takes precedence when both cannot be met.
 *
 * @param int $val The value to constrain.
 * @param int $multiple The number to constrain to.
 * @param int $minVal The minimum value to constrain to.
 * @param int|null $maxVal The maximum value to constrain to, or null for no upper bound.
 *
 * @return int The constrained value, always a multiple of $multiple.
 */
private function constraintToMultipleOf(int $val, int $multiple, int $minVal = 0, ?int $maxVal = null): int
{
    $ratio = $val / $multiple;

    // round(), floor() and ceil() return floats. Cast the quotient to int so
    // the declared int return type holds even under strict_types=1.
    $x = (int) round($ratio, 0, PHP_ROUND_HALF_EVEN) * $multiple;

    if ($maxVal !== null && $x > $maxVal) {
        $x = (int) floor($ratio) * $multiple;
    }

    if ($x < $minVal) {
        $x = (int) ceil($ratio) * $multiple;
    }

    return $x;
}
494
554
}
0 commit comments