Commit b1a77cc

Merge pull request #65 from tomas-gajarsky/feat/tensor-input

Feat/tensor input

tomas-gajarsky authored Jan 30, 2024
2 parents c5714fa + 21b7e48 commit b1a77cc

Showing 24 changed files with 829 additions and 56 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
@@ -1,6 +1,15 @@
# Change Log


## 0.4.2

Released on January 30, 2024.

### Added
* Tensor input support for FaceAnalyzer run method
* TensorReader for transforming incoming torch tensors

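The two additions above are mutually exclusive inputs: `FaceAnalyzer.run` now accepts either a `path_image` or a `tensor`, but not both. A minimal pure-Python stand-in for that contract (a sketch mirroring the validation added in this release, not the facetorch API itself):

```python
def resolve_reader_input(path_image=None, tensor=None):
    """Stand-in for the input validation FaceAnalyzer.run gains in 0.4.2."""
    if path_image is None and tensor is None:
        raise ValueError("Either path_image or tensor must be provided.")
    if path_image is not None and tensor is not None:
        raise ValueError("Only one of path_image or tensor must be provided.")
    # whichever argument was given becomes the reader input
    return path_image if path_image is not None else tensor
```

With a path the `ImageReader` handles the input; with a tensor the new `TensorReader` does.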

## 0.4.1

Released on December 14, 2023.
12 changes: 12 additions & 0 deletions conf/analyzer/reader/file.yaml
@@ -0,0 +1,12 @@
_target_: facetorch.analyzer.reader.ImageReader
device:
_target_: torch.device
type: ${analyzer.device} # str
optimize_transform: ${analyzer.optimize_transforms} # bool
transform:
_target_: torchvision.transforms.Compose
transforms:
- _target_: facetorch.transforms.SquarePad
- _target_: torchvision.transforms.Resize
size: [1080] # List[int]
antialias: True # bool
12 changes: 12 additions & 0 deletions conf/analyzer/reader/tensor.yaml
@@ -0,0 +1,12 @@
_target_: facetorch.analyzer.reader.TensorReader
device:
_target_: torch.device
type: ${analyzer.device} # str
optimize_transform: ${analyzer.optimize_transforms} # bool
transform:
_target_: torchvision.transforms.Compose
transforms:
- _target_: facetorch.transforms.SquarePad
- _target_: torchvision.transforms.Resize
size: [1080] # List[int]
antialias: True # bool
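Both readers share the same transform pipeline: `SquarePad` followed by a resize to 1080. A rough illustration of the padding arithmetic (hypothetical helper; the real `facetorch.transforms.SquarePad` operates on torch tensors, this only shows the geometry):

```python
def square_pad(width, height):
    """Compute (left, top, right, bottom) padding that makes an image square.

    Sketch of the idea behind facetorch.transforms.SquarePad; the padding is
    split as evenly as possible between the two sides of the shorter axis.
    """
    side = max(width, height)
    pad_w, pad_h = side - width, side - height
    left, top = pad_w // 2, pad_h // 2
    return (left, top, pad_w - left, pad_h - top)
```

After padding, the `Resize` step with a single-int `size: [1080]` scales the now-square image to 1080 pixels per side.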
31 changes: 31 additions & 0 deletions conf/tensor.config.yaml
@@ -0,0 +1,31 @@
defaults:
- analyzer: default.yaml
- analyzer/reader: tensor.yaml
- analyzer/detector: retinaface.yaml
- analyzer/unifier: img_380.yaml
- analyzer/predictor/embed: r50_vggface_1m.yaml
- analyzer/predictor/verify: adaface_ir101_webface12m.yaml
- analyzer/predictor/fer: efficientnet_b2_8.yaml
- analyzer/predictor/au: open_graph_swin_base.yaml
- analyzer/predictor/va: elim_al_alexnet.yaml
- analyzer/predictor/deepfake: efficientnet_b7.yaml
- analyzer/predictor/align: synergy_mobilenet_v2.yaml
- analyzer/utilizer/align: lmk3d_mesh_pose.yaml
- analyzer/utilizer/draw_boxes: torchvision_boxes.yaml
- analyzer/utilizer/draw_landmarks: torchvision_keypoints.yaml
- analyzer/utilizer/save: image_saver.yaml
- analyzer/logger: json_format.yaml
- _self_

main:
sleep: 3

debug: true

path_tensor: /opt/facetorch/data/input/tensor.pt # str
batch_size: 8 # int
fix_img_size: True # bool
return_img_data: False # bool
include_tensors: True # bool
path_output: /opt/facetorch/data/output/test_tensor.png # Optional[str]

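With `batch_size: 8`, detected faces are sent to the predictors in batches of at most eight. A tiny illustration of the chunking behavior this setting implies (hypothetical helper, not part of facetorch):

```python
def batch_sizes(n_faces, batch_size=8):
    # split n_faces into consecutive prediction batches of at most batch_size
    return [min(batch_size, n_faces - i) for i in range(0, n_faces, batch_size)]
```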
31 changes: 31 additions & 0 deletions conf/tests.config.5.yaml
@@ -0,0 +1,31 @@
defaults:
- analyzer: default.yaml
- analyzer/reader: tensor.yaml
- analyzer/detector: retinaface.yaml
- analyzer/unifier: img_380.yaml
- analyzer/predictor/embed: r50_vggface_1m.yaml
- analyzer/predictor/verify: adaface_ir101_webface12m.yaml
- analyzer/predictor/fer: efficientnet_b2_8.yaml
- analyzer/predictor/au: open_graph_swin_base.yaml
- analyzer/predictor/va: elim_al_alexnet.yaml
- analyzer/predictor/deepfake: efficientnet_b7.yaml
- analyzer/predictor/align: synergy_mobilenet_v2.yaml
- analyzer/utilizer/align: lmk3d_mesh_pose.yaml
- analyzer/utilizer/draw_boxes: torchvision_boxes.yaml
- analyzer/utilizer/draw_landmarks: torchvision_keypoints.yaml
- analyzer/utilizer/save: image_saver.yaml
- analyzer/logger: json_format.yaml
- _self_

main:
sleep: 3

debug: true

path_image: /opt/facetorch/data/input/test.jpg # str
batch_size: 8 # int
fix_img_size: True # bool
return_img_data: True # bool
include_tensors: True # bool
path_output: /opt/facetorch/tests/data/output/test_tensor.png # str
path_tensor: /opt/facetorch/data/input/tensor.pt # str
Binary file added data/input/tensor.pt
Binary file added data/output/test_tensor.png
83 changes: 68 additions & 15 deletions docs/facetorch/analyzer/core.html
@@ -108,23 +108,25 @@ <h1 class="title">Module <code>facetorch.analyzer.core</code></h1>
@Timer(&#34;FaceAnalyzer.run&#34;, &#34;{name}: {milliseconds:.2f} ms&#34;, logger=logger.debug)
def run(
self,
path_image: str,
path_image: Optional[str] = None,
batch_size: int = 8,
fix_img_size: bool = False,
return_img_data: bool = False,
include_tensors: bool = False,
path_output: Optional[str] = None,
tensor: Optional[torch.Tensor] = None,
) -&gt; Union[Response, ImageData]:
&#34;&#34;&#34;Reads image, detects faces, unifies the detected faces, predicts facial features
and returns analyzed data.

Args:
path_image (str): Path to the input image.
path_image (Optional[str]): Path to the image to be analyzed. If None, tensor must be provided. Default: None.
batch_size (int): Batch size for making predictions on the faces. Default is 8.
fix_img_size (bool): If True, resizes the image to the size specified in reader. Default is False.
return_img_data (bool): If True, returns all image data including tensors, otherwise only returns the faces. Default is False.
include_tensors (bool): If True, keeps tensors in the returned data object. Default is False.
path_output (Optional[str]): Path where to save the image with detected faces. If None, the image is not saved. Default: None.
tensor (Optional[torch.Tensor]): Image tensor to be analyzed. If None, path_image must be provided. Default: None.

Returns:
Union[Response, ImageData]: If return_img_data is False, returns a Response object containing the faces and their facial features. If return_img_data is True, returns the entire ImageData object.
@@ -148,8 +150,23 @@ <h1 class="title">Module <code>facetorch.analyzer.core</code></h1>
return data

self.logger.info(&#34;Running FaceAnalyzer&#34;)
self.logger.info(&#34;Reading image&#34;, extra={&#34;path_image&#34;: path_image})
data = self.reader.run(path_image, fix_img_size=fix_img_size)

if path_image is None and tensor is None:
raise ValueError(&#34;Either path_image or tensor must be provided.&#34;)

if path_image is not None and tensor is not None:
raise ValueError(&#34;Only one of path_image or tensor must be provided.&#34;)

if path_image is not None:
reader_input = path_image
else:
reader_input = tensor

self.logger.info(
&#34;Reading image&#34;, extra={&#34;path_image&#34;: path_image, &#34;tensor&#34;: tensor}
)
data = self.reader.run(reader_input, fix_img_size=fix_img_size)

path_output = None if path_output == &#34;None&#34; else path_output
data.path_output = path_output

@@ -314,23 +331,25 @@ <h2 id="attributes">Attributes</h2>
@Timer(&#34;FaceAnalyzer.run&#34;, &#34;{name}: {milliseconds:.2f} ms&#34;, logger=logger.debug)
def run(
self,
path_image: str,
path_image: Optional[str] = None,
batch_size: int = 8,
fix_img_size: bool = False,
return_img_data: bool = False,
include_tensors: bool = False,
path_output: Optional[str] = None,
tensor: Optional[torch.Tensor] = None,
) -&gt; Union[Response, ImageData]:
&#34;&#34;&#34;Reads image, detects faces, unifies the detected faces, predicts facial features
and returns analyzed data.

Args:
path_image (str): Path to the input image.
path_image (Optional[str]): Path to the image to be analyzed. If None, tensor must be provided. Default: None.
batch_size (int): Batch size for making predictions on the faces. Default is 8.
fix_img_size (bool): If True, resizes the image to the size specified in reader. Default is False.
return_img_data (bool): If True, returns all image data including tensors, otherwise only returns the faces. Default is False.
include_tensors (bool): If True, keeps tensors in the returned data object. Default is False.
path_output (Optional[str]): Path where to save the image with detected faces. If None, the image is not saved. Default: None.
tensor (Optional[torch.Tensor]): Image tensor to be analyzed. If None, path_image must be provided. Default: None.

Returns:
Union[Response, ImageData]: If return_img_data is False, returns a Response object containing the faces and their facial features. If return_img_data is True, returns the entire ImageData object.
@@ -354,8 +373,23 @@ <h2 id="attributes">Attributes</h2>
return data

self.logger.info(&#34;Running FaceAnalyzer&#34;)
self.logger.info(&#34;Reading image&#34;, extra={&#34;path_image&#34;: path_image})
data = self.reader.run(path_image, fix_img_size=fix_img_size)

if path_image is None and tensor is None:
raise ValueError(&#34;Either path_image or tensor must be provided.&#34;)

if path_image is not None and tensor is not None:
raise ValueError(&#34;Only one of path_image or tensor must be provided.&#34;)

if path_image is not None:
reader_input = path_image
else:
reader_input = tensor

self.logger.info(
&#34;Reading image&#34;, extra={&#34;path_image&#34;: path_image, &#34;tensor&#34;: tensor}
)
data = self.reader.run(reader_input, fix_img_size=fix_img_size)

path_output = None if path_output == &#34;None&#34; else path_output
data.path_output = path_output

@@ -404,15 +438,15 @@ <h2 id="attributes">Attributes</h2>
<h3>Methods</h3>
<dl>
<dt id="facetorch.analyzer.core.FaceAnalyzer.run"><code class="name flex">
<span>def <span class="ident">run</span></span>(<span>self, path_image: str, batch_size: int = 8, fix_img_size: bool = False, return_img_data: bool = False, include_tensors: bool = False, path_output: Optional[str] = None) ‑> Union[<a title="facetorch.datastruct.Response" href="../datastruct.html#facetorch.datastruct.Response">Response</a><a title="facetorch.datastruct.ImageData" href="../datastruct.html#facetorch.datastruct.ImageData">ImageData</a>]</span>
<span>def <span class="ident">run</span></span>(<span>self, path_image: Optional[str] = None, batch_size: int = 8, fix_img_size: bool = False, return_img_data: bool = False, include_tensors: bool = False, path_output: Optional[str] = None, tensor: Optional[torch.Tensor] = None) ‑> Union[<a title="facetorch.datastruct.Response" href="../datastruct.html#facetorch.datastruct.Response">Response</a><a title="facetorch.datastruct.ImageData" href="../datastruct.html#facetorch.datastruct.ImageData">ImageData</a>]</span>
</code></dt>
<dd>
<div class="desc"><p>Reads image, detects faces, unifies the detected faces, predicts facial features
and returns analyzed data.</p>
<h2 id="args">Args</h2>
<dl>
<dt><strong><code>path_image</code></strong> :&ensp;<code>str</code></dt>
<dd>Path to the input image.</dd>
<dt><strong><code>path_image</code></strong> :&ensp;<code>Optional[str]</code></dt>
<dd>Path to the image to be analyzed. If None, tensor must be provided. Default: None.</dd>
<dt><strong><code>batch_size</code></strong> :&ensp;<code>int</code></dt>
<dd>Batch size for making predictions on the faces. Default is 8.</dd>
<dt><strong><code>fix_img_size</code></strong> :&ensp;<code>bool</code></dt>
@@ -423,6 +457,8 @@ <h2 id="args">Args</h2>
<dd>If True, keeps tensors in the returned data object. Default is False.</dd>
<dt><strong><code>path_output</code></strong> :&ensp;<code>Optional[str]</code></dt>
<dd>Path where to save the image with detected faces. If None, the image is not saved. Default: None.</dd>
<dt><strong><code>tensor</code></strong> :&ensp;<code>Optional[torch.Tensor]</code></dt>
<dd>Image tensor to be analyzed. If None, path_image must be provided. Default: None.</dd>
</dl>
<h2 id="returns">Returns</h2>
<dl>
@@ -436,23 +472,25 @@ <h2 id="returns">Returns</h2>
<pre><code class="python">@Timer(&#34;FaceAnalyzer.run&#34;, &#34;{name}: {milliseconds:.2f} ms&#34;, logger=logger.debug)
def run(
self,
path_image: str,
path_image: Optional[str] = None,
batch_size: int = 8,
fix_img_size: bool = False,
return_img_data: bool = False,
include_tensors: bool = False,
path_output: Optional[str] = None,
tensor: Optional[torch.Tensor] = None,
) -&gt; Union[Response, ImageData]:
&#34;&#34;&#34;Reads image, detects faces, unifies the detected faces, predicts facial features
and returns analyzed data.

Args:
path_image (str): Path to the input image.
path_image (Optional[str]): Path to the image to be analyzed. If None, tensor must be provided. Default: None.
batch_size (int): Batch size for making predictions on the faces. Default is 8.
fix_img_size (bool): If True, resizes the image to the size specified in reader. Default is False.
return_img_data (bool): If True, returns all image data including tensors, otherwise only returns the faces. Default is False.
include_tensors (bool): If True, keeps tensors in the returned data object. Default is False.
path_output (Optional[str]): Path where to save the image with detected faces. If None, the image is not saved. Default: None.
tensor (Optional[torch.Tensor]): Image tensor to be analyzed. If None, path_image must be provided. Default: None.

Returns:
Union[Response, ImageData]: If return_img_data is False, returns a Response object containing the faces and their facial features. If return_img_data is True, returns the entire ImageData object.
@@ -476,8 +514,23 @@ <h2 id="returns">Returns</h2>
return data

self.logger.info(&#34;Running FaceAnalyzer&#34;)
self.logger.info(&#34;Reading image&#34;, extra={&#34;path_image&#34;: path_image})
data = self.reader.run(path_image, fix_img_size=fix_img_size)

if path_image is None and tensor is None:
raise ValueError(&#34;Either path_image or tensor must be provided.&#34;)

if path_image is not None and tensor is not None:
raise ValueError(&#34;Only one of path_image or tensor must be provided.&#34;)

if path_image is not None:
reader_input = path_image
else:
reader_input = tensor

self.logger.info(
&#34;Reading image&#34;, extra={&#34;path_image&#34;: path_image, &#34;tensor&#34;: tensor}
)
data = self.reader.run(reader_input, fix_img_size=fix_img_size)

path_output = None if path_output == &#34;None&#34; else path_output
data.path_output = path_output

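The `path_output = None if path_output == "None" else path_output` line retained above guards against Hydra/YAML configs delivering the literal string `"None"` instead of a Python `None`. That conversion in isolation (extracted for illustration, matching the logic in the diff):

```python
def normalize_path_output(path_output):
    # Hydra/YAML configs can pass the literal string "None";
    # FaceAnalyzer.run maps it back to Python None before saving output
    return None if path_output == "None" else path_output
```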