Skip to content

Commit

Permalink
Merge pull request #16 from Koldim2001/new_visualize_and_converter
Browse files Browse the repository at this point in the history
New visualize and converter
  • Loading branch information
Koldim2001 authored Jul 25, 2024
2 parents 04267c4 + 8fe0a85 commit bc66236
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 15 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,8 @@ Possible arguments of the ```visualize_results``` function:
| random_object_colors | bool | False | If true, colors for each object are selected randomly. |
| show_confidences | bool | False | If true and show_class=True, confidences near class are visualized. |
| axis_off | bool | True | If true, axis is turned off in the final visualization. |
| show_classes_list | list | [] | If empty, visualize all classes. Otherwise, visualize only classes in the list. |
| show_classes_list | list | [] | If empty, visualize all classes. Otherwise, visualize only classes in the list. |
| list_of_class_colors | list | None | A list of tuples representing the colors for each class in BGR format. If provided, these colors will be used for displaying the classes instead of random colors. |
| return_image_array | bool | False | If True, the function returns the image (BGR np.array) instead of displaying it. |


Expand Down
7 changes: 7 additions & 0 deletions patched_yolo_infer/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ The library also provides a sleek customization of the visualization of the infe
**Model Support**: The library offers support for multiple ultralytics deep learning [models](https://docs.ultralytics.com/models/), such as YOLOv8, YOLOv8-seg, YOLOv9, YOLOv9-seg, YOLOv10, FastSAM, and RTDETR. Users can select from pre-trained options or utilize custom-trained models to best meet their task requirements.


__Explanation of how Patch-Based-Inference works:__

<p align="center">
<img width="600" alt="patched_inf_explanation" src="https://github.com/Koldim2001/YOLO-Patch-Based-Inference/blob/main/readme_content/patched_inf_explanation.gif?raw=true">
</p>

## Installation
You can install the library via pip:

Expand Down Expand Up @@ -141,6 +147,7 @@ Visualizes custom results of object detection or segmentation on an image.
- **show_confidences** (*bool*): If true and show_class=True, confidences near class are visualized. Default is False.
- **axis_off** (*bool*): If true, axis is turned off in the final visualization. Default is True.
- **show_classes_list** (*list*): If empty, visualize all classes. Otherwise, visualize only classes in the list.
- **list_of_class_colors** (*list*): A list of tuples representing the colors for each class in BGR format. If provided, these colors will be used for displaying the classes instead of random colors.
- **return_image_array** (*bool*): If True, the function returns the image (BGR np.array) instead of displaying it. Default is False.


Expand Down
9 changes: 7 additions & 2 deletions patched_yolo_infer/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
from .functions_extra import visualize_results_usual_yolo_inference, get_crops, visualize_results
from .functions_extra import (
visualize_results_usual_yolo_inference,
get_crops,
visualize_results,
create_masks_from_polygons,
)

from .nodes.MakeCropsDetectThem import MakeCropsDetectThem
from .nodes.CombineDetections import CombineDetections
from .elements.CropElement import CropElement
from .elements.CropElement import CropElement
6 changes: 3 additions & 3 deletions patched_yolo_infer/elements/CropElement.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def calculate_inference(self, model, imgsz=640, conf=0.35, iou=0.7, segment=Fals
self.polygons = [mask.astype(np.uint16) for mask in pred.masks.xy]
else:
# Get the masks
self.detected_masks = pred.masks.data.cpu().numpy()
self.detected_masks = pred.masks.data.cpu().numpy().astype(np.uint8)


def calculate_real_values(self):
Expand Down Expand Up @@ -88,7 +88,7 @@ def calculate_real_values(self):
x_start_global:x_start_global+self.crop.shape[1]] = mask_resized

# Append the masked image to the list of detected_masks_real
self.detected_masks_real.append(black_image)
self.detected_masks_real.append(black_image.astype(np.uint8))

if self.polygons is not None:
# Adjust the mask coordinates
Expand Down Expand Up @@ -116,7 +116,7 @@ def resize_results(self):
# Resize mask
mask_resized = cv2.resize(mask, (self.source_image.shape[1], self.source_image.shape[0]),
interpolation=cv2.INTER_NEAREST)
resized_masks.append(mask_resized)
resized_masks.append(mask_resized.astype(np.uint8))


for polygon in self.detected_polygons_real:
Expand Down
67 changes: 59 additions & 8 deletions patched_yolo_infer/functions_extra.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def visualize_results_usual_yolo_inference(
show_confidences=False,
axis_off=True,
show_classes_list=[],
list_of_class_colors=None,
return_image_array=False,
inference_extra_args=None,
):
Expand All @@ -44,8 +45,8 @@ def visualize_results_usual_yolo_inference(
show_class (bool): Whether to show class labels. Default is True.
fill_mask (bool): Whether to fill the segmented regions with color. Default is False.
alpha (float): The transparency of filled masks. Default is 0.3.
color_class_background (tuple): The background bgr color for class labels. Default is (0, 0, 255) (red).
color_class_text (tuple): The text color for class labels. Default is (255, 255, 255) (white).
color_class_background (tuple): The background BGR color for class labels. Default is (0, 0, 255) (red).
color_class_text (tuple): The text BGR color for class labels. Default is (255, 255, 255) (white).
thickness (int): The thickness of bounding box and text. Default is 4.
font: The font type for class labels. Default is cv2.FONT_HERSHEY_SIMPLEX.
font_scale (float): The scale factor for font size. Default is 1.5.
Expand All @@ -56,6 +57,9 @@ def visualize_results_usual_yolo_inference(
axis_off (bool): If True, axis is turned off in the final visualization.
show_classes_list (list): If empty, visualize all classes. Otherwise, visualize only classes in the list.
inference_extra_args (dict/None): Dictionary with extra ultralytics inference parameters.
list_of_class_colors (list/None): A list of tuples representing the colors for each class in BGR format. If provided,
these colors will be used for displaying the classes instead of random colors. The number of tuples
in the list must match the number of possible classes in the network.
return_image_array (bool): If True, the function returns the image bgr array instead of displaying it.
Default is False.
Expand Down Expand Up @@ -106,10 +110,12 @@ def visualize_results_usual_yolo_inference(

if random_object_colors:
color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
else:
elif list_of_class_colors is None:
# Assign color according to class
random.seed(int(classes[i] + delta_colors))
color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
else:
color = list_of_class_colors[classes[i]]

box = boxes[i]
x_min, y_min, x_max, y_max = box
Expand Down Expand Up @@ -278,6 +284,7 @@ def visualize_results(
show_confidences=False,
axis_off=True,
show_classes_list=[],
list_of_class_colors=None,
return_image_array=False
):
"""
Expand All @@ -295,8 +302,8 @@ def visualize_results(
show_class (bool): Whether to show class labels. Default is True.
fill_mask (bool): Whether to fill the segmented regions with color. Default is False.
alpha (float): The transparency of filled masks. Default is 0.3.
color_class_background (tuple): The background bgr color for class labels. Default is (0, 0, 255) (red).
color_class_text (tuple): The text color for class labels. Default is (255, 255, 255) (white).
color_class_background (tuple): The background BGR color for class labels. Default is (0, 0, 255) (red).
color_class_text (tuple): The text BGR color for class labels. Default is (255, 255, 255) (white).
thickness (int): The thickness of bounding box and text. Default is 4.
font: The font type for class labels. Default is cv2.FONT_HERSHEY_SIMPLEX.
font_scale (float): The scale factor for font size. Default is 1.5.
Expand All @@ -306,8 +313,10 @@ def visualize_results(
show_confidences (bool): If true and show_class=True, confidences near class are visualized. Default is False.
axis_off (bool): If true, axis is turned off in the final visualization. Default is True.
show_classes_list (list): If empty, visualize all classes. Otherwise, visualize only classes in the list.
return_image_array (bool): If True, the function returns the image bgr array instead of displaying it.
Default is False.
list_of_class_colors (list/None): A list of tuples representing the colors for each class in BGR format. If provided,
these colors will be used for displaying the classes instead of random colors. The number of tuples
in the list must match the number of possible classes in the network.
return_image_array (bool): If True, the function returns the image bgr array instead of displaying it. Default is False.
Returns:
None/np.array
Expand All @@ -332,10 +341,12 @@ def visualize_results(

if random_object_colors:
color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
else:
elif list_of_class_colors is None:
# Assign color according to class
random.seed(int(classes_ids[i] + delta_colors))
color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
else:
color = list_of_class_colors[classes_ids[i]]

box = boxes[i]
x_min, y_min, x_max, y_max = box
Expand Down Expand Up @@ -409,3 +420,43 @@ def visualize_results(
if axis_off:
plt.axis('off')
plt.show()


def create_masks_from_polygons(polygons, image):
    """
    Create binary masks from a list of polygons.

    This function takes a list of polygons and an image, and generates one binary
    mask per non-empty polygon. Each mask is a uint8 numpy array with the same
    height and width as the input image, where pixels covered by the polygon are
    set to 1 and all other pixels are 0.

    Parameters:
        polygons (list of numpy.ndarray): A list of polygons, where each polygon is
            represented as a numpy array of shape (N, 2) containing N (x, y) coordinates.
        image (numpy.ndarray): The input image, used only to determine the mask dimensions.

    Returns:
        list of numpy.ndarray: A list of binary uint8 masks (values 0/1), one per
        non-empty polygon. Empty polygons are skipped, so the returned list may be
        shorter than the input list.
    """
    # Masks share the spatial dimensions of the source image.
    height, width = image.shape[:2]

    masks = []
    for polygon in polygons:
        if len(polygon) == 0:
            # A polygon with no vertices cannot be rasterized — skip it.
            continue

        # cv2.fillPoly expects int32 vertices shaped (N, 1, 2).
        points = polygon.reshape((-1, 1, 2)).astype(np.int32)

        # Rasterize the polygon onto a blank single-channel canvas.
        mask = np.zeros((height, width), dtype=np.uint8)
        cv2.fillPoly(mask, [points], 1)

        masks.append(mask)

    return masks
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
long_description = "\n" + fh.read()


VERSION = '1.2.8'
VERSION = '1.2.9'
DESCRIPTION = '''Patch-Based-Inference for detection/segmentation of small objects in images.'''

setup(
Expand Down

0 comments on commit bc66236

Please sign in to comment.