diff --git a/README.md b/README.md
index 36616f5a..0d0bfaa8 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@ A python library built to empower developers to build applications and systems w
-A DeepQuest AI project https://deepquestai.com .
+A DeepQuest AI project https://deepquestai.com.
Developed and Maintained by [Moses Olafenwa](https://twitter.com/OlafenwaMoses) and [John Olafenwa](https://twitter.com/johnolafenwa), brothers, creators of [TorchFusion](https://github.com/johnolafenwa/TorchFusion), Authors of [Introduction to Deep Computer Vision](https://john.aicommons.science/deepvision) and creators of [DeepStack AI Server](https://deepstack.cc).
diff --git a/build/lib/imageai/Detection/__init__.py b/build/lib/imageai/Detection/__init__.py
index 92b19dbd..d5545de8 100644
--- a/build/lib/imageai/Detection/__init__.py
+++ b/build/lib/imageai/Detection/__init__.py
@@ -248,7 +248,7 @@ def detectObjectsFromImage(self, input_image="", output_image_path="", input_typ
display_percentage_probability=True, display_object_name=True):
"""
'detectObjectsFromImage()' function is used to detect objects observable in the given image path:
- * input_image , which can be file to path, image numpy array or image file stream
+ * input_image , which can be a filepath, image numpy array or image file stream
* output_image_path (only if output_type = file) , file path to the output image that will contain the detection boxes and label, if output_type="file"
* input_type (optional) , file path/numpy array/image file stream of the image. Acceptable values are "file", "array" and "stream"
* output_type (optional) , file path/numpy array/image file stream of the image. Acceptable values are "file" and "array"
@@ -307,9 +307,9 @@ def detectObjectsFromImage(self, input_image="", output_image_path="", input_typ
:param minimum_percentage_probability:
:param display_percentage_probability:
:param display_object_name
+ :return image_frame:
:return output_objects_array:
- :return detected_copy:
- :return detected_detected_objects_image_array:
+ :return detected_objects_image_array:
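+
+ A minimal usage sketch (illustrative only; "resnet50_coco_best_v2.0.1.h5" and "image.jpg" are placeholder file names for a detection model and an input image):
+
+ from imageai.Detection import ObjectDetection
+
+ detector = ObjectDetection()
+ detector.setModelTypeAsRetinaNet()
+ detector.setModelPath("resnet50_coco_best_v2.0.1.h5")
+ detector.loadModel()
+ detections = detector.detectObjectsFromImage(input_image="image.jpg", output_image_path="image_detected.jpg")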
"""
if (self.__modelLoaded == False):
@@ -1109,70 +1109,48 @@ def detectObjectsFromVideo(self, input_file_path="", camera_input=None, output_f
"""
'detectObjectsFromVideo()' function is used to detect objects observable in the given video path or a camera input:
- * input_file_path , which is the file path to the input video. It is required only if 'camera_input' is not set
- * camera_input , allows you to parse in camera input for live video detections
- * output_file_path , which is the path to the output video. It is required only if 'save_detected_video' is not set to False
- * frames_per_second , which is the number of frames to be used in the output video
- * frame_detection_interval (optional, 1 by default) , which is the intervals of frames that will be detected.
- * minimum_percentage_probability (optional, 50 by default) , option to set the minimum percentage probability for nominating a detected object for output.
- * log_progress (optional) , which states if the progress of the frame processed is to be logged to console
- * display_percentage_probability (optional), can be used to hide or show probability scores on the detected video frames
- * display_object_name (optional), can be used to show or hide object names on the detected video frames
- * save_save_detected_video (optional, True by default), can be set to or not to save the detected video
- * per_frame_function (optional), this parameter allows you to parse in a function you will want to execute after
- each frame of the video is detected. If this parameter is set to a function, after every video
- frame is detected, the function will be executed with the following values parsed into it:
- -- position number of the frame
- -- an array of dictinaries, with each dictinary corresponding to each object detected.
- Each dictionary contains 'name', 'percentage_probability' and 'box_points'
- -- a dictionary with with keys being the name of each unique objects and value
- are the number of instances of the object present
- -- If return_detected_frame is set to True, the numpy array of the detected frame will be parsed
- as the fourth value into the function
-
- * per_second_function (optional), this parameter allows you to parse in a function you will want to execute after
- each second of the video is detected. If this parameter is set to a function, after every second of a video
- is detected, the function will be executed with the following values parsed into it:
- -- position number of the second
- -- an array of dictionaries whose keys are position number of each frame present in the last second , and the value for each key is the array for each frame that contains the dictionaries for each object detected in the frame
-
- -- an array of dictionaries, with each dictionary corresponding to each frame in the past second, and the keys of each dictionary are the name of the number of unique objects detected in each frame, and the key values are the number of instances of the objects found in the frame
-
- -- a dictionary with its keys being the name of each unique object detected throughout the past second, and the key values are the average number of instances of the object found in all the frames contained in the past second
-
- -- If return_detected_frame is set to True, the numpy array of the detected frame will be parsed
- as the fifth value into the function
-
- * per_minute_function (optional), this parameter allows you to parse in a function you will want to execute after
- each minute of the video is detected. If this parameter is set to a function, after every minute of a video
- is detected, the function will be executed with the following values parsed into it:
- -- position number of the minute
- -- an array of dictionaries whose keys are position number of each frame present in the last minute , and the value for each key is the array for each frame that contains the dictionaries for each object detected in the frame
-
- -- an array of dictionaries, with each dictionary corresponding to each frame in the past minute, and the keys of each dictionary are the name of the number of unique objects detected in each frame, and the key values are the number of instances of the objects found in the frame
-
- -- a dictionary with its keys being the name of each unique object detected throughout the past minute, and the key values are the average number of instances of the object found in all the frames contained in the past minute
-
- -- If return_detected_frame is set to True, the numpy array of the detected frame will be parsed
+ * input_file_path , which is the file path to the input video. It is required only if 'camera_input' is not set
+ * camera_input , allows you to parse in camera input for live video detections
+ * output_file_path , which is the path to the output video. It is required only if 'save_detected_video' is not set to False
+ * frames_per_second , which is the number of frames to be used in the output video
+ * frame_detection_interval (optional, 1 by default) , which is the interval of frames at which detections will be performed.
+ * minimum_percentage_probability (optional, 50 by default) , option to set the minimum percentage probability for nominating a detected object for output.
+ * log_progress (optional) , which states if the progress of the frames processed is to be logged to the console
+ * display_percentage_probability (optional), can be used to hide or show probability scores on the detected video frames
+ * display_object_name (optional), can be used to show or hide object names on the detected video frames
+ * save_detected_video (optional, True by default), can be set to save or not save the detected video
+ * per_frame_function (optional), this parameter allows you to parse in a function you will want to execute after each frame of the video is detected. If this parameter is set to a function, after every video frame is detected, the function will be executed with the following values parsed into it:
+ -- position number of the frame
+ -- an array of dictionaries, with each dictionary corresponding to each object detected. Each dictionary contains 'name', 'percentage_probability' and 'box_points'
+ -- a dictionary with keys being the name of each unique object and values being the number of instances of the object present
+ -- If return_detected_frame is set to True, the numpy array of the detected frame will be parsed as the fourth value into the function
+
+ * per_second_function (optional), this parameter allows you to parse in a function you will want to execute after each second of the video is detected. If this parameter is set to a function, after every second of a video is detected, the function will be executed with the following values parsed into it:
+ -- position number of the second
+ -- an array of dictionaries whose keys are the position numbers of each frame present in the last second, and the value for each key is the array for each frame that contains the dictionaries for each object detected in the frame
+ -- an array of dictionaries, with each dictionary corresponding to each frame in the past second, and the keys of each dictionary are the names of the unique objects detected in each frame, and the key values are the number of instances of the objects found in the frame
+ -- a dictionary with its keys being the name of each unique object detected throughout the past second, and the key values are the average number of instances of the object found in all the frames contained in the past second
+ -- If return_detected_frame is set to True, the numpy array of the detected frame will be parsed
as the fifth value into the function
- * video_complete_function (optional), this parameter allows you to parse in a function you will want to execute after
- all of the video frames have been detected. If this parameter is set to a function, after all of frames of a video
- is detected, the function will be executed with the following values parsed into it:
- -- an array of dictionaries whose keys are position number of each frame present in the entire video , and the value for each key is the array for each frame that contains the dictionaries for each object detected in the frame
-
- -- an array of dictionaries, with each dictionary corresponding to each frame in the entire video, and the keys of each dictionary are the name of the number of unique objects detected in each frame, and the key values are the number of instances of the objects found in the frame
-
- -- a dictionary with its keys being the name of each unique object detected throughout the entire video, and the key values are the average number of instances of the object found in all the frames contained in the entire video
+ * per_minute_function (optional), this parameter allows you to parse in a function you will want to execute after each minute of the video is detected. If this parameter is set to a function, after every minute of a video is detected, the function will be executed with the following values parsed into it:
+ -- position number of the minute
+ -- an array of dictionaries whose keys are the position numbers of each frame present in the last minute, and the value for each key is the array for each frame that contains the dictionaries for each object detected in the frame
- * return_detected_frame (optionally, False by default), option to obtain the return the last detected video frame into the per_per_frame_function,
- per_per_second_function or per_per_minute_function
+ -- an array of dictionaries, with each dictionary corresponding to each frame in the past minute, and the keys of each dictionary are the names of the unique objects detected in each frame, and the key values are the number of instances of the objects found in the frame
- * detection_timeout (optionally, None by default), option to state the number of seconds of a video that should be detected after which the detection function stop processing the video
+ -- a dictionary with its keys being the name of each unique object detected throughout the past minute, and the key values are the average number of instances of the object found in all the frames contained in the past minute
+ -- If return_detected_frame is set to True, the numpy array of the detected frame will be parsed as the fifth value into the function
+ * video_complete_function (optional), this parameter allows you to parse in a function you will want to execute after all of the video frames have been detected. If this parameter is set to a function, after all the frames of a video have been detected, the function will be executed with the following values parsed into it:
+ -- an array of dictionaries whose keys are the position numbers of each frame present in the entire video, and the value for each key is the array for each frame that contains the dictionaries for each object detected in the frame
+ -- an array of dictionaries, with each dictionary corresponding to each frame in the entire video, and the keys of each dictionary are the names of the unique objects detected in each frame, and the key values are the number of instances of the objects found in the frame
+ -- a dictionary with its keys being the name of each unique object detected throughout the entire video, and the key values are the average number of instances of the object found in all the frames contained in the entire video
+ * return_detected_frame (optional, False by default), option to return the last detected video frame into the per_frame_function, per_second_function or per_minute_function
+ * detection_timeout (optional, None by default), option to state the number of seconds of a video that should be detected, after which the detection function stops processing the video
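+
+ A minimal usage sketch (illustrative only; "yolo.h5" and "traffic.mp4" are placeholder file names for a detection model and an input video):
+
+ from imageai.Detection import VideoObjectDetection
+
+ detector = VideoObjectDetection()
+ detector.setModelTypeAsYOLOv3()
+ detector.setModelPath("yolo.h5")
+ detector.loadModel()
+ detector.detectObjectsFromVideo(input_file_path="traffic.mp4", output_file_path="traffic_detected", frames_per_second=20, log_progress=True)
+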
:param input_file_path:
diff --git a/build/lib/imageai/Prediction/Custom/__init__.py b/build/lib/imageai/Prediction/Custom/__init__.py
index ca9d55a5..43ef62bb 100644
--- a/build/lib/imageai/Prediction/Custom/__init__.py
+++ b/build/lib/imageai/Prediction/Custom/__init__.py
@@ -81,7 +81,7 @@ def setModelTypeAsInceptionV3(self):
def setDataDirectory(self, data_directory=""):
"""
- 'setDataDirectory()' is required to set the path to which the data/dataset to be used for
+ 'setDataDirectory()' is required to set the path to which the data/dataset to be used for
training is kept. The directory can have any name, but it must have 'train' and 'test'
sub-directory. In the 'train' and 'test' sub-directories, there must be sub-directories
with each having it's name corresponds to the name/label of the object whose images are
@@ -99,8 +99,9 @@ def setDataDirectory(self, data_directory=""):
>> class4 >> class4_test_images
>> class5 >> class5_test_images
- :return:
- """
+ :param data_directory:
+ :return:
+ """
self.__data_dir = data_directory
self.__train_dir = os.path.join(self.__data_dir, "train")
@@ -139,7 +140,7 @@ def lr_schedule(self, epoch):
def trainModel(self, num_objects, num_experiments=200, enhance_data=False, batch_size = 32, initial_learning_rate=1e-3, show_network_summary=False, training_image_size = 224, continue_from_model=None, transfer_from_model=None, transfer_with_full_training=True, initial_num_objects = None, save_full_model = False):
"""
- 'trainModel()' function starts the actual training. It accepts the following values:
+ 'trainModel()' function starts the actual model training. It accepts the following values:
- num_objects , which is the number of classes present in the dataset that is to be used for training
- num_experiments , also known as epochs, it is the number of times the network will train on all the training dataset
- enhance_data (optional) , this is used to modify the dataset and create more instance of the training set to enhance the training result
diff --git a/dist/imageai-2.0.2-py3-none-any.whl b/dist/imageai-2.0.2-py3-none-any.whl
deleted file mode 100644
index 12ea09c4..00000000
Binary files a/dist/imageai-2.0.2-py3-none-any.whl and /dev/null differ
diff --git a/dist/imageai-2.0.3-py3-none-any.whl b/dist/imageai-2.0.3-py3-none-any.whl
deleted file mode 100644
index e19277fe..00000000
Binary files a/dist/imageai-2.0.3-py3-none-any.whl and /dev/null differ
diff --git a/dist/imageai-2.0.3.tar.gz b/dist/imageai-2.0.3.tar.gz
deleted file mode 100644
index 8dec119b..00000000
Binary files a/dist/imageai-2.0.3.tar.gz and /dev/null differ
diff --git a/examples/custom_detection.py b/examples/custom_detection.py
new file mode 100644
index 00000000..9f88a6ba
--- /dev/null
+++ b/examples/custom_detection.py
@@ -0,0 +1,22 @@
+from imageai.Detection.Custom import CustomObjectDetection
+
+detector = CustomObjectDetection()
+detector.setModelTypeAsYOLOv3()
+detector.setModelPath("hololens-ex-60--loss-2.76.h5") # download via https://github.com/OlafenwaMoses/ImageAI/releases/download/essential-v4/hololens-ex-60--loss-2.76.h5
+detector.setJsonPath("detection_config.json") # download via https://github.com/OlafenwaMoses/ImageAI/releases/download/essential-v4/detection_config.json
+detector.loadModel()
+detections = detector.detectObjectsFromImage(input_image="holo2.jpg", output_image_path="holo2-detected.jpg")
+for detection in detections:
+ print(detection["name"], " : ", detection["percentage_probability"], " : ", detection["box_points"])
+
+
+"""
+EXAMPLE RESULT
+
+hololens : 39.69653248786926 : [611, 74, 751, 154]
+hololens : 87.6643180847168 : [23, 46, 90, 79]
+hololens : 89.25175070762634 : [191, 66, 243, 95]
+hololens : 64.49641585350037 : [437, 81, 514, 133]
+hololens : 91.78624749183655 : [380, 113, 423, 138]
+
+"""
\ No newline at end of file
diff --git a/examples/custom_detection_array_input_output.py b/examples/custom_detection_array_input_output.py
new file mode 100644
index 00000000..9e2bf254
--- /dev/null
+++ b/examples/custom_detection_array_input_output.py
@@ -0,0 +1,29 @@
+from imageai.Detection.Custom import CustomObjectDetection
+import cv2
+
+image_array = cv2.imread("holo2.jpg")
+
+detector = CustomObjectDetection()
+detector.setModelTypeAsYOLOv3()
+detector.setModelPath("hololens-ex-60--loss-2.76.h5") # download via https://github.com/OlafenwaMoses/ImageAI/releases/download/essential-v4/hololens-ex-60--loss-2.76.h5
+detector.setJsonPath("detection_config.json") # download via https://github.com/OlafenwaMoses/ImageAI/releases/download/essential-v4/detection_config.json
+detector.loadModel()
+detected_image, detections = detector.detectObjectsFromImage(input_image=image_array, input_type="array", output_type="array")
+
+for eachObject in detections:
+ print(eachObject["name"], " : ", eachObject["percentage_probability"], " : ", eachObject["box_points"])
+
+cv2.imshow("Main Image", detected_image)
+cv2.waitKey()
+cv2.destroyAllWindows()
+
+
+"""
+SAMPLE RESULT
+
+hololens : 39.69653248786926 : [611, 74, 751, 154]
+hololens : 87.6643180847168 : [23, 46, 90, 79]
+hololens : 89.25175070762634 : [191, 66, 243, 95]
+hololens : 64.49641585350037 : [437, 81, 514, 133]
+hololens : 91.78624749183655 : [380, 113, 423, 138]
+"""
\ No newline at end of file
diff --git a/examples/custom_detection_extract_objects.py b/examples/custom_detection_extract_objects.py
new file mode 100644
index 00000000..ca948234
--- /dev/null
+++ b/examples/custom_detection_extract_objects.py
@@ -0,0 +1,37 @@
+from imageai.Detection.Custom import CustomObjectDetection
+
+detector = CustomObjectDetection()
+detector.setModelTypeAsYOLOv3()
+detector.setModelPath("hololens-ex-60--loss-2.76.h5") # download via https://github.com/OlafenwaMoses/ImageAI/releases/download/essential-v4/hololens-ex-60--loss-2.76.h5
+detector.setJsonPath("detection_config.json") # download via https://github.com/OlafenwaMoses/ImageAI/releases/download/essential-v4/detection_config.json
+detector.loadModel()
+detections, extracted_objects_array = detector.detectObjectsFromImage(input_image="holo2.jpg", output_image_path="holo2-detected.jpg", extract_detected_objects=True)
+
+for detection, object_path in zip(detections, extracted_objects_array):
+ print(object_path)
+ print(detection["name"], " : ", detection["percentage_probability"], " : ", detection["box_points"])
+ print("---------------")
+
+"""
+SAMPLE RESULT
+
+holo2-detected.jpg-objects\hololens-1.jpg
+hololens : 39.69653248786926 : [611, 74, 751, 154]
+---------------
+
+holo2-detected.jpg-objects\hololens-1.jpg
+hololens : 87.6643180847168 : [23, 46, 90, 79]
+---------------
+
+holo2-detected.jpg-objects\hololens-1.jpg
+hololens : 89.25175070762634 : [191, 66, 243, 95]
+---------------
+
+holo2-detected.jpg-objects\hololens-1.jpg
+hololens : 64.49641585350037 : [437, 81, 514, 133]
+---------------
+
+holo2-detected.jpg-objects\hololens-1.jpg
+hololens : 91.78624749183655 : [380, 113, 423, 138]
+---------------
+"""
\ No newline at end of file
diff --git a/examples/custom_detection_from_array_extract_objects_array.py b/examples/custom_detection_from_array_extract_objects_array.py
new file mode 100644
index 00000000..27cdae07
--- /dev/null
+++ b/examples/custom_detection_from_array_extract_objects_array.py
@@ -0,0 +1,37 @@
+from imageai.Detection.Custom import CustomObjectDetection
+import cv2
+
+image_array = cv2.imread("holo2.jpg")
+
+detector = CustomObjectDetection()
+detector.setModelTypeAsYOLOv3()
+detector.setModelPath("hololens-ex-60--loss-2.76.h5") # download via https://github.com/OlafenwaMoses/ImageAI/releases/download/essential-v4/hololens-ex-60--loss-2.76.h5
+detector.setJsonPath("detection_config.json") # download via https://github.com/OlafenwaMoses/ImageAI/releases/download/essential-v4/detection_config.json
+detector.loadModel()
+detected_image, detections, extracted_objects = detector.detectObjectsFromImage(input_image=image_array, extract_detected_objects=True, input_type="array", output_type="array")
+
+
+for eachObject in detections:
+ print(eachObject["name"], " : ", eachObject["percentage_probability"], " : ", eachObject["box_points"])
+
+
+cv2.imshow("Main Image", detected_image)
+count = 0
+for img in extracted_objects:
+ count += 1
+
+ cv2.imshow("Window" + str(count), img)
+
+cv2.waitKey()
+cv2.destroyAllWindows()
+
+
+"""
+SAMPLE RESULT
+
+hololens : 39.69653248786926 : [611, 74, 751, 154]
+hololens : 87.6643180847168 : [23, 46, 90, 79]
+hololens : 89.25175070762634 : [191, 66, 243, 95]
+hololens : 64.49641585350037 : [437, 81, 514, 133]
+hololens : 91.78624749183655 : [380, 113, 423, 138]
+"""
\ No newline at end of file
diff --git a/examples/custom_detection_from_file_extract_objects_array.py b/examples/custom_detection_from_file_extract_objects_array.py
new file mode 100644
index 00000000..fbfcf1af
--- /dev/null
+++ b/examples/custom_detection_from_file_extract_objects_array.py
@@ -0,0 +1,36 @@
+from imageai.Detection.Custom import CustomObjectDetection
+import cv2
+
+
+detector = CustomObjectDetection()
+detector.setModelTypeAsYOLOv3()
+detector.setModelPath("hololens-ex-60--loss-2.76.h5") # download via https://github.com/OlafenwaMoses/ImageAI/releases/download/essential-v4/hololens-ex-60--loss-2.76.h5
+detector.setJsonPath("detection_config.json") # download via https://github.com/OlafenwaMoses/ImageAI/releases/download/essential-v4/detection_config.json
+detector.loadModel()
+detected_image, detections, extracted_objects = detector.detectObjectsFromImage(input_image="holo2.jpg", extract_detected_objects=True, output_type="array")
+
+
+for eachObject in detections:
+ print(eachObject["name"], " : ", eachObject["percentage_probability"], " : ", eachObject["box_points"])
+
+cv2.imshow("Main Image", detected_image)
+count = 0
+for img in extracted_objects:
+ count += 1
+
+ cv2.imshow("Window" + str(count), img)
+
+cv2.waitKey()
+cv2.destroyAllWindows()
+
+
+"""
+SAMPLE RESULT
+
+hololens : 39.69653248786926 : [611, 74, 751, 154]
+hololens : 87.6643180847168 : [23, 46, 90, 79]
+hololens : 89.25175070762634 : [191, 66, 243, 95]
+hololens : 64.49641585350037 : [437, 81, 514, 133]
+hololens : 91.78624749183655 : [380, 113, 423, 138]
+
+"""
\ No newline at end of file
diff --git a/examples/custom_detection_multi_model_evaluation.py b/examples/custom_detection_multi_model_evaluation.py
new file mode 100644
index 00000000..616f8790
--- /dev/null
+++ b/examples/custom_detection_multi_model_evaluation.py
@@ -0,0 +1,70 @@
+from imageai.Detection.Custom import DetectionModelTrainer
+
+trainer = DetectionModelTrainer()
+trainer.setModelTypeAsYOLOv3()
+trainer.setDataDirectory(data_directory="hololens")
+trainer.evaluateModel(model_path="hololens/models", json_path="hololens/json/detection_config.json", iou_threshold=0.5, object_threshold=0.3, nms_threshold=0.5)
+
+
+
+"""
+SAMPLE RESULT
+
+
+Model File: hololens/models/detection_model-ex-07--loss-4.42.h5
+
+Using IoU : 0.5
+Using Object Threshold : 0.3
+Using Non-Maximum Suppression : 0.5
+hololens: 0.9231
+mAP: 0.9231
+===============================
+Model File: hololens/models/detection_model-ex-10--loss-3.95.h5
+
+Using IoU : 0.5
+Using Object Threshold : 0.3
+Using Non-Maximum Suppression : 0.5
+hololens: 0.9725
+mAP: 0.9725
+===============================
+Model File: hololens/models/detection_model-ex-05--loss-5.26.h5
+
+Using IoU : 0.5
+Using Object Threshold : 0.3
+Using Non-Maximum Suppression : 0.5
+hololens: 0.9204
+mAP: 0.9204
+===============================
+Model File: hololens/models/detection_model-ex-03--loss-6.44.h5
+
+Using IoU : 0.5
+Using Object Threshold : 0.3
+Using Non-Maximum Suppression : 0.5
+hololens: 0.8120
+mAP: 0.8120
+===============================
+Model File: hololens/models/detection_model-ex-18--loss-2.96.h5
+
+Using IoU : 0.5
+Using Object Threshold : 0.3
+Using Non-Maximum Suppression : 0.5
+hololens: 0.9431
+mAP: 0.9431
+===============================
+Model File: hololens/models/detection_model-ex-17--loss-3.10.h5
+
+Using IoU : 0.5
+Using Object Threshold : 0.3
+Using Non-Maximum Suppression : 0.5
+hololens: 0.9404
+mAP: 0.9404
+===============================
+Model File: hololens/models/detection_model-ex-08--loss-4.16.h5
+
+Using IoU : 0.5
+Using Object Threshold : 0.3
+Using Non-Maximum Suppression : 0.5
+hololens: 0.9725
+mAP: 0.9725
+===============================
+"""
\ No newline at end of file
diff --git a/examples/custom_detection_single_model_evaluation.py b/examples/custom_detection_single_model_evaluation.py
new file mode 100644
index 00000000..36ff6f40
--- /dev/null
+++ b/examples/custom_detection_single_model_evaluation.py
@@ -0,0 +1,25 @@
+from imageai.Detection.Custom import DetectionModelTrainer
+
+trainer = DetectionModelTrainer()
+trainer.setModelTypeAsYOLOv3()
+trainer.setDataDirectory(data_directory="hololens")
+trainer.evaluateModel(model_path="hololens-ex-60--loss-2.76.h5", json_path="detection_config.json", iou_threshold=0.5, object_threshold=0.3, nms_threshold=0.5)
+
+# download JSON file via https://github.com/OlafenwaMoses/ImageAI/releases/download/essential-v4/detection_config.json
+# download detection model via https://github.com/OlafenwaMoses/ImageAI/releases/download/essential-v4/hololens-ex-60--loss-2.76.h5
+
+
+
+"""
+SAMPLE RESULT
+
+
+Model File: hololens_detection_model-ex-09--loss-4.01.h5
+
+Using IoU : 0.5
+Using Object Threshold : 0.3
+Using Non-Maximum Suppression : 0.5
+hololens: 0.9613
+mAP: 0.9613
+===============================
+"""
\ No newline at end of file
diff --git a/examples/custom_detection_train.py b/examples/custom_detection_train.py
new file mode 100644
index 00000000..09b83bc7
--- /dev/null
+++ b/examples/custom_detection_train.py
@@ -0,0 +1,40 @@
+from imageai.Detection.Custom import DetectionModelTrainer
+
+trainer = DetectionModelTrainer()
+trainer.setModelTypeAsYOLOv3()
+trainer.setDataDirectory(data_directory="hololens")
+trainer.setTrainConfig(object_names_array=["hololens"], batch_size=4, num_experiments=200, train_from_pretrained_model="pretrained-yolov3.h5") #download pre-trained model via https://github.com/OlafenwaMoses/ImageAI/releases/download/essential-v4/pretrained-yolov3.h5
+trainer.trainModel()
+
+
+
+"""
+SAMPLE RESULT
+
+Using TensorFlow backend.
+Generating anchor boxes for training images and annotation...
+Average IOU for 9 anchors: 0.78
+Anchor Boxes generated.
+Detection configuration saved in hololens/json/detection_config.json
+Training on: ['hololens']
+Training with Batch Size: 4
+Number of Experiments: 200
+
+
+
+Epoch 1/200
+ - 733s - loss: 34.8253 - yolo_layer_1_loss: 6.0920 - yolo_layer_2_loss: 11.1064 - yolo_layer_3_loss: 17.6269 - val_loss: 20.5028 - val_yolo_layer_1_loss: 4.0171 - val_yolo_layer_2_loss: 7.5175 - val_yolo_layer_3_loss: 8.9683
+Epoch 2/200
+ - 648s - loss: 11.1396 - yolo_layer_1_loss: 2.1209 - yolo_layer_2_loss: 4.0063 - yolo_layer_3_loss: 5.0124 - val_loss: 7.6188 - val_yolo_layer_1_loss: 1.8513 - val_yolo_layer_2_loss: 2.2446 - val_yolo_layer_3_loss: 3.5229
+Epoch 3/200
+ - 674s - loss: 6.4360 - yolo_layer_1_loss: 1.3500 - yolo_layer_2_loss: 2.2343 - yolo_layer_3_loss: 2.8518 - val_loss: 7.2326 - val_yolo_layer_1_loss: 1.8762 - val_yolo_layer_2_loss: 2.3802 - val_yolo_layer_3_loss: 2.9762
+Epoch 4/200
+ - 634s - loss: 5.3801 - yolo_layer_1_loss: 1.0323 - yolo_layer_2_loss: 1.7854 - yolo_layer_3_loss: 2.5624 - val_loss: 6.3730 - val_yolo_layer_1_loss: 1.4272 - val_yolo_layer_2_loss: 2.0534 - val_yolo_layer_3_loss: 2.8924
+Epoch 5/200
+ - 645s - loss: 5.2569 - yolo_layer_1_loss: 0.9953 - yolo_layer_2_loss: 1.8611 - yolo_layer_3_loss: 2.4005 - val_loss: 6.0458 - val_yolo_layer_1_loss: 1.7037 - val_yolo_layer_2_loss: 1.9754 - val_yolo_layer_3_loss: 2.3667
+Epoch 6/200
+ - 655s - loss: 4.7582 - yolo_layer_1_loss: 0.9959 - yolo_layer_2_loss: 1.5986 - yolo_layer_3_loss: 2.1637 - val_loss: 5.8313 - val_yolo_layer_1_loss: 1.1880 - val_yolo_layer_2_loss: 1.9962 - val_yolo_layer_3_loss: 2.6471
+Epoch 7/200
+
+"""
+
diff --git a/examples/custom_detection_video.py b/examples/custom_detection_video.py
new file mode 100644
index 00000000..b158b4e4
--- /dev/null
+++ b/examples/custom_detection_video.py
@@ -0,0 +1,16 @@
+from imageai.Detection.Custom import CustomVideoObjectDetection
+import os
+
+execution_path = os.getcwd()
+
+video_detector = CustomVideoObjectDetection()
+video_detector.setModelTypeAsYOLOv3()
+video_detector.setModelPath("hololens-ex-60--loss-2.76.h5") # download via https://github.com/OlafenwaMoses/ImageAI/releases/download/essential-v4/hololens-ex-60--loss-2.76.h5
+video_detector.setJsonPath("detection_config.json") # download via https://github.com/OlafenwaMoses/ImageAI/releases/download/essential-v4/detection_config.json
+video_detector.loadModel()
+
+video_detector.detectObjectsFromVideo(input_file_path="holo1.mp4",
+ output_file_path=os.path.join(execution_path, "holo1-detected3"),
+ frames_per_second=20,
+ minimum_percentage_probability=40,
+ log_progress=True)
\ No newline at end of file
diff --git a/examples/detection_config.json b/examples/detection_config.json
new file mode 100644
index 00000000..f3573ecf
--- /dev/null
+++ b/examples/detection_config.json
@@ -0,0 +1,9 @@
+{
+ "labels" : [
+ "hololens"
+ ],
+ "anchors" : [
+ [160, 171, 261, 167, 301, 285],
+ [78, 86, 104, 69, 138, 105],
+ [37, 32, 58, 61, 62, 44]]
+}
\ No newline at end of file
diff --git a/imageai.egg-info/PKG-INFO b/imageai.egg-info/PKG-INFO
index f681af7a..bd97b727 100644
--- a/imageai.egg-info/PKG-INFO
+++ b/imageai.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 1.0
Name: imageai
-Version: 2.0.3
+Version: 2.1.0
Summary: A python library built to empower developers to build applications and systems with self-contained Computer Vision capabilities
Home-page: https://github.com/OlafenwaMoses/ImageAI
Author: Moses Olafenwa and John Olafenwa
diff --git a/imageai.egg-info/SOURCES.txt b/imageai.egg-info/SOURCES.txt
index 8608c472..d8f0142c 100644
--- a/imageai.egg-info/SOURCES.txt
+++ b/imageai.egg-info/SOURCES.txt
@@ -8,6 +8,19 @@ imageai.egg-info/not-zip-safe
imageai.egg-info/requires.txt
imageai.egg-info/top_level.txt
imageai/Detection/__init__.py
+imageai/Detection/Custom/__init__.py
+imageai/Detection/Custom/callbacks.py
+imageai/Detection/Custom/evaluate.py
+imageai/Detection/Custom/gen_anchors.py
+imageai/Detection/Custom/generator.py
+imageai/Detection/Custom/voc.py
+imageai/Detection/Custom/yolo.py
+imageai/Detection/Custom/utils/__init__.py
+imageai/Detection/Custom/utils/bbox.py
+imageai/Detection/Custom/utils/colors.py
+imageai/Detection/Custom/utils/image.py
+imageai/Detection/Custom/utils/multi_gpu_model.py
+imageai/Detection/Custom/utils/utils.py
imageai/Detection/YOLOv3/__init__.py
imageai/Detection/YOLOv3/models.py
imageai/Detection/YOLOv3/utils.py
diff --git a/imageai/Detection/Custom/__init__.py b/imageai/Detection/Custom/__init__.py
new file mode 100644
index 00000000..c2e50bd6
--- /dev/null
+++ b/imageai/Detection/Custom/__init__.py
@@ -0,0 +1,1387 @@
+import os
+import numpy as np
+import json
+from imageai.Detection.Custom.voc import parse_voc_annotation
+from imageai.Detection.Custom.yolo import create_yolov3_model, dummy_loss
+from imageai.Detection.YOLOv3.models import yolo_main
+from imageai.Detection.Custom.generator import BatchGenerator
+from imageai.Detection.Custom.utils.utils import normalize, evaluate, makedirs
+from keras.callbacks import EarlyStopping, ReduceLROnPlateau
+from keras.optimizers import Adam
+from imageai.Detection.Custom.callbacks import CustomModelCheckpoint, CustomTensorBoard
+from imageai.Detection.Custom.utils.multi_gpu_model import multi_gpu_model
+from imageai.Detection.Custom.gen_anchors import generateAnchors
+import tensorflow as tf
+import keras
+from keras.preprocessing.image import load_img, img_to_array
+from keras.models import load_model, Input
+from PIL import Image
+import matplotlib.image as pltimage
+import cv2
+
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+
+
+
+class DetectionModelTrainer:
+
+ """
+ This is the Detection Model training class, which allows you to train object detection models
+ on image datasets that are in the Pascal VOC annotation format, using the YOLOv3 architecture.
+ """
+
+ def __init__(self):
+
+ self.__model_type = ""
+ self.__training_mode = True
+
+ self.__model_min_input_size = 288
+ self.__model_max_input_size = 448
+ self.__model_anchors = []
+ self.__reversed_model_anchors = []
+ self.__inference_anchors = []
+ self.__json_directory = ""
+ self.__model_labels = []
+ self.__num_objects = 0
+ self.__pre_trained_model = ""
+
+ self.__train_images_folder = ""
+ self.__train_annotations_folder = ""
+ self.__train_cache_file = ""
+ self.__train_times = 8
+ self.__train_batch_size = 4
+ self.__train_learning_rate = 1e-4
+ self.__train_epochs = 100
+ self.__train_warmup_epochs = 3
+ self.__train_ignore_treshold = 0.5
+ self.__train_gpus = "0,1"
+ self.__train_grid_scales = [1,1,1]
+ self.__train_obj_scale = 5
+ self.__train_noobj_scale = 1
+ self.__train_xywh_scale = 1
+ self.__train_class_scale = 1
+ self.__model_directory = ""
+ self.__train_weights_name = ""
+ self.__train_debug = True
+
+ self.__validation_images_folder = ""
+ self.__validation_annotations_folder = ""
+ self.__validation_cache_file = ""
+ self.__validation_times = 1
+
+
+
+
+ def setModelTypeAsYOLOv3(self):
+ """
+ 'setModelTypeAsYOLOv3()' is used to set the model type to the YOLOv3 model
+ for the training instance object.
+ :return:
+ """
+ self.__model_type = "yolov3"
+
+ def setDataDirectory(self, data_directory):
+
+ """
+
+ 'setDataDirectory()' is required to set the path to the directory in which the data/dataset to be used for
+ training is kept. The directory can have any name, but it must have 'train' and 'validation'
+ sub-directories. In the 'train' and 'validation' sub-directories, there must be 'images' and 'annotations'
+ sub-directories respectively. The 'images' folder will contain the pictures for the dataset and the
+ 'annotations' folder will contain the XML files with details of the annotations for each image in the
+ 'images folder'.
+
+ N.B: Strictly take note that the filenames (without the extension) of the pictures in the 'images folder'
+ must be the same as the filenames (without the extension) of their corresponding annotation XML files in
+ the 'annotations' folder.
+
+ The structure of the 'train' and 'validation' folder must be as follows:
+
+ >> train >> images >> img_1.jpg
+ >> images >> img_2.jpg
+ >> images >> img_3.jpg
+ >> annotations >> img_1.xml
+ >> annotations >> img_2.xml
+ >> annotations >> img_3.xml
+
+
+ >> validation >> images >> img_151.jpg
+ >> images >> img_152.jpg
+ >> images >> img_153.jpg
+ >> annotations >> img_151.xml
+ >> annotations >> img_152.xml
+ >> annotations >> img_153.xml
+
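+ A minimal usage sketch (the directory name "hololens" is illustrative, mirroring the examples shipped with this repository):
+
+ from imageai.Detection.Custom import DetectionModelTrainer
+
+ trainer = DetectionModelTrainer()
+ trainer.setModelTypeAsYOLOv3()
+ trainer.setDataDirectory(data_directory="hololens")
+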
+ :param data_directory:
+ :return:
+ """
+
+ self.__train_images_folder = os.path.join(data_directory, "train/images/")
+ self.__train_annotations_folder = os.path.join(data_directory, "train/annotations/")
+ self.__validation_images_folder = os.path.join(data_directory, "validation/images/")
+ self.__validation_annotations_folder = os.path.join(data_directory, "validation/annotations/")
+
+ if os.path.exists(os.path.join(data_directory, "cache")) == False:
+ os.makedirs(os.path.join(data_directory, "cache"))
+ self.__train_cache_file = os.path.join(data_directory, "cache", "detection_train_data.pkl")
+ self.__validation_cache_file = os.path.join(data_directory, "cache", "detection_test_data.pkl")
+
+ if os.path.exists(os.path.join(data_directory, "models")) == False:
+ os.makedirs(os.path.join(data_directory, "models"))
+
+ if os.path.exists(os.path.join(data_directory, "json")) == False:
+ os.makedirs(os.path.join(data_directory, "json"))
+
+ self.__model_directory = os.path.join(data_directory, "models")
+ self.__train_weights_name = os.path.join(self.__model_directory, "detection_model-")
+ self.__json_directory = os.path.join(data_directory, "json")
+
+
+
+ def setTrainConfig(self, object_names_array, batch_size= 4, num_experiments=100, train_from_pretrained_model=""):
+
+ """
+
+ 'setTrainConfig()' function allows you to set the properties for the training instances. It accepts the following values:
+
+ - object_names_array , this is an array of the names of the different objects in your dataset
+ - batch_size (optional), this is the batch size for the training instance
+ - num_experiments (optional), also known as epochs, it is the number of times the network will train on all the training dataset
+ - train_from_pretrained_model (optional), this is used to perform transfer learning by specifying the path to a pre-trained YOLOv3 model
+
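+ For example (values are illustrative and mirror examples/custom_detection_train.py in this repository):
+
+ trainer.setTrainConfig(object_names_array=["hololens"], batch_size=4, num_experiments=200, train_from_pretrained_model="pretrained-yolov3.h5")
+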
+ :param object_names_array:
+ :param batch_size:
+ :param num_experiments:
+ :param train_from_pretrained_model:
+ :return:
+ """
+
+
+ self.__model_anchors, self.__reversed_model_anchors = generateAnchors(self.__train_annotations_folder,
+ self.__train_images_folder,
+ self.__train_cache_file, self.__model_labels)
+
+ self.__model_labels = sorted(object_names_array)
+ self.__num_objects = len(object_names_array)
+
+ self.__train_batch_size = batch_size
+ self.__train_epochs = num_experiments
+ self.__pre_trained_model = train_from_pretrained_model
+
+ self.__inference_anchors.append(self.__reversed_model_anchors[0:6])
+ self.__inference_anchors.append(self.__reversed_model_anchors[6:12])
+ self.__inference_anchors.append(self.__reversed_model_anchors[12:18])
+
+ json_data = {}
+ json_data["labels"] = self.__model_labels
+ json_data["anchors"] = self.__inference_anchors
+
+ with open(os.path.join(self.__json_directory, "detection_config.json"), "w+") as json_file:
+ json.dump(json_data, json_file, indent=4, separators=(",", " : "),
+ ensure_ascii=True)
+ json_file.close()
+
+ print("Detection configuration saved in ", os.path.join(self.__json_directory, "detection_config.json"))
+
+ def trainModel(self):
+
+ """
+ 'trainModel()' function starts the actual model training. Once the training starts, the training instance
+ creates 3 sub-folders in your dataset folder which are:
+
+ - json, where the JSON configuration file for using your trained model is stored
+ - models, where your trained models are stored once they are generated after each improved experiment
+ - cache, where temporary training configuration files are stored
+
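+ Once 'setDataDirectory()' and 'setTrainConfig()' have been called, training is started with a single call (see examples/custom_detection_train.py in this repository):
+
+ trainer.trainModel()
+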
+ :return:
+ """
+
+ train_ints, valid_ints, labels, max_box_per_image = self._create_training_instances(
+ self.__train_annotations_folder,
+ self.__train_images_folder,
+ self.__train_cache_file,
+ self.__validation_annotations_folder,
+ self.__validation_images_folder,
+ self.__validation_cache_file,
+ self.__model_labels
+
+ )
+ if(self.__training_mode):
+ print('Training on: \t' + str(labels) + '')
+ print("Training with Batch Size: ", self.__train_batch_size)
+ print("Number of Experiments: ", self.__train_epochs)
+
+ ###############################
+ # Create the generators
+ ###############################
+ train_generator = BatchGenerator(
+ instances=train_ints,
+ anchors=self.__model_anchors,
+ labels=labels,
+ downsample=32, # ratio between network input's size and network output's size, 32 for YOLOv3
+ max_box_per_image=max_box_per_image,
+ batch_size=self.__train_batch_size,
+ min_net_size=self.__model_min_input_size,
+ max_net_size=self.__model_max_input_size,
+ shuffle=True,
+ jitter=0.3,
+ norm=normalize
+ )
+
+ valid_generator = BatchGenerator(
+ instances=valid_ints,
+ anchors=self.__model_anchors,
+ labels=labels,
+ downsample=32, # ratio between network input's size and network output's size, 32 for YOLOv3
+ max_box_per_image=max_box_per_image,
+ batch_size=self.__train_batch_size,
+ min_net_size=self.__model_min_input_size,
+ max_net_size=self.__model_max_input_size,
+ shuffle=True,
+ jitter=0.0,
+ norm=normalize
+ )
+
+ ###############################
+ # Create the model
+ ###############################
+ if os.path.exists(self.__pre_trained_model):
+ self.__train_warmup_epochs = 0
+ warmup_batches = self.__train_warmup_epochs * (self.__train_times * len(train_generator))
+
+ os.environ['CUDA_VISIBLE_DEVICES'] = self.__train_gpus
+ multi_gpu = len(self.__train_gpus.split(','))
+
+ train_model, infer_model = self._create_model(
+ nb_class=len(labels),
+ anchors=self.__model_anchors,
+ max_box_per_image=max_box_per_image,
+ max_grid=[self.__model_max_input_size, self.__model_max_input_size],
+ batch_size=self.__train_batch_size,
+ warmup_batches=warmup_batches,
+ ignore_thresh=self.__train_ignore_treshold,
+ multi_gpu=multi_gpu,
+ lr=self.__train_learning_rate,
+ grid_scales=self.__train_grid_scales,
+ obj_scale=self.__train_obj_scale,
+ noobj_scale=self.__train_noobj_scale,
+ xywh_scale=self.__train_xywh_scale,
+ class_scale=self.__train_class_scale,
+ )
+
+ ###############################
+ # Kick off the training
+ ###############################
+ callbacks = self._create_callbacks(self.__train_weights_name, infer_model)
+
+
+ train_model.fit_generator(
+ generator=train_generator,
+ steps_per_epoch=len(train_generator) * self.__train_times,
+ validation_data=valid_generator,
+ validation_steps=len(valid_generator) * self.__train_times,
+ epochs=self.__train_epochs + self.__train_warmup_epochs,
+ verbose=2,
+ callbacks=callbacks,
+ workers=4,
+ max_queue_size=8
+ )
+
+
+ def evaluateModel(self, model_path, json_path, batch_size=4, iou_threshold=0.5, object_threshold=0.2, nms_threshold=0.45):
+
+ """
+
+ 'evaluateModel()' is used to obtain the mAP metrics for your model(s). It accepts the following values:
+
+ - model_path (model file or folder), this value can be the path to your model file or the path to the folder containing all your saved model files
+ - json_path , this is the path to the 'detection_config.json' file saved for the dataset during the training
+ - iou_threshold , this value is used to set the desired 'IoU' to obtain the mAP metrics for your model(s)
+ - object_threshold , this is used to set your desired minimum 'class score' to obtain the mAP metrics for your model(s)
+ - nms_threshold , this is used to set your desired 'Non-maximum suppression' threshold to obtain the mAP metrics for your model(s)
+
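+ A usage sketch (the paths are illustrative and mirror examples/custom_detection_multi_model_evaluation.py in this repository):
+
+ trainer.evaluateModel(model_path="hololens/models", json_path="hololens/json/detection_config.json", iou_threshold=0.5, object_threshold=0.3, nms_threshold=0.5)
+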
+ :param model_path:
+ :param json_path:
+ :param batch_size:
+ :param iou_threshold:
+ :param object_threshold:
+ :param nms_threshold:
+ :return:
+ """
+
+ self.__training_mode = False
+ detection_model_json = json.load(open(json_path))
+
+
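+ # flatten the nested anchor lists loaded from 'detection_config.json' and reverse them into the flat order used by the evaluation code below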
+ temp_anchor_array = []
+ new_anchor_array = []
+
+ for aa in detection_model_json["anchors"]:
+ for aaa in aa:
+ temp_anchor_array.append(aaa)
+
+ reverse_count = len(temp_anchor_array) - 1
+ while (reverse_count > -1):
+ new_anchor_array.append(temp_anchor_array[reverse_count])
+ reverse_count -= 1
+
+ self.__model_anchors = new_anchor_array
+ self.__model_labels = detection_model_json["labels"]
+ self.__num_objects = len(self.__model_labels)
+
+ self.__train_batch_size = batch_size
+ self.__train_epochs = 100
+
+ print("Starting Model evaluation....")
+
+ train_ints, valid_ints, labels, max_box_per_image = self._create_training_instances(
+ self.__train_annotations_folder,
+ self.__train_images_folder,
+ self.__train_cache_file,
+ self.__validation_annotations_folder,
+ self.__validation_images_folder,
+ self.__validation_cache_file,
+ self.__model_labels
+
+ )
+
+ valid_generator = BatchGenerator(
+ instances=valid_ints,
+ anchors=self.__model_anchors,
+ labels=labels,
+ downsample=32, # ratio between network input's size and network output's size, 32 for YOLOv3
+ max_box_per_image=max_box_per_image,
+ batch_size=self.__train_batch_size,
+ min_net_size=self.__model_min_input_size,
+ max_net_size=self.__model_max_input_size,
+ shuffle=True,
+ jitter=0.0,
+ norm=normalize
+ )
+
+ train_generator = BatchGenerator(
+ instances=train_ints,
+ anchors=self.__model_anchors,
+ labels=labels,
+ downsample=32, # ratio between network input's size and network output's size, 32 for YOLOv3
+ max_box_per_image=max_box_per_image,
+ batch_size=self.__train_batch_size,
+ min_net_size=self.__model_min_input_size,
+ max_net_size=self.__model_max_input_size,
+ shuffle=True,
+ jitter=0.3,
+ norm=normalize
+ )
+
+ multi_gpu = len(self.__train_gpus.split(','))
+ warmup_batches = self.__train_warmup_epochs * (self.__train_times * len(train_generator))
+
+ train_model, infer_model = self._create_model(
+ nb_class=len(labels),
+ anchors=self.__model_anchors,
+ max_box_per_image=max_box_per_image,
+ max_grid=[self.__model_max_input_size, self.__model_max_input_size],
+ batch_size=self.__train_batch_size,
+ warmup_batches=warmup_batches,
+ ignore_thresh=self.__train_ignore_treshold,
+ multi_gpu=multi_gpu,
+ lr=self.__train_learning_rate,
+ grid_scales=self.__train_grid_scales,
+ obj_scale=self.__train_obj_scale,
+ noobj_scale=self.__train_noobj_scale,
+ xywh_scale=self.__train_xywh_scale,
+ class_scale=self.__train_class_scale,
+ )
+
+
+ if(os.path.isfile(model_path)):
+ if(str(model_path).endswith(".h5")):
+ try:
+ infer_model = load_model(model_path)
+
+ ###############################
+ # Run the evaluation
+ ###############################
+ # compute mAP for all the classes
+ average_precisions = evaluate(infer_model, valid_generator, iou_threshold=iou_threshold,
+ obj_thresh=object_threshold, nms_thresh=nms_threshold)
+
+ # print the score
+ print("Model File: ", model_path, '\n')
+ print("Using IoU : ", iou_threshold)
+ print("Using Object Threshold : ", object_threshold)
+ print("Using Non-Maximum Suppression : ", nms_threshold)
+ for label, average_precision in average_precisions.items():
+ print(labels[label] + ': {:.4f}'.format(average_precision))
+ print('mAP: {:.4f}'.format(sum(average_precisions.values()) / len(average_precisions)))
+ print("===============================")
+ except:
+ None
+
+ elif(os.path.isdir(model_path)):
+ model_files = os.listdir(model_path)
+
+ for model_file in model_files:
+ if(str(model_file).endswith(".h5")):
+ try:
+ infer_model = load_model(os.path.join(model_path, model_file))
+
+ ###############################
+ # Run the evaluation
+ ###############################
+ # compute mAP for all the classes
+ average_precisions = evaluate(infer_model, valid_generator, iou_threshold=iou_threshold,
+ obj_thresh=object_threshold, nms_thresh=nms_threshold)
+
+ # print the score
+ print("Model File: ", os.path.join(model_path, model_file), '\n')
+ print("Using IoU : ", iou_threshold)
+ print("Using Object Threshold : ", object_threshold)
+ print("Using Non-Maximum Suppression : ", nms_threshold)
+ for label, average_precision in average_precisions.items():
+ print(labels[label] + ': {:.4f}'.format(average_precision))
+ print('mAP: {:.4f}'.format(sum(average_precisions.values()) / len(average_precisions)))
+ print("===============================")
+ except:
+ continue
+
+
+
+
+
+
+ def _create_training_instances(self,
+ train_annot_folder,
+ train_image_folder,
+ train_cache,
+ valid_annot_folder,
+ valid_image_folder,
+ valid_cache,
+ labels,
+ ):
+
+ # parse annotations of the training set
+ train_ints, train_labels = parse_voc_annotation(train_annot_folder, train_image_folder, train_cache, labels)
+
+ # parse annotations of the validation set, if any, otherwise split the training set
+
+ if os.path.exists(valid_annot_folder):
+ valid_ints, valid_labels = parse_voc_annotation(valid_annot_folder, valid_image_folder, valid_cache, labels)
+ else:
+
+ train_valid_split = int(0.8 * len(train_ints))
+ np.random.seed(0)
+ np.random.shuffle(train_ints)
+ np.random.seed()
+
+ valid_ints = train_ints[train_valid_split:]
+ train_ints = train_ints[:train_valid_split]
+
+ # compare the seen labels with the given labels in config.json
+ if len(labels) > 0:
+ overlap_labels = set(labels).intersection(set(train_labels.keys()))
+
+ # return None, None, None if some given label is not in the dataset
+ if len(overlap_labels) < len(labels):
+ if(self.__training_mode):
+ print('Some labels have no annotations! Please revise the list of labels in your configuration.')
+ return None, None, None, None
+ else:
+ if(self.__training_mode):
+ print('No labels are provided. Train on all seen labels.')
+ print(train_labels)
+
+ labels = train_labels.keys()
+
+ max_box_per_image = max([len(inst['object']) for inst in (train_ints + valid_ints)])
+
+ return train_ints, valid_ints, sorted(labels), max_box_per_image
+
+ def _create_callbacks(self, saved_weights_name, model_to_save):
+
+ checkpoint = CustomModelCheckpoint(
+ model_to_save=model_to_save,
+ filepath=saved_weights_name + 'ex-{epoch:02d}--loss-{loss:.2f}.h5',
+ monitor='loss',
+ verbose=0,
+ save_best_only=True,
+ mode='min',
+ period=1
+ )
+ reduce_on_plateau = ReduceLROnPlateau(
+ monitor='loss',
+ factor=0.1,
+ patience=2,
+ verbose=0,
+ mode='min',
+ epsilon=0.01,
+ cooldown=0,
+ min_lr=0
+ )
+
+ return [checkpoint, reduce_on_plateau]
+
+ def _create_model(
+ self,
+ nb_class,
+ anchors,
+ max_box_per_image,
+ max_grid, batch_size,
+ warmup_batches,
+ ignore_thresh,
+ multi_gpu,
+ lr,
+ grid_scales,
+ obj_scale,
+ noobj_scale,
+ xywh_scale,
+ class_scale
+ ):
+ if multi_gpu > 1:
+ with tf.device('/cpu:0'):
+ template_model, infer_model = create_yolov3_model(
+ nb_class=nb_class,
+ anchors=anchors,
+ max_box_per_image=max_box_per_image,
+ max_grid=max_grid,
+ batch_size=batch_size // multi_gpu,
+ warmup_batches=warmup_batches,
+ ignore_thresh=ignore_thresh,
+ grid_scales=grid_scales,
+ obj_scale=obj_scale,
+ noobj_scale=noobj_scale,
+ xywh_scale=xywh_scale,
+ class_scale=class_scale
+ )
+ else:
+ template_model, infer_model = create_yolov3_model(
+ nb_class=nb_class,
+ anchors=anchors,
+ max_box_per_image=max_box_per_image,
+ max_grid=max_grid,
+ batch_size=batch_size,
+ warmup_batches=warmup_batches,
+ ignore_thresh=ignore_thresh,
+ grid_scales=grid_scales,
+ obj_scale=obj_scale,
+ noobj_scale=noobj_scale,
+ xywh_scale=xywh_scale,
+ class_scale=class_scale
+ )
+
+ # load the pretrained weight if exists, otherwise load the backend weight only
+ if(len(self.__pre_trained_model) > 3):
+ if(self.__training_mode):
+ print("Training with transfer learning from pretrained Model")
+ template_model.load_weights(self.__pre_trained_model, by_name=True)
+ else:
+ if(self.__training_mode):
+ print("Pre-trained Model not provided. Transfer learning not in use.")
+ print("Training will start with 3 warmup experiments")
+
+
+
+ if multi_gpu > 1:
+ train_model = multi_gpu_model(template_model, gpus=multi_gpu)
+ else:
+ train_model = template_model
+
+ optimizer = Adam(lr=lr, clipnorm=0.001)
+ train_model.compile(loss=dummy_loss, optimizer=optimizer)
+
+ return train_model, infer_model
+
+
+
+
+
+
+class CustomObjectDetection:
+
+ """
+ This is the object detection class for using your custom trained models. It supports your custom trained YOLOv3 model and allows you to perform object detection in images.
+ """
+
+ def __init__(self):
+ self.__model_type = ""
+ self.__model_path = ""
+ self.__model_labels = []
+ self.__model_anchors = []
+ self.__detection_config_json_path = ""
+ self.__model_loaded = False
+ self.__input_size = 416
+ self.__object_threshold = 0.4
+ self.__nms_threshold = 0.4
+ self.__model_collection = []
+ self.__detection_utils = CustomDetectionUtils(labels=[])
+
+ def setModelTypeAsYOLOv3(self):
+ """
+ 'setModelTypeAsYOLOv3' is used to set your custom detection model as YOLOv3
+ :return:
+ """
+ self.__model_type = "yolov3"
+
+ def setModelPath(self, detection_model_path):
+ """
+ 'setModelPath' is used to specify the filepath to your custom detection model
+ :param detection_model_path:
+ :return:
+ """
+ self.__model_path = detection_model_path
+
+ def setJsonPath(self, configuration_json):
+ """
+ 'setJsonPath' is used to set the filepath to the configuration JSON file for your custom detection model
+ :param configuration_json:
+ :return:
+ """
+ self.__detection_config_json_path = configuration_json
+
+ def loadModel(self):
+
+ """
+ 'loadModel' is used to load the model into the CustomObjectDetection class
+ :return:
+ """
+
+ if (self.__model_loaded == False):
+ if(self.__model_type == "yolov3"):
+ detection_model_json = json.load(open(self.__detection_config_json_path))
+
+ self.__model_labels = detection_model_json["labels"]
+ self.__model_anchors = detection_model_json["anchors"]
+
+ self.__detection_utils = CustomDetectionUtils(labels=self.__model_labels)
+
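+ # build the YOLOv3 inference network for the custom label set; the trained weights are loaded from the model file below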
+ model = yolo_main(Input(shape=(None, None, 3)), 3,
+ len(self.__model_labels))
+
+ model.load_weights(self.__model_path)
+ self.__model_collection.append(model)
+ self.__model_loaded = True
+
+
+
+
+
+
+
+ def detectObjectsFromImage(self, input_image="", output_image_path="", input_type="file", output_type="file",
+ extract_detected_objects=False, minimum_percentage_probability=30, nms_treshold=0.4,
+ display_percentage_probability=True, display_object_name=True):
+
+ """
+
+ 'detectObjectsFromImage()' function is used to detect objects observable in the given image:
+ * input_image , which can be a filepath or image numpy array
+ * output_image_path (only if output_type = file) , file path to the output image that will contain the detection boxes and label, if output_type="file"
+ * input_type (optional) , filepath/numpy array of the image. Acceptable values are "file" and "array"
+ * output_type (optional) , file path/numpy array/image file stream of the image. Acceptable values are "file" and "array"
+ * extract_detected_objects (optional) , option to save each object detected individually as an image and return an array of the objects' image paths.
+ * minimum_percentage_probability (optional, 30 by default) , option to set the minimum percentage probability for nominating a detected object for output.
+ * nms_treshold (optional, 0.4 by default) , option to set the Non-maximum suppression threshold for the detection
+ * display_percentage_probability (optional, True by default), option to show or hide the percentage probability of each object in the saved/returned detected image
+ * display_object_name (optional, True by default), option to show or hide the name of each object in the saved/returned detected image
+
+
+ The values returned by this function depend on the parameters parsed. The possible return values
+ are stated below:
+ - If extract_detected_objects = False or at its default value and output_type = 'file' or
+ at its default value, you must parse in the 'output_image_path' as a string to the path you want
+ the detected image to be saved. Then the function will return:
+ 1. an array of dictionaries, with each dictionary corresponding to the objects
+ detected in the image. Each dictionary contains the following property:
+ * name (string)
+ * percentage_probability (float)
+ * box_points (tuple of x1,y1,x2 and y2 coordinates)
+
+ - If extract_detected_objects = False or at its default value and output_type = 'array' ,
+ Then the function will return:
+
+ 1. a numpy array of the detected image
+ 2. an array of dictionaries, with each dictionary corresponding to the objects
+ detected in the image. Each dictionary contains the following property:
+ * name (string)
+ * percentage_probability (float)
+ * box_points (tuple of x1,y1,x2 and y2 coordinates)
+
+ - If extract_detected_objects = True and output_type = 'file' or
+ at its default value, you must parse in the 'output_image_path' as a string to the path you want
+ the detected image to be saved. Then the function will return:
+ 1. an array of dictionaries, with each dictionary corresponding to the objects
+ detected in the image. Each dictionary contains the following property:
+ * name (string)
+ * percentage_probability (float)
+ * box_points (tuple of x1,y1,x2 and y2 coordinates)
+ 2. an array of string paths to the image of each object extracted from the image
+
+            - If extract_detected_objects = True and output_type = 'array', then the function will return:
+ 1. a numpy array of the detected image
+ 2. an array of dictionaries, with each dictionary corresponding to the objects
+ detected in the image. Each dictionary contains the following property:
+ * name (string)
+ * percentage_probability (float)
+ * box_points (tuple of x1,y1,x2 and y2 coordinates)
+ 3. an array of numpy arrays of each object detected in the image
+
+ :param input_image:
+ :param output_image_path:
+ :param input_type:
+ :param output_type:
+ :param extract_detected_objects:
+ :param minimum_percentage_probability:
+ :param nms_treshold:
+ :param display_percentage_probability:
+ :param display_object_name:
+ :return image_frame:
+ :return output_objects_array:
+ :return detected_objects_image_array:
+ """
+
+ if (self.__model_loaded == False):
+ raise ValueError("You must call the loadModel() function before making object detection.")
+ else:
+ self.__object_threshold = minimum_percentage_probability / 100
+ self.__nms_threshold = nms_treshold
+
+ output_objects_array = []
+ detected_objects_image_array = []
+
+ model = self.__model_collection[0]
+
+ image = []
+
+ if(input_type == "file"):
+ image = cv2.imread(input_image)
+ elif(input_type == "array"):
+ image = input_image
+
+
+ image_frame = image.copy()
+ image_frame2 = image.copy()
+ height, width, channels = image.shape
+
+ image = cv2.resize(image, (self.__input_size, self.__input_size))
+
+ image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+ image = Image.fromarray(image)
+
+ # pre-process image
+ image = img_to_array(image)
+ image = image.astype("float32") / 255
+
+ # expand the image to batch
+ image = np.expand_dims(image, 0)
+
+ if(self.__model_type == "yolov3"):
+ yolo_result = model.predict(image)
+
+ boxes = list()
+
+
+ for a in range(len(yolo_result)):
+ box_set = self.__detection_utils.decode_netout(yolo_result[a][0], self.__model_anchors[a],
+ self.__object_threshold, self.__input_size,
+ self.__input_size)
+ boxes += box_set
+
+ self.__detection_utils.correct_yolo_boxes(boxes, height, width, self.__input_size, self.__input_size)
+
+ self.__detection_utils.do_nms(boxes, self.__nms_threshold)
+
+ all_boxes, all_labels, all_scores = self.__detection_utils.get_boxes(boxes, self.__model_labels,
+ self.__object_threshold)
+
+ for object_box, object_label, object_score in zip(all_boxes, all_labels, all_scores):
+ each_object_details = {}
+ each_object_details["name"] = object_label
+ each_object_details["percentage_probability"] = object_score
+
+ if(object_box.xmin < 0):
+ object_box.xmin = 0
+ if (object_box.ymin < 0):
+ object_box.ymin = 0
+
+ each_object_details["box_points"] = [object_box.xmin, object_box.ymin, object_box.xmax, object_box.ymax]
+ output_objects_array.append(each_object_details)
+
+ image_frame = self.__detection_utils.draw_boxes_and_caption(image_frame, all_boxes, all_labels,
+ all_scores, show_names=display_object_name,
+ show_percentage=display_percentage_probability)
+
+ if (extract_detected_objects == True):
+ counting = 0
+
+ objects_dir = output_image_path + "-objects"
+ if (os.path.exists(objects_dir) == False):
+ os.mkdir(objects_dir)
+
+ for each_object in output_objects_array:
+ counting += 1
+ splitted_copy = image_frame2.copy()[each_object["box_points"][1]:each_object["box_points"][3],
+ each_object["box_points"][0]:each_object["box_points"][2]]
+ if (output_type == "file"):
+ splitted_image_path = os.path.join(objects_dir,
+ each_object["name"] + "-" + str(counting) + ".jpg")
+ cv2.imwrite(splitted_image_path, splitted_copy)
+ detected_objects_image_array.append(splitted_image_path)
+ elif (output_type == "array"):
+ detected_objects_image_array.append(splitted_copy)
+
+ if (output_type == "file"):
+ cv2.imwrite(output_image_path, image_frame)
+
+ if (extract_detected_objects == True):
+ if (output_type == "file"):
+ return output_objects_array, detected_objects_image_array
+ elif (output_type == "array"):
+ return image_frame, output_objects_array, detected_objects_image_array
+
+ else:
+ if (output_type == "file"):
+ return output_objects_array
+ elif (output_type == "array"):
+ return image_frame, output_objects_array
+
+
+
+
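+# ---------------------------------------------------------------------------
+# Hedged usage sketch (comment-only, illustrative). It shows how the
+# CustomObjectDetection class above is intended to be called once this module
+# is importable as imageai.Detection.Custom; the model, JSON and image file
+# names are placeholders, not files shipped with this change.
+#
+#   from imageai.Detection.Custom import CustomObjectDetection
+#
+#   detector = CustomObjectDetection()
+#   detector.setModelTypeAsYOLOv3()
+#   detector.setModelPath("my_yolov3_model.h5")          # placeholder path
+#   detector.setJsonPath("detection_config.json")        # placeholder path
+#   detector.loadModel()
+#
+#   detections = detector.detectObjectsFromImage(input_image="test.jpg",
+#                                                output_image_path="test-detected.jpg",
+#                                                minimum_percentage_probability=30)
+#   for detection in detections:
+#       print(detection["name"], ":", detection["percentage_probability"],
+#             ":", detection["box_points"])
+# ---------------------------------------------------------------------------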
+
+
+class CustomVideoObjectDetection:
+
+
+ """
+
+ This is the object detection class for videos and camera live stream inputs using your custom trained detection models. It provides support for your custom YOLOv3 models.
+
+ """
+
+ def __init__(self):
+ self.__model_type = ""
+ self.__model_path = ""
+ self.__model_labels = []
+ self.__model_anchors = []
+ self.__detection_config_json_path = ""
+ self.__model_loaded = False
+ self.__input_size = 416
+ self.__object_threshold = 0.4
+ self.__nms_threshold = 0.4
+ self.__detector = []
+ self.__detection_utils = CustomDetectionUtils(labels=[])
+
+ def setModelTypeAsYOLOv3(self):
+
+ """
+ 'setModelTypeAsYOLOv3' is used to set your custom detection model as YOLOv3
+ :return:
+ """
+
+ self.__model_type = "yolov3"
+
+
+ def setModelPath(self, detection_model_path):
+ """
+ 'setModelPath' is used to specify the filepath to your custom detection model
+
+ :param detection_model_path:
+ :return:
+ """
+ self.__model_path = detection_model_path
+
+
+ def setJsonPath(self, configuration_json):
+ """
+ 'setJsonPath' is used to set the filepath to the configuration JSON file for your custom detection model
+
+ :param configuration_json:
+ :return:
+ """
+ self.__detection_config_json_path = configuration_json
+
+ def loadModel(self):
+ """
+ 'loadModel' is used to load the model into the CustomVideoObjectDetection class
+
+ :return:
+ """
+
+ if (self.__model_loaded == False):
+ if(self.__model_type == "yolov3"):
+ detector = CustomObjectDetection()
+ detector.setModelTypeAsYOLOv3()
+ detector.setModelPath(self.__model_path)
+ detector.setJsonPath(self.__detection_config_json_path)
+ detector.loadModel()
+
+                self.__detector = detector
+                self.__model_loaded = True
+
+
+ def detectObjectsFromVideo(self, input_file_path="", camera_input=None, output_file_path="", frames_per_second=20,
+ frame_detection_interval=1, minimum_percentage_probability=50, log_progress=False,
+ display_percentage_probability=True, display_object_name=True, save_detected_video=True,
+ per_frame_function=None, per_second_function=None, per_minute_function=None,
+ video_complete_function=None, return_detected_frame=False, detection_timeout = None):
+
+
+
+
+ """
+
+ 'detectObjectsFromVideo()' function is used to detect objects observable in the given video path or a camera input:
+ * input_file_path , which is the file path to the input video. It is required only if 'camera_input' is not set
+ * camera_input , allows you to parse in camera input for live video detections
+ * output_file_path , which is the path to the output video. It is required only if 'save_detected_video' is not set to False
+ * frames_per_second , which is the number of frames to be used in the output video
+ * frame_detection_interval (optional, 1 by default) , which is the intervals of frames that will be detected.
+ * minimum_percentage_probability (optional, 50 by default) , option to set the minimum percentage probability for nominating a detected object for output.
+ * log_progress (optional) , which states if the progress of the frame processed is to be logged to console
+ * display_percentage_probability (optional), can be used to hide or show probability scores on the detected video frames
+ * display_object_name (optional), can be used to show or hide object names on the detected video frames
+            * save_detected_video (optional, True by default), can be set to save or not save the detected video
+ * per_frame_function (optional), this parameter allows you to parse in a function you will want to execute after each frame of the video is detected. If this parameter is set to a function, after every video frame is detected, the function will be executed with the following values parsed into it:
+ -- position number of the frame
+                    -- an array of dictionaries, with each dictionary corresponding to each object detected. Each dictionary contains 'name', 'percentage_probability' and 'box_points'
+                    -- a dictionary with keys being the name of each unique object detected and values being the number of instances of the object present
+ -- If return_detected_frame is set to True, the numpy array of the detected frame will be parsed as the fourth value into the function
+
+ * per_second_function (optional), this parameter allows you to parse in a function you will want to execute after each second of the video is detected. If this parameter is set to a function, after every second of a video is detected, the function will be executed with the following values parsed into it:
+ -- position number of the second
+ -- an array of dictionaries whose keys are position number of each frame present in the last second , and the value for each key is the array for each frame that contains the dictionaries for each object detected in the frame
+                    -- an array of dictionaries, with each dictionary corresponding to each frame in the past second, and the keys of each dictionary are the names of the unique objects detected in each frame, and the key values are the number of instances of the objects found in the frame
+ -- a dictionary with its keys being the name of each unique object detected throughout the past second, and the key values are the average number of instances of the object found in all the frames contained in the past second
+ -- If return_detected_frame is set to True, the numpy array of the detected frame will be parsed
+ as the fifth value into the function
+
+ * per_minute_function (optional), this parameter allows you to parse in a function you will want to execute after each minute of the video is detected. If this parameter is set to a function, after every minute of a video is detected, the function will be executed with the following values parsed into it:
+ -- position number of the minute
+ -- an array of dictionaries whose keys are position number of each frame present in the last minute , and the value for each key is the array for each frame that contains the dictionaries for each object detected in the frame
+
+                    -- an array of dictionaries, with each dictionary corresponding to each frame in the past minute, and the keys of each dictionary are the names of the unique objects detected in each frame, and the key values are the number of instances of the objects found in the frame
+
+ -- a dictionary with its keys being the name of each unique object detected throughout the past minute, and the key values are the average number of instances of the object found in all the frames contained in the past minute
+
+ -- If return_detected_frame is set to True, the numpy array of the detected frame will be parsed as the fifth value into the function
+
+            * video_complete_function (optional), this parameter allows you to parse in a function you will want to execute after all of the video frames have been detected. If this parameter is set to a function, after all the frames of the video have been detected, the function will be executed with the following values parsed into it:
+ -- an array of dictionaries whose keys are position number of each frame present in the entire video , and the value for each key is the array for each frame that contains the dictionaries for each object detected in the frame
+                    -- an array of dictionaries, with each dictionary corresponding to each frame in the entire video, and the keys of each dictionary are the names of the unique objects detected in each frame, and the key values are the number of instances of the objects found in the frame
+ -- a dictionary with its keys being the name of each unique object detected throughout the entire video, and the key values are the average number of instances of the object found in all the frames contained in the entire video
+
+            * return_detected_frame (optional, False by default), option to return the last detected video frame into the per_frame_function, per_second_function or per_minute_function
+
+            * detection_timeout (optional, None by default), option to state the number of seconds of a video that should be detected, after which the detection function stops processing the video
+
+ :param input_file_path:
+ :param camera_input:
+ :param output_file_path:
+ :param frames_per_second:
+ :param frame_detection_interval:
+ :param minimum_percentage_probability:
+ :param log_progress:
+ :param display_percentage_probability:
+ :param display_object_name:
+ :param save_detected_video:
+ :param per_frame_function:
+ :param per_second_function:
+ :param per_minute_function:
+ :param video_complete_function:
+ :param return_detected_frame:
+ :param detection_timeout:
+ :return output_video_filepath:
+ :return counting:
+ :return output_objects_array:
+ :return output_objects_count:
+ :return detected_copy:
+ :return this_second_output_object_array:
+ :return this_second_counting_array:
+ :return this_second_counting:
+ :return this_minute_output_object_array:
+ :return this_minute_counting_array:
+ :return this_minute_counting:
+ :return this_video_output_object_array:
+ :return this_video_counting_array:
+ :return this_video_counting:
+ """
+
+ output_frames_dict = {}
+ output_frames_count_dict = {}
+
+ input_video = cv2.VideoCapture(input_file_path)
+ if (camera_input != None):
+ input_video = camera_input
+
+ output_video_filepath = output_file_path + '.avi'
+
+ frame_width = int(input_video.get(3))
+ frame_height = int(input_video.get(4))
+ output_video = cv2.VideoWriter(output_video_filepath, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
+ frames_per_second,
+ (frame_width, frame_height))
+
+ counting = 0
+ predicted_numbers = None
+ scores = None
+ detections = None
+
+
+ detection_timeout_count = 0
+ video_frames_count = 0
+
+
+ if(self.__model_type == "yolov3"):
+
+
+
+ while (input_video.isOpened()):
+ ret, frame = input_video.read()
+
+ if (ret == True):
+
+ detected_frame = frame.copy()
+
+ video_frames_count += 1
+ if (detection_timeout != None):
+ if ((video_frames_count % frames_per_second) == 0):
+ detection_timeout_count += 1
+
+ if (detection_timeout_count >= detection_timeout):
+ break
+
+ output_objects_array = []
+
+ counting += 1
+
+ if (log_progress == True):
+ print("Processing Frame : ", str(counting))
+
+
+
+ check_frame_interval = counting % frame_detection_interval
+
+ if (counting == 1 or check_frame_interval == 0):
+ try:
+ detected_frame, output_objects_array = self.__detector.detectObjectsFromImage(
+ input_image=frame, input_type="array", output_type="array",
+ minimum_percentage_probability=minimum_percentage_probability,
+ display_percentage_probability=display_percentage_probability,
+ display_object_name=display_object_name)
+ except:
+                                pass
+
+
+ output_frames_dict[counting] = output_objects_array
+
+ output_objects_count = {}
+ for eachItem in output_objects_array:
+ eachItemName = eachItem["name"]
+ try:
+ output_objects_count[eachItemName] = output_objects_count[eachItemName] + 1
+ except:
+ output_objects_count[eachItemName] = 1
+
+ output_frames_count_dict[counting] = output_objects_count
+
+
+ if (save_detected_video == True):
+ output_video.write(detected_frame)
+
+ if (per_frame_function != None):
+ if (return_detected_frame == True):
+ per_frame_function(counting, output_objects_array, output_objects_count,
+ detected_frame)
+ elif (return_detected_frame == False):
+ per_frame_function(counting, output_objects_array, output_objects_count)
+
+ if (per_second_function != None):
+ if (counting != 1 and (counting % frames_per_second) == 0):
+
+ this_second_output_object_array = []
+ this_second_counting_array = []
+ this_second_counting = {}
+
+ for aa in range(counting):
+ if (aa >= (counting - frames_per_second)):
+ this_second_output_object_array.append(output_frames_dict[aa + 1])
+ this_second_counting_array.append(output_frames_count_dict[aa + 1])
+
+ for eachCountingDict in this_second_counting_array:
+ for eachItem in eachCountingDict:
+ try:
+ this_second_counting[eachItem] = this_second_counting[eachItem] + \
+ eachCountingDict[eachItem]
+ except:
+ this_second_counting[eachItem] = eachCountingDict[eachItem]
+
+ for eachCountingItem in this_second_counting:
+ this_second_counting[eachCountingItem] = this_second_counting[
+ eachCountingItem] / frames_per_second
+
+ if (return_detected_frame == True):
+ per_second_function(int(counting / frames_per_second),
+ this_second_output_object_array, this_second_counting_array,
+ this_second_counting, detected_frame)
+
+ elif (return_detected_frame == False):
+ per_second_function(int(counting / frames_per_second),
+ this_second_output_object_array, this_second_counting_array,
+ this_second_counting)
+
+ if (per_minute_function != None):
+
+ if (counting != 1 and (counting % (frames_per_second * 60)) == 0):
+
+ this_minute_output_object_array = []
+ this_minute_counting_array = []
+ this_minute_counting = {}
+
+ for aa in range(counting):
+ if (aa >= (counting - (frames_per_second * 60))):
+ this_minute_output_object_array.append(output_frames_dict[aa + 1])
+ this_minute_counting_array.append(output_frames_count_dict[aa + 1])
+
+ for eachCountingDict in this_minute_counting_array:
+ for eachItem in eachCountingDict:
+ try:
+ this_minute_counting[eachItem] = this_minute_counting[eachItem] + \
+ eachCountingDict[eachItem]
+ except:
+ this_minute_counting[eachItem] = eachCountingDict[eachItem]
+
+ for eachCountingItem in this_minute_counting:
+ this_minute_counting[eachCountingItem] = this_minute_counting[
+ eachCountingItem] / (
+ frames_per_second * 60)
+
+ if (return_detected_frame == True):
+ per_minute_function(int(counting / (frames_per_second * 60)),
+ this_minute_output_object_array, this_minute_counting_array,
+ this_minute_counting, detected_frame)
+
+ elif (return_detected_frame == False):
+ per_minute_function(int(counting / (frames_per_second * 60)),
+ this_minute_output_object_array, this_minute_counting_array,
+ this_minute_counting)
+
+
+ else:
+ break
+
+ if (video_complete_function != None):
+
+ this_video_output_object_array = []
+ this_video_counting_array = []
+ this_video_counting = {}
+
+ for aa in range(counting):
+ this_video_output_object_array.append(output_frames_dict[aa + 1])
+ this_video_counting_array.append(output_frames_count_dict[aa + 1])
+
+ for eachCountingDict in this_video_counting_array:
+ for eachItem in eachCountingDict:
+ try:
+ this_video_counting[eachItem] = this_video_counting[eachItem] + \
+ eachCountingDict[eachItem]
+ except:
+ this_video_counting[eachItem] = eachCountingDict[eachItem]
+
+ for eachCountingItem in this_video_counting:
+ this_video_counting[eachCountingItem] = this_video_counting[
+ eachCountingItem] / counting
+
+ video_complete_function(this_video_output_object_array, this_video_counting_array,
+ this_video_counting)
+
+ input_video.release()
+ output_video.release()
+
+ if (save_detected_video == True):
+ return output_video_filepath
+
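+# ---------------------------------------------------------------------------
+# Hedged usage sketch (comment-only, illustrative). It shows how the
+# CustomVideoObjectDetection class above is intended to be called, including a
+# per_frame_function callback matching the contract described in the docstring;
+# all file names are placeholders.
+#
+#   def for_frame(frame_number, output_array, output_count):
+#       # output_array: one dictionary per detected object ('name',
+#       # 'percentage_probability', 'box_points'); output_count: count per object name
+#       print("Frame", frame_number, output_count)
+#
+#   video_detector = CustomVideoObjectDetection()
+#   video_detector.setModelTypeAsYOLOv3()
+#   video_detector.setModelPath("my_yolov3_model.h5")        # placeholder path
+#   video_detector.setJsonPath("detection_config.json")      # placeholder path
+#   video_detector.loadModel()
+#
+#   video_path = video_detector.detectObjectsFromVideo(input_file_path="input_video.mp4",   # placeholder
+#                                                      output_file_path="output_video",
+#                                                      frames_per_second=20,
+#                                                      minimum_percentage_probability=40,
+#                                                      per_frame_function=for_frame,
+#                                                      log_progress=True)
+#   # the detected video is written to "output_video.avi" (".avi" is appended by the function)
+# ---------------------------------------------------------------------------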
+
+class BoundBox:
+ def __init__(self, xmin, ymin, xmax, ymax, objness=None, classes=None):
+ self.xmin = xmin
+ self.ymin = ymin
+ self.xmax = xmax
+ self.ymax = ymax
+ self.objness = objness
+ self.classes = classes
+ self.label = -1
+ self.score = -1
+
+ def get_label(self):
+ if self.label == -1:
+ self.label = np.argmax(self.classes)
+
+ return self.label
+
+ def get_score(self):
+ if self.score == -1:
+ self.score = self.classes[self.get_label()]
+
+ return self.score
+
+
+
+class CustomDetectionUtils:
+ def __init__(self, labels):
+ self.__labels = labels
+ self.__colors = []
+
+ for i in range(len(labels)):
+ color_space_values = np.random.randint(50, 255, size=(3,))
+ red, green, blue = color_space_values
+ red, green, blue = int(red), int(green), int(blue)
+ self.__colors.append([red, green, blue])
+
+
+ def _sigmoid(self, x):
+ return 1. / (1. + np.exp(-x))
+
+ def decode_netout(self, netout, anchors, obj_thresh, net_h, net_w):
+ grid_h, grid_w = netout.shape[:2]
+ nb_box = 3
+ netout = netout.reshape((grid_h, grid_w, nb_box, -1))
+ nb_class = netout.shape[-1] - 5
+ boxes = []
+ netout[..., :2] = self._sigmoid(netout[..., :2])
+ netout[..., 4:] = self._sigmoid(netout[..., 4:])
+ netout[..., 5:] = netout[..., 4][..., np.newaxis] * netout[..., 5:]
+ netout[..., 5:] *= netout[..., 5:] > obj_thresh
+
+ for i in range(grid_h * grid_w):
+ row = i / grid_w
+ col = i % grid_w
+ for b in range(nb_box):
+ # 4th element is objectness score
+ objectness = netout[int(row)][int(col)][b][4]
+ if (objectness.all() <= obj_thresh): continue
+ # first 4 elements are x, y, w, and h
+ x, y, w, h = netout[int(row)][int(col)][b][:4]
+ x = (col + x) / grid_w # center position, unit: image width
+ y = (row + y) / grid_h # center position, unit: image height
+ w = anchors[2 * b + 0] * np.exp(w) / net_w # unit: image width
+ h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height
+ # last elements are class probabilities
+ classes = netout[int(row)][col][b][5:]
+ box = BoundBox(x - w / 2, y - h / 2, x + w / 2, y + h / 2, objectness, classes)
+ boxes.append(box)
+ return boxes
+
+ def correct_yolo_boxes(self, boxes, image_h, image_w, net_h, net_w):
+ new_w, new_h = net_w, net_h
+ for i in range(len(boxes)):
+ x_offset, x_scale = (net_w - new_w) / 2. / net_w, float(new_w) / net_w
+ y_offset, y_scale = (net_h - new_h) / 2. / net_h, float(new_h) / net_h
+ boxes[i].xmin = int((boxes[i].xmin - x_offset) / x_scale * image_w)
+ boxes[i].xmax = int((boxes[i].xmax - x_offset) / x_scale * image_w)
+ boxes[i].ymin = int((boxes[i].ymin - y_offset) / y_scale * image_h)
+ boxes[i].ymax = int((boxes[i].ymax - y_offset) / y_scale * image_h)
+
+ def _interval_overlap(self, interval_a, interval_b):
+ x1, x2 = interval_a
+ x3, x4 = interval_b
+ if x3 < x1:
+ if x4 < x1:
+ return 0
+ else:
+ return min(x2, x4) - x1
+ else:
+ if x2 < x3:
+ return 0
+ else:
+ return min(x2, x4) - x3
+
+ def bbox_iou(self, box1, box2):
+ intersect_w = self._interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
+ intersect_h = self._interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])
+ intersect = intersect_w * intersect_h
+ w1, h1 = box1.xmax - box1.xmin, box1.ymax - box1.ymin
+ w2, h2 = box2.xmax - box2.xmin, box2.ymax - box2.ymin
+ union = w1 * h1 + w2 * h2 - intersect
+
+ try:
+ result = float(intersect) / float(union)
+ return result
+ except:
+ return 0.0
+
+ def do_nms(self, boxes, nms_thresh):
+ if len(boxes) > 0:
+ nb_class = len(boxes[0].classes)
+ else:
+ return
+ for c in range(nb_class):
+ sorted_indices = np.argsort([-box.classes[c] for box in boxes])
+ for i in range(len(sorted_indices)):
+ index_i = sorted_indices[i]
+ if boxes[index_i].classes[c] == 0: continue
+ for j in range(i + 1, len(sorted_indices)):
+ index_j = sorted_indices[j]
+ if self.bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh:
+ boxes[index_j].classes[c] = 0
+
+ def get_boxes(self, boxes, labels, thresh):
+ v_boxes, v_labels, v_scores = list(), list(), list()
+ # enumerate all boxes
+ for box in boxes:
+ # enumerate all possible labels
+ for i in range(len(labels)):
+ # check if the threshold for this label is high enough
+ if box.classes[i] > thresh:
+ v_boxes.append(box)
+ v_labels.append(labels[i])
+ v_scores.append(box.classes[i] * 100)
+ # don't break, many labels may trigger for one box
+ return v_boxes, v_labels, v_scores
+
+ def label_color(self, label):
+        """ Return the color generated for the given label index. One color is generated per label when this class is initialized.
+
+        Args
+            label: The label to get the color for.
+
+        Returns
+            A list of three values representing a RGB color.
+
+        If no color is defined for a certain label, the color green is returned.
+ """
+ if label < len(self.__colors):
+ return self.__colors[label]
+ else:
+ return (0, 255, 0)
+
+ def draw_boxes_and_caption(self, image_frame, v_boxes, v_labels, v_scores, show_names=False, show_percentage=False):
+
+
+ for i in range(len(v_boxes)):
+ box = v_boxes[i]
+ y1, x1, y2, x2 = box.ymin, box.xmin, box.ymax, box.xmax
+ width, height = x2 - x1, y2 - y1
+ class_color = self.label_color(self.__labels.index(v_labels[i]))
+
+ image_frame = cv2.rectangle(image_frame, (x1, y1), (x2, y2), class_color, 2)
+
+ label = ""
+ if(show_names == True and show_percentage == True):
+ label = "%s : %.3f" % (v_labels[i], v_scores[i])
+ elif(show_names == True):
+ label = "%s" % (v_labels[i])
+ elif (show_percentage == True):
+ label = "%.3f" % (v_scores[i])
+
+
+ if(show_names == True or show_percentage == True):
+ b = np.array([x1, y1, x2, y2]).astype(int)
+ cv2.putText(image_frame, label, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (200, 0, 0), 3)
+ cv2.putText(image_frame, label, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 2)
+
+ return image_frame
+
+
+
+
+
+
diff --git a/imageai/Detection/Custom/callbacks.py b/imageai/Detection/Custom/callbacks.py
new file mode 100644
index 00000000..ddeb70d6
--- /dev/null
+++ b/imageai/Detection/Custom/callbacks.py
@@ -0,0 +1,71 @@
+from keras.callbacks import TensorBoard, ModelCheckpoint
+import tensorflow as tf
+import numpy as np
+import warnings
+
+class CustomTensorBoard(TensorBoard):
+ """ to log the loss after each batch
+ """
+ def __init__(self, log_every=1, **kwargs):
+ super(CustomTensorBoard, self).__init__(**kwargs)
+ self.log_every = log_every
+ self.counter = 0
+
+ def on_batch_end(self, batch, logs=None):
+ self.counter+=1
+ if self.counter%self.log_every==0:
+ for name, value in logs.items():
+ if name in ['batch', 'size']:
+ continue
+ summary = tf.Summary()
+ summary_value = summary.value.add()
+ summary_value.simple_value = value.item()
+ summary_value.tag = name
+ self.writer.add_summary(summary, self.counter)
+ self.writer.flush()
+
+ super(CustomTensorBoard, self).on_batch_end(batch, logs)
+
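+# Hedged usage sketch (comment-only, illustrative): wiring the callback into a
+# Keras training run; the model, generator, log directory and logging interval
+# below are placeholders.
+#
+#   tensorboard_cb = CustomTensorBoard(log_every=10, log_dir="logs/yolo")
+#   train_model.fit_generator(generator=train_generator,
+#                             epochs=100,
+#                             callbacks=[tensorboard_cb])
+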
+class CustomModelCheckpoint(ModelCheckpoint):
+ """ to save the template model, not the multi-GPU model
+ """
+ def __init__(self, model_to_save, **kwargs):
+ super(CustomModelCheckpoint, self).__init__(**kwargs)
+ self.model_to_save = model_to_save
+
+ def on_epoch_end(self, epoch, logs=None):
+ logs = logs or {}
+ self.epochs_since_last_save += 1
+ if self.epochs_since_last_save >= self.period:
+ self.epochs_since_last_save = 0
+ filepath = self.filepath.format(epoch=epoch + 1, **logs)
+ if self.save_best_only:
+ current = logs.get(self.monitor)
+ if current is None:
+ warnings.warn('Can save best model only with %s available, '
+ 'skipping.' % (self.monitor), RuntimeWarning)
+ else:
+ if self.monitor_op(current, self.best):
+ if self.verbose > 0:
+ print('\nEpoch %05d: %s improved from %0.5f to %0.5f,'
+ ' saving model to %s'
+ % (epoch + 1, self.monitor, self.best,
+ current, filepath))
+ self.best = current
+ if self.save_weights_only:
+ self.model_to_save.save_weights(filepath, overwrite=True)
+ else:
+ self.model_to_save.save(filepath, overwrite=True)
+ else:
+ if self.verbose > 0:
+ print('\nEpoch %05d: %s did not improve from %0.5f' %
+ (epoch + 1, self.monitor, self.best))
+ else:
+ if self.verbose > 0:
+ print('\nEpoch %05d: saving model to %s' % (epoch + 1, filepath))
+ if self.save_weights_only:
+ self.model_to_save.save_weights(filepath, overwrite=True)
+ else:
+ self.model_to_save.save(filepath, overwrite=True)
+
+ super(CustomModelCheckpoint, self).on_batch_end(epoch, logs)
\ No newline at end of file
diff --git a/imageai/Detection/Custom/evaluate.py b/imageai/Detection/Custom/evaluate.py
new file mode 100644
index 00000000..726e8eee
--- /dev/null
+++ b/imageai/Detection/Custom/evaluate.py
@@ -0,0 +1,68 @@
+#! /usr/bin/env python
+
+import argparse
+import os
+import numpy as np
+import json
+from imageai.Detection.Custom.voc import parse_voc_annotation
+from imageai.Detection.Custom.yolo import create_yolov3_model
+from imageai.Detection.Custom.generator import BatchGenerator
+from imageai.Detection.Custom.utils.utils import normalize, evaluate
+from keras.callbacks import EarlyStopping, ModelCheckpoint
+from keras.optimizers import Adam
+from keras.models import load_model
+
+def _main_(args):
+ config_path = args.conf
+
+ with open(config_path) as config_buffer:
+ config = json.loads(config_buffer.read())
+
+ ###############################
+ # Create the validation generator
+ ###############################
+ valid_ints, labels = parse_voc_annotation(
+ config['valid']['valid_annot_folder'],
+ config['valid']['valid_image_folder'],
+ config['valid']['cache_name'],
+ config['model']['labels']
+ )
+
+ labels = labels.keys() if len(config['model']['labels']) == 0 else config['model']['labels']
+ labels = sorted(labels)
+
+ valid_generator = BatchGenerator(
+ instances = valid_ints,
+ anchors = config['model']['anchors'],
+ labels = labels,
+ downsample = 32, # ratio between network input's size and network output's size, 32 for YOLOv3
+ max_box_per_image = 0,
+ batch_size = config['train']['batch_size'],
+ min_net_size = config['model']['min_input_size'],
+ max_net_size = config['model']['max_input_size'],
+ shuffle = True,
+ jitter = 0.0,
+ norm = normalize
+ )
+
+ ###############################
+ # Load the model and do evaluation
+ ###############################
+ os.environ['CUDA_VISIBLE_DEVICES'] = config['train']['gpus']
+
+ infer_model = load_model(config['train']['saved_weights_name'])
+
+ # compute mAP for all the classes
+ average_precisions = evaluate(infer_model, valid_generator)
+
+ # print the score
+ for label, average_precision in average_precisions.items():
+ print(labels[label] + ': {:.4f}'.format(average_precision))
+ print('mAP: {:.4f}'.format(sum(average_precisions.values()) / len(average_precisions)))
+
+if __name__ == '__main__':
+ argparser = argparse.ArgumentParser(description='Evaluate YOLO_v3 model on any dataset')
+ argparser.add_argument('-c', '--conf', help='path to configuration file')
+
+ args = argparser.parse_args()
+ _main_(args)
diff --git a/imageai/Detection/Custom/gen_anchors.py b/imageai/Detection/Custom/gen_anchors.py
new file mode 100644
index 00000000..5cfd5507
--- /dev/null
+++ b/imageai/Detection/Custom/gen_anchors.py
@@ -0,0 +1,124 @@
+import random
+import argparse
+import numpy as np
+
+from imageai.Detection.Custom.voc import parse_voc_annotation
+import json
+
+def IOU(ann, centroids):
+ w, h = ann
+ similarities = []
+
+ for centroid in centroids:
+ c_w, c_h = centroid
+
+ if c_w >= w and c_h >= h:
+ similarity = w*h/(c_w*c_h)
+ elif c_w >= w and c_h <= h:
+ similarity = w*c_h/(w*h + (c_w-w)*c_h)
+ elif c_w <= w and c_h >= h:
+ similarity = c_w*h/(w*h + c_w*(c_h-h))
+ else: #means both w,h are bigger than c_w and c_h respectively
+ similarity = (c_w*c_h)/(w*h)
+ similarities.append(similarity) # will become (k,) shape
+
+ return np.array(similarities)
+
+def avg_IOU(anns, centroids):
+ n,d = anns.shape
+ sum = 0.
+
+ for i in range(anns.shape[0]):
+ sum+= max(IOU(anns[i], centroids))
+
+ return sum/n
+
+
+def run_kmeans(ann_dims, anchor_num):
+ ann_num = ann_dims.shape[0]
+ iterations = 0
+ prev_assignments = np.ones(ann_num)*(-1)
+ iteration = 0
+ old_distances = np.zeros((ann_num, anchor_num))
+
+ indices = [random.randrange(ann_dims.shape[0]) for i in range(anchor_num)]
+ centroids = ann_dims[indices]
+ anchor_dim = ann_dims.shape[1]
+
+ while True:
+ distances = []
+ iteration += 1
+ for i in range(ann_num):
+ d = 1 - IOU(ann_dims[i], centroids)
+ distances.append(d)
+ distances = np.array(distances) # distances.shape = (ann_num, anchor_num)
+
+ #assign samples to centroids
+ assignments = np.argmin(distances,axis=1)
+
+ if (assignments == prev_assignments).all() :
+ return centroids
+
+ #calculate new centroids
+        centroid_sums = np.zeros((anchor_num, anchor_dim), float)
+ for i in range(ann_num):
+ centroid_sums[assignments[i]]+=ann_dims[i]
+ for j in range(anchor_num):
+ centroids[j] = centroid_sums[j]/(np.sum(assignments==j) + 1e-6)
+
+ prev_assignments = assignments.copy()
+ old_distances = distances.copy()
+
+def generateAnchors(train_annotation_folder, train_image_folder, train_cache_file, model_labels):
+
+ print("Generating anchor boxes for training images and annotation...")
+ num_anchors = 9
+
+ train_imgs, train_labels = parse_voc_annotation(
+ train_annotation_folder,
+ train_image_folder,
+ train_cache_file,
+ model_labels
+ )
+
+ # run k_mean to find the anchors
+ annotation_dims = []
+ for image in train_imgs:
+
+ for obj in image['object']:
+ relative_w = (float(obj['xmax']) - float(obj['xmin']))/image['width']
+            relative_h = (float(obj["ymax"]) - float(obj['ymin']))/image['height']
+            annotation_dims.append(tuple(map(float, (relative_w, relative_h))))
+
+ annotation_dims = np.array(annotation_dims)
+ centroids = run_kmeans(annotation_dims, num_anchors)
+
+ # write anchors to file
+ print('Average IOU for', num_anchors, 'anchors:', '%0.2f' % avg_IOU(annotation_dims, centroids))
+
+ anchors = centroids.copy()
+
+ widths = anchors[:, 0]
+ sorted_indices = np.argsort(widths)
+
+
+ anchor_array = []
+ reverse_anchor_array = []
+ out_string = ""
+ r = "anchors: ["
+ for i in sorted_indices:
+ anchor_array.append(int(anchors[i, 0] * 416))
+ anchor_array.append(int(anchors[i, 1] * 416))
+
+ out_string += str(int(anchors[i, 0] * 416)) + ',' + str(int(anchors[i, 1] * 416)) + ', '
+
+
+ reverse_count = len(anchor_array) -1
+ while(reverse_count > -1):
+ reverse_anchor_array.append(anchor_array[reverse_count])
+ reverse_count -= 1
+
+ print("Anchor Boxes generated.")
+ return anchor_array, reverse_anchor_array
+
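+# Hedged usage sketch (comment-only, illustrative): generateAnchors() is given the
+# Pascal VOC style annotation and image folders used for training; the folder
+# names, cache file and label list below are placeholders.
+#
+#   anchors, reverse_anchors = generateAnchors("train/annotations/",
+#                                              "train/images/",
+#                                              "train/cache.pkl",
+#                                              ["hololens"])
+#   # 'anchors' holds the 9 anchor (width, height) pairs as a flat list, sorted by
+#   # width and scaled to a 416 x 416 network input; 'reverse_anchors' is the same
+#   # flat list in reverse order.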
+
diff --git a/imageai/Detection/Custom/generator.py b/imageai/Detection/Custom/generator.py
new file mode 100644
index 00000000..d5eb4ba9
--- /dev/null
+++ b/imageai/Detection/Custom/generator.py
@@ -0,0 +1,228 @@
+import cv2
+import copy
+import numpy as np
+from keras.utils import Sequence
+from imageai.Detection.Custom.utils.bbox import BoundBox, bbox_iou
+from imageai.Detection.Custom.utils.image import apply_random_scale_and_crop, random_distort_image, random_flip, correct_bounding_boxes
+
+class BatchGenerator(Sequence):
+ def __init__(self,
+ instances,
+ anchors,
+ labels,
+ downsample=32, # ratio between network input's size and network output's size, 32 for YOLOv3
+ max_box_per_image=30,
+ batch_size=1,
+ min_net_size=320,
+ max_net_size=608,
+ shuffle=True,
+ jitter=True,
+ norm=None
+ ):
+ self.instances = instances
+ self.batch_size = batch_size
+ self.labels = labels
+ self.downsample = downsample
+ self.max_box_per_image = max_box_per_image
+ self.min_net_size = (min_net_size//self.downsample)*self.downsample
+ self.max_net_size = (max_net_size//self.downsample)*self.downsample
+ self.shuffle = shuffle
+ self.jitter = jitter
+ self.norm = norm
+ self.anchors = [BoundBox(0, 0, anchors[2*i], anchors[2*i+1]) for i in range(len(anchors)//2)]
+ self.net_h = 416
+ self.net_w = 416
+
+ if shuffle: np.random.shuffle(self.instances)
+
+ def __len__(self):
+ return int(np.ceil(float(len(self.instances))/self.batch_size))
+
+ def __getitem__(self, idx):
+ # get image input size, change every 10 batches
+ net_h, net_w = self._get_net_size(idx)
+ base_grid_h, base_grid_w = net_h//self.downsample, net_w//self.downsample
+
+ # determine the first and the last indices of the batch
+ l_bound = idx*self.batch_size
+ r_bound = (idx+1)*self.batch_size
+
+ if r_bound > len(self.instances):
+ r_bound = len(self.instances)
+ l_bound = r_bound - self.batch_size
+
+ x_batch = np.zeros((r_bound - l_bound, net_h, net_w, 3)) # input images
+ t_batch = np.zeros((r_bound - l_bound, 1, 1, 1, self.max_box_per_image, 4)) # list of groundtruth boxes
+
+ # initialize the inputs and the outputs
+ yolo_1 = np.zeros((r_bound - l_bound, 1*base_grid_h, 1*base_grid_w, len(self.anchors)//3, 4+1+len(self.labels))) # desired network output 1
+ yolo_2 = np.zeros((r_bound - l_bound, 2*base_grid_h, 2*base_grid_w, len(self.anchors)//3, 4+1+len(self.labels))) # desired network output 2
+ yolo_3 = np.zeros((r_bound - l_bound, 4*base_grid_h, 4*base_grid_w, len(self.anchors)//3, 4+1+len(self.labels))) # desired network output 3
+ yolos = [yolo_3, yolo_2, yolo_1]
+
+ dummy_yolo_1 = np.zeros((r_bound - l_bound, 1))
+ dummy_yolo_2 = np.zeros((r_bound - l_bound, 1))
+ dummy_yolo_3 = np.zeros((r_bound - l_bound, 1))
+
+ instance_count = 0
+ true_box_index = 0
+
+ # do the logic to fill in the inputs and the output
+ for train_instance in self.instances[l_bound:r_bound]:
+ # augment input image and fix object's position and size
+ img, all_objs = self._aug_image(train_instance, net_h, net_w)
+
+ for obj in all_objs:
+ # find the best anchor box for this object
+ max_anchor = None
+ max_index = -1
+ max_iou = -1
+
+ shifted_box = BoundBox(0,
+ 0,
+ obj['xmax']-obj['xmin'],
+ obj['ymax']-obj['ymin'])
+
+ for i in range(len(self.anchors)):
+ anchor = self.anchors[i]
+ iou = bbox_iou(shifted_box, anchor)
+
+ if max_iou < iou:
+ max_anchor = anchor
+ max_index = i
+ max_iou = iou
+
+ # determine the yolo to be responsible for this bounding box
+ yolo = yolos[max_index//3]
+ grid_h, grid_w = yolo.shape[1:3]
+
+ # determine the position of the bounding box on the grid
+ center_x = .5*(obj['xmin'] + obj['xmax'])
+ center_x = center_x / float(net_w) * grid_w # sigma(t_x) + c_x
+ center_y = .5*(obj['ymin'] + obj['ymax'])
+ center_y = center_y / float(net_h) * grid_h # sigma(t_y) + c_y
+
+ # determine the sizes of the bounding box
+ w = np.log((obj['xmax'] - obj['xmin']) / float(max_anchor.xmax)) # t_w
+ h = np.log((obj['ymax'] - obj['ymin']) / float(max_anchor.ymax)) # t_h
+
+ box = [center_x, center_y, w, h]
+
+ # determine the index of the label
+ obj_indx = self.labels.index(obj['name'])
+
+ # determine the location of the cell responsible for this object
+ grid_x = int(np.floor(center_x))
+ grid_y = int(np.floor(center_y))
+
+ # assign ground truth x, y, w, h, confidence and class probs to y_batch
+ yolo[instance_count, grid_y, grid_x, max_index%3] = 0
+ yolo[instance_count, grid_y, grid_x, max_index%3, 0:4] = box
+ yolo[instance_count, grid_y, grid_x, max_index%3, 4 ] = 1.
+ yolo[instance_count, grid_y, grid_x, max_index%3, 5+obj_indx] = 1
+
+ # assign the true box to t_batch
+ true_box = [center_x, center_y, obj['xmax'] - obj['xmin'], obj['ymax'] - obj['ymin']]
+ t_batch[instance_count, 0, 0, 0, true_box_index] = true_box
+
+ true_box_index += 1
+ true_box_index = true_box_index % self.max_box_per_image
+
+ # assign input image to x_batch
+ if self.norm != None:
+ x_batch[instance_count] = self.norm(img)
+ else:
+ # plot image and bounding boxes for sanity check
+ for obj in all_objs:
+ cv2.rectangle(img, (obj['xmin'],obj['ymin']), (obj['xmax'],obj['ymax']), (255,0,0), 3)
+ cv2.putText(img, obj['name'],
+ (obj['xmin']+2, obj['ymin']+12),
+ 0, 1.2e-3 * img.shape[0],
+ (0,255,0), 2)
+
+ x_batch[instance_count] = img
+
+ # increase instance counter in the current batch
+ instance_count += 1
+
+ return [x_batch, t_batch, yolo_1, yolo_2, yolo_3], [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3]
+
+ def _get_net_size(self, idx):
+ if idx%10 == 0:
+ net_size = self.downsample*np.random.randint(self.min_net_size/self.downsample, \
+ self.max_net_size/self.downsample+1)
+
+ self.net_h, self.net_w = net_size, net_size
+ return self.net_h, self.net_w
+
+ def _aug_image(self, instance, net_h, net_w):
+ image_name = instance['filename']
+ image = cv2.imread(image_name) # RGB image
+
+ if image is None: print('Cannot find ', image_name)
+ image = image[:,:,::-1] # RGB image
+
+ image_h, image_w, _ = image.shape
+
+ # determine the amount of scaling and cropping
+        dw = self.jitter * image_w
+        dh = self.jitter * image_h
+
+        new_ar = (image_w + np.random.uniform(-dw, dw)) / (image_h + np.random.uniform(-dh, dh))
+        scale = np.random.uniform(0.25, 2)
+
+        if (new_ar < 1):
+            new_h = int(scale * net_h)
+            new_w = int(net_h * new_ar)
+        else:
+            new_w = int(scale * net_w)
+            new_h = int(net_w / new_ar)
+
+        dx = int(np.random.uniform(0, net_w - new_w))
+        dy = int(np.random.uniform(0, net_h - new_h))
+
+ # apply scaling and cropping
+ im_sized = apply_random_scale_and_crop(image, new_w, new_h, net_w, net_h, dx, dy)
+
+ # randomly distort hsv space
+ im_sized = random_distort_image(im_sized)
+
+ # randomly flip
+ flip = np.random.randint(2)
+ im_sized = random_flip(im_sized, flip)
+
+ # correct the size and pos of bounding boxes
+ all_objs = correct_bounding_boxes(instance['object'], new_w, new_h, net_w, net_h, dx, dy, flip, image_w, image_h)
+
+ return im_sized, all_objs
+
+ def on_epoch_end(self):
+ if self.shuffle: np.random.shuffle(self.instances)
+
+ def num_classes(self):
+ return len(self.labels)
+
+ def size(self):
+ return len(self.instances)
+
+ def get_anchors(self):
+ anchors = []
+
+ for anchor in self.anchors:
+ anchors += [anchor.xmax, anchor.ymax]
+
+ return anchors
+
+ def load_annotation(self, i):
+ annots = []
+
+ for obj in self.instances[i]['object']:
+ annot = [obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax'], self.labels.index(obj['name'])]
+ annots += [annot]
+
+ if len(annots) == 0: annots = [[]]
+
+ return np.array(annots)
+
+ def load_image(self, i):
+ return cv2.imread(self.instances[i]['filename'])
\ No newline at end of file
diff --git a/imageai/Detection/Custom/utils/__init__.py b/imageai/Detection/Custom/utils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/imageai/Detection/Custom/utils/bbox.py b/imageai/Detection/Custom/utils/bbox.py
new file mode 100644
index 00000000..8ba7ecc9
--- /dev/null
+++ b/imageai/Detection/Custom/utils/bbox.py
@@ -0,0 +1,89 @@
+import numpy as np
+import os
+import cv2
+from .colors import get_color
+
+class BoundBox:
+ def __init__(self, xmin, ymin, xmax, ymax, c = None, classes = None):
+ self.xmin = xmin
+ self.ymin = ymin
+ self.xmax = xmax
+ self.ymax = ymax
+
+ self.c = c
+ self.classes = classes
+
+ self.label = -1
+ self.score = -1
+
+ def get_label(self):
+ if self.label == -1:
+ self.label = np.argmax(self.classes)
+
+ return self.label
+
+ def get_score(self):
+ if self.score == -1:
+ self.score = self.classes[self.get_label()]
+
+ return self.score
+
+def _interval_overlap(interval_a, interval_b):
+ x1, x2 = interval_a
+ x3, x4 = interval_b
+
+ if x3 < x1:
+ if x4 < x1:
+ return 0
+ else:
+ return min(x2,x4) - x1
+ else:
+ if x2 < x3:
+ return 0
+ else:
+ return min(x2,x4) - x3
+
+def bbox_iou(box1, box2):
+ intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
+ intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])
+
+ intersect = intersect_w * intersect_h
+
+ w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin
+ w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin
+
+ union = w1*h1 + w2*h2 - intersect
+
+ return float(intersect) / union
+
+def draw_boxes(image, boxes, labels, obj_thresh, quiet=True):
+ for box in boxes:
+ label_str = ''
+ label = -1
+
+ for i in range(len(labels)):
+ if box.classes[i] > obj_thresh:
+ if label_str != '': label_str += ', '
+ label_str += (labels[i] + ' ' + str(round(box.get_score()*100, 2)) + '%')
+ label = i
+ if not quiet: print(label_str)
+
+ if label >= 0:
+ text_size = cv2.getTextSize(label_str, cv2.FONT_HERSHEY_SIMPLEX, 1.1e-3 * image.shape[0], 5)
+ width, height = text_size[0][0], text_size[0][1]
+ region = np.array([[box.xmin-3, box.ymin],
+ [box.xmin-3, box.ymin-height-26],
+ [box.xmin+width+13, box.ymin-height-26],
+ [box.xmin+width+13, box.ymin]], dtype='int32')
+
+ cv2.rectangle(img=image, pt1=(box.xmin,box.ymin), pt2=(box.xmax,box.ymax), color=get_color(label), thickness=5)
+ cv2.fillPoly(img=image, pts=[region], color=get_color(label))
+ cv2.putText(img=image,
+ text=label_str,
+ org=(box.xmin+13, box.ymin - 13),
+ fontFace=cv2.FONT_HERSHEY_SIMPLEX,
+ fontScale=1e-3 * image.shape[0],
+ color=(0,0,0),
+ thickness=2)
+
+ return image
\ No newline at end of file
diff --git a/imageai/Detection/Custom/utils/colors.py b/imageai/Detection/Custom/utils/colors.py
new file mode 100644
index 00000000..2983a98a
--- /dev/null
+++ b/imageai/Detection/Custom/utils/colors.py
@@ -0,0 +1,96 @@
+def get_color(label):
+ """ Return a color from a set of predefined colors. Contains 80 colors in total.
+ code originally from https://github.com/fizyr/keras-retinanet/
+ Args
+ label: The label to get the color for.
+ Returns
+ A list of three values representing a RGB color.
+ """
+ if label < len(colors):
+ return colors[label]
+ else:
+ print('Label {} has no color, returning default.'.format(label))
+ return (0, 255, 0)
+
+colors = [
+ [31 , 0 , 255] ,
+ [0 , 159 , 255] ,
+ [255 , 95 , 0] ,
+ [255 , 19 , 0] ,
+ [255 , 0 , 0] ,
+ [255 , 38 , 0] ,
+ [0 , 255 , 25] ,
+ [255 , 0 , 133] ,
+ [255 , 172 , 0] ,
+ [108 , 0 , 255] ,
+ [0 , 82 , 255] ,
+ [0 , 255 , 6] ,
+ [255 , 0 , 152] ,
+ [223 , 0 , 255] ,
+ [12 , 0 , 255] ,
+ [0 , 255 , 178] ,
+ [108 , 255 , 0] ,
+ [184 , 0 , 255] ,
+ [255 , 0 , 76] ,
+ [146 , 255 , 0] ,
+ [51 , 0 , 255] ,
+ [0 , 197 , 255] ,
+ [255 , 248 , 0] ,
+ [255 , 0 , 19] ,
+ [255 , 0 , 38] ,
+ [89 , 255 , 0] ,
+ [127 , 255 , 0] ,
+ [255 , 153 , 0] ,
+ [0 , 255 , 255] ,
+ [0 , 255 , 216] ,
+ [0 , 255 , 121] ,
+ [255 , 0 , 248] ,
+ [70 , 0 , 255] ,
+ [0 , 255 , 159] ,
+ [0 , 216 , 255] ,
+ [0 , 6 , 255] ,
+ [0 , 63 , 255] ,
+ [31 , 255 , 0] ,
+ [255 , 57 , 0] ,
+ [255 , 0 , 210] ,
+ [0 , 255 , 102] ,
+ [242 , 255 , 0] ,
+ [255 , 191 , 0] ,
+ [0 , 255 , 63] ,
+ [255 , 0 , 95] ,
+ [146 , 0 , 255] ,
+ [184 , 255 , 0] ,
+ [255 , 114 , 0] ,
+ [0 , 255 , 235] ,
+ [255 , 229 , 0] ,
+ [0 , 178 , 255] ,
+ [255 , 0 , 114] ,
+ [255 , 0 , 57] ,
+ [0 , 140 , 255] ,
+ [0 , 121 , 255] ,
+ [12 , 255 , 0] ,
+ [255 , 210 , 0] ,
+ [0 , 255 , 44] ,
+ [165 , 255 , 0] ,
+ [0 , 25 , 255] ,
+ [0 , 255 , 140] ,
+ [0 , 101 , 255] ,
+ [0 , 255 , 82] ,
+ [223 , 255 , 0] ,
+ [242 , 0 , 255] ,
+ [89 , 0 , 255] ,
+ [165 , 0 , 255] ,
+ [70 , 255 , 0] ,
+ [255 , 0 , 172] ,
+ [255 , 76 , 0] ,
+ [203 , 255 , 0] ,
+ [204 , 0 , 255] ,
+ [255 , 0 , 229] ,
+ [255 , 133 , 0] ,
+ [127 , 0 , 255] ,
+ [0 , 235 , 255] ,
+ [0 , 255 , 197] ,
+ [255 , 0 , 191] ,
+ [0 , 44 , 255] ,
+ [50 , 255 , 0]
+]
diff --git a/imageai/Detection/Custom/utils/image.py b/imageai/Detection/Custom/utils/image.py
new file mode 100644
index 00000000..3e829f5d
--- /dev/null
+++ b/imageai/Detection/Custom/utils/image.py
@@ -0,0 +1,86 @@
+import cv2
+import numpy as np
+import copy
+
+def _rand_scale(scale):
+ scale = np.random.uniform(1, scale)
+    return scale if (np.random.randint(2) == 0) else 1./scale
+
+def _constrain(min_v, max_v, value):
+ if value < min_v: return min_v
+ if value > max_v: return max_v
+ return value
+
+def random_flip(image, flip):
+ if flip == 1: return cv2.flip(image, 1)
+ return image
+
+def correct_bounding_boxes(boxes, new_w, new_h, net_w, net_h, dx, dy, flip, image_w, image_h):
+ boxes = copy.deepcopy(boxes)
+
+ # randomize boxes' order
+ np.random.shuffle(boxes)
+
+ # correct sizes and positions
+ sx, sy = float(new_w)/image_w, float(new_h)/image_h
+ zero_boxes = []
+
+ for i in range(len(boxes)):
+ boxes[i]['xmin'] = int(_constrain(0, net_w, boxes[i]['xmin']*sx + dx))
+ boxes[i]['xmax'] = int(_constrain(0, net_w, boxes[i]['xmax']*sx + dx))
+ boxes[i]['ymin'] = int(_constrain(0, net_h, boxes[i]['ymin']*sy + dy))
+ boxes[i]['ymax'] = int(_constrain(0, net_h, boxes[i]['ymax']*sy + dy))
+
+ if boxes[i]['xmax'] <= boxes[i]['xmin'] or boxes[i]['ymax'] <= boxes[i]['ymin']:
+ zero_boxes += [i]
+ continue
+
+ if flip == 1:
+            swap = boxes[i]['xmin']
+ boxes[i]['xmin'] = net_w - boxes[i]['xmax']
+ boxes[i]['xmax'] = net_w - swap
+
+ boxes = [boxes[i] for i in range(len(boxes)) if i not in zero_boxes]
+
+ return boxes
+
+def random_distort_image(image, hue=18, saturation=1.5, exposure=1.5):
+ # determine scale factors
+ dhue = np.random.uniform(-hue, hue)
+    dsat = _rand_scale(saturation)
+    dexp = _rand_scale(exposure)
+
+ # convert RGB space to HSV space
+ image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV).astype('float')
+
+ # change satuation and exposure
+ image[:,:,1] *= dsat
+ image[:,:,2] *= dexp
+
+ # change hue
+ image[:,:,0] += dhue
+ image[:,:,0] -= (image[:,:,0] > 180)*180
+ image[:,:,0] += (image[:,:,0] < 0) *180
+
+ # convert back to RGB from HSV
+ return cv2.cvtColor(image.astype('uint8'), cv2.COLOR_HSV2RGB)
+
+def apply_random_scale_and_crop(image, new_w, new_h, net_w, net_h, dx, dy):
+ im_sized = cv2.resize(image, (new_w, new_h))
+
+ if dx > 0:
+ im_sized = np.pad(im_sized, ((0,0), (dx,0), (0,0)), mode='constant', constant_values=127)
+ else:
+ im_sized = im_sized[:,-dx:,:]
+ if (new_w + dx) < net_w:
+ im_sized = np.pad(im_sized, ((0,0), (0, net_w - (new_w+dx)), (0,0)), mode='constant', constant_values=127)
+
+ if dy > 0:
+ im_sized = np.pad(im_sized, ((dy,0), (0,0), (0,0)), mode='constant', constant_values=127)
+ else:
+ im_sized = im_sized[-dy:,:,:]
+
+ if (new_h + dy) < net_h:
+ im_sized = np.pad(im_sized, ((0, net_h - (new_h+dy)), (0,0), (0,0)), mode='constant', constant_values=127)
+
+ return im_sized[:net_h, :net_w,:]
\ No newline at end of file
diff --git a/imageai/Detection/Custom/utils/multi_gpu_model.py b/imageai/Detection/Custom/utils/multi_gpu_model.py
new file mode 100644
index 00000000..90645825
--- /dev/null
+++ b/imageai/Detection/Custom/utils/multi_gpu_model.py
@@ -0,0 +1,62 @@
+from keras.layers import Lambda, concatenate
+from keras.models import Model
+import tensorflow as tf
+
+def multi_gpu_model(model, gpus):
+ if isinstance(gpus, (list, tuple)):
+ num_gpus = len(gpus)
+ target_gpu_ids = gpus
+ else:
+ num_gpus = gpus
+ target_gpu_ids = range(num_gpus)
+
+ def get_slice(data, i, parts):
+ shape = tf.shape(data)
+ batch_size = shape[:1]
+ input_shape = shape[1:]
+ step = batch_size // parts
+ if i == num_gpus - 1:
+ size = batch_size - step * i
+ else:
+ size = step
+ size = tf.concat([size, input_shape], axis=0)
+ stride = tf.concat([step, input_shape * 0], axis=0)
+ start = stride * i
+ return tf.slice(data, start, size)
+
+ all_outputs = []
+ for i in range(len(model.outputs)):
+ all_outputs.append([])
+
+ # Place a copy of the model on each GPU,
+ # each getting a slice of the inputs.
+ for i, gpu_id in enumerate(target_gpu_ids):
+ with tf.device('/gpu:%d' % gpu_id):
+ with tf.name_scope('replica_%d' % gpu_id):
+ inputs = []
+ # Retrieve a slice of the input.
+ for x in model.inputs:
+ input_shape = tuple(x.get_shape().as_list())[1:]
+ slice_i = Lambda(get_slice,
+ output_shape=input_shape,
+ arguments={'i': i,
+ 'parts': num_gpus})(x)
+ inputs.append(slice_i)
+
+ # Apply model on slice
+ # (creating a model replica on the target device).
+ outputs = model(inputs)
+ if not isinstance(outputs, list):
+ outputs = [outputs]
+
+ # Save the outputs for merging back together later.
+ for o in range(len(outputs)):
+ all_outputs[o].append(outputs[o])
+
+ # Merge outputs on CPU.
+ with tf.device('/cpu:0'):
+ merged = []
+ for name, outputs in zip(model.output_names, all_outputs):
+ merged.append(concatenate(outputs,
+ axis=0, name=name))
+ return Model(model.inputs, merged)
\ No newline at end of file
diff --git a/imageai/Detection/Custom/utils/utils.py b/imageai/Detection/Custom/utils/utils.py
new file mode 100644
index 00000000..615bff7d
--- /dev/null
+++ b/imageai/Detection/Custom/utils/utils.py
@@ -0,0 +1,323 @@
+import cv2
+import numpy as np
+import os
+from .bbox import BoundBox, bbox_iou
+from scipy.special import expit
+
+def _sigmoid(x):
+ return expit(x)
+
+def makedirs(path):
+ try:
+ os.makedirs(path)
+ except OSError:
+ if not os.path.isdir(path):
+ raise
+
+def evaluate(model,
+ generator,
+             iou_threshold=0.5,
+             obj_thresh=0.5,
+             nms_thresh=0.45,
+ net_h=416,
+ net_w=416,
+ save_path=None):
+ """ Evaluate a given dataset using a given model.
+ code originally from https://github.com/fizyr/keras-retinanet
+
+ # Arguments
+ model : The model to evaluate.
+ generator : The generator that represents the dataset to evaluate.
+ iou_threshold : The threshold used to consider when a detection is positive or negative.
+ obj_thresh : The threshold used to distinguish between object and non-object
+ nms_thresh : The threshold used to determine whether two detections are duplicates
+ net_h : The height of the input image to the model, higher value results in better accuracy
+ net_w : The width of the input image to the model
+ save_path : The path to save images with visualized detections to.
+ # Returns
+ A dict mapping class names to mAP scores.
+ """
+ # gather all detections and annotations
+ all_detections = [[None for i in range(generator.num_classes())] for j in range(generator.size())]
+ all_annotations = [[None for i in range(generator.num_classes())] for j in range(generator.size())]
+
+ for i in range(generator.size()):
+ raw_image = [generator.load_image(i)]
+
+ # make the boxes and the labels
+ pred_boxes = get_yolo_boxes(model, raw_image, net_h, net_w, generator.get_anchors(), obj_thresh, nms_thresh)[0]
+
+ score = np.array([box.get_score() for box in pred_boxes])
+ pred_labels = np.array([box.label for box in pred_boxes])
+
+ if len(pred_boxes) > 0:
+ pred_boxes = np.array([[box.xmin, box.ymin, box.xmax, box.ymax, box.get_score()] for box in pred_boxes])
+ else:
+ pred_boxes = np.array([[]])
+
+ # sort the boxes and the labels according to scores
+ score_sort = np.argsort(-score)
+ pred_labels = pred_labels[score_sort]
+ pred_boxes = pred_boxes[score_sort]
+
+ # copy detections to all_detections
+ for label in range(generator.num_classes()):
+ all_detections[i][label] = pred_boxes[pred_labels == label, :]
+
+ annotations = generator.load_annotation(i)
+
+ # copy detections to all_annotations
+ for label in range(generator.num_classes()):
+ all_annotations[i][label] = annotations[annotations[:, 4] == label, :4].copy()
+
+ # compute mAP by comparing all detections and all annotations
+ average_precisions = {}
+
+ for label in range(generator.num_classes()):
+ false_positives = np.zeros((0,))
+ true_positives = np.zeros((0,))
+ scores = np.zeros((0,))
+ num_annotations = 0.0
+
+ for i in range(generator.size()):
+ detections = all_detections[i][label]
+ annotations = all_annotations[i][label]
+ num_annotations += annotations.shape[0]
+ detected_annotations = []
+
+ for d in detections:
+ scores = np.append(scores, d[4])
+
+ if annotations.shape[0] == 0:
+ false_positives = np.append(false_positives, 1)
+ true_positives = np.append(true_positives, 0)
+ continue
+
+ overlaps = compute_overlap(np.expand_dims(d, axis=0), annotations)
+ assigned_annotation = np.argmax(overlaps, axis=1)
+ max_overlap = overlaps[0, assigned_annotation]
+
+ if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations:
+ false_positives = np.append(false_positives, 0)
+ true_positives = np.append(true_positives, 1)
+ detected_annotations.append(assigned_annotation)
+ else:
+ false_positives = np.append(false_positives, 1)
+ true_positives = np.append(true_positives, 0)
+
+ # no annotations -> AP for this class is 0 (is this correct?)
+ if num_annotations == 0:
+ average_precisions[label] = 0
+ continue
+
+ # sort by score
+ indices = np.argsort(-scores)
+ false_positives = false_positives[indices]
+ true_positives = true_positives[indices]
+
+ # compute false positives and true positives
+ false_positives = np.cumsum(false_positives)
+ true_positives = np.cumsum(true_positives)
+
+ # compute recall and precision
+ recall = true_positives / num_annotations
+ precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps)
+
+ # compute average precision
+ average_precision = compute_ap(recall, precision)
+ average_precisions[label] = average_precision
+
+ return average_precisions
+
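+# Hedged usage sketch (comment-only, illustrative): evaluate() expects a trained
+# Keras model and a BatchGenerator over the validation set, as wired up in
+# imageai/Detection/Custom/evaluate.py; the threshold values are illustrative and
+# infer_model, valid_generator and labels are assumed to come from that script.
+#
+#   average_precisions = evaluate(infer_model, valid_generator,
+#                                 iou_threshold=0.5, obj_thresh=0.5, nms_thresh=0.45)
+#   for label, average_precision in average_precisions.items():
+#       print(labels[label], '{:.4f}'.format(average_precision))
+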
+def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w):
+ if (float(net_w)/image_w) < (float(net_h)/image_h):
+ new_w = net_w
+ new_h = (image_h*net_w)/image_w
+ else:
+ new_h = net_h # scale by net_h/image_h in this branch (the upstream code used net_w here, which only matches when net_h == net_w)
+ new_w = (image_w*net_h)/image_h
+
+ for i in range(len(boxes)):
+ x_offset, x_scale = (net_w - new_w)/2./net_w, float(new_w)/net_w
+ y_offset, y_scale = (net_h - new_h)/2./net_h, float(new_h)/net_h
+
+ boxes[i].xmin = int((boxes[i].xmin - x_offset) / x_scale * image_w)
+ boxes[i].xmax = int((boxes[i].xmax - x_offset) / x_scale * image_w)
+ boxes[i].ymin = int((boxes[i].ymin - y_offset) / y_scale * image_h)
+ boxes[i].ymax = int((boxes[i].ymax - y_offset) / y_scale * image_h)
+
+def do_nms(boxes, nms_thresh):
+ if len(boxes) > 0:
+ nb_class = len(boxes[0].classes)
+ else:
+ return
+
+ for c in range(nb_class):
+ sorted_indices = np.argsort([-box.classes[c] for box in boxes])
+
+ for i in range(len(sorted_indices)):
+ index_i = sorted_indices[i]
+
+ if boxes[index_i].classes[c] == 0: continue
+
+ for j in range(i+1, len(sorted_indices)):
+ index_j = sorted_indices[j]
+
+ if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh:
+ boxes[index_j].classes[c] = 0
+
+def decode_netout(netout, anchors, obj_thresh, net_h, net_w):
+ grid_h, grid_w = netout.shape[:2]
+ nb_box = 3
+ netout = netout.reshape((grid_h, grid_w, nb_box, -1))
+ nb_class = netout.shape[-1] - 5
+
+ boxes = []
+
+ netout[..., :2] = _sigmoid(netout[..., :2])
+ netout[..., 4] = _sigmoid(netout[..., 4])
+ netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:])
+ netout[..., 5:] *= netout[..., 5:] > obj_thresh
+
+ for i in range(grid_h*grid_w):
+ row = i // grid_w
+ col = i % grid_w
+
+ for b in range(nb_box):
+ # 4th element is objectness score
+ objectness = netout[row, col, b, 4]
+
+ if(objectness <= obj_thresh): continue
+
+ # first 4 elements are x, y, w, and h
+ x, y, w, h = netout[row,col,b,:4]
+
+ x = (col + x) / grid_w # center position, unit: image width
+ y = (row + y) / grid_h # center position, unit: image height
+ w = anchors[2 * b + 0] * np.exp(w) / net_w # unit: image width
+ h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height
+
+ # last elements are class probabilities
+ classes = netout[row,col,b,5:]
+
+ box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes)
+
+ boxes.append(box)
+
+ return boxes
+
+def preprocess_input(image, net_h, net_w):
+ new_h, new_w, _ = image.shape
+
+ # determine the new size of the image
+ if (float(net_w)/new_w) < (float(net_h)/new_h):
+ new_h = (new_h * net_w)//new_w
+ new_w = net_w
+ else:
+ new_w = (new_w * net_h)//new_h
+ new_h = net_h
+
+ # resize the image to the new size
+ resized = cv2.resize(image[:,:,::-1]/255., (new_w, new_h))
+
+ # embed the image into the standard letter box
+ new_image = np.ones((net_h, net_w, 3)) * 0.5
+ new_image[(net_h-new_h)//2:(net_h+new_h)//2, (net_w-new_w)//2:(net_w+new_w)//2, :] = resized
+ new_image = np.expand_dims(new_image, 0)
+
+ return new_image
+
+def normalize(image):
+ return image/255.
+
+def get_yolo_boxes(model, images, net_h, net_w, anchors, obj_thresh, nms_thresh):
+ image_h, image_w, _ = images[0].shape
+ nb_images = len(images)
+ batch_input = np.zeros((nb_images, net_h, net_w, 3))
+
+ # preprocess the input
+ for i in range(nb_images):
+ batch_input[i] = preprocess_input(images[i], net_h, net_w)
+
+ # run the prediction
+ batch_output = model.predict_on_batch(batch_input)
+ batch_boxes = [None]*nb_images
+
+ for i in range(nb_images):
+ yolos = [batch_output[0][i], batch_output[1][i], batch_output[2][i]]
+ boxes = []
+
+ # decode the output of the network
+ for j in range(len(yolos)):
+ yolo_anchors = anchors[(2-j)*6:(3-j)*6] # config['model']['anchors']
+ boxes += decode_netout(yolos[j], yolo_anchors, obj_thresh, net_h, net_w)
+
+ # correct the sizes of the bounding boxes
+ correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w)
+
+ # suppress non-maximal boxes
+ do_nms(boxes, nms_thresh)
+
+ batch_boxes[i] = boxes
+
+ return batch_boxes
+
+def compute_overlap(a, b):
+ """
+ Code originally from https://github.com/rbgirshick/py-faster-rcnn.
+ Parameters
+ ----------
+ a: (N, 4) ndarray of float
+ b: (K, 4) ndarray of float
+ Returns
+ -------
+ overlaps: (N, K) ndarray of overlap between boxes and query_boxes
+ """
+ area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
+
+ iw = np.minimum(np.expand_dims(a[:, 2], axis=1), b[:, 2]) - np.maximum(np.expand_dims(a[:, 0], 1), b[:, 0])
+ ih = np.minimum(np.expand_dims(a[:, 3], axis=1), b[:, 3]) - np.maximum(np.expand_dims(a[:, 1], 1), b[:, 1])
+
+ iw = np.maximum(iw, 0)
+ ih = np.maximum(ih, 0)
+
+ ua = np.expand_dims((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), axis=1) + area - iw * ih
+
+ ua = np.maximum(ua, np.finfo(float).eps)
+
+ intersection = iw * ih
+
+ return intersection / ua
+
+def compute_ap(recall, precision):
+ """ Compute the average precision, given the recall and precision curves.
+ Code originally from https://github.com/rbgirshick/py-faster-rcnn.
+
+ # Arguments
+ recall: The recall curve (list).
+ precision: The precision curve (list).
+ # Returns
+ The average precision as computed in py-faster-rcnn.
+ """
+ # correct AP calculation
+ # first append sentinel values at the end
+ mrec = np.concatenate(([0.], recall, [1.]))
+ mpre = np.concatenate(([0.], precision, [0.]))
+
+ # compute the precision envelope
+ for i in range(mpre.size - 1, 0, -1):
+ mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
+
+ # to calculate area under PR curve, look for points
+ # where X axis (recall) changes value
+ i = np.where(mrec[1:] != mrec[:-1])[0]
+
+ # and sum (\Delta recall) * prec
+ ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
+ return ap
+
+def _softmax(x, axis=-1):
+ x = x - np.amax(x, axis, keepdims=True)
+ e_x = np.exp(x)
+
+ return e_x / e_x.sum(axis, keepdims=True)
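
The evaluation utilities above (compute_overlap, compute_ap and the mAP loop) are plain numpy code. Below is a minimal sketch of how they behave on toy data; the import path is an assumption about where this new module lands in the package, so adjust it to your checkout.

```python
import numpy as np

# Assumed location of the helpers shown above; adjust to your checkout.
from imageai.Detection.Custom.utils.utils import compute_overlap, compute_ap

# Toy boxes in [xmin, ymin, xmax, ymax] format: one prediction vs. one ground truth.
pred = np.array([[10.0, 10.0, 50.0, 50.0]])
truth = np.array([[12.0, 12.0, 48.0, 52.0]])
print(compute_overlap(pred, truth))   # (1, 1) array of IoU values, ~0.82 here

# A tiny precision/recall curve; compute_ap integrates the interpolated envelope.
recall = np.array([0.0, 0.5, 1.0])
precision = np.array([1.0, 1.0, 0.5])
print(compute_ap(recall, precision))  # 0.75 for this toy curve
```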
diff --git a/imageai/Detection/Custom/voc.py b/imageai/Detection/Custom/voc.py
new file mode 100644
index 00000000..f51e5fd4
--- /dev/null
+++ b/imageai/Detection/Custom/voc.py
@@ -0,0 +1,67 @@
+import numpy as np
+import os
+import xml.etree.ElementTree as ET
+import pickle
+
+def parse_voc_annotation(ann_dir, img_dir, cache_name, labels=[]):
+ if os.path.exists(cache_name):
+ with open(cache_name, 'rb') as handle:
+ cache = pickle.load(handle)
+ all_insts, seen_labels = cache['all_insts'], cache['seen_labels']
+ else:
+ all_insts = []
+ seen_labels = {}
+
+ for ann in sorted(os.listdir(ann_dir)):
+ img = {'object':[]}
+
+ try:
+ tree = ET.parse(ann_dir + ann)
+ except Exception as e:
+ print(e)
+ print('Ignoring this bad annotation: ' + ann_dir + ann)
+ continue
+
+ for elem in tree.iter():
+ if 'filename' in elem.tag:
+ img['filename'] = img_dir + elem.text
+ if 'width' in elem.tag:
+ img['width'] = int(elem.text)
+ if 'height' in elem.tag:
+ img['height'] = int(elem.text)
+ if 'object' in elem.tag or 'part' in elem.tag:
+ obj = {}
+
+ for attr in list(elem):
+ if 'name' in attr.tag:
+ obj['name'] = attr.text
+
+ if obj['name'] in seen_labels:
+ seen_labels[obj['name']] += 1
+ else:
+ seen_labels[obj['name']] = 1
+
+ if len(labels) > 0 and obj['name'] not in labels:
+ break
+ else:
+ img['object'] += [obj]
+
+ if 'bndbox' in attr.tag:
+ for dim in list(attr):
+ if 'xmin' in dim.tag:
+ obj['xmin'] = int(round(float(dim.text)))
+ if 'ymin' in dim.tag:
+ obj['ymin'] = int(round(float(dim.text)))
+ if 'xmax' in dim.tag:
+ obj['xmax'] = int(round(float(dim.text)))
+ if 'ymax' in dim.tag:
+ obj['ymax'] = int(round(float(dim.text)))
+
+ if len(img['object']) > 0:
+ all_insts += [img]
+
+ cache = {'all_insts': all_insts, 'seen_labels': seen_labels}
+ with open(cache_name, 'wb') as handle:
+ pickle.dump(cache, handle, protocol=pickle.HIGHEST_PROTOCOL)
+
+ return all_insts, seen_labels
\ No newline at end of file
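
The new parse_voc_annotation helper caches the parsed Pascal VOC annotations to a pickle file and returns one dict per annotated image plus a label histogram. A small usage sketch follows; directory names, the label list and the printed counts are placeholders.

```python
from imageai.Detection.Custom.voc import parse_voc_annotation

# The parser concatenates directory and file names directly (ann_dir + ann),
# so both directory arguments must end with a path separator.
all_insts, seen_labels = parse_voc_annotation(
    ann_dir="hololens/train/annotations/",   # placeholder paths
    img_dir="hololens/train/images/",
    cache_name="hololens/train/annotations_cache.pkl",
    labels=["hololens"]                      # an empty list keeps every label found
)

print(len(all_insts), "annotated images")
print(seen_labels)                           # e.g. {'hololens': 241}
```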
diff --git a/imageai/Detection/Custom/yolo.py b/imageai/Detection/Custom/yolo.py
new file mode 100644
index 00000000..f3c53922
--- /dev/null
+++ b/imageai/Detection/Custom/yolo.py
@@ -0,0 +1,364 @@
+from keras.layers import Conv2D, Input, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D, Lambda
+from keras.layers.merge import add, concatenate
+from keras.models import Model
+from keras.engine.topology import Layer
+import tensorflow as tf
+
+class YoloLayer(Layer):
+ def __init__(self, anchors, max_grid, batch_size, warmup_batches, ignore_thresh,
+ grid_scale, obj_scale, noobj_scale, xywh_scale, class_scale,
+ **kwargs):
+ # make the model settings persistent
+ self.ignore_thresh = ignore_thresh
+ self.warmup_batches = warmup_batches
+ self.anchors = tf.constant(anchors, dtype='float', shape=[1,1,1,3,2])
+ self.grid_scale = grid_scale
+ self.obj_scale = obj_scale
+ self.noobj_scale = noobj_scale
+ self.xywh_scale = xywh_scale
+ self.class_scale = class_scale
+
+ # make a persistent mesh grid
+ max_grid_h, max_grid_w = max_grid
+
+ cell_x = tf.to_float(tf.reshape(tf.tile(tf.range(max_grid_w), [max_grid_h]), (1, max_grid_h, max_grid_w, 1, 1)))
+ cell_y = tf.transpose(cell_x, (0,2,1,3,4))
+ self.cell_grid = tf.tile(tf.concat([cell_x,cell_y],-1), [batch_size, 1, 1, 3, 1])
+
+ super(YoloLayer, self).__init__(**kwargs)
+
+ def build(self, input_shape):
+ super(YoloLayer, self).build(input_shape) # Be sure to call this somewhere!
+
+ def call(self, x):
+ input_image, y_pred, y_true, true_boxes = x
+
+ # adjust the shape of the y_predict [batch, grid_h, grid_w, 3, 4+1+nb_class]
+ y_pred = tf.reshape(y_pred, tf.concat([tf.shape(y_pred)[:3], tf.constant([3, -1])], axis=0))
+
+ # initialize the masks
+ object_mask = tf.expand_dims(y_true[..., 4], 4)
+
+ # the variable to keep track of number of batches processed
+ batch_seen = tf.Variable(0.)
+
+ # compute grid factor and net factor
+ grid_h = tf.shape(y_true)[1]
+ grid_w = tf.shape(y_true)[2]
+ grid_factor = tf.reshape(tf.cast([grid_w, grid_h], tf.float32), [1,1,1,1,2])
+
+ net_h = tf.shape(input_image)[1]
+ net_w = tf.shape(input_image)[2]
+ net_factor = tf.reshape(tf.cast([net_w, net_h], tf.float32), [1,1,1,1,2])
+
+ """
+ Adjust prediction
+ """
+ pred_box_xy = (self.cell_grid[:,:grid_h,:grid_w,:,:] + tf.sigmoid(y_pred[..., :2])) # sigma(t_xy) + c_xy
+ pred_box_wh = y_pred[..., 2:4] # t_wh
+ pred_box_conf = tf.expand_dims(tf.sigmoid(y_pred[..., 4]), 4) # adjust confidence
+ pred_box_class = y_pred[..., 5:] # adjust class probabilities
+
+ """
+ Adjust ground truth
+ """
+ true_box_xy = y_true[..., 0:2] # (sigma(t_xy) + c_xy)
+ true_box_wh = y_true[..., 2:4] # t_wh
+ true_box_conf = tf.expand_dims(y_true[..., 4], 4)
+ true_box_class = tf.argmax(y_true[..., 5:], -1)
+
+ """
+ Compare each predicted box to all true boxes
+ """
+ # initially, drag all objectness of all boxes to 0
+ conf_delta = pred_box_conf - 0
+
+ # then, ignore the boxes which have good overlap with some true box
+ true_xy = true_boxes[..., 0:2] / grid_factor
+ true_wh = true_boxes[..., 2:4] / net_factor
+
+ true_wh_half = true_wh / 2.
+ true_mins = true_xy - true_wh_half
+ true_maxes = true_xy + true_wh_half
+
+ pred_xy = tf.expand_dims(pred_box_xy / grid_factor, 4)
+ pred_wh = tf.expand_dims(tf.exp(pred_box_wh) * self.anchors / net_factor, 4)
+
+ pred_wh_half = pred_wh / 2.
+ pred_mins = pred_xy - pred_wh_half
+ pred_maxes = pred_xy + pred_wh_half
+
+ intersect_mins = tf.maximum(pred_mins, true_mins)
+ intersect_maxes = tf.minimum(pred_maxes, true_maxes)
+
+ intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
+ intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
+
+ true_areas = true_wh[..., 0] * true_wh[..., 1]
+ pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
+
+ union_areas = pred_areas + true_areas - intersect_areas
+ iou_scores = tf.truediv(intersect_areas, union_areas)
+
+ best_ious = tf.reduce_max(iou_scores, axis=4)
+ conf_delta *= tf.expand_dims(tf.to_float(best_ious < self.ignore_thresh), 4)
+
+ """
+ Compute some online statistics
+ """
+ true_xy = true_box_xy / grid_factor
+ true_wh = tf.exp(true_box_wh) * self.anchors / net_factor
+
+ true_wh_half = true_wh / 2.
+ true_mins = true_xy - true_wh_half
+ true_maxes = true_xy + true_wh_half
+
+ pred_xy = pred_box_xy / grid_factor
+ pred_wh = tf.exp(pred_box_wh) * self.anchors / net_factor
+
+ pred_wh_half = pred_wh / 2.
+ pred_mins = pred_xy - pred_wh_half
+ pred_maxes = pred_xy + pred_wh_half
+
+ intersect_mins = tf.maximum(pred_mins, true_mins)
+ intersect_maxes = tf.minimum(pred_maxes, true_maxes)
+ intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
+ intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
+
+ true_areas = true_wh[..., 0] * true_wh[..., 1]
+ pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
+
+ union_areas = pred_areas + true_areas - intersect_areas
+ iou_scores = tf.truediv(intersect_areas, union_areas)
+ iou_scores = object_mask * tf.expand_dims(iou_scores, 4)
+
+ count = tf.reduce_sum(object_mask)
+ count_noobj = tf.reduce_sum(1 - object_mask)
+ detect_mask = tf.to_float((pred_box_conf*object_mask) >= 0.5)
+ class_mask = tf.expand_dims(tf.to_float(tf.equal(tf.argmax(pred_box_class, -1), true_box_class)), 4)
+ recall50 = tf.reduce_sum(tf.to_float(iou_scores >= 0.5 ) * detect_mask * class_mask) / (count + 1e-3)
+ recall75 = tf.reduce_sum(tf.to_float(iou_scores >= 0.75) * detect_mask * class_mask) / (count + 1e-3)
+ avg_iou = tf.reduce_sum(iou_scores) / (count + 1e-3)
+ avg_obj = tf.reduce_sum(pred_box_conf * object_mask) / (count + 1e-3)
+ avg_noobj = tf.reduce_sum(pred_box_conf * (1-object_mask)) / (count_noobj + 1e-3)
+ avg_cat = tf.reduce_sum(object_mask * class_mask) / (count + 1e-3)
+
+ """
+ Warm-up training
+ """
+ batch_seen = tf.assign_add(batch_seen, 1.)
+
+ true_box_xy, true_box_wh, xywh_mask = tf.cond(tf.less(batch_seen, self.warmup_batches+1),
+ lambda: [true_box_xy + (0.5 + self.cell_grid[:,:grid_h,:grid_w,:,:]) * (1-object_mask),
+ true_box_wh + tf.zeros_like(true_box_wh) * (1-object_mask),
+ tf.ones_like(object_mask)],
+ lambda: [true_box_xy,
+ true_box_wh,
+ object_mask])
+
+ """
+ Compare each true box to all anchor boxes
+ """
+ wh_scale = tf.exp(true_box_wh) * self.anchors / net_factor
+ wh_scale = tf.expand_dims(2 - wh_scale[..., 0] * wh_scale[..., 1], axis=4) # the smaller the box, the bigger the scale
+
+ xy_delta = xywh_mask * (pred_box_xy-true_box_xy) * wh_scale * self.xywh_scale
+ wh_delta = xywh_mask * (pred_box_wh-true_box_wh) * wh_scale * self.xywh_scale
+ conf_delta = object_mask * (pred_box_conf-true_box_conf) * self.obj_scale + (1-object_mask) * conf_delta * self.noobj_scale
+ class_delta = object_mask * \
+ tf.expand_dims(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class), 4) * \
+ self.class_scale
+
+ loss_xy = tf.reduce_sum(tf.square(xy_delta), list(range(1,5)))
+ loss_wh = tf.reduce_sum(tf.square(wh_delta), list(range(1,5)))
+ loss_conf = tf.reduce_sum(tf.square(conf_delta), list(range(1,5)))
+ loss_class = tf.reduce_sum(class_delta, list(range(1,5)))
+
+ loss = loss_xy + loss_wh + loss_conf + loss_class
+
+ loss = tf.Print(loss, [grid_h, avg_obj], message='avg_obj \t\t', summarize=1000)
+ loss = tf.Print(loss, [grid_h, avg_noobj], message='avg_noobj \t\t', summarize=1000)
+ loss = tf.Print(loss, [grid_h, avg_iou], message='avg_iou \t\t', summarize=1000)
+ loss = tf.Print(loss, [grid_h, avg_cat], message='avg_cat \t\t', summarize=1000)
+ loss = tf.Print(loss, [grid_h, recall50], message='recall50 \t', summarize=1000)
+ loss = tf.Print(loss, [grid_h, recall75], message='recall75 \t', summarize=1000)
+ loss = tf.Print(loss, [grid_h, count], message='count \t', summarize=1000)
+ loss = tf.Print(loss, [grid_h, tf.reduce_sum(loss_xy),
+ tf.reduce_sum(loss_wh),
+ tf.reduce_sum(loss_conf),
+ tf.reduce_sum(loss_class)], message='loss xy, wh, conf, class: \t', summarize=1000)
+
+
+ return loss*self.grid_scale
+
+ def compute_output_shape(self, input_shape):
+ return [(None, 1)]
+
+def _conv_block(inp, convs, do_skip=True):
+ x = inp
+ count = 0
+
+ for conv in convs:
+ if count == (len(convs) - 2) and do_skip:
+ skip_connection = x
+ count += 1
+
+ if conv['stride'] > 1: x = ZeroPadding2D(((1,0),(1,0)))(x) # unlike tensorflow, darknet prefers left and top padding
+ x = Conv2D(conv['filter'],
+ conv['kernel'],
+ strides=conv['stride'],
+ padding='valid' if conv['stride'] > 1 else 'same', # unlike tensorflow, darknet prefers left and top padding
+ name='conv_' + str(conv['layer_idx']),
+ use_bias=False if conv['bnorm'] else True)(x)
+ if conv['bnorm']: x = BatchNormalization(epsilon=0.001, name='bnorm_' + str(conv['layer_idx']))(x)
+ if conv['leaky']: x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']))(x)
+
+ return add([skip_connection, x]) if do_skip else x
+
+def create_yolov3_model(
+ nb_class,
+ anchors,
+ max_box_per_image,
+ max_grid,
+ batch_size,
+ warmup_batches,
+ ignore_thresh,
+ grid_scales,
+ obj_scale,
+ noobj_scale,
+ xywh_scale,
+ class_scale
+):
+ input_image = Input(shape=(None, None, 3)) # net_h, net_w, 3
+ true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))
+ true_yolo_1 = Input(shape=(None, None, len(anchors)//6, 4+1+nb_class)) # grid_h, grid_w, nb_anchor, 5+nb_class
+ true_yolo_2 = Input(shape=(None, None, len(anchors)//6, 4+1+nb_class)) # grid_h, grid_w, nb_anchor, 5+nb_class
+ true_yolo_3 = Input(shape=(None, None, len(anchors)//6, 4+1+nb_class)) # grid_h, grid_w, nb_anchor, 5+nb_class
+
+ # Layer 0 => 4
+ x = _conv_block(input_image, [{'filter': 32, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 0},
+ {'filter': 64, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 1},
+ {'filter': 32, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 2},
+ {'filter': 64, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 3}])
+
+ # Layer 5 => 8
+ x = _conv_block(x, [{'filter': 128, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 5},
+ {'filter': 64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 6},
+ {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 7}])
+
+ # Layer 9 => 11
+ x = _conv_block(x, [{'filter': 64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 9},
+ {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 10}])
+
+ # Layer 12 => 15
+ x = _conv_block(x, [{'filter': 256, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 12},
+ {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 13},
+ {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 14}])
+
+ # Layer 16 => 36
+ for i in range(7):
+ x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 16+i*3},
+ {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 17+i*3}])
+
+ skip_36 = x
+
+ # Layer 37 => 40
+ x = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 37},
+ {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 38},
+ {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 39}])
+
+ # Layer 41 => 61
+ for i in range(7):
+ x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 41+i*3},
+ {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 42+i*3}])
+
+ skip_61 = x
+
+ # Layer 62 => 65
+ x = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 62},
+ {'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 63},
+ {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 64}])
+
+ # Layer 66 => 74
+ for i in range(3):
+ x = _conv_block(x, [{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 66+i*3},
+ {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 67+i*3}])
+
+ # Layer 75 => 79
+ x = _conv_block(x, [{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 75},
+ {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 76},
+ {'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 77},
+ {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 78},
+ {'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 79}], do_skip=False)
+
+ # Layer 80 => 82
+ pred_yolo_1 = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 80},
+ {'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 81}], do_skip=False)
+ loss_yolo_1 = YoloLayer(anchors[12:],
+ [1*num for num in max_grid],
+ batch_size,
+ warmup_batches,
+ ignore_thresh,
+ grid_scales[0],
+ obj_scale,
+ noobj_scale,
+ xywh_scale,
+ class_scale)([input_image, pred_yolo_1, true_yolo_1, true_boxes])
+
+ # Layer 83 => 86
+ x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 84}], do_skip=False)
+ x = UpSampling2D(2)(x)
+ x = concatenate([x, skip_61])
+
+ # Layer 87 => 91
+ x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 87},
+ {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 88},
+ {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 89},
+ {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 90},
+ {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 91}], do_skip=False)
+
+ # Layer 92 => 94
+ pred_yolo_2 = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 92},
+ {'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 93}], do_skip=False)
+ loss_yolo_2 = YoloLayer(anchors[6:12],
+ [2*num for num in max_grid],
+ batch_size,
+ warmup_batches,
+ ignore_thresh,
+ grid_scales[1],
+ obj_scale,
+ noobj_scale,
+ xywh_scale,
+ class_scale)([input_image, pred_yolo_2, true_yolo_2, true_boxes])
+
+ # Layer 95 => 98
+ x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 96}], do_skip=False)
+ x = UpSampling2D(2)(x)
+ x = concatenate([x, skip_36])
+
+ # Layer 99 => 106
+ pred_yolo_3 = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 99},
+ {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 100},
+ {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 101},
+ {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 102},
+ {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 103},
+ {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 104},
+ {'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 105}], do_skip=False)
+ loss_yolo_3 = YoloLayer(anchors[:6],
+ [4*num for num in max_grid],
+ batch_size,
+ warmup_batches,
+ ignore_thresh,
+ grid_scales[2],
+ obj_scale,
+ noobj_scale,
+ xywh_scale,
+ class_scale)([input_image, pred_yolo_3, true_yolo_3, true_boxes])
+
+ train_model = Model([input_image, true_boxes, true_yolo_1, true_yolo_2, true_yolo_3], [loss_yolo_1, loss_yolo_2, loss_yolo_3])
+ infer_model = Model(input_image, [pred_yolo_1, pred_yolo_2, pred_yolo_3])
+
+ return [train_model, infer_model]
+
+def dummy_loss(y_true, y_pred):
+ return tf.sqrt(tf.reduce_sum(y_pred))
\ No newline at end of file
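
create_yolov3_model returns a pair of models that share weights: a training model whose outputs are the three YoloLayer losses, and an inference model that emits the raw YOLO heads. The sketch below shows one way to wire it up; all numeric values are illustrative, since the training code in this PR derives them from the dataset and its generated configuration.

```python
from keras.optimizers import Adam
from imageai.Detection.Custom.yolo import create_yolov3_model, dummy_loss

# 9 anchors as flattened (width, height) pairs; illustrative values only.
anchors = [55, 69, 75, 234, 133, 240, 136, 129, 142, 363, 203, 290, 228, 184, 285, 359, 341, 260]

train_model, infer_model = create_yolov3_model(
    nb_class=1,               # e.g. a single custom class
    anchors=anchors,
    max_box_per_image=30,
    max_grid=[448, 448],
    batch_size=4,
    warmup_batches=3,
    ignore_thresh=0.5,
    grid_scales=[1, 1, 1],
    obj_scale=5,
    noobj_scale=1,
    xywh_scale=1,
    class_scale=1,
)

# The YoloLayer outputs already are the loss, so Keras only needs a pass-through loss.
train_model.compile(loss=dummy_loss, optimizer=Adam(lr=1e-4))
```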
diff --git a/imageai/Detection/__init__.py b/imageai/Detection/__init__.py
index 92b19dbd..d5545de8 100644
--- a/imageai/Detection/__init__.py
+++ b/imageai/Detection/__init__.py
@@ -248,7 +248,7 @@ def detectObjectsFromImage(self, input_image="", output_image_path="", input_typ
display_percentage_probability=True, display_object_name=True):
"""
'detectObjectsFromImage()' function is used to detect objects observable in the given image path:
- * input_image , which can be file to path, image numpy array or image file stream
+ * input_image , which can be a filepath, image numpy array or image file stream
* output_image_path (only if output_type = file) , file path to the output image that will contain the detection boxes and label, if output_type="file"
* input_type (optional) , file path/numpy array/image file stream of the image. Acceptable values are "file", "array" and "stream"
* output_type (optional) , file path/numpy array/image file stream of the image. Acceptable values are "file" and "array"
@@ -307,9 +307,9 @@ def detectObjectsFromImage(self, input_image="", output_image_path="", input_typ
:param minimum_percentage_probability:
:param display_percentage_probability:
:param display_object_name
+ :return image_frame:
:return output_objects_array:
- :return detected_copy:
- :return detected_detected_objects_image_array:
+ :return detected_objects_image_array:
"""
if (self.__modelLoaded == False):
@@ -1109,70 +1109,48 @@ def detectObjectsFromVideo(self, input_file_path="", camera_input=None, output_f
"""
'detectObjectsFromVideo()' function is used to detect objects observable in the given video path or a camera input:
- * input_file_path , which is the file path to the input video. It is required only if 'camera_input' is not set
- * camera_input , allows you to parse in camera input for live video detections
- * output_file_path , which is the path to the output video. It is required only if 'save_detected_video' is not set to False
- * frames_per_second , which is the number of frames to be used in the output video
- * frame_detection_interval (optional, 1 by default) , which is the intervals of frames that will be detected.
- * minimum_percentage_probability (optional, 50 by default) , option to set the minimum percentage probability for nominating a detected object for output.
- * log_progress (optional) , which states if the progress of the frame processed is to be logged to console
- * display_percentage_probability (optional), can be used to hide or show probability scores on the detected video frames
- * display_object_name (optional), can be used to show or hide object names on the detected video frames
- * save_save_detected_video (optional, True by default), can be set to or not to save the detected video
- * per_frame_function (optional), this parameter allows you to parse in a function you will want to execute after
- each frame of the video is detected. If this parameter is set to a function, after every video
- frame is detected, the function will be executed with the following values parsed into it:
- -- position number of the frame
- -- an array of dictinaries, with each dictinary corresponding to each object detected.
- Each dictionary contains 'name', 'percentage_probability' and 'box_points'
- -- a dictionary with with keys being the name of each unique objects and value
- are the number of instances of the object present
- -- If return_detected_frame is set to True, the numpy array of the detected frame will be parsed
- as the fourth value into the function
-
- * per_second_function (optional), this parameter allows you to parse in a function you will want to execute after
- each second of the video is detected. If this parameter is set to a function, after every second of a video
- is detected, the function will be executed with the following values parsed into it:
- -- position number of the second
- -- an array of dictionaries whose keys are position number of each frame present in the last second , and the value for each key is the array for each frame that contains the dictionaries for each object detected in the frame
-
- -- an array of dictionaries, with each dictionary corresponding to each frame in the past second, and the keys of each dictionary are the name of the number of unique objects detected in each frame, and the key values are the number of instances of the objects found in the frame
-
- -- a dictionary with its keys being the name of each unique object detected throughout the past second, and the key values are the average number of instances of the object found in all the frames contained in the past second
-
- -- If return_detected_frame is set to True, the numpy array of the detected frame will be parsed
- as the fifth value into the function
-
- * per_minute_function (optional), this parameter allows you to parse in a function you will want to execute after
- each minute of the video is detected. If this parameter is set to a function, after every minute of a video
- is detected, the function will be executed with the following values parsed into it:
- -- position number of the minute
- -- an array of dictionaries whose keys are position number of each frame present in the last minute , and the value for each key is the array for each frame that contains the dictionaries for each object detected in the frame
-
- -- an array of dictionaries, with each dictionary corresponding to each frame in the past minute, and the keys of each dictionary are the name of the number of unique objects detected in each frame, and the key values are the number of instances of the objects found in the frame
-
- -- a dictionary with its keys being the name of each unique object detected throughout the past minute, and the key values are the average number of instances of the object found in all the frames contained in the past minute
-
- -- If return_detected_frame is set to True, the numpy array of the detected frame will be parsed
+ * input_file_path , which is the file path to the input video. It is required only if 'camera_input' is not set
+ * camera_input , allows you to parse in camera input for live video detections
+ * output_file_path , which is the path to the output video. It is required only if 'save_detected_video' is not set to False
+ * frames_per_second , which is the number of frames to be used in the output video
+ * frame_detection_interval (optional, 1 by default) , which is the intervals of frames that will be detected.
+ * minimum_percentage_probability (optional, 50 by default) , option to set the minimum percentage probability for nominating a detected object for output.
+ * log_progress (optional), which states if the progress of frame processing is to be logged to the console
+ * display_percentage_probability (optional), can be used to hide or show probability scores on the detected video frames
+ * display_object_name (optional), can be used to show or hide object names on the detected video frames
+ * save_detected_video (optional, True by default), can be set to save or not save the detected video
+ * per_frame_function (optional), this parameter allows you to pass in a function you want executed after each frame of the video is detected. If this parameter is set to a function, after every video frame is detected, the function will be executed with the following values passed into it:
+ -- the position number of the frame
+ -- an array of dictionaries, with each dictionary corresponding to an object detected. Each dictionary contains 'name', 'percentage_probability' and 'box_points'
+ -- a dictionary with keys being the name of each unique object detected and values being the number of instances of each object present
+ -- If return_detected_frame is set to True, the numpy array of the detected frame will be passed as the fourth value into the function
+
+ * per_second_function (optional), this parameter allows you to pass in a function you want executed after each second of the video is detected. If this parameter is set to a function, after every second of the video is detected, the function will be executed with the following values passed into it:
+ -- the position number of the second
+ -- an array of dictionaries whose keys are the position numbers of each frame present in the last second, and the value for each key is the array of dictionaries for the objects detected in that frame
+ -- an array of dictionaries, with each dictionary corresponding to a frame in the past second, whose keys are the names of the unique objects detected in that frame and whose values are the number of instances of each object found in the frame
+ -- a dictionary whose keys are the names of each unique object detected throughout the past second, and whose values are the average number of instances of each object found across all the frames in the past second
+ -- If return_detected_frame is set to True, the numpy array of the detected frame will be passed
as the fifth value into the function
- * video_complete_function (optional), this parameter allows you to parse in a function you will want to execute after
- all of the video frames have been detected. If this parameter is set to a function, after all of frames of a video
- is detected, the function will be executed with the following values parsed into it:
- -- an array of dictionaries whose keys are position number of each frame present in the entire video , and the value for each key is the array for each frame that contains the dictionaries for each object detected in the frame
-
- -- an array of dictionaries, with each dictionary corresponding to each frame in the entire video, and the keys of each dictionary are the name of the number of unique objects detected in each frame, and the key values are the number of instances of the objects found in the frame
-
- -- a dictionary with its keys being the name of each unique object detected throughout the entire video, and the key values are the average number of instances of the object found in all the frames contained in the entire video
+ * per_minute_function (optional), this parameter allows you to pass in a function you want executed after each minute of the video is detected. If this parameter is set to a function, after every minute of the video is detected, the function will be executed with the following values passed into it:
+ -- the position number of the minute
+ -- an array of dictionaries whose keys are the position numbers of each frame present in the last minute, and the value for each key is the array of dictionaries for the objects detected in that frame
- * return_detected_frame (optionally, False by default), option to obtain the return the last detected video frame into the per_per_frame_function,
- per_per_second_function or per_per_minute_function
+ -- an array of dictionaries, with each dictionary corresponding to a frame in the past minute, whose keys are the names of the unique objects detected in that frame and whose values are the number of instances of each object found in the frame
- * detection_timeout (optionally, None by default), option to state the number of seconds of a video that should be detected after which the detection function stop processing the video
+ -- a dictionary whose keys are the names of each unique object detected throughout the past minute, and whose values are the average number of instances of each object found across all the frames in the past minute
+ -- If return_detected_frame is set to True, the numpy array of the detected frame will be passed as the fifth value into the function
+ * video_complete_function (optional), this parameter allows you to pass in a function you want executed after all the video frames have been detected. If this parameter is set to a function, after all frames of the video have been detected, the function will be executed with the following values passed into it:
+ -- an array of dictionaries whose keys are the position numbers of each frame present in the entire video, and the value for each key is the array of dictionaries for the objects detected in that frame
+ -- an array of dictionaries, with each dictionary corresponding to a frame in the entire video, whose keys are the names of the unique objects detected in that frame and whose values are the number of instances of each object found in the frame
+ -- a dictionary whose keys are the names of each unique object detected throughout the entire video, and whose values are the average number of instances of each object found across all the frames of the video
+ * return_detected_frame (optional, False by default), option to return the last detected video frame into the per_frame_function, per_second_function or per_minute_function
+ * detection_timeout (optional, None by default), option to state the number of seconds of the video that should be processed, after which the detection function stops processing the video
:param input_file_path:
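
The reflowed docstring above describes the callback hooks of detectObjectsFromVideo. A minimal sketch of using the per-frame hook is shown below; the model path and file names are placeholders, with "yolo.h5" standing in for a downloaded YOLOv3 model file.

```python
from imageai.Detection import VideoObjectDetection

def forFrame(frame_number, output_array, output_count):
    # output_array: one dict per detected object, with 'name',
    # 'percentage_probability' and 'box_points'
    # output_count: {object_name: number_of_instances} for this frame
    print("Frame", frame_number, output_count)

detector = VideoObjectDetection()
detector.setModelTypeAsYOLOv3()
detector.setModelPath("yolo.h5")          # placeholder: path to a YOLOv3 model file
detector.loadModel()

detector.detectObjectsFromVideo(
    input_file_path="videos/holo1.mp4",
    output_file_path="holo1_detected",    # placeholder output name
    frames_per_second=20,
    per_frame_function=forFrame,
    minimum_percentage_probability=40,
    log_progress=True,
)
```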
diff --git a/imageai/Prediction/Custom/__init__.py b/imageai/Prediction/Custom/__init__.py
index ca9d55a5..43ef62bb 100644
--- a/imageai/Prediction/Custom/__init__.py
+++ b/imageai/Prediction/Custom/__init__.py
@@ -81,7 +81,7 @@ def setModelTypeAsInceptionV3(self):
def setDataDirectory(self, data_directory=""):
"""
- 'setDataDirectory()' is required to set the path to which the data/dataset to be used for
+ 'setDataDirectory()' is required to set the path to the directory in which the data/dataset to be used for
training is kept. The directory can have any name, but it must have 'train' and 'test'
sub-directory. In the 'train' and 'test' sub-directories, there must be sub-directories
with each having it's name corresponds to the name/label of the object whose images are
@@ -99,8 +99,9 @@ def setDataDirectory(self, data_directory=""):
>> class4 >> class4_test_images
>> class5 >> class5_test_images
- :return:
- """
+ :param data_directory:
+ :return:
+ """
self.__data_dir = data_directory
self.__train_dir = os.path.join(self.__data_dir, "train")
@@ -139,7 +140,7 @@ def lr_schedule(self, epoch):
def trainModel(self, num_objects, num_experiments=200, enhance_data=False, batch_size = 32, initial_learning_rate=1e-3, show_network_summary=False, training_image_size = 224, continue_from_model=None, transfer_from_model=None, transfer_with_full_training=True, initial_num_objects = None, save_full_model = False):
"""
- 'trainModel()' function starts the actual training. It accepts the following values:
+ 'trainModel()' function starts the actual training of the model. It accepts the following values:
- num_objects , which is the number of classes present in the dataset that is to be used for training
- num_experiments , also known as epochs, it is the number of times the network will train on all the training dataset
- enhance_data (optional) , this is used to modify the dataset and create more instance of the training set to enhance the training result
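
For context, the corrected trainModel docstring corresponds to the existing custom-prediction training flow. A brief usage sketch follows; the data directory name is a placeholder and must contain the 'train' and 'test' sub-directories described above.

```python
from imageai.Prediction.Custom import ModelTraining

model_trainer = ModelTraining()
model_trainer.setModelTypeAsResNet()
model_trainer.setDataDirectory("pets")      # placeholder; must hold 'train' and 'test' folders
model_trainer.trainModel(
    num_objects=4,                          # number of classes in the dataset
    num_experiments=100,                    # epochs
    enhance_data=True,                      # augment the training set
    batch_size=32,
    show_network_summary=True,
)
```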
diff --git a/images/Thumbs.db b/images/Thumbs.db
deleted file mode 100644
index ea712392..00000000
Binary files a/images/Thumbs.db and /dev/null differ
diff --git a/images/holo1.jpg b/images/holo1.jpg
new file mode 100644
index 00000000..781b73e8
Binary files /dev/null and b/images/holo1.jpg differ
diff --git a/images/holo2-detected.jpg b/images/holo2-detected.jpg
new file mode 100644
index 00000000..9eebf56a
Binary files /dev/null and b/images/holo2-detected.jpg differ
diff --git a/images/holo2.jpg b/images/holo2.jpg
new file mode 100644
index 00000000..6d834a13
Binary files /dev/null and b/images/holo2.jpg differ
diff --git a/setup.py b/setup.py
index 092b7e82..020d4212 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,7 @@
from setuptools import setup,find_packages
setup(name="imageai",
- version='2.0.3',
+ version='2.1.0',
description='A python library built to empower developers to build applications and systems with self-contained Computer Vision capabilities',
url="https://github.com/OlafenwaMoses/ImageAI",
author='Moses Olafenwa and John Olafenwa',
diff --git a/videos/Thumbs.db b/videos/Thumbs.db
deleted file mode 100644
index 3b026f51..00000000
Binary files a/videos/Thumbs.db and /dev/null differ
diff --git a/videos/holo1.mp4 b/videos/holo1.mp4
new file mode 100644
index 00000000..28a9c12c
Binary files /dev/null and b/videos/holo1.mp4 differ