diff --git a/asone/asone.py b/asone/asone.py
index 082b03f..c5b3789 100644
--- a/asone/asone.py
+++ b/asone/asone.py
@@ -18,13 +18,14 @@ def __init__(self,
                  weights: str = None,
                  use_cuda: bool = True,
                  recognizer: int = None,
-                 languages: list = ['en']
+                 languages: list = ['en'],
+                 num_classes=80
                  ) -> None:
 
         self.use_cuda = use_cuda
 
         # get detector object
-        self.detector = self.get_detector(detector, weights, recognizer)
+        self.detector = self.get_detector(detector, weights, recognizer, num_classes)
         self.recognizer = self.get_recognizer(recognizer, languages=languages)
 
         if tracker == -1:
@@ -33,9 +34,9 @@ def __init__(self,
 
         self.tracker = self.get_tracker(tracker)
 
-    def get_detector(self, detector: int, weights: str, recognizer):
+    def get_detector(self, detector: int, weights: str, recognizer, num_classes):
         detector = Detector(detector, weights=weights,
-                            use_cuda=self.use_cuda, recognizer=recognizer).get_detector()
+                            use_cuda=self.use_cuda, recognizer=recognizer, num_classes=num_classes).get_detector()
         return detector
 
     def get_recognizer(self, recognizer: int, languages):
@@ -85,6 +86,99 @@ def track_video(self,
             # yield bbox_details, frame_details to main script
             yield bbox_details, frame_details
 
+    def detect_video(self,
+                     video_path,
+                     **kwargs
+                     ):
+        output_filename = os.path.basename(video_path)
+        kwargs['filename'] = output_filename
+        config = self._update_args(kwargs)
+
+        fps = config.pop('fps')
+        output_dir = config.pop('output_dir')
+        filename = config.pop('filename')
+        save_result = config.pop('save_result')
+        display = config.pop('display')
+        # draw_trails is accepted for API parity with track_video but unused here
+        draw_trails = config.pop('draw_trails')
+        class_names = config.pop('class_names')
+        # honor user-supplied thresholds instead of hardcoding them below
+        conf_thres = config.pop('conf_thres', 0.25)
+        iou_thres = config.pop('iou_thres', 0.45)
+
+        cap = cv2.VideoCapture(video_path)
+        width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
+        height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
+        frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
+
+        if fps is None:
+            fps = cap.get(cv2.CAP_PROP_FPS)
+
+        if save_result:
+            os.makedirs(output_dir, exist_ok=True)
+            save_path = os.path.join(output_dir, filename)
+            logger.info(f"video save path is {save_path}")
+
+            video_writer = cv2.VideoWriter(
+                save_path,
+                cv2.VideoWriter_fourcc(*"mp4v"),
+                fps,
+                (int(width), int(height)),
+            )
+
+        tic = time.time()
+        prevTime = 0
+        frame_no = 0
+        while True:
+            start_time = time.time()
+
+            ret, img = cap.read()
+            if not ret:
+                break
+            frame = img.copy()
+
+            dets, img_info = self.detector.detect(img, conf_thres=conf_thres, iou_thres=iou_thres)
+            currTime = time.time()
+            fps = 1 / (currTime - prevTime)
+            prevTime = currTime
+
+            # default to empty results so the yield below never hits an
+            # unbound variable on frames with no detections
+            bbox_xyxy, scores, class_ids = None, None, None
+            if dets is not None:
+                bbox_xyxy = dets[:, :4]
+                scores = dets[:, 4]
+                class_ids = dets[:, 5]
+                img = utils.draw_boxes(img, bbox_xyxy, class_ids=class_ids, class_names=class_names)
+
+            cv2.line(img, (20, 25), (127, 25), [85, 45, 255], 30)
+            cv2.putText(img, f'FPS: {int(fps)}', (11, 35), 0, 1,
+                        [225, 255, 255], thickness=2, lineType=cv2.LINE_AA)
+
+            elapsed_time = time.time() - start_time
+            logger.info('frame {}/{} ({:.2f} ms)'.format(frame_no, int(frame_count),
+                                                         elapsed_time * 1000))
+            frame_no += 1
+
+            if display:
+                cv2.imshow('Window', img)
+
+            if save_result:
+                video_writer.write(img)
+
+            if cv2.waitKey(25) & 0xFF == ord('q'):
+                break
+
+            yield (bbox_xyxy, scores, class_ids), (img if display else frame, frame_no - 1, fps)
+
+        tac = time.time()
+        print(f'Total Time Taken: {tac - tic:.2f}')
+
     def detect(self, source, **kwargs)->np.ndarray:
         """
         Function to perform detection on an image
diff --git a/asone/demo_detector.py b/asone/demo_detector.py
index b39c19b..999cea7 100644
--- a/asone/demo_detector.py
+++ b/asone/demo_detector.py
@@ -1,97 +1,65 @@
+import sys
+import argparse
 import asone
 from asone import ASOne
-from .utils import draw_boxes
-import cv2
-import argparse
-import time
-import os
-import sys
 import torch
 
 
 def main(args):
     filter_classes = args.filter_classes
-    video_path = args.video
-
-    os.makedirs(args.output_path, exist_ok=True)
 
     if filter_classes:
         filter_classes = filter_classes.split(',')
 
     # Check if cuda available
     if args.use_cuda and torch.cuda.is_available():
         args.use_cuda = True
     else:
         args.use_cuda = False
 
     if sys.platform.startswith('darwin'):
         detector = asone.YOLOV7_MLMODEL
     else:
         detector = asone.YOLOV7_PYTORCH
 
-    detector = ASOne(detector, weights=args.weights, use_cuda=args.use_cuda)
-
-    cap = cv2.VideoCapture(video_path)
-    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
-    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
-    FPS = cap.get(cv2.CAP_PROP_FPS)
-
-    if args.save:
-        video_writer = cv2.VideoWriter(
-            os.path.basename(video_path),
-            cv2.VideoWriter_fourcc(*"mp4v"),
-            FPS,
-            (int(width), int(height)),
-        )
+    detect = ASOne(
+        detector=detector,
+        weights=args.weights,
+        use_cuda=args.use_cuda
+    )
 
-    frame_no = 1
-    tic = time.time()
-
-    prevTime = 0
-
-    while True:
-        start_time = time.time()
-
-        ret, img = cap.read()
-        if not ret:
-            break
-        frame = img.copy()
-
-        dets, img_info = detector.detect(img, conf_thres=0.25, iou_thres=0.45)
-        currTime = time.time()
-        fps = 1 / (currTime - prevTime)
-        prevTime = currTime
-
-        if dets is not None:
-            bbox_xyxy = dets[:, :4]
-            scores = dets[:, 4]
-            class_ids = dets[:, 5]
-            img = draw_boxes(img, bbox_xyxy, class_ids=class_ids)
-
-        cv2.line(img, (20, 25), (127, 25), [85, 45, 255], 30)
-        cv2.putText(img, f'FPS: {int(fps)}', (11, 35), 0, 1, [
-            225, 255, 255], thickness=2, lineType=cv2.LINE_AA)
-
-        frame_no+=1
-        if args.display:
-            cv2.imshow('Window', img)
-        if args.save:
-            video_writer.write(img)
-
-        if cv2.waitKey(25) & 0xFF == ord('q'):
-            break
+    # Get the detection generator
+    detections = detect.detect_video(args.video_path,
+                                     output_dir=args.output_dir,
+                                     conf_thres=args.conf_thres,
+                                     iou_thres=args.iou_thres,
+                                     display=args.display,
+                                     save_result=args.save_result,
+                                     draw_trails=args.draw_trails,
+                                     filter_classes=filter_classes,
+                                     class_names=None)  # class_names=['License Plate'] for custom weights
 
-if __name__=='__main__':
+    # Loop over the generator to retrieve the outputs of each frame
+    for bbox_details, frame_details in detections:
+        bbox_xyxy, scores, class_ids = bbox_details
+        frame, frame_num, fps = frame_details
+        print(frame_num)
 
+
+if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument("video", help="Path of video")
-    parser.add_argument('--cpu', default=True, action='store_false', dest='use_cuda', help='If provided the model will run on cpu otherwise it will run on gpu')
-    parser.add_argument('--filter_classes', default=None, help='Class names seperated by comma (,). e.g. person,car ')
+    parser.add_argument('video_path', help='Path to input video')
+    parser.add_argument('--cpu', default=True, action='store_false', dest='use_cuda',
+                        help='run on cpu; if not provided, the program will run on gpu.')
+    parser.add_argument('--no_save', default=True, action='store_false',
+                        dest='save_result', help='whether or not to save results')
+    parser.add_argument('--no_display', default=True, action='store_false',
+                        dest='display', help='whether or not to display results on screen')
+    parser.add_argument('--output_dir', default='data/results', help='Path to output directory')
+    parser.add_argument('--draw_trails', action='store_true', default=False,
+                        help='if provided, object motion trails will be drawn.')
+    parser.add_argument('--filter_classes', default=None,
+                        help='Class names to keep, separated by commas (e.g. person,car)')
     parser.add_argument('-w', '--weights', default=None, help='Path of trained weights')
-    parser.add_argument('-o', '--output_path', default='data/results', help='path of output file')
-    parser.add_argument('--no_display', action='store_false', default=True, dest='display', help='if provided video will not be displayed')
-    parser.add_argument('--no_save', action='store_false', default=True, dest='save', help='if provided video will not be saved')
+    parser.add_argument('-ct', '--conf_thres', default=0.25, type=float, help='confidence score threshold')
+    parser.add_argument('-it', '--iou_thres', default=0.45, type=float, help='IoU score threshold')
 
     args = parser.parse_args()
+
     main(args)
\ No newline at end of file
diff --git a/asone/detectors/detector.py b/asone/detectors/detector.py
index aa87fb8..e76914b 100644
--- a/asone/detectors/detector.py
+++ b/asone/detectors/detector.py
@@ -19,10 +19,11 @@ def __init__(self,
                  model_flag: int,
                  weights: str = None,
                  use_cuda: bool = True,
-                 recognizer: int = None):
+                 recognizer: int = None,
+                 num_classes=80):
 
-        self.model = self._select_detector(model_flag, weights, use_cuda, recognizer)
-    def _select_detector(self, model_flag, weights, cuda, recognizer):
+        self.model = self._select_detector(model_flag, weights, use_cuda, recognizer, num_classes)
+    def _select_detector(self, model_flag, weights, cuda, recognizer, num_classes):
         # Get required weight using model_flag
         mlmodel = False
         if weights and weights.split('.')[-1] == 'onnx':
@@ -101,9 +102,12 @@ def _select_detector(self, model_flag, weights, cuda, recognizer):
                                      use_cuda=cuda)
         elif model_flag in range(160, 163):
             # Get the corresponding YOLO-NAS model
-            _detector = YOLOnasDetector(weights=weight,
-                                        use_onnx=onnx,
-                                        use_cuda=cuda)
+            _detector = YOLOnasDetector(
+                model_flag,
+                weights=weight,
+                use_onnx=onnx,
+                use_cuda=cuda,
+                num_classes=num_classes)
 
         return _detector
diff --git a/asone/detectors/yolonas/yolonas.py b/asone/detectors/yolonas/yolonas.py
index da3939f..93243da 100644
--- a/asone/detectors/yolonas/yolonas.py
+++ b/asone/detectors/yolonas/yolonas.py
@@ -8,26 +8,31 @@
 import super_gradients
 import numpy as np
 from super_gradients.training.processing import DetectionCenterPadding, StandardizeImage, NormalizeImage, ImagePermute, ComposeProcessing, DetectionLongestMaxSizeRescale
+from super_gradients.training import models
+from super_gradients.common.object_names import Models
 
-class_names = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17",
-               "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34",
-               "35", "36", "37", "38", "39", "40", "41", "42", "43", "44", "45", "46", "47", "48", "49",
"50", "51", - "52", "53", "54", "55", "56", "57", "58", "59", "60", "61", "62", "63","64", "65", "66", "67", "68", - "69", "70", "71", "72", "73", "74", "75", "76", "77", "78", "79"] +class_names = [""] class YOLOnasDetector: def __init__(self, + model_flag, weights=None, cfg=None, use_onnx=True, use_cuda=True, + # checkpoint_num_classes=80, + num_classes=80 ): + + self.model_flag = model_flag + # self.checkpoint_num_classes = checkpoint_num_classes if not os.path.exists(weights): utils.download_weights(weights) - + + self.num_classes = num_classes self.device = 'cuda' if use_cuda and torch.cuda.is_available() else 'cpu' self.use_onnx = use_onnx @@ -35,12 +40,26 @@ def __init__(self, self.model = self.load_model(weights=weights) def load_model(self, weights): - model_name = os.path.basename(weights) - name, file_extension = os.path.splitext(model_name) - - model = super_gradients.training.models.get(name, checkpoint_path=weights, checkpoint_num_classes=80, num_classes=80).to(self.device) + # model = super_gradients.training.models.get(name, + # checkpoint_path=weights, + # checkpoint_num_classes=self.checkpoint_num_classes, + # num_classes=self.num_classes).to(self.device) + + if self.model_flag == 160: + model = models.get(Models.YOLO_NAS_S, + checkpoint_path=weights, + num_classes=self.num_classes).to(self.device) + elif self.model_flag == 161: + model = models.get(Models.YOLO_NAS_M, + checkpoint_path=weights, + num_classes=self.num_classes).to(self.device) + elif self.model_flag == 162: + model = models.get(Models.YOLO_NAS_L, + checkpoint_path=weights, + num_classes=self.num_classes).to(self.device) return model + def detect(self, image: list, input_shape: tuple = (640, 640), conf_thres: float = 0.25, @@ -51,28 +70,23 @@ def detect(self, image: list, with_p6: bool = False, return_image=False) -> list: - - self.model.set_dataset_processing_params( class_names=class_names, - image_processor=ComposeProcessing( - [ - DetectionLongestMaxSizeRescale(output_shape=(636, 636)), - DetectionCenterPadding(output_shape=(640, 640), pad_value=114), - StandardizeImage(max_value=255.0), - ImagePermute(permutation=(2, 0, 1)), - ] - ), - iou=iou_thres,conf=conf_thres, - ) + if self.num_classes==80: + self.model.set_dataset_processing_params(class_names=class_names, + image_processor=ComposeProcessing( + [ + DetectionLongestMaxSizeRescale(output_shape=(636, 636)), + DetectionCenterPadding(output_shape=(640, 640), pad_value=114), + StandardizeImage(max_value=255.0), + ImagePermute(permutation=(2, 0, 1)), + ] + ), + iou=iou_thres,conf=conf_thres, + ) original_image = image # Inference if self.use_onnx: pass - # Input names of ONNX model on which it is exported - # input_name = self.model.get_inputs()[0].name - # # Run onnx model - # pred = self.model.run([self.model.get_outputs()[0].name], { - # input_name: processed_image})[0] - # Run Pytorch model + else: detections = self.model.predict(image)