diff --git a/coco_2_labelImg.py b/coco_2_labelImg.py index 10e15a3..772a594 100644 --- a/coco_2_labelImg.py +++ b/coco_2_labelImg.py @@ -10,69 +10,71 @@ from tqdm import tqdm -class COCO2labelImg(): +class COCO2labelImg: def __init__(self, data_dir: str = None): # coco dir self.data_dir = Path(data_dir) self.verify_exists(self.data_dir) - anno_dir = self.data_dir / 'annotations' + anno_dir = self.data_dir / "annotations" self.verify_exists(anno_dir) - self.train_json = anno_dir / 'instances_train2017.json' - self.val_json = anno_dir / 'instances_val2017.json' + self.train_json = anno_dir / "instances_train2017.json" + self.val_json = anno_dir / "instances_val2017.json" self.verify_exists(self.train_json) self.verify_exists(self.val_json) - self.train2017_dir = self.data_dir / 'train2017' - self.val2017_dir = self.data_dir / 'val2017' + self.train2017_dir = self.data_dir / "train2017" + self.val2017_dir = self.data_dir / "val2017" self.verify_exists(self.train2017_dir) self.verify_exists(self.val2017_dir) # save dir - self.save_dir = self.data_dir.parent / 'COCO_labelImg_format' + self.save_dir = self.data_dir.parent / "COCO_labelImg_format" self.mkdir(self.save_dir) - self.save_train_dir = self.save_dir / 'train' + self.save_train_dir = self.save_dir / "train" self.mkdir(self.save_train_dir) - self.save_val_dir = self.save_dir / 'val' + self.save_val_dir = self.save_dir / "val" self.mkdir(self.save_val_dir) - def __call__(self, ): + def __call__( + self, + ): train_list = [self.train_json, self.save_train_dir, self.train2017_dir] self.convert(train_list) val_list = [self.val_json, self.save_val_dir, self.val2017_dir] self.convert(val_list) - print(f'Successfully convert, detail in {self.save_dir}') + print(f"Successfully convert, detail in {self.save_dir}") def convert(self, info_list: list): json_path, save_dir, img_dir = info_list data = self.read_json(str(json_path)) - self.gen_classes_txt(save_dir, data.get('categories')) + self.gen_classes_txt(save_dir, data.get("categories")) - id_img_dict = {v['id']: v for v in data.get('images')} - all_annotaions = data.get('annotations') + id_img_dict = {v["id"]: v for v in data.get("images")} + all_annotaions = data.get("annotations") for one_anno in tqdm(all_annotaions): - image_info = id_img_dict.get(one_anno['image_id']) - img_name = image_info.get('file_name') - img_height = image_info.get('height') - img_width = image_info.get('width') + image_info = id_img_dict.get(one_anno["image_id"]) + img_name = image_info.get("file_name") + img_height = image_info.get("height") + img_width = image_info.get("width") - seg_info = one_anno.get('segmentation') + seg_info = one_anno.get("segmentation") if seg_info: bbox = self.get_bbox(seg_info) xywh = self.xyxy_to_xywh(bbox, img_width, img_height) - category_id = int(one_anno.get('category_id')) - 1 - xywh_str = ' '.join([str(v) for v in xywh]) - label_str = f'{category_id} {xywh_str}' + category_id = int(one_anno.get("category_id")) - 1 + xywh_str = " ".join([str(v) for v in xywh]) + label_str = f"{category_id} {xywh_str}" # 写入标注的txt文件 - txt_full_path = save_dir / f'{Path(img_name).stem}.txt' - self.write_txt(txt_full_path, label_str, mode='a') + txt_full_path = save_dir / f"{Path(img_name).stem}.txt" + self.write_txt(txt_full_path, label_str, mode="a") # 复制图像到转换后目录 img_full_path = img_dir / img_name @@ -80,13 +82,13 @@ def convert(self, info_list: list): @staticmethod def read_json(json_path): - with open(json_path, 'r', encoding='utf-8') as f: + with open(json_path, "r", encoding="utf-8") as f: data = json.load(f) return data def gen_classes_txt(self, save_dir, categories_dict): - class_info = [value['name'] for value in categories_dict] - self.write_txt(save_dir / 'classes.txt', class_info) + class_info = [value["name"] for value in categories_dict] + self.write_txt(save_dir / "classes.txt", class_info) def get_bbox(self, seg_info): seg_info = np.array(seg_info[0]).reshape(4, 2) @@ -96,20 +98,20 @@ def get_bbox(self, seg_info): return bbox @staticmethod - def write_txt(save_path: str, content: list, mode='w'): + def write_txt(save_path: str, content: list, mode="w"): if not isinstance(save_path, str): save_path = str(save_path) if isinstance(content, str): content = [content] - with open(save_path, mode, encoding='utf-8') as f: + with open(save_path, mode, encoding="utf-8") as f: for value in content: - f.write(f'{value}\n') + f.write(f"{value}\n") @staticmethod - def xyxy_to_xywh(xyxy: list, - img_width: int, - img_height: int) -> tuple([float, float, float, float]): + def xyxy_to_xywh( + xyxy: list, img_width: int, img_height: int + ) -> tuple([float, float, float, float]): """ xyxy: (list), [x1, y1, x2, y2] """ @@ -127,18 +129,21 @@ def xyxy_to_xywh(xyxy: list, def verify_exists(file_path): file_path = Path(file_path) if not file_path.exists(): - raise FileNotFoundError(f'The {file_path} is not exists!!!') + raise FileNotFoundError(f"The {file_path} is not exists!!!") @staticmethod def mkdir(dir_path): Path(dir_path).mkdir(parents=True, exist_ok=True) -if __name__ == '__main__': - parser = argparse.ArgumentParser('Datasets convert from COCO to labelImg') - parser.add_argument('--data_dir', type=str, - default='dataset/YOLOV5_COCO_format', - help='Dataset root path') +if __name__ == "__main__": + parser = argparse.ArgumentParser("Datasets convert from COCO to labelImg") + parser.add_argument( + "--data_dir", + type=str, + default="dataset/YOLOV5_COCO_format", + help="Dataset root path", + ) args = parser.parse_args() converter = COCO2labelImg(args.data_dir) diff --git a/coco_visual.py b/coco_visual.py index 0b315d3..f55ffff 100644 --- a/coco_visual.py +++ b/coco_visual.py @@ -10,68 +10,80 @@ def visualization_bbox(num_image, json_path, img_path): - with open(json_path, 'r', encoding='utf-8') as annos: + with open(json_path, "r", encoding="utf-8") as annos: annotation_json = json.load(annos) - print('The annotation_json num_key is:', len(annotation_json)) - print('The annotation_json key is:', annotation_json.keys()) - print('The annotation_json num_images is:', len(annotation_json['images'])) + print("The annotation_json num_key is:", len(annotation_json)) + print("The annotation_json key is:", annotation_json.keys()) + print("The annotation_json num_images is:", len(annotation_json["images"])) - categories = annotation_json['categories'] - categories_dict = {c['id']: c['name'] for c in categories} + categories = annotation_json["categories"] + categories_dict = {c["id"]: c["name"] for c in categories} class_nums = len(categories_dict.keys()) - color = [(random.randint(0, 255), random.randint(0, 255), - random.randint(0, 255)) for _ in range(class_nums)] + color = [ + (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)) + for _ in range(class_nums) + ] - image_name = annotation_json['images'][num_image - 1]['file_name'] - img_id = annotation_json['images'][num_image - 1]['id'] + image_name = annotation_json["images"][num_image - 1]["file_name"] + img_id = annotation_json["images"][num_image - 1]["id"] image_path = os.path.join(img_path, str(image_name).zfill(5)) image = cv2.imread(image_path, 1) - annotations = annotation_json['annotations'] + annotations = annotation_json["annotations"] num_bbox = 0 for anno in annotations: - if anno['image_id'] == img_id: + if anno["image_id"] == img_id: num_bbox = num_bbox + 1 - class_id = anno['category_id'] + class_id = anno["category_id"] class_name = categories_dict[class_id] - class_color = color[class_id-1] + class_color = color[class_id - 1] - x, y, w, h = list(map(int, anno['bbox'])) - cv2.rectangle(image, (int(x), int(y)), - (int(x + w), int(y + h)), - class_color, 2) + x, y, w, h = list(map(int, anno["bbox"])) + cv2.rectangle( + image, (int(x), int(y)), (int(x + w), int(y + h)), class_color, 2 + ) font_size = 0.7 - txt_size = cv2.getTextSize(class_name, cv2.FONT_HERSHEY_SIMPLEX, - font_size, 1)[0] - cv2.rectangle(image, (x, y + 1), - (x + txt_size[0] + 10, y - int(2 * txt_size[1])), - class_color, -1) - cv2.putText(image, class_name, (x + 5, y - 5), - cv2.FONT_HERSHEY_SIMPLEX, - font_size, (255, 255, 255), 1) + txt_size = cv2.getTextSize( + class_name, cv2.FONT_HERSHEY_SIMPLEX, font_size, 1 + )[0] + cv2.rectangle( + image, + (x, y + 1), + (x + txt_size[0] + 10, y - int(2 * txt_size[1])), + class_color, + -1, + ) + cv2.putText( + image, + class_name, + (x + 5, y - 5), + cv2.FONT_HERSHEY_SIMPLEX, + font_size, + (255, 255, 255), + 1, + ) - print('The unm_bbox of the display image is:', num_bbox) + print("The unm_bbox of the display image is:", num_bbox) cur_os = platform.system() - if cur_os == 'Windows': + if cur_os == "Windows": cv2.namedWindow(image_name, 0) cv2.resizeWindow(image_name, 1000, 1000) cv2.imshow(image_name, image) cv2.waitKey(0) else: - save_path = f'visul_{num_image}.jpg' + save_path = f"visul_{num_image}.jpg" cv2.imwrite(save_path, image) - print(f'The {save_path} has been saved the current director.') + print(f"The {save_path} has been saved the current director.") if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument('--vis_num', type=int, default=1, - help="visual which one") - parser.add_argument('--json_path', type=str, required=True) - parser.add_argument('--img_dir', type=str, required=True) + parser.add_argument("--vis_num", type=int, default=1, help="visual which one") + parser.add_argument("--json_path", type=str, required=True) + parser.add_argument("--img_dir", type=str, required=True) args = parser.parse_args() visualization_bbox(args.vis_num, args.json_path, args.img_dir) diff --git a/darknet_2_coco.py b/darknet_2_coco.py index 089cf46..a8ac5ce 100644 --- a/darknet_2_coco.py +++ b/darknet_2_coco.py @@ -11,7 +11,7 @@ import cv2 as cv -class DARKNET2COCO(): +class DARKNET2COCO: def __init__(self, genconfig_data): self.src_data = genconfig_data self.src = Path(self.src_data).parent @@ -20,37 +20,43 @@ def __init__(self, genconfig_data): self.coco_valid = "val2017" self.coco_images = "images" self.coco_annotation = "annotations" - self.coco_train_json = Path(self.dst) / self.coco_annotation / f'instances_{self.coco_train}.json' - self.coco_valid_json = Path(self.dst) / self.coco_annotation / f'instances_{self.coco_valid}.json' - self.type = 'instances' + self.coco_train_json = ( + Path(self.dst) / self.coco_annotation / f"instances_{self.coco_train}.json" + ) + self.coco_valid_json = ( + Path(self.dst) / self.coco_annotation / f"instances_{self.coco_valid}.json" + ) + self.type = "instances" self.categories = [] self.annotation_id = 1 self.info = { - 'year': 2021, - 'version': '1.0', - 'description': 'For object detection', - 'date_created': '2021', + "year": 2021, + "version": "1.0", + "description": "For object detection", + "date_created": "2021", } - self.licenses = [{ - 'id': 1, - 'name': 'Apache License v2.0', - 'url': 'https://github.com/RapidAI/YOLO2COCO/LICENSE', - }] + self.licenses = [ + { + "id": 1, + "name": "Apache License v2.0", + "url": "https://github.com/RapidAI/YOLO2COCO/LICENSE", + } + ] if not Path(self.dst).is_dir(): Path(self.dst).mkdir() if not Path(self.dst / self.coco_images).is_dir(): - Path(self.dst/self.coco_images).mkdir() + Path(self.dst / self.coco_images).mkdir() - if not (Path(self.dst)/self.coco_images / self.coco_train).is_dir(): - (Path(self.dst)/self.coco_images/self.coco_train).mkdir() + if not (Path(self.dst) / self.coco_images / self.coco_train).is_dir(): + (Path(self.dst) / self.coco_images / self.coco_train).mkdir() if not Path(self.dst / self.coco_images / self.coco_valid).is_dir(): - (Path(self.dst)/self.coco_images/self.coco_valid).mkdir() + (Path(self.dst) / self.coco_images / self.coco_valid).mkdir() if not (Path(self.dst) / self.coco_annotation).is_dir(): - (Path(self.dst)/self.coco_annotation).mkdir() + (Path(self.dst) / self.coco_annotation).mkdir() if Path(self.src_data).is_file(): self.ready = True @@ -63,7 +69,7 @@ def initcfg(self): return self.cnf = cfg.RawConfigParser() with open(self.src_data) as f: - file_content = '[dummy_section]\n' + f.read() + file_content = "[dummy_section]\n" + f.read() self.cnf.read_string(file_content) def getint(self, key): @@ -98,10 +104,10 @@ def get_list(self, name): return content def _get_annotation(self, vertex_info, height, width): - ''' + """ # derived from https://github.com/zhiqwang/yolov5-rt-stack/blob/master/yolort/utils/yolo2coco.py - ''' + """ cx, cy, w, h = [float(i) for i in vertex_info] cx = cx * width cy = cy * height @@ -124,44 +130,44 @@ def read_annotation(self, txtfile, img_id, height, width): allinfo = f.readlines() for line in allinfo: - label_info = line.replace('\n', '').replace('\r', '') + label_info = line.replace("\n", "").replace("\r", "") label_info = label_info.strip().split(" ") if len(label_info) < 5: continue category_id, vertex_info = label_info[0], label_info[1:] - segmentation, bbox, area = self._get_annotation( - vertex_info, height, width) - annotation.append({ - 'segmentation': segmentation, - 'area': area, - 'iscrowd': 0, - 'image_id': img_id, - 'bbox': bbox, - 'category_id': int(int(category_id)+1), - 'id': self.annotation_id, - }) + segmentation, bbox, area = self._get_annotation(vertex_info, height, width) + annotation.append( + { + "segmentation": segmentation, + "area": area, + "iscrowd": 0, + "image_id": img_id, + "bbox": bbox, + "category_id": int(int(category_id) + 1), + "id": self.annotation_id, + } + ) self.annotation_id += 1 return annotation def get_category(self): for id, category in enumerate(self.name_lists, 1): - self.categories.append({ - 'id': id, - 'name': category, - 'supercategory': category, - }) + self.categories.append( + { + "id": id, + "name": category, + "supercategory": category, + } + ) def generate(self): self.classnum = self.getint("classes") - self.train = Path(self.src_data).parent / \ - Path(self.getstring("train")).name - self.valid = Path(self.src_data).parent / \ - Path(self.getstring("valid")).name - self.names = Path(self.src_data).parent / \ - Path(self.getstring("names")).name + self.train = Path(self.src_data).parent / Path(self.getstring("train")).name + self.valid = Path(self.src_data).parent / Path(self.getstring("valid")).name + self.names = Path(self.src_data).parent / Path(self.getstring("names")).name self.train_files = self.get_path(self.train) if os.path.exists(self.valid): @@ -171,49 +177,48 @@ def generate(self): self.get_category() dest_path_train = Path(self.dst) / self.coco_images / self.coco_train - self.gen_dataset(self.train_files, dest_path_train, - self.coco_train_json) + self.gen_dataset(self.train_files, dest_path_train, self.coco_train_json) dest_path_valid = Path(self.dst) / self.coco_images / self.coco_valid if os.path.exists(self.valid): - self.gen_dataset(self.valid_files, dest_path_valid, - self.coco_valid_json) + self.gen_dataset(self.valid_files, dest_path_valid, self.coco_valid_json) print("The output directory is :", str(self.dst)) def gen_dataset(self, file_lists, target_img_path, target_json): - ''' + """ https://cocodataset.org/#format-data - ''' + """ images = [] annotations = [] for img_id, file in enumerate(file_lists, 1): if not Path(file).exists(): continue - txt = str(Path(file).parent / Path(file).stem) + \ - ".txt" + txt = str(Path(file).parent / Path(file).stem) + ".txt" tmpname = str(img_id) - prefix = "0"*(12 - len(tmpname)) - destfilename = prefix+tmpname+".jpg" + prefix = "0" * (12 - len(tmpname)) + destfilename = prefix + tmpname + ".jpg" imgsrc = cv.imread(file) # 读取图片 if Path(file).suffix.lower() == ".jpg": - shutil.copyfile(file, target_img_path/destfilename) + shutil.copyfile(file, target_img_path / destfilename) else: - cv.imwrite(str(target_img_path/destfilename), imgsrc) + cv.imwrite(str(target_img_path / destfilename), imgsrc) # shutil.copyfile(file,target_img_path/ ) image = imgsrc.shape # 获取图片宽高及通道数 height = image[0] width = image[1] - images.append({ - 'date_captured': '2021', - 'file_name': destfilename, - 'id': img_id, - 'height': height, - 'width': width, - }) + images.append( + { + "date_captured": "2021", + "file_name": destfilename, + "id": img_id, + "height": height, + "width": width, + } + ) if Path(txt).exists(): new_anno = self.read_annotation(txt, img_id, height, width) @@ -221,21 +226,22 @@ def gen_dataset(self, file_lists, target_img_path, target_json): annotations.extend(new_anno) json_data = { - 'info': self.info, - 'images': images, - 'licenses': self.licenses, - 'type': self.type, - 'annotations': annotations, - 'categories': self.categories, + "info": self.info, + "images": images, + "licenses": self.licenses, + "type": self.type, + "annotations": annotations, + "categories": self.categories, } - with open(target_json, 'w', encoding='utf-8') as f: + with open(target_json, "w", encoding="utf-8") as f: json.dump(json_data, f, ensure_ascii=False) if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument('--data_path', default='data/getn_config.data', - help='Dataset root path') + parser.add_argument( + "--data_path", default="data/getn_config.data", help="Dataset root path" + ) args = parser.parse_args() converter = DARKNET2COCO(args.data_path) diff --git a/labelImg_2_yolov5.py b/labelImg_2_yolov5.py index d28724c..fc94917 100644 --- a/labelImg_2_yolov5.py +++ b/labelImg_2_yolov5.py @@ -10,17 +10,17 @@ from tqdm import tqdm -class LabelImgToYOLOV5(): +class LabelImgToYOLOV5: def __init__(self, root_dir, out_dir, val_ratio, have_test, test_ratio): self.root_dir = Path(root_dir) self.verify_exists(self.root_dir) self.out_dir = Path(out_dir) - self.out_img_dir = self.out_dir / 'images' - self.out_label_dir = self.out_dir / 'labels' - self.out_non_label_dir = self.out_dir / 'non_labels' + self.out_img_dir = self.out_dir / "images" + self.out_label_dir = self.out_dir / "labels" + self.out_non_label_dir = self.out_dir / "non_labels" - self.classes_path = self.root_dir / 'classes.txt' + self.classes_path = self.root_dir / "classes.txt" self.verify_exists(self.classes_path) self.cp_file(self.classes_path, dst_dir=self.out_dir) @@ -33,40 +33,41 @@ def __call__(self): if img_list: img_list = self.gen_image_label_dir(img_list) else: - return ValueError(f'{self.root_dir} is corrupted.') - - split_list = self.get_train_val_test_list(img_list, - ratio=self.val_ratio, - have_test=self.have_test, - test_ratio=self.test_ratio) + return ValueError(f"{self.root_dir} is corrupted.") + + split_list = self.get_train_val_test_list( + img_list, + ratio=self.val_ratio, + have_test=self.have_test, + test_ratio=self.test_ratio, + ) train_list, val_list, test_list = split_list - self.write_txt(self.out_dir / 'train.txt', train_list) - self.write_txt(self.out_dir / 'val.txt', val_list) + self.write_txt(self.out_dir / "train.txt", train_list) + self.write_txt(self.out_dir / "val.txt", val_list) if test_list: - self.write_txt(self.out_dir / 'test.txt', test_list) - print(f'Successfully convert, detail in {self.out_dir}') + self.write_txt(self.out_dir / "test.txt", test_list) + print(f"Successfully convert, detail in {self.out_dir}") @staticmethod def verify_exists(file_path): file_path = Path(file_path) if not file_path.exists(): - raise FileNotFoundError(f'The {file_path} is not exists!!!') + raise FileNotFoundError(f"The {file_path} is not exists!!!") def get_img_list(self): img_list = [] - all_list = self.root_dir.glob('*.*') + all_list = self.root_dir.glob("*.*") for one in all_list: cur_suffix = one.suffix - if cur_suffix != '.txt': + if cur_suffix != ".txt": img_list.append(one) return img_list def gen_image_label_dir(self, img_list): new_image_list = [] for img_path in tqdm(img_list): - right_label_path = img_path.with_name(f'{img_path.stem}.txt') - if right_label_path.exists() \ - and self.read_txt(str(right_label_path)): + right_label_path = img_path.with_name(f"{img_path.stem}.txt") + if right_label_path.exists() and self.read_txt(str(right_label_path)): self.cp_file(img_path, dst_dir=self.out_img_dir) self.cp_file(right_label_path, dst_dir=self.out_label_dir) @@ -75,11 +76,11 @@ def gen_image_label_dir(self, img_list): self.cp_file(img_path, dst_dir=self.out_non_label_dir) return new_image_list - def get_train_val_test_list(self, img_list, ratio=0.2, have_test=True, - test_ratio=0.2): + def get_train_val_test_list( + self, img_list, ratio=0.2, have_test=True, test_ratio=0.2 + ): random.shuffle(img_list) - img_list = [f'{self.out_img_dir / img_path.name}' - for img_path in img_list] + img_list = [f"{self.out_img_dir / img_path.name}" for img_path in img_list] len_img = len(img_list) if have_test: split_idx_first = int(len_img * ratio) @@ -102,21 +103,21 @@ def mkdir(dir_path): @staticmethod def read_txt(txt_path: str) -> list: - with open(txt_path, 'r', encoding='utf-8') as f: - data = list(map(lambda x: x.rstrip('\n'), f)) + with open(txt_path, "r", encoding="utf-8") as f: + data = list(map(lambda x: x.rstrip("\n"), f)) return data @staticmethod - def write_txt(save_path: str, content: list, mode='w'): + def write_txt(save_path: str, content: list, mode="w"): if isinstance(content, str): content = [content] - with open(save_path, mode, encoding='utf-8') as f: + with open(save_path, mode, encoding="utf-8") as f: for value in content: - f.write(f'{value}\n') + f.write(f"{value}\n") @staticmethod def get_img_format(img_path): - with open(img_path, 'rb') as f: + with open(img_path, "rb") as f: return imghdr.what(f) def cp_file(self, file_path: Path, dst_dir: Path): @@ -130,16 +131,17 @@ def cp_file(self, file_path: Path, dst_dir: Path): shutil.copy2(str(file_path), str(dst_file_path)) -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument('--src_dir', type=str) - parser.add_argument('--out_dir', type=str) - parser.add_argument('--val_ratio', type=float, default=0.2) - parser.add_argument('--have_test', type=bool, default=True) - parser.add_argument('--test_ratio', type=float, default=0.2) + parser.add_argument("--src_dir", type=str) + parser.add_argument("--out_dir", type=str) + parser.add_argument("--val_ratio", type=float, default=0.2) + parser.add_argument("--have_test", type=bool, default=True) + parser.add_argument("--test_ratio", type=float, default=0.2) args = parser.parse_args() - converter = LabelImgToYOLOV5(args.src_dir, args.out_dir, args.val_ratio, - args.have_test, args.test_ratio) + converter = LabelImgToYOLOV5( + args.src_dir, args.out_dir, args.val_ratio, args.have_test, args.test_ratio + ) converter() - print(f'Successfully output to the {args.out_dir}') + print(f"Successfully output to the {args.out_dir}") diff --git a/yolov5_2_coco.py b/yolov5_2_coco.py index b7a590a..0c2b6d9 100644 --- a/yolov5_2_coco.py +++ b/yolov5_2_coco.py @@ -13,14 +13,14 @@ from tqdm import tqdm -class YOLOV5ToCOCO(): +class YOLOV5ToCOCO: def __init__(self, data_dir): self.raw_data_dir = Path(data_dir) - self.verify_exists(self.raw_data_dir / 'images') - self.verify_exists(self.raw_data_dir / 'labels') + self.verify_exists(self.raw_data_dir / "images") + self.verify_exists(self.raw_data_dir / "labels") - save_dir_name = f'{Path(self.raw_data_dir).name}_COCO_format' + save_dir_name = f"{Path(self.raw_data_dir).name}_COCO_format" self.output_dir = self.raw_data_dir.parent / save_dir_name self.mkdir(self.output_dir) @@ -28,54 +28,56 @@ def __init__(self, data_dir): def __call__(self, mode_list: list): if not mode_list: - raise ValueError('mode_list is empty!!') + raise ValueError("mode_list is empty!!") for mode in mode_list: # Read the image txt. - txt_path = self.raw_data_dir / f'{mode}.txt' + txt_path = self.raw_data_dir / f"{mode}.txt" self.verify_exists(txt_path) img_list = self.read_txt(txt_path) - if mode == 'train': + if mode == "train": img_list = self.append_bg_img(img_list) # Create the directory of saving the new image. - save_img_dir = self.output_dir / f'{mode}2017' + save_img_dir = self.output_dir / f"{mode}2017" self.mkdir(save_img_dir) # Generate json file. anno_dir = self.output_dir / "annotations" self.mkdir(anno_dir) - save_json_path = anno_dir / f'instances_{mode}2017.json' + save_json_path = anno_dir / f"instances_{mode}2017.json" json_data = self.convert(img_list, save_img_dir, mode) self.write_json(save_json_path, json_data) - print(f'Successfully convert, detail in {self.output_dir}') + print(f"Successfully convert, detail in {self.output_dir}") def _init_json(self): - classes_path = self.raw_data_dir / 'classes.txt' + classes_path = self.raw_data_dir / "classes.txt" self.verify_exists(classes_path) self.categories = self._get_category(classes_path) - self.type = 'instances' + self.type = "instances" self.annotation_id = 1 - self.cur_year = time.strftime('%Y', time.localtime(time.time())) + self.cur_year = time.strftime("%Y", time.localtime(time.time())) self.info = { - 'year': int(self.cur_year), - 'version': '1.0', - 'description': 'For object detection', - 'date_created': self.cur_year, + "year": int(self.cur_year), + "version": "1.0", + "description": "For object detection", + "date_created": self.cur_year, } - self.licenses = [{ - 'id': 1, - 'name': 'Apache License v2.0', - 'url': 'https://github.com/RapidAI/YOLO2COCO/LICENSE', - }] + self.licenses = [ + { + "id": 1, + "name": "Apache License v2.0", + "url": "https://github.com/RapidAI/YOLO2COCO/LICENSE", + } + ] def append_bg_img(self, img_list): - bg_dir = self.raw_data_dir / 'background_images' + bg_dir = self.raw_data_dir / "background_images" if bg_dir.exists(): bg_img_list = list(bg_dir.iterdir()) for bg_img_path in bg_img_list: @@ -86,11 +88,13 @@ def _get_category(self, classes_path): class_list = self.read_txt(classes_path) categories = [] for i, category in enumerate(class_list, 1): - categories.append({ - 'supercategory': category, - 'id': i, - 'name': category, - }) + categories.append( + { + "supercategory": category, + "id": i, + "name": category, + } + ) return categories def convert(self, img_list, save_img_dir, mode): @@ -99,20 +103,19 @@ def convert(self, img_list, save_img_dir, mode): image_dict = self.get_image_info(img_path, img_id, save_img_dir) images.append(image_dict) - label_path = self.raw_data_dir / 'labels' / f'{Path(img_path).stem}.txt' - annotation = self.get_annotation(label_path, - img_id, - image_dict['height'], - image_dict['width']) + label_path = self.raw_data_dir / "labels" / f"{Path(img_path).stem}.txt" + annotation = self.get_annotation( + label_path, img_id, image_dict["height"], image_dict["width"] + ) annotations.extend(annotation) json_data = { - 'info': self.info, - 'images': images, - 'licenses': self.licenses, - 'type': self.type, - 'annotations': annotations, - 'categories': self.categories, + "info": self.info, + "images": images, + "licenses": self.licenses, + "type": self.type, + "annotations": annotations, + "categories": self.categories, } return json_data @@ -125,7 +128,7 @@ def get_image_info(self, img_path, img_id, save_img_dir): self.verify_exists(img_path) - new_img_name = f'{img_id:012d}.jpg' + new_img_name = f"{img_id:012d}.jpg" save_img_path = save_img_dir / new_img_name img_src = cv2.imread(str(img_path)) if img_path.suffix.lower() == ".jpg": @@ -135,11 +138,11 @@ def get_image_info(self, img_path, img_id, save_img_dir): height, width = img_src.shape[:2] image_info = { - 'date_captured': self.cur_year, - 'file_name': new_img_name, - 'id': img_id, - 'height': height, - 'width': width, + "date_captured": self.cur_year, + "file_name": new_img_name, + "id": img_id, + "height": height, + "width": width, } return image_info @@ -166,45 +169,48 @@ def get_box_info(vertex_info, height, width): return segmentation, bbox, area if not label_path.exists(): - annotation = [{ - 'segmentation': [], - 'area': 0, - 'iscrowd': 0, - 'image_id': img_id, - 'bbox': [], - 'category_id': -1, - 'id': self.annotation_id, - }] + annotation = [ + { + "segmentation": [], + "area": 0, + "iscrowd": 0, + "image_id": img_id, + "bbox": [], + "category_id": -1, + "id": self.annotation_id, + } + ] self.annotation_id += 1 return annotation annotation = [] label_list = self.read_txt(str(label_path)) for i, one_line in enumerate(label_list): - label_info = one_line.split(' ') + label_info = one_line.split(" ") if len(label_info) < 5: - warnings.warn( - f'The {i+1} line of the {label_path} has been corrupted.') + warnings.warn(f"The {i+1} line of the {label_path} has been corrupted.") continue category_id, vertex_info = label_info[0], label_info[1:] segmentation, bbox, area = get_box_info(vertex_info, height, width) - annotation.append({ - 'segmentation': segmentation, - 'area': area, - 'iscrowd': 0, - 'image_id': img_id, - 'bbox': bbox, - 'category_id': int(category_id)+1, - 'id': self.annotation_id, - }) + annotation.append( + { + "segmentation": segmentation, + "area": area, + "iscrowd": 0, + "image_id": img_id, + "bbox": bbox, + "category_id": int(category_id) + 1, + "id": self.annotation_id, + } + ) self.annotation_id += 1 return annotation @staticmethod def read_txt(txt_path): - with open(str(txt_path), 'r', encoding='utf-8') as f: - data = list(map(lambda x: x.rstrip('\n'), f)) + with open(str(txt_path), "r", encoding="utf-8") as f: + data = list(map(lambda x: x.rstrip("\n"), f)) return data @staticmethod @@ -215,21 +221,23 @@ def mkdir(dir_path): def verify_exists(file_path): file_path = Path(file_path) if not file_path.exists(): - raise FileNotFoundError(f'The {file_path} is not exists!!!') + raise FileNotFoundError(f"The {file_path} is not exists!!!") @staticmethod def write_json(json_path, content: dict): - with open(json_path, 'w', encoding='utf-8') as f: + with open(json_path, "w", encoding="utf-8") as f: json.dump(content, f, ensure_ascii=False) if __name__ == "__main__": - parser = argparse.ArgumentParser('Datasets converter from YOLOV5 to COCO') - parser.add_argument('--data_dir', type=str, default='datasets/YOLOV5', - help='Dataset root path') - parser.add_argument('--mode_list', type=str, default='train,val', - help='generate which mode') + parser = argparse.ArgumentParser("Datasets converter from YOLOV5 to COCO") + parser.add_argument( + "--data_dir", type=str, default="datasets/YOLOV5", help="Dataset root path" + ) + parser.add_argument( + "--mode_list", type=str, default="train,val", help="generate which mode" + ) args = parser.parse_args() converter = YOLOV5ToCOCO(args.data_dir) - converter(mode_list=args.mode_list.split(',')) + converter(mode_list=args.mode_list.split(",")) diff --git a/yolov5_yaml_2_coco.py b/yolov5_yaml_2_coco.py index dbc3070..8e3b876 100644 --- a/yolov5_yaml_2_coco.py +++ b/yolov5_yaml_2_coco.py @@ -14,8 +14,8 @@ def read_txt(txt_path): - with open(str(txt_path), 'r', encoding='utf-8') as f: - data = list(map(lambda x: x.rstrip('\n'), f)) + with open(str(txt_path), "r", encoding="utf-8") as f: + data = list(map(lambda x: x.rstrip("\n"), f)) return data @@ -26,41 +26,44 @@ def mkdir(dir_path): def verify_exists(file_path): file_path = Path(file_path).resolve() if not file_path.exists(): - raise FileNotFoundError(f'The {file_path} is not exists!!!') + raise FileNotFoundError(f"The {file_path} is not exists!!!") -class YOLOV5CFG2COCO(): +class YOLOV5CFG2COCO: def __init__(self, yaml_path): verify_exists(yaml_path) - with open(yaml_path, 'r', encoding="UTF-8") as f: + with open(yaml_path, "r", encoding="UTF-8") as f: self.data_cfg = yaml.safe_load(f) self.root_dir = Path(yaml_path).parent.parent - self.root_data_dir = Path(self.data_cfg.get('path')) + self.root_data_dir = Path(self.data_cfg.get("path")) - self.train_path = self._get_data_dir('train') - self.val_path = self._get_data_dir('val') + self.train_path = self._get_data_dir("train") + self.val_path = self._get_data_dir("val") - nc = self.data_cfg['nc'] + nc = self.data_cfg["nc"] - if 'names' in self.data_cfg: - self.names = self.data_cfg.get('names') + if "names" in self.data_cfg: + self.names = self.data_cfg.get("names") else: # assign class names if missing - self.names = [f'class{i}' for i in range(self.data_cfg['nc'])] + self.names = [f"class{i}" for i in range(self.data_cfg["nc"])] - assert len(self.names) == nc, \ - f'{len(self.names)} names found for nc={nc} dataset in {yaml_path}' + assert ( + len(self.names) == nc + ), f"{len(self.names)} names found for nc={nc} dataset in {yaml_path}" # 构建COCO格式目录 self.dst = self.root_dir / f"{Path(self.root_data_dir).stem}_COCO_format" self.coco_train = "train2017" self.coco_val = "val2017" self.coco_annotation = "annotations" - self.coco_train_json = self.dst / self.coco_annotation / \ - f'instances_{self.coco_train}.json' - self.coco_val_json = self.dst / self.coco_annotation / \ - f'instances_{self.coco_val}.json' + self.coco_train_json = ( + self.dst / self.coco_annotation / f"instances_{self.coco_train}.json" + ) + self.coco_val_json = ( + self.dst / self.coco_annotation / f"instances_{self.coco_val}.json" + ) mkdir(self.dst) mkdir(self.dst / self.coco_train) @@ -68,24 +71,26 @@ def __init__(self, yaml_path): mkdir(self.dst / self.coco_annotation) # 构建json内容结构 - self.type = 'instances' + self.type = "instances" self.categories = [] self._get_category() self.annotation_id = 1 - cur_year = time.strftime('%Y', time.localtime(time.time())) + cur_year = time.strftime("%Y", time.localtime(time.time())) self.info = { - 'year': int(cur_year), - 'version': '1.0', - 'description': 'For object detection', - 'date_created': cur_year, + "year": int(cur_year), + "version": "1.0", + "description": "For object detection", + "date_created": cur_year, } - self.licenses = [{ - 'id': 1, - 'name': 'Apache License v2.0', - 'url': 'https://github.com/RapidAI/YOLO2COCO/LICENSE', - }] + self.licenses = [ + { + "id": 1, + "name": "Apache License v2.0", + "url": "https://github.com/RapidAI/YOLO2COCO/LICENSE", + } + ] def _get_data_dir(self, mode): data_dir = self.data_cfg.get(mode) @@ -93,59 +98,61 @@ def _get_data_dir(self, mode): if isinstance(data_dir, str): full_path = [str(self.root_data_dir / data_dir)] elif isinstance(data_dir, list): - full_path = [str(self.root_data_dir / one_dir) - for one_dir in data_dir] + full_path = [str(self.root_data_dir / one_dir) for one_dir in data_dir] else: - raise TypeError(f'{data_dir} is not str or list.') + raise TypeError(f"{data_dir} is not str or list.") else: - raise ValueError(f'{mode} dir is not in the yaml.') + raise ValueError(f"{mode} dir is not in the yaml.") return full_path def _get_category(self): for i, category in enumerate(self.names, start=1): - self.categories.append({ - 'supercategory': category, - 'id': i, - 'name': category, - }) + self.categories.append( + { + "supercategory": category, + "id": i, + "name": category, + } + ) def generate(self): self.train_files = self.get_files(self.train_path) self.valid_files = self.get_files(self.val_path) train_dest_dir = Path(self.dst) / self.coco_train - self.gen_dataset(self.train_files, train_dest_dir, - self.coco_train_json, mode='train') + self.gen_dataset( + self.train_files, train_dest_dir, self.coco_train_json, mode="train" + ) val_dest_dir = Path(self.dst) / self.coco_val - self.gen_dataset(self.valid_files, val_dest_dir, - self.coco_val_json, mode='val') + self.gen_dataset(self.valid_files, val_dest_dir, self.coco_val_json, mode="val") print(f"The output directory is: {self.dst}") def get_files(self, path): # include image suffixes - IMG_FORMATS = ['bmp', 'dng', 'jpeg', 'jpg', - 'mpo', 'png', 'tif', 'tiff', 'webp'] + IMG_FORMATS = ["bmp", "dng", "jpeg", "jpg", "mpo", "png", "tif", "tiff", "webp"] f = [] for p in path: p = Path(p) # os-agnostic if p.is_dir(): # dir - f += glob.glob(str(p / '**' / '*.*'), recursive=True) + f += glob.glob(str(p / "**" / "*.*"), recursive=True) # f = list(p.rglob('*.*')) # pathlib elif p.is_file(): # file with open(p) as t: t = t.read().strip().splitlines() parent = str(p.parent) + os.sep # local to global path - f += [x.replace('./', parent) - if x.startswith('./') else x for x in t] + f += [ + x.replace("./", parent) if x.startswith("./") else x for x in t + ] # f += [p.parent / x.lstrip(os.sep) for x in t] # local to global path (pathlib) else: - raise Exception(f'{p} does not exist') + raise Exception(f"{p} does not exist") - im_files = sorted(x.replace('/', os.sep) - for x in f if x.split('.')[-1].lower() in IMG_FORMATS) + im_files = sorted( + x.replace("/", os.sep) for x in f if x.split(".")[-1].lower() in IMG_FORMATS + ) return im_files def gen_dataset(self, img_paths, target_img_path, target_json, mode): @@ -155,12 +162,13 @@ def gen_dataset(self, img_paths, target_img_path, target_json, mode): """ images = [] annotations = [] - sa, sb = os.sep + 'images' + os.sep, os.sep + \ - 'labels' + os.sep # /images/, /labels/ substrings + sa, sb = ( + os.sep + "images" + os.sep, + os.sep + "labels" + os.sep, + ) # /images/, /labels/ substrings for img_id, img_path in enumerate(tqdm(img_paths, desc=mode), 1): - label_path = sb.join(img_path.rsplit( - sa, 1)).rsplit('.', 1)[0] + '.txt' + label_path = sb.join(img_path.rsplit(sa, 1)).rsplit(".", 1)[0] + ".txt" img_path = Path(img_path) @@ -169,7 +177,7 @@ def gen_dataset(self, img_paths, target_img_path, target_json, mode): imgsrc = cv2.imread(str(img_path)) height, width = imgsrc.shape[:2] - dest_file_name = f'{img_id:012d}.jpg' + dest_file_name = f"{img_id:012d}.jpg" save_img_path = target_img_path / dest_file_name if img_path.suffix.lower() == ".jpg": @@ -177,34 +185,35 @@ def gen_dataset(self, img_paths, target_img_path, target_json, mode): else: cv2.imwrite(str(save_img_path), imgsrc) - images.append({ - 'date_captured': '2021', - 'file_name': dest_file_name, - 'id': img_id, - 'height': height, - 'width': width, - }) + images.append( + { + "date_captured": "2021", + "file_name": dest_file_name, + "id": img_id, + "height": height, + "width": width, + } + ) if Path(label_path).exists(): - new_anno = self.read_annotation(label_path, img_id, - height, width) + new_anno = self.read_annotation(label_path, img_id, height, width) if len(new_anno) > 0: annotations.extend(new_anno) else: # print(f'{label_path} is empty') - raise ValueError(f'{label_path} is empty') + raise ValueError(f"{label_path} is empty") else: - raise FileNotFoundError(f'{label_path} not exists') + raise FileNotFoundError(f"{label_path} not exists") json_data = { - 'info': self.info, - 'images': images, - 'licenses': self.licenses, - 'type': self.type, - 'annotations': annotations, - 'categories': self.categories, + "info": self.info, + "images": images, + "licenses": self.licenses, + "type": self.type, + "annotations": annotations, + "categories": self.categories, } - with open(target_json, 'w', encoding='utf-8') as f: + with open(target_json, "w", encoding="utf-8") as f: json.dump(json_data, f, ensure_ascii=False) def read_annotation(self, txt_file, img_id, height, width): @@ -217,17 +226,18 @@ def read_annotation(self, txt_file, img_id, height, width): continue category_id, vertex_info = label_info[0], label_info[1:] - segmentation, bbox, area = self._get_annotation(vertex_info, - height, width) - annotation.append({ - 'segmentation': segmentation, - 'area': area, - 'iscrowd': 0, - 'image_id': img_id, - 'bbox': bbox, - 'category_id': int(category_id)+1, - 'id': self.annotation_id, - }) + segmentation, bbox, area = self._get_annotation(vertex_info, height, width) + annotation.append( + { + "segmentation": segmentation, + "area": area, + "iscrowd": 0, + "image_id": img_id, + "bbox": bbox, + "category_id": int(category_id) + 1, + "id": self.annotation_id, + } + ) self.annotation_id += 1 return annotation @@ -255,10 +265,13 @@ def _get_annotation(vertex_info, height, width): if __name__ == "__main__": - parser = argparse.ArgumentParser('Datasets converter from YOLOV5 to COCO') - parser.add_argument('--yaml_path', type=str, - default='dataset/YOLOV5_yaml/sample.yaml', - help='Dataset cfg file') + parser = argparse.ArgumentParser("Datasets converter from YOLOV5 to COCO") + parser.add_argument( + "--yaml_path", + type=str, + default="dataset/YOLOV5_yaml/sample.yaml", + help="Dataset cfg file", + ) args = parser.parse_args() converter = YOLOV5CFG2COCO(args.yaml_path)