-
-
Notifications
You must be signed in to change notification settings - Fork 386
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
237 additions
and
105 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,110 +1,24 @@ | ||
# -*- encoding: utf-8 -*- | ||
# @Author: SWHL | ||
# @Contact: [email protected] | ||
import math | ||
import random | ||
from pathlib import Path | ||
|
||
import cv2 | ||
import numpy as np | ||
from PIL import Image, ImageDraw, ImageFont | ||
|
||
from rapidocr_onnxruntime import RapidOCR | ||
|
||
# from rapidocr_openvino import RapidOCR | ||
|
||
|
||
def draw_ocr_box_txt(image, boxes, txts, font_path, scores=None, text_score=0.5):
    """Render OCR results as a side-by-side picture.

    The left half is ``image`` blended 50/50 with colour-filled text boxes;
    the right half is a white canvas carrying each box outline and its text.

    Args:
        image: PIL image to annotate. Mode ``"L"`` is converted to ``"RGB"``.
        boxes: Iterable of boxes, each indexable as four ``(x, y)`` corners.
        txts: Recognised strings, aligned with ``boxes``.
        font_path: Path to a TrueType font file; it must exist.
        scores: Optional per-box scores, aligned with ``boxes``.
        text_score: Boxes whose score is below this value are not drawn.

    Returns:
        ``np.ndarray`` built from the combined image, twice as wide as
        ``image`` and equally tall.

    Raises:
        FileNotFoundError: If ``font_path`` does not exist.
    """
    if not Path(font_path).exists():
        raise FileNotFoundError(
            f"The {font_path} does not exists! \n"
            f"Please download the file in the https://drive.google.com/file/d/1evWVX38EFNwTq_n5gTFgnlv8tdaNcyIA/view?usp=sharing"
        )

    h, w = image.height, image.width
    if image.mode == "L":
        image = image.convert("RGB")

    img_left = image.copy()
    img_right = Image.new("RGB", (w, h), (255, 255, 255))

    # A private generator seeded with 0 yields the same colour sequence as
    # random.seed(0) did, without reseeding the process-wide RNG.
    rng = random.Random(0)
    draw_left = ImageDraw.Draw(img_left)
    draw_right = ImageDraw.Draw(img_right)
    for idx, (box, txt) in enumerate(zip(boxes, txts)):
        if scores is not None and float(scores[idx]) < text_score:
            continue

        color = (rng.randint(0, 255), rng.randint(0, 255), rng.randint(0, 255))
        # Plain Python float tuples keep drawing independent of the dtype of
        # whatever array type the caller passed in.
        points = [(float(x), float(y)) for x, y in box]
        draw_left.polygon(points, fill=color)
        draw_right.polygon(points, outline=color)

        box_height = math.hypot(
            points[0][0] - points[3][0], points[0][1] - points[3][1]
        )
        box_width = math.hypot(
            points[0][0] - points[1][0], points[0][1] - points[1][1]
        )

        if box_height > 2 * box_width:
            # Tall, narrow box: treat as vertical text, one character per row.
            font_size = max(int(box_width * 0.9), 10)
            font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
            cur_y = points[0][1]
            for c in txt:
                draw_right.text(
                    (points[0][0] + 3, cur_y), c, fill=(0, 0, 0), font=font
                )
                cur_y += _char_height(font, c)
        else:
            font_size = max(int(box_height * 0.8), 10)
            font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
            draw_right.text(points[0], txt, fill=(0, 0, 0), font=font)

    img_left = Image.blend(image, img_left, 0.5)
    img_show = Image.new("RGB", (w * 2, h), (255, 255, 255))
    img_show.paste(img_left, (0, 0, w, h))
    img_show.paste(img_right, (w, 0, w * 2, h))
    return np.array(img_show)


def _char_height(font, char):
    """Return the vertical advance for ``char``, on old and new Pillow alike."""
    # Pillow 10 removed FreeTypeFont.getsize(); the bottom edge reported by
    # getbbox() is the same number getsize()[1] used to return.
    if hasattr(font, "getbbox"):
        return font.getbbox(char)[3]
    return font.getsize(char)[1]
|
||
|
||
def visualize(image_path, result, font_path="resources/fonts/FZYTK.TTF"): | ||
image = Image.open(image_path) | ||
boxes, txts, scores = list(zip(*result)) | ||
|
||
draw_img = draw_ocr_box_txt( | ||
image, np.array(boxes), txts, font_path, scores, text_score=0.5 | ||
) | ||
from rapidocr_onnxruntime import RapidOCR, VisRes | ||
|
||
draw_img_save = Path("./inference_results/") | ||
if not draw_img_save.exists(): | ||
draw_img_save.mkdir(parents=True, exist_ok=True) | ||
# from rapidocr_openvino import RapidOCR, VisRes | ||
|
||
image_save = str(draw_img_save / f"infer_{Path(image_path).name}") | ||
cv2.imwrite(image_save, draw_img[:, :, ::-1]) | ||
print(f"The infer result has saved in {image_save}") | ||
|
||
rapid_ocr = RapidOCR() | ||
vis = VisRes(font_path="resources/fonts/FZYTK.TTF") | ||
|
||
if __name__ == "__main__": | ||
rapid_ocr = RapidOCR() | ||
image_path = "tests/test_files/ch_en_num.jpg" | ||
with open(image_path, "rb") as f: | ||
img = f.read() | ||
|
||
image_path = "tests/test_files/ch_en_num.jpg" | ||
with open(image_path, "rb") as f: | ||
img = f.read() | ||
result, elapse_list = rapid_ocr(img) | ||
print(result) | ||
print(elapse_list) | ||
result, elapse_list = rapid_ocr(img) | ||
print(result) | ||
print(elapse_list) | ||
|
||
if result: | ||
visualize(image_path, result, font_path="resources/fonts/FZYTK.TTF") | ||
boxes, txts, scores = list(zip(*result)) | ||
res = vis(img, boxes, txts, scores) | ||
cv2.imwrite("vis.png", res) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,4 +2,4 @@ | |
# @Author: SWHL | ||
# @Contact: [email protected] | ||
from .main import RapidOCR | ||
from .utils import LoadImageError | ||
from .utils import LoadImageError, VisRes |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,11 +2,13 @@ | |
# @Author: SWHL | ||
# @Contact: [email protected] | ||
import argparse | ||
import math | ||
import random | ||
import traceback | ||
import warnings | ||
from io import BytesIO | ||
from pathlib import Path | ||
from typing import Dict, List, Optional, Union | ||
from typing import Dict, List, Optional, Tuple, Union | ||
|
||
import cv2 | ||
import numpy as np | ||
|
@@ -18,7 +20,7 @@ | |
get_available_providers, | ||
get_device, | ||
) | ||
from PIL import Image, UnidentifiedImageError | ||
from PIL import Image, ImageDraw, ImageFont, UnidentifiedImageError | ||
|
||
root_dir = Path(__file__).resolve().parent | ||
InputType = Union[str, np.ndarray, bytes, Path] | ||
|
@@ -344,3 +346,110 @@ def remove_prefix( | |
k = k.split(prefix)[1] | ||
new_rec_dict[k] = v | ||
return new_rec_dict | ||
|
||
|
||
class VisRes:
    """Draw OCR detection boxes, and optionally recognised text, on an image.

    Calling an instance with only boxes returns the image with numbered,
    colour-filled boxes. Calling it with boxes and texts returns a
    side-by-side picture: the annotated input on the left and the recognised
    text on a white canvas on the right.
    """

    def __init__(
        self, font_path: Optional[Union[str, Path]] = None, text_score: float = 0.5
    ):
        """Store the drawing configuration.

        Args:
            font_path: TrueType font handed to ``ImageFont.truetype``.
            text_score: Results scoring below this value are not drawn.

        Raises:
            FileNotFoundError: If ``font_path`` is ``None``.
        """
        if font_path is None:
            raise FileNotFoundError(
                f"The {font_path} does not exists! \n"
                f"You could download the file in the https://drive.google.com/file/d/1evWVX38EFNwTq_n5gTFgnlv8tdaNcyIA/view?usp=sharing"
            )

        self.font_path = str(font_path)
        self.text_score = text_score
        self.load_img = LoadImage()

    def __call__(
        self,
        img_content: "InputType",
        dt_boxes: np.ndarray,
        txts: Optional[Union[List[str], Tuple[str, ...]]] = None,
        scores: Optional[Tuple[float, ...]] = None,
    ) -> np.ndarray:
        """Visualise ``dt_boxes`` (and ``txts`` when given) on ``img_content``.

        Args:
            img_content: Anything ``self.load_img`` accepts.
            dt_boxes: Boxes, each indexable as four ``(x, y)`` corners.
            txts: Recognised strings aligned with ``dt_boxes``.
            scores: Per-box scores aligned with ``dt_boxes``.

        Returns:
            The rendered picture as an ``np.ndarray``.
        """
        img = Image.fromarray(self.load_img(img_content))

        # Without texts there is nothing to write on the right-hand canvas,
        # so fall back to box-only drawing even when scores were supplied.
        if txts is None:
            return self.draw_dt_boxes(img, dt_boxes)

        return self.draw_ocr_box_txt(img, dt_boxes, txts, scores)

    def draw_dt_boxes(self, img: "Image.Image", dt_boxes: np.ndarray) -> np.ndarray:
        """Return a copy of ``img`` with every box filled, outlined and numbered."""
        img_temp = img.copy()
        draw_img = ImageDraw.Draw(img_temp)
        for idx, box in enumerate(dt_boxes):
            points = self._to_points(box)
            draw_img.polygon(points, fill=self.get_random_color())

            font_size = max(int(self.get_box_height(points) * 0.8), 10)
            font = ImageFont.truetype(self.font_path, font_size, encoding="utf-8")
            draw_img.polygon(points, outline=(0, 0, 0))
            draw_img.text(points[0], str(idx), fill=(0, 0, 0), font=font)
        return np.array(img_temp)

    def draw_ocr_box_txt(self, image: "Image.Image", boxes, txts, scores=None):
        """Return a double-width picture: annotated image left, text right.

        Boxes whose score is below ``self.text_score`` are skipped. A box more
        than twice as tall as it is wide is treated as vertical text and drawn
        one character per row.
        """
        h, w = image.height, image.width
        if image.mode == "L":
            image = image.convert("RGB")

        img_left = image.copy()
        img_right = Image.new("RGB", (w, h), (255, 255, 255))

        # A private generator seeded with 0 yields the same colour sequence as
        # random.seed(0) did, without reseeding the process-wide RNG.
        rng = random.Random(0)
        draw_left = ImageDraw.Draw(img_left)
        draw_right = ImageDraw.Draw(img_right)
        for idx, (box, txt) in enumerate(zip(boxes, txts)):
            if scores is not None and float(scores[idx]) < self.text_score:
                continue

            color = self.get_random_color(rng)
            points = self._to_points(box)
            draw_left.polygon(points, fill=color)
            draw_right.polygon(points, outline=color)

            box_height = self.get_box_height(points)
            box_width = self.get_box_width(points)
            if box_height > 2 * box_width:
                font_size = max(int(box_width * 0.9), 10)
                font = ImageFont.truetype(self.font_path, font_size, encoding="utf-8")
                cur_y = points[0][1]
                for c in txt:
                    draw_right.text(
                        (points[0][0] + 3, cur_y), c, fill=(0, 0, 0), font=font
                    )
                    cur_y += self._get_char_height(font, c)
            else:
                font_size = max(int(box_height * 0.8), 10)
                font = ImageFont.truetype(self.font_path, font_size, encoding="utf-8")
                draw_right.text(points[0], txt, fill=(0, 0, 0), font=font)

        img_left = Image.blend(image, img_left, 0.5)
        img_show = Image.new("RGB", (w * 2, h), (255, 255, 255))
        img_show.paste(img_left, (0, 0, w, h))
        img_show.paste(img_right, (w, 0, w * 2, h))
        return np.array(img_show)

    @staticmethod
    def get_random_color(rng: Optional[random.Random] = None) -> Tuple[int, int, int]:
        """Return a random RGB triple, drawn from ``rng`` or the global RNG."""
        source = random if rng is None else rng
        return (
            source.randint(0, 255),
            source.randint(0, 255),
            source.randint(0, 255),
        )

    @staticmethod
    def get_box_height(box: List[List[float]]) -> float:
        """Return the distance between the first and fourth corners of ``box``."""
        return math.hypot(box[0][0] - box[3][0], box[0][1] - box[3][1])

    @staticmethod
    def get_box_width(box: List[List[float]]) -> float:
        """Return the distance between the first and second corners of ``box``."""
        return math.hypot(box[0][0] - box[1][0], box[0][1] - box[1][1])

    @staticmethod
    def _to_points(box) -> List[Tuple[float, float]]:
        """Convert ``box`` to a list of plain ``(x, y)`` float tuples."""
        # Plain Python floats keep drawing independent of the dtype of
        # whatever array type the caller passed in.
        return [(float(x), float(y)) for x, y in box]

    @staticmethod
    def _get_char_height(font, char: str) -> float:
        """Return the vertical advance for ``char``, on old and new Pillow alike."""
        # Pillow 10 removed FreeTypeFont.getsize(); the bottom edge reported by
        # getbbox() is the same number getsize()[1] used to return.
        if hasattr(font, "getbbox"):
            return font.getbbox(char)[3]
        return font.getsize(char)[1]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,4 +2,4 @@ | |
# @Author: SWHL | ||
# @Contact: [email protected] | ||
from .main import RapidOCR | ||
from .utils import LoadImageError | ||
from .utils import LoadImageError, VisRes |
Oops, something went wrong.