From 94576e5e4ce900ad3b59abbb828e40c1f47f818d Mon Sep 17 00:00:00 2001 From: SWHL Date: Fri, 17 Jan 2025 09:36:38 +0800 Subject: [PATCH] chore: optimize code --- .../ch_ppocr_det/text_detect.py | 4 +-- .../ch_ppocr_det/utils.py | 7 +++-- python/rapidocr_onnxruntime/config.yaml | 5 +--- .../ch_ppocr_cls/config.yaml | 14 ---------- .../ch_ppocr_det/config.yaml | 21 --------------- .../rapidocr_openvino/ch_ppocr_det/utils.py | 11 +++++--- .../ch_ppocr_rec/config.yaml | 4 --- python/rapidocr_openvino/config.yaml | 2 ++ .../rapidocr_paddle/ch_ppocr_cls/config.yaml | 10 ------- .../rapidocr_paddle/ch_ppocr_cls/text_cls.py | 19 +------------- .../rapidocr_paddle/ch_ppocr_det/config.yaml | 26 ------------------- python/rapidocr_paddle/ch_ppocr_det/utils.py | 11 +++++--- .../rapidocr_paddle/ch_ppocr_rec/config.yaml | 8 ------ .../ch_ppocr_rec/text_recognize.py | 17 +----------- python/rapidocr_paddle/config.yaml | 2 ++ python/tests/test_paddle.py | 15 ++++++++--- python/tests/test_vino.py | 15 ++++++++--- 17 files changed, 50 insertions(+), 141 deletions(-) delete mode 100644 python/rapidocr_openvino/ch_ppocr_cls/config.yaml delete mode 100644 python/rapidocr_openvino/ch_ppocr_det/config.yaml delete mode 100644 python/rapidocr_openvino/ch_ppocr_rec/config.yaml delete mode 100644 python/rapidocr_paddle/ch_ppocr_cls/config.yaml delete mode 100644 python/rapidocr_paddle/ch_ppocr_det/config.yaml delete mode 100644 python/rapidocr_paddle/ch_ppocr_rec/config.yaml diff --git a/python/rapidocr_onnxruntime/ch_ppocr_det/text_detect.py b/python/rapidocr_onnxruntime/ch_ppocr_det/text_detect.py index 1e97b9302..5632d7908 100644 --- a/python/rapidocr_onnxruntime/ch_ppocr_det/text_detect.py +++ b/python/rapidocr_onnxruntime/ch_ppocr_det/text_detect.py @@ -63,7 +63,7 @@ def __call__(self, img: np.ndarray) -> Tuple[Optional[np.ndarray], float]: return dt_boxes, elapse def get_preprocess(self, max_wh): - if self.limit_type == 'min': + if self.limit_type == "min": limit_side_len = self.limit_side_len elif max_wh < 960: limit_side_len = 960 @@ -71,7 +71,7 @@ def get_preprocess(self, max_wh): limit_side_len = 1500 else: limit_side_len = 2000 - return DetPreProcess(limit_side_len, self.limit_type,self.mean, self.std) + return DetPreProcess(limit_side_len, self.limit_type, self.mean, self.std) def filter_tag_det_res( self, dt_boxes: np.ndarray, image_shape: Tuple[int, int] diff --git a/python/rapidocr_onnxruntime/ch_ppocr_det/utils.py b/python/rapidocr_onnxruntime/ch_ppocr_det/utils.py index bc5ccc11b..ef410631c 100644 --- a/python/rapidocr_onnxruntime/ch_ppocr_det/utils.py +++ b/python/rapidocr_onnxruntime/ch_ppocr_det/utils.py @@ -10,12 +10,15 @@ class DetPreProcess: - def __init__(self, limit_side_len: int = 736, limit_type: str = "min", mean=None, - std=None): + def __init__( + self, limit_side_len: int = 736, limit_type: str = "min", mean=None, std=None + ): if mean is None: mean = [0.5, 0.5, 0.5] + if std is None: std = [0.5, 0.5, 0.5] + self.mean = np.array(mean) self.std = np.array(std) self.scale = 1 / 255.0 diff --git a/python/rapidocr_onnxruntime/config.yaml b/python/rapidocr_onnxruntime/config.yaml index eaa7a7df5..d249ce8f3 100644 --- a/python/rapidocr_onnxruntime/config.yaml +++ b/python/rapidocr_onnxruntime/config.yaml @@ -26,10 +26,7 @@ Det: limit_type: min std: [ 0.5, 0.5, 0.5 ] mean: [ 0.5, 0.5, 0.5 ] -# limit_side_len: 960 -# limit_type: max -# std: [ 0.229, 0.224, 0.225 ] -# mean: [ 0.485, 0.456, 0.406 ] + thresh: 0.3 box_thresh: 0.5 max_candidates: 1000 diff --git a/python/rapidocr_openvino/ch_ppocr_cls/config.yaml b/python/rapidocr_openvino/ch_ppocr_cls/config.yaml deleted file mode 100644 index 0ec669ec8..000000000 --- a/python/rapidocr_openvino/ch_ppocr_cls/config.yaml +++ /dev/null @@ -1,14 +0,0 @@ -model_path: models/ch_ppocr_mobile_v2.0_cls_infer.onnx - -use_cuda: false -# Details of the params: https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html -CUDAExecutionProvider: - device_id: 0 - arena_extend_strategy: kNextPowerOfTwo - cudnn_conv_algo_search: EXHAUSTIVE - do_copy_in_default_stream: true - -cls_image_shape: [3, 48, 192] -cls_batch_num: 6 -cls_thresh: 0.9 -label_list: ['0', '180'] \ No newline at end of file diff --git a/python/rapidocr_openvino/ch_ppocr_det/config.yaml b/python/rapidocr_openvino/ch_ppocr_det/config.yaml deleted file mode 100644 index 7be1e716c..000000000 --- a/python/rapidocr_openvino/ch_ppocr_det/config.yaml +++ /dev/null @@ -1,21 +0,0 @@ -model_path: models/ch_PP-OCRv4_det_infer.onnx - -pre_process: - DetResizeForTest: - limit_side_len: 736 - limit_type: min - NormalizeImage: - std: [0.229, 0.224, 0.225] - mean: [0.485, 0.456, 0.406] - scale: 1./255. - order: hwc - ToCHWImage: - KeepKeys: - keep_keys: ['image', 'shape'] - -post_process: - thresh: 0.3 - box_thresh: 0.5 - max_candidates: 1000 - unclip_ratio: 1.6 - use_dilation: true diff --git a/python/rapidocr_openvino/ch_ppocr_det/utils.py b/python/rapidocr_openvino/ch_ppocr_det/utils.py index 014145c0f..ef410631c 100644 --- a/python/rapidocr_openvino/ch_ppocr_det/utils.py +++ b/python/rapidocr_openvino/ch_ppocr_det/utils.py @@ -10,12 +10,15 @@ class DetPreProcess: - def __init__(self, limit_side_len: int = 736, limit_type: str = "min",mean=None, - std=None): + def __init__( + self, limit_side_len: int = 736, limit_type: str = "min", mean=None, std=None + ): if mean is None: - mean = [0.485, 0.456, 0.406] + mean = [0.5, 0.5, 0.5] + if std is None: - std = [0.229, 0.224, 0.225] + std = [0.5, 0.5, 0.5] + self.mean = np.array(mean) self.std = np.array(std) self.scale = 1 / 255.0 diff --git a/python/rapidocr_openvino/ch_ppocr_rec/config.yaml b/python/rapidocr_openvino/ch_ppocr_rec/config.yaml deleted file mode 100644 index 9453c0856..000000000 --- a/python/rapidocr_openvino/ch_ppocr_rec/config.yaml +++ /dev/null @@ -1,4 +0,0 @@ -model_path: models/ch_PP-OCRv4_rec_infer.onnx - -rec_img_shape: [3, 48, 320] -rec_batch_num: 6 diff --git a/python/rapidocr_openvino/config.yaml b/python/rapidocr_openvino/config.yaml index 8954d2170..b6831f4e0 100644 --- a/python/rapidocr_openvino/config.yaml +++ b/python/rapidocr_openvino/config.yaml @@ -21,6 +21,8 @@ Det: limit_side_len: 736 limit_type: min + std: [ 0.5, 0.5, 0.5 ] + mean: [ 0.5, 0.5, 0.5 ] thresh: 0.3 box_thresh: 0.5 diff --git a/python/rapidocr_paddle/ch_ppocr_cls/config.yaml b/python/rapidocr_paddle/ch_ppocr_cls/config.yaml deleted file mode 100644 index 47eb5a028..000000000 --- a/python/rapidocr_paddle/ch_ppocr_cls/config.yaml +++ /dev/null @@ -1,10 +0,0 @@ -model_path: models/ch_ppocr_mobile_v2_cls_infer - -use_cuda: false -gpu_id: 0 -gpu_mem: 500 - -cls_image_shape: [3, 48, 192] -cls_batch_num: 6 -cls_thresh: 0.9 -label_list: ['0', '180'] \ No newline at end of file diff --git a/python/rapidocr_paddle/ch_ppocr_cls/text_cls.py b/python/rapidocr_paddle/ch_ppocr_cls/text_cls.py index 06317a9f1..063816aa8 100644 --- a/python/rapidocr_paddle/ch_ppocr_cls/text_cls.py +++ b/python/rapidocr_paddle/ch_ppocr_cls/text_cls.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import argparse import copy import math import time @@ -20,7 +19,7 @@ import cv2 import numpy as np -from rapidocr_paddle.utils import PaddleInferSession, read_yaml +from rapidocr_paddle.utils import PaddleInferSession from .utils import ClsPostProcess @@ -96,19 +95,3 @@ def resize_norm_img(self, img): padding_im = np.zeros((img_c, img_h, img_w), dtype=np.float32) padding_im[:, :, :resized_w] = resized_image return padding_im - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--image_path", type=str, help="image_dir|image_path") - parser.add_argument("--config_path", type=str, default="config.yaml") - args = parser.parse_args() - - config = read_yaml(args.config_path) - - text_classifier = TextClassifier(config) - - img = cv2.imread(args.image_path) - img_list, cls_res, predict_time = text_classifier(img) - for ino in range(len(img_list)): - print(f"cls result:{cls_res[ino]}") diff --git a/python/rapidocr_paddle/ch_ppocr_det/config.yaml b/python/rapidocr_paddle/ch_ppocr_det/config.yaml deleted file mode 100644 index 7091a0730..000000000 --- a/python/rapidocr_paddle/ch_ppocr_det/config.yaml +++ /dev/null @@ -1,26 +0,0 @@ -model_path: models/ch_PP-OCRv4_det_infer - -use_cuda: false -gpu_id: 0 -gpu_mem: 500 - -pre_process: - DetResizeForTest: - limit_side_len: 736 - limit_type: min - NormalizeImage: - std: [0.229, 0.224, 0.225] - mean: [0.485, 0.456, 0.406] - scale: 1./255. - order: hwc - ToCHWImage: - KeepKeys: - keep_keys: ['image', 'shape'] - -post_process: - thresh: 0.3 - box_thresh: 0.5 - max_candidates: 1000 - unclip_ratio: 1.6 - use_dilation: true - score_mode: "fast" diff --git a/python/rapidocr_paddle/ch_ppocr_det/utils.py b/python/rapidocr_paddle/ch_ppocr_det/utils.py index 9302d4a64..ef410631c 100644 --- a/python/rapidocr_paddle/ch_ppocr_det/utils.py +++ b/python/rapidocr_paddle/ch_ppocr_det/utils.py @@ -10,12 +10,15 @@ class DetPreProcess: - def __init__(self, limit_side_len: int = 736, limit_type: str = "min", mean=None, - std=None): + def __init__( + self, limit_side_len: int = 736, limit_type: str = "min", mean=None, std=None + ): if mean is None: - mean = [0.485, 0.456, 0.406] + mean = [0.5, 0.5, 0.5] + if std is None: - std = [0.229, 0.224, 0.225] + std = [0.5, 0.5, 0.5] + self.mean = np.array(mean) self.std = np.array(std) self.scale = 1 / 255.0 diff --git a/python/rapidocr_paddle/ch_ppocr_rec/config.yaml b/python/rapidocr_paddle/ch_ppocr_rec/config.yaml deleted file mode 100644 index 6b7375c29..000000000 --- a/python/rapidocr_paddle/ch_ppocr_rec/config.yaml +++ /dev/null @@ -1,8 +0,0 @@ -model_path: models/ch_PP-OCRv4_rec_infer - -use_cuda: false -gpu_id: 0 -gpu_mem: 500 - -rec_img_shape: [3, 48, 320] -rec_batch_num: 6 diff --git a/python/rapidocr_paddle/ch_ppocr_rec/text_recognize.py b/python/rapidocr_paddle/ch_ppocr_rec/text_recognize.py index 320a141c9..30994fe14 100644 --- a/python/rapidocr_paddle/ch_ppocr_rec/text_recognize.py +++ b/python/rapidocr_paddle/ch_ppocr_rec/text_recognize.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import argparse import math import time from pathlib import Path @@ -20,7 +19,7 @@ import cv2 import numpy as np -from rapidocr_paddle.utils import PaddleInferSession, read_yaml +from rapidocr_paddle.utils import PaddleInferSession from .utils import CTCLabelDecode @@ -112,17 +111,3 @@ def resize_norm_img(self, img: np.ndarray, max_wh_ratio: float) -> np.ndarray: padding_im = np.zeros((img_channel, img_height, img_width), dtype=np.float32) padding_im[:, :, 0:resized_w] = resized_image return padding_im - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--image_path", type=str, help="image_dir|image_path") - parser.add_argument("--config_path", type=str, default="config.yaml") - args = parser.parse_args() - - config = read_yaml(args.config_path) - text_recognizer = TextRecognizer(config) - - img = cv2.imread(args.image_path) - rec_res, predict_time = text_recognizer(img) - print(f"rec result: {rec_res}\t cost: {predict_time}s") diff --git a/python/rapidocr_paddle/config.yaml b/python/rapidocr_paddle/config.yaml index 8bf797667..e3a057d6a 100644 --- a/python/rapidocr_paddle/config.yaml +++ b/python/rapidocr_paddle/config.yaml @@ -23,6 +23,8 @@ Det: limit_side_len: 736 limit_type: min + std: [ 0.5, 0.5, 0.5 ] + mean: [ 0.5, 0.5, 0.5 ] thresh: 0.3 box_thresh: 0.5 diff --git a/python/tests/test_paddle.py b/python/tests/test_paddle.py index f75d846c1..ad3f809f4 100644 --- a/python/tests/test_paddle.py +++ b/python/tests/test_paddle.py @@ -1,6 +1,13 @@ # -*- encoding: utf-8 -*- # @Author: SWHL # @Contact: liekkaskono@163.com +""" +以下测试用例在以下环境中可以全部通过 +OS: macOS 15.1.1 (24B91) +Python: 3.10.13 +paddlepaddle: 3.0.0b2 CPU +""" + import sys from pathlib import Path from typing import List @@ -27,7 +34,7 @@ def test_long_img(): download_file(img_url, save_path=img_path) result, _ = engine(img_path) assert result is not None - assert len(result) == 55 + assert len(result) == 53 img_path.unlink() @@ -62,7 +69,7 @@ def test_transparent_img(img_name: str, gt: str): ( "test_letterbox_like.jpg", 2, - "A::取决于所使用的执行提供者,它可能没有完全支持模型中的所有操作。回落到CPU操作可能会导致性能速度的下降。此外,即使一个操作是由CUDAeXecution", + "A::取决于所使用的执行提供者,它可能没有完全支持模型中的所有操作。回落到CPU操作可能会导致性能速度的下降。此外,即使一个操作是由CUDAexecution", ), ("test_without_det.jpg", 1, "在中国作家协会第三届儿童文学"), ], @@ -79,7 +86,7 @@ def test_only_det(): result, _ = engine(img_path, use_det=True, use_cls=False, use_rec=False) assert len(result) == 18 - assert result[0][0] == [5.0, 2.0] + assert result[0][0] == [6.0, 2.0] def test_only_cls(): @@ -212,7 +219,7 @@ def test_input_three_ndim_one_channel(): ), ( "text_vertical_words.png", - ["已", "取", "之", "時", "不", "參", "一", "人", "见", "而"], + ["已", "取", "之", "時", "不", "參", "一", "人", "見", "而"], ), ( "issue_170.png", diff --git a/python/tests/test_vino.py b/python/tests/test_vino.py index 9c7fb2929..629cb0b7e 100644 --- a/python/tests/test_vino.py +++ b/python/tests/test_vino.py @@ -1,6 +1,13 @@ # -*- encoding: utf-8 -*- # @Author: SWHL # @Contact: liekkaskono@163.com +""" +以下测试用例在以下环境中可以全部通过 +OS: macOS 15.1.1 (24B91) +Python: 3.10.13 +openvino: 2024.0.0 +""" + import sys from pathlib import Path from typing import List @@ -27,7 +34,7 @@ def test_long_img(): download_file(img_url, save_path=img_path) result, _ = engine(img_path) assert result is not None - assert len(result) == 55 + assert len(result) == 53 img_path.unlink() @@ -62,7 +69,7 @@ def test_transparent_img(img_name: str, gt: str): ( "test_letterbox_like.jpg", 2, - "A::取决于所使用的执行提供者,它可能没有完全支持模型中的所有操作。回落到CPU操作可能会导致性能速度的下降。此外,即使一个操作是由CUDAeXecution", + "A::取决于所使用的执行提供者,它可能没有完全支持模型中的所有操作。回落到CPU操作可能会导致性能速度的下降。此外,即使一个操作是由CUDAexecution", ), ("test_without_det.jpg", 1, "在中国作家协会第三届儿童文学"), ], @@ -79,7 +86,7 @@ def test_only_det(): result, _ = engine(img_path, use_det=True, use_cls=False, use_rec=False) assert len(result) == 18 - assert result[0][0] == [5.0, 2.0] + assert result[0][0] == [6.0, 2.0] def test_only_cls(): @@ -212,7 +219,7 @@ def test_input_three_ndim_one_channel(): ), ( "text_vertical_words.png", - ["已", "取", "之", "時", "不", "參", "一", "人", "见", "而"], + ["已", "取", "之", "時", "不", "參", "一", "人", "見", "而"], ), ( "issue_170.png",