From 94576e5e4ce900ad3b59abbb828e40c1f47f818d Mon Sep 17 00:00:00 2001
From: SWHL <liekkaskono@163.com>
Date: Fri, 17 Jan 2025 09:36:38 +0800
Subject: [PATCH] chore: unify det mean/std defaults, drop unused per-module configs

---
 .../ch_ppocr_det/text_detect.py               |  4 +--
 .../ch_ppocr_det/utils.py                     |  7 +++--
 python/rapidocr_onnxruntime/config.yaml       |  5 +---
 .../ch_ppocr_cls/config.yaml                  | 14 ----------
 .../ch_ppocr_det/config.yaml                  | 21 ---------------
 .../rapidocr_openvino/ch_ppocr_det/utils.py   | 11 +++++---
 .../ch_ppocr_rec/config.yaml                  |  4 ---
 python/rapidocr_openvino/config.yaml          |  2 ++
 .../rapidocr_paddle/ch_ppocr_cls/config.yaml  | 10 -------
 .../rapidocr_paddle/ch_ppocr_cls/text_cls.py  | 19 +-------------
 .../rapidocr_paddle/ch_ppocr_det/config.yaml  | 26 -------------------
 python/rapidocr_paddle/ch_ppocr_det/utils.py  | 11 +++++---
 .../rapidocr_paddle/ch_ppocr_rec/config.yaml  |  8 ------
 .../ch_ppocr_rec/text_recognize.py            | 17 +-----------
 python/rapidocr_paddle/config.yaml            |  2 ++
 python/tests/test_paddle.py                   | 15 ++++++++---
 python/tests/test_vino.py                     | 15 ++++++++---
 17 files changed, 50 insertions(+), 141 deletions(-)
 delete mode 100644 python/rapidocr_openvino/ch_ppocr_cls/config.yaml
 delete mode 100644 python/rapidocr_openvino/ch_ppocr_det/config.yaml
 delete mode 100644 python/rapidocr_openvino/ch_ppocr_rec/config.yaml
 delete mode 100644 python/rapidocr_paddle/ch_ppocr_cls/config.yaml
 delete mode 100644 python/rapidocr_paddle/ch_ppocr_det/config.yaml
 delete mode 100644 python/rapidocr_paddle/ch_ppocr_rec/config.yaml

diff --git a/python/rapidocr_onnxruntime/ch_ppocr_det/text_detect.py b/python/rapidocr_onnxruntime/ch_ppocr_det/text_detect.py
index 1e97b9302..5632d7908 100644
--- a/python/rapidocr_onnxruntime/ch_ppocr_det/text_detect.py
+++ b/python/rapidocr_onnxruntime/ch_ppocr_det/text_detect.py
@@ -63,7 +63,7 @@ def __call__(self, img: np.ndarray) -> Tuple[Optional[np.ndarray], float]:
         return dt_boxes, elapse
 
     def get_preprocess(self, max_wh):
-        if self.limit_type == 'min':
+        if self.limit_type == "min":
             limit_side_len = self.limit_side_len
         elif max_wh < 960:
             limit_side_len = 960
@@ -71,7 +71,7 @@ def get_preprocess(self, max_wh):
             limit_side_len = 1500
         else:
             limit_side_len = 2000
-        return DetPreProcess(limit_side_len, self.limit_type,self.mean, self.std)
+        return DetPreProcess(limit_side_len, self.limit_type, self.mean, self.std)
 
     def filter_tag_det_res(
         self, dt_boxes: np.ndarray, image_shape: Tuple[int, int]
diff --git a/python/rapidocr_onnxruntime/ch_ppocr_det/utils.py b/python/rapidocr_onnxruntime/ch_ppocr_det/utils.py
index bc5ccc11b..ef410631c 100644
--- a/python/rapidocr_onnxruntime/ch_ppocr_det/utils.py
+++ b/python/rapidocr_onnxruntime/ch_ppocr_det/utils.py
@@ -10,12 +10,15 @@
 
 
 class DetPreProcess:
-    def __init__(self, limit_side_len: int = 736, limit_type: str = "min", mean=None,
-                 std=None):
+    def __init__(
+        self, limit_side_len: int = 736, limit_type: str = "min", mean=None, std=None
+    ):
         if mean is None:
             mean = [0.5, 0.5, 0.5]
+
         if std is None:
             std = [0.5, 0.5, 0.5]
+
         self.mean = np.array(mean)
         self.std = np.array(std)
         self.scale = 1 / 255.0
diff --git a/python/rapidocr_onnxruntime/config.yaml b/python/rapidocr_onnxruntime/config.yaml
index eaa7a7df5..d249ce8f3 100644
--- a/python/rapidocr_onnxruntime/config.yaml
+++ b/python/rapidocr_onnxruntime/config.yaml
@@ -26,10 +26,7 @@ Det:
     limit_type: min
     std: [ 0.5, 0.5, 0.5 ]
     mean: [ 0.5, 0.5, 0.5 ]
-#    limit_side_len: 960
-#    limit_type: max
-#    std: [ 0.229, 0.224, 0.225 ]
-#    mean: [ 0.485, 0.456, 0.406 ]
+
     thresh: 0.3
     box_thresh: 0.5
     max_candidates: 1000
diff --git a/python/rapidocr_openvino/ch_ppocr_cls/config.yaml b/python/rapidocr_openvino/ch_ppocr_cls/config.yaml
deleted file mode 100644
index 0ec669ec8..000000000
--- a/python/rapidocr_openvino/ch_ppocr_cls/config.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
-model_path: models/ch_ppocr_mobile_v2.0_cls_infer.onnx
-
-use_cuda: false
-# Details of the params: https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html
-CUDAExecutionProvider:
-    device_id: 0
-    arena_extend_strategy: kNextPowerOfTwo
-    cudnn_conv_algo_search: EXHAUSTIVE
-    do_copy_in_default_stream: true
-
-cls_image_shape: [3, 48, 192]
-cls_batch_num: 6
-cls_thresh: 0.9
-label_list: ['0', '180']
\ No newline at end of file
diff --git a/python/rapidocr_openvino/ch_ppocr_det/config.yaml b/python/rapidocr_openvino/ch_ppocr_det/config.yaml
deleted file mode 100644
index 7be1e716c..000000000
--- a/python/rapidocr_openvino/ch_ppocr_det/config.yaml
+++ /dev/null
@@ -1,21 +0,0 @@
-model_path: models/ch_PP-OCRv4_det_infer.onnx
-
-pre_process:
-    DetResizeForTest:
-        limit_side_len: 736
-        limit_type: min
-    NormalizeImage:
-        std: [0.229, 0.224, 0.225]
-        mean: [0.485, 0.456, 0.406]
-        scale: 1./255.
-        order: hwc
-    ToCHWImage:
-    KeepKeys:
-        keep_keys: ['image', 'shape']
-
-post_process:
-    thresh: 0.3
-    box_thresh: 0.5
-    max_candidates: 1000
-    unclip_ratio: 1.6
-    use_dilation: true
diff --git a/python/rapidocr_openvino/ch_ppocr_det/utils.py b/python/rapidocr_openvino/ch_ppocr_det/utils.py
index 014145c0f..ef410631c 100644
--- a/python/rapidocr_openvino/ch_ppocr_det/utils.py
+++ b/python/rapidocr_openvino/ch_ppocr_det/utils.py
@@ -10,12 +10,15 @@
 
 
 class DetPreProcess:
-    def __init__(self, limit_side_len: int = 736, limit_type: str = "min",mean=None,
-                 std=None):
+    def __init__(
+        self, limit_side_len: int = 736, limit_type: str = "min", mean=None, std=None
+    ):
         if mean is None:
-            mean = [0.485, 0.456, 0.406]
+            mean = [0.5, 0.5, 0.5]
+
         if std is None:
-            std = [0.229, 0.224, 0.225]
+            std = [0.5, 0.5, 0.5]
+
         self.mean = np.array(mean)
         self.std = np.array(std)
         self.scale = 1 / 255.0
diff --git a/python/rapidocr_openvino/ch_ppocr_rec/config.yaml b/python/rapidocr_openvino/ch_ppocr_rec/config.yaml
deleted file mode 100644
index 9453c0856..000000000
--- a/python/rapidocr_openvino/ch_ppocr_rec/config.yaml
+++ /dev/null
@@ -1,4 +0,0 @@
-model_path: models/ch_PP-OCRv4_rec_infer.onnx
-
-rec_img_shape: [3, 48, 320]
-rec_batch_num: 6
diff --git a/python/rapidocr_openvino/config.yaml b/python/rapidocr_openvino/config.yaml
index 8954d2170..b6831f4e0 100644
--- a/python/rapidocr_openvino/config.yaml
+++ b/python/rapidocr_openvino/config.yaml
@@ -21,6 +21,8 @@ Det:
 
     limit_side_len: 736
     limit_type: min
+    std: [ 0.5, 0.5, 0.5 ]
+    mean: [ 0.5, 0.5, 0.5 ]
 
     thresh: 0.3
     box_thresh: 0.5
diff --git a/python/rapidocr_paddle/ch_ppocr_cls/config.yaml b/python/rapidocr_paddle/ch_ppocr_cls/config.yaml
deleted file mode 100644
index 47eb5a028..000000000
--- a/python/rapidocr_paddle/ch_ppocr_cls/config.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-model_path: models/ch_ppocr_mobile_v2_cls_infer
-
-use_cuda: false
-gpu_id: 0
-gpu_mem: 500
-
-cls_image_shape: [3, 48, 192]
-cls_batch_num: 6
-cls_thresh: 0.9
-label_list: ['0', '180']
\ No newline at end of file
diff --git a/python/rapidocr_paddle/ch_ppocr_cls/text_cls.py b/python/rapidocr_paddle/ch_ppocr_cls/text_cls.py
index 06317a9f1..063816aa8 100644
--- a/python/rapidocr_paddle/ch_ppocr_cls/text_cls.py
+++ b/python/rapidocr_paddle/ch_ppocr_cls/text_cls.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import argparse
 import copy
 import math
 import time
@@ -20,7 +19,7 @@
 import cv2
 import numpy as np
 
-from rapidocr_paddle.utils import PaddleInferSession, read_yaml
+from rapidocr_paddle.utils import PaddleInferSession
 
 from .utils import ClsPostProcess
 
@@ -96,19 +95,3 @@ def resize_norm_img(self, img):
         padding_im = np.zeros((img_c, img_h, img_w), dtype=np.float32)
         padding_im[:, :, :resized_w] = resized_image
         return padding_im
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--image_path", type=str, help="image_dir|image_path")
-    parser.add_argument("--config_path", type=str, default="config.yaml")
-    args = parser.parse_args()
-
-    config = read_yaml(args.config_path)
-
-    text_classifier = TextClassifier(config)
-
-    img = cv2.imread(args.image_path)
-    img_list, cls_res, predict_time = text_classifier(img)
-    for ino in range(len(img_list)):
-        print(f"cls result:{cls_res[ino]}")
diff --git a/python/rapidocr_paddle/ch_ppocr_det/config.yaml b/python/rapidocr_paddle/ch_ppocr_det/config.yaml
deleted file mode 100644
index 7091a0730..000000000
--- a/python/rapidocr_paddle/ch_ppocr_det/config.yaml
+++ /dev/null
@@ -1,26 +0,0 @@
-model_path: models/ch_PP-OCRv4_det_infer
-
-use_cuda: false
-gpu_id: 0
-gpu_mem: 500
-
-pre_process:
-    DetResizeForTest:
-        limit_side_len: 736
-        limit_type: min
-    NormalizeImage:
-        std: [0.229, 0.224, 0.225]
-        mean: [0.485, 0.456, 0.406]
-        scale: 1./255.
-        order: hwc
-    ToCHWImage:
-    KeepKeys:
-        keep_keys: ['image', 'shape']
-
-post_process:
-    thresh: 0.3
-    box_thresh: 0.5
-    max_candidates: 1000
-    unclip_ratio: 1.6
-    use_dilation: true
-    score_mode: "fast"
diff --git a/python/rapidocr_paddle/ch_ppocr_det/utils.py b/python/rapidocr_paddle/ch_ppocr_det/utils.py
index 9302d4a64..ef410631c 100644
--- a/python/rapidocr_paddle/ch_ppocr_det/utils.py
+++ b/python/rapidocr_paddle/ch_ppocr_det/utils.py
@@ -10,12 +10,15 @@
 
 
 class DetPreProcess:
-    def __init__(self, limit_side_len: int = 736, limit_type: str = "min", mean=None,
-                 std=None):
+    def __init__(
+        self, limit_side_len: int = 736, limit_type: str = "min", mean=None, std=None
+    ):
         if mean is None:
-            mean = [0.485, 0.456, 0.406]
+            mean = [0.5, 0.5, 0.5]
+
         if std is None:
-            std = [0.229, 0.224, 0.225]
+            std = [0.5, 0.5, 0.5]
+
         self.mean = np.array(mean)
         self.std = np.array(std)
         self.scale = 1 / 255.0
diff --git a/python/rapidocr_paddle/ch_ppocr_rec/config.yaml b/python/rapidocr_paddle/ch_ppocr_rec/config.yaml
deleted file mode 100644
index 6b7375c29..000000000
--- a/python/rapidocr_paddle/ch_ppocr_rec/config.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-model_path: models/ch_PP-OCRv4_rec_infer
-
-use_cuda: false
-gpu_id: 0
-gpu_mem: 500
-
-rec_img_shape: [3, 48, 320]
-rec_batch_num: 6
diff --git a/python/rapidocr_paddle/ch_ppocr_rec/text_recognize.py b/python/rapidocr_paddle/ch_ppocr_rec/text_recognize.py
index 320a141c9..30994fe14 100644
--- a/python/rapidocr_paddle/ch_ppocr_rec/text_recognize.py
+++ b/python/rapidocr_paddle/ch_ppocr_rec/text_recognize.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import argparse
 import math
 import time
 from pathlib import Path
@@ -20,7 +19,7 @@
 import cv2
 import numpy as np
 
-from rapidocr_paddle.utils import PaddleInferSession, read_yaml
+from rapidocr_paddle.utils import PaddleInferSession
 
 from .utils import CTCLabelDecode
 
@@ -112,17 +111,3 @@ def resize_norm_img(self, img: np.ndarray, max_wh_ratio: float) -> np.ndarray:
         padding_im = np.zeros((img_channel, img_height, img_width), dtype=np.float32)
         padding_im[:, :, 0:resized_w] = resized_image
         return padding_im
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--image_path", type=str, help="image_dir|image_path")
-    parser.add_argument("--config_path", type=str, default="config.yaml")
-    args = parser.parse_args()
-
-    config = read_yaml(args.config_path)
-    text_recognizer = TextRecognizer(config)
-
-    img = cv2.imread(args.image_path)
-    rec_res, predict_time = text_recognizer(img)
-    print(f"rec result: {rec_res}\t cost: {predict_time}s")
diff --git a/python/rapidocr_paddle/config.yaml b/python/rapidocr_paddle/config.yaml
index 8bf797667..e3a057d6a 100644
--- a/python/rapidocr_paddle/config.yaml
+++ b/python/rapidocr_paddle/config.yaml
@@ -23,6 +23,8 @@ Det:
 
     limit_side_len: 736
     limit_type: min
+    std: [ 0.5, 0.5, 0.5 ]
+    mean: [ 0.5, 0.5, 0.5 ]
 
     thresh: 0.3
     box_thresh: 0.5
diff --git a/python/tests/test_paddle.py b/python/tests/test_paddle.py
index f75d846c1..ad3f809f4 100644
--- a/python/tests/test_paddle.py
+++ b/python/tests/test_paddle.py
@@ -1,6 +1,13 @@
 # -*- encoding: utf-8 -*-
 # @Author: SWHL
 # @Contact: liekkaskono@163.com
+"""
+All of the test cases below pass in the following environment:
+OS: macOS 15.1.1 (24B91)
+Python: 3.10.13
+paddlepaddle: 3.0.0b2 CPU
+"""
+
 import sys
 from pathlib import Path
 from typing import List
@@ -27,7 +34,7 @@ def test_long_img():
     download_file(img_url, save_path=img_path)
     result, _ = engine(img_path)
     assert result is not None
-    assert len(result) == 55
+    assert len(result) == 53
     img_path.unlink()
 
 
@@ -62,7 +69,7 @@ def test_transparent_img(img_name: str, gt: str):
         (
             "test_letterbox_like.jpg",
             2,
-            "A：：取决于所使用的执行提供者，它可能没有完全支持模型中的所有操作。回落到CPU操作可能会导致性能速度的下降。此外，即使一个操作是由CUDAeXecution",
+            "A：：取决于所使用的执行提供者，它可能没有完全支持模型中的所有操作。回落到CPU操作可能会导致性能速度的下降。此外，即使一个操作是由CUDAexecution",
         ),
         ("test_without_det.jpg", 1, "在中国作家协会第三届儿童文学"),
     ],
@@ -79,7 +86,7 @@ def test_only_det():
     result, _ = engine(img_path, use_det=True, use_cls=False, use_rec=False)
 
     assert len(result) == 18
-    assert result[0][0] == [5.0, 2.0]
+    assert result[0][0] == [6.0, 2.0]
 
 
 def test_only_cls():
@@ -212,7 +219,7 @@ def test_input_three_ndim_one_channel():
         ),
         (
             "text_vertical_words.png",
-            ["已", "取", "之", "時", "不", "參", "一", "人", "见", "而"],
+            ["已", "取", "之", "時", "不", "參", "一", "人", "見", "而"],
         ),
         (
             "issue_170.png",
diff --git a/python/tests/test_vino.py b/python/tests/test_vino.py
index 9c7fb2929..629cb0b7e 100644
--- a/python/tests/test_vino.py
+++ b/python/tests/test_vino.py
@@ -1,6 +1,13 @@
 # -*- encoding: utf-8 -*-
 # @Author: SWHL
 # @Contact: liekkaskono@163.com
+"""
+All of the test cases below pass in the following environment:
+OS: macOS 15.1.1 (24B91)
+Python: 3.10.13
+openvino: 2024.0.0
+"""
+
 import sys
 from pathlib import Path
 from typing import List
@@ -27,7 +34,7 @@ def test_long_img():
     download_file(img_url, save_path=img_path)
     result, _ = engine(img_path)
     assert result is not None
-    assert len(result) == 55
+    assert len(result) == 53
     img_path.unlink()
 
 
@@ -62,7 +69,7 @@ def test_transparent_img(img_name: str, gt: str):
         (
             "test_letterbox_like.jpg",
             2,
-            "A：：取决于所使用的执行提供者，它可能没有完全支持模型中的所有操作。回落到CPU操作可能会导致性能速度的下降。此外，即使一个操作是由CUDAeXecution",
+            "A：：取决于所使用的执行提供者，它可能没有完全支持模型中的所有操作。回落到CPU操作可能会导致性能速度的下降。此外，即使一个操作是由CUDAexecution",
         ),
         ("test_without_det.jpg", 1, "在中国作家协会第三届儿童文学"),
     ],
@@ -79,7 +86,7 @@ def test_only_det():
     result, _ = engine(img_path, use_det=True, use_cls=False, use_rec=False)
 
     assert len(result) == 18
-    assert result[0][0] == [5.0, 2.0]
+    assert result[0][0] == [6.0, 2.0]
 
 
 def test_only_cls():
@@ -212,7 +219,7 @@ def test_input_three_ndim_one_channel():
         ),
         (
             "text_vertical_words.png",
-            ["已", "取", "之", "時", "不", "參", "一", "人", "见", "而"],
+            ["已", "取", "之", "時", "不", "參", "一", "人", "見", "而"],
         ),
         (
             "issue_170.png",