From 0db54665ca4e67c1302431f63ed8aa3f41c1e13c Mon Sep 17 00:00:00 2001 From: SWHL Date: Sat, 11 Mar 2023 14:01:04 +0800 Subject: [PATCH] Fix bugs of recognizing in single mode --- README.md | 11 ++++++----- demo.py | 4 ++-- docs/README_en.md | 10 +++++++--- docs/change_log.md | 4 ++++ docs/change_log_en.md | 4 ++++ docs/doc_whl.md | 6 +++--- rapid_videocr/rapid_videocr.py | 27 +++++++++++++-------------- setup.py | 2 +- tests/test_rapid_videocr.py | 8 ++++---- 9 files changed, 44 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index 88d24a5..ccdfd4f 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ flowchart LR from rapid_videocr import RapidVideOCR # RapidVideOCR有两个初始化参数 - # is_single_res: 是否用单张图识别,默认是False,也就是默认用叠图识别 + # is_concat_rec: 是否用单张图识别,默认是False,也就是默认用单图识别 # concat_batch: 叠图识别的图像张数,默认10,可自行调节 extractor = RapidVideOCR() @@ -92,7 +92,7 @@ flowchart LR Output file format. Default is "all" -m {single,concat}, --mode {single,concat} Which mode to run (concat recognition or single - recognition), default is "concat" + recognition), default is "single" -b CONCAT_BATCH, --concat_batch CONCAT_BATCH The batch of concating image nums in concat recognition mode. Default is 10. 
@@ -107,12 +107,13 @@ flowchart LR ### 更新日志([more](./docs/change_log.md)) +- 🎢2023-03-11 v2.1.1 update: + - 修复单图识别与之前版本差异问题 + - 默认识别模式更改为单图识别,是否使用叠图识别,请自行决定 + - 🥇2023-03-10 v2.1.0 update: - 添加叠字识别功能,速度更快,默认是叠字识别功能 -- 🎈2023-03-02 v2.0.5~7 update: - - 修复生成的srt文件中的格式错误, [#19](https://github.com/SWHL/RapidVideOCR/issues/19) - ### 写在最后 - 扫码加入组织: diff --git a/demo.py b/demo.py index d136afa..899da27 100644 --- a/demo.py +++ b/demo.py @@ -4,8 +4,8 @@ from rapid_videocr import RapidVideOCR -extractor = RapidVideOCR(is_single_res=True) +extractor = RapidVideOCR(is_concat_rec=True) -rgb_dir = 'tests/test_files/RGBImage' +rgb_dir = 'tests/test_files/RGBImages' save_dir = 'result' extractor(rgb_dir, save_dir) diff --git a/docs/README_en.md b/docs/README_en.md index 9e9cf56..77d9323 100644 --- a/docs/README_en.md +++ b/docs/README_en.md @@ -67,7 +67,7 @@ flowchart LR ```python # __init__ Args: - is_single_res (bool, optional): Whether to single recognition. Defaults to False. + is_concat_rec (bool, optional): Whether to use concat recognition. Defaults to False. concat_batch (int, optional): The batch of concating image nums in concat recognition mode. Defaults to 10. # __call__ @@ -84,7 +84,7 @@ flowchart LR ```python from rapid_videocr import RapidVideOCR - extractor = RapidVideOCR(is_single_res=True, concat_batch=10) + extractor = RapidVideOCR(is_concat_rec=True, concat_batch=10) rgb_dir = 'RGBImages' save_dir = 'result' @@ -107,7 +107,7 @@ flowchart LR Output file format. Default is "all" -m {single,concat}, --mode {single,concat} Which mode to run (concat recognition or single - recognition), default is "concat" + recognition), default is "single" -b CONCAT_BATCH, --concat_batch CONCAT_BATCH The batch of concating image nums in concat recognition mode. Default is 10. 
@@ -122,6 +122,10 @@ flowchart LR - It is worth noting that if you want the video playback software to automatically mount the srt file, you need to change the name of the srt file to the name of the video file, and put it in the same directory, or manually specify the loading. ### Change log ([more](../docs/change_log_en.md)) +- 🎢2023-03-11 v2.1.1 update: + - Fix the difference between single image recognition and the previous version. + - The default recognition mode is changed to single image recognition, please decide whether to use overlapping image recognition. + - 🥇2023-03-10 v2.1.0 update: - Added overlap recognition function, faster speed, the default is concat recognition mode. diff --git a/docs/change_log.md b/docs/change_log.md index 7935d12..70b1d56 100644 --- a/docs/change_log.md +++ b/docs/change_log.md @@ -1,4 +1,8 @@ +#### 🎢2023-03-11 v2.1.1 update: +- 修复单图识别与之前版本差异问题 +- 默认识别模式更改为单图识别,是否使用叠图识别,请自行决定 + #### 🥇2023-03-10 v2.1.0 update: - 添加叠字识别功能,速度更快,默认是叠字识别功能 diff --git a/docs/change_log_en.md b/docs/change_log_en.md index 4bd4aca..8f7c980 100644 --- a/docs/change_log_en.md +++ b/docs/change_log_en.md @@ -1,3 +1,7 @@ +#### 🎢2023-03-11 v2.1.1 update: +- Fix the difference between single image recognition and the previous version. +- The default recognition mode is changed to single image recognition, please decide whether to use overlapping image recognition. + #### 🥇2023-03-10 v2.1.0 update: - Added overlap recognition function, faster speed, the default is concat recognition mode. diff --git a/docs/doc_whl.md b/docs/doc_whl.md index 9f592ab..ed767e1 100644 --- a/docs/doc_whl.md +++ b/docs/doc_whl.md @@ -18,7 +18,7 @@ pip install rapid_videocr ```python # __init__ Args: - is_single_res (bool, optional): Whether to single recognition. Defaults to False. + is_concat_rec (bool, optional): Whether to use concat recognition. Defaults to False. concat_batch (int, optional): The batch of concating image nums in concat recognition mode. Defaults to 10. 
# __call__ @@ -35,7 +35,7 @@ pip install rapid_videocr ```python from rapid_videocr import RapidVideOCR - extractor = RapidVideOCR(is_single_res=True, concat_batch=10) + extractor = RapidVideOCR(is_concat_rec=True, concat_batch=10) rgb_dir = 'RGBImages' save_dir = 'result' @@ -59,7 +59,7 @@ pip install rapid_videocr Output file format. Default is "all" -m {single,concat}, --mode {single,concat} Which mode to run (concat recognition or single - recognition), default is "concat" + recognition), default is "single" -b CONCAT_BATCH, --concat_batch CONCAT_BATCH The batch of concating image nums in concat recognition mode. Default is 10. diff --git a/rapid_videocr/rapid_videocr.py b/rapid_videocr/rapid_videocr.py index 2fd2bd2..bee2273 100644 --- a/rapid_videocr/rapid_videocr.py +++ b/rapid_videocr/rapid_videocr.py @@ -16,18 +16,18 @@ class RapidVideOCR(): - def __init__(self, is_single_res: bool = False, concat_batch: int = 10): + def __init__(self, is_concat_rec: bool = False, concat_batch: int = 10): """Init Args: - is_single_res (bool, optional): Whether to single recognition. Defaults to False. + is_concat_rec (bool, optional): Whether to use concat recognition. Defaults to False. concat_batch (int, optional): The batch of concating image nums in concat recognition mode. Defaults to 10. 
""" - self.rapid_ocr = RapidOCR() + self.rapid_ocr = RapidOCR(width_height_ratio=-1) self.cropper = CropByProject() self.batch_size = concat_batch - self.is_single_res = is_single_res + self.is_concat_rec = is_concat_rec def __call__(self, video_sub_finder_dir: Union[str, Path], @@ -59,12 +59,12 @@ def __call__(self, raise RapidVideOCRError( f'{video_sub_finder_dir} has not images with jpeg as suffix.') - if self.is_single_res: - print('Running with single recognition.') - srt_result, txt_result = self.single_rec(img_list, is_txt_dir) - else: + if self.is_concat_rec: print('Running with concat recognition.') srt_result, txt_result = self.concat_rec(img_list, is_txt_dir) + else: + print('Running with single recognition.') + srt_result, txt_result = self.single_rec(img_list, is_txt_dir) self.export_file(save_dir, srt_result, txt_result, out_format) @@ -79,11 +79,10 @@ def single_rec(self, img_list: List[str], time_str = self.get_time(img_path) img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), 1) - dt_boxes, rec_res = self.run_ocr(img, int(img.shape[1] / 3), + dt_boxes, rec_res = self.run_ocr(img, img.shape[0], is_txt_dir) if rec_res: txts = self.process_same_line(dt_boxes, rec_res) - srt_result.append(f'{i+1}\n{time_str}\n{txts}\n') txt_result.append(f'{txts}\n') return srt_result, txt_result @@ -314,15 +313,15 @@ def main() -> None: parser.add_argument('-o', '--out_format', type=str, default='all', choices=['srt', 'txt', 'all'], help='Output file format. Default is "all"') - parser.add_argument('-m', '--mode', type=str, default='concat', + parser.add_argument('-m', '--mode', type=str, default='single', choices=['single', 'concat'], - help='Which mode to run (concat recognition or single recognition), default is "concat"') + help='Which mode to run (concat recognition or single recognition), default is "single"') parser.add_argument('-b', '--concat_batch', type=int, default=10, help='The batch of concating image nums in concat recognition mode. 
Default is 10.') args = parser.parse_args() - is_single_res = 'single' in args.mode - extractor = RapidVideOCR(is_single_res=is_single_res, + is_concat_rec = 'concat' in args.mode + extractor = RapidVideOCR(is_concat_rec=is_concat_rec, concat_batch=args.concat_batch) extractor(args.img_dir, args.save_dir, args.out_format) diff --git a/setup.py b/setup.py index 1497c6b..eabf660 100644 --- a/setup.py +++ b/setup.py @@ -70,7 +70,7 @@ def extract_version(message: str) -> str: url="https://github.com/SWHL/RapidVideOCR.git", license='Apache-2.0', include_package_data=True, - install_requires=["tqdm>=4.52.0", "rapidocr_onnxruntime>=1.1.29"], + install_requires=["tqdm>=4.52.0", "rapidocr_onnxruntime>=1.2.2"], packages=[MODULE_NAME], package_data={'': ['*.yaml']}, keywords=['rapidocr,videocr,subtitle'], diff --git a/tests/test_rapid_videocr.py b/tests/test_rapid_videocr.py index 3a0f3ae..0cbe588 100644 --- a/tests/test_rapid_videocr.py +++ b/tests/test_rapid_videocr.py @@ -24,7 +24,7 @@ def test_single_rec(): - extractor = RapidVideOCR(is_single_res=True) + extractor = RapidVideOCR(is_concat_rec=False) img_dir = test_file_dir / 'RGBImages' extractor(img_dir, test_file_dir) @@ -43,7 +43,7 @@ def test_single_rec(): def test_concat_rec(): - extractor = RapidVideOCR(is_single_res=False) + extractor = RapidVideOCR(is_concat_rec=True) img_dir = test_file_dir / 'RGBImages' extractor(img_dir, test_file_dir) @@ -62,7 +62,7 @@ def test_concat_rec(): def test_empty_dir(): - extractor = RapidVideOCR(is_single_res=False) + extractor = RapidVideOCR(is_concat_rec=False) img_dir = test_file_dir / 'RGBImage' mkdir(img_dir) @@ -74,7 +74,7 @@ def test_empty_dir(): def test_nothing_dir(): - extractor = RapidVideOCR(is_single_res=False) + extractor = RapidVideOCR(is_concat_rec=False) img_dir = test_file_dir / 'RGBImage' mkdir(img_dir) with pytest.raises(RapidVideOCRError) as exc_info: