diff --git a/.github/workflows/gen_whl_to_pypi_rapid_layout.yml b/.github/workflows/gen_whl_to_pypi_rapid_layout.yml deleted file mode 100644 index 3adb9f9..0000000 --- a/.github/workflows/gen_whl_to_pypi_rapid_layout.yml +++ /dev/null @@ -1,64 +0,0 @@ -name: Push rapidocr_layout to pypi - -on: - push: - branches: [ main ] - paths: - - 'rapid_layout/**' - - 'docs/doc_whl_rapid_layout.md' - - 'setup_layout.py' - - '.github/workflows/gen_whl_to_pypi_rapid_layout.yml' - - 'tests/test_layout.py' - -env: - RESOURCES_URL: https://github.com/RapidAI/RapidStructure/releases/download/v0.0.0/rapid_layout_models.zip - -jobs: - UnitTesting: - runs-on: ubuntu-latest - steps: - - name: Pull latest code - uses: actions/checkout@v3 - - - name: Set up Python 3.10 - uses: actions/setup-python@v4 - with: - python-version: '3.10' - architecture: 'x64' - - - name: Display Python version - run: python -c "import sys; print(sys.version)" - - - name: Unit testings - run: | - wget $RESOURCES_URL - ZIP_NAME=${RESOURCES_URL##*/} - DIR_NAME=${ZIP_NAME%.*} - - unzip $DIR_NAME - cp $DIR_NAME/*.onnx rapid_layout/models/ - pip install -r requirements.txt - pytest tests/test_layout.py - - GenerateWHL_PushPyPi: - needs: UnitTesting - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v3 - - - name: Run setup - run: | - pip install -r requirements.txt - wget $RESOURCES_URL - ZIP_NAME=${RESOURCES_URL##*/} - DIR_NAME=${ZIP_NAME%.*} - unzip $ZIP_NAME - mv $DIR_NAME/*.onnx rapid_layout/models/ - python setup_layout.py bdist_wheel "${{ github.event.head_commit.message }}" - - - name: Publish distribution 📦 to PyPI - uses: pypa/gh-action-pypi-publish@v1.5.0 - with: - password: ${{ secrets.RAPID_STRUCTURE }} - packages_dir: dist/ diff --git a/README.md b/README.md index 4f953f8..fd9e64b 100644 --- a/README.md +++ b/README.md @@ -15,8 +15,6 @@ ### 简介 -> 🎉 推出知识星球[RapidAI私享群](https://t.zsxq.com/0duLBZczw),这里的提问会优先得到回答和支持,也会享受到RapidAI组织后续持续优质的服务,欢迎大家的加入。 - 该部分的功能主要针对文档类图像,包括文档图像分类、版面分析和表格识别。 可配套使用项目:[RapidOCR](https://github.com/RapidAI/RapidOCR) @@ -24,7 +22,7 @@ 相关模型托管:[Hugging Face Models](https://huggingface.co/SWHL/RapidStructure) ### [文档方向分类](./docs/README_Orientation.md) -### [版面分析](./docs/README_Layout.md) +### [版面分析](https://github.com/RapidAI/RapidLayout) ### [表格识别](./docs/README_Table.md) 更多表格识别:[TableStructureRec](https://github.com/RapidAI/TableStructureRec) diff --git a/docs/README_Layout.md b/docs/README_Layout.md deleted file mode 100644 index bd2b518..0000000 --- a/docs/README_Layout.md +++ /dev/null @@ -1,82 +0,0 @@ -## Rapid Layout -

- - - PyPI - -

- -#### 简介和说明 -- 该部分主要是做文档类图像的版面分析。模型来源:[PaddleOCR 版面分析](https://github.com/PaddlePaddle/PaddleOCR/blob/133d67f27dc8a241d6b2e30a9f047a0fb75bebbe/ppstructure/layout/README_ch.md) -- 具体来说,就是分析给定的文档类别图像(论文截图等),定位其中类别和位置,如标题、段落、表格和图片等各个部分。 -- 目前支持三种类别的版面分析模型:中文、英文和表格版面分析模型,具体可参见下面表格: - - |`model_type`| 版面类型 | 模型名称 | 支持类别| - | :------ | :----- | :------ | :----- | - |`pp_layout_table`| 表格 | `layout_table.onnx` |`table` | - | `pp_layout_publaynet`| 英文 | `layout_publaynet.onnx` |`text title list table figure` | - | `pp_layout_table`| 中文 | `layout_cdla.onnx` | `text title figure figure_caption table table_caption`
`header footer reference equation` | - -- 模型下载地址为:[百度网盘](https://pan.baidu.com/s/1PI9fksW6F6kQfJhwUkewWg?pwd=p29g) | [Google Drive](https://drive.google.com/drive/folders/1DAPWSN2zGQ-ED_Pz7RaJGTjfkN2-Mvsf?usp=sharing) - -#### 安装 -由于模型较小,预先将中文版面分析模型(`layout_cdla.onnx`)打包进了whl包内,如果做中文版面分析,可直接安装使用 - -```bash -$ pip install rapid-layout -``` - -#### 使用方式 -1. python脚本运行 - ```python - import cv2 - from rapid_layout import RapidLayout,vis_layout - - # model_type类型参见上表。指定不同model_type时,会自动下载相应模型到安装目录下的。 - layout_engine = RapidLayout(box_threshold=0.5, model_type="pp_layout_cdla") - - img = cv2.imread('test_images/layout.png') - - layout_res, elapse = layout_engine(img) - - ploted_img = vis_layout(img, layout_res) - cv2.imwrite("layout_res.png", ploted_img) - print(layout_res) - ``` - -2. 终端运行 - - 用法: - ```bash - $ rapid_layout -h - usage: rapid_layout [-h] -img IMG_PATH [-m {pp_layout_cdla,pp_layout_publaynet,pp_layout_table}] - [--box_threshold {pp_layout_cdla,pp_layout_publaynet,pp_layout_table}] [-v] - - options: - -h, --help show this help message and exit - -img IMG_PATH, --img_path IMG_PATH - Path to image for layout. - -m {pp_layout_cdla,pp_layout_publaynet,pp_layout_table}, --model_type {pp_layout_cdla,pp_layout_publaynet,pp_layout_table} - Support model type - --box_threshold {pp_layout_cdla,pp_layout_publaynet,pp_layout_table} - Box threshold, the range is [0, 1] - -v, --vis Wheter to visualize the layout results. - ``` - - 示例: - ```bash - $ rapid_layout -v -img test_images/layout.png - ``` - -3. 结果 - - 返回结果 - ```python - # bbox: [左上角x0,左上角y0, 右下角x1, 右下角y1] - # label: 类别 - [ - {'bbox': array([321.4160495, 91.53214898, 562.06141263, 199.85522603]), 'label': 'text'}, - {'bbox': array([58.67292211, 107.29000663, 300.25448676, 199.68142]), 'label': 'table_caption'} - ] - ``` - - 可视化结果 -
- -
diff --git a/docs/doc_whl_rapid_layout.md b/docs/doc_whl_rapid_layout.md deleted file mode 100644 index 8163c11..0000000 --- a/docs/doc_whl_rapid_layout.md +++ /dev/null @@ -1 +0,0 @@ -See [link](https://github.com/RapidAI/RapidStructure) for details. diff --git a/rapid_layout/__init__.py b/rapid_layout/__init__.py deleted file mode 100644 index 014ffb0..0000000 --- a/rapid_layout/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from .main import RapidLayout -from .utils import VisLayout diff --git a/rapid_layout/config.yaml b/rapid_layout/config.yaml deleted file mode 100644 index 33a85d2..0000000 --- a/rapid_layout/config.yaml +++ /dev/null @@ -1,24 +0,0 @@ -model_path: models/layout_cdla.onnx - -use_cuda: false -CUDAExecutionProvider: - device_id: 0 - arena_extend_strategy: kNextPowerOfTwo - cudnn_conv_algo_search: EXHAUSTIVE - do_copy_in_default_stream: true - -pre_process: - Resize: - size: [800, 608] - NormalizeImage: - std: [0.229, 0.224, 0.225] - mean: [0.485, 0.456, 0.406] - scale: 1./255. - order: hwc - ToCHWImage: - KeepKeys: - keep_keys: ['image'] - -post_process: - score_threshold: 0.5 - nms_threshold: 0.5 \ No newline at end of file diff --git a/rapid_layout/main.py b/rapid_layout/main.py deleted file mode 100644 index ae06b92..0000000 --- a/rapid_layout/main.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import argparse -import time -from pathlib import Path -from typing import Optional, Tuple, Union - -import cv2 -import numpy as np - -from .utils import ( - DownloadModel, - LoadImage, - OrtInferSession, - PicoDetPostProcess, - VisLayout, - create_operators, - get_logger, - read_yaml, - transform, -) - -ROOT_DIR = Path(__file__).resolve().parent -logger = get_logger("rapid_layout") - -ROOT_URL = "https://github.com/RapidAI/RapidStructure/releases/download/v0.0.0/" -KEY_TO_MODEL_URL = { - "pp_layout_cdla": f"{ROOT_URL}/layout_cdla.onnx", - "pp_layout_publaynet": f"{ROOT_URL}/layout_publaynet.onnx", - "pp_layout_table": f"{ROOT_URL}/layout_table.onnx", -} -DEFAULT_MODEL_PATH = str(ROOT_DIR / "models" / "layout_cdla.onnx") - - -class RapidLayout: - def __init__( - self, - model_type: str = "pp_layout_cdla", - box_threshold: float = 0.5, - use_cuda: bool = False, - ): - config_path = str(ROOT_DIR / "config.yaml") - config = read_yaml(config_path) - config["model_path"] = self.get_model_path(model_type) - config["use_cuda"] = use_cuda - - self.session = OrtInferSession(config) - labels = self.session.get_character_list() - logger.info("%s contains %s", model_type, labels) - - self.preprocess_op = create_operators(config["pre_process"]) - - config["post_process"]["score_threshold"] = box_threshold - self.postprocess_op = PicoDetPostProcess(labels, **config["post_process"]) - self.load_img = LoadImage() - - def __call__( - self, img_content: Union[str, np.ndarray, bytes, Path] - ) -> Tuple[Optional[np.ndarray], Optional[np.ndarray], Optional[np.ndarray], float]: - img = self.load_img(img_content) - - ori_im = img.copy() - data = transform({"image": img}, self.preprocess_op) - img = data[0] - if img is None: - return None, None, None, 0.0 - - img = np.expand_dims(img, axis=0) - img = img.copy() - - preds, elapse = 0, 1 - starttime = time.time() - preds = self.session(img) - - score_list, boxes_list = [], [] - num_outs = int(len(preds) / 2) - for out_idx in range(num_outs): - score_list.append(preds[out_idx]) - boxes_list.append(preds[out_idx + num_outs]) - - boxes, scores, class_names = self.postprocess_op( - ori_im, img, {"boxes": score_list, "boxes_num": boxes_list} - ) - elapse = time.time() - starttime - return boxes, scores, class_names, elapse - - @staticmethod - def get_model_path(model_type: str) -> str: - model_url = KEY_TO_MODEL_URL.get(model_type, None) - if model_url: - model_path = DownloadModel.download(model_url) - return model_path - - logger.info("model url is None, using the default model %s", DEFAULT_MODEL_PATH) - return DEFAULT_MODEL_PATH - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument( - "-img", "--img_path", type=str, required=True, help="Path to image for layout." - ) - parser.add_argument( - "-m", - "--model_type", - type=str, - default=DEFAULT_MODEL_PATH, - choices=list(KEY_TO_MODEL_URL.keys()), - help="Support model type", - ) - parser.add_argument( - "--box_threshold", - type=float, - default=0.5, - choices=list(KEY_TO_MODEL_URL.keys()), - help="Box threshold, the range is [0, 1]", - ) - parser.add_argument( - "-v", - "--vis", - action="store_true", - help="Wheter to visualize the layout results.", - ) - args = parser.parse_args() - - layout_engine = RapidLayout( - model_type=args.model_type, box_threshold=args.box_threshold - ) - - img = cv2.imread(args.img_path) - boxes, scores, class_names, *elapse = layout_engine(img) - print(boxes) - print(scores) - print(class_names) - - if args.vis: - img_path = Path(args.img_path) - ploted_img = VisLayout.draw_detections(img, boxes, scores, class_names) - if ploted_img is not None: - save_path = img_path.resolve().parent / f"vis_{img_path.name}" - cv2.imwrite(str(save_path), ploted_img) - print(f"The visualized image has been saved in {save_path}") - - -if __name__ == "__main__": - main() diff --git a/rapid_layout/models/.gitkeep b/rapid_layout/models/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/rapid_layout/utils/__init__.py b/rapid_layout/utils/__init__.py deleted file mode 100644 index beb7dac..0000000 --- a/rapid_layout/utils/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# -*- encoding: utf-8 -*- -# @Author: SWHL -# @Contact: liekkaskono@163.com -import yaml - -from .download_model import DownloadModel -from .infer_engine import OrtInferSession -from .load_image import LoadImage -from .logger import get_logger -from .post_prepross import PicoDetPostProcess -from .pre_procss import create_operators, transform -from .vis_res import VisLayout - - -def read_yaml(yaml_path): - with open(yaml_path, "rb") as f: - data = yaml.load(f, Loader=yaml.Loader) - return data diff --git a/rapid_layout/utils/download_model.py b/rapid_layout/utils/download_model.py deleted file mode 100644 index a0d9d93..0000000 --- a/rapid_layout/utils/download_model.py +++ /dev/null @@ -1,62 +0,0 @@ -# -*- encoding: utf-8 -*- -# @Author: SWHL -# @Contact: liekkaskono@163.com -import io -from pathlib import Path -from typing import Optional, Union - -import requests -from tqdm import tqdm - -from .logger import get_logger - -logger = get_logger("DownloadModel") -CUR_DIR = Path(__file__).resolve() -PROJECT_DIR = CUR_DIR.parent.parent - - -class DownloadModel: - cur_dir = PROJECT_DIR - - @classmethod - def download(cls, model_full_url: Union[str, Path]) -> str: - save_dir = cls.cur_dir / "models" - save_dir.mkdir(parents=True, exist_ok=True) - - model_name = Path(model_full_url).name - save_file_path = save_dir / model_name - if save_file_path.exists(): - logger.info("%s already exists", save_file_path) - return str(save_file_path) - - try: - logger.info("Download %s to %s", model_full_url, save_dir) - file = cls.download_as_bytes_with_progress(model_full_url, model_name) - cls.save_file(save_file_path, file) - except Exception as exc: - raise DownloadModelError from exc - return str(save_file_path) - - @staticmethod - def download_as_bytes_with_progress( - url: Union[str, Path], name: Optional[str] = None - ) -> bytes: - resp = requests.get(str(url), stream=True, allow_redirects=True, timeout=180) - total = int(resp.headers.get("content-length", 0)) - bio = io.BytesIO() - with tqdm( - desc=name, total=total, unit="b", unit_scale=True, unit_divisor=1024 - ) as pbar: - for chunk in resp.iter_content(chunk_size=65536): - pbar.update(len(chunk)) - bio.write(chunk) - return bio.getvalue() - - @staticmethod - def save_file(save_path: Union[str, Path], file: bytes): - with open(save_path, "wb") as f: - f.write(file) - - -class DownloadModelError(Exception): - pass diff --git a/rapid_layout/utils/infer_engine.py b/rapid_layout/utils/infer_engine.py deleted file mode 100644 index fb1fd35..0000000 --- a/rapid_layout/utils/infer_engine.py +++ /dev/null @@ -1,231 +0,0 @@ -# -*- encoding: utf-8 -*- -# @Author: SWHL -# @Contact: liekkaskono@163.com -import os -import platform -import traceback -from enum import Enum -from pathlib import Path -from typing import Any, Dict, List, Tuple, Union - -import numpy as np -from onnxruntime import ( - GraphOptimizationLevel, - InferenceSession, - SessionOptions, - get_available_providers, - get_device, -) - -from .logger import get_logger - - -class EP(Enum): - CPU_EP = "CPUExecutionProvider" - CUDA_EP = "CUDAExecutionProvider" - DIRECTML_EP = "DmlExecutionProvider" - - -class OrtInferSession: - def __init__(self, config: Dict[str, Any]): - self.logger = get_logger("OrtInferSession") - - model_path = config.get("model_path", None) - self._verify_model(model_path) - - self.cfg_use_cuda = config.get("use_cuda", None) - self.cfg_use_dml = config.get("use_dml", None) - - self.had_providers: List[str] = get_available_providers() - EP_list = self._get_ep_list() - - sess_opt = self._init_sess_opts(config) - self.session = InferenceSession( - model_path, - sess_options=sess_opt, - providers=EP_list, - ) - self._verify_providers() - - @staticmethod - def _init_sess_opts(config: Dict[str, Any]) -> SessionOptions: - sess_opt = SessionOptions() - sess_opt.log_severity_level = 4 - sess_opt.enable_cpu_mem_arena = False - sess_opt.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL - - cpu_nums = os.cpu_count() - intra_op_num_threads = config.get("intra_op_num_threads", -1) - if intra_op_num_threads != -1 and 1 <= intra_op_num_threads <= cpu_nums: - sess_opt.intra_op_num_threads = intra_op_num_threads - - inter_op_num_threads = config.get("inter_op_num_threads", -1) - if inter_op_num_threads != -1 and 1 <= inter_op_num_threads <= cpu_nums: - sess_opt.inter_op_num_threads = inter_op_num_threads - - return sess_opt - - def _get_ep_list(self) -> List[Tuple[str, Dict[str, Any]]]: - cpu_provider_opts = { - "arena_extend_strategy": "kSameAsRequested", - } - EP_list = [(EP.CPU_EP.value, cpu_provider_opts)] - - cuda_provider_opts = { - "device_id": 0, - "arena_extend_strategy": "kNextPowerOfTwo", - "cudnn_conv_algo_search": "EXHAUSTIVE", - "do_copy_in_default_stream": True, - } - self.use_cuda = self._check_cuda() - if self.use_cuda: - EP_list.insert(0, (EP.CUDA_EP.value, cuda_provider_opts)) - - self.use_directml = self._check_dml() - if self.use_directml: - self.logger.info( - "Windows 10 or above detected, try to use DirectML as primary provider" - ) - directml_options = ( - cuda_provider_opts if self.use_cuda else cpu_provider_opts - ) - EP_list.insert(0, (EP.DIRECTML_EP.value, directml_options)) - return EP_list - - def _check_cuda(self) -> bool: - if not self.cfg_use_cuda: - return False - - cur_device = get_device() - if cur_device == "GPU" and EP.CUDA_EP.value in self.had_providers: - return True - - self.logger.warning( - "%s is not in available providers (%s). Use %s inference by default.", - EP.CUDA_EP.value, - self.had_providers, - self.had_providers[0], - ) - self.logger.info("!!!Recommend to use rapidocr_paddle for inference on GPU.") - self.logger.info( - "(For reference only) If you want to use GPU acceleration, you must do:" - ) - self.logger.info( - "First, uninstall all onnxruntime pakcages in current environment." - ) - self.logger.info( - "Second, install onnxruntime-gpu by `pip install onnxruntime-gpu`." - ) - self.logger.info( - "\tNote the onnxruntime-gpu version must match your cuda and cudnn version." - ) - self.logger.info( - "\tYou can refer this link: https://onnxruntime.ai/docs/execution-providers/CUDA-EP.html" - ) - self.logger.info( - "Third, ensure %s is in available providers list. e.g. ['CUDAExecutionProvider', 'CPUExecutionProvider']", - EP.CUDA_EP.value, - ) - return False - - def _check_dml(self) -> bool: - if not self.cfg_use_dml: - return False - - cur_os = platform.system() - if cur_os != "Windows": - self.logger.warning( - "DirectML is only supported in Windows OS. The current OS is %s. Use %s inference by default.", - cur_os, - self.had_providers[0], - ) - return False - - cur_window_version = int(platform.release().split(".")[0]) - if cur_window_version < 10: - self.logger.warning( - "DirectML is only supported in Windows 10 and above OS. The current Windows version is %s. Use %s inference by default.", - cur_window_version, - self.had_providers[0], - ) - return False - - if EP.DIRECTML_EP.value in self.had_providers: - return True - - self.logger.warning( - "%s is not in available providers (%s). Use %s inference by default.", - EP.DIRECTML_EP.value, - self.had_providers, - self.had_providers[0], - ) - self.logger.info("If you want to use DirectML acceleration, you must do:") - self.logger.info( - "First, uninstall all onnxruntime pakcages in current environment." - ) - self.logger.info( - "Second, install onnxruntime-directml by `pip install onnxruntime-directml`" - ) - self.logger.info( - "Third, ensure %s is in available providers list. e.g. ['DmlExecutionProvider', 'CPUExecutionProvider']", - EP.DIRECTML_EP.value, - ) - return False - - def _verify_providers(self): - session_providers = self.session.get_providers() - first_provider = session_providers[0] - - if self.use_cuda and first_provider != EP.CUDA_EP.value: - self.logger.warning( - "%s is not avaiable for current env, the inference part is automatically shifted to be executed under %s.", - EP.CUDA_EP.value, - first_provider, - ) - - if self.use_directml and first_provider != EP.DIRECTML_EP.value: - self.logger.warning( - "%s is not available for current env, the inference part is automatically shifted to be executed under %s.", - EP.DIRECTML_EP.value, - first_provider, - ) - - def __call__(self, input_content: np.ndarray) -> np.ndarray: - input_dict = dict(zip(self.get_input_names(), [input_content])) - try: - return self.session.run(self.get_output_names(), input_dict) - except Exception as e: - error_info = traceback.format_exc() - raise ONNXRuntimeError(error_info) from e - - def get_input_names(self) -> List[str]: - return [v.name for v in self.session.get_inputs()] - - def get_output_names(self) -> List[str]: - return [v.name for v in self.session.get_outputs()] - - def get_character_list(self, key: str = "character") -> List[str]: - meta_dict = self.session.get_modelmeta().custom_metadata_map - return meta_dict[key].splitlines() - - def have_key(self, key: str = "character") -> bool: - meta_dict = self.session.get_modelmeta().custom_metadata_map - if key in meta_dict.keys(): - return True - return False - - @staticmethod - def _verify_model(model_path: Union[str, Path, None]): - if model_path is None: - raise ValueError("model_path is None!") - - model_path = Path(model_path) - if not model_path.exists(): - raise FileNotFoundError(f"{model_path} does not exists.") - - if not model_path.is_file(): - raise FileExistsError(f"{model_path} is not a file.") - - -class ONNXRuntimeError(Exception): - pass diff --git a/rapid_layout/utils/load_image.py b/rapid_layout/utils/load_image.py deleted file mode 100644 index 04c49e0..0000000 --- a/rapid_layout/utils/load_image.py +++ /dev/null @@ -1,124 +0,0 @@ -# -*- encoding: utf-8 -*- -# @Author: SWHL -# @Contact: liekkaskono@163.com -from io import BytesIO -from pathlib import Path -from typing import Any, Union - -import cv2 -import numpy as np -from PIL import Image, UnidentifiedImageError - -root_dir = Path(__file__).resolve().parent -InputType = Union[str, np.ndarray, bytes, Path, Image.Image] - - -class LoadImage: - def __init__(self): - pass - - def __call__(self, img: InputType) -> np.ndarray: - if not isinstance(img, InputType.__args__): - raise LoadImageError( - f"The img type {type(img)} does not in {InputType.__args__}" - ) - - origin_img_type = type(img) - img = self.load_img(img) - img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) - img = self.convert_img(img, origin_img_type) - return img - - def load_img(self, img: InputType) -> np.ndarray: - if isinstance(img, (str, Path)): - self.verify_exist(img) - try: - img = self.img_to_ndarray(Image.open(img)) - except UnidentifiedImageError as e: - raise LoadImageError(f"cannot identify image file {img}") from e - return img - - if isinstance(img, bytes): - img = self.img_to_ndarray(Image.open(BytesIO(img))) - return img - - if isinstance(img, np.ndarray): - return img - - if isinstance(img, Image.Image): - return self.img_to_ndarray(img) - - raise LoadImageError(f"{type(img)} is not supported!") - - def img_to_ndarray(self, img: Image.Image) -> np.ndarray: - if img.mode == "1": - img = img.convert("L") - return np.array(img) - return np.array(img) - - def convert_img(self, img: np.ndarray, origin_img_type: Any) -> np.ndarray: - if img.ndim == 2: - return cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) - - if img.ndim == 3: - channel = img.shape[2] - if channel == 1: - return cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) - - if channel == 2: - return self.cvt_two_to_three(img) - - if channel == 3: - if issubclass(origin_img_type, (str, Path, bytes, Image.Image)): - return cv2.cvtColor(img, cv2.COLOR_RGB2BGR) - return img - - if channel == 4: - return self.cvt_four_to_three(img) - - raise LoadImageError( - f"The channel({channel}) of the img is not in [1, 2, 3, 4]" - ) - - raise LoadImageError(f"The ndim({img.ndim}) of the img is not in [2, 3]") - - @staticmethod - def cvt_two_to_three(img: np.ndarray) -> np.ndarray: - """gray + alpha → BGR""" - img_gray = img[..., 0] - img_bgr = cv2.cvtColor(img_gray, cv2.COLOR_GRAY2BGR) - - img_alpha = img[..., 1] - not_a = cv2.bitwise_not(img_alpha) - not_a = cv2.cvtColor(not_a, cv2.COLOR_GRAY2BGR) - - new_img = cv2.bitwise_and(img_bgr, img_bgr, mask=img_alpha) - new_img = cv2.add(new_img, not_a) - return new_img - - @staticmethod - def cvt_four_to_three(img: np.ndarray) -> np.ndarray: - """RGBA → BGR""" - r, g, b, a = cv2.split(img) - new_img = cv2.merge((b, g, r)) - - not_a = cv2.bitwise_not(a) - not_a = cv2.cvtColor(not_a, cv2.COLOR_GRAY2BGR) - - new_img = cv2.bitwise_and(new_img, new_img, mask=a) - - mean_color = np.mean(new_img) - if mean_color <= 0.0: - new_img = cv2.add(new_img, not_a) - else: - new_img = cv2.bitwise_not(new_img) - return new_img - - @staticmethod - def verify_exist(file_path: Union[str, Path]): - if not Path(file_path).exists(): - raise LoadImageError(f"{file_path} does not exist.") - - -class LoadImageError(Exception): - pass diff --git a/rapid_layout/utils/logger.py b/rapid_layout/utils/logger.py deleted file mode 100644 index 66522c4..0000000 --- a/rapid_layout/utils/logger.py +++ /dev/null @@ -1,21 +0,0 @@ -# -*- encoding: utf-8 -*- -# @Author: SWHL -# @Contact: liekkaskono@163.com -import logging -from functools import lru_cache - - -@lru_cache(maxsize=32) -def get_logger(name: str) -> logging.Logger: - logger = logging.getLogger(name) - logger.setLevel(logging.DEBUG) - - fmt = "%(asctime)s - %(name)s - %(levelname)s: %(message)s" - format_str = logging.Formatter(fmt) - - sh = logging.StreamHandler() - sh.setLevel(logging.DEBUG) - - logger.addHandler(sh) - sh.setFormatter(format_str) - return logger diff --git a/rapid_layout/utils/post_prepross.py b/rapid_layout/utils/post_prepross.py deleted file mode 100644 index f134cf8..0000000 --- a/rapid_layout/utils/post_prepross.py +++ /dev/null @@ -1,256 +0,0 @@ -# -*- encoding: utf-8 -*- -# @Author: SWHL -# @Contact: liekkaskono@163.com -import numpy as np - - -class PicoDetPostProcess: - def __init__( - self, - labels, - strides=[8, 16, 32, 64], - score_threshold=0.4, - nms_threshold=0.5, - nms_top_k=1000, - keep_top_k=100, - ): - self.labels = labels - self.strides = strides - self.score_threshold = score_threshold - self.nms_threshold = nms_threshold - self.nms_top_k = nms_top_k - self.keep_top_k = keep_top_k - - def __call__(self, ori_img, img, preds): - scores, raw_boxes = preds["boxes"], preds["boxes_num"] - batch_size = raw_boxes[0].shape[0] - reg_max = int(raw_boxes[0].shape[-1] / 4 - 1) - - out_boxes_num = [] - out_boxes_list = [] - ori_shape, input_shape, scale_factor = self.img_info(ori_img, img) - - for batch_id in range(batch_size): - # generate centers - decode_boxes = [] - select_scores = [] - for stride, box_distribute, score in zip(self.strides, raw_boxes, scores): - box_distribute = box_distribute[batch_id] - score = score[batch_id] - # centers - fm_h = input_shape[0] / stride - fm_w = input_shape[1] / stride - h_range = np.arange(fm_h) - w_range = np.arange(fm_w) - ww, hh = np.meshgrid(w_range, h_range) - ct_row = (hh.flatten() + 0.5) * stride - ct_col = (ww.flatten() + 0.5) * stride - center = np.stack((ct_col, ct_row, ct_col, ct_row), axis=1) - - # box distribution to distance - reg_range = np.arange(reg_max + 1) - box_distance = box_distribute.reshape((-1, reg_max + 1)) - box_distance = self.softmax(box_distance, axis=1) - box_distance = box_distance * np.expand_dims(reg_range, axis=0) - box_distance = np.sum(box_distance, axis=1).reshape((-1, 4)) - box_distance = box_distance * stride - - # top K candidate - topk_idx = np.argsort(score.max(axis=1))[::-1] - topk_idx = topk_idx[: self.nms_top_k] - center = center[topk_idx] - score = score[topk_idx] - box_distance = box_distance[topk_idx] - - # decode box - decode_box = center + [-1, -1, 1, 1] * box_distance - - select_scores.append(score) - decode_boxes.append(decode_box) - - # nms - bboxes = np.concatenate(decode_boxes, axis=0) - confidences = np.concatenate(select_scores, axis=0) - picked_box_probs = [] - picked_labels = [] - for class_index in range(0, confidences.shape[1]): - probs = confidences[:, class_index] - mask = probs > self.score_threshold - probs = probs[mask] - if probs.shape[0] == 0: - continue - subset_boxes = bboxes[mask, :] - box_probs = np.concatenate([subset_boxes, probs.reshape(-1, 1)], axis=1) - box_probs = self.hard_nms( - box_probs, - iou_threshold=self.nms_threshold, - top_k=self.keep_top_k, - ) - picked_box_probs.append(box_probs) - picked_labels.extend([class_index] * box_probs.shape[0]) - - if len(picked_box_probs) == 0: - out_boxes_list.append(np.empty((0, 4))) - out_boxes_num.append(0) - - else: - picked_box_probs = np.concatenate(picked_box_probs) - - # resize output boxes - picked_box_probs[:, :4] = self.warp_boxes( - picked_box_probs[:, :4], ori_shape[batch_id] - ) - im_scale = np.concatenate( - [scale_factor[batch_id][::-1], scale_factor[batch_id][::-1]] - ) - picked_box_probs[:, :4] /= im_scale - # clas score box - out_boxes_list.append( - np.concatenate( - [ - np.expand_dims(np.array(picked_labels), axis=-1), - np.expand_dims(picked_box_probs[:, 4], axis=-1), - picked_box_probs[:, :4], - ], - axis=1, - ) - ) - out_boxes_num.append(len(picked_labels)) - - out_boxes_list = np.concatenate(out_boxes_list, axis=0) - out_boxes_num = np.asarray(out_boxes_num).astype(np.int32) - - boxes, scores, class_names = [], [], [] - for dt in out_boxes_list: - clsid, bbox, score = int(dt[0]), dt[2:], dt[1] - label = self.labels[clsid] - boxes.append(bbox) - scores.append(score) - class_names.append(label) - return np.array(boxes), np.array(scores), np.array(class_names) - - def load_layout_dict(self, layout_dict_path): - with open(layout_dict_path, "r", encoding="utf-8") as fp: - labels = fp.readlines() - return [label.strip("\n") for label in labels] - - def warp_boxes(self, boxes, ori_shape): - """Apply transform to boxes""" - width, height = ori_shape[1], ori_shape[0] - n = len(boxes) - if n: - # warp points - xy = np.ones((n * 4, 3)) - xy[:, :2] = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape( - n * 4, 2 - ) # x1y1, x2y2, x1y2, x2y1 - # xy = xy @ M.T # transform - xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8) # rescale - # create new boxes - x = xy[:, [0, 2, 4, 6]] - y = xy[:, [1, 3, 5, 7]] - xy = ( - np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T - ) - # clip boxes - xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width) - xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height) - return xy.astype(np.float32) - return boxes - - def img_info(self, ori_img, img): - origin_shape = ori_img.shape - resize_shape = img.shape - im_scale_y = resize_shape[2] / float(origin_shape[0]) - im_scale_x = resize_shape[3] / float(origin_shape[1]) - scale_factor = np.array([im_scale_y, im_scale_x], dtype=np.float32) - img_shape = np.array(img.shape[2:], dtype=np.float32) - - input_shape = np.array(img).astype("float32").shape[2:] - ori_shape = np.array((img_shape,)).astype("float32") - scale_factor = np.array((scale_factor,)).astype("float32") - return ori_shape, input_shape, scale_factor - - @staticmethod - def softmax(x, axis=None): - def logsumexp(a, axis=None, b=None, keepdims=False): - a_max = np.amax(a, axis=axis, keepdims=True) - - if a_max.ndim > 0: - a_max[~np.isfinite(a_max)] = 0 - elif not np.isfinite(a_max): - a_max = 0 - - tmp = np.exp(a - a_max) - - # suppress warnings about log of zero - with np.errstate(divide="ignore"): - s = np.sum(tmp, axis=axis, keepdims=keepdims) - out = np.log(s) - - if not keepdims: - a_max = np.squeeze(a_max, axis=axis) - out += a_max - return out - - return np.exp(x - logsumexp(x, axis=axis, keepdims=True)) - - def hard_nms(self, box_scores, iou_threshold, top_k=-1, candidate_size=200): - """ - Args: - box_scores (N, 5): boxes in corner-form and probabilities. - iou_threshold: intersection over union threshold. - top_k: keep top_k results. If k <= 0, keep all the results. - candidate_size: only consider the candidates with the highest scores. - Returns: - picked: a list of indexes of the kept boxes - """ - scores = box_scores[:, -1] - boxes = box_scores[:, :-1] - picked = [] - indexes = np.argsort(scores) - indexes = indexes[-candidate_size:] - while len(indexes) > 0: - current = indexes[-1] - picked.append(current) - if 0 < top_k == len(picked) or len(indexes) == 1: - break - current_box = boxes[current, :] - indexes = indexes[:-1] - rest_boxes = boxes[indexes, :] - iou = self.iou_of( - rest_boxes, - np.expand_dims(current_box, axis=0), - ) - indexes = indexes[iou <= iou_threshold] - - return box_scores[picked, :] - - def iou_of(self, boxes0, boxes1, eps=1e-5): - """Return intersection-over-union (Jaccard index) of boxes. - Args: - boxes0 (N, 4): ground truth boxes. - boxes1 (N or 1, 4): predicted boxes. - eps: a small number to avoid 0 as denominator. - Returns: - iou (N): IoU values. - """ - overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2]) - overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:]) - - overlap_area = self.area_of(overlap_left_top, overlap_right_bottom) - area0 = self.area_of(boxes0[..., :2], boxes0[..., 2:]) - area1 = self.area_of(boxes1[..., :2], boxes1[..., 2:]) - return overlap_area / (area0 + area1 - overlap_area + eps) - - @staticmethod - def area_of(left_top, right_bottom): - """Compute the areas of rectangles given two corners. - Args: - left_top (N, 2): left top corner. - right_bottom (N, 2): right bottom corner. - Returns: - area (N): return the area. - """ - hw = np.clip(right_bottom - left_top, 0.0, None) - return hw[..., 0] * hw[..., 1] diff --git a/rapid_layout/utils/pre_procss.py b/rapid_layout/utils/pre_procss.py deleted file mode 100644 index f5e3e21..0000000 --- a/rapid_layout/utils/pre_procss.py +++ /dev/null @@ -1,103 +0,0 @@ -# -*- encoding: utf-8 -*- -# @Author: SWHL -# @Contact: liekkaskono@163.com -from pathlib import Path -from typing import Union - -import cv2 -import numpy as np - -InputType = Union[str, np.ndarray, bytes, Path] - - -def transform(data, ops=None): - """transform""" - if ops is None: - ops = [] - - for op in ops: - data = op(data) - if data is None: - return None - return data - - -def create_operators(op_param_dict): - ops = [] - for op_name, param in op_param_dict.items(): - if param is None: - param = {} - op = eval(op_name)(**param) - ops.append(op) - return ops - - -class Resize: - def __init__(self, size=(640, 640)): - self.size = size - - def resize_image(self, img): - resize_h, resize_w = self.size - ori_h, ori_w = img.shape[:2] # (h, w, c) - ratio_h = float(resize_h) / ori_h - ratio_w = float(resize_w) / ori_w - img = cv2.resize(img, (int(resize_w), int(resize_h))) - return img, [ratio_h, ratio_w] - - def __call__(self, data): - img = data["image"] - if "polys" in data: - text_polys = data["polys"] - - img_resize, [ratio_h, ratio_w] = self.resize_image(img) - if "polys" in data: - new_boxes = [] - for box in text_polys: - new_box = [] - for cord in box: - new_box.append([cord[0] * ratio_w, cord[1] * ratio_h]) - new_boxes.append(new_box) - data["polys"] = np.array(new_boxes, dtype=np.float32) - data["image"] = img_resize - return data - - -class NormalizeImage: - def __init__(self, scale=None, mean=None, std=None, order="chw"): - if isinstance(scale, str): - scale = eval(scale) - - self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) - mean = mean if mean is not None else [0.485, 0.456, 0.406] - std = std if std is not None else [0.229, 0.224, 0.225] - - shape = (3, 1, 1) if order == "chw" else (1, 1, 3) - self.mean = np.array(mean).reshape(shape).astype("float32") - self.std = np.array(std).reshape(shape).astype("float32") - - def __call__(self, data): - img = np.array(data["image"]) - assert isinstance(img, np.ndarray), "invalid input 'img' in NormalizeImage" - data["image"] = (img.astype("float32") * self.scale - self.mean) / self.std - return data - - -class ToCHWImage: - def __init__(self, **kwargs): - pass - - def __call__(self, data): - img = np.array(data["image"]) - data["image"] = img.transpose((2, 0, 1)) - return data - - -class KeepKeys: - def __init__(self, keep_keys): - self.keep_keys = keep_keys - - def __call__(self, data): - data_list = [] - for key in self.keep_keys: - data_list.append(data[key]) - return data_list diff --git a/rapid_layout/utils/vis_res.py b/rapid_layout/utils/vis_res.py deleted file mode 100644 index f73ee79..0000000 --- a/rapid_layout/utils/vis_res.py +++ /dev/null @@ -1,115 +0,0 @@ -# -*- encoding: utf-8 -*- -# @Author: SWHL -# @Contact: liekkaskono@163.com -from typing import Optional - -import cv2 -import numpy as np - - -class VisLayout: - @classmethod - def draw_detections( - cls, - image: np.ndarray, - boxes: Optional[np.ndarray], - scores: Optional[np.ndarray], - class_names: Optional[np.ndarray], - mask_alpha=0.3, - ) -> Optional[np.ndarray]: - """_summary_ - - Args: - image (np.ndarray): H x W x C - boxes (np.ndarray): (N, 4) - scores (np.ndarray): (N, ) - class_ids (np.ndarray): (N, ) - mask_alpha (float, optional): _description_. Defaults to 0.3. - - Returns: - np.ndarray: _description_ - """ - if boxes is None or scores is None or class_names is None: - return None - - det_img = image.copy() - - img_height, img_width = image.shape[:2] - font_size = min([img_height, img_width]) * 0.0006 - text_thickness = int(min([img_height, img_width]) * 0.001) - - det_img = cls.draw_masks(det_img, boxes, mask_alpha) - - for label, box, score in zip(class_names, boxes, scores): - color = cls.get_color() - - cls.draw_box(det_img, box, color) - caption = f"{label} {int(score * 100)}%" - cls.draw_text(det_img, caption, box, color, font_size, text_thickness) - - return det_img - - @staticmethod - def draw_box( - image: np.ndarray, - box: np.ndarray, - color: tuple[int, int, int] = (0, 0, 255), - thickness: int = 2, - ) -> np.ndarray: - x1, y1, x2, y2 = box.astype(int) - return cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness) - - @staticmethod - def draw_text( - image: np.ndarray, - text: str, - box: np.ndarray, - color: tuple[int, int, int] = (0, 0, 255), - font_size: float = 0.001, - text_thickness: int = 2, - ) -> np.ndarray: - x1, y1, x2, y2 = box.astype(int) - (tw, th), _ = cv2.getTextSize( - text=text, - fontFace=cv2.FONT_HERSHEY_SIMPLEX, - fontScale=font_size, - thickness=text_thickness, - ) - th = int(th * 1.2) - - cv2.rectangle(image, (x1, y1), (x1 + tw, y1 - th), color, -1) - - return cv2.putText( - image, - text, - (x1, y1), - cv2.FONT_HERSHEY_SIMPLEX, - font_size, - (255, 255, 255), - text_thickness, - cv2.LINE_AA, - ) - - @classmethod - def draw_masks( - cls, - image: np.ndarray, - boxes: np.ndarray, - mask_alpha: float = 0.3, - ) -> np.ndarray: - mask_img = image.copy() - for box in boxes: - color = cls.get_color() - x1, y1, x2, y2 = box.astype(int) - cv2.rectangle(mask_img, (x1, y1), (x2, y2), color, -1) - - return cv2.addWeighted(mask_img, mask_alpha, image, 1 - mask_alpha, 0) - - @staticmethod - def get_color(): - colors = ( - np.random.randint(0, 255), - np.random.randint(0, 255), - np.random.randint(0, 255), - ) - return colors diff --git a/setup_layout.py b/setup_layout.py deleted file mode 100644 index ebb2bd8..0000000 --- a/setup_layout.py +++ /dev/null @@ -1,66 +0,0 @@ -# -*- encoding: utf-8 -*- -# @Author: SWHL -# @Contact: liekkaskono@163.com -import sys -from pathlib import Path - -import setuptools -from get_pypi_latest_version import GetPyPiLatestVersion - - -def get_readme(): - root_dir = Path(__file__).resolve().parent - readme_path = str(root_dir / "docs" / "doc_whl_rapid_layout.md") - with open(readme_path, "r", encoding="utf-8") as f: - readme = f.read() - return readme - - -MODULE_NAME = "rapid_layout" -obtainer = GetPyPiLatestVersion() -latest_version = obtainer(MODULE_NAME) -VERSION_NUM = obtainer.version_add_one(latest_version) - -if len(sys.argv) > 2: - match_str = " ".join(sys.argv[2:]) - matched_versions = obtainer.extract_version(match_str) - if matched_versions: - VERSION_NUM = matched_versions -sys.argv = sys.argv[:2] - -setuptools.setup( - name=MODULE_NAME, - version=VERSION_NUM, - platforms="Any", - long_description=get_readme(), - long_description_content_type="text/markdown", - description="Tools for document layout analysis based ONNXRuntime.", - author="SWHL", - author_email="liekkaskono@163.com", - url="https://github.com/RapidAI/RapidStructure", - license="Apache-2.0", - include_package_data=True, - install_requires=[ - "onnxruntime>=1.7.0", - "PyYAML>=6.0", - "opencv_python>=4.5.1.48", - "numpy>=1.21.6,<2", - "Pillow", - "tqdm", - "requests", - ], - packages=[MODULE_NAME, f"{MODULE_NAME}.models", f"{MODULE_NAME}.utils"], - package_data={"": ["layout_cdla.onnx", "*.yaml"]}, - keywords=["ppstructure,layout,rapidocr,rapid_layout"], - classifiers=[ - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - ], - python_requires=">=3.6,<3.13", - entry_points={"console_scripts": [f"{MODULE_NAME}={MODULE_NAME}.main:main"]}, -) diff --git a/tests/test_files/layout.png b/tests/test_files/layout.png deleted file mode 100644 index 29a1dc5..0000000 Binary files a/tests/test_files/layout.png and /dev/null differ diff --git a/tests/test_layout.py b/tests/test_layout.py deleted file mode 100644 index 1d3fa5c..0000000 --- a/tests/test_layout.py +++ /dev/null @@ -1,30 +0,0 @@ -# -*- encoding: utf-8 -*- -# @Author: SWHL -# @Contact: liekkaskono@163.com -import sys -from pathlib import Path - -import cv2 -import pytest - -cur_dir = Path(__file__).resolve().parent -root_dir = cur_dir.parent - -sys.path.append(str(root_dir)) - -from rapid_layout import RapidLayout - -test_file_dir = cur_dir / "test_files" -layout_engine = RapidLayout() - -img_path = test_file_dir / "layout.png" - -img = cv2.imread(str(img_path)) - - -@pytest.mark.parametrize( - "img_content", [img_path, str(img_path), open(img_path, "rb").read(), img] -) -def test_multi_input(img_content): - boxes, scores, class_names, *elapse = layout_engine(img_content) - assert len(boxes) == 15