-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
158 additions
and
161 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,10 @@ | ||
from .det_utils import * | ||
from .draw_utils import * | ||
from .ds_utils import * | ||
from .eval_utils import * | ||
from .geometry_utils import * | ||
from .img_utils import * | ||
from .io_utils import * | ||
from .misc_utils import * | ||
from .network_utils import * | ||
from .geometry_utils import * | ||
from .det_utils import * | ||
from .model_utils import * | ||
from .network_utils import * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
from typing import List, Tuple | ||
|
||
import numpy as np | ||
|
||
from ocrtoolkit.utilities.geometry_utils import estimate_page_angle, rotate_boxes | ||
|
||
|
||
def sort_boxes(boxes: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Compute a reading order (top to bottom, left to right) for boxes.

    Args:
        boxes: A 2-D array whose columns 0, 1 and 3 are read here as the left
            edge, top edge and bottom edge of each box, or a 3-D array of
            rotated boxes (presumably ``(N, 4, 2)`` corner points -- TODO
            confirm against callers).

    Returns:
        A tuple ``(sort_indices, boxes)``. ``sort_indices`` is the argsort of
        the ordering key; ``boxes`` is the input unchanged for 2-D input, or
        the straightened axis-aligned boxes for 3-D input.
    """
    if boxes.ndim == 3:
        # Rotated boxes: rotate by the negated estimated page angle, then
        # collapse every polygon to its min/max extent along axis 1.
        page_angle = -estimate_page_angle(boxes)
        straightened = rotate_boxes(
            loc_preds=boxes, angle=page_angle, orig_shape=(1024, 1024), min_angle=5.0
        )
        boxes = np.concatenate(
            (straightened.min(axis=1), straightened.max(axis=1)), axis=-1
        )

    # Key = left edge + 2 * (bottom edge expressed in median box heights).
    # NOTE(review): the vertical term only dominates when column 0 is small
    # compared to it (e.g. normalized coordinates) -- confirm the expected
    # coordinate range with callers.
    median_height = np.median(boxes[:, 3] - boxes[:, 1])
    ordering_key = boxes[:, 0] + 2 * boxes[:, 3] / median_height
    return ordering_key.argsort(), boxes
|
||
|
||
def resolve_sub_lines(
    boxes: np.ndarray, word_idcs: List[int], paragraph_break: float
) -> List[List[int]]:
    """Split the words of one line into sub-lines at large horizontal gaps.

    Args:
        boxes: 2-D array of boxes; column 0 is read as the left edge and
            column 2 as the right edge of each box.
        word_idcs: Row indices into ``boxes`` for the words of a single line.
        paragraph_break: A gap between the right edge of one word and the left
            edge of the next that is not strictly smaller than this value
            starts a new sub-line.

    Returns:
        Sub-lines as lists of indices, each ordered by left edge. Fewer than
        two indices yield a single sub-line holding whatever was passed.
    """
    ordered = sorted(word_idcs, key=lambda idx: boxes[idx, 0])
    if len(ordered) < 2:
        return [ordered]

    sub_lines = [[ordered[0]]]
    for current in ordered[1:]:
        previous = sub_lines[-1][-1]
        gap = boxes[current, 0] - boxes[previous, 2]
        if gap < paragraph_break:
            # Close enough to the previous word: same sub-line.
            sub_lines[-1].append(current)
        else:
            sub_lines.append([current])
    return sub_lines
|
||
|
||
def resolve_lines(boxes: np.ndarray, paragraph_break: float) -> List[List[int]]:
    """Group boxes into text lines and split each line into sub-lines.

    Boxes are visited in the order given by ``sort_boxes``. A box joins the
    current line while its vertical centre (mean of columns 1 and 3) is within
    half the median box height of the running mean centre of that line;
    otherwise the current line is flushed through ``resolve_sub_lines`` and a
    new line starts.

    Args:
        boxes: Array of boxes accepted by ``sort_boxes``.
        paragraph_break: Horizontal gap threshold forwarded to
            ``resolve_sub_lines``.

    Returns:
        A list of sub-lines, each a list of row indices into the boxes
        returned by ``sort_boxes``. An empty input yields an empty list.
    """
    # Without this guard ``idxs[0]`` below raises IndexError on empty input.
    if boxes.shape[0] == 0:
        return []

    idxs, boxes = sort_boxes(boxes)
    y_med = np.median(boxes[:, 3] - boxes[:, 1])

    lines: List[List[int]] = []
    words = [idxs[0]]
    y_center_sum = boxes[idxs[0], [1, 3]].mean()
    for idx in idxs[1:]:
        y_center = boxes[idx, [1, 3]].mean()
        # Compare against the mean centre of the words collected so far.
        if abs(y_center - y_center_sum / len(words)) < y_med / 2:
            words.append(idx)
            y_center_sum += y_center
        else:
            lines.extend(resolve_sub_lines(boxes, words, paragraph_break))
            words, y_center_sum = [idx], y_center

    # ``words`` always holds at least one index here: flush the last line.
    lines.extend(resolve_sub_lines(boxes, words, paragraph_break))
    return lines
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,63 +1,82 @@ | ||
import numpy as np | ||
from typing import List, Tuple | ||
from ocrtoolkit.utilities.geometry_utils import ( | ||
estimate_page_angle, | ||
rotate_boxes, | ||
) | ||
|
||
|
||
def sort_boxes(boxes: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Compute a reading order (top to bottom, left to right) for boxes.

    Args:
        boxes: A 2-D array whose columns 0, 1 and 3 are read here as the left
            edge, top edge and bottom edge of each box, or a 3-D array of
            rotated boxes (presumably ``(N, 4, 2)`` corner points -- TODO
            confirm against callers).

    Returns:
        A tuple ``(sort_indices, boxes)``. ``sort_indices`` is the argsort of
        the ordering key; ``boxes`` is the input unchanged for 2-D input, or
        the straightened axis-aligned boxes for 3-D input.
    """
    if boxes.ndim == 3:
        # Rotated boxes: rotate by the negated estimated page angle, then
        # collapse every polygon to its min/max extent along axis 1.
        page_angle = -estimate_page_angle(boxes)
        straightened = rotate_boxes(
            loc_preds=boxes, angle=page_angle, orig_shape=(1024, 1024), min_angle=5.0
        )
        boxes = np.concatenate(
            (straightened.min(axis=1), straightened.max(axis=1)), axis=-1
        )

    # Key = left edge + 2 * (bottom edge expressed in median box heights).
    # NOTE(review): the vertical term only dominates when column 0 is small
    # compared to it (e.g. normalized coordinates) -- confirm the expected
    # coordinate range with callers.
    median_height = np.median(boxes[:, 3] - boxes[:, 1])
    ordering_key = boxes[:, 0] + 2 * boxes[:, 3] / median_height
    return ordering_key.argsort(), boxes
|
||
|
||
def resolve_sub_lines(
    boxes: np.ndarray, word_idcs: List[int], paragraph_break: float
) -> List[List[int]]:
    """Split the words of one line into sub-lines at large horizontal gaps.

    Args:
        boxes: 2-D array of boxes; column 0 is read as the left edge and
            column 2 as the right edge of each box.
        word_idcs: Row indices into ``boxes`` for the words of a single line.
        paragraph_break: A gap between the right edge of one word and the left
            edge of the next that is not strictly smaller than this value
            starts a new sub-line.

    Returns:
        Sub-lines as lists of indices, each ordered by left edge. Fewer than
        two indices yield a single sub-line holding whatever was passed.
    """
    ordered = sorted(word_idcs, key=lambda idx: boxes[idx, 0])
    if len(ordered) < 2:
        return [ordered]

    sub_lines = [[ordered[0]]]
    for current in ordered[1:]:
        previous = sub_lines[-1][-1]
        gap = boxes[current, 0] - boxes[previous, 2]
        if gap < paragraph_break:
            # Close enough to the previous word: same sub-line.
            sub_lines[-1].append(current)
        else:
            sub_lines.append([current])
    return sub_lines
|
||
|
||
def resolve_lines(boxes: np.ndarray, paragraph_break: float) -> List[List[int]]:
    """Group boxes into text lines and split each line into sub-lines.

    Boxes are visited in the order given by ``sort_boxes``. A box joins the
    current line while its vertical centre (mean of columns 1 and 3) is within
    half the median box height of the running mean centre of that line;
    otherwise the current line is flushed through ``resolve_sub_lines`` and a
    new line starts.

    Args:
        boxes: Array of boxes accepted by ``sort_boxes``.
        paragraph_break: Horizontal gap threshold forwarded to
            ``resolve_sub_lines``.

    Returns:
        A list of sub-lines, each a list of row indices into the boxes
        returned by ``sort_boxes``. An empty input yields an empty list.
    """
    # Without this guard ``idxs[0]`` below raises IndexError on empty input.
    if boxes.shape[0] == 0:
        return []

    idxs, boxes = sort_boxes(boxes)
    y_med = np.median(boxes[:, 3] - boxes[:, 1])

    lines: List[List[int]] = []
    words = [idxs[0]]
    y_center_sum = boxes[idxs[0], [1, 3]].mean()
    for idx in idxs[1:]:
        y_center = boxes[idx, [1, 3]].mean()
        # Compare against the mean centre of the words collected so far.
        if abs(y_center - y_center_sum / len(words)) < y_med / 2:
            words.append(idx)
            y_center_sum += y_center
        else:
            lines.extend(resolve_sub_lines(boxes, words, paragraph_break))
            words, y_center_sum = [idx], y_center

    # ``words`` always holds at least one index here: flush the last line.
    lines.extend(resolve_sub_lines(boxes, words, paragraph_break))
    return lines
import json | ||
from pathlib import Path | ||
|
||
import h5py | ||
from loguru import logger | ||
|
||
|
||
def save_dets(l_dets, path: str):
    """Write a sequence of detection results to an HDF5 file.

    One dataset per item is created inside a ``"dets"`` group, named
    ``dets_<position>``. Its data is ``item.to_numpy(encode=True)`` and its
    attributes store the item's ``width``, ``height`` and ``img_name``.

    Args:
        l_dets: Iterable of detection-result objects exposing ``to_numpy``,
            ``width``, ``height`` and ``img_name``.
        path: Destination file; opened in ``"w"`` mode.
    """
    with h5py.File(path, "w") as h5_file:
        dets_group = h5_file.create_group("dets")
        for position, page_dets in enumerate(l_dets):
            encoded_boxes = page_dets.to_numpy(encode=True)
            dataset = dets_group.create_dataset(
                f"dets_{position}", data=encoded_boxes
            )
            dataset.attrs["width"] = page_dets.width
            dataset.attrs["height"] = page_dets.height
            dataset.attrs["img_name"] = page_dets.img_name
    logger.info(f"Detections saved to {path}")
|
||
|
||
def save_dets_as_label_studio(l_dets, path: str, subdir_images="images"):
    """Save detections as a Label Studio JSON file.

    One entry is written per detection result. It holds a local-files image
    reference and a single prediction whose ``result`` list has one
    ``rectanglelabels`` region per bounding box.

    Args:
        l_dets: Iterable of detection-result objects exposing ``img_name``,
            ``width``, ``height`` and ``normalize().bboxes``; each bbox
            exposes ``x1``, ``y1``, ``w``, ``h`` and ``label``.
        path: Destination JSON file.
        subdir_images: Directory joined with each ``img_name`` to build the
            ``?d=`` part of the image reference.
    """
    url_template = "/data/local-files/?d={subdir_images}"
    tasks = []
    for detection in l_dets:
        image_ref = url_template.format(
            subdir_images=(Path(subdir_images) / detection.img_name).as_posix()
        )

        regions = []
        for position, bbox in enumerate(detection.normalize().bboxes):
            # Box fields are scaled by 100; presumably normalize() yields
            # 0-1 fractions turned into percentages here -- TODO confirm.
            geometry = {
                "rotation": 0,
                "x": bbox.x1 * 100,
                "y": bbox.y1 * 100,
                "width": bbox.w * 100,
                "height": bbox.h * 100,
                "rectanglelabels": [bbox.label],
            }
            regions.append(
                {
                    "id": f"bbox{position+1}",
                    "type": "rectanglelabels",
                    "from_name": "label",
                    "to_name": "image",
                    "original_width": detection.width,
                    "original_height": detection.height,
                    "image_rotation": 0,
                    "value": geometry,
                }
            )

        prediction = {"model_version": "one", "score": 0.5, "result": regions}
        tasks.append({"data": {"image": image_ref}, "predictions": [prediction]})

    with open(path, "w") as out_file:
        json.dump(tasks, out_file, indent=2)
|
||
|
||
def load_dets(path: str):
    """Read detection results back from an HDF5 file.

    Datasets of the ``"dets"`` group are visited in ascending order of the
    integer after the last ``"_"`` in their name. Each dataset row is turned
    into a ``BBox`` via ``BBox.from_numpy``; the ``width``, ``height`` and
    ``img_name`` attributes are cast to ``int``, ``int`` and ``str``.

    Args:
        path: HDF5 file to read; opened in ``"r"`` mode.

    Returns:
        A list of ``DetectionResults``, one per dataset.
    """
    # Imported inside the function as in the original; presumably to avoid a
    # circular import with the wrappers package -- TODO confirm.
    from ocrtoolkit.wrappers.bbox import BBox
    from ocrtoolkit.wrappers.detection_results import DetectionResults

    def _numeric_suffix(name):
        return int(name.split("_")[-1])

    results = []
    with h5py.File(path, "r") as h5_file:
        dets_group = h5_file["dets"]
        for name in sorted(dets_group.keys(), key=_numeric_suffix):
            dataset = dets_group[name]
            width = int(dataset.attrs["width"])
            height = int(dataset.attrs["height"])
            img_name = str(dataset.attrs["img_name"])
            rows = dataset[()]
            bboxes = [BBox.from_numpy(row) for row in rows]
            results.append(DetectionResults(bboxes, width, height, img_name))
    return results
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.