-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
14 changed files
with
441 additions
and
110 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# FORMAT | ||
# Put your extra requirements here in the following format | ||
# | ||
# package[version_required]: tag1, tag2, ... | ||
|
||
ultralytics==8.1.11: ultralytics | ||
dill==0.3.8: ultralytics | ||
paddleocr==2.7.0.3: paddle | ||
paddlepaddle-gpu==2.6.0: paddle | ||
python-doctr[torch]==0.8.1: doctr |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,3 +5,4 @@ matplotlib | |
tqdm | ||
loguru | ||
h5py | ||
scikit-learn |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
import numpy as np | ||
from typing import List, Tuple | ||
from ocrtoolkit.utilities.geometry_utils import ( | ||
estimate_page_angle, | ||
rotate_boxes, | ||
) | ||
|
||
|
||
def sort_boxes(boxes: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Sort bounding boxes from top to bottom, left to right.

    Args:
    ----
        boxes: either straight boxes whose first four columns are
            (xmin, ymin, xmax, ymax), or rotated boxes of shape (N, 4, 2).
            Rotated boxes are de-skewed with the estimated page angle and
            then collapsed to their axis-aligned envelopes.

    Returns:
    -------
        A tuple ``(sort_indices, boxes)``: the indices that order the boxes,
        and the straight boxes those indices refer to (the input itself for
        straight boxes, the computed envelopes for rotated ones).
    """
    if boxes.ndim == 3:  # Rotated boxes
        angle = -estimate_page_angle(boxes)
        # The rotation is done on a fixed (1024, 1024) reference page and is
        # skipped entirely by rotate_boxes when |angle| is below min_angle.
        boxes = rotate_boxes(
            loc_preds=boxes, angle=angle, orig_shape=(1024, 1024), min_angle=5.0
        )
        # Envelope of each polygon: (xmin, ymin, xmax, ymax)
        boxes = np.concatenate((boxes.min(axis=1), boxes.max(axis=1)), axis=-1)

    heights = boxes[:, 3] - boxes[:, 1]
    median_height = float(np.median(heights)) if heights.size else 0.0
    # Degenerate batches (empty, zero-height or inverted boxes) would turn the
    # sort key into inf/nan and make the ordering arbitrary; fall back to a
    # unit scale so the key stays finite.
    if not np.isfinite(median_height) or median_height <= 0:
        median_height = 1.0

    # ymax is expressed in units of the median height and weighted twice, so
    # the vertical position dominates and xmin orders boxes of similar height.
    sort_indices = (boxes[:, 0] + 2 * boxes[:, 3] / median_height).argsort()
    return sort_indices, boxes
|
||
|
||
def resolve_sub_lines(
    boxes: np.ndarray, word_idcs: List[int], paragraph_break: float
) -> List[List[int]]:
    """Split the words of one line into horizontally separated sub-lines.

    Args:
    ----
        boxes: straight boxes, indexed as (xmin, ymin, xmax, ymax)
        word_idcs: indices (rows of ``boxes``) of the words on the line
        paragraph_break: horizontal gap at or above which a new sub-line starts

    Returns:
    -------
        A list of sub-lines, each a list of word indices ordered by xmin.
        A line with fewer than two words is returned as a single sub-line.
    """
    ordered = sorted(word_idcs, key=lambda idx: boxes[idx, 0])
    if len(ordered) < 2:
        return [ordered]

    groups = [[ordered[0]]]
    for current in ordered[1:]:
        previous = groups[-1][-1]
        # Gap between the left edge of this word and the right edge of the
        # last word already placed in the running sub-line.
        gap = boxes[current, 0] - boxes[previous, 2]
        if gap < paragraph_break:
            groups[-1].append(current)
        else:
            groups.append([current])
    return groups
|
||
|
||
def resolve_lines(boxes: np.ndarray, paragraph_break: float) -> List[List[int]]:
    """Order boxes to group them in lines.

    Boxes are visited in the order given by ``sort_boxes``. A box joins the
    running line while its vertical centre stays within half the median box
    height of the line's mean vertical centre; otherwise the running line is
    closed and split horizontally with ``resolve_sub_lines``.

    Args:
    ----
        boxes: straight or rotated boxes, as accepted by ``sort_boxes``
        paragraph_break: horizontal gap forwarded to ``resolve_sub_lines``

    Returns:
    -------
        A list of lines, each a list of box indices. Empty when ``boxes``
        contains no box.
    """
    # Nothing to group: without this guard the first-box lookup below raises
    # IndexError on an empty detection result.
    if len(boxes) == 0:
        return []

    idxs, boxes = sort_boxes(boxes)
    y_med = np.median(boxes[:, 3] - boxes[:, 1])

    lines: List[List[int]] = []
    words = [idxs[0]]
    y_center_sum = boxes[idxs[0], [1, 3]].mean()
    for idx in idxs[1:]:
        y_center = boxes[idx, [1, 3]].mean()
        # Distance to the mean vertical centre of the running line
        y_dist = abs(y_center - y_center_sum / len(words))

        if y_dist < y_med / 2:
            words.append(idx)
            y_center_sum += y_center
        else:
            lines.extend(resolve_sub_lines(boxes, words, paragraph_break))
            words, y_center_sum = [idx], y_center

    # The running line always holds at least one word here; flush it.
    lines.extend(resolve_sub_lines(boxes, words, paragraph_break))

    return lines
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
import numpy as np | ||
from typing import Optional, Tuple | ||
|
||
|
||
def estimate_page_angle(polys: np.ndarray) -> float:
    """Estimate the page angle from a batch of oriented rotated polygons.

    Args:
    ----
        polys: (N, 4, 2) array of previously ORIENTED polygons (rectified by
            the classifier); points 0 and 3 are used as the left side and
            points 1 and 2 as the right side, with respect to the reading
            direction.

    Returns:
    -------
        The median angle, counter-clockwise in degrees, or 0.0 when the
        computation raises a floating point error (e.g. a division by zero).
    """
    # Summed (not averaged) side points: the factor 2 cancels in the slope.
    left_x = polys[:, 0, 0] + polys[:, 3, 0]
    left_y = polys[:, 0, 1] + polys[:, 3, 1]
    right_x = polys[:, 1, 0] + polys[:, 2, 0]
    right_y = polys[:, 1, 1] + polys[:, 2, 1]
    try:
        with np.errstate(divide="raise", invalid="raise"):
            # Y axis from top to bottom, hence left minus right for the rise
            slopes = (left_y - right_y) / (right_x - left_x)
            angles = np.arctan(slopes) * 180 / np.pi
            return float(np.median(angles))
    except FloatingPointError:
        return 0.0
|
||
|
||
def remap_boxes(
    loc_preds: np.ndarray, orig_shape: Tuple[int, int], dest_shape: Tuple[int, int]
) -> np.ndarray:
    """Re-express relative rotated boxes for a destination image shape.

    The boxes keep their absolute size; only their relative coordinates are
    recomputed, with the origin image centred inside the destination image.

    Args:
    ----
        loc_preds: (N, 4, 2) array of RELATIVE loc_preds
        orig_shape: (height, width) of the origin image
        dest_shape: (height, width) of the destination image

    Returns:
    -------
        A copy of ``loc_preds`` (N, 4, 2) expressed in the destination frame

    Raises:
    ------
        ValueError: if ``dest_shape`` or ``orig_shape`` does not have length 2
    """
    if len(dest_shape) != 2:
        raise ValueError(f"Mask length should be 2, was found at: {len(dest_shape)}")
    if len(orig_shape) != 2:
        raise ValueError(
            f"Image_shape length should be 2, was found at: {len(orig_shape)}"
        )

    src_h, src_w = orig_shape
    dst_h, dst_w = dest_shape

    # Offset of the origin image once centred in the destination image
    shift_x = (dst_w - src_w) / 2
    shift_y = (dst_h - src_h) / 2

    remapped = loc_preds.copy()
    # relative -> absolute (origin), shift, then absolute -> relative (dest)
    remapped[:, :, 0] = ((loc_preds[:, :, 0] * src_w) + shift_x) / dst_w
    remapped[:, :, 1] = ((loc_preds[:, :, 1] * src_h) + shift_y) / dst_h
    return remapped
|
||
|
||
def rotate_boxes(
    loc_preds: np.ndarray,
    angle: float,
    orig_shape: Tuple[int, int],
    min_angle: float = 1.0,
    target_shape: Optional[Tuple[int, int]] = None,
) -> np.ndarray:
    """Rotate a batch of relative boxes around the centre of the page.

    Straight boxes (xmin, ymin, xmax, ymax, c) are first converted to
    four-point polygons. The rotation is skipped, and the polygons returned
    as they are, when ``abs(angle)`` is below ``min_angle`` or above
    ``90 - min_angle``. If ``target_shape`` is given, the rotated boxes are
    remapped to it afterwards, which removes the padding created by
    rotate_page(expand=True).

    Args:
    ----
        loc_preds: (N, 5) or (N, 4, 2) array of RELATIVE boxes
        angle: angle between -90 and +90 degrees
        orig_shape: (height, width) of the origin image
        min_angle: minimum angle to rotate boxes
        target_shape: shape of the destination image

    Returns:
    -------
        A batch of boxes as four-point polygons (N, 4, 2)
    """
    polys = loc_preds.copy()
    if polys.ndim == 2:
        # Straight boxes -> corners in the order
        # (xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)
        corner_columns = ([0, 1], [2, 1], [2, 3], [0, 3])
        polys = np.stack([polys[:, cols] for cols in corner_columns], axis=1)

    # Negligible angle (or one within min_angle of +/-90): leave boxes as is
    magnitude = abs(angle)
    if magnitude < min_angle or magnitude > 90 - min_angle:
        return polys

    theta = angle * np.pi / 180.0  # degrees -> radians for the np functions
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    rotation = np.array([[cos_t, -sin_t], [sin_t, cos_t]], dtype=polys.dtype)

    height, width = orig_shape[0], orig_shape[1]
    center = (width / 2, height / 2)

    # Work on absolute pixel coordinates so the rotation is not distorted by
    # the aspect ratio, then go back to relative coordinates.
    absolute = np.stack((polys[:, :, 0] * width, polys[:, :, 1] * height), axis=-1)
    turned = center + np.matmul(absolute - center, rotation)
    result: np.ndarray = np.stack(
        (turned[:, :, 0] / width, turned[:, :, 1] / height), axis=-1
    )

    if target_shape is not None:
        result = remap_boxes(result, orig_shape=orig_shape, dest_shape=target_shape)

    return result
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.