-
Notifications
You must be signed in to change notification settings - Fork 1
/
t2t.py
40 lines (29 loc) · 951 Bytes
/
t2t.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import pandas as pd
from transcribe_page import *
from skimage import io
import matplotlib.pyplot as plt
# Script body: pick one training page by id, pull its annotation row(s) out of
# train.csv, and hand them to the helpers imported from transcribe_page.

# Id of the page to process; it is also the stem of the page's .jpg file name.
image = '200021853-00034_1'

# Annotation table, the row(s) belonging to `image`, and the list of every id
# in the 'Image' column.
df_train = pd.read_csv('./data/train/train.csv')
dfs = df_train.loc[df_train['Image'] == image]
# NOTE(review): imagelist is not read anywhere else in this script.
imagelist = df_train['Image'].tolist()

# Disabled debugging snippet. It is kept as a bare string literal, so none of
# it executes. When active, it split each labels string into groups of five
# fields (unicode, x, y, w, h), reduced every box to its centre point, painted
# a [255, 0, 0] square around each centre on the page image, and displayed it.
'''
for img, labels in dfs.values:
if type(labels) == float:
continue
chars = []
for unic, x, y, w, h in np.array(labels.split()).reshape(-1, 5):
chars.append((unic, int(int(x)+int(w)/2), int(int(y)+int(h)/2)))
print(img)
print(chars)
img = io.imread('./data/train/train/{}.jpg'.format(img))
for unic, x, y in chars:
img[y-10:y+10, x-10:x+10, :] = [255, 0, 0]
plt.figure(figsize=(12, 12))
plt.imshow(img)
plt.show()
print(sorted([x[1] for x in chars]))
break
'''

# NOTE(review): charformat comes from transcribe_page; presumably it turns the
# selected rows into the character records transcribe() consumes -- confirm there.
chars = charformat(dfs)

# The image is loaded only to obtain its array shape.
page_path = './data/train/train/{}.jpg'.format(image)
img_shape = io.imread(page_path).shape

# NOTE(review): the result is stored in txt but not printed or saved here.
txt = transcribe(chars, img_shape)