-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpreprocess.py
83 lines (66 loc) · 2.81 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import cv2
import numpy as np
from PIL import Image
def process_db(db, train_mode):
    """Extract one preprocessed image per annotated character in ``db``.

    For every entry under ``db['data']`` the pixel array is scaled by
    ``1 / 255``, and every character of every word listed in the entry's
    ``'txt'`` attribute is cropped with ``preprocess_img`` using the matching
    slice of the ``'charBB'`` attribute.

    Args:
        db: Mapping-like object with a ``'data'`` member. Each entry must
            support ``entry[:]`` (pixel data) and expose ``entry.attrs`` with
            the keys ``'txt'`` and ``'charBB'`` (plus ``'font'`` when
            ``train_mode`` is true). Presumably an open h5py file -- confirm
            against the caller.
        train_mode: When true, the per-character font labels stored in the
            ``'font'`` attribute are collected as well.

    Returns:
        A tuple ``(list_words, list_chars_fonts, list_char_images)``:
        the decoded words in iteration order, the font label of every
        character (empty when ``train_mode`` is false), and the preprocessed
        image of every character.
    """
    print("Stage: preproccesing data")
    data = db['data']
    list_words = []
    list_chars_fonts = []
    list_char_images = []

    for curr_img in data:
        entry = data[curr_img]
        attrs = entry.attrs
        img = entry[:] / 255.0  # normalize pixels
        font = attrs['font'] if train_mode else None
        char_bb = attrs['charBB']

        # char_index runs over all characters of the image, across words:
        # the bounding boxes and font labels are indexed per image.
        # NOTE(review): this assumes every character of every word has its
        # own bounding box (e.g. no whitespace inside a word) -- confirm.
        char_index = 0
        for raw_word in attrs['txt']:
            # Words may be stored as bytes or already as str, depending on
            # how the attribute was written and read back.
            if isinstance(raw_word, bytes):
                word = raw_word.decode('UTF-8')
            else:
                word = raw_word
            list_words.append(word)

            for _ in word:
                processed_char_img = preprocess_img(
                    img, char_bb[:, :, char_index])
                if train_mode:
                    list_chars_fonts.append(font[char_index])
                list_char_images.append(processed_char_img)
                char_index += 1

    return list_words, list_chars_fonts, list_char_images
def preprocess_img(img, char_bb):
    """Rectify one character's bounding box into a 32x32 grayscale crop.

    The box corners are mapped by a homography onto an axis-aligned
    108x108 square placed inside a 128x128 canvas (10 px margin on every
    side); the warped canvas is converted to grayscale and resized to 32x32.

    Args:
        img: Source image array, passed to ``cv2.warpPerspective``.
            Presumably a 3-channel colour image -- confirm against caller.
        char_bb: Bounding-box corners; transposed before use, so presumably
            shaped (2, 4) with one column per corner in the order top-left,
            top-right, bottom-right, bottom-left -- confirm.

    Returns:
        The resized 32x32 grayscale crop as a numpy array.
    """
    margin_x, margin_y = 10, 10
    box_w, box_h = 108, 108

    # Edges of the target square inside the margin-padded canvas.
    left, top = margin_x, margin_y
    right, bottom = margin_x + box_w, margin_y + box_h

    # Target corners: top-left, top-right, bottom-right, bottom-left.
    target_corners = np.array([[left, top],
                               [right, top],
                               [right, bottom],
                               [left, bottom]])

    # Full canvas size including margins: (128, 128).
    canvas_size = (box_w + 2 * margin_x, box_h + 2 * margin_y)

    # findHomography returns (matrix, mask); only the matrix is needed.
    homography, _ = cv2.findHomography(char_bb.transpose(), target_corners)
    warped = cv2.warpPerspective(img, homography, canvas_size)

    # NOTE(review): the conversion uses BGR channel weights -- confirm the
    # channel order of ``img``.
    gray = cv2.cvtColor(np.float32(warped), cv2.COLOR_BGR2GRAY)

    # The final downscale goes through PIL.
    return np.array(Image.fromarray(gray).resize((32, 32)))