-
Notifications
You must be signed in to change notification settings - Fork 0
/
words.py
162 lines (119 loc) · 4.83 KB
/
words.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import cv2
import numpy as np
from copy import deepcopy
from collections import namedtuple
from lib import calc_bbox, X, Y, WIDTH, HEIGHT, BboxImg, percent_inc_border, add_inc_border
from characters import estimate_avg_char_size
#import PIL
# When True, the extraction functions in this file show their intermediate /
# annotated images with cv2.imshow and block on cv2.waitKey(0) until a key is
# pressed, so leave it False for unattended runs.
DEBUG = True
def _extract_lines(img):
    ''' Returns a list of images of each line, ordered from top to bottom.

    Canny edges are morphologically closed with a wide, short rectangular
    kernel so that the characters of one line merge into a single blob.  The
    external contours of those blobs are turned into bounding boxes by
    calc_bbox, and each box is cropped out of img.

    img is converted with cv2.COLOR_RGB2GRAY, so it must be a 3-channel image.
    NOTE(review): cv2.imread yields BGR rather than RGB -- confirm which
    channel order callers pass in.

    When DEBUG is true the edge image and the detected boxes are displayed
    with cv2.imshow, and the call blocks on cv2.waitKey(0) each time.
    '''
    gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    height, width = img.shape[0:2]

    # Do edge detection using canny edge detection
    edges_img = cv2.Canny(gray_img, 10, 100)
    if DEBUG:
        cv2.imshow('edges_img', edges_img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    # Close the image to form lines.  The kernel is sized from the mean
    # character size; dimensions are forced to ints and clamped to >= 1 so a
    # small (or non-integer) estimate cannot yield an invalid kernel size.
    m = estimate_avg_char_size(img)
    kernel_size = (max(1, int(m * 5)), max(1, int(m / 3)))
    rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
    line_img = cv2.morphologyEx(edges_img, cv2.MORPH_CLOSE, rect_kernel)

    # Binarise with a fixed threshold of 0: every non-zero pixel becomes 255.
    # (This is a plain THRESH_BINARY call, not Otsu's method.)
    line_img = cv2.threshold(line_img, 0, 255, cv2.THRESH_BINARY)[1]

    # Find the bounding boxes.  findContours returns (image, contours,
    # hierarchy) on OpenCV 3.x but (contours, hierarchy) on 2.x/4.x; the
    # contours are the second-to-last element in every version.
    contours = cv2.findContours(
        line_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    bboxes = calc_bbox(contours, width, height, add_inc_border, m/4, m/4)

    if DEBUG:
        debug_img = deepcopy(img)
        for bbox in bboxes:
            x, y, w, h = bbox
            cv2.rectangle(debug_img, (x, y), (x + w, y + h), (0, 255, 0), 1)
        cv2.imshow('line_debug_img', debug_img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    # Split into one cropped image per bounding box
    res = [
        BboxImg(bbox, img[bbox[Y]:bbox[Y] + bbox[HEIGHT],
                          bbox[X]:bbox[X] + bbox[WIDTH]])
        for bbox in bboxes
    ]

    # Sort the lines by ascending y (top of the image first)
    res.sort(key=lambda k: k.bbox[Y])
    return [i.img for i in res]
def _extract_words_line(img):
    ''' Returns a list of images of each word in a line, ordered left to right.

    Works like the line extraction step but closes the Canny edges with a
    much narrower kernel, so that only the characters of a single word merge
    into one blob.  Each blob's bounding box (from calc_bbox) is cropped out
    of img.

    img is converted with cv2.COLOR_RGB2GRAY, so it must be a 3-channel image.

    When DEBUG is true the detected boxes are displayed with cv2.imshow and
    the call blocks on cv2.waitKey(0).
    '''
    gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    height, width = img.shape[0:2]
    edges_img = cv2.Canny(gray_img, 10, 100)

    # Dynamically size the kernel according to character size in the line.
    # Clamp each dimension to >= 1: int(m/2.5) and int(m/3) truncate to 0 for
    # small estimates, which is not a valid kernel size.
    m = estimate_avg_char_size(img)
    kernel_size = (max(1, int(m / 2.5)), max(1, int(m / 3)))
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
    word_img = cv2.morphologyEx(edges_img, cv2.MORPH_CLOSE, kernel)

    # Fixed threshold of 0: every non-zero pixel becomes 255
    word_img = cv2.threshold(word_img, 0, 255, cv2.THRESH_BINARY)[1]

    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 2.x/4.x; [-2] picks the contours in all cases.
    contours = cv2.findContours(
        word_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    bboxes = calc_bbox(contours, width, height, add_inc_border, m/6, m/6)

    if DEBUG:
        debug_img = deepcopy(img)
        for bbox in bboxes:
            x, y, w, h = bbox
            cv2.rectangle(debug_img, (x, y), (x + w, y + h), (0, 255, 0), 1)
        cv2.imshow('word_debug_img', debug_img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    # Crop one image per bounding box
    res = [
        BboxImg(bbox, img[bbox[Y]:bbox[Y] + bbox[HEIGHT],
                          bbox[X]:bbox[X] + bbox[WIDTH]])
        for bbox in bboxes
    ]

    # Sort the words by ascending x (leftmost first)
    res.sort(key=lambda k: k.bbox[X])
    return [i.img for i in res]
def extract_words(img):
    ''' Splits img into lines and every line into its word images.

    Returns a list with one entry per line (in the order _extract_lines
    yields them); each entry is the list of word images that
    _extract_words_line produced for that line.
    '''
    return [_extract_words_line(line_img) for line_img in _extract_lines(img)]
def extract_regions(img):
    '''
    Extracts text regions from an image.

    Intended for images from which non-text content has already been
    filtered out (the original note mentions using SWT for that filtering).

    Canny edges are dilated with a large square kernel so that nearby text
    merges into regions; the bounding box of each region's external contour
    (from calc_bbox) is cropped out of img.

    Returns a pair (images, bboxes): the cropped region images and their
    bounding boxes, both ordered by ascending x of the box.

    When DEBUG is true the detected boxes are displayed with cv2.imshow and
    the call blocks on cv2.waitKey(0).
    '''
    gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    height, width = img.shape[0:2]
    edges_img = cv2.Canny(gray_img, 10, 100)

    # Size the kernel from the mean character size.  Force an int and clamp
    # to >= 1 so a non-integer or zero estimate cannot yield an invalid size.
    m = estimate_avg_char_size(img)
    side = max(1, int(5 * m))
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (side, side))
    region_img = cv2.dilate(edges_img, kernel, iterations=1)

    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 2.x/4.x; [-2] picks the contours in all cases.
    contours = cv2.findContours(
        region_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    bboxes = calc_bbox(contours, width, height, add_inc_border, m, m)

    if DEBUG:
        debug_img = deepcopy(img)
        for bbox in bboxes:
            x, y, w, h = bbox
            cv2.rectangle(debug_img, (x, y), (x + w, y + h), (0, 255, 0), 1)
        cv2.imshow('region_debug_img', debug_img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    # Crop one image per bounding box
    res = [
        BboxImg(bbox, img[bbox[Y]:bbox[Y] + bbox[HEIGHT],
                          bbox[X]:bbox[X] + bbox[WIDTH]])
        for bbox in bboxes
    ]

    # Sort the regions by ascending x (leftmost first)
    res.sort(key=lambda k: k.bbox[X])
    return [i.img for i in res], [i.bbox for i in res]