-
Notifications
You must be signed in to change notification settings - Fork 0
/
scan.py
394 lines (324 loc) · 16.1 KB
/
scan.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
# USAGE:
# python scan.py (--images <IMG_DIR> | --image <IMG_PATH>) [-i]
# For example, to scan a single image with interactive mode:
# python scan.py --image sample_images/desk.JPG -i
# To scan all images in a directory automatically:
# python scan.py --images sample_images
# Scanned images will be output to directory named 'output'
from scipy.spatial import distance as dist
import numpy as np
import itertools
import math
import cv2
from pylsd.lsd import lsd
from PIL import Image
import io, base64
class DocScanner(object):
    """An image scanner: finds the four corners of a document in an image
    and produces a top-down, sharpened scan of it."""

    def __init__(self, interactive=False, MIN_QUAD_AREA_RATIO=0.25, MAX_QUAD_ANGLE_RANGE=40):
        """
        Args:
            interactive (boolean): If True, user can adjust screen contour before
                transformation occurs in interactive pyplot window.
            MIN_QUAD_AREA_RATIO (float): A contour will be rejected if its corners
                do not form a quadrilateral that covers at least MIN_QUAD_AREA_RATIO
                of the original image. Defaults to 0.25.
            MAX_QUAD_ANGLE_RANGE (int): A contour will also be rejected if the range
                of its interior angles exceeds MAX_QUAD_ANGLE_RANGE. Defaults to 40.
        """
        self.interactive = interactive
        self.MIN_QUAD_AREA_RATIO = MIN_QUAD_AREA_RATIO
        self.MAX_QUAD_ANGLE_RANGE = MAX_QUAD_ANGLE_RANGE

    def filter_corners(self, corners, min_dist=20):
        """Filters out corners that lie within min_dist of a corner that has
        already been kept; earlier corners in the input win."""
        def predicate(representatives, corner):
            # True when `corner` is far enough from every corner kept so far.
            return all(dist.euclidean(representative, corner) >= min_dist
                       for representative in representatives)

        filtered_corners = []
        for c in corners:
            if predicate(filtered_corners, c):
                filtered_corners.append(c)
        return filtered_corners

    def angle_between_vectors_degrees(self, u, v):
        """Returns the angle between two vectors in degrees.

        The cosine is clamped to [-1, 1] so that floating-point rounding on
        (anti)parallel vectors cannot push it outside acos's domain and raise
        ValueError: math domain error.
        """
        cos_theta = np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))
        return np.degrees(math.acos(max(-1.0, min(1.0, cos_theta))))

    def get_angle(self, p1, p2, p3):
        """
        Returns the angle between the line segment from p2 to p1
        and the line segment from p2 to p3 in degrees.

        NOTE: np.radians is a uniform linear scaling of the coordinates, so it
        does not change the angle between the difference vectors; it is kept
        for byte-compatibility with the original implementation.
        """
        a = np.radians(np.array(p1))
        b = np.radians(np.array(p2))
        c = np.radians(np.array(p3))

        avec = a - b
        cvec = c - b

        return self.angle_between_vectors_degrees(avec, cvec)

    def angle_range(self, quad):
        """
        Returns the range between max and min interior angles of quadrilateral.
        The input quadrilateral must be a numpy array with vertices ordered clockwise
        starting with the top left vertex.
        """
        tl, tr, br, bl = quad
        ura = self.get_angle(tl[0], tr[0], br[0])  # upper-right angle
        ula = self.get_angle(bl[0], tl[0], tr[0])  # upper-left angle
        lra = self.get_angle(tr[0], br[0], bl[0])  # lower-right angle
        lla = self.get_angle(br[0], bl[0], tl[0])  # lower-left angle

        angles = [ura, ula, lra, lla]
        return np.ptp(angles)

    def get_corners(self, img):
        """
        Returns a list of corners ((x, y) tuples) found in the input image. With proper
        pre-processing and filtering, it should output at most 10 potential corners.
        This is a utility function used by get_contour. The input image is expected
        to be rescaled and Canny filtered prior to being passed in.
        """
        lines = lsd(img)

        # massages the output from LSD
        # LSD operates on edges. One "line" has 2 edges, and so we need to combine the edges back into lines
        # 1. separate out the lines into horizontal and vertical lines.
        # 2. Draw the horizontal lines back onto a canvas, but slightly thicker and longer.
        # 3. Run connected-components on the new canvas
        # 4. Get the bounding box for each component, and the bounding box is final line.
        # 5. The ends of each line is a corner
        # 6. Repeat for vertical lines
        # 7. Draw all the final lines onto another canvas. Where the lines overlap are also corners

        corners = []
        if lines is not None:
            # separate out the horizontal and vertical lines, and draw them back onto separate canvases
            # reshape(-1, 5) instead of squeeze() so that a single detected
            # line still unpacks as a row of (x1, y1, x2, y2, width) instead
            # of collapsing to a 1-D array and crashing the unpack below
            lines = lines.reshape(-1, 5).astype(np.int32).tolist()
            horizontal_lines_canvas = np.zeros(img.shape, dtype=np.uint8)
            vertical_lines_canvas = np.zeros(img.shape, dtype=np.uint8)
            for line in lines:
                x1, y1, x2, y2, _ = line
                if abs(x2 - x1) > abs(y2 - y1):
                    # mostly-horizontal: order endpoints left-to-right, extend by 5px each side
                    (x1, y1), (x2, y2) = sorted(((x1, y1), (x2, y2)), key=lambda pt: pt[0])
                    cv2.line(horizontal_lines_canvas, (max(x1 - 5, 0), y1), (min(x2 + 5, img.shape[1] - 1), y2), 255, 2)
                else:
                    # mostly-vertical: order endpoints top-to-bottom, extend by 5px each side
                    (x1, y1), (x2, y2) = sorted(((x1, y1), (x2, y2)), key=lambda pt: pt[1])
                    cv2.line(vertical_lines_canvas, (x1, max(y1 - 5, 0)), (x2, min(y2 + 5, img.shape[0] - 1)), 255, 2)

            lines = []

            # find the horizontal lines (connected-components -> bounding boxes -> final lines)
            (contours, hierarchy) = cv2.findContours(horizontal_lines_canvas, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
            contours = sorted(contours, key=lambda c: cv2.arcLength(c, True), reverse=True)[:2]
            horizontal_lines_canvas = np.zeros(img.shape, dtype=np.uint8)
            for contour in contours:
                contour = contour.reshape((contour.shape[0], contour.shape[2]))
                min_x = np.amin(contour[:, 0], axis=0) + 2
                max_x = np.amax(contour[:, 0], axis=0) - 2
                left_y = int(np.average(contour[contour[:, 0] == min_x][:, 1]))
                right_y = int(np.average(contour[contour[:, 0] == max_x][:, 1]))
                lines.append((min_x, left_y, max_x, right_y))
                cv2.line(horizontal_lines_canvas, (min_x, left_y), (max_x, right_y), 1, 1)
                corners.append((min_x, left_y))
                corners.append((max_x, right_y))

            # find the vertical lines (connected-components -> bounding boxes -> final lines)
            (contours, hierarchy) = cv2.findContours(vertical_lines_canvas, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
            contours = sorted(contours, key=lambda c: cv2.arcLength(c, True), reverse=True)[:2]
            vertical_lines_canvas = np.zeros(img.shape, dtype=np.uint8)
            for contour in contours:
                contour = contour.reshape((contour.shape[0], contour.shape[2]))
                min_y = np.amin(contour[:, 1], axis=0) + 2
                max_y = np.amax(contour[:, 1], axis=0) - 2
                top_x = int(np.average(contour[contour[:, 1] == min_y][:, 0]))
                bottom_x = int(np.average(contour[contour[:, 1] == max_y][:, 0]))
                lines.append((top_x, min_y, bottom_x, max_y))
                cv2.line(vertical_lines_canvas, (top_x, min_y), (bottom_x, max_y), 1, 1)
                corners.append((top_x, min_y))
                corners.append((bottom_x, max_y))

            # find the corners: both canvases were redrawn with pixel value 1,
            # so pixels where a horizontal and a vertical line overlap sum to 2
            corners_y, corners_x = np.where(horizontal_lines_canvas + vertical_lines_canvas == 2)
            corners += zip(corners_x, corners_y)

        # remove corners in close proximity
        corners = self.filter_corners(corners)
        return corners

    def is_valid_contour(self, cnt, IM_WIDTH, IM_HEIGHT):
        """Returns True if the contour satisfies all requirements set at instantiation"""
        return (len(cnt) == 4 and cv2.contourArea(cnt) > IM_WIDTH * IM_HEIGHT * self.MIN_QUAD_AREA_RATIO
            and self.angle_range(cnt) < self.MAX_QUAD_ANGLE_RANGE)

    def get_contour(self, rescaled_image):
        """
        Returns a numpy array of shape (4, 2) containing the vertices of the four corners
        of the document in the image. It considers the corners returned from get_corners()
        and uses heuristics to choose the four corners that most likely represent
        the corners of the document. If no valid quadrilateral is found (too small,
        or interior angles too uneven), it falls back to the whole image's corners.
        """
        # these constants are carefully chosen
        MORPH = 9
        CANNY = 84

        IM_HEIGHT, IM_WIDTH, _ = rescaled_image.shape

        # convert the image to grayscale and blur it slightly
        gray = cv2.cvtColor(rescaled_image, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (7,7), 0)

        # dilate helps to remove potential holes between edge segments
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(MORPH,MORPH))
        dilated = cv2.morphologyEx(gray, cv2.MORPH_CLOSE, kernel)

        # find edges and mark them in the output map using the Canny algorithm
        edged = cv2.Canny(dilated, 0, CANNY)
        test_corners = self.get_corners(edged)

        approx_contours = []

        if len(test_corners) >= 4:
            quads = []

            # every 4-corner combination is a candidate quadrilateral
            for quad in itertools.combinations(test_corners, 4):
                points = np.array(quad)
                points = order_points(points)
                points = np.array([[p] for p in points], dtype = "int32")
                quads.append(points)

            # get top five quadrilaterals by area
            quads = sorted(quads, key=cv2.contourArea, reverse=True)[:5]
            # sort candidate quadrilaterals by their angle range, which helps remove outliers
            quads = sorted(quads, key=self.angle_range)

            approx = quads[0]
            if self.is_valid_contour(approx, IM_WIDTH, IM_HEIGHT):
                approx_contours.append(approx)

            # for debugging: uncomment the code below to draw the corners and contour found
            # by get_corners() and overlay it on the image

            # cv2.drawContours(rescaled_image, [approx], -1, (20, 20, 255), 2)
            # plt.scatter(*zip(*test_corners))
            # plt.imshow(rescaled_image)
            # plt.show()

        # also attempt to find contours directly from the edged image, which occasionally
        # produces better results
        (cnts, hierarchy) = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

        # loop over the contours
        for c in cnts:
            # approximate the contour
            approx = cv2.approxPolyDP(c, 80, True)
            if self.is_valid_contour(approx, IM_WIDTH, IM_HEIGHT):
                approx_contours.append(approx)
                break

        # If we did not find any valid contours, just use the whole image
        if not approx_contours:
            TOP_RIGHT = (IM_WIDTH, 0)
            BOTTOM_RIGHT = (IM_WIDTH, IM_HEIGHT)
            BOTTOM_LEFT = (0, IM_HEIGHT)
            TOP_LEFT = (0, 0)
            screenCnt = np.array([[TOP_RIGHT], [BOTTOM_RIGHT], [BOTTOM_LEFT], [TOP_LEFT]])
        else:
            screenCnt = max(approx_contours, key=cv2.contourArea)

        return screenCnt.reshape(4, 2)

    def scan_corners(self, image_str):
        """Decodes a base64-encoded image and returns the document's four
        corners as a (4, 2) numpy array (see get_contour)."""
        sbuf = io.BytesIO()
        sbuf.write(base64.b64decode(image_str))
        pimg = Image.open(sbuf)
        # PIL decodes to RGB; OpenCV routines expect BGR channel order
        image = cv2.cvtColor(np.array(pimg), cv2.COLOR_RGB2BGR)
        assert(image is not None)

        # get the contour of the document
        screenCnt = self.get_contour(image)
        return screenCnt

    def scan(self, image_str, screenCnt):
        """Warps the base64-encoded image to the quadrilateral screenCnt,
        converts it to grayscale, sharpens it, and returns the result as
        PNG-encoded bytes.

        Args:
            image_str: base64-encoded image bytes.
            screenCnt: four (x, y) document corners, e.g. from scan_corners().

        Returns:
            bytes: the scanned image, PNG-encoded.
        """
        sbuf = io.BytesIO()
        sbuf.write(base64.b64decode(image_str))
        pimg = Image.open(sbuf)
        # PIL decodes to RGB; OpenCV routines expect BGR channel order
        image = cv2.cvtColor(np.array(pimg), cv2.COLOR_RGB2BGR)

        orig = image.copy()

        # apply the perspective transformation
        screenCnt = np.array(screenCnt)
        warped = four_point_transform(orig, screenCnt)

        # convert the warped image to grayscale
        gray = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)

        # sharpen image via unsharp masking: subtract a Gaussian-blurred copy
        sharpen = cv2.GaussianBlur(gray, (0,0), 3)
        sharpen = cv2.addWeighted(gray, 1.5, sharpen, -0.5, 0)

        # NOTE: an adaptive threshold for a pure black-and-white effect is
        # deliberately disabled:
        # thresh = cv2.adaptiveThreshold(sharpen, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 21, 15)

        # encode the result as PNG bytes (the output format is PNG, not JPEG)
        img_crop_pil = Image.fromarray(sharpen)
        byte_io = io.BytesIO()
        img_crop_pil.save(byte_io, format="png")
        png_buffer = byte_io.getvalue()
        byte_io.close()
        return png_buffer
def translate(image, x, y):
    """Shift `image` by (x, y) pixels; the output has the same size as the input."""
    rows, cols = image.shape[0], image.shape[1]
    # Affine matrix for a pure translation.
    matrix = np.float32([[1, 0, x], [0, 1, y]])
    return cv2.warpAffine(image, matrix, (cols, rows))
def rotate(image, angle, center = None, scale = 1.0):
    """Rotate `image` by `angle` degrees about `center` (defaults to the
    image center), optionally scaling by `scale`."""
    height, width = image.shape[:2]
    # Default pivot is the geometric center of the image.
    pivot = (width / 2, height / 2) if center is None else center
    matrix = cv2.getRotationMatrix2D(pivot, angle, scale)
    return cv2.warpAffine(image, matrix, (width, height))
def resize(image, width = None, height = None, inter = cv2.INTER_AREA):
    """Resize `image` to the given width or height, preserving aspect ratio.

    If both width and height are None the image is returned unchanged;
    if both are given, width takes precedence.
    """
    (h, w) = image.shape[:2]
    if width is None and height is None:
        return image
    if width is not None:
        # Scale so the output is exactly `width` pixels wide.
        ratio = width / float(w)
        new_size = (width, int(h * ratio))
    else:
        # Scale so the output is exactly `height` pixels tall.
        ratio = height / float(h)
        new_size = (int(w * ratio), height)
    return cv2.resize(image, new_size, interpolation=inter)
def order_points(pts):
# sort the points based on their x-coordinates
xSorted = pts[np.argsort(pts[:, 0]), :]
# grab the left-most and right-most points from the sorted
# x-roodinate points
leftMost = xSorted[:2, :]
rightMost = xSorted[2:, :]
# now, sort the left-most coordinates according to their
# y-coordinates so we can grab the top-left and bottom-left
# points, respectively
leftMost = leftMost[np.argsort(leftMost[:, 1]), :]
(tl, bl) = leftMost
# now that we have the top-left coordinate, use it as an
# anchor to calculate the Euclidean distance between the
# top-left and right-most points; by the Pythagorean
# theorem, the point with the largest distance will be
# our bottom-right point
D = dist.cdist(tl[np.newaxis], rightMost, "euclidean")[0]
(br, tr) = rightMost[np.argsort(D)[::-1], :]
# return the coordinates in top-left, top-right,
# bottom-right, and bottom-left order
return np.array([tl, tr, br, bl], dtype = "float32")
def four_point_transform(image, pts):
    """Warp `image` so that the quadrilateral `pts` becomes a top-down,
    axis-aligned rectangle ("bird's eye view")."""
    # Put the corners into the canonical order:
    # top-left, top-right, bottom-right, bottom-left.
    rect = order_points(pts)
    (tl, tr, br, bl) = rect

    # Output width: the longer of the two horizontal edges.
    bottom_width = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    top_width = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    out_width = max(int(bottom_width), int(top_width))

    # Output height: the longer of the two vertical edges.
    right_height = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    left_height = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    out_height = max(int(right_height), int(left_height))

    # Destination rectangle, in the same corner order as `rect`.
    dst = np.array(
        [[0, 0],
         [out_width - 1, 0],
         [out_width - 1, out_height - 1],
         [0, out_height - 1]],
        dtype = "float32")

    # Solve the perspective (homography) matrix and apply it.
    M = cv2.getPerspectiveTransform(rect, dst)
    return cv2.warpPerspective(image, M, (out_width, out_height))
def main_scan_corners(image_str):
    """Locate the document corners in a base64-encoded image.

    Args:
        image_str: base64-encoded image bytes.

    Returns:
        (4, 2) numpy array of corner coordinates.
    """
    # Non-interactive: corners are detected automatically. The original
    # passed None positionally; `interactive` is documented as a boolean.
    scanner = DocScanner(interactive=False)
    return scanner.scan_corners(image_str)
def main_scan(corners, image_str):
    """Warp and sharpen a base64-encoded image using the given corners.

    Args:
        corners: sequence of four (x, y) document corner points.
        image_str: base64-encoded image bytes.

    Returns:
        bytes: the scanned (warped, sharpened) image, PNG-encoded.
    """
    # Non-interactive: the supplied corners are used as-is. The original
    # passed None positionally; `interactive` is documented as a boolean.
    scanner = DocScanner(interactive=False)
    return scanner.scan(image_str, corners)