diff --git a/OCR/image-alignment/deskew.py b/OCR/image-alignment/deskew.py
index 37fd2144..ec6c642e 100755
--- a/OCR/image-alignment/deskew.py
+++ b/OCR/image-alignment/deskew.py
@@ -12,35 +12,37 @@ import cv2 as cv
 
 
 
-def order_points(inn: np.ndarray) -> np.ndarray:
-    "Reorder points such that the points go in order from top left, top right, bottom right, and bottom left."
-    inn = inn.reshape(4, 2)
-    out = np.zeros([4, 2]).astype(np.float32)
-    s = inn.sum(axis=1)
-    out[0] = inn[np.argmin(s)]
-    out[2] = inn[np.argmax(s)]
-    diff = np.diff(inn, axis=1)
-    out[1] = inn[np.argmin(diff)]
-    out[3] = inn[np.argmax(diff)]
-    return out
-
-
-def dewarp(img: np.ndarray) -> np.ndarray:
+def order_points(quadrilateral: np.ndarray) -> np.ndarray:
+    "Reorder points from a 4x2 input array representing the vertices of a quadrilateral, such that the coordinates of each vertex are arranged in order from top left, top right, bottom right, and bottom left."
+    quadrilateral = quadrilateral.reshape(4, 2)
+    output_quad = np.zeros([4, 2]).astype(np.float32)
+    s = quadrilateral.sum(axis=1)
+    output_quad[0] = quadrilateral[np.argmin(s)]
+    output_quad[2] = quadrilateral[np.argmax(s)]
+    diff = np.diff(quadrilateral, axis=1)
+    output_quad[1] = quadrilateral[np.argmin(diff)]
+    output_quad[3] = quadrilateral[np.argmax(diff)]
+    return output_quad
+
+
+def dewarp(image: np.ndarray) -> np.ndarray:
     # compute contours for an image and find the biggest one by area
-    cnts, hier = cv.findContours(img, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)[-2:]
+    _, contours, _ = cv.findContours(image, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
     biggest_contour = functools.reduce(
-        lambda a, b: b if cv.contourArea(a) < cv.contourArea(b) else a, cnts
+        lambda a, b: b if cv.contourArea(a) < cv.contourArea(b) else a, contours
     )
 
     # simplify to a polygon with (hopefully four) vertices
-    peri = cv.arcLength(biggest_contour, True)
-    approx = cv.approxPolyDP(biggest_contour, 0.01 * peri, True)
+    perimeter = cv.arcLength(biggest_contour, True)
+    approximated = cv.approxPolyDP(biggest_contour, 0.01 * perimeter, True)
 
-    h, w = img.shape
-    dest = np.array([[0, 0], [w, 0], [w, h], [0, h]], dtype=np.float32)
+    height, width = image.shape
+    destination = np.array(
+        [[0, 0], [width, 0], [width, height], [0, height]], dtype=np.float32
+    )
 
-    M = cv.getPerspectiveTransform(order_points(approx), dest)
-    return cv.warpPerspective(img, M, (w, h))
+    M = cv.getPerspectiveTransform(order_points(approximated), destination)
+    return cv.warpPerspective(image, M, (width, height))
 
 
 def parse_args() -> argparse.Namespace:
diff --git a/OCR/image-alignment/perspective-transform.py b/OCR/image-alignment/perspective-transform.py
index 15a14fa5..1492786e 100755
--- a/OCR/image-alignment/perspective-transform.py
+++ b/OCR/image-alignment/perspective-transform.py
@@ -26,7 +26,7 @@ def parse_args():
     return p.parse_args()
 
 
-def make_tform(distortion_scale: float) -> object:
+def make_transform(distortion_scale: float) -> object:
     return transforms.Compose(
         [
             transforms.RandomPerspective(distortion_scale=distortion_scale, p=1),
@@ -43,7 +43,7 @@ def main():
     img = Image.open(args.image)
 
     with concurrent.futures.ProcessPoolExecutor() as executor:
-        futures = [executor.submit(make_tform(ii / 10), img) for ii in range(1, 10)]
+        futures = [executor.submit(make_transform(ii / 10), img) for ii in range(1, 10)]
         for idx, result in enumerate(concurrent.futures.as_completed(futures)):
             new_name = args.output_dir / (
                 args.image.stem + f"-{idx + 1}" + args.image.suffix