OpenCVで数独グリッドのセルを取得する方法は？

Question

ここ数日、写真から数独のグリッドを取り出そうとしているのですが、グリッド内の小さな正方形（セル）を取得できずに苦労しています。下の写真を対象にしています。Cannyフィルターで画像を処理すればうまくいくと考えていましたが、各正方形の輪郭をすべて取得することはできませんでした。次に、適応的しきい値処理、大津の二値化、通常のしきい値処理を試しましたが、いずれの場合も小さな正方形をすべて検出することはできませんでした。

最終的な目標は、数値を含むセルを取得し、pytorchで数値を認識することです。そのため、数値のきれいな画像を用意して、認識が失敗しないようにします:)

これを達成する方法について誰かがアイデアを持っていますか？よろしくお願いします！：D

最終的な目標は、数値を含むセルを取得し、pytorchで数値を認識することです。そのため、数値のきれいな画像を用意して、認識が失敗しないようにします:)

これを達成する方法について誰かがアイデアを持っていますか？よろしくお願いします！：D

nathancy · Accepted Answer

考えられる解決策は次のとおりです。

バイナリイメージを取得します。 イメージをグレースケールに変換し、適応的しきい値処理を適用します。
すべての数字とノイズを除去してボックスのみを分離します。 必要なのは個々のセルだけなので、輪郭の面積（contour area）でフィルタリングして数字を取り除きます。
グリッド線を修正します。 形態学的クロージングを水平および垂直カーネルで実行して、グリッド線を修復します。
各セルを上から下、左から右の順序で並べ替えます。 imutils.contours.sort_contours() に top-to-bottom および left-to-right パラメータを指定して、各セルを順番に整理します。

これは、初期のバイナリイメージ（左）と、数字を除去してグリッド線を修復し、反転したイメージ（右）です。

これは各セルの反復の視覚化です

各セルで検出された数

コード

import cv2
from imutils import contours
import numpy as np

# Script: isolate the 81 cells of a sudoku grid and step through them in
# reading order (top-to-bottom, each row left-to-right).

# Load image, grayscale, and adaptive threshold
image = cv2.imread('1.png')
if image is None:
    # cv2.imread signals an unreadable/missing file by returning None.
    raise FileNotFoundError("could not read image '1.png'")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                               cv2.THRESH_BINARY_INV, 57, 5)

# Filter out all numbers and noise to isolate only boxes
cnts = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns 2 or 3 values depending on the OpenCV version.
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    area = cv2.contourArea(c)
    if area < 1000:
        # Paint small contours (digits, specks) black so only grid lines remain.
        cv2.drawContours(thresh, [c], -1, (0, 0, 0), -1)

# Fix horizontal and vertical lines
vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 5))
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, vertical_kernel, iterations=9)
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 1))
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, horizontal_kernel, iterations=4)

# Sort by top to bottom and each row by left to right
invert = 255 - thresh
cnts = cv2.findContours(invert, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
(cnts, _) = contours.sort_contours(cnts, method="top-to-bottom")

sudoku_rows = []
row = []
for c in cnts:
    area = cv2.contourArea(c)
    if area < 50000:
        row.append(c)
        # Close the row once it holds nine cells. Counting the cells actually
        # kept (instead of the raw contour index) keeps rows aligned even when
        # an oversized contour was skipped by the area filter above.
        if len(row) == 9:
            (sorted_row, _) = contours.sort_contours(row, method="left-to-right")
            sudoku_rows.append(sorted_row)
            row = []

# Iterate through each box
for row in sudoku_rows:
    for c in row:
        mask = np.zeros(image.shape, dtype=np.uint8)
        cv2.drawContours(mask, [c], -1, (255, 255, 255), -1)
        result = cv2.bitwise_and(image, mask)
        # Everything outside the current cell is shown as white.
        result[mask == 0] = 255
        cv2.imshow('result', result)
        cv2.waitKey(175)

cv2.imshow('thresh', thresh)
cv2.imshow('invert', invert)
cv2.waitKey()

注：並べ替えのアイデアは、ルービックキューブソルバーの色抽出に関する以前の回答を基にしています。

user2693313 · Answer

画像にぴったりと合った数独グリッドだけが含まれている場合、それを達成するための1つの大まかな方法は、画像を等しい9X9グリッドに分割し、各グリッドで数値を抽出することです。

Pygirl · Answer

手順：

画像前処理（クローズオペレーション）
数独の正方形（盤面の外枠）を見つけてマスク画像を作成する
縦線を見つける
水平線を見つける
グリッドポイントの検索
欠陥を修正する
各セルから数字を抽出する

コード：

# ==========import the necessary packages============
import imutils
import numpy as np
import cv2
# NOTE: four_point_transform is also defined below; that local definition
# shadows this import once the module has finished loading.
from transform import four_point_transform
from PIL import Image
import pytesseract
import math
from skimage.filters import threshold_local


def _grab_contours(found):
    """Return the contour list from a ``cv2.findContours`` result.

    ``findContours`` returns ``(contours, hierarchy)`` or
    ``(image, contours, hierarchy)`` depending on the OpenCV version, so the
    contour list is picked by the length of the returned tuple.
    """
    return found[0] if len(found) == 2 else found[1]


# =============== For Transformation ==============
def order_points(pts):
    """Order four points as top-left, top-right, bottom-right, bottom-left.

    Args:
        pts: array of four (x, y) points (indexed as ``pts[k]`` and summed
            along axis 1).

    Returns:
        A (4, 2) float32 array with the points in the order above.
    """
    rect = np.zeros((4, 2), dtype="float32")

    # the top-left point will have the smallest sum, whereas
    # the bottom-right point will have the largest sum
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]

    # now, compute the difference between the points, the
    # top-right point will have the smallest difference,
    # whereas the bottom-left will have the largest difference
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]

    # return the ordered coordinates
    return rect


def four_point_transform(image, pts):
    """Warp the quadrilateral ``pts`` of ``image`` to a top-down rectangle.

    Args:
        image: source image passed to ``cv2.warpPerspective``.
        pts: four corner points, in any order.

    Returns:
        The warped image, sized to the longest opposing edges of the
        quadrilateral.
    """
    # obtain a consistent order of the points and unpack them individually
    rect = order_points(pts)
    (tl, tr, br, bl) = rect

    # width of the new image: the larger of the bottom and top edge lengths
    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    maxWidth = max(int(widthA), int(widthB))

    # height of the new image: the larger of the right and left edge lengths
    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    maxHeight = max(int(heightA), int(heightB))

    # destination points for the "birds eye view", in top-left, top-right,
    # bottom-right, bottom-left order
    dst = np.array([
        [0, 0],
        [maxWidth - 1, 0],
        [maxWidth - 1, maxHeight - 1],
        [0, maxHeight - 1]], dtype="float32")

    # compute the perspective transform matrix and then apply it
    M = cv2.getPerspectiveTransform(rect, dst)
    warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))

    # return the warped image
    return warped


############## To show image ##############
def show_image(img, title):
    """Show ``img`` in a window named ``title`` until a key is pressed."""
    cv2.imshow(title, img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


def find_largest_feature(inp_img, scan_tl=None, scan_br=None):
    """Keep only the largest connected white structure of an image.

    Every white (255) region inside the scan window is flood-filled and the
    seed of the region with the largest value reported by ``cv2.floodFill``
    (its first return value) is remembered. That region is painted white and
    everything else black.

    Args:
        inp_img: image read pixel-wise with ``.item(y, x)``; it is copied and
            left untouched. Presumably single-channel with values 0/255.
        scan_tl: ``[x, y]`` top-left of the scan window (default ``[0, 0]``).
        scan_br: ``[x, y]`` bottom-right of the scan window (default
            ``[width, height]``).

    Returns:
        A new image containing only the largest feature.
    """
    img = inp_img.copy()  # Copy the image, leaving the original untouched
    height, width = img.shape[:2]

    max_area = 0
    seed_point = (None, None)

    if scan_tl is None:
        scan_tl = [0, 0]
    if scan_br is None:
        scan_br = [width, height]

    # Loop through the image
    for x in range(scan_tl[0], scan_br[0]):
        for y in range(scan_tl[1], scan_br[1]):
            # Only operate on light or white squares. The bounds test comes
            # first so an oversized scan window cannot index past the image.
            if x < width and y < height and img.item(y, x) == 255:
                # Note that .item() takes its input as y, x
                area = cv2.floodFill(img, None, (x, y), 64)
                if area[0] > max_area:
                    # The largest filled area should be the grid/digit
                    max_area = area[0]
                    seed_point = (x, y)

    # Colour everything grey (compensates for features outside of the
    # scanning range)
    for x in range(width):
        for y in range(height):
            if img.item(y, x) == 255:
                cv2.floodFill(img, None, (x, y), 64)

    # Mask that is 2 pixels bigger than the image
    mask = np.zeros((height + 2, width + 2), np.uint8)

    # Highlight the main feature
    if all([p is not None for p in seed_point]):
        cv2.floodFill(img, mask, seed_point, 255)

    for x in range(width):
        for y in range(height):
            if img.item(y, x) == 64:
                # Hide anything that isn't the main feature
                cv2.floodFill(img, mask, (x, y), 0)

    return img


################# Preprocessing of sudoku image ###############
def _warp_to_grid(orig, image, ratio):
    """Find the largest 4-cornered contour in ``image`` and warp ``orig`` to it.

    Args:
        orig: full-resolution image that is actually warped.
        image: resized copy of ``orig`` used for contour detection.
        ratio: scale factor mapping ``image`` coordinates back to ``orig``.

    Returns:
        The perspective-corrected crop of ``orig``.

    Raises:
        ValueError: if none of the five largest contours approximates to a
            quadrilateral.
    """
    gray = cv2.GaussianBlur(image, (5, 5), 0)
    gray = cv2.cvtColor(gray, cv2.COLOR_BGR2GRAY)

    # Divide by the morphological closing and rescale to 0..255 before
    # running edge detection.
    kernel1 = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11))
    close = cv2.morphologyEx(gray, cv2.MORPH_CLOSE, kernel1)
    div = np.float32(gray) / (close)
    res = np.uint8(cv2.normalize(div, div, 0, 255, cv2.NORM_MINMAX))
    edged = cv2.Canny(res, 75, 200)

    cnts = _grab_contours(
        cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE))
    cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

    screenCnt = None
    # loop over the contours
    for c in cnts:
        # approximate the contour
        peri = cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, 0.02 * peri, True)
        # if the approximated contour has four points, assume it is the grid
        if len(approx) == 4:
            screenCnt = approx
            break

    if screenCnt is None:
        raise ValueError("no four-cornered contour found for the sudoku grid")

    # apply the four point transform to obtain a top-down view of the
    # original image
    return four_point_transform(orig, screenCnt.reshape(4, 2) * ratio)


def preprocess(image, case):
    """Resize, optionally perspective-correct, and threshold a sudoku image.

    Args:
        image: BGR input image.
        case: the perspective transform is applied only when ``case == True``.

    Returns:
        A tuple ``(th3, warped1, warped)``: the inverted adaptive threshold
        of the (possibly warped) image, that image resized to 610x610, and
        the image itself.

    Raises:
        ValueError: if ``case`` is true and no grid quadrilateral is found.
    """
    ratio = image.shape[0] / 500.0
    orig = image.copy()
    image = imutils.resize(image, height=500)

    # Deliberately an equality test rather than truthiness: callers are known
    # to pass non-bool values (e.g. the string "False"), which must not enable
    # the transform.
    if case == True:  # noqa: E712
        warped = _warp_to_grid(orig, image, ratio)
    else:
        warped = image

    warped1 = cv2.resize(warped, (610, 610))
    warp = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
    T = threshold_local(warp, 11, offset=10, method="gaussian")
    warp = (warp > T).astype("uint8") * 255
    th3 = cv2.adaptiveThreshold(warp, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                cv2.THRESH_BINARY_INV, 11, 2)
    # show_image(warped1, "preprocessed")
    return th3, warped1, warped


def grids(img, warped2):
    """Draw an ideal 9x9 grid on a blank canvas and on ``warped2``.

    Args:
        img: unused; kept for interface compatibility.
        warped2: image that receives the grid overlay (modified in place).

    Returns:
        A 610x610x3 black image with the grid lines drawn in white.
    """
    canvas = np.zeros((610, 610, 3), np.uint8)
    step_y = canvas.shape[0] // 9
    step_x = canvas.shape[1] // 9
    for i in range(10):
        cv2.line(canvas, (0, step_y * i), (canvas.shape[1], step_y * i),
                 (255, 255, 255), 3, 1)
        cv2.line(warped2, (0, step_y * i), (canvas.shape[1], step_y * i),
                 (125, 0, 55), 3, 1)
    for j in range(10):
        cv2.line(canvas, (step_x * j, 0), (step_x * j, canvas.shape[0]),
                 (255, 255, 255), 3, 1)
        cv2.line(warped2, (step_x * j, 0), (step_x * j, canvas.shape[0]),
                 (125, 0, 55), 3, 1)
    # show_image(warped2, "grids")
    return canvas


############### Finding out the intersection pts to get the grids #########
def grid_points(img, warped2):
    """Locate the 100 grid-line intersections of a drawn 9x9 grid.

    Args:
        img: 3-channel grid image (it is converted with ``COLOR_BGR2GRAY``).
        warped2: image on which the detected points are marked (in place).

    Returns:
        A tuple ``(c2, bm, cnts)``: the points sorted by y as a (100, 2)
        array, the points arranged as a (10, 10, 2) array (rows sorted by x),
        and the contours the points were computed from.

    Raises:
        ValueError: if exactly 100 intersection blobs cannot be found.
    """
    # Vertical lines: x-derivative, dilated with a tall kernel.
    kernelx = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 10))
    dx = cv2.Sobel(img, cv2.CV_16S, 1, 0)
    dx = cv2.convertScaleAbs(dx)
    c = cv2.normalize(dx, dx, 0, 255, cv2.NORM_MINMAX)
    c = cv2.morphologyEx(c, cv2.MORPH_DILATE, kernelx, iterations=1)
    cy = cv2.cvtColor(c, cv2.COLOR_BGR2GRAY)
    closex = cv2.morphologyEx(cy, cv2.MORPH_DILATE, kernelx, iterations=1)

    # Horizontal lines: y-derivative, dilated with a wide kernel.
    kernely = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 2))
    dy = cv2.Sobel(img, cv2.CV_16S, 0, 2)
    dy = cv2.convertScaleAbs(dy)
    c = cv2.normalize(dy, dy, 0, 255, cv2.NORM_MINMAX)
    c = cv2.morphologyEx(c, cv2.MORPH_DILATE, kernely, iterations=1)
    cy = cv2.cvtColor(c, cv2.COLOR_BGR2GRAY)
    # NOTE(review): the second dilation reuses kernelx rather than kernely;
    # kept as in the original since the downstream thresholds depend on it.
    closey = cv2.morphologyEx(cy, cv2.MORPH_DILATE, kernelx, iterations=1)

    # Intersections are where both line masks overlap.
    res = cv2.bitwise_and(closex, closey)
    ret, thresh = cv2.threshold(res, 0, 255,
                                cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Perform morphology
    kernel = np.ones((6, 6), np.uint8)
    se = np.ones((8, 8), dtype='uint8')
    image_close = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, se)
    image_close = cv2.morphologyEx(image_close, cv2.MORPH_OPEN, kernel)

    contour = _grab_contours(
        cv2.findContours(image_close, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE))
    cnts = sorted(contour, key=cv2.contourArea, reverse=True)[:100]

    centroids = []
    for cnt in cnts:
        mom = cv2.moments(cnt)
        if mom['m00'] == 0:
            # Degenerate contour: no centroid can be computed.
            continue
        (x, y) = int(mom['m10'] / mom['m00']), int(mom['m01'] / mom['m00'])
        cv2.circle(warped2, (x, y), 4, (0, 255, 0), -1)
        centroids.append((x, y))
    # show_image(warped2, "grid_points")

    if len(centroids) != 100:
        raise ValueError(
            "expected 100 grid intersections, found %d" % len(centroids))

    Points = np.array(centroids, dtype=np.float32)
    c = Points.reshape((100, 2))
    # Sort by y, then sort each group of ten by x.
    c2 = c[np.argsort(c[:, 1])]
    b = np.vstack([c2[i * 10:(i + 1) * 10][np.argsort(c2[i * 10:(i + 1) * 10, 0])]
                   for i in range(10)])
    bm = b.reshape((10, 10, 2))
    return c2, bm, cnts


############ Recognize digit images to number #############
def image_to_num(c2):
    """Run Tesseract on a digit image and return the recognised digit.

    Args:
        c2: digit image; it is inverted (``255 - c2``) before OCR.

    Returns:
        The first decimal digit character in the OCR output, or the integer
        ``0`` when the output contains no digit (the value used elsewhere in
        this file for an empty cell).
    """
    img = 255 - c2
    text = pytesseract.image_to_string(img, lang="eng",
                                       config='--psm 6 --oem 3')
    for ch in text:
        if ch in "0123456789":
            return ch
    return 0


###### To get the digit at the particular cell #############
def get_digit(c2, bm, warped1, cnts):
    """Crop every cell of the grid and OCR the digit it contains.

    Side effect: (re)creates the module-level ``list_images`` list and fills
    it with the cropped digit images that were sent to OCR.

    Args:
        c2: unused on entry; overwritten with per-cell images.
        bm: (10, 10, 2) array of grid intersection points.
        warped1: BGR image the cells are cropped from.
        cnts: unused; kept for interface compatibility.

    Returns:
        A tuple ``(c2, num, centroidx, centroidy)``: the last processed cell
        image, a list of 81 entries (``0`` for cells without a detected
        digit, otherwise the value from ``image_to_num``), and the
        transposed 9x9 arrays of cell-centre coordinates.
    """
    num = []
    centroidx = np.empty((9, 9))
    centroidy = np.empty((9, 9))
    global list_images
    list_images = []
    for i in range(0, 9):
        for j in range(0, 9):
            x1, y1 = bm[i][j]  # bm[0] row1
            x2, y2 = bm[i + 1][j + 1]

            centroidx[i][j] = ((x1 + x2) // 2)
            centroidy[i][j] = ((y1 + y2) // 2)

            # NOTE(review): x is used as the row index and y as the column
            # index here; sudoku_matrix and the centroid arrays are
            # transposed later, presumably to compensate. Left unchanged.
            crop = warped1[int(x1):int(x2), int(y1):int(y2)]
            crop = imutils.resize(crop, height=69, width=67)
            c2 = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
            c2 = cv2.adaptiveThreshold(c2, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                       cv2.THRESH_BINARY_INV, 11, 2)
            c2 = cv2.copyMakeBorder(c2, 5, 5, 5, 5, cv2.BORDER_CONSTANT,
                                    value=(0, 0, 0))
            no = 0

            # Trim the cell border so only the interior is analysed.
            c2 = c2[14:70, 15:62]
            found = _grab_contours(
                cv2.findContours(c2, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE))
            largest = sorted(found, key=cv2.contourArea, reverse=True)[:1]

            for cnt in largest:
                x, y, w, h = cv2.boundingRect(cnt)
                aspect_ratio = w / h
                area = cv2.contourArea(cnt)
                # Only blobs that are big enough and taller than wide are
                # treated as digits.
                if (area > 120 and cnt.shape[0] > 15
                        and aspect_ratio > 0.2 and aspect_ratio <= 0.9):
                    c2 = find_largest_feature(c2)
                    # show_image(c2, "box2")
                    found = _grab_contours(
                        cv2.findContours(c2, cv2.RETR_LIST,
                                         cv2.CHAIN_APPROX_SIMPLE))
                    digit_cnts = sorted(found, key=cv2.contourArea,
                                        reverse=True)[:1]
                    for digit in digit_cnts:
                        rect = cv2.boundingRect(digit)
                        # Crop to the digit's bounding box and pad it.
                        c2 = c2[rect[1]:rect[3] + rect[1],
                                rect[0]:rect[2] + rect[0]]
                        c2 = cv2.copyMakeBorder(c2, 5, 5, 5, 5,
                                                cv2.BORDER_CONSTANT,
                                                value=(0, 0, 0))
                        list_images.append(c2)
                    # show_image(c2, "box")
                    no = image_to_num(c2)
            num.append(no)

    centroidx = np.transpose(centroidx)
    centroidy = np.transpose(centroidy)
    return c2, num, centroidx, centroidy


######## creating matrix and filling numbers exist in the orig image #######
def sudoku_matrix(num):
    """Build the 9x9 puzzle grid from the flat list of recognised cells.

    Args:
        num: sequence of at least 81 values convertible with ``int``.

    Returns:
        The transposed 9x9 float array of the first 81 values.
    """
    grid = np.empty((9, 9))
    c = 0
    for i in range(9):
        for j in range(9):
            grid[i][j] = int(num[c])
            c += 1
    return np.transpose(grid)


######## Creating board to show the puzzle result in terminal##############
def board(arr):
    """Print the 9x9 grid ``arr`` as a boxed board and return ``arr``."""
    for i in range(9):
        if i % 3 == 0:
            print("+", end="")
            print("-------+" * 3)
        for j in range(9):
            if j % 3 == 0:
                print("", end="| ")
            print(int(arr[i][j]), end=" ")
        print("", end="|")
        print()
    print("+", end="")
    print("-------+" * 3)
    return arr


def check_col(arr, num, col):
    """Return True when ``num`` does not yet occur in column ``col``."""
    return all([num != arr[i][col] for i in range(9)])


def check_row(arr, num, row):
    """Return True when ``num`` does not yet occur in row ``row``."""
    return all([num != arr[row][i] for i in range(9)])


def check_cell(arr, num, row, col):
    """Return True when ``num`` ALREADY occurs in the 3x3 box of (row, col).

    Note the sense is the opposite of ``check_row``/``check_col``.
    """
    sectopx = 3 * (row // 3)
    sectopy = 3 * (col // 3)
    for i in range(sectopx, sectopx + 3):
        for j in range(sectopy, sectopy + 3):
            if arr[i][j] == num:
                return True
    return False


def empty_loc(arr, l):
    """Find the first empty (0) cell in row-major order.

    Writes its row and column into ``l[0]`` and ``l[1]``.

    Returns:
        True if an empty cell was found, False when the grid is full.
    """
    for i in range(9):
        for j in range(9):
            if arr[i][j] == 0:
                l[0] = i
                l[1] = j
                return True
    return False


#### Solving sudoku by back tracking############
def sudoku(arr):
    """Solve the grid ``arr`` in place by backtracking.

    Returns:
        True when the grid was completely filled, False when no assignment
        of the remaining empty cells is consistent.
    """
    l = [0, 0]
    if not empty_loc(arr, l):
        return True

    row = l[0]
    col = l[1]

    for num in range(1, 10):
        if (check_row(arr, num, row) and check_col(arr, num, col)
                and not check_cell(arr, num, row, col)):
            arr[row][col] = int(num)
            if sudoku(arr):
                return True
            # failure, unmake & try again
            arr[row][col] = 0
    return False


def overlay(arr, num, img, cx, cy):
    """Draw the solved digits into the originally empty cells and show it.

    Args:
        arr: solved 9x9 grid.
        num: flat list of the originally recognised cells; entries equal to
            ``0`` mark cells that get a solved digit drawn.
        img: image to draw on (modified in place) and display.
        cx, cy: 9x9 arrays of cell-centre coordinates.
    """
    no = -1
    for i in range(9):
        for j in range(9):
            no += 1
            if num[no] == 0:
                cv2.putText(img, str(int(arr[j][i])),
                            (int(cx[i][j] - 4), int(cy[i][j]) + 8),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 4)
    cv2.imshow("Sudoku", img)
    cv2.waitKey(0)

# Driver: read the puzzle image, extract the digits, solve, and display.

# If a perspective transformation is required set this to True. It must be a
# real boolean: preprocess() compares it against True, so a string such as
# "True" would silently leave the transform disabled.
case = False
image = cv2.imread("QupKb.png")
if image is None:
    # cv2.imread signals an unreadable/missing file by returning None.
    raise FileNotFoundError("could not read image 'QupKb.png'")

th3, warped1, warped = preprocess(image, case)
warped2 = warped1.copy()
img = grids(warped, warped2)
c2, bm, cnts = grid_points(img, warped2)
c2, num, cx, cy = get_digit(c2, bm, warped1, cnts)
grid = sudoku_matrix(num)
if sudoku(grid):
    arr = board(grid)
    overlay(arr, num, warped1, cx, cy)
else:
    print("There is no solution")

warped（射影変換後の画像）：

th3：

warped2：

数独の結果：

抽出されたすべての数字：

########## To view all the extracted digits ###############
# `plt` was used here without ever being imported.
import matplotlib.pyplot as plt

# list_images is the module-level list filled by get_digit().
if list_images:
    # squeeze=False keeps `axs` a 2-D array even when there is only one
    # image, so .flatten() always works.
    _, axs = plt.subplots(1, len(list_images), figsize=(24, 24), squeeze=False)
    axs = axs.flatten()
    for img, ax in zip(list_images, axs):
        ax.imshow(cv2.resize(img, (64, 64)))
    plt.show()
else:
    print("No digit images were extracted")

参考文献：