# Split a table image into cells using projections of the table lines.
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
# Path of the table image to split (hard-coded input of this script).
filename = 'D:\\Python\\Table\\000pic_to_excel_server0906\\lines\\000033.jpg'

# Flag 0 makes OpenCV load the image as single-channel grayscale.
img = cv2.imread(filename, 0)
# cv2.imread reports a missing or unreadable file by returning None rather
# than raising, so fail here with a clear message instead of letting
# cv2.threshold crash on a None input.
if img is None:
    raise FileNotFoundError("cannot read image: " + filename)

# Inverted binary image with the threshold chosen automatically by Otsu's
# method; this is the image the cells are later cropped from.
ret3, th3 = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Projection profiles, computed on the grayscale image (not on th3):
# one sum per row and one sum per column.
horizontal_sum = np.sum(img, axis=1)
vertical_sum = np.sum(img, axis=0)
def extract_peek_ranges_from_array(array_vals, minimun_val=20, minimun_range=15):
    """Find runs of consecutive values that reach a threshold.

    ``array_vals`` is scanned from left to right. A run opens at the first
    index whose value is at least ``minimun_val`` and closes at the first
    later index whose value is below ``minimun_val``. Runs shorter than
    ``minimun_range`` are discarded.

    Args:
        array_vals: Iterable of numbers (for example a projection profile).
        minimun_val: Threshold a value must reach to belong to a run.
        minimun_range: Minimum length (``end - start``) for a run to be kept.

    Returns:
        A list of ``(start, end)`` index tuples, in order of appearance.
        ``start`` is the first index of the run and ``end`` is the index of
        the first value below the threshold, so the pair works directly as a
        half-open slice ``array_vals[start:end]``.

    Raises:
        ValueError: If a value is neither ``>=`` nor ``<`` the threshold
            (for example NaN).

    Note:
        A run that is still open when the input ends is not reported,
        because a run is only recorded when a below-threshold value closes it.
    """
    peek_ranges = []
    start_i = None
    for i, val in enumerate(array_vals):
        if val >= minimun_val:
            # A value exactly equal to the threshold counts as "inside",
            # both when it opens a run and when it continues an open one.
            if start_i is None:
                start_i = i
        elif val < minimun_val:
            if start_i is not None:
                # Close the current run; keep it only if it is long enough.
                if i - start_i >= minimun_range:
                    peek_ranges.append((start_i, i))
                start_i = None
        else:
            # Reached only for values that cannot be ordered against the
            # threshold, such as NaN.
            raise ValueError("cannot parse this case...")
    return peek_ranges
# Row and column index ranges whose projection sum reaches 2000.
horizontal_peek = extract_peek_ranges_from_array(horizontal_sum, 2000)
vertical_peek = extract_peek_ranges_from_array(vertical_sum, 2000)
print(horizontal_peek, vertical_peek)

# cv2.imwrite does not create missing directories, so make sure the output
# folder exists before any cell is written.
output_dir = "./test/"
os.makedirs(output_dir, exist_ok=True)

# table_list[i][j] holds the crop for row range i and column range j.
table_list = [[[] for _ in vertical_peek] for _ in horizontal_peek]
for i, (top, bottom) in enumerate(horizontal_peek):
    for j, (left, right) in enumerate(vertical_peek):
        print(i, j)
        # Cells are cropped from the inverted binary image, not the grayscale one.
        table_list[i][j] = th3[top:bottom, left:right]
        cv2.imwrite(output_dir + str(i) + "__" + str(j) + ".jpg", table_list[i][j])
print(img.shape)