Loovelj
9/7/2018 - 1:20 AM

根据表格线垂直投影后切割表格

根据表格线垂直投影后切割表格

import os

import cv2
import matplotlib.pyplot as plt
import numpy as np

# Input image that will be split into table cells.
filename = 'D:\\Python\\Table\\000pic_to_excel_server0906\\lines\\000033.jpg'
# Flag 0 asks OpenCV to load the image as single-channel grayscale.
img = cv2.imread(filename, 0)
if img is None:
    # cv2.imread signals an unreadable/missing file by returning None instead
    # of raising; fail here with a clear message rather than deep inside OpenCV.
    raise IOError("cannot read image: " + filename)
# With THRESH_OTSU the threshold is chosen automatically (the 0 is ignored);
# THRESH_BINARY_INV inverts the result, so dark pixels become 255.
ret3, th3 = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Projection profiles of the grayscale image: one sum per row (axis=1) and
# one sum per column (axis=0).
horizontal_sum = np.sum(img, axis=1)
vertical_sum = np.sum(img, axis=0)



def extract_peek_ranges_from_array(array_vals, minimun_val=20, minimun_range=15):
    """Find runs of consecutive values that are at or above a threshold.

    Args:
        array_vals: Iterable of numbers (e.g. a 1-D projection profile).
        minimun_val: A value belongs to a run when ``val >= minimun_val``.
        minimun_range: Minimum length (``end - start``) a run must have to be
            reported; shorter runs are discarded.

    Returns:
        A list of ``(start, end)`` index tuples, where ``start`` is the index
        of the first value in the run and ``end`` is the index of the first
        value below the threshold after it (i.e. ``end`` is exclusive).

    Raises:
        ValueError: If a value can be classified neither as ``>=`` nor as
            ``<`` the threshold (for example NaN).

    Note:
        A run that is still open when the input ends is not reported; only
        runs terminated by a below-threshold value are returned.
    """
    peek_ranges = []
    start_i = None
    for i, val in enumerate(array_vals):
        if val >= minimun_val:
            # At or above the threshold: open a run, or keep the current one
            # going. Equality must continue an open run too, otherwise a value
            # exactly equal to the threshold would be unclassifiable.
            if start_i is None:
                start_i = i
        elif val < minimun_val:
            if start_i is not None:
                # First value below the threshold closes the current run.
                if i - start_i >= minimun_range:
                    peek_ranges.append((start_i, i))
                start_i = None
        else:
            # Neither >= nor < holds (e.g. NaN).
            raise ValueError("cannot parse this case...")
    return peek_ranges

# Row bands and column bands whose projection sum reaches the threshold;
# each band is a (start, end) index pair.
horizontal_peek = extract_peek_ranges_from_array(horizontal_sum, 2000)
vertical_peek = extract_peek_ranges_from_array(vertical_sum, 2000)

print(horizontal_peek, vertical_peek)

# cv2.imwrite does not create missing directories -- it only returns False --
# so make sure the output folder exists before writing any cell image.
output_dir = "./test/"
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)

# table_list[i][j] holds the crop of the binarized image for row band i and
# column band j; each crop is also saved as "<i>__<j>.jpg".
table_list = []
for i, (top, bottom) in enumerate(horizontal_peek):
    row_cells = []
    for j, (left, right) in enumerate(vertical_peek):
        print(i, j)
        cell = th3[top:bottom, left:right]
        row_cells.append(cell)
        cv2.imwrite(output_dir + str(i) + "__" + str(j) + ".jpg", cell)
    table_list.append(row_cells)

print(img.shape)