# imports

In [4]:
import os

In [5]:
import cv2

In [6]:
import numpy as np

In [7]:
import pandas as pd

# config

In [9]:
# CSV file in which the extracted box information is stored.
g_boxes_csv_pathfn = "./data/src_img/allboxes.csv"

In [10]:
# Directory that holds the marked (annotated) source images.
g_srcimg_marked_path = "./data/src_img/marked"

# 符号类型常量

In [11]:
# Marker colours, one per symbol class.
# NOTE(review): find_box compares COLOR[2] with image channel 0 and COLOR[0]
# with channel 2, so these tuples appear to be in (R, G, B) order - confirm.
C1 = (136, 0, 21)
C2 = (237, 28, 36)
C3 = (255, 127, 39)
C4 = (255, 242, 0)
C5 = (34, 177, 76)
C6 = (0, 162, 232)
C7 = (63, 72, 204)
C8 = (163, 73, 164)
C9 = (185, 122, 87)
C10 = (255, 174, 201)
C11 = (255, 201, 14)
C12 = (239, 228, 176)
C13 = (181, 230, 29)
C14 = (153, 217, 234)
C15 = (112, 146, 190)
C16 = (200, 191, 231)

# All marker colours; position i pairs with NAMES[i].
COLORS = [
    C1, C2, C3, C4,
    C5, C6, C7, C8,
    C9, C10, C11, C12,
    C13, C14, C15, C16,
]

# Symbol class names, index-aligned with COLORS.
NAMES = [
    'c1_tiaoya',
    'c2_dianyahgq',
    'c3_3gongweiglkg',
    'c4_kuaisujdkg',
    'c5_daidianxianshiqi',
    'c6_rongduanqi',
    'c7_bileiqi',
    'c8_dianliuhgq',
    'c9_danxiangdyhgq',
    'c10_duanluqi',
    'c11_sanxiangbyq',
    'c12_dongrongshidyhgq',
    'c13_danxiangbyq',
    'c14_danciranzudlhgq',
    'c15_duojikaiguan',
    'c16_diankangqi',
]

# 滤波参数

In [12]:
# 2x2 kernels applied with cv2.filter2D to a 0/1 colour mask. Each kernel has
# three ones, so a response of exactly 3 means all three covered mask pixels
# are set, i.e. an L-shaped corner of a drawn rectangle.
# NOTE(review): which corner each kernel answers to depends on filter2D's
# default anchor; find_box uses k0 as the top-left corner, k1 to find the
# right edge on the same row and k3 to find the bottom edge on the same column.
k0 = np.array([[1, 1], [1, 0]])
k1 = np.array([[1, 1], [0, 1]])
# k2 is defined for completeness; it is not referenced by find_box.
k2 = np.array([[0, 1], [1, 1]])
k3 = np.array([[1, 0], [1, 1]])

# 函数


In [9]:
def find_box(imgfn, dbg=()):
    """
    Extract the hand-marked rectangles from one marked image.

    For every colour in COLORS the image is reduced to a 0/1 mask of the pixels
    that match that colour exactly. The mask is filtered with the 2x2 kernels
    k0, k1 and k3, and positions whose response is exactly 3 are treated as
    corner candidates. Each k0 corner is paired with the nearest k1 corner on
    the same row (to its right) and the nearest k3 corner on the same column
    (below it); together they define one box.

    Args:
        imgfn: file name of the marked image to process.
        dbg: collection of debug flags. Recognised values:
            'prt_info' - print the file, colour index and colour being scanned;
            'show'     - plot the k0 filter response for every colour;
            'saveimg'  - draw the detected boxes and class names onto a copy of
                         the image and write it to the current directory as
                         'mark_<name>' and '<name>_marked.jpg'.

    Returns:
        (boxes, names): two lists of equal length. boxes[i] is
        [top_row, left_col, bottom_row, right_col] as plain ints, using numpy
        (row, column) indexing; names[i] is the matching entry of NAMES.

    A k0 corner that has no matching k1 corner or no matching k3 corner is
    skipped instead of producing a box.
    """
    # NOTE(review): np.fromfile + cv2.imdecode is presumably used instead of
    # cv2.imread so that non-ASCII file names can be read - confirm.
    img_rgb = cv2.imdecode(np.fromfile(imgfn, dtype=np.uint8), -1)
    img = img_rgb.copy()  # drawing target; the original stays untouched for matching
    base_name = os.path.basename(imgfn)
    font = cv2.FONT_HERSHEY_SIMPLEX

    rboxs = []
    rnames = []
    for idx, COLOR in enumerate(COLORS):
        if 'prt_info' in dbg:
            print('>>>>', imgfn, idx, COLOR)

        # COLOR is matched against the image channels in reverse order
        # (channel 0 <-> COLOR[2], channel 2 <-> COLOR[0]).
        mask = ((img_rgb[:, :, 0] == COLOR[2])
                & (img_rgb[:, :, 1] == COLOR[1])
                & (img_rgb[:, :, 2] == COLOR[0])).astype(np.uint8)

        img_fk0 = cv2.filter2D(mask, -1, k0)
        if 'show' in dbg:
            # NOTE(review): `plt` is not imported in the visible part of this
            # file; this flag needs matplotlib.pyplot to be available as `plt`.
            plt.figure(figsize=(10, 10))
            plt.imshow(img_fk0)

        # Each kernel holds three ones, so a response of 3 means all three
        # covered mask pixels are set. np.where yields (rows, cols).
        xs0, ys0 = np.where(img_fk0 == 3)
        xs1, ys1 = np.where(cv2.filter2D(mask, -1, k1) == 3)
        xs3, ys3 = np.where(cv2.filter2D(mask, -1, k3) == 3)

        boxs = []
        for x0, y0 in zip(xs0, ys0):
            # Candidates on the same row, strictly to the right.
            ys_right = ys1[(xs1 == x0) & (ys1 > y0)]
            # Candidates on the same column, strictly below.
            xs_below = xs3[(ys3 == y0) & (xs3 > x0)]
            # Both partners are required; an unmatched corner is not a box.
            if ys_right.size == 0 or xs_below.size == 0:
                continue
            # Plain ints keep the repr (and therefore the CSV written by
            # find_boxes) independent of the NumPy scalar repr.
            boxs.append([int(x0), int(y0),
                         int(xs_below.min()), int(ys_right.min())])

        for box in boxs:
            if 'saveimg' in dbg:
                # OpenCV points are (x, y) = (col, row), hence the swapped indices.
                # NOTE(review): COLOR is passed as-is here although matching
                # above treats it as reversed relative to the image channel
                # order, so the drawn colour may have its first and last
                # channels swapped - confirm whether that is intended.
                cv2.rectangle(img, (box[1], box[0]), (box[3], box[2]), COLOR, 5)
                cv2.putText(img, NAMES[idx], (box[1], box[0]), font, 0.51, COLOR, 2)
            rnames.append(NAMES[idx])
        rboxs.extend(boxs)

    if 'saveimg' in dbg:
        cv2.imwrite('mark_' + base_name, img)
        cv2.imencode('.jpg', img)[1].tofile(base_name + '_marked.jpg')

    return rboxs, rnames


In [10]:
def find_boxes(boxes_csv_fn='./data/src_img/allboxes.csv', srcimg_marked_path='./data/src_img/marked', dbg=()):
    """
    Run find_box on every file below a directory and save the result as CSV.

    The directory is walked recursively with os.walk; every file found is
    passed to find_box. Only the bare file name (without its directory) is
    recorded in the result.

    Args:
        boxes_csv_fn: name of the CSV file the result is written to. Its
            content is the returned dict converted to a pandas DataFrame.
        srcimg_marked_path: directory that holds the marked images.
        dbg: collection of debug flags, forwarded to find_box. 'prt_info'
            additionally prints per-file counts here.

    Returns:
        dict with three index-aligned lists:
            'image': file names;
            'box':   for each image, the list of boxes returned by find_box;
            'cls':   for each image, the list of class names returned by
                     find_box, matching 'box' element by element.
    """
    verbose = 'prt_info' in dbg
    ds = {'image': [], 'box': [], 'cls': []}

    for path, _, files in os.walk(srcimg_marked_path):
        for fn in files:
            # The '/' separator is kept on purpose so the path string handed to
            # find_box has the same form as before.
            imgfn = path + '/' + fn
            boxes, classes = find_box(imgfn, dbg)

            if verbose:
                print('???', imgfn, len(boxes), len(classes))
            ds['image'].append(fn)
            ds['box'].append(boxes)
            ds['cls'].append(classes)
            if verbose:
                print('ds:', len(ds['image']), len(ds['box']), len(ds['cls']))

    pd.DataFrame(ds).to_csv(boxes_csv_fn)

    return ds
     

In [23]:
def gen_zoomed_boxes(ds, zoomed_img_path = './data/src_img/zoomed'):
    """
    Walk the directory of rescaled images and report the dpi of each file.

    Intended purpose (per the original notes): the marked images are 300 dpi,
    and box records for the same pages rendered at other dpi values should be
    derived from the existing records, scaled by the dpi found in the file
    name. At present the function only parses that dpi value and prints it
    next to the file path; no records are generated and nothing is returned.

    Args:
        ds: result of find_boxes. Not read by the current implementation.
        zoomed_img_path: directory that is walked recursively for image files.

    Raises:
        ValueError: if the text between the last 'dpi' in the path and the
            following '_' is not a valid float.
    """
    for folder, _subdirs, names in os.walk(zoomed_img_path):
        for name in names:
            full_name = folder + '/' + name
            # Text after the last 'dpi', up to the next underscore.
            after_dpi = full_name.split('dpi')[-1]
            dpi_text = after_dpi.split('_')[0]
            print(full_name, float(dpi_text))
   
# Run once over the default zoomed-image directory; None is passed because
# gen_zoomed_boxes does not read `ds`.
gen_zoomed_boxes(None)

# 提取标记的矩形信息

In [24]:
# List each file under the default zoomed-image directory with its parsed dpi.
gen_zoomed_boxes(None)

In [11]:
# Extract the boxes from every image under g_srcimg_marked_path and write them
# to the CSV file named by g_boxes_csv_pathfn (both equal find_boxes' defaults).
find_boxes(g_boxes_csv_pathfn, g_srcimg_marked_path)

{'image': ['[BA18101C-D01-03]短路电流计算及校验表.pdf__dpi300.0_0.png',
  '[BA18101C-D01-04]电气主接线图.pdf__dpi300.0_0.png',
  '[BA18101C-D01-08]110kV主变进线及母线设备间隔断面图.pdf__dpi300.0_0.png',
  '[BA18101C-D01-09]110kV出线间隔断面图.pdf__dpi300.0_0.png',
  '[BA18101C-D01-10]110kV内桥间隔断面图.pdf__dpi300.0_0.png',
  '[BA18101C-D01-11]10kV配电装置接线图.pdf__dpi300.0_0.png',
  '[BA18101C-D01-15]10kV电容器组平断面图.pdf__dpi300.0_0.png',
  '[BA18101C-D01-16]站用电系统接线图.pdf__dpi300.0_0.png',
  '[BA18101C-D02-06]全站保护配置图.pdf__dpi300.0_0.png',
  '[BA18101C-D02-07]变电站过程层设备配置图.pdf__dpi300.0_0.png'],
 'box': [[[1209, 876, 1301, 931],
   [1209, 1125, 1299, 1180],
   [1209, 1328, 1300, 1382]],
  [[2097, 1579, 2235, 1662],
   [2100, 3754, 2232, 3839],
   [2100, 5375, 2234, 5457],
   [1127, 3741, 1208, 3854],
   [1127, 5359, 1204, 5473],
   [1128, 1566, 1208, 1679],
   [3175, 6336, 3258, 6449],
   [3176, 2302, 3256, 2416],
   [3176, 3581, 3256, 3695],
   [3180, 3926, 3258, 4040],
   [877, 4208, 910, 4243],
   [877, 5748, 910, 5783],
   [878, 2316, 