# 2017年11月21日20:07:05
# 利用DenseNet+Squeeze-and-Excitation Networks 结构改进原CNN网络

In [1]:
%load_ext autoreload
%autoreload 2
%reload_ext autoreload
import pickle
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import cv2
from cumt_variant import densenet
# Session shared by every cell of this experiment.
sess=tf.InteractiveSession()
# NOTE(review): this initializer op is created before any model has been built and is
# never run in the visible cells; the actual initialisation is the separate
# sess.run(tf.global_variables_initializer()) call made after the graph is constructed.
init=tf.global_variables_initializer()

**修改DenseNet参数，希望能够减小参数，希望能用1k的数据集训练出一个ACC接近1的网络**
* CUMT_ori: 4.25 million,  acc=0.85
* DenseNet_ori:  k=12,  L=5,  dense block=4,  theta=0.5   ->  0.197 million,  acc=0.95
* 1: k=8 ,L=4 ,dense block=3,theta=0.3,reg=4e-4,drop out=0.5,data size =2048   ->0.042 million,acc=0.95  
改变conv block的顺序，由 conv-bn-relu 变成 bn-relu-conv,在tail的最后两个conv层加drop out
* 2: k=8 ,L=4 ,dense block=3,theta=0.3,reg=4e-4,all layer drop out=0.5,data size =1024   ->0.042 million,acc=0.95  
改变conv block的顺序，由 conv-bn-relu 变成 bn-relu-conv,所有bottle neck layer 加上drop out
* 3: k=12 ,L=12 ,dense block=4,theta=0.5,data size =1024   ->0.728 million,acc=0.978  
conv block的顺序仍然为conv-bn-relu，去除所有dropout和正则
* 4: k=12 ,L=20 ,dense block=4,theta=0.5,data size =1024   ->1.704 million,acc=0.985  
conv block的顺序仍然为conv-bn-relu，去除所有dropout和正则  
权值文件路径为 'densenet_dump/bigmodel_acc985.pkl',模型文件在 ‘model_save/DenseNet/big_model_on_cumt_acc985-10085’  
  
**减小深度，生长率，希望降低参数值**
* 5: k=3 ,L=3 ,dense block=3,theta=0.5,data size =1024   ->0.0052 million,acc=0.63  
* 6: k=3 ,L=20,dense block=4,theta=0.1,data size =1024   ->0.064 million,acc=0.8 
* 7: k=2 ,L=2,dense block=4,theta=0.8,data size =all   ->0.00263 million,acc=1.0  
  **只用了差不多10KB大小的参数，完成了CUMT的分类任务**





*总结：*
* *减少L,theta对准确率的影响最小，但是减少的参数也少*
* *减小K,dense block数量能大量的减小参数，但是同时也会比较大程度的影响ACC*
* *增加正则和drop out能稍微增加一点准确率，但不是关键的因素*
* *从V6可以看到，对于参数很少的网络，即使网络很深，其效果也不理想。猜测是因为若将所有W看做一个解空间，
  更多的参数意味着更高的维度，而在这个高维空间中更容易找到局部极值点或者全局最优点。*


In [2]:
# Build the 10-class training graph: inputs, DenseNet, loss, optimizer, accuracy and
# the TensorBoard writers. The summaries are written under ./mylog/<log_name>/.
log_name='cumt_small/densenet_v0'
# Graph inputs. The scope name is spelled 'Placehloder' and ends up in the tensor names.
with tf.variable_scope('Placehloder'):
    # Batch of 128x128 3-channel images.
    X=tf.placeholder(dtype=tf.float32,shape=[None,128,128,3],name='X')
    # Labels with 10 columns; compared through arg_max below, so presumably one-hot - TODO confirm.
    Y=tf.placeholder(dtype=tf.float32,shape=[None,10],name='Y')
    # Fed True on training steps and False on evaluation steps by the training loop.
    bn_train=tf.placeholder(dtype=tf.bool,name='BN_FLAG')
    # Learning rate, fed on every training step.
    LR=tf.placeholder(dtype=tf.float32,name='lr')
#     DROPOUT=tf.placeholder(dtype=tf.float32,name='DROPOUT')
with tf.name_scope('DenseNet'):
    # Project-local network; K/L/theta/denseblock_num are the settings listed as
    # experiment 4 in the notes above (k=12, L=20, 4 dense blocks, theta=0.5).
    model=densenet(K=12,L=20,theta=0.5,denseblock_num=4,output_class=10,\
                   sess=sess,Images=X,bn_istraining=bn_train)
    # Used as logits by the loss below.
    y_score=model.y_score
with tf.name_scope('LOSS'):
    # Mean softmax cross-entropy; the trailing comment is a disabled L2 penalty term.
    LOSS=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_score,labels=Y))#+4e-4*(model.get_l2loss())
    tf.summary.scalar('loss',LOSS)
with tf.name_scope('TRAIN'):
    TRAIN=tf.train.AdamOptimizer(LR).minimize(LOSS)
with tf.name_scope('ACCURACY'):
    # Fraction of samples whose highest-scoring class matches the label's arg_max.
    acc_count=tf.equal(tf.arg_max(y_score,1),tf.arg_max(Y,1))
    ACCURACY=tf.reduce_mean(tf.cast(acc_count,tf.float32))
    tf.summary.scalar('acc',ACCURACY)
# Ops registered in the UPDATE_OPS collection; the training loop runs them together with
# TRAIN (presumably the batch-norm statistic updates - TODO confirm in densenet).
bn_ops=tf.get_collection(tf.GraphKeys.UPDATE_OPS)

# One summary writer per data split so the curves can be compared in TensorBoard.
writer_te=tf.summary.FileWriter(r'./mylog/'+log_name+r'/test')
writer_tr=tf.summary.FileWriter(r'./mylog/'+log_name+r'/train')
writer_val=tf.summary.FileWriter(r'./mylog/'+log_name+r'/val')
merge=tf.summary.merge_all()



Building DenseNet....
(?, 64, 64, 24)
(?, 64, 64, 264)
(?, 32, 32, 132)
(?, 32, 32, 372)
(?, 16, 16, 186)
(?, 16, 16, 426)
(?, 8, 8, 213)
(?, 8, 8, 453)
(?, 4, 4, 226)


In [3]:
# Report the size of the network just built (see the printed total below).
model.count_trainable_params()

Total training params: 1.70442 Million,6.50185 Mb


In [4]:
# Per-channel mean of the CUMT pictures; it is added back here to undo the mean
# subtraction and subtracted again when batches are fed to the network.
# NOTE(review): the original comment calls this an RGB mean - confirm that the channel
# order of these three values matches the channel order of the stored images.
cumt_picmean=[103.939, 116.779, 123.68]
# The file is a pickle, so recent NumPy versions need allow_pickle=True.
# Unpickling can execute arbitrary code - only load files from a trusted source.
cumt_data=np.load('cumt_data.pkl',allow_pickle=True)
# Shuffle images and labels with the same permutation.
index=np.arange(cumt_data['data'].shape[0])
np.random.shuffle(index)
data=cumt_data['data'][index]
label=cumt_data['labels'][index]
del cumt_data
resize_img=[]
for i in range(data.shape[0]):
    # Round and clip before the uint8 cast: a plain astype truncates towards zero
    # (e.g. 199.9999 -> 199) and wraps values that fall outside 0..255.
    img=np.clip(np.rint(data[i]+cumt_picmean),0,255).astype('uint8')
    # cv2.resize keeps the uint8 dtype, so no further cast is needed.
    resize_img.append(cv2.resize(img,(128,128)))
    if i%10000==0:
        print (i)
# Drop the reference to the full-size array so its memory can be reclaimed.
data=None
resize_img=np.array(resize_img)

0
10000
20000
30000
40000
50000
60000
70000


In [5]:
# Size of the small training subset; only used by the commented-out split below.
data_size=1024*1

data=resize_img

# Random 90% / 10% train / test split.
index=np.arange(data.shape[0])
np.random.shuffle(index)
# Alternative split: train on the first `data_size` samples only.
# tr_index=index[:data_size]
# te_index=index[data_size:]
split_at=int(data.shape[0]*0.9)
tr_index=index[:split_at]
te_index=index[split_at:]
tr_data,tr_label=data[tr_index],label[tr_index]
te_data,te_label=data[te_index],label[te_index]
# The fancy-indexed splits are copies, so the full arrays can be released.
# NOTE: this makes the cell non re-runnable without re-running the loading cell.
del data
del label
del resize_img
print(tr_data.shape,te_data.shape)


# Separate validation set. The file is a pickle, so recent NumPy versions need
# allow_pickle=True; only load pickles from a trusted source.
val_d=np.load('valimg1k_1109.pkl',allow_pickle=True)
val_data=np.array([cv2.resize(val_d['data'][v],(128,128))
                   for v in range(val_d['data'].shape[0])])
val_label=val_d['label']
del val_d
print(val_data.shape)

(67346, 128, 128, 3) (7483, 128, 128, 3)
(966, 128, 128, 3)


In [None]:
# saver=tf.train.Saver()
# saver.restore(sess,r'D:\Proj_DL\Code\Proj_EyeTraker\Proj_iTraker\CUMT_iTraker\model_save\DenseNet\big_model_on_cumt_acc985-10085')

In [6]:
# Initialise every variable of the graph built above (run this instead of the restore
# cell when training from scratch).
sess.run(tf.global_variables_initializer())
# saver=tf.train.Saver()

In [None]:
# Start the main training run.
# NOTE(review): the counter `i` is a training step (one mini-batch), although the log
# messages label it 'epoch'.
batchsize=128
best_acc=0
lr_=1e-4
dp_=1.  # unused in this loop (the DROPOUT placeholder is commented out in the graph cell)
test_acc_his=[]
best_test=0
# The range starts at 32180, presumably to continue the step numbering of an earlier
# run in the TensorBoard logs - TODO confirm.
for i in range(32180,1000*6*12*1000):
    # Sample a mini-batch without replacement and subtract the channel mean.
    mask=np.random.choice(tr_data.shape[0],batchsize,replace=False)
    x_,y_=tr_data[mask]-cumt_picmean,tr_label[mask]
    feed_dict={X:x_,Y:y_,bn_train:True,LR:lr_}
    # Run the optimizer step together with the collected update ops.
    sess.run([TRAIN,bn_ops],feed_dict=feed_dict)
    if i%10==0:
        # Training metrics, measured on the batch that was just used for the update.
        feed_dict={X:x_,Y:y_,bn_train:False}
        loss_,acc_,m_=sess.run([LOSS,ACCURACY,merge],feed_dict=feed_dict)
        writer_tr.add_summary(m_,i)
        print('epoch:{},train loss:{:.4f},train accuracy:{:.4f}'.format(i,loss_,acc_))
    if i%20==0:
        # Test metrics on a random batch of 128 held-out samples.
        mask=np.random.choice(te_data.shape[0],128,replace=False)
        x_,y_=te_data[mask]-cumt_picmean,te_label[mask]
        feed_dict={X:x_,Y:y_,bn_train:False}
        loss_,acc_,m_=sess.run([LOSS,ACCURACY,merge],feed_dict=feed_dict)
        test_acc_his.append(acc_)
        # Smooth the noisy per-batch accuracy over the last 20 evaluations.
        mean_acc=np.mean(test_acc_his[-20:])
        best_test=max(best_test,mean_acc)
        writer_te.add_summary(m_,i)
        
        print('--epoch:{},test loss:{:.4f},test accuracy:{:.4f},mean acc:{:.4f},best test acc:{:.4f}'\
              .format(i,loss_,acc_,mean_acc,best_test))
    if i%30==0:
        # Validation metrics on a random batch of 128 samples from the separate set.
        mask=np.random.choice(val_data.shape[0],128,replace=False)
        x_,y_=val_data[mask]-cumt_picmean,val_label[mask]
        feed_dict={X:x_,Y:y_,bn_train:False}
        loss_,acc_,m_=sess.run([LOSS,ACCURACY,merge],feed_dict=feed_dict)
        best_acc=max(best_acc,acc_)
        writer_val.add_summary(m_,i)
        print('@@epoch:{},val loss:{:.4f},val accuracy:{:.4f},best val acc:{:.4f}'.format(i,loss_,acc_,best_acc))
    if i%500==0:
        # Multiplicative decay with a floor of 9e-5. Starting from 1e-4 the first decay
        # already hits the floor (1e-4*0.7 < 9e-5), so the rate stays at 9e-5 afterwards.
        lr_=max(lr_*0.7,9e-5)
        print('$$$ lr change:{}'.format(lr_))

In [None]:
# Dump the current network weights to a pickle file via the project helper.
model.save_network_weight('densenet_dump/smallv7_acc969.pkl',sess=sess)

In [None]:
# Save a TensorFlow checkpoint; `i` is the step counter left over from the training loop.
# NOTE(review): the accuracy tag in this file name (acc949) differs from the one used
# for the weight dump in the previous cell (acc969) - confirm which one is right.
saver=tf.train.Saver()
saver.save(sess,'model_save/DenseNet/small_model_on_cumtv7_acc949',global_step=i)

In [7]:
%%timeit
# Time a forward pass on one random test image.
# The channel mean is not subtracted here (unlike in the training loop); that does not
# matter for timing, but the scores are not meaningful predictions.
mask=np.random.choice(te_data.shape[0],1,replace=False)
x_,y_=te_data[mask],te_label[mask]
feed_dict={X:x_,Y:y_,bn_train:False}
sess.run(y_score,feed_dict=feed_dict)

237 ms ± 2.85 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## 使用CUMT训练好的模型进行迁移学习

**利用DenseNet结构加Cumt数据训练出的模型，进行webcam的gaze estimate**
* DenseNet_V0_trans762: 根据CUMT数据训练出的模型进行迁移学习，5X5 分类，准确率为95%左右 
* DenseNet_V0_zeros762: 从0开始训练，5X5 分类，准确率为95%左右  
* DenseNet_V7_trans762: 根据CUMT数据训练出的模型进行迁移学习，5X5 分类，准确率为80%左右   
  V7（k=2 ,L=2,dense block=4,theta=0.8）参数为10KB,要比较多的训练数据才能出好的效果
  
    
      
* DenseNet_V0_trans256: 根据CUMT数据训练出的模型进行迁移学习，5X5 分类，准确率为88%左右 
* DenseNet_V0_zeros256: 从0开始训练，5X5 分类，准确率为68%左右  
### 结论：
**1. 在1K数据量左右，不管是从0开始训练还是迁移学习，都能得到差不多好的效果，迁移学习收敛稍微快一些**  
**2. 在极少数据量（256）下，迁移学习发挥的作用大很多，收敛更快，准确率更高**  
**3. 数据量越多网络的表现越好，迁移学习总是比从0开始训练要好**


In [None]:
%load_ext autoreload
%autoreload 2
%reload_ext autoreload
import pickle
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import cv2
from cumt_variant import densenet
# Per-channel mean subtracted from every image before it is fed to the network.
cumt_picmean=[103.939, 116.779, 123.68]
# Session shared by the cells of the transfer-learning experiment.
sess=tf.InteractiveSession()
# NOTE(review): created before the model is built and never run in the visible cells.
init=tf.global_variables_initializer()

In [None]:
# Build the graph for the gaze-grid classifier: 36 output classes, i.e. one per cell
# of the 6x6 grid used by the demo cells further down.
output_class=36
log_name='6X6/densenet_v0trans_18951'
# Graph inputs. The scope name is spelled 'Placehloder' and ends up in the tensor names.
with tf.variable_scope('Placehloder'):
    X=tf.placeholder(dtype=tf.float32,shape=[None,128,128,3],name='X')
    Y=tf.placeholder(dtype=tf.float32,shape=[None,output_class],name='Y')
    # Fed True on training steps and False on evaluation / inference.
    bn_train=tf.placeholder(dtype=tf.bool,name='BN_FLAG')
    LR=tf.placeholder(dtype=tf.float32,name='lr')
with tf.name_scope('DenseNet'):
    # Project-local network with k=12, L=5, 4 dense blocks, theta=0.5.
    model=densenet(Images=X,bn_istraining=bn_train,K=12,L=5,theta=0.5,output_class=output_class,sess=sess,denseblock_num=4)
    # Used as logits by the loss below and by the softmax in the inference cells.
    y_score=model.y_score
with tf.name_scope('LOSS'):
    LOSS=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_score,labels=Y))
    tf.summary.scalar('loss',LOSS)
with tf.name_scope('TRAIN'):
    TRAIN=tf.train.AdamOptimizer(LR).minimize(LOSS)
with tf.name_scope('ACCURACY'):
    # Fraction of samples whose highest-scoring class matches the label's arg_max.
    acc_count=tf.equal(tf.arg_max(y_score,1),tf.arg_max(Y,1))
    ACCURACY=tf.reduce_mean(tf.cast(acc_count,tf.float32))
    tf.summary.scalar('acc',ACCURACY)
# Ops registered in the UPDATE_OPS collection; run together with TRAIN in the loop
# (presumably the batch-norm statistic updates - TODO confirm in densenet).
bn_ops=tf.get_collection(tf.GraphKeys.UPDATE_OPS)

# One summary writer per data split; writer_val is created but the training loop of
# this section only writes train and test summaries.
writer_te=tf.summary.FileWriter(r'./mylog/'+log_name+r'/test')
writer_tr=tf.summary.FileWriter(r'./mylog/'+log_name+r'/train')
writer_val=tf.summary.FileWriter(r'./mylog/'+log_name+r'/val')
merge=tf.summary.merge_all()

In [None]:
# Load the weights dumped from the CUMT-trained model as the starting point for
# transfer learning. The four variables listed below belong to the classification
# head; in the source model they have 10 output channels, which does not match the
# 36 classes of this graph, so they are passed in skip_layer
# (presumably init_network leaves skipped variables untouched - TODO confirm).
# <tf.Variable 'Tail/conv_class/weight:0' shape=(1, 1, 57, 10) dtype=float32_ref>
# <tf.Variable 'Tail/conv_class/biases:0' shape=(10,) dtype=float32_ref>
# <tf.Variable 'Tail/conv_class/BN/beta:0' shape=(10,) dtype=float32_ref>
# <tf.Variable 'Tail/conv_class/BN/gamma:0' shape=(10,) dtype=float32_ref>
model.init_network(sess=sess,weight_addr='densenet_dump/midV0_acc949.pkl',
                   skip_layer=['Tail/conv_class/weight','Tail/conv_class/biases','Tail/conv_class/BN/beta','Tail/conv_class/BN/gamma'],
                      )

In [None]:
# Initialise only the variables that do not hold a value yet.
# This cell comes after the pretrained-weight loading cell; a blanket
# tf.global_variables_initializer() would re-initialise every variable, including any
# weights that were just loaded. When nothing has been loaded, every variable is
# uninitialised and this is equivalent to a full initialisation.
uninit_names=set(sess.run(tf.report_uninitialized_variables()))
sess.run(tf.variables_initializer(
    [v for v in tf.global_variables() if v.op.name.encode() in uninit_names]))

In [None]:
# Other datasets that were used with this cell:
#  'img4X4_1006.pkl'   'img5X5_240.pkl'
# d=np.load('img4X4_1006.pkl')
# The file is a pickle, so recent NumPy versions need allow_pickle=True.
# Unpickling can execute arbitrary code - only load files from a trusted source.
d=np.load('image_pklfile/img6X6_18951.pkl',allow_pickle=True)
# Random 90% / 10% train / test split.
num_samples=d['data'].shape[0]
index_=np.arange(num_samples)
np.random.shuffle(index_)
split_at=int(num_samples*0.9)
tr_index=index_[:split_at]
te_index=index_[split_at:]
# Alternative split for the small-data experiments: train on 256 samples only.
# tr_index=index_[:256]
# te_index=index_[256:]
tr_data,tr_label=d['data'][tr_index],d['label'][tr_index]
te_data,te_label=d['data'][te_index],d['label'][te_index]
print(tr_data.shape,te_data.shape)
del d

In [None]:
def data_argment(image_batch,label_batch,grid_size=6):
    """Double a batch by appending horizontally mirrored copies of every image.

    The labels are one-hot vectors over the cells of a ``grid_size`` x ``grid_size``
    grid numbered row by row. Mirroring an image left-right keeps the row of the
    target cell and mirrors its column, so the label of each flipped image is
    remapped accordingly.

    :param image_batch: array indexed as (sample, row, column, channel)
    :param label_batch: array of shape (sample, grid_size*grid_size), one-hot rows
    :param grid_size: number of cells per grid side (default 6, i.e. 36 classes)
    :return: (images, labels) with the originals first and the flipped copies after
    """
    image_batch=np.asarray(image_batch)
    label_batch=np.asarray(label_batch)
    # Reverse the column axis to mirror every image left-right.
    flipped_img=image_batch[:,:,::-1,:]
    class_id=np.argmax(label_batch,axis=1)
    # Same row, mirrored column.
    mirrored_id=(class_id//grid_size)*grid_size+(grid_size-1-class_id%grid_size)
    # Built as a 2-D array up front so an empty batch still stacks correctly.
    flipped_label=np.zeros((class_id.shape[0],grid_size*grid_size),dtype=int)
    flipped_label[np.arange(class_id.shape[0]),mirrored_id]=1
    image_batch=np.vstack((image_batch,flipped_img))
    label_batch=np.vstack((label_batch,flipped_label))
    return image_batch,label_batch
    

In [None]:
# Quick manual check of the augmentation helper.
# NOTE(review): x_ and y_ are not defined earlier in this section; they are only
# assigned inside the training loops, so this cell depends on one of those having run.
a,b=data_argment(x_,y_)

In [None]:
# Start the main training run.
# NOTE(review): the counter `i` is a training step (one mini-batch), although the log
# messages label it 'epoch'.
batchsize=32
lr_=6e-6
# The range starts at 16361, presumably to continue the step numbering of an earlier
# run in the TensorBoard logs - TODO confirm.
for i in range(16361,10000*6*12):
    mask=np.random.choice(tr_data.shape[0],batchsize,replace=False)
    x_,y_=tr_data[mask],tr_label[mask]
    # Append mirrored copies, so each update actually sees 2*batchsize samples.
    x_,y_=data_argment(x_,y_)
    feed_dict={X:x_-cumt_picmean,Y:y_,bn_train:True,LR:lr_}
    # Run the optimizer step together with the collected update ops.
    sess.run([TRAIN,bn_ops],feed_dict=feed_dict)
    if i%10==0:
        # Training metrics on a fresh random batch of 128 non-augmented samples.
        mask=np.random.choice(tr_data.shape[0],128,replace=False)
        x_,y_=tr_data[mask]-cumt_picmean,tr_label[mask]
        feed_dict={X:x_,Y:y_,bn_train:False}
        loss_,acc_,m_=sess.run([LOSS,ACCURACY,merge],feed_dict=feed_dict)
        writer_tr.add_summary(m_,i)
        print('epoch:{},train loss:{},train accuracy:{}'.format(i,loss_,acc_))
    if i%20==0:
        # Test metrics on a random batch of 128 held-out samples.
        mask=np.random.choice(te_data.shape[0],128,replace=False)
        x_,y_=te_data[mask]-cumt_picmean,te_label[mask]
        feed_dict={X:x_,Y:y_,bn_train:False}
        loss_,acc_,m_=sess.run([LOSS,ACCURACY,merge],feed_dict=feed_dict)
        writer_te.add_summary(m_,i)
        print('--epoch:{},test loss:{},test accuracy:{}'.format(i,loss_,acc_))
# Learning-rate decay, disabled for this run:
#     if i%500==0:
#         lr_=max(lr_*0.9,9e-5)
#         print('$$$ lr change:{}'.format(lr_))

In [None]:
# Save a TensorFlow checkpoint; `i` is the step counter left over from the training loop.
saver=tf.train.Saver()
saver.save(sess,'./model_save/6X6DensNet_v0/v0_937.ckpt',global_step=i)

In [None]:
# Dump the current network weights to a pickle file via the project helper.
# NOTE(review): the file name says 4x4 while the graph above has 36 (6x6) classes -
# confirm the intended name.
model.save_network_weight('dense4x4_trans991.pkl',sess)

In [None]:
%%timeit
# Time a forward pass on one image drawn from the first 10 test samples.
mask=np.random.choice(10,1,replace=False)
x_,y_=te_data[mask]-cumt_picmean,te_label[mask]
feed_dict={X:x_,Y:y_,bn_train:False}
sess.run(y_score,feed_dict=feed_dict)

## 实测 6X6分类

In [None]:
# Haar cascade detectors used by geteyeimg; the XML files are looked up relative to
# the working directory.
# NOTE(review): presumably a missing XML file only surfaces later as an error inside
# detectMultiScale - confirm, and consider checking the classifiers right after loading.
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml') 
eye_cascade = cv2.CascadeClassifier('haarcascade_eye.xml')  
def drawline(img_,line_w=1,line_color=(0,0,0),wandh_num=4):
    '''
    Draw an evenly spaced grid on an image, in place.

    :param img_: image array; shape[0] is used as the height and shape[1] as the width
    :param line_w: line thickness handed to cv2.line
    :param line_color: line colour handed to cv2.line
    :param wandh_num: number of cells along each side, so wandh_num-1 lines are
                      drawn in each direction
    :return: the image object that was passed in
    '''
    height,width=img_.shape[0],img_.shape[1]
    cell_h=height//wandh_num
    cell_w=width//wandh_num

    # Interior grid positions: 1*cell, 2*cell, ..., (wandh_num-1)*cell.
    steps=range(1,wandh_num)

    # Vertical separators; points are given as (x, y).
    for x_pos in [cell_w*k for k in steps]:
        cv2.line(img_,(x_pos,0),(x_pos,height),line_color,line_w)

    # Horizontal separators.
    for y_pos in [cell_h*k for k in steps]:
        cv2.line(img_,(0,y_pos),(width,y_pos),line_color,line_w)

    return img_

def drawblock(img,line_num,block_id=0,blockcolor=(46,218,255),blockwideth=5,show_rec='fill',rec_shape=None):
    '''
    Mark, highlight or extract one cell of a line_num x line_num grid laid over an image.

    Cells are numbered row by row starting at 0. Except for 'pick', the image is
    modified in place.

    :param img: image array indexed as (row, column, channel)
    :param line_num: number of cells along each side of the grid
    :param block_id: index of the selected cell
    :param blockcolor: fill colour used by the 'fill' mode
    :param blockwideth: accepted for compatibility; not used by any mode
    :param show_rec: 'fill'   - paint the whole cell with blockcolor
                     'rec'    - draw the outline of the rectangle given by rec_shape
                     'dot'    - draw a small filled circle at the centre of the cell
                     'select' - set channel 1 of the cell to 255
                     'pick'   - return the cell content and its geometry
                     any other value leaves the image untouched
    :param rec_shape: (x, y, w, h) of the rectangle to outline; only read in 'rec' mode
    :return: (cell, (x, y, w, h)) in 'pick' mode, otherwise the image that was passed in
    '''
    cell_h=img.shape[0]//line_num
    cell_w=img.shape[1]//line_num
    # Grid row / column of the selected cell, then its top-left pixel.
    grid_row=block_id//line_num
    grid_col=block_id%line_num
    left,top=grid_col*cell_w,grid_row*cell_h
    rows=slice(top,top+cell_h)
    cols=slice(left,left+cell_w)

    if show_rec=='pick':
        # Hand back the cell content together with its position and size.
        return img[rows,cols],(left,top,cell_w,cell_h)

    if show_rec=='fill':
        # Paint the whole cell with one colour.
        img[rows,cols,:]=blockcolor
    elif show_rec=='rec':
        # Outline an explicitly given rectangle.
        xe,ye,we,he=rec_shape
        cv2.rectangle(img,(xe,ye),(xe+we,ye+he),(255,0,0),10)
    elif show_rec=='dot':
        # Draw a small filled circle at the centre of the cell.
        cell=img[rows,cols]
        centre=(cell.shape[1]//2,cell.shape[0]//2)
        cv2.circle(cell,centre, 10, (255,128,120), -1)
    elif show_rec=='select':
        # Highlight the cell by saturating channel 1.
        img[rows,cols,1]=255
    return img
def geteyeimg(img):
    """Crop the region covering both eyes out of a camera frame.

    Exactly one face and exactly two eyes inside that face must be detected;
    otherwise a diagnostic is printed and None is returned so the caller can skip
    the frame.

    Side effect: the detected face rectangle is drawn onto ``img``.

    :param img: colour frame; it is converted with COLOR_BGR2GRAY, so BGR channel
                order is assumed
    :return: view into ``img`` covering both eyes with a small margin, or None
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3,5)
    if len(faces) !=1:
        print(len(faces))
        print('bad faces')
        return None

    x,y,w,h = faces[0]
    cv2.rectangle(img,(x,y),(x+w,y+h),(255,0,0),2)
    roi_gray = gray[y:y+h, x:x+w]
    roi_color = img[y:y+h, x:x+w]

    # Look for the eyes inside the face region only; frames with a poor detection
    # (anything but two eyes) are discarded.
    eyes = eye_cascade.detectMultiScale(roi_gray,scaleFactor=1.2, minNeighbors=7, minSize=(29, 29),
                                        flags=cv2.CASCADE_SCALE_IMAGE)
    if len(eyes)!=2:
        print(len(eyes))
        print('bad eyes')
        return None

    # Horizontal extent: from the left edge of the left-most eye box to the right
    # edge of the other box (boxes are (x, y, w, h) relative to the face region).
    left_eye,right_eye = sorted(eyes,key=lambda box:box[0])
    ex = left_eye[0]
    W = right_eye[0]-left_eye[0]+right_eye[2]

    # Vertical extent: from the top of the upper box to the bottom of the lower box.
    upper_eye,lower_eye = sorted(eyes,key=lambda box:box[1])
    ey = upper_eye[1]
    H = lower_eye[1]-upper_eye[1]+lower_eye[3]

    # Trim 10 px from the top and 2 px from the bottom, widen by 10 px on both sides.
    # The left bound is clamped at 0: a negative start index would be interpreted by
    # NumPy as counting from the right edge and yield an empty or wrong crop.
    col_start = max(ex-10,0)
    crop = roi_color[ey+10:ey+H-2,col_start:ex+W+10]
    if crop.size==0:
        # Nothing left to resize downstream; treat it like any other bad detection.
        print('bad crop')
        return None
    return crop

In [None]:
# Grab a single frame from camera 0 for a manual test and release the device again.
# The success flag returned by read() is discarded here.
cap=cv2.VideoCapture(0)
_,frame=cap.read()
cap.release()

In [None]:
# Class probabilities derived from the network scores; used by the inference cells below.
prob=tf.nn.softmax(y_score)

In [None]:
# Crop the eye region from the captured frame; the result is None when detection fails.
e=geteyeimg(frame)

In [None]:
# Resize to the network input size, reverse the channel order and add a batch axis.
# NOTE(review): there is no None check here, so this fails if the previous cell found no eyes.
e=cv2.resize(e,(128,128))[:,:,::-1].reshape((1,128,128,3))

In [None]:
# Run one prediction with the mean-subtracted eye image in inference mode.
#e=get_eyeimg(frame)
prob_=sess.run(prob,feed_dict={X:e-cumt_picmean,bn_train:False})

In [None]:
prob_

In [None]:
import time

In [None]:
# Live demo: a random cell of a 6x6 grid is highlighted as the target, the network
# predicts which cell the user is looking at, and the result is recorded to a video.
fps=10
fsize_desk=(1920,1080)
# save video
video_d=cv2.VideoWriter('demo_desk2.flv',cv2.VideoWriter_fourcc('F','L','V','1'),fps,fsize_desk)
cap=cv2.VideoCapture(0)
# try/finally guarantees that the camera, the writer and the window are released even
# when an exception interrupts the loop.
try:
    # Background picture, read once instead of from disk on every frame.
    bg_img=cv2.imread('test.jpg')
    if bg_img is None:
        raise IOError("cannot read background image 'test.jpg'")
    # Show the window full screen.
    cv2.namedWindow('Capture001',cv2.WINDOW_NORMAL)
    cv2.setWindowProperty('Capture001', cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

    # Time of the last successfully processed frame, used by the time-out below.
    s_time=time.time()
    random_show=np.random.randint(0,36,1)[0]

    # Scores collected since the last averaged prediction.
    sum_score=[]
    block_id=0
    ret_counter=0
    right_frame_counter=0
    while True:
        ret,fram=cap.read()
        if ret:
            ret_counter+=1
            # Fresh copy of the background with the grid drawn on it.
            tar_img=drawline(bg_img.copy(),wandh_num=6)
            # Move the target to a new random cell every 30 frames.
            if ret_counter%30==0:
                random_show=np.random.randint(0,36,1)[0]
            tar_img=drawblock(tar_img,line_num=6,block_id=random_show)
            cv2.imshow('Capture001',tar_img)
            e=geteyeimg(fram)
            if e is not None:
                e=cv2.resize(e,(128,128))[:,:,::-1].reshape((1,128,128,3))
                y_guess=sess.run(prob,feed_dict={X:e-cumt_picmean,bn_train:False})
                sum_score.append(y_guess)
                # Every 4th camera frame, replace the prediction by the arg-max of the
                # scores averaged since the previous update (always within 0..35).
                if ret_counter%4==0:
                    block_id=int(np.argmax(np.mean(np.array(sum_score),0)))
                    sum_score=[]

                print(block_id)
                if block_id==random_show:
                    right_frame_counter+=1
                tar_img=drawblock(tar_img,line_num=6,block_id=block_id,show_rec='dot')
                # The recorded frame additionally shows the camera picture in the top-right corner.
                v_img=tar_img.copy()
                fram=cv2.resize(fram,(150,150))
                v_img[:150,-150:]=fram
                cv2.imshow('Capture001',tar_img)

                s_time=time.time()
                # The writer only accepts frames of the size it was opened with.
                if (v_img.shape[1],v_img.shape[0])!=fsize_desk:
                    v_img=cv2.resize(v_img,fsize_desk)
                video_d.write(v_img)
        # Poll the keyboard on every iteration, including frames without a usable
        # face: waitKey is what lets the window refresh and makes ESC work.
        if cv2.waitKey(1)&0xff==27:
            print('accuracy:{}'.format(right_frame_counter/max(ret_counter,1)))
            print('out')
            break
        # Stop after 5 minutes without a successfully processed frame.
        if (time.time()-s_time)>60*5:
            print('time out')
            break
finally:
    video_d.release()
    cap.release()
    cv2.destroyAllWindows()


In [None]:
# Manual clean-up: release the video writer and the camera and close the window,
# e.g. after the demo loop was interrupted.
video_d.release()
cap.release()
cv2.destroyAllWindows()


# 2017年12月2日23:22:39
## 选中ROI

In [None]:
# Live demo: staring at one cell of the 6x6 grid zooms into that cell (two levels);
# afterwards the finally selected region is outlined on the full picture.
target_img='medical2_big.jpg'

fps=10
fsize_desk=(1920,1080)
# save video
video_d=cv2.VideoWriter('demo_pic2.flv',cv2.VideoWriter_fourcc('F','L','V','1'),fps,fsize_desk)
cap=cv2.VideoCapture(0)
# try/finally guarantees that the camera, the writer and the window are released even
# when an exception interrupts the loop.
try:
    tar_img=cv2.imread(target_img)
    if tar_img is None:
        raise IOError('cannot read target image: {}'.format(target_img))
    # Picture currently shown; replaced by the enlarged cell after each zoom step.
    ori_img=cv2.resize(tar_img,(1920,1080))
    # Untouched full view, kept for marking the final selection.
    full_img=ori_img.copy()
    # Show the window full screen.
    cv2.namedWindow('Capture001',cv2.WINDOW_NORMAL)
    cv2.setWindowProperty('Capture001', cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

    # Start time of the demo, used by the overall time-out below.
    s_time=time.time()

    # Scores collected since the last averaged prediction.
    sum_score=[]
    sum_counter=0
    block_id=0
    # State of the zoom selection: number of zoom steps done, how many frames each
    # cell has been predicted since the last zoom, and the selected rectangle in
    # full-view coordinates.
    change_flag=0
    block_counter={}
    rec_x,rec_y,rec_w,rec_h=0,0,0,0
    while True:
        ret,fram=cap.read()
        if ret:
            # Current view with the grid drawn on it.
            tar_img=drawline(ori_img.copy(),wandh_num=6)
            cv2.imshow('Capture001',tar_img)
            e=geteyeimg(fram)
            if e is not None:
                e=cv2.resize(e,(128,128))[:,:,::-1].reshape((1,128,128,3))
                y_guess=sess.run(prob,feed_dict={X:e-cumt_picmean,bn_train:False})[0]

                # Update the prediction from the mean score of every 3 usable frames
                # (the arg-max is always within 0..35).
                sum_counter+=1
                sum_score.append(y_guess)
                if sum_counter==3:
                    mean_score=np.mean(np.asarray(sum_score).reshape((-1,36)),0)
                    block_id=int(np.argmax(mean_score))
                    sum_score=[]
                    sum_counter=0

                if change_flag==2:
                    # Both zoom steps are done: go back to the full view and outline
                    # the selected region. Nothing is recorded for this frame.
                    change_flag+=1
                    ori_img=full_img.copy()
                    cv2.rectangle(ori_img,(rec_x,rec_y),(rec_x+rec_w,rec_y+rec_h),(0,0,255),3)
                else:
                    if change_flag<2:
                        # Count how often each cell is predicted; a cell that reaches
                        # 30 frames since the last zoom is enlarged (at most twice).
                        block_counter[block_id]=block_counter.get(block_id,0)+1
                        if block_counter[block_id]==30:
                            roi_rec,rec_shape=drawblock(tar_img,line_num=6,block_id=block_id,show_rec='pick')
                            s_x,s_y,s_w,s_h=rec_shape
                            # Offsets found in an already zoomed view are scaled back
                            # to full-view pixels.
                            rec_x+=s_x//(6**change_flag)
                            rec_y+=s_y//(6**change_flag)
                            rec_w,rec_h=s_w//6,s_h//6
                            block_counter={}
                            ori_img=cv2.resize(roi_rec,(1920,1080))
                            change_flag+=1

                    tar_img=drawblock(tar_img,line_num=6,block_id=block_id,show_rec='select')
                    # The recorded frame additionally shows the camera picture in the top-right corner.
                    v_img=tar_img.copy()
                    fram=cv2.resize(fram,(200,200))
                    v_img[:200,-200:]=fram
                    cv2.imshow('Capture001',tar_img)

                    video_d.write(v_img)
        # Poll the keyboard on every iteration, including frames without a usable
        # face: waitKey is what lets the window refresh and makes ESC work.
        if cv2.waitKey(1)&0xff==27:
            print('out')
            break
        # Stop after 5 minutes in total.
        if (time.time()-s_time)>60*5:
            print('time out')
            break
finally:
    video_d.release()
    cap.release()
    cv2.destroyAllWindows()
