In [1]:
'''
Prerequisites for running this notebook:
  * the AlexNet weight file (see MODEL_ADDR below);
  * a preprocessed image file (see image_file_name below): a pickle file holding a dict
          image_dict{
                          'train':{'image':{....},'label':{}},
                        'test':{'image':{....},'label':{}},
                    }
    Typically the data is stored as uint8 and converted to float32 when it is
    loaded, which saves disk space.
'''
# Program settings
# Which network layers are trainable (True) and which are frozen (False)
#               conv1     conv2      conv3       conv4     conv5    fc6   fc7     fc8
variable_trable=[False,    False,     False,    False,    False,  True,  True,  True,]
# Preprocessed image file
image_file_name='flower.pkl'
# Pretrained weight file
MODEL_ADDR='bvlc_alexnet.npy'
# Name used for the TensorBoard log directories
log_file_name='conv3tofc8_modify'
# Whether the extracted features are saved to disk
save_feature=True

In [2]:
import numpy as np
#确保下载的权值文件和这个ipython文件再同一个文件夹里面，或者自己指定绝对路径
import tensorflow as tf
from scipy.misc import imread
from scipy.misc import imresize
from class_name import class_names
import time
import os
import pickle

# Shape of the tensor fed INTO each layer, in the same order as variable_trable:
#                 conv1               conv2           conv3           conv4             conv5
input_shape_list=[[None,227,227,3],[None,27,27,96],[None,13,13,256],[None,13,13,384],[None,13,13,384],
                 # fc6            fc7           fc8
                  [None,6,6,256],[None,4096],[None,4096]
                 ]

# This cell prepends a None placeholder to variable_trable (last line) so that
# layer k can be looked up as variable_trable[k]. Strip a placeholder left by a
# previous run first, otherwise re-running the cell would shift every index by one.
layer_flags=list(variable_trable)
if layer_flags and layer_flags[0] is None:
    layer_flags=layer_flags[1:]
if len(layer_flags)!=len(input_shape_list):
    raise ValueError('variable_trable needs {} entries (conv1..fc8), got {}'.format(
        len(input_shape_list),len(layer_flags)))

# The first trainable layer decides which feature the frozen part has to produce:
#   input_shape__ : shape of the tensor that layer consumes
#   layer_index   : number of frozen layers in front of it (0 means "raw image")
for vindex,vi in enumerate(layer_flags):
    if vi:
        input_shape__=input_shape_list[vindex]
        layer_index=vindex
        break
else:
    # Fail here with a clear message instead of a NameError (or a stale value
    # from an earlier run) further down the notebook.
    raise ValueError('variable_trable must mark at least one layer as trainable')

variable_trable=[None]+layer_flags

In [3]:
def conv(input, kernel, biases, c_o, s_h, s_w, padding="VALID", group=1):
    '''2-D convolution with optional channel grouping, followed by a bias add.

    From https://github.com/ethereon/caffe-tensorflow

    Args:
        input: input tensor; axis 3 (channels) is the axis split into groups.
        kernel: filter tensor handed to tf.nn.conv2d
            ([height, width, in_channels, out_channels]); for group != 1 it is
            split along axis 3, i.e. by OUTPUT channel.
        biases: added to the convolution result.
        c_o: number of output channels. Not used by the computation; kept so
            that existing call sites stay valid.
        s_h, s_w: strides along height and width.
        padding: padding mode forwarded to tf.nn.conv2d.
        group: number of groups; 1 means an ordinary convolution.

    Returns:
        The convolution result plus `biases`.
    '''
    def convolve(i, k):
        return tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)

    if group == 1:
        # Ordinary convolution, no grouping.
        return convolve(input, kernel) + biases

    # Split the input by channel and the kernel by output channel into `group`
    # equal parts, convolve each pair independently, then stitch the results
    # back together along the channel axis.
    input_groups = tf.split(input, group, 3)
    kernel_groups = tf.split(kernel, group, 3)
    output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)]
    return tf.concat(output_groups, 3) + biases

def lrn(x):
    '''Apply local response normalization to x with this notebook's fixed settings.

    LRN layers are rarely used nowadays; batch normalization usually takes
    their place.
    '''
    lrn_settings = dict(depth_radius=2, alpha=2e-05, beta=0.75, bias=1.0)
    return tf.nn.local_response_normalization(x, **lrn_settings)
def maxpool(x):
    '''Max-pool x with a 3x3 window, stride 2 and VALID padding.

    Every max-pool layer in this AlexNet uses the same parameters, so they are
    fixed here instead of being passed in.
    '''
    window = [1, 3, 3, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='VALID')

def load_model_weight_and_biases(model_addr=None):
    '''
    Load the pretrained variable values of the model.

    model_addr: path of the .npy weight file; when omitted, the module-level
                MODEL_ADDR is used.
    Returns whatever object the file stores (np.load(...).item()). The rest of
    this notebook indexes it as weights[layer_name][0] for the weights and
    weights[layer_name][1] for the biases.
    '''
    if model_addr is None:
        model_addr = MODEL_ADDR
    # The weights are a pickled Python object wrapped in a NumPy array, which is
    # why .item() is needed. NumPy >= 1.16.3 refuses to unpickle unless
    # allow_pickle=True is passed explicitly.
    # SECURITY: unpickling can execute arbitrary code - only load weight files
    # from a source you trust.
    weights_dict = np.load(model_addr, encoding='bytes', allow_pickle=True).item()
    return weights_dict

In [4]:
def alexnet(x,net_data,keep_prob,train=True,extrater_layer=8):# pass train=False when extracting features
    '''
    Build AlexNet (or only its trainable tail) on top of x and return one tensor of it.

    train=False: feature-extraction mode. Every layer is built and the output of
                 layer number `extrater_layer` is returned (1 = conv1 block,
                 ..., 8 = fc8). extrater_layer=0 returns x itself.
    train=True:  training mode. Only the layers flagged in the module-level
                 variable_trable are built; x is fed straight into the first of
                 them and the fc8 logits are returned.

    x:         input tensor. In extraction mode the image batch; in training
               mode a tensor shaped like the input of the first trainable layer.
    net_data:  pretrained parameters, indexed as net_data[layer_name][0] for the
               weights and net_data[layer_name][1] for the biases.
    keep_prob: keep probability handed to tf.nn.dropout in front of fc6, fc7 and fc8.

    NOTE: in training mode the flagged layers are assumed to form one contiguous
    run that ends with fc8. A gap, or a frozen fc8, leaves an intermediate
    tensor undefined and raises a NameError.
    '''
    # input_flag stays True until x has been handed to the first layer that is built.
    input_flag=train
    op_list=[]
    #layer_1 conv1-relu-lrn-maxpool
    if variable_trable[1] or (not train):
        # conv1 has no predecessor, so whenever it is built it consumes x and
        # the following layers must use its output rather than x again.
        input_flag=False
        with tf.name_scope('layer_1'):
            CONV1_W=tf.Variable(net_data['conv1'][0],name='conv1_w',trainable=variable_trable[1])
            CONV1_b=tf.Variable(net_data['conv1'][1],name='conv1_b',trainable=variable_trable[1])
            conv1_=conv(x, CONV1_W, CONV1_b, c_o=96, s_h=4, s_w=4, padding="VALID", group=1)
            relu1_=tf.nn.relu(conv1_)
            norm1=lrn(relu1_)#55*55*96
            maxpool1_=maxpool(norm1)#27*27*96
            if not train:op_list.append(maxpool1_)
    #layer_2 conv2-relu-lrn-maxpool
    if variable_trable[2] or (not train):
        if input_flag:
            maxpool1_=x
            input_flag=False
        with tf.name_scope('layer_2'):
            CONV2_W=tf.Variable(net_data['conv2'][0],name='conv2_w',trainable=variable_trable[2])
            CONV2_b=tf.Variable(net_data['conv2'][1],name='conv2_b',trainable=variable_trable[2])
            conv2_=conv(maxpool1_, CONV2_W, CONV2_b, c_o=256, s_h=1, s_w=1, padding="SAME", group=2)#27*27*256
            relu2_=tf.nn.relu(conv2_)
            norm2=lrn(relu2_)
            maxpool2_=maxpool(norm2)
            if not train:op_list.append(maxpool2_)#13*13*256

    #layer_3 conv3-relu
    if variable_trable[3] or (not train):
        if input_flag:
            maxpool2_=x
            input_flag=False
        with tf.name_scope('layer_3'):
            CONV3_W=tf.Variable(net_data['conv3'][0],name='conv3_w',trainable=variable_trable[3])
            CONV3_b=tf.Variable(net_data['conv3'][1],name='conv3_b',trainable=variable_trable[3])
            conv3_=conv(maxpool2_, CONV3_W, CONV3_b, c_o=384, s_h=1, s_w=1, padding="SAME", group=1)#13*13*384
            relu3_=tf.nn.relu(conv3_)
            if not train:op_list.append(relu3_)#13*13*384
    #layer_4 conv4-relu
    if variable_trable[4] or (not train):
        if input_flag:
            relu3_=x
            input_flag=False
        with tf.name_scope('layer_4'):
            CONV4_W=tf.Variable(net_data['conv4'][0],name='conv4_w',trainable=variable_trable[4])
            CONV4_b=tf.Variable(net_data['conv4'][1],name='conv4_b',trainable=variable_trable[4])
            conv4_=conv(relu3_, CONV4_W, CONV4_b, c_o=384, s_h=1, s_w=1, padding="SAME", group=2)#13*13*384
            relu4_=tf.nn.relu(conv4_)
            if not train:op_list.append(relu4_)#13*13*384

    #layer_5 conv5-relu-maxpool
    if variable_trable[5] or (not train):
        if input_flag:
            relu4_=x
            input_flag=False
        with tf.name_scope('layer_5'):
            CONV5_W=tf.Variable(net_data['conv5'][0],name='conv5_w',trainable=variable_trable[5])
            CONV5_b=tf.Variable(net_data['conv5'][1],name='conv5_b',trainable=variable_trable[5])
            conv5_=conv(relu4_, CONV5_W, CONV5_b, c_o=256, s_h=1, s_w=1, padding="SAME", group=2)
            relu5_=tf.nn.relu(conv5_)#13*13*256
            maxpool5_=maxpool(relu5_)
            if not train:op_list.append(maxpool5_)#6*6*256
    #layer_6 flatten-dropout-fc6-relu
    if variable_trable[6] or (not train):
        if input_flag:
            maxpool5_=x
            input_flag=False
        with tf.name_scope('layer_6'):
            floatten_input=tf.reshape(maxpool5_,[-1,9216])#N*9216
            floatten_input=tf.nn.dropout(x=floatten_input,keep_prob=keep_prob)
            fc6_w=tf.Variable(net_data['fc6'][0],name='fc6_w',trainable=variable_trable[6])
            # Was mistakenly named 'fc7_b', which made the graph misleading to read.
            fc6_b=tf.Variable(net_data['fc6'][1],name='fc6_b',trainable=variable_trable[6])
            fc6_=tf.matmul(floatten_input,fc6_w)+fc6_b
            relu6_=tf.nn.relu(fc6_)#N*4096
            if not train:op_list.append(relu6_)
    #layer_7 dropout-fc7-relu
    if variable_trable[7] or (not train):
        if input_flag:
            relu6_=x
            input_flag=False
        with tf.name_scope('layer_7'):
            relu6_=tf.nn.dropout(x=relu6_,keep_prob=keep_prob)
            fc7_w=tf.Variable(net_data['fc7'][0],name='fc7_w',trainable=variable_trable[7])
            fc7_b=tf.Variable(net_data['fc7'][1],name='fc7_b',trainable=variable_trable[7])
            fc7_=tf.matmul(relu6_,fc7_w)+fc7_b
            relu7_=tf.nn.relu(fc7_)#N*4096
            if not train:op_list.append(relu7_)
    #layer_8 dropout-fc8
    if variable_trable[8] or (not train):
        if input_flag:
            relu7_=x
            input_flag=False
        with tf.name_scope('layer_8'):
            relu7_=tf.nn.dropout(x=relu7_,keep_prob=keep_prob)
            # The last fc layer is always trained from scratch: it is freshly
            # initialised with 5 outputs instead of loading net_data['fc8'].
            fc8_w=tf.Variable(tf.truncated_normal(shape=[4096,5],stddev=0.01),dtype=tf.float32,name='fc8_w',
                              trainable=variable_trable[8])
            fc8_b=tf.Variable(tf.zeros(shape=[5]),dtype=tf.float32,name='fc8_b',trainable=variable_trable[8])
            fc8_=tf.matmul(relu7_,fc8_w)+fc8_b#N*5
            if not train:op_list.append(fc8_)
    if train:
        return fc8_
    if extrater_layer==0:
        # "Output of layer 0" is the raw input. Without this guard the index
        # below would be -1 and silently return the fc8 logits.
        return x
    return op_list[extrater_layer-1]
    
    

In [5]:
# L2 penalty: sum of the squared entries of every variable currently registered as trainable.
# NOTE(review): tf.trainable_variables() is evaluated when this cell runs. In the cells shown,
# alexnet() is first called in a later cell, so the list is presumably still empty here and
# this penalty a constant 0 - confirm, and if so build it after the network is constructed.
reg_pen=tf.reduce_sum([tf.reduce_sum(tf.square(i)) for i in tf.trainable_variables()])

In [6]:
# Build the graph: placeholders, the trainable head, the frozen feature extractor,
# loss, optimizer, accuracy and the TensorBoard writers.
netdata=load_model_weight_and_biases()
with tf.name_scope('input'):
    X=tf.placeholder(dtype=tf.float32,shape=[None,227,227,3])   # raw images for the extractor
    Y=tf.placeholder(dtype=tf.float32,shape=[None,5])           # labels, one column per class
    FX=tf.placeholder(dtype=tf.float32,shape=input_shape__)     # precomputed features for the head
    KEEP_PROB=tf.placeholder(dtype=tf.float32)
    LEARNRATE=tf.placeholder(dtype=tf.float32)
with tf.name_scope('predict'):
    y_pre=alexnet(FX,netdata,KEEP_PROB)
    prob=tf.nn.softmax(y_pre)
    # Snapshot the variables of the trainable head now, before the extractor
    # below adds its own (unused-for-training) copies of the same layers.
    train_vars=tf.trainable_variables()
with tf.name_scope('extrator'):
    feature=alexnet(X,netdata,KEEP_PROB,train=False,extrater_layer=layer_index)
#loss
with tf.name_scope('loss'):
    loss=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_pre,labels=Y))
    # The L2 penalty has to be built AFTER the variables exist; computed before
    # the network is constructed it sums over an empty list and is always 0.
    reg_pen=tf.reduce_sum([tf.reduce_sum(tf.square(v)) for v in train_vars])
    loss+=1e-4*reg_pen
with tf.name_scope('trainer'):
    # Use the LEARNRATE placeholder so the value fed by the training loop (and
    # its decay) actually reaches the optimizer; a hard-coded 1e-3 ignored it.
    trainer=tf.train.AdamOptimizer(LEARNRATE).minimize(loss,var_list=train_vars)
with tf.name_scope('accuracy'):
    acc_c=tf.equal(tf.argmax(y_pre,1),tf.argmax(Y,1))
    accuracy=tf.reduce_mean(tf.cast(x=acc_c,dtype=tf.float32))
sess=tf.InteractiveSession()
# Write the graph definition once and close that writer right away; it used to
# be overwritten by the train writer below without ever being closed.
graph_writer=tf.summary.FileWriter('./mylog',sess.graph)
graph_writer.close()
init=tf.global_variables_initializer()
# Derive both log directories without modifying log_file_name, so re-running
# this cell always produces the same two directories.
train_log_name='train_'+log_file_name
test_log_name='test_'+log_file_name
writer=tf.summary.FileWriter(os.path.join('mylog',train_log_name))
writer_=tf.summary.FileWriter(os.path.join('mylog',test_log_name))
tf.summary.scalar('loss',loss)
tf.summary.scalar('accuracy',accuracy)
merge=tf.summary.merge_all()

In [7]:

# Load the preprocessed images and labels.
# SECURITY: pickle.load can execute arbitrary code - only open files you trust.
# The with-block makes sure the file handle is closed even if loading fails.
with open(image_file_name,'rb') as fp:
    flower_dict=pickle.load(fp)

# Convert to float32 on load, as the network placeholders are float32.
TR_IMG,TE_IMG=flower_dict['train']['image'].astype(np.float32),flower_dict['test']['image'].astype(np.float32)
TR_LAB,TE_LAB=flower_dict['train']['label'].astype(np.float32),flower_dict['test']['label'].astype(np.float32)

# The float32 copies above are all that is needed from here on.
del flower_dict

In [8]:
# Run the initializer op built earlier (init = tf.global_variables_initializer())
# so that every variable in the graph gets its initial value.
sess.run(init)

In [9]:
# Split the training set into loop_num full batches of batch_size images plus a
# remainder of mod_num images.
batch_size=50
train_pic_num=TR_IMG.shape[0]
loop_num,mod_num=divmod(train_pic_num,batch_size)

In [10]:
# Convert the training images into features batch by batch. The chunks are
# collected in a list and joined once at the end; stacking onto a growing array
# inside the loop would re-copy all earlier features on every iteration.
train_feature_chunks=[]
if mod_num>0:
    # The mod_num leftover images go first; skipped when the set divides evenly,
    # so the network is never run on an empty batch.
    train_feature_chunks.append(sess.run(feature,feed_dict={X:TR_IMG[:mod_num],KEEP_PROB:1.}))
strat_index=mod_num
print('start conver train image to feature')
for i in range(loop_num):
    if i%10==0:
        print ('conver image to feature:{}/{}'.format(i,loop_num))
    end_index=strat_index+batch_size
    # Only X and KEEP_PROB are fed: `feature` does not depend on the labels.
    f_=sess.run(feature,feed_dict={X:TR_IMG[strat_index:end_index],KEEP_PROB:1.})
    strat_index=end_index
    train_feature_chunks.append(f_)
TR_FEA=np.concatenate(train_feature_chunks,axis=0)
del train_feature_chunks


start conver train image to feature
conver image to feature:0/58
conver image to feature:10/58
conver image to feature:20/58
conver image to feature:30/58
conver image to feature:40/58
conver image to feature:50/58


In [11]:
# Drop the raw training images; the cells shown after this one only use the
# extracted features (TR_FEA) and the labels (TR_LAB).
del TR_IMG

In [12]:
# Convert the test images into features, batch by batch.
test_pic_num=TE_IMG.shape[0]
batch_size=50
loop_num,mod_num=divmod(test_pic_num,batch_size)
# Collect the chunks in a list and join them once at the end; stacking onto a
# growing array inside the loop would re-copy all earlier features every time.
test_feature_chunks=[]
if mod_num>0:
    # The mod_num leftover images go first; skipped when the set divides evenly,
    # so the network is never run on an empty batch.
    test_feature_chunks.append(sess.run(feature,feed_dict={X:TE_IMG[:mod_num],KEEP_PROB:1.}))
strat_index=mod_num
print('start conver test image to feature')
for i in range(loop_num):
    if i%10==0:
        print ('conver image to feature:{}/{}'.format(i,loop_num))
    end_index=strat_index+batch_size
    # Only X and KEEP_PROB are fed: `feature` does not depend on the labels.
    f_=sess.run(feature,feed_dict={X:TE_IMG[strat_index:end_index],KEEP_PROB:1.})
    strat_index=end_index
    test_feature_chunks.append(f_)
TE_FEA=np.concatenate(test_feature_chunks,axis=0)
del test_feature_chunks

start conver test image to feature
conver image to feature:0/14
conver image to feature:10/14


In [13]:
# Drop the raw test images; the cells shown after this one only use the
# extracted features (TE_FEA) and the labels (TE_LAB).
del TE_IMG

In [20]:

# ---------------- save the extracted features ----------------
if save_feature:
    print('saving feature file ......')
    feature_root_file=r'feature_file'
    # Create the output directory on first use; open() below does not create it.
    if not os.path.isdir(feature_root_file):
        os.makedirs(feature_root_file)
    feature_file=os.path.join(feature_root_file,('image_feature_layer'+str(layer_index)+'output.pkl'))
    feature_dict={'train':{},'test':{}}
    feature_dict['train']['feature']=TR_FEA.astype(np.float32)
    feature_dict['test']['feature']=TE_FEA.astype(np.float32)
    # Labels are stored as int8 to keep the file small.
    feature_dict['train']['label']=TR_LAB.astype(np.int8)
    feature_dict['test']['label']=TE_LAB.astype(np.int8)
    # The with-block closes the file even if pickling fails half-way.
    with open(feature_file,'wb') as fp:
        pickle.dump(file=fp,obj=feature_dict)
    del feature_dict
    print ('save feature file done!')
    

saving feature file ......
save feature file done!


In [None]:
# ---------------- train the network ----------------
kp_te,kp_tr=1.,0.5      # dropout keep probability for evaluation / training
lr=1e-3
sample_num=128          # rows drawn per step
# Never ask for more rows than a set holds: np.random.choice(..., replace=False)
# raises a ValueError in that case.
train_sample_num=min(sample_num,TR_FEA.shape[0])
test_sample_num=min(sample_num,TE_FEA.shape[0])
for i in range(1000):
    mask=np.random.choice(TR_FEA.shape[0],train_sample_num,replace=False)
    x_,y_=TR_FEA[mask],TR_LAB[mask]

    # Log the metrics on this mini-batch before updating on it. Dropout is
    # active here (kp_tr), so the reported train accuracy is a noisy estimate.
    loss_,acc_,m_=sess.run([loss,accuracy,merge],feed_dict={FX:x_,Y:y_,KEEP_PROB:kp_tr,LEARNRATE:lr})
    writer.add_summary(m_,i)
    print('epoch:{},loss:{},train accuracy:{}'.format(i,loss_,acc_))
    # Ten optimizer steps on the same mini-batch.
    for j in range(10):
        sess.run(trainer,feed_dict={FX:x_,Y:y_,KEEP_PROB:kp_tr,LEARNRATE:lr})
    if i%5==0:
        # Every 5th iteration: evaluate on a random test sample with dropout off.
        mask=np.random.choice(TE_FEA.shape[0],test_sample_num,replace=False)
        x_,y_=TE_FEA[mask],TE_LAB[mask]
        loss_,acc_,m_=sess.run([loss,accuracy,merge],feed_dict={FX:x_,Y:y_,KEEP_PROB:kp_te,LEARNRATE:lr})
        writer_.add_summary(m_,i)
        if acc_>0.8:
            # Decay the learning rate once the test accuracy is good, floor at 1e-5.
            # NOTE(review): lr only takes effect if the optimizer behind `trainer`
            # was built from the LEARNRATE placeholder - confirm.
            lr=max(0.99*lr,1e-5)
            print ('epoch {},learning rate:{}'.format(i,lr))
        print('--epoch:{},test loss:{},test accuracy:{}'.format(i,loss_,acc_))

In [None]:
# Close the two summary FileWriters created earlier (train and test logs).
writer.close()
writer_.close()