# 猫狗大战

In [1]:
import os
import numpy as np
from PIL import Image
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline

## 生成图片路径和标签的List

In [2]:
#step1：获取.../data/Cat下所有的猫图路径名，存放到cats中，同时贴上标签0，存放到label_cats中。狗图同理。
def get_files(file_dir):
    """Collect cat and dog image paths with their labels, shuffled together.

    Args:
        file_dir: Directory that contains a ``Cat`` and a ``Dog`` sub-folder.
            Every entry returned by ``os.listdir`` in those folders is used.

    Returns:
        A tuple ``(image_list, label_list)`` of two equally long lists in the
        same shuffled order. ``image_list`` holds the path strings
        (``file_dir + '/Cat/' + name`` or ``file_dir + '/Dog/' + name``) and
        ``label_list`` holds floats: 0.0 for a cat, 1.0 for a dog.
    """
    cat_dir = file_dir + '/Cat'
    dog_dir = file_dir + '/Dog'

    # Step 1: build the path lists and the matching label lists (cat=0, dog=1).
    cats = [cat_dir + '/' + name for name in os.listdir(cat_dir)]
    label_cats = [0] * len(cats)
    dogs = [dog_dir + '/' + name for name in os.listdir(dog_dir)]
    label_dogs = [1] * len(dogs)

    # Step 2: merge both classes, then shuffle paths and labels in lockstep.
    image_list = np.hstack((cats, dogs))
    label_list = np.hstack((label_cats, label_dogs))

    # One (path, label) row per image. Mixing both columns in a single array
    # stores the labels as strings, which is why they are converted back below.
    pairs = np.array([image_list, label_list]).transpose()
    np.random.shuffle(pairs)

    # Split the shuffled rows back into two lists; labels become floats.
    image_list = list(pairs[:, 0])
    label_list = [float(tag) for tag in pairs[:, 1]]
    return image_list, label_list

## 生成Batch

In [3]:
#step1：将上面生成的List传入get_batch() ，转换类型，产生一个输入队列queue，因为img和lab是分开的，所以使用tf.train.slice_input_producer()，然后用tf.read_file()从队列中读取图像
def get_batch(image, label, image_W, image_H, batch_size, capacity):
    """Build a queue-based input pipeline that yields batches of JPEG images.

    Args:
        image: List of image file paths.
        label: List of numeric labels aligned with ``image``.
        image_W: Target image width in pixels.
        image_H: Target image height in pixels.
        batch_size: Number of images per batch.
        capacity: Maximum number of elements held by the batching queue.

    Returns:
        A tuple ``(image_batch, label_batch)``:
        image_batch: 4-D tensor ``[batch_size, image_H, image_W, 3]`` of
            per-image standardized pixels.
        label_batch: 1-D ``tf.int32`` tensor ``[batch_size]``.

    Note:
        The returned tensors only produce data once the queue runners have
        been started (``tf.train.start_queue_runners``) AFTER this function
        has been called.
    """
    # Step 1: convert the Python lists to tensors and slice them into a queue.
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)

    # Paths and labels are separate lists, so slice them together to keep
    # every path paired with its label.
    input_queue = tf.train.slice_input_producer([image, label])

    label = input_queue[1]                           # labels need no decoding
    image_contents = tf.read_file(input_queue[0])    # raw file bytes

    # Step 2: decode. All files must share one format; only JPEG is handled.
    image = tf.image.decode_jpeg(image_contents, channels=3)

    # Step 3: preprocessing. resize_image_with_crop_or_pad expects
    # (image, target_height, target_width), so height is passed first.
    image = tf.image.resize_image_with_crop_or_pad(image, image_H, image_W)
    image = tf.image.per_image_standardization(image)

    # Step 4: assemble batches. `capacity` is forwarded so the caller's queue
    # size is actually honoured instead of silently using the default.
    image_batch, label_batch = tf.train.batch([image, label],
                                              batch_size=batch_size,
                                              num_threads=32,
                                              capacity=capacity)
    return image_batch, label_batch

## 测试


In [4]:
# Step 1: input-pipeline settings used by the cells below.
# NOTE(review): the original note spoke of 2 images per batch, but BATCH_SIZE is 1.
BATCH_SIZE = 1                      # images per batch
CAPACITY = 256                      # queue capacity handed to get_batch()
IMG_W = 208                         # image width in pixels
IMG_H = 208                         # image height in pixels
train_dir = 'D:/CatVSDog/train'     # must contain 'Cat' and 'Dog' sub-folders; machine-specific absolute path


注意：官方文档中 `tf.train.start_queue_runners()` 可以接收 `sess` 和 `coord` 两个参数，但在这里显式传入 `sess` 会报错，因此只传入 `coord`，此时使用的是 `with tf.Session()` 提供的默认会话。
利用 try—except—finally 结构来执行队列操作（官网推荐的方法），避免程序卡死。`i<2`：先执行两次队列操作，每一次取出 2 张图放进 batch 里面，然后用 imshow 显示出来查看效果。

## 建立神经网络

In [5]:
def weight_variable(shape):
    """Return a tf.Variable of the given shape, initialised from a truncated normal (stddev 0.1).

    Used for convolution filters and fully connected weight matrices.
    """
    initial_value = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial_value)
def bias_vairable(shape):
    """Return a tf.Variable of the given shape with every element set to 0.1.

    The misspelled name is kept because other cells call it under this name.
    """
    initial_value = tf.constant(0.1, shape=shape)
    return tf.Variable(initial_value)
def conv2d(x, W):
    """Convolve input `x` with filter `W` using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
def max_pool_2x2(x):
    """Apply max pooling with a 2x2 window, stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')

In [6]:
# Network inputs: a batch of 208x208 RGB images and their two-class label rows.
x = tf.placeholder(dtype=tf.float32, shape=[None, 208, 208, 3])
y = tf.placeholder(dtype=tf.float32, shape=[None, 2])

In [7]:
# Dropout keep probability, fed at run time.
keep_prob = tf.placeholder(dtype=tf.float32)

In [8]:
# Layer 1: 5x5 convolution (3 -> 32 channels) + ReLU, followed by 2x2 max pooling.
W_conv1 = weight_variable([5, 5, 3, 32])
b_conv1 = bias_vairable([32])
conv1_pre_activation = conv2d(x, W_conv1) + b_conv1
h_conv1 = tf.nn.relu(conv1_pre_activation)
h_pool1 = max_pool_2x2(h_conv1)   # 208x208x32 -> 104x104x32

In [9]:
# Layer 2: 5x5 convolution (32 -> 64 channels) + ReLU, followed by 2x2 max pooling.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_vairable([64])
conv2_pre_activation = conv2d(h_pool1, W_conv2) + b_conv2
h_conv2 = tf.nn.relu(conv2_pre_activation)
h_pool2 = max_pool_2x2(h_conv2)   # 104x104x64 -> 52x52x64

In [10]:
# Layer 3: 5x5 convolution (64 -> 128 channels) + ReLU, followed by 2x2 max pooling.
W_conv3 = weight_variable([5, 5, 64, 128])
b_conv3 = bias_vairable([128])
conv3_pre_activation = conv2d(h_pool2, W_conv3) + b_conv3
h_conv3 = tf.nn.relu(conv3_pre_activation)
h_pool3 = max_pool_2x2(h_conv3)   # 52x52x128 -> 26x26x128

In [11]:
# Layer 4: 5x5 convolution (128 -> 256 channels) + ReLU, followed by 2x2 max pooling.
W_conv4 = weight_variable([5, 5, 128, 256])
b_conv4 = bias_vairable([256])
conv4_pre_activation = conv2d(h_pool3, W_conv4) + b_conv4
h_conv4 = tf.nn.relu(conv4_pre_activation)
h_pool4 = max_pool_2x2(h_conv4)   # 26x26x256 -> 13x13x256

In [12]:
# Layer 5: first fully connected layer (13*13*256 -> 1024) + ReLU + dropout.
flat_dim = 13 * 13 * 256
W_fc1 = weight_variable([flat_dim, 1024])
b_fc1 = bias_vairable([1024])

# Flatten each 13x13x256 feature map into one row vector.
h_pool4_flat = tf.reshape(h_pool4, [-1, flat_dim])

fc1_pre_activation = tf.matmul(h_pool4_flat, W_fc1) + b_fc1
h_fc1 = tf.nn.relu(fc1_pre_activation)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)


In [13]:
# Output layer: second fully connected layer (1024 -> 2 classes).
# (The original comment called this "layer 7"; it directly follows layer 5.)
W_fc2 = weight_variable([1024, 2])
b_fc2 = bias_vairable([2])
fc2_product = tf.matmul(h_fc1_drop, W_fc2)
logits = fc2_product + b_fc2          # unnormalised class scores
prediction = tf.nn.softmax(logits)    # class probabilities

In [14]:
# Mean softmax cross-entropy over the batch.
# softmax_cross_entropy_with_logits_v2 applies softmax internally, so it must
# receive the raw `logits`. Passing the already-softmaxed `prediction` would
# apply softmax twice and flatten the gradients.
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits))

In [15]:
# Adam update step minimising `loss`.
# The previous step size of 0.2 is orders of magnitude above what Adam
# normally tolerates and makes the weights diverge, so a conventional small
# learning rate is used instead.
LEARNING_RATE = 1e-4
train_step = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)

In [16]:
# Op that initialises the graph's global variables; run it once per session before training.
# NOTE(review): presumably created after the optimizer cell on purpose, so that variables the
# optimizer adds are covered as well — confirm before reordering cells.
init = tf.global_variables_initializer()

In [17]:
# Accuracy: fraction of samples whose predicted class equals the labelled class.
true_class = tf.argmax(y, 1)                     # index of the 1 in each one-hot label row
predicted_class = tf.argmax(prediction, 1)       # index of the most probable class
correct_prediction = tf.equal(true_class, predicted_class)
# Cast booleans to float32 (False=0, True=1) and average them.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [None]:
# Step 2: build the input pipeline BEFORE the queue runners are started.
# tf.train.start_queue_runners only launches runners that already exist in the
# graph, so creating the queues afterwards leaves them empty and any fetch of
# image_batch/label_batch blocks.
image_list, label_list = get_files(train_dir)          # image paths and their labels
image_batch, label_batch = get_batch(image_list, label_list,
                                     IMG_W, IMG_H, BATCH_SIZE, CAPACITY)

# Step 3: open a session and use tf.train.Coordinator together with
# tf.train.start_queue_runners(coord=coord) to manage the queue threads.
with tf.Session() as sess:
    sess.run(init)
    i = 0
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    # try/except/finally guarantees the queue threads are stopped and joined
    # even if fetching fails.
    try:
        for epoch in range(1):                          # number of epochs
            for batch in range(1):                      # batches per epoch
                while not coord.should_stop() and i < 1:
                    # Pull one batch out of the queue as NumPy arrays while the
                    # runner threads are still alive.
                    img, label = sess.run([image_batch, label_batch])

                    # One-hot encode the integer labels: (batch,) -> (batch, 2).
                    label_onehot = np.eye(2)[label.reshape(-1)].astype(int)

                    print("epoch" + str(epoch) + " done ")
                    i += 1
    except tf.errors.OutOfRangeError:
        print('done!')
    finally:
        coord.request_stop()
        coord.join(threads)

# `img`, `label` and `label_onehot` stay available for the following cells.
# Note: `img` is per-image standardized, so it is not directly suitable for
# plt.imshow without rescaling.

Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensor_slices(tuple(tensor_list)).shuffle(tf.shape(input_tensor, out_type=tf.int64)[0]).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.range(limit).shuffle(limit).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensor_slices(input_tensor).shuffle(tf.shape(input_tensor, out_type=tf.int64)[0]).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensors(tensor).repeat(num_epochs)`.
Instructions for updating:
To 

# One-hot encode the fetched integer labels, (batch,) -> (batch, 2), so they
# match the `y` placeholder. Derived here so this cell does not depend on a
# `label_onehot` name that no earlier cell is guaranteed to define.
label_onehot = np.eye(2, dtype=np.float32)[label.reshape(-1)]

with tf.Session() as sess:
    sess.run(init)                                          # initialise all variables
    for epoch in range(1):                                  # number of epochs
        for batch in range(1):                              # batches per epoch
            # Pixel data and one-hot labels for this batch.
            batch_xs, batch_ys = img, label_onehot
            # One optimisation step; dropout keeps 60% of the FC activations.
            sess.run(train_step, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 0.6})
        # TODO: evaluate `accuracy` on a held-out cat/dog set here (with keep_prob 1.0).
        # The earlier commented-out evaluation referenced an MNIST dataset that this
        # notebook never loads.