## 从头开始构建vgg
本文目的是根据Keras来构建下vgg16

In [1]:
import os, sys
current_directory = os.getcwd()
LESSON_HOME_DIR = current_directory
DATA_HOME_DIR = os.path.join(current_directory,"../data/redux")
print(DATA_HOME_DIR)
print(LESSON_HOME_DIR)

/Users/zhuanxu/PycharmProjects/udacity/fast-ai/lesson1/../data/redux
/Users/zhuanxu/PycharmProjects/udacity/fast-ai/lesson1


In [None]:
%cd $DATA_HOME_DIR
!tree -d

In [None]:
import os, json
from glob import glob
import numpy as np
from scipy import misc, ndimage
from scipy.ndimage.interpolation import zoom

from keras import backend as K
from keras.layers.normalization import BatchNormalization
from keras.utils.data_utils import get_file
from keras.models import Sequential
from keras.layers.core import Flatten, Dense, Dropout, Lambda,Activation
from keras.layers.convolutional import Conv2D, MaxPooling2D, ZeroPadding2D
from keras.layers.pooling import GlobalAveragePooling2D
from keras.optimizers import SGD, RMSprop, Adam
from keras.preprocessing import image

In [None]:
vgg_mean = np.array([123.68, 116.779, 103.939], dtype=np.float32).reshape((3,1,1))
def vgg_preprocess(x):
    """
        Subtracts the mean RGB value, and transposes RGB to BGR.
        The mean RGB was computed on the image set used to train the VGG model.

        Args: 
            x: Image array (height x width x channels)
        Returns:
            Image array (height x width x transposed_channels)
    """
    x = x - vgg_mean
    return x[:, ::-1] # reverse axis rgb->bgr

In [None]:
print(vgg_mean.shape,vgg_mean)

下面开始是Vgg16的模型
![](vgg16_original.png)

通过5个卷积层提取出特征，然后再通过一个全连接做分类，下面我们开始构建的

In [None]:
import keras
from keras import backend as K
print(keras.__version__)

In [None]:
print(K.image_data_format())
print(K.image_dim_ordering()) # image_dim_ordering 和 image_data_format 是一致的

In [None]:
# K.set_image_data_format("channels_last")
# K.set_image_dim_ordering("tf")

In [None]:
def ConvBlock(model, layers, filters):
    """
        Adds a specified number of ZeroPadding and Covolution layers
        to the model, and a MaxPooling layer at the very end.

        Args:
            layers (int):   The number of zero padded convolution layers
                            to be added to the model.
            filters (int):  The number of convolution filters to be 
                            created for each layer.
    """
    for i in range(layers):
        # the dimensionality of the output space
        model.add(Conv2D(filters,kernel_size=(3,3))) # padding='valid'
        model.add(Activation('relu'))          
        model.add(MaxPooling2D(pool_size=(2, 2)))
    return model

In [None]:
model = Sequential()
# 第一层是一个对输入预处理的层，将图片0-255减去一个平均数，然后将 rgb->bgr
model.add(Lambda(vgg_preprocess, input_shape=(3,224,224), output_shape=(3,224,224)))
ConvBlock(model,2, 64)

In [None]:
model.layers

In [None]:
for layer in model.layers:
    print(layer.input)

## keras自带的VGG16模型

In [2]:
from keras import applications
from keras import backend as K
# 我们看下自带的模型是如何的

Using TensorFlow backend.


In [3]:
# K.set_image_data_format("channels_last")

In [3]:
print(K.image_data_format())
print(K.image_dim_ordering())

channels_last
tf


In [4]:
image_witdth = 150
image_height = 150
input_shape = (image_witdth, image_height, 3)

In [5]:
vgg16 = applications.VGG16(include_top=False,weights='imagenet',input_shape=input_shape)

In [15]:
%cd $DATA_HOME_DIR

#Set path to sample/ path if desired
path = DATA_HOME_DIR + '/' #'/sample/'
test_path = DATA_HOME_DIR + '/test/' #We use all the test data
results_path=DATA_HOME_DIR + '/results/'
train_path=path + '/train/'
valid_path=path + '/valid/'
utils_path = current_directory + "/../utils"

/Users/zhuanxu/PycharmProjects/udacity/fast-ai/data/redux


In [21]:
from keras.preprocessing.image import ImageDataGenerator

In [22]:
gen = ImageDataGenerator(featurewise_center=True) #featurewise_center: set input mean to 0 over the dataset.

In [23]:
import numpy as np
gen.mean = np.array([103.939, 116.779, 123.68],dtype=np.float32).reshape(1,1,3)

In [39]:
for layer in vgg16.layers:
    print(layer)
    print(layer.input_shape) # channels_last
    print(layer.output_shape) # channels_last
# 此处第一第二层干了什么事情？不是很明白呢，看代码中其实就是一个简单的input，没什么特殊处理

<keras.engine.topology.InputLayer object at 0x117ea6438>
(None, 150, 150, 3)
(None, 150, 150, 3)
<keras.layers.convolutional.Conv2D object at 0x106dbcfd0>
(None, 150, 150, 3)
(None, 150, 150, 64)
<keras.layers.convolutional.Conv2D object at 0x117ee5d30>
(None, 150, 150, 64)
(None, 150, 150, 64)
<keras.layers.pooling.MaxPooling2D object at 0x117ea6470>
(None, 150, 150, 64)
(None, 75, 75, 64)
<keras.layers.convolutional.Conv2D object at 0x117ea67b8>
(None, 75, 75, 64)
(None, 75, 75, 128)
<keras.layers.convolutional.Conv2D object at 0x117ef5358>
(None, 75, 75, 128)
(None, 75, 75, 128)
<keras.layers.pooling.MaxPooling2D object at 0x117f2c550>
(None, 75, 75, 128)
(None, 37, 37, 128)
<keras.layers.convolutional.Conv2D object at 0x117f61e80>
(None, 37, 37, 128)
(None, 37, 37, 256)
<keras.layers.convolutional.Conv2D object at 0x117f6e278>
(None, 37, 37, 256)
(None, 37, 37, 256)
<keras.layers.convolutional.Conv2D object at 0x117f9af60>
(None, 37, 37, 256)
(None, 37, 37, 256)
<keras.layers.pooli

In [25]:
# layer.input_shape
# layer.output_shape
batch_size = 8

In [29]:
generator = gen.flow_from_directory(
        test_path,
        target_size=(image_witdth, image_height),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False)

Found 50 images belonging to 1 classes.


In [30]:
print(generator.class_indices)
print(generator.samples)
print(generator.batch_size)

{'unknown': 0}
50
8


In [31]:
# 序
bottleneck_features_train = vgg16.predict_generator(
        generator, steps = generator.samples // generator.batch_size)
# 其实这一步就是简单的提取特征，现在我不明白的是Vgg16中前两层是干啥用的？

In [32]:
len(bottleneck_features_train) # 48 // 8 = 6

48

In [35]:
print(type(bottleneck_features_train[0]))
print(bottleneck_features_train[0].shape)
# 所以说其实就是提取了特征
# 然后我们希望输入的

<class 'numpy.ndarray'>
(4, 4, 512)


In [41]:
i = 0
for batch in generator:
    i+=1
    if i == 1:
        break

In [48]:
print(batch[0].shape)
print(batch[1].shape)

(8, 150, 150, 3)
(8, 1)


In [94]:
batch[0][0][...,0].shape

(150, 150)

## numpy array index 说明
一直对这块不是很理解，导致经常出错于是有了本文，参照：https://docs.scipy.org/doc/numpy-1.12.0/reference/arrays.indexing.html

最基本的形式是：i:j:k ，i是开始，j是结束，k是step [i,i+k,i+2k,...j) 此处j是不包含的

In [51]:
x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [52]:
# 前开后闭
x[3:7:1]

array([3, 4, 5, 6])

In [53]:
# 当 i 和 j 中有一个是负数的时候，用 n+i n+j 代替， n是元素个数
x[-2:10:1]

array([8, 9])

In [56]:
# 当 k 小于 0 的时候， [i:j:k] => [i,i+k,i+2k] 倒着数
x[-2:3:-2]

array([8, 6, 4])

In [57]:
# :: 和 : 是一个意思，表示选择所有的数据
x[4:]

array([4, 5, 6, 7, 8, 9])

In [58]:
x = np.array([[[1],[2],[3]], [[4],[5],[6]]])

In [59]:
x.shape

(2, 3, 1)

In [64]:
x[1:2]

array([[[4],
        [5],
        [6]]])

In [69]:
# 此处 ... 能
print(x[...,0])
print(x[...,0].shape)

[[1 2 3]
 [4 5 6]]
(2, 3)


## numpy.broadcast
broadcast 的意思是numpy对于不同shape的array怎么处理

https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html

In [70]:
# 最简单的同shape的运算
a = np.array([1.0, 2.0, 3.0])
b = np.array([2.0, 2.0, 2.0])
a * b

array([ 2.,  4.,  6.])

当两个array的shape 不同的时候，是从最后一个 dimension 开始比较的
例子：
```
Image  (3d array): 256 x 256 x 3
Scale  (1d array):             3
Result (3d array): 256 x 256 x 3
```

当比较过程中其中一个是维度是1的时候，将会将其 expand 到另一个大小，如：
```
A      (4d array):  8 x 1 x 6 x 1
B      (3d array):      7 x 1 x 5
Result (4d array):  8 x 7 x 6 x 5
```
更多的例子：
```
A      (2d array):  5 x 4
B      (1d array):      1
Result (2d array):  5 x 4

A      (2d array):  5 x 4
B      (1d array):      4
Result (2d array):  5 x 4

A      (3d array):  15 x 3 x 5
B      (3d array):  15 x 1 x 5
Result (3d array):  15 x 3 x 5

A      (3d array):  15 x 3 x 5
B      (2d array):       3 x 5
Result (3d array):  15 x 3 x 5

A      (3d array):  15 x 3 x 5
B      (2d array):       3 x 1
Result (3d array):  15 x 3 x 5
```

下面是一些不能 extend 的例子
```
A      (1d array):  3
B      (1d array):  4 # trailing dimensions do not match

A      (2d array):      2 x 1
B      (3d array):  8 x 4 x 3 # second from last dimensions mismatched

```

In [72]:
## 一些case
x = np.arange(4)
xx = x.reshape(4,1)
y = np.ones(5)
z = np.ones((3,4))

In [76]:
print(x)
print(xx)
print(y)
print(z)

[0 1 2 3]
[[0]
 [1]
 [2]
 [3]]
[ 1.  1.  1.  1.  1.]
[[ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]]


In [77]:
print(x.shape,y.shape,xx.shape,z.shape)

(4,) (5,) (4, 1) (3, 4)


In [79]:
# shape does't match (4,) (5,)
# x + y

In [81]:
## (4,1) + (5,)
## 出 xx 从 (4,1) 扩展到了  (4,5)
print(xx + y)
(xx + y).shape


[[ 1.  1.  1.  1.  1.]
 [ 2.  2.  2.  2.  2.]
 [ 3.  3.  3.  3.  3.]
 [ 4.  4.  4.  4.  4.]]


(4, 5)

In [82]:
# (4,) + (3,4) => (3,4)
x+z

array([[ 1.,  2.,  3.,  4.],
       [ 1.,  2.,  3.,  4.],
       [ 1.,  2.,  3.,  4.]])

![](http://scipy.github.io/old-wiki/pages/image0020619.gif?action=AttachFile&do=get&target=image002.gif![image.png](attachment:image.png)
![](http://scipy.github.io/old-wiki/pages/image004de9e.gif?action=AttachFile&do=get&target=image004.gif)

In [83]:
## 数组乘法的例子
a = np.array([0.0, 10.0, 20.0, 30.0])
b = np.array([1.0, 2.0, 3.0])

In [85]:
print(a.shape)
print(b.shape)

(4,)
(3,)


In [92]:
a[:,np.newaxis] # shape (4,1)

array([[  0.],
       [ 10.],
       [ 20.],
       [ 30.]])

In [93]:
# (4,1) * (3,) 扩展方法如上图
a[:,np.newaxis] * b

array([[  0.,   0.,   0.],
       [ 10.,  20.,  30.],
       [ 20.,  40.,  60.],
       [ 30.,  60.,  90.]])