In [3]:
"""导入狗数据集"""
from sklearn.datasets import load_files       
from keras.utils import np_utils
import numpy as np
from glob import glob   # 查找符合特定规则的文件路径名

# Helper used to load the train, test and validation datasets.
def load_dataset(path, num_classes=133):
    """Load image file paths and one-hot labels from a directory of class folders.

    Parameters
    ----------
    path : str
        Root directory handed to ``sklearn.datasets.load_files``.
    num_classes : int, optional
        Width of the one-hot encoding. Defaults to 133, the value that was
        previously hard-coded in this function.

    Returns
    -------
    dog_files : numpy.ndarray
        The ``'filenames'`` entry returned by ``load_files``.
    dog_targets : numpy.ndarray
        The ``'target'`` entry, one-hot encoded with ``np_utils.to_categorical``.
    """
    # Only 'filenames' and 'target' are used below, so skip reading the raw
    # bytes of every image into memory (load_content defaults to True).
    data = load_files(path, load_content=False)
    dog_files = np.array(data['filenames'])
    dog_targets = np_utils.to_categorical(np.array(data['target']), num_classes)
    return dog_files, dog_targets

# Load the train, validation and test splits: one load_dataset call per
# split directory, unpacked into (files, targets) pairs in that order.
(train_files, train_targets), (valid_files, valid_targets), (test_files, test_targets) = [
    load_dataset(split_dir)
    for split_dir in (
        r'E:\USTC\AI_UAV\homework\week8\dogImages\train',
        r'E:\USTC\AI_UAV\homework\week8\dogImages\valid',
        r'E:\USTC\AI_UAV\homework\week8\dogImages\test',
    )
]

# Build the list of dog breed names from the training class folders.
def breed_name_from_dir(class_dir):
    """Return the breed name encoded in a class-folder path.

    The last path component is used (forward or backward slashes, with or
    without a trailing separator). A leading numeric index followed by a dot
    is dropped when present; folders are presumably named like
    ``001.Affenpinscher`` -- verify against the dataset on disk.

    This replaces a fixed ``item[72:-1]`` slice, whose offset only works for
    one specific absolute path length and does not match the glob pattern
    used below.
    """
    folder = class_dir.replace('\\', '/').rstrip('/').rsplit('/', 1)[-1]
    index, dot, breed = folder.partition('.')
    return breed if dot and index.isdigit() else folder

dog_names = [breed_name_from_dir(class_dir)
             for class_dir in sorted(glob('E:/USTC/AI_UAV/homework/week8/dogImages/train/*/'))]
                                                 
# Print summary statistics for the dataset (dog_names is a list of
# breed-name strings). The lines are joined and emitted by a single print.
print('\n'.join([
    'There are %d total dog categories.' % len(dog_names),
    'There are %s total dog images.\n' % len(np.hstack([train_files, valid_files, test_files])),
    'There are %d training dog images.' % len(train_files),
    'There are %d validation dog images.' % len(valid_files),
    'There are %d test dog images.'% len(test_files),
]))

There are 133 total dog categories.
There are 8351 total dog images.

There are 6680 training dog images.
There are 835 validation dog images.
There are 836 test dog images.


In [12]:
"""导入人脸数据集"""
import random
random.seed(8675309)

# Load the human-face file names and shuffle them.
# glob returns matches in arbitrary, filesystem-dependent order, so the list
# is sorted first; otherwise the fixed seed above would not produce the same
# shuffled order on every machine.
human_files = np.array(sorted(glob("E:/USTC/AI_UAV/homework/week8/lfw/*/*")))
random.shuffle(human_files)

# Print the number of files found.
print('There are %d total human images.' % len(human_files))

There are 13233 total human images.


In [13]:
"""将图像转换为CNN的输入张量(nb_samples, rows, columns, channels)"""
from keras.preprocessing import image                  
from tqdm import tqdm

def path_to_tensor(img_path):
    """Load one image file and return it as a 4-D array with a batch axis of 1.

    The image is opened with ``image.load_img`` using ``target_size=(224, 224)``
    and converted with ``image.img_to_array``; the notebook output shows the
    result has shape ``(1, 224, 224, 3)``.
    """
    pil_image = image.load_img(img_path, target_size=(224, 224))
    pixels = image.img_to_array(pil_image)
    # Prepend the batch axis: (rows, cols, channels) -> (1, rows, cols, channels).
    return pixels[np.newaxis, ...]

def paths_to_tensor(img_paths):
    """Convert several image paths into one stacked 4-D array.

    Each path goes through ``path_to_tensor`` (with a tqdm progress bar) and
    the per-image arrays are stacked along the first axis with ``np.vstack``.
    """
    batches = []
    for img_path in tqdm(img_paths):
        batches.append(path_to_tensor(img_path))
    return np.vstack(batches)

# Sanity check: shape for a single image, then for a batch of the first ten.
print(path_to_tensor(human_files[0]).shape)
print(paths_to_tensor(human_files[:10]).shape)

(1, 224, 224, 3)


100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 35.19it/s]


(10, 224, 224, 3)


In [4]:
"""定义模型"""
from keras.applications.resnet50 import ResNet50

# Instantiate the full ResNet50 network with weights='imagenet'
# (per the Keras applications API this loads ImageNet pre-trained weights).
ResNet50_model = ResNet50(weights='imagenet')

In [25]:
"""模型测试"""
from keras.applications.resnet50 import preprocess_input, decode_predictions
def ResNet50_predict_labels(img_path):
    """Return the index of the highest-scoring ResNet50 output for one image.

    The image at ``img_path`` is turned into a tensor by ``path_to_tensor``,
    passed through ``preprocess_input`` and then through
    ``ResNet50_model.predict``; the argmax of that output is returned.
    """
    batch = path_to_tensor(img_path)
    # Original author's note: the preprocessed batch is still
    # (1, 224, 224, 3) but normalised with its colour channels reordered
    # -- confirm against the Keras preprocess_input documentation.
    prepared = preprocess_input(batch)
    # Original author's note: the output is a 1 x 1000 array of class probabilities.
    scores = ResNet50_model.predict(prepared)
    # Index of the largest score.
    return np.argmax(scores)

In [26]:
"""狗检测"""
def dog_detector(img_path):
    """Return a truthy value when ResNet50 predicts a class index in 151..268.

    The inclusive range 151-268 is presumably the block of ImageNet class
    indices that correspond to dog breeds -- confirm against the ImageNet
    class index.
    """
    label = ResNet50_predict_labels(img_path)
    return 151 <= label <= 268

In [27]:
# Evaluate dog_detector on the first 100 human images and the first 100
# training dog images.
human_files_short = human_files[:100]
dog_files_short = train_files[:100]

# Counters and error lists are initialised here but not used in this cell.
num_human2 = 0
num_dog2 = 0
error_human_pic2 = []
error_dog_pic2 = []

# Fraction of each sample for which dog_detector returns a truthy value.
num_human_per2 = np.mean(list(map(dog_detector, human_files_short)))
num_dog_per2 = np.mean(list(map(dog_detector, dog_files_short)))
print(num_human_per2,num_dog_per2)

0.01 1.0


In [None]:
# 迁移学习

In [17]:
"""加载特征向量"""
import numpy as np

# Load the pre-computed ResNet50 bottleneck features for each split.
# np.load on an .npz archive returns a file-backed object; using it as a
# context manager closes the archive once the three arrays have been read.
# NOTE: bottleneck_features is therefore closed after this block -- index it
# only inside the `with` body.
with np.load(r'E:\code\python\UAV_homework\week8\DogResnet50Data.npz') as bottleneck_features:
    train_Resnet = bottleneck_features['train']
    valid_Resnet = bottleneck_features['valid']
    test_Resnet = bottleneck_features['test']

# Show the feature array shapes.
print(np.shape(train_Resnet))
print(np.shape(valid_Resnet))
print(np.shape(test_Resnet))

(6680, 1, 1, 2048)
(835, 1, 1, 2048)
(836, 1, 1, 2048)


In [18]:
"""构建模型"""
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential

# Classification head on top of the bottleneck features: global average
# pooling over the feature map followed by a 133-way softmax layer.
Resnet_model = Sequential([
    GlobalAveragePooling2D(input_shape=train_Resnet.shape[1:]),
    Dense(133, activation='softmax'),
])
Resnet_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
global_average_pooling2d_1 ( (None, 2048)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 133)               272517    
Total params: 272,517
Trainable params: 272,517
Non-trainable params: 0
_________________________________________________________________


In [19]:
## Compile the model

# Categorical cross-entropy loss with the 'rmsprop' optimizer; accuracy is
# tracked as an additional metric.
Resnet_model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

In [23]:
## Train the model
from keras.callbacks import ModelCheckpoint

# Checkpoint callback writing weights to disk; save_best_only=True keeps only
# the best checkpoint (the test cell describes it as the best validation loss).
checkpointer = ModelCheckpoint(
    filepath=r'E:\code\python\UAV_homework\week8\weights.best.ResNet.hdf5',
    save_best_only=True,
    verbose=1,
)

# Fit on the training features, validating on the validation split each epoch.
Resnet_model.fit(
    x=train_Resnet,
    y=train_targets,
    batch_size=20,
    epochs=20,
    verbose=1,
    callbacks=[checkpointer],
    validation_data=(valid_Resnet, valid_targets),
)

Train on 6680 samples, validate on 835 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20


Epoch 5/20
Epoch 6/20
Epoch 7/20


Epoch 8/20
Epoch 9/20
Epoch 10/20


Epoch 11/20
Epoch 12/20
Epoch 13/20


Epoch 14/20
Epoch 15/20
Epoch 16/20


Epoch 17/20
Epoch 18/20
Epoch 19/20


Epoch 20/20


<keras.callbacks.History at 0x158c6048>

In [24]:
"""模型测试"""
# Load the checkpointed weights (the model with the best validation loss).
Resnet_model.load_weights(r'E:\code\python\UAV_homework\week8\weights.best.ResNet.hdf5')

# Predicted breed index for every test image. A single batched predict call
# over the whole feature array replaces one predict call per sample; argmax
# along axis 1 then picks the top class for each row.
Resnet_predictions = list(np.argmax(Resnet_model.predict(test_Resnet), axis=1))

# Report the test accuracy as a percentage.
test_accuracy = 100*np.sum(np.array(Resnet_predictions)==np.argmax(test_targets, axis=1))/len(Resnet_predictions)
print('Test accuracy: %.4f%%' % test_accuracy)

Test accuracy: 81.6986%
