In [1]:
import numpy as np
import numpy.matlib
import os
import cv2
import glob
import matplotlib.pyplot as plt
import shutil
from sklearn import decomposition
import pandas as pd
from keras.models import *
from keras.layers import *
from keras.applications import *
from keras.preprocessing.image import *

from keras.optimizers import Adadelta, SGD
from keras.utils.np_utils import to_categorical
from keras.callbacks import *
from collections import Counter 
from sklearn.neighbors import NearestNeighbors
np.random.seed(42)

%matplotlib inline
from pylab import rcParams
rcParams['figure.figsize'] = (12, 12)      # setting default size of plots

Using TensorFlow backend.


In [2]:
# 预处理：司机-类别-图片名字
driver_char = ['p002', 'p012', 'p014', 'p015', 'p016', 'p021', 'p022',
                  'p024', 'p026', 'p035', 'p039', 'p041', 'p042', 'p045',
                  'p047', 'p049', 'p050', 'p051', 'p052', 'p056', 'p061',
                  'p064', 'p066', 'p072', 'p075', 'p081']
driver_data = pd.read_csv('driver_imgs_list.csv')
driver = driver_data['subject']
driver_class = driver_data['classname']
driver_img = driver_data['img']
driver_dict = dict()
for i in range(len(driver)):
    if driver[i] not in driver_dict.keys():
        driver_dict[driver[i]] = dict()
    if driver_class[i] not in driver_dict[driver[i]].keys():
        driver_dict[driver[i]][driver_class[i]] = []
    driver_dict[driver[i]][driver_class[i]].append(driver_img[i])


In [3]:
# ResNet的图片减均值处理
def preprocess_input(x):
    # Already 'BGR' in openCV
    # Zero-center by mean pixel
    x[:, :, 0] -= 103.939
    x[:, :, 1] -= 116.779
    x[:, :, 2] -= 123.68
    return x

In [4]:
# 将模型训练得到的bottleneck features保存到本地，方便迁移学习使用，可以节约计算时间
def save_bottlebeck_features(X_train, y_train, X_valid, y_valid, idx = None):
    batch_size = 1
    nb_train_samples = X_train.shape[0]
    nb_valid_samples = X_valid.shape[0]
    train_datagen = ImageDataGenerator(preprocessing_function = preprocess_input)
    valid_datagen = ImageDataGenerator( preprocessing_function = preprocess_input)

    ResNet50_model = ResNet50(include_top=False, weights='imagenet', pooling='max')

    generator = train_datagen.flow(
        X_train,y_train,
        batch_size=batch_size,
        shuffle=False)
 
    bottleneck_features_train = ResNet50_model.predict_generator(
                                generator,  nb_train_samples)
    if idx is None:
        np.save(open('ResNet50_bottleneck_features_train.npy', 'w'),
                bottleneck_features_train)
        np.save(open('ResNet50_bottleneck_y_train.npy', 'w'), y_train)
    else:
        np.save(open('ResNet50_bottleneck_features_train_'+str(idx)+'.npy', 'w'),
                bottleneck_features_train)
        np.save(open('ResNet50_bottleneck_y_train_'+str(idx)+'.npy', 'w'), y_train)
    

    generator = valid_datagen.flow(
        X_valid, y_valid,
        batch_size=batch_size,
        shuffle=False)
  
    bottleneck_features_validation = ResNet50_model.predict_generator(
                                     generator, nb_valid_samples)
    if idx is None:
        np.save(open('ResNet50_bottleneck_features_validation.npy', 'w'),
                bottleneck_features_validation)
        np.save(open('ResNet50_bottleneck_y_valid.npy', 'w'), y_valid)
    else:
        np.save(open('ResNet50_bottleneck_features_validation_'+str(idx)+'.npy', 'w'),
                bottleneck_features_validation)
        np.save(open('ResNet50_bottleneck_y_valid_'+str(idx)+'.npy', 'w'), y_valid)

In [5]:
# 按照司机编号分割训练数据和验证数据。
def train_valid_split( resize, train_drivers, valid_drivers):
    
    X_train = []
    y_train = []
    X_valid = []
    y_valid = []
    
    print "Train drivers: "+str(train_drivers)
    print "Validation drivers: "+str(valid_drivers)

    for j in range(10):
        path = os.path.join('.', 'imgs', 'train', 'c'+str(j), '*.jpg')
        files = glob.glob(path)
        for f in files:
            bname = os.path.basename(f)
            img = cv2.imread(f)
            img = cv2.resize(img, resize)
            # train validation split
            valid_driver_imgs = [driver_dict[d]['c'+str(j)] for d in valid_drivers]
            valid_driver_imgs = np.concatenate(valid_driver_imgs, axis=0)
            train_driver_imgs = [driver_dict[d]['c'+str(j)] for d in train_drivers]
            train_driver_imgs = np.concatenate(train_driver_imgs, axis=0)
            if bname in valid_driver_imgs:
                X_valid.append(img)
                y_valid.append(j)
            elif bname in train_driver_imgs:
                X_train.append(img)
                y_train.append(j)
    X_train = np.array(X_train, dtype=np.uint8)
    y_train = np.array(y_train, dtype=np.uint8)
    y_train = to_categorical(y_train)
    X_valid = np.array(X_valid, dtype=np.uint8)
    y_valid = np.array(y_valid, dtype=np.uint8)  
    y_valid = to_categorical(y_valid)

    print "Train data shape:"+str(X_train.shape)
    print "Train label shape:"+str(y_train.shape)
    print  "Validation data shape:"+str(X_valid.shape)
    print  "Validation label shape:"+str(y_valid.shape)
    
    return X_train, y_train, X_valid, y_valid

In [7]:
# 随机挑选一个司机出来作为验证集，保存bottleneck features，读取已经训练好的bottleneck features到模型中，
# 读取后只训练全连接网络，重复8次，得到8个模型。
nb_rounds = 8
valid_driver_set = []
target_size = (224,224)
for i in range(nb_rounds):
    print "In "+str(i+1)+" round"
    np.random.shuffle(driver_char)
    length = len(driver_char)
    X_train, y_train, X_valid, y_valid = train_valid_split(resize=target_size,
                                                           train_drivers=driver_char[:length-1],
                                                           valid_drivers=driver_char[length-1:])
    valid_driver_set.append(driver_char[length-1:])
    path_1 = os.path.join('.', 'ResNet50_bottleneck_features_train_'+str(i)+'.npy')
    path_2 = os.path.join('.', 'ResNet50_bottleneck_y_train_'+str(i)+'.npy')
    path_3 = os.path.join('.', 'ResNet50_bottleneck_features_validation_'+str(i)+'.npy')
    path_4 = os.path.join('.', 'ResNet50_bottleneck_y_valid_'+str(i)+'.npy')
    if not (os.path.exists(path_1) and os.path.exists(path_2) and os.path.exists(path_3) and os.path.exists(path_4)):
        print "save bottleneck features..."
        save_bottlebeck_features(X_train, y_train, X_valid, y_valid, idx = i)
    print "Train fully connected layers..."
    train_data = np.load(open('ResNet50_bottleneck_features_train_'+str(i)+'.npy'))
    train_label = np.load(open('ResNet50_bottleneck_y_train_'+str(i)+'.npy'))
    validation_data = np.load(open('ResNet50_bottleneck_features_validation_'+str(i)+'.npy'))
    validation_label = np.load(open('ResNet50_bottleneck_y_valid_'+str(i)+'.npy'))
    print "Train data shape: "+str(train_data.shape)
    print "Train label shape:"+str(train_label.shape)
    print "Valid data shape: "+str(validation_data.shape)
    print "Valid label shape:"+str(validation_label.shape)
    
    inputT = Input(train_data.shape[1:])
    x = Dropout(0.8)(inputT) 
    x = Dense(10, activation='softmax', name='fc_10')(x)
    model = Model(inputT, x)

    model.compile(optimizer='adadelta',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    model.fit(train_data, train_label,
              epochs=30, batch_size=64, 
              shuffle=True, callbacks=[EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=5)],
              validation_data=(validation_data, validation_label))
    model.save_weights('ResNet50_bottleneck_fc_model_'+str(i)+'.h5')

In 1 round
Train drivers: ['p026', 'p050', 'p002', 'p075', 'p041', 'p035', 'p045', 'p012', 'p072', 'p021', 'p014', 'p042', 'p049', 'p015', 'p016', 'p064', 'p051', 'p066', 'p052', 'p081', 'p061', 'p024', 'p039', 'p047', 'p056']
Validation drivers: ['p022']
Train data shape:(21191, 224, 224, 3)
Train label shape:(21191, 10)
Validation data shape:(1233, 224, 224, 3)
Validation label shape:(1233, 10)
Train fully connected layers...
Train data shape: (21191, 2048)
Train label shape:(21191, 10)
Valid data shape: (1233, 2048)
Valid label shape:(1233, 10)
Train on 21191 samples, validate on 1233 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
In 2 round
Train drivers: ['p021', 'p026', 'p0

In [8]:
# 随机选取的8个验证集司机，p022出现了两次
print valid_driver_set

[['p022'], ['p066'], ['p022'], ['p021'], ['p039'], ['p012'], ['p042'], ['p002']]


In [9]:
# 针对每个模型进行fine-tune
# 训练完成后保存模型参数和模型结构
# 注意： 第一个p022完成后要将名字改为p022_01，否则会被覆盖。
n = 8
batch_size = 16
for i in range(n):
    print "In "+str(i+1)+" round, "
    model = ResNet50(include_top=False, weights='imagenet', pooling='max')
    x = Dropout(.8)(model.output) 
    x = Dense(10, activation='softmax', name='fc_10')(x)
    ResNet50_model = Model(model.input, x)
    ResNet50_model.load_weights('ResNet50_bottleneck_fc_model_'+str(i)+'.h5', by_name=True)
    for layer in ResNet50_model.layers[:100]:
        layer.trainable = False
    for layer in ResNet50_model.layers[100:]:
        layer.trainable = True

    X_train =[]
    y_train =[]
    X_valid =[]
    y_valid =[]
    
    print "Remove "+str(valid_driver_set[i][0])+" from driver set..."
    driver_char.remove(valid_driver_set[i][0])
    X_train, y_train, X_valid, y_valid = train_valid_split(resize=(224,224),
                                                           train_drivers=driver_char, 
                                                           valid_drivers=valid_driver_set[i])
    driver_char.append(valid_driver_set[i][0])
    train_datagen = ImageDataGenerator(preprocessing_function = preprocess_input)
    validation_datagen = ImageDataGenerator(preprocessing_function = preprocess_input)
    
    train_generator = train_datagen.flow(
        X_train, y_train,
        batch_size=batch_size,
        shuffle=True)
    validation_generator = validation_datagen.flow(
        X_valid, y_valid,
        batch_size=batch_size,
            shuffle=True)
    ResNet50_model.compile(optimizer=Adadelta(),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
    ResNet50_model.fit_generator(train_generator,
                        steps_per_epoch=2000,
                        validation_data=validation_generator,
                        validation_steps=150,
                        epochs=3,
                        verbose=1)
    # Save the transfer learning results and model
    ResNet50_model.save_weights('ResNet50_model_'+str(i)+'.h5')
    with open('ResNet50_model_'+str(i)+'.json', 'w') as f:
        f.write(ResNet50_model.to_json())

In 1 round, 
Remove p022 from driver set...
Train drivers: ['p015', 'p075', 'p016', 'p042', 'p024', 'p039', 'p066', 'p041', 'p072', 'p061', 'p012', 'p049', 'p064', 'p021', 'p056', 'p045', 'p052', 'p050', 'p081', 'p026', 'p014', 'p035', 'p051', 'p047', 'p002']
Validation drivers: ['p022']
Train data shape:(21191, 224, 224, 3)
Train label shape:(21191, 10)
Validation data shape:(1233, 224, 224, 3)
Validation label shape:(1233, 10)
Epoch 1/3
Epoch 2/3
Epoch 3/3
In 2 round, 
Remove p066 from driver set...
Train drivers: ['p015', 'p075', 'p016', 'p042', 'p024', 'p039', 'p041', 'p072', 'p061', 'p012', 'p049', 'p064', 'p021', 'p056', 'p045', 'p052', 'p050', 'p081', 'p026', 'p014', 'p035', 'p051', 'p047', 'p002', 'p022']
Validation drivers: ['p066']
Train data shape:(21390, 224, 224, 3)
Train label shape:(21390, 10)
Validation data shape:(1034, 224, 224, 3)
Validation label shape:(1034, 10)
Epoch 1/3
Epoch 2/3
Epoch 3/3
In 3 round, 
Remove p022 from driver set...
Train drivers: ['p015', 'p075'

In [4]:
# 读取一张照片，进行一次预测。
# Try to predict
def load_and_predict_dataset(model):
    count = 0
    pred = dict()
    path = os.path.join('.', 'imgs', 'test', '*.jpg')
    files = glob.glob(path) # lazy matched file names
    for f in files:
        basename = os.path.basename(f)
        prefix = basename.split('.')[0]
        main = prefix.split('_')[1]
        img = cv2.imread(f)
        img = cv2.resize(img, (224, 224))
        img = np.array(img, dtype=float)
        img = preprocess_input(img)
        img = np.reshape(img, (1,224,224,3))
        pred_prob = model.predict(img)
        pred_prob = pred_prob.clip(min=0.005, max=0.995)
        if main not in pred.keys():
            pred[main] = [] # init a list
        pred[main].append(pred_prob)
        count += 1
        if count % 1000 == 0:
            print "Load and predict "+str(count)+" imgs "
#             break
    print "Load and predict "+str(count)+" imgs "
    return pred

In [5]:
# 重新得到验证司机集
valid_driver_set = [['p022_01'], ['p066'], ['p022'], ['p021'], ['p039'], ['p012'], ['p042'], ['p002']]

In [8]:
# 读取模型和模型参数，对测试集进行预测
for i in range(8):
    print "Load model from ResNet50_model_"+str(i)+".json"
    print "Load weights from ResNet50_model_"+str(i)+".h5"
    ResNet50_model = model_from_json(open('ResNet50_model_'+str(index[i])+'.json').read())  
    ResNet50_model.load_weights('ResNet50_model_'+str(index[i])+'.h5') 
    pred = load_and_predict_dataset(ResNet50_model)
    count = 0
    head = "img,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9\n"
    with open('predict_'+valid_driver_set[i][0]+'.csv', 'w') as f:
        f.write(head)
        for item in pred.keys():
            prob = pred[item][0][0]
            count += 1
            line = "img_"+item+'.jpg,'+str(prob[0])+',' \
                                      +str(prob[1])+',' \
                                    +str(prob[2])+',' \
                                    +str(prob[3])+',' \
                                    +str(prob[4])+',' \
                                    +str(prob[5])+',' \
                                    +str(prob[6])+',' \
                                    +str(prob[7])+',' \
                                    +str(prob[8])+',' \
                                    +str(prob[9])+'\n'
            f.write(line)
    #     if count == 5:
    #         break
    print "write "+str(count)+" lines!"
    
    

Load model from ResNet50_model_0.json
Load weights from ResNet50_model_0.h5
Load and predict 1000 imgs 
Load and predict 2000 imgs 
Load and predict 3000 imgs 
Load and predict 4000 imgs 
Load and predict 5000 imgs 
Load and predict 6000 imgs 
Load and predict 7000 imgs 
Load and predict 8000 imgs 
Load and predict 9000 imgs 
Load and predict 10000 imgs 
Load and predict 11000 imgs 
Load and predict 12000 imgs 
Load and predict 13000 imgs 
Load and predict 14000 imgs 
Load and predict 15000 imgs 
Load and predict 16000 imgs 
Load and predict 17000 imgs 
Load and predict 18000 imgs 
Load and predict 19000 imgs 
Load and predict 20000 imgs 
Load and predict 21000 imgs 
Load and predict 22000 imgs 
Load and predict 23000 imgs 
Load and predict 24000 imgs 
Load and predict 25000 imgs 
Load and predict 26000 imgs 
Load and predict 27000 imgs 
Load and predict 28000 imgs 
Load and predict 29000 imgs 
Load and predict 30000 imgs 
Load and predict 31000 imgs 
Load and predict 32000 imgs 
Load 

IndexError: list index out of range

In [4]:
# 选取较好的5个模型，将预测值求平均。
prediction_fname = ['p042', 'p002', 'p022_01', 'p039', 'p021']
goal = np.zeros((79726, 10))
for fname in prediction_fname:
    path = os.path.join('.', 'predict_'+fname+'.csv')
    p = pd.read_csv(path)
    goal += p.drop('img', axis=1)
goal = goal / 5
img_name = p['img']
count = 0
head = "img,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9\n"
with open('predict_avg.csv', 'w') as f:
    f.write(head)
    for i, prob in enumerate(goal):
        count += 1
        line = img_name[i]+','+str(prob[0])+',' \
                                  +str(prob[1])+',' \
                                +str(prob[2])+',' \
                                +str(prob[3])+',' \
                                +str(prob[4])+',' \
                                +str(prob[5])+',' \
                                +str(prob[6])+',' \
                                +str(prob[7])+',' \
                                +str(prob[8])+',' \
                                +str(prob[9])+'\n'
        f.write(line)
#     if count == 5:
#         break
print "write "+str(count)+" lines!"

write 79726 lines!


In [None]:
# 针对每个模型，获取每张测试图片在卷积层的输出和图片名称，分别保存成npy格式
# Get indices of the 10 nearest neighbors of each test image.
# Store the convolutional outputs of each test image.
for k in range(8):
    print "Calculate conv output of model "+str(k)+'...'
    model = model_from_json(open('resnet50_models/ResNet50_model_'+str(k)+'.json').read())  
    model.load_weights('resnet50_models/ResNet50_model_'+str(k)+'.h5')
    model_conv = Model(model.input, model.layers[172].output)
    path = os.path.join('.', 'imgs', 'test', '*.jpg')
    img_names = glob.glob(path)
    conv_weights = []
    name_list = []
    count = 0
    for i, img_path in enumerate(img_names):
        count += 1
        img = cv2.imread(img_path)
        img = cv2.resize(img, (224, 224))
        conv_outputs = model_conv.predict(np.expand_dims(img, axis=0))
        # 为了防止计算时间过长，将(7,7,2048)取前两个维度的均值，(7,7,2048)->(2048)
        conv_space_mean = [np.mean(conv_outputs[0][:,:,d]) for d in range(2048)]
        conv_weights.append(conv_space_mean)
        name_list.append(os.path.basename(img_path))
        if count % 10000 == 0:
            print "Predict "+str(count)+" imgs "
    print "Predict "+str(count)+" imgs, exit! "
    np.save(open('model_'+str(k)+'_knn_weights.npy', 'w'),
                np.array(conv_weights))
    np.save(open('model_'+str(k)+'_name_list.npy', 'w'),
                np.array(name_list))

Calculate conv output of model 0...


In [2]:
# 使用sklearn中的KNN API，特征为卷积层的输出，计算测试集中每一张图片的10个近邻，保存到csv文件中
for k in range(8):
    print "Calculating neighbors of model "+str(k)+'...'
    conv_weights = np.load(open('model_'+str(k)+'_knn_weights.npy'))
    name_list = np.load(open('model_'+str(k)+'_name_list.npy'))
    neigh = NearestNeighbors(n_neighbors=11)
    neigh.fit(conv_weights)
    neighbors = neigh.kneighbors(conv_weights, return_distance=False)
    # Calculate 10 nearest neighbors... Store the basenames
    head = "img,1,2,3,4,5,6,7,8,9,10\n"
    with open('model_'+str(k)+'_knn.csv', 'w') as f:
        f.write(head)
        for i, n in enumerate(neighbors):
            knn_index = n[1:]
            line = name_list[i]+','+str(name_list[knn_index[0]])+',' \
                                      +str(name_list[knn_index[1]])+',' \
                                    +str(name_list[knn_index[2]])+',' \
                                    +str(name_list[knn_index[3]])+',' \
                                    +str(name_list[knn_index[4]])+',' \
                                    +str(name_list[knn_index[5]])+',' \
                                    +str(name_list[knn_index[6]])+',' \
                                    +str(name_list[knn_index[7]])+',' \
                                    +str(name_list[knn_index[8]])+',' \
                                    +str(name_list[knn_index[9]])+'\n'
            f.write(line)
            if i % 10000 == 0:
                print "write "+str(i)+"'s neighbors."
        print "Calculate "+str(k)+"th 10 neighbors DONE, EXIT!"

Calculating neighbors of model 0...
write 0's neighbors.
write 10000's neighbors.
write 20000's neighbors.
write 30000's neighbors.
write 40000's neighbors.
write 50000's neighbors.
write 60000's neighbors.
write 70000's neighbors.
Calculate 0th 10 neighbors DONE, EXIT!
Calculating neighbors of model 1...
write 0's neighbors.
write 10000's neighbors.
write 20000's neighbors.
write 30000's neighbors.
write 40000's neighbors.
write 50000's neighbors.
write 60000's neighbors.
write 70000's neighbors.
Calculate 1th 10 neighbors DONE, EXIT!
Calculating neighbors of model 2...
write 0's neighbors.
write 10000's neighbors.
write 20000's neighbors.
write 30000's neighbors.
write 40000's neighbors.
write 50000's neighbors.
write 60000's neighbors.
write 70000's neighbors.
Calculate 2th 10 neighbors DONE, EXIT!
Calculating neighbors of model 3...
write 0's neighbors.
write 10000's neighbors.
write 20000's neighbors.
write 30000's neighbors.
write 40000's neighbors.
write 50000's neighbors.
write

In [None]:
# 在已经得到结果中搜索指定图片的预测概率值
def searchDataFrame(src_data, tar_data, img_fname):
    '''
    src_data: knn table
    tar_data: prob table
    Search probs of the img from src_data
    return: (10,) prob of that img
    '''
    ret = 0
    neighbors = src_data.loc[src_data['img']==img_fname]
    neighbors = np.array(neighbors)
    for i, nei in enumerate(neighbors[0]):
        ret += 1./11 * np.array(tar_data.loc[tar_data['img']==nei])[0][1:]
    return ret

In [None]:
# 计算测试集中每张照片和其10个近邻的平均概率值
model_name = ['p022_01', 'p066', 'p022', 'p021', 'p039', 'p012', 'p042', 'p002']
for i in range(8):
    pred = pd.read_csv('predict_'+str(model_name[i])+'.csv')
    knn = pd.read_csv('resnet50_models/model_'+str(i)+'_knn.csv')
    print "Read knn from model "+model_name[i]+"..."
    count = 0
    name_list = np.array(pred['img'])
    head = "img,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9\n"
    with open('predict_'+str(model_name[i])+'_KNN.csv', 'w') as f:
        f.write(head)
        print 'Calculate knn prediction...'
        for name in name_list:
            prob = searchDataFrame(knn, pred, name)
            count += 1
            line = name+','+str(prob[0])+',' \
                                      +str(prob[1])+',' \
                                    +str(prob[2])+',' \
                                    +str(prob[3])+',' \
                                    +str(prob[4])+',' \
                                    +str(prob[5])+',' \
                                    +str(prob[6])+',' \
                                    +str(prob[7])+',' \
                                    +str(prob[8])+',' \
                                    +str(prob[9])+'\n'
            f.write(line)
            if count % 10000 == 0:
                print "Predict "+str(count)+" imgs "
    print "write "+str(count)+" lines! DONE!"

In [None]:
# 选出6个表现较好的模型，对他们的预测概率再做一次平均。
pick_model = ['p039', 'p042', 'p021', 'p066', 'p022_01','p002']
head = "img,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9\n"
data = np.zeros((79726, 10))
with open('predict_6_model_avg.csv', 'w') as f:
    f.write(head)
    for i in range(6):
        pick = pd.read_csv('predict_'+str(pick_model[i])+'_KNN.csv')
        name_list = np.array(pick['img'])
        data += np.array(pick.drop('img', axis=1))
        print "Read predict from model "+pick_model[i]+"..."
    data = data / 6.
    print 'Calculate knn prediction...'
    for i, name in enumerate(name_list):
        line = name+','+str(data[i][0])+',' \
                                  +str(data[i][1])+',' \
                                +str(data[i][2])+',' \
                                +str(data[i][3])+',' \
                                +str(data[i][4])+',' \
                                +str(data[i][5])+',' \
                                +str(data[i,6])+',' \
                                +str(data[i][7])+',' \
                                +str(data[i][8])+',' \
                                +str(data[i][9])+'\n'
        f.write(line)
    print "DONE!"