In [5]:
# model define
from keras.models import Model, Sequential
from keras.layers import Dense, GlobalMaxPool2D, GlobalAveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.layers.core import Dropout
from keras import optimizers
import numpy as np

model = Sequential([
    GlobalAveragePooling2D(input_shape=(2048, 8, 8)),
    Dense(1024, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(1024, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(1, activation='tanh')
    ])

optimizer = optimizers.RMSprop(lr=1e-3, rho=0.9, epsilon=1e-08, decay=0.0)
model.compile(loss='mean_squared_error',
              optimizer=optimizer,
              metrics=['mae'])

In [2]:
# prepare img scores
img2score = {}
with open("./data/img_score.csv", 'r') as fin:
    for line in fin:
        imgid, score = line.strip().split("\t")
        img2score[int(imgid)] = float(score)
        
        if img2score[int(imgid)] > 5:
            img2score[int(imgid)] = 5
        if img2score[int(imgid)] < -5:
            img2score[int(imgid)] = -5
        img2score[int(imgid)] = img2score[int(imgid)] / 5.0

In [6]:
# load train and valid data
import os
import random

feature_path = "../data/pretrained_features/"
npzfile_list = os.listdir(feature_path)

def decompressNPZ(npzfile):
    ids = []
    X = []
    Y = []
    npz = np.load(feature_path+npzfile)
    img_ids = npz['img_ids']
    img_features = npz['img_features']
    for _ in range(len(img_ids)):
        imgid= img_ids[_]
        feature = img_features[_]
        if imgid in img2score:
            ids.append(imgid)
            X.append(feature)
            Y.append(img2score[imgid])

    return ids, np.array(X), np.array(Y)
        

imgid_valid, X_valid, Y_valid = decompressNPZ("imgs_epoch=25.npz")

for _pass in range(10):
    random.shuffle(npzfile_list)
    for npzfile in npzfile_list:
        if npzfile == "imgs_epoch=25.npz":
                continue
        print "loading npzfile: %s of pass=%d"%(npzfile, _pass)

        _, X_train, Y_train = decompressNPZ(npzfile)

        model.fit(X_train, Y_train, batch_size=64, epochs=1, validation_data=(X_valid, Y_valid), shuffle=True)

loading npzfile: imgs_epoch=5.npz of pass=0
Train on 4894 samples, validate on 392 samples
Epoch 1/1
loading npzfile: imgs_epoch=11.npz of pass=0
Train on 4906 samples, validate on 392 samples
Epoch 1/1
loading npzfile: imgs_epoch=4.npz of pass=0
Train on 4905 samples, validate on 392 samples
Epoch 1/1
loading npzfile: imgs_epoch=10.npz of pass=0
Train on 4910 samples, validate on 392 samples
Epoch 1/1
loading npzfile: imgs_epoch=6.npz of pass=0
Train on 4907 samples, validate on 392 samples
Epoch 1/1
loading npzfile: imgs_epoch=19.npz of pass=0
Train on 4920 samples, validate on 392 samples
Epoch 1/1
loading npzfile: imgs_epoch=20.npz of pass=0
Train on 4888 samples, validate on 392 samples
Epoch 1/1
loading npzfile: imgs_epoch=7.npz of pass=0
Train on 4885 samples, validate on 392 samples
Epoch 1/1
loading npzfile: imgs_epoch=12.npz of pass=0
Train on 4903 samples, validate on 392 samples
Epoch 1/1
loading npzfile: imgs_epoch=16.npz of pass=0
Train on 4894 samples, validate on 392 sa

Train on 4907 samples, validate on 392 samples
Epoch 1/1
loading npzfile: imgs_epoch=24.npz of pass=1
Train on 4899 samples, validate on 392 samples
Epoch 1/1
loading npzfile: imgs_epoch=0.npz of pass=1
Train on 4877 samples, validate on 392 samples
Epoch 1/1
loading npzfile: imgs_epoch=13.npz of pass=1
Train on 4874 samples, validate on 392 samples
Epoch 1/1
loading npzfile: imgs_epoch=22.npz of pass=1
Train on 4890 samples, validate on 392 samples
Epoch 1/1
loading npzfile: imgs_epoch=20.npz of pass=1
Train on 4888 samples, validate on 392 samples
Epoch 1/1
loading npzfile: imgs_epoch=15.npz of pass=1
Train on 4888 samples, validate on 392 samples
Epoch 1/1
loading npzfile: imgs_epoch=9.npz of pass=1
Train on 4889 samples, validate on 392 samples
Epoch 1/1
loading npzfile: imgs_epoch=14.npz of pass=1
Train on 4906 samples, validate on 392 samples
Epoch 1/1
loading npzfile: imgs_epoch=5.npz of pass=1
Train on 4894 samples, validate on 392 samples
Epoch 1/1
loading npzfile: imgs_epoch=

KeyboardInterrupt: 

In [7]:
Y_predict = model.predict(X_valid)
#for _ in range(len(Y_valid)):
#    print Y_valid[_], Y_predict[_, 0]

data = np.array([Y_valid, Y_predict.reshape((len(Y_predict), ))])
print np.corrcoef(data)

plt.plot(Y_valid, Y_predict[:,0], 'bo')

[[ 1.          0.13345201]
 [ 0.13345201  1.        ]]


NameError: name 'plt' is not defined

In [None]:
%matplotlib inline
from PIL import Image
import matplotlib.pyplot as plt

def plot(imgid, title=""):
    if type(imgid) != str:
        imgid = str(int(imgid))
    img_file = "../data/img/%s.jpg"%imgid
    img = Image.open(img_file)
    plt.figure()
    plt.axis('off')
    plt.title(title)
    plt.imshow(img)
    plt.show()
    
img_ids, img_features, img_scores = decompressNPZ("imgs_epoch=25.npz")
img_predicts = model.predict(img_features)

In [None]:
sort_idx = img_predicts.reshape((len(img_predicts),)).argsort(axis=0)
sort_idx = img_scores.argsort(axis=0)

for idx in sort_idx[-10:]:
    plot(img_ids[idx], "%s: %s vs %s"%(img_ids[idx], img_predicts[idx, 0], img_scores[idx]))

In [None]:
plt.hist(img_scores, 100)
plt.show()

np.corrcoef(img_scores, img_predicts[:,0])

In [None]:
sort_idx

In [None]:
model.save("./layer3_bn_do0.5_tanh.h5")