In [1]:
# define the model
from keras.models import Model, Sequential
from keras.layers import Input, Dense, concatenate
from keras.layers.normalization import BatchNormalization
from keras.layers.core import Dropout
from keras import optimizers
from keras.utils import to_categorical

# Two inputs, each a flat vector of length 2048.
img_feature_a = Input(shape=(2048,))
img_feature_b = Input(shape=(2048,))

# Fully-connected tower; the same instance (and therefore the same weights)
# is applied to both inputs below.
shared_fc_layer = Sequential()
shared_fc_layer.add(Dense(1024, activation='sigmoid', input_shape=(2048, )))
shared_fc_layer.add(BatchNormalization())
shared_fc_layer.add(Dropout(0.5))
shared_fc_layer.add(Dense(1024, activation='sigmoid'))
shared_fc_layer.add(BatchNormalization())
shared_fc_layer.add(Dropout(0.5))

encoded_a = shared_fc_layer(img_feature_a)
encoded_b = shared_fc_layer(img_feature_b)

# Join the two encodings and classify the pair with a 2-way softmax.
merged_vector = concatenate([encoded_a, encoded_b])

x = Dense(1024, activation='sigmoid')(merged_vector)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
output = Dense(2, activation='softmax')(x)

model = Model(
    inputs=[img_feature_a, img_feature_b],
    outputs=output,
)

optimizer = optimizers.RMSprop(lr=1e-3, rho=0.9, epsilon=1e-08, decay=0.0)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

Using Theano backend.
 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 0: GeForce GTX 1060 6GB (CNMeM is disabled, cuDNN 5110)


In [2]:
import cPickle as pickle
import numpy as np
import os
import random

def load_dataset(pklfile):
    """Load pair samples from a pickle file and split them into arrays.

    Samples are indexed positionally: element 2 and element 3 of each sample
    become the two inputs, and element 4 becomes the label.

    Args:
        pklfile: path to a pickle file holding a sequence of samples.

    Returns:
        Tuple ``(X_1, X_2, Y)`` of numpy arrays. Labels equal to -1 in ``Y``
        are rewritten to 0.
    """
    # NOTE(review): unpickling can execute arbitrary code -- only load
    # files from a trusted source.
    # The context manager closes the file handle as soon as loading is done.
    with open(pklfile, 'rb') as fin:
        pair_samples = pickle.load(fin)
    # List comprehensions always yield a real sequence, unlike map(), which
    # is lazy on Python 3 and would produce a 0-d object array.
    X_1 = np.array([sample[2] for sample in pair_samples])
    X_2 = np.array([sample[3] for sample in pair_samples])
    Y = np.array([sample[4] for sample in pair_samples])
    # Remap the negative class from -1 to 0.
    Y[Y == -1] = 0
    return X_1, X_2, Y

# One file is held out for validation and never trained on.
valid_pklfile = "201704.pkl"
X_1_valid, X_2_valid, Y_valid = load_dataset("./data/" + valid_pklfile)
# One-hot encode the labels to match the 2-unit softmax output.
Y_valid = to_categorical(Y_valid, num_classes=2)

batch_size = 128
# os.listdir returns every directory entry in arbitrary order, so keep only
# the pickle files; anything else would make load_dataset fail.
# NOTE(review): every other .pkl found in ./data/ is used for training.
pklfile_list = sorted(
    entry for entry in os.listdir("./data/") if entry.endswith(".pkl")
)

for _pass in range(10):
    # Visit the training files in a fresh random order on every pass.
    random.shuffle(pklfile_list)
    for pklfile in pklfile_list:
        if pklfile == valid_pklfile:
            continue
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print("loading pklfile: %s of pass=%d" % (pklfile, _pass))

        X_1, X_2, Y = load_dataset("./data/" + pklfile)
        Y = to_categorical(Y, num_classes=2)

        # One epoch per file; the model keeps its weights between calls.
        model.fit([X_1, X_2], Y, batch_size=batch_size, epochs=1,
                  validation_data=([X_1_valid, X_2_valid], Y_valid), shuffle=True)

loading pklfile: 201702.pkl of pass=0
Train on 62962 samples, validate on 18399 samples
Epoch 1/1
loading pklfile: 201701.pkl of pass=0
Train on 48047 samples, validate on 18399 samples
Epoch 1/1
loading pklfile: 201703.pkl of pass=0
Train on 69487 samples, validate on 18399 samples
Epoch 1/1
loading pklfile: 201701.pkl of pass=1
Train on 48047 samples, validate on 18399 samples
Epoch 1/1
loading pklfile: 201703.pkl of pass=1
Train on 69487 samples, validate on 18399 samples
Epoch 1/1
loading pklfile: 201702.pkl of pass=1
Train on 62962 samples, validate on 18399 samples
Epoch 1/1
loading pklfile: 201701.pkl of pass=2
Train on 48047 samples, validate on 18399 samples
Epoch 1/1
loading pklfile: 201702.pkl of pass=2
Train on 62962 samples, validate on 18399 samples
Epoch 1/1
loading pklfile: 201703.pkl of pass=2
Train on 69487 samples, validate on 18399 samples
Epoch 1/1
loading pklfile: 201701.pkl of pass=3
Train on 48047 samples, validate on 18399 samples
Epoch 1/1
loading pklfile: 201

KeyboardInterrupt: 

In [3]:
import cPickle as pickle
import numpy as np
import os
import random

def load_dataset(pklfile):
    """Load pair samples from a pickle file and split them into arrays.

    Samples are indexed positionally: element 2 and element 3 of each sample
    become the two inputs, and element 4 becomes the label.

    Args:
        pklfile: path to a pickle file holding a sequence of samples.

    Returns:
        Tuple ``(X_1, X_2, Y)`` of numpy arrays. Labels equal to -1 in ``Y``
        are rewritten to 0.
    """
    # NOTE(review): unpickling can execute arbitrary code -- only load
    # files from a trusted source.
    # The context manager closes the file handle as soon as loading is done.
    with open(pklfile, 'rb') as fin:
        pair_samples = pickle.load(fin)
    # List comprehensions always yield a real sequence, unlike map(), which
    # is lazy on Python 3 and would produce a 0-d object array.
    X_1 = np.array([sample[2] for sample in pair_samples])
    X_2 = np.array([sample[3] for sample in pair_samples])
    Y = np.array([sample[4] for sample in pair_samples])
    # Remap the negative class from -1 to 0.
    Y[Y == -1] = 0
    return X_1, X_2, Y

batch_size = 128

# Read the training and validation splits.
X_1, X_2, Y = load_dataset("./data/sybj.pkl")
X_1_valid, X_2_valid, Y_valid = load_dataset("./data/sybj.valid.pkl")

# One-hot encode both label vectors for the 2-unit softmax output.
Y = to_categorical(Y, num_classes=2)
Y_valid = to_categorical(Y_valid, num_classes=2)

model.fit(
    x=[X_1, X_2],
    y=Y,
    batch_size=batch_size,
    epochs=10,
    validation_data=([X_1_valid, X_2_valid], Y_valid),
    shuffle=True,
)

Train on 930400 samples, validate on 20418 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
102528/930400 [==>...........................] - ETA: 88s - loss: 0.1483 - acc: 0.9396

KeyboardInterrupt: 

In [None]:
# Show the shapes of the validation arrays, one per line.
print(X_1_valid.shape)
print(X_2_valid.shape)
print(Y_valid.shape)

In [4]:
# Reload the 201704 split; Y_valid is kept exactly as load_dataset returns it
# (no one-hot encoding is applied here).
valid_split = load_dataset("./data/201704.pkl")
X_1_valid, X_2_valid, Y_valid = valid_split

In [5]:
# Run the model on both validation inputs.
Y_predict = model.predict(x=[X_1_valid, X_2_valid])

In [8]:
%matplotlib inline
from PIL import Image
import matplotlib.pyplot as plt

# Correlation matrix between the labels and the second column of the
# model's softmax output.
np.corrcoef(Y_valid, Y_predict[:, 1])

array([[ 1.        ,  0.17752544],
       [ 0.17752544,  1.        ]])