In [10]:
import os, json, glob, csv
import pandas as pd
import numpy as np
from sklearn.svm import SVR
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.metrics import mean_squared_error as mse
import sys
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import cross_val_score, ShuffleSplit, KFold
from util.d3mds import *

# NOTE: '__file__' is a quoted literal, not the module attribute (which is not
# defined inside a notebook). abspath() resolves the literal against the current
# working directory and dirname() strips it off again, so `here` is the
# directory the notebook is run from.
here = os.path.dirname(os.path.abspath('__file__'))

# Dataset folder, its raw media sub-folder, and the problem folder of the
# 22_handgeometry task, all located under <here>/data.
dspath = os.path.join(here, 'data', '22_handgeometry', '22_handgeometry_dataset')
rawpath = os.path.join(dspath, 'media')
prpath = os.path.join(here, 'data', '22_handgeometry', '22_handgeometry_problem')

# Output folder for results; its existence is not verified in this cell.
solpath = os.path.join(here, 'log_d3m')

# Fail fast when any of the required input folders is missing.
assert all(os.path.exists(required) for required in (dspath, prpath, rawpath))

In [16]:
# Wrap the dataset/problem folders in the D3MDS helper and pull the train and
# test splits (features and targets) out of it.
d3mds = D3MDS(dspath, prpath)

trainData, trainTargets = d3mds.get_train_data(), d3mds.get_train_targets()
testData, testTargets = d3mds.get_test_data(), d3mds.get_test_targets()

## Transfer Learning via ResNet50

In [3]:
import time
from keras.applications import resnet50
#from util.resnet50 import ResNet50
from keras.preprocessing import image
from keras.layers import GlobalAveragePooling2D, Dense, Dropout, Activation, Flatten
from keras.layers import Input
from keras.models import Model
from keras.utils import np_utils # one-hot-encoding
from sklearn.utils import shuffle
# `sklearn.cross_validation` was deprecated and later removed from scikit-learn;
# `train_test_split` lives in `sklearn.model_selection`, which this notebook
# already imports from in its first cell.
from sklearn.model_selection import train_test_split

# Use the ResNet50-specific input preprocessing for every image fed to the network.
preprocess_input = resnet50.preprocess_input
#from util.imagenet_utils import preprocess_input

Using TensorFlow backend.


In [6]:
# ImageNet-pretrained ResNet50, classification head included, built on an
# explicit 224x224 RGB input tensor. `model` itself is reused later for its
# 1000-wide output.
image_input = Input(shape=(224, 224, 3))
model = resnet50.ResNet50(weights='imagenet',
                          include_top=True,
                          input_tensor=image_input)
#model.summary()

# Feature extractor: same input and layers as `model`, but cut at the
# 'avg_pool' layer and flattened to one vector per image.
last_layer = model.get_layer('avg_pool').output
out = x = Flatten(name='flatten')(last_layer)  # 2048 features
custom_resnet_model = Model(inputs=image_input, outputs=out)

# Freeze every layer of the extractor, the final one included.
for layer in custom_resnet_model.layers:
    layer.trainable = False

# NOTE(review): in the visible cells this model is only used through predict(),
# so the loss/optimizer/metric chosen here look unused - confirm before changing.
custom_resnet_model.compile(optimizer='adam',
                            loss='categorical_crossentropy',
                            metrics=['accuracy'])
custom_resnet_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 112, 112, 64) 9472        input_3[0][0]                    
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 112, 112, 64) 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation_99 (Activation)      (None, 112, 112, 64) 0           bn_conv1[0][0]                   
__________________________________________________________________________________________________
max_poolin

In [21]:
def _load_resnet_input(image_path, target_size=(224, 224)):
    """Read one image file and return it as a preprocessed single-image batch.

    Parameters
    ----------
    image_path : str
        Path of the image file to load.
    target_size : tuple of int
        Size handed to ``image.load_img``; the default matches the 224x224
        input tensor the networks were built with.

    Returns
    -------
    The result of ``preprocess_input`` applied to the image array after a
    leading batch axis has been added.
    """
    img = image.load_img(image_path, target_size=target_size)
    batch = np.expand_dims(image.img_to_array(img), axis=0)
    return preprocess_input(batch)


def _extract_resnet_features(frame, image_dir, pooled_model, classifier_model,
                             n_pooled=2048, n_classes=1000):
    """Featurize every image listed in ``frame`` with both networks.

    The train and test splits go through this single code path so their
    preprocessing cannot drift apart.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table with an ``image_file`` column holding file names relative to
        ``image_dir``.
    image_dir : str
        Folder containing the image files.
    pooled_model, classifier_model
        Models exposing ``predict``; their outputs for one image must have
        ``n_pooled`` and ``n_classes`` values respectively.
    n_pooled, n_classes : int
        Widths of the two feature matrices.

    Returns
    -------
    (pooled, class_scores) : tuple of numpy.ndarray
        Arrays of shape ``(len(frame), n_pooled)`` and
        ``(len(frame), n_classes)``, one row per image in ``frame`` order.
    """
    pooled = np.zeros([len(frame), n_pooled])
    class_scores = np.zeros([len(frame), n_classes])
    for i, image_file in enumerate(frame['image_file']):
        batch = _load_resnet_input(os.path.join(image_dir, image_file))
        # Images are pushed through one at a time, exactly as before.
        pooled[i, :] = pooled_model.predict(batch)
        class_scores[i, :] = classifier_model.predict(batch)
    return pooled, class_scores


# X_*  : features from the truncated (avg_pool) network.
# X_*2 : 1000-wide output of the full ResNet50 classifier.
X_train, X_train2 = _extract_resnet_features(trainData, rawpath,
                                             custom_resnet_model, model)
X_test, X_test2 = _extract_resnet_features(testData, rawpath,
                                           custom_resnet_model, model)

In [23]:
from sklearn.metrics import mean_squared_error

# Candidate regularisation strengths; RidgeCV chooses among them.
alphas = [0.01, 0.03, 0.05, 0.1, 0.3, 0.5, 1, 3, 5, 10, 15, 30, 50, 75]

# Model 1: ridge regression on the 2048-wide features (X_train / X_test).
# Its RMSE is the FIRST line printed by this cell.
model_ridge = RidgeCV(alphas=alphas)
model_ridge.fit(X_train, trainTargets)
ridge_preds = model_ridge.predict(X_test)
ridge_rmse = np.sqrt(mean_squared_error(testTargets, ridge_preds))
print('ridge regressor RMSE: %s' % ridge_rmse)

# Model 2: ridge regression on the 1000-wide features (X_train2 / X_test2).
# Its RMSE is the SECOND line printed by this cell.
model_ridge2 = RidgeCV(alphas=alphas)
model_ridge2.fit(X_train2, trainTargets)
ridge_preds2 = model_ridge2.predict(X_test2)
ridge_rmse2 = np.sqrt(mean_squared_error(testTargets, ridge_preds2))
print('ridge regressor RMSE: %s' % ridge_rmse2)

ridge regressor RMSE: 0.425583275832
ridge regressor RMSE: 0.618998582695


In [24]:
# Export the predictions of the first ridge model (fitted on X_train), which
# has the lower of the two RMSE values recorded in this notebook.
y_pred = ridge_preds

# One output column per target declared in the problem description
# (there could be multiple targets).
targets = d3mds.problem.get_targets()
targetCols = [target['colName'] for target in targets]

# Index the predictions like the test set so rows stay aligned with testData.
y_pred_df = pd.DataFrame(index=testData.index, data=y_pred, columns=targetCols)
print(y_pred_df)

# The output folder is never checked or created earlier in the notebook, and
# to_csv does not create missing parent directories, so make sure it exists.
if not os.path.isdir(solpath):
    os.makedirs(solpath)
y_pred_df.to_csv(os.path.join(solpath, 'predictions.csv'))

          WRISTBREADTH
d3mIndex              
74            7.662159
75            7.417250
76            7.613348
77            7.331093
78            7.744633
79            6.673907
80            7.599621
81            7.760135
82            6.546133
83            6.959371
84            6.658557
85            7.004660
86            7.016580
87            7.233458
88            7.637869
89            6.475722
90            7.193224
91            7.638365
92            7.493393
93            7.922658
94            7.389863
95            7.388899
96            7.209383
97            6.765046
98            6.833617
99            6.498900
100           7.744020
101           6.853239
102           7.417250
103           7.208607
104           7.374989
105           7.422140
106           6.417453
107           6.694529
108           7.389415
109           7.257825
110           7.337703
111           7.591013
