In [1]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import np_utils

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [14]:
dataPath = '/src/Data/ANIML/'
X_train0 = np.load(dataPath+'X_train.npy')
y_train0 = np.load(dataPath+ 'y_train.npy')
X_test0 = np.load(dataPath+ 'X_test.npy')
y_test0 = np.load(dataPath+'y_test.npy')
X_train_edges = np.load(dataPath+ 'X_train_edges.npy')
X_test_edges = np.load(dataPath+ 'X_test_edges.npy')

In [3]:
class LeNetMnistClassifier():
    @staticmethod
    def toDigit(hot_encode):
        return np.argmax(hot_encode)
        
    def __init__(self, **kwargs):
        if kwargs.get("model_path", None):
            from keras.models import load_model
            self.model = load_model(kwargs['model_path'])
        else:
            self.activation_function = kwargs.pop('activation_function', 'relu')
            self.batch_size = kwargs.pop('batch_size', 30)
            self.epochs = kwargs.pop('epochs', 5)
            self.kernal_size = kwargs.pop('kernal_size', (3, 3))
            self.loss_function = kwargs.pop('loss_function', 'categorical_crossentropy')
            self.optimizer = kwargs.pop('optimizer', 'sgd')
            self.pool_size = kwargs.pop('pool_size', (2, 2))
            self.model = self._model(X_train, X_train2)
            
    
    def _model(self, X_train, X_train2):
        model = Sequential()
        model.add(Conv2D(32, kernel_size=self.kernal_size, padding='same',
                 activation=self.activation_function,
                 input_shape=(224,224,1)))
        # One additional convolutional layer (32 channels)
        model.add(Conv2D(32, kernel_size=self.kernal_size, padding='same',
                 activation=self.activation_function))
        model.add(Conv2D(32, kernel_size=self.kernal_size, padding='same',
                 activation=self.activation_function))
        model.add(MaxPooling2D(pool_size=self.pool_size))
        model.add(Conv2D(64, self.kernal_size, padding='same', activation=self.activation_function))
        # One additional convolutional layer (64 channels)
        model.add(Conv2D(64, self.kernal_size, padding='same', activation=self.activation_function))
        model.add(Conv2D(64, self.kernal_size, padding='same', activation=self.activation_function))
        model.add(MaxPooling2D(pool_size=self.pool_size))
        model.add(Dropout(0.25))
        model.add(Flatten())
        model.add(Dense(512, activation=self.activation_function))
        model.add(Dropout(0.25))
        model.add(Dense(2, activation='softmax'))
        
        return model
    
    def predict(self, digits=()):
        return self.model.predict(digits)
    
    def evaluate(self, X_test=None, y_test=None):
        X_test = X_test.reshape(X_test.shape[0], 224, 224, 1)
        X_test = X_test.astype('float32')
        X_test/=255        
        number_of_classes = 2
        y_test = np_utils.to_categorical(y_test, number_of_classes)
        return self.model.evaluate(X_test, y_test)
    
    def preprocess_and_train(self, X_train=None,X_train2=None, y_train=None, X_test=None, X_test2=None,y_test=None):
        self._train(*self._preprocess(X_train, X_train2, y_train, X_test, X_test2,y_test))
        
    def _preprocess(self, X_train, X_train2, y_train, X_test, X_test2,y_test):
        X_train = X_train.reshape(X_train.shape[0], 224, 224, 1)
        X_test = X_test.reshape(X_test.shape[0], 224, 224, 1)

        X_train = X_train.astype('float32')
        X_test = X_test.astype('float32')
        
        X_train/=255
        X_test/=255
        
        X_train2 = X_train2.reshape(X_train2.shape[0], 224, 224, 1)
        X_test2 = X_test2.reshape(X_test2.shape[0], 224, 224, 1)

        X_train2 = X_train2.astype('float32')
        X_test2 = X_test2.astype('float32')
        
        X_train2/=255
        X_test2/=255
        
        
        number_of_classes = 2
        y_train = np_utils.to_categorical(y_train, number_of_classes)
        y_test = np_utils.to_categorical(y_test, number_of_classes)
        
        return X_train, X_train2, y_train, X_test, X_test2, y_test
    
    def _train(self, X_train, X_train2, y_train, X_test, X_test2, y_test):
        self.model.compile(loss=self.loss_function,
              optimizer=self.optimizer,
              metrics=['accuracy'])
        
        fit_output = self.model.fit([X_train, X_train2],
                        y_train,
                        batch_size=self.batch_size,
                        epochs=self.epochs,
                        verbose=1,
                        validation_data=([X_test, X_test2], y_test))
        self._history = fit_output.history
        
        import time
        timestr = time.strftime("%Y%m%d-%H%M%S")
        self.model.save("oasis_test_{0}.h5".format(timestr)) 
    
    @property
    def history(self):
        return self._history

In [15]:
from keras.layers import Input, Embedding, LSTM, Dense
from keras.models import Model

X_train = X_train0.reshape(X_train0.shape[0], 224, 224, 1)
X_test = X_test0.reshape(X_test0.shape[0], 224, 224, 1)

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

X_train/=255
X_test/=255

X_train2 = X_train_edges.reshape(X_train_edges.shape[0], 224, 224, 1)
X_test2 = X_test_edges.reshape(X_test_edges.shape[0], 224, 224, 1)

X_train2 = X_train2.astype('float32')
X_test2 = X_test2.astype('float32')

X_train2/=255
X_test2/=255


number_of_classes = 2
y_train = np_utils.to_categorical(y_train0, number_of_classes)
y_test = np_utils.to_categorical(y_test0, number_of_classes)

# return X_train, X_train2, y_train, X_test, X_test2, y_test


main_input = Input(shape=(224,224,1), dtype='float32', name='main_input')
auxiliary_input = Input(shape=(224,224,1), dtype='float32', name='aux_input')

kernel_size = (3,3)
activation_function = 'elu'
pool_size = (2,2)
loss_function = 'categorical_crossentropy'
optimizer = 'sgd'

x = keras.layers.concatenate([main_input, auxiliary_input])
x = Conv2D(32, kernel_size=kernel_size, padding='same',
                 activation=activation_function)(x)
x = Conv2D(32, kernel_size=kernel_size, padding='same',
                 activation=activation_function)(x)
x = MaxPooling2D(pool_size=pool_size)(x)
x = Conv2D(64, kernel_size, padding='same', 
           activation=activation_function)(x)
x = Conv2D(64, kernel_size, padding='same', 
           activation=activation_function)(x)
x = MaxPooling2D(pool_size=pool_size)(x)
x = Dropout(0.25)(x)
x = Flatten()(x)
x = Dense(256, activation=activation_function, name = 'features')(x)
x = Dropout(0.25)(x)
main_output = Dense(2, activation='softmax', name='main_output')(x)

model = Model(inputs=[main_input, auxiliary_input], outputs=main_output)
intermediate_layer_model = Model(inputs=model.input, 
                                outputs=model.get_layer('features').output)
model.compile(loss=loss_function,
              optimizer=optimizer,
              metrics=['accuracy'])
model.fit([X_train, X_train2], [y_train], batch_size = 15, epochs = 10, verbose =1,
          validation_data=([X_test, X_test2], y_test))





Train on 2000 samples, validate on 160 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f2acc132b38>

In [10]:
intermediate_output = intermediate_layer_model.predict([X_train, X_train2])

In [323]:
intermediate_output_test = intermediate_layer_model.predict([X_test, X_test2])

In [210]:
from sklearn import svm
import random

intermediate_output

random.seed(1)
train_ind = random.sample(range(0,216),200)

subjs = pd.read_csv('/src/Data/ANIML/OASIS_subjs.csv',header=None)
subjs_X_train = subjs.take(train_ind)

df= pd.DataFrame(columns=["subjid"])
test=[]
# for i in train_ind:
#     df.append(([subjs_X_train[0]]*10))
subjid = pd.DataFrame(pd.concat([subjs_X_train[0]]*10)).reset_index()
subjid = subjid.rename(columns={subjid.columns[1]: 'id'})

featpd = pd.DataFrame(intermediate_output)
labeledfeatpd = pd.concat((subjid, featpd), axis=1)

labeledfeatpd

# foo = test[0]
# for i in range(1, len(test)):
#     foo = foo.append(test[i])

# featpd = pd.DataFrame(intermediate_output)
# subjid = pd.DataFrame(foo).reset_index()
# subjid.rename(columns= {subjid.columns[1]:'id'})

# labeledfeatpd = pd.concat((subjid, featpd), axis=1)
# labeledfeatpd.rename(columns= {labeledfeatpd.columns[1]:'id'})

# demog = pd.read_csv('/src/Data/ANIML/X_DataFrame.csv')
# # demog['X_id']

# subjid.columns

# labeledfeatpd[labeledfeatpd[0].isin(demog['X_id'])]

Unnamed: 0,index,id,0,1,2,3,4,5,6,7,...,246,247,248,249,250,251,252,253,254,255
0,34,OAS1_0333_MR1,0.845307,0.541152,0.646300,-0.348115,0.795130,0.044302,-0.097138,0.502693,...,-0.084682,0.244346,-0.028608,0.178583,0.472665,0.319420,-0.074870,0.264320,0.475974,-0.409909
1,145,OAS1_0385_MR1,0.506852,0.178760,-0.324862,-0.322831,-0.188337,0.185498,-0.061994,0.235907,...,-0.312608,0.125032,-0.042912,0.334540,0.210424,0.597997,0.262269,0.413058,0.293909,-0.126539
2,205,OAS1_0273_MR1,0.647710,0.786470,0.043029,-0.309748,0.423004,0.575774,0.493758,0.143868,...,-0.152845,0.043284,-0.269998,-0.233524,0.992090,0.499155,0.055982,0.152251,0.539243,0.384594
3,195,OAS1_0422_MR1,0.245953,0.403558,-0.407905,-0.059553,0.407996,1.051734,-0.269667,0.174024,...,-0.030730,-0.075592,0.187080,0.077558,0.210181,0.086136,0.162595,0.214607,-0.203568,-0.516645
4,16,OAS1_0355_MR1,0.264472,0.257353,-0.031768,-0.004129,-0.083634,0.799816,0.374579,0.425225,...,-0.077162,-0.007440,-0.217284,0.404307,-0.151338,0.234005,0.623779,0.222627,-0.215452,-0.228101
5,65,OAS1_0341_MR1,0.160767,0.517819,-0.304218,-0.044690,0.135304,0.881183,0.346240,0.162276,...,-0.072195,-0.206754,-0.165782,0.346648,-0.087939,0.345063,0.244366,0.658552,0.226406,0.316942
6,30,OAS1_0378_MR1,1.241379,0.856002,0.803014,0.194614,0.223137,-0.085853,0.606270,-0.110200,...,-0.121014,0.088338,-0.103024,-0.227020,0.565995,0.659235,-0.145080,-0.187699,0.708094,0.422957
7,126,OAS1_0270_MR1,0.284916,0.204069,-0.383723,-0.139789,-0.085929,0.610922,-0.065565,0.307111,...,-0.247777,0.034308,0.190996,0.377510,0.071290,0.238579,0.664601,0.553393,0.035709,-0.276601
8,194,OAS1_0239_MR1,0.660809,0.215624,-0.328114,-0.149402,-0.050368,0.890308,0.074709,-0.161542,...,-0.215196,0.181844,-0.021921,0.241523,0.450869,0.369902,0.143576,-0.079176,0.456696,-0.033780
9,115,OAS1_0131_MR1,0.978040,0.671237,0.356848,-0.412902,0.381986,-0.132150,0.738967,0.442528,...,-0.287969,0.544634,-0.211737,-0.437431,0.554416,-0.099997,-0.093118,-0.238338,0.411276,0.021014


In [221]:
subset = labeledfeatpd[labeledfeatpd['id'].isin(demog['X_id'])]

X = subset[subset.columns[2:]]

demog = pd.read_csv('/src/Data/ANIML/subset_demog.csv')
subdemog = demog[demog['X_id'].isin(subset['id'])]

subdemog = pd.DataFrame(subdemog).reset_index()

demogdup =[]
# for i in range(0,len(subdemog)):
#     demogdup.append(pd.DataFrame([subdemog['X_age'][i]]*10))

# subdemogapp = demogdup[0]
# for i in range(1, len(demogdup)):
#     subdemogapp = subdemogapp.append(demogdup[i])

subdemogapp = pd.DataFrame(pd.concat([subdemog[['X_age','X_sex_binary','X_SES']]]*10)).reset_index()
# subjid = subjid.rename(columns={subjid.columns[1]: 'id'})

# subdemogDF = pd.DataFrame(subdemogapp).reset_index()
# subdemogDF = subdemogDF.reset_index()
                                      
subdemogapp=subdemogapp.drop(['index'], axis=1)


In [320]:
cinfo = pd.read_csv('/src/Data/ANIML/Y_DataFrame.csv')
subjs_X_train = subjs_X_train.rename(columns={0: 'id'})

cinfoDF = pd.concat((subjs_X_train, cinfo[['Y_CDR', 'Y_MMSE']]) ,axis=1)
subcinfo = cinfoDF[cinfoDF['id'].isin(demog['X_id'])]
subcinfoapp = pd.DataFrame(pd.concat([subcinfo]*10)).reset_index()
# X = labeledfeatpd.iloc[:,2:]
meta = pd.concat((subdemogapp, subcinfoapp[['Y_CDR', 'Y_MMSE']]), axis=1)

featmeta = pd.concat((X.reset_index(),meta), axis=1).drop(['index'], axis=1)
X_final = np.asarray(featmeta.reset_index())
# y_train0

labeledypd = pd.concat((subjid, pd.DataFrame(y_train0)), axis=1)
subsety = labeledypd[labeledypd['id'].isin(demog['X_id'])]
subsety = np.asarray(subsety[[0]]).ravel()

# subsety
clf = svm.SVC()
clf.fit(X_final, subsety)

# X_final

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [330]:
ind= range(0,216)
test_ind = list(set(ind) - set(train_ind))

subjs_X_test = subjs.take(test_ind)

subjid = pd.DataFrame(pd.concat([subjs_X_test[0]]*10)).reset_index()
subjid = subjid.rename(columns={subjid.columns[1]: 'id'})

featpd = pd.DataFrame(intermediate_output_test)
labeledfeatpd = pd.concat((subjid, featpd), axis=1)

subset = labeledfeatpd[labeledfeatpd['id'].isin(demog['X_id'])]

X = subset[subset.columns[2:]]

demog = pd.read_csv('/src/Data/ANIML/subset_demog.csv')
subdemog = demog[demog['X_id'].isin(subset['id'])]

subdemog = pd.DataFrame(subdemog).reset_index()

demogdup =[]

subdemogapp = pd.DataFrame(pd.concat([subdemog[['X_age','X_sex_binary','X_SES']]]*10)).reset_index()

subdemogapp=subdemogapp.drop(['index'], axis=1)

cinfo = pd.read_csv('/src/Data/ANIML/Y_DataFrame.csv')
subjs_X_test = subjs_X_test.rename(columns={0: 'id'})

cinfoDF = pd.concat((subjs_X_test, cinfo[['Y_CDR', 'Y_MMSE']]) ,axis=1)
subcinfo = cinfoDF[cinfoDF['id'].isin(demog['X_id'])]
subcinfoapp = pd.DataFrame(pd.concat([subcinfo]*10)).reset_index()
# X = labeledfeatpd.iloc[:,2:]
meta = pd.concat((subdemogapp, subcinfoapp[['Y_CDR', 'Y_MMSE']]), axis=1)

featmeta = pd.concat((X.reset_index(),meta), axis=1).drop(['index'], axis=1)
X_final = np.asarray(featmeta.reset_index())
# y_train0

labeledypd = pd.concat((subjid, pd.DataFrame(y_train0)), axis=1)
subsety = labeledypd[labeledypd['id'].isin(demog['X_id'])]
subsety = np.asarray(subsety[[0]]).ravel()

# subsety.shape
clf.predict(X_final)

array([0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1])

In [335]:
result = clf.predict(X_final) - subsety
(60-np.count_nonzero(np.asarray(result)))/60

0.6166666666666667