In [6]:
import pandas as pd
import numpy as np

import os

from keras.models import Sequential
from keras.utils import np_utils
from keras.constraints import maxnorm
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers import Dense, Dropout, Activation, Flatten

In [7]:
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split

from collections import namedtuple

from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix

In [8]:
def get_performance(y_true, y_pred):
    """
    Return named tuple which has accuracy, precision, recall, f1-score, sensitivity, specificity
    """
    Metric = namedtuple('Metric', 'accuracy precision recall fscore sensitivity specificity')
    a = accuracy_score(y_true, y_pred)
    p, r, f, _ = precision_recall_fscore_support(y_true, y_pred, average='binary')
    se, sp = sens_spec_support(y_true, y_pred)
    return Metric(accuracy=a, precision=p, recall=r, fscore=f, sensitivity=se, specificity=sp)


In [9]:
def sens_spec_support(y_true, y_pred):      
    """
    Return sensitivity and specificity
    WARNING: This function supports only binary classification!
    """
    cm = confusion_matrix(y_true, y_pred)
    spec = float(cm[0][0])/np.sum(cm[0]) if np.sum(cm[0]) != 0 else 0
    sens = float(cm[1][1])/np.sum(cm[1]) if np.sum(cm[1]) != 0 else 0
    return sens, spec

In [11]:
data_path = os.getcwd() + "/dataset/total.csv"
features = ["A_F", "A_V", "A_L", "L_F", "L_T", "L_L", "T_F", "T_V", "T_L"]

dataset = pd.read_csv(data_path)
dataset = dataset[["A_F", "A_V", "A_L", "L_F", "L_V", "L_L", "T_F", "T_V", "T_L", "Action"]].values

window_length = int(1*64)
total_windows = int((len(dataset))/window_length)

In [12]:
'''
X = np.empty((len(dataset), 9))
y = np.empty((len(dataset), 1))

for i in range(len(dataset)):
    y[i] = dataset[i, 9]
    for data in range(9):
        X[i, data] = dataset[i, data]

del i, data

'''
X = np.empty((total_windows, window_length*9))
y = np.empty((total_windows, 1))
j = 0

window_count = 0
for items in range(total_windows):
    for i in range(window_length):
        if i == 0:
            y[j] = dataset[int(window_count*window_length), 9]
            j = j + 1
        for data in range(9):
            X[items, i*9 + data] = dataset[int(window_count*window_length)+i, data]
    window_count = window_count+1

del window_count, i, j, items, data


In [33]:
filter_width = 64
dropout_rate = 0.1
input_shape = (64, 9, 1)

In [34]:
model = Sequential()

model.add(Conv2D(
    filters=16,
    kernel_size=(1, 64*9),
    input_shape = (64, 9, 1),
    activation='relu',
    padding='same',
    kernel_constraint=maxnorm(3)
))
model.add(MaxPooling2D(
    pool_size=(1, 2)
))
model.add(Dropout(
    dropout_rate
))

In [15]:
model.add(Conv2D(
    filters=32,
    kernel_size=(6, 32),
    activation='relu',
    padding='same',
    kernel_constraint=maxnorm(3)
))
model.add(MaxPooling2D(
    pool_size=(2, 2)
))
model.add(Dropout(
    dropout_rate
))

In [16]:
model.add(Flatten())
model.add(Dense(2))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [18]:
print(model.summary())

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           (None, 9, 64, 16)         160       
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 9, 32, 16)        0         
 2D)                                                             
                                                                 
 dropout (Dropout)           (None, 9, 32, 16)         0         
                                                                 
 conv2d_3 (Conv2D)           (None, 9, 32, 32)         98336     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 4, 16, 32)        0         
 2D)                                                             
                                                                 
 dropout_1 (Dropout)         (None, 4, 16, 32)        

In [None]:
'''model = Sequential()

# First layer: CONV - POOL
model.add(Conv2D(input_shape, num_of_filters, (1, filter_width), padding='same', kernel_constraint=maxnorm(3), activation='relu'))
model.add(MaxPooling2D(pool_size=(1,2)))
model.add(Dropout(dropout_rate))

# Second layer: CONV - POOL
model.add(Conv2D(num_of_filters, (6, int(filter_width/2)), padding='same', kernel_constraint=maxnorm(3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(dropout_rate))

# Output layer
model.add(Flatten())
model.add(Dense(2))
model.add(Activation('softmax'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])'''

In [36]:
def oned_to_twod(array):        
    """
    Transform 2d array into 4d tensor for CNN input
    arr          => array of raw data
    nrows => size of 2d image
    """
    arr_2d = np.zeros((len(array), 64, 9, 1))
        
    '''for i in range(len(arr)):
        signal = arr[i,:].reshape((window_length, 9))
        image = signal
        arr_2d[i, :, :, 0] = image'''

    arr_2d = np.reshape(array, (len(array), 64, 9, 1))
    return arr_2d

In [20]:
skf = StratifiedKFold(n_splits=3, random_state=42, shuffle=True)

In [21]:
train_metric_list, test_metric_list = [list() for _ in range(2)]

In [22]:
X_, X_test, y_, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

In [23]:
initial_weight = model.get_weights()

In [41]:
for i, (train, val) in enumerate(skf.split(X_, y_)):
    print("==> Fold #%d" % i)
        
   
    '''
    EPOCH = 200
    BATCH = 10'''

    X_train, X_val = X_[train], X_[val]
    y_train, y_val = y_[train], y[val]


    X_train = oned_to_twod(X_train)
    y_train = np_utils.to_categorical(y_train) 

    X_val  = oned_to_twod(X_val)
    y_val  = np_utils.to_categorical(y_val) 


    # model.set_weights(initial_weight)
    hist = model.fit(X_train, y_train, epoch=200, batch_size=10, verbose=0)
    
        
    

==> Fold #0


TypeError: fit() got an unexpected keyword argument 'epoch'

In [20]:
def cv_kfold(X, y, k):
    """
    Perform k-fold CV
    X, y       => data
    k          => number of cross validation (e.g. 10)
    """
    
    skf = StratifiedKFold(n_splits=k, random_state=42, shuffle=True)
    
    train_metric_list, test_metric_list = [list() for _ in range(2)]
    
    X_, X_test, y_, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

    model = create_cnn_model(20)
    initial_weight = model.get_weights()
    # ------------------ k-fold CV start -------------------
    for i, (train, val) in enumerate(skf.split(X, y)):
        print("==> Fold #%d" % i)
        
        D = 9
        L = 64

        EPOCH = 200
        BATCH = 10

        X_train = oned_to_twod(X_[train], D, L)
        y_train = np_utils.to_categorical(y_[train]) 
        
        X_val  = oned_to_twod(X_[val], D, L)
        y_val  = y[val]
        
        model.set_weights(initial_weight)
        hist = model.fit(X_train, y_train, epoch=EPOCH, batch_size=BATCH, verbose=0)
        
        y_pred = model.predict_classes(X_train, verbose=0)
        train_metric = get_performance(y_train, y_pred)
        
        y_pred = model.predict_classes(X_val, verbose=0)
        test_metric  = get_performance(y_val, y_pred)
        
        train_metric_list.append(train_metric)
        test_metric_list.append(test_metric)
    # ------------------ k-fold CV end ---------------------

    y_pred = model.predict(X_test)
    test_metric = get_performance(y_test, y_pred)
        
    df_train = pd.DataFrame(train_metric_list)
    df_val   = pd.DataFrame(test_metric_list)
    
    return df_train.mean(), df_val.mean(), test_metric


In [17]:
skf = StratifiedKFold(n_splits=10, random_state=42, shuffle=True)
train_metric_list, test_metric_list = [list() for _ in range(2)]
X_, X_test, y_, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

In [21]:
for i, (train, val) in enumerate(skf.split(X, y)):
        print("==> Fold #%d" % i)
        
        D = 9
        L = 64

        EPOCH = 200
        BATCH = 10

        X_train = oned_to_twod(X_[train], D, L)
        y_train = np_utils.to_categorical(y_[train]) 
        
        X_val  = oned_to_twod(X_[val], D, L)
        y_val  = y[val]

==> Fold #0


IndexError: index 11390 is out of bounds for axis 0 with size 11390

In [21]:
cv_kfold(X, y, 10)

TypeError: '<' not supported between instances of 'tuple' and 'int'