In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Conv1D, MaxPooling1D, Input,Flatten, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
import tensorflow.keras.backend as kb

import warnings
warnings.filterwarnings(action='ignore')

import autokeras as ak

import tensorflow as tf
tf.get_logger().setLevel('ERROR')

In [2]:
import tensorflow as tf
gpu_devices = tf.config.experimental.list_physical_devices("GPU")
for device in gpu_devices:
    tf.config.experimental.set_memory_growth(device, True)

In [3]:
def txtToDataframe(filename, flag1, flag2):
    file = open(filename, 'r')
    lines = file.readlines()
    datas = []
    for line in lines:
        txt = line.replace('   ', ' ').lstrip().rstrip().replace(' ', ',')
        data = txt.split(',')
        datas.append(data)
    df = pd.DataFrame(datas)
    df.columns = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'F8', 'AF4']
    df['label1']=flag1
    df['label2']=flag2
    return df


def getData(src) :
    file_list = os.listdir(src)
    
    rating = pd.DataFrame(pd.read_csv(src+'ratings.txt'))
    file_list.remove('ratings.txt')
    
    dataList=[]
    highList=[]
    lowList=[]
    
    print(rating.columns)
    j=0
    for i in rating['subject']:
        if i<10:
            num = str(0)+str(i)
        else:
            num = str(i)
        dataList.append(txtToDataframe(src+'sub'+num+'_hi.txt', 1,rating['test'][j]))
        dataList.append(txtToDataframe(src+'sub'+num+'_lo.txt', 0,rating['rest'][j]))
        highList.append(txtToDataframe(src+'sub'+num+'_hi.txt', 1,rating['test'][j]))
        lowList.append(txtToDataframe(src+'sub'+num+'_lo.txt', 0,rating['rest'][j]))
        j+=1
    return dataList, highList, lowList

src = './STEW Dataset/'
originalData, highData, lowData = getData(src)

Index(['subject', 'rest', 'test'], dtype='object')


In [4]:
mergedData = pd.concat([originalData[0],originalData[1]],ignore_index=True)
for i in range(2,len(originalData)):
    mergedData = pd.concat([mergedData,originalData[i]],ignore_index=True)
mergedData = mergedData.apply(pd.to_numeric)
mergedData

Unnamed: 0,AF3,F7,F3,FC5,T7,O1,O2,P8,T8,FC6,F4,F8,F8.1,AF4,label1,label2
0,4584.62,3902.05,4571.79,4589.23,4124.62,3825.13,4152.82,4579.49,4690.77,4260.00,4027.18,4385.13,4480.51,4230.77,1,8
1,4584.10,3895.90,4574.87,4567.69,4124.10,3827.18,4157.95,4585.13,4695.38,4268.21,4034.36,4380.00,4501.54,4197.44,1,8
2,4574.36,3893.85,4576.92,4572.82,4123.59,3829.23,4165.13,4590.26,4702.56,4281.54,4030.77,4366.67,4521.03,4176.41,1,8
3,4573.85,3906.15,4572.82,4612.31,4137.95,3830.77,4167.18,4596.92,4706.15,4285.64,4038.46,4376.41,4518.97,4207.18,1,8
4,4583.59,3911.28,4570.26,4621.03,4150.77,3833.85,4166.15,4597.44,4705.13,4282.05,4051.79,4387.18,4520.51,4220.00,1,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1727995,4168.21,4124.62,4123.08,4181.54,4621.03,4051.79,4295.38,4333.85,4434.87,4038.97,4557.44,4714.87,4711.79,4698.46,0,1
1727996,4166.15,4123.08,4124.10,4180.51,4625.13,4053.85,4294.87,4326.15,4429.23,4035.90,4556.92,4716.92,4706.67,4693.85,0,1
1727997,4167.69,4120.51,4122.05,4178.97,4622.05,4051.79,4289.74,4311.28,4420.00,4024.62,4553.33,4712.82,4699.49,4688.72,0,1
1727998,4170.26,4120.00,4121.54,4181.03,4616.92,4050.77,4288.21,4312.31,4421.54,4024.10,4553.85,4712.31,4703.08,4693.85,0,1


In [5]:
label=mergedData['label1']
label2=mergedData['label2']
data=mergedData.drop(['label1','label2'],axis=1)

In [6]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
scaled = scaler.fit_transform(data)
data = pd.DataFrame(scaled, columns = data.columns, index=data.index)
data

Unnamed: 0,AF3,F7,F3,FC5,T7,O1,O2,P8,T8,FC6,F4,F8,F8.1,AF4
0,0.558932,0.452258,0.539627,0.553295,0.479273,0.449631,0.482450,0.542340,0.558323,0.505813,0.479338,0.521798,0.533296,0.494656
1,0.558867,0.451482,0.540022,0.550698,0.479207,0.449895,0.483076,0.543015,0.558871,0.506793,0.480193,0.521187,0.535799,0.490618
2,0.557644,0.451223,0.540286,0.551317,0.479142,0.450158,0.483952,0.543629,0.559726,0.508383,0.479766,0.519600,0.538119,0.488070
3,0.557580,0.452775,0.539759,0.556078,0.480956,0.450357,0.484202,0.544427,0.560153,0.508873,0.480681,0.520759,0.537874,0.491798
4,0.558803,0.453422,0.539430,0.557129,0.482576,0.450753,0.484076,0.544489,0.560032,0.508444,0.482268,0.522042,0.538057,0.493351
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1727995,0.506663,0.480335,0.481982,0.504143,0.541975,0.478809,0.499843,0.512926,0.527864,0.479439,0.542453,0.561057,0.560825,0.551323
1727996,0.506404,0.480140,0.482113,0.504018,0.542493,0.479074,0.499781,0.512004,0.527193,0.479073,0.542391,0.561301,0.560215,0.550765
1727997,0.506598,0.479816,0.481850,0.503833,0.542104,0.478809,0.499155,0.510224,0.526094,0.477727,0.541964,0.560813,0.559361,0.550143
1727998,0.506920,0.479752,0.481785,0.504081,0.541456,0.478677,0.498968,0.510347,0.526277,0.477665,0.542026,0.560752,0.559788,0.550765


In [7]:
def windowing_dataset(data, label, window_size):
    data_list = []
    label_list = []
    
    for i in range(0,len(data)//window_size,window_size):
        data_list.append(np.array(data.iloc[i:i+window_size]))
        label_list.append(np.array(label.iloc[i]))
    return np.array(data_list), np.array(label_list)

dataList, labelList = windowing_dataset(data,label,10)
#labelList = to_categorical(labelList,9)
print(data.shape, label.shape, dataList.shape, labelList.shape)

(1728000, 14) (1728000,) (17280, 10, 14) (17280,)


Autokeras

In [9]:
import autokeras as ak

In [10]:
from tensorflow.keras.utils import plot_model

In [11]:
from datetime import datetime

In [12]:
X_train, X_test, Y_train, Y_test = train_test_split(dataList,labelList, train_size=0.7, random_state=True ,stratify = labelList)
print(X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)

(12096, 10, 14) (5184, 10, 14) (12096, 9) (5184, 9)


In [13]:
x_train, x_test, y_train, y_test = train_test_split(data,label, train_size=0.7, random_state=True ,stratify = label)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

(1209600, 14) (518400, 14) (1209600,) (518400,)


In [None]:
trials=[20] #3,5,10, 
for trial in trials:
    clf_ = ak.ImageClassifier(overwrite=True, max_trials=trial)
    clf_.fit(x=X_train, y=Y_train, epochs=20)

    predicted_y = clf_.predict(X_test)
    print(predicted_y)
    loss, acc = clf_.evaluate(X_test, Y_test)
    print(clf_.evaluate(X_test, Y_test))
    print('Loss: %.3f   Accuracy: %.3f' % (loss,acc))

    model = clf_.export_model()
    model.summary()
    plot_model(model, show_shapes=True)
    tmp = int(acc*100)
    print(tmp)
    model.save('model/WindowingLabel2AutoKeras_'+
                str(datetime.now().strftime('%Y-%m-%d %H-%M-%S'))+' ACC_'+str(tmp)+'try.h5')

Trial 2 Complete [00h 22m 32s]
val_loss: 0.32987186312675476

Best val_loss So Far: 0.12271249294281006
Total elapsed time: 00h 23m 01s

Search: Running Trial #3

Hyperparameter    |Value             |Best Value So Far 
image_block_1/b...|efficient         |vanilla           
image_block_1/n...|True              |True              
image_block_1/a...|True              |False             
image_block_1/i...|True              |None              
image_block_1/i...|False             |None              
image_block_1/i...|0                 |None              
image_block_1/i...|0                 |None              
image_block_1/i...|0.1               |None              
image_block_1/i...|0                 |None              
image_block_1/e...|True              |None              
image_block_1/e...|b7                |None              
image_block_1/e...|True              |None              
image_block_1/e...|True              |None              
classification_...|global_avg        |f

Model1 + (Model2-1 + Model2-2)

In [None]:
def windowing_dataset(data, label, window_size):
    data_list = []
    label_list = []
    
    for i in range(0,len(data)//window_size,window_size):
        data_list.append(np.array(data.iloc[i:i+window_size]))
        label_list.append(np.array(label.iloc[i]))
    return np.array(data_list), np.array(label_list)

dataList, labelList = windowing_dataset(data,label,10)
#labelList = to_categorical(labelList,9)
print(data.shape, label.shape, dataList.shape, labelList.shape)

In [None]:
model = tf.keras.models.load_model('_.h5')

X_train, X_test, Y_train, Y_test = train_test_split(dataList,labelList, train_size=0.7, random_state=True ,stratify = labelList)
print(X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)

if model.predict_classes():
    

for i in range(5):
    print('True : ' + str(argmax(y_test[xhat_idx[i]])) + ', Predict : ' + str(yhat[i]))

In [None]:
X_train, X_test, Y_train, Y_test = train_test_split(dataList,labelList, train_size=0.7, random_state=True ,stratify = labelList)
print(X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)

clf = ak.ImageClassifier(overwrite=True)
clf.fit(x=X_train, y=Y_train, epochs=30)

predicted_y = clf.predict(X_test)
print(predicted_y)
loss, acc = clf.evaluate(X_test, Y_test)
print('Loss: %.3f   Accuracy: %.3f' % (loss,acc, v))

model = clf.export_model()
model.summary()
plot_model(model, show_shapes=True)
tmp = int(acc*100)
model.save('model/WindowingAutoKeras_'+
            str(datetime.now().strftime('%Y-%m-%d %H-%M-%S'))+' ACC_'+str(tmp)+'try.h5')

In [None]:
for trial in trials:
    clf = ak.StructuredDataClassifier(max_trials=trial)
    clf.fit(x=x_train, y=y_train)

    predicted_y = clf.predict(x_test)
    print(predicted_y)
    loss, acc = clf.evaluate(x_test, y_test)
    print('Loss: %.3f   Accuracy: %.3f' % (loss,acc))

    model = clf.export_model()
    model.summary()
    plot_model(model, show_shapes=True)
    model.save('model/autoKeras_'+str(trial)+'.h5')

In [None]:
trials=[10, 15, 20]

for trial in trials:
    for epoch in epochs:
        clf = ak.ImageClassifier(max_trials=trial)
        clf.fit(x=X_train, y=Y_train, epochs=epoch)

        predicted_y = clf.predict(X_test)
        print(predicted_y)
        loss, acc = clf.evaluate(X_test, Y_test)
        print('Loss: %.3f   Accuracy: %.3f' % (loss,acc))

        model = clf.export_model()
        model.summary()
        plot_model(model, show_shapes=True)
        model.save('model/WindowingAutoKeras_'+str(trial)+'_'+str(epoch)+'.h5')
        
        clf = ak.StructuredDataClassifier(max_trials=trial)
        clf.fit(x=x_train, y=y_train, epochs=30)

        predicted_y = clf.predict(x_test)
        print(predicted_y)
        loss, acc = clf.evaluate(x_test, y_test)
        print('Loss: %.3f   Accuracy: %.3f' % (loss,acc))

        model = clf.export_model()
        model.summary()
        plot_model(model, show_shapes=True)
        model.save('model/autoKeras_'+str(trial)+'_'+str(epoch)+'.h5')

Keras

In [None]:
#epochs = [x for x in range(10, 101, 10)]
epochs = [10, 50, 100]

In [None]:
from tensorflow.keras.utils import plot_model

In [None]:
from keras import backend as K

def recall(y_target, y_pred):
    # clip(t, clip_value_min, clip_value_max) : clip_value_min~clip_value_max 이외 가장자리를 깎아 낸다
    # round : 반올림한다
    y_target_yn = K.round(K.clip(y_target, 0, 1)) # 실제값을 0(Negative) 또는 1(Positive)로 설정한다
    y_pred_yn = K.round(K.clip(y_pred, 0, 1)) # 예측값을 0(Negative) 또는 1(Positive)로 설정한다

    # True Positive는 실제 값과 예측 값이 모두 1(Positive)인 경우이다
    count_true_positive = K.sum(y_target_yn * y_pred_yn) 

    # (True Positive + False Negative) = 실제 값이 1(Positive) 전체
    count_true_positive_false_negative = K.sum(y_target_yn)

    # Recall =  (True Positive) / (True Positive + False Negative)
    # K.epsilon()는 'divide by zero error' 예방차원에서 작은 수를 더한다
    recall = count_true_positive / (count_true_positive_false_negative + K.epsilon())

    # return a single tensor value
    return recall


def precision(y_target, y_pred):
    # clip(t, clip_value_min, clip_value_max) : clip_value_min~clip_value_max 이외 가장자리를 깎아 낸다
    # round : 반올림한다
    y_pred_yn = K.round(K.clip(y_pred, 0, 1)) # 예측값을 0(Negative) 또는 1(Positive)로 설정한다
    y_target_yn = K.round(K.clip(y_target, 0, 1)) # 실제값을 0(Negative) 또는 1(Positive)로 설정한다

    # True Positive는 실제 값과 예측 값이 모두 1(Positive)인 경우이다
    count_true_positive = K.sum(y_target_yn * y_pred_yn) 

    # (True Positive + False Positive) = 예측 값이 1(Positive) 전체
    count_true_positive_false_positive = K.sum(y_pred_yn)

    # Precision = (True Positive) / (True Positive + False Positive)
    # K.epsilon()는 'divide by zero error' 예방차원에서 작은 수를 더한다
    precision = count_true_positive / (count_true_positive_false_positive + K.epsilon())

    # return a single tensor value
    return precision


def f1score(y_target, y_pred):
    _recall = recall(y_target, y_pred)
    _precision = precision(y_target, y_pred)
    # K.epsilon()는 'divide by zero error' 예방차원에서 작은 수를 더한다
    _f1score = ( 2 * _recall * _precision) / (_recall + _precision+ K.epsilon())
    
    # return a single tensor value
    return _f1score

In [None]:
def lossAcc(hist):
  fig, loss_ax = plt.subplots()

  acc_ax = loss_ax.twinx()

  loss_ax.plot(hist.history['loss'], 'y', label='train loss')

  acc_ax.plot(hist.history['accuracy'], 'b', label='train acc')

  loss_ax.set_xlabel('epoch')
  loss_ax.set_ylabel('loss')
  acc_ax.set_ylabel('accuray')

  loss_ax.legend(loc='upper left')
  acc_ax.legend(loc='lower left')

  plt.show()

  _loss, _acc, _precision, _recall, _f1score = model.evaluate(x_test, y_test)
  print('loss: {:.3f}, accuracy: {:.3f}, precision: {:.3f}, recall: {:.3f}, f1score: {:.3f}'.format(_loss, _acc, _precision, _recall, _f1score))

In [None]:
def recallAndPrecision(hist):
  fig, rec_ax = plt.subplots()

  pre_ax = rec_ax.twinx()

  rec_ax.plot(hist.history['recall'], 'y', label='recall')

  pre_ax.plot(hist.history['precision'], 'b', label='precision')

  rec_ax.set_xlabel('epoch')
  rec_ax.set_ylabel('recall')
  pre_ax.set_ylabel('precision')

  rec_ax.legend(loc='upper left')
  pre_ax.legend(loc='lower left')

  plt.show()

In [None]:
model = Sequential()
model.add(Input(shape=(10, 14)))
model.add(Conv1D(32, 1, activation='tanh', padding='same'))
model.add(MaxPooling1D(1, padding='same'))
model.add(LSTM(64, activation='tanh'))
model.add(Dense(1))
plot_model(model, show_shapes=True)

for epoch in epochs:
    print(epoch)
    model = Sequential()
    model.add(Input(shape=(10, 14)))
    model.add(Conv1D(32, 1, activation='relu', padding='same'))
    model.add(MaxPooling1D(1, padding='same'))
    model.add(LSTM(64, activation='relu'))
    model.add(Dense(1))
    model.compile(loss='binary_crossentropy', optimizer = 'adam', metrics=['accuracy', precision, recall, f1score])
    hist = model.fit(x_train, y_train, epochs=epoch, batch_size=64)
    lossAcc(hist)
    recallAndPrecision(hist)
    print('\n\n')

In [None]:
for epoch in epochs:
    print(epoch)
    model = Sequential()
    model.add(Input(shape=(10, 14)))
    model.add(Dense(50, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(150, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(200, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(50, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1))
    model.compile(loss='binary_crossentropy', optimizer = 'adam', metrics=['accuracy', precision, recall, f1score])
    hist = model.fit(x_train, y_train, epochs=epoch, batch_size=64)
    lossAcc(hist)
    recallAndPrecision(hist)
    print('\n\n')

In [None]:
model = Sequential()
model.add(LSTM(64, input_shape=(10,14), return_sequences=True))
model.add(LSTM(32, return_sequences=True))
model.add(Flatten())
model.add(Dense(100))
model.add(Dense(1))
plot_model(model, show_shapes=True)

for epoch in epochs:
    print(epoch)
    model = Sequential()
    odel.add(LSTM(64, input_shape=(10,14), return_sequences=True))
    model.add(LSTM(32, return_sequences=True))
    model.add(Flatten())
    model.add(Dense(100))
    model.add(Dense(1))
    model.compile(loss='binary_crossentropy', optimizer = 'adam', metrics=['accuracy', precision, recall, f1score])
    hist = model.fit(x_train, y_train, epochs=epoch, batch_size=64)
    lossAcc(hist)
    recallAndPrecision(hist)
    print('\n\n')

In [None]:
model = Sequential()
model.add(Input(shape=(10, 14)))
model.add(Conv1D(32, 1, activation='relu', padding='same'))
model.add(MaxPooling1D(1, padding='same'))
model.add(Flatten())
model.add(Dense(100))
model.add(Dense(1))
plot_model(model, show_shapes=True)

for epoch in epochs:
    print(epoch)
    model = Sequential()
    model.add(Input(shape=(10, 14)))
    model.add(Conv1D(32, 1, activation='relu', padding='same'))
    model.add(MaxPooling1D(1, padding='same'))
    model.add(Flatten())
    model.add(Dense(100))
    model.add(Dense(1))
    model.compile(loss='binary_crossentropy', optimizer = 'adam', metrics=['accuracy', precision, recall, f1score])
    hist = model.fit(x_train, y_train, epochs=epoch, batch_size=64)
    lossAcc(hist)
    recallAndPrecision(hist)
    print('\n\n')

In [None]:
from tensorflow.keras.layers import SimpleRNN

model = Sequential()
model.add(SimpleRNN(50, input_shape=(10,14), return_sequences=True))
model.add(SimpleRNN(50, return_sequences=False))
model.add(Dense(100))
model.add(Dense(1))
plot_model(model, show_shapes=True)

for epoch in epochs:
    print(epoch)
    model = Sequential()
    model.add(SimpleRNN(50, input_shape=(10,14), return_sequences=True))
    model.add(SimpleRNN(50, return_sequences=False))
    model.add(Dense(100))
    model.add(Dense(1))
    model.compile(loss='binary_crossentropy', optimizer = 'adam', metrics=['accuracy', precision, recall, f1score])
    hist = model.fit(x_train, y_train, epochs=epoch, batch_size=64)
    lossAcc(hist)
    recallAndPrecision(hist)
    print('\n\n')

In [None]:
from tensorflow.keras.layers import GRU

model = Sequential()
model.add(GRU(50, return_sequences=True, input_shape=(10,7)))
model.add(GRU(60, return_sequences=True))
model.add(Flatten())
model.add(Dense(100))
model.add(Dense(1))
plot_model(model, show_shapes=True)

for epoch in epochs:
    print(epoch)
    model = Sequential()
    model.add(GRU(50, return_sequences=True, input_shape=(10,14)))
    model.add(GRU(60, return_sequences=True))
    model.add(Flatten())
    model.add(Dense(100))
    model.add(Dense(1))
    model.compile(loss='binary_crossentropy', optimizer = 'adam', metrics=['accuracy', precision, recall, f1score])
    hist = model.fit(x_train, y_train, epochs=epoch, batch_size=64)
    lossAcc(hist)
    recallAndPrecision(hist)
    print('\n\n')

In [None]:
model = Sequential()
model.add(Input(shape=(10, 14)))
model.add(Conv1D(32, 1, activation='tanh', padding='same'))
model.add(MaxPooling1D(1, padding='same'))
model.add(SimpleRNN(64, activation='tanh'))
model.add(Dense(1))
plot_model(model, show_shapes=True)

for epoch in epochs:
    print(epoch)
    model = Sequential()
    model.add(Input(shape=(10, 7)))
    model.add(Conv1D(32, 1, activation='tanh', padding='same'))
    model.add(MaxPooling1D(1, padding='same'))
    model.add(SimpleRNN(64, activation='tanh'))
    model.add(Dense(1))
    model.compile(loss='binary_crossentropy', optimizer = 'adam', metrics=['accuracy', precision, recall, f1score])
    hist = model.fit(x_train, y_train, epochs=epoch, batch_size=64)
    lossAcc(hist)
    recallAndPrecision(hist)
    print('\n\n')

In [None]:
model = Sequential()
model.add(Input(shape=(10, 14)))
model.add(Conv1D(32, 1, activation='tanh', padding='same'))
model.add(MaxPooling1D(1, padding='same'))
model.add(GRU(64, activation='tanh'))
model.add(Dense(1))
plot_model(model, show_shapes=True)


for epoch in epochs:
    print(epoch)
    model = Sequential()
    model.add(Input(shape=(10, 14)))
    model.add(Conv1D(32, 1, activation='tanh', padding='same'))
    model.add(MaxPooling1D(1, padding='same'))
    model.add(GRU(64, activation='tanh'))
    model.add(Dense(1))
    model.compile(loss='binary_crossentropy', optimizer = 'adam', metrics=['accuracy', precision, recall, f1score])
    hist = model.fit(x_train, y_train, epochs=epoch, batch_size=64)
    lossAcc(hist)
    recallAndPrecision(hist)
    print('\n\n')