In [1]:
import sys
import os

import dbfread
from dbfread import DBF
import dbf
from glob import glob 

import pandas as pd
import csv
import numpy as np
import scipy.stats as stats
from scipy.interpolate import interp1d

import matplotlib.pyplot as plt
from matplotlib import rc, font_manager
import matplotlib.font_manager as fm
from matplotlib import font_manager, rc
rc('font', family='AppleGothic')
import plotly as py
import plotly.express as px
from plotly import graph_objs as go
from plotly.subplots import make_subplots
import seaborn as sns

from datetime import datetime
import time

from collections import Counter

from tqdm import tqdm
from datetime import datetime
import datetime

import pickle

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import StratifiedShuffleSplit, LeaveOneGroupOut, StratifiedKFold
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, confusion_matrix

In [2]:
# Switch into the dataset directory and remember its absolute path.
# The export cell further down writes its S<p>/ folders relative to this cwd.
os.chdir('../dataset')
dir_data = os.getcwd()

filename = 'lab_DATASET_v16.pickle'
audio_filename = 'lab_DATASET_audio_new_v1.pickle'

sensor_path = os.path.join(dir_data, filename)
audio_path = os.path.join(dir_data, audio_filename)

# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only point these paths at files from a trusted source.
with open(sensor_path, 'rb') as sensor_file:
    DATASET = pickle.load(sensor_file)

with open(audio_path, 'rb') as audio_file:
    audio_DATASET = pickle.load(audio_file)

In [3]:
# Participant IDs processed by this notebook. IDs missing from the list were excluded:
# 5: pregnant / 6: no voice / 7: no labels / 9: said it was funny / 11,17: E4 abnormal / 24: pregnant / 15: high PSS score
pnum = [1,2,3,4,8,10,12,13,14,16,18,19,20,21,22,23,25,26,27]

In [16]:
# Silence pandas' chained-assignment (SettingWithCopy) warning for this notebook.
pd.options.mode.chained_assignment = None

import model_utils
from model_utils import signal_interp

# Empty module-level frames; this cell only initialises them and never fills them.
EDA = pd.DataFrame()
EEG = pd.DataFrame()
TEMP = pd.DataFrame()
ACC = pd.DataFrame()
ECG = pd.DataFrame()

STRESS = pd.DataFrame()
AROUSAL = pd.DataFrame()
VALENCE = pd.DataFrame()


def _between(frame, start, end):
    """Return the rows of `frame` whose 'Timestamp' lies in [start, end] (both inclusive)."""
    stamps = frame['Timestamp']
    return frame.loc[(stamps >= start) & (stamps <= end)]


def _keep_windows(frame, windows):
    """Concatenate, in order, the rows of `frame` inside each (start, end) window, with a fresh index."""
    return pd.concat([_between(frame, start, end) for start, end in windows],
                     axis=0, ignore_index=True)


def _marker_time(marker, session):
    """Return the first 'Timestamp' among marker rows whose 'session' equals `session`.

    Raises:
        IndexError: if no marker row carries that session name.
    """
    hits = marker['Timestamp'][marker['session'] == session]
    if len(hits) == 0:
        raise IndexError(f"marker '{session}' not found")
    return hits.values[0]


# For every participant: align all streams to their common time span, keep only the
# three marked sessions (c1, c2, c3), and pickle the result to S<p>/S<p>.pkl
# (relative to the current working directory).
for p in pnum:
    participant = DATASET[str(p)]
    recordings = participant['data']
    ratings = participant['labels']

    eda_data = recordings['e4_eda']
    # 256 is passed straight to signal_interp — presumably the target sampling
    # rate in Hz; confirm against model_utils.
    eeg_data = signal_interp(recordings['muse'], 256)
    temp_data = recordings['e4_temp']
    acc_data = recordings['e4_acc']
    bvp_data = recordings['e4_bvp']
    ecg_data = recordings['polar_ecg']

    stress_data = ratings['stress']
    arousal_data = ratings['arousal']
    valence_data = ratings['valence']

    signal_frames = [eda_data, eeg_data, temp_data, acc_data, bvp_data, ecg_data]
    label_frames = [stress_data, arousal_data, valence_data]

    # Common span = latest first timestamp .. earliest last timestamp over all
    # nine frames. Relies on each frame being ordered by 'Timestamp' (first/last
    # row are taken as start/end).
    max_start = max(frame['Timestamp'].iloc[0] for frame in signal_frames + label_frames)
    min_end = min(frame['Timestamp'].iloc[-1] for frame in signal_frames + label_frames)

    # Session boundaries taken from the participant's marker table.
    marker = recordings['marker']
    windows = [(_marker_time(marker, f"{name}_start"), _marker_time(marker, f"{name}_end"))
               for name in ('c1', 'c2', 'c3')]

    # Clip every signal to the common span, then keep only the three sessions.
    eda_data, eeg_data, temp_data, acc_data, bvp_data, ecg_data = (
        _keep_windows(_between(frame, max_start, min_end), windows)
        for frame in signal_frames
    )
    stress_data, arousal_data, valence_data = (
        _between(frame, max_start, min_end) for frame in label_frames
    )

    # Binary label: 0 for every stress-label row in c1, 1 for rows in c2 and c3.
    # NOTE(review): the label length follows the stress-label row counts, not the
    # signal sample counts — confirm this is what downstream code expects.
    len_1, len_2, len_3 = (len(_between(stress_data, start, end)) for start, end in windows)
    arr_label = np.concatenate((np.zeros(len_1), np.ones(len_2), np.ones(len_3)))

    dict_data = {
        'signal': {
            'EDA': eda_data[[' eda']].to_numpy(),
            'EEG': eeg_data[['TP9', 'AF7', 'AF8', 'TP10']].to_numpy(),
            'TEMP': temp_data[[' temp']].to_numpy(),
            'ACC': acc_data[[' accX', ' accY', ' accZ']].to_numpy(),
            'BVP': bvp_data[[' bvp']].to_numpy(),
            'ECG': ecg_data[['ecg [uV]']].to_numpy(),
        },
        'label': {'STRESS': arr_label},
        'subject': f"S{p}",
    }

    folder_name = f"S{p}"
    os.makedirs(folder_name, exist_ok=True)

    pkl_file_path = os.path.join(folder_name, f"S{p}.pkl")
    with open(pkl_file_path, 'wb') as pkl_file:
        pickle.dump(dict_data, pkl_file)

  result = result.interpolate(method='pad')
  result = result.interpolate(method='pad')
  result = result.interpolate(method='pad')
  result = result.interpolate(method='pad')
  result = result.interpolate(method='pad')
  result = result.interpolate(method='pad')
  result = result.interpolate(method='pad')
  result = result.interpolate(method='pad')
  result = result.interpolate(method='pad')
  result = result.interpolate(method='pad')
  result = result.interpolate(method='pad')
  result = result.interpolate(method='pad')
  result = result.interpolate(method='pad')
  result = result.interpolate(method='pad')
  result = result.interpolate(method='pad')
  result = result.interpolate(method='pad')
  result = result.interpolate(method='pad')
  result = result.interpolate(method='pad')
  result = result.interpolate(method='pad')
