<H1> Assembling Dataset </H1>

<H3>Libraries:</H3>

In [1]:
import os
import glob
import numpy as np
import pandas as pd
from biosppy import signals as bio_signals

<H3>Variables:</H3>

General Variables

In [2]:
# Whether the loading cell should also build the label frame (exported as train-y).
generate_classes = True
# Task variant: selects the raw-data folder and the output folder. Any value other
# than 'binary' makes the loading cell extract a third window (label 3).
classification = 'binary'
#classification = 'selected'

project = 'athena-i'

# Resolve the home directory with expanduser instead of os.getenv('HOME'):
# getenv returns None when HOME is unset, which silently yields paths that
# start with "None/" and a glob that matches nothing.
home_dir = os.path.expanduser('~')

# Glob pattern of the raw recordings, and output folder (trailing slash is relied
# upon by the export cells, which append the file name directly).
default_path = "%s/%s/dataset/raw/%s/*.csv*" % (home_dir, project, classification)
dataset_path = "%s/%s/dataset/%s/" % (home_dir, project, classification)

# Class names; not referenced by the other cells visible in this notebook.
classes = ["normal", "stress"]

Dataframes

In [3]:
# Every name below starts out bound to its own, distinct empty DataFrame.
(
    df,
    df_final,
    df_features,
    df_ecg,
    df_eda,
    df_resp,
    df_class,
) = [pd.DataFrame() for _ in range(7)]

Load data

In [4]:
files = glob.glob(default_path)

# Row windows [start, stop) cut out of every recording, in label order (1, 2, 3).
signal_windows = [(90000, 150000), (300000, 360000), (510000, 570000)]
# NOTE(review): EDA is cut with different windows than ECG/RESP/features. The
# values are kept exactly as they were -- confirm that this offset is intentional.
eda_windows = [(220000, 280000), (370000, 430000), (520000, 580000)]
if classification == 'binary':
    # Binary task: only the first two windows (labels 1 and 2) are extracted.
    signal_windows = signal_windows[:2]
    eda_windows = eda_windows[:2]


def _cut(frame, windows):
    """Return one positional row slice of `frame` per (start, stop) window."""
    return [frame[start:stop] for start, stop in windows]


def _stack(parts):
    """Concatenate `parts` row-wise; return an empty frame when there are none."""
    return pd.concat(parts) if parts else pd.DataFrame()


# Slices are collected in plain lists and concatenated once at the end.
# DataFrame.append was removed in pandas 2.0, and growing a frame one row at a
# time (as the label generation used to do) copies the whole frame on every
# step. Rebuilding from lists also makes this cell safe to re-run.
ecg_parts, eda_parts, resp_parts, feature_parts, class_parts = [], [], [], [], []

for file in files:
    new_df = pd.read_csv(file, delimiter=';')
    ecg = bio_signals.ecg.ecg(signal=new_df['ECG'], sampling_rate=1000, show=False)
    eda = bio_signals.eda.eda(signal=new_df['EDA'], sampling_rate=1000, show=False)
    resp = bio_signals.resp.resp(signal=new_df['RESP'], sampling_rate=1000, show=False)

    # Only the 'filtered' output of each biosppy pipeline is kept.
    ecg = pd.DataFrame(ecg['filtered'])
    ecg_parts.extend(_cut(ecg, signal_windows))

    eda = pd.DataFrame(eda['filtered'])
    eda_parts.extend(_cut(eda, eda_windows))

    resp = pd.DataFrame(resp['filtered'])
    resp_parts.extend(_cut(resp, signal_windows))

    # The remaining columns are the features; reset_index() keeps each row's
    # position within the recording as an 'index' column, as before.
    new_df = new_df.drop(columns=['ECG', 'EDA', 'RESP', 'TIME'])
    feature_parts.extend(part.reset_index() for part in _cut(new_df, signal_windows))

    if generate_classes:
        # One label per window row: window k (1-based) is labelled k.
        for label, (start, stop) in enumerate(signal_windows, 1):
            class_parts.append(pd.DataFrame([label] * (stop - start)))

    # Running row totals after this file.
    print("%s --- %s --- %s --- %s --- %s" % tuple(
        sum(len(part) for part in parts)
        for parts in (ecg_parts, eda_parts, resp_parts, feature_parts, class_parts)
    ))

df_ecg = _stack(ecg_parts)
df_eda = _stack(eda_parts)
df_resp = _stack(resp_parts)
df_features = _stack(feature_parts)
df_class = _stack(class_parts)

180000 --- 180000 --- 180000 --- 180000 --- 180000
360000 --- 360000 --- 360000 --- 360000 --- 360000
540000 --- 540000 --- 540000 --- 540000 --- 540000
720000 --- 720000 --- 720000 --- 720000 --- 720000
900000 --- 900000 --- 900000 --- 900000 --- 900000
1080000 --- 1080000 --- 1080000 --- 1080000 --- 1080000
1260000 --- 1260000 --- 1260000 --- 1260000 --- 1260000
1440000 --- 1440000 --- 1440000 --- 1440000 --- 1440000
1620000 --- 1620000 --- 1620000 --- 1620000 --- 1620000
1800000 --- 1800000 --- 1800000 --- 1800000 --- 1800000


Export CSVs

In [5]:
# Write each per-signal frame to its own ';'-separated file inside dataset_path
# (header row included, index not written).
for file_name, frame in (
    ("ecg", df_ecg),
    ("eda", df_eda),
    ("rsp", df_resp),
    ("features", df_features),
):
    frame.to_csv("%s%s" % (dataset_path, file_name), sep=';', header=True, index=False)

In [6]:
# Labels go to "<dataset_path>train-y": ';'-separated, header row, no index column.
df_class.to_csv(
    path_or_buf="%strain-y" % dataset_path,
    index=False,
    header=True,
    sep=';',
)

Assemble dataset

In [7]:
# The per-signal frames carry repeated index values (one run per extracted
# window), so each one is given a fresh 0..n-1 index before the column-wise
# concat. The reset is applied to a copy at concat time instead of overwriting
# df_ecg/df_eda/df_resp/df_features: overwriting made this cell non-repeatable
# (a second run added 'level_0' columns, a third raised ValueError).
# reset_index() stores the old index in an 'index' column, or in 'level_0' when
# an 'index' column already exists (the case for df_features).
print("End: %s --- %s --- %s --- %s --- %s" % (len(df_ecg), len(df_eda), len(df_resp), len(df_features), len(df_class)))
df = pd.DataFrame()
print("dataframe created")
df = pd.concat([df, df_ecg.reset_index()], axis=1)
print("ecg: ok")
df = pd.concat([df, df_eda.reset_index()], axis=1)
print("eda: ok")
df = pd.concat([df, df_resp.reset_index()], axis=1)
print("resp: created")
df = pd.concat([df, df_features.reset_index()], axis=1)
print("features: ok")

End: 1800000 --- 1800000 --- 1800000 --- 1800000 --- 1800000
dataframe created
ecg: ok
eda: ok
resp: created
features: ok


Export CSVs

In [8]:
# Assembled frame, index helper columns included, goes to "<dataset_path>train-x-index".
df.to_csv(
    path_or_buf="%strain-x-index" % dataset_path,
    index=False,
    header=True,
    sep=';',
)

In [9]:
# Remove every 'index' / 'level_0' helper column left behind by reset_index(),
# then write the remaining columns to "<dataset_path>train-x".
new_df = df.drop(['index', 'level_0'], axis=1)
new_df.to_csv(
    path_or_buf="%strain-x" % dataset_path,
    index=False,
    header=True,
    sep=';',
)