## Este notebook se encarga de generar un CSV que consolida todas las pruebas y estandariza los valores de cada columna

In [16]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import sklearn
from sklearn import preprocessing
import math
import sys

In [17]:
# Load each subject's biometrics recording from its CSV file.
# Paths are relative to the directory the notebook is run from.
df_1 = pd.read_csv("..//resources//csv//data//sujeto1-rocio-2020-10-25-biometrics.csv")
df_2 = pd.read_csv("..//resources//csv//data//sujeto2-juan-2020-11-01-biometrics.csv")
df_3 = pd.read_csv("..//resources//csv//data//sujeto3-brenda-2020-11-16-biometrics.csv")
df_4 = pd.read_csv("..//resources//csv//data//sujeto4-matias-2020-11-16-biometrics.csv")
df_5 = pd.read_csv("..//resources//csv//data//sujeto5-sebastian-2021-01-05-biometrics.csv")
df_6 = pd.read_csv("..//resources//csv//data//sujeto6-rocio-2021-01-07-biometrics.csv")
# df_7 = pd.read_csv("..//resources//csv//data//sujeto7-seb-2021-03-24-biometrics.csv")
# df_8 = pd.read_csv("..//resources//csv//data//sujeto8-bren-2021-04-04-biometrics.csv")

# Dataframes that are standardized and consolidated further down.
# NOTE(review): df_4 is loaded above but is not part of this list — confirm
# whether subject 4 is excluded on purpose.
dfs = [df_1, df_2, df_3, df_5, df_6]

In [18]:
# df_1.head()

In [19]:
def add_prev_values(df, col, n_lags=4):
    """Add lagged copies of ``col`` to ``df`` in place.

    For every lag ``k`` from ``n_lags`` down to 1, a column named
    ``f'{col}-{k}'`` is created holding the value ``col`` had ``k`` rows
    earlier. The first ``n_lags`` rows do not have a complete history, so all
    of their lag columns are filled with the row's own value of ``col``.

    Rows are addressed by position, so the function works for any index
    (not only the default 0..n-1 labels) and for an empty dataframe.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to extend; it is modified in place.
    col : str
        Name of the column to take the previous values from.
    n_lags : int, default 4
        Number of previous samples to expose as extra columns.

    Returns
    -------
    None
    """
    current = df[col]

    # Rows that do not have n_lags previous samples available yet.
    in_warmup = np.arange(len(df)) < n_lags

    # Columns are created from the oldest lag to the most recent one
    # (col-4, col-3, col-2, col-1 for the default n_lags).
    for lag in range(n_lags, 0, -1):
        df[f'{col}-{lag}'] = np.where(in_warmup, current, current.shift(lag))

In [20]:
# Classifies an arousal value as excited (1) or relaxed (0).
def isAroused(arousal):
    """Return 0 when ``arousal`` is below 5 and 1 otherwise."""
    if arousal < 5:
        return 0
    return 1

# Standardizes the biometric signal columns of one subject's dataframe.
def standarize(df):
    """Build the standardized training frame for a single recording.

    Steps:
      1. Keep only the rows whose ``ArousalMean`` is greater than 0.
      2. Add the binary ``Aroused`` column derived from ``ArousalMean``.
      3. Standardize ``HR``, ``RR``, ``HRV`` and ``MicroSiemens`` with a
         ``StandardScaler`` fitted on this dataframe only.
      4. Append the lagged ``HR``, ``HRV`` and ``MicroSiemens`` columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw recording. Must contain the columns ``HR``, ``RR``, ``HRV``,
        ``MicroSiemens``, ``SCR``, ``SCR_MIN``, ``PhaseName``,
        ``ArousalMean`` and ``TimeStamp``. It is not modified.

    Returns
    -------
    pandas.DataFrame
        New dataframe with a fresh 0..n-1 index holding the standardized
        signals, the pass-through columns and the lag columns.
    """
    columns_to_standarize = ['HR', 'RR', 'HRV', 'MicroSiemens']
    passthrough_columns = ['SCR', 'SCR_MIN', 'PhaseName', 'ArousalMean', 'Aroused', 'TimeStamp']

    # Drop the measurements taken without a stimulus (before the stimuli
    # started) so they do not obscure the training. The explicit copy makes
    # the filtered frame independent, so adding a column below does not raise
    # SettingWithCopyWarning and never touches the caller's dataframe.
    df_to_standarize = df.loc[df['ArousalMean'] > 0].copy()

    # NOTE: removing the rows whose 'MatchesSam' is not True before
    # standardizing (so they do not affect the deviation) is currently disabled.

    # Binary target used for classification.
    df_to_standarize['Aroused'] = df_to_standarize['ArousalMean'].map(isAroused)

    scaler = preprocessing.StandardScaler()
    # Alternative scaler: preprocessing.MinMaxScaler()
    standarized_values = scaler.fit_transform(df_to_standarize[columns_to_standarize])

    # Joining the scaled array with the remaining columns goes through a
    # single NumPy array, so the per-column dtypes are restored explicitly
    # right after the dataframe is rebuilt.
    combined = np.append(standarized_values, df_to_standarize[passthrough_columns], axis=1)
    standarized_df_with_rest_of_data = pd.DataFrame(
        combined,
        columns=columns_to_standarize + passthrough_columns,
    )

    # PhaseName and TimeStamp are intentionally left as they come out of the
    # rebuilt dataframe.
    standarized_df_with_rest_of_data = standarized_df_with_rest_of_data.astype({
        'HR': float,
        'RR': float,
        'HRV': float,
        'MicroSiemens': float,
        'SCR': int,
        'SCR_MIN': int,
        'ArousalMean': float,
        'Aroused': int,
    })

    # Expose the previous samples of each signal as extra feature columns.
    for signal in ('HR', 'HRV', 'MicroSiemens'):
        add_prev_values(standarized_df_with_rest_of_data, signal)

    return standarized_df_with_rest_of_data

In [21]:
# Standardize each subject's dataframe independently.
standarized_dfs = [standarize(subject_df) for subject_df in dfs]

# Merge all subjects into a single training dataframe after standardization.
# ignore_index=True gives the result unique 0..n-1 row labels; without it every
# subject would contribute its own 0..k labels and the merged frame would
# carry duplicated index values.
biometrics_df = pd.concat(standarized_dfs, ignore_index=True)

# Save dataframe to CSV

In [22]:
# Write the consolidated dataframe to a CSV file in the current directory,
# without the row index.
biometrics_df.to_csv(r'./1_standarized_biometrics.csv', index = False)
# Execution stops here. To see the graphs below, comment out the exit line.
sys.exit()

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


# Exploratory Data Analysis (EDA)

In [None]:
df_to_explore = biometrics_df # all subjects consolidated
# Alternatives for exploring a single subject. standarized_dfs follows the
# order of dfs, which holds subjects 1, 2, 3, 5 and 6 (subject 4 is not in it).
# df_to_explore = standarized_dfs[0] # Subject 1
# df_to_explore = standarized_dfs[1] # Subject 2
# df_to_explore = standarized_dfs[2] # Subject 3
# df_to_explore = standarized_dfs[3] # Subject 5
# df_to_explore = standarized_dfs[4] # Subject 6
# df_to_explore = standarized_dfs[5] # out of range while dfs has five entries

In [None]:
# Preview the first rows of the dataframe selected for exploration.
df_to_explore.head()

In [None]:
# Descriptive statistics of the selected dataframe.
df_to_explore.describe()

In [None]:
# Number of rows per Aroused class (0 = relaxed, 1 = excited).
df_to_explore.groupby('Aroused').size()

In [None]:
# Histogram grid of the dataframe's columns; the trailing ';' hides the cell's text output.
df_to_explore.hist(edgecolor='black', linewidth=1.2, figsize=(12,8));

In [None]:
# Violin plots of each signal split by the Aroused class, on a 2x3 grid.
plt.figure(figsize=(12,8));
violin_columns = ['HR', 'HRV', 'MicroSiemens', 'SCR_MIN', 'SCR']
for position, column in enumerate(violin_columns, start=1):
    plt.subplot(2, 3, position)
    sns.violinplot(x='Aroused', y=column, data=df_to_explore)

In [None]:
# Correlation heatmap of the numeric columns, lower triangle only.
plt.figure(figsize=(12,8))

# Only numeric/boolean columns can be correlated; selecting them explicitly
# keeps the cell working on pandas versions where corr() raises on text
# columns. The matrix is computed once and reused for the mask and the plot.
correlations = df_to_explore.select_dtypes(include=['number', 'bool']).corr()

# Hide the upper triangle (including the diagonal). The builtin bool is used
# because the np.bool alias no longer exists in current NumPy releases.
mask = np.triu(np.ones_like(correlations, dtype=bool))

heatmap = sns.heatmap(correlations, annot=True, mask=mask, fmt=".2f", cmap='YlGnBu')
heatmap.set_title('Mapa de calor de coorrelación', fontdict={'fontsize':12}, pad=12);

In [None]:
# Restrict the pair plot to the rows recorded during one phase.
filterPhaseName = df_to_explore['PhaseName'].eq("HA_NV_16")
df_to_pairplot = df_to_explore.loc[filterPhaseName]

# To plot every phase instead, skip the filter:
# df_to_pairplot = df_to_explore

sns.pairplot(
    df_to_pairplot,
    hue='PhaseName',
    palette='Paired',
    corner=True,
    diag_kind="hist",
    plot_kws={"marker": "+", "linewidth": 1},
)
# for df in standarized_dfs:
#     sns.pairplot(df, vars=cols, palette='Paired', hue='PhaseName', corner=True, diag_kind="hist", plot_kws=dict(marker="+", linewidth=1))
#     sns.pairplot(df, vars=cols, palette='Paired', hue='Aroused', hue_order=[1, 0], corner=True, diag_kind="hist", plot_kws=dict(marker="+", linewidth=1))