In [4]:
import pandas as pd
import numpy as np
import os, sys
import core.main as main
from sklearn import preprocessing

# Ratings-CSV column used to select rows for each grouping mode.
gname_map = dict(participant='Participant_id', video='Experiment_id')
# Ratings-CSV column used to order the selected rows for each grouping mode.
gname_sortmap = dict(participant='Experiment_id', video='Participant_id')

# For each grouping mode, the complementary mode; it is used as the sort
# column when ratings are fetched for a group.
facet_group_map = dict(participant='video', video='participant')

def run_tests_for_emotions(clean_frame, group_name, estimator):
    """Score ``estimator`` on every group of ``clean_frame`` for each emotion.

    Args:
        clean_frame: feature DataFrame that contains the columns
            'participant' and 'video' plus the feature columns.
        group_name: column of ``clean_frame`` to group by; must also be a key
            of ``gname_map`` / ``gname_sortmap`` ('participant' or 'video').
        estimator: name of a callable in ``core.main``. It is called as
            ``fn(input_data, target)`` and must return ``(scores, importance)``
            where ``scores`` is a mapping of metric name to value.

    Returns:
        DataFrame with one row per (group, emotion) and the columns
        'video_id', 'emotion' followed by one column per metric.  When
        ``clean_frame`` has no groups, only 'video_id' and 'emotion' exist.
    """
    # The ratings rows used as targets do not depend on the group being
    # processed, so load, filter and sort them once instead of per emotion.
    ratings = pd.read_csv('metadata_csv/participant_ratings.csv')
    # NOTE(review): the selector is hard-coded to 1, so every group is scored
    # against the same ratings rows - confirm whether the group id was meant.
    selected = ratings[(ratings[gname_map[group_name]] == 1)].sort_values(
        by=[gname_sortmap[group_name]])

    feature_importance_data = []
    # Pre-bound so an empty frame yields an empty result, not a NameError.
    metric_names = []
    # Group by the bare column name: a one-element list makes pandas >= 2.0
    # yield 1-tuples such as (0,) as group keys.
    for video_id, group_data in clean_frame.groupby(group_name):
        feats = group_data.drop(columns=['participant', 'video'])

        # Standardise every feature column within the group.
        scaler = preprocessing.StandardScaler()
        feats[feats.columns] = scaler.fit_transform(feats[feats.columns])
        input_data = feats.values.tolist()
        feature_names = feats.columns.values
        print(feature_names)
        for emotion in ['Valence', 'Arousal', 'Dominance', 'Liking']:
            target_emotion = selected[emotion].to_list()

            # The importance part of the result is not reported.
            test_score, _importance = getattr(main, estimator)(input_data, target_emotion)
            metric_names = list(test_score.keys())
            metric_values = list(test_score.values())

            feature_importance_data.append([video_id, emotion] + metric_values)

    results_frame = pd.DataFrame(feature_importance_data,
                                 columns=['video_id', 'emotion'] + metric_names)
    return results_frame

def run_tests_for_emotions_feat_selector(clean_frame, group_name, estimator):
    """Run a feature-selecting ``estimator`` per group and emotion.

    Args:
        clean_frame: feature DataFrame that contains the columns
            'participant' and 'video' plus the feature columns.
        group_name: column of ``clean_frame`` to group by; must also be a key
            of ``facet_group_map`` ('participant' or 'video').
        estimator: name of a callable in ``core.main``. It is called as
            ``fn(input_data, target)`` and must return
            ``(score, selected_feature_indices)``.

    Returns:
        DataFrame with one row per (group, emotion) and the columns
        'video_id', 'emotion', 'mae', 'best_features'.  'best_features' holds
        a one-element list containing the comma-joined selected feature names.
    """
    # Neither value depends on the group or the emotion.
    exclude = group_name == 'video'
    sort_column = facet_group_map[group_name]

    feature_importance_data = []
    # Group by the bare column name: a one-element list makes pandas >= 2.0
    # yield 1-tuples as group keys, which would then be passed on as the
    # ratings selector.
    for video_id, group_data in clean_frame.groupby(group_name):
        print("{0} {1}".format(group_name, video_id))
        feats = group_data.drop(columns=['participant', 'video'])

        # Standardise every feature column within the group.
        scaler = preprocessing.StandardScaler()
        feats[feats.columns] = scaler.fit_transform(feats[feats.columns])
        input_data = feats.values.tolist()
        feature_names = feats.columns.values
        for emotion in ['Valence', 'Arousal', 'Dominance', 'Liking']:
            target_emotion = get_ratings(emotion, exclude, video_id, group_name, sort_column)

            test_score, feat_idx = getattr(main, estimator)(input_data, target_emotion)
            best_features = [', '.join([feature_names[i] for i in feat_idx])]
            feature_importance_data.append([video_id, emotion, test_score, best_features])

    results_frame = pd.DataFrame(feature_importance_data,
                                 columns=['video_id', 'emotion', 'mae', 'best_features'])
    return results_frame

def get_ratings(emotion, exclude, selector_id, in_column, groupby):
    """Return the ``emotion`` ratings of one participant or video as a list.

    Reads 'metadata_csv/participant_ratings.csv', keeps the rows whose
    ``in_column`` equals ``selector_id`` and orders them by ``groupby``.

    Args:
        emotion: name of the ratings column to return.
        exclude: when true, drop the entries whose position in the sorted
            list appears in ``main.exclude_participant``.
        selector_id: value that ``in_column`` must equal.
        in_column: name of the ratings column to filter on.
        groupby: name of the ratings column to sort the kept rows by.

    Returns:
        List of rating values; empty when no row matches.
    """
    # The table is no longer printed here: this function runs once per
    # group and emotion, and dumping the full table each time buried the
    # progress output.
    ratings = pd.read_csv('metadata_csv/participant_ratings.csv')
    target_emotion = ratings[(ratings[in_column] == selector_id)].sort_values(
        by=[groupby])[emotion].to_list()
    if exclude:
        # Exclusion is positional (index within the sorted list), not by id.
        target_emotion = [t for i, t in enumerate(target_emotion) if i not in main.exclude_participant]
    return target_emotion


# Experiments to run.  Each entry names the results file (relative to the
# 'results' directory), the column to group the features by, and the name of
# the estimator function in ``core.main``.  Every active entry must have its
# own path: an existing results file is taken as "already computed".
config = [
    # # # # # # # # # # # # REGRESSORS # # # # # # # # # # # # #
    # # # # all features # # # #
    # {'path': 'participant_all_feats_forest.csv',
    #  'grouping': 'participant', 'estimator': 'run_test_forest'},
    # {'path': 'video_all_feats_forest.csv',
    #  'grouping': 'video', 'estimator': 'run_test_forest'},
    # {'path': 'participant_all_feats_xgboost.csv',
    #  'grouping': 'participant', 'estimator': 'run_test_xgboost'},
    # {'path': 'video__all_feats_xgboost.csv',
    #  'grouping': 'video', 'estimator': 'run_test_xgboost'},
    # {'path': 'participant_all_feats_regression.csv',
    #  'grouping': 'participant', 'estimator': 'run_test_regression'},
    # {'path': 'video__all_feats_regression.csv',
    #  'grouping': 'video', 'estimator': 'run_test_regression'},
    # # # # best features # # # #
    {'path': 'participant_best_score_regression.csv',
     'grouping': 'participant', 'estimator': 'run_test_regression_feature_selector'},
    {'path': 'video_best_score_regression.csv',
     'grouping': 'video', 'estimator': 'run_test_regression_feature_selector'},
    {'path': 'participant_best_score_forest.csv',
     'grouping': 'participant', 'estimator': 'run_test_forest_feature_selector'},
    {'path': 'video_best_score_forest.csv',
     'grouping': 'video', 'estimator': 'run_test_forest_feature_selector'},
    # The xgboost entries previously reused the regression path and the
    # regression estimator respectively (copy-paste slips).
    {'path': 'participant_best_score_xgboost.csv',
     'grouping': 'participant', 'estimator': 'run_test_xgboost_feature_selector'},
    {'path': 'video_best_score_xgboost.csv',
     'grouping': 'video', 'estimator': 'run_test_xgboost_feature_selector'},
    # # # # # # # # # # # CLASSIFIERS # # # # # # # # # # # # #
    # # # # all features # # # #
    # # # # best features # # # #
]

# Driver: for every configured experiment, show the cached results if the
# results file exists, otherwise rebuild the feature table and evaluate.
for c in config:
    c['path'] = os.path.join('results', c['path'])
    if os.path.isfile(c['path']):
        # Results for this configuration already exist; just display them.
        feats_by_participant = pd.read_csv(c['path'])
        print(feats_by_participant)
    else:
        hr_path = "hr_features.pkl"
        eda_path = "eda_features.pkl"
        # NOTE(review): when a pickle is missing, the frame is not loaded and
        # whatever is already bound to the name (e.g. from an earlier cell)
        # is used; if nothing is bound, the lines below raise NameError.
        if os.path.isfile(hr_path):
            hr_features_frame = pd.read_pickle(hr_path)
        if os.path.isfile(eda_path):
            eda_features_frame = pd.read_pickle(eda_path)

        # The id columns are kept from the HR frame only, so the column-wise
        # concatenation does not duplicate them.
        eda_features_frame = eda_features_frame.drop(columns=['participant', 'video'])
        full_feature_data = pd.concat([hr_features_frame, eda_features_frame], axis=1)
        df = full_feature_data

        # Missing / failed feature values are replaced by 0.
        clean_frame = df.fillna(0)
        group_name = c['grouping']
        estimator = c['estimator']

        # Invariant across groups and emotions, so computed once per config.
        ratings = pd.read_csv('metadata_csv/participant_ratings.csv')
        exclude = group_name == 'video'
        target_col = group_name
        groupby = facet_group_map[group_name]

        feature_importance_data = []
        # Group by the bare column name: a one-element list makes pandas >= 2.0
        # yield 1-tuples as keys, on which int(video_id) raises TypeError.
        for video_id, group_data in clean_frame.groupby(group_name):
            print("{0} {1}".format(group_name, video_id))
            feats = group_data.drop(columns=['participant', 'video'])

            # Standardise every feature column within the group.
            scaler = preprocessing.StandardScaler()
            feats[feats.columns] = scaler.fit_transform(feats[feats.columns])
            input_data = feats.values.tolist()
            feature_names = feats.columns.values

            # The ratings rows are looked up with the group key plus one.
            target_val = int(video_id) + 1
            selected = ratings[(ratings[target_col] == target_val)].sort_values(
                by=[groupby])
            for emotion in ['Valence', 'Arousal', 'Dominance', 'Liking']:
                target_emotion = selected[emotion].to_list()
                if exclude:
                    # Exclusion is positional (index within the sorted list).
                    target_emotion = [t for i, t in enumerate(target_emotion) if i not in main.exclude_participant]

                print(type(target_emotion[0]))
                # Two-class version of the ratings: below 5.0 -> 0, else 1.
                binarize = [0 if (float(e) < 5.0) else 1 for e in target_emotion]
                print(binarize)

                # NOTE(review): debugging halt - the run stops after the first
                # group/emotion, so nothing below executes and `binarize` is
                # never passed to an estimator.  Remove it to run experiments.
                sys.exit(0)

                test_score, feat_idx = getattr(main, estimator)(input_data, target_emotion)
                best_features = [', '.join([feature_names[i] for i in feat_idx])]
                feature_importance_data.append([video_id, emotion, test_score, best_features])

        results_frame = pd.DataFrame(feature_importance_data,
                                     columns=['video_id', 'emotion', 'mae', 'best_features'])

        # Persisting the results is currently disabled.
        # results_frame.to_csv(c['path'])



     Unnamed: 0  video_id    emotion       mae  \
0             0       0.0    Valence -2.323249   
1             1       0.0    Arousal -1.794769   
2             2       0.0  Dominance -1.772289   
3             3       0.0     Liking -0.792702   
4             4       1.0    Valence -2.621139   
..          ...       ...        ...       ...   
119         119      30.0     Liking -2.264043   
120         120      31.0    Valence -1.674095   
121         121      31.0    Arousal -1.272534   
122         122      31.0  Dominance -1.164978   
123         123      31.0     Liking -2.207026   

                                         best_features  
0    ['bpm, rmssd, pnn20, sd1, breathingrate, CDA.n...  
1                               ['bpm, pnn20, hr_mad']  
2                                ['ibi, sd2, CDA.SCR']  
3    ['bpm, ibi, sdsd, hr_mad, s, sd1/sd2, CDA.SCR,...  
4                           ['sd1/sd2, breathingrate']  
..                                                 ...  


SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
