# Individual Model Generation

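Individual models are generated in this file. For each subject, the rows of every task are split into four consecutive quarters: the first and third quarters are used for training and the second and fourth quarters for testing, so roughly half of a subject's data is used for training and half for testing.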

The performance metrics of the models are saved in two CSV files: `train_individual.csv` holds the model-comparison results obtained on the training data, and `test_individual.csv` holds the results on the test data. The feature importance data is also saved for each model, in a CSV file called `feature_importance_individual.csv`. All of these files are written to the directory `outputs/individual/`.

In [1]:
import csv
import os

import pandas as pd
from pandas import read_csv
from pycaret.regression import *

In [2]:
%%capture
# Load the full dataset and discard the 'Unnamed: 0' column when it is present
# (presumably a row index that was written out by an earlier save -- confirm).
df = read_csv('data/st-final_data.csv').drop(columns='Unnamed: 0', errors='ignore')

In [3]:
def split_fourths(group):
    """Split ``group`` into interleaved train/test parts by quarters.

    The rows are cut, in their existing order, at 25%, 50% and 75% of the
    group's length (cut points are truncated with ``int``). The first and
    third quarters form the training part; the second and fourth quarters
    form the test part.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows to split; positional order is used as-is.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(train, test)``, each with a fresh 0..k-1 index.
    """
    total = len(group)
    # Positional cut points; truncation means the test part gets the
    # leftover rows whenever the length is not a multiple of four.
    q1, q2, q3 = (int(total * fraction) for fraction in (0.25, 0.50, 0.75))

    train_part = pd.concat([group.iloc[:q1], group.iloc[q2:q3]], ignore_index=True)
    test_part = pd.concat([group.iloc[q1:q2], group.iloc[q3:]], ignore_index=True)
    return train_part, test_part

In [7]:
# Column layouts of the three result tables written at the end of this cell.
TRAIN_COLUMNS = ["Model", "MAE", "MSE", "RMSE", "R2", "RMSLE", "MAPE", "TT (Sec)", "Subject"]
TEST_COLUMNS = ["Model", "MAE", "MSE", "RMSE", "R2", "RMSLE", "MAPE", "Subject"]
IMPORTANCE_COLUMNS = ["Feature", "Value", "Model", "Subject"]
OUTPUT_DIR = 'outputs/individual'


def _stack_frames(frames, columns):
    """Concatenate ``frames`` once, with ``columns`` first in the result.

    Returns an empty frame with ``columns`` when there is nothing to stack.
    Columns missing from the data are added (as NaN) and unexpected extra
    columns are kept after the declared ones.
    """
    non_empty = [frame for frame in frames if not frame.empty]
    if not non_empty:
        return pd.DataFrame(columns=columns)
    combined = pd.concat(non_empty, ignore_index=True)
    extras = [col for col in combined.columns if col not in columns]
    return combined.reindex(columns=list(columns) + extras)


def _feature_importance(model, feature_names):
    """Return a Feature/Value table for ``model``, sorted by descending value.

    Uses ``feature_importances_`` when available, otherwise ``coef_``
    (absolute values in both cases). If neither can be turned into a table,
    a single placeholder row ``('error', 0)`` is returned so the model still
    shows up in the output.
    """
    for attribute in ('feature_importances_', 'coef_'):
        try:
            values = abs(getattr(model, attribute))
            table = pd.DataFrame({'Feature': feature_names, 'Value': values})
        except Exception:
            # Best effort: attribute missing or of an incompatible shape.
            # `Exception` (not a bare except) so Ctrl-C still interrupts.
            continue
        return table.sort_values(by='Value', ascending=False).reset_index(drop=True)
    return pd.DataFrame({'Feature': ['error'], 'Value': [0]})


# Create the output directory up front so a missing folder cannot make the
# final to_csv calls fail after all models have already been fitted.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Per-subject result tables are collected in lists and concatenated once at
# the end instead of growing DataFrames inside the loop.
train_results = []
test_results = []
importance_results = []

# Predictors to remove: experimental_condition, subject
for subject in df['subject'].unique():
    print(subject)
    df_subject = df[df['subject'] == subject]

    # Split every task of this subject separately, then pool the pieces so
    # that each task contributes to both the training and the test set.
    task_splits = [split_fourths(group) for _, group in df_subject.groupby('task')]
    train = pd.concat([train_part for train_part, _ in task_splits], ignore_index=True)
    test = pd.concat([test_part for _, test_part in task_splits], ignore_index=True)

    print(len(train.index), len(test.index))

    # NOTE(review): the recorded outputs show no comparison table for
    # subjects with only 4 training rows and an empty R2 column for the
    # others -- presumably the default number of CV folds is too large for
    # such small training sets. Confirm and consider passing `fold=`.
    reg = setup(
        data=train, test_data=test,
        target='RPE', index=False,
        ignore_features=['sub_tsk', 'task', 'time', 'time_pc', 'time_perc', 'subject'],
        session_id=2024)

    best = compare_models(sort='MAE', n_select=18)
    # pull() must directly follow compare_models to fetch its results grid.
    # (Named `cv_results` rather than `all` to avoid shadowing the builtin.)
    cv_results = pull().assign(Subject=str(subject))
    train_results.append(cv_results)

    # compare_models may hand back a single estimator instead of a list.
    if not isinstance(best, list):
        best = [best]

    feature_names = get_config('X_train').columns
    for model in best:
        # predict_model scores the model on the hold-out (test) data; the
        # metric table is then retrieved with pull().
        predict_model(model, verbose=False)
        test_results.append(pull().assign(Subject=str(subject)))

        importance = _feature_importance(model, feature_names)
        importance['Model'] = str(model)
        importance['Subject'] = str(subject)
        importance_results.append(importance)


train_individual_df = _stack_frames(train_results, TRAIN_COLUMNS)
test_individual_df = _stack_frames(test_results, TEST_COLUMNS)
feature_importance_individual_df = _stack_frames(importance_results, IMPORTANCE_COLUMNS)

train_individual_df.to_csv('outputs/individual/train_individual.csv', header=True, index=False)
test_individual_df.to_csv('outputs/individual/test_individual.csv', header=True, index=False)
feature_importance_individual_df.to_csv('outputs/individual/feature_importance_individual.csv', header=True, index=False)

Subject 1
4 6


Unnamed: 0,Description,Value
0,Session id,2024
1,Target,RPE
2,Target type,Regression
3,Original data shape,"(10, 40)"
4,Transformed data shape,"(10, 34)"
5,Transformed train set shape,"(4, 34)"
6,Transformed test set shape,"(6, 34)"
7,Ignore features,6
8,Numeric features,33
9,Preprocess,True


Subject 2
8 12


Unnamed: 0,Description,Value
0,Session id,2024
1,Target,RPE
2,Target type,Regression
3,Original data shape,"(20, 40)"
4,Transformed data shape,"(20, 34)"
5,Transformed train set shape,"(8, 34)"
6,Transformed test set shape,"(12, 34)"
7,Ignore features,6
8,Numeric features,33
9,Preprocess,True


Subject 3
8 12


Unnamed: 0,Description,Value
0,Session id,2024
1,Target,RPE
2,Target type,Regression
3,Original data shape,"(20, 40)"
4,Transformed data shape,"(20, 34)"
5,Transformed train set shape,"(8, 34)"
6,Transformed test set shape,"(12, 34)"
7,Ignore features,6
8,Numeric features,33
9,Preprocess,True


Subject 5
8 12


Unnamed: 0,Description,Value
0,Session id,2024
1,Target,RPE
2,Target type,Regression
3,Original data shape,"(20, 40)"
4,Transformed data shape,"(20, 34)"
5,Transformed train set shape,"(8, 34)"
6,Transformed test set shape,"(12, 34)"
7,Ignore features,6
8,Numeric features,33
9,Preprocess,True


Subject 6
12 18


Unnamed: 0,Description,Value
0,Session id,2024
1,Target,RPE
2,Target type,Regression
3,Original data shape,"(30, 40)"
4,Transformed data shape,"(30, 34)"
5,Transformed train set shape,"(12, 34)"
6,Transformed test set shape,"(18, 34)"
7,Ignore features,6
8,Numeric features,33
9,Preprocess,True


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lr,Linear Regression,1.3763,3.9435,1.3916,,0.1349,0.1771,0.704
br,Bayesian Ridge,2.0558,6.2586,2.0606,,0.1745,0.2171,0.155
huber,Huber Regressor,2.2807,9.3113,2.3206,,0.2517,0.298,0.153
ridge,Ridge Regression,2.6295,10.0996,2.6467,,0.2307,0.2938,0.131
omp,Orthogonal Matching Pursuit,2.9521,16.4205,3.0143,,0.2695,0.3802,0.147
ada,AdaBoost Regressor,3.25,18.75,3.341,,0.2485,0.3222,0.147
gbr,Gradient Boosting Regressor,3.3479,24.7934,3.4315,,0.2632,0.2977,0.162
et,Extra Trees Regressor,3.5065,17.2725,3.5508,,0.28,0.3362,0.165
rf,Random Forest Regressor,3.5595,19.4042,3.5818,,0.2874,0.3534,0.177
en,Elastic Net,3.5856,17.3657,3.7886,,0.3013,0.3574,0.13


Subject 7
4 6


Unnamed: 0,Description,Value
0,Session id,2024
1,Target,RPE
2,Target type,Regression
3,Original data shape,"(10, 40)"
4,Transformed data shape,"(10, 34)"
5,Transformed train set shape,"(4, 34)"
6,Transformed test set shape,"(6, 34)"
7,Ignore features,6
8,Numeric features,33
9,Preprocess,True


Subject 8
4 6


Unnamed: 0,Description,Value
0,Session id,2024
1,Target,RPE
2,Target type,Regression
3,Original data shape,"(10, 40)"
4,Transformed data shape,"(10, 34)"
5,Transformed train set shape,"(4, 34)"
6,Transformed test set shape,"(6, 34)"
7,Ignore features,6
8,Numeric features,33
9,Preprocess,True


Subject 9
4 6


Unnamed: 0,Description,Value
0,Session id,2024
1,Target,RPE
2,Target type,Regression
3,Original data shape,"(10, 40)"
4,Transformed data shape,"(10, 34)"
5,Transformed train set shape,"(4, 34)"
6,Transformed test set shape,"(6, 34)"
7,Ignore features,6
8,Numeric features,33
9,Preprocess,True


Subject 10
12 18


Unnamed: 0,Description,Value
0,Session id,2024
1,Target,RPE
2,Target type,Regression
3,Original data shape,"(30, 40)"
4,Transformed data shape,"(30, 34)"
5,Transformed train set shape,"(12, 34)"
6,Transformed test set shape,"(18, 34)"
7,Ignore features,6
8,Numeric features,33
9,Preprocess,True


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
ridge,Ridge Regression,2.4433,8.4064,2.4989,,0.2268,0.2611,0.139
knn,K Neighbors Regressor,2.54,7.384,2.5641,,0.2369,0.2875,0.146
et,Extra Trees Regressor,2.5465,9.8358,2.6257,,0.2328,0.2638,0.155
rf,Random Forest Regressor,2.67,10.3433,2.7269,,0.2452,0.2833,0.152
huber,Huber Regressor,2.7416,10.595,2.7491,,0.2553,0.2724,0.14
en,Elastic Net,2.8329,9.7119,2.9081,,0.2599,0.2984,0.159
lasso,Lasso Regression,2.9415,10.3532,3.0053,,0.2705,0.3124,0.138
llar,Lasso Least Angle Regression,2.9415,10.3532,3.0052,,0.2705,0.3124,0.153
ada,AdaBoost Regressor,3.25,16.65,3.3778,,0.2965,0.3393,0.145
gbr,Gradient Boosting Regressor,3.2646,16.7049,3.3748,,0.3027,0.3328,0.149


Subject 12
4 6


Unnamed: 0,Description,Value
0,Session id,2024
1,Target,RPE
2,Target type,Regression
3,Original data shape,"(10, 40)"
4,Transformed data shape,"(10, 34)"
5,Transformed train set shape,"(4, 34)"
6,Transformed test set shape,"(6, 34)"
7,Ignore features,6
8,Numeric features,33
9,Preprocess,True


Subject 15
8 12


Unnamed: 0,Description,Value
0,Session id,2024
1,Target,RPE
2,Target type,Regression
3,Original data shape,"(20, 40)"
4,Transformed data shape,"(20, 34)"
5,Transformed train set shape,"(8, 34)"
6,Transformed test set shape,"(12, 34)"
7,Ignore features,6
8,Numeric features,33
9,Preprocess,True


Subject 16
4 6


Unnamed: 0,Description,Value
0,Session id,2024
1,Target,RPE
2,Target type,Regression
3,Original data shape,"(10, 40)"
4,Transformed data shape,"(10, 34)"
5,Transformed train set shape,"(4, 34)"
6,Transformed test set shape,"(6, 34)"
7,Ignore features,6
8,Numeric features,33
9,Preprocess,True


Subject 17
4 6


Unnamed: 0,Description,Value
0,Session id,2024
1,Target,RPE
2,Target type,Regression
3,Original data shape,"(10, 40)"
4,Transformed data shape,"(10, 34)"
5,Transformed train set shape,"(4, 34)"
6,Transformed test set shape,"(6, 34)"
7,Ignore features,6
8,Numeric features,33
9,Preprocess,True


Subject 18
4 6


Unnamed: 0,Description,Value
0,Session id,2024
1,Target,RPE
2,Target type,Regression
3,Original data shape,"(10, 40)"
4,Transformed data shape,"(10, 34)"
5,Transformed train set shape,"(4, 34)"
6,Transformed test set shape,"(6, 34)"
7,Ignore features,6
8,Numeric features,33
9,Preprocess,True


Subject 19
8 12


Unnamed: 0,Description,Value
0,Session id,2024
1,Target,RPE
2,Target type,Regression
3,Original data shape,"(20, 40)"
4,Transformed data shape,"(20, 34)"
5,Transformed train set shape,"(8, 34)"
6,Transformed test set shape,"(12, 34)"
7,Ignore features,6
8,Numeric features,33
9,Preprocess,True


Subject 20
4 6


Unnamed: 0,Description,Value
0,Session id,2024
1,Target,RPE
2,Target type,Regression
3,Original data shape,"(10, 40)"
4,Transformed data shape,"(10, 34)"
5,Transformed train set shape,"(4, 34)"
6,Transformed test set shape,"(6, 34)"
7,Ignore features,6
8,Numeric features,33
9,Preprocess,True


Subject 21
8 12


Unnamed: 0,Description,Value
0,Session id,2024
1,Target,RPE
2,Target type,Regression
3,Original data shape,"(20, 40)"
4,Transformed data shape,"(20, 34)"
5,Transformed train set shape,"(8, 34)"
6,Transformed test set shape,"(12, 34)"
7,Ignore features,6
8,Numeric features,33
9,Preprocess,True


Subject 22
4 6


Unnamed: 0,Description,Value
0,Session id,2024
1,Target,RPE
2,Target type,Regression
3,Original data shape,"(10, 40)"
4,Transformed data shape,"(10, 34)"
5,Transformed train set shape,"(4, 34)"
6,Transformed test set shape,"(6, 34)"
7,Ignore features,6
8,Numeric features,33
9,Preprocess,True


Subject 24
4 6


Unnamed: 0,Description,Value
0,Session id,2024
1,Target,RPE
2,Target type,Regression
3,Original data shape,"(10, 40)"
4,Transformed data shape,"(10, 34)"
5,Transformed train set shape,"(4, 34)"
6,Transformed test set shape,"(6, 34)"
7,Ignore features,6
8,Numeric features,33
9,Preprocess,True


Subject 25
4 6


Unnamed: 0,Description,Value
0,Session id,2024
1,Target,RPE
2,Target type,Regression
3,Original data shape,"(10, 40)"
4,Transformed data shape,"(10, 34)"
5,Transformed train set shape,"(4, 34)"
6,Transformed test set shape,"(6, 34)"
7,Ignore features,6
8,Numeric features,33
9,Preprocess,True


Subject 26
12 18


Unnamed: 0,Description,Value
0,Session id,2024
1,Target,RPE
2,Target type,Regression
3,Original data shape,"(30, 40)"
4,Transformed data shape,"(30, 34)"
5,Transformed train set shape,"(12, 34)"
6,Transformed test set shape,"(18, 34)"
7,Ignore features,6
8,Numeric features,33
9,Preprocess,True


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
et,Extra Trees Regressor,1.138,2.2214,1.1618,,0.0948,0.108,0.153
lr,Linear Regression,1.2228,2.9679,1.2409,,0.112,0.1332,0.149
gbr,Gradient Boosting Regressor,1.2403,3.3933,1.2847,,0.0978,0.1077,0.158
dt,Decision Tree Regressor,1.3,3.4,1.3236,,0.0871,0.0945,0.155
ada,AdaBoost Regressor,1.3,4.2,1.4657,,0.1203,0.1335,0.151
rf,Random Forest Regressor,1.739,4.4385,1.7504,,0.1522,0.1822,0.142
ridge,Ridge Regression,2.4161,8.4445,2.5023,,0.199,0.2207,0.166
huber,Huber Regressor,2.4208,8.3287,2.4954,,0.2173,0.2543,0.17
br,Bayesian Ridge,2.8029,10.4737,2.903,,0.2327,0.265,0.166
lasso,Lasso Regression,3.16,12.7996,3.2152,,0.2571,0.3018,0.152


Subject 27
4 6


Unnamed: 0,Description,Value
0,Session id,2024
1,Target,RPE
2,Target type,Regression
3,Original data shape,"(10, 40)"
4,Transformed data shape,"(10, 34)"
5,Transformed train set shape,"(4, 34)"
6,Transformed test set shape,"(6, 34)"
7,Ignore features,6
8,Numeric features,33
9,Preprocess,True


Subject 28
12 18


Unnamed: 0,Description,Value
0,Session id,2024
1,Target,RPE
2,Target type,Regression
3,Original data shape,"(30, 40)"
4,Transformed data shape,"(30, 34)"
5,Transformed train set shape,"(12, 34)"
6,Transformed test set shape,"(18, 34)"
7,Ignore features,6
8,Numeric features,33
9,Preprocess,True


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
dt,Decision Tree Regressor,1.25,2.95,1.3121,,0.095,0.0989,0.146
gbr,Gradient Boosting Regressor,1.4154,3.5665,1.459,,0.1145,0.1192,0.156
et,Extra Trees Regressor,1.6675,4.4037,1.7138,,0.1322,0.1427,0.15
ada,AdaBoost Regressor,1.75,6.55,1.8536,,0.1365,0.1349,0.158
ridge,Ridge Regression,2.1565,6.2036,2.1854,,0.1817,0.204,0.16
rf,Random Forest Regressor,2.1625,5.4347,2.1933,,0.1845,0.2143,0.148
br,Bayesian Ridge,2.2444,6.9096,2.2756,,0.1797,0.2002,0.18
huber,Huber Regressor,2.4227,6.8838,2.4438,,0.2388,0.2627,0.177
en,Elastic Net,2.4448,7.3473,2.4647,,0.2031,0.2278,0.191
llar,Lasso Least Angle Regression,2.5092,7.5957,2.5236,,0.2125,0.2413,0.148


Subject 29
4 6


Unnamed: 0,Description,Value
0,Session id,2024
1,Target,RPE
2,Target type,Regression
3,Original data shape,"(10, 40)"
4,Transformed data shape,"(10, 34)"
5,Transformed train set shape,"(4, 34)"
6,Transformed test set shape,"(6, 34)"
7,Ignore features,6
8,Numeric features,33
9,Preprocess,True


Subject 33
8 12


Unnamed: 0,Description,Value
0,Session id,2024
1,Target,RPE
2,Target type,Regression
3,Original data shape,"(20, 40)"
4,Transformed data shape,"(20, 34)"
5,Transformed train set shape,"(8, 34)"
6,Transformed test set shape,"(12, 34)"
7,Ignore features,6
8,Numeric features,33
9,Preprocess,True
