# Individual Model Generation

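Individual models are generated in this file. For each subject, the data of every experimental condition is split into fourths: the first and third fourths (about 50% of the data) are used for training, and the second and fourth fourths (about 50%) are used for testing.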

The performance metrics of the models are saved in two CSV files: `train_individual.csv` for the training results and `test_individual.csv` for the testing results. The feature importance data of each model is also saved, in a CSV file called `feature_importance_individual.csv`. All of these files are written to the directory `outputs/individual/`.

In [3]:
from pycaret.datasets import get_data
from pycaret.regression import *
import csv
import pandas as pd

In [None]:
%%capture
# Relative paths to the input data directory and to the directory that
# receives the CSV files written by this notebook.
data_dir = '../../data'
outputs_dir = '../../outputs/individual'
# Load the feature table used by the rest of the notebook into `df`.
# NOTE(review): `read` does not appear to be provided by any import visible in
# this notebook (pycaret, csv, pandas) -- presumably a loader defined
# elsewhere; confirm, or replace it with the pandas reader that matches the
# on-disk format of `initial_features`.
df = read(f'{data_dir}/initial_features')

In [5]:
def split_fourths(group):
    """Split *group* into interleaved fourths for training and testing.

    The rows are cut, in their current order, at 25%, 50% and 75% of the
    row count (each cut point truncated to an integer). The first and third
    fourths form the training set; the second and fourth fourths form the
    test set.

    Args:
        group: A pandas DataFrame whose rows are split positionally.

    Returns:
        A ``(train, test)`` tuple of DataFrames, each with a fresh
        0-based index.
    """
    total = len(group)
    first_cut, second_cut, third_cut = (
        int(total * fraction) for fraction in (0.25, 0.50, 0.75)
    )

    training = pd.concat(
        [group.iloc[:first_cut], group.iloc[second_cut:third_cut]],
        ignore_index=True,
    )
    testing = pd.concat(
        [group.iloc[first_cut:second_cut], group.iloc[third_cut:total]],
        ignore_index=True,
    )
    return training, testing

In [None]:
%%capture
import os

# Metric columns shared by the training and testing result tables.
_METRIC_COLUMNS = ["Model", "MAE", "MSE", "RMSE", "R2", "RMSLE", "MAPE"]


def _rank_feature_importance(model, feature_names):
    """Return a Feature/Value table for *model*, sorted by descending value.

    Tries ``model.feature_importances_`` first and ``model.coef_`` second,
    taking absolute values. If neither attribute yields a usable table (the
    attribute is missing, or its shape does not line up with
    *feature_names*), a single placeholder row
    ``{'Feature': 'error', 'Value': 0}`` is returned so the caller can still
    record the model.
    """
    for attribute in ("feature_importances_", "coef_"):
        try:
            ranked = pd.DataFrame(
                {"Feature": feature_names, "Value": abs(getattr(model, attribute))}
            ).sort_values(by="Value", ascending=False)
        except Exception:
            # Deliberately best-effort: any failure moves on to the next
            # attribute. Unlike a bare `except:`, this still lets
            # KeyboardInterrupt / SystemExit stop a long training run.
            continue
        return ranked.reset_index(drop=True)
    return pd.DataFrame({"Feature": ["error"], "Value": [0]})


# Create the output directory before training, so a missing directory is not
# discovered only when the results are written at the very end.
os.makedirs(outputs_dir, exist_ok=True)

# Each list starts with an empty frame that fixes the column order of the
# final table; per-subject frames are appended and concatenated once at the
# end instead of re-copying a growing DataFrame on every iteration.
train_metric_frames = [pd.DataFrame(columns=[*_METRIC_COLUMNS, "TT (Sec)", "Subject"])]
test_metric_frames = [pd.DataFrame(columns=[*_METRIC_COLUMNS, "Subject"])]
importance_frames = [pd.DataFrame(columns=["Feature", "Value", "Model", "Subject"])]

for subject in df['subject'].unique():
    df_subject = df[df['subject'] == subject]
    subject_label = str(subject)

    # Split every experimental condition separately so that each condition
    # contributes rows to both the training and the test set.
    train_frames = []
    test_frames = []
    for _, group in df_subject.groupby('experimental_condition'):
        train_segment, test_segment = split_fourths(group)
        train_frames.append(train_segment)
        test_frames.append(test_segment)

    train = pd.concat(train_frames, ignore_index=True)
    test = pd.concat(test_frames, ignore_index=True)

    reg = setup(
        data=train, test_data=test,
        target='rpe', index=False,
        ignore_features=['experimental_condition', 'subject', 'wrist_acc_time'],
        use_gpu=True, session_id=2024)

    best = compare_models(sort='MAE', n_select=18)
    # pull() returns the score grid of the compare_models call above;
    # assign() copies it so the stored frame is not mutated.
    train_metric_frames.append(pull().assign(Subject=subject_label))

    # Guard against a single estimator being returned instead of a list.
    if not isinstance(best, list):
        best = [best]

    # The training columns are the same for every model of this subject.
    # NOTE(review): if preprocessing changes the number of columns, the
    # importance vector will not line up with these names and the 'error'
    # placeholder row is recorded -- confirm X_train is the right config key.
    feature_names = get_config('X_train').columns

    for model in best:
        # Only the metrics grid is needed; the predictions are discarded.
        predict_model(model, verbose=False)
        test_metric_frames.append(pull().assign(Subject=subject_label))

        importance = _rank_feature_importance(model, feature_names)
        importance['Model'] = str(model)
        importance['Subject'] = subject_label
        importance_frames.append(importance)

train_individual_df = pd.concat(train_metric_frames, ignore_index=True)
test_individual_df = pd.concat(test_metric_frames, ignore_index=True)
feature_importance_individual_df = pd.concat(importance_frames, ignore_index=True)

train_individual_df.to_csv(f'{outputs_dir}/train_individual.csv', header=True, index=False)
test_individual_df.to_csv(f'{outputs_dir}/test_individual.csv', header=True, index=False)
feature_importance_individual_df.to_csv(f'{outputs_dir}/feature_importance_individual.csv', header=True, index=False)