## Seed analysis
Examine how much the results vary across random seeds when training the models, and identify which seeds perform well across models.

In [1]:
import pandas as pd
import ast

In [15]:
# models = ['count_naive_bayes_201021', 'count_log_reg_201022', 'tfidf_naive_bayes_201021', 'tfidf_log_reg_201021',
# 'bert_naive_bayes_bert_201021', 'bert_log_reg_bert_201022', 'bert_log_reg_scibert_201022', 
#           'count_SVM_201022', 'tfidf_SVM_201022', 'bert_SVM_bert_201022', 'bert_SVM_scibert_201022']

In [16]:
# Names of the model runs to analyse: every feature representation
# (count, tfidf, BERT, SciBERT) paired with every classifier
# (naive Bayes, logistic regression, SVM), all from the 210128 run.
# Each name is used as a sub-directory of ../model_repeats/ when loading.
models = [
    f'{name_template.format(classifier)}_210128'
    for name_template in ('count_{}', 'tfidf_{}', 'bert_{}_bert', 'bert_{}_scibert')
    for classifier in ('naive_bayes', 'log_reg', 'SVM')
]

In [17]:
# Load every repeated run of every model into one flat table.
#
# Each non-empty line of ../model_repeats/<model>/repeated_results.txt is
# parsed as a Python literal dict describing one run. It must contain the
# nested dicts 'Train scores' and 'Test scores'; their entries are copied to
# top-level 'Train <metric>' / 'Test <metric>' keys so that each metric
# becomes its own DataFrame column. The original nested dicts are kept.
model_scores = []
for model in models:
    results_path = f'../model_repeats/{model}/repeated_results.txt'
    with open(results_path, 'r') as f:
        for line in f:
            # Skip blank lines (e.g. a trailing newline at the end of the
            # file); ast.literal_eval would raise on them.
            if not line.strip():
                continue
            model_run = ast.literal_eval(line)
            model_run['Model'] = model
            # Flatten Train first, then Test, to keep the column order stable.
            for split in ('Train', 'Test'):
                for metric, value in model_run[f'{split} scores'].items():
                    model_run[f'{split} {metric}'] = value
            model_scores.append(model_run)

# One row per (model, repeated run).
model_scores_df = pd.DataFrame(model_scores)

In [18]:
# Per-model summary of the test metrics over all repeated runs:
# mean, standard deviation, minimum and maximum, rounded to 3 decimals.
(
    model_scores_df
    .groupby('Model')[['Test accuracy', 'Test f1', 'Test precision_score', 'Test recall_score']]
    .agg(['mean', 'std', 'min', 'max'])
    .round(3)
)

Unnamed: 0_level_0,Test accuracy,Test accuracy,Test accuracy,Test accuracy,Test f1,Test f1,Test f1,Test f1,Test precision_score,Test precision_score,Test precision_score,Test precision_score,Test recall_score,Test recall_score,Test recall_score,Test recall_score
Unnamed: 0_level_1,mean,std,min,max,mean,std,min,max,mean,std,min,max,mean,std,min,max
Model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
bert_SVM_bert_210128,0.774,0.017,0.753,0.799,0.78,0.02,0.742,0.809,0.773,0.051,0.667,0.847,0.794,0.064,0.694,0.883
bert_SVM_scibert_210128,0.786,0.027,0.753,0.845,0.782,0.033,0.728,0.851,0.81,0.047,0.724,0.865,0.764,0.088,0.647,0.885
bert_log_reg_bert_210128,0.766,0.02,0.736,0.805,0.768,0.025,0.718,0.817,0.77,0.041,0.691,0.826,0.769,0.051,0.659,0.844
bert_log_reg_scibert_210128,0.797,0.027,0.759,0.851,0.798,0.021,0.769,0.838,0.807,0.028,0.758,0.859,0.791,0.034,0.753,0.87
bert_naive_bayes_bert_210128,0.728,0.041,0.655,0.776,0.738,0.031,0.688,0.789,0.723,0.036,0.673,0.768,0.757,0.064,0.685,0.84
bert_naive_bayes_scibert_210128,0.78,0.03,0.741,0.833,0.789,0.026,0.759,0.842,0.772,0.044,0.696,0.836,0.812,0.064,0.718,0.909
count_SVM_210128,0.726,0.044,0.661,0.787,0.711,0.052,0.629,0.786,0.767,0.055,0.69,0.853,0.67,0.094,0.518,0.818
count_log_reg_210128,0.772,0.025,0.741,0.816,0.769,0.021,0.744,0.812,0.793,0.055,0.701,0.877,0.751,0.039,0.671,0.805
count_naive_bayes_210128,0.79,0.02,0.753,0.828,0.809,0.014,0.791,0.835,0.755,0.04,0.7,0.817,0.875,0.047,0.788,0.951
tfidf_SVM_210128,0.768,0.04,0.73,0.833,0.748,0.051,0.662,0.828,0.836,0.055,0.742,0.921,0.688,0.107,0.541,0.857


## Best seeds

In [19]:
# Mean test metrics per split seed (averaged over all model runs),
# ordered so the seed with the highest mean Test F1 comes first.
(
    model_scores_df
    .groupby('Split random seed')[['Test accuracy', 'Test f1', 'Test precision_score', 'Test recall_score']]
    .mean()
    .sort_values(by=['Test f1'], ascending=False)
    .round(3)
)

Unnamed: 0_level_0,Test accuracy,Test f1,Test precision_score,Test recall_score
Split random seed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
7,0.805,0.813,0.784,0.849
8,0.783,0.784,0.735,0.849
0,0.782,0.779,0.812,0.755
4,0.773,0.775,0.703,0.87
9,0.766,0.768,0.747,0.795
2,0.749,0.765,0.778,0.759
1,0.759,0.765,0.804,0.736
3,0.75,0.763,0.829,0.715
6,0.748,0.753,0.779,0.733
5,0.766,0.737,0.812,0.684


In [20]:
# Mean test metrics per split seed (averaged over all model runs),
# ordered so the seed with the highest mean Test precision comes first.
(
    model_scores_df
    .groupby('Split random seed')[['Test accuracy', 'Test f1', 'Test precision_score', 'Test recall_score']]
    .mean()
    .sort_values(by=['Test precision_score'], ascending=False)
    .round(3)
)

Unnamed: 0_level_0,Test accuracy,Test f1,Test precision_score,Test recall_score
Split random seed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
3,0.75,0.763,0.829,0.715
5,0.766,0.737,0.812,0.684
0,0.782,0.779,0.812,0.755
1,0.759,0.765,0.804,0.736
7,0.805,0.813,0.784,0.849
6,0.748,0.753,0.779,0.733
2,0.749,0.765,0.778,0.759
9,0.766,0.768,0.747,0.795
8,0.783,0.784,0.735,0.849
4,0.773,0.775,0.703,0.87


In [21]:
# Mean test metrics per split seed (averaged over all model runs),
# ordered so the seed with the highest mean Test recall comes first.
(
    model_scores_df
    .groupby('Split random seed')[['Test accuracy', 'Test f1', 'Test precision_score', 'Test recall_score']]
    .mean()
    .sort_values(by=['Test recall_score'], ascending=False)
    .round(3)
)

Unnamed: 0_level_0,Test accuracy,Test f1,Test precision_score,Test recall_score
Split random seed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
4,0.773,0.775,0.703,0.87
8,0.783,0.784,0.735,0.849
7,0.805,0.813,0.784,0.849
9,0.766,0.768,0.747,0.795
2,0.749,0.765,0.778,0.759
0,0.782,0.779,0.812,0.755
1,0.759,0.765,0.804,0.736
6,0.748,0.753,0.779,0.733
3,0.75,0.763,0.829,0.715
5,0.766,0.737,0.812,0.684


In [50]:
## Best rankings for all models
# Print the seed order (best first) by mean Test F1, precision and recall,
# averaged over all model runs. The per-seed means are computed once and
# re-sorted for each metric. Only the index is printed, so the values are
# not rounded.
seed_means = model_scores_df.groupby('Split random seed')[
    ['Test accuracy', 'Test f1', 'Test precision_score', 'Test recall_score']
].mean()
for ranking_metric in ('Test f1', 'Test precision_score', 'Test recall_score'):
    print(seed_means.sort_values([ranking_metric], ascending=False).index)

Int64Index([7, 8, 0, 4, 9, 2, 1, 3, 6, 5], dtype='int64', name='Split random seed')
Int64Index([3, 5, 0, 1, 7, 6, 2, 9, 8, 4], dtype='int64', name='Split random seed')
Int64Index([4, 8, 7, 9, 2, 0, 1, 6, 3, 5], dtype='int64', name='Split random seed')


In [57]:
# Keep only the models whose mean Test F1 over all repeated runs exceeds
# 0.78, report how many there are, and restrict the results table to the
# runs of those models.
top_models = (
    model_scores_df.groupby('Model')['Test f1']
    .mean()
    .loc[lambda mean_f1: mean_f1 > 0.78]
    .index
    .tolist()
)
print(len(top_models))
top_model_scores_df = model_scores_df[model_scores_df['Model'].isin(top_models)]

6


In [58]:
## Best rankings for top models only
# Print the seed order (best first) by mean Test F1, precision and recall,
# averaged over the runs in top_model_scores_df. The per-seed means are
# computed once and re-sorted for each metric. Only the index is printed,
# so the values are not rounded.
top_seed_means = top_model_scores_df.groupby('Split random seed')[
    ['Test accuracy', 'Test f1', 'Test precision_score', 'Test recall_score']
].mean()
for ranking_metric in ('Test f1', 'Test precision_score', 'Test recall_score'):
    print(top_seed_means.sort_values([ranking_metric], ascending=False).index)

Int64Index([7, 0, 8, 1, 3, 4, 9, 2, 6, 5], dtype='int64', name='Split random seed')
Int64Index([3, 1, 0, 5, 6, 2, 7, 9, 8, 4], dtype='int64', name='Split random seed')
Int64Index([4, 8, 7, 9, 0, 2, 1, 6, 3, 5], dtype='int64', name='Split random seed')
