## Seed analysis
Examine how much the results of training the models vary across different random seeds, and identify which seeds perform well across models.

In [1]:
import ast

import numpy as np
import pandas as pd

In [80]:
# Identifiers of the model runs to analyse. Each one is used as a directory
# name under ../model_repeats/ when the repeated results are loaded.
models = [
    # count_* runs
    'count_naive_bayes_210218',
    'count_log_reg_210218',
    'count_SVM_210218',
    # tfidf_* runs
    'tfidf_naive_bayes_210218',
    'tfidf_log_reg_210218',
    'tfidf_SVM_210218',
    # bert_*_bert runs
    'bert_naive_bayes_bert_210218',
    'bert_log_reg_bert_210218',
    'bert_SVM_bert_210218',
    # bert_*_scibert runs
    'bert_naive_bayes_scibert_210218',
    'bert_log_reg_scibert_210218',
    'bert_SVM_scibert_210218',
]

In [81]:
# Load every repeated run of every model into one flat table (one row per run).
model_scores = []
for model in models:
    # Each non-blank line of repeated_results.txt is parsed as a Python dict
    # literal describing a single run.
    with open(f'../model_repeats/{model}/repeated_results.txt', 'r') as f:
        for line in f:
            if not line.strip():
                # A blank (e.g. trailing) line is not a run, and
                # ast.literal_eval would raise SyntaxError on it.
                continue
            model_run = ast.literal_eval(line)
            model_run['Model'] = model
            # Flatten the nested score dicts into top-level
            # 'Train <metric>' / 'Test <metric>' keys so each becomes a column.
            for split in ('Train', 'Test'):
                for metric, value in model_run[f'{split} scores'].items():
                    model_run[f'{split} {metric}'] = value
            model_scores.append(model_run)

model_scores_df = pd.DataFrame(model_scores)

In [82]:
# All info for results
# Per-model mean, standard deviation, minimum and maximum of each test metric
# over the repeated runs, rounded to 3 decimals.
test_metrics = ['Test accuracy', 'Test f1', 'Test precision_score', 'Test recall_score']
summary_stats = ['mean', 'std', 'min', 'max']
d = (
    model_scores_df
    .groupby('Model')[test_metrics]
    .agg({metric: summary_stats for metric in test_metrics})
    .round(3)
)
d

Unnamed: 0_level_0,Test accuracy,Test accuracy,Test accuracy,Test accuracy,Test f1,Test f1,Test f1,Test f1,Test precision_score,Test precision_score,Test precision_score,Test precision_score,Test recall_score,Test recall_score,Test recall_score,Test recall_score
Unnamed: 0_level_1,mean,std,min,max,mean,std,min,max,mean,std,min,max,mean,std,min,max
Model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
bert_SVM_bert_210218,0.772,0.024,0.729,0.805,0.775,0.026,0.739,0.818,0.772,0.027,0.724,0.804,0.78,0.039,0.742,0.861
bert_SVM_scibert_210218,0.782,0.025,0.736,0.818,0.784,0.028,0.739,0.817,0.788,0.03,0.741,0.849,0.781,0.041,0.722,0.854
bert_log_reg_bert_210218,0.81,0.024,0.781,0.849,0.814,0.028,0.779,0.866,0.803,0.027,0.765,0.845,0.827,0.047,0.762,0.918
bert_log_reg_scibert_210218,0.824,0.033,0.781,0.866,0.826,0.034,0.775,0.863,0.825,0.027,0.786,0.872,0.828,0.047,0.762,0.887
bert_naive_bayes_bert_210218,0.744,0.024,0.702,0.781,0.749,0.023,0.715,0.781,0.745,0.019,0.708,0.77,0.754,0.034,0.697,0.806
bert_naive_bayes_scibert_210218,0.773,0.016,0.75,0.798,0.779,0.014,0.754,0.8,0.767,0.022,0.732,0.814,0.792,0.024,0.754,0.833
count_SVM_210218,0.78,0.027,0.736,0.822,0.781,0.023,0.755,0.821,0.789,0.039,0.732,0.877,0.776,0.041,0.735,0.854
count_log_reg_210218,0.8,0.022,0.767,0.842,0.804,0.021,0.764,0.84,0.802,0.031,0.772,0.877,0.808,0.04,0.728,0.861
count_naive_bayes_210218,0.812,0.023,0.781,0.849,0.827,0.021,0.797,0.86,0.774,0.02,0.751,0.823,0.89,0.032,0.833,0.937
tfidf_SVM_210218,0.814,0.019,0.788,0.842,0.817,0.02,0.788,0.841,0.817,0.029,0.788,0.888,0.818,0.042,0.762,0.903


In [83]:
# For markdown table of results
# Prints one markdown table row per model: the model name followed by one cell
# per metric, formatted as mean/std/(min, max).
# Statistics are looked up by name in the two-level columns of `d`, so the
# output stays correct if the set or order of metrics/statistics changes.
metric_names = d.columns.get_level_values(0).unique()
for model_name, row in d.iterrows():
    cells = []
    for metric in metric_names:
        stats = row[metric]  # sub-Series indexed by statistic name
        cells.append(f"{stats['mean']}/{stats['std']}/({stats['min']}, {stats['max']})")
    metric_results = '|'.join(cells)
    print(f'|{model_name}|{metric_results}|')

|bert_SVM_bert_210218|0.772/0.024/(0.729, 0.805)|0.775/0.026/(0.739, 0.818)|0.772/0.027/(0.724, 0.804)|0.78/0.039/(0.742, 0.861)|
|bert_SVM_scibert_210218|0.782/0.025/(0.736, 0.818)|0.784/0.028/(0.739, 0.817)|0.788/0.03/(0.741, 0.849)|0.781/0.041/(0.722, 0.854)|
|bert_log_reg_bert_210218|0.81/0.024/(0.781, 0.849)|0.814/0.028/(0.779, 0.866)|0.803/0.027/(0.765, 0.845)|0.827/0.047/(0.762, 0.918)|
|bert_log_reg_scibert_210218|0.824/0.033/(0.781, 0.866)|0.826/0.034/(0.775, 0.863)|0.825/0.027/(0.786, 0.872)|0.828/0.047/(0.762, 0.887)|
|bert_naive_bayes_bert_210218|0.744/0.024/(0.702, 0.781)|0.749/0.023/(0.715, 0.781)|0.745/0.019/(0.708, 0.77)|0.754/0.034/(0.697, 0.806)|
|bert_naive_bayes_scibert_210218|0.773/0.016/(0.75, 0.798)|0.779/0.014/(0.754, 0.8)|0.767/0.022/(0.732, 0.814)|0.792/0.024/(0.754, 0.833)|
|count_SVM_210218|0.78/0.027/(0.736, 0.822)|0.781/0.023/(0.755, 0.821)|0.789/0.039/(0.732, 0.877)|0.776/0.041/(0.735, 0.854)|
|count_log_reg_210218|0.8/0.022/(0.767, 0.842)|0.804/0.021/(0.

## Best seeds

In [84]:
# Best seed for highest Test F1 score
# Mean test metrics per split seed (averaged over all loaded runs), best mean Test f1 first.
(
    model_scores_df
    .groupby('Split random seed')[['Test accuracy', 'Test f1', 'Test precision_score', 'Test recall_score']]
    .mean()
    .sort_values(by='Test f1', ascending=False)
    .round(3)
)

Unnamed: 0_level_0,Test accuracy,Test f1,Test precision_score,Test recall_score
Split random seed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
7,0.818,0.821,0.834,0.812
9,0.802,0.813,0.765,0.87
6,0.788,0.811,0.785,0.841
8,0.802,0.805,0.785,0.829
3,0.801,0.801,0.782,0.826
0,0.792,0.798,0.799,0.801
2,0.782,0.793,0.779,0.811
1,0.788,0.793,0.768,0.825
5,0.771,0.772,0.76,0.789
4,0.762,0.771,0.769,0.778


In [85]:
# Best seed for highest Test precision score
# Mean test metrics per split seed (averaged over all loaded runs), best mean Test precision_score first.
(
    model_scores_df
    .groupby('Split random seed')[['Test accuracy', 'Test f1', 'Test precision_score', 'Test recall_score']]
    .mean()
    .sort_values(by='Test precision_score', ascending=False)
    .round(3)
)

Unnamed: 0_level_0,Test accuracy,Test f1,Test precision_score,Test recall_score
Split random seed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
7,0.818,0.821,0.834,0.812
0,0.792,0.798,0.799,0.801
6,0.788,0.811,0.785,0.841
8,0.802,0.805,0.785,0.829
3,0.801,0.801,0.782,0.826
2,0.782,0.793,0.779,0.811
4,0.762,0.771,0.769,0.778
1,0.788,0.793,0.768,0.825
9,0.802,0.813,0.765,0.87
5,0.771,0.772,0.76,0.789


In [86]:
# Best seed for highest Test recall score
# Mean test metrics per split seed (averaged over all loaded runs), best mean Test recall_score first.
(
    model_scores_df
    .groupby('Split random seed')[['Test accuracy', 'Test f1', 'Test precision_score', 'Test recall_score']]
    .mean()
    .sort_values(by='Test recall_score', ascending=False)
    .round(3)
)

Unnamed: 0_level_0,Test accuracy,Test f1,Test precision_score,Test recall_score
Split random seed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
9,0.802,0.813,0.765,0.87
6,0.788,0.811,0.785,0.841
8,0.802,0.805,0.785,0.829
3,0.801,0.801,0.782,0.826
1,0.788,0.793,0.768,0.825
7,0.818,0.821,0.834,0.812
2,0.782,0.793,0.779,0.811
0,0.792,0.798,0.799,0.801
5,0.771,0.772,0.76,0.789
4,0.762,0.771,0.769,0.778


In [87]:
## Best rankings for all models
# Prints the split seeds ordered best-to-worst by mean score over all models,
# one line per ranking metric (Test f1, then precision, then recall).
# The per-seed means are computed once and re-sorted for each metric.
mean_scores_by_seed = model_scores_df.groupby('Split random seed')[
    ['Test accuracy', 'Test f1', 'Test precision_score', 'Test recall_score']
].mean()
for ranking_metric in ['Test f1', 'Test precision_score', 'Test recall_score']:
    print(mean_scores_by_seed.sort_values([ranking_metric], ascending=False).index)

Int64Index([7, 9, 6, 8, 3, 0, 2, 1, 5, 4], dtype='int64', name='Split random seed')
Int64Index([7, 0, 6, 8, 3, 2, 4, 1, 9, 5], dtype='int64', name='Split random seed')
Int64Index([9, 6, 8, 3, 1, 7, 2, 0, 5, 4], dtype='int64', name='Split random seed')


In [88]:
# Select the models whose mean Test f1 over all their runs exceeds the
# threshold, then keep only the runs belonging to those models.
TOP_MODEL_MIN_MEAN_F1 = 0.81
mean_f1_by_model = model_scores_df.groupby('Model')['Test f1'].mean()
top_models = mean_f1_by_model[mean_f1_by_model > TOP_MODEL_MIN_MEAN_F1].index.tolist()
print(len(top_models))
top_model_scores_df = model_scores_df.loc[model_scores_df['Model'].isin(top_models)]

5


In [89]:
## Best rankings for top models only
# Prints the split seeds ordered best-to-worst by mean score over the runs in
# top_model_scores_df, one line per ranking metric (Test f1, then precision,
# then recall). The per-seed means are computed once and re-sorted per metric.
top_mean_scores_by_seed = top_model_scores_df.groupby('Split random seed')[
    ['Test accuracy', 'Test f1', 'Test precision_score', 'Test recall_score']
].mean()
for ranking_metric in ['Test f1', 'Test precision_score', 'Test recall_score']:
    print(top_mean_scores_by_seed.sort_values([ranking_metric], ascending=False).index)

Int64Index([7, 8, 6, 3, 9, 0, 2, 1, 4, 5], dtype='int64', name='Split random seed')
Int64Index([7, 8, 0, 6, 3, 2, 4, 9, 5, 1], dtype='int64', name='Split random seed')
Int64Index([9, 6, 8, 3, 7, 1, 2, 0, 4, 5], dtype='int64', name='Split random seed')


## The model results with a particular seed

In [90]:
# Split seed whose per-model results are shown in the next cell.
# NOTE(review): 7 is presumably chosen because it ranks first by mean Test f1
# and mean Test precision_score in the seed rankings above — confirm.
seed_number = 7

In [91]:
# Test scores of every model for the chosen split seed, best Test f1 first.
seed_rows = model_scores_df['Split random seed'] == seed_number
(
    model_scores_df.loc[seed_rows][['Model', 'Test f1', 'Test precision_score', 'Test recall_score']]
    .sort_values(by='Test f1', ascending=False)
    .round(3)
)

Unnamed: 0,Model,Test f1,Test precision_score,Test recall_score
7,count_naive_bayes_210218,0.86,0.823,0.9
107,bert_log_reg_scibert_210218,0.853,0.853,0.853
17,count_log_reg_210218,0.84,0.877,0.807
37,tfidf_naive_bayes_210218,0.839,0.758,0.94
57,tfidf_SVM_210218,0.838,0.888,0.793
47,tfidf_log_reg_210218,0.835,0.881,0.793
77,bert_log_reg_bert_210218,0.824,0.821,0.827
117,bert_SVM_scibert_210218,0.817,0.849,0.787
27,count_SVM_210218,0.814,0.877,0.76
97,bert_naive_bayes_scibert_210218,0.8,0.814,0.787


In [92]:
# Mean test scores per model over all of its runs, best mean Test f1 first.
(
    model_scores_df
    .groupby(['Model'])[['Test f1', 'Test precision_score', 'Test recall_score']]
    .mean()
    .sort_values(by='Test f1', ascending=False)
    .round(3)
)

Unnamed: 0_level_0,Test f1,Test precision_score,Test recall_score
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
count_naive_bayes_210218,0.827,0.774,0.89
bert_log_reg_scibert_210218,0.826,0.825,0.828
tfidf_SVM_210218,0.817,0.817,0.818
bert_log_reg_bert_210218,0.814,0.803,0.827
tfidf_log_reg_210218,0.812,0.807,0.819
tfidf_naive_bayes_210218,0.805,0.702,0.945
count_log_reg_210218,0.804,0.802,0.808
bert_SVM_scibert_210218,0.784,0.788,0.781
count_SVM_210218,0.781,0.789,0.776
bert_naive_bayes_scibert_210218,0.779,0.767,0.792


In [93]:
# The ten individual runs (model and split seed) with the highest Test f1.
columns_shown = ['Model', 'Split random seed', 'Test f1', 'Test precision_score', 'Test recall_score']
(
    model_scores_df[columns_shown]
    .sort_values(by='Test f1', ascending=False)
    .round(3)
    .head(10)
)

Unnamed: 0,Model,Split random seed,Test f1,Test precision_score,Test recall_score
76,bert_log_reg_bert_210218,6,0.866,0.82,0.918
108,bert_log_reg_scibert_210218,8,0.863,0.872,0.854
103,bert_log_reg_scibert_210218,3,0.863,0.84,0.887
106,bert_log_reg_scibert_210218,6,0.862,0.839,0.887
7,count_naive_bayes_210218,7,0.86,0.823,0.9
107,bert_log_reg_scibert_210218,7,0.853,0.853,0.853
78,bert_log_reg_bert_210218,8,0.85,0.833,0.868
8,count_naive_bayes_210218,8,0.849,0.79,0.917
109,bert_log_reg_scibert_210218,9,0.844,0.827,0.861
6,count_naive_bayes_210218,6,0.842,0.764,0.937
