In [23]:
import pandas as pd

metrics = [
    pd.read_csv(f'{folder}/metrics.csv') for folder in ['dslim-bert-base-NER-uncased', 'FacebookAI-roberta-base', 'google-bert-bert-base-uncased', 'microsoft-deberta-v3-base']
    ]
# header of each metrics file: training on,tested on,model,accuracy,precision,recall,f1
# make a new metrics file, with training on, testedon, average accuracy, average precision, average recall, average f1
# sort by f1 score
# remove model column
metrics = pd.concat(metrics)
metrics = metrics.drop(columns=['model'])
metrics = metrics.groupby(['training on', 'tested on']).mean().reset_index()
metrics = metrics.sort_values(by='f1', ascending=False)

# save the metrics file
metrics.to_csv('metrics.csv', index=False)

In [25]:
import pandas as pd

metrics = pd.read_csv('microsoft-deberta-v3-base/metrics.csv')

# keep only columns that contain f1 
metrics = metrics[metrics.columns[metrics.columns.str.contains('f1|training on|tested on')]]

# remove all rows that are tested on "qi_all"
metrics = metrics[~metrics['tested on'].str.contains('oai_all')]

# remove rows where training on and tested on are the same
#metrics = metrics[metrics['training on'] != metrics['tested on']]

# remove rows where tested on contains "chekalina"
metrics = metrics[~metrics['tested on'].str.contains('chekalina')]

# round all values to 2 decimal places
metrics = metrics.round(2)

metrics

Unnamed: 0,training on,tested on,f1,object_f1,aspect_f1
0,oai_webis_2022,oai_webis_2022,0.84,0.82,0.86
1,oai_webis_2022,oai_beloucif,0.65,0.8,0.36
4,oai_beloucif,oai_webis_2022,0.32,0.48,0.14
5,oai_beloucif,oai_beloucif,0.77,0.83,0.53
8,oai_chekalina_2021,oai_webis_2022,0.2,0.16,0.24
9,oai_chekalina_2021,oai_beloucif,0.34,0.35,0.33
12,oai_all,oai_webis_2022,0.84,0.83,0.85
13,oai_all,oai_beloucif,0.78,0.85,0.55
16,oai_chekalina_webis_merged,oai_webis_2022,0.81,0.78,0.84
17,oai_chekalina_webis_merged,oai_beloucif,0.62,0.75,0.35


In [22]:
import pandas as pd
import os

folders = ['dslim-bert-base-NER-uncased', 'FacebookAI-roberta-base', 'google-bert-bert-base-uncased', 'microsoft-deberta-v3-base']

# from each folder retrieve the following files:
# all files that contains _oai_beloucif or _oai_webis and list them in a dataframe
# example file name oai_webis_2022_oai_webis_2022_dslim_test_results.csv
# format: oai_[training on]_oai_[tested on]_[model]_test_results.csv
# categorize the files by training on, tested on, model

files = []
for folder in folders:
    for file in os.listdir(folder):
        if '_oai_beloucif' in file or '_oai_webis' in file:
            files.append(file)

files = pd.DataFrame(files, columns=['file'])

files['training on'] = files['file'].str.extract(r'oai_(.*?)_oai_')
files['tested on'] = files['file'].str.extract(r'oai_.*?_oai_(.*?)_')
files['model'] = files['file'].str.extract(r'_oai_.*?_(.*?)_test_results.csv')

# remove numbers from prefix in model column
files['model'] = files['model'].str.replace(r'\d+_', '', regex=True)

files


Unnamed: 0,file,training on,tested on,model
0,oai_webis_2022_oai_webis_2022_dslim_test_resul...,webis_2022,webis,dslim
1,oai_chekalina_beloucif_merged_oai_webis_2022_d...,chekalina_beloucif_merged,webis,dslim
2,oai_beloucif_oai_beloucif_dslim_test_results.csv,beloucif,beloucif,dslim
3,oai_webis_2022_oai_beloucif_dslim_test_results...,webis_2022,beloucif,dslim
4,oai_chekalina_webis_merged_oai_webis_2022_dsli...,chekalina_webis_merged,webis,dslim
5,oai_chekalina_2021_oai_beloucif_dslim_test_res...,chekalina_2021,beloucif,dslim
6,oai_chekalina_webis_merged_oai_beloucif_dslim_...,chekalina_webis_merged,beloucif,dslim
7,oai_webis_beloucif_merged_oai_webis_2022_dslim...,webis_beloucif_merged,webis,dslim
8,oai_all_oai_beloucif_dslim_test_results.csv,all,beloucif,dslim
9,oai_chekalina_beloucif_merged_oai_beloucif_dsl...,chekalina_beloucif_merged,beloucif,dslim


In [28]:
import pandas as pd

# read metrics file
metrics = pd.read_csv('metrics.csv')

# show columns
# ['training on', 'tested on', 'accuracy', 'precision', 'recall', 'f1', 'object_precision', 'object_recall', 'object_f1', 'aspect_precision','aspect_recall', 'aspect_f1']

# convert to a table where rows are training on, columns are tested on and values are f1 scores

metrics = metrics.pivot(index='training on', columns='tested on', values='f1')
metrics = metrics.round(2)
metrics

tested on,oai_all,oai_beloucif,oai_chekalina_2021,oai_webis_2022
training on,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
oai_all,0.81,0.76,0.83,0.82
oai_beloucif,0.51,0.76,0.45,0.35
oai_chekalina_2021,0.49,0.3,0.84,0.21
oai_chekalina_beloucif_merged,0.65,0.75,0.84,0.36
oai_chekalina_webis_merged,0.75,0.59,0.84,0.8
oai_webis_2022,0.63,0.61,0.47,0.81
oai_webis_beloucif_merged,0.68,0.76,0.46,0.82
