## Imports

In [1]:
from pathlib import Path
import json
import pandas as pd
import numpy as np
import os 

from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 unused import

import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter

## Plotting function

In [2]:
def plot_F1(F1, expcode, expcodes, save_path):
    """Draw, save and show one pair of 3D F1 scatter plots per model.

    For every model in ``F1`` a figure with two 3D axes is created. Both axes
    plot ``test_perc`` (x) against ``num_epochs`` (y); the left axis uses
    ``avg_f1[0]`` as z/colour and is titled 'Avg Macro', the right axis uses
    ``avg_f1[1]`` and is titled 'Avg Weighted'.

    Parameters
    ----------
    F1 : dict
        Mapping ``model -> {test_perc_key -> [result, ...]}``. Each
        ``test_perc_key`` must contain ``"="``; the text after the first
        ``"="`` is converted with ``float``. Each ``result`` is indexed with
        ``'num_epochs'`` and ``'avg_f1'`` (positions 0 and 1).
    expcode : hashable
        Key into ``expcodes`` for the experiment being plotted.
    expcodes : dict
        Mapping from experiment code to the label used in the figure title
        and in the output file name.
    save_path : str
        Directory in which the figure is saved as
        ``"<label>_<model>"`` (no extension is given, so matplotlib's
        default output format is used).

    Raises
    ------
    KeyError
        If ``expcode`` is not in ``expcodes`` (only when ``F1`` is non-empty)
        or a result lacks ``'num_epochs'`` / ``'avg_f1'``.
    """
    for model, M_results in F1.items():
        x, y, z1, z2 = [], [], [], []

        for test_perc, T_results in M_results.items():
            perc = test_perc.split("=")[1]
            for item in T_results:
                x.append(float(perc))
                y.append(item['num_epochs'])
                z1.append(item['avg_f1'][0])
                z2.append(item['avg_f1'][1])

        label = expcodes[expcode] + "_" + model

        fig = plt.figure(figsize=(13, 6))
        try:
            ax1 = fig.add_subplot(121, projection='3d')
            ax2 = fig.add_subplot(122, projection='3d')

            fig.suptitle(label, fontsize=16)
            for ax, title, z in ((ax1, 'Avg Macro', z1), (ax2, 'Avg Weighted', z2)):
                ax.set_zlim(0, 1)
                ax.set_xlabel('test_perc')
                ax.set_ylabel('num_epochs')
                ax.set_zlabel('F1')
                ax.set_title(title)
                ax.scatter3D(np.array(x), np.array(y), np.array(z), c=np.array(z), cmap='viridis')
            fig.subplots_adjust(wspace=0.5)

            # Save through the figure object rather than the implicit
            # "current figure" so the right figure is always written.
            fig.savefig(f"{save_path}/{label}")
            plt.show()
        finally:
            # One figure is created per model; release it so figures do not
            # pile up in memory when many models/experiments are plotted.
            plt.close(fig)
        
def select_best_scores(dictionary):
    """Print the three entries with the highest ``avg_f1[1]`` score.

    ``dictionary`` is walked as ``outer_key -> {model -> [result, ...]}``.
    Every result becomes a row ``[model, outer_key, num_epochs, avg_f1[1]]``;
    the rows are ordered by the score (descending, ties keep their original
    order) and the first three are printed as a plain list. Nothing is
    returned.
    """
    ranked = sorted(
        (
            [model_name, outer_key, entry['num_epochs'], entry['avg_f1'][1]]
            for outer_key, per_model in dictionary.items()
            for model_name, entries in per_model.items()
            for entry in entries
        ),
        key=lambda row: row[3],
        reverse=True,
    )
    print(ranked[:3])
    
    
        

def transform_data(dictionary):
    """Swap the two outer nesting levels of a dict of dicts.

    Given ``{outer_key: {model: value}}`` this returns
    ``{model: {outer_key: value}}``. The innermost values are reused as-is
    (not copied); the input mapping itself is not modified.
    """
    swapped = {}
    for outer_key, per_model in dictionary.items():
        for model, payload in per_model.items():
            # Create the per-model bucket on first sight, then file the
            # payload under the original outer key.
            swapped.setdefault(model, {})[outer_key] = payload
    return swapped
    
  

## Data loading

Temporary requirement: put all the `EXP<i>` result folders (e.g. `EXP10`, `EXP11`) in a single folder called `F1_multiclass_results` inside the `input` folder.

**Note:** Jordi, I know you used `Path`, but for some reason I was not able to retrieve all the JSON files with that method, so I added an alternative based on `os.walk`.

In [3]:
# path = Path("C:/Users/user/Google Drive/Els_meus_documents/projectes/CompetitiveIntelligence/WRI/Notebooks/Data/finetuningResults")
# filename = "FineTuningResults.json"

# sub_path = Path("C:/Users/user/Google Drive/Els_meus_documents/projectes/CompetitiveIntelligence/WRI/Notebooks/Data/finetuningResults/")
# paths = sub_path.glob('*.json')


# Models covered by the experiments.
# Currently left out of the list: "distiluse-base", "quora-distilbert".
models = ["stsb-xlm-r", "paraphrase-xlm-r"]

# Experiment code -> label used for plot titles and output file names.
exp_codes = {
    '0': "Rater2 combined labels",
    '1': "Rater2 only new labels",
    '2': "Rater3 combined labels",
    '3': "Rater3 only new labels",
    '4': "Rater1 combined labels",
    '5': "Rater1 only new labels",
}

# Alternative mapping kept for reference:
# exp_codes = {'20' : "Rater3",
#             '21' : "Rater2",
#             '22' : "Rater1"}

results_path = "../input/F1_multiclass_results/"
output_path = "../output/"

# Collect every file below results_path (recursively), then keep the JSON ones.
all_files = [
    os.path.join(folder, name)
    for folder, _subfolders, names in os.walk(results_path)
    for name in names
]
exp_results_json = [path for path in all_files if path.endswith(".json")]

for exp_result in exp_results_json:
    print(exp_result)
    # Only files whose path contains "EXPTEST" are scored and plotted here.
    if "EXPTEST" not in exp_result:
        continue
    # The experiment number is the text between "EXPTEST" and the next "_".
    exp_number = exp_result.split("EXPTEST")[1].split("_")[0]
    # Optional extra filter, currently disabled: if int(exp_number) > 19:
    with open(exp_result, "r") as f:
        F1 = json.load(f)
    select_best_scores(F1)
    plot_F1(transform_data(F1), exp_number, exp_codes, output_path)

# for path in paths:
#     # because path is object not string
#     path_in_str = str(path)
#     exp_number = path_in_str.split("EXP")[1].split("_")[0]
#     print(exp_number)
#     print(path_in_str)
#     if int(exp_number) > 9:
#         with open(path_in_str, "r") as f:
#             F1 = json.load(f)
#         plot_F1(transform_data(F1), exp_number, exp_codes, sub_path)

../input/F1_multiclass_results/EXP13/EXP13_FineTuningResults.json
../input/F1_multiclass_results/EXP14/EXP14_FineTuningResults.json
../input/F1_multiclass_results/EXP15/EXP15_FineTuningResults.json
../input/F1_multiclass_results/EXP12/EXP12_FineTuningResults.json
../input/F1_multiclass_results/EXP10/EXP10_FineTuningResults.json
../input/F1_multiclass_results/EXP11/EXP11_FineTuningResults.json


In [4]:
def max_f1_per_experiment(results, weighted=False):
    """Return the best-scoring configuration found in ``results``.

    Parameters
    ----------
    results : dict
        Mapping ``model -> {test_perc_key -> [result, ...]}``. Each result is
        indexed with ``'avg_f1'`` (positions 0 and 1) and ``'num_epochs'``.
    weighted : bool, default False
        When True the score is ``avg_f1[1]``, otherwise ``avg_f1[0]``.

    Returns
    -------
    dict
        Keys ``"f1-score"`` (best score rounded to 2 decimals), ``"epochs"``,
        ``"test_perc"`` (text after the last ``"="`` of the key, as a string)
        and ``"model"``. If no score is greater than 0 the initial values
        (``0.0``, ``0``, ``0.0``, ``None``) are returned.
    """
    f1_index = 1 if weighted else 0

    # "model" is pre-seeded so the result always has the same keys, even when
    # no score beats 0. It stays last to keep the existing print/column order.
    max_results = {"f1-score": 0.0, "epochs": 0, "test_perc": 0.0, "model": None}

    # Track the unrounded best score. Comparing against the rounded value
    # would let a slightly worse run replace the best one (0.821 > 0.82) or
    # reject a better one (0.829 is not > 0.83).
    best_f1 = 0.0

    for model, per_test_perc in results.items():
        for test_perc, runs in per_test_perc.items():
            for result in runs:
                cur_f1 = result['avg_f1'][f1_index]
                if cur_f1 > best_f1:
                    best_f1 = cur_f1
                    max_results["f1-score"] = round(cur_f1, 2)
                    max_results["epochs"] = result['num_epochs']
                    max_results["test_perc"] = test_perc.split("=")[-1]
                    max_results["model"] = model

    return max_results

def pretty_print_max_results(max_results):
    """Print each entry of ``max_results`` on its own ``- key: value`` line."""
    for parameter in max_results:
        value = max_results[parameter]
        print(f"- {parameter}: {value}")
        
def store_results_table(df, exp_num, max_results):
    """Return a copy of ``df`` with one extra row for an experiment.

    The new row holds ``exp_num`` under ``"Experiment number"`` plus every
    key/value pair of ``max_results``. ``df`` itself is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Table collected so far (may be empty).
    exp_num : object
        Value stored in the ``"Experiment number"`` column.
    max_results : dict
        Column name -> value for the new row.

    Returns
    -------
    pandas.DataFrame
        New frame with a fresh 0..n-1 index. Columns of ``df`` keep their
        order; columns only present in the new row are added at the end.
    """
    row = {"Experiment number": exp_num}
    row.update(max_results)
    new_row = pd.DataFrame([row])

    # DataFrame.append was removed in pandas 2.0, so build the result with
    # pd.concat instead.
    if len(df) == 0:
        # Concatenating with an empty frame triggers a deprecation warning in
        # recent pandas; just lay the new row out on the table's columns.
        extra = [col for col in new_row.columns if col not in df.columns]
        return new_row.reindex(columns=list(df.columns) + extra)
    return pd.concat([df, new_row], ignore_index=True)

In [6]:
# Compile the best configuration of every experiment into a single table.
df = pd.DataFrame(columns=["Experiment number", "model", "f1-score", "epochs", "test_perc"])
# True: rank runs by avg_f1[1]; False: rank runs by avg_f1[0].
weighted_f1 = True

for exp_result in exp_results_json:
    # Read the experiment number from the file name only (the text between
    # "EXP" and the next "_"). Splitting the whole path and taking a fixed
    # position breaks as soon as any parent directory also contains "EXP".
    exp_number = os.path.basename(exp_result).split("EXP")[1].split("_")[0]
    print("Experiment number:", exp_number)
    with open(exp_result, "r") as f:
        results_json = json.load(f)

    print("Best results:")
    max_res = max_f1_per_experiment(transform_data(results_json), weighted_f1)
    pretty_print_max_results(max_res)
    df = store_results_table(df, exp_number, max_res)

    print("===============================================================")

# Write next to the other outputs; output_path is defined in the data-loading cell.
if weighted_f1:
    df.to_csv(os.path.join(output_path, "weighted-f1-results-compilation.csv"))
else:
    df.to_csv(os.path.join(output_path, "avg-f1-results-compilation.csv"))

Experiment number: 13
Best results:
- f1-score: 0.4
- epochs: 6
- test_perc: 0.15
- model: paraphrase-xlm-r-multilingual-v1
Experiment number: 14
Best results:
- f1-score: 0.48
- epochs: 8
- test_perc: 0.3
- model: paraphrase-xlm-r-multilingual-v1
Experiment number: 15
Best results:
- f1-score: 0.65
- epochs: 10
- test_perc: 0.2
- model: paraphrase-xlm-r-multilingual-v1
Experiment number: 12
Best results:
- f1-score: 0.71
- epochs: 8
- test_perc: 0.2
- model: paraphrase-xlm-r-multilingual-v1
Experiment number: 10
Best results:
- f1-score: 0.82
- epochs: 10
- test_perc: 0.3
- model: paraphrase-xlm-r-multilingual-v1
Experiment number: 11
Best results:
- f1-score: 0.66
- epochs: 8
- test_perc: 0.25
- model: paraphrase-xlm-r-multilingual-v1


In [7]:
# Display the compiled best-results table (one row per experiment).
df

Unnamed: 0,Experiment number,model,f1-score,epochs,test_perc
0,13,paraphrase-xlm-r-multilingual-v1,0.4,6,0.15
1,14,paraphrase-xlm-r-multilingual-v1,0.48,8,0.3
2,15,paraphrase-xlm-r-multilingual-v1,0.65,10,0.2
3,12,paraphrase-xlm-r-multilingual-v1,0.71,8,0.2
4,10,paraphrase-xlm-r-multilingual-v1,0.82,10,0.3
5,11,paraphrase-xlm-r-multilingual-v1,0.66,8,0.25
