In [None]:
# Import various dependencies, including the relevant modules from the Perch
# repository. Note that "chirp" is the old name that the Perch team used, so any
# chirp modules imported here were installed as part of the Perch repository in
# one of the previous cells.

import collections
from etils import epath
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.io import wavfile
import shutil
import tensorflow as tf
from tqdm import tqdm
import os
import json
import pathlib
import seaborn as sns

In [None]:
# Root folder that holds one sub-folder per dataset, each containing one folder
# per model with a `results` directory of JSON files.
# The location can be overridden without editing the notebook by setting the
# PERCH_EVAL_RESULTS_DIR environment variable; when it is unset, the original
# hard-coded location is used, so existing runs behave exactly as before.
path_to_working_dir = pathlib.Path(
    os.environ.get(
        'PERCH_EVAL_RESULTS_DIR',
        '/home/reindert/Valentin_REVO/surfperch_toshare/eval_texel Outputs/september 2024/final_results',
    )
)

# Names of the dataset sub-folders (inside path_to_working_dir) to aggregate.
datasets_to_include = ['texel']

In [None]:
# Gather every metric value from the per-model result files into one long-format
# table: one row per (sound, model, picking strategy, ratio, n_samples, split,
# metric) measurement.
result_columns = ['sound_name', 'model', 'picking_strategy', 'ratio', 'n_samples', 'split', 'value', 'metric']
metrics = ['precision', 'recall', 'auc_roc', 'auc_pr']

# Rows are accumulated as plain dicts and turned into a DataFrame once at the
# end. Calling pd.concat inside the innermost loop copies the whole table on
# every iteration and concatenates onto an empty frame, which newer pandas
# versions warn about.
result_rows = []
for dataset_name in datasets_to_include:
    dataset_folder = path_to_working_dir.joinpath(dataset_name)
    for model_folder in dataset_folder.glob('*'):
        if not model_folder.is_dir():
            continue
        results_folder = model_folder.joinpath('results')
        if not results_folder.exists():
            continue
        for results_json in results_folder.glob('*'):
            if results_json.is_dir():
                continue
            # The file name must consist of exactly five '_'-separated parts:
            # sound, model, picking strategy, ratio and a trailing part that is
            # not used. Any other shape raises ValueError here.
            sound_name, model_name, strategy, ratio, _ = results_json.name.split('_')
            df_results = pd.read_json(str(results_json))
            # Columns are the training-set sizes, rows are the data splits;
            # each cell is indexed by metric name below.
            for n_samples in df_results.columns:
                for split in df_results.index:
                    results = df_results.loc[split, n_samples]
                    for metric in metrics:
                        # np.atleast_1d makes a metric stored as a single number
                        # and one stored as a list of values both produce rows;
                        # building a DataFrame from scalars only would raise.
                        for value in np.atleast_1d(results[metric]):
                            result_rows.append({
                                'sound_name': sound_name,
                                'model': model_name,
                                'picking_strategy': strategy,
                                'ratio': ratio,
                                'n_samples': n_samples,
                                'split': split,
                                'value': value,
                                'metric': metric,
                            })

# Passing `columns` keeps the expected schema even when no result file was found.
final_results = pd.DataFrame(result_rows, columns=result_columns)

In [None]:
def _plot_experiment(sound_name, experiment_results, varied_column, experiment_number, output_dir):
    """Draw, save and show the metric curves of one experiment for one sound.

    The figure is a grid of line plots of `value` against `n_samples`: one row
    per split ("validation", then "testset"), one column per metric, and one
    line (colour and dash style) per distinct value of `varied_column`.

    Args:
        sound_name: Name of the sound; used in the title and the file name.
        experiment_results: Rows of the results table to plot.
        varied_column: Column whose values distinguish the plotted lines.
        experiment_number: Number used in the title and the file name.
        output_dir: Existing directory in which the PNG file is written.
    """
    sns.relplot(
        data=experiment_results,
        x="n_samples",
        y="value",
        row='split',
        col="metric",
        hue=varied_column,
        style=varied_column,
        kind="line",
        row_order=["validation", "testset"],
    )
    plt.suptitle(f'{sound_name.capitalize()}, experiment {experiment_number}')
    plt.tight_layout()
    plt.savefig(output_dir.joinpath(f'{sound_name}_exp{experiment_number}.png'))
    plt.show()


# savefig does not create missing directories, so make sure the target exists.
plots_dir = path_to_working_dir.joinpath('plots')
plots_dir.mkdir(parents=True, exist_ok=True)

for sound_name, sound_results in final_results.groupby('sound_name'):
    # `ratio` is compared as a string because it is parsed from the file name.
    has_fixed_ratio = sound_results['ratio'] == '11'
    has_fixed_strategy = sound_results['picking_strategy'] == 'hockey'
    has_fixed_model = sound_results['model'] == 'surfperch'

    # Each experiment varies one factor while the other two are held fixed:
    # (experiment number, varied column, rows to keep).
    experiments = [
        (1, 'model', has_fixed_ratio & has_fixed_strategy),             # Experiment 1: model
        (2, 'picking_strategy', has_fixed_model & has_fixed_ratio),     # Experiment 2: picking strategy
        (3, 'ratio', has_fixed_model & has_fixed_strategy),             # Experiment 3: ratio
    ]
    for experiment_number, varied_column, keep_rows in experiments:
        _plot_experiment(
            sound_name,
            sound_results.loc[keep_rows],
            varied_column,
            experiment_number,
            plots_dir,
        )
