# Choose what data should be loaded in this cell
Unlike the other notebook, this one loads only evaluations. To be used for evaluation/postprocessing focused side experiments (rather than model-training ones).

In [2]:
import ipywidgets
import os
import pandas as pd
import numpy as np
import seaborn as sns
from pathlib import Path
from typing import Dict, Any, Iterable
import logging
# curr = os.getcwd()
# os.chdir(Path(curr).resolve().parents[1])
from octopus.experiment_db_api.database_reader_api import ExperimentDatabaseReader
from octopus.summaries.eval_results_loader import EvalResultsLoader
from octopus.summaries.enums import ImgAvgMode, MetricAvgMode, ParamsIgnoringMode
from octopus.visualization.summary_maker import make_results_to_params_chart
from octopus.notebooks.helpers import print_in_2_cols, drop_unchanging_columns
# os.chdir(curr)

# Logging configuration
logging.basicConfig(
    encoding='utf-8',
    level=logging.INFO,
    format='%(asctime)s|%(levelname)s|%(name)s|%(message)s'
)

# Experiments names to be loaded
# experiments = ['DAMD01_10-06-2025_DS_loss_fns+wo_borders_3_EVAL_2', 'DAMD01_10-06-2025_DS_loss_fns+border_thickness_EVAL_2',
#                'DAMD01_10-06-2025_DS_2_loss_fns+border_inputs_EVAL_2', 'DAMD01_10-06-2025_DS_2_loss_fns+border_inputs+border_thickness_EVAL_2']
experiments = ['DS_31-07-2025_CNV_NEGATIVES_EVAL', 'DS_31-07-2025_CNV_NEGATIVES_EVAL2']

# Loading results from database
db_api = ExperimentDatabaseReader('10.130.20.43:5432', 'test_usr', '975ZuGM1XTEg', 'experiments_tests')  #'han:5432', 'exp_user', 'bright', 'experiments')
eval_loader = EvalResultsLoader(db_api)

# or

# Loading results from local directory (now per-run only). In experiments_dir param pass a directory that contains folders named the same as experiments list elements.
# experiments_dir = r''
# eval_loader = EvalResultsLoader(experiments_dir)

avg_images = ImgAvgMode.GLOBAL
classes = ['hard_drusen', 'reticular_p', 'soft_drusen', 'confluent_drusen', 'foci', 'drusen']

# Load all experiments data, averaged for each model
If you are loading results from database (using ExperimentDatabaseReader) and you receive connection errors in this cell: 
1. Make sure you are connected to our Poznan network directly or through VPN. 
2. Try adding "han" with its IP to your hostfile (example entry: "10.130.9.104 han"). The hostfile is in C:\Windows\System32\drivers\etc\hosts (for Windows), or /etc/hosts (for Linux).

In [2]:
experiment_results = []
for eval_name in experiments:
    results_eval = eval_loader.load(
        eval_name,
        avg_images=avg_images,
        avg_metrics=MetricAvgMode.MEAN_AND_SPLIT,
        avg_cases_across_runs=False,
        ignore_params=ParamsIgnoringMode.NONE,
        ignore_unfinished=True)
    results_eval.columns = results_eval.columns.map(lambda x: (str(x[0]) + '_eval', x[1]))
    experiment_results.append(results_eval)

results = pd.concat(experiment_results, join='outer')

2025-08-25 14:31:27,707|INFO|EvalResultsLoader|Loading experiment results from database...
2025-08-25 14:31:33,083|INFO|EvalResultsLoader|Results loaded successfully. Columns: 67, Rows: 2124
  return np.nanmean(variable)
  return np.nanmean(variable)
2025-08-25 14:31:33,413|INFO|EvalResultsLoader|Results processing done. Columns: 88, Rows: 6
2025-08-25 14:31:33,413|INFO|EvalResultsLoader|Loading experiment results from database...
2025-08-25 14:31:33,718|INFO|EvalResultsLoader|Results loaded successfully. Columns: 66, Rows: 354
  return np.nanmean(variable)
  return np.nanmean(variable)
2025-08-25 14:31:33,816|INFO|EvalResultsLoader|Results processing done. Columns: 87, Rows: 1


In [3]:
# Configure columns dropping
drop_obsolete = True
drop_unchanging = True 

# Specify obsolete columns to be dropped (if drop_obsolete is True)
obsolete_cols = [('run_train', 'metrics'), ('run_train', 'train_ds_params.dataset_records'),
                 ('run_train', 'valid_ds_params.dataset_records'), ('run_train', 'dataset_records'),
                 ('run_eval', 'series_name'), ('run_eval', 'user_name'), ('run_train', 'version'), ('run_train', 'visualize'),
                 ('run_train', 'user_name'), ('run_eval', 'dry_run'), ('run_train', 'record_dir_to_split'),
                 ('run_eval', 'dataset_records'),]

# Drop columns that don't change (parameters or results that stay the same), or are likely not very useful
if drop_unchanging and len(results.index) > 1:
    results = drop_unchanging_columns(results)

# Drop obsolete columns
if drop_obsolete:
    obsolete_cols = [col for col in obsolete_cols if col in results.columns]
    results.drop(obsolete_cols, axis=1, inplace=True)

for i, c in enumerate(classes):
    results[('result_eval', f'pr_auc_{c}')] = results[('result_eval', f'pr_auc_{i}')]
    results[('result_eval', f'roc_auc_{c}')] = results[('result_eval', f'roc_auc_{i}')]
    if avg_images == ImgAvgMode.GLOBAL:
        results[('result_global_eval', f'pr_auc_{c}')] = results[('result_global_eval', f'pr_auc_{i}')]
        results[('result_global_eval', f'roc_auc_{c}')] = results[('result_global_eval', f'roc_auc_{i}')]

# print("Loaded evaluation experiments:")
# print_in_2_cols(results[('run_eval', 'experiment_name')].unique())
print(f"Avaliable columns ({len(results.columns)}):")
print_in_2_cols(list(results.columns))

# Save results to CSV
results.reset_index(col_level=1).to_csv("results.csv", sep=';', index=False)

Avaliable columns (66):
('result_eval', 'pr_auc')                              ('result_global_eval', 'pr_auc_3')
('result_eval', 'roc_auc')                             ('result_global_eval', 'pr_auc_4')
('run_eval', 'anomaly_tree_api')                       ('result_global_eval', 'pr_auc_5')
('run_eval', 'comb_idx')                               ('result_global_eval', 'roc_auc_0')
('run_eval', 'exp_id')                                 ('result_global_eval', 'roc_auc_1')
('run_eval', 'experiment_id')                          ('result_global_eval', 'roc_auc_2')
('run_eval', 'experiment_name')                        ('result_global_eval', 'roc_auc_3')
('run_eval', 'learning_rate')                          ('result_global_eval', 'roc_auc_4')
('run_eval', 'model_path')                             ('result_global_eval', 'roc_auc_5')
('run_eval', 'model_run')                              ('result_eval', 'pr_auc_hard_drusen')
('run_eval', 'run_id')                                 ('result_eva

# Chart for multiple experiments

By defalut, `make_results_to_params_chart()` uses `plot_type=sns.violinplot`, if you want to use boxplot, pass `plot_type=sns.boxplot`.
Moreover you can pass kwargs for the selected plot in `plot_kwarg` parameter.
If you'll specify `save_dir`, the plot will be saved in file with a name specified in `save_name`.

Remember: Columns specified in `y_vars` must have numeric values. If they contain arrays, use `MetricAvgMode.MEAN` mode in `load()` function above.

In [4]:
y_vars = [('result_eval', 'pr_auc')]
          # ('result_eval', 'roc_auc')]

y_vars.extend([('result_eval', f'pr_auc_{c}') for c in classes])
# y_vars.extend([('result_eval', f'roc_auc_{c}') for c in classes])


x_vars = [('run_eval', 'experiment_name'),
          ('run_eval', 'learning_rate'),
          ('run_eval', 'losses'),
          ('run_eval', 'n_img_kernels'),
          ('run_eval', 'pool_kernel_size')
          ]

# x_vars = [('run_eval', 'learning_rate')]

make_results_to_params_chart(results, "DS_31-07-2025 - NCC", y_vars, x_vars,
                             save_dir=Path().cwd(), save_name="exp.png",
                             print_shortened_labels=True, shorten_axis_names=True,
                             plot_type=sns.violinplot, plot_kwargs=None)

KeyError: ('run_eval', 'losses')

In [6]:
{i: e for i, e in enumerate(classes)}

{0: 'hard_drusen',
 1: 'reticular_p',
 2: 'soft_drusen',
 3: 'confluent_drusen',
 4: 'foci',
 5: 'drusen'}

In [7]:
results.describe()

Unnamed: 0_level_0,result_eval,result_eval,run_eval,run_eval,run_eval,run_eval,result_eval,result_eval,result_eval,result_eval,...,result_global_eval,result_global_eval,result_eval,result_eval,result_global_eval,result_global_eval,result_eval,result_eval,result_global_eval,result_global_eval
Unnamed: 0_level_1,pr_auc,roc_auc,comb_idx,exp_id,experiment_id,learning_rate,pr_auc_0,pr_auc_1,pr_auc_2,pr_auc_3,...,pr_auc_confluent_drusen,roc_auc_confluent_drusen,pr_auc_foci,roc_auc_foci,pr_auc_foci,roc_auc_foci,pr_auc_drusen,roc_auc_drusen,pr_auc_drusen,roc_auc_drusen
count,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,...,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0
mean,0.661023,0.938396,2.142857,1909.571429,1909.571429,0.000486,0.244305,0.511033,0.465497,0.810451,...,0.833359,0.983334,0.004011,0.580735,0.001234,0.583613,0.805289,0.961236,0.837905,0.965766
std,0.111859,0.075474,1.9518,1.511858,1.511858,0.000481,0.120434,0.224648,0.10932,0.027483,...,0.019908,0.006811,0.004805,0.10449,0.001132,0.112251,0.101464,0.069737,0.088284,0.058708
min,0.410088,0.771267,0.0,1909.0,1909.0,0.0001,0.000263,0.003534,0.229711,0.755358,...,0.793719,0.972466,0.000226,0.5,0.000226,0.5,0.576535,0.804191,0.639706,0.833704
25%,0.68132,0.943914,0.5,1909.0,1909.0,0.0001,0.23231,0.57478,0.463446,0.805624,...,0.829834,0.980026,0.000226,0.5,0.000226,0.5,0.83077,0.978379,0.855383,0.979146
50%,0.701311,0.972625,2.0,1909.0,1909.0,0.0001,0.269832,0.593436,0.496043,0.816709,...,0.834763,0.981864,0.003187,0.538556,0.000686,0.517077,0.837138,0.986628,0.868819,0.986851
75%,0.716104,0.976561,3.5,1909.0,1909.0,0.001,0.293066,0.606114,0.531166,0.826683,...,0.845627,0.988361,0.005405,0.640628,0.002324,0.659345,0.852898,0.992657,0.878717,0.99296
max,0.720912,0.98393,5.0,1913.0,1913.0,0.001,0.389285,0.618469,0.543496,0.836475,...,0.85411,0.992234,0.013399,0.74533,0.00263,0.749522,0.856011,0.995764,0.88861,0.995597


# Ranking on checked parameters
Values in columns correspond to the achieved rank on that criteria. Lower ranking place is better.

In [5]:
ranked_parameters = [('run_eval', 'experiment_name'),
                     ('run_eval', 'run_id'),
                    #  ('run_eval', 'losses'),
                    #  ('run_eval', 'learning_rate'),
                    #  ('result_global_eval', 'pr_auc')
                    #  ('result_eval', 'pr_auc'),
                    #  ('entity_eval', 'name')
                     ]

agg_cols = {('result_global_eval', 'pr_auc'): 'mean'}
for c in classes:
    agg_cols[('result_global_eval', f'pr_auc_{c}')] = 'mean'
for column in results.columns:
    if column[0] == 'result_eval':
        agg_cols[column] = 'mean'
results_to_rank = results.groupby(ranked_parameters).agg(agg_cols)

# create an example ranking
rankings = {
    # ('result_global_eval', 'pr_auc'): 'max',
    # ('result_eval', 'roc_auc'): 'max',
    # ('result_eval', 'pr_auc_4'): 'max',
    # ('result_eval', 'roc_auc_4'): 'max',
}
for c in classes:
    rankings[('result_global_eval', f'pr_auc_{c}')] = 'max'

def create_ranking(df: pd.DataFrame, column_directions: Dict[Any, str]):
    ranking_df = pd.DataFrame()
    for column in rankings:
        direction = 1 if rankings[column] == "min" else -1
        # method decribes rank assignment if tied
        ranking_df[column[1]] = (df[column] * direction).rank(method='min')
    ranking_df["avg_rank"] = ranking_df.mean(axis = 1)
    return ranking_df
    
ranking_df = create_ranking(results_to_rank, rankings)
ranking_df["pr_auc_value"] = results_to_rank[('result_global_eval', 'pr_auc')]

ranking_df.sort_values(by=['avg_rank'], inplace=True)
ranking_df = ranking_df.rename_axis(index=[column[1] for column in ranked_parameters])
ranking_df.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,pr_auc_hard_drusen,pr_auc_reticular_p,pr_auc_soft_drusen,pr_auc_confluent_drusen,pr_auc_foci,pr_auc_drusen,avg_rank,pr_auc_value
experiment_name,run_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
'DS_31-07-2025_CNV_NEGATIVES_EVAL','3992',2.0,1.0,2.0,3.0,5.0,1.0,2.333333,0.520787
'DS_31-07-2025_CNV_NEGATIVES_EVAL','3990',3.0,4.0,3.0,2.0,3.0,2.0,2.833333,0.503943
'DS_31-07-2025_CNV_NEGATIVES_EVAL','3991',4.0,5.0,1.0,1.0,2.0,4.0,2.833333,0.504742
'DS_31-07-2025_CNV_NEGATIVES_EVAL2','3999',1.0,2.0,4.0,6.0,4.0,5.0,3.666667,0.520114
'DS_31-07-2025_CNV_NEGATIVES_EVAL','3995',5.0,3.0,6.0,4.0,5.0,3.0,4.333333,0.480157
'DS_31-07-2025_CNV_NEGATIVES_EVAL','3994',6.0,6.0,5.0,5.0,1.0,6.0,4.833333,0.463907
'DS_31-07-2025_CNV_NEGATIVES_EVAL','3993',7.0,7.0,7.0,7.0,5.0,7.0,6.666667,0.258021


In [23]:
results_to_rank

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,result_eval,result_eval,result_eval,result_eval
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,pr_auc,roc_auc,pr_auc_0,roc_auc_0
"(run_eval, run_id)","(run_eval, losses)","(run_eval, learning_rate)","(result_global_eval, pr_auc)",Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
'2889',"[Dice(epsilon=1e-06, aggregate_per_scan=True, channel_axis=-1, squared_terms=True)]",0.0001,0.901604,0.849364,0.995551,0.849364,0.995551
'2890',"[Dice(epsilon=1e-06, aggregate_per_scan=True, channel_axis=-1, squared_terms=False)]",0.0001,0.699425,0.710781,0.930892,0.710781,0.930892
'2891',"[Tversky(alpha=0.5, beta=0.5, average_over_batch=False, epsilon=1e-06, channel_axis=-1, squared_terms=False, gamma=1.0)]",0.0001,0.607313,0.666338,0.962669,0.666338,0.962669
'2892',"[Tversky(alpha=0.5, beta=0.5, average_over_batch=False, epsilon=1e-06, channel_axis=-1, squared_terms=True, gamma=1.0)]",0.0001,0.899595,0.851728,0.994923,0.851728,0.994923
'2893',"[Tversky(alpha=0.7, beta=0.3, average_over_batch=False, epsilon=1e-06, channel_axis=-1, squared_terms=True, gamma=1.0)]",0.0001,0.901729,0.846982,0.997504,0.846982,0.997504
...,...,...,...,...,...,...,...
'2956',"[Tversky(alpha=0.7, beta=0.3, average_over_batch=False, epsilon=1e-06, channel_axis=-1, squared_terms=False, gamma=2.0)]",0.0010,0.686325,0.681129,0.901560,0.681129,0.901560
'2957',"[Tversky(alpha=0.3, beta=0.7, average_over_batch=False, epsilon=1e-06, channel_axis=-1, squared_terms=False, gamma=2.0)]",0.0010,0.651736,0.696056,0.955638,0.696056,0.955638
'2958',"[Tversky(alpha=0.5, beta=0.5, average_over_batch=False, epsilon=1e-06, channel_axis=-1, squared_terms=False, gamma=1.3)]",0.0010,0.694423,0.723503,0.946545,0.723503,0.946545
'2959',"[Tversky(alpha=0.5, beta=0.5, average_over_batch=False, epsilon=1e-06, channel_axis=-1, squared_terms=True, gamma=2.0)]",0.0010,0.912894,0.853947,0.992059,0.853947,0.992059
