In [None]:
import pandas as pd
import itertools
import datetime
import json
import os
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter


In [None]:
def load_metrics(run_dir, metrics_dir):
    """Load the training metrics of one run from its JSON file.

    Args:
        run_dir: Name of the run's sub-directory inside ``metrics_dir``.
        metrics_dir: Directory that contains the run sub-directories.

    Returns:
        The parsed content of ``<metrics_dir>/<run_dir>/training_metrics.json``,
        or ``None`` (after printing a message) when the file is missing or
        cannot be parsed.
    """
    run_metrics_path = os.path.join(metrics_dir, run_dir, 'training_metrics.json')

    if not os.path.isfile(run_metrics_path):
        print(f"Skipping {run_dir} because 'training_metrics.json' is missing.")
        return None

    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(run_metrics_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        # A truncated or corrupt file should only drop this run, exactly like
        # a missing file does, instead of aborting the caller's whole loop.
        print(f"Skipping {run_dir} because 'training_metrics.json' could not be parsed: {exc}")
        return None

In [None]:
def smooth_data(y, window_size=51, polyorder=3):
    """Return a Savitzky-Golay smoothed copy of ``y``.

    Sequences shorter than ``window_size`` are handed back unchanged (the very
    same object), because the filter window would not fit.
    """
    has_enough_points = len(y) >= window_size
    return savgol_filter(y, window_size, polyorder) if has_enough_points else y


In [None]:
def extract_loss_data(metrics):
    """Collect the iteration numbers and total losses of the train and test logs.

    ``metrics`` is read through its 'train' and 'test' keys (a missing key
    counts as an empty log); each entry must provide 'iteration' and
    'total_loss'. The result maps 'train_iterations', 'train_total_loss',
    'test_iterations' and 'test_total_loss' to plain lists.
    """
    def pluck(entries, field):
        # One column of the log as a list, in logging order.
        return [record[field] for record in entries]

    train_entries = metrics.get('train', [])
    test_entries = metrics.get('test', [])

    return dict(
        train_iterations=pluck(train_entries, 'iteration'),
        train_total_loss=pluck(train_entries, 'total_loss'),
        test_iterations=pluck(test_entries, 'iteration'),
        test_total_loss=pluck(test_entries, 'total_loss'),
    )

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter

def smooth_data(y, window_size=51, polyorder=3):
    """Smooth ``y`` with a Savitzky-Golay filter; too-short input is returned as is."""
    # NOTE(review): an identical smooth_data is already defined in an earlier
    # cell of this notebook; this later definition is the one in effect.
    if not len(y) < window_size:
        return savgol_filter(y, window_size, polyorder)
    # Fewer points than the filter window: nothing to smooth.
    return y

def _format_loss(value):
    """Format a loss for a legend label; 'N/A' when no value is available."""
    return 'N/A' if value is None else f'{value:.4f}'


def _loss_at_nearest_iteration(iterations, losses, target_iter):
    """Return the loss logged at ``target_iter``, else at the closest logged iteration (None without data)."""
    if target_iter is None or not iterations:
        return None
    # min() keeps the first of equally close candidates, so an exact match
    # resolves to its first occurrence.
    nearest = min(range(len(iterations)), key=lambda k: abs(iterations[k] - target_iter))
    return losses[nearest]


def _mark_minimum(ax, color, name, other_name, iterations, losses, other_iterations, other_losses):
    """Draw a vertical line at the minimum of ``losses`` plus its legend entry; return (min_loss, min_iter)."""
    if not losses:
        return None, None
    min_index = int(np.argmin(losses))
    min_loss = losses[min_index]
    min_iter = iterations[min_index]
    # Pair by iteration number, not by list position: the two series may be
    # logged at different cadences and therefore have different lengths.
    counterpart = _loss_at_nearest_iteration(other_iterations, other_losses, min_iter)
    ax.axvline(x=min_iter, color=color, linestyle=':', alpha=1.0)
    # Empty proxy line so the minimum shows up in the legend.
    ax.plot([], [], color=color, linestyle=':',
            label=f'Min. {name} Loss: {min_loss:.4f} - {other_name} Loss: {_format_loss(counterpart)} @ Iter {min_iter}')
    return min_loss, min_iter


def _best_joint_improvement(train_iterations, train_total_loss, test_iterations, test_total_loss):
    """Return (iteration, train_loss, test_loss) of the best jointly improving step, or (None, None, None)."""
    test_loss_by_iter = {}
    for iteration, loss in zip(test_iterations, test_total_loss):
        test_loss_by_iter.setdefault(iteration, loss)  # first occurrence wins

    # Only iterations logged in both series can be compared.
    shared = [(iteration, train_loss, test_loss_by_iter[iteration])
              for iteration, train_loss in zip(train_iterations, train_total_loss)
              if iteration in test_loss_by_iter]

    best = (None, None, None)
    best_sum = float('inf')
    for previous, current in zip(shared, shared[1:]):
        _, train_loss, test_loss = current
        # Candidate only when both losses dropped relative to the previous shared step.
        if train_loss < previous[1] and test_loss < previous[2]:
            if train_loss + test_loss < best_sum:
                best_sum = train_loss + test_loss
                best = current
    return best


def plot_losses_with_settings(loss_data, run_dir, y_min, y_max, settings):
    """Plot raw and smoothed train/test total loss for one run and report its key points.

    The figure shows both raw curves (low opacity), their smoothed trendlines,
    a dotted vertical line at each curve's minimum and, when one exists, a
    dash-dotted line at the "best overall" iteration: the iteration logged in
    both series where train and test loss both dropped compared with the
    previous shared iteration and whose summed loss is lowest.

    Train and test values are paired by iteration number (falling back to the
    closest logged iteration for the legend's counterpart loss), so the two
    series may have different lengths.

    Args:
        loss_data: Dict with 'train_iterations', 'train_total_loss',
            'test_iterations' and 'test_total_loss' sequences.
        run_dir: Run name used in the figure title.
        y_min, y_max: Limits passed to the y-axis.
        settings: Dict read for 'fill_replay_uniform', 'RGB_AUGMENTATION',
            'keypoint_approach' and 'cameras'; missing keys show as 'N/A'
            (or a camera count of 0).

    Returns:
        Tuple ``(min_train_loss, min_test_loss, min_train_loss_iter,
        min_test_loss_iter, best_overall_train_loss, best_overall_test_loss,
        best_overall_loss_iter)``. Entries are ``None`` when the respective
        series is empty or no best overall iteration exists.
    """
    train_iterations = list(loss_data['train_iterations'])
    train_total_loss = list(loss_data['train_total_loss'])
    test_iterations = list(loss_data['test_iterations'])
    test_total_loss = list(loss_data['test_total_loss'])

    # Smoothed trendlines (Savitzky-Golay via smooth_data).
    smoothed_train_total_loss = smooth_data(train_total_loss)
    smoothed_test_total_loss = smooth_data(test_total_loss)

    # Settings shown in the title.
    fill_replay_uniform = settings.get("fill_replay_uniform", "N/A")
    rgb_augmentation = settings.get("RGB_AUGMENTATION", "N/A")
    keypoint_approach = settings.get("keypoint_approach", "N/A")
    cameras_count = len(settings.get("cameras", []))

    title = (f'Losses for Run: {run_dir}\n'
             f'Fill Replay Uniform: {fill_replay_uniform}, RGB Augmentation: {rgb_augmentation}, '
             f'Keypoint Approach: {keypoint_approach}, Cameras: {cameras_count}')

    fig, ax = plt.subplots(figsize=(10, 6))
    fig.suptitle(title)

    # Raw curves with low opacity, trendlines on top.
    ax.plot(train_iterations, train_total_loss, label='Train Total Loss', color='blue', alpha=0.3)
    ax.plot(test_iterations, test_total_loss, label='Test Total Loss', color='red', alpha=0.3)
    ax.plot(train_iterations, smoothed_train_total_loss, label='Smoothed Train Total Loss', color='blue', linestyle='--', alpha=0.8)
    ax.plot(test_iterations, smoothed_test_total_loss, label='Smoothed Test Total Loss', color='red', linestyle='--', alpha=0.8)

    # Per-curve minima with the other curve's loss at (or nearest to) that iteration.
    min_train_loss, min_train_loss_iter = _mark_minimum(
        ax, 'blue', 'Train', 'Test',
        train_iterations, train_total_loss, test_iterations, test_total_loss)
    min_test_loss, min_test_loss_iter = _mark_minimum(
        ax, 'red', 'Test', 'Train',
        test_iterations, test_total_loss, train_iterations, train_total_loss)

    # Best overall iteration (both losses improving, lowest summed loss).
    best_overall_loss_iter, best_overall_train_loss, best_overall_test_loss = _best_joint_improvement(
        train_iterations, train_total_loss, test_iterations, test_total_loss)
    if best_overall_loss_iter is not None:
        ax.axvline(x=best_overall_loss_iter, color='green', linestyle='-.', alpha=1.0)
        ax.plot([], [], color='green', linestyle='-.',
                label=f'Best Overall Loss: Train {best_overall_train_loss:.4f} - Test {best_overall_test_loss:.4f} @ Iter {best_overall_loss_iter}')

    ax.set_ylim(y_min, y_max)
    ax.set_xlabel('Iteration')
    ax.set_ylabel('Total Loss')
    ax.legend(loc='upper right')

    plt.tight_layout()
    plt.subplots_adjust(hspace=0.3)
    plt.show()
    # This function is called once per run; release the figure so figures do
    # not accumulate in memory.
    plt.close(fig)

    return (min_train_loss, min_test_loss, min_train_loss_iter, min_test_loss_iter,
            best_overall_train_loss, best_overall_test_loss, best_overall_loss_iter)


In [None]:
def highlight_lowest(df):
    """Build a style frame that marks the lowest value of each loss column in yellow.

    Returns a DataFrame with the same index and columns as ``df`` holding CSS
    strings: 'background-color: yellow' on every row that equals the column
    minimum of 'Min Train Loss', 'Min Test Loss', 'Best Overall Train Loss'
    and 'Best Overall Test Loss' (each column judged on its own), and an empty
    string everywhere else.
    """
    highlighted_columns = (
        'Min Train Loss',
        'Min Test Loss',
        'Best Overall Train Loss',
        'Best Overall Test Loss',
    )

    # Row masks first, so a missing column fails before any styling is built.
    lowest_masks = {
        column: df[column].eq(df[column].min())
        for column in highlighted_columns
    }

    styles = pd.DataFrame('', index=df.index, columns=df.columns)
    for column, is_lowest in lowest_masks.items():
        styles.loc[is_lowest, column] = 'background-color: yellow'

    return styles


In [None]:
def process_runs_with_settings(metrics_dir):
    """Plot every run found in ``metrics_dir`` and display a summary table with its settings.

    Sub-directories whose name matches ``%Y-%m-%d_%H-%M`` are treated as runs
    and processed in chronological order. The i-th run (in that order) is
    paired with the i-th point of the hyper-parameter grid built below.
    NOTE(review): this pairing assumes the runs were launched in grid order -
    confirm against the training script.

    Each plot's y-axis limits are that run's own minimum and maximum total
    loss. Runs without a readable metrics file or without any loss entry are
    skipped; runs beyond the end of the grid are still plotted, with their
    settings reported as 'N/A'.

    Args:
        metrics_dir: Directory containing one sub-directory per run.

    Returns:
        A DataFrame with one row per processed run (settings, minimum losses,
        best overall losses and their iterations). The styled version of the
        same table is displayed; when no run could be processed an empty
        frame with the expected columns is returned and nothing is styled.
    """
    timestamp_format = "%Y-%m-%d_%H-%M"

    # Keep only timestamp-named sub-directories, parsing each name once.
    dated_runs = []
    for entry in os.listdir(metrics_dir):
        if not os.path.isdir(os.path.join(metrics_dir, entry)):
            continue
        try:
            started_at = datetime.datetime.strptime(entry, timestamp_format)
        except ValueError:
            print(f"Skipping invalid folder: {entry}")
            continue
        dated_runs.append((started_at, entry))
    dated_runs.sort(key=lambda dated: dated[0])
    valid_run_dirs = [entry for _, entry in dated_runs]

    # Cartesian product of the hyper-parameter grid, in declaration order.
    available_cameras = [f"view_{camera_i}" for camera_i in range(3)]
    grid = {
        'fill_replay_uniform': [False, True],
        'cameras': [available_cameras, [available_cameras[0]]],
        'RGB_AUGMENTATION': ['None', 'partial', 'full'],
        'keypoint_approach': [True, False]
    }
    all_settings = [dict(zip(grid.keys(), values))
                    for values in itertools.product(*grid.values())]

    results = []
    for i, run_dir in enumerate(valid_run_dirs):
        metrics = load_metrics(run_dir, metrics_dir)
        if metrics is None:
            continue

        loss_data = extract_loss_data(metrics)
        all_losses = loss_data['train_total_loss'] + loss_data['test_total_loss']
        if not all_losses:
            # min()/max() below would raise on an empty sequence.
            print(f"Skipping {run_dir} because it contains no loss entries.")
            continue
        y_min = min(all_losses)
        y_max = max(all_losses)

        # The position among all timestamped folders (skipped ones included)
        # selects the grid point; more folders than grid points must not crash.
        if i < len(all_settings):
            settings = all_settings[i]
        else:
            print(f"No grid settings for run #{i} ({run_dir}); reporting settings as N/A.")
            settings = {}

        (min_train_loss, min_test_loss, min_train_iter, min_test_iter,
         best_overall_train_loss, best_overall_test_loss, best_overall_iter) = plot_losses_with_settings(
            loss_data, run_dir, y_min, y_max, settings
        )

        results.append({
            'Fill Replay Uniform': settings.get('fill_replay_uniform', 'N/A'),
            'Cameras': len(settings['cameras']) if 'cameras' in settings else 'N/A',
            'RGB Augmentation': settings.get('RGB_AUGMENTATION', 'N/A'),
            'Keypoint Approach': settings.get('keypoint_approach', 'N/A'),
            'Run': run_dir,
            'Min Train Loss': min_train_loss,
            'Min Test Loss': min_test_loss,
            'Min Train Iteration': min_train_iter,
            'Min Test Iteration': min_test_iter,
            'Best Overall Train Loss': best_overall_train_loss,
            'Best Overall Test Loss': best_overall_test_loss,
            'Best Overall Iteration': best_overall_iter
        })

    settings_columns = ['Fill Replay Uniform', 'Cameras', 'RGB Augmentation', 'Keypoint Approach']
    loss_columns = ['Run', 'Min Train Loss', 'Min Test Loss', 'Min Train Iteration', 'Min Test Iteration',
                    'Best Overall Train Loss', 'Best Overall Test Loss', 'Best Overall Iteration']
    ordered_columns = settings_columns + loss_columns

    # Passing the columns up front keeps the frame well-formed (and ordered)
    # even when no run produced a row.
    results_df = pd.DataFrame(results, columns=ordered_columns)
    if results_df.empty:
        print(f"No runs with usable metrics found in {metrics_dir}.")
        return results_df

    styled_df = results_df.style.apply(highlight_lowest, axis=None)
    display(styled_df)
    return results_df


In [None]:
# Directory holding the timestamped run folders. Set the PERACT_METRICS_DIR
# environment variable to point the notebook at another location without
# editing this cell; the default is the original hard-coded path.
metrics_dir = os.environ.get(
    'PERACT_METRICS_DIR',
    '/home/bepgroup/Projects/PerAct_ws/outputs/models/handing_over_banana/',
)
# Bind the returned frame: process_runs_with_settings already displays the
# styled table, so a bare call would echo a second, unstyled copy.
results_df = process_runs_with_settings(metrics_dir)