In [1]:
import pandas as pd
import itertools
import datetime
import json
import os
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter


In [2]:
def load_metrics(run_dir, metrics_dir):
    """Read and return the parsed ``training_metrics.json`` of one run.

    The file is looked up at ``<metrics_dir>/<run_dir>/training_metrics.json``.
    When no such file exists a notice is printed and ``None`` is returned.
    """
    metrics_file = os.path.join(metrics_dir, run_dir, 'training_metrics.json')

    if os.path.isfile(metrics_file):
        with open(metrics_file, 'r') as handle:
            return json.load(handle)

    # Nothing to load for this run: tell the user and signal it with None.
    print(f"Skipping {run_dir} because 'training_metrics.json' is missing.")
    return None

def load_settings(run_dir, metrics_dir):
    """Read and return the parsed ``training_settings.json`` of one run.

    The file is looked up at ``<metrics_dir>/<run_dir>/training_settings.json``.
    When no such file exists a notice is printed and ``None`` is returned.
    """
    settings_file = os.path.join(metrics_dir, run_dir, "training_settings.json")

    if os.path.isfile(settings_file):
        with open(settings_file, 'r') as handle:
            return json.load(handle)

    # Nothing to load for this run: tell the user and signal it with None.
    print(f"Skipping {run_dir} because 'training_settings.json' is missing.")
    return None

In [3]:
def smooth_data(y, window_size=51, polyorder=3):
    """Return a Savitzky-Golay smoothed version of ``y``.

    A sequence with fewer than ``window_size`` samples is handed back
    untouched (the very same object); otherwise the result of
    ``savgol_filter(y, window_size, polyorder)`` is returned.
    """
    too_short_to_filter = len(y) < window_size
    return y if too_short_to_filter else savgol_filter(y, window_size, polyorder)


In [4]:
def extract_loss_data(metrics):
    """Turn the per-entry metric records into parallel lists for plotting.

    ``metrics`` is queried with ``.get`` for ``'train'`` and ``'test'`` (each
    defaulting to an empty list). Every entry has to provide the keys
    ``'iteration'``, ``'total_loss'``, ``'trans_loss'`` and ``'rot_loss'``.

    Returns a dict with one list per split and field, e.g.
    ``'train_iterations'`` or ``'test_rot_loss'``.
    """
    entries_by_split = {
        'train': metrics.get('train', []),
        'test': metrics.get('test', []),
    }

    # (key in the returned dict, split to read from, key inside each entry)
    layout = (
        ('train_iterations', 'train', 'iteration'),
        ('train_total_loss', 'train', 'total_loss'),
        ('train_trans_loss', 'train', 'trans_loss'),
        ('train_rot_loss', 'train', 'rot_loss'),
        ('test_iterations', 'test', 'iteration'),
        ('test_total_loss', 'test', 'total_loss'),
        ('test_trans_loss', 'test', 'trans_loss'),
        ('test_rot_loss', 'test', 'rot_loss'),
    )

    loss_data = {}
    for output_key, split, field in layout:
        loss_data[output_key] = [record[field] for record in entries_by_split[split]]
    return loss_data

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter

# NOTE: same definition as the `smooth_data` in the earlier cell; it is
# repeated here, so running this cell rebinds the name to an equivalent function.
def smooth_data(y, window_size=51, polyorder=3):
    """Return a Savitzky-Golay smoothed version of ``y``.

    A sequence with fewer than ``window_size`` samples is handed back
    untouched (the very same object); otherwise the result of
    ``savgol_filter(y, window_size, polyorder)`` is returned.
    """
    too_short_to_filter = len(y) < window_size
    return y if too_short_to_filter else savgol_filter(y, window_size, polyorder)

def _argmin_or_none(losses):
    """Return the index of the smallest non-NaN loss, or None if there is no such value."""
    if len(losses) == 0:
        return None
    values = np.asarray(losses, dtype=float)
    if np.all(np.isnan(values)):
        return None
    return int(np.nanargmin(values))


def _loss_at_iteration(iterations, losses, target_iteration):
    """Return the loss logged at ``target_iteration``, falling back to the closest logged iteration."""
    if target_iteration is None or len(iterations) == 0:
        return None
    # An exact match has distance 0 and therefore always wins; ties go to the first entry.
    closest = min(range(len(iterations)), key=lambda idx: abs(iterations[idx] - target_iteration))
    return losses[closest]


def _format_loss(value):
    """Format a loss for a legend label, tolerating a missing value."""
    return 'N/A' if value is None else f'{value:.4f}'


def _find_best_overall(train_iterations, train_losses, test_iterations, test_losses):
    """Return ``(iteration, train_loss, test_loss)`` of the best joint improvement, or None.

    A point qualifies when both its train and its test loss are lower than at
    the previous point; among those, the one with the smallest sum wins.
    """
    if list(train_iterations) == list(test_iterations):
        # Identical logging grids: pair the entries positionally.
        paired = list(zip(train_iterations, train_losses, test_losses))
    else:
        # Different grids: only iterations present in both series can be compared.
        test_by_iteration = {}
        for iteration, loss in zip(test_iterations, test_losses):
            test_by_iteration.setdefault(iteration, loss)
        paired = [
            (iteration, train_loss, test_by_iteration[iteration])
            for iteration, train_loss in zip(train_iterations, train_losses)
            if iteration in test_by_iteration
        ]

    best_point = None
    best_sum = float('inf')
    for previous, current in zip(paired, paired[1:]):
        _, train_loss, test_loss = current
        if train_loss < previous[1] and test_loss < previous[2]:
            if train_loss + test_loss < best_sum:
                best_sum = train_loss + test_loss
                best_point = current
    return best_point


def _build_run_title(run_dir, settings):
    """Compose the figure title that summarises the run and its settings."""
    headline_keys = ["fill_replay_setting", "RGB_AUGMENTATION", "keypoint_approach", "cameras"]
    fill_replay = settings.get("fill_replay_setting", "N/A")
    rgb_augmentation = settings.get("RGB_AUGMENTATION", "N/A")
    keypoint_approach = settings.get("keypoint_approach", "N/A")
    cameras_count = len(settings.get("cameras", []))
    other_settings = {key: value for key, value in settings.items() if key not in headline_keys}
    return (f'Losses for Run: {run_dir}\n'
            f'Fill Replay: {fill_replay}, RGB Augmentation: {rgb_augmentation}, '
            f'Keypoint Approach: {keypoint_approach}, Cameras: {cameras_count}, '
            f'Others: {other_settings}')


def _draw_loss_curves(ax, train_iterations, train_loss, test_iterations, test_loss, name):
    """Draw the raw (faint) and smoothed (dashed) train/test curves of one loss on ``ax``."""
    ax.plot(train_iterations, train_loss, label=f'Train {name} Loss', color='blue', alpha=0.3)
    ax.plot(test_iterations, test_loss, label=f'Test {name} Loss', color='red', alpha=0.3)
    ax.plot(train_iterations, smooth_data(train_loss), label=f'Smoothed Train {name} Loss',
            color='blue', linestyle='--', alpha=0.8)
    ax.plot(test_iterations, smooth_data(test_loss), label=f'Smoothed Test {name} Loss',
            color='red', linestyle='--', alpha=0.8)


def _finish_panel(ax, name, ylim):
    """Apply axis labels, legend and (optional) y-limits to one panel."""
    ax.set_xlabel('Iteration')
    ax.set_ylabel(f'{name} Loss')
    ax.legend(loc='upper right')
    if ylim is not None:
        ax.set_ylim(*ylim)


def plot_losses_with_settings(loss_data, run_dir, y_min, y_max, settings,
                              total_ylim=(0, 30), component_ylim=(0, 15), show_title=False):
    """Plot total, translation and rotation losses of one run and report its best points.

    Three stacked panels are drawn (total / trans / rot). Each shows the raw
    train and test curves with low opacity plus a smoothed trendline. The
    total-loss panel additionally marks the minimum train loss, the minimum
    test loss and the "best overall" point with vertical lines.

    Args:
        loss_data: dict as produced by ``extract_loss_data`` (keys
            ``'<split>_iterations'``, ``'<split>_total_loss'``,
            ``'<split>_trans_loss'``, ``'<split>_rot_loss'`` for train and test).
            Iterations are assumed to be numeric.
        run_dir: name of the run; only used for the optional title.
        y_min, y_max: y-range used for the total-loss panel when
            ``total_ylim`` is ``None``; ignored otherwise.
        settings: settings dict of the run; only used for the optional title.
        total_ylim: ``(low, high)`` y-limits of the total-loss panel.
        component_ylim: ``(low, high)`` y-limits of the trans and rot panels,
            or ``None`` to let matplotlib autoscale.
        show_title: when true, put a title summarising ``settings`` on the figure.

    Returns:
        Tuple ``(min_train_loss, min_test_loss, min_train_loss_iter,
        min_test_loss_iter, best_overall_train_loss, best_overall_test_loss,
        best_overall_loss_iter)``. Entries are ``None`` when they cannot be
        determined (empty series, or no point where both losses improved).
    """
    train_iterations = loss_data['train_iterations']
    train_total_loss = loss_data['train_total_loss']
    test_iterations = loss_data['test_iterations']
    test_total_loss = loss_data['test_total_loss']

    fig, axs = plt.subplots(3, 1, figsize=(10, 18))
    if show_title:
        fig.suptitle(_build_run_title(run_dir, settings))

    # ---------- Total loss ----------
    total_ax = axs[0]
    _draw_loss_curves(total_ax, train_iterations, train_total_loss,
                      test_iterations, test_total_loss, 'Total')

    min_train_loss = min_train_loss_iter = None
    min_test_loss = min_test_loss_iter = None

    min_train_index = _argmin_or_none(train_total_loss)
    if min_train_index is not None:
        min_train_loss = train_total_loss[min_train_index]
        min_train_loss_iter = train_iterations[min_train_index]
        total_ax.axvline(x=min_train_loss_iter, color='blue', linestyle=':', alpha=1.0)
        # The counterpart is looked up by iteration, not by list position: the
        # train and test series are not guaranteed to share the same indices.
        counterpart_test_loss = _loss_at_iteration(test_iterations, test_total_loss, min_train_loss_iter)
        # Empty proxy line: exists only to put the numbers into the legend.
        total_ax.plot([], [], color='blue', linestyle=':',
                      label=f'Min. Train Loss: {min_train_loss:.4f} - '
                            f'Test Loss: {_format_loss(counterpart_test_loss)} @ Iter {min_train_loss_iter}')

    min_test_index = _argmin_or_none(test_total_loss)
    if min_test_index is not None:
        min_test_loss = test_total_loss[min_test_index]
        min_test_loss_iter = test_iterations[min_test_index]
        total_ax.axvline(x=min_test_loss_iter, color='red', linestyle=':', alpha=1.0)
        counterpart_train_loss = _loss_at_iteration(train_iterations, train_total_loss, min_test_loss_iter)
        total_ax.plot([], [], color='red', linestyle=':',
                      label=f'Min. Test Loss: {min_test_loss:.4f} - '
                            f'Train Loss: {_format_loss(counterpart_train_loss)} @ Iter {min_test_loss_iter}')

    best_overall_train_loss = best_overall_test_loss = best_overall_loss_iter = None
    best_point = _find_best_overall(train_iterations, train_total_loss,
                                    test_iterations, test_total_loss)
    if best_point is not None:
        best_overall_loss_iter, best_overall_train_loss, best_overall_test_loss = best_point
        total_ax.axvline(x=best_overall_loss_iter, color='green', linestyle='-.', alpha=1.0)
        total_ax.plot([], [], color='green', linestyle='-.',
                      label=f'Best Overall Loss: Train {best_overall_train_loss:.4f} - '
                            f'Test {best_overall_test_loss:.4f} @ Iter {best_overall_loss_iter}')

    _finish_panel(total_ax, 'Total', (y_min, y_max) if total_ylim is None else total_ylim)

    # ---------- Trans loss ----------
    _draw_loss_curves(axs[1], train_iterations, loss_data['train_trans_loss'],
                      test_iterations, loss_data['test_trans_loss'], 'Trans')
    _finish_panel(axs[1], 'Trans', component_ylim)

    # ---------- Rot loss ----------
    _draw_loss_curves(axs[2], train_iterations, loss_data['train_rot_loss'],
                      test_iterations, loss_data['test_rot_loss'], 'Rot')
    _finish_panel(axs[2], 'Rot', component_ylim)

    # Display the plot
    plt.tight_layout()
    plt.show()

    return (min_train_loss, min_test_loss, min_train_loss_iter, min_test_loss_iter,
            best_overall_train_loss, best_overall_test_loss, best_overall_loss_iter)


In [27]:
def highlight_lowest(df):
    """Return a style frame marking the minimum of each loss column in yellow.

    The result has the same index and columns as ``df``; every cell holds an
    empty string except the cells equal to their column minimum in
    'Min Train Loss', 'Min Test Loss', 'Best Overall Train Loss' and
    'Best Overall Test Loss'.
    """
    highlighted_columns = (
        'Min Train Loss',
        'Min Test Loss',
        'Best Overall Train Loss',
        'Best Overall Test Loss',
    )
    # All masks are computed before the style frame is built.
    lowest_masks = {
        column: df[column] == df[column].min()
        for column in highlighted_columns
    }

    styles = pd.DataFrame('', index=df.index, columns=df.columns)
    for column, is_lowest in lowest_masks.items():
        styles.loc[is_lowest, column] = 'background-color: yellow'
    return styles


In [28]:
def process_runs_with_settings(metrics_dir, filter_data = None):
    """Plot every run found in ``metrics_dir`` and build a summary table.

    Sub-folders of ``metrics_dir`` whose name matches ``%Y-%m-%d_%H-%M`` are
    treated as runs and processed in chronological order. For each run the
    metrics and settings JSON files are loaded, the losses are plotted and one
    summary row is collected. The styled table is displayed at the end.

    Args:
        metrics_dir: folder containing one sub-folder per run.
        filter_data: optional collection of run folder names; when given, only
            those runs are processed.

    Returns:
        ``pandas.DataFrame`` with one row per processed run (settings columns
        first, then the loss columns). It is empty, but still has all columns,
        when no run could be processed.
    """
    run_name_format = "%Y-%m-%d_%H-%M"
    settings_columns = ['Fill Replay', 'Cameras', 'RGB Augmentation', 'Keypoint Approach']
    loss_columns = ['Run', 'Min Train Loss', 'Min Test Loss', 'Min Train Iteration', 'Min Test Iteration',
                    'Best Overall Train Loss', 'Best Overall Test Loss', 'Best Overall Iteration']
    ordered_columns = settings_columns + loss_columns

    # Parse each folder name once; the parsed timestamp doubles as the sort key.
    dated_runs = []
    for entry in os.listdir(metrics_dir):
        if not os.path.isdir(os.path.join(metrics_dir, entry)):
            continue
        try:
            started_at = datetime.datetime.strptime(entry, run_name_format)
        except ValueError:
            print(f"Skipping invalid folder: {entry}")
            continue
        dated_runs.append((started_at, entry))
    dated_runs.sort()

    results = []
    for _, run_dir in dated_runs:
        if filter_data is not None and run_dir not in filter_data:
            continue

        metrics = load_metrics(run_dir, metrics_dir)
        settings = load_settings(run_dir, metrics_dir)
        if metrics is None or settings is None:
            continue

        loss_data = extract_loss_data(metrics)
        all_total_losses = loss_data['train_total_loss'] + loss_data['test_total_loss']
        if not all_total_losses:
            # min()/max() below would raise on an empty sequence.
            print(f"Skipping {run_dir} because it contains no loss entries.")
            continue
        y_min = min(all_total_losses)
        y_max = max(all_total_losses)

        (min_train_loss, min_test_loss, min_train_iter, min_test_iter,
         best_overall_train_loss, best_overall_test_loss, best_overall_iter) = plot_losses_with_settings(
            loss_data, run_dir, y_min, y_max, settings
        )

        # Same defaults as the plot title logic, so a settings file that lacks
        # a key does not abort the whole summary.
        results.append({
            'Fill Replay': settings.get('fill_replay_setting', 'N/A'),
            'Cameras': len(settings.get('cameras', [])),
            'RGB Augmentation': settings.get('RGB_AUGMENTATION', 'N/A'),
            'Keypoint Approach': settings.get('keypoint_approach', 'N/A'),
            'Run': run_dir,
            'Min Train Loss': min_train_loss,
            'Min Test Loss': min_test_loss,
            'Min Train Iteration': min_train_iter,
            'Min Test Iteration': min_test_iter,
            'Best Overall Train Loss': best_overall_train_loss,
            'Best Overall Test Loss': best_overall_test_loss,
            'Best Overall Iteration': best_overall_iter
        })

    # Passing `columns` keeps the schema intact even when `results` is empty.
    results_df = pd.DataFrame(results, columns=ordered_columns)
    if results_df.empty:
        print(f"No runs to summarise in {metrics_dir}.")
        return results_df

    styled_df = results_df.style.apply(highlight_lowest, axis=None)
    display(styled_df)
    return results_df


In [None]:
# Root folder with one sub-folder per task; each task folder is handed to
# process_runs_with_settings, which looks for dated run folders inside it.
metrics_dir_tasks = '/home/bepgroup/Projects/PerAct_ws/peract_colab/outputs_sampling/models'

# To inspect selected runs of a single task instead, call for example:
#   metrics_dir = '/home/bepgroup/Projects/PerAct_ws/peract_colab/outputs/models/open_drawer'
#   process_runs_with_settings(metrics_dir, ["2025-01-07_10-48"])

# Sorted for a reproducible output order; plain files in the root are skipped
# because process_runs_with_settings lists the path as a directory.
for metrics_dir_task in sorted(os.listdir(metrics_dir_tasks)):
    task_path = os.path.join(metrics_dir_tasks, metrics_dir_task)
    if not os.path.isdir(task_path):
        continue
    # if metrics_dir_task in ["handing_over_tuna_fish_can"]:
    print(f"Processing task: {metrics_dir_task}")
    process_runs_with_settings(task_path)