In [None]:
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from matplotlib.ticker import MultipleLocator, AutoMinorLocator

def find_log_files(root_dir):
    """Recursively collect the ``*.log`` files located under ``root_dir``.

    Args:
        root_dir: Directory (str or path-like) to search, including all of
            its subdirectories.

    Returns:
        list[pathlib.Path]: The matching regular files in sorted order; an
        empty list when nothing matches.
    """
    # rglob yields entries in filesystem order and also matches directories
    # whose name ends in ".log". Keep only regular files (the parser opens
    # every returned path for reading) and sort so reruns see the same order.
    return sorted(path for path in Path(root_dir).rglob('*.log') if path.is_file())

def parse_log_files(log_file_paths):
    """Parse validation logs into one DataFrame row per reported result.

    Each file is split on the literal text
    ``positional_encoding will be changed``. A section contributes a row only
    when all four expected lines are found in it: the checkpoint summary
    (epoch, train/val loss), the ``Processing: <path>.pt`` line, the
    ``dataset: <name> <size>`` line and the ``Correct cases = ...`` line.
    Sections missing any of them are skipped.

    Args:
        log_file_paths: Iterable of paths to the log files to read.

    Returns:
        pandas.DataFrame: Columns ``file``, ``epoch``, ``train_loss``,
        ``val_loss``, ``dataset_name``, ``dataset_size``, ``failed_cases``,
        ``total_cases``, ``success_rate`` and ``correct_tokens``, sorted by
        ``epoch`` then ``dataset_name``. When no section matches, an empty
        frame with these columns is returned.
    """
    columns = [
        'file', 'epoch', 'train_loss', 'val_loss', 'dataset_name',
        'dataset_size', 'failed_cases', 'total_cases', 'success_rate',
        'correct_tokens',
    ]

    # Unsigned decimal with an optional exponent. A plain [\d.]+ would read
    # "1e-05" as 1.0 (or fail to match at all before ", val_loss"), so small
    # losses printed in scientific notation were silently wrong or dropped.
    number = r'(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?'
    checkpoint_re = re.compile(
        r'The checkpoint was saved at epoch (\d+), train_loss: ('
        + number + r'), val_loss: (' + number + r')'
    )
    file_re = re.compile(r'Processing: (.+\.pt)')
    dataset_re = re.compile(r'dataset: (.+) (\d+)')
    success_re = re.compile(
        r'Correct cases = (\d+)/(\d+), success rate ([\d.]+)%, total correct tokens: (\d+)'
    )

    all_data = []
    for log_file_path in log_file_paths:
        # Decode explicitly so results do not depend on the platform default
        # encoding; every pattern is ASCII, so replacing undecodable bytes
        # cannot change what is matched.
        with open(log_file_path, 'r', encoding='utf-8', errors='replace') as file:
            content = file.read()

        # Split the content into sections based on the positional_encoding line
        for section in re.split(r'positional_encoding will be changed', content):
            if not section.strip():
                continue

            checkpoint_info = checkpoint_re.search(section)
            file_info = file_re.search(section)
            dataset_info = dataset_re.search(section)
            success_rate_info = success_re.search(section)
            if not (checkpoint_info and file_info and dataset_info and success_rate_info):
                continue

            # Identify the checkpoint by its file name plus the first 15
            # characters of the name of the folder that contains it.
            full_path = Path(file_info.group(1))
            folder_name = full_path.parent.name[:15]

            correct_cases = int(success_rate_info.group(1))
            total_cases = int(success_rate_info.group(2))

            all_data.append({
                'file': f'{folder_name}/{full_path.name}',
                'epoch': int(checkpoint_info.group(1)),
                'train_loss': float(checkpoint_info.group(2)),
                'val_loss': float(checkpoint_info.group(3)),
                'dataset_name': dataset_info.group(1),
                'dataset_size': int(dataset_info.group(2)),
                'failed_cases': total_cases - correct_cases,
                'total_cases': total_cases,
                'success_rate': float(success_rate_info.group(3)),
                'correct_tokens': int(success_rate_info.group(4)),
            })

    # Naming the columns keeps the schema intact when nothing was parsed;
    # without it the sort below raises KeyError on a column-less frame.
    df = pd.DataFrame(all_data, columns=columns)
    # Sort the DataFrame by 'epoch' and 'dataset_name'
    df = df.sort_values(['epoch', 'dataset_name'], ascending=[True, True])
    return df
    
def _format_epoch_axis(ax, ylabel, title, log_y=False):
    """Apply the epoch-axis styling shared by every panel of plot_from_log."""
    if log_y:
        ax.set_yscale('log')  # Set y-axis to logarithmic scale
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.xaxis.set_major_locator(MultipleLocator(100))
    ax.grid(True)
    # Rotate this axis' own tick labels in place. Re-assigning label text with
    # set_xticklabels(get_xticklabels()) freezes the strings while the locator
    # keeps moving the ticks, and copying them from another panel can show
    # epochs that do not belong to this panel's data.
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')


def _print_subset_peak(subset, column, label, unit=''):
    """Print the epoch and value at which ``column`` is largest in ``subset``."""
    peak_idx = subset[column].idxmax()
    peak_epoch = subset.loc[peak_idx, 'epoch']
    peak_value = subset.loc[peak_idx, column]
    print(f'{label} {column} peak: epoch {peak_epoch}, value {peak_value:.2f}{unit}')


def plot_from_log(df):
    """Draw an eight-panel, epoch-indexed summary of one run and print peaks.

    Panels, top to bottom:
      1-2. train loss and validation loss (log-scaled y axis);
      3-4. success rate and correct tokens summed over all rows of an epoch
           (the success-rate sum adds the per-dataset percentages, so it can
           exceed 100 when an epoch has more than one row);
      5-6. success rate and correct tokens for ``arc-agi_evaluation`` rows;
      7-8. the same two metrics for ``arc-agi_training`` rows.

    Args:
        df: DataFrame with the columns ``epoch``, ``train_loss``,
            ``val_loss``, ``success_rate``, ``correct_tokens`` and
            ``dataset_name``.

    Returns:
        None. The figure is shown with ``plt.show()`` and the peak values are
        printed. An empty ``df`` prints a notice and draws nothing.
    """
    if df.empty:
        print('No parsed log data to plot.')
        return

    # Create a figure with subplots
    fig, axs = plt.subplots(8, 1, figsize=(15, 20))
    fig.suptitle('Model Performance Analysis', fontsize=16)

    # Plot 1: Train Loss over epoch
    axs[0].plot(df['epoch'], df['train_loss'], marker='o')
    _format_epoch_axis(axs[0], 'Train Loss', 'Train Loss over Epochs', log_y=True)

    # Plot 2: Validation Loss over epoch
    axs[1].plot(df['epoch'], df['val_loss'], marker='o')
    _format_epoch_axis(axs[1], 'Validation Loss', 'Validation Loss over Epochs', log_y=True)

    # Plot 3: Success Rate over epoch (summed over every row of the epoch)
    summed_success_rates = df.groupby('epoch')['success_rate'].sum()
    axs[2].plot(summed_success_rates.index, summed_success_rates.values, marker='o')
    _format_epoch_axis(axs[2], 'Success Rate (%)', 'Success Rate over Epochs')
    print('summed_success_rates peak: \n', summed_success_rates.agg(['idxmax', 'max']))

    # Plot 4: Correct Tokens over epoch (summed over every row of the epoch)
    summed_correct_tokens = df.groupby('epoch')['correct_tokens'].sum()
    axs[3].plot(summed_correct_tokens.index, summed_correct_tokens.values, marker='o')
    _format_epoch_axis(axs[3], 'Correct Tokens', 'Correct Tokens over Epochs')
    print('summed_correct_tokens peak:\n', summed_correct_tokens.agg(['idxmax', 'max']))

    # Plots 5-6 (evaluation only) and 7-8 (training only): the same pair of
    # panels for each dataset, so they share one loop.
    per_dataset_panels = (
        ('Evaluation', 'evaluation', 'arc-agi_evaluation', 4),
        ('Training', 'training', 'arc-agi_training', 6),
    )
    for label, title_suffix, dataset_name, first_panel in per_dataset_panels:
        subset = df[df['dataset_name'] == dataset_name]
        rate_ax = axs[first_panel]
        tokens_ax = axs[first_panel + 1]

        rate_ax.plot(subset['epoch'], subset['success_rate'], marker='o')
        _format_epoch_axis(rate_ax, 'Success Rate (%)',
                           f'Success Rate over Epochs ({title_suffix} only)')

        tokens_ax.plot(subset['epoch'], subset['correct_tokens'], marker='o')
        _format_epoch_axis(tokens_ax, 'Correct Tokens',
                           f'Correct Tokens over Epochs ({title_suffix} only)')

        # idxmax() raises on an empty selection; a run that only logged one
        # of the two datasets should still produce the remaining panels.
        if subset.empty:
            print(f'No {dataset_name} rows found; skipping {label} peaks.')
            continue

        # Find each metric's peak with its corresponding epoch
        _print_subset_peak(subset, 'success_rate', label, unit='%')
        _print_subset_peak(subset, 'correct_tokens', label)

    plt.tight_layout()
    plt.show()

In [None]:
# Run "2500": collect every *.log file under root_dir, parse them into a
# DataFrame, then show the table and the per-run summary figure.
# Rebinds the notebook-level names root_dir, log_files and df.
# NOTE(review): the trailing "_validate_model" tag presumably names the script
# that produced these logs — confirm.
root_dir = '../report/cloud_runs/69.55.141.236/2500/runs/2500' # _validate_model

log_files = find_log_files(root_dir)
df = parse_log_files(log_files)
# Disabled debugging aid: number of parsed rows per epoch.
# print(df['epoch'].value_counts(ascending=False))
display(df)
plot_from_log(df)

In [None]:
# Run "l9_h8": collect every *.log file under root_dir, parse them into a
# DataFrame, then show the table and the per-run summary figure.
# Rebinds the notebook-level names root_dir, log_files and df.
# NOTE(review): the trailing "_validate_model" tag presumably names the script
# that produced these logs — confirm.
root_dir = '../report/cloud_runs/69.55.141.236/l9_h8/runs/' # _validate_model

log_files = find_log_files(root_dir)
df = parse_log_files(log_files)
# Disabled debugging aid: number of parsed rows per epoch.
# print(df['epoch'].value_counts(ascending=False))
display(df)
plot_from_log(df)

In [None]:
# Run "old_genesis": collect every *.log file under root_dir, parse them into
# a DataFrame, then show the table and the per-run summary figure.
# Rebinds the notebook-level names root_dir, log_files and df.
# NOTE(review): the trailing "_validate_model" tag presumably names the script
# that produced these logs — confirm.
root_dir = '../report/cloud_runs/69.55.141.224/old_genesis/runs' # _validate_model

log_files = find_log_files(root_dir)
df = parse_log_files(log_files)
# Disabled debugging aid: number of parsed rows per epoch.
# print(df['epoch'].value_counts(ascending=False))
display(df)
plot_from_log(df)

In [None]:
# Run "new_order": collect every *.log file under root_dir, parse them into a
# DataFrame, then show the table and the per-run summary figure.
# Rebinds the notebook-level names root_dir, log_files and df.
# NOTE(review): the trailing "_validate_model" tag presumably names the script
# that produced these logs — confirm.
root_dir = '../report/cloud_runs/69.55.141.236/new_order/runs/new_order' # _validate_model

log_files = find_log_files(root_dir)
df = parse_log_files(log_files)
# Disabled debugging aid: number of parsed rows per epoch.
# print(df['epoch'].value_counts(ascending=False))
display(df)
plot_from_log(df)

In [None]:
# Run "2500_1080": collect every *.log file under root_dir, parse them into a
# DataFrame, then show the table and the per-run summary figure.
# Rebinds the notebook-level names root_dir, log_files and df.
# NOTE(review): the trailing "_validate_model" tag presumably names the script
# that produced these logs — confirm.
root_dir = '../report/cloud_runs/69.55.141.236/2500_1080/runs/2500_1080' # _validate_model

log_files = find_log_files(root_dir)
df = parse_log_files(log_files)
# Disabled debugging aid: number of parsed rows per epoch.
# print(df['epoch'].value_counts(ascending=False))
display(df)
plot_from_log(df)

In [None]:
# Run "8000": collect every *.log file under root_dir, parse them into a
# DataFrame, then show the table and the per-run summary figure.
# Rebinds the notebook-level names root_dir, log_files and df.
root_dir = '../report/cloud_runs/69.55.141.224/8000/runs'

log_files = find_log_files(root_dir)
df = parse_log_files(log_files)
# Disabled debugging aid: number of parsed rows per epoch.
# print(df['epoch'].value_counts(ascending=False))
display(df)
plot_from_log(df)

In [None]:
# Run "es_960": collect every *.log file under root_dir, parse them into a
# DataFrame, then show the table and the per-run summary figure.
# Rebinds the notebook-level names root_dir, log_files and df.
root_dir = '../report/cloud_runs/69.55.141.119/es_960/runs'

log_files = find_log_files(root_dir)
df = parse_log_files(log_files)
# Disabled debugging aid: number of parsed rows per epoch.
# print(df['epoch'].value_counts(ascending=False))
display(df)
plot_from_log(df)

In [None]:
# Run "barc": collect every *.log file under root_dir, parse them into a
# DataFrame, then show the table and the per-run summary figure.
# Rebinds the notebook-level names root_dir, log_files and df.
root_dir = '../report/cloud_runs/69.55.141.119/barc/runs/'

log_files = find_log_files(root_dir)
df = parse_log_files(log_files)
# Disabled debugging aid: number of parsed rows per epoch.
# print(df['epoch'].value_counts(ascending=False))
display(df)
plot_from_log(df)

In [None]:
# Run "fv": collect every *.log file under root_dir, parse them into a
# DataFrame, then show the table and the per-run summary figure.
# Rebinds the notebook-level names root_dir, log_files and df.
root_dir = '../report/cloud_runs/69.55.141.236/fv'

log_files = find_log_files(root_dir)
df = parse_log_files(log_files)
# Disabled debugging aid: number of parsed rows per epoch.
# print(df['epoch'].value_counts(ascending=False))
display(df)
plot_from_log(df)

In [None]:
def new_plot_from_log(dfs_dict, column_name,
                      dataset_names=('arc-agi_training', 'arc-agi_evaluation')):
    """
    Plot one column from several runs on shared axes, one subplot per dataset.

    For every dataset a subplot is drawn with one line per run (rows of that
    run whose ``dataset_name`` equals the dataset). The epoch and value of
    each line's maximum are printed. Columns whose name contains "loss" use a
    log-scaled y axis; all other columns start the y axis at 0, and columns
    whose name contains "success" are additionally capped at 100.

    Args:
        dfs_dict: Dictionary of dataframes with format {name: dataframe};
            ``name`` is used as the legend label. Each dataframe needs the
            columns ``dataset_name``, ``epoch`` and ``column_name``.
        column_name: Name of the column to plot
        dataset_names: Dataset names to draw, one subplot each, left to
            right. Defaults to the training and evaluation sets.

    Raises:
        ValueError: If ``dataset_names`` is empty.
    """
    dataset_names = list(dataset_names)
    if not dataset_names:
        raise ValueError('dataset_names must contain at least one dataset name')

    column_key = column_name.lower()
    is_loss = 'loss' in column_key
    is_success = 'success' in column_key

    # squeeze=False always returns a 2-D array of axes, so a single dataset
    # is handled the same way as several. Ten inches per subplot keeps the
    # default two-dataset figure at its previous 20x8 size.
    fig, axes_grid = plt.subplots(1, len(dataset_names),
                                  figsize=(10 * len(dataset_names), 8),
                                  squeeze=False)
    fig.suptitle(f'{column_name} Analysis Across Different Runs', fontsize=16)

    # Plot for each dataset
    for ax, dataset_name in zip(axes_grid[0], dataset_names):
        # The scale depends only on the column, so set it once per subplot
        # rather than once per plotted run.
        if is_loss:
            ax.set_yscale('log')

        plotted_any = False
        for name, df in dfs_dict.items():
            # Filter data for current dataset
            df_filtered = df[df['dataset_name'] == dataset_name]
            if df_filtered.empty:
                continue

            # Plot the data
            ax.plot(df_filtered['epoch'],
                    df_filtered[column_name],
                    marker='o',
                    label=name,
                    markersize=4,
                    alpha=0.7)
            plotted_any = True

            # Find and print peak values
            max_idx = df_filtered[column_name].idxmax()
            max_epoch = df_filtered.loc[max_idx, 'epoch']
            max_value = df_filtered.loc[max_idx, column_name]
            print(f'{name} - {dataset_name} peak {column_name}: epoch {max_epoch}, value {max_value:.2f}')

        # Customize each subplot
        ax.set_xlabel('Epoch')
        ax.set_ylabel(column_name)
        ax.set_title(f'{dataset_name}')
        ax.grid(True)
        # A legend without any labelled line only produces a warning.
        if plotted_any:
            ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        ax.tick_params(axis='x', rotation=45)

        # Set reasonable y-axis limits if not loss
        if not is_loss:
            ax.set_ylim(bottom=0)

            # For success rate, set upper limit to 100
            if is_success:
                ax.set_ylim(top=100)

    plt.tight_layout()
    plt.show()

In [None]:
# Re-parse every run's logs from disk so this comparison does not depend on
# the `df` left behind by whichever single-run cell was executed last.
df_2500 = parse_log_files(find_log_files('../report/cloud_runs/69.55.141.236/2500/runs/2500'))
df_es960 = parse_log_files(find_log_files('../report/cloud_runs/69.55.141.119/es_960/runs'))
df_8000 = parse_log_files(find_log_files('../report/cloud_runs/69.55.141.224/8000/runs'))
df_2500_1080 = parse_log_files(find_log_files('../report/cloud_runs/69.55.141.236/2500_1080/runs/2500_1080'))
df_new_order = parse_log_files(find_log_files('../report/cloud_runs/69.55.141.236/new_order/runs/new_order'))
df_og = parse_log_files(find_log_files('../report/cloud_runs/69.55.141.224/old_genesis/runs'))
df_l9_h8 = parse_log_files(find_log_files('../report/cloud_runs/69.55.141.236/l9_h8/runs/'))

# Legend label -> parsed DataFrame for every run included in the comparison.
# Labels are run names; the l9_h8 entry previously carried a stray "df_"
# variable-name prefix, unlike every other label.
dfs_dict = {
    '2500': df_2500,
    'es960': df_es960,
    '8000': df_8000,
    '2500_1080': df_2500_1080,
    'new_order': df_new_order,
    'old_genesis': df_og,
    'l9_h8': df_l9_h8,
}

# Plot success rate for all dataframes
new_plot_from_log(dfs_dict, 'success_rate')

# Plot train loss for all dataframes
new_plot_from_log(dfs_dict, 'train_loss')
