In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import json
import os

In [2]:
# Working directory of the running kernel (this is the process cwd, which is not
# guaranteed to be the folder that contains the notebook file)
current_dir = os.getcwd()
# Directory one level above the working directory; log paths are built relative to it
parent_dir = os.path.split(current_dir)[0]

In [3]:
# JSON log files to load, one per pruning variant; only the variant tag differs
# between the file names, so the list is generated from the tags
paths = [
    parent_dir + '/logs/acdc/seed71/wide_resnet_28x10_71_acdc_' + variant + '_sparsity0.8_momentum0.9_logs.json'
    for variant in ('block', 'block_L1', 'block_L2', 'block_min', 'global_unstructured')
]

In [4]:
def create_pandas_dataframe_entries_for_each_epoch_from_logs(log_file_paths: list) -> pd.DataFrame:
    """
    Create a pandas DataFrame from the log files. The DataFrame will have a row for each epoch in each training.

    Each log file is a JSON object. Every top-level key except 'epochs' is treated as
    run-level metadata and repeated on every row of that run; each element of the
    'epochs' list contributes its own keys (on a name clash the per-epoch value wins).

    Parameters
    ----------
    log_file_paths : list
        Paths of the JSON log files to read. May be empty.

    Returns
    -------
    pd.DataFrame
        One row per epoch per log file, in the order the files and epochs were given,
        with a fresh 0..n-1 index. An empty DataFrame is returned when no paths are
        given or none of the logs contains an epoch.

    Raises
    ------
    OSError
        If a log file cannot be opened.
    json.JSONDecodeError
        If a log file is not valid JSON.
    KeyError
        If a log file has no 'epochs' entry.
    """
    records = []
    for log_file_path in log_file_paths:
        # JSON is UTF-8 by specification; do not depend on the platform's locale encoding
        with open(log_file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
        common_data = {key: value for key, value in data.items() if key != 'epochs'}
        # Collect plain dicts and build the frame once at the end, instead of creating
        # and concatenating one single-row DataFrame per epoch
        records.extend({**common_data, **epoch_data} for epoch_data in data['epochs'])
    return pd.DataFrame(records)

In [5]:
# Flatten all selected logs into one per-epoch table and preview the first rows
df = create_pandas_dataframe_entries_for_each_epoch_from_logs(log_file_paths=paths)
df.head(n=11)

Unnamed: 0,device,initial_learning_rate,net_type,depth,widen_factor,dropout,dataset,checkpoint_filename,seed,resume,...,elapsed_time_seconds,final_global_sparsity,final_sparsity_across_pruned_layers,epoch,learning_rate,valid_accuracy,valid_loss,epoch_time,phase,optimizer_momentum
0,NVIDIA GeForce RTX 2080 Ti,0.1,wide-resnet,28,10,0,cifar100,seed71/acdc/wide_resnet_28x10_71_acdc_block_sp...,71,False,...,21561.837193,0.799157,0.800766,1,0.1,12.66,372.157214,114.806673,dense,0.9
1,NVIDIA GeForce RTX 2080 Ti,0.1,wide-resnet,28,10,0,cifar100,seed71/acdc/wide_resnet_28x10_71_acdc_block_sp...,71,False,...,21561.837193,0.799157,0.800766,2,0.1,22.85,306.089084,105.455266,dense,0.9
2,NVIDIA GeForce RTX 2080 Ti,0.1,wide-resnet,28,10,0,cifar100,seed71/acdc/wide_resnet_28x10_71_acdc_block_sp...,71,False,...,21561.837193,0.799157,0.800766,3,0.1,25.74,306.950976,105.382299,dense,0.9
3,NVIDIA GeForce RTX 2080 Ti,0.1,wide-resnet,28,10,0,cifar100,seed71/acdc/wide_resnet_28x10_71_acdc_block_sp...,71,False,...,21561.837193,0.799157,0.800766,4,0.1,33.5,271.965646,105.348509,dense,0.9
4,NVIDIA GeForce RTX 2080 Ti,0.1,wide-resnet,28,10,0,cifar100,seed71/acdc/wide_resnet_28x10_71_acdc_block_sp...,71,False,...,21561.837193,0.799157,0.800766,5,0.1,38.84,239.321878,105.274102,dense,0.9
5,NVIDIA GeForce RTX 2080 Ti,0.1,wide-resnet,28,10,0,cifar100,seed71/acdc/wide_resnet_28x10_71_acdc_block_sp...,71,False,...,21561.837193,0.799157,0.800766,6,0.1,41.029999,219.552708,105.250989,dense,0.9
6,NVIDIA GeForce RTX 2080 Ti,0.1,wide-resnet,28,10,0,cifar100,seed71/acdc/wide_resnet_28x10_71_acdc_block_sp...,71,False,...,21561.837193,0.799157,0.800766,7,0.1,46.93,194.306056,105.209845,dense,0.9
7,NVIDIA GeForce RTX 2080 Ti,0.1,wide-resnet,28,10,0,cifar100,seed71/acdc/wide_resnet_28x10_71_acdc_block_sp...,71,False,...,21561.837193,0.799157,0.800766,8,0.1,47.740002,193.125646,105.180278,dense,0.9
8,NVIDIA GeForce RTX 2080 Ti,0.1,wide-resnet,28,10,0,cifar100,seed71/acdc/wide_resnet_28x10_71_acdc_block_sp...,71,False,...,21561.837193,0.799157,0.800766,9,0.1,45.459999,207.808055,105.126694,dense,0.9
9,NVIDIA GeForce RTX 2080 Ti,0.1,wide-resnet,28,10,0,cifar100,seed71/acdc/wide_resnet_28x10_71_acdc_block_sp...,71,False,...,21561.837193,0.799157,0.800766,10,0.1,46.950001,196.649434,105.142949,dense,0.9


In [6]:
# Show the column labels of the per-epoch DataFrame (run-level metadata plus per-epoch fields)
df.columns

Index(['device', 'initial_learning_rate', 'net_type', 'depth', 'widen_factor',
       'dropout', 'dataset', 'checkpoint_filename', 'seed', 'resume',
       'resumed_from_file', 'testOnly', 'pruning_method', 'sparsity',
       'block_criterion', 'acdc', 'pruning_time', 'block_size', 'batch_size',
       'num_epochs', 'elapsed_time_seconds', 'final_global_sparsity',
       'final_sparsity_across_pruned_layers', 'epoch', 'learning_rate',
       'valid_accuracy', 'valid_loss', 'epoch_time', 'phase',
       'optimizer_momentum'],
      dtype='object')

In [7]:
# Shorten the method label: global_unstructured -> unstructured
df['pruning_method'] = df['pruning_method'].replace(to_replace={'global_unstructured': 'unstructured'})
# Display labels for the block criteria: max -> abs max, min -> abs min, L1 -> L1 norm, L2 -> L2 norm
df['block_criterion'] = df['block_criterion'].replace(
    to_replace={'max': 'abs max', 'min': 'abs min', 'L1': 'L1 norm', 'L2': 'L2 norm'}
)
# Rows of the unstructured run are always labelled with the L1 norm criterion
df.loc[df['pruning_method'].eq('unstructured'), 'block_criterion'] = 'L1 norm'
# pruning_type = "<pruning_method>, <block_criterion>", used later to colour the lines
df['pruning_type'] = df['pruning_method'] + ', ' + df['block_criterion']

In [8]:
# Turn pruning_method into a categorical whose category order lists 'unstructured'
# before 'block', so that sorting puts the unstructured run first
df['pruning_method'] = pd.Categorical(values=df['pruning_method'], categories=['unstructured', 'block'])
df = df.sort_values(by=['pruning_method', 'block_criterion'])

df

Unnamed: 0,device,initial_learning_rate,net_type,depth,widen_factor,dropout,dataset,checkpoint_filename,seed,resume,...,final_global_sparsity,final_sparsity_across_pruned_layers,epoch,learning_rate,valid_accuracy,valid_loss,epoch_time,phase,optimizer_momentum,pruning_type
800,NVIDIA GeForce RTX 2080 Ti,0.1,wide-resnet,28,10,0,cifar100,seed71/acdc/wide_resnet_28x10_71_acdc_global_u...,71,False,...,0.798392,0.8,1,0.1000,13.430000,365.187206,114.155555,dense,0.9,"unstructured, L1 norm"
801,NVIDIA GeForce RTX 2080 Ti,0.1,wide-resnet,28,10,0,cifar100,seed71/acdc/wide_resnet_28x10_71_acdc_global_u...,71,False,...,0.798392,0.8,2,0.1000,24.440001,298.479884,106.599436,dense,0.9,"unstructured, L1 norm"
802,NVIDIA GeForce RTX 2080 Ti,0.1,wide-resnet,28,10,0,cifar100,seed71/acdc/wide_resnet_28x10_71_acdc_global_u...,71,False,...,0.798392,0.8,3,0.1000,27.570000,289.479594,106.701546,dense,0.9,"unstructured, L1 norm"
803,NVIDIA GeForce RTX 2080 Ti,0.1,wide-resnet,28,10,0,cifar100,seed71/acdc/wide_resnet_28x10_71_acdc_global_u...,71,False,...,0.798392,0.8,4,0.1000,30.850000,287.857772,106.710074,dense,0.9,"unstructured, L1 norm"
804,NVIDIA GeForce RTX 2080 Ti,0.1,wide-resnet,28,10,0,cifar100,seed71/acdc/wide_resnet_28x10_71_acdc_global_u...,71,False,...,0.798392,0.8,5,0.1000,38.110001,242.412813,106.630279,dense,0.9,"unstructured, L1 norm"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,NVIDIA GeForce RTX 2080 Ti,0.1,wide-resnet,28,10,0,cifar100,seed71/acdc/wide_resnet_28x10_71_acdc_block_mi...,71,False,...,0.798392,0.8,196,0.0008,78.040001,89.649708,104.185717,sparse,0.9,"block, abs min"
796,NVIDIA GeForce RTX 2080 Ti,0.1,wide-resnet,28,10,0,cifar100,seed71/acdc/wide_resnet_28x10_71_acdc_block_mi...,71,False,...,0.798392,0.8,197,0.0008,78.279999,89.309683,105.193275,sparse,0.9,"block, abs min"
797,NVIDIA GeForce RTX 2080 Ti,0.1,wide-resnet,28,10,0,cifar100,seed71/acdc/wide_resnet_28x10_71_acdc_block_mi...,71,False,...,0.798392,0.8,198,0.0008,78.290001,88.787976,105.545434,sparse,0.9,"block, abs min"
798,NVIDIA GeForce RTX 2080 Ti,0.1,wide-resnet,28,10,0,cifar100,seed71/acdc/wide_resnet_28x10_71_acdc_block_mi...,71,False,...,0.798392,0.8,199,0.0008,77.989998,90.864106,105.268249,sparse,0.9,"block, abs min"


In [9]:
def create_visualisation_accuracy_over_epochs(df=None, hue='pruning_type', title=None,
                                              shaded_regions=None, y_range=(10, 85)):
    """
    Show an interactive line chart of validation accuracy over epochs, one line per `hue` value.

    Translucent blue rectangles mark the sparse-phase epoch windows, and two dummy legend
    entries ('Sparse phase' / 'Dense phase') explain the background colours. The figure is
    displayed with `fig.show()`; nothing is returned.

    Parameters
    ----------
    df : pd.DataFrame, optional
        Per-epoch data. Must contain the columns 'epoch', 'valid_accuracy', 'learning_rate',
        'final_sparsity_across_pruned_layers', 'phase' and the column named by `hue`.
        When omitted, the notebook-level variable `df` is looked up at call time, so the
        plot always reflects the current frame rather than the one that existed when
        this function was defined.
    hue : str
        Column whose distinct values become separately coloured lines.
    title : str, optional
        Figure title.
    shaded_regions : iterable of (start, end) pairs, optional
        Epoch windows to shade as sparse phases. Defaults to
        (11, 30), (51, 70), (91, 110), (131, 150), (171, 200).
    y_range : (low, high)
        Limits of the y axis; the shaded rectangles span exactly this range.

    Raises
    ------
    NameError
        If `df` is omitted and no notebook-level `df` exists.
    """
    if df is None:
        # Late-bound default: a `df=df` default would be frozen at definition time and
        # go stale as soon as the notebook rebinds `df` (e.g. after sorting)
        if 'df' not in globals():
            raise NameError("No DataFrame was passed and no global 'df' is defined")
        df = globals()['df']
    if shaded_regions is None:
        shaded_regions = [(11, 30), (51, 70), (91, 110), (131, 150), (171, 200)]
    y_min, y_max = y_range

    sparse_fill = 'rgba(0, 0, 255, 0.2)'     # Blue with transparency
    dense_fill = 'rgba(240, 240, 240, 1)'    # Same grey as the plot background
    custom_colors = ['#6A5ACD', '#E71D36', '#95C623', '#E082DA', '#FFA526']

    fig = px.line(df, x='epoch', y='valid_accuracy', color=hue,
                  hover_data={'learning_rate': True, 'final_sparsity_across_pruned_layers': True, 'phase': True},
                  color_discrete_sequence=custom_colors)

    # Add shaded regions for sparse windows
    for start, end in shaded_regions:
        fig.add_shape(
            type='rect',
            x0=start, x1=end,
            y0=y_min, y1=y_max,
            fillcolor=sparse_fill,
            line=dict(width=0)  # No border
        )

    # Dummy traces that only exist to produce legend entries: an invisible, unnamed
    # one acting as a spacer, followed by the keys for the two background colours
    legend_entries = [
        ('', dict(size=0, opacity=0)),
        ('Sparse phase', dict(size=10, color=sparse_fill)),
        ('Dense phase', dict(size=10, color=dense_fill)),
    ]
    for entry_name, entry_marker in legend_entries:
        fig.add_trace(
            go.Scatter(
                x=[None], y=[None],
                mode='markers',
                marker=entry_marker,
                name=entry_name
            )
        )

    fig.update_yaxes(range=[y_min, y_max], title_text='Validation accuracy [%]')
    fig.update_xaxes(title_text='Epoch')

    fig.update_layout(
        plot_bgcolor=dense_fill,  # Background for the plot area
        height=600*1.2,
        width=800*1.2,
        # Change legend title
        legend_title_text='Pruning method, Metric',
        margin=dict(
            l=50,
            r=50,
            t=70,
            b=50
        ),
        xaxis_title_font=dict(size=19),
        yaxis_title_font=dict(size=19),
        legend_font=dict(size=17),
        font=dict(size=19),
        title=dict(
            text=title,
            x=0.41,  # Horizontal title position as a fraction of the figure width
            y=0.95,
            font=dict(size=22)
        ),
    )

    fig.show()

In [10]:
# Plot the accuracy curves of all loaded AC/DC runs
create_visualisation_accuracy_over_epochs(df=df, title='AC/DC')