In [11]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import json
from plotly.subplots import make_subplots
import plotly.io as pio
import os

In [12]:
# Log files are located relative to the parent of the current working
# directory (os.getcwd() is the kernel's working directory, not necessarily
# the folder that contains this notebook).
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)

# Series 1: dense baseline followed by the two iterative block-pruning runs
# (max sparsity 70, then from 70 to max sparsity 90, going by the file names).
paths1 = [
    parent_dir + relative_log
    for relative_log in (
        '/logs/dense-baseline/seed31/wide_resnet_28x10_31_logs.json',
        '/logs/base-experiments/seed31/lr_schedule_b/wide_resnet_28x10_31_block_iterative_max_sparsity70_logs.json',
        '/logs/base-experiments/seed31/lr_schedule_b/wide_resnet_28x10_31_block_iterative_from70_max_sparsity90_logs.json',
    )
]

# Series 2: the same dense baseline followed by a single block-pruning run
# (max sparsity 90, going by the file name).
paths2 = [
    parent_dir + relative_log
    for relative_log in (
        '/logs/dense-baseline/seed31/wide_resnet_28x10_31_logs.json',
        '/logs/base-experiments/seed31/lr_schedule_b/wide_resnet_28x10_31_block_max_sparsity90_new_lr_logs.json',
    )
]

In [13]:
def create_pandas_dataframe_entries_for_each_epoch_from_logs(log_file_paths: list) -> pd.DataFrame:
    """
    Build one DataFrame with a row for every epoch of every given training log.

    Each log file is read as a JSON object. Its 'epochs' entry is iterated as a
    sequence of per-epoch dicts; every other top-level key is treated as
    run-level data and copied onto each row of that run. If a key occurs both
    at run level and in an epoch dict, the epoch value wins.

    Args:
        log_file_paths: Paths of the JSON log files, processed in the given order.

    Returns:
        A DataFrame with a fresh 0..n-1 index whose rows follow the order of
        the files and, within a file, the order of its 'epochs' list. An empty
        DataFrame is returned when no paths are given or no log has any epochs.

    Raises:
        KeyError: If a log file has no 'epochs' entry.
        OSError, json.JSONDecodeError: If a file cannot be read or parsed.
    """
    # Collect plain dicts and build the frame once: this avoids creating (and
    # concatenating) one single-row DataFrame per epoch, and it also works for
    # an empty list of paths, where pd.concat would raise.
    records = []
    for log_file_path in log_file_paths:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(log_file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
        common_data = {key: value for key, value in data.items() if key != 'epochs'}
        records.extend({**common_data, **epoch_data} for epoch_data in data['epochs'])
    return pd.DataFrame(records)

In [14]:
def _color_for_checkpoint(checkpoint_filename) -> str:
    """Return the plot colour for a checkpoint file name ('black' if it is not a string)."""
    if not isinstance(checkpoint_filename, str):
        # Missing values (None / NaN) cannot be substring-tested; use the default.
        return 'black'
    if 'block_iterative' in checkpoint_filename:
        return 'green'
    if 'block_max' in checkpoint_filename:
        return 'red'
    return 'black'


def preprocess_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of ``df`` with a 'color' column added and 'sparsity' normalised.

    * 'color' is derived from 'checkpoint_filename': 'green' if the name
      contains 'block_iterative', otherwise 'red' if it contains 'block_max',
      otherwise 'black' (also used for missing / non-string names).
    * 'sparsity' is set to 0 on every row whose 'pruning_method' is 'none'.

    The input frame is left untouched, so re-running a cell that calls this
    function never changes data that an earlier cell already displayed.

    Args:
        df: Frame with at least the columns 'checkpoint_filename' and
            'pruning_method'.

    Returns:
        A new DataFrame with the changes described above.

    Raises:
        KeyError: If 'checkpoint_filename' or 'pruning_method' is missing.
    """
    processed = df.copy()

    processed['color'] = processed['checkpoint_filename'].map(_color_for_checkpoint)

    # Change sparsity to 0 if pruning_method is 'none'
    processed.loc[processed['pruning_method'] == 'none', 'sparsity'] = 0

    return processed

In [15]:
# Series 1 (paths1): load the per-epoch rows, then add the derived columns.
df1 = preprocess_data(
    create_pandas_dataframe_entries_for_each_epoch_from_logs(paths1)
)

# Series 2 (paths2): same two steps.
df2 = preprocess_data(
    create_pandas_dataframe_entries_for_each_epoch_from_logs(paths2)
)

# Render every column of wide frames (a global pandas option, so it also
# applies to all later cells), then preview the first rows of series 1.
pd.set_option('display.max_columns', None)
df1.head(11)

Unnamed: 0,device,initial_learning_rate,net_type,depth,widen_factor,dropout,dataset,checkpoint_filename,seed,resume,resumed_from_file,testOnly,pruning_method,sparsity,block_criterion,pruning_time,block_size,batch_size,num_epochs,elapsed_time_seconds,epoch,learning_rate,valid_accuracy,valid_loss,epoch_time,final_global_sparsity,color
0,NVIDIA GeForce RTX 3060 Ti,0.1,wide-resnet,28,10,0,cifar100,testy/wide_resnet_28x10_31,31,False,,False,none,0.0,max,0.0,8,128,200,34102.389638,1,0.1,15.26,368.323646,177.584694,,black
1,NVIDIA GeForce RTX 3060 Ti,0.1,wide-resnet,28,10,0,cifar100,testy/wide_resnet_28x10_31,31,False,,False,none,0.0,max,0.0,8,128,200,34102.389638,2,0.1,17.940001,376.000861,171.070768,,black
2,NVIDIA GeForce RTX 3060 Ti,0.1,wide-resnet,28,10,0,cifar100,testy/wide_resnet_28x10_31,31,False,,False,none,0.0,max,0.0,8,128,200,34102.389638,3,0.1,27.690001,295.623534,170.959781,,black
3,NVIDIA GeForce RTX 3060 Ti,0.1,wide-resnet,28,10,0,cifar100,testy/wide_resnet_28x10_31,31,False,,False,none,0.0,max,0.0,8,128,200,34102.389638,4,0.1,38.09,236.44047,170.943164,,black
4,NVIDIA GeForce RTX 3060 Ti,0.1,wide-resnet,28,10,0,cifar100,testy/wide_resnet_28x10_31,31,False,,False,none,0.0,max,0.0,8,128,200,34102.389638,5,0.1,36.939999,243.221322,170.971099,,black
5,NVIDIA GeForce RTX 3060 Ti,0.1,wide-resnet,28,10,0,cifar100,testy/wide_resnet_28x10_31,31,False,,False,none,0.0,max,0.0,8,128,200,34102.389638,6,0.1,40.91,229.94635,171.00977,,black
6,NVIDIA GeForce RTX 3060 Ti,0.1,wide-resnet,28,10,0,cifar100,testy/wide_resnet_28x10_31,31,False,,False,none,0.0,max,0.0,8,128,200,34102.389638,7,0.1,41.869999,232.600413,170.804712,,black
7,NVIDIA GeForce RTX 3060 Ti,0.1,wide-resnet,28,10,0,cifar100,testy/wide_resnet_28x10_31,31,False,,False,none,0.0,max,0.0,8,128,200,34102.389638,8,0.1,46.700001,198.741326,170.803392,,black
8,NVIDIA GeForce RTX 3060 Ti,0.1,wide-resnet,28,10,0,cifar100,testy/wide_resnet_28x10_31,31,False,,False,none,0.0,max,0.0,8,128,200,34102.389638,9,0.1,43.580002,221.719157,170.774924,,black
9,NVIDIA GeForce RTX 3060 Ti,0.1,wide-resnet,28,10,0,cifar100,testy/wide_resnet_28x10_31,31,False,,False,none,0.0,max,0.0,8,128,200,34102.389638,10,0.1,50.77,181.474457,170.787324,,black


In [16]:
# Show the column labels of df1 (the names available to the plotting code).
df1.columns

Index(['device', 'initial_learning_rate', 'net_type', 'depth', 'widen_factor',
       'dropout', 'dataset', 'checkpoint_filename', 'seed', 'resume',
       'resumed_from_file', 'testOnly', 'pruning_method', 'sparsity',
       'block_criterion', 'pruning_time', 'block_size', 'batch_size',
       'num_epochs', 'elapsed_time_seconds', 'epoch', 'learning_rate',
       'valid_accuracy', 'valid_loss', 'epoch_time', 'final_global_sparsity',
       'color'],
      dtype='object')

In [18]:
def _add_accuracy_panel(fig, df, row, line_color, regions, region_labels,
                        region_opacities, pruning_arrows):
    """
    Draw one validation-accuracy panel into ``fig`` at (row, col=1).

    The panel consists of, in this order:
      1. the 'valid_accuracy' over 'epoch' line(s) of ``df``,
      2. one grey background band plus a centred text label per region,
      3. one "Block pruning applied" arrow annotation per entry of
         ``pruning_arrows``.

    Args:
        fig: Figure created with make_subplots that receives the panel.
        df: Frame with the columns 'epoch', 'valid_accuracy' and
            'checkpoint_filename'.
        row: 1-based subplot row to draw into.
        line_color: Colour of the accuracy line.
        regions: (start, end) x-ranges of the background bands.
        region_labels: Label text for each band, parallel to ``regions``.
        region_opacities: Band opacity for each band, parallel to ``regions``.
        pruning_arrows: (x, y) positions the arrow annotations point at.

    Returns:
        The intermediate plotly express figure the traces were taken from.
    """
    panel = px.line(df, x='epoch', y='valid_accuracy', hover_name='checkpoint_filename')
    panel.update_traces(mode='lines', line_color=line_color)
    for trace in panel.data:
        fig.add_trace(trace, row=row, col=1)

    label_font = dict(size=16, color='rgba(0, 0, 0, 0.8)')
    for (start, end), label, opacity in zip(regions, region_labels, region_opacities):
        # Band spans y 0..100, i.e. the full accuracy range in percent.
        fig.add_shape(
            type='rect',
            x0=start, x1=end,
            y0=0, y1=100,
            fillcolor='grey',
            opacity=opacity,
            line=dict(width=0),
            row=row, col=1
        )
        fig.add_annotation(
            x=(start + end) / 2,
            y=25,
            text=label,
            showarrow=False,
            font=label_font,
            row=row, col=1
        )

    for arrow_x, arrow_y in pruning_arrows:
        fig.add_annotation(
            x=arrow_x,
            y=arrow_y,
            text="Block pruning applied",
            showarrow=True,
            arrowhead=2,
            ax=0,
            ay=40,  # tail (and text) offset relative to the arrow head
            font=dict(size=13, color='rgba(0, 0, 0, 0.8)'),
            row=row, col=1
        )

    return panel


# Create a subplot figure with two rows
subplot_titles = ('Block pruning in one step', 'Iterative block pruning in two steps')
fig = make_subplots(
    rows=2,
    cols=1,
    shared_xaxes=False,
    subplot_titles=subplot_titles,
    vertical_spacing=0.2
)

# Update subtitle font size and nudge the titles upwards to add space.
# The entries are listed in the same order as subplot_titles (top, bottom).
fig.update_layout(
    annotations=[
        dict(
            text=title,
            x=0.5,
            y=title_y,  # Adjust position to add space
            xref='paper',
            yref='paper',
            showarrow=False,
            font=dict(size=18)
        )
        for title, title_y in zip(subplot_titles, (1.02, 0.42))
    ]
)

# First plot (df2) - one-step pruning, top row.
# Band opacities are stated explicitly instead of being derived from the x
# coordinate; the 90% band uses the same 0.4 as in the bottom panel.
shaded_regions2 = [(0, 200), (200, 400)]
region_labels2 = ['Sparsity: 0%', 'Sparsity: 90%']
region_opacities2 = [0.0, 0.4]
fig2 = _add_accuracy_panel(
    fig, df2,
    row=1,
    line_color='rgba(140, 30, 30, 1)',
    regions=shaded_regions2,
    region_labels=region_labels2,
    region_opacities=region_opacities2,
    pruning_arrows=[(200.5, 51)],
)

# Second plot (df1) - iterative pruning, bottom row
shaded_regions1 = [(0, 200), (200, 300), (300, 400)]
region_labels1 = ['Sparsity: 0%', 'Sparsity: 70%', 'Sparsity: 90%']
region_opacities1 = [0.0, 0.2, 0.4]
fig1 = _add_accuracy_panel(
    fig, df1,
    row=2,
    line_color='rgba(20, 150, 160, 1)',
    regions=shaded_regions1,
    region_labels=region_labels1,
    region_opacities=region_opacities1,
    pruning_arrows=[(200.5, 60), (300.5, 70)],
)

fig.update_layout(
    height=800,
    width=800,
    xaxis_title="Epoch",
    yaxis_title="Validation accuracy [%]",
    xaxis2_title="Epoch",  # Add x-axis label for the second plot
    yaxis2_title="Validation accuracy [%]",  # Add y-axis label for the second plot
    margin=dict(l=50, r=10, t=50, b=50)
)

# pio.write_image(fig, "plots/exports/training_one-step_vs_iterative.png", width=800, height=800, scale=3)
fig.show()