# Combined File

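This notebook combines the other experiments in one place: a constant-width feed-forward network, a wider ("augmented") network, a ResNet, and a singular-value analysis of the trained model's Jacobian.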

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset, DataLoader
from matplotlib.colors import to_rgb
from matplotlib.colors import LinearSegmentedColormap

# Model and Training Params

In [None]:
# Model Params
num_hidden = 6 # number of hidden layers; the full network has 2 additional layers: input-to-hidden and hidden-to-output
input_dim = 2
hidden_dim = 2
output_dim = 1
activation = 'tanh' # 'relu' and 'tanh' are supported

# Training Params
load_file = None # forwarded as load_file= to the train_until_threshold calls below
cross_entropy = True # True is supported for binary classification only
num_epochs = 300 # NOTE(review): none of the train_until_threshold calls visible in this notebook pass num_epochs, and the manual sweep loop reassigns it before reading it — confirm this value is used anywhere

In [None]:
import importlib

import models.training

# Reload BEFORE the from-import: importlib.reload() updates the module object
# in place, but a name already bound via `from ... import ...` keeps pointing
# at the old function object.
importlib.reload(models.training)
from models.training import make_circles_uniform

# Generate training data

# Draw a fresh seed on every run; it is recorded in the footnote so that a
# particular run can be reproduced by pinning it (see the commented line).
seed = np.random.randint(1000)
# seed = 163
np.random.seed(seed)
torch.manual_seed(seed)

# Footnote displayed on plots so that plots and model/training params do not get confused
footnote = f'num_hidden={num_hidden}, hidden_dim={hidden_dim}, output_dim={output_dim}, act={activation}, seed={seed}, ce={cross_entropy}'

n_points = 2000  # number of points in the dataset

# Dataset geometry. These names are now actually passed to
# make_circles_uniform; the values are the ones the original call used
# (it passed buffer=0.1 although the unused variable said 0.2).
inner_radius = 0.5
outer_radius = 1.0
buffer = 0.1

train_loader, test_loader = make_circles_uniform(
    output_dim=output_dim,
    n_samples=n_points,
    inner_radius=inner_radius,
    outer_radius=outer_radius,
    buffer=buffer,
    cross_entropy=cross_entropy,
    seed=seed,
)

In [None]:
# Sanity check: print the first five samples and labels of the first training
# batch. The loop variables deliberately avoid the name `input`, which would
# shadow the builtin for the rest of the notebook session.
for batch_inputs, batch_labels in train_loader:
    print(batch_inputs[:5], batch_labels[:5])
    break

# Define model and training


In [None]:
# Re-import the project modules so that edits to models.resnets and
# models.training take effect without restarting the kernel.
import importlib
import models.resnets
import models.training
importlib.reload(models.resnets) # Reload the module
importlib.reload(models.training) # Reload the module
# The from-imports come after the reloads so these names bind to the reloaded objects.
from models.resnets import ResNet
from models.training import compute_accuracy, train_model, train_until_threshold, plot_loss_curve


# Constant width 2

In [None]:

# Baseline: plain feed-forward network (skip_param=0) with the notebook-level
# width and depth. The call returns the model, an accuracy value and the
# recorded losses.
model_base, acc_base, losses_base = train_until_threshold(
    ResNet,
    train_loader,
    test_loader,
    load_file=load_file,
    max_retries=5,
    threshold=0.95,
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
    num_hidden=num_hidden,
    skip_param=0,
    activation=activation,
)

# Loss curve of the baseline run.
plot_loss_curve(
    losses_base,
    title="Base Model Loss Curve",
    filename='ff6hidden',
)

In [None]:
# NOTE(review): `parameters` is referenced without being called, so this cell
# shows the bound method's repr rather than the parameter tensors — confirm
# whether list(model_base.parameters()) was intended.
model_base.parameters

In [None]:
import importlib
import plots.plots

# Reload BEFORE the from-import: names bound with `from ... import ...` are
# not updated by a later importlib.reload(), so reloading afterwards would
# leave the plotting helpers pointing at the stale code.
importlib.reload(plots.plots)
from plots.plots import plot_decision_boundary, plot_level_sets

# Use the first test batch as the points to plot with the decision boundary.
X_test, y_test = next(iter(test_loader))
plot_decision_boundary(model_base, X_test, y_test, show=True, file_name= 'ff6hiddencirc' + str(num_hidden), footnote = footnote, amount_levels= 100)
plot_level_sets(model_base, show=True, file_name= 'ff6hiddencirc_contour' + str(num_hidden), footnote = footnote, amount_levels= 20)



# Augmented model: width 3

In [None]:
# Augmented model: one hidden layer of width 3 (one more than the input dimension).
hidden_dim = 3
num_hidden = 1

# Set the seed BEFORE building the footnote so the footnote reports the seed
# that is actually passed to training below (previously it showed the old one).
seed = 288

footnote = f'num_hidden={num_hidden}, hidden_dim={hidden_dim}, output_dim={output_dim}, act={activation}, seed={seed}, ce={cross_entropy}'

model_aug, acc_aug, losses_aug = train_until_threshold(ResNet,
    train_loader, test_loader,
    load_file = load_file, max_retries=5, threshold=0.95, seed=seed,
    input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim, num_hidden=num_hidden, skip_param=0, activation=activation
)

# Use a filename of its own: the baseline cell already passes 'ff6hidden', so
# reusing it here would presumably overwrite the baseline loss curve on disk.
plot_loss_curve(losses_aug, title="Augmented Model Loss Curve", filename='ffaug')

In [None]:
# Decision boundary and level sets of the augmented model, drawn with the
# first test batch and a footnote rebuilt from the current settings.
X_test, y_test = next(iter(test_loader))
footnote = (
    f'num_hidden={num_hidden}, hidden_dim={hidden_dim}, output_dim={output_dim}, '
    f'act={activation}, seed={seed}, ce={cross_entropy}'
)
plot_decision_boundary(
    model_aug,
    X_test,
    y_test,
    show=True,
    file_name=f'ffaugcirc{num_hidden}',
    footnote=footnote,
    amount_levels=100,
)
plot_level_sets(
    model_aug,
    show=True,
    file_name=f'ffaugcirc_contour{num_hidden}',
    footnote=footnote,
    amount_levels=20,
)



# ResNet model

In [None]:
# A non-zero skip_param switches the model from feed-forward to residual.
skip_param = 1

# Back to the baseline architecture: 6 hidden layers of width 2.
num_hidden, hidden_dim = 6, 2

# NOTE(review): num_epochs is not passed to train_until_threshold below —
# confirm whether the training helper picks it up some other way.
num_epochs = 500

model_res, acc_res, losses_res = train_until_threshold(
    ResNet,
    train_loader,
    test_loader,
    load_file=load_file,
    max_retries=5,
    threshold=0.95,
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
    num_hidden=num_hidden,
    skip_param=skip_param,
    activation=activation,
)

# Loss curve of the residual run.
plot_loss_curve(
    losses_res,
    title="ResNet Model Loss Curve",
    filename='ff6_res',
)


In [None]:
X_test, y_test = next(iter(test_loader))

# Rebuild the footnote for the ResNet. Without this the plots carry the
# footnote left over from the augmented model (num_hidden=1, hidden_dim=3,
# seed=288). skip_param is included because it is what distinguishes this
# model from the baseline; the seed is omitted because no seed is passed to
# the ResNet training call.
footnote = f'num_hidden={num_hidden}, hidden_dim={hidden_dim}, output_dim={output_dim}, act={activation}, skip_param={skip_param}, ce={cross_entropy}'

plot_decision_boundary(model_res, X_test, y_test, show=True, file_name= 'ff6resnetcirc' + str(num_hidden), footnote = footnote, amount_levels= 100)
plot_level_sets(model_res, show=True, file_name= 'ff6resnetcirc_contour' + str(num_hidden), footnote = footnote, amount_levels= 10)

In [None]:

# Sweep over shortcut weights: train one model per skip_param value with an
# identical seed and draw each decision boundary into one panel of a grid.
X_test, y_test = next(iter(test_loader))
num_epochs = 100

# np.linspace(0, 5, 6) yields the six shortcut weights 0.0, 1.0, ..., 5.0.
skip_values = np.linspace(0, 5, 6)
n_cols = 3
n_rows = int(np.ceil(len(skip_values) / n_cols))
# squeeze=False keeps `axes` a 2-D array even for a 1x1 grid, so flatten() is always valid.
fig, axes = plt.subplots(n_rows, n_cols, figsize=(3.5 * n_cols, 4 * n_rows), facecolor='white', dpi = 900, squeeze=False)
plt.subplots_adjust(wspace=0.05, hspace=0.)
flat_axes = axes.flatten()

# Every model in the sweep starts from the same seed so that the panels
# differ only in skip_param. A dedicated name is used so the notebook-level
# `seed` (shown in plot footnotes) is not overwritten.
sweep_seed = 163

for idx, skip in enumerate(skip_values):
    print(f"Training model with skip_param = {skip:.2f}")

    print(sweep_seed)
    np.random.seed(sweep_seed)
    torch.manual_seed(sweep_seed)

    model = ResNet(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim,
                   num_hidden=num_hidden, skip_param=skip, activation=activation)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    criterion = nn.BCEWithLogitsLoss() if cross_entropy else nn.MSELoss()

    # Plain mini-batch training for a fixed number of epochs.
    for epoch in range(num_epochs):
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

    ax = flat_axes[idx]
    plot_decision_boundary(model, X_test, y_test, title=f"Skip: {skip:.2f}", ax=ax, show=False, colorbar=False, show_points=False, amount_levels=100)

    # Strip ticks and labels so the panels can sit close together.
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlabel('')
    ax.set_ylabel('')
    ax.axis('tight')
    ax.set_aspect('equal')

# Hide unused subplots
for unused_ax in flat_axes[len(skip_values):]:
    fig.delaxes(unused_ax)

fig.suptitle(f"ResNets with amount layers = {num_hidden + 2} and different weights of shortcut", fontsize=16)

# Leave room at the top for the title
plt.tight_layout(rect=[0, 0, 1, 0.96])

plt.savefig('ff6hidden_skip_params.png', dpi=900, bbox_inches='tight', facecolor='white')
plt.show()

# Singular value computations and plotting
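We want to locate the singular points of the network map on the compact input region $[-1, 1]^2$, i.e. the inputs at which the Jacobian loses rank, which shows up as its smallest singular value approaching zero.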

In [None]:
# Define a grid over the input space.
# grid_size is the number of sample points per axis used by the manual
# singular-value cell further down (np.linspace(-1, 1, grid_size)).
grid_size = 200 # Adjust as needed.

# Reload first, then from-import, so the names below bind to the reloaded module.
importlib.reload(plots.plots) # Reload the module
from plots.plots import psi_manual, model_to_func, sv_plot

# def psi_manual(x, func):
#     """
#     x: a tensor of shape (2,) representing a point in R^2.
#     model: a function mapping R^2 to R^output_dim.
    
#     Returns:
#       The smallest singular value of the Jacobian of model at x.
#     """
#     # Ensure x is a leaf variable with gradient tracking enabled.
#     x = x.clone().detach().requires_grad_(True)  
#     # print(f"x shape: {x.shape}")  # Debugging line to check the shape of x
    
    
#     # Compute the Jacobian using torch.autograd.functional.jacobian (compatible with Python 3.8)
#     jacobian = torch.autograd.functional.jacobian(func, x, create_graph=True)
#     # print(f"Jacobian shape: {jacobian.shape}")  # Debugging line to check the shape of the Jacobian
#     jacobian = jacobian  # Remove the artificial batch dimension
#     # print(f"Jacobian shape after squeeze: {jacobian.shape}")  # Debugging line to check the shape after squeeze
#     # Compute singular values using svdvals (available in PyTorch 1.8, compatible with Python 3.8)
#     singular_values = torch.svd(jacobian, compute_uv=False)[1] #svd interprets here the jacobian as a SQUARE matrix of the largest dimension, hence it 
    
   
#     return singular_values.detach().numpy()
  

# def model_to_func(model,from_layer=0, to_layer=-1):
  
#   if from_layer == 0 and to_layer == -1: # this is the case for input to last hidden layer (without output layer)
#     func = lambda inp: model(inp.unsqueeze(0), output_layer = False).squeeze(0)  # Add artificial batch dimension which is needed because of batch normalization layer BatchNorm1d and remove it again from the model output.
#   else: 
#     func = lambda inp: model.sub_model(inp.unsqueeze(0), from_layer=from_layer, to_layer = to_layer).squeeze(0)
  
#   return func
  

# def sv_plot(func, sv_index = 0, x_range = [-1,1], y_range = [-1,1], grid_size = 100, ax = None, title = ''):
#   x_values = np.linspace(x_range[0], x_range[1], grid_size)
#   y_values = np.linspace(y_range[0], y_range[1], grid_size)
#   psi_values = np.zeros((grid_size, grid_size, 2))
  
#   # Evaluate psi(x) over the grid.
#   for i, xv in enumerate(x_values):
#       for j, yv in enumerate(y_values):
#           # Create a 2D point as a torch tensor.
#           x_point = torch.tensor([xv, yv], dtype=torch.float32)
#           psi_values[j, i,:] = psi_manual(x_point, func) #one subtlety here: if there is only one SV it gets broadcast to all dimensions of psi_values[j,i,:] in the last dimension. this reduces if statements for e.g. the last layer, but we need to notice that the SINGLE SV gets plotted twice  
   

#   # Here we plot the contour at a small level, e.g., 0.01.
#   # CS = plt.contour(x_range, y_range, psi_values, levels=[0,0.05,0.1,0.2,0.3], colors='red')

#   # Define the number of levels for the contour plot
#   vmin1, vmax1 = psi_values[:, :, sv_index].min(), psi_values[:, :, sv_index].max()
#   num_levels = 200

#   levels = np.linspace(0, vmax1, num_levels)
  
#   # Plot on the provided axis
#   if ax is not None:
#       cs = ax.contourf(x_values, y_values, psi_values[:, :, sv_index], levels=levels, cmap='viridis')
#       ax.set_title(title)
#       ax.set_xlabel('x1')
#       ax.set_ylabel('x2')
#       ax.set_aspect('equal')
#       return cs
#   else:
#     # Create the contour plot using the 'binary' colormap
#     plt.figure(figsize=(8, 6))
    
#     CS = plt.contourf(x_values, y_values, psi_values[:,:,sv_index], levels=levels, cmap = 'viridis')
#     cbar = plt.colorbar(CS)
#     plt.title(f'Singular value no.{sv_index} of Jacobian \nwith output layer')
#     plt.xlabel('x1')
#     plt.ylabel('x2')
#     plt.show()



  




In [None]:
        
# Analyse the baseline model. Module.eval() returns the module itself, so
# selecting the model and switching it to evaluation mode is one statement.
model = model_base.eval()

# With default arguments model_to_func wraps the whole model; judging by the
# titles below this is the map up to, but excluding, the output layer.
func = model_to_func(model)

# One figure per singular-value index of the Jacobian.
sv_plot(
    func,
    sv_index=0,
    title='Largest SV without output layer',
)
sv_plot(
    func,
    sv_index=1,
    title='Second largest SV of Jacobian without output layer',
)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Singular values of the Jacobian of the map "input -> layer k" for each k:
# one column per layer, largest singular value on top, second one below.

# Put the model in evaluation mode.
model.eval()

num_hidden = model.num_hidden
n_layers = num_hidden + 2  # hidden layers plus the input and output layers

# squeeze=False guarantees a 2-D axes array. The grid always has 2 rows and
# at least 2 columns, so the old `axes[0] if num_hidden <= 1` fallback handed
# a whole row of Axes to sv_plot instead of a single Axes.
fig, axes = plt.subplots(2, n_layers, figsize=(5 * n_layers, 10), squeeze=False)

# Row 0 shows sv_index 0 and row 1 shows sv_index 1. The titles were swapped
# before: index 0 is the LARGEST singular value, consistent with the other
# cells of this notebook (assuming psi_manual returns the values in
# descending order, as torch.svd does).
panels = [(0, 'Max SV'), (1, 'Min SV')]

for layer in range(n_layers):
    func = model_to_func(model, from_layer=0, to_layer=layer)

    for row, (sv_index, label) in enumerate(panels):
        ax = axes[row, layer]
        sv_plot(func, sv_index=sv_index, ax=ax, grid_size=100)
        ax.set_title(f'{label}\n layer_in = 0, layer_out = {layer}')
        ax.set_xlabel('x1')
        ax.set_ylabel('x2')
        ax.set_aspect('equal')

plt.tight_layout()
plt.show()


In [None]:
# Singular values of the Jacobian of each individual layer (from_layer ==
# to_layer): one column per layer, largest singular value on top.

# Put the model in evaluation mode.
model.eval()

n_layers = num_hidden + 2  # hidden layers plus the input and output layers

# squeeze=False guarantees a 2-D axes array. The grid always has 2 rows and
# at least 2 columns, so the old `axes[0] if num_hidden <= 1` fallback handed
# a whole row of Axes to sv_plot instead of a single Axes.
fig, axes = plt.subplots(2, n_layers, figsize=(5 * n_layers, 10), squeeze=False)

for layer in range(n_layers):
    func = model_to_func(model, from_layer=layer, to_layer=layer)

    # Row index doubles as the singular-value index: row 0 -> sv_index 0
    # (largest), row 1 -> sv_index 1.
    for sv_index, label in enumerate(('Max SV', 'Min SV')):
        ax = axes[sv_index, layer]
        cs = sv_plot(func, sv_index=sv_index, ax=ax, grid_size=100)
        fig.colorbar(cs, ax=ax)
        ax.set_title(f'{label}\n layer {layer}')
        ax.set_xlabel('x1')
        ax.set_ylabel('x2')
        ax.set_aspect('equal')

plt.tight_layout()

plt.savefig('SV_each_layer.png', dpi=600, bbox_inches='tight', facecolor='white')
plt.show()


In [None]:
# Reload plots.plots first so the from-import below binds to the current code.
importlib.reload(plots.plots) # Reload the module
from plots.plots import plot_singular_values_of_weightmatrix
# NOTE(review): judging by the helper's name this plots the singular values of
# the baseline model's weight matrices, on a linear scale since
# log_scale=False — confirm against plots.plots.
plot_singular_values_of_weightmatrix(model_base, log_scale=False, title = 'Baseline model')

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Manual version of the per-layer singular-value grid: evaluate psi_manual on
# a regular grid for each single layer and draw filled contours directly.

# Put the model in evaluation mode.
model.eval()

n_layers = num_hidden + 2  # hidden layers plus the input and output layers

# squeeze=False guarantees a 2-D axes array. The grid always has 2 rows and
# at least 2 columns, so the old `axes[0] if num_hidden <= 1` fallback picked
# a whole row of Axes instead of a single Axes.
fig, axes = plt.subplots(2, n_layers, figsize=(5 * n_layers, 10), squeeze=False)

x_range = np.linspace(-1, 1, grid_size)
y_range = np.linspace(-1, 1, grid_size)

# psi_values[j, i, k] holds the k-th singular value at (x_range[i], y_range[j]).
# It was never allocated in this notebook, so the cell raised NameError on a
# fresh kernel.
psi_values = np.zeros((grid_size, grid_size, 2))
num_levels_contour = 200

# (row, singular-value index, label): index 1 (smaller) on top, index 0 (larger) below.
panels = [(0, 1, 'Min SV'), (1, 0, 'Max SV')]

for layer in range(n_layers):
    # Use the shared helper instead of a bare lambda around model.sub_model,
    # for consistency with the sv_plot cells above; the helper is described
    # in this notebook as adding the batch dimension that BatchNorm1d needs.
    func = model_to_func(model, from_layer=layer, to_layer=layer)

    # Evaluate psi(x) over the grid.
    for i, xv in enumerate(x_range):
        for j, yv in enumerate(y_range):
            x_point = torch.tensor([xv, yv], dtype=torch.float32)
            psi_values[j, i, :] = psi_manual(x_point, func)

    for row, sv_index, label in panels:
        sv_field = psi_values[:, :, sv_index]
        # Contour levels run from 0 up to the largest value on the grid.
        levels = np.linspace(0, sv_field.max(), num_levels_contour)

        ax = axes[row, layer]
        cs = ax.contourf(x_range, y_range, sv_field,
                         levels=levels,
                         cmap='viridis')
        fig.colorbar(cs, ax=ax)
        ax.set_title(f'{label}\n layer {layer}')
        ax.set_xlabel('x1')
        ax.set_ylabel('x2')
        ax.set_aspect('equal')

plt.tight_layout()
plt.show()


In [None]:
# Prepare figure and axes: one row with two panels, filled in at the end of
# this cell (axes[0] and axes[1]).
fig, axes = plt.subplots(1, 2, figsize=(10 , 5))  # Adjust figsize if needed



class TestFunc(nn.Module):
    """Parameter-free test map returning the squared norm of its input.

    Used as a function with a known Jacobian to sanity-check the
    singular-value computation.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``torch.norm(x)`` (default arguments) raised to the power 2."""
        length = torch.norm(x)
        return length.pow(2)
    
    
def psi_manual_test(x, func, verbose=False):
    """Return the singular values of the Jacobian of ``func`` at ``x``.

    Args:
        x: tensor holding the point at which the Jacobian is evaluated
            (the callers in this notebook pass a point of shape (2,)).
        func: callable mapping a tensor like ``x`` to a tensor.
        verbose: if True, print the Jacobian and its singular values.
            Off by default because this function is called once per grid
            point, which otherwise floods the output.

    Returns:
        NumPy array with all singular values of the Jacobian (after a
        leading dimension has been added to it), as returned by
        ``torch.svd`` — not only the smallest one.
    """
    # Work on a detached copy so the caller's tensor is left untouched.
    x = x.clone().detach().requires_grad_(True)

    # The Jacobian is only read out, never differentiated again, so no
    # autograd graph needs to be built for it. unsqueeze(0) turns the
    # gradient of a scalar-valued func (shape (n,)) into a 1 x n matrix.
    jacobian = torch.autograd.functional.jacobian(func, x).unsqueeze(0)

    # torch.svd returns (U, S, V); element 1 holds the singular values.
    singular_values = torch.svd(jacobian, compute_uv=False)[1]

    if verbose:
        print(f'{jacobian = }')
        print(f'{singular_values = }')

    return singular_values.detach().numpy()

test_func = TestFunc()

# Build the grid and the result buffer here instead of relying on variables
# leaked from another cell: psi_values was never allocated anywhere in this
# notebook. psi_values[j, i, k] is slot k at (x_range[i], y_range[j]).
x_range = np.linspace(-1, 1, grid_size)
y_range = np.linspace(-1, 1, grid_size)
psi_values = np.zeros((grid_size, grid_size, 2))

# Evaluate psi(x) over the grid.
for i, xv in enumerate(x_range):
    for j, yv in enumerate(y_range):
        x_point = torch.tensor([xv, yv], dtype=torch.float32)
        # The test function is scalar-valued, so there is a single singular
        # value per point; NumPy broadcasts it into both slots.
        psi_values[j, i, :] = psi_manual_test(x_point, test_func)

num_levels_contour = 200

# Left panel: slot 1, right panel: slot 0. The titles name the test function;
# they used to interpolate `layer`, a stale loop variable from another cell.
for ax, sv_index, label in zip(axes, (1, 0), ('Min SV', 'Max SV')):
    sv_field = psi_values[:, :, sv_index]
    # Contour levels run from 0 up to the largest value on the grid.
    levels = np.linspace(0, sv_field.max(), num_levels_contour)

    cs = ax.contourf(x_range, y_range, sv_field,
                     levels=levels,
                     cmap='viridis')
    fig.colorbar(cs, ax=ax)
    ax.set_title(f'{label}\n test function')
    ax.set_xlabel('x1')
    ax.set_ylabel('x2')
    ax.set_aspect('equal')

plt.tight_layout()
plt.show()
