## Plotting the learning dynamics of task representations

In [None]:
# Train one LNNet per first-layer initialization scale and, every 200 epochs,
# scatter-plot each model's in->hid and hid->out weights side by side.
n_epochs = 5000  # Number of epochs
dim_input = 2  # Input dimension = (X, Y)
dim_hidden = 500  # Hidden neurons
dim_output = 1  # Output dimension = Category A or B (0, 1)

lr = 2e-3  # Learning rate
# Initialization scale
# w1_inits = [0.001,0.01,0.1,0.5]
w1_inits = [0.001, 0.25]
w2_init = 1 / dim_hidden

# Training

# One independent model / optimizer / loss per entry of w1_inits.
models = [LNNet(dim_input, dim_hidden, dim_output, w1_init, w2_init) for w1_init in w1_inits]
optimizers = [optim.SGD(model.parameters(), lr=lr) for model in models]
criterions = [nn.MSELoss() for _ in models]

# Plot labels, looked up by model index, so the order must match w1_inits.
model_titles = ['rich', 'lazy']

for i in range(n_epochs):
    # One gradient step per model on (X, y). Gradients are cleared per model
    # inside this loop; no zero_grad call is needed (or valid) before m_i is
    # bound by the enumerate below.
    for m_i, model in enumerate(models):
        optimizers[m_i].zero_grad()
        y_pred, hiddens = model(X)
        loss = criterions[m_i](y_pred, y)
        loss.backward()
        optimizers[m_i].step()

    if i % 200 == 0:
        # Grid layout: one row per model, one column per named parameter.
        fig, axes = plt.subplots(2, 2)
        fig.set_size_inches(16, 14)
        for m_i, model in enumerate(models):
            for p_i, (name, param) in enumerate(model.named_parameters()):
                axis = axes[m_i, p_i]
                if name == 'in_hid.weight':
                    # Scatter the first two columns of the in->hid weights,
                    # one point per row.
                    in_hid = param.detach().numpy()
                    axis.scatter(in_hid[:, 0], in_hid[:, 1])
                    axis.set_title(f'in->hid, {model_titles[m_i]}')

                elif name == 'hid_out.weight':
                    # Plot the first row of the hid->out weights against its
                    # column index.
                    hid_out = param.detach().numpy()
                    axis.scatter(np.arange(hid_out.shape[1]), hid_out[0])
                    axis.set_title(f'hid->out, {model_titles[m_i]}')

        plt.show()
        # plt.savefig(f'task_rep_{i:04d}.png')
        # plt.cla()
        # plt.close()

## Plotting the learning dynamics of task representations (subsampled units)

In [None]:
# Train a single LNNet and, every 200 epochs, scatter-plot the weights of a
# fixed random subset of hidden units (same subset and colours in every frame).

# --- Hyperparameters ---
n_epochs = 5000  # Number of epochs
dim_input = 2  # Input dimension = (X, Y)
dim_hidden = 500  # Hidden neurons
dim_output = 1  # Output dimension = Category A or B (0, 1)
lr = 2e-3  # Learning rate

# Initialization scales (other first-layer values noted: 0.001, 0.01, 0.1, 0.5)
w1_init = 0.5
w2_init = 1 / dim_hidden

# --- Model, optimizer and loss ---
dlnn_model = LNNet(dim_input, dim_hidden, dim_output, w1_init, w2_init)
optimizer = optim.SGD(dlnn_model.parameters(), lr=lr)
criterion = nn.MSELoss()

# --- Hidden units to visualise ---
# Drawn once, without replacement, so each frame tracks the same units.
vis_num_hidden = 10
sub_sample_idx = np.random.choice(dim_hidden, size=vis_num_hidden, replace=False)
# One colour value in [0, 1] per sampled unit.
vis_hidden_color = np.linspace(0, 1, vis_num_hidden)

for i in range(n_epochs):
    # Single gradient step on (X, y).
    optimizer.zero_grad()
    y_pred, hiddens = dlnn_model(X)
    loss = criterion(y_pred, y)
    loss.backward()
    optimizer.step()

    # Only draw a frame every 200 epochs.
    if i % 200 != 0:
        continue

    fig, axes = plt.subplots(1, 2)
    fig.set_size_inches(14, 5)
    for p_i, (name, param) in enumerate(dlnn_model.named_parameters()):
        # Panel is chosen by parameter position.
        axis = axes[p_i]

        if name == 'in_hid.weight':
            in_hid = param.detach().numpy()
            sampled_rows = in_hid[sub_sample_idx]
            axis.scatter(sampled_rows[:, 0], sampled_rows[:, 1], c=vis_hidden_color)
        elif name == 'hid_out.weight':
            hid_out = param.detach().numpy()
            axis.scatter(sub_sample_idx, hid_out[0, sub_sample_idx], c=vis_hidden_color)
        else:
            # Any other parameter gets no plot and no title.
            continue

        axis.set_title(f'{name}, lazy')

    plt.show()
    # plt.savefig(f'lazy_{i:04d}.png')
    # plt.cla()
    # plt.close()

In [None]:
# import glob, os
# for f in glob.glob("task_rep_*.png"):
#     os.remove(f)

## Rich-learning via L2-regularization

In [134]:
# Sweep over weight-decay strengths: for each lambda, train n_runs freshly
# initialised LNNets and record per-epoch loss and relative weight changes.

# --- Experiment size ---
n_runs = 20
n_epochs = 5000  # Number of epochs

# --- Network dimensions ---
dim_input = 2  # Input dimension = (X, Y)
dim_hidden = 500  # Hidden neurons
dim_output = 1  # Output dimension = Category A or B (0, 1)

# --- Optimisation and initialization ---
lr = 2e-3  # Learning rate
# Initialization scales (other first-layer values noted: 0.001, 0.01, 0.1, 0.5)
w1_init = 0.1
w2_init = 1 / dim_hidden

# Weight-decay strengths to compare (alternative set: [0, 0.25, 0.5, 1]).
wd_lambdas = [0, 0.125, 0.25, 0.5]

# --- Result containers, indexed as [lambda, run, epoch] ---
stats_shape = (len(wd_lambdas), n_runs, n_epochs)
models = []
losses = np.zeros(stats_shape)
w1_rel_changes = np.zeros(stats_shape)
w2_rel_changes = np.zeros(stats_shape)

# --- Training ---
for w_i, wd_lambda in enumerate(wd_lambdas):
    for r_i in range(n_runs):
        # Fresh model for every run.
        dlnn_model = LNNet(dim_input, dim_hidden, dim_output, w1_init, w2_init)

        loss_per_run, weight_rel_changes_per_run = train(
            dlnn_model, X, y, n_epochs=n_epochs, lr=lr, wd_lambda=wd_lambda
        )
        losses[w_i, r_i] = loss_per_run
        # Entry 0 is stored as the w1 series, entry 1 as the w2 series.
        w1_rel_changes[w_i, r_i] = weight_rel_changes_per_run[0]
        w2_rel_changes[w_i, r_i] = weight_rel_changes_per_run[1]

    # Keep only the model from the last run of this lambda.
    models.append(dlnn_model)

## Repeated training via L2-regularization

In [None]:
# Continue training copies of the first and last entries of bev_bias_models
# with weight decay, repeated n_runs times, recording both loss curves.
n_runs = 20
n_epochs = 5000  # Number of epochs

lr = 2e-3  # Learning rate
wd_lambda = 0.25

# bev_bias_models_wd[run] is a list holding the trained copies for that run.
bev_bias_models_wd = []
# Loss curves indexed as [source model (0 = first, 1 = last), run, epoch].
bev_bias_losses = np.zeros((2, n_runs, n_epochs))
bev_bias_task_losses = np.zeros((2, n_runs, n_epochs))

# Training
for r_i in range(n_runs):
    models_this_run = []
    bev_bias_models_wd.append(models_this_run)

    source_models = [bev_bias_models[0], bev_bias_models[-1]]
    for m_i, bev_bias_model in enumerate(source_models):
        # Train a deep copy so the source model is left untouched.
        model = copy.deepcopy(bev_bias_model)
        bev_bias_loss_per_run, task_loss_per_run, _ = train_MLP(
            model,
            bev_bias_X,
            bev_bias_y,
            n_epochs=n_epochs,
            lr=lr,
            wd_lambda=wd_lambda,
        )
        bev_bias_losses[m_i, r_i] = bev_bias_loss_per_run
        bev_bias_task_losses[m_i, r_i] = task_loss_per_run
        models_this_run.append(model)