In [8]:
# L2 norm in pytorch
import torch
import random
import matplotlib.pyplot as plt
from torch import nn
from torch.utils import data
from d2l import torch as d2l

# create datasets
n_train, n_test, num_inputs, batch_size = 20, 100, 200, 5
true_w, true_b = torch.ones((num_inputs, 1)) * 0.01, 0.05

# train data
train_data = d2l.synthetic_data(true_w, true_b, n_train)
train_iter = d2l.load_array(train_data, batch_size)

# test data
test_data = d2l.synthetic_data(true_w, true_b, n_test)
test_iter = d2l.load_array(test_data, batch_size, is_train=False)

In [9]:
# model
net = nn.Sequential(nn.Linear(num_inputs, 1))

# initialize parameters
# def initialize_weights(net):
#     if type(net) == nn.Linear:
#         nn.init.normal_(net.weight, std=0.01)
# net.apply(initialize_weights)
for param in net.parameters():
        param.data.normal_()

# loss
loss = nn.MSELoss(reduction='none')

# optimizer with L2 norm !!
L2_1 = 0
L2_2 = 3
optimizer_1 = torch.optim.SGD([{'params':net[0].weight, 'weight_decay':L2_1}, 
                               {'params':net[0].bias}], lr=0.003)
optimizer_2 = torch.optim.SGD([{'params':net[0].weight, 'weight_decay':L2_2}, 
                               {'params':net[0].bias}], lr=0.003)

# accuracy
def accuracy(y_hat, y):
    difference = y_hat.type(y.dtype) - y
    return sum(difference) / len(difference)

# tain with L2 norm
n_epochs = 100
def train(net, minibatch_data, n_epochs, loss, optimizer):
    net.train()
    for epoch in range(n_epochs):
        for X, y in minibatch_data:
            optimizer.zero_grad()
            y_hat = net(X)
            l = loss(y_hat, y)
            l.mean().backward()
            optimizer.step()
    print("train done")

def evaluate(net, data, n_epochs):
    net.eval()
    accuracy_epoch = []
    for n in range(n_epochs):
        accuracy_batch = []
        for X, y in data:
            y_hat = net(X)
            accuracy_batch.append(accuracy(y_hat, y))
            a_avg = sum(accuracy_batch) / len(accuracy_batch)
        accuracy_epoch.append(a_avg)
    return accuracy_epoch
            

train(net, train_iter, n_epochs, loss, optimizer_1)
net[0].weight.grad.zero_()
net[0].bias.grad.zero_()

train_accuracy = evaluate(net, train_iter, 100)
test_accuracy = evaluate(net, test_iter, 100)
print('L2 norm lambda = 0.0, accuracy of train and test:')
print(train_accuracy)
print(test_accuracy)

train(net, train_iter, n_epochs, loss, optimizer_2)
net[0].weight.grad.zero_()
net[0].bias.grad.zero_()

train_accuracy = evaluate(net, train_iter, 100)
test_accuracy = evaluate(net, test_iter, 100)
print('L2 norm lambda = 3, accuracy of train and test:')
print(train_accuracy)
print(test_accuracy)

train done
L2 norm lambda = 0.0, accuracy of train and test:
[tensor([2.8592e-08], grad_fn=<DivBackward0>), tensor([1.1204e-07], grad_fn=<DivBackward0>), tensor([1.1204e-07], grad_fn=<DivBackward0>), tensor([8.8196e-08], grad_fn=<DivBackward0>), tensor([1.0012e-07], grad_fn=<DivBackward0>), tensor([1.6671e-08], grad_fn=<DivBackward0>), tensor([1.0012e-07], grad_fn=<DivBackward0>), tensor([4.7497e-09], grad_fn=<DivBackward0>), tensor([1.3588e-07], grad_fn=<DivBackward0>), tensor([1.1204e-07], grad_fn=<DivBackward0>), tensor([1.2396e-07], grad_fn=<DivBackward0>), tensor([1.1204e-07], grad_fn=<DivBackward0>), tensor([1.3588e-07], grad_fn=<DivBackward0>), tensor([8.8196e-08], grad_fn=<DivBackward0>), tensor([1.1204e-07], grad_fn=<DivBackward0>), tensor([1.6671e-08], grad_fn=<DivBackward0>), tensor([1.1204e-07], grad_fn=<DivBackward0>), tensor([1.1204e-07], grad_fn=<DivBackward0>), tensor([4.7497e-09], grad_fn=<DivBackward0>), tensor([1.2396e-07], grad_fn=<DivBackward0>), tensor([1.0012e-07