In [1]:
# Author-Vishal Burman

## High-dimensional Linear Regression

In [2]:
# First we generate the synthetic data using the below formula:

\begin{equation}
y = 0.05 + \sum_{i = 1}^d 0.01 x_i + \epsilon \text{ where }
\epsilon \sim \mathcal{N}(0, 0.01)
\end{equation}

In [3]:
# It represents the label as the linear function of our inputs, corrupted by Gaussian note
# The Gaussian note is taken with zero mean and 0.01 variance

In [4]:
from mxnet import gluon, autograd, init, nd
from mxnet.gluon import nn

In [6]:
# Function to generate synthetic data

In [8]:
def synthetic_data(w, b, num_examples):
    """generate y = Xw + b + noise"""
    X=nd.random.normal(scale=1, shape=(num_examples, len(w)))
    y=nd.dot(X, w)+b
    y+=nd.random.normal(scale=0.01, shape=y.shape)
    return X, y

In [10]:
# Create a gluon DataLoader from the array

In [11]:
def load_array(data_arrays, batch_size, is_train=True):
    """Construct a gluon data loader"""
    dataset= gluon.data.ArrayDataset(*data_arrays)
    return gluon.data.DataLoader(dataset, batch_size, shuffle=is_train)

In [9]:
n_train, n_test, num_inputs, batch_size= 20, 100, 200, 1

In [12]:
true_w, true_b=nd.ones((num_inputs, 1))*0.01, 0.05
train_data=synthetic_data(true_w, true_b, n_train)
train_iter=load_array(train_data, batch_size)
test_data=synthetic_data(true_w, true_b, n_test)
test_iter=load_array(test_data, batch_size, is_train=False)

# Implementation from Scratch (weight decay)

## Initialize Model Parameters

In [54]:
w=nd.random.normal(scale=1, shape=(num_inputs, 1))
b=nd.zeros(shape=(1, ))
w.attach_grad()
b.attach_grad()
params=[w, b]

In [58]:
num_epochs, lr=100, 0.3


## Define the l2 Norm Penalty

In [15]:
# The most conveninet way to implement this penalty is to square all terms in place and sum them up
# We divide by 2 by convention

In [14]:
def l2_penalty(w):
    return (w**2).sum()/2

## Define Training and Testing

In [16]:
def linreg(X, w, b):
    return nd.dot(X, w)+b

In [17]:
def squared_loss(y_hat, y):
    return (y_hat-y.reshape(y_hat.shape))**2/2

In [37]:
squared_loss(nd.array([6]), nd.array([4]))


[2.]
<NDArray 1 @cpu(0)>

In [19]:
def sgd(params, lr, batch_size):
    for param in params:
        param[:]=param-lr*param.grad/batch_size

In [20]:
import mxnet as mx

In [38]:
for X, y in train_iter:
    print(y.size)
    break

1


In [43]:
class Accumulator(object):
    """Sum a list of numbers over time"""
    def __init__(self, n):
        self.data=[0.0]*n
    def add(self, *args):
        self.data=[a+b for a, b in zip(self.data, args)]
    def __getitem__(self, i):
        return self.data[i]

In [44]:
def evaluate_loss(net, data_iter, loss):
    """Evaluate the loss of the model on the given dataset"""
    metric=Accumulator(2)
    for X, y in data_iter:
        metric.add(loss(net(X), y).sum().asscalar(), y.size)
    return metric[0]/metric[1]

In [51]:
updater=lambda batch_size: sgd(params, lr, batch_size)

In [59]:
def train(lambd):
    net, loss=lambda X: linreg(X, w, b), squared_loss    
    for epoch in range(1, num_epochs+1):
        for X, y in train_iter:
            with autograd.record():
                l=loss(net(X), y)+lambd*l2_penalty(w)
            l.backward()
            updater(X.shape[0])
        if epoch%5==0:
            print("epoch: ", epoch, "Train loss: ", evaluate_loss(net, train_iter, loss), "Test Loss: ", evaluate_loss(net, test_iter, loss))

In [60]:
train(lambd=0)

epoch:  5 Train loss:  nan Test Loss:  nan
epoch:  10 Train loss:  nan Test Loss:  nan
epoch:  15 Train loss:  nan Test Loss:  nan
epoch:  20 Train loss:  nan Test Loss:  nan
epoch:  25 Train loss:  nan Test Loss:  nan
epoch:  30 Train loss:  nan Test Loss:  nan
epoch:  35 Train loss:  nan Test Loss:  nan
epoch:  40 Train loss:  nan Test Loss:  nan
epoch:  45 Train loss:  nan Test Loss:  nan
epoch:  50 Train loss:  nan Test Loss:  nan
epoch:  55 Train loss:  nan Test Loss:  nan
epoch:  60 Train loss:  nan Test Loss:  nan
epoch:  65 Train loss:  nan Test Loss:  nan
epoch:  70 Train loss:  nan Test Loss:  nan
epoch:  75 Train loss:  nan Test Loss:  nan
epoch:  80 Train loss:  nan Test Loss:  nan
epoch:  85 Train loss:  nan Test Loss:  nan
epoch:  90 Train loss:  nan Test Loss:  nan
epoch:  95 Train loss:  nan Test Loss:  nan
epoch:  100 Train loss:  nan Test Loss:  nan
