In [None]:
# Step 1: Load dataset, split into training and test sets, and scale features
import numpy as np
from sklearn.datasets import load_boston

# Fetch the Boston housing data: feature matrix x and target vector y.
# NOTE(review): load_boston was removed in scikit-learn 1.2 — confirm the pinned version still ships it.
boston = load_boston()
x, y = boston.data, boston.target
print(y.mean())

# Keep the first 80 percent of the rows for training and hold out the remaining 20 percent for testing.
N_train = int(0.8 * x.shape[0])
x_train, x_test = x[:N_train, :], x[N_train:, :]
y_train, y_test = y[:N_train], y[N_train:]

# Standardize every feature column with the mean/std of the training rows only,
# then apply that same shift and scale to the test rows.
x_bar = x_train.mean(axis=0)
x_std = x_train.std(axis=0)
x_train_scaled = (x_train - x_bar) / x_std
x_test_scaled = (x_test - x_bar) / x_std

# Report the shapes of the four resulting arrays, one per line.
print(x_train_scaled.shape, y_train.shape, x_test_scaled.shape, y_test.shape, sep='\n')

: 

In [None]:
# Step 2: Add intercept terms and initialize parameters
# This cell is safe to re-run: the column of ones is only prepended when the scaled
# matrices still have the same number of columns as the raw feature matrix.
n_features = x_train.shape[1]

intercept_train = np.ones((N_train,1))
if x_train_scaled.shape[1] == n_features:
    x_train_scaled = np.hstack((intercept_train,x_train_scaled))

intercept_test = np.ones((x.shape[0] - N_train,1))
if x_test_scaled.shape[1] == n_features:
    x_test_scaled = np.hstack((intercept_test,x_test_scaled))

# after this step both matrices have n_features + 1 columns (intercept first)
print(x_train_scaled.shape)
print(x_test_scaled.shape)

# init parameters using random values (one weight per column, intercept included)
theta = 0.5 * np.random.randn(x_train_scaled.shape[1])
print(theta)

: 

In [None]:
# Step 3: Implement the gradient and the cost function
# In this step, you have to calculate the gradient. You can use the provided formula but the best way is to vectorize
# that formula for efficiency
def compute_gradient(x,y,theta):
    """Return the gradient of the mean-squared-error cost with respect to theta.

    x is the design matrix (one row per example), y the targets and theta the
    current parameters. The result is (1/n) * x^T (x theta - y), where n is the
    number of rows of x.
    """
    residual = np.dot(x, theta) - y
    scale = 1.0 / len(x)
    return scale * np.dot(x.T, residual)

def compute_cost(x,y,theta):
    """Return the halved mean squared error of the linear model x @ theta against y.

    Computed as (0.5/n) * sum((x theta - y)^2), where n is the number of rows of x.
    """
    residual = np.dot(x, theta) - y
    half_inv_n = 0.5 / len(x)
    return half_inv_n * np.sum(np.square(residual))

: 

In [None]:
# Step 4: Try gradient descent algorithm with different learning rates
import matplotlib.pyplot as plt
import copy

# try different values for the learning rate
learning_rates = [0.001,0.003,0.01,0.03,0.1,0.3]

# this matrix keeps the learned parameters, one row per learning rate
theta_matrix = np.zeros((len(learning_rates),x_train_scaled.shape[1]))

# number of training iterations
N_iterations = 100

# prepare to plot
plt.subplot(111)

# run gradient descent once per learning rate and record the cost curve
for indx,alpha in enumerate(learning_rates):
    # keep the cost value for each training step
    J = np.zeros(N_iterations)

    # initialize new parameters using random distribution
    theta = 0.5 * np.random.randn(x_train_scaled.shape[1])
    for step in range(N_iterations):
        # update theta
        theta = theta - alpha * compute_gradient(x_train_scaled,y_train,theta)

        # calculate the cost on training set
        J[step] = compute_cost(x_train_scaled,y_train,theta)

    # save the value of theta
    theta_matrix[indx,:] = theta
    # plot cost function; the label comes from alpha itself so the legend
    # always matches learning_rates, even if that list is edited
    plt.plot(J, label=str(alpha))
plt.xlabel('Training step')
plt.ylabel('Cost')
plt.legend(loc='upper right')
plt.show()

: 

In [None]:
# Step 5: Predict the price of house
# Use the parameters learned with learning rate 0.1. The row is looked up by value
# rather than by a hard-coded row number, so this cell stays correct if the list of
# learning rates is reordered (and fails loudly if 0.1 is removed from it).
selected_alpha = 0.1
theta = theta_matrix[learning_rates.index(selected_alpha),:]
predict_price = np.dot(x_test_scaled,theta)

# calculate the cost for the test set
test_cost = compute_cost(x_test_scaled,y_test,theta)
print('test cost: ',test_cost)

# plot the ground truth and the predicted values, one point per test example
x_axis = np.linspace(1,len(y_test),len(y_test))
plt.plot(x_axis,y_test,'b',x_axis,predict_price,'r')
plt.xlabel('Test example')
plt.ylabel('Price')
plt.legend(('Ground truth','Predicted'))
plt.show()

: 

Migration to PyTorch

In [None]:
import torch
import torch.nn.functional as F
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

# Convert inputs and targets to float32 tensors.
# torch.from_numpy keeps the numpy dtype, while torch.randn below creates float32
# parameters; .float() makes both sides of the matrix product agree.
features = torch.from_numpy(x).float()
# Standardize each feature column. NOTE(review): the raw columns are assumed to be on
# very different scales; without this a fixed small step size is likely to diverge.
inputs = (features - features.mean(dim=0)) / features.std(dim=0)
# Targets as a column of shape (N, 1) so they line up with the model output
# instead of broadcasting against it inside the loss.
targets = torch.from_numpy(y).float().unsqueeze(1)
print(inputs.shape, targets.shape)
print(inputs[:5])
print(targets[:5])

#create dataset and dataloader
train_ds = TensorDataset(inputs, targets)
# Define data loader
batch_size = 5
train_dl = DataLoader(train_ds, batch_size, shuffle=True)


# Weights and biases: one output (the price) and one weight per input feature
w = torch.randn(1, inputs.shape[1], requires_grad=True)
b = torch.randn(1, requires_grad=True)
print(w)
print(b)

#define regression model
def model(x):
    """Linear model: multiply x by the transposed global weights w and add the global bias b."""
    weighted = torch.matmul(x, w.t())
    return weighted + b

# Define loss function
loss_fn = F.mse_loss
# Single source of truth for the step size, shared by the optimizer and the manual loop below
learning_rate = 1e-5
# Define optimizer
opt = torch.optim.SGD([w,b], lr=learning_rate)

# Generate predictions without training
preds = model(inputs)
print(preds[:5])
# compare with targets -> predictions from random parameters should be far off
print(targets[:5])
# record the starting loss so the effect of training can actually be checked
initial_loss = loss_fn(preds, targets).item()
print('initial loss:', initial_loss)

#train for 100 epochs with hand-written full-batch gradient descent
for i in range(100):
    preds = model(inputs)
    loss = loss_fn(preds, targets)
    loss.backward()
    # parameter updates must not be tracked by autograd
    with torch.no_grad():
        w -= w.grad * learning_rate
        b -= b.grad * learning_rate
        # backward() accumulates into .grad, so clear it before the next step
        w.grad.zero_()
        b.grad.zero_()

#check now if the loss is lower than initial_loss
preds = model(inputs)
loss = loss_fn(preds, targets)
print(loss)


# Utility function to train the model
def fit(num_epochs, model, loss_fn, opt, data_loader=None):
    """Train `model` with mini-batch gradient descent.

    Args:
        num_epochs: number of full passes over the batches.
        model: callable mapping a batch of inputs to predictions.
        loss_fn: callable taking (predictions, targets) and returning a scalar loss tensor.
        opt: optimizer providing `zero_grad()` and `step()` for the model parameters.
        data_loader: iterable yielding (inputs, targets) batches. When omitted, the
            notebook-level `train_dl` is used, as before.

    Every 10th epoch the mean loss over that epoch's samples is printed
    (`nan` if the loader produced no batches).
    """
    loader = train_dl if data_loader is None else data_loader

    # Repeat for given number of epochs
    for epoch in range(num_epochs):
        loss_sum = 0.0
        n_seen = 0

        # Train with batches of data
        for xb,yb in loader:
            # 1. Start from clean gradients, whatever state the caller left them in
            opt.zero_grad()
            # 2. Generate predictions
            pred = model(xb)
            # 3. Calculate loss
            loss = loss_fn(pred, yb)
            # 4. Compute gradients
            loss.backward()
            # 5. Update parameters using gradients
            opt.step()

            # accumulate a sample-weighted sum so the last, possibly smaller, batch is not over-counted
            loss_sum += loss.item() * len(xb)
            n_seen += len(xb)

        # Print the progress
        if (epoch+1) % 10 == 0:
            epoch_loss = loss_sum / n_seen if n_seen else float('nan')
            print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, epoch_loss))

#train the model for 100 epochs
fit(100, model, loss_fn, opt)
preds = model(inputs)
# this is a regression task, so report the loss on the data; no accuracy helper exists in this notebook
print(loss_fn(preds, targets))