In [24]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from prepare import *
from costs import *
from grid_search import *
from gradient_descent import *
from stochastic_gradient_descent import *
from build_polynomial import *
from least_squares import *
from split_data import *
from ridge_regression import * 
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Load the data

In [2]:
import datetime
from helpers import *

height, weight, gender = load_data_from_ex02(sub_sample=False, add_outlier=False)
x, mean_x, std_x = standardize(height)
y, tx = build_model_data(x, weight)

In [3]:
y.shape, tx.shape

((10000,), (10000, 2))

# Grid Search

In [4]:
#from grid_search import generate_w, get_best_parameters
#from plots import grid_visualization

# Generate the grid of parameters to be swept
grid_w0, grid_w1 = generate_w(num_intervals=10)

# Start the grid search
start_time = datetime.datetime.now()
grid_losses = grid_search(y, tx, grid_w0, grid_w1)

# Select the best combinaison
loss_star, w0_star, w1_star = get_best_parameters(grid_w0, grid_w1, grid_losses)
end_time = datetime.datetime.now()
execution_time = (end_time - start_time).total_seconds()

# Print the results
print("Grid Search: loss*={l}, w0*={w0}, w1*={w1}, execution time={t:.3f} seconds".format(
      l=loss_star, w0=w0_star, w1=w1_star, t=execution_time))

# Plot the results
#fig = grid_visualization(grid_losses, grid_w0, grid_w1, mean_x, std_x, height, weight)
#fig.set_size_inches(10.0,6.0)
#fig.savefig("grid_plot")  # Optional saving

Grid Search: loss*=42.42448314678248, w0*=66.66666666666669, w1*=16.666666666666686, execution time=0.106 seconds


# Gradient Descent

Again, please fill in the functions `compute_gradient` below:

Please fill in the functions `gradient_descent` below:

Test your gradient descent function through gradient descent demo shown below:

In [5]:
# from gradient_descent import *
#from plots import gradient_descent_visualization

# Define the parameters of the algorithm.
max_iters = 50
gamma = 0.7

# Initialization
w_initial = np.array([0, 0])

# Start gradient descent.
start_time = datetime.datetime.now()
gradient_losses, gradient_ws = gradient_descent(y, tx, w_initial, max_iters, gamma)
end_time = datetime.datetime.now()

# Print result
exection_time = (end_time - start_time).total_seconds()
print("Gradient Descent: execution time={t:.3f} seconds".format(t=exection_time))

Gradient Descent(0/49): loss=2792.2367127591674, w0=51.30574540147352, w1=9.435798704492393
Gradient Descent(1/49): loss=265.302462108962, w0=66.69746902191565, w1=12.266538315840034
Gradient Descent(2/49): loss=37.87837955044161, w0=71.31498610804833, w1=13.115760199244338
Gradient Descent(3/49): loss=17.410212120174496, w0=72.70024123388814, w1=13.370526764265632
Gradient Descent(4/49): loss=15.568077051450455, w0=73.11581777164008, w1=13.446956733772023
Gradient Descent(5/49): loss=15.402284895265295, w0=73.24049073296567, w1=13.469885724623941
Gradient Descent(6/49): loss=15.38736360120863, w0=73.27789262136332, w1=13.476764421879517
Gradient Descent(7/49): loss=15.38602068474353, w0=73.28911318788263, w1=13.478828031056189
Gradient Descent(8/49): loss=15.385899822261674, w0=73.29247935783842, w1=13.47944711380919
Gradient Descent(9/49): loss=15.385888944638305, w0=73.29348920882516, w1=13.47963283863509
Gradient Descent(10/49): loss=15.3858879656522, w0=73.29379216412119, w1=13.47

# Stochastic gradient descent

In [7]:
# from stochastic_gradient_descent import *

# Define the parameters of the algorithm.
max_iters = 50
gamma = 0.7
batch_size = 1

# Initialization
w_initial = np.array([0, 0])

# Start SGD.
start_time = datetime.datetime.now()
sgd_losses, sgd_ws = stochastic_gradient_descent(
    y, tx, w_initial, batch_size, max_iters, gamma)
end_time = datetime.datetime.now()

# Print result
exection_time = (end_time - start_time).total_seconds()
print("SGD: execution time={t:.3f} seconds".format(t=exection_time))

SGD(0/49): loss=439.7872871106584, w0=51.79729490970891, w1=33.144346225988336
SGD(1/49): loss=125.89874008305473, w0=79.84217783915867, w1=0.1325760409551009
SGD(2/49): loss=157.5088606629287, w0=89.95378816480684, w1=16.06714456531969
SGD(3/49): loss=104.72073145071103, w0=80.30353031165458, w1=24.86106026863597
SGD(4/49): loss=151.2178665811003, w0=64.85216233046326, w1=-0.6765812561852762
SGD(5/49): loss=36.84822334154387, w0=79.84470434989466, w1=13.37052645915481
SGD(6/49): loss=24.487666374703853, w0=76.339597809916, w1=10.491834277254894
SGD(7/49): loss=17.48306158050214, w0=71.26984202177543, w1=13.791878507094536
SGD(8/49): loss=16.55294218223241, w0=71.77545147075095, w1=13.648104454440093
SGD(9/49): loss=70.51408604915358, w0=80.63667780395893, w1=20.985732258769936
SGD(10/49): loss=97.27964145410704, w0=74.58779446116539, w1=26.212088538067405
SGD(11/49): loss=93.24106689021808, w0=75.6024661091101, w1=25.742704816491628
SGD(12/49): loss=255.61063196923783, w0=57.499567522

# Least square

In [10]:
degree = 2
tx = build_poly(x, degree)
weights = least_squares(y, tx)
weights

array([7.32839801e+01, 1.34792217e+01, 9.94194214e-03])

In [15]:
# load dataset
x, y = load_data()
print("shape of x {}".format(x.shape))
print("shape of y {}".format(y.shape))

shape of x (50,)
shape of y (50,)


In [20]:
def train_test_split_demo(x, y, degree, ratio, seed):
    """polynomial regression with different split ratios and different degrees."""
    x_tr, y_tr, x_te, y_te = split_data(x, y, ratio, seed)
    # form tx
    tx_tr = build_poly(x_tr, degree)
    tx_te = build_poly(x_te, degree)
    
    print(y_tr.shape,tx_tr.shape)

    weight = least_squares(y_tr, tx_tr)
    

    # calculate RMSE for train and test data.
    rmse_tr = np.sqrt(2 * compute_mse(y_tr, tx_tr, weight))
    rmse_te = np.sqrt(2 * compute_mse(y_te, tx_te, weight))

    print("proportion={p}, degree={d}, Training RMSE={tr:.3f}, Testing RMSE={te:.3f}".format(
          p=ratio, d=degree, tr=rmse_tr, te=rmse_te))

In [21]:
seed = 6
degrees = [1, 3, 7, 12]
split_ratios = [0.9, 0.5, 0.1]

for split_ratio in split_ratios:
    for degree in degrees:
        train_test_split_demo(x, y, degree, split_ratio, seed)

(45,) (45, 2)
proportion=0.9, degree=1, Training RMSE=0.494, Testing RMSE=0.181
(45,) (45, 4)
proportion=0.9, degree=3, Training RMSE=0.264, Testing RMSE=0.206
(45,) (45, 8)
proportion=0.9, degree=7, Training RMSE=0.254, Testing RMSE=0.220
(45,) (45, 13)
proportion=0.9, degree=12, Training RMSE=0.242, Testing RMSE=0.250
(25,) (25, 2)
proportion=0.5, degree=1, Training RMSE=0.455, Testing RMSE=0.531
(25,) (25, 4)
proportion=0.5, degree=3, Training RMSE=0.239, Testing RMSE=0.296
(25,) (25, 8)
proportion=0.5, degree=7, Training RMSE=0.232, Testing RMSE=0.284
(25,) (25, 13)
proportion=0.5, degree=12, Training RMSE=0.205, Testing RMSE=1.548
(5,) (5, 2)
proportion=0.1, degree=1, Training RMSE=0.428, Testing RMSE=0.534
(5,) (5, 4)
proportion=0.1, degree=3, Training RMSE=0.085, Testing RMSE=0.460
(5,) (5, 8)
proportion=0.1, degree=7, Training RMSE=0.000, Testing RMSE=2.254
(5,) (5, 13)
proportion=0.1, degree=12, Training RMSE=0.000, Testing RMSE=4.651


In [26]:
def ridge_regression_demo(x, y, degree, ratio, seed):
    """ridge regression demo."""
    # define parameter
    lambdas = np.logspace(-5, 0, 15)
    # split data
    x_tr, x_te, y_tr, y_te = split_data(x, y, ratio, seed)
    # form tx
    tx_tr = build_poly(x_tr, degree)
    tx_te = build_poly(x_te, degree)

    # ridge regression with different lambda
    rmse_tr = []
    rmse_te = []
    for ind, lambda_ in enumerate(lambdas):
        # ridge regression
        weight = ridge_regression(y_tr, tx_tr, lambda_)
        rmse_tr.append(np.sqrt(2 * compute_mse(y_tr, tx_tr, weight)))
        rmse_te.append(np.sqrt(2 * compute_mse(y_te, tx_te, weight)))

        print("proportion={p}, degree={d}, lambda={l:.3f}, Training RMSE={tr:.3f}, Testing RMSE={te:.3f}".format(
               p=ratio, d=degree, l=lambda_, tr=rmse_tr[ind], te=rmse_te[ind]))
    #plot_train_test(rmse_tr, rmse_te, lambdas, degree)

In [27]:
seed = 56
degree = 7
split_ratio = 0.5
ridge_regression_demo(x, y, degree, split_ratio, seed)

proportion=0.5, degree=7, lambda=0.000, Training RMSE=1.299, Testing RMSE=67.418
proportion=0.5, degree=7, lambda=0.000, Training RMSE=1.299, Testing RMSE=59.973
proportion=0.5, degree=7, lambda=0.000, Training RMSE=1.299, Testing RMSE=48.615
proportion=0.5, degree=7, lambda=0.000, Training RMSE=1.300, Testing RMSE=35.581
proportion=0.5, degree=7, lambda=0.000, Training RMSE=1.301, Testing RMSE=24.837
proportion=0.5, degree=7, lambda=0.001, Training RMSE=1.301, Testing RMSE=18.204
proportion=0.5, degree=7, lambda=0.001, Training RMSE=1.302, Testing RMSE=14.639
proportion=0.5, degree=7, lambda=0.003, Training RMSE=1.304, Testing RMSE=12.391
proportion=0.5, degree=7, lambda=0.007, Training RMSE=1.312, Testing RMSE=10.274
proportion=0.5, degree=7, lambda=0.016, Training RMSE=1.329, Testing RMSE=7.893
proportion=0.5, degree=7, lambda=0.037, Training RMSE=1.357, Testing RMSE=5.613
proportion=0.5, degree=7, lambda=0.085, Training RMSE=1.384, Testing RMSE=3.947
proportion=0.5, degree=7, lambd