In [1]:
# native libraries
import numpy as np
from helpers import *

In [2]:
# load and organize data
path_dataset = "exerciseData/height_weight_genders.csv"
# Columns 1 and 2 of the CSV are read as numeric features (header row skipped).
data = np.genfromtxt(path_dataset, delimiter=",", skip_header=1, usecols=[1, 2])
# NOTE: these are views into `data`, so the in-place scaling below also
# modifies `data` itself.
height = data[:, 0]
weight = data[:, 1]
# Column 0 is encoded as 0 when the field contains b"Male", 1 otherwise.
# NOTE(review): the converter matches a bytes literal, so it assumes genfromtxt
# hands it bytes - confirm for the installed NumPy version.
gender = np.genfromtxt(
    path_dataset, delimiter=",", skip_header=1, usecols=[0],
    converters={0: lambda x: 0 if b"Male" in x else 1})
# Convert to metric system
# NOTE(review): the factors look like rounded inch->m and lb->kg conversions - confirm.
height *= 0.025
weight *= 0.454

# Keep the statistics of each feature under its own name: the height standard
# deviation was previously bound to `std_x2` and then overwritten by the
# weight one, leaving `std_x1` undefined.
x1, mean_x1, std_x1 = standardize(height)
x2, mean_x2, std_x2 = standardize(weight)
y, tx = build_model_data(x1, x2, gender)

m = num_samples = len(y)
# The design matrix is rebuilt explicitly (replacing the one returned above):
# a column of ones followed by the two standardized features.
tx = np.c_[np.ones(m), x1, x2]

In [3]:
# required functions
def calculate_mse(y, tx, w):
    """Half of the mean squared residual of the linear model.

    Args:
        y: target values.
        tx: design matrix whose product with ``w`` gives the predictions.
        w: weight vector.

    Returns:
        ``0.5 * mean((y - tx.dot(w)) ** 2)``.
    """
    residual = y - tx.dot(w)
    squared = np.square(residual)
    return 0.5 * squared.mean()

def compute_gradient(y, tx, w):
    """Gradient of the half-MSE loss for linear regression.

    The linear predictor is ``tx.dot(w)``; the residual is taken as
    prediction minus target.

    Args:
        y: target values.
        tx: design matrix.
        w: weight vector.

    Returns:
        (gradient, residual) where ``gradient = tx.T.dot(residual) / len(residual)``
        and ``residual = tx.dot(w) - y``.
    """
    residual = tx.dot(w) - y
    n_samples = len(residual)
    gradient = tx.T.dot(residual) / n_samples
    return gradient, residual

In [4]:
# main functions
def least_squares_GD(y, tx, initial_w, max_iters, gamma):
    """Linear regression using gradient descent.

    Args:
        y: target values.
        tx: design matrix.
        initial_w: starting weight vector.
        max_iters: number of gradient steps to take.
        gamma: step size.

    Returns:
        (losses, ws): ``losses`` holds the loss after every update and ``ws``
        holds ``initial_w`` followed by the weights after every update.
    """
    ws = [initial_w]
    losses = []
    w = initial_w
    for n_iter in range(max_iters):
        # full-batch gradient at the current weights
        grad, _ = compute_gradient(y, tx, w)
        # descent step
        w = w - gamma * grad
        # loss is evaluated at the updated weights
        loss = calculate_mse(y, tx, w)
        ws.append(w)
        losses.append(loss)
        # Format every weight instead of exactly three, so the function works
        # for any number of features (output is unchanged for three weights).
        weights_str = ",".join("{}".format(v) for v in w.round(5))
        print("GD({bi}/{ti}): loss={l}, weights={w}".format(
              bi=n_iter, ti=max_iters - 1, l=loss, w=weights_str))

    return losses, ws

In [5]:
# demo for least_squares_GD
# hyper-parameters: iteration budget and step size
max_iters, gamma = 200, 0.3
# one randomly drawn starting weight per column of the design matrix
num_features = n = tx.shape[1]
initial_w = np.random.randn(n)
# run GD
losses, ws = least_squares_GD(y, tx, initial_w, max_iters, gamma)

GD(0/199): loss=2.1861295668691993, weights=-1.56548,0.41748,-0.69178
GD(1/199): loss=1.0931332205591207, weights=-0.94584,0.38049,-0.71958
GD(2/199): loss=0.5590493310923998, weights=-0.51209,0.36231,-0.72877
GD(3/199): loss=0.2975629487071942, weights=-0.20846,0.35214,-0.73016
GD(4/199): loss=0.1694248818983369, weights=0.00408,0.3454,-0.72832
GD(5/199): loss=0.1065895540532475, weights=0.15285,0.34018,-0.72515
GD(6/199): loss=0.07574784463101396, weights=0.257,0.33564,-0.72149
GD(7/199): loss=0.060584123643682576, weights=0.3299,0.33145,-0.71767
GD(8/199): loss=0.05310468763070618, weights=0.38093,0.32745,-0.71383
GD(9/199): loss=0.04939270893461346, weights=0.41665,0.32359,-0.71003
GD(10/199): loss=0.04752887773827152, weights=0.44166,0.31984,-0.7063
GD(11/199): loss=0.046572644510732696, weights=0.45916,0.31617,-0.70265
GD(12/199): loss=0.0460630514761812, weights=0.47141,0.31259,-0.69908
GD(13/199): loss=0.04577414393002761, weights=0.47999,0.3091,-0.69559
GD(14/199): loss=0.0455

In [6]:
def least_squares_SGD(y, tx, initial_w, max_iters, gamma, batch_size=1):
    """Linear regression using stochastic gradient descent.

    Each outer iteration requests a single mini-batch of ``batch_size``
    samples from ``batch_iter`` and takes one gradient step on it.

    Args:
        y: target values.
        tx: design matrix.
        initial_w: starting weight vector.
        max_iters: number of outer iterations.
        gamma: step size.
        batch_size: number of samples per mini-batch (default 1).

    Returns:
        (losses, ws): ``losses`` holds the full-data loss after every update
        and ``ws`` holds ``initial_w`` followed by the weights after every update.
    """
    ws = [initial_w]
    losses = []
    w = initial_w
    for n_iter in range(max_iters):
        for y_batch, tx_batch in batch_iter(y, tx, batch_size=batch_size, num_batches=1):
            # stochastic gradient from the mini-batch only
            grad, _ = compute_gradient(y_batch, tx_batch, w)
            w = w - gamma * grad
            # the reported loss is computed on the full data set, not the batch
            loss = calculate_mse(y, tx, w)
            ws.append(w)
            losses.append(loss)

        # Format every weight instead of exactly three, so the function works
        # for any number of features (output is unchanged for three weights).
        weights_str = ",".join("{}".format(v) for v in w.round(5))
        print("SGD({bi}/{ti}): loss={l}, weights={w}".format(
              bi=n_iter, ti=max_iters - 1, l=loss, w=weights_str))

    return losses, ws

In [7]:
# demo for least_squares_SGD
# hyper-parameters: mini-batch size, iteration budget and step size
batch_size, max_iters, gamma = 10, 200, 0.3
# one randomly drawn starting weight per column of the design matrix
n = tx.shape[1]
initial_w = np.random.randn(n)
# run SGD
losses, ws = least_squares_SGD(
    y, tx, initial_w, max_iters, gamma, batch_size=batch_size)

SGD(0/199): loss=1.8809274524530604, weights=-0.91219,1.26361,-0.34101
SGD(1/199): loss=0.7877918522795685, weights=-0.61997,0.84701,-0.77574
SGD(2/199): loss=0.3370006969139371, weights=-0.24555,0.61632,-0.97114
SGD(3/199): loss=0.18386796507735204, weights=-0.00114,0.58307,-0.98947
SGD(4/199): loss=0.13792111727396136, weights=0.10879,0.62544,-0.91825
SGD(5/199): loss=0.0944130529019627, weights=0.22618,0.56106,-0.96812
SGD(6/199): loss=0.07919437908857851, weights=0.29289,0.55218,-0.98156
SGD(7/199): loss=0.05987924440133609, weights=0.42928,0.522,-0.96965
SGD(8/199): loss=0.0705257229935633, weights=0.36183,0.59599,-0.86921
SGD(9/199): loss=0.05821440578071698, weights=0.40398,0.51936,-0.90258
SGD(10/199): loss=0.05584724569333515, weights=0.44024,0.49637,-0.91918
SGD(11/199): loss=0.0627370204238695, weights=0.37742,0.4702,-0.92598
SGD(12/199): loss=0.0628961442710562, weights=0.36401,0.47033,-0.90713
SGD(13/199): loss=0.056922492174512, weights=0.40039,0.47522,-0.87817
SGD(14/199

In [8]:
def least_squares(y, tx):
    """Least squares regression via the normal equations.

    Solves ``(tx.T tx) w = tx.T y`` with ``np.linalg.solve``.

    Args:
        y: target values.
        tx: design matrix.

    Returns:
        (loss, w): the loss from ``calculate_mse`` at the solution, and the
        solution itself.
    """
    tx_t = tx.T
    gram = tx_t.dot(tx)
    moment = tx_t.dot(y)
    w = np.linalg.solve(gram, moment)
    return calculate_mse(y, tx, w), w

In [9]:
# demo for least_squares
# Solve the normal equations on the full design matrix; the returned loss is
# discarded and only the fitted weight vector is printed.
# NOTE(review): this rebinds `weight`, the name the data-loading cell uses for
# the dataset's weight column - consider a distinct name for the coefficients.
_, weight = least_squares(y, tx)
print(weight)

[ 0.5         0.15778576 -0.54427505]


In [10]:
# required functions
def ridge_mse(y, tx, w):
    """Half mean squared error of the linear model (no penalty term).

    Args:
        y: target values.
        tx: design matrix.
        w: weight vector.

    Returns:
        ``e.dot(e) / (2 * len(e))`` with ``e = y - tx.dot(w)``.
    """
    residual = y - tx.dot(w)
    sum_of_squares = residual.dot(residual)
    return sum_of_squares / (2 * len(residual))

# main function
def ridge_regression(y, tx, lambda_):
    """Ridge regression via the normal equations.

    Solves ``(tx.T tx + 2 * N * lambda_ * I) w = tx.T y`` where ``N`` is the
    number of rows of ``tx``. The penalty is applied to every column of
    ``tx``, including a constant column if one is present.

    Args:
        y: target values.
        tx: design matrix.
        lambda_: regularization strength.

    Returns:
        (loss, w): the unpenalized loss from ``ridge_mse`` at the solution,
        and the solution itself.
    """
    n_samples, n_features = tx.shape[0], tx.shape[1]
    penalty = 2 * n_samples * lambda_ * np.identity(n_features)
    w = np.linalg.solve(tx.T.dot(tx) + penalty, tx.T.dot(y))
    return ridge_mse(y, tx, w), w

In [11]:
# demo for ridge_regression
# NOTE: `path_dataset` and `data` are rebound here to a different data set.
path_dataset = "exerciseData/dataEx3.csv"
data = np.loadtxt(path_dataset, delimiter=",", skiprows=1, unpack=True)
# unpack=True transposes the file, so each CSV column is one row of `data`
x_reg, y_reg = data[0], data[1]

# experiment settings
seed, degree, ratio = 56, 3, 0.5
# regularization strengths to sweep (log-spaced from 1e-5 to 1)
lambdas = np.logspace(-5, 0, 15)

# split data, then build the design matrices with the helper
# (presumably a polynomial expansion up to `degree` - confirm in helpers)
x_tr, x_te, y_tr, y_te = split_data(x_reg, y_reg, ratio, seed)
tx_tr, tx_te = build_poly(x_tr, degree), build_poly(x_te, degree)

# ridge regression with different lambda
rmse_tr, rmse_te = [], []
for ind, lambda_ in enumerate(lambdas):
    # fit on the training split only
    _, weight = ridge_regression(y_tr, tx_tr, lambda_)
    # RMSE = sqrt(2 * half-MSE), evaluated on both splits
    rmse_tr.append(np.sqrt(2 * ridge_mse(y_tr, tx_tr, weight)))
    rmse_te.append(np.sqrt(2 * ridge_mse(y_te, tx_te, weight)))

    print("proportion={p}, degree={d}, lambda={l:.3f}, Training RMSE={tr:.3f}, Testing RMSE={te:.3f}".format(
           p=ratio, d=degree, l=lambda_, tr=rmse_tr[-1], te=rmse_te[-1]))

proportion=0.5, degree=3, lambda=0.000, Training RMSE=0.240, Testing RMSE=0.312
proportion=0.5, degree=3, lambda=0.000, Training RMSE=0.240, Testing RMSE=0.312
proportion=0.5, degree=3, lambda=0.000, Training RMSE=0.240, Testing RMSE=0.312
proportion=0.5, degree=3, lambda=0.000, Training RMSE=0.240, Testing RMSE=0.313
proportion=0.5, degree=3, lambda=0.000, Training RMSE=0.240, Testing RMSE=0.316
proportion=0.5, degree=3, lambda=0.001, Training RMSE=0.240, Testing RMSE=0.321
proportion=0.5, degree=3, lambda=0.001, Training RMSE=0.241, Testing RMSE=0.330
proportion=0.5, degree=3, lambda=0.003, Training RMSE=0.244, Testing RMSE=0.343
proportion=0.5, degree=3, lambda=0.007, Training RMSE=0.250, Testing RMSE=0.353
proportion=0.5, degree=3, lambda=0.016, Training RMSE=0.264, Testing RMSE=0.350
proportion=0.5, degree=3, lambda=0.037, Training RMSE=0.297, Testing RMSE=0.345
proportion=0.5, degree=3, lambda=0.085, Training RMSE=0.356, Testing RMSE=0.381
proportion=0.5, degree=3, lambda=0.193, 

In [12]:
# required function(s)
def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``, applied element-wise by NumPy."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def logistic_cost(y, tx, w):
    """Mean negative log-likelihood for logistic regression.

    Args:
        y: labels (the formula assumes values in {0, 1}).
        tx: design matrix.
        w: weight vector.

    Returns:
        The mean over samples of ``-y*log(s) - (1-y)*log(1-s)`` with
        ``s = sigmoid(tx.dot(w))``.
    """
    z = tx.dot(w)
    # Algebraically identical to -y*log(s) - (1-y)*log(1-s), but written as
    # log(1 + exp(z)) - y*z so that a saturated sigmoid (s == 0 or s == 1)
    # no longer produces log(0) and hence inf/nan costs.
    cost = np.logaddexp(0, z) - y * z
    return np.mean(cost)

def logistic_gradient(y, tx, w):
    """Gradient of the mean logistic loss.

    Args:
        y: labels.
        tx: design matrix.
        w: weight vector.

    Returns:
        (gradient, residual) where ``residual = sigmoid(tx.dot(w)) - y`` and
        ``gradient = tx.T.dot(residual) / len(residual)``.
    """
    predicted = sigmoid(tx.dot(w))
    residual = predicted - y
    return tx.T.dot(residual) / len(residual), residual

In [13]:
# main function(s)
def logistic_regression(y, tx, initial_w, max_iters, gamma):
    """Logistic regression using gradient descent.

    Args:
        y: labels.
        tx: design matrix.
        initial_w: starting weight vector.
        max_iters: number of gradient steps to take.
        gamma: step size.

    Returns:
        (losses, ws): ``losses`` holds the loss after every update and ``ws``
        holds ``initial_w`` followed by the weights after every update.
    """
    ws = [initial_w]
    losses = []
    w = initial_w
    for n_iter in range(max_iters):
        # full-batch gradient at the current weights
        grad, _ = logistic_gradient(y, tx, w)
        # descent step
        w = w - gamma * grad
        # loss is evaluated at the updated weights
        loss = logistic_cost(y, tx, w)
        ws.append(w)
        losses.append(loss)
        # Format every weight instead of exactly three, so the function works
        # for any number of features (output is unchanged for three weights).
        weights_str = ",".join("{}".format(v) for v in w.round(5))
        print("GD({bi}/{ti}): loss={l}, weights={w}".format(
              bi=n_iter, ti=max_iters - 1, l=loss, w=weights_str))

    return losses, ws

In [14]:
# demo for logistic_regression
# hyper-parameters: iteration budget and step size
max_iters, gamma = 200, 0.3
# one randomly drawn starting weight per column of the design matrix
num_features = n = tx.shape[1]
initial_w = np.random.randn(n)
# run GD
losses, ws = logistic_regression(y, tx, initial_w, max_iters, gamma)

GD(0/199): loss=0.31816071486732994, weights=-0.41974,-1.30815,-1.147
GD(1/199): loss=0.3161478102039085, weights=-0.4048,-1.31271,-1.16625
GD(2/199): loss=0.3142678024546776, weights=-0.39055,-1.31689,-1.18503
GD(3/199): loss=0.3125080695129788, weights=-0.37694,-1.32069,-1.20338
GD(4/199): loss=0.3108574405135453, weights=-0.36394,-1.32416,-1.22132
GD(5/199): loss=0.30930600081444326, weights=-0.3515,-1.3273,-1.23888
GD(6/199): loss=0.3078449269687315, weights=-0.33961,-1.33014,-1.25606
GD(7/199): loss=0.3064663465026151, weights=-0.32823,-1.33269,-1.2729
GD(8/199): loss=0.305163218307785, weights=-0.31734,-1.33497,-1.2894
GD(9/199): loss=0.3039292302398596, weights=-0.3069,-1.337,-1.3056
GD(10/199): loss=0.3027587111388808, weights=-0.29691,-1.33879,-1.32149
GD(11/199): loss=0.3016465549869755, weights=-0.28732,-1.34035,-1.33709
GD(12/199): loss=0.3005881553195915, weights=-0.27813,-1.34169,-1.35243
GD(13/199): loss=0.29957934833087946, weights=-0.26932,-1.34283,-1.3675
GD(14/199): 

In [15]:
def logistic_regression_SGD(y, tx, initial_w, max_iters, gamma, batch_size=1):
    """Logistic regression using stochastic gradient descent.

    Each outer iteration requests a single mini-batch of ``batch_size``
    samples from ``batch_iter`` and takes one gradient step on it.

    Args:
        y: labels.
        tx: design matrix.
        initial_w: starting weight vector.
        max_iters: number of outer iterations.
        gamma: step size.
        batch_size: number of samples per mini-batch (default 1).

    Returns:
        (losses, ws): ``losses`` holds the full-data loss after every update
        and ``ws`` holds ``initial_w`` followed by the weights after every update.
    """
    ws = [initial_w]
    losses = []
    w = initial_w
    for n_iter in range(max_iters):
        for y_batch, tx_batch in batch_iter(y, tx, batch_size=batch_size, num_batches=1):
            # stochastic gradient from the mini-batch only
            grad, _ = logistic_gradient(y_batch, tx_batch, w)
            w = w - gamma * grad
            # the reported loss is computed on the full data set, not the batch
            loss = logistic_cost(y, tx, w)
            ws.append(w)
            losses.append(loss)

        # Format every weight instead of exactly three, so the function works
        # for any number of features (output is unchanged for three weights).
        weights_str = ",".join("{}".format(v) for v in w.round(5))
        print("SGD({bi}/{ti}): loss={l}, weights={w}".format(
              bi=n_iter, ti=max_iters - 1, l=loss, w=weights_str))
    return losses, ws

In [16]:
# demo for logistic_regression_SGD
# hyper-parameters: mini-batch size, iteration budget and step size
batch_size, max_iters, gamma = 10, 200, 0.3
# one randomly drawn starting weight per column of the design matrix
n = tx.shape[1]
initial_w = np.random.randn(n)
# run SGD
losses, ws = logistic_regression_SGD(
    y, tx, initial_w, max_iters, gamma, batch_size=batch_size)

SGD(0/199): loss=1.675245019115256, weights=0.00123,0.38271,1.36631
SGD(1/199): loss=1.4221540943245519, weights=-0.05173,0.1974,1.16086
SGD(2/199): loss=1.231697335641256, weights=-0.02329,0.05679,0.98964
SGD(3/199): loss=1.0438159578013746, weights=-0.10144,-0.09203,0.80228
SGD(4/199): loss=0.8807310815491292, weights=-0.08748,-0.23648,0.62113
SGD(5/199): loss=0.752133456475574, weights=-0.04521,-0.3728,0.46033
SGD(6/199): loss=0.6505464946313978, weights=0.01835,-0.5042,0.31135
SGD(7/199): loss=0.5658443369293982, weights=-0.02976,-0.65041,0.16823
SGD(8/199): loss=0.5190389618856986, weights=-0.04798,-0.73918,0.06499
SGD(9/199): loss=0.485476602563933, weights=-0.06044,-0.8067,-0.02539
SGD(10/199): loss=0.46114303434313386, weights=0.01793,-0.86931,-0.09261
SGD(11/199): loss=0.43884382287873125, weights=0.04205,-0.92706,-0.16891
SGD(12/199): loss=0.42709715790141983, weights=0.06093,-0.94447,-0.22413
SGD(13/199): loss=0.41265193416737045, weights=0.08251,-0.98605,-0.28605
SGD(14/199

In [17]:
# required function(s)
def reg_logistic_cost(y, tx, w, alpha):
    """Mean logistic loss plus an L2 penalty on the weights.

    Args:
        y: labels (the formula assumes values in {0, 1}).
        tx: design matrix.
        w: weight vector.
        alpha: regularization strength.

    Returns:
        ``mean(-y*log(s) - (1-y)*log(1-s)) + alpha * w.w / (2 * len(y))``
        with ``s = sigmoid(tx.dot(w))``.
    """
    z = tx.dot(w)
    # Algebraically identical to -y*log(s) - (1-y)*log(1-s), but written as
    # log(1 + exp(z)) - y*z so that a saturated sigmoid (s == 0 or s == 1)
    # no longer produces log(0) and hence inf/nan costs.
    cost = np.logaddexp(0, z) - y * z
    # the penalty covers every component of w and is scaled by the sample count
    reg = np.dot(w, w) * alpha / (2 * len(y))
    return np.mean(cost) + reg

def reg_logistic_gradient(y, tx, w, alpha):
    """Gradient of the L2-regularized mean logistic loss.

    Args:
        y: labels.
        tx: design matrix.
        w: weight vector.
        alpha: regularization strength.

    Returns:
        (gradient, residual) where ``residual = sigmoid(tx.dot(w)) - y`` and
        ``gradient = tx.T.dot(residual) / n + alpha * w / n`` with
        ``n = len(residual)``.
    """
    err = sigmoid(tx.dot(w)) - y
    grad = tx.T.dot(err) / len(err)
    # Derivative of the penalty alpha * w.w / (2 * n) is +alpha * w / n.
    # NOTE(review): n here is len(err), i.e. the mini-batch size when called
    # from SGD, whereas reg_logistic_cost scales by the full len(y).
    reg = w * alpha / len(err)
    # The penalty gradient must be ADDED: subtracting it (as before) made the
    # descent step push the weights away from zero instead of shrinking them.
    return grad + reg, err

In [18]:
# main function(s)
def reg_logistic_regression(y, tx, alpha, initial_w, max_iters, gamma):
    """Regularized logistic regression using gradient descent.

    Args:
        y: labels.
        tx: design matrix.
        alpha: regularization strength.
        initial_w: starting weight vector.
        max_iters: number of gradient steps to take.
        gamma: step size.

    Returns:
        (losses, ws): ``losses`` holds the regularized loss after every update
        and ``ws`` holds ``initial_w`` followed by the weights after every update.
    """
    ws = [initial_w]
    losses = []
    w = initial_w
    for n_iter in range(max_iters):
        # full-batch regularized gradient at the current weights
        grad, _ = reg_logistic_gradient(y, tx, w, alpha)
        # descent step
        w = w - gamma * grad
        # loss is evaluated at the updated weights
        loss = reg_logistic_cost(y, tx, w, alpha)
        ws.append(w)
        losses.append(loss)
        # Format every weight instead of exactly three, so the function works
        # for any number of features (output is unchanged for three weights).
        weights_str = ",".join("{}".format(v) for v in w.round(5))
        print("GD({bi}/{ti}): loss={l}, weights={w}".format(
              bi=n_iter, ti=max_iters - 1, l=loss, w=weights_str))

    return losses, ws

In [19]:
# demo for reg_logistic_regression
# hyper-parameters: regularization strength, iteration budget and step size
alpha, max_iters, gamma = 0.01, 200, 0.3
# one randomly drawn starting weight per column of the design matrix
num_features = n = tx.shape[1]
initial_w = np.random.randn(n)
# run GD
losses, ws = reg_logistic_regression(y, tx, alpha, initial_w, max_iters, gamma)

GD(0/199): loss=0.49234066802359955, weights=0.35988,0.24664,-0.88955
GD(1/199): loss=0.45993454561344516, weights=0.33566,0.18213,-0.96411
GD(2/199): loss=0.4342775030547853, weights=0.31386,0.12555,-1.03081
GD(3/199): loss=0.41358353419616983, weights=0.29417,0.07547,-1.09105
GD(4/199): loss=0.3966071295335583, weights=0.2763,0.03075,-1.14595
GD(5/199): loss=0.3824681134564932, weights=0.26003,-0.00948,-1.19638
GD(6/199): loss=0.37053376243543135, weights=0.24514,-0.04591,-1.24304
GD(7/199): loss=0.36034092216416064, weights=0.23146,-0.0791,-1.28645
GD(8/199): loss=0.3515444868333099, weights=0.21885,-0.10949,-1.32707
GD(9/199): loss=0.34388295743668174, weights=0.20718,-0.13744,-1.36525
GD(10/199): loss=0.33715507710505427, weights=0.19635,-0.16324,-1.40129
GD(11/199): loss=0.33120372098419687, weights=0.18628,-0.18716,-1.43542
GD(12/199): loss=0.3259046030142275, weights=0.17687,-0.20939,-1.46785
GD(13/199): loss=0.3211582307115727, weights=0.16807,-0.2301,-1.49876
GD(14/199): loss

In [20]:
def reg_logistic_regression_SGD(y, tx, alpha, initial_w, max_iters, gamma, batch_size=1):
    """Regularized logistic regression using stochastic gradient descent.

    Each outer iteration requests a single mini-batch of ``batch_size``
    samples from ``batch_iter`` and takes one gradient step on it.

    Args:
        y: labels.
        tx: design matrix.
        alpha: regularization strength.
        initial_w: starting weight vector.
        max_iters: number of outer iterations.
        gamma: step size.
        batch_size: number of samples per mini-batch (default 1).

    Returns:
        (losses, ws): ``losses`` holds the full-data regularized loss after
        every update and ``ws`` holds ``initial_w`` followed by the weights
        after every update.
    """
    ws = [initial_w]
    losses = []
    w = initial_w
    for n_iter in range(max_iters):
        for y_batch, tx_batch in batch_iter(y, tx, batch_size=batch_size, num_batches=1):
            # stochastic regularized gradient from the mini-batch only
            grad, _ = reg_logistic_gradient(y_batch, tx_batch, w, alpha)
            w = w - gamma * grad
            # the reported loss is computed on the full data set, not the batch
            loss = reg_logistic_cost(y, tx, w, alpha)
            ws.append(w)
            losses.append(loss)

        # Format every weight instead of exactly three, so the function works
        # for any number of features (output is unchanged for three weights).
        weights_str = ",".join("{}".format(v) for v in w.round(5))
        print("SGD({bi}/{ti}): loss={l}, weights={w}".format(
              bi=n_iter, ti=max_iters - 1, l=loss, w=weights_str))
    return losses, ws

In [21]:
# demo for reg_logistic_regression_SGD
# define the parameters
batch_size = 10
alpha = 0.01
max_iters = 200
gamma = 0.3
# weight initialization: one random starting weight per column of tx
n = num_features = len(tx.T)
initial_w = np.random.randn(n)
# run SGD
# batch_size is now passed explicitly; it was defined above but never handed
# to the function, so the run silently used the default batch size of 1.
losses, ws = reg_logistic_regression_SGD(y, tx, alpha, initial_w, max_iters, gamma, batch_size)

SGD(0/199): loss=1.118359449652398, weights=1.24238,-0.88944,1.307
SGD(1/199): loss=0.9051363941932274, weights=1.00462,-1.10076,1.13659
SGD(2/199): loss=0.8865239881879458, weights=1.08969,-1.15623,1.08746
SGD(3/199): loss=0.9069208223645194, weights=1.16358,-1.16015,1.09599
SGD(4/199): loss=0.8948778537706535, weights=1.25297,-1.20182,1.03924
SGD(5/199): loss=0.8880402365854403, weights=1.36195,-1.22274,0.95781
SGD(6/199): loss=0.7117307867328309, weights=1.16437,-1.47734,0.77589
SGD(7/199): loss=0.6136816787666682, weights=1.02414,-1.66835,0.65415
SGD(8/199): loss=0.6095799338735564, weights=1.03406,-1.68979,0.64281
SGD(9/199): loss=0.6041002142658052, weights=1.079,-1.72895,0.60626
SGD(10/199): loss=0.5327912993906979, weights=0.97306,-1.89268,0.4508
SGD(11/199): loss=0.49404651224313195, weights=0.89974,-2.00566,0.34831
SGD(12/199): loss=0.4668183853334942, weights=0.78847,-2.09794,0.30735
SGD(13/199): loss=0.46432548811244984, weights=0.80987,-2.12539,0.28178
SGD(14/199): loss=0.