In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%load_ext autoreload
%autoreload 2

In [2]:
# Load the pickled inputs (tx) and targets (y) and convert both to NumPy arrays.
tx = pd.read_pickle("./pickles/x_full.pkl").to_numpy()
y = pd.read_pickle("./pickles/y_full.pkl").to_numpy()

# Flatten y into a 1-D vector holding one value per row.
y = y.reshape(len(y))

In [3]:
def compute_mse(y, tx, w):
    """Return the halved mean squared error of the linear model ``tx.dot(w)``.

    The residual ``y - tx.dot(w)`` is squared and summed via a dot product
    with itself, then divided by twice the number of residual entries.

    Args:
        y: target values (a 1-D vector in this notebook).
        tx: input matrix whose rows line up with the entries of ``y``.
        w: weight vector applied to the columns of ``tx``.

    Returns:
        The scalar ``sum(residual ** 2) / (2 * N)``.
    """
    residual = y - tx.dot(w)
    n_samples = len(residual)
    squared_error = residual.dot(residual)
    return squared_error / (2 * n_samples)

In [39]:
def compute_stoch_gradient(y, tx, w):
    """Gradient of the halved MSE evaluated on the given (mini)batch.

    Args:
        y: targets of the batch.
        tx: inputs of the batch, one row per entry of ``y``.
        w: current weight vector.

    Returns:
        ``-tx.T.dot(y - tx.dot(w)) / N`` where ``N`` is the batch length.
    """
    residual = y - tx.dot(w)
    n_samples = len(residual)
    # Dividing by the negated count applies the minus sign of the gradient.
    return tx.T.dot(residual) / (-n_samples)

def stochastic_gradient_descent(y, tx, initial_w, batch_size, max_iters, gamma):
    """Run minibatch stochastic gradient descent on the halved MSE.

    Minibatches come from ``batch_iter(..., num_batches=max_iters, shuffle=True)``,
    so the number of updates equals the number of batches that iterator yields.
    Each step prints the step index, the minibatch loss and the updated weights.

    Args:
        y: target values.
        tx: input matrix, one row per entry of ``y``.
        initial_w: starting weight vector.
        batch_size: number of samples per minibatch.
        max_iters: number of minibatches requested from ``batch_iter``.
        gamma: step size.

    Returns:
        ``(w, loss)``: the final weights and the loss of the last minibatch,
        measured before that minibatch's update. ``loss`` stays ``0`` when no
        minibatch is produced.
    """
    w = initial_w
    loss = 0
    progress_line = "Gradient Descent({bi}/{ti}): loss={l}, \nw={w}\n"

    minibatches = batch_iter(y, tx, batch_size, num_batches=max_iters, shuffle=True)
    for n_iter, (y_, tx_) in enumerate(minibatches):
        # Loss and gradient are both evaluated at the pre-update weights.
        loss = compute_mse(y_, tx_, w)
        step = gamma * compute_stoch_gradient(y_, tx_, w)
        w = w - step

        print(progress_line.format(bi=n_iter, ti=max_iters - 1, l=loss, w=w))

    return w, loss

In [40]:
def batch_iter(y, tx, batch_size, num_batches=1, shuffle=True):
    """
    Generate a minibatch iterator for a dataset.
    Takes as input two iterables (here the output desired values 'y' and the input data 'tx')
    Outputs an iterator which gives mini-batches of `batch_size` matching elements from `y` and `tx`.
    Data can be randomly shuffled to avoid ordering in the original data messing with the randomness of the minibatches.

    The data is shuffled at most once per call and the batches are consecutive,
    non-overlapping slices of that single ordering. At most `num_batches`
    batches are produced; iteration stops early once the data is exhausted, so
    fewer batches are yielded when `num_batches * batch_size > len(y)`. The
    last batch may be shorter than `batch_size`. An empty batch is never yielded.

    Example of use :
    for minibatch_y, minibatch_tx in batch_iter(y, tx, 32):
        <DO-SOMETHING>
    """
    data_size = len(y)

    if shuffle:
        # Uses the global NumPy RNG; seed it beforehand for reproducible batches.
        shuffle_indices = np.random.permutation(np.arange(data_size))
        shuffled_y = y[shuffle_indices]
        shuffled_tx = tx[shuffle_indices]
    else:
        shuffled_y = y
        shuffled_tx = tx
    for batch_num in range(num_batches):
        start_index = batch_num * batch_size
        end_index = min((batch_num + 1) * batch_size, data_size)
        # Once start_index reaches or passes the end of the data, every further
        # slice would be empty (start > end slices to an empty array), and an
        # empty batch makes the loss/gradient divide by zero. Stop instead.
        if start_index >= end_index:
            break
        yield shuffled_y[start_index:end_index], shuffled_tx[start_index:end_index]

In [41]:
# Define the parameters of the algorithm.
max_iters = 70
gamma = 1e-7
batch_size = 1

# Seed the global NumPy RNG: batch_iter shuffles with np.random, so without a
# fixed seed every run of this cell prints different losses and weights.
SEED = 42
np.random.seed(SEED)

# Initialization
w_initial = np.zeros(tx.shape[1])

# Start SGD.
sgd_w, sgd_loss = stochastic_gradient_descent(
    y, tx, w_initial, batch_size, max_iters, gamma)

print('Final loss: ', sgd_loss, '\nFinal weight vector:\n', sgd_w)

Gradient Descent(0/69): loss=0.5, 
w=[-1.21858528e-05 -8.53980000e-06 -8.06950000e-06 -1.76390000e-06
 -2.57100000e-07 -1.76390000e-06 -8.51810000e-06 -1.48800000e-07
  1.40600000e-07 -3.42310000e-06 -2.30700000e-07  1.64700000e-07
 -5.09510000e-06 -2.05700000e-07 -9.12000000e-08 -4.47970000e-06
 -3.12300000e-07 -1.21472000e-05  0.00000000e+00 -8.48221045e-06
  3.27458741e-10  1.23928255e-09  0.00000000e+00]

Gradient Descent(1/69): loss=0.5092466101878709, 
w=[ 2.88398872e-06  1.20989042e-07 -8.29266862e-07  3.19371496e-06
  2.09357712e-08  2.93766548e-07  1.32590139e-05  7.39313782e-08
  2.65236725e-07 -1.27268758e-06 -1.72872596e-07  1.71461668e-07
 -3.48913328e-07 -2.66252255e-07  1.67156288e-07  3.17552407e-07
 -2.83537679e-07  1.16521574e-05  2.01840850e-07  9.45118744e-08
  2.27701177e-07  3.10257624e-07  1.48806158e-05]

Gradient Descent(2/69): loss=0.49336246157045127, 
w=[ 1.77142612e-05  6.78719636e-06  6.74386014e-06  1.20574890e-05
  2.74833548e-07  6.44813004e-07  3.03913