# 3. Gradient Descent for Regression

### Imports

In [1]:
import numpy as np
import pandas as pd
from collections import deque
import matplotlib.pyplot as plt

### Global

In [2]:
np.random.seed = 12345
datasets_path = "../Datasets/"

### Variables

In [3]:
# Storing data in dataframes, and dropping a random column with unknown values in it, plus renaming the columns
df_train = pd.read_csv(datasets_path + 
                       "Dataset_2_train.csv", 
                       header = None).drop(labels=2, axis=1).rename(index=str, columns={0: "x", 1: "y"})
df_test = pd.read_csv(datasets_path + 
                      "Dataset_2_test.csv", 
                      header = None).drop(labels=2, axis=1).rename(index=str, columns={0: "x", 1: "y"})
df_val = pd.read_csv(datasets_path + 
                     "Dataset_2_valid.csv", 
                     header = None).drop(labels=2, axis=1).rename(index=str, columns={0: "x", 1: "y"})

## Part 1

In [4]:
regression_fn_coeffs = np.random.rand(2)
patience = 500  # To measure learning stature

In [5]:
def get_loss(xs, ys, w_i):
    loss = 0
    for x,y in zip(xs,ys):
        loss += (compute_fn(regression_fn_coeffs, x) - y) * (x**w_i)
    return loss/(len(xs))

def compute_fn(coeffs, x):
    y = 0
    for order, c in enumerate(coeffs):
        y += (x**order) * c
    return y

def compute_mse(xs, ys):
    mse = 0
    for x,y in zip(xs,ys):
        mse += (compute_fn(regression_fn_coeffs, x) - y)**2
    return mse/len(xs)

In [6]:
def train(df_train, df_val, learning_rate = 1e-6):
    # train and valication data
    xs, ys = df_train.x.values, df_train.x.values
    x_val, y_val = df_val.x.values, df_val.x.values
    # rolling window over MSE to stop when stature
    rolling_mse_val = deque(maxlen=patience)
    all_mse_val, all_mse_tr = [], []
    epoch = 0
    
    # Training loop, stoping based on rolling_mse
    while True:
        for c_i, c in enumerate(regression_fn_coeffs):
            # update weights
            regression_fn_coeffs[c_i] -= learning_rate * get_loss(xs, ys, c_i)

        # get epoch MSE for validation and train
        current_mse_val, current_mse_tr = compute_mse(x_val, y_val), compute_mse(xs, ys)
        
        # add MSEs to lists
        all_mse_val.append(current_mse_val)
        all_mse_tr.append(current_mse_tr)
        rolling_mse_val.append(current_mse_val)
        
        # evaluate model every 1000 steps and stop if stature
        if epoch % patience == 0 and epoch > patience:
            print("Mean Squared Error at epoch %s is %s" % (epoch, current_mse_val))
            if sum(rolling_mse_val)/len(rolling_mse_val) < rolling_mse_val[-1]:
                # Not learning anymore, returning all mse
                return all_mse_tr, all_mse_val
        epoch += 1

In [7]:
all_mse_train, all_mse_val = train(df_train, df_val, 1e-6)

Mean Squared Error at epoch 1000 is 0.010385531778
Mean Squared Error at epoch 2000 is 0.00126101726935
Mean Squared Error at epoch 3000 is 0.000153113445538
Mean Squared Error at epoch 4000 is 1.85911230356e-05
Mean Squared Error at epoch 5000 is 2.25734490208e-06
Mean Squared Error at epoch 6000 is 2.74088122444e-07
Mean Squared Error at epoch 7000 is 3.32799382122e-08
Mean Squared Error at epoch 8000 is 4.04086932895e-09
Mean Squared Error at epoch 9000 is 4.90644688987e-10
Mean Squared Error at epoch 10000 is 5.95743616641e-11
Mean Squared Error at epoch 11000 is 7.23355342001e-12
Mean Squared Error at epoch 12000 is 8.78302236845e-13
Mean Squared Error at epoch 13000 is 1.06643965012e-13


KeyboardInterrupt: 

In [None]:
plt.plot(range(len(all_mse_train)), all_mse_train, 'ro', range(len(all_mse_val)), all_mse_val, 'bo')
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)
plt.show()

## Part 2

## Part 3