# 3. Gradient Descent for Regression

### Imports

In [1]:
import numpy as np
import pandas as pd
import itertools
from collections import deque
import matplotlib.pyplot as plt

### Global

In [2]:
# Seed NumPy's global RNG so the random initial weights and sample draws are
# reproducible. The seeding function must be *called*; assigning to it would
# replace the function and leave the generator unseeded.
np.random.seed(12345)
# Directory (relative to the notebook) that holds the CSV datasets.
datasets_path = "../Datasets/"

### Variables

In [3]:
# Each split is read without a header row, the third column (label 2) is
# dropped, the two remaining columns are renamed to "x" and "y", and the row
# index labels are converted to strings.
def _load_xy(csv_name):
    """Read one dataset CSV from ``datasets_path`` as an x/y dataframe."""
    raw = pd.read_csv(datasets_path + csv_name, header=None)
    trimmed = raw.drop(labels=2, axis=1)
    return trimmed.rename(index=str, columns={0: "x", 1: "y"})


df_train = _load_xy("Dataset_2_train.csv")
df_test = _load_xy("Dataset_2_test.csv")
df_val = _load_xy("Dataset_2_valid.csv")

## Part 1

In [4]:
# Number of training steps between convergence checks; it is also the length
# of the validation-MSE window that train() inspects to detect a plateau.
patience = 10_000

In [5]:
def loss(xs, ys, w):
    """Return the gradient of the (half) mean squared error with respect to w.

    Despite its name, this function returns the gradient used in the weight
    update, not a loss value.

    Parameters
    ----------
    xs : array-like
        Either a 2-D design matrix with one row per sample, or a 1-D feature
        vector describing a single sample.
    ys : array-like or scalar
        Target value(s) matching ``xs``.
    w : array-like
        Weight vector.

    Returns
    -------
    numpy.ndarray
        ``(xs.w - ys) . xs`` averaged over the number of samples.
    """
    xs = np.asarray(xs)
    residuals = np.asarray(xs.dot(w) - np.asarray(ys))
    # A 1-D xs paired with a 1-D w is one sample, so the average is over a
    # single row. Using len(xs) there would divide by the number of features
    # and make the result disagree with the equivalent one-row batch.
    single_sample = xs.ndim == 1 and np.ndim(w) == 1
    n_samples = 1 if single_sample else len(xs)
    return residuals.dot(xs) / n_samples

def mse(xs, ys, w):
    """Return the mean squared error of the linear model ``xs.dot(w)`` against ``ys``."""
    targets = np.asarray(ys)
    predictions = xs.dot(w)
    residuals = np.asarray(predictions - targets)
    return np.mean(np.square(residuals))

def plot_fcts(xs, ys, w):
    """Plot the data points (blue) and the fitted line sampled at 200 points (red).

    ``xs`` is a 2-D design matrix whose last column is the feature plotted on
    the horizontal axis; ``w`` holds the weights applied to
    ``[1, feature]``.
    """
    feature = xs[:, -1]
    grid = np.linspace(min(feature), max(feature), 200)
    # Same layout as the training design matrix: a column of ones, then x.
    design = np.vstack((np.ones_like(grid), grid)).T

    plt.plot(design[:, -1], design.dot(w), 'ro',
             feature, ys, 'bo')
    plt.gcf().set_size_inches(18.5, 10.5)
    plt.show()

In [6]:
def train(df_train, df_val, w, learning_rate=1e-6, print_=False, *,
          max_epochs=None, tolerance=1e-5):
    """Fit ``y = w[0] + w[1] * x`` by stochastic gradient descent.

    Every step draws one random training sample, applies a gradient update,
    and records the MSE on the full training and validation sets. Every
    ``patience`` steps the last ``patience`` validation MSEs are split into
    two halves; training stops once the mean of the older half exceeds the
    mean of the newer half by less than ``tolerance``.

    Parameters
    ----------
    df_train, df_val : pandas.DataFrame
        Frames with ``x`` and ``y`` columns.
    w : array-like
        Initial weights ``[intercept, slope]``. The input is copied and is
        not modified.
    learning_rate : float
        Step size of each gradient update.
    print_ : bool
        When true, print the step number and plot the current fit every
        ``patience`` steps.
    max_epochs : int or None, keyword-only
        Optional upper bound on the number of update steps; ``None`` means
        no bound.
    tolerance : float, keyword-only
        Minimum improvement between the two halves of the window that counts
        as still learning.

    Returns
    -------
    tuple
        ``(all_mse_tr, all_mse_val, w, epoch)`` where the two lists hold one
        MSE per step, ``w`` is the final weight vector and ``epoch`` is the
        zero-based index of the last step (so each list has ``epoch + 1``
        entries).
    """
    # Design matrices: a column of ones (intercept) followed by the x column.
    xs, ys = df_train.x.values, df_train.y.values
    xs = np.vstack((np.ones_like(xs), xs)).T
    x_val, y_val = df_val.x.values, df_val.y.values
    x_val = np.vstack((np.ones_like(x_val), x_val)).T

    # Work on a float copy so the caller's array is never updated in place.
    w = np.array(w, dtype=float)

    half_window = patience // 2
    all_mse_val, all_mse_tr = [], []
    epoch = 0

    while True:
        # One randomly chosen sample per update (stochastic gradient descent).
        position = np.random.randint(0, len(xs))
        w -= learning_rate * loss(xs[position], ys[position], w)

        current_mse_val, current_mse_tr = mse(x_val, y_val, w), mse(xs, ys, w)
        all_mse_val.append(current_mse_val)
        all_mse_tr.append(current_mse_tr)

        # A diverged run yields inf/NaN, and any comparison against NaN is
        # False, so the plateau test below would never fire. Stop right away.
        if not np.isfinite(current_mse_val):
            return all_mse_tr, all_mse_val, w, epoch

        # Evaluate the stopping rule once every `patience` steps.
        if epoch % patience == 0:
            if print_:
                print("Epoch", epoch)
                plot_fcts(xs, ys, w)
            if epoch >= patience:
                older = np.mean(all_mse_val[-patience:-half_window])
                recent = np.mean(all_mse_val[-half_window:])
                if older - recent < tolerance:
                    # Validation MSE has plateaued (or is getting worse).
                    return all_mse_tr, all_mse_val, w, epoch

        if max_epochs is not None and epoch + 1 >= max_epochs:
            return all_mse_tr, all_mse_val, w, epoch

        epoch += 1

In [None]:
# Random starting weights in [0, 10): w_[0] multiplies the column of ones
# (intercept) and w_[1] multiplies x (slope).
w_ = np.random.rand(2)*10
# The second argument is the validation set: pass the held-out split so the
# stopping rule and the reported validation MSE are not computed on the
# training data.
all_mse_train_, all_mse_val_, weights_, epoch_ = train(df_train, df_val, w_, 1e-6)

In [None]:
# np.poly1d takes coefficients from the highest degree down, while the weights
# are stored as [intercept, slope]; reverse them so the printed polynomial
# matches the fitted model.
print(np.poly1d(weights_[::-1]))
# One point per training step: steps 0..epoch_ inclusive.
x_lin = np.arange(epoch_ + 1)
# Training MSE in red, validation MSE in blue.
plt.plot(x_lin, all_mse_train_, 'r', x_lin, all_mse_val_, 'b')
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)
plt.show()

## Part 2

### Different learning rate (2.a)

In [None]:
# Learning rates to compare. 1e-6 is left out on purpose because it was
# already trained in Part 1.
step_size = [
    1e-1,
    1e-2,
    1e-3,
    1e-4,
    1e-5,
    1e-7,
    1e-8,
]

In [None]:
# Per-model results, in the same order as step_size. `epoch` and `ws_all` are
# initialised here but not filled in this cell.
mse_val_all, coeffs, epoch, ws_all = [], [], [], []

for alpha in step_size:
    # Fresh random starting weights for every learning rate.
    weights = np.random.rand(2)*10
    # Validate on the held-out split rather than on the training data.
    mse_tr, mse_val, w_s, e = train(df_train, df_val, weights, alpha)
    print("For alpha %s (after %s epoch): MSE = %s" % (alpha, e, mse_val[-1]))
    mse_val_all.append(mse_val)
    coeffs.append(w_s)

# Add the Part 1 run (alpha = 1e-6) as the last entry, after all step_size runs.
mse_val_all.append(all_mse_val_)
coeffs.append(weights_)


### Optimal solution (2.b)

In [None]:
def get_train_mse(best_model_pos):
    """Return the MSE on the test split of the model at ``coeffs[best_model_pos]``.

    NOTE: despite the name, the evaluation uses ``df_test``, not the training
    data.
    """
    features = df_test.x.values
    targets = df_test.y.values
    # Column of ones (intercept) followed by x, as in training.
    design = np.vstack((np.ones_like(features), features)).T
    return mse(design, targets, coeffs[best_model_pos])

In [None]:
# Rank models by their final validation MSE. Taking min() over the raw
# histories would compare the lists lexicographically, i.e. by the MSE of the
# random initial weights. NaN entries (diverged runs) are ignored.
final_mses = [history[-1] for history in mse_val_all]
best_model = int(np.nanargmin(final_mses))
test_mse = get_train_mse(best_model)
# mse_val_all/coeffs hold one extra trailing entry for the Part 1 run
# (alpha = 1e-6), which is not listed in step_size.
alphas = list(step_size) + [1e-6]
print("Best MSE is %s with alpha = %s" % (test_mse, alphas[best_model]))

## Part 3

In [None]:
# Train with a large learning rate and plot the fit every `patience` steps.
# The second argument is the validation set, so pass the held-out split
# instead of reusing the training data.
done = train(df_train, df_val, np.random.rand(2)*10, 1e-1, True)