## Lecture 10: Linear Regression

In [23]:
import numpy as np
from scipy.optimize import fmin
import os

import matplotlib.pyplot as plt
from matplotlib import rc

# Global matplotlib styling applied to every figure in this notebook
plt.rcParams['xtick.labelsize']=20      # change the tick label size for x axis
plt.rcParams['ytick.labelsize']=20      # change the tick label size for y axis
plt.rcParams['axes.linewidth']=1        # change the line width of the axis
plt.rcParams['xtick.major.width'] = 3   # change the tick line width of x axis
plt.rcParams['ytick.major.width'] = 3   # change the tick line width of y axis
rc('text', usetex=False)                # disable LaTeX rendering in plots
rc('font',**{'family':'DejaVu Sans'})   # set the font of the plot to be DejaVu Sans

### 1. Three different error metrics

Let's generate some synthetic data to look at the three different error metrics.

In [None]:
# Ground-truth parameters of the line y = beta1*x + beta2
beta1 = 0.5 # True slope
beta2 = 2.0 # True intercept
x = np.arange(0,10,0.5)
# Seeded local generator so that Restart & Run All reproduces the same noisy
# samples (and therefore the same fitted coefficients) every time.
rng = np.random.default_rng(0)
y = beta1 * x + beta2 + 0.3*rng.standard_normal(x.shape) # Add Gaussian noise (std 0.3)

In [None]:
# Scatter the noisy samples as red circles (markers only, no connecting line)
plt.plot(x, y, linestyle='none', marker='o', color='r')

Let's use `scipy.optimize.fmin` to minimize each of the three error metrics.

`scipy.optimize.fmin` documentation: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.fmin.html

To use `scipy.optimize.fmin`, we will need to first define functions for it to minimize, so let's first write out the three error metrics as functions.

In [None]:
def E_inf(beta, data):
  """Maximum-error (infinity-norm) metric for the line beta[0]*x + beta[1].

  beta: indexable pair; beta[0] is the slope, beta[1] the intercept.
  data: (x, y) pair of sample locations and observed values.
  Returns the largest absolute residual over all samples.
  """
  xs, ys = data
  residual = beta[0]*xs + beta[1] - ys
  return np.abs(residual).max()

def E_1(beta, data):
  """Sum-of-absolute-errors (1-norm) metric for the line beta[0]*x + beta[1].

  beta: indexable pair; beta[0] is the slope, beta[1] the intercept.
  data: (x, y) pair of sample locations and observed values.
  Returns the sum of the absolute residuals over all samples.
  """
  xs, ys = data
  residual = beta[0]*xs + beta[1] - ys
  return np.abs(residual).sum()

def E_2(beta, data):
  """Least-squares metric for the line beta[0]*x + beta[1].

  beta: indexable pair; beta[0] is the slope, beta[1] the intercept.
  data: (x, y) pair of sample locations and observed values.
  Returns the sum of the squared residuals over all samples.
  """
  xs, ys = data
  magnitude = np.abs(beta[0]*xs + beta[1] - ys)
  return (magnitude**2).sum()

In [None]:
# Shared initial guess [slope, intercept] and data set for all three fits
beta_start = [1, 1]
data = (x, y)

# Minimize each error metric in turn (infinity norm, 1 norm, 2 norm)
beta_inf, beta_1, beta_2 = [
    fmin(metric, beta_start, args=(data,)) for metric in (E_inf, E_1, E_2)
]

# Compare the recovered coefficients with the ones used to generate the data
print("Based on infinity norm: ", beta_inf)
print("Based on 1 norm: ", beta_1)
print("Based on 2 norm: ", beta_2)
print("Real beta is: ", beta1, beta2)

Let's now plot the lines

In [None]:
# Raw samples as red markers, then one line per error metric on top of them
plt.plot(x, y, linestyle='none', marker='o', color='r', label="data")
plt.plot(x, beta_inf[0]*x + beta_inf[1], linestyle='-.', label="E_inf")
plt.plot(x, beta_1[0]*x + beta_1[1], linestyle='--', label="E_1")
plt.plot(x, beta_2[0]*x + beta_2[1], linestyle='-', label="E_2")
plt.legend(fontsize=18)

### 2. Solving Least Squares Analytically using `numpy.linalg.solve`

https://numpy.org/doc/stable/reference/generated/numpy.linalg.solve.html

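First, we need to build our $Ax=b$ equation. For the line $y = \beta_1 x + \beta_2$, the unknown vector is $\beta = (\beta_1, \beta_2)^T$, and setting the gradient of $E_2$ to zero gives the normal equations $A\beta = b$:

$$
\begin{bmatrix} \sum_i x_i^2 & \sum_i x_i \\ \sum_i x_i & n \end{bmatrix}
\begin{bmatrix} \beta_1 \\ \beta_2 \end{bmatrix}
=
\begin{bmatrix} \sum_i x_i y_i \\ \sum_i y_i \end{bmatrix}
$$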

In [None]:
# Normal equations of the least-squares line:
# A holds the sums over x, b holds the sums involving y (as a 2x1 column).
A = np.array([
    [(x**2).sum(), x.sum()],
    [x.sum(), len(x)],
])
b = np.array([(x*y).sum(), y.sum()]).reshape(2, 1)
beta = np.linalg.solve(A, b)

# Show the system, its solution, and the fmin least-squares result for comparison
print(A, b, beta, beta_2, sep="\n")

### 3. Fitting Exponential Data

We will try two different ways to fit the data: first, we will use `fmin` to directly optimize over the original data set; second, we will use `fmin` to optimize over the log of the original data set.

In [24]:
# Ground-truth parameters of the curve y = beta2 * exp(beta1 * x)
beta1 = 0.5 # True growth rate
beta2 = 2.0 # True amplitude (value at x = 0)
x = np.arange(0,10,0.5)
# Seeded local generator so that Restart & Run All reproduces the same noisy
# samples (and therefore the same fitted coefficients) every time.
rng = np.random.default_rng(1)
y = beta2 * np.exp(x*beta1) + 0.3*rng.standard_normal(x.shape) # Add Gaussian noise (std 0.3)

In [None]:
def E_2_exp(beta, data):
  """Least-squares error of the exponential model beta[1]*exp(beta[0]*x).

  beta: indexable pair; beta[0] is the growth rate, beta[1] the amplitude.
  data: (x, y) pair of sample locations and observed values.
  Returns the sum of the squared residuals over all samples.
  """
  x, y = data
  return np.sum((beta[1]*np.exp(beta[0]*x) - y)**2)

beta_start = [1, 1]
data = x, y
# np.log silently yields nan/-inf for non-positive samples, which would corrupt the log fit
assert np.all(y > 0), "log fit requires strictly positive y values"
log_data = x, np.log(y)

# Direct fit: minimize the squared error of the exponential model on the raw data.
# (The linear E_2 would fit a straight line here, not an exponential.)
beta_2_exp = fmin(E_2_exp, beta_start, args=(data,))
# Log fit: log(y) = beta[0]*x + log(amplitude) is linear in x, so the linear E_2
# applies; beta_2_log[1] is log(amplitude) and is exponentiated before use.
beta_2_log = fmin(E_2, beta_start, args=(log_data,))
print("Exponential Fit: ", beta_2_exp)
print("Log Fit: ", beta_2_log[0], np.exp(beta_2_log[1]))

plt.plot(x, y, 'o', label = "Data")
plt.plot(x, beta_2_exp[1]*np.exp(x*beta_2_exp[0]), label = "Exponential Fit")
plt.plot(x, np.exp(beta_2_log[1])*np.exp(x*beta_2_log[0]), label = "Log Fit")
plt.legend(fontsize = 18)