## Problem 1

Create your own GitHub repository and a GitHub webpage where you can include information about you and your data projects and research interests. A GitHub webpage should be visible via an internet browser at the address yourname.github.io/projectname where "your name" represents your user name for GitHub and "project name" is the name you choose for your project/webspace.

https://willcameron2002.github.io/DATA441/

## Problem 2

Implement a gradient descent method for Ridge Regression by using the PyTorch library. Your implementation should be a class that has the required methods “.fit” and “.predict”. You should include an application of your code to a data set.

In [1]:
import torch
import pandas as pd
from sklearn.utils.extmath import safe_sparse_dot
from sklearn.model_selection import train_test_split as tts
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

In [2]:
class Ridge():

  def fit(self, x, y, iterations = 1000, learning_rate = .0001, penalty = 2):

    self.weights = torch.ones(x.shape[1])
    self.bias = 0

    for i in range(iterations):
      if i % 250 == 0:
        print("Iteration: " + str(i))
      y_pred = self.predict(x)
      w_grad = (- (2 * torch.mm(torch.transpose(x,0,1),torch.sub(y, y_pred)[0].view(-1,1))) + (2 * penalty * self.weights)) / x.shape[0]
      w_grad = torch.transpose(w_grad,0,1)[0]
      b_grad = - 2 * torch.sum(torch.sub(y, y_pred)) / x.shape[0]

      self.weights = torch.sub(self.weights, learning_rate * w_grad)
      self.bias = torch.sub(self.bias, learning_rate * b_grad)
      #print(self.weights)
      #print(self.bias)
    return

  def predict(self, x):
    return torch.mm(x.double(), self.weights.double().view(-1,1)) + self.bias

In [3]:
data = pd.read_csv('drive/My Drive/Adv. App. Machine Learning/cars.csv')
data.head(2)

Unnamed: 0,MPG,CYL,ENG,WGT
0,18.0,8,307.0,3504
1,15.0,8,350.0,3693


In [4]:
x = torch.tensor(data.drop(columns=['MPG']).values)
y = torch.tensor(data['MPG'].values)

In [5]:
x_train, x_test, y_train, y_test = tts(x, y, test_size = .25, random_state = 123)
scaler = StandardScaler()
scaler.fit(x)
x_train, x_test = torch.tensor(scaler.transform(x_train)), torch.tensor(scaler.transform(x_test))

In [6]:
model = Ridge()
model.fit(x_train, y_train, iterations = 2500)

Iteration: 0
Iteration: 250
Iteration: 500
Iteration: 750
Iteration: 1000
Iteration: 1250
Iteration: 1500
Iteration: 1750
Iteration: 2000
Iteration: 2250


In [7]:
y_pred = model.predict(x_test)
print("Score: " + str(r2_score(y_test, y_pred)))

Score: 0.6487461624024191


## Problem 3

Complete the exercise provided in the Application to Locally Weighted Regression notebook and test the method on a data set, for example, the one provided in class.

In [107]:
from sklearn.utils.validation import check_is_fitted
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
import numpy as np

In [108]:
# Gaussian Kernel
def Gaussian(x):
  return np.where(np.abs(x)>4,0,1/(np.sqrt(2*np.pi))*np.exp(-1/2*x**2))

In [109]:
# Tricubic Kernel, this is the correct vectorized version
def tricubic(x):
  return np.where(np.abs(x)>1,0,(1-np.abs(x)**3)**3)

In [110]:
# Epanechnikov Kernel
def Epanechnikov(x):
  return np.where(np.abs(x)>1,0,3/4*(1-np.abs(x)**2))

In [111]:
# Quartic Kernel
def Quartic(x):
  return np.where(np.abs(x)>1,0,15/16*(1-np.abs(x)**2)**2)

In [112]:
class Lowess:
    def __init__(self, kernel = Gaussian, tau=0.05):
        self.kernel = kernel
        self.tau = tau

    def fit(self, x, y):
        kernel = self.kernel
        tau = self.tau
        # w = weights_matrix(x,x,kernel,tau)
        # if np.isscalar(x):
        #   lm.fit(np.diag(w).dot(x.reshape(-1,1)),np.diag(w).dot(y.reshape(-1,1)))
        #   yest = lm.predict([[x]])[0][0]
        # else:
        #   n = len(x)
        #   yest = np.zeros(n)
        #   #Looping through all x-points
        #   for i in range(n):
        #     lm.fit(np.diag(w[i,:]).dot(x.reshape(-1,1)),np.diag(w[i,:]).dot(y.reshape(-1,1)))
        #     yest[i] = lm.predict(x[i].reshape(-1,1))
        self.xtrain_ = x
        self.yhat_ = y

    def dist(self, u, v):
      D = []
      if len(v.shape)==1:
        v = v.reshape(1,-1)
      for rowj in v:
        D.append(np.sqrt(np.sum((u-rowj)**2,axis=1)))
      return np.array(D).T

    def weights_matrix(self, u, v, kern = Gaussian, tau = 0.5):
      return kern(self.dist(u,v)/(2*tau))

    def predict(self, x_new):
        check_is_fitted(self)
        x = self.xtrain_
        y = self.yhat_
        lm = linear_model.Ridge(alpha=0.001)
        w = self.weights_matrix(x, x_new, self.kernel, self.tau)

        if np.isscalar(x_new):
          lm.fit(np.diag(w).dot(x.reshape(-1,1)),np.diag(w).dot(y.reshape(-1,1)))
          yest = lm.predict([[x_new]])[0][0]
        else:
          n = len(x_new)
          yest_test = np.zeros(n)
          #Looping through all x-points
          for i in range(n):
            #print(np.diag(w[i,:]).shape)
            lm.fit(np.diag(w[:,i])@x,np.diag(w[:,i])@y)
            yest_test[i] = lm.predict(x_new[i].reshape(1,-1))
        return yest_test

In [113]:
data = pd.read_csv('drive/My Drive/Adv. App. Machine Learning/cars.csv')
data.head(2)

Unnamed: 0,MPG,CYL,ENG,WGT
0,18.0,8,307.0,3504
1,15.0,8,350.0,3693


In [114]:
x = data.drop(columns=['MPG']).values
y = data['MPG'].values
x_scaled = scaler.fit_transform(x)

In [115]:
model = Lowess()

In [116]:
model.fit(x_scaled, y)

In [117]:
y_pred = model.predict(x_scaled)

In [118]:
mean_squared_error(y_pred, y)

9.345185161941703