In [None]:
#Imports

import numpy as np
import scipy.io as sio
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.decomposition import PCA
from PIL import Image

from sklearn.linear_model import LinearRegression
import numpy as np

In [None]:
# Read the MATLAB dataset file and pull out the feature and label arrays

data = sio.loadmat('dataset')
x = data['data']
# Labels are reshaped into a single column, i.e. shape (n_samples, 1)
y = data['label'].reshape(-1, 1)

In [None]:
# Load the three saved feature arrays (xx, xy, xz) from their .npy files

xx, xy, xz = (
    np.load('test_xx_dino_s.npy'),
    np.load('test_xy_dino_s.npy'),
    np.load('test_xz_dino_s.npy'),
)

In [None]:
# Two ways of combining the three feature sets:
#   x_concat : the arrays joined along axis 1
#   x_mean   : their element-wise average

x_concat = np.concatenate([xx, xy, xz], axis=1)
x_mean = np.mean(np.stack((xx, xy, xz)), axis=0)

In [None]:
# Running PCA and polynomial regression for specified number of components and degree of polynomial. 
# Used specifically for hyperparameter tuning on the training and validation sets. 

def fit_pcr_model_search(x_0,y_0,n=10,order=2,scree=False):
    """Fit PCA + polynomial regression on an internal train/validation split.

    The input is split 80/20 with a fixed ``random_state=32``. PCA is fitted
    on the training part only, the reduced features are expanded with
    ``PolynomialFeatures`` and a ``LinearRegression`` model is fitted on the
    expanded training features.

    Parameters
    ----------
    x_0 : array-like
        Feature matrix that is split into training and validation parts.
    y_0 : array-like
        Targets aligned with ``x_0``.
    n : int, default 10
        Passed to ``PCA`` as ``n_components``.
    order : int, default 2
        Degree passed to ``PolynomialFeatures``.
    scree : bool, default False
        Not used by this function; kept so existing calls keep working.

    Returns
    -------
    tuple
        ``(mape_train, mape_val)``: mean absolute percentage error, expressed
        as a fraction (not multiplied by 100), on the training and validation
        parts. A target equal to zero makes the ratio a division by zero.
    """
    x_train, x_val, y_train, y_val = train_test_split(
        x_0, y_0, test_size=0.2, random_state=32
    )

    # Fit the projection on the training part only, then apply it to both parts.
    pca = PCA(
        svd_solver='full',
        n_components=n,
        random_state=10
    ).fit(x_train)

    x_train_red = pca.transform(x_train)
    x_val_red = pca.transform(x_val)

    # Diagnostic output: shape of the reduced training matrix.
    print(x_train_red.shape)

    # Fit the polynomial expansion on the training part and reuse the fitted
    # transformer for validation rather than refitting it on held-out data.
    poly = PolynomialFeatures(order)
    x_train_poly = poly.fit_transform(x_train_red)
    x_val_poly = poly.transform(x_val_red)

    model = LinearRegression().fit(x_train_poly, y_train)

    # Divide by |y| so that negative targets cannot contribute negative terms
    # and cancel out positive errors in the mean.
    y_hat = model.predict(x_train_poly)
    mape_train = np.mean(np.abs(y_hat - y_train) / np.abs(y_train))

    y_hat = model.predict(x_val_poly)
    mape_val = np.mean(np.abs(y_hat - y_val) / np.abs(y_val))

    return mape_train, mape_val

In [None]:
# Running PCA and polynomial regression for specified number of components and degree of polynomial. 
# Used specifically for obtaining the final MAPE value on the test set.

def fit_pcr_model_test(x_train,y_train,x_test,y_test,n=10,order=2,scree=False):
    """Fit PCA + polynomial regression on a training set and score a test set.

    PCA is fitted on ``x_train`` only, the reduced features are expanded with
    ``PolynomialFeatures`` and a ``LinearRegression`` model is fitted on the
    expanded training features. The fitted PCA and polynomial transformers
    are then applied to ``x_test`` for evaluation.

    Parameters
    ----------
    x_train : array-like
        Training feature matrix.
    y_train : array-like
        Targets aligned with ``x_train``.
    x_test : array-like
        Test feature matrix.
    y_test : array-like
        Targets aligned with ``x_test``.
    n : int, default 10
        Passed to ``PCA`` as ``n_components``.
    order : int, default 2
        Degree passed to ``PolynomialFeatures``.
    scree : bool, default False
        Not used by this function; kept so existing calls keep working.

    Returns
    -------
    tuple
        ``(mape_train, mape_test)``: mean absolute percentage error, expressed
        as a fraction (not multiplied by 100), on the training and test sets.
        A target equal to zero makes the ratio a division by zero.
    """
    # Fit the projection on the training set only, then apply it to both sets.
    pca = PCA(
        svd_solver='full',
        n_components=n,
        random_state=10
    ).fit(x_train)

    x_train_red = pca.transform(x_train)
    x_test_red = pca.transform(x_test)

    # Diagnostic output: shape of the reduced training matrix.
    print(x_train_red.shape)

    # Fit the polynomial expansion on the training set and reuse the fitted
    # transformer for the test set rather than refitting it on held-out data.
    poly = PolynomialFeatures(order)
    x_train_poly = poly.fit_transform(x_train_red)
    x_test_poly = poly.transform(x_test_red)

    model = LinearRegression().fit(x_train_poly, y_train)

    # Divide by |y| so that negative targets cannot contribute negative terms
    # and cancel out positive errors in the mean.
    y_hat = model.predict(x_train_poly)
    mape_train = np.mean(np.abs(y_hat - y_train) / np.abs(y_train))

    y_hat = model.predict(x_test_poly)
    mape_test = np.mean(np.abs(y_hat - y_test) / np.abs(y_test))

    return mape_train, mape_test

In [None]:
# Hold out 10% of the concatenated features as the final test set

x_0, x_test, y_0, y_test = train_test_split(
    x_concat, y, test_size=0.1, random_state=11
)

# Grid search over the number of PCA components (1 to 59) with polynomial order 2
for num_comp in range(1, 60):
    mape_train, mape_val = fit_pcr_model_search(x_0, y_0, num_comp, 2)
    print(num_comp, mape_train, mape_val)


In [None]:
# Final test MAPE using the best known number of components from the grid search

# Concatenated data: 30 PCA components, polynomial order 2
mape_train, mape_test = fit_pcr_model_test(
    x_0, y_0, x_test, y_test, n=30, order=2
)
print(30, mape_train, mape_test)

In [None]:
# Hold out 10% of the averaged features as the final test set

x_0_m, x_test_m, y_0_m, y_test_m = train_test_split(
    x_mean, y, test_size=0.1, random_state=11
)

# Grid search over the number of PCA components (1 to 59) with polynomial order 2.
# fit_pcr_model_search scores its own internal validation split, so the second
# returned value is a validation MAPE, not a test MAPE.
for num_comp in range(1, 60):
    mape_train, mape_val = fit_pcr_model_search(x_0_m, y_0_m, num_comp, 2)
    print(num_comp, mape_train, mape_val)

In [None]:
# Final test MAPE using the best known number of components from the grid search

# Mean data: 16 PCA components, polynomial order 2
mape_train, mape_test = fit_pcr_model_test(
    x_0_m, y_0_m, x_test_m, y_test_m, n=16, order=2
)
print(16, mape_train, mape_test)