In [1]:
import numpy as np
import kaggle
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import Ridge
import csv

In [2]:
################################################################
# load housing data
def load_housing_data():
    train_x = np.load('../../Data/Housing/train_x.npy')
    train_y = np.load('../../Data/Housing/train_y.npy')
    test_x = np.load('../../Data/Housing/test_x.npy')
    return train_x, train_y, test_x
################################################################

In [3]:
################################################################
# Compute MSE
def compute_MSE(y, y_hat):
        # mean squared error
        return np.mean(np.power(y - y_hat, 2))
################################################################

In [4]:
train_x, train_y, test_x = load_housing_data()

In [5]:
train_y = train_y.reshape(len(train_y),1)

In [6]:
kf = KFold(n_splits=5,shuffle=True, random_state=69)
models = []
rows = []

In [22]:
parameters = [['rbf',1,0.001],['poly',1,0.001],['linear',1,0.001],['rbf',1,0.01],['poly',1,0.01],['rbf',1,0.1],['poly',1,0.1],['rbf',1,1],['poly',1,1],['rbf',0.1,0.001],['poly',0.1,0.001],['linear',0.1,0.001],['rbf',0.1,0.01],['poly',0.1,0.01],['rbf',0.1,0.1],['poly',0.1,0.1],['rbf',0.1,1],['poly',0.1,1],['rbf',0.01,0.001],['poly',0.01,0.001],['linear',0.01,0.001],['rbf',0.01,0.01],['poly', 0.01,0.01],['rbf', 0.01,0.1],['poly', 0.01,0.1],['rbf', 0.01,1],['poly', 0.01,1],['rbf',0.001,0.001],['poly',0.001,0.001],['linear',0.001,0.001],['rbf',0.001,0.01],['poly', 0.001,0.01],['rbf', 0.001,0.1],['poly', 0.001,0.1],['rbf', 0.001,1],['poly', 0.001,1],['rbf', 0.005,0.001],['poly', 0.005,0.001],['linear', 0.005,0.001],['rbf', 0.005,0.01],['poly', 0.005,0.01],['rbf', 0.005,0.1],['poly', 0.005,0.1],['rbf', 0.005,1],['poly', 0.005,1]]

In [24]:
for i in range(len(parameters)):
    kernel = KernelRidge(kernel=parameters[i][0],alpha=parameters[i][1],gamma=parameters[i][2])
    accuracy = []
    #K-Fold Cross Validation
    for train_index, test_index in kf.split(train_x):
        x_train, x_test = train_x[train_index], train_x[test_index]
        y_train, y_test = train_y[train_index], train_y[test_index]
        #Fit model on each fold
        kernel.fit(x_train, y_train)
        #Predict on testing fold
        predict = kernel.predict(x_test)
        predict = predict.reshape(len(predict),1)
        #Accuracy between prediction and true value for each K-Fold Validation
        accuracy.append(compute_MSE(y_test, predict))
    mean = np.average(accuracy)
    if(parameters[i][0] == 'rbf'):
        print("Kernel = " + parameters[i][0] + ", Alpha = " + str(parameters[i][1]) + ", Gamma = " + str(parameters[i][2]))
        print("5-Fold Cross Validation MSE = " + str(mean))
    elif(parameters[i][0] == 'poly'):
        print("Kernel = " + parameters[i][0] + ", Alpha = " + str(parameters[i][1]) + ", Gamma = " + str(parameters[i][2]))
        print("5-Fold Cross Validation MSE = " + str(mean))
    else:
        print("Kernel = " + parameters[i][0] + ", Alpha = " + str(parameters[i][1]))
        print("5-Fold Cross Validation MSE = " + str(mean))
    models.append(mean)

Kernel = rbf, Alpha = 1, Gamma = 0.001
5-Fold Cross Validation MSE = 174.27503310113084
Kernel = poly, Alpha = 1, Gamma = 0.001
5-Fold Cross Validation MSE = 158.92865393875792
Kernel = linear, Alpha = 1
5-Fold Cross Validation MSE = 115.14977572790141
Kernel = rbf, Alpha = 1, Gamma = 0.01
5-Fold Cross Validation MSE = 113.3737962042439
Kernel = poly, Alpha = 1, Gamma = 0.01
5-Fold Cross Validation MSE = 107.22513571450438
Kernel = rbf, Alpha = 1, Gamma = 0.1
5-Fold Cross Validation MSE = 26.344429730541037
Kernel = poly, Alpha = 1, Gamma = 0.1
5-Fold Cross Validation MSE = 19.84477072610027
Kernel = rbf, Alpha = 1, Gamma = 1
5-Fold Cross Validation MSE = 20.419190056252777
Kernel = poly, Alpha = 1, Gamma = 1
5-Fold Cross Validation MSE = 15.598488199916767
Kernel = rbf, Alpha = 0.1, Gamma = 0.001
5-Fold Cross Validation MSE = 122.07345975117894
Kernel = poly, Alpha = 0.1, Gamma = 0.001
5-Fold Cross Validation MSE = 118.35332004484128
Kernel = linear, Alpha = 0.1
5-Fold Cross Validatio

In [7]:
final = KernelRidge(kernel='rbf',alpha=0.01,gamma=1)

In [8]:
final.fit(train_x, train_y)

KernelRidge(alpha=0.01, coef0=1, degree=3, gamma=1, kernel='rbf',
            kernel_params=None)

In [9]:
y_1 = final.predict(train_x)
y_1 = y_1.reshape(len(y_1),1)
y_1 = y_1.astype(int)

In [10]:
compute_MSE(train_y, y_1)

8.268932806324111

In [11]:
def kaggleize(predictions,file,float_flag):

	if(len(predictions.shape)==1):
		predictions.shape = [predictions.shape[0],1]

	ids = 1 + np.arange(predictions.shape[0])[None].T
	if float_flag:
		kaggle_predictions = np.hstack((ids,predictions)).astype(float)
	else:
		kaggle_predictions = np.hstack((ids,predictions)).astype(int)

	writer = csv.writer(open(file, 'w'))
	if predictions.shape[1] == 1:
		writer.writerow(['# id','Prediction'])
	elif predictions.shape[1] == 2:
		writer.writerow(['# id','Prediction1', 'Prediction2'])
	writer.writerows(kaggle_predictions)

In [12]:
kaggleize(final.predict(test_x),'submission.csv',True)