In [62]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import r2_score
from sklearn import linear_model
from sklearn.kernel_ridge import KernelRidge
import matplotlib as plt
from sklearn.utils._testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning
from sklearn.ensemble import IsolationForest

In [45]:
# Fold count; presumably meant for k-fold cross-validation, although no
# visible cell references it yet.
N_FOLDS = 10

# Regularisation strengths tried by the kernel/lambda search; each value
# is forwarded to KernelRidge as ``alpha``.
lambdas = [
    0.01, 0.1,
    1, 10,
    100, 1000,
]

In [46]:
# Read the training features, the training targets and the test features
# from CSV files in the working directory (in that order).
df_x_train, df_y_train, df_x_test = (
    pd.read_csv(csv_name)
    for csv_name in ('X_train.csv', 'y_train.csv', 'X_test.csv')
)

In [47]:
# Drop the first column of every frame (presumably a row id -- confirm
# against the CSV headers) and keep the remaining columns as NumPy arrays.
x_train_raw, y_train_raw, x_test_raw = (
    frame.iloc[:, 1:].to_numpy()
    for frame in (df_x_train, df_y_train, df_x_test)
)

In [48]:
# Learn the per-column medians from the training features only and reuse
# them for the test features. Fitting a second imputer on the test set
# would fill test gaps with test-set medians, so the two matrices would be
# preprocessed inconsistently (and a column that is entirely missing in
# only one of the sets would be dropped from that set alone).
imputer = SimpleImputer(strategy='median')
x_imp_train = imputer.fit_transform(x_train_raw)
x_imp_test = imputer.transform(x_test_raw)




In [49]:
def fit(X, y, lam, kernel='linear'):
    """Fit a kernel ridge regressor on ``(X, y)``.

    Args:
        X: Training inputs, passed unchanged to ``KernelRidge.fit``.
        y: Training targets, passed unchanged to ``KernelRidge.fit``.
        lam: Regularisation strength, forwarded as ``alpha``.
        kernel: Kernel name forwarded to ``KernelRidge``. Defaults to
            ``'linear'``; the previous default was the misspelling
            ``'linrar'``, which is not a valid kernel name.

    Returns:
        Whatever ``KernelRidge.fit`` returns (the fitted estimator).
    """
    reg = KernelRidge(alpha=lam, kernel=kernel)
    return reg.fit(X, y)

In [50]:
def score(model, X, y):
    """Return the R^2 of ``model``'s predictions for ``X`` against ``y``.

    ``model`` only needs a ``predict`` method; the result is whatever
    ``r2_score(y, model.predict(X))`` evaluates to.
    """
    return r2_score(y, model.predict(X))

In [51]:
def train(X, y, lam, kernel='linear', test_size=0.2, random_state=42):
    """Fit on a train split and return the R^2 on the held-out split.

    Args:
        X: Feature matrix to split.
        y: Targets aligned with ``X``.
        lam: Regularisation strength handed to ``fit``.
        kernel: Kernel name handed to ``fit``.
        test_size: Fraction of rows held out for scoring (default 0.2).
        random_state: Seed for the split, so repeated calls compare
            hyper-parameters on the same partition (default 42).

    Returns:
        The value returned by ``score`` for the held-out rows.
    """
    # Function-scope import keeps this cell self-contained; the stdlib
    # filter replaces the decorator from the private module
    # ``sklearn.utils._testing``, which is not a public API.
    import warnings

    with warnings.catch_warnings():
        # Same filter as before: only ConvergenceWarning is silenced.
        warnings.simplefilter("ignore", category=ConvergenceWarning)
        x_train, x_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )
        model = fit(x_train, y_train, lam, kernel)
        return score(model, x_test, y_test)
    


In [60]:
def outlierRemoval(X, contamination=0.1, randomstate=42):
    """Label every row of ``X`` with an ``IsolationForest``.

    Despite the name, no rows are removed: the function returns the
    forest's per-row labels so the caller can filter on them.

    Args:
        X: Samples to fit the forest on and to label.
        contamination: Forwarded to ``IsolationForest(contamination=...)``.
        randomstate: Forwarded to ``IsolationForest(random_state=...)``.

    Returns:
        The array produced by ``IsolationForest.fit_predict(X)``; the
        recorded run shows the labels ``-1`` and ``1``.
    """
    rem = IsolationForest(contamination=contamination, random_state=randomstate)
    # One fit_predict replaces fit + two predict calls on the same data
    # (the first prediction was stored in an unused local).
    return rem.fit_predict(X)

In [68]:
# Label the imputed training rows and tally how many fall under each label.
print(np.shape(x_imp_train))
x = outlierRemoval(x_imp_train)
print(np.shape(x))
unique, counts = np.unique(x, return_counts=True)
{label: count for label, count in zip(unique, counts)}

(1212, 832)
(1212,)


{-1: 122, 1: 1090}

In [59]:
if __name__ == '__main__':
    # Exhaustive search over (kernel, lambda): every pair is scored with
    # ``train`` and the pair with the highest score is remembered.
    LAMBDAS_LASSO = [170, 180]  # not used by this search; kept so the name stays defined
    # The lambda grid is the ``lambdas`` list from the configuration cell;
    # the identical literal that was repeated here has been dropped so the
    # grid is defined in one place only.
    best_kernel = ''
    # -inf guarantees the first evaluated score becomes the running best,
    # however bad it is (a fixed sentinel such as -9999999 would not).
    best_score = float('-inf')
    best_lambda = 0
    kernels = ['linear', 'polynomial', 'rbf', 'sigmoid', 'laplacian', 'cosine']
    for k in kernels:
        for l in lambdas:
            print("Kernel: ", k, "Lambda: ", l)
            s = train(x_imp_train, y_train_raw, l, k)
            print("Score: ", s)
            # Strict '>' keeps the earliest pair when scores tie.
            if s > best_score:
                best_score = s
                best_kernel = k
                best_lambda = l

    print("Best Kernel: ", best_kernel, "Best Lambda: ", best_lambda, "Best Score: ", best_score)

Kernel:  linear Lambda:  0.01




Score:  -20.825729255200553
Kernel:  linear Lambda:  0.1




Score:  -20.825729255200553
Kernel:  linear Lambda:  1
Score:  -20.825729255200553
Kernel:  linear Lambda:  10




Score:  -20.825729255200553
Kernel:  linear Lambda:  100




Score:  -20.825729255200553
Kernel:  linear Lambda:  1000




Score:  -20.825729255200553
Kernel:  polynomial Lambda:  0.01




Score:  -50.63757598813709
Kernel:  polynomial Lambda:  0.1




Score:  -50.63757598813709
Kernel:  polynomial Lambda:  1




Score:  -50.63757598813709
Kernel:  polynomial Lambda:  10




Score:  -50.63757598813709
Kernel:  polynomial Lambda:  100




Score:  -50.63757598813709
Kernel:  polynomial Lambda:  1000




Score:  -50.63757598813709
Kernel:  rbf Lambda:  0.01
Score:  -54.738194521181676
Kernel:  rbf Lambda:  0.1
Score:  -54.738381410167435
Kernel:  rbf Lambda:  1
Score:  -54.74953787693997
Kernel:  rbf Lambda:  10
Score:  -54.84560420358841
Kernel:  rbf Lambda:  100
Score:  -54.909297946306914
Kernel:  rbf Lambda:  1000
Score:  -54.918633994082526
Kernel:  sigmoid Lambda:  0.01
Score:  -119285.70755625289
Kernel:  sigmoid Lambda:  0.1




Score:  -1200.9690755086415
Kernel:  sigmoid Lambda:  1
Score:  -21.031512351594206
Kernel:  sigmoid Lambda:  10




Score:  -3.1907848594041823
Kernel:  sigmoid Lambda:  100
Score:  -3.7378652229976277
Kernel:  sigmoid Lambda:  1000
Score:  -16.748642812490658
Kernel:  laplacian Lambda:  0.01
Score:  -54.919718266914664
Kernel:  laplacian Lambda:  0.1
Score:  -54.919718266914664
Kernel:  laplacian Lambda:  1
Score:  -54.919718266914664
Kernel:  laplacian Lambda:  10
Score:  -54.919718266914664
Kernel:  laplacian Lambda:  100
Score:  -54.919718266914664
Kernel:  laplacian Lambda:  1000
Score:  -54.919718266914664
Kernel:  cosine Lambda:  0.01
Score:  -2.872531672774701
Kernel:  cosine Lambda:  0.1
Score:  -2.8727996575222754
Kernel:  cosine Lambda:  1
Score:  -2.8755297746306177
Kernel:  cosine Lambda:  10
Score:  -2.9077712468465178
Kernel:  cosine Lambda:  100
Score:  -3.638663938833326
Kernel:  cosine Lambda:  1000
Score:  -18.21448643307643
Best Kernel:  cosine Best Lambda:  0.01 Best Score:  -2.872531672774701
