In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler
from sklearn.gaussian_process import GaussianProcessRegressor, GaussianProcessClassifier
from sklearn.metrics import mean_squared_error, mean_absolute_error, auc, f1_score, balanced_accuracy_score, roc_curve
from sklearn.model_selection import train_test_split
from math import sqrt

from sklearn.datasets import load_boston, load_breast_cancer
from sklearn.gaussian_process.kernels import *

<h4>Regression (Boston housing dataset)</h4>

In [5]:
# loading the problem
X, y = load_boston(return_X_y=True)
X = pd.DataFrame(X)

scaler = MinMaxScaler()
y = scaler.fit_transform(y.reshape(-1, 1))

In [6]:
# creating the train and validation datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [7]:
#https://scikit-learn.org/stable/modules/classes.html#module-sklearn.gaussian_process
#https://scikit-learn.org/stable/modules/gaussian_process.html#gp-kernels
kernels = {'Dot-Product and White': DotProduct() + WhiteKernel(), \
           'RBF and White': RBF() + WhiteKernel(), \
           'RBF': RBF(), \
           'Rational Quadratic': RationalQuadratic(), \
           'Rational Quadratic and White': RationalQuadratic() + WhiteKernel(), \
           'Matern': Matern(), \
           'Matern and White': Matern() + WhiteKernel(),\
           'Constant': ConstantKernel(), \
           'Constant and White': ConstantKernel() + WhiteKernel()}

In [8]:
# calculating the metrics
df_results_regression = pd.DataFrame(columns=['Kernel', 'MAE', 'RMSE'])

for kernel in kernels:
    gpr = GaussianProcessRegressor(kernel=kernels[kernel], random_state=0, normalize_y=True).fit(X_train, y_train)
    predictions = gpr.predict(X_test)
    mae = mean_absolute_error(y_test, predictions)
    rmse = sqrt(mean_squared_error(y_test, predictions))
    df_results_regression = df_results_regression.append({'Kernel': kernel, \
                                                          'MAE': mae, 'RMSE': rmse}, ignore_index=True)

df_results_regression

Unnamed: 0,Kernel,MAE,RMSE
0,Dot-Product and White,0.084833,0.128938
1,RBF and White,0.089682,0.135016
2,RBF,0.119405,0.177056
3,Rational Quadratic,0.095602,0.150002
4,Rational Quadratic and White,0.08707,0.134522
5,Matern,0.099916,0.14863
6,Matern and White,0.08722,0.133543
7,Constant,0.138891,0.200718
8,Constant and White,0.138891,0.200718


In [9]:
df_results_regression.to_csv('results_regression.csv', encoding='utf-8', index=False)

<h4>Classification (Breast cancer dataset)</h4>

In [10]:
# loading the problem
X, y = load_breast_cancer(return_X_y=True)
X = pd.DataFrame(X)

In [11]:
# creating the train and validation datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [13]:
# calculating the metrics
df_results_classification = pd.DataFrame(columns=['Kernel', 'AUC', 'F-score', 'Balanced Accuracy'])

for kernel in kernels:
    gpr = GaussianProcessClassifier(kernel=kernels[kernel], random_state=0, n_jobs=-1, max_iter_predict=1000).fit(X_train, y_train)
    predictions = gpr.predict(X_test)
    
    fpr, tpr, thresholds = roc_curve(y_test, predictions)
    auc_pred = auc(fpr, tpr)

    f1_score_pred = f1_score(y_test, predictions)
    bal_accuracy_score = balanced_accuracy_score(y_test, predictions)
    
    df_results_classification = df_results_classification.append({'Kernel': kernel, \
                                                                  'AUC': auc_pred, \
                                                                  'F-score': f1_score_pred, \
                                                                  'Balanced Accuracy': bal_accuracy_score}, ignore_index=True)

df_results_classification

Unnamed: 0,Kernel,AUC,F-score,Balanced Accuracy
0,Dot-Product and White,0.952048,0.953846,0.952048
1,RBF and White,0.931883,0.948905,0.931883
2,RBF,0.931883,0.948905,0.931883
3,Rational Quadratic,0.92442,0.941176,0.92442
4,Rational Quadratic and White,0.92442,0.941176,0.92442
5,Matern,0.942521,0.955882,0.942521
6,Matern and White,0.942521,0.955882,0.942521
7,Constant,0.5,0.740331,0.5
8,Constant and White,0.5,0.740331,0.5


In [14]:
df_results_classification.to_csv('results_classification.csv', encoding='utf-8', index=False)