In [17]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sys

from fvgp import GP
from fvgp.gp_kernels import squared_exponential_kernel, matern_kernel_diff2, exponential_kernel

from sklearn.datasets import load_digits
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.utils import shuffle
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from scipy.stats import wasserstein_distance

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import classification_report

In [12]:
# Load digits dataset
digits = load_digits()
X, y = digits.data, digits.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

In [13]:
X_train_sparse = X_train[::5]
X_test_sparse = X_test[::5]
y_train_sparse = y_train[::5]
y_test_sparse = y_test[::5]

In [14]:
len(X_train_sparse)

324

In [15]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_sparse)
X_test_scaled = scaler.transform(X_test_sparse)

In [16]:
def wasserstein_1d_slice(x1, x2):
    # Project onto 1D slice (e.g., the first dimension)
    slice_x1 = x1[:, 0]  # Use the first feature/column for slicing
    slice_x2 = x2[:, 0]
    return wasserstein_distance(slice_x1, slice_x2)

In [49]:
def wasserstein_exponential_kernel(x1, x2, length_scale):
    distance = np.array([wasserstein_1d_slice(x1[i:i+1], x2[j:j+1]) for i in range(x1.shape[0]) for j in range(x2.shape[0])])
    distance = distance.reshape(x1.shape[0], x2.shape[0])
    return np.exp(-distance / length_scale)

In [50]:
gp_model = GP(
    X_train_scaled,
    y_train_sparse,
    init_hyperparameters=np.array([1.0]),  # Initialize with a length scale of 1.0
    gp_kernel_function=wasserstein_exponential_kernel,
    noise_variances=np.ones(y_train_sparse.shape) * 0.01  # Assuming small noise variance (measurement error)
)

In [54]:
hps_bounds = np.array([[0.1, 10.0]])

# Train the GP model using MCMC with 100 iterations
gp_model.train(
    hyperparameter_bounds=hps_bounds,
    method='mcmc',  # Use MCMC for hyperparameter sampling
    max_iter=100,  # Run MCMC for 100 iterations
)

array([6.63727006])

In [56]:
posterior = gp_model.posterior_mean(X_test_scaled)  # Predict posterior mean
predicted_mean = posterior["f(x)"].flatten()  # Get the predicted mean for each test point

# For classification, use a threshold or take the sign
predicted_labels = np.where(predicted_mean > 0.5, 1, 0)  # Apply threshold of 0.5 for binary classification

# Generate classification report
print("Classification Report:")
print(classification_report(y_test_sparse, predicted_labels))

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         4
           1       0.19      1.00      0.33         7
           2       0.00      0.00      0.00         2
           3       0.00      0.00      0.00         6
           4       0.00      0.00      0.00         2
           5       0.00      0.00      0.00         5
           6       0.00      0.00      0.00         1
           7       0.00      0.00      0.00         6
           8       0.00      0.00      0.00         1
           9       0.00      0.00      0.00         2

    accuracy                           0.19        36
   macro avg       0.02      0.10      0.03        36
weighted avg       0.04      0.19      0.06        36



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [27]:
def compute_kernel_matrix(X, kernel_function, length_scale, jitter=1e-6):
    n = X.shape[0]
    K = np.zeros((n, n))
    for i in range(n):
        for j in range(i, n):
            K[i, j] = kernel_function(X[i:i+1], X[j:j+1], length_scale)
            K[j, i] = K[i, j]  # Ensure symmetry
    # Add jitter to the diagonal
    K += np.eye(n) * jitter
    return K


In [28]:
def is_psd(matrix):
    eigenvalues = np.linalg.eigvals(matrix)
    return np.all(eigenvalues >= 0), eigenvalues

In [37]:
# Compute the kernel matrix for the first 100 samples as an example
length_scale = 1.0  # Use an appropriate length scale value
K = compute_kernel_matrix(X_train_sparse[:100], wasserstein_exponential_kernel, length_scale)

# Check if the matrix is PSD
is_positive_semi_definite, eigenvalues = is_psd(K)

if is_positive_semi_definite:
    print("The kernel matrix is positive semi-definite. Median Eigenvalue:", eigenvalues.mean())
else:
    print("The kernel matrix is not positive semi-definite. Smallest eigenvalue:", eigenvalues.min())


The kernel matrix is positive semi-definite. Median Eigenvalue: 1.0000009999999993


In [57]:
def wasserstein_exponential_kernel(x1, x2, hyperparameters):
    length_scale = hyperparameters[0]  # Extract the length scale from hyperparameters
    distance = np.array([wasserstein_1d_slice(x1[i:i+1], x2[j:j+1]) for i in range(x1.shape[0]) for j in range(x2.shape[0])])
    distance = distance.reshape(x1.shape[0], x2.shape[0])
    return np.exp(-distance / length_scale)

In [58]:
num_classes = 10  # Digits 0-9
gp_models = []

In [60]:
for class_label in range(num_classes):
    print(f"Training GP model for class {class_label}")
    
    # Convert the labels to binary for the current class (1 for class_label, 0 for the rest)
    y_train_binary = (y_train_sparse == class_label).astype(int)
    
    # Initialize the GP model for each class
    gp_model = GP(
        X_train_scaled,
        y_train_binary,
        init_hyperparameters=np.array([1.0]),  # Initialize with a length scale of 1.0
        gp_kernel_function=wasserstein_exponential_kernel,
        noise_variances=np.ones(y_train_binary.shape) * 0.01  # Assuming small noise variance (measurement error)
    )

    # Train the GP model using MCMC with 100 iterations
    gp_model.train(
        hyperparameter_bounds=np.array([[0.1, 10.0]]),  # Length scale bounds
        method='mcmc',  # Use MCMC for hyperparameter sampling
        max_iter=100,  # Run MCMC for 100 iterations
    )
    
    # Store the trained GP model
    gp_models.append(gp_model)

Training GP model for class 0
Training GP model for class 1
Training GP model for class 2
Training GP model for class 3
Training GP model for class 4
Training GP model for class 5
Training GP model for class 6
Training GP model for class 7
Training GP model for class 8
Training GP model for class 9


In [61]:
def predict_probs(X_test, gp_models):
    means = np.zeros((X_test.shape[0], len(gp_models)))
    for class_label, gp_model in enumerate(gp_models):
        posterior_rbf = gp_model.posterior_mean(X_test)  # Use posterior_mean
        mean = posterior_rbf["f(x)"]  # Access the mean predictions
        means[:, class_label] = mean.flatten()
    return softmax(means.T).T

In [67]:
def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)

In [68]:
gp_probabilities = predict_probs(X_test_scaled, gp_models)
gp_predictions = np.argmax(gp_probabilities, axis=1)
gp_accuracy = accuracy_score(y_test_sparse, gp_predictions)


In [64]:
print(f'GP Classifier – Accuracy: {gp_accuracy * 100:.2f}%\n')
print(classification_report(y_test_sparse, gp_predictions))

GP Classifier – Accuracy: 19.44%

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         4
           1       0.19      1.00      0.33         7
           2       0.00      0.00      0.00         2
           3       0.00      0.00      0.00         6
           4       0.00      0.00      0.00         2
           5       0.00      0.00      0.00         5
           6       0.00      0.00      0.00         1
           7       0.00      0.00      0.00         6
           8       0.00      0.00      0.00         1
           9       0.00      0.00      0.00         2

    accuracy                           0.19        36
   macro avg       0.02      0.10      0.03        36
weighted avg       0.04      0.19      0.06        36



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [69]:
def predict_class(X_test, gp_models):
    num_samples = X_test.shape[0]
    num_classes = len(gp_models)
    class_probs = np.zeros((num_samples, num_classes))  # Store probabilities for each class
    
    for class_label, gp_model in enumerate(gp_models):
        # Get the posterior mean for each test sample
        posterior = gp_model.posterior_mean(X_test)
        class_probs[:, class_label] = posterior["f(x)"].flatten()  # Get the predicted mean
    
    # For each sample, choose the class with the highest probability
    predicted_classes = np.argmax(class_probs, axis=1)
    
    return predicted_classes

In [75]:
gp_predictions = predict_class(X_test_scaled, gp_models)
gp_accuracy = accuracy_score(y_test_sparse, gp_predictions)

# Print the classification report and accuracy
print(f'GP Classifier – Accuracy: {gp_accuracy * 100:.2f}%\n')
print(classification_report(y_test_sparse, gp_predictions))

GP Classifier – Accuracy: 19.44%

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         4
           1       0.19      1.00      0.33         7
           2       0.00      0.00      0.00         2
           3       0.00      0.00      0.00         6
           4       0.00      0.00      0.00         2
           5       0.00      0.00      0.00         5
           6       0.00      0.00      0.00         1
           7       0.00      0.00      0.00         6
           8       0.00      0.00      0.00         1
           9       0.00      0.00      0.00         2

    accuracy                           0.19        36
   macro avg       0.02      0.10      0.03        36
weighted avg       0.04      0.19      0.06        36



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
