# one dimension

In [None]:
import numpy as np
from function import Kernel_rbf, choose_lam_r_lr, choose_lam_lr
from function import Kernel_sobo, Kernel_laplace
import scipy.stats as stats

def f_0(x):
    """Score function of the one-dimensional example: ``sin(15 * x)``.

    Applied elementwise when ``x`` is a NumPy array (Example S1 in the
    supplementary material).
    """
    angle = 15 * x
    return np.sin(angle)


# Ground-truth function used by the experiment code in this cell.
f_true = f_0

def generate_data(n, f):
    """Draw ``n`` labelled points from a logistic model with score function ``f``.

    Inputs are sampled uniformly on [0, 1) and returned in ascending order.
    Each label is +1 with probability ``1 / (1 + exp(-f(x)))`` and -1
    otherwise.

    Args:
        n: Number of samples to draw.
        f: Callable mapping the array of inputs to an array of scores
            (log-odds) of the same length.

    Returns:
        Tuple ``(x_train, y_train)``: the sorted inputs and their labels in
        {-1, +1}.
    """
    x_train = np.sort(np.random.rand(n))
    # Evaluate the score function that was passed in. The previous version
    # ignored `f` and always called the module-level f_0.
    prob_tr = 1 / (1 + np.exp(-f(x_train)))
    y_train = np.random.binomial(1, p=prob_tr)
    # Recode the Bernoulli draws {0, 1} as class labels {-1, +1}.
    y_train[y_train == 0] = -1
    return x_train, y_train

# Draw one training sample of size 200 and evaluate the noiseless score on it.
x_train, y_train = generate_data(200, f_true)
y_true = f_true(x_train)

# Candidate kernel (Gram) matrices on the training inputs; only one of them
# is selected as K below.
Gaussian_kernel_matrix = Kernel_rbf(x_train, x_train, sigma=1)
Kernel_sobolev_first_order = Kernel_sobo(x_train, x_train)
Laplace = Kernel_laplace(x_train, x_train)

# Kernel used for this run (assign `Laplace` instead to compare).
K = Kernel_sobolev_first_order

# Both calls share the same data; only the `truncation` flag differs.
# choose_lam_r_lr comes from the project's `function` module (not shown here);
# presumably it tunes its hyper-parameters and returns the best error, which
# is what the messages below report. TODO confirm.
_fit_args = (K, x_train, y_train, y_true)
optimal_error_full = choose_lam_r_lr(*_fit_args, truncation=False, loss_type="one_zero")
optimal_error_trunc = choose_lam_r_lr(*_fit_args, truncation=True, loss_type="one_zero")

print("The optimal error for full kernel matrix is", optimal_error_full)
print("The optimal error for truncated kernel matrix is", optimal_error_trunc)

In [None]:
from sklearn.metrics import mean_squared_error
import tqdm
# plot image of mse for fixed r and varying sample size
import matplotlib.pyplot as plt
# Fixed seed for the global NumPy RNG used by generate_data.
np.random.seed(1)

n_list = [200, 300]
iter_num = 5
# mse_list[repetition, sample-size index, k]: k = 0 is the truncated fit,
# k = 1 the full-kernel fit. Despite the "mse" names, the stored values are
# whatever choose_lam_r_lr returns for loss_type="one_zero".
mse_list = np.zeros((iter_num, len(n_list), 2))
mse_mean = np.zeros((len(n_list), 2))
mse_var = np.zeros((len(n_list), 2))

for i, n_samples in enumerate(n_list):
    for j in tqdm.tqdm(range(iter_num)):
        x_train, y_train = generate_data(n_samples, f_true)
        y_true = f_true(x_train)
        K = Kernel_sobo(x_train, x_train)
        # Truncated fit first, then the full fit, both on the same data set.
        for col, use_truncation in enumerate((True, False)):
            mse_list[j, i, col] = choose_lam_r_lr(
                K, x_train, y_train, y_true,
                truncation=use_truncation, loss_type="one_zero",
            )

    # Summarise the repetitions for this sample size.
    for col in range(2):
        mse_mean[i, col] = np.mean(mse_list[:, i, col])
        mse_var[i, col] = np.var(mse_list[:, i, col])

    trunc_mean, full_mean = (format(v, '.3f') for v in mse_mean[i])
    trunc_var, full_var = (format(v, '.3f') for v in mse_var[i])
    print("n=", n_samples, ",truncated mean mse=", trunc_mean,  ",full mean mse=", full_mean)
    print("n=", n_samples, ",truncated var mse=", trunc_var,  ",full var mse=", full_var)



In [None]:
from math import e
from function import choose_lam, Kernel_sobo, choose_lam_lr
import tqdm
import numpy as np
import matplotlib.pyplot as plt
# Seed NumPy's global RNG with a fixed value so the random data drawn by
# generate_data in this cell is the same on every run.
np.random.seed(0)
from function import choose_lam_svm


def f_1(x):
    """Score function of the one-dimensional example: ``sin(5 * x)``.

    Applied elementwise when ``x`` is a NumPy array (Example S1 in the
    supplementary material).
    """
    angle = 5 * x
    return np.sin(angle)


# Ground-truth function used by the experiment code in this cell.
f_true = f_1

def generate_data(n, f):
    """Sample ``n`` points from a logistic classification model.

    Inputs are uniform on [0, 1) and returned sorted in ascending order. The
    label of each point is +1 with probability ``1 / (1 + exp(-f(x)))`` and
    -1 otherwise, where ``f`` is the score function supplied by the caller.

    Returns the pair ``(inputs, labels)``.
    """
    inputs = np.random.rand(n)
    inputs.sort()  # ascending, in place

    success_prob = 1 / (1 + np.exp(-f(inputs)))
    draws = np.random.binomial(1, p=success_prob)

    # Map Bernoulli outcomes {0, 1} to class labels {-1, +1}.
    labels = 2 * draws - 1
    return inputs, labels

# Fixed sample size; the truncation level r is swept as a fraction of n on a
# log-spaced grid between 1% and 100%.
n = 100
r_list = 10 ** np.linspace(-2, 0, 20)
iter_num = 2
# One row per repetition, one column per entry of r_list.
mse_list = np.zeros((iter_num, r_list.size))
mse_mean = np.zeros_like(r_list)
mse_var = np.zeros_like(r_list)


for j in tqdm.tqdm(range(iter_num)):
    x_train, y_train = generate_data(n, f_1)
    y_true = f_true(x_train)
    K = Kernel_sobo(x_train, x_train)
    # Factor the kernel matrix once per data set and hand the factors to
    # every call below. np.linalg.svd returns the third factor already
    # transposed (Vh), here bound to the name V.
    U, s, V = np.linalg.svd(K)
    for i, ratio in enumerate(r_list):
        mse_list[j, i] = choose_lam_lr(
            K, x_train, y_train, y_true,
            r=int(ratio * n), truncation=True, loss_type="one_zero", pre_SVD=(U, s, V),
        )

# Summarise across repetitions, one line per truncation ratio.
for i, ratio in enumerate(r_list):
    column = mse_list[:, i]
    mse_mean[i] = np.mean(column)
    mse_var[i] = np.var(column)
    print("r=", format(ratio, '.3f'), "mse mean=", format(mse_mean[i], '.3f'), "mse var=", format(mse_var[i], '.3f'))


# multidimensional

In [None]:
from function import Kernel_poly, choose_lam_r_lr, Kernel_rbf
import numpy as np


def f_m(x):
    """Score function of the multidimensional example (Example S2 in the
    supplementary material).

    For a 2-D array ``x`` with one point per row, returns
    ``sin(3 * (sum of the row's coordinates))`` for every row.
    """
    coordinate_sums = np.sum(x, axis=1)
    return np.sin(3 * coordinate_sums)


def Kernel_poly(x_1, x_2):
    """Return the kernel matrix ``K[i, j] = exp(-||x_1[i] - x_2[j]||_1)``.

    NOTE: despite its name this is an exponential kernel on the L1 distance,
    not a polynomial kernel, and this definition shadows the ``Kernel_poly``
    imported from ``function`` at the top of the cell. The name is kept so
    existing calls keep working.

    Args:
        x_1: Array of shape (n1, d), one point per row. A 1-D array is
            treated as n1 scalar points.
        x_2: Array of shape (n2, d), or 1-D as above.

    Returns:
        Array of shape (n1, n2). The previous implementation sized the output
        and both loops from ``x_1`` alone, so it only worked when
        ``n1 == n2``.
    """
    a = np.asarray(x_1, dtype=float)
    b = np.asarray(x_2, dtype=float)
    # Promote vectors of scalar samples to column matrices so that the
    # broadcasting below always sees (points, coordinates).
    if a.ndim == 1:
        a = a[:, np.newaxis]
    if b.ndim == 1:
        b = b[:, np.newaxis]

    # (n1, 1, d) - (1, n2, d) -> (n1, n2, d); summing |.| over the last axis
    # gives every pairwise L1 distance without a Python-level double loop.
    l1_dist = np.abs(a[:, np.newaxis, :] - b[np.newaxis, :, :]).sum(axis=-1)
    return np.exp(-l1_dist)

# Ground-truth score function for the multidimensional experiments: read as a
# global by generate_data below and used to compute y_true.
f_true = f_m

def generate_data(n, f=None, d=3):
    """Draw ``n`` labelled points in ``[0, 1)^d`` from a logistic model.

    Each coordinate is sampled uniformly on [0, 1). A point ``x`` is labelled
    +1 with probability ``1 / (1 + exp(-f(x)))`` and -1 otherwise.

    Args:
        n: Number of samples to draw.
        f: Callable mapping the (n, d) input array to n scores. When omitted,
            the module-level ``f_true`` is used, which is what the original
            one-argument version always did.
        d: Input dimension; defaults to 3, the value that used to be
            hard-coded.

    Returns:
        Tuple ``(x_train, y_train)``: inputs of shape (n, d) and labels in
        {-1, +1}.
    """
    if f is None:
        # Looked up at call time, so rebinding f_true later is still honoured.
        f = f_true
    x_train = np.random.rand(n, d)
    prob_tr = 1 / (1 + np.exp(-f(x_train)))
    y_train = np.random.binomial(1, p=prob_tr)
    # Recode the Bernoulli draws {0, 1} as class labels {-1, +1}.
    y_train[y_train == 0] = -1
    return x_train, y_train

# Draw one training sample of size 200 and evaluate the noiseless score on it.
x_train, y_train = generate_data(200)
y_true = f_true(x_train)

# Kernel matrix of the training inputs with themselves.
K = Kernel_poly(x_train, x_train)

# Both calls share the same data; only the `truncation` flag differs.
# choose_lam_r_lr comes from the project's `function` module (not shown here);
# presumably it returns the best error found, which is what the messages
# below report. TODO confirm.
_fit_args = (K, x_train, y_train, y_true)
optimal_error_full = choose_lam_r_lr(*_fit_args, truncation=False, loss_type="one_zero")
optimal_error_trunc = choose_lam_r_lr(*_fit_args, truncation=True, loss_type="one_zero")

print("The optimal error for full kernel matrix is", optimal_error_full)
print("The optimal error for truncated kernel matrix is", optimal_error_trunc)

In [None]:
from sklearn.metrics import mean_squared_error
import tqdm
# plot image of mse for fixed r and varying sample size
import matplotlib.pyplot as plt
# Fixed seed for the global NumPy RNG used by generate_data.
np.random.seed(1)

n_list = [200, 300]
iter_num = 5
# mse_list[repetition, sample-size index, k]: k = 0 is the truncated fit,
# k = 1 the full-kernel fit. Despite the "mse" names, the stored values are
# whatever choose_lam_r_lr returns for loss_type="one_zero".
mse_list = np.zeros((iter_num, len(n_list), 2))
mse_mean = np.zeros((len(n_list), 2))
mse_var = np.zeros((len(n_list), 2))

for i, n_samples in enumerate(n_list):
    for j in tqdm.tqdm(range(iter_num)):
        x_train, y_train = generate_data(n_samples)
        y_true = f_true(x_train)
        K = Kernel_poly(x_train, x_train)
        # Truncated fit first, then the full fit, both on the same data set.
        for col, use_truncation in enumerate((True, False)):
            mse_list[j, i, col] = choose_lam_r_lr(
                K, x_train, y_train, y_true,
                truncation=use_truncation, loss_type="one_zero",
            )

    # Summarise the repetitions for this sample size.
    for col in range(2):
        mse_mean[i, col] = np.mean(mse_list[:, i, col])
        mse_var[i, col] = np.var(mse_list[:, i, col])

    trunc_mean, full_mean = (format(v, '.3f') for v in mse_mean[i])
    trunc_var, full_var = (format(v, '.3f') for v in mse_var[i])
    print("n=", n_samples, ",truncated mean mse=", trunc_mean,  ",full mean mse=", full_mean)
    print("n=", n_samples, ",truncated var mse=", trunc_var,  ",full var mse=", full_var)



In [None]:
from function import choose_lam, Kernel_sobo, choose_lam_lr
import tqdm
import numpy as np
import matplotlib.pyplot as plt
# NOTE(review): called without an argument, so NumPy re-seeds the global RNG
# from fresh entropy and this cell's numbers change from run to run. The other
# experiment cells in this file pass a fixed seed (0 or 1) -- confirm whether
# the non-reproducible behaviour is intended here.
np.random.seed()
from function import choose_lam_svm

# Fixed sample size; the truncation level r is swept as a fraction of n on a
# log-spaced grid between 1% and 100%.
n = 100
r_list = 10 ** np.linspace(-2, 0, 20)
iter_num = 2
# One row per repetition, one column per entry of r_list.
mse_list = np.zeros((iter_num, r_list.size))
mse_mean = np.zeros_like(r_list)
mse_var = np.zeros_like(r_list)


for j in tqdm.tqdm(range(iter_num)):
    x_train, y_train = generate_data(n)
    y_true = f_true(x_train)
    K = Kernel_poly(x_train, x_train)
    # Factor the kernel matrix once per data set and hand the factors to
    # every call below. np.linalg.svd returns the third factor already
    # transposed (Vh), here bound to the name V.
    U, s, V = np.linalg.svd(K)
    for i, ratio in enumerate(r_list):
        mse_list[j, i] = choose_lam_lr(
            K, x_train, y_train, y_true,
            r=int(ratio * n), truncation=True, loss_type="one_zero", pre_SVD=(U, s, V),
        )

# Summarise across repetitions, one line per truncation ratio.
for i, ratio in enumerate(r_list):
    column = mse_list[:, i]
    mse_mean[i] = np.mean(column)
    mse_var[i] = np.var(column)
    print("r=", format(ratio, '.3f'), "mse mean=", format(mse_mean[i], '.3f'), "mse var=", format(mse_var[i], '.3f'))

