### One-dimensional data

In [None]:
import numpy as np
from function import Kernel_rbf, KRR_estimation
from function import Kernel_sobo, choose_lam_r, Kernel_laplace
# Seed NumPy's global random number generator so the random draws below are reproducible.
np.random.seed(0)
import tqdm


# generate underlying true functions and data
def f_0(x):
    """Mean regression function of the 1-dimensional KRR example (Example S1 in supplementary material).

    Returns sin(10 * x); the sine is evaluated with NumPy, so array inputs
    are handled elementwise.
    """
    # Disabled alternative target: np.exp(-1/(x**(2)))
    scaled = 10 * x
    return np.sin(scaled)


# generate data: f_true is the regression function used by the data generation calls that follow
f_true = f_0
def generate_data(n,d,f):
    """Draw a sorted 1-dimensional sample and noisy responses around f.

    n: number of observations to draw.
    d: not used by this function.
    f: mean regression function, applied to the sampled inputs.

    Returns (x_train, y_train): the inputs are drawn from N(0, 1) and sorted
    in ascending order; the responses are f(x_train) plus N(0, 1) noise.
    """
    inputs = np.random.normal(0, 1, size=n)
    inputs.sort()
    signal = f(inputs)
    # The noise is drawn after the inputs, so the RNG is consumed in the same order as before.
    noise = np.random.normal(0, 1, n)
    return inputs, signal + noise

# Draw one sample of size 100 and evaluate the noiseless regression function on it.
x_train, y_train = generate_data(100, 1, f_true)
y_true = f_true(x_train)


# Kernel matrices of the training inputs for three different kernels.
Gaussian_kernel_matrix = Kernel_rbf(x_train, x_train, sigma=1)
Kernel_sobolev_first_order = Kernel_sobo(x_train, x_train)
Laplace = Kernel_laplace(x_train, x_train)



# Sweep the regularisation parameter over a linear grid.
# Column 0 of error_list holds the full fit, column 1 the truncated fit with r=10.
K = Laplace
# K = Gaussian_kernel_matrix
lam_list = np.linspace(0.0001, 10, 100)
error_list = np.zeros((len(lam_list), 2))
krr_settings = (
    (0, dict(truncation=False, r=None)),
    (1, dict(truncation=True, r=10)),
)
for i, lam in enumerate(lam_list):
    for col, options in krr_settings:
        result = KRR_estimation(K, y_train, lam, **options)
        error_list[i, col] = np.linalg.norm(result - y_true)
optimal_error_full = error_list[:, 0].min()
optimal_error_trunc = error_list[:, 1].min()

# NOTE: the grid-search optima above are overwritten here; only the
# choose_lam_r values are printed (error_list itself is kept).
optimal_error_full, optimal_error_trunc = [
    choose_lam_r(K, y_train, y_true, truncation=use_truncation)
    for use_truncation in (False, True)
]
print("The optimal error for full kernel matrix is", optimal_error_full)
print("The optimal error for truncated kernel matrix is", optimal_error_trunc)



In [None]:
from sklearn.metrics import mean_squared_error

# plot image of mse for fixed r and varying sample size
import matplotlib.pyplot as plt
# Re-seed so this cell is reproducible on its own.
np.random.seed(0)


# lam_list = np.logspace(-3, 3, 10)
n_list = [200, 300]
iter_num = 5
# mse_list[j, i, c]: repetition j, sample size n_list[i], c=0 truncated / c=1 full.
mse_list = np.zeros((iter_num, len(n_list), 2))
mse_mean = np.zeros((len(n_list), 2))
mse_var = np.zeros((len(n_list), 2))

for i, n_samples in enumerate(n_list):
    for j in tqdm.tqdm(range(iter_num)):
        x_train, y_train = generate_data(n_samples, 1, f_true)
        y_true = f_true(x_train)
        K = Kernel_sobo(x_train, x_train)
        # The truncated fit is evaluated first, then the full fit.
        for col, use_truncation in enumerate((True, False)):
            mse_list[j, i, col] = choose_lam_r(K, y_train, y_true, truncation=use_truncation)
    # Summarise the repetitions for this sample size.
    for col in range(2):
        mse_mean[i, col] = np.mean(mse_list[:, i, col])
        mse_var[i, col] = np.var(mse_list[:, i, col])
    print("n=", n_samples, ",truncated mean mse=", format(mse_mean[i, 0], '.3f'),  ",full mean mse=", format(mse_mean[i, 1], '.3f'))
    print("n=", n_samples, ",truncated var mse=", format(mse_var[i, 0], '.3f'),  ",full var mse=", format(mse_var[i, 1], '.3f'))



In [None]:
from function import choose_lam
import matplotlib.pyplot as plt
# Re-seed so this cell is reproducible on its own.
np.random.seed(0)


# Sample size for this experiment: n = 500.
n = 500

# Truncation ratios, log-spaced between 10**-2.4 and 1; each is multiplied by
# the number of training rows and cast to int to obtain r.
r_list = np.power(10, np.linspace(-2.4, 0, 20))
iter_num = 2
mse_list = np.zeros((iter_num, len(r_list)))
mse_mean = np.zeros(len(r_list))
mse_var = np.zeros(len(r_list))


for i, ratio in enumerate(r_list):
    for j in range(iter_num):
        # A fresh data set is drawn for every repetition of every ratio.
        x_train, y_train = generate_data(n, 1, f_true)
        y_true = f_true(x_train)
        K = Kernel_sobo(x_train, x_train)
        rank = int(ratio * x_train.shape[0])
        mse_list[j, i] = choose_lam(K, y_train, y_true, r=rank, truncation=True)
    repetitions = mse_list[:, i]
    mse_mean[i] = np.mean(repetitions)
    mse_var[i] = np.var(repetitions)
    print("r=", format(ratio, '.3f'), "mse mean=", format(mse_mean[i], '.3f'), "mse var=", format(mse_var[i], '.3f'))



### Multi-dimensional data

In [None]:
from function import Kernel_poly, choose_lam_r, Kernel_rbf
import numpy as np


def f_m(x):
    """Mean regression function of the 3-dimensional KRR example (Example S2 in supplementary material).

    Sums the entries of each row of x (axis 1) and returns sin(2 * row_sum),
    giving one value per row.
    """
    row_totals = np.sum(x, axis=1)
    return np.sin(2 * row_totals)


def Kernel_poly(x_1, x_2):
    """Return the kernel matrix with entries exp(-||x_1[i] - x_2[j]||_1).

    Note: despite its name, this is an exponential kernel of the L1
    (sum of absolute differences) distance, not a polynomial kernel. Defining
    it here also shadows the `Kernel_poly` imported from `function`.

    x_1: array of shape (n, d), one point per row.
    x_2: array of shape (m, d), one point per row.

    Returns an (n, m) array. For m == n this matches the previous loop-based
    implementation; previously the output was always n-by-n, so an x_2 with a
    different number of rows was truncated or raised an IndexError.
    """
    x_1 = np.asarray(x_1)
    x_2 = np.asarray(x_2)
    # Broadcast (n, 1, d) against (1, m, d) to get all pairwise differences at once.
    pairwise_diff = x_1[:, np.newaxis, :] - x_2[np.newaxis, :, :]
    l1_distance = np.abs(pairwise_diff).sum(axis=-1)
    return np.exp(-l1_distance)

# Rebind f_true to the multi-dimensional regression function f_m; the data generation calls that follow use it.
f_true = f_m
def generate_data(n,d,f):
    """Draw a d-dimensional sample and noisy responses around f.

    n: number of observations (rows).
    d: number of input dimensions (columns).
    f: mean regression function, applied to the (n, d) input array.

    Returns (x_train, y_train): the inputs have i.i.d. N(0, 1) entries and
    shape (n, d); the responses are f(x_train) plus noise drawn with
    np.random.normal(0, 0.5, size=n).
    """
    inputs = np.random.normal(0, 1, size=(n, d))
    signal = f(inputs)
    # The noise is drawn after the inputs, so the RNG is consumed in the same order as before.
    noise = np.random.normal(0, 0.5, size=n)
    return inputs, signal + noise

# Draw one 3-dimensional sample of size 200 and evaluate the noiseless regression function on it.
x_train, y_train = generate_data(200, 3, f_true)
y_true = f_true(x_train)


K = Kernel_poly(x_train, x_train)
# The full fit is evaluated first, then the truncated fit.
optimal_error_full, optimal_error_trunc = [
    choose_lam_r(K, y_train, y_true, truncation=use_truncation)
    for use_truncation in (False, True)
]
print("The optimal error for full kernel matrix is", optimal_error_full)
print("The optimal error for truncated kernel matrix is", optimal_error_trunc)

In [None]:
from sklearn.metrics import mean_squared_error
import tqdm
# plot image of mse for fixed r and varying sample size
import matplotlib.pyplot as plt
# Re-seed so this cell is reproducible on its own.
np.random.seed(0)


# lam_list = np.logspace(-3, 3, 10)
n_list = [200, 300]
iter_num = 5
# mse_list[j, i, c]: repetition j, sample size n_list[i], c=0 truncated / c=1 full.
mse_list = np.zeros((iter_num, len(n_list), 2))
mse_mean = np.zeros((len(n_list), 2))
mse_var = np.zeros((len(n_list), 2))

for i, n_samples in enumerate(n_list):
    for j in tqdm.tqdm(range(iter_num)):
        x_train, y_train = generate_data(n_samples, 3, f_true)
        y_true = f_true(x_train)
        K = Kernel_poly(x_train, x_train)
        # The truncated fit is evaluated first, then the full fit.
        for col, use_truncation in enumerate((True, False)):
            mse_list[j, i, col] = choose_lam_r(K, y_train, y_true, truncation=use_truncation)
    # Summarise the repetitions for this sample size.
    for col in range(2):
        mse_mean[i, col] = np.mean(mse_list[:, i, col])
        mse_var[i, col] = np.var(mse_list[:, i, col])
    print("n=", n_samples, ",truncated mean mse=", format(mse_mean[i, 0], '.3f'),  ",full mean mse=", format(mse_mean[i, 1], '.3f'))
    print("n=", n_samples, ",truncated var mse=", format(mse_var[i, 0], '.3f'),  ",full var mse=", format(mse_var[i, 1], '.3f'))



In [None]:
from function import choose_lam
import matplotlib.pyplot as plt
# Re-seed so this cell is reproducible on its own.
np.random.seed(0)


# Sample size for this experiment: n = 500.
n = 500
# NOTE(review): K is not assigned in this cell before this line, so the SVD is
# taken of whatever K an earlier cell left behind; U, s, V are not used in the
# rest of this cell - confirm whether this line is still needed.
U, s, V = np.linalg.svd(K)

# Truncation ratios, log-spaced between 10**-2.4 and 1; each is multiplied by
# the number of training rows and cast to int to obtain r.
r_list = np.power(10, np.linspace(-2.4, 0, 30))
iter_num = 2
mse_list = np.zeros((iter_num, len(r_list)))
mse_mean = np.zeros(len(r_list))
mse_var = np.zeros(len(r_list))


for i, ratio in enumerate(r_list):
    for j in range(iter_num):
        # A fresh data set is drawn for every repetition of every ratio.
        x_train, y_train = generate_data(n, 3, f_true)
        y_true = f_true(x_train)
        K = Kernel_poly(x_train, x_train)
        rank = int(ratio * x_train.shape[0])
        mse_list[j, i] = choose_lam(K, y_train, y_true, r=rank, truncation=True)
    repetitions = mse_list[:, i]
    mse_mean[i] = np.mean(repetitions)
    mse_var[i] = np.var(repetitions)
    print("r=", format(ratio, '.3f'), "mse mean=", format(mse_mean[i], '.3f'), "mse var=", format(mse_var[i], '.3f'))
