In [1]:
import numpy as np
from sklearn.svm import SVR
from utils import *

In [2]:
labeled_X = np.load('labeled_data.npy')
unlabeled_X = np.load('unlabeled_data.npy')
y = np.load('labels.npy')

labeled_X.shape, unlabeled_X.shape, y.shape

((3911, 256), (28570, 256), (3911,))

In [107]:
k_local, k_global, r, beta = 5, 10, 0.5, 1

def rbf(x1, x2, l=1):
    if ((x1-x2)**2).ndim > 1:
        return np.exp(-1 / (2 * (l**2)) * ((x1-x2)**2).sum(axis=1))
    else:
        return np.array([np.exp(-1 / (2 * (l**2)) * ((x1-x2)**2).sum())])

def find_nn(point, k):
    dist = np.linalg.norm(labeled_X - point, axis=1)
    index = np.argsort(dist)[:k]
    return labeled_X[index], index

def estimate_distribution(is_local):
    k = k_local if is_local else k_global

    ones = np.ones((k, 1))
    num = unlabeled_X.shape[0]
    y_hat = np.zeros((num,1))
    sigma_2_hat = np.zeros((num,1))

    for i in range(num):
        nn, nn_index = find_nn(unlabeled_X[i], k)
        k_star = rbf(unlabeled_X[i], nn).reshape(-1, 1)
        K_hat = rbf(nn, nn) - ones @ k_star.T - k_star @ ones.T + rbf(unlabeled_X[i], unlabeled_X[i]) * ones @ ones.T
        # Equation (15), PLR paper
        cov = (beta * K_hat + np.identity(k))
        # Equation (14), PLR paper
        mu = cov @ (ones/k)
        y_bar_nn = y[nn_index].sum(axis=0) / k
        diff = y[nn_index].reshape(-1, 1) - y_bar_nn * ones
        y_hat[i] = y_bar_nn + mu.T @ diff
        sigma_2_hat[i] = diff.T @ cov @ diff / k

    return y_hat, sigma_2_hat, num


In [109]:
# estimate distribution
y_local, sigma_2_local, n = estimate_distribution(True)
y_global, sigma_2_global, _ = estimate_distribution(False)

y_local.shape, y_global.shape

((28570, 1), (28570, 1))

In [116]:
sigma_2_local[sigma_2_local == 0] = 1e-20

In [148]:
# conjugate
# Equation (11), S3VR paper
y_bar_conjugate = (y_global/sigma_2_global + n*y_local/sigma_2_local) / (1/sigma_2_global + n/sigma_2_local)
sigma_2_conjugate = 1 / (1/sigma_2_global + n/sigma_2_local)

# generate
max_sigma_2, min_sigma_2 = sigma_2_conjugate.max(), sigma_2_conjugate.min()
# Equation (12), S3VR paper
pu = (sigma_2_conjugate - min_sigma_2) /(max_sigma_2 - min_sigma_2)
X_hat = np.vstack((labeled_X.copy(), unlabeled_X[(pu >= r).reshape(-1,)]))
y_hat = np.append(y.copy(), y_bar_conjugate[pu >= r])

X_hat.shape, y_hat.shape

((4428, 256), (4428,))

In [153]:
svr = SVR()
svr.fit(X_hat, y_hat)
print(f'The training rmse is {RMSE(y_hat, svr.predict(X_hat))}')

RMSE(y, svr.predict(labeled_X))

The training rmse is 0.49732127623220557


0.5277999698376711

In [152]:
svr = SVR()
svr.fit(labeled_X, y)
print(f'The training rmse is {RMSE(y, svr.predict(labeled_X))}')

The training rmse is 0.5265761906462435


In [1]:
from model import *
import numpy as np

labeled_X = np.load('labeled_data.npy')
unlabeled_X = np.load('unlabeled_data.npy')
y = np.load('labels.npy')

model = S3VR(5, 10, 0.5, 1)
model.fit(labeled_X, y, unlabeled_X)

The training rmse is 0.49732127623220557


In [157]:
RMSE(y, model.predict(labeled_X))

0.5277999698376711