In [1]:
import numpy as np
from sklearn.svm import SVR
from utils import *

In [2]:
labeled_X = np.load('labeled_data.npy')
unlabeled_X = np.load('unlabeled_data.npy')
y = np.load('labels.npy')

labeled_X.shape, unlabeled_X.shape, y.shape

((3911, 256), (28570, 256), (3911,))

In [107]:
k_local, k_global, r, beta = 5, 10, 0.5, 1

def rbf(x1, x2, l=1):
    if ((x1-x2)**2).ndim > 1:
        return np.exp(-1 / (2 * (l**2)) * ((x1-x2)**2).sum(axis=1))
    else:
        return np.array([np.exp(-1 / (2 * (l**2)) * ((x1-x2)**2).sum())])

def find_nn(point, k):
    dist = np.linalg.norm(labeled_X - point, axis=1)
    index = np.argsort(dist)[:k]
    return labeled_X[index], index

def estimate_distribution(is_local):
    k = k_local if is_local else k_global

    ones = np.ones((k, 1))
    num = unlabeled_X.shape[0]
    y_hat = np.zeros((num,1))
    sigma_2_hat = np.zeros((num,1))

    for i in range(num):
        nn, nn_index = find_nn(unlabeled_X[i], k)
        k_star = rbf(unlabeled_X[i], nn).reshape(-1, 1)
        K_hat = rbf(nn, nn) - ones @ k_star.T - k_star @ ones.T + rbf(unlabeled_X[i], unlabeled_X[i]) * ones @ ones.T
        # Equation (15), PLR paper
        cov = (beta * K_hat + np.identity(k))
        # Equation (14), PLR paper
        mu = cov @ (ones/k)
        y_bar_nn = y[nn_index].sum(axis=0) / k
        diff = y[nn_index].reshape(-1, 1) - y_bar_nn * ones
        y_hat[i] = y_bar_nn + mu.T @ diff
        sigma_2_hat[i] = diff.T @ cov @ diff / k

    return y_hat, sigma_2_hat, num


In [109]:
# estimate distribution
y_local, sigma_2_local, n = estimate_distribution(True)
y_global, sigma_2_global, _ = estimate_distribution(False)

y_local.shape, y_global.shape

((28570, 1), (28570, 1))

In [116]:
sigma_2_local[sigma_2_local == 0] = 1e-20

In [148]:
# conjugate
# Equation (11), S3VR paper
y_bar_conjugate = (y_global/sigma_2_global + n*y_local/sigma_2_local) / (1/sigma_2_global + n/sigma_2_local)
sigma_2_conjugate = 1 / (1/sigma_2_global + n/sigma_2_local)

# generate
max_sigma_2, min_sigma_2 = sigma_2_conjugate.max(), sigma_2_conjugate.min()
# Equation (12), S3VR paper
pu = (sigma_2_conjugate - min_sigma_2) /(max_sigma_2 - min_sigma_2)
X_hat = np.vstack((labeled_X.copy(), unlabeled_X[(pu >= r).reshape(-1,)]))
y_hat = np.append(y.copy(), y_bar_conjugate[pu >= r])

X_hat.shape, y_hat.shape

((4428, 256), (4428,))

In [153]:
svr = SVR()
svr.fit(X_hat, y_hat)
print(f'The training rmse is {RMSE(y_hat, svr.predict(X_hat))}')

RMSE(y, svr.predict(labeled_X))

The training rmse is 0.49732127623220557


0.5277999698376711

In [152]:
svr = SVR()
svr.fit(labeled_X, y)
print(f'The training rmse is {RMSE(y, svr.predict(labeled_X))}')

The training rmse is 0.5265761906462435


In [1]:
from model import *
import numpy as np
import numpy as np
from sklearn.svm import SVR
from utils import *

labeled_X = np.load('labeled_data.npy')
unlabeled_X = np.load('unlabeled_data.npy')
y = np.load('labels.npy')

In [16]:
preds = np.zeros_like(y)
for i in range(y.shape[1]):
    svr = SVR()
    svr.fit(labeled_X, y[:, i])
    preds[:, i] = svr.predict(labeled_X)
    print(f'The training rmse is {RMSE(y[:, i], preds[:, i])}')
    
print(f'\nThe training mcrmse is {MCRMSE(y, preds.T)}')

The training rmse is 0.5265761906462435
The training rmse is 0.5115732746256291
The training rmse is 0.4578897052301383
The training rmse is 0.5184815199582646
The training rmse is 0.5559571891536925
The training rmse is 0.5350020691699422

The training mcrmse is 0.445168390950493


In [2]:
import itertools
p = [[5, 10],[10, 20],[0.5, 0.8], [1, 10]]
ps = list(itertools.product(*p))

In [3]:
preds = np.zeros_like(y)
s3vr = S3VR(0,0,0,0)
res = []
for k_local, k_global, r, beta in ps:
    for i in range(y.shape[1]):
        s3vr.k_local, s3vr.k_global, s3vr.r, s3vr.beta = k_local, k_global, r, beta
        s3vr.fit(labeled_X, y[:, i], unlabeled_X)
        preds[:, i] = s3vr.predict(labeled_X)
    
    perf = MCRMSE(y, preds.T)
    print(f'The training mcrmse is {perf}')
    res.append(perf)

The training mcrmse is 0.4472663089060917
The training mcrmse is 0.4473920935608988
The training mcrmse is 0.4452582840070505
The training mcrmse is 0.445278469679296
The training mcrmse is 0.4472674067258249
The training mcrmse is 0.44739125037243127
The training mcrmse is 0.44525893068036293
The training mcrmse is 0.44527844809163936
The training mcrmse is 0.44823580098975196
The training mcrmse is 0.44880365227986546
The training mcrmse is 0.4453637683537495
The training mcrmse is 0.44541307841296895
The training mcrmse is 0.4482618765468786
The training mcrmse is 0.44881945612071
The training mcrmse is 0.44536285338337733
The training mcrmse is 0.44541288891390657


In [8]:
for i in range(len(res)):
    k_local, k_global, r, beta = ps[i]
    print(f"S3VR-{i+1} & {k_local} & {k_global} & {r} & {beta} & {res[i]:.4f} \\\\")

S3VR-1 & 5 & 10 & 0.5 & 1 & 0.4473 \\
S3VR-2 & 5 & 10 & 0.5 & 10 & 0.4474 \\
S3VR-3 & 5 & 10 & 0.8 & 1 & 0.4453 \\
S3VR-4 & 5 & 10 & 0.8 & 10 & 0.4453 \\
S3VR-5 & 5 & 20 & 0.5 & 1 & 0.4473 \\
S3VR-6 & 5 & 20 & 0.5 & 10 & 0.4474 \\
S3VR-7 & 5 & 20 & 0.8 & 1 & 0.4453 \\
S3VR-8 & 5 & 20 & 0.8 & 10 & 0.4453 \\
S3VR-9 & 10 & 10 & 0.5 & 1 & 0.4482 \\
S3VR-10 & 10 & 10 & 0.5 & 10 & 0.4488 \\
S3VR-11 & 10 & 10 & 0.8 & 1 & 0.4454 \\
S3VR-12 & 10 & 10 & 0.8 & 10 & 0.4454 \\
S3VR-13 & 10 & 20 & 0.5 & 1 & 0.4483 \\
S3VR-14 & 10 & 20 & 0.5 & 10 & 0.4488 \\
S3VR-15 & 10 & 20 & 0.8 & 1 & 0.4454 \\
S3VR-16 & 10 & 20 & 0.8 & 10 & 0.4454 \\


In [None]:
model = S3VR(5, 10, 0.5, 1)
model.fit(labeled_X, y, unlabeled_X)

In [157]:
RMSE(y, model.predict(labeled_X))

0.5277999698376711

In [4]:
import math

math.factorial(10) / (10**10)

0.00036288

In [5]:
6000 * math.factorial(10) / (math.factorial(60000)/math.factorial(59990))

3.6035248181869355e-38

In [25]:
from scipy.spatial import distance

x = np.array([[0,0], [0,1]])
y = np.array([[0,1], [0,1]])
distance.cdist(x, y, 'euclidean')

array([[1., 1.],
       [0., 0.]])