In [13]:
import numpy as np

from surprise import SVD
from surprise import Dataset
from surprise.accuracy import rmse
from surprise.model_selection import train_test_split
from surprise.similarities import cosine
from Uncertain import SVDAverageEnsemble
from Uncertain.metrics import rmse as urmse
from Uncertain.metrics import misscalibration

import matplotlib.pyplot as plt

# --- Experiment configuration ------------------------------------------------
data_decrease = 0.2      # the subsampling loop keeps each rating with probability 1 - data_decrease
ensemble = 20            # passed as `n` to SVDAverageEnsemble and misscalibration
epochs, factors = 20, 100

# --- Data ---------------------------------------------------------------------
# Built-in MovieLens-100k dataset; 1% of it is held out as the test set.
data = Dataset.load_builtin('ml-100k')
train, test = train_test_split(data, random_state=0, test_size=0.01)
nu, ni = train.n_users, train.n_items

# --- Base model and result accumulator ----------------------------------------
algo_base = SVD(n_factors=factors, n_epochs=epochs)
preds = []               # one entry is appended per fitted ensemble

In [14]:
# Fit the ensemble on the current trainset, store its test predictions in
# `preds`, then thin the trainset in place. Note that the thinning also runs
# after the final fit, so `train` is left subsampled when the cell finishes.
rng = np.random.default_rng(0)  # local seeded generator: the drop mask is reproducible

for i in range(1):

    print('n_ratings =', train.n_ratings)
    model = SVDAverageEnsemble(n=ensemble).fit(train, algo_base=algo_base)
    preds.append(model.test(test))

    # Keep each rating independently with probability 1 - data_decrease.
    # `train.ur[u]` entries start with an item id and `train.ir[item]` entries
    # start with a user id, so both views must be edited to stay consistent.
    # NOTE(review): the trainset may cache derived statistics (e.g. a global
    # mean) that are not refreshed by this in-place edit — confirm.
    train.n_ratings = 0
    for u in train.ur:
        user_ratings = train.ur[u]
        keep = rng.binomial(1, 1 - data_decrease, len(user_ratings)).astype(bool)

        # Collect the dropped item ids before the user's list is modified.
        dropped_items = [entry[0] for entry, kept in zip(user_ratings, keep) if not kept]

        # Single-pass filter (slice assignment keeps the same list object)
        # instead of one id-list rebuild + index lookup per deleted rating.
        user_ratings[:] = [entry for entry, kept in zip(user_ratings, keep) if kept]

        # Remove the matching (user, rating) entry from each affected item.
        for item in dropped_items:
            item_ratings = train.ir[item]
            del item_ratings[[entry[0] for entry in item_ratings].index(u)]

        train.n_ratings += len(user_ratings)

n_ratings = 99000
Fitting: Model 1
Fitting: Model 2
Fitting: Model 3
Fitting: Model 4
Fitting: Model 5
Fitting: Model 6
Fitting: Model 7
Fitting: Model 8
Fitting: Model 9
Fitting: Model 10
Fitting: Model 11
Fitting: Model 12
Fitting: Model 13
Fitting: Model 14
Fitting: Model 15
Fitting: Model 16
Fitting: Model 17
Fitting: Model 18
Fitting: Model 19
Fitting: Model 20


In [15]:
# Call urmse once per stored prediction set; its return value is discarded
# (the recorded output shows it printing the RMSE itself).
for prediction_set in preds:
    urmse(prediction_set)
print(' ')

RMSE: 0.9355
 


In [16]:
# Call misscalibration once per stored prediction set; its return value is
# discarded (the recorded output shows it printing the score itself).
for prediction_set in preds:
    misscalibration(ensemble, prediction_set)
print(' ')

Misscalibration:  0.4659
 


In [17]:
# Re-run the metric quietly on the first prediction set and keep all seven
# values it returns so the following cells can inspect them.
(score, hot, err, sd, p, t_p, conf) = misscalibration(
    n=ensemble,
    predictions=preds[0],
    verbose=False,
)
print(score)

0.4658750000000005


In [18]:
# Display `conf`, the last of the seven values returned by misscalibration.
# Presumably the confidence levels at which calibration is evaluated — TODO confirm.
conf

array([0.02, 0.04, 0.06, 0.08, 0.1 , 0.12, 0.14, 0.16, 0.18, 0.2 , 0.22,
       0.24, 0.26, 0.28, 0.3 , 0.32, 0.34, 0.36, 0.38, 0.4 , 0.42, 0.44,
       0.46, 0.48, 0.5 , 0.52, 0.54, 0.56, 0.58, 0.6 , 0.62, 0.64, 0.66,
       0.68, 0.7 , 0.72, 0.74, 0.76, 0.78, 0.8 , 0.82, 0.84, 0.86, 0.88,
       0.9 , 0.92, 0.94, 0.96])

In [19]:
# Print the first entries of `err` and `sd` (both returned by misscalibration).
# Presumably the error and the ensemble's standard deviation for the same
# test prediction — TODO confirm against Uncertain.metrics.
print(err[0], sd[0])

0.5941631567223005 0.17059555877417226


In [20]:
# Twice the first `sd` entry divided by sqrt(10).
# NOTE(review): looks like a two-standard-error width, but the divisor is a
# hard-coded 10 while `ensemble` is set to 20 — confirm which n is intended.
2*sd[0]/np.sqrt(10)

0.10789410488710133

In [21]:
# Same inspection as for index 0, now for the entries at index 19 of `err` and `sd`.
print(err[19], sd[19])

0.03384932676407493 0.1278958726308427


In [22]:
# Mean of `hot` along axis 1, i.e. one value per row of `hot`.
# The recorded output has as many entries as `conf`, so rows presumably
# correspond to the values in `conf` — TODO confirm.
hot.mean(axis=1)

array([0.002, 0.002, 0.002, 0.003, 0.004, 0.006, 0.007, 0.009, 0.009,
       0.011, 0.013, 0.013, 0.014, 0.014, 0.014, 0.014, 0.014, 0.014,
       0.015, 0.017, 0.018, 0.02 , 0.02 , 0.02 , 0.02 , 0.02 , 0.022,
       0.022, 0.024, 0.025, 0.025, 0.027, 0.027, 0.029, 0.029, 0.031,
       0.033, 0.035, 0.036, 0.039, 0.043, 0.045, 0.05 , 0.053, 0.053,
       0.058, 0.066, 0.071])

In [23]:
# Mean of `hot` along axis 0, i.e. one value per column of `hot`.
# Presumably one column per test prediction — TODO confirm.
hot.mean(axis=0)

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.20833333, 0.        , 0.60416667,
       0.        , 0.        , 0.        , 0.        , 0.22916667,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.10416667,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.1875    , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.16666667, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.35416667, 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.125     , 0.        , 0.        , 0.        , 0.     