In [1]:
from infra.amazondatasetloader import AmazonReviewsDatasetLoader
from infra.svdpredictor import SVDRatingPredictor
from core.util import task

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the Amazon reviews ratings and divide them into two partitions that the
# later cells use for fitting (`train`) and for held-out evaluation (`test`).
# NOTE(review): the fractions presumably mean 80% train / 20% test, in that
# order -- confirm against the dataset's `split` implementation.
split_fractions = [0.8, 0.2]
dataset = AmazonReviewsDatasetLoader().load_dataset()
train, test = dataset.split(split_fractions)

 ⟍  Amazon review data load
   ⟍  Raw data loading
   ⟋  Raw data loading [1m[+4911ms][0m
   ⟍  Data conversion
    |  Loaded 50000 ratings (0.10%) [1m[+8839ms][0m
    |  Loaded 100000 ratings (0.20%) [1m[+1778ms][0m
    |  Loaded 150000 ratings (0.30%) [1m[+1654ms][0m
    |  Loaded 200000 ratings (0.40%) [1m[+1502ms][0m
    |  Loaded 250000 ratings (0.50%) [1m[+1491ms][0m
    |  Loaded 300000 ratings (0.61%) [1m[+1459ms][0m
    |  Loaded 350000 ratings (0.71%) [1m[+1551ms][0m
    |  Loaded 400000 ratings (0.81%) [1m[+1484ms][0m
    |  Loaded 450000 ratings (0.91%) [1m[+1478ms][0m
    |  Loaded 500000 ratings (1.01%) [1m[+1471ms][0m
    |  Loaded 550000 ratings (1.11%) [1m[+1477ms][0m
    |  Loaded 600000 ratings (1.21%) [1m[+1699ms][0m
    |  Loaded 650000 ratings (1.31%) [1m[+1586ms][0m
    |  Loaded 700000 ratings (1.41%) [1m[+1476ms][0m
    |  Loaded 750000 ratings (1.51%) [1m[+1458ms][0m
    |  Loaded 800000 ratings (1.62%) [1m[+1472ms][0m
    |  

In [3]:
# Build the predictor and, when possible, resume from the checkpoint stored at
# `model_path`. Loading is best-effort: a missing or unreadable checkpoint must
# not stop the notebook, but it is reported instead of being silently ignored.
model_path = "model.npz"
predictor = SVDRatingPredictor(latent_dim=2)
try:
    predictor.load(model_path)
except FileNotFoundError:
    # Expected on the very first run: nothing has been saved yet.
    print("No checkpoint found at %r; continuing without one." % model_path)
except Exception as exc:
    # `Exception` (rather than a bare `except:`) lets KeyboardInterrupt and
    # SystemExit propagate, so the cell can still be interrupted. Any other
    # failure (e.g. a corrupt or incompatible file) is surfaced to the reader.
    # NOTE(review): if `load` can fail part-way, the predictor may be left
    # partially populated -- confirm against SVDRatingPredictor.load.
    print(
        "Could not load checkpoint %r (%s: %s); continuing without it."
        % (model_path, type(exc).__name__, exc)
    )

In [6]:
# Number of training epochs to run in this cell. Kept as a named constant so
# it can be lowered for quick experiments (each epoch took roughly 46 s in the
# recorded run).
NUM_EPOCHS = 100

# One (train_mse, valid_mse) tuple per completed epoch. The list is recreated
# every time this cell runs, so it only covers the current training session.
history = []
with task("Training"):
    for i in range(NUM_EPOCHS):
        with task("Epoch %d" % (i + 1)):
            predictor.train(train)
            # Score the model on both partitions after each training pass; the
            # values are logged and plotted as MSE.
            train_mse = predictor.evaluate(train)
            valid_mse = predictor.evaluate(test)
            history.append((train_mse, valid_mse))
            task.log("Train MSE      : %.4f" % train_mse)
            task.log("Validation MSE : %.4f" % valid_mse)
            # Persist after every epoch so progress survives an interrupted run.
            predictor.save(model_path)

# NOTE(review): in the recorded output the train MSE is ~0.01-0.06 while the
# validation MSE stays around 22. A gap that large suggests severe overfitting
# or a mismatch between how the two partitions are evaluated -- worth
# investigating before trusting the model.

 ⟍  Training
   ⟍  Epoch 1
    |  Train MSE      : 0.0582 [1m[+46629ms][0m
    |  Validation MSE : 22.4801 [1m[+0ms][0m
   ⟋  Epoch 1 [1m[+46638ms][0m
   ⟍  Epoch 2
    |  Train MSE      : 0.0376 [1m[+46285ms][0m
    |  Validation MSE : 22.4317 [1m[+0ms][0m
   ⟋  Epoch 2 [1m[+46295ms][0m
   ⟍  Epoch 3
    |  Train MSE      : 0.0262 [1m[+46247ms][0m
    |  Validation MSE : 22.3855 [1m[+0ms][0m
   ⟋  Epoch 3 [1m[+46257ms][0m
   ⟍  Epoch 4
    |  Train MSE      : 0.0193 [1m[+46649ms][0m
    |  Validation MSE : 22.3422 [1m[+0ms][0m
   ⟋  Epoch 4 [1m[+46658ms][0m
   ⟍  Epoch 5
    |  Train MSE      : 0.0148 [1m[+47082ms][0m
    |  Validation MSE : 22.3019 [1m[+0ms][0m
   ⟋  Epoch 5 [1m[+47090ms][0m
   ⟍  Epoch 6
    |  Train MSE      : 0.0117 [1m[+45505ms][0m
    |  Validation MSE : 22.2643 [1m[+0ms][0m
   ⟋  Epoch 6 [1m[+45513ms][0m
   ⟍  Epoch 7
    |  Train MSE      : 0.0096 [1m[+48095ms][0m
    |  Validation MSE : 22.2291 [1m[+0ms][0m
   ⟋  Epoch 

In [5]:
import matplotlib.pyplot as plt

# Plot the per-epoch train/validation MSE collected in `history` by the
# training cell.
if not history:
    # `zip(*history)` on an empty list cannot be unpacked into two names and
    # would raise an opaque ValueError; give the reader an actionable message.
    print("No training history recorded yet; run the training cell first.")
else:
    train_mse, valid_mse = zip(*history)
    # 1-based epoch numbers, matching the "Epoch N" labels in the training log.
    epochs = range(1, len(history) + 1)

    fig, ax = plt.subplots()
    ax.plot(epochs, train_mse, label="Train MSE")
    ax.plot(epochs, valid_mse, label="Validation MSE")
    # The two curves can differ by orders of magnitude (the recorded run shows
    # ~0.01 vs ~22); a log axis keeps both readable on one figure.
    ax.set_yscale("log")
    ax.set_xlabel("Epoch")
    ax.set_ylabel("MSE")
    ax.set_title("Training history")
    ax.legend()
    plt.show()

user 0: [-1.04652809  0.80490864]
item 0: [ 2.65346336 -0.31309566]
dot product: -3.028937345636921

user 1: [ 0.74362641 -1.063549  ]
item 1: [ 0.29045099 -2.27725463]
dot product: 2.6379589076134136

user 2: [1.0285957  0.69189767]
item 2: [-1.36962934 -1.95434423]
dot product: -2.7610010671990617

user 3: [ 0.73568616 -1.35040709]
item 3: [2.38752618 0.06994849]
dot product: 1.6620110203545209

user 4: [-0.81480515 -1.58753805]
item 4: [-1.49650018 -1.7966944 ]
dot product: 4.071676774619571

user 5: [1.17128679 1.44798309]
item 5: [ 0.52469976 -0.07207382]
dot product: 0.51021222362949

user 6: [-2.19179421 -0.66126064]
item 6: [-0.7067513   1.25494304]
dot product: 0.7192089545718356

user 7: [0.74235315 1.09083793]
item 7: [ 1.4896279  -1.74956485]
dot product: -0.8026617448850821

user 8: [-0.39464852 -1.62875273]
item 8: [0.99781221 2.15928904]
dot product: -3.9107330394412205

user 9: [ 0.83332036 -0.67391613]
item 9: [ 0.42411089 -2.3677278 ]
dot product: 1.9490701843183593

