In [2]:
import numpy as np
import pandas as pd
from surprise import SVD
from surprise import Dataset  # comes with built in play data!
from surprise.model_selection import cross_validate


In [5]:
# Load the built-in 'ml-100k' dataset and keep a handle on its raw rating tuples.
data = Dataset.load_builtin('ml-100k')
raw_ratings = data.raw_ratings

# Peek at the first five raw entries and confirm which container type holds them.
first_five = raw_ratings[:5]
print(first_five, '\n', type(raw_ratings))

[('196', '242', 3.0, '881250949'), ('186', '302', 3.0, '891717742'), ('22', '377', 1.0, '878887116'), ('244', '51', 2.0, '880606923'), ('166', '346', 1.0, '886397596')] 
 <class 'list'>


In [20]:
# Build a DataFrame from the raw (user, movie, rating, timestamp) tuples and show it
# in its original order before any re-indexing.
ratings_unindexed = pd.DataFrame(raw_ratings, columns=['user', 'movie', 'rating', 'tmstmp'])
print(ratings_unindexed.head())

# Index by timestamp and sort on it. This is done as a non-mutating chain so the frame
# printed above is not silently changed and the cell stays idempotent on re-run.
# NOTE: the timestamps are strings, so sort_index orders them lexicographically; that
# matches chronological order only while every timestamp has the same number of digits.
raw_df = ratings_unindexed.set_index('tmstmp').sort_index()
print(raw_df.head())

  user movie  rating     tmstmp
0  196   242     3.0  881250949
1  186   302     3.0  891717742
2   22   377     1.0  878887116
3  244    51     2.0  880606923
4  166   346     1.0  886397596
          user movie  rating
tmstmp                      
874724710  259   255     4.0
874724727  259   286     4.0
874724754  259   298     4.0
874724781  259   185     4.0
874724843  259   173     4.0


Here's Surprise's version of the FunkSVD model:
https://surprise.readthedocs.io/en/stable/matrix_factorization.html?highlight=svd 

And from their getting started page:
https://surprise.readthedocs.io/en/stable/getting_started.html

In [7]:
from surprise import accuracy
from surprise.model_selection import train_test_split


# Fixed seed so that both the random split and the SVD fit are reproducible; without it
# every re-run of this cell reports a different RMSE.
RANDOM_STATE = 42

# Sample a random trainset and testset; the test set is made of 25% of the ratings.
trainset, testset = train_test_split(data, test_size=.25, random_state=RANDOM_STATE)

# We'll use the famous SVD algorithm.
algo = SVD(random_state=RANDOM_STATE)

# Train the algorithm on the trainset, and predict ratings for the testset
algo.fit(trainset)
predictions = algo.test(testset)

# Then compute RMSE (accuracy.rmse prints it and also returns it as the cell's value)
accuracy.rmse(predictions)

RMSE: 0.9442


0.9442396140560069

In [19]:
import pandas as pd

from surprise import NormalPredictor
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import cross_validate

# A Reader is still required when loading from a DataFrame, but only its
# rating_scale parameter is needed.
reader = Reader(rating_scale=(1, 5))

# load_from_df expects the columns in this order: user id, item id, rating.
ratings_frame = raw_df[['user', 'movie', 'rating']]
data = Dataset.load_from_df(ratings_frame, reader)

# The dataset can now be used like any other, e.g. 10-fold cross-validation
# of the NormalPredictor algorithm.
normal_predictor = NormalPredictor()
cross_validate(normal_predictor, data, cv=10)

{'fit_time': (0.1152808666229248,
  0.15630888938903809,
  0.13222026824951172,
  0.13597488403320312,
  0.15244388580322266,
  0.14197802543640137,
  0.1411740779876709,
  0.14772701263427734,
  0.13224387168884277,
  0.1460111141204834),
 'test_mae': array([1.22555997, 1.21991467, 1.21458678, 1.21797677, 1.23084651,
        1.20643889, 1.23626158, 1.22134486, 1.22889777, 1.21578468]),
 'test_rmse': array([1.52612628, 1.51704517, 1.51156922, 1.51663395, 1.53162476,
        1.5003961 , 1.53598223, 1.51638514, 1.53004993, 1.51123425]),
 'test_time': (0.05736088752746582,
  0.13531804084777832,
  0.04775595664978027,
  0.05068707466125488,
  0.05751800537109375,
  0.05187702178955078,
  0.04990887641906738,
  0.04981827735900879,
  0.12294316291809082,
  0.04957294464111328)}