In [1]:
import random

import numpy as np
from surprise import SVD
from surprise import Dataset
from surprise.model_selection import cross_validate

# Seed both RNGs before anything stochastic runs. The Surprise FAQ recommends
# seeding `random` and `numpy.random` so that the cross-validation results
# (fold split, SVD initialisation) are reproducible across kernel restarts.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

# Load the movielens-100k dataset (download it if needed).
data = Dataset.load_builtin('ml-100k')

# Use the famous SVD algorithm.
algo = SVD()

# Run 5-fold cross-validation and print results.
# verbose=True prints the per-fold table; the returned dict of measures is
# left as the cell's last expression so the notebook also displays it.
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9329  0.9312  0.9405  0.9330  0.9435  0.9362  0.0048  
MAE (testset)     0.7345  0.7351  0.7381  0.7350  0.7454  0.7376  0.0041  
Fit time          4.17    4.25    4.19    4.08    4.12    4.16    0.06    
Test time         0.22    0.13    0.15    0.11    0.14    0.15    0.04    


{'test_rmse': array([0.93294224, 0.93122752, 0.9405023 , 0.93302933, 0.94352882]),
 'test_mae': array([0.73446076, 0.73510803, 0.7380812 , 0.73496957, 0.74541692]),
 'fit_time': (4.165370941162109,
  4.245474815368652,
  4.187935829162598,
  4.08091139793396,
  4.119876384735107),
 'test_time': (0.22140765190124512,
  0.1262648105621338,
  0.15016746520996094,
  0.10967850685119629,
  0.1406247615814209)}

In [10]:
from surprise import KNNBasic
from surprise import Dataset
# Fit a KNNBasic model on the whole ml-100k dataset and print the estimated
# rating for one (user, item) pair.
data = Dataset.load_builtin('ml-100k') # Load the movielens-100k dataset.
trainset = data.build_full_trainset() # Build the training set from the loaded data.
# sim_options can configure the similarity measure, e.g.
# {'name': 'cosine', 'user_based': True, 'min_support': 10, 'shrinkage': ...}.
# It is left empty here; the run below reports that the msd similarity is computed.
algo = KNNBasic(k=40, min_k=1,sim_options={}, verbose=True) # Create the algorithm; it is trained on the next line.
algo.fit(trainset)
uid = str(196)  # Raw user id (as it appears in the ratings file). Note: it is a string.
iid = str(302)  # Raw item id (same remarks as for the user id).
pred = algo.predict(uid, iid, r_ui=4, verbose=True) # Predict the rating for this specific user and item.

Computing the msd similarity matrix...
Done computing similarity matrix.
user: 196        item: 302        r_ui = 4.00   est = 4.06   {'actual_k': 40, 'was_impossible': False}


In [12]:
from collections import defaultdict
from surprise import SVD
from surprise import Dataset
def get_top_n(predictions, n=10):
    """Group predictions by user and keep each user's n best-estimated items.

    Args:
        predictions: Iterable of 5-field prediction records, unpacked as
            (raw user id, raw item id, true rating, estimated rating,
            details) -- e.g. the list returned by an algorithm's test
            method. Only the two ids and the estimate are used.
        n (int): Number of recommendations to keep per user. Defaults to 10.

    Returns:
        A defaultdict(list) mapping each raw user id to a list of
        (raw item id, estimated rating) tuples, ordered by decreasing
        estimate and sliced to its first n entries.
    """
    candidates = defaultdict(list)
    for user, item, _true_rating, estimate, _details in predictions:
        candidates[user].append((item, estimate))

    # Rank every user's candidates, best estimate first, and trim to n.
    # A snapshot of the keys is walked so the reassignment below never
    # touches the mapping that is being iterated.
    for user in list(candidates):
        ranked = sorted(candidates[user], key=lambda pair: pair[1], reverse=True)
        candidates[user] = ranked[:n]
    return candidates
import random

import numpy as np

# Seed both RNGs (as the Surprise FAQ recommends for reproducible
# experiments) so this cell gives the same recommendations on every run,
# whatever cells were executed before it.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

# First train an SVD algorithm on the movielens dataset.
data = Dataset.load_builtin('ml-100k')
trainset = data.build_full_trainset()
algo = SVD()
algo.fit(trainset)

# Then predict ratings for all pairs (u, i) that are NOT in the training set.
testset = trainset.build_anti_testset()
predictions = algo.test(testset)
top_n = get_top_n(predictions, n=10)

# Print the recommended items for each user.
for uid, user_ratings in top_n.items():
    print(uid, [iid for (iid, _) in user_ratings])

196 ['318', '408', '513', '89', '205', '483', '64', '169', '603', '484']
186 ['64', '318', '169', '133', '498', '515', '69', '313', '272', '513']
22 ['12', '408', '234', '191', '520', '496', '150', '22', '11', '169']
244 ['127', '187', '285', '654', '12', '124', '134', '14', '529', '1142']
166 ['169', '56', '316', '12', '408', '114', '178', '187', '318', '515']
298 ['64', '313', '191', '12', '272', '661', '408', '169', '520', '480']
115 ['474', '285', '238', '175', '191', '169', '179', '1449', '134', '189']
253 ['174', '205', '169', '423', '657', '133', '199', '172', '114', '603']
305 ['513', '515', '919', '137', '657', '114', '126', '320', '489', '603']
6 ['603', '654', '179', '606', '1062', '641', '1203', '443', '60', '1194']
62 ['408', '661', '234', '169', '654', '197', '603', '187', '251', '23']
286 ['496', '493', '185', '479', '134', '958', '489', '474', '603', '478']
200 ['64', '498', '480', '519', '178', '114', '12', '83', '513', '166']
210 ['169', '408', '474', '427', '64', '19