Defaulting to user installation because normal site-packages is not writeable
Collecting scikit-surprise
  Using cached scikit_surprise-1.1.4.tar.gz (154 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml): started
  Building wheel for scikit-surprise (pyproject.toml): finished with status 'done'
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp312-cp312-win_amd64.whl size=1290969 sha256=e64bd33e3e02aeaa1100e60b4988f84c4ec7c0613f2fa50e7ed1969d22ddfea9
  Stored in directory: c:\users\admin\appdata\local\pip\cache\wheels\75\fa\bc\739bc2cb1fbaab6061854e6cfbb81a0ae52c92a502a7fa454b
Su

In [4]:
from surprise import SVD, SVDpp, NMF
from surprise import Dataset
import pandas as pd
from surprise import Reader
from surprise.model_selection import GridSearchCV, cross_validate

In [19]:
def load_data(file_path):
    ratings = pd.read_csv(file_path)
    reader = Reader(rating_scale=(0.5, 5.0))
    data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)
    return data

def evaluate_model(data, model, model_name):
    print(f"Evaluating {model_name}...")
    results = cross_validate(model, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
    print(results)

def optimize_model(data, model_class, param_grid, model_name):
    print(f"Optimizing {model_name} with parameters: {param_grid}")
    gs = GridSearchCV(model_class, param_grid, measures=['RMSE', 'MAE'], cv=5)
    try:
        gs.fit(data)
        if 'RMSE' in gs.best_score:
            print(f"Best RMSE: {gs.best_score['RMSE']}")
            print(f"Best parameters for {model_name}: {gs.best_params['RMSE']}")
        else:
            print("RMSE not available in the results.")
        if 'MAE' in gs.best_score:
            print(f"Best MAE: {gs.best_score['MAE']}")
            print(f"Best parameters for MAE: {gs.best_params['MAE']}")
    except Exception as e:
        print(f"An error occurred while optimizing {model_name}: {e}")
    if hasattr(gs, 'cv_results_'):
        print("CV Results Keys:", gs.cv_results_.keys())
        print("Full CV Results:", gs.cv_results_)
    else:
        print("No CV results available or GridSearchCV did not run successfully.")

def main(file_path):
    data = load_data(file_path)

    evaluate_model(data, SVD(), "SVD")
    param_grid_svd = {'n_epochs': [20, 30], 'lr_all': [0.005, 0.01], 'reg_all': [0.02, 0.1]}
    optimize_model(data, SVD, param_grid_svd, "SVD")

    evaluate_model(data, SVDpp(), "SVD++")
    param_grid_svdpp = {'n_epochs': [20, 30], 'lr_all': [0.005, 0.01], 'reg_all': [0.02, 0.1]}
    optimize_model(data, SVDpp, param_grid_svdpp, "SVD++")

    evaluate_model(data, NMF(), "NMF")
    param_grid_nmf = {'n_epochs': [20, 30], 'n_factors': [15, 20]}
    optimize_model(data, NMF, param_grid_nmf, "NMF")

if __name__ == "__main__":
    main('ratings.csv')

Evaluating SVD...
Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8757  0.8712  0.8757  0.8760  0.8692  0.8736  0.0028  
MAE (testset)     0.6750  0.6686  0.6734  0.6725  0.6708  0.6721  0.0022  
Fit time          1.30    1.14    1.18    1.33    1.26    1.24    0.07    
Test time         0.12    0.13    0.10    0.14    0.10    0.12    0.02    
{'test_rmse': array([0.87569492, 0.87123266, 0.875693  , 0.87598338, 0.869237  ]), 'test_mae': array([0.6750147 , 0.66862857, 0.67339197, 0.6725066 , 0.67084239]), 'fit_time': (1.2956056594848633, 1.13631272315979, 1.1825745105743408, 1.3329226970672607, 1.262061595916748), 'test_time': (0.12199997901916504, 0.13399982452392578, 0.09899163246154785, 0.13979053497314453, 0.09999966621398926)}
Optimizing SVD with parameters: {'n_epochs': [20, 30], 'lr_all': [0.005, 0.01], 'reg_all': [0.02, 0.1]}
RMSE not available in the results.
No CV results avail