In [None]:
# Import libraries
import pandas as pd
import numpy as np
import math

In [None]:
# load MovieLens 1M Dataset
from google.colab import drive

drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Reading ratings file
ratings = pd.read_csv("/content/drive/ml-1m/ratings.dat", sep="::", engine="python", header=None,
                   names=['user_id', 'movie_id', 'rating', 'timestamp'])

# Show the first few rows of the DataFrame
ratings.head()

Unnamed: 0,user_id,movie_id,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [None]:
print(ratings.info())
ratings.shape

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000209 entries, 0 to 1000208
Data columns (total 4 columns):
 #   Column     Non-Null Count    Dtype
---  ------     --------------    -----
 0   user_id    1000209 non-null  int64
 1   movie_id   1000209 non-null  int64
 2   rating     1000209 non-null  int64
 3   timestamp  1000209 non-null  int64
dtypes: int64(4)
memory usage: 30.5 MB
None


(1000209, 4)

In [None]:
# Reading users file
users = pd.read_csv("/content/drive/MyDrive/RS/ml-1m/users.dat", sep="::", engine="python", header=None,
                   names=['user_id', 'gender', 'zipcode', 'age_desc', 'occ_desc'])

# Show the first few rows of the DataFrame
users.head()

Unnamed: 0,user_id,gender,zipcode,age_desc,occ_desc
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,2460
4,5,M,25,20,55455


In [None]:
# Reading movies file
movies = pd.read_csv("/content/drive/MyDrive/RS/ml-1m/movies.dat", sep="::", engine="python", header=None,
                   names=['movie_id', 'title', 'genres'], encoding='latin-1')

# Show the first few rows of the DataFrame
movies.head()
# print(movies.movie_id.unique().shape[0])

Unnamed: 0,movie_id,title,genres
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


In [None]:
# count the number of unique users and movies.
n_users = ratings.user_id.unique().shape[0]
n_movies = ratings.movie_id.unique().shape[0]
print('Number of users = ' + str(n_users) + ' | Number of movies = ' + str(n_movies))

Number of users = 6040 | Number of movies = 3706


In [None]:
# Drop users and items with less than 30 interactions from ratings

# Compute the number of interactions for each user and each movie
user_interactions = ratings.groupby('user_id')['movie_id'].count()
movie_interactions = ratings.groupby('movie_id')['user_id'].count()

# Get a list of active users and active movies
active_users = user_interactions.loc[user_interactions >= 30].index.tolist()
active_movies = movie_interactions.loc[movie_interactions >= 30].index.tolist()

# Filter the ratings dataset to include only interactions from active users and active movies
ratings_filtered = ratings[(ratings['user_id'].isin(active_users)) & (ratings['movie_id'].isin(active_movies))]
# print(ratings_filtered)

# Compute the number of dropped users and items
num_dropped_users = len(user_interactions) - len(active_users)
num_dropped_movies = len(movie_interactions) - len(active_movies)
print(f"Dropped {num_dropped_users} users and {num_dropped_movies} items.")

Dropped 751 users and 870 items.


In [None]:
# Drop items with less than 30 interactions from movies

# Perform a left join on the movies and ratings datasets
merged = pd.merge(movies, ratings_filtered, on='movie_id', how='left')
# Drop rows with null values in the rating column
merged = merged.dropna(subset=['rating'])
# Keep only the movie_id, title, and genres columns from the merged dataset
merged = merged[['movie_id', 'title', 'genres']]

# Drop duplicates from the merged dataset
merged = merged.drop_duplicates()

# Perform a left join on the merged dataset and the movies dataset
final_movies = pd.merge(merged, movies, on=['movie_id', 'title', 'genres'], how='left')
final_movies

Unnamed: 0,movie_id,title,genres
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
2831,3948,Meet the Parents (2000),Comedy
2832,3949,Requiem for a Dream (2000),Drama
2833,3950,Tigerland (2000),Drama
2834,3951,Two Family House (2000),Drama


In [None]:
# Converting Genres from String Format to List
final_movies['genres'] = final_movies['genres'].apply(lambda x: x.split("|"))
final_movies.head()
# count the number of movie genres
from collections import Counter

genres_counts = Counter(g for genres in final_movies['genres'] for g in genres)
print(f"There are {len(genres_counts)} genre labels.")
genres_counts


There are 18 genre labels.


Counter({'Animation': 96,
         "Children's": 225,
         'Comedy': 948,
         'Adventure': 254,
         'Fantasy': 64,
         'Romance': 373,
         'Drama': 1079,
         'Action': 453,
         'Crime': 160,
         'Thriller': 418,
         'Horror': 264,
         'Sci-Fi': 246,
         'War': 125,
         'Musical': 103,
         'Documentary': 54,
         'Mystery': 92,
         'Film-Noir': 35,
         'Western': 53})

In [None]:
# we need to manipulate the genres column so that each genre is represented as a separate binary feature
# "1" indicates that the movie falls under a given genre, while "0" does not.
genres = list(genres_counts.keys())

for g in genres:
    final_movies[g] = final_movies['genres'].transform(lambda x: int(g in x))
final_movies[genres].head()

Unnamed: 0,Animation,Children's,Comedy,Adventure,Fantasy,Romance,Drama,Action,Crime,Thriller,Horror,Sci-Fi,War,Musical,Documentary,Mystery,Film-Noir,Western
0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
4,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
# Create a user-item matrix from the ratings dataset
user_item_matrix = ratings_filtered.pivot(index = 'user_id', columns ='movie_id', values = 'rating').fillna(0)
user_item_matrix.head()

movie_id,1,2,3,4,5,6,7,8,9,10,...,3937,3943,3945,3946,3947,3948,3949,3950,3951,3952
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Separate the movies data into warm and cold items
warm_items = final_movies.sample(frac=0.5, random_state=42)
cold_items = final_movies.drop(warm_items.index)
print('Number of warm items:', len(warm_items))
print('Number of cold items:', len(cold_items))

# Get the movie ids of the warm items
# warm_item_ids = warm_items['movie_id'].tolist()
# Separate the user-item matrix based on the warm items
warm_item_ids = warm_items['movie_id'].values
warm_user_item_matrix = user_item_matrix.loc[:, user_item_matrix.columns.isin(warm_item_ids)]
print('User-item matrix for warm items:')
warm_user_item_matrix.head()

Number of warm items: 1418
Number of cold items: 1418
User-item matrix for warm items:


movie_id,3,8,9,14,19,21,24,26,30,31,...,3930,3936,3943,3946,3947,3948,3949,3950,3951,3952
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
from sklearn.decomposition import TruncatedSVD

# Create SVD model
svd_model = TruncatedSVD(n_components=26, random_state=42)

# Fit SVD model on warm user-item matrix
svd_model.fit(warm_user_item_matrix)


In [None]:
# Predict missing ratings
predicted_ratings = svd_model.inverse_transform(svd_model.transform(warm_user_item_matrix))

# Create new user-item matrix based on actual ratings and predicted ratings
new_user_item_matrix = pd.DataFrame(predicted_ratings, columns=warm_user_item_matrix.columns, index=warm_user_item_matrix.index)
new_user_item_matrix[warm_user_item_matrix > 0] = warm_user_item_matrix[warm_user_item_matrix > 0]
# Print SVD predicting matrix
print('SVD Predicting Matrix:')
# print(new_user_item_matrix.head())
new_warm_user_item_matrix=pd.DataFrame(predicted_ratings, columns=warm_user_item_matrix.columns, index=warm_user_item_matrix.index)
new_warm_user_item_matrix

SVD Predicting Matrix:


movie_id,3,8,9,14,19,21,24,26,30,31,...,3930,3936,3943,3946,3947,3948,3949,3950,3951,3952
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.005583,0.124300,-0.012755,0.022333,0.034696,-0.620668,0.269028,0.001234,0.003677,0.051206,...,-0.099339,-0.035061,-0.001930,-0.015127,-0.073443,0.268383,-0.002034,-0.015547,0.000310,0.115749
2,0.231349,0.036482,0.093911,0.074976,0.126905,1.000000,0.166180,0.136580,0.024567,0.149811,...,0.038136,0.143469,-0.074376,0.049394,-0.006121,0.327762,-0.282332,-0.017079,-0.027057,-0.029724
3,0.126691,0.052836,-0.018930,-0.024144,0.006118,0.459064,0.184469,-0.039941,-0.038194,-0.051926,...,0.030753,0.069450,-0.040373,0.023188,-0.019860,0.077119,-0.149610,-0.047768,-0.001592,-0.018810
5,-0.281341,0.007083,-0.114459,0.312341,-0.237505,1.717995,1.000000,0.222151,0.332732,-0.050551,...,-0.056431,0.040338,0.134757,-0.034698,0.012158,0.012689,0.625022,0.046079,0.106698,0.257010
6,0.433064,0.073913,0.011860,-0.141851,-0.043891,0.011813,0.115508,-0.020307,-0.016056,0.128417,...,0.149743,0.092926,0.085940,0.096718,0.082386,1.179381,0.442498,0.116581,0.084439,0.657956
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6035,1.000000,0.051635,-0.041832,0.418575,0.554191,2.000000,1.078232,2.000000,0.154516,2.000000,...,-0.057581,0.072793,-0.020094,-0.067353,-0.063654,-0.495227,-0.279563,-0.060197,-0.008103,-0.238423
6036,-0.023371,0.104167,-0.127492,1.111825,-0.182887,3.000000,2.000000,3.000000,4.000000,0.385661,...,1.122148,0.437804,0.226608,-0.031585,0.269429,-0.234249,0.822983,0.262250,0.205246,0.663062
6037,-0.284593,-0.032267,0.043971,-0.058239,-0.128803,0.362810,0.515693,-0.136651,-0.034638,-0.070725,...,0.225938,0.031248,-0.018277,0.012869,0.053432,0.143452,0.016971,0.074359,-0.047732,0.227374
6039,0.214310,0.020286,0.023785,-0.025268,0.056373,0.448227,-0.251988,0.089812,-0.075457,-0.008429,...,0.353344,0.318577,-0.025493,-0.051958,0.097896,-0.175070,-0.267649,-0.036082,-0.071235,-0.236208


In [None]:
from sklearn.model_selection import KFold
from sklearn.metrics import ndcg_score

# Create 5-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

ndcg_scores = []

# Loop over folds
for train_idx, test_idx in kf.split(new_user_item_matrix):
    # Split into training and testing sets
    train = new_user_item_matrix.iloc[train_idx]
    test = new_user_item_matrix.iloc[test_idx]

    # Fit SVD model on training set
    svd_model.fit(train)

    # Predict missing ratings on test set
    predicted_ratings = svd_model.inverse_transform(svd_model.transform(test))

    # Calculate NDCG score
    ndcg_scores.append(ndcg_score(test.values, predicted_ratings, k=10))

print('NDCG Score:', sum(ndcg_scores) / len(ndcg_scores))



NDCG Score: 0.827030452468067


In [None]:
# second stage

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import cross_val_score

In [None]:
warm_items = warm_items.set_index('movie_id').sort_index(ascending=True)
warm_movie_features = warm_items[genres]
warm_movie_features

cold_items = cold_items.set_index('movie_id').sort_index(ascending=True)
cold_movie_features = cold_items[genres]
cold_movie_features


Unnamed: 0_level_0,Animation,Children's,Comedy,Adventure,Fantasy,Romance,Drama,Action,Crime,Thriller,Horror,Sci-Fi,War,Musical,Documentary,Mystery,Film-Noir,Western
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
5,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3925,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3928,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
3932,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0
3937,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0


In [None]:
# Train a multi-output random forest model
rf_model = MultiOutputRegressor(RandomForestRegressor(n_estimators=20, min_samples_leaf=5, random_state=42))
rf_model.fit(warm_movie_features, new_warm_user_item_matrix.T)


In [None]:
# Separate the user-item matrix based on the cold items
cold_item_ids = cold_items['movie_id'].values
cold_user_item_matrix = user_item_matrix.loc[:, user_item_matrix.columns.isin(cold_item_ids)]
cold_user_item_matrix.shape

# Use Random Forest model to predict ratings
predicted_ratings_rf = rf_model.predict(cold_movie_features)

# Reshape predictions to match matrix dimensions
predicted_cold_item_user_matrix = pd.DataFrame(predicted_ratings_rf, columns=cold_user_item_matrix.T.columns, index=cold_user_item_matrix.T.index)

print('Predicted Cold User-Item Matrix:')
predicted_cold_item_user_matrix.T

Predicted Cold User-Item Matrix:


movie_id,1,2,4,5,6,7,10,11,12,13,...,3916,3918,3920,3921,3922,3925,3928,3932,3937,3945
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.631304,0.197065,0.011844,0.026876,-0.062546,-0.010705,0.056969,0.012714,-0.046099,0.905259,...,0.196115,-0.023681,0.308376,0.026876,0.026876,0.026876,-0.046099,-0.104011,-0.070455,0.905259
2,-0.046350,-0.057376,0.252847,0.097540,1.486922,0.082489,0.731711,0.169118,0.159041,-0.153052,...,0.422711,-0.013060,0.422711,0.097540,0.097540,0.097540,0.159041,0.002010,-0.017782,-0.119823
3,0.971571,0.280931,0.077717,0.071362,0.214690,-0.039743,0.540812,-0.002867,0.066080,0.018454,...,0.021365,0.070614,0.135517,0.071362,0.071362,0.071362,0.066080,-0.028544,-0.134004,0.095990
5,0.955541,0.172702,0.735063,0.278255,1.151127,0.178122,0.190764,0.286102,0.116888,0.224021,...,0.500289,0.040096,0.500289,0.278255,0.278255,0.278255,0.116888,0.204528,0.577807,0.216696
6,0.537957,0.226741,0.065586,0.047292,0.127729,0.723495,0.221043,0.568726,0.013629,0.437742,...,0.049994,-0.005873,0.126966,0.047292,0.047292,0.047292,0.013629,0.013365,-0.029384,0.364753
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6035,0.850212,0.441905,0.539793,0.332335,0.205464,0.527248,-0.017220,0.403560,0.149416,0.079911,...,0.470138,-0.039793,0.470138,0.332335,0.332335,0.332335,0.149416,0.579712,0.821360,0.081680
6036,2.569537,0.690358,1.852592,0.969320,1.401469,1.113366,0.254940,2.086015,0.933872,2.145526,...,1.832123,1.114387,1.832123,0.969320,0.969320,0.969320,0.933872,2.102720,2.018211,2.145526
6037,0.338499,-0.183334,0.588414,0.174088,0.736032,0.192378,0.326079,0.425703,0.121464,-0.029680,...,0.325854,0.264898,0.315476,0.174088,0.174088,0.174088,0.121464,0.704704,1.006470,-0.042174
6039,0.862402,0.128746,0.448945,0.304891,0.076278,0.337042,-0.013438,0.167687,0.038944,0.247912,...,0.041446,-0.023914,0.041446,0.304891,0.304891,0.304891,0.038944,0.107518,0.052484,0.185103


In [None]:
# Recommend top 10 movies for each user
top_recommendations = {}
for userId in predicted_cold_item_user_matrix.T.index:
    user_ratings = predicted_cold_item_user_matrix.T.loc[userId].sort_values(ascending=False)
    top_recommendations[userId] = list(user_ratings.head(10).index)

print('Top 10 movie recommendations for each user:')
print(top_recommendations)

Top 10 movie recommendations for each user:
{1: [2102, 588, 2080, 2081, 2096, 595, 661, 1022, 1024, 1029], 2: [1210, 2468, 1722, 1264, 3584, 3705, 969, 1374, 2275, 1371], 3: [3805, 2422, 1287, 592, 2370, 552, 688, 153, 2880, 3104], 5: [1732, 1647, 521, 1352, 2917, 164, 2952, 2605, 2561, 2391], 6: [2102, 588, 2096, 3759, 364, 2092, 2087, 595, 1489, 661], 7: [1488, 1233, 3654, 2028, 1215, 1356, 2105, 1373, 1371, 2528], 8: [1722, 2468, 1210, 3705, 2692, 1912, 1523, 2028, 1233, 3654], 9: [2700, 1, 3754, 2142, 2141, 673, 2354, 2102, 2081, 588], 10: [260, 2105, 2102, 588, 2143, 3889, 3877, 653, 2193, 2081], 11: [946, 1184, 1078, 1256, 688, 2917, 1686, 1352, 521, 3896], 13: [1210, 2642, 2641, 2275, 610, 2105, 2094, 1356, 1371, 1373], 15: [2028, 3654, 1233, 1488, 1732, 1210, 1356, 1374, 1373, 1371], 16: [1, 673, 2142, 2141, 3754, 2354, 588, 2102, 2080, 2081], 17: [32, 3700, 2117, 3701, 3503, 2594, 1253, 3780, 680, 2663], 18: [2102, 2081, 2080, 588, 3159, 2092, 1489, 1029, 1022, 1024], 19: [210

In [None]:
# Evaluate model with 5-fold cross validation
scores = cross_val_score(rf_model, new_warm_user_item_matrix.T, warm_movie_features,
                         cv=5, scoring='neg_mean_absolute_error')

print('MAE scores:', -scores)
print('Average MAE:', -np.mean(scores))

MAE scores: [0.05103558 0.06359798 0.05003746 0.05541439 0.0768615 ]
Average MAE: 0.059389383225328475
