# Evaluation Metrics for Context-Aware Movie Recommender System


In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import spearmanr
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split

%matplotlib inline


In [13]:
# Load the ratings and the movie metadata from CSV
ratings_path = '../data/rating.csv'  # user ratings with timestamps
movies_path = '../data/movie.csv'    # movie metadata (movieId and title are used below)

ratings = pd.read_csv(ratings_path)
movies = pd.read_csv(movies_path)

In [15]:
# Parse the timestamp column into pandas datetimes so the `.dt` accessor
# can be used on it in the following cells.
ratings['timestamp'] = ratings['timestamp'].pipe(pd.to_datetime)


In [16]:
# Derive the temporal context features from the parsed timestamps:
# hour of day (0-23) and day of week (pandas encodes Monday as 0, Sunday as 6).
rating_time = ratings['timestamp'].dt
ratings['hour'] = rating_time.hour
ratings['day_of_week'] = rating_time.dayofweek

In [17]:
# Display the ratings frame to confirm the derived `hour` and `day_of_week`
# columns (pandas renders a truncated view of large frames).
ratings

Unnamed: 0,userId,movieId,rating,timestamp,hour,day_of_week
0,1,2,3.5,2005-04-02 23:53:47,23,5
1,1,29,3.5,2005-04-02 23:31:16,23,5
2,1,32,3.5,2005-04-02 23:33:39,23,5
3,1,47,3.5,2005-04-02 23:32:07,23,5
4,1,50,3.5,2005-04-02 23:29:40,23,5
...,...,...,...,...,...,...
20000258,138493,68954,4.5,2009-11-13 15:42:00,15,4
20000259,138493,69526,4.5,2009-12-03 18:31:48,18,3
20000260,138493,69644,3.0,2009-12-07 18:10:57,18,0
20000261,138493,70286,5.0,2009-11-13 15:42:24,15,4


In [18]:
# Merge ratings with movie information (adds each movie's title via movieId)
# Attach the movie title to every rating by joining on movieId.
# - how='inner' is the pandas default, made explicit here: ratings whose
#   movieId is missing from `movies` are dropped.
# - validate='many_to_one' makes pandas raise a MergeError if `movies`
#   contains duplicate movieId values, which would otherwise silently
#   duplicate rating rows and inflate the dataset.
data = pd.merge(
    ratings,
    movies[['movieId', 'title']],
    on='movieId',
    how='inner',
    validate='many_to_one',
)

In [19]:
# Split the merged frame into model inputs (user, movie and time-of-rating
# context columns) and the prediction target (the rating itself).
feature_columns = ['userId', 'movieId', 'hour', 'day_of_week']
target_column = 'rating'

X = data[feature_columns]
y = data[target_column]

In [20]:
# Inspect the feature matrix (pandas renders a truncated view of large frames).
X

Unnamed: 0,userId,movieId,hour,day_of_week
0,1,2,23,5
1,1,29,23,5
2,1,32,23,5
3,1,47,23,5
4,1,50,23,5
...,...,...,...,...
20000258,138493,68954,15,4
20000259,138493,69526,18,3
20000260,138493,69644,18,0
20000261,138493,70286,15,4


In [21]:
# Hold out a reproducible random subset of the rows for evaluation.
# `train_test_split` is imported in the notebook's top import cell.
TEST_SIZE = 0.2  # fraction of rows reserved for the test set
SEED = 42        # fixed seed so the split is identical on every run

# NOTE(review): a random split mixes earlier and later ratings of the same
# user; if the evaluation should mimic predicting future ratings, consider
# a time-based split instead — confirm with the intended evaluation protocol.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

# Sanity checks: the split must partition the data without losing rows,
# and features/targets must stay aligned.
assert len(X_train) + len(X_test) == len(X)
assert len(X_train) == len(y_train) and len(X_test) == len(y_test)


In [22]:
# Report how many rows ended up on each side of the split.
n_train, n_test = len(X_train), len(X_test)
print(f"Training set size: {n_train}, Test set size: {n_test}")


Training set size: 16000210, Test set size: 4000053


In [28]:
# Inspect the target series (the `rating` column of the merged frame).
y

0           3.5
1           3.5
2           3.5
3           3.5
4           3.5
           ... 
20000258    4.5
20000259    4.5
20000260    3.0
20000261    5.0
20000262    2.5
Name: rating, Length: 20000263, dtype: float64