In [1]:
import os
import pickle

import joblib
import pandas as pd
from pymongo import MongoClient
from sklearn.model_selection import train_test_split
from surprise import Dataset, Reader, SVD
from surprise.model_selection import cross_validate

In [2]:
# Read the connection string from the MONGO_URI environment variable so that
# credentials do not have to live in the notebook. The fallback is the URI
# this notebook previously hard-coded, so existing setups keep working.
MONGO_URI = os.environ.get("MONGO_URI", "mongodb://root:root@mongo:27017/")
client = MongoClient(MONGO_URI)
db = client["movie_database"]

In [3]:
def _load_collection(collection):
  """Read every document of ``collection`` into a DataFrame without the ``_id`` column."""
  return pd.DataFrame(list(collection.find())).drop(columns='_id')


# Pull both collections fully into memory; everything below works on these frames.
df_movies = _load_collection(db.movies)
df_ratings = _load_collection(db.ratings)

In [4]:
# Peek at the first five movies to check the loaded columns.
df_movies.head(n=5)

Unnamed: 0,movie_id,title,genres
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


In [5]:
# Peek at the first five ratings to check the loaded columns.
df_ratings.head(n=5)

Unnamed: 0,user_id,movie_id,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [6]:
# The three columns handed to Surprise, in the order user, item, rating.
rating_columns = ['user_id', 'movie_id', 'rating']

# Ratings are declared to lie on a 1-5 scale.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df_ratings[rating_columns], reader)

In [7]:
# Train on every rating in `data`; no hold-out split is made in this cell.
trainset = data.build_full_trainset()

# Fix the seed so that a fresh "Restart & Run All" reproduces the same
# predictions; without it SVD's random initialisation changes the estimates
# from run to run.
model = SVD(random_state=42)
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7a07705f4890>

In [8]:
def predict_movie(df_movies, user_id, top_n=10, algo=None):
  """Print the ``top_n`` movies with the highest predicted rating for a user.

  Every ``movie_id`` in ``df_movies`` is scored with
  ``algo.predict(user_id, movie_id).est``. The scores are sorted in
  descending order and the best ``top_n`` are printed, one per line, as
  ``"<title> (Rating: <score with two decimals>)"``.

  Args:
    df_movies: DataFrame with at least ``movie_id`` and ``title`` columns.
    user_id: User id passed unchanged to ``algo.predict``.
    top_n: How many recommendations to print. Defaults to 10; values below
      zero are treated as zero.
    algo: Object exposing ``predict(user_id, movie_id)`` whose result has an
      ``est`` attribute. When omitted, the notebook-level ``model`` is used.

  Returns:
    None. The recommendations are written to standard output.
  """
  if algo is None:
      # Backward-compatible default: the globally defined `model`.
      algo = model

  scored = [
      (movie_id, algo.predict(user_id, movie_id).est)
      for movie_id in df_movies['movie_id']
  ]
  # list.sort is stable, so movies with equal scores keep their df_movies order.
  scored.sort(key=lambda pair: pair[1], reverse=True)

  for movie_id, rating in scored[:max(top_n, 0)]:
      # First matching row supplies the title for this movie_id.
      movie = df_movies[df_movies['movie_id'] == movie_id].iloc[0]
      print(f"{movie['title']} (Rating: {rating:.2f})")

In [9]:
def add_new_data(df, user_id, movie_id, rate_movie, timestamp):
  """Return a new DataFrame equal to ``df`` plus one appended rating row.

  The four values are matched to ``df``'s columns by position, in the order
  ``user_id, movie_id, rate_movie, timestamp``, so ``df`` must have exactly
  four columns in that order.

  ``df`` itself is not modified: ``pd.concat`` builds a new frame, so the
  caller must keep the return value, e.g.
  ``df_ratings = add_new_data(df_ratings, ...)``.

  Args:
    df: Ratings DataFrame to extend.
    user_id: Value for the first column of the new row.
    movie_id: Value for the second column of the new row.
    rate_movie: Value for the third column of the new row.
    timestamp: Value for the fourth column of the new row.

  Returns:
    A new DataFrame with the extra row last and a fresh 0..n-1 index
    (``ignore_index=True``).
  """
  new_row = pd.DataFrame([[user_id, movie_id, rate_movie, timestamp]], columns=df.columns)
  # Previously the result was bound to the local name `df` and dropped on
  # return, which made the function a no-op for the caller.
  return pd.concat([df, new_row], ignore_index=True)

In [11]:
# One 5-star rating for movie 1 from a synthetic user id.
new_rating = dict(
    user_id=199999999,
    movie_id=1,
    rate_movie=5,
    timestamp=978300760,
)
# NOTE(review): df_ratings is passed in but not rebound here, and the model
# was already fit in an earlier cell, so this rating is not part of what the
# model was trained on.
add_new_data(df=df_ratings, **new_rating)

In [12]:
# Print the recommendations for the synthetic user id used in the cell above.
predict_movie(df_movies, user_id=199999999)

Sanjuro (1962) (Rating: 4.73)
Shawshank Redemption, The (1994) (Rating: 4.62)
Schindler's List (1993) (Rating: 4.55)
Close Shave, A (1995) (Rating: 4.52)
Seven Samurai (The Magnificent Seven) (Shichinin no samurai) (1954) (Rating: 4.51)
Wrong Trousers, The (1993) (Rating: 4.49)
Godfather, The (1972) (Rating: 4.49)
Rear Window (1954) (Rating: 4.48)
Usual Suspects, The (1995) (Rating: 4.47)
For All Mankind (1989) (Rating: 4.45)


In [14]:
# Persist the fitted model. The `with` block guarantees the file handle is
# flushed and closed even if pickling raises.
with open('movie_recommendation_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)