# MovieLens and Recommender System

### System I: Recommendation Based on Genres

In [25]:
import numpy as np
import pandas as pd
import streamlit as st


In [26]:
import warnings
warnings.filterwarnings('ignore')

#### Load the data

In [27]:
ratings = pd.read_csv('ratings.dat', sep='::', engine = 'python', header=None)
ratings.columns = ['UserID', 'MovieID', 'Rating', 'Timestamp']

movies = pd.read_csv('movies.dat', sep='::', engine = 'python',
                     encoding="ISO-8859-1", header = None)
movies.columns = ['MovieID', 'Title', 'Genres']

users = pd.read_csv(f'users.dat', sep='::', engine = 'python', header = None)
users.columns = ['UserID', 'Gender', 'Age', 'Occupation', 'Zipcode']
    

In [28]:
rating_merged = ratings.merge(movies, left_on = 'MovieID', right_on = 'MovieID')
#rating_merged


### How recommendations are generated by Genre?

* We are going to recommend movies based on highly-rated by users.

* `How to handle movies with multiple genres?` We are denormalizing genre + Movie ID. By doing so, that Movie becomes a recommendation candidate / recall set of both the Genres.

* `A scenario to be addressed:` How do we come up with a score that has Rating & Number of Ratings embedded inside.

* Our idea is, to keep the Rating schema simple. Add a dimnishing factor. And this can shrink the Movies with less ratings more. And perform less shrinkage on movies with more ratings.

* Here is a scheme, we are coming with: 
    * (`avg_rating_of_the_movie * rating_count_of_the_movie` + `min_rating_of_all_movies * avg_rating_count_of_all_movies`) / (`rating_count_of_the_movie + avg_rating_count_of_all_movies`)


#### Weighted Rating computed for each Movie

In [29]:
movie_rating = rating_merged[['MovieID', 'Rating']].groupby("MovieID").agg(['mean', 'count']).droplevel(0, axis=1).reset_index()

movie_rating.rename(columns={"mean": "Rating", "count": "Rating_count"}, inplace=True)


avg_rating_count = movie_rating['Rating_count'].mean() 
#avg_rating = (movie_rating['Rating'] * movie_rating['Rating_count']).sum() / movie_rating['Rating_count'].sum()
#avg_rating = 2.5
avg_rating = movie_rating['Rating'].min()

movie_rating['Weighted_Rating'] = (movie_rating['Rating'] * movie_rating['Rating_count'] + avg_rating * avg_rating_count)  / (movie_rating['Rating_count'] + avg_rating_count)



#### Data frame is built that has Movies with Genres & Weighted Rating

In [30]:
movie_with_rating = movies.join(movie_rating.set_index('MovieID'), how='left', on="MovieID")

movie_with_rating['Weighted_Rating'].fillna(value=avg_rating, inplace=True)

#movie_with_rating.sort_values(by='Rating_count', ascending=False)[0:30]

In [31]:
genre_movie_ratings = movie_with_rating.copy()
genre_movie_ratings['Genres'] = genre_movie_ratings['Genres'].str.split('|')
genre_movie_ratings = genre_movie_ratings.explode('Genres')
#genre_movie_ratings

### Find movie by genre

In [32]:
def get_all_genre():
    genres = genre_movie_ratings['Genres'].unique()
    
    return genres

In [33]:
def find_top_movies_by_genre(genre, n=10):
    top_movies = genre_movie_ratings[genre_movie_ratings['Genres'] == genre]


    top_movies = top_movies.sort_values(by='Weighted_Rating', ascending=False)
    
    top_movies = top_movies[0:n]
    return top_movies



In [34]:
get_all_genre()

array(['Animation', "Children's", 'Comedy', 'Adventure', 'Fantasy',
       'Romance', 'Drama', 'Action', 'Crime', 'Thriller', 'Horror',
       'Sci-Fi', 'Documentary', 'War', 'Musical', 'Mystery', 'Film-Noir',
       'Western'], dtype=object)

In [35]:
find_top_movies_by_genre(genre='Drama', n=10)

Unnamed: 0,MovieID,Title,Genres,Rating,Rating_count,Weighted_Rating
315,318,"Shawshank Redemption, The (1994)",Drama,4.554558,2227.0,4.170345
847,858,"Godfather, The (1972)",Drama,4.524966,2223.0,4.143341
523,527,Schindler's List (1993),Drama,4.510417,2304.0,4.142327
2789,2858,American Beauty (1999),Drama,4.317386,3428.0,4.075268
589,593,"Silence of the Lambs, The (1991)",Drama,4.351823,2578.0,4.034177
1959,2028,Saving Private Ryan (1998),Drama,4.337354,2653.0,4.029195
1178,1196,Star Wars: Episode V - The Empire Strikes Back...,Drama,4.292977,2990.0,4.020348
604,608,Fargo (1996),Drama,4.254676,2513.0,3.939032
900,912,Casablanca (1942),Drama,4.412822,1669.0,3.937765
1176,1193,One Flew Over the Cuckoo's Nest (1975),Drama,4.390725,1725.0,3.931993


### System II: Recommendation Based on IBCF

In [36]:
def get_random_movie_set(n=10):
    movie_set = movies.sample(n)
    return movie_set

get_random_movie_set()

Unnamed: 0,MovieID,Title,Genres
1706,1759,Four Days in September (1997),Drama
2580,2649,Son of Frankenstein (1939),Horror
93,95,Broken Arrow (1996),Action|Thriller
759,769,Marlene Dietrich: Shadow and Light (1996),Documentary
3206,3275,"Boondock Saints, The (1999)",Action|Comedy
1342,1363,"Preacher's Wife, The (1996)",Drama
1630,1676,Starship Troopers (1997),Action|Adventure|Sci-Fi|War
1594,1640,How to Be a Player (1997),Comedy
1716,1772,Blues Brothers 2000 (1998),Action|Comedy|Musical
1224,1244,Manhattan (1979),Comedy|Drama|Romance


#### Building Similarity Matrix

In [97]:
def build_similarity_matrix_v1():
    #rating_matrix = ratings.pivot_table(index="UserID", columns="MovieID", values="Rating")
    rating_matrix = pd.read_csv('Rmat.csv', sep=',')

    normalized_rating_matrix = rating_matrix.subtract(rating_matrix.mean(axis=1), axis='rows')

    cardinality_df = (~normalized_rating_matrix.isna()).astype('int')
    cardinality_df = cardinality_df.T
    cardinality_matrix = cardinality_df @ cardinality_df.T
    
    normalized_rating_matrix = normalized_rating_matrix.T
    normalized_rating_matrix = normalized_rating_matrix.fillna(0)

    nr = normalized_rating_matrix @ normalized_rating_matrix.T

    squared_normalized_rating_matrix = (normalized_rating_matrix * normalized_rating_matrix).sum(axis=1)
    squared_normalized_rating_matrix = squared_normalized_rating_matrix.to_numpy()
    squared_normalized_rating_matrix = np.sqrt(squared_normalized_rating_matrix)
    dr = squared_normalized_rating_matrix[:, np.newaxis] * squared_normalized_rating_matrix[np.newaxis, :]

    cosine_distance = nr/dr
    S = (1 + cosine_distance)/2
    
    np.fill_diagonal(S.values, np.nan)
    
    S[cardinality_matrix<3] = None
    
    #S[S.rank(axis=1, ascending=False)>30] = None
    return S

In [98]:
S_v1 = build_similarity_matrix_v1()

In [99]:

S_v1

Unnamed: 0,m1,m10,m100,m1000,m1002,m1003,m1004,m1005,m1006,m1007,...,m99,m990,m991,m992,m993,m994,m996,m997,m998,m999
m1,,0.505509,0.477509,0.511099,0.497227,0.471604,0.447174,0.448390,0.462001,0.481792,...,0.502093,0.449798,0.488735,0.472768,0.502632,0.550655,0.449482,0.500965,0.483786,0.479153
m10,0.505509,,0.508178,0.499409,,0.520256,0.481772,0.491437,0.526866,0.509923,...,0.474656,0.493233,0.490592,0.487408,,0.492317,0.515498,0.506088,0.492826,0.525125
m100,0.477509,0.508178,,0.498083,,0.601094,0.519871,0.505753,0.549409,0.499405,...,0.496772,0.516877,0.498421,0.484686,0.574447,0.451436,0.519526,0.466056,0.497359,0.518213
m1000,0.511099,0.499409,0.498083,,,0.467777,,,,0.504081,...,,,0.465165,0.526493,,0.487098,0.516611,,0.536843,0.532720
m1002,0.497227,,,,,,,,,,...,,,,,,0.515304,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
m994,0.550655,0.492317,0.451436,0.487098,0.515304,0.462534,0.475162,0.471181,0.466718,0.471792,...,0.494794,0.464956,0.492825,0.479168,0.503955,,0.453246,0.490887,0.480387,0.478639
m996,0.449482,0.515498,0.519526,0.516611,,0.541592,0.564457,0.522456,0.534665,0.510577,...,0.501990,0.553482,0.495981,0.547950,,0.453246,,0.476731,0.509818,0.518060
m997,0.500965,0.506088,0.466056,,,0.490814,0.527035,0.529735,0.467030,0.469491,...,0.480131,0.514275,0.482377,,0.482183,0.490887,0.476731,,0.505965,0.495996
m998,0.483786,0.492826,0.497359,0.536843,,0.492610,0.556721,0.508418,0.495911,0.500476,...,,0.530794,0.525574,0.563471,,0.480387,0.509818,0.505965,,0.525778


In [100]:
idx = np.array(["m1", "m10", "m100", "m1510", "m260", "m3212"])
S_v1.loc[idx, idx]

Unnamed: 0,m1,m10,m100,m1510,m260,m3212
m1,,0.505509,0.477509,,0.641658,
m10,0.505509,,0.508178,,0.516948,
m100,0.477509,0.508178,,,0.471287,
m1510,,,,,,
m260,0.641658,0.516948,0.471287,,,
m3212,,,,,,


In [101]:
def build_similarity_matrix_v2():
    #rating_matrix = ratings.pivot_table(index="UserID", columns="MovieID", values="Rating")
    rating_matrix = pd.read_csv('Rmat.csv', sep=',')

    normalized_rating_matrix = rating_matrix.subtract(rating_matrix.mean(axis=1), axis='rows')

    cardinality_df = (~normalized_rating_matrix.isna()).astype('int')
    cardinality_df = cardinality_df.T
    cardinality_matrix = cardinality_df @ cardinality_df.T
    
    normalized_rating_matrix = normalized_rating_matrix.T
    normalized_rating_matrix = normalized_rating_matrix.fillna(0)

    nr = normalized_rating_matrix @ normalized_rating_matrix.T
    #print(nr)

    squared_normalized_rating_matrix = ((normalized_rating_matrix**2) @ (normalized_rating_matrix!=0).T)
    squared_normalized_rating_matrix = squared_normalized_rating_matrix.apply(np.vectorize(np.sqrt))
    dr = squared_normalized_rating_matrix * squared_normalized_rating_matrix.T
    #print(dr)
    
    cosine_distance = nr/dr
    S = (1 + cosine_distance)/2
    #print(S)
    
    np.fill_diagonal(S.values, np.nan)

    S[cardinality_matrix<3] = None
    #print(S)
    
    #S[S.rank(axis=1, ascending=False)>30] = None
    #print(S)
    return S

In [102]:
S_v2 = build_similarity_matrix_v2()

In [103]:
S_v2

Unnamed: 0,m1,m10,m100,m1000,m1002,m1003,m1004,m1005,m1006,m1007,...,m99,m990,m991,m992,m993,m994,m996,m997,m998,m999
m1,,0.512106,0.392000,0.729637,0.405249,0.344362,0.193479,0.292097,0.275762,0.434214,...,0.525635,0.167886,0.438244,0.204408,0.551756,0.683828,0.290653,0.514043,0.383772,0.414505
m10,0.512106,,0.547458,0.490472,,0.610983,0.423742,0.460659,0.657699,0.549540,...,0.261701,0.465863,0.448079,0.385735,,0.454464,0.547504,0.668733,0.448290,0.600812
m100,0.392000,0.547458,,0.482965,,0.836584,0.629538,0.568282,0.811807,0.488525,...,0.410753,0.642616,0.493640,0.193671,0.802844,0.306743,0.629374,0.269576,0.478923,0.612815
m1000,0.729637,0.490472,0.482965,,,0.180765,,,,0.705223,...,,,0.207393,0.901521,,0.226027,0.668436,,0.725336,0.680574
m1002,0.405249,,,,,,,,,,...,,,,,,0.722766,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
m994,0.683828,0.454464,0.306743,0.226027,0.722766,0.251738,0.227186,0.140286,0.249062,0.274397,...,0.401180,0.148686,0.470518,0.192858,0.539714,,0.215561,0.449014,0.307824,0.398517
m996,0.290653,0.547504,0.629374,0.668436,,0.790889,0.711965,0.691134,0.806075,0.621695,...,0.618137,0.779649,0.478071,0.797518,,0.215561,,0.077113,0.556378,0.622558
m997,0.514043,0.668733,0.269576,,,0.366023,0.932724,0.949228,0.214426,0.210009,...,0.215711,0.866121,0.416222,,0.412018,0.449014,0.077113,,0.642635,0.460646
m998,0.383772,0.448290,0.478923,0.725336,,0.445008,0.843772,0.604815,0.354571,0.504146,...,,0.698391,0.662904,0.852328,,0.307824,0.556378,0.642635,,0.642727


In [104]:
idx = np.array(["m1", "m10", "m100", "m1510", "m260", "m3212"])
S_v2.loc[idx, idx]

Unnamed: 0,m1,m10,m100,m1510,m260,m3212
m1,,0.512106,0.392,,0.741597,
m10,0.512106,,0.547458,,0.534349,
m100,0.392,0.547458,,,0.329694,
m1510,,,,,,
m260,0.741597,0.534349,0.329694,,,
m3212,,,,,,


In [120]:
def display_similarity_matrix(S, movieID):
    #S = S_copy.copy()
    movieID = "m" + str(movieID)
    movie_ratings = S.loc[movieID].dropna()
    movie_ratings = movie_ratings.sort_values(ascending=False)
    
    print(f"Similarity Matrix for Movie={movieID}")
    print("------------------------------------------------------------------")
    #print(movies[movies["MovieID"] == movieID])
    print(movies[movies["MovieID_idx"] == movieID])
    print("------------------------------------------------------------------")
    
    #print(f"Reco Count={np.len(movie_ratings)}")
    print(f"Reco Count={movie_ratings.size}")
    movie_ratings = movie_ratings[0:30]    
    print(movie_ratings)
    print("------------------------------------------------------------------")
    print("------------------------------------------------------------------")
    """
    #movie_rating_details = movies.merge(movie_ratings, left_on="MovieID", right_on="MovieID", how="right")
    movie_rating_details = movies.merge(movie_ratings, left_on="MovieID_idx", right_on="MovieID", how="right")
    movie_rating_details.rename(columns={1 : "Rating"}, inplace=True)
    #movie_rating_details.drop(columns=['Genres'], inplace=True) 
    #movie_rating_details = movie_rating_details['MovieID', 'Rating']
    print(movie_rating_details.columns)
    print(movie_rating_details)
    #print(movie_ratings)
    #return movie_ratings
    """

# movies["MovieID_idx"] = "m" + movies["MovieID"].astype(str)
# movies

# S_v1.loc["m10"].dropna().sort_values(ascending=False)

In [121]:

idx = np.array([1, 10, 100, 1510, 260, 3212])

for i in idx:
    display_similarity_matrix(S_v1, i)


Similarity Matrix for Movie=m1
------------------------------------------------------------------
   MovieID             Title                       Genres MovieID_idx
0        1  Toy Story (1995)  Animation|Children's|Comedy          m1
------------------------------------------------------------------
Reco Count=3386
m3114    0.751479
m1198    0.655395
m2355    0.650808
m318     0.643794
m1196    0.642833
m260     0.641658
m1270    0.639653
m588     0.636349
m1197    0.628957
m1148    0.628400
m364     0.627962
m2762    0.625780
m2571    0.625765
m2028    0.624565
m527     0.624002
m595     0.622841
m2804    0.621141
m1291    0.620966
m356     0.618132
m745     0.617427
m457     0.616146
m919     0.615985
m593     0.613962
m1036    0.609679
m1097    0.609141
m150     0.608120
m2918    0.607007
m3396    0.606199
m2797    0.606138
m589     0.604416
Name: m1, dtype: float64
------------------------------------------------------------------
-----------------------------------------------

In [111]:

idx = np.array([1, 10, 100, 1510, 260, 3212])

for i in idx:
    display_similarity_matrix(S_v2, i)


Similarity Matrix for Movie=m1
------------------------------------------------------------------
   MovieID             Title                       Genres MovieID_idx
0        1  Toy Story (1995)  Animation|Children's|Comedy          m1
------------------------------------------------------------------
m53      1.000000
m2487    1.000000
m2304    0.946758
m3880    0.939612
m755     0.926827
m3644    0.919297
m3293    0.901581
m2127    0.897873
m567     0.890436
m3292    0.890308
m3495    0.885116
m3114    0.877563
m2494    0.872247
m3905    0.870332
m3056    0.867704
m1901    0.857866
m669     0.856929
m1664    0.856390
m2776    0.848356
m129     0.846463
m3338    0.843543
m2773    0.841150
m525     0.833893
m1651    0.833886
m1872    0.832924
m1144    0.830793
m3853    0.827553
m831     0.825370
m59      0.820970
m844     0.820244
Name: m1, dtype: float64
------------------------------------------------------------------
---------------------------------------------------------------

In [47]:
%%time

rating_matrix = pd.read_csv('Rmat.csv', sep=',')

normalized_rating_matrix = rating_matrix.subtract(rating_matrix.mean(axis=1), axis='rows')

normalized_rating_matrix = normalized_rating_matrix.T
normalized_rating_matrix = normalized_rating_matrix.fillna(0)

nr = normalized_rating_matrix @ normalized_rating_matrix.T

squared_normalized_rating_matrix = ((normalized_rating_matrix**2) @ (normalized_rating_matrix!=0).T)
squared_normalized_rating_matrix = squared_normalized_rating_matrix.apply(np.vectorize(np.sqrt))
dr_v2 = squared_normalized_rating_matrix * squared_normalized_rating_matrix.T

squared_normalized_rating_matrix = (normalized_rating_matrix * normalized_rating_matrix).sum(axis=1)
squared_normalized_rating_matrix = squared_normalized_rating_matrix.to_numpy()
squared_normalized_rating_matrix = np.sqrt(squared_normalized_rating_matrix)
dr_v1 = squared_normalized_rating_matrix[:, np.newaxis] * squared_normalized_rating_matrix[np.newaxis, :]


#nr[cardinality_matrix<3] = None
#dr[cardinality_matrix<3] = None



CPU times: user 14.8 s, sys: 1.11 s, total: 15.9 s
Wall time: 7.25 s


In [50]:
nr.iloc[0, np.argsort(-S_v1.to_numpy())[0, 1:30]]

m1198    721.118638
m2355    470.995381
m318     655.254111
m1196    705.807882
m260     742.397490
m1270    550.471286
m588     393.224347
m1197    562.322626
m1148    355.544310
m364     348.638129
m2762    588.153717
m2571    634.346398
m2028    590.523561
m527     569.273818
m595     342.647600
m2804    403.712285
m1291    396.049503
m356     520.777771
m745     277.178418
m457     390.980927
m919     410.374178
m593     530.878497
m1036    372.846692
m1097    420.146892
m150     280.297845
m2918    338.593819
m3396    237.165611
m2797    289.658496
m589     453.232868
Name: m1, dtype: float64

In [49]:
nr.iloc[0, np.argsort(-S_v2.to_numpy())[0, 1:30]]

m2487      0.052747
m2304      0.916612
m3880      3.028440
m755       2.526754
m3644      2.700837
m3293      3.520463
m2127      1.800423
m567       4.164278
m3292      4.180320
m3495      6.029996
m3114    893.960037
m2494     14.262462
m3905      6.934317
m3056      2.341297
m1901      2.213432
m669       2.491559
m1664      0.590179
m2776      5.094869
m129       3.267992
m3338     14.315405
m2773      3.158057
m525       6.792400
m1651      5.836284
m1872      0.899733
m1144      4.345282
m3853      2.849124
m831       6.173367
m59        4.445937
m844       1.090648
Name: m1, dtype: float64

In [71]:
dr_v1[0, np.argsort(-S_v1.to_numpy())[0, 1:30]]
#type(dr_v2)

array([2320.27399238, 1561.570041  , 2278.45117489, 2470.74063604,
       2620.37776507, 1970.85390148, 1441.97663052, 2180.27332695,
       1384.52357476, 1362.27245264, 2338.0198098 , 2521.94820329,
       2370.34578108, 2295.42832577, 1394.68280091, 1666.2972258 ,
       1637.02833607, 2204.21377148, 1180.21588921, 1683.14568083,
       1769.08630997, 2329.18429957, 1699.72264951, 1924.79696617,
       1296.23238666, 1582.10508817, 1116.607023  , 1364.54339173,
       2170.32510615])

In [70]:
#dr_v2[0, np.argsort(-S_v2.to_numpy())[0, 1:30]]
print(np.argsort(-S_v2.to_numpy())[0, 1:30])
dr_v2.iloc[0, np.argsort(-S_v2.to_numpy())[0, 1:30]]
#dr_v2

[1520 1319 3012 3462 2755 2380 1134 3277 2379 2597 2193 1527 3040 2130
  884 3382  663 1829  293 2429 1826 3231  649  851  144 2983 3533 3302
 3547]


m2487       0.052747
m2304       1.025847
m3880       3.444446
m755        2.959925
m3644       3.220673
m3293       4.383253
m2127       2.262559
m567        5.332856
m3292       5.355150
m3495       7.828812
m3114    1183.856279
m2494      19.157256
m3905       9.362297
m3056       3.183674
m1901       3.092539
m669        3.490274
m1664       0.827996
m2776       7.312731
m129        4.716228
m3338      20.834949
m2773       4.628545
m525       10.171537
m1651       8.739938
m1872       1.351258
m1144       6.567988
m3853       4.349103
m831        9.486675
m59         6.925791
m844        1.702838
Name: m1, dtype: float64

#### myIBCF

In [242]:
def myIBCF(S, w, n=10):
    S = S.copy()
    S = S.fillna(0)

    w = w.copy()
    identity = (~w.isna()).astype(int)
    w = w.fillna(0)

    reco_movies = w.dot(S) / identity.dot(S)
    reco_movies = reco_movies.sort_values(ascending=False)[0:n]
    
    reco_movies = reco_movies.dropna()
    
    if reco_movies.size < n:
        print("Backfilling from Genre based recommendations")        
        backfill_count = n - reco_movies.size
        random_genre = np.random.choice(get_all_genre())
        backfill_df = find_top_movies_by_genre(genre=random_genre, n=backfill_count)
        
        #reco_movies.set_value()
        #reco_movies = backfill_df
        #x.set_value(i, i**2)
        
    
    
    return reco_movies
    

In [245]:
random_genre = np.random.choice(get_all_genre())
backfill_df = find_top_movies_by_genre(genre=random_genre, n=5)
pd_series = backfill_df["MovieID"]

Unnamed: 0,MovieID,Title,Genres,Rating,Rating_count,Weighted_Rating
907,919,"Wizard of Oz, The (1939)",Musical,4.247963,1718.0,3.806998
1268,1288,This Is Spinal Tap (1984),Musical,4.179785,1118.0,3.561444
1202,1220,"Blues Brothers, The (1980)",Musical,3.939597,1341.0,3.447096
887,899,Singin' in the Rain (1952),Musical,4.283622,751.0,3.415542
584,588,Aladdin (1992),Musical,3.788305,1351.0,3.324033


In [230]:
user_rating = rating_matrix.loc["u1181"].copy()
user_rating
print(myIBCF(S_v1, user_rating))
print(myIBCF(S_v2, user_rating))

m1039    4.000000
m3899    3.773189
m3232    3.656500
m749     3.619902
m729     3.594585
m1174    3.591295
m2129    3.549514
m853     3.542094
m3126    3.522129
m3303    3.498938
Name: u1181, dtype: float64
m749     4.526559
m3899    4.526066
m1039    4.000000
m3288    3.850019
m3232    3.726343
m853     3.682413
m729     3.671729
m2129    3.593696
m3126    3.535438
m53      3.529714
Name: u1181, dtype: float64


In [231]:
user_rating = rating_matrix.loc["u1351"].copy()
user_rating
print(myIBCF(S_v1, user_rating))
print(myIBCF(S_v2, user_rating))

m404     5.000000
m1877    5.000000
m2869    5.000000
m2623    5.000000
m744     5.000000
m3373    5.000000
m853     5.000000
m2934    4.716274
m3772    4.605164
m3575    4.599763
Name: u1351, dtype: float64
m404     5.000000
m3373    5.000000
m2869    5.000000
m2623    5.000000
m1877    5.000000
m744     5.000000
m853     5.000000
m2934    4.940172
m3166    4.835657
m1532    4.822379
Name: u1351, dtype: float64


In [232]:
user_rating = row.copy()
user_rating[:] = np.nan
user_rating["m1613"] = 5
user_rating["m1755"] = 4

print(myIBCF(S_v1, user_rating))
print(myIBCF(S_v2, user_rating))

m3030    5.0
m2852    5.0
m89      5.0
m1178    5.0
m171     5.0
m2866    5.0
m1884    5.0
m2929    5.0
m3423    5.0
m2621    5.0
Name: m1, dtype: float64
m1661    5.0
m3715    5.0
m3224    5.0
m1366    5.0
m61      5.0
m3567    5.0
m234     5.0
m2729    5.0
m3296    5.0
m947     5.0
Name: m1, dtype: float64


In [243]:
user_rating = row.copy()
user_rating[:] = np.nan
# user_rating["m1613"] = 5
# user_rating["m1755"] = 4

print(myIBCF(S_v1, user_rating))
print(myIBCF(S_v2, user_rating))

Backfilling from Genre based recommendations
Series([], Name: m1, dtype: float64)
Backfilling from Genre based recommendations
Series([], Name: m1, dtype: float64)
