In [1]:
from datetime import datetime
from sklearn.metrics.pairwise import cosine_similarity
import json
import pandas as pd
import numpy as np

In [2]:
show_activities = pd.read_json('show_activities.json')

## Shows

In [3]:
show_activities

Unnamed: 0,content_id,activity,user_id,datetime
0,10298,Play,4,2023-03-30 21:53:21.191623
1,10145,Play,0,2023-03-30 21:53:21.191623
2,10343,Dislike,2,2023-03-30 21:53:21.191623
3,10484,Dislike,0,2023-03-30 21:53:21.191623
4,10195,Like,0,2023-03-30 21:53:21.191623
...,...,...,...,...
4995,10007,Dislike,1,2023-03-30 21:53:21.279623
4996,10328,Dislike,3,2023-03-30 21:53:21.279623
4997,10044,Dislike,4,2023-03-30 21:53:21.279623
4998,10149,Dislike,4,2023-03-30 21:53:21.279623


In [4]:
# Variables for adjusting formula

clicks = 0.3
like = 10
dislike = -10

# current formula is clicks * 0.3 +/-10 (+10 for like and -10 for dislike)

In [6]:
# getting likes/dislikes
likes_dislikes = show_activities.loc[np.where((show_activities['activity'] == 'Like') ^ (show_activities['activity'] == 'Dislike'))]

# latest activity at the bottom
likes_dislikes.sort_values('datetime', inplace = True)

# filtering out all like/dislike that is not at the bottom
latest = likes_dislikes.groupby(['user_id', 'content_id']).last()


# getting the right shape
latest.drop(columns = 'datetime', inplace = True)

unstacked = latest.unstack()

unstacked.columns = unstacked.columns.droplevel()

unstacked.replace({'Dislike':dislike, 'Like':like}, inplace = True)

unstacked.fillna(0, inplace = True)

unstacked

content_id,10000,10001,10002,10003,10004,10005,10006,10007,10008,10009,...,10594,10595,10596,10597,10598,10599,10600,10601,10602,10603
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,-10.0,-10,10,0.0,0.0,0.0,0.0,-10,0.0,10.0,...,0.0,10.0,10.0,-10.0,-10.0,0.0,-10.0,-10.0,10.0,-10.0
1,10.0,-10,-10,0.0,-10.0,-10.0,0.0,-10,0.0,10.0,...,0.0,-10.0,10.0,10.0,0.0,10.0,10.0,0.0,0.0,10.0
2,0.0,-10,10,10.0,-10.0,10.0,10.0,-10,-10.0,-10.0,...,10.0,0.0,10.0,-10.0,0.0,-10.0,-10.0,-10.0,-10.0,10.0
3,10.0,10,-10,10.0,10.0,0.0,10.0,10,0.0,0.0,...,-10.0,10.0,-10.0,10.0,-10.0,-10.0,0.0,0.0,-10.0,-10.0
4,-10.0,10,10,10.0,-10.0,0.0,10.0,-10,-10.0,0.0,...,10.0,0.0,0.0,0.0,0.0,10.0,0.0,0.0,0.0,0.0


In [8]:
max_length = list(unstacked.columns)[-1] + 1

max_length

10604

In [9]:
# turning it into sparse matrix

likes_matrix = np.zeros((5, max_length))

users = [i for i in list(unstacked.index)]
cols = list(unstacked.columns)

for i in users:
    for j in cols:
        likes_matrix[i, j] = unstacked.loc[i, j]
        
likes_matrix

array([[  0.,   0.,   0., ..., -10.,  10., -10.],
       [  0.,   0.,   0., ...,   0.,   0.,  10.],
       [  0.,   0.,   0., ..., -10., -10.,  10.],
       [  0.,   0.,   0., ...,   0., -10., -10.],
       [  0.,   0.,   0., ...,   0.,   0.,   0.]])

In [10]:
# creating sparse matrix for plays

plays = show_activities[show_activities['activity'] == 'Play']

grouped_plays = plays.groupby(['user_id', 'content_id']).count()

pivot_plays = grouped_plays.pivot_table(index='user_id', columns = 'content_id', values = 'activity')

pivot_plays.fillna(0, inplace = True)

plays_matrix = np.zeros((5, max_length))

users = [i for i in range(5)]
cols = list(pivot_plays.columns)

for i in users:
    for j in cols:
        plays_matrix[i, j] = pivot_plays.loc[i, j] * clicks
        
plays_matrix

array([[0. , 0. , 0. , ..., 0. , 0.3, 0.3],
       [0. , 0. , 0. , ..., 0. , 0.3, 0.3],
       [0. , 0. , 0. , ..., 0.3, 0.6, 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0.3, 0. , 0.3]])

In [11]:
scores_matrix = plays_matrix + likes_matrix

scores_df = pd.DataFrame(scores_matrix)

scores_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,10594,10595,10596,10597,10598,10599,10600,10601,10602,10603
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.3,10.0,10.3,-9.7,-9.7,0.6,-9.4,-10.0,10.3,-9.7
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.3,-10.0,10.0,10.3,0.0,10.0,10.0,0.0,0.3,10.3
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.3,0.3,10.0,-9.7,0.3,-10.0,-10.0,-9.7,-9.4,10.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-10.0,10.3,-10.0,10.0,-9.7,-9.7,0.3,0.0,-10.0,-10.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.3,0.3,0.3,0.6,0.0,10.3,0.3,0.3,0.0,0.3


In [12]:
scores_df_clean = scores_df.copy().loc[:, ~(scores_df == 0).all()]

scores_df_clean

Unnamed: 0,10000,10001,10002,10003,10004,10005,10006,10007,10008,10009,...,10594,10595,10596,10597,10598,10599,10600,10601,10602,10603
0,-9.7,-10.0,10.6,0.6,0.0,0.0,0.0,-9.1,0.6,10.0,...,0.3,10.0,10.3,-9.7,-9.7,0.6,-9.4,-10.0,10.3,-9.7
1,10.3,-10.0,-9.7,0.0,-10.0,-10.0,0.3,-10.0,0.0,10.0,...,0.3,-10.0,10.0,10.3,0.0,10.0,10.0,0.0,0.3,10.3
2,0.0,-9.7,10.0,10.6,-10.0,10.0,10.3,-9.7,-10.0,-10.0,...,10.3,0.3,10.0,-9.7,0.3,-10.0,-10.0,-9.7,-9.4,10.0
3,10.3,10.0,-9.4,10.6,10.0,0.0,10.0,10.3,0.0,0.6,...,-10.0,10.3,-10.0,10.0,-9.7,-9.7,0.3,0.0,-10.0,-10.0
4,-9.7,10.3,10.6,10.6,-9.7,0.0,10.3,-10.0,-10.0,0.3,...,10.3,0.3,0.3,0.6,0.0,10.3,0.3,0.3,0.0,0.3


In [13]:
scores_df_clean.replace(0, np.nan, inplace = True)

In [15]:
# Compute cosine similarity between all pairs of users
user_similarities = cosine_similarity(scores_matrix)


In [16]:
# Function to predict a user's rating for an item
def predict_rating(user_item_matrix, user_id, item_id):
    # Find the most similar users to the given user
    similar_users = np.argsort(user_similarities[user_id])[::-1][1:]
    
    # Compute weighted average of their ratings for the item 
    weighted_sum = 0
    weight_sum = 0   
    for sim_user in similar_users:
        if user_item_matrix[sim_user][item_id] != 0:
            similarity = user_similarities[user_id][sim_user]
            rating = user_item_matrix[sim_user][item_id]
            weighted_sum += similarity * rating            
            weight_sum += similarity   
    if weight_sum == 0:
        return 0
    else:
        return weighted_sum / weight_sum
    # Example usage:
    # Predict user 0's rating for item 2
    #predicted_rating = predict_rating(0, 2)
    #print(predicted_rating)

In [17]:
missings = np.where(scores_matrix == 0)

u_ids = missings[0]
c_ids = missings[1]

c_ids

array([    0,     1,     2, ..., 10585, 10598, 10602], dtype=int64)

In [18]:
z = np.zeros((5, max_length))

for i in range(len(u_ids)):
    z[u_ids[i], c_ids[i]] = predict_rating(scores_matrix, u_ids[i], c_ids[i])
        
z

array([[ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  6.18710812,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ..., -7.4175892 ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
        -4.47652359,  0.        ]])

In [19]:
only_predictions = pd.DataFrame(z)

only_predictions

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,10594,10595,10596,10597,10598,10599,10600,10601,10602,10603
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,-10.809314,0.0,0.0,6.187108,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-7.417589,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,-83.089032,0.0,0.0,0.0,-4.476524,0.0


In [20]:
np.where(only_predictions != 0)

(array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 

In [21]:
unwatched_list = []

for i in range(len(only_predictions)):
    unwatched_list.append(list(np.where(z[i] != 0)[0]))
                     
unwatched_list

[[10004,
  10005,
  10006,
  10014,
  10033,
  10035,
  10041,
  10044,
  10046,
  10047,
  10048,
  10051,
  10052,
  10057,
  10062,
  10072,
  10078,
  10085,
  10090,
  10091,
  10092,
  10097,
  10098,
  10108,
  10115,
  10121,
  10125,
  10134,
  10140,
  10154,
  10155,
  10160,
  10164,
  10180,
  10181,
  10186,
  10193,
  10200,
  10202,
  10205,
  10208,
  10214,
  10215,
  10217,
  10228,
  10232,
  10235,
  10237,
  10260,
  10265,
  10266,
  10279,
  10281,
  10283,
  10287,
  10292,
  10293,
  10299,
  10300,
  10301,
  10306,
  10307,
  10319,
  10330,
  10347,
  10348,
  10355,
  10360,
  10362,
  10369,
  10377,
  10387,
  10388,
  10393,
  10394,
  10431,
  10434,
  10438,
  10443,
  10456,
  10457,
  10460,
  10464,
  10471,
  10474,
  10485,
  10487,
  10497,
  10499,
  10500,
  10506,
  10511,
  10519,
  10525,
  10531,
  10533,
  10537,
  10539,
  10543,
  10553,
  10554,
  10558,
  10559,
  10587,
  10589,
  10591],
 [10003,
  10008,
  10022,
  10026,
  10041,


In [22]:
score_list = []
c = 0
for i in unwatched_list:
    s = []
    for j in i:
        s.append(z[c, j])
    score_list.append(s)
    c += 1
    
score_list

[[-6.046277091666603,
  -5.582059271156924,
  5.863284604565687,
  -0.6105347691543802,
  2.9875723622725503,
  -3.9854682992684705,
  4.74367610315648,
  -5.185935797586402,
  -3.4666102414652697,
  -9.63604681848973,
  10.037242804907192,
  -10.0,
  -6.312441999918714,
  -1.0503054283192466,
  7.067418143649792,
  6.9719674056901315,
  -9.483621822983283,
  -1.2161931291347874,
  10.083568449735159,
  1.4768726098448264,
  6.933758002580516,
  -2.990569863401502,
  -7.428796756482487,
  3.2005229722646344,
  1.4232838000302812,
  3.4013960722314827,
  -9.850026686624059,
  10.207880939529044,
  -3.9954832866603716,
  -0.9621252850339448,
  -5.479738563152831,
  1.9697867395457016,
  0.3015114976466718,
  -5.973083053497719,
  7.514385008827937,
  10.3,
  -3.60109794761712,
  1.9827557344011764,
  10.0,
  2.879900318333373,
  1.4475343372766505,
  -3.800206805246753,
  5.193591493576566,
  -1.9089703644215372,
  -3.121925087272958,
  1.0281163408440663,
  -0.08609331589219825,
  -9.90

In [23]:
score_indices = []
for i in score_list:
    order = sorted(range(len(i)), reverse = True, key=lambda k: i[k])
    score_indices.append(order)
    
score_indices

[[77,
  85,
  35,
  88,
  90,
  27,
  18,
  10,
  58,
  38,
  74,
  81,
  98,
  99,
  65,
  34,
  14,
  15,
  20,
  50,
  73,
  86,
  2,
  63,
  42,
  83,
  6,
  96,
  89,
  72,
  52,
  61,
  25,
  69,
  51,
  23,
  64,
  84,
  4,
  39,
  37,
  31,
  62,
  66,
  19,
  40,
  24,
  105,
  45,
  76,
  101,
  67,
  32,
  103,
  68,
  46,
  80,
  3,
  29,
  13,
  82,
  49,
  71,
  17,
  94,
  79,
  97,
  43,
  57,
  87,
  93,
  100,
  48,
  21,
  44,
  91,
  8,
  36,
  75,
  41,
  95,
  70,
  60,
  55,
  5,
  28,
  53,
  54,
  56,
  104,
  7,
  30,
  1,
  33,
  0,
  12,
  92,
  78,
  22,
  102,
  16,
  9,
  59,
  26,
  47,
  11],
 [56,
  69,
  105,
  54,
  10,
  16,
  79,
  44,
  93,
  30,
  91,
  81,
  43,
  96,
  15,
  53,
  72,
  29,
  61,
  39,
  88,
  82,
  36,
  13,
  0,
  21,
  103,
  55,
  64,
  25,
  19,
  9,
  4,
  97,
  37,
  18,
  70,
  35,
  95,
  24,
  80,
  106,
  60,
  73,
  65,
  17,
  23,
  86,
  59,
  112,
  71,
  84,
  92,
  11,
  52,
  100,
  41,
  94,
  38,
  99,
  104

In [24]:
rec_list = []
for i in range(len(score_indices)):
    l = unwatched_list[i]
    order = score_indices[i]
    rec_list.append([l[j] for j in order])
    
rec_list

[[10438,
  10485,
  10186,
  10499,
  10506,
  10134,
  10090,
  10048,
  10300,
  10202,
  10394,
  10460,
  10543,
  10553,
  10348,
  10181,
  10062,
  10072,
  10092,
  10266,
  10393,
  10487,
  10006,
  10330,
  10215,
  10471,
  10041,
  10537,
  10500,
  10388,
  10281,
  10307,
  10121,
  10369,
  10279,
  10108,
  10347,
  10474,
  10033,
  10205,
  10200,
  10160,
  10319,
  10355,
  10091,
  10208,
  10115,
  10591,
  10232,
  10434,
  10558,
  10360,
  10164,
  10587,
  10362,
  10235,
  10457,
  10014,
  10154,
  10057,
  10464,
  10265,
  10387,
  10085,
  10531,
  10456,
  10539,
  10217,
  10299,
  10497,
  10525,
  10554,
  10260,
  10097,
  10228,
  10511,
  10046,
  10193,
  10431,
  10214,
  10533,
  10377,
  10306,
  10292,
  10035,
  10140,
  10283,
  10287,
  10293,
  10589,
  10044,
  10155,
  10005,
  10180,
  10004,
  10052,
  10519,
  10443,
  10098,
  10559,
  10078,
  10047,
  10301,
  10125,
  10237,
  10051],
 [10320,
  10374,
  10570,
  10317,
  10064,


In [27]:
# output dataframe with user ids a

recommendations_shows = pd.DataFrame(rec_list)

recommendations_shows.to_csv('show_recommendations.csv', index = False)
recommendations_shows

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,105,106,107,108,109,110,111,112,113,114
0,10438,10485,10186,10499,10506,10134,10090,10048,10300,10202,...,10051,,,,,,,,,
1,10320,10374,10570,10317,10064,10079,10429,10231,10504,10143,...,10424,10368.0,10094.0,10407.0,10576.0,10209.0,10593.0,10422.0,,
2,10320,10317,10467,10243,10156,10374,10391,10430,10570,10452,...,10306,10046.0,,,,,,,,
3,10438,10485,10186,10083,10572,10015,10437,10399,10202,10460,...,10304,10247.0,10117.0,10245.0,10371.0,10211.0,10502.0,10312.0,10518.0,10237.0
4,10189,10472,10466,10144,10201,10404,10299,10252,10421,10274,...,10358,10432.0,10222.0,10255.0,10527.0,10196.0,10598.0,10473.0,10480.0,10512.0


In [54]:
pd.read_csv('show_recommendations.csv')

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1067,1068,1069,1070,1071,1072,1073,1074,1075,1076
0,11459,11709,11982,12608,12931,11595,11378,11134,11743,11989,...,11920.0,12949.0,11238.0,11733.0,,,,,,
1,11388,12512,11467,11624,11961,12368,11743,11989,12826,12964,...,11324.0,12044.0,12797.0,12171.0,11278.0,12594.0,11936.0,12553.0,12709.0,12647.0
2,11164,12054,12767,11732,12560,11494,11606,12378,12091,11409,...,,,,,,,,,,
3,11301,11622,11091,11601,11427,12920,12095,12187,11115,12570,...,,,,,,,,,,
4,11285,11738,12214,12473,12481,11702,11844,12776,11911,11164,...,,,,,,,,,,


## Movies 

In [39]:
movie_activities = pd.read_json('movie_activities.json')

In [40]:
# getting likes/dislikes
likes_dislikes = movie_activities.loc[np.where((movie_activities['activity'] == 'Like') ^ (movie_activities['activity'] == 'Dislike'))]

# latest activity at the bottom
likes_dislikes.sort_values('datetime', inplace = True)

# filtering out all like/dislike that is not at the bottom
latest = likes_dislikes.groupby(['user_id', 'content_id']).last()


# getting the right shape
latest.drop(columns = 'datetime', inplace = True)

unstacked = latest.unstack()

unstacked.columns = unstacked.columns.droplevel()

unstacked.replace({'Dislike':dislike, 'Like':like}, inplace = True)

unstacked.fillna(0, inplace = True)

unstacked

content_id,0,1,2,3,4,5,6,7,8,9,...,312,313,314,315,316,317,318,319,320,321
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,-10.0,0.0,-10.0,-10.0,-10.0,10.0,0.0,0.0,0.0,0.0,...,10.0,10.0,10.0,0.0,0.0,10.0,-10.0,10,-10.0,-10
1,0.0,-10.0,0.0,-10.0,0.0,0.0,10.0,10.0,10.0,-10.0,...,0.0,10.0,0.0,-10.0,10.0,-10.0,-10.0,-10,-10.0,10
2,0.0,0.0,-10.0,0.0,10.0,-10.0,-10.0,10.0,0.0,0.0,...,-10.0,0.0,10.0,-10.0,10.0,-10.0,0.0,10,0.0,-10
3,10.0,-10.0,-10.0,10.0,-10.0,10.0,10.0,-10.0,-10.0,0.0,...,0.0,-10.0,0.0,10.0,-10.0,0.0,-10.0,10,0.0,10
4,-10.0,10.0,10.0,0.0,-10.0,10.0,-10.0,10.0,10.0,10.0,...,-10.0,0.0,10.0,10.0,-10.0,-10.0,-10.0,10,10.0,-10


In [41]:
max_length = list(unstacked.columns)[-1] + 1

max_length

322

In [42]:
# turning it into sparse matrix

likes_matrix = np.zeros((5, max_length))

users = [i for i in list(unstacked.index)]
cols = list(unstacked.columns)

for i in users:
    for j in cols:
        likes_matrix[i, j] = unstacked.loc[i, j]
        
likes_matrix

array([[-10.,   0., -10., ...,  10., -10., -10.],
       [  0., -10.,   0., ..., -10., -10.,  10.],
       [  0.,   0., -10., ...,  10.,   0., -10.],
       [ 10., -10., -10., ...,  10.,   0.,  10.],
       [-10.,  10.,  10., ...,  10.,  10., -10.]])

In [43]:
# creating sparse matrix for plays

plays = movie_activities[show_activities['activity'] == 'Play']

grouped_plays = plays.groupby(['user_id', 'content_id']).count()

pivot_plays = grouped_plays.pivot_table(index='user_id', columns = 'content_id', values = 'activity')

pivot_plays.fillna(0, inplace = True)

plays_matrix = np.zeros((5, max_length))

users = [i for i in range(5)]
cols = list(pivot_plays.columns)

for i in users:
    for j in cols:
        plays_matrix[i, j] = pivot_plays.loc[i, j] * clicks
        
plays_matrix

  plays = movie_activities[show_activities['activity'] == 'Play']


array([[0.3, 0. , 0.3, ..., 0.6, 0. , 0.6],
       [0.3, 0. , 0. , ..., 0.3, 0.3, 0. ],
       [0. , 0. , 0.3, ..., 0.3, 0.3, 0. ],
       [0.6, 0. , 0. , ..., 0. , 0.6, 0.3],
       [0.3, 0.6, 0.3, ..., 0.3, 0. , 0. ]])

In [44]:
scores_matrix = plays_matrix + likes_matrix

scores_df = pd.DataFrame(scores_matrix)

scores_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,312,313,314,315,316,317,318,319,320,321
0,-9.7,0.0,-9.7,-9.7,-9.7,10.3,0.0,0.0,0.3,0.3,...,10.0,10.0,10.9,0.0,0.3,10.3,-9.7,10.6,-10.0,-9.4
1,0.3,-10.0,0.0,-9.4,0.0,0.0,10.0,10.9,10.0,-10.0,...,0.0,10.3,0.0,-9.7,10.0,-9.7,-9.7,-9.7,-9.7,10.0
2,0.0,0.0,-9.7,0.0,10.0,-10.0,-9.1,10.0,0.0,0.0,...,-9.7,0.0,10.0,-9.7,10.3,-10.0,0.0,10.3,0.3,-10.0
3,10.6,-10.0,-10.0,10.0,-9.1,10.6,10.3,-9.4,-9.4,0.0,...,0.0,-9.7,0.3,10.3,-9.7,0.0,-10.0,10.0,0.6,10.3
4,-9.7,10.6,10.3,0.0,-10.0,10.3,-10.0,10.6,10.0,10.0,...,-10.0,0.0,10.3,10.3,-9.4,-10.0,-10.0,10.3,10.0,-10.0


In [45]:
scores_df_clean = scores_df.copy().loc[:, ~(scores_df == 0).all()]

scores_df_clean

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,312,313,314,315,316,317,318,319,320,321
0,-9.7,0.0,-9.7,-9.7,-9.7,10.3,0.0,0.0,0.3,0.3,...,10.0,10.0,10.9,0.0,0.3,10.3,-9.7,10.6,-10.0,-9.4
1,0.3,-10.0,0.0,-9.4,0.0,0.0,10.0,10.9,10.0,-10.0,...,0.0,10.3,0.0,-9.7,10.0,-9.7,-9.7,-9.7,-9.7,10.0
2,0.0,0.0,-9.7,0.0,10.0,-10.0,-9.1,10.0,0.0,0.0,...,-9.7,0.0,10.0,-9.7,10.3,-10.0,0.0,10.3,0.3,-10.0
3,10.6,-10.0,-10.0,10.0,-9.1,10.6,10.3,-9.4,-9.4,0.0,...,0.0,-9.7,0.3,10.3,-9.7,0.0,-10.0,10.0,0.6,10.3
4,-9.7,10.6,10.3,0.0,-10.0,10.3,-10.0,10.6,10.0,10.0,...,-10.0,0.0,10.3,10.3,-9.4,-10.0,-10.0,10.3,10.0,-10.0


In [46]:
scores_df_clean.replace(0, np.nan, inplace = True)

In [47]:
# Compute cosine similarity between all pairs of users
user_similarities = cosine_similarity(scores_matrix)


In [48]:
missings = np.where(scores_matrix == 0)

u_ids = missings[0]
c_ids = missings[1]

c_ids

array([  1,   6,   7,  15,  16,  20,  21,  23,  24,  30,  31,  32,  35,
        39,  41,  44,  56,  58,  61,  66,  70,  73,  79,  83,  85,  87,
        93,  94,  96, 104, 110, 116, 119, 129, 136, 138, 142, 144, 165,
       168, 169, 173, 182, 183, 184, 188, 191, 195, 203, 204, 206, 216,
       219, 220, 222, 228, 230, 235, 244, 247, 251, 252, 253, 257, 268,
       272, 280, 286, 291, 297, 298, 300, 302, 303, 307, 310, 315,   2,
         4,   5,  21,  22,  25,  26,  31,  32,  37,  46,  48,  49,  54,
        57,  66,  71,  78,  83,  87,  97, 100, 101, 103, 106, 109, 112,
       116, 121, 123, 125, 134, 139, 142, 145, 151, 160, 163, 169, 171,
       174, 175, 178, 179, 187, 188, 189, 190, 194, 208, 219, 224, 236,
       238, 243, 244, 252, 257, 258, 261, 262, 265, 266, 273, 275, 281,
       284, 285, 286, 303, 310, 311, 312, 314,   0,   1,   3,   8,   9,
        23,  25,  26,  28,  29,  41,  44,  47,  52,  54,  61,  62,  73,
        74,  75,  76,  91, 100, 103, 112, 114, 115, 119, 121, 12

In [49]:
z = np.zeros((5, max_length))

for i in range(len(u_ids)):
    z[u_ids[i], c_ids[i]] = predict_rating(scores_matrix, u_ids[i], c_ids[i])
        
z

array([[  0.        ,  38.39509677,   0.        , ...,   0.        ,
          0.        ,   0.        ],
       [  0.        ,   0.        , -10.0311692 , ...,   0.        ,
          0.        ,   0.        ],
       [-15.24129687,  47.10367602,   0.        , ...,   0.        ,
          0.        ,   0.        ],
       [  0.        ,   0.        ,   0.        , ...,   0.        ,
          0.        ,   0.        ],
       [  0.        ,   0.        ,   0.        , ...,   0.        ,
          0.        ,   0.        ]])

In [50]:
only_predictions = pd.DataFrame(z)

only_predictions

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,312,313,314,315,316,317,318,319,320,321
0,0.0,38.395097,0.0,0.0,0.0,0.0,-31.095263,9.209556,0.0,0.0,...,0.0,0.0,0.0,28.314849,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,-10.031169,0.0,1.957359,-1.868597,0.0,0.0,0.0,0.0,...,0.333331,0.0,12.245891,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,-15.241297,47.103676,0.0,-23.237203,0.0,0.0,0.0,0.0,-4.943183,90.844505,...,0.0,24.811327,0.0,0.0,0.0,0.0,-10.431182,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-16.203382,...,-6.687645,0.0,0.0,0.0,0.0,-8.37712,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,-10.874811,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,11.186806,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [51]:
np.where(only_predictions != 0)

(array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 

In [52]:
unwatched_list = []

for i in range(len(only_predictions)):
    unwatched_list.append(list(np.where(z[i] != 0)[0]))
                     
unwatched_list

[[1,
  6,
  7,
  15,
  16,
  20,
  21,
  23,
  24,
  30,
  31,
  32,
  35,
  39,
  41,
  44,
  56,
  58,
  61,
  66,
  70,
  73,
  79,
  83,
  85,
  87,
  93,
  94,
  96,
  104,
  110,
  116,
  119,
  129,
  136,
  138,
  142,
  144,
  165,
  168,
  169,
  173,
  182,
  183,
  184,
  188,
  191,
  195,
  203,
  204,
  206,
  216,
  219,
  220,
  222,
  228,
  230,
  235,
  244,
  247,
  251,
  252,
  253,
  257,
  268,
  272,
  280,
  286,
  291,
  297,
  298,
  300,
  302,
  303,
  307,
  310,
  315],
 [2,
  4,
  5,
  21,
  22,
  25,
  26,
  31,
  32,
  37,
  46,
  48,
  49,
  54,
  57,
  66,
  71,
  78,
  83,
  87,
  97,
  100,
  101,
  103,
  106,
  109,
  112,
  116,
  121,
  123,
  125,
  134,
  139,
  142,
  145,
  151,
  160,
  163,
  169,
  171,
  174,
  175,
  178,
  179,
  187,
  188,
  189,
  190,
  194,
  208,
  219,
  224,
  236,
  238,
  243,
  244,
  252,
  257,
  258,
  261,
  262,
  265,
  266,
  273,
  275,
  281,
  284,
  285,
  286,
  303,
  310,
  311,
  312,
  314

In [53]:
score_list = []
c = 0
for i in unwatched_list:
    s = []
    for j in i:
        s.append(z[c, j])
    score_list.append(s)
    c += 1
    
score_list

[[38.395096767956026,
  -31.095262771841387,
  9.209555738641889,
  11.08938109664133,
  33.897621290440945,
  -27.381407096607592,
  -6.999787674609742,
  -9.713107236912803,
  -33.597621290440934,
  -8.524252571275214,
  10.0,
  -9.259281516619726,
  -14.379022396614022,
  -8.609831494386755,
  10.873815794187008,
  -14.38946986187905,
  -4.996329734351496,
  -5.929452180099424,
  -37.682086624604246,
  10.6,
  8.512186151688008,
  11.786923031099814,
  -8.95962932810158,
  -9.453270581711552,
  -14.153186361473551,
  10.116119282637193,
  20.40697920586487,
  -9.908864145501596,
  -13.646672240000218,
  26.57175542136281,
  -20.89189876957148,
  9.859281516619728,
  -36.24537730543833,
  -32.340061798236206,
  33.0857070331995,
  23.65980674802037,
  -9.522730059887897,
  8.905360880312946,
  10.748933297768653,
  -14.664443258955103,
  -8.952204339395852,
  -9.777177382518351,
  40.228886517910205,
  30.965436333824766,
  9.909651018688946,
  -9.70734369460532,
  9.925699460423333,

In [54]:
score_indices = []
for i in score_list:
    order = sorted(range(len(i)), reverse = True, key=lambda k: i[k])
    score_indices.append(order)
    
score_indices

[[42,
  0,
  48,
  4,
  34,
  51,
  43,
  76,
  62,
  29,
  35,
  26,
  47,
  21,
  3,
  14,
  38,
  19,
  54,
  72,
  25,
  10,
  46,
  44,
  31,
  74,
  2,
  37,
  73,
  20,
  66,
  70,
  58,
  64,
  16,
  17,
  60,
  6,
  52,
  63,
  9,
  13,
  40,
  22,
  11,
  23,
  36,
  67,
  75,
  45,
  7,
  41,
  27,
  53,
  61,
  57,
  69,
  71,
  49,
  28,
  24,
  12,
  15,
  56,
  39,
  68,
  30,
  5,
  59,
  55,
  1,
  50,
  65,
  33,
  8,
  32,
  18],
 [19,
  27,
  54,
  24,
  49,
  59,
  58,
  9,
  73,
  57,
  21,
  26,
  3,
  50,
  55,
  43,
  15,
  16,
  65,
  35,
  51,
  20,
  67,
  31,
  46,
  23,
  7,
  38,
  69,
  11,
  63,
  1,
  4,
  39,
  62,
  29,
  72,
  34,
  37,
  30,
  22,
  25,
  42,
  2,
  14,
  47,
  32,
  52,
  10,
  70,
  41,
  36,
  66,
  48,
  45,
  56,
  40,
  0,
  12,
  53,
  64,
  18,
  61,
  60,
  17,
  44,
  13,
  6,
  71,
  33,
  8,
  68,
  5,
  28],
 [42,
  62,
  4,
  50,
  40,
  47,
  1,
  58,
  55,
  61,
  34,
  31,
  41,
  69,
  17,
  10,
  37,
  57,
  33,


In [55]:
rec_list = []
for i in range(len(score_indices)):
    l = unwatched_list[i]
    order = score_indices[i]
    rec_list.append([l[j] for j in order])
    
rec_list

[[182,
  1,
  203,
  16,
  136,
  216,
  183,
  315,
  253,
  104,
  138,
  93,
  195,
  73,
  15,
  41,
  165,
  66,
  222,
  302,
  87,
  31,
  191,
  184,
  116,
  307,
  7,
  144,
  303,
  70,
  280,
  298,
  244,
  268,
  56,
  58,
  251,
  21,
  219,
  257,
  30,
  39,
  169,
  79,
  32,
  83,
  142,
  286,
  310,
  188,
  23,
  173,
  94,
  220,
  252,
  235,
  297,
  300,
  204,
  96,
  85,
  35,
  44,
  230,
  168,
  291,
  110,
  20,
  247,
  228,
  6,
  206,
  272,
  129,
  24,
  119,
  61],
 [87,
  116,
  243,
  106,
  208,
  261,
  258,
  37,
  314,
  257,
  100,
  112,
  21,
  219,
  244,
  179,
  66,
  71,
  281,
  151,
  224,
  97,
  285,
  134,
  189,
  103,
  31,
  169,
  303,
  48,
  273,
  4,
  22,
  171,
  266,
  123,
  312,
  145,
  163,
  125,
  101,
  109,
  178,
  5,
  57,
  190,
  139,
  236,
  46,
  310,
  175,
  160,
  284,
  194,
  188,
  252,
  174,
  2,
  49,
  238,
  275,
  83,
  265,
  262,
  78,
  187,
  54,
  26,
  311,
  142,
  32,
  286,
  25,
  121

In [56]:
# output dataframe with user ids a

recommendations_movies = pd.DataFrame(rec_list)

recommendations_movies.to_csv('movie_recommendations.csv', index = False)
recommendations_movies

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,69,70,71,72,73,74,75,76,77,78
0,182,1,203,16,136,216,183,315,253,104,...,228.0,6.0,206.0,272.0,129.0,24.0,119.0,61.0,,
1,87,116,243,106,208,261,258,37,314,257,...,142.0,32.0,286.0,25.0,121.0,,,,,
2,182,283,9,218,176,205,1,267,255,282,...,168.0,61.0,,,,,,,,
3,275,61,76,75,179,168,123,299,210,153,...,65.0,226.0,145.0,283.0,9.0,205.0,176.0,218.0,182.0,112.0
4,15,230,85,106,313,91,288,95,66,71,...,,,,,,,,,,


In [57]:
pd.read_csv('movie_recommendations.csv')

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,69,70,71,72,73,74,75,76,77,78
0,182,1,203,16,136,216,183,315,253,104,...,228.0,6.0,206.0,272.0,129.0,24.0,119.0,61.0,,
1,87,116,243,106,208,261,258,37,314,257,...,142.0,32.0,286.0,25.0,121.0,,,,,
2,182,283,9,218,176,205,1,267,255,282,...,168.0,61.0,,,,,,,,
3,275,61,76,75,179,168,123,299,210,153,...,65.0,226.0,145.0,283.0,9.0,205.0,176.0,218.0,182.0,112.0
4,15,230,85,106,313,91,288,95,66,71,...,,,,,,,,,,
