In [111]:
from datetime import datetime
from sklearn.metrics.pairwise import cosine_similarity
import json
import pandas as pd
import numpy as np

In [112]:
current_dir = os.getcwd()  # gets current working directory
project_dir = os.path.dirname(current_dir)  
directory_path = os.path.dirname(current_dir) + os.sep + 'data' + os.sep

show_activities = pd.read_json(directory_path + 'show_activities.json')

## Shows

In [113]:
show_activities

Unnamed: 0,content_id,activity,user_id,datetime
0,10524,Play,3,2023-04-01 23:54:04.372501
1,10299,Dislike,3,2023-04-01 23:54:04.372543
2,10158,Play,4,2023-04-01 23:54:04.372567
3,10344,Like,2,2023-04-01 23:54:04.372589
4,10238,Like,4,2023-04-01 23:54:04.372611
...,...,...,...,...
4995,10059,Play,4,2023-04-01 23:54:04.447640
4996,10208,Dislike,1,2023-04-01 23:54:04.447653
4997,10309,Like,3,2023-04-01 23:54:04.447666
4998,10560,Dislike,1,2023-04-01 23:54:04.447679


In [114]:
# Variables for adjusting formula

clicks = 0.3
like = 10
dislike = -10

# current formula is clicks * 0.3 +/-10 (+10 for like and -10 for dislike)

In [115]:
# getting likes/dislikes
likes_dislikes = show_activities.loc[np.where((show_activities['activity'] == 'Like') ^ (show_activities['activity'] == 'Dislike'))]

# latest activity at the bottom
likes_dislikes.sort_values('datetime', inplace = True)

# filtering out all like/dislike that is not at the bottom
latest = likes_dislikes.groupby(['user_id', 'content_id']).last()


# getting the right shape
latest.drop(columns = 'datetime', inplace = True)

unstacked = latest.unstack()

unstacked.columns = unstacked.columns.droplevel()

unstacked.replace({'Dislike':dislike, 'Like':like}, inplace = True)

unstacked.fillna(0, inplace = True)

unstacked

content_id,10000,10001,10002,10003,10004,10005,10006,10007,10008,10009,...,10614,10615,10616,10617,10618,10619,10620,10621,10622,10623
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,-10.0,0.0,10,0.0,10.0,0.0,10.0,0.0,0.0,0.0,...,10.0,0.0,10.0,0.0,10,0.0,-10.0,0.0,-10,-10.0
1,0.0,0.0,10,0.0,0.0,0.0,-10.0,0.0,10.0,-10.0,...,10.0,0.0,10.0,10.0,10,-10.0,10.0,0.0,-10,10.0
2,-10.0,10.0,10,-10.0,10.0,10.0,0.0,10.0,10.0,-10.0,...,10.0,0.0,-10.0,0.0,-10,0.0,0.0,0.0,10,0.0
3,-10.0,10.0,10,10.0,0.0,0.0,0.0,-10.0,-10.0,10.0,...,0.0,10.0,10.0,-10.0,-10,-10.0,10.0,10.0,-10,0.0
4,0.0,0.0,-10,10.0,0.0,0.0,-10.0,0.0,0.0,-10.0,...,0.0,10.0,0.0,10.0,10,-10.0,0.0,10.0,-10,-10.0


In [116]:
max_length = list(unstacked.columns)[-1] + 1

max_length

10624

In [117]:
# turning it into sparse matrix

likes_matrix = np.zeros((5, max_length))

users = [i for i in list(unstacked.index)]
cols = list(unstacked.columns)

for i in users:
    for j in cols:
        likes_matrix[i, j] = unstacked.loc[i, j]
        
likes_matrix

array([[  0.,   0.,   0., ...,   0., -10., -10.],
       [  0.,   0.,   0., ...,   0., -10.,  10.],
       [  0.,   0.,   0., ...,   0.,  10.,   0.],
       [  0.,   0.,   0., ...,  10., -10.,   0.],
       [  0.,   0.,   0., ...,  10., -10., -10.]])

In [118]:
# creating sparse matrix for plays

plays = show_activities[show_activities['activity'] == 'Play']

grouped_plays = plays.groupby(['user_id', 'content_id']).count()

pivot_plays = grouped_plays.pivot_table(index='user_id', columns = 'content_id', values = 'activity')

pivot_plays.fillna(0, inplace = True)

plays_matrix = np.zeros((5, max_length))

users = [i for i in range(5)]
cols = list(pivot_plays.columns)

for i in users:
    for j in cols:
        plays_matrix[i, j] = pivot_plays.loc[i, j] * clicks
        
plays_matrix

array([[0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0.3, 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0.3, 0. ],
       [0. , 0. , 0. , ..., 0.3, 0. , 0.6],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ]])

In [119]:
scores_matrix = plays_matrix + likes_matrix

scores_df = pd.DataFrame(scores_matrix)

scores_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,10614,10615,10616,10617,10618,10619,10620,10621,10622,10623
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.3,0.3,10.0,0.0,10.0,0.0,-10.0,0.0,-10.0,-10.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.0,0.0,10.0,10.0,10.0,-9.7,10.3,0.3,-10.0,10.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.9,0.0,-9.4,0.0,-10.0,0.0,0.0,0.0,10.3,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.3,10.0,10.3,-10.0,-10.0,-10.0,10.0,10.3,-10.0,0.6
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.3,10.0,0.3,10.0,10.3,-10.0,0.0,10.0,-10.0,-10.0


In [120]:
scores_df_clean = scores_df.copy().loc[:, ~(scores_df == 0).all()]

scores_df_clean

Unnamed: 0,10000,10001,10002,10003,10004,10005,10006,10007,10008,10009,...,10614,10615,10616,10617,10618,10619,10620,10621,10622,10623
0,-9.7,0.3,10.6,0.0,10.0,0.0,10.0,0.3,0.0,0.3,...,10.3,0.3,10.0,0.0,10.0,0.0,-10.0,0.0,-10.0,-10.0
1,0.3,0.0,10.0,0.0,0.6,0.3,-10.0,0.0,10.3,-9.7,...,10.0,0.0,10.0,10.0,10.0,-9.7,10.3,0.3,-10.0,10.0
2,-10.0,10.6,10.3,-10.0,10.3,10.6,0.0,10.3,10.0,-9.7,...,10.9,0.0,-9.4,0.0,-10.0,0.0,0.0,0.0,10.3,0.0
3,-10.0,10.0,10.0,10.3,0.3,0.0,0.0,-10.0,-9.7,10.3,...,0.3,10.0,10.3,-10.0,-10.0,-10.0,10.0,10.3,-10.0,0.6
4,0.0,0.6,-10.0,10.3,0.0,0.0,-9.4,0.3,0.3,-9.4,...,0.3,10.0,0.3,10.0,10.3,-10.0,0.0,10.0,-10.0,-10.0


In [121]:
scores_df_clean.replace(0, np.nan, inplace = True)

In [122]:
# Compute cosine similarity between all pairs of users
user_similarities = cosine_similarity(scores_matrix)


In [123]:
# Function to predict a user's rating for an item
def predict_rating(user_item_matrix, user_id, item_id):
    # Find the most similar users to the given user
    similar_users = np.argsort(user_similarities[user_id])[::-1][1:]
    
    # Compute weighted average of their ratings for the item 
    weighted_sum = 0
    weight_sum = 0   
    for sim_user in similar_users:
        if user_item_matrix[sim_user][item_id] != 0:
            similarity = user_similarities[user_id][sim_user]
            rating = user_item_matrix[sim_user][item_id]
            weighted_sum += similarity * rating            
            weight_sum += similarity   
    if weight_sum == 0:
        return 0
    else:
        return weighted_sum / weight_sum
    # Example usage:
    # Predict user 0's rating for item 2
    #predicted_rating = predict_rating(0, 2)
    #print(predicted_rating)

In [124]:
missings = np.where(scores_matrix == 0)

u_ids = missings[0]
c_ids = missings[1]

c_ids

array([    0,     1,     2, ..., 10612, 10613, 10620])

In [125]:
z = np.zeros((5, max_length))

for i in range(len(u_ids)):
    z[u_ids[i], c_ids[i]] = predict_rating(scores_matrix, u_ids[i], c_ids[i])
        
z

array([[ 0.        ,  0.        ,  0.        , ...,  8.59562066,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  5.52497285,
         0.        , -2.35845758],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ]])

In [126]:
only_predictions = pd.DataFrame(z)

only_predictions

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,10614,10615,10616,10617,10618,10619,10620,10621,10622,10623
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,14.78825,0.0,-9.958787,0.0,8.595621,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,15.210278,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,7.20935,0.0,9.863564,0.0,-9.861534,3.860121,5.524973,0.0,-2.358458
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,-5.11002,0.0,0.0,0.0


In [127]:
np.where(only_predictions != 0)

(array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 

In [128]:
unwatched_list = []

for i in range(len(only_predictions)):
    unwatched_list.append(list(np.where(z[i] != 0)[0]))
                     
unwatched_list

[[10003,
  10005,
  10008,
  10016,
  10018,
  10034,
  10043,
  10051,
  10058,
  10060,
  10064,
  10072,
  10073,
  10074,
  10081,
  10088,
  10090,
  10095,
  10099,
  10100,
  10103,
  10106,
  10108,
  10113,
  10121,
  10125,
  10130,
  10137,
  10138,
  10149,
  10153,
  10159,
  10177,
  10191,
  10196,
  10197,
  10199,
  10205,
  10210,
  10217,
  10223,
  10230,
  10233,
  10234,
  10235,
  10238,
  10245,
  10250,
  10251,
  10259,
  10260,
  10262,
  10270,
  10291,
  10293,
  10295,
  10313,
  10319,
  10324,
  10325,
  10328,
  10347,
  10350,
  10352,
  10357,
  10363,
  10366,
  10380,
  10387,
  10388,
  10394,
  10400,
  10404,
  10412,
  10417,
  10420,
  10428,
  10433,
  10435,
  10439,
  10441,
  10447,
  10460,
  10465,
  10471,
  10472,
  10487,
  10492,
  10493,
  10502,
  10507,
  10511,
  10513,
  10518,
  10520,
  10522,
  10524,
  10537,
  10540,
  10544,
  10550,
  10552,
  10558,
  10567,
  10575,
  10586,
  10587,
  10592,
  10593,
  10594,
  10599,
 

In [129]:
score_list = []
c = 0
for i in unwatched_list:
    s = []
    for j in i:
        s.append(z[c, j])
    score_list.append(s)
    c += 1
    
score_list

[[6.377720035622377,
  6.4862674945019005,
  5.083571491209366,
  9.97294123831196,
  2.6073735161975575,
  -13.69441539450395,
  9.313039238882325,
  9.999999999999998,
  10.1159294570752,
  -13.900106532138771,
  2.7659487528758087,
  -8.656842040373018,
  29.51846779838918,
  6.575658699288856,
  -7.92379690913401,
  -12.112536455258647,
  -139.3530166746464,
  10.31329389527486,
  -2.2838200844950634,
  -6.585496302712658,
  -4.358072969926002,
  -0.6897456620098202,
  10.042860663940328,
  56.097384074511666,
  -13.780167854023132,
  10.0,
  -10.543452708706106,
  -137.76486594637487,
  -135.8767152181033,
  10.537882615713816,
  4.139717403080413,
  12.412536455258648,
  10.274004498467544,
  10.383262026221296,
  1.926029910989726,
  4.174097602265199,
  -9.097361399497732,
  -28.73010031010551,
  8.142767092544606,
  -5.959352202376335,
  -11.521096865923443,
  -11.417303366746525,
  12.19868814655872,
  -12.121002522919204,
  6.56749752040495,
  10.042860663940328,
  -1.629939

In [130]:
score_indices = []
for i in score_list:
    order = sorted(range(len(i)), reverse = True, key=lambda k: i[k])
    score_indices.append(order)
    
score_indices

[[71,
  23,
  12,
  101,
  105,
  114,
  78,
  48,
  31,
  100,
  42,
  104,
  49,
  50,
  81,
  29,
  33,
  17,
  32,
  8,
  22,
  45,
  25,
  67,
  97,
  7,
  3,
  111,
  58,
  6,
  112,
  109,
  116,
  38,
  87,
  57,
  70,
  59,
  96,
  66,
  88,
  54,
  13,
  44,
  1,
  92,
  0,
  83,
  102,
  2,
  93,
  35,
  30,
  72,
  10,
  4,
  34,
  91,
  62,
  63,
  21,
  47,
  46,
  84,
  82,
  110,
  18,
  76,
  55,
  60,
  20,
  56,
  39,
  107,
  19,
  95,
  98,
  14,
  94,
  86,
  89,
  108,
  74,
  65,
  11,
  36,
  90,
  64,
  53,
  52,
  115,
  103,
  26,
  69,
  41,
  40,
  79,
  73,
  106,
  15,
  43,
  99,
  68,
  5,
  24,
  9,
  85,
  75,
  37,
  61,
  113,
  51,
  80,
  77,
  28,
  27,
  16],
 [84,
  11,
  98,
  73,
  87,
  33,
  95,
  100,
  28,
  53,
  2,
  51,
  91,
  50,
  105,
  106,
  34,
  16,
  52,
  25,
  26,
  44,
  23,
  54,
  40,
  47,
  79,
  64,
  15,
  20,
  83,
  49,
  102,
  55,
  66,
  3,
  39,
  97,
  81,
  65,
  7,
  10,
  115,
  86,
  0,
  63,
  58,
  60,
 

In [131]:
rec_list = []
for i in range(len(score_indices)):
    l = unwatched_list[i]
    order = score_indices[i]
    rec_list.append([l[j] for j in order])
    
rec_list

[[10400,
  10113,
  10073,
  10552,
  10586,
  10617,
  10435,
  10251,
  10159,
  10550,
  10233,
  10575,
  10259,
  10260,
  10447,
  10149,
  10191,
  10095,
  10177,
  10058,
  10108,
  10238,
  10125,
  10380,
  10537,
  10051,
  10016,
  10605,
  10324,
  10043,
  10609,
  10594,
  10621,
  10210,
  10492,
  10319,
  10394,
  10325,
  10524,
  10366,
  10493,
  10293,
  10074,
  10235,
  10005,
  10513,
  10003,
  10465,
  10558,
  10008,
  10518,
  10197,
  10153,
  10404,
  10064,
  10018,
  10196,
  10511,
  10350,
  10352,
  10106,
  10250,
  10245,
  10471,
  10460,
  10599,
  10099,
  10428,
  10295,
  10328,
  10103,
  10313,
  10217,
  10592,
  10100,
  10522,
  10540,
  10081,
  10520,
  10487,
  10502,
  10593,
  10417,
  10363,
  10072,
  10199,
  10507,
  10357,
  10291,
  10270,
  10619,
  10567,
  10130,
  10388,
  10230,
  10223,
  10439,
  10412,
  10587,
  10088,
  10234,
  10544,
  10387,
  10034,
  10121,
  10060,
  10472,
  10420,
  10205,
  10347,
  10610,
 

In [132]:
# output dataframe with user ids a

recommendations_shows = pd.DataFrame(rec_list)

recommendations_shows.to_csv(directory_path + 'show_recommendations.csv', index = False)
recommendations_shows

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,123,124,125,126,127,128,129,130,131,132
0,10400,10113,10073,10552,10586,10617,10435,10251,10159,10550,...,,,,,,,,,,
1,10435,10062,10516,10386,10469,10173,10495,10521,10144,10273,...,,,,,,,,,,
2,10213,10191,10551,10135,10288,10211,10491,10111,10246,10586,...,10069.0,10544.0,10309.0,10604.0,10181.0,10436.0,10619.0,10406.0,10461.0,10254.0
3,10126,10200,10102,10110,10358,10584,10167,10556,10184,10271,...,10403.0,10109.0,10152.0,10393.0,10242.0,10506.0,10564.0,10011.0,,
4,10005,10515,10243,10220,10360,10344,10213,10043,10316,10125,...,,,,,,,,,,


In [133]:
pd.read_csv(directory_path + 'show_recommendations.csv')

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,123,124,125,126,127,128,129,130,131,132
0,10400,10113,10073,10552,10586,10617,10435,10251,10159,10550,...,,,,,,,,,,
1,10435,10062,10516,10386,10469,10173,10495,10521,10144,10273,...,,,,,,,,,,
2,10213,10191,10551,10135,10288,10211,10491,10111,10246,10586,...,10069.0,10544.0,10309.0,10604.0,10181.0,10436.0,10619.0,10406.0,10461.0,10254.0
3,10126,10200,10102,10110,10358,10584,10167,10556,10184,10271,...,10403.0,10109.0,10152.0,10393.0,10242.0,10506.0,10564.0,10011.0,,
4,10005,10515,10243,10220,10360,10344,10213,10043,10316,10125,...,,,,,,,,,,


## Movies 

In [134]:
movie_activities = pd.read_json(directory_path + 'movie_activities.json')

In [135]:
# getting likes/dislikes
likes_dislikes = movie_activities.loc[np.where((movie_activities['activity'] == 'Like') ^ (movie_activities['activity'] == 'Dislike'))]

# latest activity at the bottom
likes_dislikes.sort_values('datetime', inplace = True)

# filtering out all like/dislike that is not at the bottom
latest = likes_dislikes.groupby(['user_id', 'content_id']).last()


# getting the right shape
latest.drop(columns = 'datetime', inplace = True)

unstacked = latest.unstack()

unstacked.columns = unstacked.columns.droplevel()

unstacked.replace({'Dislike':dislike, 'Like':like}, inplace = True)

unstacked.fillna(0, inplace = True)

unstacked

content_id,0,1,2,3,4,5,6,7,8,9,...,343,344,345,346,347,348,349,350,351,352
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.0,10.0,10.0,10.0,-10.0,-10.0,-10.0,-10.0,10,-10.0,...,10.0,0.0,0.0,10.0,0.0,-10.0,-10.0,10.0,0.0,-10
1,-10.0,0.0,-10.0,10.0,0.0,0.0,10.0,10.0,10,-10.0,...,-10.0,-10.0,0.0,10.0,-10.0,0.0,-10.0,10.0,-10.0,-10
2,0.0,0.0,0.0,0.0,0.0,-10.0,0.0,0.0,10,10.0,...,-10.0,10.0,10.0,-10.0,10.0,10.0,10.0,-10.0,10.0,-10
3,-10.0,-10.0,0.0,0.0,0.0,0.0,-10.0,-10.0,10,0.0,...,10.0,-10.0,10.0,0.0,10.0,10.0,-10.0,0.0,0.0,-10
4,-10.0,0.0,-10.0,-10.0,-10.0,-10.0,0.0,-10.0,-10,10.0,...,0.0,-10.0,0.0,-10.0,-10.0,0.0,0.0,0.0,-10.0,-10


In [136]:
max_length = list(unstacked.columns)[-1] + 1

max_length

353

In [137]:
# turning it into sparse matrix

likes_matrix = np.zeros((5, max_length))

users = [i for i in list(unstacked.index)]
cols = list(unstacked.columns)

for i in users:
    for j in cols:
        likes_matrix[i, j] = unstacked.loc[i, j]
        
likes_matrix

array([[  0.,  10.,  10., ...,  10.,   0., -10.],
       [-10.,   0., -10., ...,  10., -10., -10.],
       [  0.,   0.,   0., ..., -10.,  10., -10.],
       [-10., -10.,   0., ...,   0.,   0., -10.],
       [-10.,   0., -10., ...,   0., -10., -10.]])

In [138]:
# creating sparse matrix for plays

plays = movie_activities[show_activities['activity'] == 'Play']

grouped_plays = plays.groupby(['user_id', 'content_id']).count()

pivot_plays = grouped_plays.pivot_table(index='user_id', columns = 'content_id', values = 'activity')

pivot_plays.fillna(0, inplace = True)

plays_matrix = np.zeros((5, max_length))

users = [i for i in range(5)]
cols = list(pivot_plays.columns)

for i in users:
    for j in cols:
        plays_matrix[i, j] = pivot_plays.loc[i, j] * clicks
        
plays_matrix

  plays = movie_activities[show_activities['activity'] == 'Play']


array([[0. , 0.3, 0. , ..., 0. , 0. , 0.6],
       [0. , 0. , 0.3, ..., 0. , 0. , 0. ],
       [0. , 0. , 0.3, ..., 0.6, 0.3, 0.3],
       [0. , 0.3, 0. , ..., 0. , 0.6, 0.3],
       [0.6, 0. , 0.3, ..., 0. , 0. , 0. ]])

In [139]:
scores_matrix = plays_matrix + likes_matrix

scores_df = pd.DataFrame(scores_matrix)

scores_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,343,344,345,346,347,348,349,350,351,352
0,0.0,10.3,10.0,10.0,-10.0,-10.0,-9.7,-9.4,10.0,-9.1,...,10.0,0.0,0.0,10.3,0.0,-10.0,-9.7,10.0,0.0,-9.4
1,-10.0,0.0,-9.7,10.3,0.0,0.0,10.0,10.0,10.3,-10.0,...,-9.7,-9.7,0.0,10.3,-9.4,0.0,-10.0,10.0,-10.0,-10.0
2,0.0,0.0,0.3,0.0,0.0,-9.7,0.0,0.3,10.9,10.3,...,-9.7,10.3,10.3,-9.7,10.6,10.6,10.0,-9.4,10.3,-9.7
3,-10.0,-9.7,0.0,0.0,0.0,0.0,-9.7,-9.7,10.3,0.0,...,10.3,-9.7,10.6,0.0,10.6,10.0,-10.0,0.0,0.6,-9.7
4,-9.4,0.0,-9.7,-10.0,-10.0,-9.4,0.0,-9.7,-10.0,10.0,...,0.0,-10.0,0.0,-9.7,-9.7,0.0,0.0,0.0,-10.0,-10.0


In [140]:
scores_df_clean = scores_df.copy().loc[:, ~(scores_df == 0).all()]

scores_df_clean

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,343,344,345,346,347,348,349,350,351,352
0,0.0,10.3,10.0,10.0,-10.0,-10.0,-9.7,-9.4,10.0,-9.1,...,10.0,0.0,0.0,10.3,0.0,-10.0,-9.7,10.0,0.0,-9.4
1,-10.0,0.0,-9.7,10.3,0.0,0.0,10.0,10.0,10.3,-10.0,...,-9.7,-9.7,0.0,10.3,-9.4,0.0,-10.0,10.0,-10.0,-10.0
2,0.0,0.0,0.3,0.0,0.0,-9.7,0.0,0.3,10.9,10.3,...,-9.7,10.3,10.3,-9.7,10.6,10.6,10.0,-9.4,10.3,-9.7
3,-10.0,-9.7,0.0,0.0,0.0,0.0,-9.7,-9.7,10.3,0.0,...,10.3,-9.7,10.6,0.0,10.6,10.0,-10.0,0.0,0.6,-9.7
4,-9.4,0.0,-9.7,-10.0,-10.0,-9.4,0.0,-9.7,-10.0,10.0,...,0.0,-10.0,0.0,-9.7,-9.7,0.0,0.0,0.0,-10.0,-10.0


In [141]:
scores_df_clean.replace(0, np.nan, inplace = True)

In [142]:
# Compute cosine similarity between all pairs of users
user_similarities = cosine_similarity(scores_matrix)


In [143]:
missings = np.where(scores_matrix == 0)

u_ids = missings[0]
c_ids = missings[1]

c_ids

array([  0,  15,  23,  24,  26,  30,  31,  38,  43,  44,  48,  59,  61,
        67,  70,  73,  74,  77,  78,  82,  84,  85,  87,  89,  94,  95,
       104, 107, 108, 114, 124, 127, 131, 133, 141, 146, 154, 157, 158,
       159, 164, 170, 172, 175, 181, 183, 184, 187, 190, 196, 197, 199,
       213, 223, 226, 232, 237, 239, 243, 245, 246, 247, 248, 252, 260,
       264, 266, 267, 290, 293, 298, 302, 303, 308, 309, 312, 315, 319,
       322, 329, 333, 335, 342, 344, 345, 347, 351,   1,   4,   5,  10,
        13,  15,  18,  21,  30,  40,  41,  45,  50,  51,  56,  58,  62,
        71,  73,  77,  85,  88,  89,  94,  98, 103, 104, 105, 107, 108,
       109, 110, 113, 114, 120, 121, 131, 133, 138, 148, 152, 157, 161,
       164, 165, 168, 169, 170, 171, 173, 176, 177, 178, 180, 181, 186,
       188, 189, 196, 206, 209, 212, 213, 214, 222, 224, 225, 229, 230,
       236, 239, 245, 248, 249, 250, 251, 252, 255, 260, 261, 264, 268,
       269, 270, 273, 285, 289, 290, 292, 293, 294, 295, 299, 30

In [144]:
z = np.zeros((5, max_length))

for i in range(len(u_ids)):
    z[u_ids[i], c_ids[i]] = predict_rating(scores_matrix, u_ids[i], c_ids[i])
        
z

array([[  -9.66666524,    0.        ,    0.        , ...,    0.        ,
          34.1115001 ,    0.        ],
       [   0.        , -255.01424725,    0.        , ...,    0.        ,
           0.        ,    0.        ],
       [ -10.03677589,  100.74201808,    0.        , ...,    0.        ,
           0.        ,    0.        ],
       [   0.        ,    0.        ,   -1.79054406, ...,   -7.36519536,
           0.        ,    0.        ],
       [   0.        ,   -2.45752333,    0.        , ...,   10.95481929,
           0.        ,    0.        ]])

In [145]:
only_predictions = pd.DataFrame(z)

only_predictions

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,343,344,345,346,347,348,349,350,351,352
0,-9.666665,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,38.523111,10.233796,0.0,30.147245,0.0,0.0,0.0,34.1115,0.0
1,0.0,-255.014247,0.0,0.0,-10.0,-9.128005,0.0,0.0,0.0,0.0,...,0.0,0.0,10.369635,0.0,0.0,16.051955,0.0,0.0,0.0,0.0
2,-10.036776,100.742018,0.0,9.590183,-10.0,0.0,2.273458,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,-1.790544,-7.159699,-10.0,-9.625503,0.0,0.0,0.0,8.983081,...,0.0,0.0,0.0,-8.408196,0.0,0.0,0.0,-7.365195,0.0,0.0
4,0.0,-2.457523,0.0,0.0,0.0,0.0,-4.290695,0.0,0.0,0.0,...,5.10258,0.0,10.617281,0.0,0.0,2.475248,-10.436245,10.954819,0.0,0.0


In [146]:
np.where(only_predictions != 0)

(array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 

In [147]:
unwatched_list = []

for i in range(len(only_predictions)):
    unwatched_list.append(list(np.where(z[i] != 0)[0]))
                     
unwatched_list

[[0,
  15,
  23,
  24,
  26,
  30,
  31,
  38,
  43,
  44,
  48,
  59,
  61,
  67,
  70,
  73,
  74,
  77,
  78,
  82,
  84,
  85,
  87,
  89,
  94,
  95,
  104,
  107,
  108,
  114,
  124,
  127,
  131,
  133,
  141,
  146,
  154,
  157,
  158,
  159,
  164,
  170,
  172,
  175,
  181,
  183,
  184,
  187,
  190,
  196,
  197,
  199,
  213,
  223,
  226,
  232,
  237,
  239,
  243,
  245,
  246,
  247,
  248,
  252,
  260,
  264,
  266,
  267,
  290,
  293,
  298,
  302,
  303,
  308,
  309,
  312,
  315,
  319,
  322,
  329,
  333,
  335,
  342,
  344,
  345,
  347,
  351],
 [1,
  4,
  5,
  10,
  13,
  15,
  18,
  21,
  30,
  40,
  41,
  45,
  50,
  51,
  56,
  58,
  62,
  71,
  73,
  77,
  85,
  88,
  89,
  94,
  98,
  103,
  104,
  105,
  107,
  108,
  109,
  110,
  113,
  114,
  120,
  121,
  131,
  133,
  138,
  148,
  152,
  157,
  161,
  164,
  165,
  168,
  169,
  170,
  171,
  173,
  176,
  177,
  178,
  180,
  181,
  186,
  188,
  189,
  196,
  206,
  209,
  212,
  213,
  21

In [148]:
score_list = []
c = 0
for i in unwatched_list:
    s = []
    for j in i:
        s.append(z[c, j])
    score_list.append(s)
    c += 1
    
score_list

[[-9.666665244931815,
  16.953030218787973,
  4.612128860455085,
  -11.712607121465455,
  -17.956017859428965,
  -30.498221335933394,
  -14.880938637951138,
  -10.0,
  -13.461680684868682,
  13.127266736213494,
  23.431179604009305,
  -27.987219621403398,
  12.964586495824094,
  -22.496332010098847,
  -14.240314150669885,
  -14.54603592808164,
  36.051397879923584,
  -9.7,
  23.972616095506808,
  13.466644861767545,
  3.9340408420623683,
  10.0,
  -3.7706923689575844,
  -10.0,
  -2.8375152821904766,
  10.092989376706052,
  17.51490633631663,
  14.347422707922734,
  10.233795593280366,
  -13.447422707922733,
  -34.282314992772704,
  18.308504924362648,
  23.087009851839174,
  20.03076527697086,
  -9.497941634056549,
  37.756611206620576,
  -9.595022145795687,
  10.0,
  -9.909227213084696,
  18.66893804371519,
  -9.399999999999999,
  -9.55619558703984,
  -3.4480620886502606,
  -18.67586577508152,
  -9.892562729232857,
  -8.314881895578312,
  3.9340408420623683,
  -9.72553261763734,
  10.

In [149]:
score_indices = []
for i in score_list:
    order = sorted(range(len(i)), reverse = True, key=lambda k: i[k])
    score_indices.append(order)
    
score_indices

[[83,
  35,
  16,
  86,
  85,
  74,
  53,
  60,
  18,
  10,
  32,
  81,
  33,
  72,
  39,
  31,
  26,
  63,
  1,
  50,
  27,
  71,
  19,
  9,
  12,
  48,
  28,
  84,
  25,
  21,
  37,
  51,
  59,
  78,
  56,
  70,
  79,
  2,
  55,
  20,
  46,
  69,
  62,
  24,
  76,
  42,
  58,
  22,
  45,
  82,
  40,
  34,
  41,
  36,
  0,
  17,
  68,
  77,
  80,
  47,
  44,
  38,
  7,
  23,
  49,
  57,
  65,
  3,
  29,
  8,
  54,
  14,
  15,
  6,
  4,
  43,
  64,
  13,
  75,
  11,
  67,
  5,
  52,
  30,
  61,
  73,
  66],
 [36,
  31,
  95,
  77,
  5,
  25,
  81,
  9,
  78,
  94,
  53,
  3,
  17,
  91,
  4,
  35,
  34,
  66,
  100,
  48,
  59,
  7,
  89,
  103,
  96,
  86,
  97,
  29,
  102,
  21,
  71,
  20,
  41,
  46,
  88,
  38,
  92,
  75,
  60,
  40,
  28,
  93,
  82,
  44,
  63,
  24,
  84,
  49,
  6,
  39,
  42,
  83,
  61,
  33,
  18,
  13,
  85,
  101,
  65,
  79,
  98,
  45,
  2,
  69,
  43,
  68,
  15,
  19,
  87,
  80,
  90,
  12,
  22,
  1,
  58,
  70,
  54,
  52,
  27,
  47,
  64,
  74,

In [150]:
rec_list = []
for i in range(len(score_indices)):
    l = unwatched_list[i]
    order = score_indices[i]
    rec_list.append([l[j] for j in order])
    
rec_list

[[344,
  146,
  74,
  351,
  347,
  309,
  223,
  246,
  78,
  48,
  131,
  335,
  133,
  303,
  159,
  127,
  104,
  252,
  15,
  197,
  107,
  302,
  82,
  44,
  61,
  190,
  108,
  345,
  95,
  85,
  157,
  199,
  245,
  322,
  237,
  298,
  329,
  23,
  232,
  84,
  184,
  293,
  248,
  94,
  315,
  172,
  243,
  87,
  183,
  342,
  164,
  141,
  170,
  154,
  0,
  77,
  290,
  319,
  333,
  187,
  181,
  158,
  38,
  89,
  196,
  239,
  264,
  24,
  114,
  43,
  226,
  70,
  73,
  31,
  26,
  175,
  260,
  67,
  312,
  59,
  267,
  30,
  213,
  124,
  247,
  308,
  266],
 [131,
  110,
  309,
  255,
  15,
  103,
  268,
  40,
  260,
  307,
  180,
  10,
  71,
  295,
  13,
  121,
  120,
  225,
  336,
  171,
  206,
  21,
  293,
  348,
  320,
  289,
  323,
  108,
  345,
  88,
  245,
  85,
  157,
  169,
  292,
  138,
  299,
  251,
  209,
  152,
  107,
  300,
  269,
  165,
  214,
  98,
  273,
  173,
  18,
  148,
  161,
  270,
  212,
  114,
  73,
  51,
  285,
  339,
  224,
  261,
  325,
  

In [151]:
# output dataframe with user ids a

recommendations_movies = pd.DataFrame(rec_list)

recommendations_movies.to_csv(directory_path + 'movie_recommendations.csv', index = False)
recommendations_movies

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,94,95,96,97,98,99,100,101,102,103
0,344,146,74,351,347,309,223,246,78,48,...,,,,,,,,,,
1,131,110,309,255,15,103,268,40,260,307,...,133.0,229.0,177.0,330.0,248.0,213.0,30.0,1.0,62.0,113.0
2,1,62,113,54,194,289,168,240,97,36,...,,,,,,,,,,
3,304,237,82,61,116,44,296,63,217,323,...,,,,,,,,,,
4,45,73,114,284,350,315,169,98,273,108,...,239.0,154.0,22.0,235.0,349.0,107.0,21.0,,,


In [57]:
pd.read_csv(directory_path + 'movie_recommendations.csv')

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,69,70,71,72,73,74,75,76,77,78
0,182,1,203,16,136,216,183,315,253,104,...,228.0,6.0,206.0,272.0,129.0,24.0,119.0,61.0,,
1,87,116,243,106,208,261,258,37,314,257,...,142.0,32.0,286.0,25.0,121.0,,,,,
2,182,283,9,218,176,205,1,267,255,282,...,168.0,61.0,,,,,,,,
3,275,61,76,75,179,168,123,299,210,153,...,65.0,226.0,145.0,283.0,9.0,205.0,176.0,218.0,182.0,112.0
4,15,230,85,106,313,91,288,95,66,71,...,,,,,,,,,,
