In [87]:
from datetime import datetime
from sklearn.metrics.pairwise import cosine_similarity
import json
import pandas as pd
import numpy as np
import os

In [88]:
current_dir = os.getcwd()  # gets current working directory
project_dir = os.path.dirname(current_dir)  
directory_path = os.path.dirname(current_dir) + os.sep + 'data' + os.sep

show_activities = pd.read_json(directory_path + 'show_activities.json')

## Shows

In [89]:
show_activities

Unnamed: 0,content_id,activity,user_id,datetime
0,10076,Like,3,2023-04-02 13:03:59.911611
1,10607,Like,3,2023-04-02 13:03:59.911646
2,10357,Dislike,3,2023-04-02 13:03:59.911665
3,10346,Play,0,2023-04-02 13:03:59.911681
4,10159,Like,2,2023-04-02 13:03:59.911698
...,...,...,...,...
4995,10157,Play,1,2023-04-02 13:03:59.985805
4996,10471,Like,2,2023-04-02 13:03:59.985819
4997,10248,Play,0,2023-04-02 13:03:59.985832
4998,10524,Dislike,4,2023-04-02 13:03:59.985846


In [90]:
# Variables for adjusting formula

clicks = 0.3
like = 10
dislike = -10

# current formula is clicks * 0.3 +/-10 (+10 for like and -10 for dislike)

In [91]:
# getting likes/dislikes
likes_dislikes = show_activities.loc[np.where((show_activities['activity'] == 'Like') ^ (show_activities['activity'] == 'Dislike'))]

# latest activity at the bottom
likes_dislikes.sort_values('datetime', inplace = True)

# filtering out all like/dislike that is not at the bottom
latest = likes_dislikes.groupby(['user_id', 'content_id']).last()


# getting the right shape
latest.drop(columns = 'datetime', inplace = True)

unstacked = latest.unstack()

unstacked.columns = unstacked.columns.droplevel()

unstacked.replace({'Dislike':dislike, 'Like':like}, inplace = True)

unstacked.fillna(0, inplace = True)

unstacked

content_id,10000,10001,10002,10003,10004,10005,10006,10007,10008,10009,...,10614,10615,10616,10617,10618,10619,10620,10621,10622,10623
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,10.0,0.0,0.0,10.0,0.0,-10.0,0.0,-10.0,0.0,-10.0,...,10.0,10.0,10.0,0.0,-10.0,10,10.0,10.0,10.0,10.0
1,0.0,10.0,10.0,0.0,10.0,10.0,10.0,0.0,0.0,0.0,...,10.0,-10.0,-10.0,10.0,-10.0,-10,10.0,-10.0,10.0,0.0
2,-10.0,-10.0,-10.0,-10.0,-10.0,10.0,0.0,0.0,10.0,0.0,...,0.0,-10.0,10.0,0.0,0.0,10,10.0,0.0,0.0,0.0
3,0.0,10.0,0.0,10.0,10.0,-10.0,-10.0,10.0,10.0,0.0,...,0.0,0.0,10.0,10.0,10.0,10,0.0,10.0,-10.0,10.0
4,10.0,0.0,0.0,-10.0,0.0,0.0,0.0,0.0,0.0,10.0,...,0.0,10.0,0.0,0.0,0.0,-10,10.0,0.0,10.0,10.0


In [92]:
max_length = list(unstacked.columns)[-1] + 1

max_length

10624

In [93]:
# turning it into sparse matrix

likes_matrix = np.zeros((5, max_length))

users = [i for i in list(unstacked.index)]
cols = list(unstacked.columns)

for i in users:
    for j in cols:
        likes_matrix[i, j] = unstacked.loc[i, j]
        
likes_matrix

array([[  0.,   0.,   0., ...,  10.,  10.,  10.],
       [  0.,   0.,   0., ..., -10.,  10.,   0.],
       [  0.,   0.,   0., ...,   0.,   0.,   0.],
       [  0.,   0.,   0., ...,  10., -10.,  10.],
       [  0.,   0.,   0., ...,   0.,  10.,  10.]])

In [94]:
# creating sparse matrix for plays

plays = show_activities[show_activities['activity'] == 'Play']

grouped_plays = plays.groupby(['user_id', 'content_id']).count()

pivot_plays = grouped_plays.pivot_table(index='user_id', columns = 'content_id', values = 'activity')

pivot_plays.fillna(0, inplace = True)

plays_matrix = np.zeros((5, max_length))

users = [i for i in range(5)]
cols = list(pivot_plays.columns)

for i in users:
    for j in cols:
        plays_matrix[i, j] = pivot_plays.loc[i, j] * clicks
        
plays_matrix

array([[0. , 0. , 0. , ..., 0. , 0.3, 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0.3],
       [0. , 0. , 0. , ..., 0.9, 0. , 0. ],
       [0. , 0. , 0. , ..., 0.3, 0. , 0. ],
       [0. , 0. , 0. , ..., 0.6, 0.3, 0. ]])

In [95]:
scores_matrix = plays_matrix + likes_matrix

scores_df = pd.DataFrame(scores_matrix)

scores_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,10614,10615,10616,10617,10618,10619,10620,10621,10622,10623
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.0,10.0,10.3,0.0,-10.0,10.6,10.0,10.0,10.3,10.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.0,-10.0,-9.7,10.3,-10.0,-9.7,10.0,-10.0,10.0,0.3
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.3,-9.7,10.6,0.0,0.3,10.6,10.3,0.9,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.6,10.6,10.3,10.0,10.0,0.3,10.3,-10.0,10.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,10.6,0.0,0.0,0.3,-10.0,10.0,0.6,10.3,10.0


In [96]:
scores_df_clean = scores_df.copy().loc[:, ~(scores_df == 0).all()]

scores_df_clean

Unnamed: 0,10000,10001,10002,10003,10004,10005,10006,10007,10008,10009,...,10614,10615,10616,10617,10618,10619,10620,10621,10622,10623
0,10.3,0.3,0.0,10.0,0.6,-10.0,0.9,-10.0,0.0,-10.0,...,10.0,10.0,10.3,0.0,-10.0,10.6,10.0,10.0,10.3,10.0
1,0.0,10.3,10.3,0.3,10.0,10.3,10.0,0.3,0.0,0.3,...,10.0,-10.0,-9.7,10.3,-10.0,-9.7,10.0,-10.0,10.0,0.3
2,-10.0,-10.0,-10.0,-9.7,-10.0,10.6,0.0,0.3,10.0,0.3,...,0.3,-9.7,10.6,0.0,0.3,10.6,10.3,0.9,0.0,0.0
3,0.3,10.3,0.0,10.3,10.3,-10.0,-9.4,10.0,10.3,1.2,...,0.0,0.6,10.6,10.3,10.0,10.0,0.3,10.3,-10.0,10.0
4,10.0,0.3,0.6,-9.4,0.0,0.6,0.0,0.3,0.0,10.3,...,0.0,10.6,0.0,0.0,0.3,-10.0,10.0,0.6,10.3,10.0


In [97]:
scores_df_clean.replace(0, np.nan, inplace = True)

In [98]:
# Compute cosine similarity between all pairs of users
user_similarities = cosine_similarity(scores_matrix)


In [99]:
# Function to predict a user's rating for an item
def predict_rating(user_item_matrix, user_id, item_id):
    # Find the most similar users to the given user
    similar_users = np.argsort(user_similarities[user_id])[::-1][1:]
    
    # Compute weighted average of their ratings for the item 
    weighted_sum = 0
    weight_sum = 0   
    for sim_user in similar_users:
        if user_item_matrix[sim_user][item_id] != 0:
            similarity = user_similarities[user_id][sim_user]
            rating = user_item_matrix[sim_user][item_id]
            weighted_sum += similarity * rating            
            weight_sum += similarity   
    if weight_sum == 0:
        return 0
    else:
        return weighted_sum / weight_sum
    # Example usage:
    # Predict user 0's rating for item 2
    #predicted_rating = predict_rating(0, 2)
    #print(predicted_rating)

In [100]:
missings = np.where(scores_matrix == 0)

u_ids = missings[0]
c_ids = missings[1]

c_ids

array([    0,     1,     2, ..., 10614, 10616, 10617])

In [101]:
z = np.zeros((5, max_length))

for i in range(len(u_ids)):
    z[u_ids[i], c_ids[i]] = predict_rating(scores_matrix, u_ids[i], c_ids[i])
        
z

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 7.06435775,
        7.67759827],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

In [102]:
only_predictions = pd.DataFrame(z)

only_predictions

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,10614,10615,10616,10617,10618,10619,10620,10621,10622,10623
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,10.3,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,10.3,0.0,0.0,0.0,0.0,7.064358,7.677598
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,6.213758,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,6.689658,0.0,8.014214,10.3,0.0,0.0,0.0,0.0,0.0,0.0


In [103]:
np.where(only_predictions != 0)

(array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 

In [104]:
unwatched_list = []

for i in range(len(only_predictions)):
    unwatched_list.append(list(np.where(z[i] != 0)[0]))
                     
unwatched_list

[[10002,
  10008,
  10015,
  10020,
  10021,
  10024,
  10028,
  10031,
  10036,
  10038,
  10066,
  10067,
  10081,
  10083,
  10086,
  10094,
  10100,
  10102,
  10106,
  10114,
  10115,
  10116,
  10122,
  10123,
  10124,
  10128,
  10147,
  10149,
  10157,
  10159,
  10166,
  10169,
  10176,
  10178,
  10187,
  10189,
  10191,
  10193,
  10195,
  10197,
  10203,
  10207,
  10217,
  10225,
  10232,
  10233,
  10242,
  10245,
  10261,
  10268,
  10271,
  10292,
  10305,
  10308,
  10309,
  10316,
  10318,
  10323,
  10328,
  10344,
  10355,
  10356,
  10357,
  10358,
  10359,
  10378,
  10382,
  10399,
  10412,
  10417,
  10424,
  10425,
  10427,
  10447,
  10448,
  10450,
  10453,
  10464,
  10466,
  10489,
  10491,
  10498,
  10502,
  10504,
  10505,
  10509,
  10513,
  10517,
  10521,
  10529,
  10531,
  10542,
  10547,
  10548,
  10551,
  10552,
  10555,
  10557,
  10558,
  10559,
  10561,
  10563,
  10568,
  10571,
  10581,
  10585,
  10594,
  10602,
  10603,
  10604,
  10605,
 

In [105]:
score_list = []
c = 0
for i in unwatched_list:
    s = []
    for j in i:
        s.append(z[c, j])
    score_list.append(s)
    c += 1
    
score_list

[[2.138840923211854,
  10.14016803871673,
  9.022396260711268,
  -3.7031052122735955,
  7.4177730221289915,
  3.908228865948771,
  10.36444301963603,
  0.6,
  -1.8723103197419102,
  7.238911370362672,
  10.15983196128327,
  4.5518241041821605,
  -2.182598809143477,
  7.978771102091977,
  2.843831590738331,
  5.497261039089218,
  -0.6342789314205021,
  -1.770743602182073,
  -6.967981450948107,
  4.972267957224348,
  -2.2746894226677825,
  2.4957867365462016,
  -2.2811999745891574,
  1.4282241425875302,
  0.3,
  10.299999999999999,
  10.0,
  -3.3194782550827764,
  9.215680468417178,
  -4.975076757589671,
  4.0620122319496215,
  6.836833368371932,
  6.135499447782186,
  7.302335333915153,
  10.3,
  3.2310332180811185,
  3.514241218794102,
  -4.638917659488339,
  -5.850108947697461,
  2.9370707847588564,
  3.85668507616521,
  7.312446550018126,
  4.1819480443682435,
  10.084494822075627,
  10.294820330515243,
  4.789807341442843,
  8.116767116143643,
  7.749338978976323,
  4.50595006173387

In [106]:
score_indices = []
for i in score_list:
    order = sorted(range(len(i)), reverse = True, key=lambda k: i[k])
    score_indices.append(order)
    
score_indices

[[6,
  92,
  34,
  112,
  25,
  68,
  44,
  50,
  10,
  1,
  73,
  43,
  62,
  65,
  26,
  102,
  104,
  28,
  84,
  57,
  110,
  2,
  49,
  87,
  91,
  54,
  46,
  13,
  67,
  47,
  74,
  106,
  4,
  41,
  33,
  89,
  9,
  95,
  31,
  105,
  90,
  63,
  53,
  93,
  32,
  15,
  99,
  19,
  69,
  51,
  45,
  11,
  48,
  58,
  60,
  42,
  30,
  5,
  40,
  86,
  36,
  59,
  71,
  96,
  100,
  35,
  77,
  39,
  14,
  80,
  21,
  109,
  81,
  79,
  64,
  101,
  0,
  55,
  23,
  76,
  98,
  7,
  70,
  24,
  97,
  107,
  61,
  16,
  94,
  111,
  103,
  75,
  17,
  8,
  78,
  12,
  72,
  20,
  22,
  66,
  56,
  27,
  3,
  88,
  83,
  37,
  29,
  108,
  82,
  38,
  18,
  52,
  85],
 [113,
  110,
  87,
  28,
  46,
  84,
  115,
  9,
  11,
  32,
  69,
  5,
  75,
  1,
  92,
  63,
  83,
  105,
  59,
  106,
  4,
  109,
  117,
  99,
  94,
  54,
  101,
  22,
  77,
  91,
  35,
  120,
  89,
  78,
  41,
  72,
  47,
  45,
  48,
  65,
  7,
  76,
  79,
  62,
  52,
  27,
  18,
  31,
  67,
  51,
  114,
  3,
  

In [107]:
rec_list = []
for i in range(len(score_indices)):
    l = unwatched_list[i]
    order = score_indices[i]
    rec_list.append([l[j] for j in order])
    
rec_list

[[10028,
  10547,
  10187,
  10617,
  10128,
  10412,
  10232,
  10271,
  10066,
  10008,
  10447,
  10225,
  10357,
  10378,
  10147,
  10568,
  10581,
  10157,
  10505,
  10323,
  10605,
  10015,
  10268,
  10517,
  10542,
  10309,
  10242,
  10083,
  10399,
  10245,
  10448,
  10594,
  10021,
  10207,
  10178,
  10529,
  10038,
  10552,
  10169,
  10585,
  10531,
  10358,
  10308,
  10548,
  10176,
  10094,
  10559,
  10114,
  10417,
  10292,
  10233,
  10067,
  10261,
  10328,
  10355,
  10217,
  10166,
  10024,
  10203,
  10513,
  10191,
  10344,
  10425,
  10555,
  10561,
  10189,
  10464,
  10197,
  10086,
  10491,
  10116,
  10604,
  10498,
  10489,
  10359,
  10563,
  10002,
  10316,
  10123,
  10453,
  10558,
  10031,
  10424,
  10124,
  10557,
  10602,
  10356,
  10100,
  10551,
  10609,
  10571,
  10450,
  10102,
  10036,
  10466,
  10081,
  10427,
  10115,
  10122,
  10382,
  10318,
  10149,
  10020,
  10521,
  10504,
  10193,
  10159,
  10603,
  10502,
  10195,
  10106,
 

In [108]:
# output dataframe with user ids a

recommendations_shows = pd.DataFrame(rec_list)

recommendations_shows.to_csv(directory_path + 'show_recommendations.csv', index = False)
recommendations_shows

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,127,128,129,130,131,132,133,134,135,136
0,10028,10547,10187,10617,10128,10412,10232,10271,10066,10008,...,,,,,,,,,,
1,10575,10547,10419,10128,10187,10412,10582,10048,10066,10140,...,,,,,,,,,,
2,10088,10419,10547,10128,10412,10582,10617,10271,10447,10550,...,10526.0,10403.0,10093.0,10069.0,10148.0,10524.0,10177.0,10564.0,10330.0,10401.0
3,10088,10187,10582,10271,10550,10533,10243,10256,10059,10431,...,,,,,,,,,,
4,10128,10412,10617,10364,10550,10225,10048,10430,10174,10140,...,10132.0,10401.0,10528.0,,,,,,,


In [109]:
pd.read_csv(directory_path + 'show_recommendations.csv')

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,127,128,129,130,131,132,133,134,135,136
0,10028,10547,10187,10617,10128,10412,10232,10271,10066,10008,...,,,,,,,,,,
1,10575,10547,10419,10128,10187,10412,10582,10048,10066,10140,...,,,,,,,,,,
2,10088,10419,10547,10128,10412,10582,10617,10271,10447,10550,...,10526.0,10403.0,10093.0,10069.0,10148.0,10524.0,10177.0,10564.0,10330.0,10401.0
3,10088,10187,10582,10271,10550,10533,10243,10256,10059,10431,...,,,,,,,,,,
4,10128,10412,10617,10364,10550,10225,10048,10430,10174,10140,...,10132.0,10401.0,10528.0,,,,,,,


## Movies 

In [110]:
movie_activities = pd.read_json(directory_path + 'movie_activities.json')

In [111]:
# getting likes/dislikes
likes_dislikes = movie_activities.loc[np.where((movie_activities['activity'] == 'Like') ^ (movie_activities['activity'] == 'Dislike'))]

# latest activity at the bottom
likes_dislikes.sort_values('datetime', inplace = True)

# filtering out all like/dislike that is not at the bottom
latest = likes_dislikes.groupby(['user_id', 'content_id']).last()


# getting the right shape
latest.drop(columns = 'datetime', inplace = True)

unstacked = latest.unstack()

unstacked.columns = unstacked.columns.droplevel()

unstacked.replace({'Dislike':dislike, 'Like':like}, inplace = True)

unstacked.fillna(0, inplace = True)

unstacked

content_id,0,1,2,3,4,5,6,7,8,9,...,342,343,344,346,347,348,349,350,351,352
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,-10.0,0.0,-10.0,0.0,-10.0,10,10.0,0.0,0.0,0.0,...,-10.0,0.0,10.0,10.0,10,10.0,10.0,-10.0,10.0,0.0
1,-10.0,0.0,-10.0,10.0,10.0,10,0.0,0.0,10.0,10.0,...,-10.0,10.0,10.0,0.0,10,0.0,0.0,0.0,-10.0,10.0
2,0.0,0.0,10.0,0.0,0.0,-10,10.0,0.0,10.0,-10.0,...,0.0,0.0,0.0,0.0,10,-10.0,0.0,-10.0,0.0,10.0
3,-10.0,-10.0,0.0,0.0,10.0,10,-10.0,10.0,10.0,-10.0,...,-10.0,0.0,10.0,10.0,-10,0.0,10.0,-10.0,10.0,10.0
4,0.0,0.0,10.0,10.0,0.0,-10,10.0,10.0,10.0,0.0,...,-10.0,0.0,-10.0,0.0,10,-10.0,-10.0,10.0,-10.0,0.0


In [112]:
max_length = list(unstacked.columns)[-1] + 1

max_length

353

In [113]:
# turning it into sparse matrix

likes_matrix = np.zeros((5, max_length))

users = [i for i in list(unstacked.index)]
cols = list(unstacked.columns)

for i in users:
    for j in cols:
        likes_matrix[i, j] = unstacked.loc[i, j]
        
likes_matrix

array([[-10.,   0., -10., ..., -10.,  10.,   0.],
       [-10.,   0., -10., ...,   0., -10.,  10.],
       [  0.,   0.,  10., ..., -10.,   0.,  10.],
       [-10., -10.,   0., ..., -10.,  10.,  10.],
       [  0.,   0.,  10., ...,  10., -10.,   0.]])

In [114]:
# creating sparse matrix for plays

plays = movie_activities[show_activities['activity'] == 'Play']

grouped_plays = plays.groupby(['user_id', 'content_id']).count()

pivot_plays = grouped_plays.pivot_table(index='user_id', columns = 'content_id', values = 'activity')

pivot_plays.fillna(0, inplace = True)

plays_matrix = np.zeros((5, max_length))

users = [i for i in range(5)]
cols = list(pivot_plays.columns)

for i in users:
    for j in cols:
        plays_matrix[i, j] = pivot_plays.loc[i, j] * clicks
        
plays_matrix

  plays = movie_activities[show_activities['activity'] == 'Play']


array([[0.9, 0. , 0. , ..., 0.3, 0.3, 0. ],
       [0.3, 0. , 0.3, ..., 0. , 0. , 0. ],
       [0. , 0. , 0.3, ..., 0.6, 0. , 0. ],
       [0.9, 0.3, 0. , ..., 0.3, 0. , 0. ],
       [0.3, 0. , 0.3, ..., 0. , 0.6, 0.3]])

In [115]:
scores_matrix = plays_matrix + likes_matrix

scores_df = pd.DataFrame(scores_matrix)

scores_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,343,344,345,346,347,348,349,350,351,352
0,-9.1,0.0,-10.0,0.0,-9.7,10.0,10.9,0.0,0.0,0.0,...,0.0,10.0,0.0,10.6,10.6,10.0,10.3,-9.7,10.3,0.0
1,-9.7,0.0,-9.7,10.3,10.3,10.9,0.0,0.0,10.6,10.0,...,10.0,10.6,0.0,0.0,10.0,0.3,0.0,0.0,-10.0,10.0
2,0.0,0.0,10.3,0.0,0.3,-10.0,10.3,0.0,10.3,-9.7,...,0.0,0.0,0.0,0.3,10.9,-9.7,0.3,-9.4,0.0,10.0
3,-9.1,-9.7,0.0,0.3,10.3,10.0,-10.0,10.3,10.3,-9.7,...,0.0,10.6,0.0,10.6,-10.0,0.0,10.0,-9.7,10.0,10.0
4,0.3,0.0,10.3,10.3,0.3,-9.7,10.0,10.0,10.6,0.3,...,0.0,-10.0,0.0,0.0,10.6,-9.1,-9.7,10.0,-9.4,0.3


In [116]:
scores_df_clean = scores_df.copy().loc[:, ~(scores_df == 0).all()]

scores_df_clean

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,342,343,344,346,347,348,349,350,351,352
0,-9.1,0.0,-10.0,0.0,-9.7,10.0,10.9,0.0,0.0,0.0,...,-9.4,0.0,10.0,10.6,10.6,10.0,10.3,-9.7,10.3,0.0
1,-9.7,0.0,-9.7,10.3,10.3,10.9,0.0,0.0,10.6,10.0,...,-10.0,10.0,10.6,0.0,10.0,0.3,0.0,0.0,-10.0,10.0
2,0.0,0.0,10.3,0.0,0.3,-10.0,10.3,0.0,10.3,-9.7,...,0.3,0.0,0.0,0.3,10.9,-9.7,0.3,-9.4,0.0,10.0
3,-9.1,-9.7,0.0,0.3,10.3,10.0,-10.0,10.3,10.3,-9.7,...,-10.0,0.0,10.6,10.6,-10.0,0.0,10.0,-9.7,10.0,10.0
4,0.3,0.0,10.3,10.3,0.3,-9.7,10.0,10.0,10.6,0.3,...,-9.7,0.0,-10.0,0.0,10.6,-9.1,-9.7,10.0,-9.4,0.3


In [117]:
scores_df_clean.replace(0, np.nan, inplace = True)

In [118]:
# Compute cosine similarity between all pairs of users
user_similarities = cosine_similarity(scores_matrix)


In [119]:
missings = np.where(scores_matrix == 0)

u_ids = missings[0]
c_ids = missings[1]

c_ids

array([  1,   3,   7,   8,   9,  11,  14,  25,  28,  37,  46,  54,  58,
        60,  62,  71,  73,  74,  77,  91,  94,  95,  96,  98,  99, 102,
       104, 105, 106, 110, 114, 117, 119, 126, 128, 144, 146, 148, 149,
       150, 159, 160, 164, 165, 167, 169, 177, 179, 183, 186, 197, 202,
       207, 211, 212, 216, 224, 229, 233, 236, 242, 246, 250, 255, 263,
       265, 267, 268, 278, 279, 283, 284, 285, 286, 293, 294, 297, 309,
       319, 326, 328, 333, 335, 343, 345, 352,   1,   6,   7,  10,  13,
        14,  19,  21,  26,  27,  30,  32,  38,  42,  50,  52,  56,  62,
        72,  73,  78,  80,  91,  95,  99, 101, 108, 112, 118, 129, 132,
       133, 139, 140, 153, 154, 158, 168, 172, 177, 178, 179, 180, 182,
       184, 190, 197, 199, 202, 203, 208, 209, 210, 215, 218, 229, 233,
       238, 241, 243, 244, 249, 250, 252, 255, 257, 261, 263, 264, 267,
       268, 272, 276, 277, 284, 287, 299, 302, 303, 305, 307, 308, 312,
       322, 330, 331, 337, 339, 345, 346, 349, 350,   0,   1,   

In [120]:
z = np.zeros((5, max_length))

for i in range(len(u_ids)):
    z[u_ids[i], c_ids[i]] = predict_rating(scores_matrix, u_ids[i], c_ids[i])
        
z

array([[ 0.        , -9.7       ,  0.        , ...,  0.        ,
         0.        ,  9.2109142 ],
       [ 0.        , -9.7       ,  0.        , ..., -8.53355282,
         0.        ,  0.        ],
       [-2.91526415, -9.7       ,  0.        , ...,  0.        ,
        -2.07846681,  0.        ],
       [ 0.        ,  0.        , -5.2081067 , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        , -9.7       ,  0.        , ...,  0.        ,
         0.        ,  0.        ]])

In [121]:
only_predictions = pd.DataFrame(z)

only_predictions

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,343,344,345,346,347,348,349,350,351,352
0,0.0,-9.7,0.0,6.620612,0.0,0.0,0.0,10.232382,10.444077,-1.02805,...,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.210914
1,0.0,-9.7,0.0,0.0,0.0,0.0,0.889559,10.266783,0.0,0.0,...,0.0,0.0,0.0,10.70031,0.0,0.0,9.061605,-8.533553,0.0,0.0
2,-2.915264,-9.7,0.0,21.271485,0.0,0.0,0.0,9.693591,0.0,0.0,...,10.0,-3.34287,0.0,0.0,0.0,0.0,0.0,0.0,-2.078467,0.0
3,0.0,0.0,-5.208107,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.0,0.0,0.0,0.0,0.0,1.288514,0.0,0.0,0.0,0.0
4,0.0,-9.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.0,0.0,0.0,6.871427,0.0,0.0,0.0,0.0,0.0,0.0


In [122]:
np.where(only_predictions != 0)

(array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 

In [123]:
unwatched_list = []

for i in range(len(only_predictions)):
    unwatched_list.append(list(np.where(z[i] != 0)[0]))
                     
unwatched_list

[[1,
  3,
  7,
  8,
  9,
  11,
  14,
  25,
  28,
  37,
  46,
  54,
  58,
  60,
  62,
  71,
  73,
  74,
  77,
  91,
  94,
  95,
  96,
  98,
  99,
  102,
  104,
  105,
  106,
  110,
  114,
  117,
  119,
  126,
  128,
  144,
  146,
  148,
  149,
  150,
  159,
  160,
  164,
  165,
  167,
  169,
  177,
  179,
  183,
  186,
  197,
  202,
  207,
  211,
  212,
  216,
  224,
  229,
  233,
  236,
  242,
  246,
  250,
  255,
  263,
  265,
  267,
  268,
  278,
  279,
  283,
  284,
  285,
  286,
  293,
  294,
  297,
  309,
  319,
  326,
  328,
  333,
  335,
  343,
  352],
 [1,
  6,
  7,
  10,
  13,
  14,
  19,
  21,
  26,
  27,
  30,
  32,
  38,
  42,
  50,
  52,
  56,
  62,
  72,
  73,
  78,
  80,
  91,
  95,
  99,
  101,
  108,
  112,
  118,
  129,
  132,
  133,
  139,
  140,
  153,
  154,
  158,
  168,
  172,
  177,
  178,
  179,
  180,
  182,
  184,
  190,
  197,
  199,
  202,
  203,
  208,
  209,
  210,
  215,
  218,
  229,
  233,
  238,
  241,
  243,
  244,
  249,
  250,
  252,
  255,
  257,


In [124]:
score_list = []
c = 0
for i in unwatched_list:
    s = []
    for j in i:
        s.append(z[c, j])
    score_list.append(s)
    c += 1
    
score_list

[[-9.7,
  6.620611967912324,
  10.23238155167985,
  10.444076694081268,
  -1.0280495584984366,
  -7.475531285174243,
  10.3,
  8.606130237264997,
  7.909642591006291,
  -0.38958359597648134,
  4.174517417426324,
  1.4687838882561082,
  1.35150277223506,
  7.969122942077098,
  10.3,
  2.3332646753160806,
  2.169186024297999,
  5.707482097023643,
  3.869510991338581,
  -4.715693768420807,
  -2.070993644935956,
  10.697144655039551,
  -7.54166933447665,
  -0.1416842186180883,
  -5.19210344532334,
  -1.9820820070810807,
  -7.741656127080449,
  -1.1001021780664502,
  -7.433360703043663,
  -6.561444381647943,
  3.176986174858079,
  0.4621515577605182,
  4.730682625124504,
  9.893144775779156,
  1.4687838882561082,
  -9.817404670355543,
  -8.084837029322166,
  -2.070993644935956,
  5.329172146273653,
  -0.8933764056213629,
  -8.838890122318384,
  -1.563530267212103,
  4.756268813958179,
  4.47935326902679,
  2.5384768015073607,
  1.4353734585857791,
  3.130104686080808,
  2.26811569196786,
  

In [125]:
score_indices = []
for i in score_list:
    order = sorted(range(len(i)), reverse = True, key=lambda k: i[k])
    score_indices.append(order)
    
score_indices

[[21,
  3,
  6,
  14,
  65,
  2,
  55,
  77,
  83,
  33,
  84,
  54,
  7,
  69,
  13,
  8,
  76,
  58,
  1,
  17,
  64,
  51,
  61,
  38,
  72,
  42,
  56,
  32,
  73,
  43,
  68,
  10,
  70,
  74,
  79,
  18,
  80,
  52,
  30,
  46,
  75,
  44,
  78,
  48,
  15,
  47,
  16,
  57,
  82,
  11,
  34,
  45,
  12,
  49,
  31,
  67,
  59,
  23,
  53,
  9,
  66,
  50,
  39,
  4,
  27,
  41,
  25,
  20,
  37,
  60,
  19,
  24,
  81,
  29,
  71,
  28,
  5,
  22,
  26,
  36,
  40,
  62,
  0,
  63,
  35],
 [22,
  39,
  23,
  88,
  26,
  41,
  37,
  51,
  72,
  67,
  10,
  53,
  19,
  54,
  55,
  66,
  5,
  17,
  83,
  2,
  86,
  50,
  52,
  36,
  49,
  80,
  12,
  33,
  87,
  21,
  57,
  60,
  89,
  73,
  28,
  45,
  7,
  42,
  48,
  9,
  27,
  84,
  76,
  20,
  71,
  14,
  29,
  1,
  16,
  35,
  34,
  11,
  13,
  63,
  8,
  30,
  77,
  65,
  38,
  70,
  78,
  75,
  18,
  32,
  6,
  85,
  68,
  25,
  47,
  44,
  40,
  59,
  56,
  3,
  82,
  43,
  74,
  24,
  58,
  4,
  90,
  31,
  79,
  81,
  61

In [126]:
rec_list = []
for i in range(len(score_indices)):
    l = unwatched_list[i]
    order = score_indices[i]
    rec_list.append([l[j] for j in order])
    
rec_list

[[95,
  8,
  14,
  62,
  265,
  7,
  216,
  309,
  343,
  126,
  352,
  212,
  25,
  279,
  60,
  28,
  297,
  233,
  3,
  74,
  263,
  202,
  246,
  149,
  285,
  164,
  224,
  119,
  286,
  165,
  278,
  46,
  283,
  293,
  326,
  77,
  328,
  207,
  114,
  177,
  294,
  167,
  319,
  183,
  71,
  179,
  73,
  229,
  335,
  54,
  128,
  169,
  58,
  186,
  117,
  268,
  236,
  98,
  211,
  37,
  267,
  197,
  150,
  9,
  105,
  160,
  102,
  94,
  148,
  242,
  91,
  99,
  333,
  110,
  284,
  106,
  11,
  96,
  104,
  146,
  159,
  250,
  1,
  255,
  144],
 [91,
  177,
  95,
  346,
  108,
  179,
  168,
  209,
  276,
  263,
  30,
  215,
  73,
  218,
  229,
  261,
  14,
  62,
  322,
  7,
  337,
  208,
  210,
  158,
  203,
  307,
  38,
  140,
  339,
  80,
  238,
  244,
  349,
  277,
  118,
  190,
  21,
  180,
  202,
  27,
  112,
  330,
  299,
  78,
  272,
  50,
  129,
  6,
  56,
  154,
  153,
  32,
  42,
  252,
  26,
  132,
  302,
  257,
  172,
  268,
  303,
  287,
  72,
  139,
  19,
 

In [127]:
# output dataframe with user ids a

recommendations_movies = pd.DataFrame(rec_list)

recommendations_movies.to_csv(directory_path + 'movie_recommendations.csv', index = False)
recommendations_movies

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,92,93,94,95,96,97,98,99,100,101
0,95,8,14,62,265,7,216,309,343,126,...,,,,,,,,,,
1,91,177,95,346,108,179,168,209,276,263,...,,,,,,,,,,
2,106,114,99,296,87,323,327,19,287,3,...,187.0,165.0,24.0,202.0,225.0,259.0,60.0,,,
3,91,294,183,276,177,207,258,41,168,209,...,,,,,,,,,,
4,168,209,265,261,31,219,324,304,322,14,...,341.0,253.0,308.0,333.0,144.0,1.0,255.0,250.0,315.0,249.0


In [128]:
pd.read_csv(directory_path + 'movie_recommendations.csv')

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,92,93,94,95,96,97,98,99,100,101
0,95,8,14,62,265,7,216,309,343,126,...,,,,,,,,,,
1,91,177,95,346,108,179,168,209,276,263,...,,,,,,,,,,
2,106,114,99,296,87,323,327,19,287,3,...,187.0,165.0,24.0,202.0,225.0,259.0,60.0,,,
3,91,294,183,276,177,207,258,41,168,209,...,,,,,,,,,,
4,168,209,265,261,31,219,324,304,322,14,...,341.0,253.0,308.0,333.0,144.0,1.0,255.0,250.0,315.0,249.0
