In [22]:
## Imports
# Data processing
import pandas as pd
import numpy as np
import scipy.stats
from sklearn.metrics.pairwise import cosine_similarity

In [23]:
# Read datasets

users_colnames=['userId', 'gender', 'age', 'profession']
movies_colnames=['movieId', 'year', 'title']
ratings_colnames=['userId', 'movieId', 'rating']

users = pd.read_csv('cse2525-reccommender-systems-challenge/users.csv', sep=';', names=users_colnames)
movies = pd.read_csv('cse2525-reccommender-systems-challenge/movies_v2.csv', sep=';', names=movies_colnames) # changed line 3601 from dataset : ';' -> '_'
ratings = pd.read_csv('cse2525-reccommender-systems-challenge/ratings.csv', sep=';', names=ratings_colnames)

# Check
users.head()
movies.head()
ratings.head()


Unnamed: 0,userId,movieId,rating
0,5494,2006,4
1,3579,1143,3
2,1251,2567,2
3,5687,652,4
4,3018,3440,4


In [24]:
# Count the # of ratings per movie
ratings_by_movie_id = ratings.groupby(['movieId'])['movieId']\
    .count()\
    .reset_index(name='counts')

ratings_by_movie_id.head()


Unnamed: 0,movieId,counts
0,1,1896
1,2,635
2,3,443
3,4,155
4,5,270


In [25]:
# Number of rows remaining after saving only the ones that have `>= threshold` ratings
movies_threshold = 100
ratings_by_movie_id_processed = ratings_by_movie_id[ratings_by_movie_id['counts'] >= movies_threshold] # drop in another variable
len(ratings_by_movie_id_processed)


1940

In [26]:
# Titles of the most popular (in # of ratings) movies
relevant_movie_ids = ratings_by_movie_id.sort_values(by='counts', ascending=False)['movieId'].array
# relevant_movie_ids.array
# type(relevant_movie_ids.array)
joined_ids = list(set(movies['movieId']) & set(relevant_movie_ids))
joined_ids = [x - 1 for x in joined_ids] # 0-indexed
movies.loc[joined_ids]

Unnamed: 0,movieId,year,title
0,1,1995,Toy_Story_(1995)
1,2,1995,Jumanji_(1995)
2,3,1995,Grumpier_Old_Men_(1995)
3,4,1995,Waiting_to_Exhale_(1995)
4,5,1995,Father_of_the_Bride_Part_II_(1995)
...,...,...,...
3701,3702,2000,Meet_the_Parents_(2000)
3702,3703,2000,Requiem_for_a_Dream_(2000)
3703,3704,2000,Tigerland_(2000)
3704,3705,2000,Two_Family_House_(2000)


In [27]:
# Create utility (user-movie) matrix
# rows - users, cols - movies
matrix = ratings.pivot_table(index='userId', columns='movieId', values='rating')
print(type(matrix))
matrix.head()

<class 'pandas.core.frame.DataFrame'>


movieId,1,2,3,4,5,6,7,8,9,10,...,3697,3698,3699,3700,3701,3702,3703,3704,3705,3706
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,


In [94]:
# Statistics on ratings values

print('The ratings dataset has', ratings['userId'].nunique(), 'unique users')
print('The ratings dataset has', ratings['movieId'].nunique(), 'unique movies')
print('The ratings dataset has', ratings['rating'].nunique(), 'unique ratings')
print('The unique ratings are', sorted(ratings['rating'].unique()))


The ratings dataset has 6040 unique users
The ratings dataset has 3695 unique movies
The ratings dataset has 5 unique ratings
The unique ratings are [1, 2, 3, 4, 5]
6040 3695


In [96]:
# Statistics on users values
unique_users = users['userId'].nunique()
print('The users dataset has', unique_users, 'unique users')
print('The users dataset has',users['gender'].nunique(), 'unique genders')
print('The users dataset has', users['age'].nunique(), 'unique age values')
print('The users dataset has', users['profession'].nunique(), 'unique professions values')

The users dataset has 6040 unique users
The users dataset has 2 unique genders
The users dataset has 7 unique age values
The users dataset has 21 unique professions values


In [97]:
# Statistics on movies values
unique_movies = movies['movieId'].nunique()
print('The movies dataset has', unique_movies, 'unique ids')
print('The movies dataset has',movies['year'].nunique(), 'unique year values')
print('The movies dataset has', movies['title'].nunique(), 'unique titles')
print('The movies dataset has year values in range', movies['year'].min(), ' - ', movies['year'].max())

The movies dataset has 3706 unique ids
The movies dataset has 82 unique year values
The movies dataset has 3688 unique titles
The movies dataset has year values in range 0  -  2000


In [31]:
# Data normalization step
# Rating > user_avg => positive value; otherwise negative value
matrix_norm = matrix.subtract(matrix.mean(axis=1), axis = 'rows')
matrix_norm.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,3697,3698,3699,3700,3701,3702,3703,3704,3705,3706
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.808511,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,


In [48]:
# User similarity matrix using alternative #1: Pearson correlation
user_similarity = matrix_norm.T.corr()
user_similarity.head()

userId,1,2,3,4,5,6,7,8,9,10,...,6031,6032,6033,6034,6035,6036,6037,6038,6039,6040
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.0,0.642857,-0.5625,0.333333,-0.172516,0.1139606,,-0.583333,0.644094,0.163446,...,-0.19803,0.852803,1.0,,0.381246,-0.150021,-0.394771,,0.061199,-5.0620030000000004e-17
2,0.642857,1.0,-0.208013,0.220863,-0.102728,2.7626380000000003e-17,0.2626,-0.015445,0.13525,-0.105465,...,-0.75,0.338062,0.326183,,0.338558,0.381157,0.183019,-0.5,0.483046,0.03193962
3,-0.5625,-0.208013,1.0,0.774597,-0.437621,-0.3492151,0.790569,-0.528594,0.108465,0.016264,...,-0.5,-0.342997,-0.711556,,0.559017,0.158237,-0.041345,1.0,0.315063,-0.4678087
4,0.333333,0.220863,0.774597,1.0,0.0,,-0.207514,0.534522,0.866025,-0.371479,...,-0.645497,,-0.038576,,-0.013558,0.489046,0.301511,-1.0,0.0,0.5358259
5,-0.172516,-0.102728,-0.437621,0.0,1.0,-0.7211103,-0.216966,0.168732,-0.033128,0.021635,...,0.073324,1.0,0.116775,,0.514356,0.142739,-0.074062,-1.0,1.0,0.325407


In [33]:
# User similarity matrix using alternative #2: cosine_similarity
# values in [-1, 1]; -1 : opposite, 1: really close
user_similarity_cosine = cosine_similarity(matrix_norm.fillna(0))
print(type(user_similarity_cosine)) # ndarray
user_similarity_cosine


<class 'numpy.ndarray'>


array([[ 1.        ,  0.03565432, -0.04347203, ...,  0.        ,
         0.0138397 ,  0.00903407],
       [ 0.03565432,  1.        , -0.01496278, ..., -0.01598902,
         0.02835177, -0.00176974],
       [-0.04347203, -0.01496278,  1.        , ...,  0.0414533 ,
         0.05292534, -0.04647062],
       ...,
       [ 0.        , -0.01598902,  0.0414533 , ...,  1.        ,
         0.04679397, -0.03514554],
       [ 0.0138397 ,  0.02835177,  0.05292534, ...,  0.04679397,
         1.        ,  0.05144363],
       [ 0.00903407, -0.00176974, -0.04647062, ..., -0.03514554,
         0.05144363,  1.        ]])

In [34]:
# Try finding similar users to user w/ `userID`
# Let's use `cosine_similarity` for this

userId = 1
print(type(user_similarity_cosine))
user_similarity_cosine = user_similarity_cosine[:, userId]
user_similarity_cosine = np.delete(user_similarity_cosine, userId, axis=0)
print(len(user_similarity_cosine))
user_similarity_cosine

<class 'numpy.ndarray'>
6039


array([ 0.03565432, -0.01496278, -0.02398623, ..., -0.01598902,
        0.02835177, -0.00176974])

In [35]:
# Try finding similar users to user w/ `userID`
# Let's use `Pearson correlation` for this

# Pick a user ID
# picked_userid = 1
# Remove picked user ID from the candidate list
#user_similarity.drop(index=picked_userid, inplace=True)
# Take a look at the data
# user_similarity.head()

userId,1,2,3,4,5,6,7,8,9,10,...,6031,6032,6033,6034,6035,6036,6037,6038,6039,6040
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,0.642857,1.0,-0.208013,0.220863,-0.102728,2.7626380000000003e-17,0.2626,-0.015445,0.13525,-0.105465,...,-0.75,0.338062,0.326183,,0.338558,0.381157,0.183019,-0.5,0.483046,0.03194
3,-0.5625,-0.2080126,1.0,0.774597,-0.437621,-0.3492151,0.790569,-0.528594,0.108465,0.016264,...,-0.5,-0.342997,-0.711556,,0.559017,0.158237,-0.041345,1.0,0.315063,-0.467809
4,0.333333,0.2208631,0.774597,1.0,0.0,,-0.207514,0.534522,0.866025,-0.371479,...,-0.645497,,-0.038576,,-0.013558,0.489046,0.301511,-1.0,0.0,0.535826
5,-0.172516,-0.1027277,-0.437621,0.0,1.0,-0.7211103,-0.216966,0.168732,-0.033128,0.021635,...,0.073324,1.0,0.116775,,0.514356,0.142739,-0.074062,-1.0,1.0,0.325407
6,0.113961,2.7626380000000003e-17,-0.349215,,-0.72111,1.0,1.0,-0.590932,-0.216007,0.333863,...,-1.0,0.92582,,,-0.419573,-0.16525,-0.272166,-0.866025,-0.693375,0.197545


In [47]:
# Tests to decide on tuning the parameters in next code cell `user_similarity_threshold` and `k`

# Number of similar users - first 150 are 1.0, first 160 -> 0.94
k = 200
# User similarity threshold
user_similarity_threshold = 0.3

for_userId = user_similarity[user_similarity[userId] > user_similarity_threshold][userId]
print(type(for_userId))
print(len(for_userId))
print(len(for_userId[for_userId >= .7]))


<class 'pandas.core.series.Series'>
1197
337


In [38]:
# Choose `k` most similar users based on the cosine_similarity distance
# Parameters to be tuned:
# k - # of similar users
# user_similarity_threshold

# Get top `k` similar users
print(user_similarity.shape)
similar_users = user_similarity[user_similarity[userId] > user_similarity_threshold][userId].sort_values(ascending=False)[:k] # Print out top `k` similar users
print(f'The similar users for user {userId} are', similar_users)

(6039, 6040)
The similar users for user 1 are userId
4910    1.000000
3222    1.000000
3247    1.000000
900     1.000000
545     1.000000
          ...   
361     0.866025
1760    0.866025
2431    0.866025
2509    0.866025
3368    0.866025
Name: 1, Length: 200, dtype: float64


  similar_users = user_similarity[user_similarity[userId] > user_similarity_threshold][userId].sort_values(ascending=False)[:k] # Print out top `k` similar users


In [39]:
# Narrow item pool in 2 steps
# Step 1. Remove movies already watched by targeted `userId`

# Movies that the target user has watched
picked_userid_watched = matrix_norm[matrix_norm.index == picked_userid].dropna(axis=1, how='all')
picked_userid_watched

movieId,1,48,145,254,514,518,575,581,582,594,...,2489,2558,2587,2593,2600,2711,2890,2899,2970,3178
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.808511,0.808511,0.808511,-0.191489,0.808511,-0.191489,-0.191489,-0.191489,0.808511,-0.191489,...,-0.191489,-0.191489,-0.191489,-0.191489,0.808511,-0.191489,0.808511,-0.191489,-0.191489,-0.191489


In [40]:
# Step 2. Keep only the movies that similar users have watched

# Remove movies that none of the similar users have watched
similar_user_movies = matrix_norm[matrix_norm.index.isin(similar_users.index)].dropna(axis=1, how='all')
similar_user_movies

movieId,1,2,3,4,5,6,7,9,10,11,...,3690,3691,3694,3697,3700,3702,3703,3704,3705,3706
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
40,,1.595506,,,,,,,,,...,,,,,,,,,,
66,,,,,,,,,,,...,,,,,,,,,,
72,,,,,,,,,,1.333333,...,,,,,,,,,,1.333333
113,,,,,,,,,,,...,,,,,,,,,,
179,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5882,,,,,,,,,,,...,,,,,,,,,,
5910,,,,,,,,,,,...,,,,,,,,,,
5919,,,,,,,,,,,...,,,,,,,,,,
5940,,,,,,,,,,,...,,,,,,,,,,


In [41]:
# Remove already watched movies
similar_user_movies.drop(picked_userid_watched.columns,axis=1, inplace=True, errors='ignore') # drop cols if exits without error msgs.
similar_user_movies

movieId,2,3,4,5,6,7,9,10,11,12,...,3690,3691,3694,3697,3700,3702,3703,3704,3705,3706
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
40,1.595506,,,,,,,,,,...,,,,,,,,,,
66,,,,,,,,,,,...,,,,,,,,,,
72,,,,,,,,,1.333333,,...,,,,,,,,,,1.333333
113,,,,,,,,,,,...,,,,,,,,,,
179,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5882,,,,,,,,,,,...,,,,,,,,,,
5910,,,,,,,,,,,...,,,,,,,,,,
5919,,,,,,,,,,,...,,,,,,,,,,
5940,,,,,,,,,,,...,,,,,,,,,,


In [52]:
# Read predictions.csv

predictions_colnames=['userId', 'movieId']
predictions = pd.read_csv('cse2525-reccommender-systems-challenge/predictions.csv', sep=';', names=predictions_colnames)
predictions

Unnamed: 0,userId,movieId,predicts
0,1635,1914,0
1,2077,2124,0
2,3507,1582,0
3,1117,2857,0
4,5298,3281,0
...,...,...,...
90014,1396,1159,0
90015,2022,1272,0
90016,1109,2319,0
90017,2941,1273,0


In [99]:
# Create `ans` matrix
#ans = ratings.pivot_table(index='userId', columns='movieId', fill_value=0)
#ans
ans =  np.zeros((unique_users + 1, unique_movies + 1), dtype=np.int32 )
print(ans.shape)

(6041, 3707)


In [None]:
# Recommend Items to targetUser
# weighted avg of user similarity score & movie rating
# User w/ high similarity -> higher weights; in other words, similarity score weights the resuts

for idd in range(1, unique_users + 1): # 1-indexes
  item_score = {}
  user_similarity_userId = user_similarity.drop(index=idd, inplace=False)
  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users
  picked_userid_watched = matrix_norm[matrix_norm.index == idd].dropna(axis=1, how='all')
  similar_user_movies = matrix_norm[matrix_norm.index.isin(similar_users.index)].dropna(axis=1, how='all')
  similar_user_movies.drop(picked_userid_watched.columns,axis=1, inplace=True, errors='ignore') # drop cols if exits without error msgs.

  for movieId in similar_user_movies.columns:
    movie_rating = similar_user_movies[movieId]
    # Stores the score
    total = 0
    # Stores the number of scores
    count = 0

    for userId in similar_users.index:
      if not pd.isna(movie_rating[userId]): #     # If the movie has rating
        score = similar_users[userId] * movie_rating[userId]
        total += score
        count += 1
    item_score[movieId] = total / count# Convert dictionary to pandas dataframe

  item_score = pd.DataFrame(item_score.items(), columns=['movie', 'movie_score'])
  item_score

  # Sort the movies by score
  # ranked_item_score = item_score.sort_values(by='movie_score', ascending=False)

  # Select top `m` movies
  m = unique_movies
  avg_rating = matrix[matrix.index == idd].T.mean()[idd]
  item_score['predicted_rating'] = item_score['movie_score'] + avg_rating # Take a look at the data
  for (index, data_row) in item_score.iterrows():
      movieId = (int) (data_row['movie'])
      prediction = data_row['predicted_rating']
      if prediction >= 5:
        prediction = 5
      elif prediction <= 1:
        prediction = 1

      if prediction - (int) (prediction) <= 0.5:
          prediction = (int) (prediction)
      else:
          prediction = (int) (prediction) + 1

      # print(prediction)
      ans[idd][movieId] = prediction
  print(idd)
  # print(ranked_item_score.head(m))


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


1


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


2


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


3


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


4


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


5


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


6


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


7


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


8


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


9


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


10


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


11


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


12


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


13


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


14


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


15


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


16


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


17


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


18


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


19


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


20


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


21


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


22


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


23


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


24


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


25


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


26


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


27


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


28


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


29


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


30


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


31


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


32


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


33


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


34


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


35


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


36


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


37


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


38


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


39


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


40


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


41


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


42


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


43


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


44


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


45


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


46


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


47


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


48


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


49


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


50


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


51


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


52


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


53


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


54


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


55


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


56


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


57


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


58


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


59


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


60


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


61


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


62


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


63


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


64


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


65


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


66


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


67


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


68


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


69


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


70


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


71


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


72


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


73


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


74


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


75


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


76


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


77


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


78


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


79


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


80


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


81


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


82


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


83


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


84


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


85


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


86


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


87


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


88


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


89


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


90


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


91


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


92


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


93


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


94


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


95


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


96


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


97


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


98


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


99


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


100


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


101


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


102


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


103


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


104


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


105


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


106


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


107


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


108


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


109


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


110


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


111


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


112


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


113


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


114


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


115


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


116


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


117


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


118


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


119


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


120


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


121


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


122


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


123


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


124


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


125


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


126


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


127


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


128


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


129


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


130


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


131


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


132


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


133


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


134


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


135


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


136


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


137


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


138


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


139


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


140


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


141


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


142


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


143


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


144


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


145


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


146


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


147


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


148


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


149


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


150


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


151


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


152


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


153


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


154


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


155


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


156


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


157


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


158


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


159


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


160


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


161


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


162


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


163


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


164


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


165


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


166


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


167


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


168


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


169


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


170


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


171


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


172


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


173


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


174


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


175


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


176


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


177


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


178


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


179


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


180


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


181


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


182


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


183


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


184


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


185


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


186


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


187


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


188


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


189


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


190


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


191


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


192


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


193


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


194


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


195


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


196


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


197


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


198


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


199


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


200


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


201


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


202


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


203


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


204


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


205


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


206


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


207


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


208


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


209


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


210


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


211


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


212


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


213


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


214


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


215


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


216


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


217


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


218


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


219


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


220


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


221


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


222


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


223


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


224


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


225


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


226


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


227


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


228


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


229


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


230


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


231


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


232


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


233


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


234


  similar_users = user_similarity[user_similarity[idd] > user_similarity_threshold][idd].sort_values(ascending=False)[:k] # Print out top `k` similar users


In [101]:
# Test print
print(ans)

[[0 0 0 ... 0 0 0]
 [0 0 4 ... 6 6 4]
 [0 4 3 ... 2 3 3]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [None]:
# Create submissions.csv
submissions_colnames=['Id', 'Rating']
submissions = pd.read_csv('cse2525-reccommender-systems-challenge/submission.csv', sep=',', names=submissions_colnames)
ids = submissions['Id']

for idx in ids:
    idd = predictions.iloc[ids[idx - 1], 0]
    movieId = predictions.iloc[ids[idx - 1], 1]
    submissions.iloc[ids[idx - 1], ['Rating']] = ans[idd][movieId]

submissions

In [43]:
# Test - Print titles of recommended movies\
print(type(ranked_item_score))
print(movies['movieId'])
print(type(ranked_item_score['movie']))
movieIds_indices = ranked_item_score['movie'][:m].array
print(movieIds_indices)
movies.iloc[movieIds_indices]['title']
# joined_indices = list(set(movies.movieId) & set(movieIds_indices))
# print(len(joined_indices))
# print(movies.iloc[joined_indices]['title'])

<class 'pandas.core.frame.DataFrame'>
0          1
1          2
2          3
3          4
4          5
        ... 
3701    3702
3702    3703
3703    3704
3704    3705
3705    3706
Name: movieId, Length: 3706, dtype: int64
<class 'pandas.core.series.Series'>
<PandasArray>
[ 499, 3374, 3259, 1881, 2072, 1616,  394, 1690, 1382, 3208,
 ...
 3230, 3597, 1487, 2816, 1791, 2433, 2278, 2203,  117, 1110]
Length: 1000, dtype: int64


  movieIds_indices = ranked_item_score['movie'][:m].array


499         Radioland_Murders_(1994)
3374       Honeymoon_in_Vegas_(1992)
3259    Son_of_the_Sheik,_The_(1926)
1881           Governess,_The_(1998)
2072      About_Last_Night..._(1986)
                    ...             
2433       Mummy's_Curse,_The_(1944)
2278      Crocodile_Dundee_II_(1988)
2203      Shakespeare_in_Love_(1998)
117                 Boomerang_(1992)
1110                   Aliens_(1986)
Name: title, Length: 1000, dtype: object

In [44]:
# Predicts scores - user's ratings
# get average user's w/ `userId` movie rating score

avg_rating = matrix[matrix.index == picked_userid].T.mean()[picked_userid]
print(f'The average movie rating for user {picked_userid} is {avg_rating:.2f}')

# Calcuate the predicted rating
ranked_item_score['predicted_rating'] = ranked_item_score['movie_score'] + avg_rating# Take a look at the data
print(m)
ranked_item_score.head(m)

The average movie rating for user 1 is 4.19
1000


Unnamed: 0,movie,movie_score,predicted_rating
291,499,2.554455,6.745945
1989,3374,2.144737,6.336226
1915,3259,2.144737,6.336226
1086,1881,2.144737,6.336226
1199,2072,2.144737,6.336226
...,...,...,...
1425,2433,0.002537,4.194027
1333,2278,0.001970,4.193460
1277,2203,0.000140,4.191630
71,117,0.000000,4.191489


In [51]:
# Iterate through userIds

for userId in range(1, unique_users + 1): # 1-indexes
  user_similarity_userId = user_similarity.drop(index=userId, inplace=False)
  print(user_similarity_userId.shape)


(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)
(6039, 6040)

KeyboardInterrupt: 