In [1]:
from datetime import datetime
import json
import pandas as pd
import numpy as np

In [2]:
df_activities = pd.read_json('activities_large.json')
movies = pd.read_pickle('full_movies.pkl')

## User/Content Matrices

In [3]:
# Variables for adjusting formula

clicks = 0.3
like = 10
dislike = -10

# current formula is clicks * 0.3 +/-10 (+10 for like and -10 for dislike)

In [4]:
# getting likes/dislikes
likes_dislikes = df_activities.loc[np.where((df_activities['activity'] == 'Like') ^ (df_activities['activity'] == 'Dislike'))]

# latest activity at the bottom
likes_dislikes.sort_values('datetime', inplace = True)

# filtering out all like/dislike that is not at the bottom
latest = likes_dislikes.groupby(['user_id', 'content_id']).last()


# getting the right shape
latest.drop(columns = 'datetime', inplace = True)

unstacked = latest.unstack()

unstacked.columns = unstacked.columns.droplevel()

unstacked.replace({'Dislike':dislike, 'Like':like}, inplace = True)

unstacked.fillna(0, inplace = True)

unstacked

content_id,0,1,2,3,4,5,6,7,8,9,...,491,492,493,494,495,496,497,498,499,500
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,10,10,10,0.0,0.0,-10,-10,-10,10,10,...,10,10,-10.0,10.0,10,10.0,10,10,-10,-10
1,10,10,-10,-10.0,-10.0,-10,-10,10,-10,10,...,-10,10,0.0,10.0,-10,0.0,-10,10,-10,-10
2,10,-10,-10,10.0,0.0,10,10,10,-10,10,...,10,-10,10.0,-10.0,-10,10.0,-10,10,-10,-10
3,-10,-10,-10,-10.0,10.0,10,-10,10,10,10,...,10,10,-10.0,-10.0,-10,-10.0,10,-10,10,-10
4,-10,10,-10,-10.0,-10.0,10,-10,10,10,10,...,10,10,10.0,0.0,-10,-10.0,-10,-10,-10,-10


In [5]:
# turning it into sparse matrix

likes_matrix = np.zeros((5, 501))

users = [i -1 for i in list(unstacked.index)]
cols = list(unstacked.columns)

for i in users:
    for j in cols:
        likes_matrix[i, j] = unstacked.loc[i +1, j]
        
likes_matrix

array([[ 10.,  10., -10., ...,  10., -10., -10.],
       [ 10., -10., -10., ...,  10., -10., -10.],
       [-10., -10., -10., ..., -10.,  10., -10.],
       [-10.,  10., -10., ..., -10., -10., -10.],
       [ 10.,  10.,  10., ...,  10., -10., -10.]])

In [17]:
# creating sparse matrix for plays

plays = df_activities[df_activities['activity'] == 'Play']

grouped_plays = plays.groupby(['user_id', 'content_id']).count()

pivot_plays = grouped_plays.pivot_table(index='user_id', columns = 'content_id', values = 'activity')

pivot_plays.fillna(0, inplace = True)

plays_matrix = np.zeros((5, 501))

users = [i for i in range(5)]
cols = list(pivot_plays.columns)

for i in users:
    for j in cols:
        plays_matrix[i, j] = pivot_plays.loc[i, j] * clicks
        
plays_matrix

array([[0.3, 0.6, 0.3, ..., 0.3, 0.6, 0. ],
       [0. , 0. , 0.3, ..., 0.6, 0.9, 0.3],
       [0.3, 0.6, 0.6, ..., 0.3, 0. , 0.3],
       [0.9, 0. , 0.3, ..., 0. , 0. , 0.6],
       [0. , 0.3, 0. , ..., 0. , 0.6, 0.3]])

In [18]:
scores_matrix = plays_matrix + likes_matrix

#scores_df = pd.DataFrame(scores_matrix.copy())

scores_df = pd.DataFrame(scores_matrix)

scores_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,491,492,493,494,495,496,497,498,499,500
0,10.3,10.6,-9.7,-10.0,-10.0,-10.0,-9.4,10.3,-9.1,10.0,...,-9.1,10.9,0.6,10.3,-9.4,0.9,-10.0,10.3,-9.4,-10.0
1,10.0,-10.0,-9.7,10.9,0.0,10.3,10.6,10.3,-9.1,10.3,...,10.3,-9.1,10.9,-10.0,-9.4,10.0,-10.0,10.6,-9.1,-9.7
2,-9.7,-9.4,-9.4,-10.0,10.3,10.3,-10.0,10.0,10.3,10.3,...,10.0,10.0,-9.7,-10.0,-9.7,-9.7,10.6,-9.7,10.0,-9.7
3,-9.1,10.0,-9.7,-9.7,-10.0,10.3,-9.7,10.6,10.6,10.0,...,10.3,10.9,10.3,0.0,-10.0,-9.7,-10.0,-10.0,-10.0,-9.4
4,10.0,10.3,10.0,0.0,0.6,-9.7,-9.4,-9.7,10.0,10.3,...,10.0,10.6,-9.4,10.3,10.9,10.6,10.0,10.0,-9.4,-9.7


In [8]:
scores_df.replace(0, np.nan, inplace = True)

In [9]:
centralized = scores_df.sub(scores_df.mean(axis = 1), axis = 0)

centralized

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,491,492,493,494,495,496,497,498,499,500
0,10.110931,10.410931,-9.889069,-10.189069,-10.189069,-10.189069,-9.589069,10.110931,-9.289069,9.810931,...,-9.289069,10.710931,0.410931,10.110931,-9.589069,0.710931,-10.189069,10.110931,-9.589069,-10.189069
1,9.550816,-10.449184,-10.149184,10.450816,,9.850816,10.150816,9.850816,-9.549184,9.850816,...,9.850816,-9.549184,10.450816,-10.449184,-9.849184,9.550816,-10.449184,10.150816,-9.549184,-10.149184
2,-9.551534,-9.251534,-9.251534,-9.851534,10.448466,10.448466,-9.851534,10.148466,10.448466,10.448466,...,10.148466,10.148466,-9.551534,-9.851534,-9.551534,-9.551534,10.748466,-9.551534,10.148466,-9.551534
3,-10.002434,9.097566,-10.602434,-10.602434,-10.902434,9.397566,-10.602434,9.697566,9.697566,9.097566,...,9.397566,9.997566,9.397566,,-10.902434,-10.602434,-10.902434,-10.902434,-10.902434,-10.302434
4,9.756707,10.056707,9.756707,,0.356707,-9.943293,-9.643293,-9.943293,9.756707,10.056707,...,9.756707,10.356707,-9.643293,10.056707,10.656707,10.356707,9.756707,9.756707,-9.643293,-9.943293


In [10]:
#centralized_imputed = centralized.fillna(scores_df.mean(axis = 1))

#centralized_imputed

In [11]:
from sklearn.metrics.pairwise import cosine_similarity

In [12]:
scores_matrix

array([[ 10.3,  10.6,  -9.7, ...,  10.3,  -9.4, -10. ],
       [ 10. , -10. ,  -9.7, ...,  10.6,  -9.1,  -9.7],
       [ -9.7,  -9.4,  -9.4, ...,  -9.7,  10. ,  -9.7],
       [ -9.1,  10. ,  -9.7, ..., -10. , -10. ,  -9.4],
       [ 10. ,  10.3,  10. , ...,  10. ,  -9.4,  -9.7]])

In [13]:
similarities = cosine_similarity(scores_matrix)

similarities

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

In [None]:
similarities[0]

In [None]:
pd.DataFrame(similarities)

In [14]:
users = [i for i in range(5)]

users_dict = {}

for i in range(5):
    users_dict[f'user{i}'] = similarities[i]
    

NameError: name 'similarities' is not defined

In [19]:
# Compute cosine similarity between all pairs of users
user_similarities = cosine_similarity(scores_matrix)
# Function to predict a user's rating for an item
def predict_rating(user_item_matrix, user_id, item_id):
    # Find the most similar users to the given user
    similar_users = np.argsort(user_similarities[user_id])[::-1][1:]
    
    # Compute weighted average of their ratings for the item 
    weighted_sum = 0
    weight_sum = 0   
    for sim_user in similar_users:
        if user_item_matrix[sim_user][item_id] != 0:
            similarity = user_similarities[user_id][sim_user]
            rating = user_item_matrix[sim_user][item_id]
            weighted_sum += similarity * rating            
            weight_sum += similarity   
    if weight_sum == 0:
        return 0
    else:
        return weighted_sum / weight_sum
    # Example usage:
    # Predict user 0's rating for item 2
    #predicted_rating = predict_rating(0, 2)
    #print(predicted_rating)

In [21]:
user_similarities

array([[ 1.        ,  0.07731235, -0.01099794,  0.02742089,  0.10407872],
       [ 0.07731235,  1.        ,  0.03562576,  0.06936093,  0.06943124],
       [-0.01099794,  0.03562576,  1.        , -0.013615  ,  0.09525441],
       [ 0.02742089,  0.06936093, -0.013615  ,  1.        ,  0.02672267],
       [ 0.10407872,  0.06943124,  0.09525441,  0.02672267,  1.        ]])

In [120]:
scores_matrix

array([[ 10.3,  10.6,  -9.7, ...,  10.3,  -9.4, -10. ],
       [ 10. , -10. ,  -9.7, ...,  10.6,  -9.1,  -9.7],
       [ -9.7,  -9.4,  -9.4, ...,  -9.7,  10. ,  -9.7],
       [ -9.1,  10. ,  -9.7, ..., -10. , -10. ,  -9.4],
       [ 10. ,  10.3,  10. , ...,  10. ,  -9.4,  -9.7]])

In [24]:
missings = np.where(scores_matrix == 0)

u_ids = missings[0]
c_ids = missings[1]

c_ids

array([ 32,  79, 106, 286, 289, 340, 378,   4, 105, 175, 224, 228, 231,
       250, 388, 391, 427, 429,  31,  32,  35,  95, 110, 135, 217, 225,
       228, 257, 285, 291,  48,  49,  55, 215, 295, 334, 482, 494,   3,
        37,  80, 160, 285, 302, 321, 400, 461], dtype=int64)

In [25]:
z = np.zeros((5, 501))

for i in range(len(u_ids)):
    z[u_ids[i], c_ids[i]] = predict_rating(scores_matrix, u_ids[i], c_ids[i])
        
z

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [141]:
scores_matrix

array([[ 10.3,  10.6,  -9.7, ...,  10.3,  -9.4, -10. ],
       [ 10. , -10. ,  -9.7, ...,  10.6,  -9.1,  -9.7],
       [ -9.7,  -9.4,  -9.4, ...,  -9.7,  10. ,  -9.7],
       [ -9.1,  10. ,  -9.7, ..., -10. , -10. ,  -9.4],
       [ 10. ,  10.3,  10. , ...,  10. ,  -9.4,  -9.7]])

In [145]:
only_predictions = 

only_predictions

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [26]:
only_predictions = pd.DataFrame(z)

only_predictions

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,491,492,493,494,495,496,497,498,499,500
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,-4.203423,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.001996,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,-5.06195,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [27]:
np.where(only_predictions != 0)

(array([0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4,
        4, 4, 4], dtype=int64),
 array([ 32,  79, 106, 286, 289, 340, 378,   4, 105, 175, 224, 228, 231,
        250, 388, 391, 427, 429,  31,  32,  35,  95, 110, 135, 217, 225,
        228, 257, 285, 291,  48,  49,  55, 215, 295, 334, 482, 494,   3,
         37,  80, 160, 285, 302, 321, 400, 461], dtype=int64))

In [196]:
unwatched_list = []

for i in range(len(only_predictions)):
    unwatched_list.append(list(np.where(z[i] != 0)[0]))
                     
unwatched_list

[[32, 79, 106, 286, 289, 340, 378],
 [4, 105, 175, 224, 228, 231, 250, 388, 391, 427, 429],
 [31, 32, 35, 95, 110, 135, 217, 225, 228, 257, 285, 291],
 [48, 49, 55, 215, 295, 334, 482, 494],
 [3, 37, 80, 160, 285, 302, 321, 400, 461]]

In [197]:
score_list = []
c = 0
for i in unwatched_list:
    s = []
    for j in i:
        s.append(z[c, j])
    score_list.append(s)
    c += 1
    
score_list

[[0.3313449354095434,
  3.9536247512987273,
  -1.7096526165723696,
  1.735742089755546,
  -2.7719341682011134,
  5.1733780667697475,
  -0.3630131338572109],
 [-4.203422532651175,
  5.00652129759249,
  -3.9812356418916894,
  1.7839134139259065,
  3.3705046386348907,
  1.6587953440872143,
  1.9934968888047977,
  -6.489205143599954,
  4.200065606533768,
  -4.030999502214306,
  -1.251595899766357],
 [-6.154093259307808,
  -5.945984926577072,
  13.032507732232702,
  -10.062095954987923,
  6.940142914943385,
  10.938394113702836,
  5.857511674760655,
  -4.92763929169229,
  12.869090987393971,
  -12.399695549443448,
  29.67338699211129,
  5.675850682057405],
 [7.82885669998902,
  7.762203621908158,
  -9.739075321672797,
  13.046533981524732,
  5.763462844900077,
  1.3109713319159404,
  10.63522197128149,
  0.0019960683741303453],
 [-5.061950258050593,
  -9.673782431147917,
  -4.771047696259773,
  8.385611660305822,
  -0.23984377514316763,
  -5.05390440608193,
  -2.5153757903781027,
  3.299719

In [198]:
rec_list = []
for i in range(len(score_indices)):
    l = unwatched_list[i]
    order = score_indices[i]
    rec_list.append([l[j] for j in order])
    
rec_list

[[289, 106, 378, 32, 286, 79, 340],
 [388, 4, 427, 175, 429, 231, 224, 250, 228, 391, 105],
 [257, 95, 31, 32, 225, 291, 217, 110, 135, 228, 35, 285],
 [55, 494, 334, 295, 49, 48, 482, 215],
 [37, 3, 302, 80, 321, 285, 461, 400, 160]]

In [209]:
# output dataframe with user ids a

recommendations = pd.DataFrame(rec_list)

recommendations.to_csv('jonas_sofo_data.csv', index = False)
recommendations

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,289,106,378,32,286,79,340,,,,,
1,388,4,427,175,429,231,224,250.0,228.0,391.0,105.0,
2,257,95,31,32,225,291,217,110.0,135.0,228.0,35.0,285.0
3,55,494,334,295,49,48,482,215.0,,,,
4,37,3,302,80,321,285,461,400.0,160.0,,,


In [210]:
pd.read_csv('jonas_sofo_data.csv')

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,289,106,378,32,286,79,340,,,,,
1,388,4,427,175,429,231,224,250.0,228.0,391.0,105.0,
2,257,95,31,32,225,291,217,110.0,135.0,228.0,35.0,285.0
3,55,494,334,295,49,48,482,215.0,,,,
4,37,3,302,80,321,285,461,400.0,160.0,,,


In [206]:
recommendations.iloc[0]

0     289.0
1     106.0
2     378.0
3      32.0
4     286.0
5      79.0
6     340.0
7       NaN
8       NaN
9       NaN
10      NaN
11      NaN
Name: 0, dtype: float64

In [None]:
to

In [203]:
recommendations.dtypes

0       int64
1       int64
2       int64
3       int64
4       int64
5       int64
6       int64
7     float64
8     float64
9     float64
10    float64
11    float64
dtype: object

## Test Zone 

In [30]:
scores = [z[0,i] for i in user_0_preds]

scores

[0.3313449354095434,
 3.9536247512987273,
 -1.7096526165723696,
 1.735742089755546,
 -2.7719341682011134,
 5.1733780667697475,
 -0.3630131338572109]

In [56]:
u_id_list = [u_id for i in user_0_preds]

In [57]:
u_id_list

[0, 0, 0, 0, 0, 0, 0]

In [35]:
pd.Series(user_0_preds)

0     32
1     79
2    106
3    286
4    289
5    340
6    378
dtype: int64

In [46]:
d = 

SyntaxError: invalid syntax (<ipython-input-46-d9df4ac9da3a>, line 1)

In [169]:
recommendations = pd.DataFrame({'content_id' : user_0_preds, 'rating' : scores}).sort_values('rating', ascending = False, ignore_index = True).drop(columns = 'rating')

recommendations

Unnamed: 0,content_id
0,340
1,79
2,286
3,32
4,378
5,106
6,289


In [168]:
recommendations.transpose()

Unnamed: 0,0,1,2,3,4,5,6
content_id,340,79,286,32,378,106,289


In [161]:
np.array(recommendations).reshape(1,len(recommendations))

array([[340,  79, 286,  32, 378, 106, 289]], dtype=int64)

In [None]:
recommendations

In [53]:
recommendations.to_csv('recommendations.csv')

In [65]:
survey = pd.read_csv('personalisation_survey.csv', encoding = 'latin1', delimiter = ',')

In [107]:
survey_cols = list(survey.columns)

In [108]:
survey_cols = [i for i in survey_cols if 'Points' not in i and 'Feedback' not in i]

survey_cols.remove('ID')
survey_cols.remove('Start time')
survey_cols.remove('Completion time')
survey_cols.remove('Email')
survey_cols.remove('Total points')
survey_cols.remove('Quiz feedback')
survey_cols.remove('Name')

In [109]:
survey_cols_renamed = ['expat', 'live_nl', 'age', 'gender', 'occupation_status', 'occupation', 'relationship_status', 'hobbies', 'watchtime', 'diversity_preferences', 'integration_genres', 'user_behaviour', 'genre_ranks', 'personalisation_priority', 'diversity_open', 'recommender_pains']

In [110]:
survey_clean = survey[survey_cols]
survey_clean.columns = survey_cols_renamed

survey_clean

Unnamed: 0,expat,live_nl,age,gender,occupation_status,occupation,relationship_status,hobbies,watchtime,diversity_preferences,integration_genres,user_behaviour,genre_ranks,personalisation_priority,diversity_open,recommender_pains
0,Yes,0-2 years,18-24,Female,I am a student,Science,In a relationship,"going to festivals, watching series, walking m...",5-10 hours,Diverse genres;A diverse cast and characters (...,Documentaries;Popular native shows;,What you click or hover with your mouse;,Comedy series;Documentaries;Drama series;News ...,50/50,Watch different type of shows.,The recommendations are too similar to each ot...
1,Yes,0-2 years,25-34,Female,I am working,IT,In a relationship,"Yoga, reading, walking",5-10 hours,"Diverse formats (movies, ongoing series, non-c...",Popular native shows;,What you click or hover with your mouse;Watch ...,Comedy series;Family-friendly and children's s...,More personal,,
2,Yes,0-2 years,25-34,Female,I am working,Management,In a relationship,"music, pottery, books",5-10 hours,"Diverse formats (movies, ongoing series, non-c...",Popular native shows;Panel discussions;,Watch time;,Comedy series;Drama series;Documentaries;Famil...,50/50,same genre but not similar plots,The recommendations do not match my taste;
3,Yes,0-2 years,35-44,Male,I am unemployed,Marketing,In a relationship,"Traveling, Listening to musics, ceramics",5-10 hours,A diverse cast and characters (in terms of rac...,Other;,Watch time;Location information;,Music and Arts programmes;Comedy series;Drama ...,50/50,"Variety represented in gender, race, sexuality...",The recommendations are too similar to each ot...
4,Yes,0-2 years,25-34,Female,I am a student,IT,In a relationship,"Skiing ,Horse riding , hiking",0-5 hours,"Diverse formats (movies, ongoing series, non-c...",Documentaries;Other;,Location information;Watch time;,Drama series;Family-friendly and children's se...,More personal,To Attach more interest about my personality,I didn't understand how recommendations are de...
5,Yes,0-2 years,18-24,Female,I am a student,IT,In a relationship,"Dance, Languages, Art",0-5 hours,"Diverse formats (movies, ongoing series, non-c...",Popular native shows;,"Personal information (e.g. age, gender);",Music and Arts programmes;Comedy series;Docume...,More personal,nothing,The recommendations do not match my taste;I di...
6,Yes,0-2 years,25-34,Male,I am working,IT,In a relationship,"Cooking, Growing plants, music production",5-10 hours,"Diverse formats (movies, ongoing series, non-c...",Documentaries;,Watch time;,Documentaries;Comedy series;News programmes;Mu...,More personal,A lot of interesting sh*t to watch,The recommendations do not match my taste;
7,Yes,0-2 years,25-34,Male,I am working,Marketing,Married,"SEO, Marketing, Games",0-5 hours,"Diverse formats (movies, ongoing series, non-c...",Popular native shows;,"Watch time;Personal information (e.g. age, ge...",Drama series;Comedy series;Documentaries;Music...,More diverse,Big content catalogue,The recommendations do not match my taste;
8,Yes,0-2 years,25-34,Male,I am unemployed,Sales,Married,"Gym, cooking, films",10-15 hours,"Diverse formats (movies, ongoing series, non-c...",Other;,Watch time;What you click or hover with your m...,Comedy series;Family-friendly and children's s...,More personal,--,The recommendations do not match my taste;
9,Yes,0-2 years,25-34,Female,I am working,Business or Finance,Married,"Movies and TV shows, reading books, walkings",10-15 hours,"Diverse formats (movies, ongoing series, non-c...",Documentaries;,"Watch time;Personal information (e.g. age, ge...",Drama series;Documentaries;Family-friendly and...,50/50,Wide range of genres and formats that match my...,The recommendations do not match my taste;


In [112]:
sur

KeyError: "['expat', 'live_nl', 'age', 'gender', 'occupation_status', 'occupation', 'relationship_status', 'hobbies', 'watchtime', 'diversity_preferences', 'integration_genres', 'user_behaviour', 'genre_ranks', 'personalisation_priority', 'diversity_open', 'recommender_pains'] not found in axis"

In [68]:
survey.drop(columns = ['ID', 'Start time', 'Completion time', 'Email', 'Name', 'Total points', 'Quiz feedback'])

Unnamed: 0,Email,Name,Total points,Quiz feedback,Are you an expat (foreign national) living in the Netherlands?,Points - Are you an expat (foreign national) living in the Netherlands?,Feedback - Are you an expat (foreign national) living in the Netherlands?,How long have you lived in the Netherlands?,Points - How long have you lived in the Netherlands?,Feedback - How long have you lived in the Netherlands?,...,Points - Do you prefer to be more personalized (optimized for you) or more diverse (somewhat outside of your interests) content?,Choose your priority,Feedback - Choose your priority,Points - Choose your priority,What does diversity of media mean to you?,Feedback - What does diversity of media mean to you?,Points - What does diversity of media mean to you?,What pain points have you found in other recommender systems?,Feedback - What pain points have you found in other recommender systems?,Points - What pain points have you found in other recommender systems?
0,anonymous,,,,Yes,,,0-2 years,,,...,,50/50,,,Watch different type of shows.,,,The recommendations are too similar to each ot...,,
1,anonymous,,,,Yes,,,0-2 years,,,...,,More personal,,,,,,,,
2,anonymous,,,,Yes,,,0-2 years,,,...,,50/50,,,same genre but not similar plots,,,The recommendations do not match my taste;,,
3,anonymous,,,,Yes,,,0-2 years,,,...,,50/50,,,"Variety represented in gender, race, sexuality...",,,The recommendations are too similar to each ot...,,
4,anonymous,,,,Yes,,,0-2 years,,,...,,More personal,,,To Attach more interest about my personality,,,I didn't understand how recommendations are de...,,
5,anonymous,,,,Yes,,,0-2 years,,,...,,More personal,,,nothing,,,The recommendations do not match my taste;I di...,,
6,anonymous,,,,Yes,,,0-2 years,,,...,,More personal,,,A lot of interesting sh*t to watch,,,The recommendations do not match my taste;,,
7,anonymous,,,,Yes,,,0-2 years,,,...,,More diverse,,,Big content catalogue,,,The recommendations do not match my taste;,,
8,anonymous,,,,Yes,,,0-2 years,,,...,,More personal,,,--,,,The recommendations do not match my taste;,,
9,anonymous,,,,Yes,,,0-2 years,,,...,,50/50,,,Wide range of genres and formats that match my...,,,The recommendations do not match my taste;,,


In [54]:
pd.read_csv('recommendations.csv')

Unnamed: 0.1,Unnamed: 0,content_id
0,0,340
1,1,79
2,2,286
3,3,32
4,4,378
5,5,106
6,6,289


In [32]:
pd.DataFrame(data = (user_0_preds, scores), columns = ['scores', 'c_id'])

ValueError: 2 columns passed, passed data had 7 columns

In [135]:
pd.DataFrame(z)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,491,492,493,494,495,496,497,498,499,500
0,0.0,0.0,0.0,7.325963,-2.697394,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2.39955,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,-9.885856,-4.203423,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6.334405,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,57.239264,4.293258,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,4.267389,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,7.430549,-9.830347,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.001996,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,-5.06195,-1.446074,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,-2.138846,0.0,0.0,0.0,0.0,0.0,0.0


In [50]:
u_id = 0

not_seen = list(np.where(scores_matrix[u_id] == 0)[0])

not_seen

[0,
 2,
 3,
 4,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 14,
 15,
 16,
 17,
 18,
 20,
 21,
 22,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 34,
 35,
 36,
 40,
 41,
 43,
 44,
 45,
 46,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 58,
 60,
 61,
 62,
 63,
 64,
 65,
 67,
 68,
 69,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 82,
 83,
 84,
 85,
 87,
 88,
 89,
 90,
 91,
 92,
 94,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 144,
 145,
 146,
 147,
 149,
 150,
 151,
 152,
 153,
 154,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 164,
 165,
 166,
 167,
 168,
 169,
 170,
 171,
 172,
 173,
 174,
 175,
 176,
 177,
 179,
 180,
 181,
 183,
 185,
 186,
 188,
 189,
 190,
 191,
 192,
 193,
 195,
 197,
 198,
 199,
 200,
 201,
 204,
 205,
 206,
 207,
 208,
 209,
 210,
 211,
 212,
 21

[0,
 2,
 3,
 4,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 14,
 15,
 16,
 17,
 18,
 20,
 21,
 22,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 34,
 35,
 36,
 40,
 41,
 43,
 44,
 45,
 46,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 58,
 60,
 61,
 62,
 63,
 64,
 65,
 67,
 68,
 69,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 82,
 83,
 84,
 85,
 87,
 88,
 89,
 90,
 91,
 92,
 94,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 144,
 145,
 146,
 147,
 149,
 150,
 151,
 152,
 153,
 154,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 164,
 165,
 166,
 167,
 168,
 169,
 170,
 171,
 172,
 173,
 174,
 175,
 176,
 177,
 179,
 180,
 181,
 183,
 185,
 186,
 188,
 189,
 190,
 191,
 192,
 193,
 195,
 197,
 198,
 199,
 200,
 201,
 204,
 205,
 206,
 207,
 208,
 209,
 210,
 211,
 212,
 21

In [42]:
not_seen

[array([  0,   2,   3,   4,   6,   7,   8,   9,  10,  11,  12,  14,  15,
         16,  17,  18,  20,  21,  22,  24,  25,  26,  27,  28,  29,  30,
         31,  32,  34,  35,  36,  40,  41,  43,  44,  45,  46,  48,  49,
         50,  51,  52,  53,  54,  55,  56,  58,  60,  61,  62,  63,  64,
         65,  67,  68,  69,  71,  72,  73,  74,  75,  76,  77,  78,  79,
         80,  82,  83,  84,  85,  87,  88,  89,  90,  91,  92,  94,  96,
         97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
        110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
        123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135,
        136, 137, 138, 139, 140, 144, 145, 146, 147, 149, 150, 151, 152,
        153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 164, 165, 166,
        167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 179, 180,
        181, 183, 185, 186, 188, 189, 190, 191, 192, 193, 195, 197, 198,
        199, 200, 201, 204, 205, 206, 207, 208, 209

In [34]:
np.where((scores_matrix == 0.0).all(axis = 1))

(array([], dtype=int64),)

##

### Stuff below is probably garbage but not scrapping it yet to be safe

In [285]:
unstacked.columns = unstacked.columns.droplevel()

unstacked

ValueError: Cannot remove 1 levels from an index with 1 levels: at least one level must be left.

In [223]:
unstacked.replace({'Dislike':-10, 'Like':10}, inplace = True)

In [224]:
pivot_plays

content_id,0,3,7,8,19,21,34,37,39,43,...,460,465,466,478,480,483,486,487,491,496
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,...,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0
2,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
5,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0


In [225]:
pivot_plays.fillna(0, inplace = True)
unstacked.fillna(0, inplace = True)

In [226]:
likes = unstacked

likes

content_id,0,1,3,4,5,7,8,9,13,16,...,480,483,486,487,491,492,493,494,496,498
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,-10.0,0.0,0.0,-10.0,0.0,0.0,0.0,-10.0,0.0,...,-10.0,0.0,0.0,0.0,10.0,0.0,-10.0,-10.0,10.0,-10.0
2,0.0,0.0,0.0,0.0,0.0,10.0,10.0,-10.0,0.0,-10.0,...,10.0,0.0,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,-10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,-10.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,10.0,0.0,0.0,-10.0,0.0,0.0,0.0,0.0
5,10.0,0.0,10.0,-10.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,10.0,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0


In [227]:
pivot_plays

content_id,0,3,7,8,19,21,34,37,39,43,...,460,465,466,478,480,483,486,487,491,496
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,...,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0
2,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
5,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0


In [228]:
plays_matrix = np.zeros((5, 500))

cols = list(pivot_plays.columns)

list(pivot_plays.index)

users = [i -1 for i in list(pivot_plays.index)]

users

[0, 1, 2, 3, 4]

In [230]:
plays_matrix

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.]])

In [231]:
unstacked

content_id,0,1,3,4,5,7,8,9,13,16,...,480,483,486,487,491,492,493,494,496,498
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,-10.0,0.0,0.0,-10.0,0.0,0.0,0.0,-10.0,0.0,...,-10.0,0.0,0.0,0.0,10.0,0.0,-10.0,-10.0,10.0,-10.0
2,0.0,0.0,0.0,0.0,0.0,10.0,10.0,-10.0,0.0,-10.0,...,10.0,0.0,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,-10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,-10.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,10.0,0.0,0.0,-10.0,0.0,0.0,0.0,0.0
5,10.0,0.0,10.0,-10.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,10.0,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0


In [232]:
likes_matrix = np.zeros((5, 500))

cols = list(unstacked.columns)

for i in users:
    for j in cols:
        likes_matrix[i, j] = unstacked.loc[i +1, j]

In [234]:
plays_matrix = plays_matrix * 0.3

plays_matrix

array([[0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0.3, 0. , 0. , ..., 0. , 0. , 0. ]])

In [235]:
plays = np.array(pivot_plays)

likes = np.array(unstacked)

In [236]:
likes_matrix

array([[  0., -10.,   0., ...,   0., -10.,   0.],
       [  0.,   0.,   0., ...,   0.,   0.,   0.],
       [  0., -10.,   0., ...,   0.,   0.,   0.],
       [  0.,   0.,   0., ...,   0.,   0.,   0.],
       [ 10.,   0.,   0., ...,   0.,   0.,   0.]])

In [237]:
scores = plays_matrix + likes_matrix

scores

array([[  0. , -10. ,   0. , ...,   0. , -10. ,   0. ],
       [  0. ,   0. ,   0. , ...,   0. ,   0. ,   0. ],
       [  0. , -10. ,   0. , ...,   0. ,   0. ,   0. ],
       [  0. ,   0. ,   0. , ...,   0. ,   0. ,   0. ],
       [ 10.3,   0. ,   0. , ...,   0. ,   0. ,   0. ]])

In [189]:
np.unique(scores)

array([-2.,  0.,  2.,  4.])

In [190]:
scores_df = pd.DataFrame(scores, columns = [i for i in range(500)])

In [191]:
scores_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,490,491,492,493,494,495,496,497,498,499
0,0.0,-0.0,0.0,0.0,0.0,-0.0,0.0,0.0,0.0,0.0,...,0.0,2.0,0.0,-0.0,-0.0,0.0,2.0,0.0,-0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,-0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,-0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2.0,0.0,0.0,2.0,-0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [198]:
scores_v2 = np.where(scores < 0, -5, scores)

np.unique(scores_v2)

array([-5.,  0.,  2.,  4.])

In [197]:
temp = pd.DataFrame(np.where(scores < 0, -5, scores))

temp

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,490,491,492,493,494,495,496,497,498,499
0,0.0,-0.0,0.0,0.0,0.0,-0.0,0.0,0.0,0.0,0.0,...,0.0,2.0,0.0,-0.0,-0.0,0.0,2.0,0.0,-0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,-0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,-0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2.0,0.0,0.0,2.0,-0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
np.unique()

In [77]:
pivot_rating = latest.pivot_table(index='user_id', columns = 'content_id', values = 'activity')

pivot_rating

  pivot_rating = latest.pivot_table(index='user_id', columns = 'content_id', values = 'activity')


content_id
user_id
1
2
3
4
5


In [None]:
df_activities['content_id']

In [42]:
df_activities

Unnamed: 0,content_id,activity,user_id,datetime
0,374,Dislike,2,2023-03-19 13:39:37.899777
1,366,Like,3,2023-03-19 13:39:37.900176
2,316,Like,4,2023-03-19 13:39:37.900207
3,167,Dislike,2,2023-03-19 13:39:37.900227
4,22,Play,3,2023-03-19 13:39:37.900244
...,...,...,...,...
495,424,Play,2,2023-03-19 13:39:37.912798
496,355,Play,5,2023-03-19 13:39:37.912815
497,83,Dislike,4,2023-03-19 13:39:37.912844
498,247,Play,2,2023-03-19 13:39:37.912863


In [43]:
df_activities.dtypes

content_id             int64
activity              object
user_id                int64
datetime      datetime64[ns]
dtype: object

In [38]:
df_activities.sort_values(by = 'datetime')


TypeError: '<' not supported between instances of 'str' and 'Timestamp'

In [35]:
df_act

Unnamed: 0,content_id,activity,user_id,datetime
0,374,Dislike,2,2023-03-19 13:39:37.899777
1,366,Like,3,2023-03-19 13:39:37.900176
2,316,Like,4,2023-03-19 13:39:37.900207
3,167,Dislike,2,2023-03-19 13:39:37.900227
4,22,Play,3,2023-03-19 13:39:37.900244
...,...,...,...,...
496,355,Play,5,2023-03-19 13:39:37.912815
497,83,Dislike,4,2023-03-19 13:39:37.912844
498,247,Play,2,2023-03-19 13:39:37.912863
499,488,Play,3,2023-03-19 13:39:37.912892


In [66]:
df_activities.loc[len(df_activities)] = [374, "Like", 2, "2023-03-19 13:39:37.899778"]

TypeError: unhashable type: 'list'

In [8]:
Likes = df_activities[df_activities['activity'] == 'Like']

In [12]:
grouped_likes = Likes.groupby(['user_id', 'content_id']).count()

In [28]:
pivot_likes = grouped_likes.pivot_table(index='user_id', columns = 'content_id', values = 'activity')

In [25]:
pivot_plays

content_id,0,3,7,8,19,21,34,37,39,43,...,460,465,466,478,480,483,486,487,491,496
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,1.0,,,,1.0,,...,1.0,1.0,,1.0,,,,,1.0,1.0
2,,,1.0,1.0,,,,,,1.0,...,,,1.0,,1.0,,,1.0,,
3,,,,,,1.0,1.0,,,,...,,,,1.0,,,,,,
4,,,,,,,,1.0,,,...,,1.0,,,,,1.0,,,
5,1.0,1.0,,,,,,,,,...,,,,,,1.0,,1.0,,


In [29]:
Dislikes = df_activities[df_activities['activity'] == 'Dislike']