# K-Nearest Neighbors approach

In [2]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
import sklearn.preprocessing as preprocessing
from sklearn.model_selection import train_test_split

### Load Data

In [45]:
spotify_data = pd.read_csv('data\output_usersong_features.csv', header=0)

#debugging output, uncomment to view data
display(spotify_data.iloc[0])

artist_name                      Baka Beyond
title                  Journey Album Version
artist_location                     Cameroon
release                     The Meeting Pool
hotttness                           0.391484
familiarity                         0.559823
danceability                               0
duration                             368.143
energy                                     0
loudness                             -11.482
year                                    1995
tempo                                139.092
analysis_rate                          22050
end_of_fade_in                             0
key                                        9
key_confidence                         0.588
mode                                       0
mode_confidence                        0.367
start_of_fade_out                    358.905
time_signature                             4
time_signature_conf                        0
song_id                   SOXFJZQ12A6D4F81FB
Name: 0, d

### Removing Duplicates

In [3]:
#default duplicate removal
spotify_data_df = spotify_data.drop_duplicates()
print(len(spotify_data_df.index))

#clear songs with same IDs
spotify_data_df = spotify_data.drop_duplicates(subset=['song_id'])
print(len(spotify_data_df.index))

#reset the index
spotify_data_df = spotify_data_df.reset_index(drop=True)

#debugging output, uncomment to view
#spotify_data_df.head

385251
384546


### Remove NaNs

In [4]:
#specified factors that we might care about
spotify_data_df = spotify_data_df.dropna(axis=0,subset=['hotttness','familiarity','loudness','tempo','key','key_confidence','mode','mode_confidence'])

#reset the index
spotify_data_df = spotify_data_df.reset_index(drop=True)

### Extract Relevant Features

In [5]:
#messing around with this to try and give better results
#relevant_features = ['hotttness','familiarity','loudness','tempo','key','key_confidence','mode','mode_confidence']
relevant_features = ['hotttness','familiarity','loudness','tempo','key','mode']
#relevant_features = ['hotttness','familiarity','key','mode']
#relevant_features = ['hotttness','familiarity']

train_data = spotify_data_df[relevant_features]

#debugging output, uncomment to view
#train_data.head

### Normalize Data

Picked normalization between 0 and 1 to start to keep from suffering from one feature being uncharacteristically dominant in distance calculation (min max scaler)

In [6]:
scaler = preprocessing.MinMaxScaler()
train_data_norm = scaler.fit_transform(train_data)

## Create Model
### Nearest Neighbors

In [7]:
#create Nearest Neighbors model and fit data
neigh = NearestNeighbors(10)
neigh.fit(train_data_norm)

NearestNeighbors(algorithm='auto', leaf_size=30, metric='minkowski',
                 metric_params=None, n_jobs=None, n_neighbors=10, p=2,
                 radius=1.0)

### Pick a Song

In [8]:
#pick a random song
rand_song = spotify_data_df.sample(n=1)
rand_song_feat = rand_song[relevant_features]
rand_song_feat = scaler.transform(rand_song_feat)

In [9]:
#Run KNN on it
neigh_ind = neigh.kneighbors(rand_song_feat, return_distance=False)

### Input Song

In [10]:
rand_song[['artist_name','title','hotttness','familiarity','loudness','tempo','key','mode']]

Unnamed: 0,artist_name,title,hotttness,familiarity,loudness,tempo,key,mode
115800,Etran Finatawa,Tekana,0.329729,0.487797,-7.322,172.76,2,1


### Closest Songs

In [11]:
closest_songs = spotify_data_df.iloc[ neigh_ind[0] , : ]

closest_songs[['artist_name','title','hotttness','familiarity','loudness','tempo','key','mode']]

Unnamed: 0,artist_name,title,hotttness,familiarity,loudness,tempo,key,mode
115800,Etran Finatawa,Tekana,0.329729,0.487797,-7.322,172.76,2,1
293323,Eleventh Dream Day,Orange Moon,0.33208,0.478679,-7.995,171.234,2,1
219007,Scott Wesley Brown,I Stand Here Forgiven,0.342406,0.497308,-7.695,170.064,2,1
215937,Pointed Sticks,Destitute Bonus Track,0.341491,0.496769,-6.708,175.31,2,1
101196,Christine Anu,Island Home,0.342908,0.497964,-8.017,169.72,2,1
52820,Sarah Siskind,Falling Stars,0.34663,0.497603,-7.396,168.868,2,1
46326,Pete Escovedo Ray Obiedo,Feliz Navidad,0.322221,0.471375,-6.601,175.07,2,1
257329,The Four Of Us,Free Spirit,0.338404,0.47387,-7.942,176.553,2,1
146805,Welton Irie,Army Life,0.333831,0.468,-7.227,168.951,2,1
135705,Doug Supernaw,I Dont Call Him Daddy,0.324403,0.496014,-8.335,177.206,2,1



# =========================================================

# Finding best feature combination

Testing different feature combinations.

In [12]:
#helper functions
def create_song_feature_array(user_songs, features, scaler):
    ''' Builds the song feature array for a specific user based on their songs and features chosen
    
    Args:
        user_songs(list): list of songs that a particular user has listened to before
        
        features(list): list of features that should be used in the array
        
        scaler(Scaler obj): which scaler to use to transform
        
    Returns:
        s_feat_arr(np.array): array of the songs with the respective features filling out the columns. 
        This has been normalized according to training data normalization.
    '''
    user_songs_data = spotify_data_df.loc[spotify_data_df['song_id'].isin(user_songs)]
    rel_data = user_songs_data[features]
    s_feat_arr = scaler.transform(rel_data)
    return s_feat_arr

def content_evaluate(expected, actual):
    ''' PRECISION: True Positives / (True Positives + False Positives)
        RECALL: True Positives / (True Positives + False Negatives)
    Evaluates a list of songs by the expected output vs the actual output. 
    Right now just an intersection, could possibly work in artist/genre score increase or increase score based on when the song is returned
    
    Args:
        expected(list): list of song_ids (strings) that are expected to be returned from a perfect prediction algorithm
        actual(list): list of song_ids (strings) that are actually returned from prediction algorithm
        
    returns:
        score(int): number of songs that were in both the actual and expected list
    
    '''
    
    temp = set(actual) 
    lst3 = [value for value in expected if value in temp] 
    score = len(lst3)
    
    precision = score/len(actual)
    recall = score/len(expected)
    
    return precision, recall

def run_model(model,feature_array,test_set,n):
    #find the songs returned
    neigh_ind = model.kneighbors(feature_array, return_distance=False)
    
    start_ind = len(feature_array)
    end_ind = int(start_ind+n)
    
    topNsongs = neigh_ind[0][start_ind:end_ind]
    nearest_n = spotify_data_df.iloc[topNsongs,:][['song_id']]
    nearest_n_list = nearest_n['song_id'].tolist()
    print(nearest_n_list)
    
    #evaluate this KNN 
    precision, recall = content_evaluate(test_set, nearest_n_list)
    return precision, recall

### Creating Different KNN Models with different features

In [36]:
relevant_features1 = ['hotttness']
relevant_features2 = ['hotttness','familiarity','loudness']
relevant_features3 = ['hotttness','familiarity','duration','loudness','tempo','key','mode','time_signature']
relevant_features4 = ['hotttness','familiarity','key','mode']
relevant_features5 = ['hotttness','familiarity']
relevant_features6 = ['familiarity']


train_data1 = spotify_data_df[relevant_features1]
train_data2 = spotify_data_df[relevant_features2]
train_data3 = spotify_data_df[relevant_features3]
train_data4 = spotify_data_df[relevant_features4]
train_data5 = spotify_data_df[relevant_features5]
train_data6 = spotify_data_df[relevant_features6]


scaler1 = preprocessing.RobustScaler()
train_data_norm1 = scaler1.fit_transform(train_data1)
scaler2 = preprocessing.RobustScaler()
train_data_norm2 = scaler2.fit_transform(train_data2)
scaler3 = preprocessing.RobustScaler()
train_data_norm3 = scaler3.fit_transform(train_data3)
scaler4 = preprocessing.RobustScaler()
train_data_norm4 = scaler4.fit_transform(train_data4)
scaler5 = preprocessing.RobustScaler()
train_data_norm5 = scaler5.fit_transform(train_data5)
scaler6 = preprocessing.RobustScaler()
train_data_norm6 = scaler6.fit_transform(train_data6)

In [37]:
#Building our different KNNs off the different features
KNN1 = NearestNeighbors(1000)
KNN1.fit(train_data_norm1)

KNN2 = NearestNeighbors(1000)
KNN2.fit(train_data_norm2)

KNN3 = NearestNeighbors(1000)
KNN3.fit(train_data_norm3)

KNN4 = NearestNeighbors(1000)
KNN4.fit(train_data_norm4)

KNN5 = NearestNeighbors(1000)
KNN5.fit(train_data_norm5)

KNN6 = NearestNeighbors(1000)
KNN6.fit(train_data_norm6)

NearestNeighbors(algorithm='auto', leaf_size=30, metric='minkowski',
                 metric_params=None, n_jobs=None, n_neighbors=1000, p=2,
                 radius=1.0)

### Build Train Test Dataset from User/ Song plays dataset

In [15]:
#Loading in the Data Set 
userdata = pd.read_csv('data//train_triplets.txt', sep="\t", header=None)
userdata.columns = ['user', 'song', 'plays']

In [16]:
#userdata.head()

### 1. take random sample of 1000 users

In [17]:
#get all user ids
user_id_list = userdata['user'].unique().tolist()

#print(len(user_id_list))

In [18]:
#randomly select 1000
rand_seed = 42
np.random.seed(seed=rand_seed)
sample = np.random.choice(user_id_list, 1000)

#print(sample[0], len(sample))

### 2. Evaluate how each set of features performs on average
uses comparison of prediction to actual songs returned

In [21]:
p1 = p2 = p3 = p4 = p5 = p6 = 0
r1 = r2 = r3 = r4 = r5 = r6 = 0

counter = 1

for user_i in sample:
    #get all songs for this specific user
    user_i_data = userdata.loc[userdata['user'] == user_i]
    #print(user_i_data)
    
    #create a list of all the songs that this user listens to
    user_i_songs = user_i_data['song'].unique().tolist()
    #print(user_i_songs)
    #print(len(user_i_songs))
    
    #split data into train and test
    songs_i_train, songs_i_test = train_test_split(user_i_songs,test_size=0.25,random_state=rand_seed)
    
    #build the features input for one KNN algorithm
    u_arr = create_song_feature_array(songs_i_train,relevant_features1, scaler1)
    u_arr2 = create_song_feature_array(songs_i_train,relevant_features2, scaler2)
    u_arr3 = create_song_feature_array(songs_i_train,relevant_features3, scaler3)
    u_arr4 = create_song_feature_array(songs_i_train,relevant_features4, scaler4)
    u_arr5 = create_song_feature_array(songs_i_train,relevant_features5, scaler5)
    u_arr6 = create_song_feature_array(songs_i_train,relevant_features6, scaler6)
    
    prec, rec = run_model(KNN1,u_arr,songs_i_test,30)
    p1 += prec
    r1 += rec
    
    prec, rec = run_model(KNN2,u_arr2,songs_i_test,30)
    p2 += prec
    r2 += rec
    
    prec, rec = run_model(KNN3,u_arr3,songs_i_test,30)
    p3 += prec
    r3 += rec

    prec, rec = run_model(KNN4,u_arr4,songs_i_test,30)
    p4 += prec
    r4 += rec
    
    prec, rec = run_model(KNN5,u_arr5,songs_i_test,30)
    p5 += prec
    r5 += rec
    
    prec, rec = run_model(KNN6,u_arr6,songs_i_test,30)
    p6 += prec
    r6 += rec
    
    print("For user:", counter, "id:", user_i, "p6 and r6", prec, rec)
    counter += 1
    

p1 /= len(sample)
p2 /= len(sample)
p3 /= len(sample)
p4 /= len(sample)
p5 /= len(sample)
p6 /= len(sample)

r1 /= len(sample)
r2 /= len(sample)
r3 /= len(sample)
r4 /= len(sample)
r5 /= len(sample)
r6 /= len(sample)

print("for feature set: ", relevant_features1, "avg precision:", p1, "avg recall:", r1)
print("for feature set: ", relevant_features2, "avg precision:", p2, "avg recall:", r2)
print("for feature set: ", relevant_features3, "avg precision:", p3, "avg recall:", r3)
print("for feature set: ", relevant_features4, "avg precision:", p4, "avg recall:", r4)
print("for feature set: ", relevant_features5, "avg precision:", p5, "avg recall:", r5)
print("for feature set: ", relevant_features6, "avg precision:", p6, "avg recall:", r6)

For user: 1 id: f0079fb5619dc0f2cd1fd55e13207effc2cc2ddf p6 and r6 0.0 0.0
For user: 2 id: cf28cd441dabbb8091e0065c583d214fec545b56 p6 and r6 0.0 0.0
For user: 3 id: c51be6ee821e9c674c7198964404d1c053fff8b0 p6 and r6 0.0 0.0
For user: 4 id: 09225307a641f9a43b1b4ad1b8d8e1f2f93532f2 p6 and r6 0.0 0.0
For user: 5 id: 9b02de3fbc1b0e9088a8b17002f60f7b0c33ec54 p6 and r6 0.0 0.0
For user: 6 id: 211bb42a26b0da0d190ded2072d12d0946d5f67f p6 and r6 0.0 0.0
For user: 7 id: 5b3389d4a92d7256392dc60465dbe903af0c3475 p6 and r6 0.0 0.0
For user: 8 id: 559ff7e2ee976599f0cb62bea63971ec347d4551 p6 and r6 0.0 0.0
For user: 9 id: 916138a4a7bec1a93de83724371b01b9de8041df p6 and r6 0.0 0.0
For user: 10 id: 58a649d25f775aba1829ac8a282bcc45c3dfde21 p6 and r6 0.0 0.0
For user: 11 id: 9afcb2a2c50dd6388f4a3a80285682ffdfe303d4 p6 and r6 0.0 0.0
For user: 12 id: 36bfa01ccb000d117aeadbffeae74a314c0aad5b p6 and r6 0.0 0.0
For user: 13 id: 7950b48dc6f3e612ff05a26f6a7662f2a8e6a79f p6 and r6 0.0 0.0
For user: 14 id: 1042

For user: 107 id: 3e891693a0b0c20a68832f3fdfc854ce2ab705d2 p6 and r6 0.0 0.0
For user: 108 id: 2269a7aeb590dc35ed4759e1dbfe70a23bc16a9c p6 and r6 0.0 0.0
For user: 109 id: 7fecdafd3a6934ca5f27807c7712a0387ceab59e p6 and r6 0.0 0.0
For user: 110 id: 0625bcba6863889d76ba9b5cfc6c67eb39757049 p6 and r6 0.0 0.0
For user: 111 id: 114e87f0aabdeaef949245356758204df4062acc p6 and r6 0.0 0.0
For user: 112 id: e5366c901c49968aeaab714f9172fbcfacc23a6e p6 and r6 0.0 0.0
For user: 113 id: 8442dcff8890b2870ce0cc4c495d4b010fcef46e p6 and r6 0.0 0.0
For user: 114 id: 562bbaf5de24a3ba0de8ff0452e58c8354ed1c8b p6 and r6 0.0 0.0
For user: 115 id: bb9f772af7f331bc5e07e1e67a1879ae750aac8d p6 and r6 0.0 0.0
For user: 116 id: c802c72a22587ed2c9fcc0d32de68d926469003d p6 and r6 0.0 0.0
For user: 117 id: 9f4d24bb6e52e359e3f959e4d0434affaa0a0e65 p6 and r6 0.0 0.0
For user: 118 id: 2bfe55e746e10d57526b76dbf532010e7d957122 p6 and r6 0.0 0.0
For user: 119 id: 554746367e28728413042c22a7b5534e660bb358 p6 and r6 0.0 0.0

For user: 211 id: 7a655c62332a8207ea0dc2e7a3e7d1a6383ea2ff p6 and r6 0.0 0.0
For user: 212 id: b93e6a30eda95f48a768aaf71b15caa4ebf9f67c p6 and r6 0.0 0.0
For user: 213 id: e20f6728ebd0d3ba660613535d260cb130ad45e7 p6 and r6 0.0 0.0
For user: 214 id: 12cdcd2e2ebafe58ebbc1110ed70d4ed626e8c69 p6 and r6 0.0 0.0
For user: 215 id: f9d648e3a4cf0fa477e18f2df4c6eae694d8bc27 p6 and r6 0.0 0.0
For user: 216 id: 38092292c6c245740c01a15039b71f38125ea536 p6 and r6 0.0 0.0
For user: 217 id: 880aa577e593a7d60a87f7bd328bc35ee7f3f028 p6 and r6 0.03333333333333333 0.16666666666666666
For user: 218 id: 225abbd9598dea0bf10d719b368d59a4d8dbf665 p6 and r6 0.03333333333333333 0.05263157894736842
For user: 219 id: a5b7b8b52be93f8a292bb18d7d4f41858d676eeb p6 and r6 0.0 0.0
For user: 220 id: bd3a6607bc4278a06bd3c4d38a3dbb61b4d45057 p6 and r6 0.0 0.0
For user: 221 id: 9a17148881086425c3e86bf2bc4b8a781185771a p6 and r6 0.0 0.0
For user: 222 id: a4752432671dbe2ebacf4eebaf23502ea06bc99d p6 and r6 0.0 0.0
For user: 22

For user: 315 id: 4857155182f9febd06310b66d3a49566dc78e6da p6 and r6 0.0 0.0
For user: 316 id: 0cf3c5b2d69a50e075e848d2b7d6cff8f545d4f3 p6 and r6 0.0 0.0
For user: 317 id: 339fbf843dcf3f7ec3a7f43c37b6a1ff37f5b817 p6 and r6 0.0 0.0
For user: 318 id: ca0db1ba8740f9ec24f8aa5ea09843a5b34b386b p6 and r6 0.0 0.0
For user: 319 id: e2a64039138ffb0ae4653cc8cb1dd0125c378c39 p6 and r6 0.0 0.0
For user: 320 id: 3f61ead20ef5d0c5d31256ed703228e6f7e1c540 p6 and r6 0.0 0.0
For user: 321 id: da061a3f45aa6cc1d2e963d57f060eea09e7471a p6 and r6 0.0 0.0
For user: 322 id: b588fac10c9aafdc7ca41ca27575bd18d6655e03 p6 and r6 0.0 0.0
For user: 323 id: 6d8b520019a1c7882a70dc012e1110caaeda3497 p6 and r6 0.0 0.0
For user: 324 id: 429daebea676753688e96ca0cd73d0a0108d4f60 p6 and r6 0.0 0.0
For user: 325 id: b120028367d5e26e31af7e15dcd2dabb0f7b5d39 p6 and r6 0.0 0.0
For user: 326 id: f0f59e3d697c98e7f75bb6fa29bd1182bd15a3f6 p6 and r6 0.0 0.0
For user: 327 id: 4897b05d09c104e7fe099ed2857de9ad53b251ee p6 and r6 0.0 0.0

For user: 420 id: 3e1a5acde38aef26ad80423e3a43be08ea6ee12a p6 and r6 0.0 0.0
For user: 421 id: fe42f5a5abffa75fff834235df27560bbcc15199 p6 and r6 0.0 0.0
For user: 422 id: 44014fde641b339496d9042ef08b6b9cd4140080 p6 and r6 0.0 0.0
For user: 423 id: 52f917f55f0376d82a403492e327e4a72b53c472 p6 and r6 0.0 0.0
For user: 424 id: 8a5841023fb7f345032ee76b377a39015acb053b p6 and r6 0.0 0.0
For user: 425 id: 746225b9bbe9afcc2d95a9c34437a3be1c26b7ce p6 and r6 0.0 0.0
For user: 426 id: f4d96f5faabfd446aa92954850bea4bfd132dafb p6 and r6 0.0 0.0
For user: 427 id: a32008aad282eb17caba1ce8543ea3216373a988 p6 and r6 0.0 0.0
For user: 428 id: 65525da44fa19155a4e70602cb54e8de56f90acf p6 and r6 0.0 0.0
For user: 429 id: 3ab034ca5228c0c4ba72e1e0f53c6466716e58a1 p6 and r6 0.0 0.0
For user: 430 id: 18c13bd0cc24e05857bd10d520be9362ef65fee6 p6 and r6 0.0 0.0
For user: 431 id: 6f45f991198c40dcbcdf4bac177c8714aa4b286a p6 and r6 0.0 0.0
For user: 432 id: ea58e48d2e59c65f166e1acc9e981766dbf0d4c8 p6 and r6 0.0 0.0

For user: 525 id: 67e9184ec2a2201183aae01f71191e96196b5ec3 p6 and r6 0.0 0.0
For user: 526 id: 2dcb17e35cb354c59e83e30304952212d86c2868 p6 and r6 0.0 0.0
For user: 527 id: e80b93c45d50f60054c1b3fe8184c83287d26a01 p6 and r6 0.0 0.0
For user: 528 id: b21ab3e0b3a7fa42c47ea763e0a7e614dd9a3ab3 p6 and r6 0.0 0.0
For user: 529 id: e33a071ca29067e5fbd6c2c23f4a4e91cb2380b9 p6 and r6 0.0 0.0
For user: 530 id: 5b71462f7e393f6b1762819c62abd5b6983e159c p6 and r6 0.0 0.0
For user: 531 id: 1f9590d001c633e43efb27ac901a3e56b43c9dcf p6 and r6 0.0 0.0
For user: 532 id: a50cbcf61f556edce5451ae50fcca00e159ade91 p6 and r6 0.0 0.0
For user: 533 id: b666bd7fb801e7f2fa08cc9565e4795067e180ac p6 and r6 0.0 0.0
For user: 534 id: b9450dc7742ec184832a489e85be5d57ba59b945 p6 and r6 0.0 0.0
For user: 535 id: ff58a1fcf10d901c8cbbfc700eeef8429936685a p6 and r6 0.0 0.0
For user: 536 id: 346f139c321a03826caff9cc6d18a254f812c718 p6 and r6 0.03333333333333333 0.07142857142857142
For user: 537 id: 5ffac53eee4c3c0b126544945b

For user: 628 id: ed97453e9c908b2a9edd7236802e59f3e2539290 p6 and r6 0.0 0.0
For user: 629 id: 344c7c9addba370093ebec558344396015dc684d p6 and r6 0.0 0.0
For user: 630 id: 24cf01853dcbf3b954ecd87612ad56da3fcbf0fd p6 and r6 0.0 0.0
For user: 631 id: e371a58d4e8a942ce457c5b7c1095b2442f9ebf1 p6 and r6 0.0 0.0
For user: 632 id: b588d23136bf67e059b411aab23a289db4b3ec99 p6 and r6 0.0 0.0
For user: 633 id: 682ab0dddcb4e2fa309d48c4a2491ee8e13a6edf p6 and r6 0.0 0.0
For user: 634 id: 602f4b14d3d6a2bd960149637aa079e571468eb3 p6 and r6 0.0 0.0
For user: 635 id: 253ae0b1ba4db0ed787a73adc2ea7e9966b836c1 p6 and r6 0.0 0.0
For user: 636 id: d2032d92be104eb524ad0e3c2699e803936ddaf1 p6 and r6 0.0 0.0
For user: 637 id: 5cd1e285ceb9654f1201e0d48555186222f13836 p6 and r6 0.03333333333333333 0.25
For user: 638 id: e07a5faf3ebd848cb9116782234a767ce988c803 p6 and r6 0.0 0.0
For user: 639 id: f87e4f615ecfd0aeca1d6e6bab88d368865e4002 p6 and r6 0.0 0.0
For user: 640 id: 38f3edbb215cb397dd32358f62ca3e9b7edc9671 

For user: 732 id: c6d5d9b788fb70c182467cab3ee54f76f3acfe0b p6 and r6 0.0 0.0
For user: 733 id: 90121a337c7b1a9d80a59f969865b75745cdf3af p6 and r6 0.0 0.0
For user: 734 id: 9d0633f2091b278147d7796913e8fc7491122a50 p6 and r6 0.0 0.0
For user: 735 id: ba1f835a61d5bd1cab7f6b21b5c8fc30401658ae p6 and r6 0.0 0.0
For user: 736 id: 841627a50a6fed3d1fea4faf07dcad85866595e8 p6 and r6 0.0 0.0
For user: 737 id: 6fe927afb0e4fa11a71660f25ce8d150d2552513 p6 and r6 0.0 0.0
For user: 738 id: 771c61fa4804e5222cebc44e1d85c15892d3f0f4 p6 and r6 0.0 0.0
For user: 739 id: 51562edf812021ac0d2e19babc6bfbc0856d6b2e p6 and r6 0.0 0.0
For user: 740 id: 5858d606b3f1d6224d08b33243d256cca7bd0511 p6 and r6 0.0 0.0
For user: 741 id: 055eabe551e2a730025da383df23d87d2845c281 p6 and r6 0.13333333333333333 0.5714285714285714
For user: 742 id: 04aee86b83a249a84d267474ca47a3be98c50016 p6 and r6 0.0 0.0
For user: 743 id: 5ed82ac9fa1a019c80bcad339084dcd9a28cca05 p6 and r6 0.03333333333333333 0.14285714285714285
For user: 744

For user: 837 id: eaf94163727136adba69a2e768ac5f8f45ab4b2c p6 and r6 0.0 0.0
For user: 838 id: 99a0ad25863df0553e2245b6e84de03a3424d0ad p6 and r6 0.0 0.0
For user: 839 id: e06b64dd910d1da68fdd748e7effbfbd871f84da p6 and r6 0.0 0.0
For user: 840 id: 338c44d88fa1a751466bc1b0e2535a9a2871592d p6 and r6 0.0 0.0
For user: 841 id: b6ce08b93ed972288af9487613943f01eab38e84 p6 and r6 0.0 0.0
For user: 842 id: 5420aa3c0bb70ad7d8b134201179b78e42c5ac94 p6 and r6 0.0 0.0
For user: 843 id: 813e8a427c105b00bead125d8a65110d3cea1e09 p6 and r6 0.0 0.0
For user: 844 id: d9c52e72c6df35993b848378224fb7d5664635eb p6 and r6 0.0 0.0
For user: 845 id: ec932f8ee90abdfa003fb9f425c0d0b6fcf1313b p6 and r6 0.0 0.0
For user: 846 id: 1dc74d2ff2f03ac66461819eee4bba406757b987 p6 and r6 0.0 0.0
For user: 847 id: ec4e4da8b8758dfa5ab5dcc384edb72ac8f865eb p6 and r6 0.0 0.0
For user: 848 id: 81f0492705b2306ec85fb10e8ae2f422e09ba30d p6 and r6 0.0 0.0
For user: 849 id: db73e89e996dbc6a554e872449b556a371d31e3c p6 and r6 0.0 0.0

For user: 940 id: 023e27ed4f1e3ec0858804c3e0c0afa5d0addce1 p6 and r6 0.0 0.0
For user: 941 id: 1c54bdca7730f14744475aa38dfd12022f5b17a3 p6 and r6 0.0 0.0
For user: 942 id: 0c84a76bb3edf7052eee01939bf3fea1e876deaa p6 and r6 0.0 0.0
For user: 943 id: 80f1eb865d05ee289d9e1d662ad927ee213eee61 p6 and r6 0.0 0.0
For user: 944 id: 034b47082e69082767b9ecce4ca4bbd4715e9e08 p6 and r6 0.0 0.0
For user: 945 id: 3e727495f67ad9bf9ef39361e470b3fc19d2835e p6 and r6 0.03333333333333333 0.3333333333333333
For user: 946 id: b1a9d910f4856298760b5ddd3e6eaf6d881259f1 p6 and r6 0.0 0.0
For user: 947 id: f264c0efcdf1e167af9d2786049831aa0212f451 p6 and r6 0.0 0.0
For user: 948 id: d56a7a6acbf9ead364c8d1b29911b0cbd8469e8e p6 and r6 0.0 0.0
For user: 949 id: 55fd35457d3669612430984c0df26739989f5aa7 p6 and r6 0.0 0.0
For user: 950 id: f15ccdca69ba39d620308f798ccc99179077f0dc p6 and r6 0.0 0.0
For user: 951 id: 36aeb5e346c15d727929e80bef4b915ca9e75130 p6 and r6 0.0 0.0
For user: 952 id: f692d010b31dca7e6b938b65355

From the scores, we actually predict some songs that the user has listened to before. 

The best feature space in terms of predicting songs the user actually had listened to: ['familiarity']

# =============================================================

# Number of Neighbors evaluation

In [22]:
p1 = p2 = p3 = p4 = p5 = p6 = 0
r1 = r2 = r3 = r4 = r5 = r6 = 0

counter = 1

for user_i in sample:
    #get all songs for this specific user
    user_i_data = userdata.loc[userdata['user'] == user_i]
    #print(user_i_data)
    
    #create a list of all the songs that this user listens to
    user_i_songs = user_i_data['song'].unique().tolist()
    #print(user_i_songs)
    #print(len(user_i_songs))
    
    #split data into train and test
    songs_i_train, songs_i_test = train_test_split(user_i_songs,test_size=0.25,random_state=rand_seed)
    
    #build the features input for one KNN algorithm
    u_arr5 = create_song_feature_array(songs_i_train,relevant_features5, scaler5)
    
    prec, rec = run_model(KNN5,u_arr5,songs_i_test,10)
    p1 += prec
    r1 += rec
    
    prec, rec = run_model(KNN5,u_arr5,songs_i_test,25)
    p2 += prec
    r2 += rec
    
    prec, rec = run_model(KNN5,u_arr5,songs_i_test,50)
    p3 += prec
    r3 += rec

    prec, rec = run_model(KNN5,u_arr5,songs_i_test,100)
    p4 += prec
    r4 += rec
    
    prec, rec = run_model(KNN5,u_arr5,songs_i_test,200)
    p5 += prec
    r5 += rec
    
    prec, rec = run_model(KNN5,u_arr5,songs_i_test,400)
    p6 += prec
    r6 += rec
    
    print("For user:", counter, "id:", user_i, "p6 and r6", prec, rec)
    counter += 1
    

p1 /= len(sample)
p2 /= len(sample)
p3 /= len(sample)
p4 /= len(sample)
p5 /= len(sample)
p6 /= len(sample)

r1 /= len(sample)
r2 /= len(sample)
r3 /= len(sample)
r4 /= len(sample)
r5 /= len(sample)
r6 /= len(sample)

print("for k = 10, avg precision:", p1, "avg recall:", r1)
print("for k = 25, avg precision:", p2, "avg recall:", r2)
print("for k = 50, avg precision:", p3, "avg recall:", r3)
print("for k = 100, avg precision:", p4, "avg recall:", r4)
print("for k = 200, avg precision:", p5, "avg recall:", r5)
print("for k = 400, avg precision:", p6, "avg recall:", r6)

For user: 1 id: f0079fb5619dc0f2cd1fd55e13207effc2cc2ddf p6 and r6 0.0 0.0
For user: 2 id: cf28cd441dabbb8091e0065c583d214fec545b56 p6 and r6 0.0025 0.2
For user: 3 id: c51be6ee821e9c674c7198964404d1c053fff8b0 p6 and r6 0.0 0.0
For user: 4 id: 09225307a641f9a43b1b4ad1b8d8e1f2f93532f2 p6 and r6 0.0 0.0
For user: 5 id: 9b02de3fbc1b0e9088a8b17002f60f7b0c33ec54 p6 and r6 0.0 0.0
For user: 6 id: 211bb42a26b0da0d190ded2072d12d0946d5f67f p6 and r6 0.0 0.0
For user: 7 id: 5b3389d4a92d7256392dc60465dbe903af0c3475 p6 and r6 0.0 0.0
For user: 8 id: 559ff7e2ee976599f0cb62bea63971ec347d4551 p6 and r6 0.0 0.0
For user: 9 id: 916138a4a7bec1a93de83724371b01b9de8041df p6 and r6 0.0 0.0
For user: 10 id: 58a649d25f775aba1829ac8a282bcc45c3dfde21 p6 and r6 0.0 0.0
For user: 11 id: 9afcb2a2c50dd6388f4a3a80285682ffdfe303d4 p6 and r6 0.0 0.0
For user: 12 id: 36bfa01ccb000d117aeadbffeae74a314c0aad5b p6 and r6 0.0 0.0
For user: 13 id: 7950b48dc6f3e612ff05a26f6a7662f2a8e6a79f p6 and r6 0.0 0.0
For user: 14 id: 1

For user: 107 id: 3e891693a0b0c20a68832f3fdfc854ce2ab705d2 p6 and r6 0.0 0.0
For user: 108 id: 2269a7aeb590dc35ed4759e1dbfe70a23bc16a9c p6 and r6 0.0 0.0
For user: 109 id: 7fecdafd3a6934ca5f27807c7712a0387ceab59e p6 and r6 0.0 0.0
For user: 110 id: 0625bcba6863889d76ba9b5cfc6c67eb39757049 p6 and r6 0.0 0.0
For user: 111 id: 114e87f0aabdeaef949245356758204df4062acc p6 and r6 0.0 0.0
For user: 112 id: e5366c901c49968aeaab714f9172fbcfacc23a6e p6 and r6 0.0 0.0
For user: 113 id: 8442dcff8890b2870ce0cc4c495d4b010fcef46e p6 and r6 0.0 0.0
For user: 114 id: 562bbaf5de24a3ba0de8ff0452e58c8354ed1c8b p6 and r6 0.0 0.0
For user: 115 id: bb9f772af7f331bc5e07e1e67a1879ae750aac8d p6 and r6 0.0 0.0
For user: 116 id: c802c72a22587ed2c9fcc0d32de68d926469003d p6 and r6 0.005 0.125
For user: 117 id: 9f4d24bb6e52e359e3f959e4d0434affaa0a0e65 p6 and r6 0.0 0.0
For user: 118 id: 2bfe55e746e10d57526b76dbf532010e7d957122 p6 and r6 0.0025 0.2
For user: 119 id: 554746367e28728413042c22a7b5534e660bb358 p6 and r6 

For user: 212 id: b93e6a30eda95f48a768aaf71b15caa4ebf9f67c p6 and r6 0.0 0.0
For user: 213 id: e20f6728ebd0d3ba660613535d260cb130ad45e7 p6 and r6 0.0 0.0
For user: 214 id: 12cdcd2e2ebafe58ebbc1110ed70d4ed626e8c69 p6 and r6 0.0 0.0
For user: 215 id: f9d648e3a4cf0fa477e18f2df4c6eae694d8bc27 p6 and r6 0.0 0.0
For user: 216 id: 38092292c6c245740c01a15039b71f38125ea536 p6 and r6 0.0 0.0
For user: 217 id: 880aa577e593a7d60a87f7bd328bc35ee7f3f028 p6 and r6 0.005 0.3333333333333333
For user: 218 id: 225abbd9598dea0bf10d719b368d59a4d8dbf665 p6 and r6 0.005 0.10526315789473684
For user: 219 id: a5b7b8b52be93f8a292bb18d7d4f41858d676eeb p6 and r6 0.0 0.0
For user: 220 id: bd3a6607bc4278a06bd3c4d38a3dbb61b4d45057 p6 and r6 0.0 0.0
For user: 221 id: 9a17148881086425c3e86bf2bc4b8a781185771a p6 and r6 0.0 0.0
For user: 222 id: a4752432671dbe2ebacf4eebaf23502ea06bc99d p6 and r6 0.0025 0.1111111111111111
For user: 223 id: 57f6525879f1ee8858f5f78b5664d53698e0816f p6 and r6 0.0025 0.2
For user: 224 id: d8

For user: 318 id: ca0db1ba8740f9ec24f8aa5ea09843a5b34b386b p6 and r6 0.0 0.0
For user: 319 id: e2a64039138ffb0ae4653cc8cb1dd0125c378c39 p6 and r6 0.0 0.0
For user: 320 id: 3f61ead20ef5d0c5d31256ed703228e6f7e1c540 p6 and r6 0.0 0.0
For user: 321 id: da061a3f45aa6cc1d2e963d57f060eea09e7471a p6 and r6 0.0 0.0
For user: 322 id: b588fac10c9aafdc7ca41ca27575bd18d6655e03 p6 and r6 0.0 0.0
For user: 323 id: 6d8b520019a1c7882a70dc012e1110caaeda3497 p6 and r6 0.0 0.0
For user: 324 id: 429daebea676753688e96ca0cd73d0a0108d4f60 p6 and r6 0.0 0.0
For user: 325 id: b120028367d5e26e31af7e15dcd2dabb0f7b5d39 p6 and r6 0.0 0.0
For user: 326 id: f0f59e3d697c98e7f75bb6fa29bd1182bd15a3f6 p6 and r6 0.0 0.0
For user: 327 id: 4897b05d09c104e7fe099ed2857de9ad53b251ee p6 and r6 0.0 0.0
For user: 328 id: c8884c67816f834e41f6ba14611d486d0d002ac6 p6 and r6 0.0 0.0
For user: 329 id: 84c62046b4fb2326e590c3f92904b04f9476225c p6 and r6 0.0025 0.3333333333333333
For user: 330 id: 59e88e874411f4ebcbe0e74a8180c9e4b7ec3052

For user: 424 id: 8a5841023fb7f345032ee76b377a39015acb053b p6 and r6 0.0 0.0
For user: 425 id: 746225b9bbe9afcc2d95a9c34437a3be1c26b7ce p6 and r6 0.0 0.0
For user: 426 id: f4d96f5faabfd446aa92954850bea4bfd132dafb p6 and r6 0.0 0.0
For user: 427 id: a32008aad282eb17caba1ce8543ea3216373a988 p6 and r6 0.0 0.0
For user: 428 id: 65525da44fa19155a4e70602cb54e8de56f90acf p6 and r6 0.0 0.0
For user: 429 id: 3ab034ca5228c0c4ba72e1e0f53c6466716e58a1 p6 and r6 0.0 0.0
For user: 430 id: 18c13bd0cc24e05857bd10d520be9362ef65fee6 p6 and r6 0.0 0.0
For user: 431 id: 6f45f991198c40dcbcdf4bac177c8714aa4b286a p6 and r6 0.0 0.0
For user: 432 id: ea58e48d2e59c65f166e1acc9e981766dbf0d4c8 p6 and r6 0.0 0.0
For user: 433 id: 0bc24f84e966f22e8cbe7998d7e328b68b7315f9 p6 and r6 0.0 0.0
For user: 434 id: 287437bca6ea01e16529ff51cbff28bfcc865430 p6 and r6 0.0 0.0
For user: 435 id: b22850e0bf856e52678d578e664e4585a79c48a2 p6 and r6 0.0 0.0
For user: 436 id: 407d8d8b92b38c67600a233711154d3f72547f1f p6 and r6 0.0 0.0

For user: 529 id: e33a071ca29067e5fbd6c2c23f4a4e91cb2380b9 p6 and r6 0.0 0.0
For user: 530 id: 5b71462f7e393f6b1762819c62abd5b6983e159c p6 and r6 0.0 0.0
For user: 531 id: 1f9590d001c633e43efb27ac901a3e56b43c9dcf p6 and r6 0.0 0.0
For user: 532 id: a50cbcf61f556edce5451ae50fcca00e159ade91 p6 and r6 0.0 0.0
For user: 533 id: b666bd7fb801e7f2fa08cc9565e4795067e180ac p6 and r6 0.0 0.0
For user: 534 id: b9450dc7742ec184832a489e85be5d57ba59b945 p6 and r6 0.0 0.0
For user: 535 id: ff58a1fcf10d901c8cbbfc700eeef8429936685a p6 and r6 0.0 0.0
For user: 536 id: 346f139c321a03826caff9cc6d18a254f812c718 p6 and r6 0.0025 0.07142857142857142
For user: 537 id: 5ffac53eee4c3c0b126544945bb3c2cfcdfef78c p6 and r6 0.0 0.0
For user: 538 id: 1cad874dda372a9b0682f6e502935dbf0260bb83 p6 and r6 0.0 0.0
For user: 539 id: 651ee835b3783c7ae7d1026bf88c03eb21f5856f p6 and r6 0.0 0.0
For user: 540 id: 5420f9e79ac722998b20704a53558bc6bb778f6e p6 and r6 0.0 0.0
For user: 541 id: 5e0fea5cfc423fb22acd1b6dd319f1b1a583c82

For user: 632 id: b588d23136bf67e059b411aab23a289db4b3ec99 p6 and r6 0.0 0.0
For user: 633 id: 682ab0dddcb4e2fa309d48c4a2491ee8e13a6edf p6 and r6 0.0 0.0
For user: 634 id: 602f4b14d3d6a2bd960149637aa079e571468eb3 p6 and r6 0.0 0.0
For user: 635 id: 253ae0b1ba4db0ed787a73adc2ea7e9966b836c1 p6 and r6 0.0 0.0
For user: 636 id: d2032d92be104eb524ad0e3c2699e803936ddaf1 p6 and r6 0.0 0.0
For user: 637 id: 5cd1e285ceb9654f1201e0d48555186222f13836 p6 and r6 0.0 0.0
For user: 638 id: e07a5faf3ebd848cb9116782234a767ce988c803 p6 and r6 0.0 0.0
For user: 639 id: f87e4f615ecfd0aeca1d6e6bab88d368865e4002 p6 and r6 0.0 0.0
For user: 640 id: 38f3edbb215cb397dd32358f62ca3e9b7edc9671 p6 and r6 0.0 0.0
For user: 641 id: 052e643dbb068d2b39ed91e6887762a23023f2a0 p6 and r6 0.0 0.0
For user: 642 id: fb77b637ef45f395741e39d3747a36d8d133b6ea p6 and r6 0.0 0.0
For user: 643 id: ca66c1bcafe2d677190b4cf78d9107cffce1ffbc p6 and r6 0.0025 0.2
For user: 644 id: 0e51987b4d7e06d427a088c4397729e01ae1d2b1 p6 and r6 0.0 

For user: 737 id: 6fe927afb0e4fa11a71660f25ce8d150d2552513 p6 and r6 0.0 0.0
For user: 738 id: 771c61fa4804e5222cebc44e1d85c15892d3f0f4 p6 and r6 0.0 0.0
For user: 739 id: 51562edf812021ac0d2e19babc6bfbc0856d6b2e p6 and r6 0.0 0.0
For user: 740 id: 5858d606b3f1d6224d08b33243d256cca7bd0511 p6 and r6 0.0 0.0
For user: 741 id: 055eabe551e2a730025da383df23d87d2845c281 p6 and r6 0.005 0.2857142857142857
For user: 742 id: 04aee86b83a249a84d267474ca47a3be98c50016 p6 and r6 0.0 0.0
For user: 743 id: 5ed82ac9fa1a019c80bcad339084dcd9a28cca05 p6 and r6 0.0075 0.42857142857142855
For user: 744 id: 801e96da82a8de7454a227aa276da438797c65c5 p6 and r6 0.0 0.0
For user: 745 id: 86047da3c884af4dbd864a24213351b54dada0e5 p6 and r6 0.005 0.2857142857142857
For user: 746 id: 19d50c99f4ca736a034d9fc1b586c94f464bb187 p6 and r6 0.0 0.0
For user: 747 id: 543086e1f32bc193c3e3bb3311d7fe8ef0814051 p6 and r6 0.0 0.0
For user: 748 id: 8e43c20b33c4a9a9c1c27641389ea650b586891c p6 and r6 0.0 0.0
For user: 749 id: 061ca

For user: 840 id: 338c44d88fa1a751466bc1b0e2535a9a2871592d p6 and r6 0.0 0.0
For user: 841 id: b6ce08b93ed972288af9487613943f01eab38e84 p6 and r6 0.0 0.0
For user: 842 id: 5420aa3c0bb70ad7d8b134201179b78e42c5ac94 p6 and r6 0.0 0.0
For user: 843 id: 813e8a427c105b00bead125d8a65110d3cea1e09 p6 and r6 0.0 0.0
For user: 844 id: d9c52e72c6df35993b848378224fb7d5664635eb p6 and r6 0.0 0.0
For user: 845 id: ec932f8ee90abdfa003fb9f425c0d0b6fcf1313b p6 and r6 0.0 0.0
For user: 846 id: 1dc74d2ff2f03ac66461819eee4bba406757b987 p6 and r6 0.0 0.0
For user: 847 id: ec4e4da8b8758dfa5ab5dcc384edb72ac8f865eb p6 and r6 0.0 0.0
For user: 848 id: 81f0492705b2306ec85fb10e8ae2f422e09ba30d p6 and r6 0.0 0.0
For user: 849 id: db73e89e996dbc6a554e872449b556a371d31e3c p6 and r6 0.0 0.0
For user: 850 id: 91ba189a68c01d4b4c480afcdc217c2e54588be6 p6 and r6 0.0 0.0
For user: 851 id: 53e24e80221a799c2657c28f4558a2385f05fd7d p6 and r6 0.0 0.0
For user: 852 id: 472dd4a168722574ad132e70faa235631cd05693 p6 and r6 0.0 0.0

For user: 946 id: b1a9d910f4856298760b5ddd3e6eaf6d881259f1 p6 and r6 0.0 0.0
For user: 947 id: f264c0efcdf1e167af9d2786049831aa0212f451 p6 and r6 0.0 0.0
For user: 948 id: d56a7a6acbf9ead364c8d1b29911b0cbd8469e8e p6 and r6 0.0 0.0
For user: 949 id: 55fd35457d3669612430984c0df26739989f5aa7 p6 and r6 0.0 0.0
For user: 950 id: f15ccdca69ba39d620308f798ccc99179077f0dc p6 and r6 0.0 0.0
For user: 951 id: 36aeb5e346c15d727929e80bef4b915ca9e75130 p6 and r6 0.0 0.0
For user: 952 id: f692d010b31dca7e6b938b6535594686d7e64546 p6 and r6 0.0 0.0
For user: 953 id: dae5f59457bdc3979023eb7cb21ceb573d0cf037 p6 and r6 0.0 0.0
For user: 954 id: 7ba1b204073ab96b2ba1e7a57657ef134a83a934 p6 and r6 0.0 0.0
For user: 955 id: 0d1b2d76c18090890c21f115fd71e92bf2ac28d1 p6 and r6 0.0 0.0
For user: 956 id: 4537960513540aa6af84a2ea7126f7c3386b3149 p6 and r6 0.0 0.0
For user: 957 id: d7d44da54abf4a19338cd410f1843f249c62cba1 p6 and r6 0.0 0.0
For user: 958 id: 6cb84fd4e32c7432f1f3d5c00ebdd8c3c2b81ae5 p6 and r6 0.0 0.0

In [42]:
average_num_songs = 0

counter = 1

for user_i in sample:
    #get all songs for this specific user
    user_i_data = userdata.loc[userdata['user'] == user_i]
    #print(user_i_data)
    
    #create a list of all the songs that this user listens to
    user_i_songs = user_i_data['song'].unique().tolist()
    average_num_songs += len(user_i_songs)
    print(counter)
    counter+=1
    
average_num_songs /= len(sample)
print(average_num_songs)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277


# ============================================================
# Distance Metrics
finding the best distance metrics using the same evaluation technique

In [38]:
relevant_features3 = ['hotttness','familiarity']
#adding features based on Pranshu's Data Analysis
#relevant_features3 = ['hotttness','familiarity','duration','loudness','tempo','key','mode','time_signature']

train_data3 = spotify_data_df[relevant_features3]


#scaler3 = preprocessing.MinMaxScaler()
scaler3 = preprocessing.RobustScaler()
train_data_norm3 = scaler3.fit_transform(train_data3)

In [39]:
#Building our different KNNs off the different features
KNN1 = NearestNeighbors(1000,metric='chebyshev')
KNN1.fit(train_data_norm3)

d = {}
d['p'] = 5
KNN2 = NearestNeighbors(1000, metric='minkowski', metric_params=d)
KNN2.fit(train_data_norm3)

KNN3 = NearestNeighbors(1000, metric='manhattan')
KNN3.fit(train_data_norm3)


KNN4 = NearestNeighbors(1000)
KNN4.fit(train_data_norm3)

  metric_params=metric_params, n_jobs=n_jobs, **kwargs)
  return self._fit(X)


NearestNeighbors(algorithm='auto', leaf_size=30, metric='minkowski',
                 metric_params=None, n_jobs=None, n_neighbors=1000, p=2,
                 radius=1.0)

In [40]:
p1 = p2 = p3 = p4 = 0
r1 = r2 = r3 = r4 = 0

counter = 1

for user_i in sample:
    #get all songs for this specific user
    user_i_data = userdata.loc[userdata['user'] == user_i]
    #print(user_i_data)
    
    #create a list of all the songs that this user listens to
    user_i_songs = user_i_data['song'].unique().tolist()
    #print(user_i_songs)
    #print(len(user_i_songs))
    
    #split data into train and test
    songs_i_train, songs_i_test = train_test_split(user_i_songs,test_size=0.25,random_state=rand_seed)
    
    #build the features input for one KNN algorithm
    u_arr = create_song_feature_array(songs_i_train,relevant_features3, scaler3)
    
    prec, rec = run_model(KNN1,u_arr,songs_i_test,100)
    p1 += prec
    r1 += rec
    
    prec, rec = run_model(KNN2,u_arr,songs_i_test,100)
    p2 += prec
    r2 += rec
    
    prec, rec = run_model(KNN3,u_arr,songs_i_test,100)
    p3 += prec
    r3 += rec
    
    prec, rec = run_model(KNN4,u_arr,songs_i_test,100)
    p4 += prec
    r4 += rec
    
    print("For user:", counter, "id:", user_i, "p4 and r4", prec, rec)
    counter += 1

p1 /= len(sample)
p2 /= len(sample)
p3 /= len(sample)
p4 /= len(sample)

r1 /= len(sample)
r2 /= len(sample)
r3 /= len(sample)
r4 /= len(sample)

print("for Chebyshev, avg precision:", p1, "avg recall:", r1)
print("for Minkowski (p=5), avg precision:", p2, "avg recall:", r2)
print("for Manhattan, avg precision:", p3, "avg recall:", r3)
print("for Euclidean, avg precision:", p4, "avg recall:", r4)

For user: 1 id: f0079fb5619dc0f2cd1fd55e13207effc2cc2ddf p4 and r4 0.0 0.0
For user: 2 id: cf28cd441dabbb8091e0065c583d214fec545b56 p4 and r4 0.0 0.0
For user: 3 id: c51be6ee821e9c674c7198964404d1c053fff8b0 p4 and r4 0.0 0.0
For user: 4 id: 09225307a641f9a43b1b4ad1b8d8e1f2f93532f2 p4 and r4 0.0 0.0
For user: 5 id: 9b02de3fbc1b0e9088a8b17002f60f7b0c33ec54 p4 and r4 0.0 0.0
For user: 6 id: 211bb42a26b0da0d190ded2072d12d0946d5f67f p4 and r4 0.0 0.0
For user: 7 id: 5b3389d4a92d7256392dc60465dbe903af0c3475 p4 and r4 0.0 0.0
For user: 8 id: 559ff7e2ee976599f0cb62bea63971ec347d4551 p4 and r4 0.0 0.0
For user: 9 id: 916138a4a7bec1a93de83724371b01b9de8041df p4 and r4 0.0 0.0
For user: 10 id: 58a649d25f775aba1829ac8a282bcc45c3dfde21 p4 and r4 0.0 0.0
For user: 11 id: 9afcb2a2c50dd6388f4a3a80285682ffdfe303d4 p4 and r4 0.0 0.0
For user: 12 id: 36bfa01ccb000d117aeadbffeae74a314c0aad5b p4 and r4 0.0 0.0
For user: 13 id: 7950b48dc6f3e612ff05a26f6a7662f2a8e6a79f p4 and r4 0.0 0.0
For user: 14 id: 1042

For user: 108 id: 2269a7aeb590dc35ed4759e1dbfe70a23bc16a9c p4 and r4 0.0 0.0
For user: 109 id: 7fecdafd3a6934ca5f27807c7712a0387ceab59e p4 and r4 0.0 0.0
For user: 110 id: 0625bcba6863889d76ba9b5cfc6c67eb39757049 p4 and r4 0.0 0.0
For user: 111 id: 114e87f0aabdeaef949245356758204df4062acc p4 and r4 0.0 0.0
For user: 112 id: e5366c901c49968aeaab714f9172fbcfacc23a6e p4 and r4 0.0 0.0
For user: 113 id: 8442dcff8890b2870ce0cc4c495d4b010fcef46e p4 and r4 0.0 0.0
For user: 114 id: 562bbaf5de24a3ba0de8ff0452e58c8354ed1c8b p4 and r4 0.0 0.0
For user: 115 id: bb9f772af7f331bc5e07e1e67a1879ae750aac8d p4 and r4 0.0 0.0
For user: 116 id: c802c72a22587ed2c9fcc0d32de68d926469003d p4 and r4 0.0 0.0
For user: 117 id: 9f4d24bb6e52e359e3f959e4d0434affaa0a0e65 p4 and r4 0.0 0.0
For user: 118 id: 2bfe55e746e10d57526b76dbf532010e7d957122 p4 and r4 0.0 0.0
For user: 119 id: 554746367e28728413042c22a7b5534e660bb358 p4 and r4 0.0 0.0
For user: 120 id: e2c4118c1e01d74a3ac17f73613a38d3ad91c72a p4 and r4 0.0 0.0

For user: 214 id: 12cdcd2e2ebafe58ebbc1110ed70d4ed626e8c69 p4 and r4 0.0 0.0
For user: 215 id: f9d648e3a4cf0fa477e18f2df4c6eae694d8bc27 p4 and r4 0.0 0.0
For user: 216 id: 38092292c6c245740c01a15039b71f38125ea536 p4 and r4 0.0 0.0
For user: 217 id: 880aa577e593a7d60a87f7bd328bc35ee7f3f028 p4 and r4 0.0 0.0
For user: 218 id: 225abbd9598dea0bf10d719b368d59a4d8dbf665 p4 and r4 0.02 0.10526315789473684
For user: 219 id: a5b7b8b52be93f8a292bb18d7d4f41858d676eeb p4 and r4 0.0 0.0
For user: 220 id: bd3a6607bc4278a06bd3c4d38a3dbb61b4d45057 p4 and r4 0.0 0.0
For user: 221 id: 9a17148881086425c3e86bf2bc4b8a781185771a p4 and r4 0.0 0.0
For user: 222 id: a4752432671dbe2ebacf4eebaf23502ea06bc99d p4 and r4 0.0 0.0
For user: 223 id: 57f6525879f1ee8858f5f78b5664d53698e0816f p4 and r4 0.01 0.2
For user: 224 id: d831044849c478a291b75004452d289c2acddc95 p4 and r4 0.0 0.0
For user: 225 id: 1de525dfe6cdbbae8d624fe12e57ad8b8d0a5ad2 p4 and r4 0.0 0.0
For user: 226 id: 875e2e2145e7bf2e915cf7e81621ab4e28487c56

For user: 320 id: 3f61ead20ef5d0c5d31256ed703228e6f7e1c540 p4 and r4 0.0 0.0
For user: 321 id: da061a3f45aa6cc1d2e963d57f060eea09e7471a p4 and r4 0.0 0.0
For user: 322 id: b588fac10c9aafdc7ca41ca27575bd18d6655e03 p4 and r4 0.0 0.0
For user: 323 id: 6d8b520019a1c7882a70dc012e1110caaeda3497 p4 and r4 0.0 0.0
For user: 324 id: 429daebea676753688e96ca0cd73d0a0108d4f60 p4 and r4 0.0 0.0
For user: 325 id: b120028367d5e26e31af7e15dcd2dabb0f7b5d39 p4 and r4 0.0 0.0
For user: 326 id: f0f59e3d697c98e7f75bb6fa29bd1182bd15a3f6 p4 and r4 0.0 0.0
For user: 327 id: 4897b05d09c104e7fe099ed2857de9ad53b251ee p4 and r4 0.0 0.0
For user: 328 id: c8884c67816f834e41f6ba14611d486d0d002ac6 p4 and r4 0.0 0.0
For user: 329 id: 84c62046b4fb2326e590c3f92904b04f9476225c p4 and r4 0.01 0.3333333333333333
For user: 330 id: 59e88e874411f4ebcbe0e74a8180c9e4b7ec3052 p4 and r4 0.0 0.0
For user: 331 id: 362e7bc451922f0f1a982b5b259cb470ec65ab4b p4 and r4 0.0 0.0
For user: 332 id: 8547f99a63d33ad46a784b8daf9ddd75fa765a21 p

For user: 426 id: f4d96f5faabfd446aa92954850bea4bfd132dafb p4 and r4 0.0 0.0
For user: 427 id: a32008aad282eb17caba1ce8543ea3216373a988 p4 and r4 0.0 0.0
For user: 428 id: 65525da44fa19155a4e70602cb54e8de56f90acf p4 and r4 0.0 0.0
For user: 429 id: 3ab034ca5228c0c4ba72e1e0f53c6466716e58a1 p4 and r4 0.0 0.0
For user: 430 id: 18c13bd0cc24e05857bd10d520be9362ef65fee6 p4 and r4 0.0 0.0
For user: 431 id: 6f45f991198c40dcbcdf4bac177c8714aa4b286a p4 and r4 0.0 0.0
For user: 432 id: ea58e48d2e59c65f166e1acc9e981766dbf0d4c8 p4 and r4 0.0 0.0
For user: 433 id: 0bc24f84e966f22e8cbe7998d7e328b68b7315f9 p4 and r4 0.0 0.0
For user: 434 id: 287437bca6ea01e16529ff51cbff28bfcc865430 p4 and r4 0.0 0.0
For user: 435 id: b22850e0bf856e52678d578e664e4585a79c48a2 p4 and r4 0.0 0.0
For user: 436 id: 407d8d8b92b38c67600a233711154d3f72547f1f p4 and r4 0.0 0.0
For user: 437 id: f8cbcd45c0f2462e53dcc00b03e79b11bdbde456 p4 and r4 0.0 0.0
For user: 438 id: 7f0a2e1b94e977ac07063e94e0f09bd736e1e352 p4 and r4 0.0 0.0

For user: 532 id: a50cbcf61f556edce5451ae50fcca00e159ade91 p4 and r4 0.0 0.0
For user: 533 id: b666bd7fb801e7f2fa08cc9565e4795067e180ac p4 and r4 0.0 0.0
For user: 534 id: b9450dc7742ec184832a489e85be5d57ba59b945 p4 and r4 0.0 0.0
For user: 535 id: ff58a1fcf10d901c8cbbfc700eeef8429936685a p4 and r4 0.0 0.0
For user: 536 id: 346f139c321a03826caff9cc6d18a254f812c718 p4 and r4 0.01 0.07142857142857142
For user: 537 id: 5ffac53eee4c3c0b126544945bb3c2cfcdfef78c p4 and r4 0.0 0.0
For user: 538 id: 1cad874dda372a9b0682f6e502935dbf0260bb83 p4 and r4 0.0 0.0
For user: 539 id: 651ee835b3783c7ae7d1026bf88c03eb21f5856f p4 and r4 0.0 0.0
For user: 540 id: 5420f9e79ac722998b20704a53558bc6bb778f6e p4 and r4 0.0 0.0
For user: 541 id: 5e0fea5cfc423fb22acd1b6dd319f1b1a583c828 p4 and r4 0.0 0.0
For user: 542 id: 1e9782d1775514320e11abbf7940bf5ef1da2eef p4 and r4 0.0 0.0
For user: 543 id: 3af0d84d645ca36348c163ea080d6c4deaf15585 p4 and r4 0.0 0.0
For user: 544 id: eb3099d55fde4781dbddd962a206bf19a237ee27 

For user: 636 id: d2032d92be104eb524ad0e3c2699e803936ddaf1 p4 and r4 0.0 0.0
For user: 637 id: 5cd1e285ceb9654f1201e0d48555186222f13836 p4 and r4 0.0 0.0
For user: 638 id: e07a5faf3ebd848cb9116782234a767ce988c803 p4 and r4 0.0 0.0
For user: 639 id: f87e4f615ecfd0aeca1d6e6bab88d368865e4002 p4 and r4 0.0 0.0
For user: 640 id: 38f3edbb215cb397dd32358f62ca3e9b7edc9671 p4 and r4 0.0 0.0
For user: 641 id: 052e643dbb068d2b39ed91e6887762a23023f2a0 p4 and r4 0.0 0.0
For user: 642 id: fb77b637ef45f395741e39d3747a36d8d133b6ea p4 and r4 0.0 0.0
For user: 643 id: ca66c1bcafe2d677190b4cf78d9107cffce1ffbc p4 and r4 0.0 0.0
For user: 644 id: 0e51987b4d7e06d427a088c4397729e01ae1d2b1 p4 and r4 0.0 0.0
For user: 645 id: 4dbe118f1da22ecba8ef41e8b560380b82672501 p4 and r4 0.02 0.25
For user: 646 id: ec2a818d5ddfa28c3d2214655b76c3d13e58aa23 p4 and r4 0.0 0.0
For user: 647 id: 9b2638a397ccad63e52bba70401cc7ec70d5cb5a p4 and r4 0.02 0.2857142857142857
For user: 648 id: 0988ee5837b787e4978773435cd2b976221e41c8

For user: 741 id: 055eabe551e2a730025da383df23d87d2845c281 p4 and r4 0.02 0.2857142857142857
For user: 742 id: 04aee86b83a249a84d267474ca47a3be98c50016 p4 and r4 0.0 0.0
For user: 743 id: 5ed82ac9fa1a019c80bcad339084dcd9a28cca05 p4 and r4 0.03 0.42857142857142855
For user: 744 id: 801e96da82a8de7454a227aa276da438797c65c5 p4 and r4 0.0 0.0
For user: 745 id: 86047da3c884af4dbd864a24213351b54dada0e5 p4 and r4 0.02 0.2857142857142857
For user: 746 id: 19d50c99f4ca736a034d9fc1b586c94f464bb187 p4 and r4 0.0 0.0
For user: 747 id: 543086e1f32bc193c3e3bb3311d7fe8ef0814051 p4 and r4 0.0 0.0
For user: 748 id: 8e43c20b33c4a9a9c1c27641389ea650b586891c p4 and r4 0.0 0.0
For user: 749 id: 061ca4da13c09861f1fbb177aa2f99a2e9e7b15f p4 and r4 0.0 0.0
For user: 750 id: 1341ab6e1c5f5be34a72d7b2661d2c7ef75e6333 p4 and r4 0.0 0.0
For user: 751 id: a54fecc03a22bff16ca6b2416f368d204d8731d0 p4 and r4 0.0 0.0
For user: 752 id: 3bd7ba1ace813f2d7abd29d960bad26f80c4e451 p4 and r4 0.0 0.0
For user: 753 id: a8a703f30

For user: 847 id: ec4e4da8b8758dfa5ab5dcc384edb72ac8f865eb p4 and r4 0.0 0.0
For user: 848 id: 81f0492705b2306ec85fb10e8ae2f422e09ba30d p4 and r4 0.0 0.0
For user: 849 id: db73e89e996dbc6a554e872449b556a371d31e3c p4 and r4 0.0 0.0
For user: 850 id: 91ba189a68c01d4b4c480afcdc217c2e54588be6 p4 and r4 0.0 0.0
For user: 851 id: 53e24e80221a799c2657c28f4558a2385f05fd7d p4 and r4 0.0 0.0
For user: 852 id: 472dd4a168722574ad132e70faa235631cd05693 p4 and r4 0.0 0.0
For user: 853 id: 61815a85348e71734078c0365e01099434dd695a p4 and r4 0.0 0.0
For user: 854 id: 61ee48c41a16c7e3836f8e0af15eef10d8714849 p4 and r4 0.0 0.0
For user: 855 id: a4915b0e039452ef36290ff0e11a4d9edb4a0a98 p4 and r4 0.0 0.0
For user: 856 id: 7ebfd1c25a30ff5369e74ed944aedf1b5a7112b8 p4 and r4 0.0 0.0
For user: 857 id: 37190979bd16c5e54e383e70e8a5ae509e91a79b p4 and r4 0.0 0.0
For user: 858 id: 01adc1ebecfbb2c035a171f75ffc5fb99e05a7eb p4 and r4 0.0 0.0
For user: 859 id: 3483475e4bf4c3880a729288c4aa7503575fe516 p4 and r4 0.0 0.0

For user: 952 id: f692d010b31dca7e6b938b6535594686d7e64546 p4 and r4 0.0 0.0
For user: 953 id: dae5f59457bdc3979023eb7cb21ceb573d0cf037 p4 and r4 0.0 0.0
For user: 954 id: 7ba1b204073ab96b2ba1e7a57657ef134a83a934 p4 and r4 0.0 0.0
For user: 955 id: 0d1b2d76c18090890c21f115fd71e92bf2ac28d1 p4 and r4 0.0 0.0
For user: 956 id: 4537960513540aa6af84a2ea7126f7c3386b3149 p4 and r4 0.0 0.0
For user: 957 id: d7d44da54abf4a19338cd410f1843f249c62cba1 p4 and r4 0.0 0.0
For user: 958 id: 6cb84fd4e32c7432f1f3d5c00ebdd8c3c2b81ae5 p4 and r4 0.0 0.0
For user: 959 id: 7c6a5eba755bed9e75eacf5adea50ab9f09a208c p4 and r4 0.0 0.0
For user: 960 id: 0d793db29fdb8ffa90b1095488396ea8df464cd7 p4 and r4 0.0 0.0
For user: 961 id: dd87e825abd7e4ff8082ed0763fea7b964f7451d p4 and r4 0.0 0.0
For user: 962 id: 643271f54e6506c0acbe427158152fe94191d3b0 p4 and r4 0.0 0.0
For user: 963 id: 08cc0fe2b48e0af27a3e6fda82f849cbd209932c p4 and r4 0.0 0.0
For user: 964 id: 076f00e265efdf61e4443ef65d612a7a4a4c5006 p4 and r4 0.01 0.

Looks like Manhattan distance performs best in our evaluation of returning songs the user has actually listened to before. 

# ==============================================================

With these results in mind,

Here's a model that uses the [hottness, familiarity] combo and 'manhattan' distance metric to predict the top 30 songs given a user

# Content Model

In [26]:
#relevant_features_best = ['hotttness','familiarity']

relevant_features3 = ['hotttness','familiarity','duration','loudness','tempo','key','mode','time_signature']

train_data_best = spotify_data_df[relevant_features3]

scaler_best = preprocessing.RobustScaler()
train_data_norm_best = scaler_best.fit_transform(train_data_best)

K = 1000
KNN_best = NearestNeighbors(K, metric='manhattan')
KNN_best.fit(train_data_norm_best)

NearestNeighbors(algorithm='auto', leaf_size=30, metric='manhattan',
                 metric_params=None, n_jobs=None, n_neighbors=1000, p=2,
                 radius=1.0)

In [27]:
def top_n(n=30, user_id=None):
    #check if specific user given
    if user_id is None:
        sample_u = np.random.choice(user_id_list, 1)
        user_id = sample_u[0]
        user_id = str(user_id)
        
    #get all songs for this specific user
    user_i_data = userdata.loc[userdata['user'] == user_i]
    
    
    total_playcount = user_i_data.sum()
    print("PLAYCOUNT",total_playcount)
    
    
    #check if user in dataset
    if len(user_i_data) == 0:
        raise Exception('User: {} has no songs'.format(user_id))
    
    #create a list of all the songs that this user listens to
    user_i_songs = user_i_data['song'].unique().tolist()
    start_ind = len(user_i_songs)
    if start_ind >= K - n:
        raise Exception('Need larger KNN, user has too many songs')
    end_ind = min(int(n+start_ind),999)
    
    #build the features input for best KNN algorithm
    user_songs_data = spotify_data_df.loc[spotify_data_df['song_id'].isin(user_i_songs)]
    rel_data = user_songs_data[relevant_features_best]
    s_feat_arr = scaler_best.transform(rel_data)
    
    
    neigh_ind = KNN_best.kneighbors(s_feat_arr, return_distance=False)
    topNsongs = neigh_ind[0][start_ind:end_ind]
    nearest_n = spotify_data_df.iloc[topNsongs,:][['song_id']]
    nearest_n_list = nearest_n['song_id'].tolist()
    
    return user_i_songs, nearest_n_list

### Run the following cell to generate recommended songs for Random User

In [28]:
#np.random.seed() #uncomment to clear random seed
rand_user_songs, rand_user_rec = top_n()

PLAYCOUNT user     dfd913fc5332b19797ffd5edb8cde96e1778dfdadfd913...
song     SOAFHIF12AB0184446SOAMZEO12AB0183D2CSOAQZVE12A...
plays                                                  315
dtype: object


ValueError: operands could not be broadcast together with shapes (95,2) (8,) (95,2) 

### View User's Listened Songs

In [103]:
listened_songs = spotify_data_df.loc[spotify_data_df['song_id'].isin(rand_user_songs)]
display(listened_songs[['artist_name','title']])

Unnamed: 0,artist_name,title
17850,SONATA ARCTICA,Dont say a word
21164,The Gathering,Saturnine
31353,Rammstein,BENZIN
38499,Rammstein,OHNE DICH
48504,SONATA ARCTICA,Blinded No More
63957,The Gathering,Even The Spirits Are Afraid
72972,The Gathering,A Life All Mine
82829,The Cranberries,Animal Instinct
132938,Rammstein,Klavier
149798,Rammstein,STEIN UM STEIN


### View Recommended Songs

In [104]:
rec_songs = spotify_data_df.loc[spotify_data_df['song_id'].isin(rand_user_rec)]
display(rec_songs[['artist_name','title']])

Unnamed: 0,artist_name,title
4156,Callenish Circle,Passionate Dance
12768,Squash Bowels,Grindvirus
31860,orkidea,Beautiful AltF4 Instrumental
53270,Squash Bowels,Nose Lunger
54946,Lemon D,This Is LA
68191,Squash Bowels,Sheep Dag
88556,orkidea,Beautiful
92779,Squash Bowels,Oust Odor Eliminator
108810,Callenish Circle,Suffer My Disbelief
119306,Squash Bowels,Hamsters In Your Head


Songs seems to be at least in the right genre