In [12]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from spotipy.oauth2 import SpotifyOAuth
import spotipy.util as util

import sys

from skimage import io
import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime



from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity

## Model Refinement 

### [New Data Source: ]('https://www.kaggle.com/datasets/zaheenhamidani/ultimate-spotify-tracks-db')

This data source was chosen as opposed to the previous data set, one because it has the key signature which has alot to do with music trends and the general sound, as well as the Genre data already being cleaned, allowing for less lossy reccommendations. 

In [2]:
spotify_data= pd.read_csv("/Users/victoriapuck-karam/Downloads/SpotifyFeatures.csv")

In [3]:
spotify_data.head()

Unnamed: 0,genre,artist_name,track_name,track_id,popularity,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence
0,Movie,Henri Salvador,C'est beau de faire un Show,0BRjO6ga9RKCKjfDqeFgWV,0,0.611,0.389,99373,0.91,0.0,C#,0.346,-1.828,Major,0.0525,166.969,4/4,0.814
1,Movie,Martin & les fées,Perdu d'avance (par Gad Elmaleh),0BjC1NfoEOOusryehmNudP,1,0.246,0.59,137373,0.737,0.0,F#,0.151,-5.559,Minor,0.0868,174.003,4/4,0.816
2,Movie,Joseph Williams,Don't Let Me Be Lonely Tonight,0CoSDzoNIKCRs124s9uTVy,3,0.952,0.663,170267,0.131,0.0,C,0.103,-13.879,Minor,0.0362,99.488,5/4,0.368
3,Movie,Henri Salvador,Dis-moi Monsieur Gordon Cooper,0Gc6TVm52BwZD07Ki6tIvf,0,0.703,0.24,152427,0.326,0.0,C#,0.0985,-12.178,Major,0.0395,171.758,4/4,0.227
4,Movie,Fabien Nataf,Ouverture,0IuslXpMROHdEPvSl1fTQK,4,0.95,0.331,82625,0.225,0.123,F,0.202,-21.15,Major,0.0456,140.576,4/4,0.39


In [4]:
spotify_features_df = spotify_data
genre_OHE = pd.get_dummies(spotify_features_df.genre)
key_OHE = pd.get_dummies(spotify_features_df.key)

### Converting Catagorical Variables (Normalization)

In [35]:
genre_OHE # turning the key signature data from cateogorical to numerical for modeling
key_OHE

Unnamed: 0,A,A#,B,C,C#,D,D#,E,F,F#,G,G#
0,0,0,0,0,1,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,1,0,0
2,0,0,0,1,0,0,0,0,0,0,0,0
3,0,0,0,0,1,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
232720,0,0,0,0,0,1,0,0,0,0,0,0
232721,0,0,0,0,0,0,0,1,0,0,0,0
232722,0,0,0,0,0,1,0,0,0,0,0,0
232723,1,0,0,0,0,0,0,0,0,0,0,0


### vectorization of feature data

In [5]:
scaled_features = MinMaxScaler().fit_transform([
  spotify_features_df['acousticness'].values,
  spotify_features_df['danceability'].values,
  spotify_features_df['duration_ms'].values,
  spotify_features_df['energy'].values,
  spotify_features_df['instrumentalness'].values,
  spotify_features_df['liveness'].values,
  spotify_features_df['loudness'].values,
  spotify_features_df['speechiness'].values,
  spotify_features_df['tempo'].values,
  spotify_features_df['valence'].values,
  ])

In [6]:
spotify_features_df[['acousticness','danceability','duration_ms','energy','instrumentalness','liveness','loudness','speechiness','tempo','valence']] = scaled_features.T

## dropping information not to be used in cosine similarity functionality

In [7]:
spotify_features_df = spotify_features_df.drop('genre',axis = 1)
spotify_features_df = spotify_features_df.drop('artist_name', axis = 1)
spotify_features_df = spotify_features_df.drop('track_name', axis = 1)
spotify_features_df = spotify_features_df.drop('popularity',axis = 1)
spotify_features_df = spotify_features_df.drop('key', axis = 1)
spotify_features_df = spotify_features_df.drop('mode', axis = 1)
spotify_features_df = spotify_features_df.drop('time_signature', axis = 1)


In [8]:
spotify_features_df = spotify_features_df.join(genre_OHE)
spotify_features_df = spotify_features_df.join(key_OHE)

In [9]:
spotify_features_df.head() #updated dataframe with fitted numerical data

Unnamed: 0,track_id,acousticness,danceability,duration_ms,energy,instrumentalness,liveness,loudness,speechiness,tempo,...,B,C,C#,D,D#,E,F,F#,G,G#
0,0BRjO6ga9RKCKjfDqeFgWV,2.5e-05,2.2e-05,1.0,2.8e-05,1.8e-05,2.2e-05,0.0,1.9e-05,0.001699,...,0,0,1,0,0,0,0,0,0,0
1,0BjC1NfoEOOusryehmNudP,4.2e-05,4.5e-05,1.0,4.6e-05,4e-05,4.2e-05,0.0,4.1e-05,0.001307,...,0,0,0,0,0,0,0,1,0,0
2,0CoSDzoNIKCRs124s9uTVy,8.7e-05,8.5e-05,1.0,8.2e-05,8.2e-05,8.2e-05,0.0,8.2e-05,0.000666,...,0,1,0,0,0,0,0,0,0,0
3,0Gc6TVm52BwZD07Ki6tIvf,8.4e-05,8.1e-05,1.0,8.2e-05,8e-05,8.1e-05,0.0,8e-05,0.001207,...,0,0,1,0,0,0,0,0,0,0
4,0IuslXpMROHdEPvSl1fTQK,0.000267,0.00026,1.0,0.000259,0.000257,0.000258,0.0,0.000256,0.001957,...,0,0,0,0,0,0,1,0,0,0


## connection to spotify api

In [10]:
client_id = '364e9ddb857b4cd59e9370d1f3f115fe'
client_secret= '48f8efbbd9dc49fca0b49aebd759ebfc'

In [11]:
scope = 'user-library-read'

if len(sys.argv) > 1:
    username = sys.argv[1]
else:
    print("Usage: %s username" % (sys.argv[0],))
    sys.exit()


In [12]:
auth_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(auth_manager=auth_manager)

In [13]:
token = util.prompt_for_user_token(scope, client_id= client_id, client_secret=client_secret, redirect_uri='http://localhost:8834/')


In [14]:
sp = spotipy.Spotify(auth=token)

### data retrieval from spotify api

In [15]:
playlist_dic = {}
playlist_cover_art = {}

for i in sp.current_user_playlists()['items']:
    playlist_dic[i['name']] = i['uri'].split(':')[2]
    playlist_cover_art[i['uri'].split(':')[2]] = i['images'][0]['url']

print(playlist_dic)

{'testing 1 -2 -3 ': '0SVTYWfVMhlROdFpZGWT7T', 'yeah': '2dUTHFq3ffChxcIYEDZSRg', 'dom': '2sXDUEJIy7ADKstOlEZb1N', 'stop being mentally ill 😃': '1Ax0ss6gBDTy0EEX3XwuHz', 'white woman victim mentalility': '0CqBqQDeemcY1uPno55gw3', 'it’s giving pregame ': '06uH5504qeZ9S1t8Yi98Re', "god's favorites": '34JjmLf1swnMZQdbGi1YqY', 'r u from new york even ': '3NHuIVmGx1rprbzvAcxFZf', 'tummy ache ': '5PTiaF2o9zptowGbU6SqhV', 'march-ish': '7cEfzMNky6frWSZYPlMjII', 'yer ': '7v9rp09s1WOdni2lWetiRv', 'sleep': '1WED6iXU4UHvKKm2l2CnRI', 'die for my bitch!!!!!!!!': '5S9fYTKASPBddrjIBi7XS2', 'chilxz': '3vQlFH3Tw5RPsxvIVjNoR2', 'coding music ': '6irm6oF0AMHMNUXDnoZZCL', 'spring ': '2Q5l8i8D6qoT2ID2L1uNIV', 'study new ': '7rkwo8iWk631naUIVe94fy', 'jeannie <3': '04KEaRrMoDMi6iNDt6HN5b', 'hmwrk': '5DXemOwm6LmS6xFWCVOA3R', 'different state of mind ': '4K46i5JvI82UFLSaAs3gRH', 'real?': '0OY59cJNNCCFaLVobRaCas', '888': '2PTUB2vwKNf4X6MfoVZ6oq', 'too good to gatekeep ': '5VlWJBuXkhkI3TddxpDFqa', 'viv ❤️\u200d🔥':

In [17]:
def generate_playlist_df(playlist_name, playlist_dic, spotify_data):
    
    playlist = pd.DataFrame()

    for i, j in enumerate(sp.playlist(playlist_dic[playlist_name])['tracks']['items']):
        playlist.loc[i, 'artist'] = j['track']['artists'][0]['name']
        playlist.loc[i, 'track_name'] = j['track']['name']
        playlist.loc[i, 'track_id'] = j['track']['id']
        playlist.loc[i, 'url'] = j['track']['album']['images'][1]['url']
        playlist.loc[i, 'date_added'] = j['added_at']

    playlist['date_added'] = pd.to_datetime(playlist['date_added'])  
    
    playlist = playlist[playlist['track_id'].isin(spotify_data['track_id'].values)].sort_values('date_added',ascending = False)

    return playlist


In [28]:
playlist_df = generate_playlist_df('yeah', playlist_dic, spotify_data) 

In [29]:
playlist_df

Unnamed: 0,artist,track_name,track_id,url,date_added
53,Choker,Daisy,3UxDyQ1pIVaeFgPcn3vmwT,https://i.scdn.co/image/ab67616d00001e0273ce49...,2022-05-02 02:57:42+00:00
52,Choker,Suzuki Peaches,3C52kv4XHZ5EwgjuLyxxGe,https://i.scdn.co/image/ab67616d00001e0273ce49...,2022-05-02 02:57:24+00:00
51,Choker,Windbreaker,1WKr4Sw2yGWUmQkxdvCivr,https://i.scdn.co/image/ab67616d00001e0273ce49...,2022-05-02 02:57:03+00:00
46,Vince Staples,Summertime,7GKiBHjFfvPRNqqOuRYqLB,https://i.scdn.co/image/ab67616d00001e0286f51d...,2022-05-02 02:55:22+00:00
45,A$AP Rocky,Kids Turned Out Fine,0qp8MuZNKelOGTK2HPlXBA,https://i.scdn.co/image/ab67616d00001e029feadc...,2022-05-02 02:55:14+00:00
44,Saba,Photosynthesis,3DlgDXIYtnWtJKiB8bZQMv,https://i.scdn.co/image/ab67616d00001e02eec9bf...,2022-05-02 02:55:06+00:00
43,Frank Ocean,Provider,6R6ihJhRbgu7JxJKIbW57w,https://i.scdn.co/image/ab67616d00001e02ea6b1f...,2022-05-02 02:54:49+00:00
41,Frank Ocean,Lens,371H6HjS4SXGbQ9IVfFUIL,https://i.scdn.co/image/ab67616d00001e02d26658...,2022-05-02 02:53:01+00:00
40,Blood Orange,Chewing Gum,3jxExv27cf3OSt0BXtAR6Z,https://i.scdn.co/image/ab67616d00001e0247dd2c...,2022-05-02 02:52:14+00:00
39,Travis Scott,COFFEE BEAN,6vnfObZ4Ys70SBAtti1xZ9,https://i.scdn.co/image/ab67616d00001e02072e9f...,2022-05-02 02:52:08+00:00


In [30]:
def generate_playlist_vector(spotify_features, playlist_df, weight_factor):
    
    spotify_features_playlist = spotify_features[spotify_features['track_id'].isin(playlist_df['track_id'].values)]
    spotify_features_playlist = spotify_features_playlist.merge(playlist_df[['track_id','date_added']], on = 'track_id', how = 'inner')
    
    spotify_features_nonplaylist = spotify_features[~spotify_features['track_id'].isin(playlist_df['track_id'].values)]
    
    playlist_feature_set = spotify_features_playlist.sort_values('date_added',ascending=False)
    
    
    most_recent_date = playlist_feature_set.iloc[0,-1]
    
    for ix, row in playlist_feature_set.iterrows():
        playlist_feature_set.loc[ix,'days_from_recent'] = int((most_recent_date.to_pydatetime() - row.iloc[-1].to_pydatetime()).days)
        
    
    playlist_feature_set['weight'] = playlist_feature_set['days_from_recent'].apply(lambda x: weight_factor ** (-x))
    
    playlist_feature_set_weighted = playlist_feature_set.copy()
    
    playlist_feature_set_weighted.update(playlist_feature_set_weighted.iloc[:,:-3].mul(playlist_feature_set_weighted.weight.astype(int),0))   
    
    playlist_feature_set_weighted_final = playlist_feature_set_weighted.iloc[:, :-3]
    

    
    return playlist_feature_set_weighted_final.sum(axis = 0), spotify_features_nonplaylist


In [31]:
nonplaylist_df,playlist_vector = generate_playlist_vector(spotify_features_df, playlist_df, 1.2)
playlist_vector,nonplaylist_df, = generate_playlist_vector(spotify_features_df, playlist_df, 1.2)
# = generate_playlist_vector(spotify_features_df, playlist_df, 1.2)
print(playlist_vector.shape)
print(nonplaylist_df.shape)


(50,)
(232646, 50)


In [32]:
def generate_recommendation(spotify_data, playlist_vector, nonplaylist_df):

    non_playlist = spotify_data[spotify_data['track_id'].isin(nonplaylist_df['track_id'].values)]
    non_playlist['sim'] = cosine_similarity(nonplaylist_df.drop(['track_id'], axis = 1).values, playlist_vector.drop(labels = 'track_id').values.reshape(1, -1))[:,0]
    non_playlist_top15 = non_playlist.sort_values('sim',ascending = False).head(15)
    non_playlist_top15['url'] = non_playlist_top15['track_id'].apply(lambda x: sp.track(x)['album']['images'][1]['url'])
    
    return  non_playlist_top15

### returned recommendations 

In [34]:
top15 = generate_recommendation(spotify_data, playlist_vector, nonplaylist_df)  
top15

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  non_playlist['sim'] = cosine_similarity(nonplaylist_df.drop(['track_id'], axis = 1).values, playlist_vector.drop(labels = 'track_id').values.reshape(1, -1))[:,0]


Unnamed: 0,genre,artist_name,track_name,track_id,popularity,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence,sim,url
152788,R&B,Joji,worldstar money (interlude),435yU2MvEGfDdmbH0noWZ0,65,7.6e-05,7.3e-05,1.0,7.1e-05,7.4e-05,G,7e-05,0.0,Major,7e-05,0.001228,1/4,7.2e-05,0.70525,https://i.scdn.co/image/ab67616d00001e023a927e...
85310,R&B,Ilham,i don't wanna be with you no more,4hSle8C11FkvOCBMbtmjxV,43,8.4e-05,8.1e-05,1.0,8e-05,7.7e-05,G,7.8e-05,0.0,Major,7.7e-05,0.001152,4/4,7.9e-05,0.70525,https://i.scdn.co/image/ab67616d00001e029f1a94...
55038,R&B,Frank Ocean,Solo (Reprise),2qtoRFCOEL1gRn5q9DJC7F,63,9.2e-05,9.3e-05,1.0,9.6e-05,8.6e-05,G,8.8e-05,0.0,Minor,8.9e-05,0.001144,4/4,9.4e-05,0.70525,https://i.scdn.co/image/ab67616d00001e02c5649a...
60153,R&B,UMI,Happy Again,3UyV8MvjQkeh6DqLmuaUAo,50,7.9e-05,7.8e-05,1.0,7.9e-05,7.4e-05,G,7.5e-05,0.0,Major,7.6e-05,0.00127,4/4,7.7e-05,0.70525,https://i.scdn.co/image/ab67616d00001e02af5851...
85341,R&B,Always Never,Dangerous,1RjIQxQONCtfxhWjLRCZLg,44,5.7e-05,6.1e-05,1.0,6.2e-05,5.6e-05,G,5.7e-05,0.0,Minor,5.7e-05,0.00119,4/4,5.7e-05,0.70525,https://i.scdn.co/image/ab67616d00001e0217783c...
56999,R&B,McClenney,Us,7td13K8PfRT9j2MI6WGwMA,55,0.000109,0.00011,1.0,0.000106,0.000111,G,0.000106,0.0,Major,0.000106,0.001137,4/4,0.00011,0.70525,https://i.scdn.co/image/ab67616d00001e02ea1c93...
53990,R&B,Tora,Too Little,2kaZuAnq8jwPzVxGiqTdus,65,8.4e-05,8.5e-05,1.0,8.3e-05,8e-05,G,8.1e-05,0.0,Minor,8e-05,0.001085,4/4,8.1e-05,0.70525,https://i.scdn.co/image/ab67616d00001e02271ea6...
85618,R&B,Star Cast,Perfecta,2nIgnZOZWulAPVD5I5bnbo,41,3.8e-05,4.2e-05,1.0,4.2e-05,3.8e-05,G,3.8e-05,0.0,Major,3.9e-05,0.001215,4/4,4.1e-05,0.70525,https://i.scdn.co/image/ab67616d00001e0297a678...
83406,R&B,Yeek,Slept on,61FHzmovoYcohP3bQKUofl,45,8.9e-05,8.9e-05,1.0,8.7e-05,8.6e-05,G,8.6e-05,0.0,Major,8.6e-05,0.001078,4/4,8.8e-05,0.70525,https://i.scdn.co/image/ab67616d00001e021e7b38...
54787,R&B,khai dreams,Fantasy,53usuYfeK66GLBhm6k7x5e,60,8.1e-05,7.9e-05,1.0,8e-05,7.7e-05,G,7.9e-05,0.0,Major,7.8e-05,0.001072,4/4,7.8e-05,0.70525,https://i.scdn.co/image/ab67616d00001e026b4035...


In [36]:
avgSim= top15["sim"].mean()

In [37]:
avgSim

0.705250050151097