In [2]:
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy.oauth2 as oauth2
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
import seaborn as sns
%config InlineBackend.figure_format ='retina'
import random
from functools import reduce    


## Spotify API Call

In [3]:
import os

# Credentials are read from the environment so that secrets never live in the
# notebook (or in its version history). SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET
# must be set before the kernel starts. Any secret that was previously committed
# in this cell should be treated as compromised and rotated.
client_id = os.environ.get('SPOTIPY_CLIENT_ID')
client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET')
if not client_id or not client_secret:
    raise RuntimeError(
        "Set the SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET environment "
        "variables before running this notebook."
    )

redirect_uri='http://localhost:8910/callback'
username = '61d4pmnwnwjt7xz9tj6v3txho'

# Scope needed by sp.current_user_top_tracks in the next section.
scope = 'user-top-read'

# App-only client: used as the fallback when no user token can be obtained.
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, 
                                                      client_secret=client_secret)
                                                      
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)

# Interactive user authorization; on success the user-scoped client replaces `sp`.
token = util.prompt_for_user_token(username,scope, client_id, client_secret, redirect_uri)

if token:
    sp = spotipy.Spotify(auth=token)
else:
    # `sp` stays the app-only client here, so user-scoped calls will not work.
    print("Can't get token for", username)
    

## Top Tracks Extraction

In [4]:
# Request up to 50 of the current user's top tracks for the 'short_term' time range.
results = sp.current_user_top_tracks(limit=50, offset=0, time_range='short_term')

# Pull one list per output column out of the returned track objects.
top_items = results['items']
track_name = [item['name'] for item in top_items]
track_id = [item['id'] for item in top_items]
artist = [item["artists"][0]["name"] for item in top_items]  # first listed artist only
album = [item["album"]["name"] for item in top_items]
duration = [item["duration_ms"] for item in top_items]
popularity = [item["popularity"] for item in top_items]

# Assemble the per-track table (column order is part of the displayed output).
df_top_tracks = pd.DataFrame({
    "track_name": track_name,
    "album": album,
    "track_id": track_id,
    "artist": artist,
    "duration": duration,
    "popularity": popularity,
})

df_top_tracks

Unnamed: 0,track_name,album,track_id,artist,duration,popularity
0,Vaathi Coming,Master (Original Motion Picture Soundtrack),2BcPFQ7nrtUObgAs72xaac,Anirudh Ravichander,228257,63
1,"Ey Inge Paaru (From ""Velaiyilla Pattathari"")",Voice of Ani,34hssXUwRCru1RbfxCWLth,Anirudh Ravichander,117788,24
2,Feels Like Love,Noah Schnacky EP,04hHPq6kXTbcSSDrdaP3s4,Noah Schnacky,174760,53
3,Andha Kanna Paathaakaa,Master (Original Motion Picture Soundtrack),0qvoxfYodIfe14gaidhnsV,Anirudh Ravichander,194500,65
4,All the Cowboys,All the Cowboys,13TOvHAfdO8wryqxBL7fnI,Alexandra Kay,228231,55
5,My Person,Wilderness,1MOOJuxUu9QiQE9GgkYYPb,Spencer Crandall,177000,59
6,Comeback,Noah Schnacky EP,1wr0HUe5tFDlN32jfwt9IS,Noah Schnacky,168240,49
7,Maybe We Will - 2020 Version,Noah Schnacky EP,26LKaGfw6ZolgGArPDEN2R,Noah Schnacky,190440,43
8,Polakattum Para Para,Master (Original Motion Picture Soundtrack),2K058s9yrpoUfANaQt7Zu4,Anirudh Ravichander,214124,55
9,"Neeyum Naanum (From ""Naanum Rowdy Dhaan"")",Voice of Ani,2XMJ3A06ex0UI9JRPeOYrQ,Anirudh Ravichander,301951,21


## Features Extraction

In [5]:
def get_features(sp, df):
    """Fetch Spotify audio features for every track listed in ``df``.

    Parameters
    ----------
    sp : object
        Client exposing ``audio_features(track_ids)`` that returns one entry per
        requested id (a dict of features, or ``None`` when none are available).
    df : pandas.DataFrame
        Must contain ``'track_id'`` and ``'track_name'`` columns.

    Returns
    -------
    pandas.DataFrame
        One row per row of ``df`` (index reset to 0..n-1): ``track_id``,
        ``track_name`` and the audio-feature columns. Tracks for which the client
        returned ``None`` get NaN in every feature column.
    """
    feature_columns = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
                       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
                       'duration_ms']
    batch_size = 50  # ids sent per audio_features request

    # Reset the index so the column-wise concat below lines up row-for-row even
    # when `df` was filtered or sorted before being passed in.
    playlist = df[['track_id', 'track_name']].reset_index(drop=True)
    track_ids = playlist['track_id'].tolist()

    # Request features batch by batch so frames longer than one batch are fully covered.
    features = []
    for start in range(0, len(track_ids), batch_size):
        features += sp.audio_features(track_ids[start:start + batch_size])

    # A None entry means no features for that track: keep the row, fill with NaN.
    missing_row = [float('nan')] * len(feature_columns)
    feature_list = [
        [feature[column] for column in feature_columns] if feature is not None else list(missing_row)
        for feature in features
    ]

    df_audio_feature = pd.DataFrame(feature_list, columns=feature_columns)
    df_playlist_audio_features = pd.concat([playlist, df_audio_feature], axis=1)
    return df_playlist_audio_features
    

In [6]:
# 'mode' is a binary flag, so it is left out of the feature set used downstream.
df_final_features = get_features(sp, df_top_tracks).drop(columns=['mode'])
df_final_features

Unnamed: 0,track_id,track_name,danceability,energy,key,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms
0,2BcPFQ7nrtUObgAs72xaac,Vaathi Coming,0.643,0.952,7,-4.15,0.246,0.276,0.49,0.0588,0.585,164.784,228258
1,34hssXUwRCru1RbfxCWLth,"Ey Inge Paaru (From ""Velaiyilla Pattathari"")",0.643,0.834,7,-4.104,0.182,0.0652,4.2e-05,0.612,0.872,189.998,117788
2,04hHPq6kXTbcSSDrdaP3s4,Feels Like Love,0.639,0.845,2,-5.139,0.0414,0.00117,0.0,0.126,0.703,117.047,174760
3,0qvoxfYodIfe14gaidhnsV,Andha Kanna Paathaakaa,0.747,0.86,6,-4.071,0.279,0.301,0.0383,0.0743,0.625,90.445,194500
4,13TOvHAfdO8wryqxBL7fnI,All the Cowboys,0.501,0.554,9,-6.292,0.0265,0.321,0.0,0.178,0.47,144.022,228232
5,1MOOJuxUu9QiQE9GgkYYPb,My Person,0.567,0.753,7,-5.291,0.0487,0.0894,0.0,0.13,0.756,165.966,177000
6,1wr0HUe5tFDlN32jfwt9IS,Comeback,0.542,0.817,11,-5.192,0.0324,0.0383,0.0,0.218,0.683,95.978,168240
7,26LKaGfw6ZolgGArPDEN2R,Maybe We Will - 2020 Version,0.685,0.485,4,-7.755,0.0272,0.151,0.0,0.11,0.526,85.045,190440
8,2K058s9yrpoUfANaQt7Zu4,Polakattum Para Para,0.758,0.796,11,-6.483,0.17,0.165,4.1e-05,0.29,0.919,145.475,214125
9,2XMJ3A06ex0UI9JRPeOYrQ,"Neeyum Naanum (From ""Naanum Rowdy Dhaan"")",0.572,0.62,9,-9.949,0.0436,0.101,0.00488,0.113,0.345,157.968,301951


## EDA Performed

## Featured Playlists Extraction

In [7]:
def featured_playlists(sp):
    """Return the featured playlists reported by ``sp`` as a DataFrame.

    Calls ``sp.featured_playlists()`` once and reads ``['playlists']['items']``
    from the response. The resulting frame has the columns ``playlist_id``,
    ``playlist_name`` and ``#tracks`` (the ``tracks.total`` value of each item).
    """
    entries = sp.featured_playlists()['playlists']['items']

    table = {
        'playlist_id': [entry['id'] for entry in entries],
        'playlist_name': [entry['name'] for entry in entries],
        '#tracks': [entry['tracks']['total'] for entry in entries],
    }
    return pd.DataFrame(table)

In [8]:
# Fetch the featured playlists once; later cells iterate over its 'playlist_id' column.
df_featured_playlists = featured_playlists(sp)
df_featured_playlists

Unnamed: 0,playlist_id,playlist_name,#tracks
0,37i9dQZF1DXcBWIGoYBM5M,Today's Top Hits,50
1,37i9dQZF1DX4bSrsRWE9cd,Bliss,74
2,37i9dQZF1DWYmVQ81PeQpL,Dance The Night Away,128
3,37i9dQZF1DX1gRalH1mWrP,Summer Hits,99
4,37i9dQZF1DWVinJBuv0P4z,Feel Good Classics,80
5,37i9dQZF1DXcWBRiUaG3o5,Evening Acoustic,115
6,37i9dQZF1DXdPec7aLTmlC,Happy Hits!,100
7,37i9dQZF1DWWvhKV4FBciw,Funk & Soul Classics,80
8,37i9dQZF1DWTJ7xPn4vNaz,All Out 70s,150
9,37i9dQZF1DX36Xw4IJIVKA,Lofi Hip-Hop,100


## Fetch Tracks for Each Playlist

In [9]:
def get_playlist_tracks(sp, playlist_id):
    """Collect the id and name of every usable track in a playlist.

    Pages through ``sp.playlist_tracks`` 100 items at a time until the response's
    ``'next'`` field is ``None``.

    Parameters
    ----------
    sp : object
        Client exposing ``playlist_tracks(playlist_id, fields, limit, offset, market)``.
    playlist_id : str
        Identifier of the playlist to read.

    Returns
    -------
    pandas.DataFrame
        Columns ``track_id`` and ``track_name``, one row per track. Playlist
        entries whose ``'track'`` is missing/None or whose track id is None are
        skipped, because they cannot be looked up for audio features later.
    """
    page_size = 100
    tracks = []
    offset = 0
    while True:
        track_list = sp.playlist_tracks(playlist_id, fields = None, limit = page_size,
                                        offset = offset, market = None)
        tracks += track_list['items']

        if track_list['next'] is None:
            break
        offset += page_size

    # Keep only entries that carry a track object with a real id.
    usable = [
        item['track'] for item in tracks
        if item.get('track') is not None and item['track'].get('id') is not None
    ]

    df_playlist_tracks = pd.DataFrame({
        'track_id': [track['id'] for track in usable],
        'track_name': [track['name'] for track in usable],
    })
    return df_playlist_tracks
    

## Get Audio Features for each track within a playlist

In [10]:
def get_audio_features(sp, playlist_id):
    """Return the tracks of a playlist together with their audio features.

    The track list comes from ``get_playlist_tracks``; features are requested
    from ``sp.audio_features`` in batches of 50 ids.

    Parameters
    ----------
    sp : object
        Client exposing ``audio_features(track_ids)``.
    playlist_id : str
        Identifier of the playlist to read.

    Returns
    -------
    pandas.DataFrame
        ``track_id``, ``track_name`` and the audio-feature columns (``mode`` is
        deliberately not included). Tracks for which the client returned ``None``
        get NaN in every feature column instead of raising.
    """
    feature_columns = ['danceability', 'energy', 'key', 'loudness', 'speechiness',
                       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
                       'duration_ms']
    batch_size = 50  # ids sent per audio_features request

    # Reset the index so the column-wise concat below is strictly positional.
    playlist = get_playlist_tracks(sp, playlist_id).reset_index(drop=True)
    track_ids = playlist['track_id'].tolist()

    audio_features = []
    for start in range(0, len(track_ids), batch_size):
        audio_features += sp.audio_features(track_ids[start:start + batch_size])

    # A None entry means no features for that track: keep the row, fill with NaN.
    missing_row = [float('nan')] * len(feature_columns)
    feature_list = [
        [feature[column] for column in feature_columns] if feature is not None else list(missing_row)
        for feature in audio_features
    ]

    df_audio_feature = pd.DataFrame(feature_list, columns=feature_columns)
    df_playlist_audio_features = pd.concat([playlist, df_audio_feature], axis=1)
    return df_playlist_audio_features
    

## Get the mean & merge each playlist

In [11]:
def get_audio_features_mean(sp, playlist_id):
    """Return the mean of each numeric audio feature of a playlist.

    Parameters
    ----------
    sp : object
        Client forwarded to ``get_audio_features``.
    playlist_id : str
        Identifier of the playlist; also used as the name of the result column.

    Returns
    -------
    pandas.DataFrame
        A single column named ``playlist_id``, indexed by feature name.
    """
    playlist = get_audio_features(sp, playlist_id)
    # numeric_only=True keeps the string columns (track_id, track_name) out of the
    # reduction; without it recent pandas versions raise instead of skipping them.
    df_mean_playlist = playlist.mean(numeric_only=True).to_frame(name=playlist_id)
    return df_mean_playlist

In [12]:
# One single-column frame of feature means per featured playlist, in listing order.
dataframes = [
    get_audio_features_mean(sp, playlist_id)
    for playlist_id in df_featured_playlists['playlist_id']
]

dataframes
    

[                  37i9dQZF1DXcBWIGoYBM5M
 danceability                    0.705500
 energy                          0.608620
 key                             4.860000
 loudness                       -6.552220
 speechiness                     0.098234
 acousticness                    0.281415
 instrumentalness                0.003869
 liveness                        0.152266
 valence                         0.512672
 tempo                         119.009120
 duration_ms                183441.380000,
                   37i9dQZF1DX4bSrsRWE9cd
 danceability                    0.495473
 energy                          0.235396
 key                             3.770270
 loudness                      -14.778986
 speechiness                     0.038682
 acousticness                    0.816699
 instrumentalness                0.164258
 liveness                        0.133188
 valence                         0.248828
 tempo                         117.758108
 duration_ms                23124

In [41]:
# Combine the per-playlist mean frames into one frame by joining them pairwise on
# their index (the feature names); each playlist id ends up as one column.
X = reduce(lambda left,right: pd.merge(left,right, left_index=True, right_index=True), dataframes)
X

Unnamed: 0,37i9dQZF1DXcBWIGoYBM5M,37i9dQZF1DX4bSrsRWE9cd,37i9dQZF1DWYmVQ81PeQpL,37i9dQZF1DX1gRalH1mWrP,37i9dQZF1DWVinJBuv0P4z,37i9dQZF1DXcWBRiUaG3o5,37i9dQZF1DXdPec7aLTmlC,37i9dQZF1DWWvhKV4FBciw,37i9dQZF1DWTJ7xPn4vNaz,37i9dQZF1DX36Xw4IJIVKA,37i9dQZF1DXdpVGstUksUC,37i9dQZF1DX9XIFQuFvzM4
danceability,0.7055,0.495473,0.718773,0.705374,0.6159,0.489661,0.67462,0.621862,0.58296,0.67934,0.327607,0.63413
energy,0.60862,0.235396,0.777055,0.653465,0.64015,0.227158,0.70773,0.659925,0.568939,0.3863,0.73894,0.6182
key,4.86,3.77027,5.734375,5.121212,5.4,4.53913,5.21,4.9375,5.446667,5.57,5.48,4.72
loudness,-6.55222,-14.778986,-6.727828,-6.102586,-8.5314,-14.056017,-5.5282,-8.775563,-10.151387,-11.38789,-7.476233,-8.2497
speechiness,0.098234,0.038682,0.072946,0.111174,0.052376,0.039552,0.077673,0.061507,0.049112,0.128608,0.050639,0.048955
acousticness,0.281415,0.816699,0.118999,0.199946,0.415748,0.852304,0.150887,0.325953,0.356825,0.521623,0.040539,0.370497
instrumentalness,0.003869,0.164258,0.038717,0.009961,0.011213,0.107887,0.001847,0.095204,0.025934,0.657354,0.405376,0.022004
liveness,0.152266,0.133188,0.192106,0.185335,0.17516,0.146652,0.184725,0.208363,0.171027,0.182511,0.182473,0.193628
valence,0.512672,0.248828,0.707461,0.523758,0.795175,0.263305,0.56151,0.76745,0.642489,0.428818,0.324705,0.75767
tempo,119.00912,117.758108,118.780039,118.262667,118.272075,115.723235,117.75843,122.904837,119.5283,104.77351,123.83836,119.53994


In [14]:
# Mean of each numeric audio feature over the user's top tracks, as a one-column frame.
# numeric_only=True keeps the string columns (track_id, track_name) out of the
# reduction; without it recent pandas versions raise instead of skipping them.
y = df_final_features.mean(numeric_only=True).to_frame(name='top_playlist')
y

Unnamed: 0,top_playlist
danceability,0.67106
energy,0.75408
key,5.2
loudness,-5.81768
speechiness,0.106166
acousticness,0.235979
instrumentalness,0.033735
liveness,0.207758
valence,0.6611
tempo,120.51986


## Cosine Similarity

In [18]:
def cos_sim(a,b):
    """Cosine similarity of two vectors: dot(a, b) / (||a|| * ||b||)."""
    numerator = np.dot(a, b)
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    return numerator / denominator

In [20]:
# Cosine similarity ignores the magnitudes of the two vectors and compares only
# the angle between them.
def similarities(user_playlist,featured_playlist):
    """Return the cosine similarity between the two playlist feature vectors."""
    return cos_sim(user_playlist, featured_playlist)

## Euclidean Distance

In [33]:
# Euclidean distance takes the magnitude of every element of the vectors into account.
def euclidean_distance(user_playlist,featured_playlist):
    """Return sqrt(sum((user_playlist - featured_playlist) ** 2))."""
    squared_differences = (user_playlist - featured_playlist) ** 2
    return np.sqrt(np.sum(squared_differences))

In [38]:
def euc_dataframe(user_playlist):
    """Rank the featured playlists by Euclidean distance to ``user_playlist``.

    Reads the notebook-level ``df_featured_playlists`` (for the playlist ids) and
    ``X`` (one column of feature means per playlist id).

    Returns a DataFrame with columns ``featured_playlist`` and ``distance_score``,
    sorted by ascending distance.
    """
    # NOTE(review): the recorded distances are in the thousands, which suggests a
    # large-magnitude, unscaled feature (e.g. duration_ms) dominates the score --
    # consider standardizing the features before comparing; confirm intent.
    playlist_ids = list(df_featured_playlists['playlist_id'])
    scores = [euclidean_distance(user_playlist, X[playlist_id]) for playlist_id in playlist_ids]

    ranking = pd.DataFrame({'featured_playlist': playlist_ids, 'distance_score': scores})
    return ranking.sort_values('distance_score')

In [39]:
# Rank the featured playlists by distance from the user's mean top-track features
# (smallest distance, i.e. closest playlist, first).
euc_dataframe(y['top_playlist'])

Unnamed: 0,featured_playlist,distance_score
2,37i9dQZF1DWYmVQ81PeQpL,3587.848392
8,37i9dQZF1DWTJ7xPn4vNaz,4424.582247
7,37i9dQZF1DWWvhKV4FBciw,6004.111211
1,37i9dQZF1DX4bSrsRWE9cd,8869.737011
5,37i9dQZF1DXcWBRiUaG3o5,10788.339924
6,37i9dQZF1DXdPec7aLTmlC,34663.650112
3,37i9dQZF1DX1gRalH1mWrP,36179.4429
11,37i9dQZF1DX9XIFQuFvzM4,36527.950098
0,37i9dQZF1DXcBWIGoYBM5M,38935.780038
4,37i9dQZF1DWVinJBuv0P4z,45993.147636
