In [1]:
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy.oauth2 as oauth2
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
import seaborn as sns
%config InlineBackend.figure_format ='retina'
import random
from functools import reduce    


## Spotify API Call

In [38]:
# Spotify app credentials are read from the environment so that secrets are
# never stored in (or committed with) the notebook. SPOTIPY_CLIENT_ID,
# SPOTIPY_CLIENT_SECRET and SPOTIPY_REDIRECT_URI are the variable names that
# spotipy itself recognises.
# NOTE(review): a client secret was previously hardcoded in this cell; it should
# be revoked/rotated in the Spotify developer dashboard.
import os

client_id = os.environ['SPOTIPY_CLIENT_ID']
client_secret = os.environ['SPOTIPY_CLIENT_SECRET']
redirect_uri = os.environ.get('SPOTIPY_REDIRECT_URI', 'http://localhost:8910/callback')
username = '61d4pmnwnwjt7xz9tj6v3txho'

# Scope requested for the user token; the top-tracks call below is made with it.
scope = 'user-top-read'

# Start with an app-only (client credentials) client ...
client_credentials_manager = SpotifyClientCredentials(client_id=client_id,
                                                      client_secret=client_secret)

sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)

# ... then try to obtain a user token and, if one is returned, replace `sp`
# with a client authenticated as that user.
token = util.prompt_for_user_token(username,scope, client_id, client_secret, redirect_uri)

if token:
    sp = spotipy.Spotify(auth=token)
else:
    # `sp` stays the client-credentials client in this case.
    print("Can't get token for", username)
    

## Top Tracks Extraction

In [3]:
# Request up to 50 of the current user's top tracks for the 'short_term' range.
results = sp.current_user_top_tracks(limit=50, offset=0,time_range='short_term')

# Pull each field of interest out of the returned track objects, one list per column.
top_items = results['items']
track_name = [entry['name'] for entry in top_items]
track_id = [entry['id'] for entry in top_items]
artist = [entry["artists"][0]["name"] for entry in top_items]  # first listed artist only
album = [entry["album"]["name"] for entry in top_items]
duration = [entry["duration_ms"] for entry in top_items]
popularity = [entry["popularity"] for entry in top_items]

# Assemble the columns into the final frame (column order matters for display).
df_top_tracks = pd.DataFrame({
    "track_name": track_name,
    "album": album,
    "track_id": track_id,
    "artist": artist,
    "duration": duration,
    "popularity": popularity,
})

df_top_tracks

Unnamed: 0,track_name,album,track_id,artist,duration,popularity
0,Vaathi Coming,Master (Original Motion Picture Soundtrack),2BcPFQ7nrtUObgAs72xaac,Anirudh Ravichander,228257,63
1,"Ey Inge Paaru (From ""Velaiyilla Pattathari"")",Voice of Ani,34hssXUwRCru1RbfxCWLth,Anirudh Ravichander,117788,24
2,Feels Like Love,Noah Schnacky EP,04hHPq6kXTbcSSDrdaP3s4,Noah Schnacky,174760,53
3,Andha Kanna Paathaakaa,Master (Original Motion Picture Soundtrack),0qvoxfYodIfe14gaidhnsV,Anirudh Ravichander,194500,64
4,All the Cowboys,All the Cowboys,13TOvHAfdO8wryqxBL7fnI,Alexandra Kay,228231,55
5,My Person,Wilderness,1MOOJuxUu9QiQE9GgkYYPb,Spencer Crandall,177000,59
6,Thinking out Loud,x (Wembley Edition),1Slwb6dOYkBlWal1PGtnNg,Ed Sheeran,281560,70
7,Comeback,Noah Schnacky EP,1wr0HUe5tFDlN32jfwt9IS,Noah Schnacky,168240,49
8,Maybe We Will - 2020 Version,Noah Schnacky EP,26LKaGfw6ZolgGArPDEN2R,Noah Schnacky,190440,43
9,Polakattum Para Para,Master (Original Motion Picture Soundtrack),2K058s9yrpoUfANaQt7Zu4,Anirudh Ravichander,214124,55


## Features Extraction

In [4]:
def get_features(sp,df):
    """Attach Spotify audio features to every track in ``df``.

    Parameters
    ----------
    sp : object
        Client exposing ``audio_features(ids)``, which returns one entry per
        requested id (a dict of features, or ``None`` when no features exist).
    df : pandas.DataFrame
        Must contain the columns ``track_id`` and ``track_name``.

    Returns
    -------
    pandas.DataFrame
        ``track_id`` and ``track_name`` followed by the audio-feature columns
        (including ``mode``), one row per row of ``df`` and with ``df``'s index.
        Tracks for which no features were returned get NaN feature values.
    """
    playlist = df[['track_id','track_name']]
    feature_columns = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
                       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
                       'duration_ms']

    # Request features in batches so that frames longer than one batch are
    # fully covered instead of being silently truncated to the first 50 rows.
    batch_size = 50
    features = []
    for start in range(0, len(playlist), batch_size):
        ids = playlist.iloc[start:start + batch_size, 0].tolist()
        batch = sp.audio_features(ids)
        if batch is None:
            # Keep one placeholder per requested id so rows stay aligned.
            batch = [None] * len(ids)
        features += batch

    # A None entry means no features for that track; keep the row, fill with NaN.
    missing_row = [float('nan')] * len(feature_columns)
    feature_list = [
        [feature[column] for column in feature_columns] if feature is not None else missing_row
        for feature in features
    ]

    # Reuse the input index so the column-wise concat aligns row-for-row even
    # when ``df`` does not carry a default 0..n-1 index.
    df_audio_feature = pd.DataFrame(feature_list, columns=feature_columns, index=playlist.index)
    df_playlist_audio_features = pd.concat([playlist, df_audio_feature], axis=1)
    return df_playlist_audio_features
    

In [41]:
# Fetch audio features for the top tracks, then discard the 'mode' column
# because it is a binary feature.
df_final_features = get_features(sp, df_top_tracks).drop(columns=['mode'])
df_final_features

Unnamed: 0,track_id,track_name,danceability,energy,key,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms
0,2BcPFQ7nrtUObgAs72xaac,Vaathi Coming,0.643,0.952,7,-4.15,0.246,0.276,0.49,0.0588,0.585,164.784,228258
1,34hssXUwRCru1RbfxCWLth,"Ey Inge Paaru (From ""Velaiyilla Pattathari"")",0.643,0.834,7,-4.104,0.182,0.0652,4.2e-05,0.612,0.872,189.998,117788
2,04hHPq6kXTbcSSDrdaP3s4,Feels Like Love,0.639,0.845,2,-5.139,0.0414,0.00117,0.0,0.126,0.703,117.047,174760
3,0qvoxfYodIfe14gaidhnsV,Andha Kanna Paathaakaa,0.747,0.86,6,-4.071,0.279,0.301,0.0383,0.0743,0.625,90.445,194500
4,13TOvHAfdO8wryqxBL7fnI,All the Cowboys,0.501,0.554,9,-6.292,0.0265,0.321,0.0,0.178,0.47,144.022,228232
5,1MOOJuxUu9QiQE9GgkYYPb,My Person,0.567,0.753,7,-5.291,0.0487,0.0894,0.0,0.13,0.756,165.966,177000
6,1Slwb6dOYkBlWal1PGtnNg,Thinking out Loud,0.781,0.445,2,-6.061,0.0295,0.474,0.0,0.184,0.591,78.998,281560
7,1wr0HUe5tFDlN32jfwt9IS,Comeback,0.542,0.817,11,-5.192,0.0324,0.0383,0.0,0.218,0.683,95.978,168240
8,26LKaGfw6ZolgGArPDEN2R,Maybe We Will - 2020 Version,0.685,0.485,4,-7.755,0.0272,0.151,0.0,0.11,0.526,85.045,190440
9,2K058s9yrpoUfANaQt7Zu4,Polakattum Para Para,0.758,0.796,11,-6.483,0.17,0.165,4.1e-05,0.29,0.919,145.475,214125


## EDA Performed

## Featured Playlists Extraction

In [10]:
def featured_playlists(sp):
    """Return the featured playlists reported by ``sp`` as a DataFrame.

    Calls ``sp.featured_playlists()`` with its default arguments and reads the
    entries under ``['playlists']['items']``.

    Returns a frame with the columns ``playlist_id``, ``playlist_name`` and
    ``#tracks`` (the ``tracks.total`` value of each playlist).
    """
    entries = sp.featured_playlists()['playlists']['items']

    return pd.DataFrame({
        'playlist_id': [entry['id'] for entry in entries],
        'playlist_name': [entry['name'] for entry in entries],
        '#tracks': [entry['tracks']['total'] for entry in entries],
    })

In [11]:
# Build the featured-playlists table with the authenticated client and display it.
df_featured_playlists = featured_playlists(sp)
df_featured_playlists

Unnamed: 0,playlist_id,playlist_name,#tracks
0,37i9dQZF1DWU0ScTcjJBdj,Relax & Unwind,100
1,37i9dQZF1DWTwbZHrJRIgD,Happy Weekend,54
2,37i9dQZF1DX4pUKG1kS0Ac,Guilty Pleasures,151
3,37i9dQZF1DWYAcBZSAVhlf,Walking On Sunshine,80
4,37i9dQZF1DWWBHeXOYZf74,POLLEN,183
5,37i9dQZF1DX5hkGHr0ICaZ,The Metal Festival,90
6,37i9dQZF1DX4o1oenSJRJd,All Out 00s,100
7,37i9dQZF1DX4bSrsRWE9cd,Bliss,74
8,37i9dQZF1DWZKuerrwoAGz,Happy Favorites,100
9,37i9dQZF1DX4VvfRBFClxm,Acoustic Hits,100


## Fetch Tracks for Each Playlist

In [17]:
def get_playlist_tracks(sp, playlist_id):
    """Collect the id and name of every track in a playlist.

    Pages through ``sp.playlist_tracks`` 100 items at a time until the
    response's ``next`` field is ``None``.

    Parameters
    ----------
    sp : object
        Client exposing ``playlist_tracks(playlist_id, fields, limit, offset, market)``.
    playlist_id : str
        Identifier of the playlist to read.

    Returns
    -------
    pandas.DataFrame
        Columns ``track_id`` and ``track_name``. Playlist entries whose
        ``track`` is ``None`` or whose track has no ``id`` are skipped, since
        they cannot be used for later per-track lookups.
    """
    page_size = 100
    tracks = []
    offset = 0
    while True:
        track_list = sp.playlist_tracks(playlist_id, fields = None, limit = page_size, offset = offset, market = None)
        tracks += track_list['items']

        if track_list['next'] is None:
            break
        offset += page_size

    track_id = []
    track_name = []

    for entry in tracks:
        track = entry['track']
        # Entries without a track object, or without a track id, would
        # otherwise raise here or poison downstream id-based requests.
        if track is None or track['id'] is None:
            continue
        track_id.append(track['id'])
        track_name.append(track['name'])

    df_playlist_tracks = pd.DataFrame({'track_id':track_id, 'track_name': track_name})
    return df_playlist_tracks
    

## Get Audio Features for each track within a playlist

In [35]:
def get_audio_features(sp, playlist_id):
    """Return the tracks of a playlist together with their audio features.

    The track list comes from ``get_playlist_tracks``; features are requested
    from ``sp.audio_features`` in batches of 50 ids.

    Returns
    -------
    pandas.DataFrame
        ``track_id`` and ``track_name`` followed by the audio-feature columns
        (``mode`` is deliberately left out). Tracks for which no features were
        returned keep their row with NaN feature values.
    """
    playlist = get_playlist_tracks(sp, playlist_id)
    # Rows without a track id cannot be looked up; drop them up front and
    # renumber so batching and the final concat stay aligned.
    playlist = playlist[playlist['track_id'].notna()].reset_index(drop=True)

    feature_columns = ['danceability', 'energy', 'key', 'loudness', 'speechiness',
                       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
                       'duration_ms']

    batch_size = 50
    audio_features = []
    for start in range(0, playlist.shape[0], batch_size):
        ids = playlist.iloc[start:start + batch_size, 0].tolist()
        batch = sp.audio_features(ids)
        if batch is None:
            # Keep one placeholder per requested id so rows stay aligned.
            batch = [None] * len(ids)
        audio_features += batch

    # A None entry means no features for that track; fill its row with NaN
    # instead of failing on the subscript.
    missing_row = [float('nan')] * len(feature_columns)
    feature_list = [
        [feature[column] for column in feature_columns] if feature is not None else missing_row
        for feature in audio_features
    ]

    df_audio_feature = pd.DataFrame(feature_list, columns=feature_columns, index=playlist.index)
    df_playlist_audio_features = pd.concat([playlist, df_audio_feature], axis=1)
    return df_playlist_audio_features
    

## Get the mean & merge each playlist

In [36]:
def get_audio_features_mean (sp, playlist_id):
    """Return the mean of each numeric audio feature over a playlist.

    The result is a single-column DataFrame indexed by feature name, with the
    column named after ``playlist_id``.
    """
    playlist = get_audio_features(sp, playlist_id)
    # The frame also holds the string columns track_id / track_name;
    # numeric_only=True keeps them out of the mean (pandas >= 2.0 raises
    # a TypeError otherwise).
    df_mean_playlist = playlist.mean(numeric_only=True).to_frame(name=playlist_id)
    return df_mean_playlist

In [39]:
# One single-column frame of feature means per featured playlist.
dataframes = [
    get_audio_features_mean(sp, featured_id)
    for featured_id in df_featured_playlists['playlist_id']
]

dataframes
    

[                  37i9dQZF1DWU0ScTcjJBdj
 danceability                    0.521310
 energy                          0.363995
 key                             5.810000
 loudness                      -11.082280
 speechiness                     0.038126
 acousticness                    0.695853
 instrumentalness                0.074661
 liveness                        0.155497
 valence                         0.297808
 tempo                         117.174500
 duration_ms                241161.370000,
                   37i9dQZF1DWTwbZHrJRIgD
 danceability                    0.679463
 energy                          0.687407
 key                             5.351852
 loudness                       -7.141241
 speechiness                     0.062146
 acousticness                    0.282147
 instrumentalness                0.028680
 liveness                        0.161083
 valence                         0.573424
 tempo                         118.554074
 duration_ms                24278

In [40]:
# Combine the per-playlist mean frames into one table by merging them pairwise
# on their index, so each playlist becomes a column.
X = reduce(
    lambda merged, following: merged.merge(following, left_index=True, right_index=True),
    dataframes,
)
X

Unnamed: 0,37i9dQZF1DWU0ScTcjJBdj,37i9dQZF1DWTwbZHrJRIgD,37i9dQZF1DX4pUKG1kS0Ac,37i9dQZF1DWYAcBZSAVhlf,37i9dQZF1DWWBHeXOYZf74,37i9dQZF1DX5hkGHr0ICaZ,37i9dQZF1DX4o1oenSJRJd,37i9dQZF1DX4bSrsRWE9cd,37i9dQZF1DWZKuerrwoAGz,37i9dQZF1DX4VvfRBFClxm,37i9dQZF1DX4MTfCb9IRyO,37i9dQZF1DX4dyzvuaRJ0n
danceability,0.52131,0.679463,0.662735,0.716238,0.676158,0.329411,0.67233,0.495473,0.69328,0.57359,0.59344,0.694823
energy,0.363995,0.687407,0.732854,0.760475,0.57535,0.926578,0.6849,0.235396,0.78406,0.351716,0.49494,0.770468
key,5.81,5.351852,5.231788,4.925,5.076503,4.944444,5.61,3.77027,5.03,5.53,5.4,5.78481
loudness,-11.08228,-7.141241,-6.02057,-9.2604,-8.252749,-6.302944,-5.91323,-14.778986,-4.70756,-9.57559,-10.99914,-5.858076
speechiness,0.038126,0.062146,0.078672,0.053849,0.11782,0.11194,0.076795,0.038682,0.069165,0.046322,0.049852,0.066967
acousticness,0.695853,0.282147,0.147867,0.173648,0.306755,0.008336,0.169791,0.816699,0.097232,0.67512,0.506314,0.103999
instrumentalness,0.074661,0.02868,0.019931,0.037458,0.148863,0.184432,0.008871,0.164258,0.001667,0.017179,0.086893,0.121008
liveness,0.155497,0.161083,0.167897,0.176471,0.184768,0.778233,0.160323,0.133188,0.178706,0.125548,0.189042,0.177778
valence,0.297808,0.573424,0.647991,0.787525,0.499392,0.231883,0.579362,0.248828,0.63432,0.362309,0.74196,0.442868
tempo,117.1745,118.554074,120.882874,119.811275,116.484989,116.386889,118.32189,117.758108,116.59216,116.23931,114.26652,126.806924


In [46]:
# Mean of each numeric audio feature over the user's top tracks, as a
# single-column frame named 'top_playlist'. numeric_only=True keeps the string
# columns (track_id, track_name) out of the mean; without it pandas >= 2.0
# raises a TypeError.
y = df_final_features.mean(numeric_only=True).to_frame(name='top_playlist')
y

Unnamed: 0,top_playlist
danceability,0.67126
energy,0.75088
key,5.3
loudness,-5.96662
speechiness,0.100022
acousticness,0.240452
instrumentalness,0.034367
liveness,0.198582
valence,0.6674
tempo,121.36196
