In [16]:
import pandas as pd
import numpy as np

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import json
import time

from config import *

### First, we concatenate the hot and nothot songs into a single dataframe.
The resulting dataframe has a new column **hot**, which tells us whether a song comes from the hot or the nothot set of songs. Then we store the new dataframe in a csv file named **songs.csv**.

In [17]:
# NOTE(review): this whole cell is disabled. It reads the two per-class
# feature files, labels them via a new 'hot' column (1 = hot, 0 = nothot),
# stacks them row-wise and writes 'songs.csv'. Presumably it was run once to
# produce the 'songs.csv' that is loaded further down -- confirm before
# re-enabling or deleting it.
#hot_df = pd.read_csv('hot_with_features.csv')
#nothot_df = pd.read_csv('nothot_with_features.csv')

#hot_df['hot']    = 1
#nothot_df['hot'] = 0

#songs_df = pd.concat([hot_df, nothot_df], axis=0)
#songs_df.to_csv('songs.csv', index=False)

In [18]:
def search_song(artist, title):
    '''
    Query spotify API for a given song to obtain its URI.

    The lookup is best-effort: progress is printed to stdout and any
    failure is reported through the return value instead of an exception.
    The function relies on a module-level client object named ``sp`` that
    is not defined in this cell (presumably a ``spotipy.Spotify`` instance
    -- confirm where it is created).

    Args:
        artist: Artist of the song
        title: Song title
    Return:
        On success the URI of the first search hit is returned, on error
        (request failed, unexpected response layout, or no hit) "invalid"
    '''
    try:
        print(f'Query for "{title}" by "{artist}": ', end='')
        result = sp.search(q=f'track:{title} artist:{artist}', limit=1)
        # An empty hit list raises IndexError here, which is handled below
        # exactly like any other failed lookup.
        uri = result['tracks']['items'][0]['uri']
        print(uri)
        return uri
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate and a long-running
        # batch of queries can be aborted by the user.
        print('failed')
        return "invalid"

In [19]:
def get_audio_features(chunk, delay=0.5):
    '''
    Get audiofeatures for given dataframe chunk and return them
    as a dictionary.

    For every row the song is looked up with ``search_song`` and its audio
    features are requested through the module-level client ``sp`` (not
    defined in this cell). Rows for which no features could be obtained
    get the placeholder 'x' in every column so they can be dropped later;
    this keeps all lists the same length as the chunk.

    Args:
        chunk: Pandas dataframe with the columns 'artist' and 'title'
        delay: Seconds to pause after each row (default 0.5, the value
            that used to be hard-coded)
    Return:
        Dictionary with audiofeatures: one list per feature name, with one
        entry per row of the chunk, in row order
    '''

    # Single source of truth for the feature names; used both to build the
    # result dictionary and to copy values out of the API response.
    feature_keys = (
        'danceability',
        'energy',
        'key',
        'loudness',
        'mode',
        'speechiness',
        'acousticness',
        'instrumentalness',
        'liveness',
        'valence',
        'tempo',
        'type',
        'id',
        'uri',
        'track_href',
        'analysis_url',
        'duration_ms',
        'time_signature',
    )
    d = {key: [] for key in feature_keys}

    for _, row in chunk.iterrows():

        uri = search_song(row['artist'], row['title'])

        audio_feat = None
        # search_song returns the sentinel "invalid" when the lookup failed;
        # there is nothing to request features for in that case.
        if uri != "invalid":
            result = sp.audio_features(uri)
            # Guard against an empty / missing response instead of raising
            # on the [0] access.
            audio_feat = result[0] if result else None

        if audio_feat is None:
            # If we couldn't receive the audio features from spotify,
            # just store a 'x' for each feature and drop the song later...
            for key in feature_keys:
                d[key].append('x')
        else:
            for key in feature_keys:
                d[key].append(audio_feat[key])

        # Pause between songs (presumably to stay under the API rate limit).
        time.sleep(delay)
    return d

### Load our song dataset

In [20]:
# Load the combined song table from 'songs.csv' (path is relative to the
# notebook's working directory).
songs_df = pd.read_csv('songs.csv')
# Bare trailing expression: makes the notebook render the dataframe as the
# cell's output.
songs_df

Unnamed: 0,artist,title,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,...,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature,hot
0,Taylor Swift,Anti-Hero,0.637,0.643,4,-6.571,1,0.0519,0.1300,0.000002,...,0.5330,97.008,audio_features,0V3wPSX9ygBnCm8psDIegu,spotify:track:0V3wPSX9ygBnCm8psDIegu,https://api.spotify.com/v1/tracks/0V3wPSX9ygBn...,https://api.spotify.com/v1/audio-analysis/0V3w...,200690,4,1
1,Taylor Swift,Lavender Haze,0.733,0.436,10,-10.489,1,0.0800,0.2580,0.000573,...,0.0976,96.985,audio_features,5jQI2r1RdgtuT8S3iG8zFC,spotify:track:5jQI2r1RdgtuT8S3iG8zFC,https://api.spotify.com/v1/tracks/5jQI2r1Rdgtu...,https://api.spotify.com/v1/audio-analysis/5jQI...,202396,4,1
2,Taylor Swift,Maroon,0.637,0.398,7,-8.294,1,0.0564,0.0573,0.000001,...,0.0374,108.075,audio_features,3eX0NZfLtGzoLUxPNvRfqm,spotify:track:3eX0NZfLtGzoLUxPNvRfqm,https://api.spotify.com/v1/tracks/3eX0NZfLtGzo...,https://api.spotify.com/v1/audio-analysis/3eX0...,218271,4,1
3,Taylor Swift,Midnight Rain,0.643,0.363,0,-11.738,1,0.0767,0.6900,0.000052,...,0.2300,139.865,audio_features,3rWDp9tBPQR9z6U5YyRSK4,spotify:track:3rWDp9tBPQR9z6U5YyRSK4,https://api.spotify.com/v1/tracks/3rWDp9tBPQR9...,https://api.spotify.com/v1/audio-analysis/3rWD...,174783,4,1
4,Taylor Swift,Bejeweled,0.700,0.550,7,-9.130,1,0.0653,0.0661,0.000102,...,0.4120,164.003,audio_features,0jvo9CjnbR0lYUDTSNTMiu,spotify:track:0jvo9CjnbR0lYUDTSNTMiu,https://api.spotify.com/v1/tracks/0jvo9CjnbR0l...,https://api.spotify.com/v1/audio-analysis/0jvo...,194166,4,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2294,Daniel Norgren,Moonshine Got Me,0.458,0.497,8,-9.660,1,0.0479,0.7160,0.155000,...,0.1440,107.748,audio_features,6LrxNNCaqssGmyCGi1EPJA,spotify:track:6LrxNNCaqssGmyCGi1EPJA,https://api.spotify.com/v1/tracks/6LrxNNCaqssG...,https://api.spotify.com/v1/audio-analysis/6Lrx...,531333,4,0
2295,Dover,Serenade me,0.524,0.776,5,-6.193,0,0.0404,0.0330,0.000022,...,0.3010,123.622,audio_features,4tzi4VV1vCTZsT4SEKQIKJ,spotify:track:4tzi4VV1vCTZsT4SEKQIKJ,https://api.spotify.com/v1/tracks/4tzi4VV1vCTZ...,https://api.spotify.com/v1/audio-analysis/4tzi...,234280,4,0
2296,Queen,I want it all,0.402,0.835,11,-5.734,0,0.0500,0.0287,0.000003,...,0.4300,92.029,audio_features,5AaGovcNgVcreY5GHmv3u3,spotify:track:5AaGovcNgVcreY5GHmv3u3,https://api.spotify.com/v1/tracks/5AaGovcNgVcr...,https://api.spotify.com/v1/audio-analysis/5AaG...,241253,4,0
2297,Sultans of Ping FC,Where's me jumper?,0.395,0.914,11,-6.706,0,0.1590,0.0728,0.000000,...,0.4350,174.284,audio_features,1PWkvrHtJI1TLfnZDWDf2u,spotify:track:1PWkvrHtJI1TLfnZDWDf2u,https://api.spotify.com/v1/tracks/1PWkvrHtJI1T...,https://api.spotify.com/v1/audio-analysis/1PWk...,189213,4,0
