In [1]:
import pandas as pd
import numpy as np
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import json
import time
from sklearn.preprocessing import StandardScaler
from sklearn.mixture import GaussianMixture
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from config import *

In [2]:

#Initialize SpotiPy with user credentias
def search_song(artist, title):
    '''
    Query spotify API for a given song to obtain its URI.
    
    Args:
        artist: Artist of the song
        title: Song title
    Return:
        On success the URI is returned, on error "invalid"
    '''
    try:
        print(f'Query for "{title}" by "{artist}": ', end='')
        result = sp.search(q=f'track:{title} artist:{artist}', limit=1)
        uri = result['tracks']['items'][0]['uri']
        print(uri)
        return uri
    except:
        print('failed')
        return "invalid"

In [3]:
def get_audio_features(chunk):
    '''
    Get audiofeatures for given dataframe chunk and return them
    as a dictionary.
    
    Args:
        chunk: Pandas dataframe
    Return:
        Dictionary with audiofeatures
    '''

    d = {'danceability': [],
         'energy': [],
         'key': [],
         'loudness': [],
         'mode': [],
         'speechiness': [],
         'acousticness': [],
         'instrumentalness': [],
         'liveness': [],
         'valence': [],
         'tempo': [],
         'type': [],
         'id': [],
         'uri': [],
         'track_href': [],
         'analysis_url': [],
         'duration_ms': [],
         'time_signature': []
        }
    
    for index, row in chunk.iterrows():

        uri = search_song(row['artist'], row['title'])       
        audio_feat = sp.audio_features(uri)[0]        

        if audio_feat == None:
            # If we couldn't receive the audio features from spotify,
            # just store a 'x' for each feature and drop the song later...
            for key in d:
                d[key].append('x')
        else:        
            d['danceability'].append(audio_feat['danceability'])
            d['energy'].append(audio_feat['energy'])
            d['key'].append(audio_feat['key'])
            d['loudness'].append(audio_feat['loudness'])
            d['mode'].append(audio_feat['mode'])
            d['speechiness'].append(audio_feat['speechiness'])
            d['acousticness'].append(audio_feat['acousticness'])
            d['instrumentalness'].append(audio_feat['instrumentalness'])
            d['liveness'].append(audio_feat['liveness'])
            d['valence'].append(audio_feat['valence'])
            d['tempo'].append(audio_feat['tempo'])
            d['type'].append(audio_feat['type'])
            d['id'].append(audio_feat['id'])
            d['uri'].append(audio_feat['uri'])
            d['track_href'].append(audio_feat['track_href'])
            d['analysis_url'].append(audio_feat['analysis_url'])
            d['duration_ms'].append(audio_feat['duration_ms'])
            d['time_signature'].append(audio_feat['time_signature'])

        time.sleep(0.5)
    return d

#### Create a function get_audio_features(list_of_songs) to obtain the audio features of a given list of songs (the content of list_of_songs can be the href/id/uri).

In [4]:
def chunk_dataframe(df, chunk_size=50):
    '''
    Slices a pandas dataframe into chunks of given size
    and return a list containing all these chunks.
    
    Args:
        df: Pandas dataframe to slice
        chunk_size: Size of single chunk (Default=50)
    Return:
        List with dataframe chunks
    '''
    start  = 0
    end    = chunk_size
    chunks = []

    while start < len(df):
        chunks.append(df.iloc[start:end, :])
        start = end
        end   = end+chunk_size
        
    return chunks

In [5]:
def get_dataframe_audio_features(df):
    '''
    Download audio features for all songs within the given pandas dataframe.
    Note: Given df MUST have a title and artist column!!!
    
    Args:
        df: Pandas dataframe with songs (artist and title)
    Return:
        Pandas dataframe with songs and its related audio features
    '''

    df_total = pd.DataFrame()
    chunks   = chunk_dataframe(df)
    n_chunk  = 1
    
    for chunk in chunks:

        chunk = chunk.reset_index(drop=True)
        
        # 
        print(f'-> Downloading audio-features for chunk {n_chunk}')
        
        # Get audio features for current chunk
        audio_feat = get_audio_features(chunk)
        
        # Create df = chunk + audio_features
        chunk_with_feat = pd.concat([chunk, pd.DataFrame(audio_feat)], axis=1)

        # Add to total df
        if len(df_total) == 0:
            df_total = chunk_with_feat
        else:
            df_total = pd.concat([df_total, chunk_with_feat], axis=0)

        # Wait for 20 secs, for don't get banned
        time.sleep(20)
        n_chunk += 1

    return df_total

In [None]:
# codes that run Gaussian clustering
#with the time elapsed to cluster ...





In [None]:
#functions calls since the one below is erraneous


In [6]:
# Establish connection to spotify API.
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=client_id,
                                                           client_secret=client_secret))
# Get audio features for songs provided in 'hot.csv'
hot_df = pd.read_csv('hot.csv')
hot_with_features_df = get_dataframe_audio_features(hot_df)
hot_with_features_df = hot_with_features_df[hot_with_features_df['key'] != 'x']
hot_with_features_df.to_csv('hot_with_features.csv', index=False)
hot_with_features_df
# Get audio features for songs provided in 'nothot.csv'
nothot_df = pd.read_csv('nothot.csv')
nothot_with_features_df = get_dataframe_audio_features(nothot_df)
nothot_with_features_df = nothot_with_features_df[nothot_with_features_df['key'] != 'x']
nothot_with_features_df.to_csv('nothot_with_features.csv', index=False)

-> Downloading audio-features for chunk 1


KeyError: 'artist'