In [1]:
import argparse
import pprint
import sys
import os
import subprocess
import json
import spotipy
import spotipy.util as util
import pandas as pd
import numpy as np
from spotipy.oauth2 import SpotifyClientCredentials
import configparser
from pprint import pprint

In [2]:
def connect_user(config_fn='', uname="", redirect_uri="http://localhost"):
    """Create a Spotify client authorised with a user token.

    The client id and secret are read from the ``SPOTIFY`` section of the
    config file (options ``CLIENT_ID`` and ``CLIENT_SECRET``) and handed to
    ``util.prompt_for_user_token`` together with the username and redirect
    URI. The token is requested with the ``user-library-read`` and
    ``user-top-read`` scopes.

    Args:
        config_fn: Path of the config file holding the credentials.
        uname: Spotify username forwarded to the token prompt.
        redirect_uri: Redirect URI forwarded to the token prompt.

    Returns:
        A ``spotipy.Spotify`` instance constructed with the obtained token.

    Raises:
        configparser.NoSectionError: If no ``SPOTIFY`` section was loaded.
            ``ConfigParser.read`` silently skips files it cannot open, so a
            wrong path surfaces here rather than as a file error.
        configparser.NoOptionError: If the section lacks one of the options.
    """
    parser = configparser.ConfigParser()
    parser.read(config_fn)

    # Look the options up in the same order as before: id first, then secret.
    credentials = {
        'client_id': parser.get('SPOTIFY', 'CLIENT_ID'),
        'client_secret': parser.get('SPOTIFY', 'CLIENT_SECRET'),
    }

    user_token = util.prompt_for_user_token(
        username=uname,
        scope='user-library-read user-top-read',
        redirect_uri=redirect_uri,
        **credentials)

    return spotipy.Spotify(auth=user_token)

In [16]:
def get_song_data(sp, page_size=50):
    """Fetch the user's saved tracks and return their metadata as a DataFrame.

    Pages through ``sp.current_user_saved_tracks`` ``page_size`` items at a
    time and stops as soon as a page comes back with fewer items than
    requested. The whole library is therefore read regardless of its size,
    and no requests are made past its end.

    Args:
        sp: Client object exposing
            ``current_user_saved_tracks(limit=..., offset=...)`` that returns
            a mapping with an ``'items'`` list; each item carries a
            ``'track'`` mapping.
        page_size: Number of items requested per call. Defaults to 50, the
            value this function has always used.
            NOTE(review): the Spotify endpoint is documented to cap ``limit``
            at 50 -- confirm before raising it.

    Returns:
        DataFrame with one row per saved track, in the order returned by the
        API, and the columns ``album_id``, ``album_name``, ``artist_id``,
        ``artist_name``, ``track_id``, ``track_name``, ``popularity``,
        ``uri`` and ``duration`` (taken from ``duration_ms``). Only the first
        listed artist of each track is recorded. An empty library yields an
        empty frame that still has these columns.

    Raises:
        ValueError: If ``page_size`` is smaller than 1.
    """
    if page_size < 1:
        # A non-positive page size could never advance the offset.
        raise ValueError("page_size must be at least 1")

    columns = ['album_id', 'album_name', 'artist_id', 'artist_name',
               'track_id', 'track_name', 'popularity', 'uri', 'duration']

    records = []
    offset = 0
    while True:
        page = sp.current_user_saved_tracks(limit=page_size, offset=offset)
        items = page['items']

        for item in items:
            track = item['track']
            album = track['album']
            lead_artist = track['artists'][0]
            records.append({
                'album_id': album['id'],
                'album_name': album['name'],
                'artist_id': lead_artist['id'],
                'artist_name': lead_artist['name'],
                'track_id': track['id'],
                'track_name': track['name'],
                'popularity': track['popularity'],
                'uri': track['uri'],
                'duration': track['duration_ms'],
            })

        # A short (or empty) page means the end of the library was reached.
        if len(items) < page_size:
            break
        offset += page_size

    # Passing ``columns`` keeps the schema stable even when there are no rows.
    return pd.DataFrame(records, columns=columns)

In [6]:
def get_song_features(sp, df):
    """Fetch audio features for every track in ``df`` and return a DataFrame.

    Track ids are sent to ``sp.audio_features`` in batches of 100. Entries
    for which the API returns ``None`` (no features available) are skipped;
    previously the first such entry crashed the function because it
    incremented a counter that was never initialised.

    Args:
        sp: Client object exposing ``audio_features(track_ids)`` that returns
            one entry per requested id, each either a mapping of features or
            ``None``.
        df: DataFrame with a ``track_id`` column.

    Returns:
        DataFrame with the columns ``track_id``, ``acousticness``,
        ``danceability``, ``energy``, ``instrumentalness``, ``key``,
        ``liveness``, ``loudness``, ``mode``, ``speechiness``, ``tempo``,
        ``time_signature`` and ``valence``. Each row is labelled with the
        position of its track in ``df``, so skipped tracks leave gaps in the
        index. Column dtypes are whatever pandas infers from the values.
    """
    columns = ['track_id', 'acousticness', 'danceability', 'energy',
               'instrumentalness', 'key', 'liveness', 'loudness', 'mode',
               'speechiness', 'tempo', 'time_signature', 'valence']
    feature_names = columns[1:]
    batch_size = 100

    # A plain list gives positional slicing whatever index ``df`` carries.
    track_ids = df['track_id'].tolist()

    records = []
    row_labels = []
    for start in range(0, len(track_ids), batch_size):
        batch = track_ids[start:start + batch_size]
        feature_results = sp.audio_features(batch)

        for pos, features in enumerate(feature_results):
            if features is None:
                # No audio features for this track; leave it out.
                continue
            record = {name: features[name] for name in feature_names}
            record['track_id'] = features['id']
            records.append(record)
            row_labels.append(start + pos)

    # Build the frame once instead of growing it cell by cell with ``.loc``.
    return pd.DataFrame(records, columns=columns, index=row_labels)

In [7]:
def conv_feat_to_float(feat):
    """Return a copy of ``feat`` with every column except ``track_id`` as float64.

    ``track_id`` is moved into the index so it is left out of the cast, and
    is restored as the first column afterwards. The input frame is left
    untouched; the previous in-place ``set_index`` stripped the ``track_id``
    column from the caller's frame, so a second call on it raised
    ``KeyError``.

    Args:
        feat: DataFrame with a ``track_id`` column; all other columns must be
            convertible to ``numpy.float64``.

    Returns:
        New DataFrame with a fresh default integer index, ``track_id`` as the
        first column and the remaining columns cast to ``float64``.
    """
    return (
        feat.set_index("track_id")
        .astype(np.float64)
        .reset_index()
    )

In [17]:
def create_df(config_fn='/Users/zachapell/Desktop/config.cfg', uname="zachapell5"):
    """Build the full track table: saved-track metadata joined with audio features.

    Connects to Spotify, downloads the saved-track metadata, downloads and
    float-converts the audio features, and merges the two on ``track_id``.

    Args:
        config_fn: Path of the config file passed to ``connect_user``.
            Defaults to the path that was previously hard-coded here.
        uname: Spotify username passed to ``connect_user``. Defaults to the
            username that was previously hard-coded here.

    Returns:
        DataFrame with the audio-feature columns followed by the metadata
        columns. The merge uses pandas' default join, so tracks without a
        row in the feature table do not appear in the result.
    """
    sp = connect_user(config_fn, uname)
    songs = get_song_data(sp)
    features = conv_feat_to_float(get_song_features(sp, songs))
    return features.merge(songs, on='track_id')

In [18]:
# Build the merged metadata/audio-feature table; this connects to Spotify
# and calls its API through the helper functions.
df = create_df()
# Save the table as an Excel workbook; the path is relative to the current
# working directory.
df.to_excel('./spotify_data.xlsx')