## Spotify API Exploratory Analysis.
Credit to [Spotipy Package](https://spotipy.readthedocs.io/en/latest/) for making this Analysis a breeze.

Aim of this notebook is to:
1. Get a feel for the Spotify API and it's structure. 
2. Do some exploratory analysis of my Top Tracks.
3. Visualize the data. [Spotify API Analysis on Tableau](https://public.tableau.com/profile/william8331#!/vizhome/SpotifyMyTracks/TopTracks?publish=yes) 
4. Get some ideas for a potential App that could be built.

In [21]:
#Dependencies
import spotipy
import spotipy.util as util
from IPython.display import JSON
import pandas as pd
from pandas.io.json import json_normalize
import os
import json
import ast

In [5]:
#If you Want to use pre-cached results
df_tracks=pd.read_csv('SpotifyMyTracks.csv')
df_analysis=pd.read_csv('SpotifyMyTracksAnalysis.csv')

In [19]:
#Client ID/Secret stored in the environment variables. 
#Don't want some random to start fiddling with your Spotify account!
CLIENT_ID=os.getenv('sp_client_id')
CLIENT_SECRET=os.getenv('sp_client_secret')

In [17]:
#Authorize the user via getting a token and returning a spotify object to be used for querying the API. 
#Token Lasts for about an hour or so.
def sp_authorize():
    scope = 'user-library-read user-top-read user-read-playback-state user-read-recently-played'
    username='wjia26'
    token = util.prompt_for_user_token(username,scope,
                               client_id=CLIENT_ID,
                               client_secret=CLIENT_SECRET,
                               redirect_uri='https://google.com')
    spotify = spotipy.Spotify(auth=token)
    
    return spotify

In [90]:
def int_to_key(key_int):
    key_list = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
    return key_list[int(key_int)]

In [9]:
def get_unprefixed_keys(tracks_dict,prefix=''):
    keys=[key.replace(prefix,'') for key, value in tracks_dict.items() if prefix in key.lower()]
    return keys

In [10]:
def top_tracks_to_df(time_range='short_term'):
    spotify=sp_authorize()
    # list of track fields
    tracks_dict={
            'rank': [],
            "time_range":[],
            'track_id':[],    
            'track_name':[],
             'track_popularity':[],
             'track_release_date':[],
             'artist_genre':[],
             'artist_name':[],
            "ft_danceability": [],
            "ft_energy": [],
            "ft_key": [],
            "ft_loudness": [],
            "ft_mode": [],
            "ft_speechiness": [],
            "ft_acousticness": [],
            "ft_instrumentalness": [],
            "ft_liveness": [],
            "ft_valence": [],
            "ft_tempo": []
            }
    #Print Top Tracks with the individual track data for the user.
    track_ids=[]
    top_tracks_data = spotify.current_user_top_tracks(limit=50, offset=0,
                                              time_range=time_range)
    # print(top_tracks_data['total'])
    for item in top_tracks_data['items']:
        track_ids.append(item['id'])

    while top_tracks_data['next']:
        top_tracks_data = spotify.next(top_tracks_data)
        for item in top_tracks_data['items']:
            track_ids.append(item['id'])

    #tracks data payload json
    album_ids=[]
    artist_ids=[]

    track_data=spotify.tracks(track_ids)
    rank=0
    
    for track in track_data['tracks']:
        rank=rank+1
        album_ids.append(track['album']['id'])  
        artist_ids.append(track['artists'][0]['id']) 
        #Just grab the first artist to get genre
        tracks_dict['rank'].append(rank)
        tracks_dict['track_id'].append(track['id'])    
        tracks_dict['track_name'].append(track['name'])
        tracks_dict['track_popularity'].append(track['popularity'])
        tracks_dict['track_release_date'].append(track['album']['release_date'])
        tracks_dict['time_range'].append(time_range)

    #get genres for each track through the artists
    artists_data=spotify.artists(artist_ids)
    for artist in artists_data['artists']:
        tracks_dict['artist_genre'].append(artist['genres'])
        tracks_dict['artist_name'].append(artist['name'])

    #get Audio features for each track
    features_data=spotify.audio_features(track_ids)
    for features in features_data:
        keys=get_unprefixed_keys(tracks_dict,prefix='ft_')
        for key in keys:
            tracks_dict['ft_'+key].append(features[key])  

    df = pd.DataFrame(tracks_dict)   
    
    return df

In [11]:
df1=top_tracks_to_df('short_term')
df2=top_tracks_to_df('medium_term')
df3=top_tracks_to_df('long_term')
df_tracks=pd.concat([df1,df2,df3])
df_tracks['key_note']=df_tracks['ft_key'].apply(int_to_key)
#Output to .csv
df_tracks.to_csv('SpotifyMyTracks.csv')

In [32]:
genre_dict={
            "time_range":[],
            'track_id':[],    
            'track_name':[],
             'artist_genre':[],
             'artist_name':[]
            }
for index,track in df_tracks.iterrows():
    genre_list=ast.literal_eval(track['artist_genre'])
    for genre in genre_list:
        genre_dict['track_id'].append(track['track_id'])    
        genre_dict['track_name'].append(track['track_name'])
        genre_dict['time_range'].append(track['time_range'])
        genre_dict['artist_name'].append(track['artist_name'])
        genre_dict['artist_genre'].append(genre)
    
df = pd.DataFrame(genre_dict)   


In [34]:
df.to_csv('GenreList.csv')

### Audio Analysis Component:
Let's look at the nitty-gritty musical analysis for each track.
Uses the [Audio Analysis API](https://developer.spotify.com/documentation/web-api/reference/tracks/get-audio-analysis/)

In [72]:
def section_output_df(track_id,analysis_dict):
    spotify=sp_authorize()
    #Audio Analysis

    analysis_data=spotify.audio_analysis(track_id)
    
    for section in analysis_data['sections']:
        keys=get_unprefixed_keys(analysis_dict,prefix='')
        for key in keys:
            analysis_dict[key].append(section[key]) 
   
    df1 = pd.DataFrame(analysis_dict)
    
    df1['track_duration']=analysis_data['track']['duration']
    
    return df1    

In [88]:
analysis_dict={
                'confidence': [],
      'duration': [],
      'key': [],
      'key_confidence': [],
      'loudness': [],
      'mode': [],
      'mode_confidence': [],
      'start': [],
      'tempo': [],
      'tempo_confidence': [],
      'time_signature': [],
      'time_signature_confidence': []
                }

df_analysis = pd.DataFrame(analysis_dict)    

#Get all unique tracks. Some might be included in both short term and long term.
unique_track_ids=list(df_tracks.track_id.unique())
df_unique_tracks=df_tracks.loc[df_tracks['track_id'].isin(unique_track_ids)]

for index,track in df_unique_tracks.iterrows():
    df2=section_output_df(track['track_id'],analysis_dict)
    df2['track_name']=track['track_name']
    df2['track_id']=track['track_id']
    df2['time_range']=track['time_range']
    df_analysis=pd.concat([df_analysis,df2])
    print(str(index) + ' DONE!!!!! ' + track['track_name'])

#Convert to human-readable key
df_analysis['key_note']=df_analysis['key'].apply(int_to_key)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




0DONE!!!!!Itan na star
1DONE!!!!!Please
2DONE!!!!!Ballin
retrying ...1secs
retrying ...2secs
retrying ...3secs
retrying ...4secs
retrying ...5secs
3DONE!!!!!Zigeunerweisen, Op. 20: Lento
retrying ...1secs
4DONE!!!!!Blackbird
retrying ...1secs
5DONE!!!!!Easily
6DONE!!!!!Easy
7DONE!!!!!Concerto pour Piano et Orchestre No. 1 en Fa Dièse Mineur, Op. 1: I. Vivace
retrying ...1secs
retrying ...2secs
8DONE!!!!!Koi
9DONE!!!!!Let's Get High
10DONE!!!!!Sunsets For Somebody Else
11DONE!!!!!Klavierkonzert Nr. 1 g-Moll, Op. 25: I. Molto allegro con fuoco
12DONE!!!!!Shibuya Stroll
13DONE!!!!!Concerto pour Piano et Orchestre No. 1 en Fa Dièse Mineur, Op. 1: II. Andante
14DONE!!!!!Beyond
retrying ...1secs
retrying ...2secs
15DONE!!!!!Forgot About Dre
16DONE!!!!!New Game
retrying ...1secs
17DONE!!!!!Should We Take The Van?
retrying ...1secs
18DONE!!!!!HUMBLE.
19DONE!!!!!Better Watch Yo Self
20DONE!!!!!Capriccio brillant in B Minor, Op. 22
retrying ...1secs
retrying ...2secs
21DONE!!!!!Piano Concerto No

TypeError: list indices must be integers or slices, not float

In [93]:
df_analysis.to_csv('SpotifyMyTracksAnalysis.csv')

In [37]:
#Exploratory stats about the sections of the music.
print('Loudness: ')
print(df1['loudness'].describe())
print('Tempo: ')
print(df1['tempo'].describe())
print('Key: ')
print(df1['key'].unique())

Loudness: 
count    10.000000
mean    -12.183900
std       7.547728
min     -31.263000
25%     -12.605250
50%      -8.263000
75%      -7.902000
max      -7.670000
Name: loudness, dtype: float64
Tempo: 
count     10.000000
mean     150.102700
std        0.395035
min      149.351000
25%      150.037000
50%      150.094500
75%      150.128250
max      150.950000
Name: tempo, dtype: float64
Key: 
[2 1]


In [136]:
segments_dict={
      "start": [],
      "duration": [],
      "confidence": [],
      "loudness_start": [],
      "loudness_max_time": [],
      "loudness_max": [],
      "pitches": [],
      "timbre": []
            }
analysis_data=spotify.audio_analysis(track_1)
for segments in analysis_data['segments']:
    keys=get_unprefixed_keys(segments_dict,prefix='')
    for key in keys:
        segments_dict[key].append(segments[key])

df2 = pd.DataFrame(segments_dict)
    
df2   

Unnamed: 0,confidence,duration,loudness_max,loudness_max_time,loudness_start,pitches,start,timbre
0,0.000,0.88249,-57.946,0.85914,-60.000,"[0.893, 0.966, 0.799, 1.0, 0.858, 0.7, 0.712, ...",0.00000,"[0.037, 170.29, 9.625, -29.493, 57.866, -50.34..."
1,0.543,0.09837,-53.157,0.06952,-57.586,"[1.0, 0.678, 0.631, 0.691, 0.634, 0.75, 0.67, ...",0.88249,"[5.651, 93.79, 38.091, -48.47, 74.787, -29.401..."
2,0.489,0.15075,-48.084,0.09866,-52.829,"[1.0, 0.766, 0.3, 0.618, 0.703, 0.338, 0.426, ...",0.98086,"[10.448, 59.395, 37.278, -46.264, 43.547, -26...."
3,0.526,0.15152,-41.920,0.11191,-47.666,"[0.505, 0.858, 0.557, 0.406, 0.339, 0.386, 0.4...",1.13161,"[16.435, 25.38, 51.749, -38.483, 18.534, -24.3..."
4,0.529,0.15052,-37.161,0.08160,-42.680,"[0.937, 1.0, 0.548, 0.396, 0.235, 0.518, 0.348...",1.28313,"[20.1, 35.996, 79.03, -34.854, 13.78, -10.545,..."
5,0.354,0.10980,-35.378,0.03828,-39.573,"[1.0, 0.791, 0.41, 0.386, 0.554, 0.799, 0.833,...",1.43365,"[22.476, 55.0, 44.269, 18.362, 36.492, -25.992..."
6,0.063,0.10549,-37.189,0.02642,-40.125,"[0.741, 0.606, 0.425, 0.348, 0.416, 0.927, 0.9...",1.54345,"[21.853, 17.079, 49.106, -17.937, 28.685, -36...."
7,1.000,0.67306,-9.358,0.06311,-38.292,"[0.092, 0.063, 0.092, 0.751, 0.106, 0.098, 0.1...",1.64893,"[35.447, 52.435, 145.783, -84.316, 76.112, 136..."
8,1.000,0.62676,-11.702,0.04246,-29.190,"[0.21, 0.103, 0.095, 0.195, 0.131, 0.171, 0.14...",2.32200,"[40.245, 114.941, 144.673, 126.143, 11.891, 94..."
9,1.000,0.69755,-9.786,0.07250,-32.327,"[0.072, 0.057, 0.101, 0.271, 0.091, 0.103, 0.1...",2.94875,"[38.016, 46.631, 88.088, -32.865, 41.336, 76.7..."


medium_term (approximately last 6 months), short_term (approximately last 4 weeks). Default: medium_term.

In [24]:
for time_range in ('short_term','medium_term','long_term'):
    results = spotify.current_user_top_artists(limit=20, offset=0,
                                              time_range=time_range)
    print('\n' + time_range + '\n')
    for item in results['items']:
        print(item['name'], item['popularity'], item['genres'])
    
    print(results['total'])


short_term

Leslie Cheung 57 ['c-pop', 'cantopop', 'classic cantopop']
Itzhak Perlman 63 ['classical', 'classical performance', 'violin']
Dr. Dre 82 ['g funk', 'gangster rap', 'hip hop', 'rap', 'west coast rap']
Mac Ayres 70 ['alternative r&b', 'indie r&b', 'indie soul']
Cafe Music BGM channel 70 []
Kendrick Lamar 91 ['conscious hip hop', 'hip hop', 'rap', 'west coast rap']
Michael Bublé 96 ['adult standards', 'canadian pop', 'lounge']
Benjamin Frith 41 ['classical piano']
Lang Lang 66 ['chinese classical performance', 'classical', 'classical performance', 'classical piano']
Boris Berezovsky 33 []
Nitro Fun 53 ['catstep', 'complextro', 'deep dubstep', 'edm', 'electro house']
Jenő Jandó 60 ['classical', 'classical piano']
Maxim Vengerov 42 ['classical performance', 'violin']
Mariah Carey 93 ['dance pop', 'pop', 'r&b', 'urban contemporary']
DPR LIVE 62 ['k-hop', 'k-pop', 'korean r&b']
ONE OK ROCK 75 ['j-pop', 'j-rock', 'visual kei']
Charli XCX 81 ['candy pop', 'dance pop', 'electropop',

In [59]:
results = spotify.artist_related_artists(artist_id='0fTav4sBLmYOAzKuJw0grL')
for item in results['artists']:
    print(item['name'], item['popularity'], item['genres'])

Berhana 64 ['alternative r&b', 'indie r&b', 'indie soul']
Jeff Bernat 64 ['indie r&b', 'popping']
Leven Kali 64 ['alternative r&b', 'indie r&b', 'indie soul']
Bruno Major 71 ['indie r&b', 'pop']
Dijon 63 ['alternative r&b', 'bedroom soul', 'indie r&b']
Kyle Dion 54 ['alternative r&b', 'deep pop r&b', 'indie r&b', 'indie soul', 'trap soul']
Cautious Clay 66 ['alternative r&b', 'electropop', 'indie r&b', 'nyc pop']
Alextbh 54 ['indie r&b', 'malaysian indie']
ODIE 64 ['alternative r&b', 'indie r&b', 'trap soul']
Omar Apollo 70 ['alternative r&b', 'bedroom pop', 'bedroom soul', 'indie pop', 'indie r&b', 'pop']
Snoh Aalegra 71 ['alternative r&b', 'indie r&b', 'indie soul', 'scandinavian r&b', 'swedish soul']
RINI 63 ['alternative r&b', 'australian r&b', 'bedroom soul', 'indie r&b']
NIKI 78 ['indie r&b', 'indonesian r&b', 'pop']
bLAck pARty 56 ['alternative r&b', 'indie r&b', 'indie soul']
Raveena 62 ['alternative r&b', 'indie r&b', 'indie soul']
PREP 59 ['alternative r&b', 'indie r&b']
UMI 