In [None]:
# Importing all libraries necessary to run this notebook

import datetime
import os
from math import pi, ceil

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import spotipy
import spotipy.util as util
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
from spotipy.oauth2 import SpotifyClientCredentials
#pp = pprint.PrettyPrinter()

# Spotify API credentials come from the environment so that no secret is stored in
# (or committed with) the notebook. Create an app in the Spotify Developer dashboard
# and export SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET before starting Jupyter;
# a missing variable raises KeyError naming the variable that has to be set.
# NOTE(review): the client secret that used to be hard-coded in this cell has been
# exposed and should be rotated in the dashboard.
cid = os.environ['SPOTIPY_CLIENT_ID']
secret = os.environ['SPOTIPY_CLIENT_SECRET']
# Non-secret settings keep their previous values as defaults.
username = os.environ.get('SPOTIFY_USERNAME', 'utsav507')
uri = os.environ.get('SPOTIPY_REDIRECT_URI', 'http://localhost:8888/notebooks/music-viz.ipynb')

# Spotipy is a Python client library for the Spotify Web API.
# Start with an app-only client (client-credentials flow, no user data access).
client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# Ask the user for read access to their library and, if granted, replace the
# app-only client with one that is authorised with the user's token.
scope = 'user-library-read'
token = util.prompt_for_user_token(username, scope, cid, secret, uri)
if token:
    sp = spotipy.Spotify(auth=token)
else:
    # sp stays the app-only client here, so calls that read the user's library will fail.
    print("Can't get token for ", username)

In [None]:
# Report only whether a token was obtained. Printing the token itself would store a
# live access token in the notebook's saved output.
print('Access token acquired:', bool(token))

In [None]:
# Row-aligned accumulators: one entry per saved track, in the order the API returns them.
added_ts_list = []
artist_list = []
title_list = []
# One audio-features dataframe per page; they are concatenated once after the loop
# instead of growing a dataframe inside it.
feature_frames = []

# The saved-tracks endpoint is paginated (Spotify allows at most 50 items per request),
# so more_songs keeps the while loop going until the response has no 'next' page.
# offset_index is the index of the first track of the page being requested.
page_size = 50
more_songs = True
offset_index = 0

# Data wrangling from different API calls and compiling into one dataframe
while more_songs:
    songs = sp.current_user_saved_tracks(limit=page_size, offset=offset_index)
    track_ids = []
    for song in songs['items']:
        track = song['track']
        # collect the track ids of this page for the audio_features call
        track_ids.append(track['id'])
        # time when the song was added
        added_ts_list.append(song['added_at'])
        # title of the song
        title_list.append(track['name'])
        # all artists of the song, comma separated
        artist_list.append(','.join(artist['name'] for artist in track['artists']))

    # Get the audio features of this page. Tracks without an id are not sent to the API,
    # and the API may return None entries for tracks it has no features for; those tracks
    # still get a row (only 'id' filled) so the dataframe stays aligned with the lists above.
    known_ids = [track_id for track_id in track_ids if track_id]
    track_features = sp.audio_features(known_ids) if known_ids else []
    features_by_id = {
        features['id']: features for features in (track_features or []) if features
    }
    if track_ids:
        feature_frames.append(
            pd.DataFrame([features_by_id.get(track_id, {'id': track_id}) for track_id in track_ids])
        )

    if songs['next'] is None:
        # no more songs in the library
        more_songs = False
    else:
        # move on to the next page
        offset_index += songs['limit']

# Build the final dataframe in a single concat; ignore_index gives every track a unique
# row label instead of repeating 0..page_size-1 for each page.
df_saved_tracks = pd.concat(feature_frames, ignore_index=True) if feature_frames else pd.DataFrame()

In [None]:
# Attach the per-track metadata collected during the download:
# time the song was saved, its title, and its artists.
for column_name, column_values in (
    ('added_at', added_ts_list),
    ('song_title', title_list),
    ('artists', artist_list),
):
    df_saved_tracks[column_name] = column_values

In [None]:
# Preview the first five rows of the assembled dataframe
df_saved_tracks.head(n=5)

In [None]:
# Directory the dataframe files are written to and read from. Set MUSIC_REC_DATA_DIR to
# relocate it without editing the notebook; the previous location remains the default.
# os.path.join(..., '') guarantees the trailing separator that the
# `datapath + filename` concatenations in this notebook rely on.
datapath = os.path.join(
    os.environ.get('MUSIC_REC_DATA_DIR', '/Users/utsav/Google Drive/Projects/music-rec/'),
    '',
)

In [None]:
# Persist the dataframe as an HDF5 file under key 'df' (mode 'w' replaces an existing file)
df_saved_tracks.to_hdf(
    path_or_buf=datapath + 'all_tracks.h5',
    key='df',
    mode='w',
)

In [None]:
# Persist the dataframe as UTF-8 text; note the file is tab-delimited despite its .csv extension
df_saved_tracks.to_csv(
    path_or_buf=datapath + 'all_tracks.csv',
    sep='\t',
    encoding='utf-8',
)

In [None]:
# Loading the saved dataframe to perform visualizations

# Define a function to load the dataframe, convert added_at to a datetime object, and normalize tempo
def load_playlist(playlist='all_tracks.h5', data_dir=None):
    '''
    Load a stored playlist dataframe and derive the columns used for visualisation.

    Parameters
    ----------
    playlist : str
        Name of the HDF5 file (dataframe stored under key 'df') inside the data directory.
    data_dir : str, optional
        Directory that contains the file. Defaults to the notebook-level `datapath`.

    Returns
    -------
    pandas.DataFrame
        The stored frame without the 'analysis_url', 'track_href', 'uri' and 'type'
        columns, with 'added_at' converted to datetimes, and with these new columns:
        'added_year', 'added_month', 'added_week' (taken from 'added_at') and
        'tempo_01' (tempo divided by the largest tempo in the frame).

    Raises
    ------
    KeyError
        If one of the columns to drop, 'added_at' or 'tempo' is missing from the file.
    '''
    if data_dir is None:
        # Fall back to the notebook-wide data directory.
        data_dir = datapath

    # Read dataframe and drop the link/bookkeeping columns that are not needed for plots.
    df_playlist = pd.read_hdf(os.path.join(data_dir, playlist), key='df')
    df_playlist = df_playlist.drop(columns=['analysis_url', 'track_href', 'uri', 'type'])

    # Convert to datetime, and extract year, month and week from added_at.
    df_playlist['added_at'] = pd.to_datetime(df_playlist['added_at'])
    added_at = df_playlist['added_at']
    df_playlist['added_year'] = added_at.dt.year
    df_playlist['added_month'] = added_at.dt.month
    # Timestamp.week is read per element on purpose: Series.dt.week is gone in
    # pandas 2.0 and dt.isocalendar() needs pandas >= 1.1, while this works on both.
    df_playlist['added_week'] = added_at.apply(lambda stamp: stamp.week)

    # Scale tempo relative to the fastest track so the largest value becomes 1.
    df_playlist['tempo_01'] = df_playlist['tempo'] / df_playlist['tempo'].max()

    return df_playlist

In [None]:
# Load the stored saved-tracks file and report its (rows, columns)
df_saved = load_playlist(playlist='all_tracks.h5')
print(df_saved.shape)

In [None]:
# Show five randomly chosen rows as a sanity check of the loaded data
df_saved.sample(n=5)