In [1]:
import json
import os
import time
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pandas.plotting import register_matplotlib_converters
# Register pandas' date/time converters with matplotlib.
register_matplotlib_converters()

# Apply matplotlib's 'ggplot' style sheet to figures drawn in this notebook.
plt.style.use('ggplot')

In [2]:
import spotipy
from spotipy.oauth2 import SpotifyOAuth

In [3]:
# Spotify API credentials are read from the environment so that no secret is
# stored in (or committed with) the notebook. Export SPOTIPY_CLIENT_ID and
# SPOTIPY_CLIENT_SECRET before starting the kernel.
# NOTE(review): the client secret that used to be hard-coded here has been
# exposed and should be rotated in the Spotify developer dashboard.
CLIENT_USERNAME = os.environ.get("SPOTIFY_USERNAME", "willfurtado")
SPOTIPY_CLIENT_ID = os.environ.get("SPOTIPY_CLIENT_ID")
SPOTIPY_CLIENT_SECRET = os.environ.get("SPOTIPY_CLIENT_SECRET")
# The redirect URI is not a secret, so the previous value is kept as the default.
SPOTIPY_REDIRECT_URI = os.environ.get("SPOTIPY_REDIRECT_URI", "https://localhost:8888")

In [4]:
# OAuth scope requested from Spotify when the user authorises this notebook.
scope = "user-library-read"

# Spotify client used by every lookup helper below. The username comes from
# the CLIENT_USERNAME constant instead of a second copy of the literal, so the
# two cannot drift apart.
sp = spotipy.Spotify(
    auth_manager=SpotifyOAuth(
        scope=scope,
        client_id=SPOTIPY_CLIENT_ID,
        client_secret=SPOTIPY_CLIENT_SECRET,
        username=CLIENT_USERNAME,
        redirect_uri=SPOTIPY_REDIRECT_URI,
    )
)

In [6]:
def load_streaming_data(src, base_dir='personal_data'):
    """
    Returns a Pandas DataFrame object with the current listening history
    from a Spotify data pull.

    Parameters
    ----------
    src : str
        Name of the folder (inside ``base_dir``) in which the data is stored.
    base_dir : str or path-like, optional
        Directory that holds the per-pull folders. Defaults to
        ``'personal_data'``, resolved against the working directory.

    Returns
    -------
    pandas.DataFrame
        The records of every ``StreamingHistory*.json`` file in the folder,
        concatenated under a fresh 0..n-1 index. ``endTime`` is parsed to
        datetime and ``msPlayed`` is replaced by ``secPlayed`` (seconds,
        rounded to one decimal place).

    Raises
    ------
    FileNotFoundError
        If the folder holds no ``StreamingHistory*.json`` file.
    """
    folder = Path(base_dir) / src

    # Sort by (name length, name) so StreamingHistory2 is read before
    # StreamingHistory10; a plain alphabetical sort would swap them.
    history_files = sorted(
        folder.glob('StreamingHistory*.json'),
        key=lambda path: (len(path.name), path.name),
    )
    if not history_files:
        raise FileNotFoundError(
            'No StreamingHistory*.json files found in {}'.format(folder)
        )

    frames = []
    for history_file in history_files:
        # Explicit encoding so non-ASCII titles do not depend on the platform
        # default (assumes the export is UTF-8 JSON -- TODO confirm).
        with open(history_file, encoding='utf-8') as file:
            frames.append(pd.DataFrame(json.load(file)))

    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    df = pd.concat(frames, ignore_index=True)
    df['secPlayed'] = (df['msPlayed'] / 1000).round(1)
    df = df.drop(columns=['msPlayed'])

    STRTIME_FORMAT = '%Y-%m-%d %H:%M'
    df['endTime'] = pd.to_datetime(df['endTime'], format=STRTIME_FORMAT)

    return df

In [6]:
# Load the listening history stored in the "winter20" folder, then print a
# summary of its columns, dtypes and non-null counts.
df = load_streaming_data("winter20")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20367 entries, 0 to 20366
Data columns (total 4 columns):
endTime       20367 non-null datetime64[ns]
artistName    20367 non-null object
trackName     20367 non-null object
secPlayed     20367 non-null float64
dtypes: datetime64[ns](1), float64(1), object(2)
memory usage: 636.6+ KB


In [13]:
# Rank artists by number of listening records (rows) and keep the first five.
top_five_artists = (
    df.groupby("artistName")
    .count()
    .sort_values(by="endTime", ascending=False)
    .head(5)
)
print("* TOP FIVE ARTISTS: * \n\n", top_five_artists.index.values)

* TOP FIVE ARTISTS: * 

 ['Frank Ocean' 'Drake' 'Kanye West' 'Bon Iver' 'J. Cole']


In [18]:
# Total seconds listened per track title, longest first. Only secPlayed is
# summed: on pandas 2.x a bare .sum() also tries to add up the datetime
# endTime column and raises TypeError.
# NOTE(review): grouping by trackName alone merges different songs that share
# a title -- confirm this is intended.
df.groupby("trackName")[["secPlayed"]].sum().sort_values(by='secPlayed', ascending=False)

Unnamed: 0_level_0,secPlayed
trackName,Unnamed: 1_level_1
The Lark Ascending,31430.3
René,16611.8
TOO DEEP TO TURN BACK,16585.5
Self Control,15778.2
All I've Ever Known,15419.6
...,...
Breathe,0.0
The Klein Bottle Guy - with Cliff Stoll,0.0
Delicious Problems - with Hannah Fry,0.0
Keep It Simple (feat. Wilder Woods),0.0


In [72]:
# The 100 track titles with the most total listening time. Only secPlayed is
# summed: on pandas 2.x a bare .sum() also tries to add up the datetime
# endTime column and raises TypeError.
top_100_tracks = (
    df.groupby("trackName")[["secPlayed"]]
    .sum()
    .sort_values("secPlayed", ascending=False)
    .head(100)
)

In [73]:
# Inspect the 100 track titles with the most total listening time.
top_100_tracks

Unnamed: 0_level_0,secPlayed
trackName,Unnamed: 1_level_1
The Lark Ascending,31430.3
René,16611.8
TOO DEEP TO TURN BACK,16585.5
Self Control,15778.2
All I've Ever Known,15419.6
...,...
Pork Belly,5657.6
Ghost Town,5655.8
Sex in Quarantine,5647.9
Where Is Love?,5606.1


In [44]:
def get_spotify_uri(song):
    """Returns the Spotify track ID of the first search hit for a song title.

    Despite the function name, the value returned is the hit's ``id`` field,
    not a ``spotify:track:...`` URI.

    Parameters
    ----------
    song : str
        Title to search for. It is sent as the whole query, so the first hit
        may be a different recording that shares the title.

    Returns
    -------
    str or None
        The ``id`` of the first track returned by the search, or ``None``
        (after printing a message) when the search yields no usable result.
    """
    try:
        search_results = sp.search(q=song, type='track', limit=1)
        return search_results['tracks']['items'][0]['id']
    except (AttributeError, IndexError, KeyError, TypeError):
        # An empty hit list, a missing key or an empty response all mean
        # "no result"; report it and let the caller see an explicit None.
        print('No results for {}'.format(song))
        return None

In [50]:
def get_track_length(uri):
    """
    Returns the track length (in seconds) of the given Spotify URI

    Parameters
    ----------
    uri : str or None
        Track identifier passed to ``sp.track``. ``None`` (for example from a
        failed search) is reported without calling the API.

    Returns
    -------
    float or None
        ``duration_ms`` of the track divided by 1000, or ``None`` (after
        printing a message) when the lookup fails.
    """
    if uri is None:
        print("Cannot find track length for URI: {}".format(uri))
        return None
    try:
        return sp.track(uri)['duration_ms'] / 1000
    except Exception:
        # Deliberately best-effort, but no longer a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still stop a long-running loop.
        print("Cannot find track length for URI: {}".format(uri))
        return None

In [46]:
# One search per title, in the same order as top_100_tracks.index.
indices = [get_spotify_uri(track) for track in top_100_tracks.index]

In [74]:
# Attach the looked-up track IDs as a new column. The assignment is
# positional, so `indices` must have been built from this same
# top_100_tracks.index (same length and order).
top_100_tracks['uri'] = indices

In [75]:
# Inspect the table now that the `uri` column has been added.
top_100_tracks

Unnamed: 0_level_0,secPlayed,uri
trackName,Unnamed: 1_level_1,Unnamed: 2_level_1
The Lark Ascending,31430.3,3ATo60fpicfxwZ0daRatlU
René,16611.8,6Rt6KwuF7I8ZkdZG2G0bYr
TOO DEEP TO TURN BACK,16585.5,5GUYJTQap5F3RDQiCOJhrS
Self Control,15778.2,2rk0lCpO1vkKAQ6BC8bjUX
All I've Ever Known,15419.6,63UNqAjFomMcZXc2InOxTC
...,...,...
Pork Belly,5657.6,1e3GEwsaXtBNtPfooQimDn
Ghost Town,5655.8,4k7x3QKrc3h3U0Viqk0uop
Sex in Quarantine,5647.9,4sNbuMzrMv05y0g3jnkFjP
Where Is Love?,5606.1,0FlfN5cbUUpIHCRH8X1M44


In [55]:
# One duration lookup per stored ID, in the row order of top_100_tracks.
lengths = [get_track_length(uri) for uri in top_100_tracks['uri']]

Cannot find track length for URI: 6GQ82BpN60dzztUWzYojc7


In [76]:
# Attach the looked-up durations (seconds) as a new column. The assignment is
# positional, so `lengths` must follow the row order of top_100_tracks.
top_100_tracks['duration'] = lengths

In [77]:
# Manually set the duration (seconds) for this one track.
# NOTE(review): presumably this is the track whose duration lookup failed
# above, with 252.021 entered by hand -- confirm the source of the value.
top_100_tracks.loc["Perfect Ten (feat. Nipsey Hussle)", "duration"] = 252.021

In [78]:
# Inspect the table now that the `duration` column has been added and patched.
top_100_tracks

Unnamed: 0_level_0,secPlayed,uri,duration
trackName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
The Lark Ascending,31430.3,3ATo60fpicfxwZ0daRatlU,264.080
René,16611.8,6Rt6KwuF7I8ZkdZG2G0bYr,284.130
TOO DEEP TO TURN BACK,16585.5,5GUYJTQap5F3RDQiCOJhrS,249.667
Self Control,15778.2,2rk0lCpO1vkKAQ6BC8bjUX,260.160
All I've Ever Known,15419.6,63UNqAjFomMcZXc2InOxTC,318.533
...,...,...,...
Pork Belly,5657.6,1e3GEwsaXtBNtPfooQimDn,211.800
Ghost Town,5655.8,4k7x3QKrc3h3U0Viqk0uop,289.560
Sex in Quarantine,5647.9,4sNbuMzrMv05y0g3jnkFjP,203.186
Where Is Love?,5606.1,0FlfN5cbUUpIHCRH8X1M44,304.640


In [82]:
# Titles of the top-100 tracks as a plain Python list.
TOP_SONGS = top_100_tracks.index.tolist()

In [90]:
# Every listening record whose title is one of the top-100 tracks.
full_top_100 = df.loc[df['trackName'].isin(TOP_SONGS)]

In [110]:
# Index the top-100 listening records by track title. The frame is rebuilt
# from `df` instead of calling set_index(inplace=True): the in-place version
# mutated a filtered slice and raised KeyError when the cell was run a second
# time, because `trackName` was no longer a column.
full_top_100 = df[df['trackName'].isin(TOP_SONGS)].set_index("trackName")

In [111]:
# Re-display top_100_tracks (not modified since it was last shown) for
# reference before the merge.
top_100_tracks

Unnamed: 0_level_0,secPlayed,uri,duration
trackName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
The Lark Ascending,31430.3,3ATo60fpicfxwZ0daRatlU,264.080
René,16611.8,6Rt6KwuF7I8ZkdZG2G0bYr,284.130
TOO DEEP TO TURN BACK,16585.5,5GUYJTQap5F3RDQiCOJhrS,249.667
Self Control,15778.2,2rk0lCpO1vkKAQ6BC8bjUX,260.160
All I've Ever Known,15419.6,63UNqAjFomMcZXc2InOxTC,318.533
...,...,...,...
Pork Belly,5657.6,1e3GEwsaXtBNtPfooQimDn,211.800
Ghost Town,5655.8,4k7x3QKrc3h3U0Viqk0uop,289.560
Sex in Quarantine,5647.9,4sNbuMzrMv05y0g3jnkFjP,203.186
Where Is Love?,5606.1,0FlfN5cbUUpIHCRH8X1M44,304.640


In [141]:
# Join every top-100 listening record with its per-title totals, ID and
# duration. Both frames are indexed by track title; the shared `secPlayed`
# column comes out as secPlayed_x (per record) and secPlayed_y (per-title sum).
complete_df = pd.merge(
    full_top_100,
    top_100_tracks,
    how='outer',
    left_index=True,
    right_index=True,
    suffixes=('_x', '_y'),
)

In [142]:
# Drop the per-title total carried over from top_100_tracks; only the
# per-record secPlayed_x is used below. errors="ignore" keeps the cell
# re-runnable: without it a second run raises KeyError because the column has
# already been removed.
complete_df = complete_df.drop(columns="secPlayed_y", errors="ignore")
complete_df

Unnamed: 0_level_0,endTime,artistName,secPlayed_x,uri,duration
trackName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"#118 – Grant Sanderson: Math, Manim, Neural Networks & Teaching with 3Blue1Brown",2020-08-25 19:03:00,Lex Fridman Podcast,1853.1,6iq7Jv43ULC1zRqtdpYDnI,201.272
"#118 – Grant Sanderson: Math, Manim, Neural Networks & Teaching with 3Blue1Brown",2020-08-25 20:26:00,Lex Fridman Podcast,274.8,6iq7Jv43ULC1zRqtdpYDnI,201.272
"#118 – Grant Sanderson: Math, Manim, Neural Networks & Teaching with 3Blue1Brown",2020-08-26 00:28:00,Lex Fridman Podcast,3045.6,6iq7Jv43ULC1zRqtdpYDnI,201.272
"#118 – Grant Sanderson: Math, Manim, Neural Networks & Teaching with 3Blue1Brown",2020-08-26 03:21:00,Lex Fridman Podcast,957.5,6iq7Jv43ULC1zRqtdpYDnI,201.272
"#137 – Alex Filippenko: Supernovae, Dark Energy, Aliens & the Expanding Universe",2020-11-08 20:51:00,Lex Fridman Podcast,2714.1,3Zau3JcrZcBfw8aVjKGkSX,469.830
...,...,...,...,...,...
when the party's over,2020-11-18 19:11:00,Billie Eilish,2.1,4S4Mfvv03M1cHgIOJcbUCL,133.680
when the party's over,2020-11-26 08:47:00,Billie Eilish,138.0,4S4Mfvv03M1cHgIOJcbUCL,133.680
when the party's over,2020-11-26 20:37:00,Billie Eilish,28.6,4S4Mfvv03M1cHgIOJcbUCL,133.680
when the party's over,2020-11-30 13:25:00,Billie Eilish,196.1,4S4Mfvv03M1cHgIOJcbUCL,133.680


In [157]:
# Fraction of the track's length covered by each listening record
# (seconds played divided by track duration).
complete_df['plays'] = complete_df['secPlayed_x'].div(complete_df['duration'])

In [164]:
# Total per-title "plays", most-played first. numeric_only=True restricts the
# sum to the numeric columns; on pandas 2.x a bare .sum() also tries to add up
# the datetime endTime column and raises TypeError.
# Note: the summed `duration` column is the per-track duration added once per
# listening record, so only `secPlayed_x` and `plays` are meaningful totals.
top_100_plays = (
    complete_df.groupby(complete_df.index)
    .sum(numeric_only=True)
    .sort_values(by="plays", ascending=False)
)

In [165]:
# Re-display top_100_tracks; the merge and aggregation cells above build new
# frames and do not modify it.
top_100_tracks

Unnamed: 0_level_0,secPlayed,uri,duration
trackName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
The Lark Ascending,31430.3,3ATo60fpicfxwZ0daRatlU,264.080
René,16611.8,6Rt6KwuF7I8ZkdZG2G0bYr,284.130
TOO DEEP TO TURN BACK,16585.5,5GUYJTQap5F3RDQiCOJhrS,249.667
Self Control,15778.2,2rk0lCpO1vkKAQ6BC8bjUX,260.160
All I've Ever Known,15419.6,63UNqAjFomMcZXc2InOxTC,318.533
...,...,...,...
Pork Belly,5657.6,1e3GEwsaXtBNtPfooQimDn,211.800
Ghost Town,5655.8,4k7x3QKrc3h3U0Viqk0uop,289.560
Sex in Quarantine,5647.9,4sNbuMzrMv05y0g3jnkFjP,203.186
Where Is Love?,5606.1,0FlfN5cbUUpIHCRH8X1M44,304.640
