In [1]:
import os
import json
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOAuth
import spotipy.util as util
import pandas as pd
from datetime import datetime
import psycopg2

In [2]:
scopes = ['user-top-read', 'user-read-recently-played']

In [3]:
def refresh():
    '''
    Refresh the access token - required every hour
    '''
    global token_info, sp

    if sp_oauth.is_token_expired(token_info):
        token_info = sp_oauth.refresh_access_token(token_info['refresh_token'])
        token = token_info['access_token']
        sp = spotipy.Spotify(auth=token)

In [43]:
def get_top_tracks(sp, n=50, offset=0, time_range='long_term'):
    '''
    Pull the user top N most played tracks for the user
    '''
    
    results = sp.current_user_top_tracks(limit=n, offset=offset, time_range=time_range)
    
    return results
  

def parse_top_tracks(results):
    '''
    parse the track object from api response and return a dataframe of the results
    '''
  
    df = pd.DataFrame(columns=['track_id', 'artist_id', 'artist_ids', 'name', 'duration_ms',
                               'explicity', 'popularity', 'album_id'])
    
    for result in results['items']:
        track_id = result['id']
        artist_ids = [artist['id'] for artist in result['artists']]
        artist_id = result['artists'][0]['id']
        name = result['name']
        duration_ms = result['duration_ms']
        explicit = result['explicit']
        popularity = result['popularity']
        album_id = result['album']['id']
    
        df.loc[len(df)] = [track_id, artist_id, artist_ids, name, duration_ms, explicit, popularity, album_id]
    
    return df

In [37]:
def get_recently_played(sp, n=50):
    '''
    get the recently played tracks
    '''

    return sp.current_user_recently_played(limit=n)

def parse_recently_played(results):
    '''
    parse the recently played tracks to get info and play time
    '''

    df = pd.DataFrame(columns=['track_id', 'artist_id', 'artist_ids', 'name', 'duration_ms',
                               'explicity', 'popularity', 'album_id', 'played_at'])
    
    for result in results['items']:
        # print(result['track'].keys(), result['played_at'])
        track_id = result['track']['id']
        artist_ids = [artist['id'] for artist in result['track']['artists']]
        artist_id = result['track']['artists'][0]['id']
        name = result['track']['name']
        duration_ms = result['track']['duration_ms']
        explicit = result['track']['explicit']
        popularity = result['track']['popularity']
        album_id = result['track']['album']['id']
        played_at = result['played_at']

        df.loc[len(df)] = [track_id, artist_id, artist_ids, name, duration_ms, explicit, popularity, album_id, played_at]

    return df

In [40]:
recently_played = get_recently_played(sp)
recently_played_df = parse_recently_played(recently_played)

In [42]:
recently_played_df

Unnamed: 0,track_id,artist_id,artist_ids,name,duration_ms,explicity,popularity,album_id,played_at
0,5yQ9iMZXGcr5rlO4hoLsP4,1eYhYunlNJlDoQhtYBvPsi,[1eYhYunlNJlDoQhtYBvPsi],"What'd I Say, Pt. 1 & 2",307053,False,61,4tfWxufmH725XmTcQFAuXn,2023-04-30T20:11:00.258Z
1,4ZVZBc5xvMyV3WzWktn8i7,5m8H6zSadhu1j9Yi04VLqD,[5m8H6zSadhu1j9Yi04VLqD],Everyday People,141506,False,68,7iwS1r6JHYJe9xpPjzmWqD,2023-04-30T20:05:52.188Z
2,3Vby4nGmtbDo7HDJamOWkT,7bPU7cvfoD20ixGD9Qnqki,[7bPU7cvfoD20ixGD9Qnqki],Stuck In The Middle With You,208946,False,78,5ApN9lqru1t3Xh1IaEGTll,2023-04-30T19:59:53.816Z
3,4fQMGlCawbTkH9yPPZ49kP,2vDV0T8sxx2ENnKXds75e5,[2vDV0T8sxx2ENnKXds75e5],Green Onions,176333,False,71,2aGFVLz0oQPa3uxCfq9lcU,2023-04-30T19:56:25.105Z
4,6vxHp3CDNo0afgKGp2yi1E,2CvCyf1gEVhI0mX6aFXmVI,[2CvCyf1gEVhI0mX6aFXmVI],Me and Julio Down by the Schoolyard,164813,False,74,7npBPiCHjPj8PVIGPuHXep,2023-04-30T19:53:28.686Z
5,0Jl5bIEve3A4axcjY3EgDZ,3pFCERyEiP5xeN2EsPXhjI,[3pFCERyEiP5xeN2EsPXhjI],Feelin' Alright,250840,False,64,74sIm8QdXqFwYeDS7OfYVw,2023-04-30T19:47:17.143Z
6,17S4XrLvF5jlGvGCJHgF51,4tX2TplrkIP4v05BNC903e,[4tX2TplrkIP4v05BNC903e],Learning To Fly,242106,False,73,42G5ULkCRRl3crJMlg6eKd,2023-04-30T19:43:05.401Z
7,2dtK02TSAuTvVYU2wGAVG0,1667U2YoucgG5Pdvm2M59c,[1667U2YoucgG5Pdvm2M59c],Still the One,234040,False,64,47YQQPzYY32yZbWC40mlku,2023-04-30T19:39:02.772Z
8,2u0enIyVRQyqC3PsPG8ZTd,0vYQRW5LIDeYQOccTviQNX,[0vYQRW5LIDeYQOccTviQNX],The Winner,298346,False,14,4CCnyV214M5zE6FKWcWjsV,2023-04-29T20:34:17.994Z
9,4RrTSGP3jRe3Cqtz3QCcd4,7x83XhcMbOTl1UdYsPTuZM,[7x83XhcMbOTl1UdYsPTuZM],Make It Rain,218626,False,40,6F17MbloTeRoXs7JI3lTHy,2023-04-29T19:54:23.440Z


In [6]:
sp_oauth = SpotifyOAuth(scope=scopes)
token_info = sp_oauth.get_cached_token() 
token = token_info['access_token']


if not token_info:
    auth_url = sp_oauth.get_authorize_url()
    print(auth_url)
    response = input('Paste the above link into your browser, then paste the redirect url here: ')

    code = sp_oauth.parse_response_code(response)
    token_info = sp_oauth.get_access_token(code)

    token = token_info['access_token']

sp = spotipy.Spotify(auth=token)

In [7]:
top_tracks_result = get_top_tracks(sp)
top_tracks_df = parse_top_tracks(top_tracks_result)

In [8]:
recently_played_result = sp.current_user_recently_played()

In [27]:
recently_played_result['items'][0]['played_at']

'2023-04-30T20:11:00.258Z'

In [45]:
sp.album('7iwS1r6JHYJe9xpPjzmWqD').keys()

dict_keys(['album_group', 'album_type', 'artists', 'available_markets', 'copyrights', 'external_ids', 'external_urls', 'genres', 'href', 'id', 'images', 'label', 'name', 'popularity', 'release_date', 'release_date_precision', 'total_tracks', 'tracks', 'type', 'uri'])

In [46]:
sp.albums??

[0;31mSignature:[0m [0msp[0m[0;34m.[0m[0malbums[0m[0;34m([0m[0malbums[0m[0;34m,[0m [0mmarket[0m[0;34m=[0m[0;32mNone[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mSource:[0m   
    [0;32mdef[0m [0malbums[0m[0;34m([0m[0mself[0m[0;34m,[0m [0malbums[0m[0;34m,[0m [0mmarket[0m[0;34m=[0m[0;32mNone[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m        [0;34m""" returns a list of albums given the album IDs, URIs, or URLs[0m
[0;34m[0m
[0;34m            Parameters:[0m
[0;34m                - albums - a list of  album IDs, URIs or URLs[0m
[0;34m                - market - an ISO 3166-1 alpha-2 country code[0m
[0;34m        """[0m[0;34m[0m
[0;34m[0m[0;34m[0m
[0;34m[0m        [0mtlist[0m [0;34m=[0m [0;34m[[0m[0mself[0m[0;34m.[0m[0m_get_id[0m[0;34m([0m[0;34m"album"[0m[0;34m,[0m [0ma[0m[0;34m)[0m [0;32mfor[0m [0ma[0m [0;32min[0m [0malbums[0m[0;34m][0m[0;34m[0m
[0;34m[0m        [0;32mif[0m [0mmarket

In [64]:
recently_played_df = parse_tracks(recently_played_result)

KeyError: 'id'

In [43]:
recently_played_result['items'][0]

{'track': {'album': {'album_group': 'album',
   'album_type': 'album',
   'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/1eYhYunlNJlDoQhtYBvPsi'},
     'href': 'https://api.spotify.com/v1/artists/1eYhYunlNJlDoQhtYBvPsi',
     'id': '1eYhYunlNJlDoQhtYBvPsi',
     'name': 'Ray Charles',
     'type': 'artist',
     'uri': 'spotify:artist:1eYhYunlNJlDoQhtYBvPsi'}],
   'available_markets': ['AD',
    'AE',
    'AG',
    'AL',
    'AM',
    'AO',
    'AR',
    'AT',
    'AU',
    'AZ',
    'BA',
    'BB',
    'BD',
    'BE',
    'BF',
    'BG',
    'BH',
    'BI',
    'BJ',
    'BN',
    'BO',
    'BR',
    'BS',
    'BT',
    'BW',
    'BY',
    'BZ',
    'CA',
    'CD',
    'CG',
    'CH',
    'CI',
    'CL',
    'CM',
    'CO',
    'CR',
    'CV',
    'CW',
    'CY',
    'CZ',
    'DE',
    'DJ',
    'DK',
    'DM',
    'DO',
    'DZ',
    'EC',
    'EE',
    'EG',
    'ES',
    'ET',
    'FI',
    'FJ',
    'FM',
    'FR',
    'GA',
    'GB',
    'GD',
    'GE

In [44]:
top_tracks_result['items'][0]

{'album': {'album_type': 'ALBUM',
  'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/30niqFGUKKUg1horQSgwBn'},
    'href': 'https://api.spotify.com/v1/artists/30niqFGUKKUg1horQSgwBn',
    'id': '30niqFGUKKUg1horQSgwBn',
    'name': 'Blippi',
    'type': 'artist',
    'uri': 'spotify:artist:30niqFGUKKUg1horQSgwBn'}],
  'available_markets': [],
  'external_urls': {'spotify': 'https://open.spotify.com/album/7CLyVz47Opdfl0hEio8L1Q'},
  'href': 'https://api.spotify.com/v1/albums/7CLyVz47Opdfl0hEio8L1Q',
  'id': '7CLyVz47Opdfl0hEio8L1Q',
  'images': [{'height': 640,
    'url': 'https://i.scdn.co/image/ab67616d0000b273717b41612a0b7ad5d5b83617',
    'width': 640},
   {'height': 300,
    'url': 'https://i.scdn.co/image/ab67616d00001e02717b41612a0b7ad5d5b83617',
    'width': 300},
   {'height': 64,
    'url': 'https://i.scdn.co/image/ab67616d00004851717b41612a0b7ad5d5b83617',
    'width': 64}],
  'name': 'Blippi Tunes, Vol. 2: Machines (Music for Toddlers)',
  'release_

# TODO
* create tables for artists, albums, tracks, top50, recently played
* write rows for top50 to tables (psycopg2)
  * handle not adding duplicates
* set up pipeline to pull recently played daily (prefect)
* get everything on git
* turn this all into OOP
* build viz (tableau? looker? hex?)
* 