In [1]:
# Use Python to create your own Spotify Playlist!
    
# Prerequisite: User must have a Spotify account and have installed everthing in the README of this project @
# https://github.com/wjewell3/spotipy/blob/master/README.md

# Description: This tutorial provides an intro to the Python programming language.
    # ***OPTIONAL*** cells cover the following topics, but are not necessary to create a playlist:

# Python Topics Covered:
    # 1. Defining Variables
    # 2. Type/Class
    # 3. Printing
    # 4. Modules
    # 5. Try/Except
    # 6. Functions
    # 7. Variables defined in Functions
    # 8. Lists
    # 9. APIs and the requests Module
    # 10. Dictionaries
    # 11. Nested Values
    # 12. DataFrames
    # 13. Accessing DataFrame data
    # 14. For Loops
    # 15. If Statements
    # 16. Merging DataFrames

print('The logic used to create a Spotify playlist mirrors that of PyMuse, a front-end app I built.')
print('PyMuse is available @ https://spotify-playlist-290119.uc.r.appspot.com')

# Last Revised: 2020-11-01
# By: Will Jewell

# Run the cells below by clicking on the Jupyter Notebook "Run" button above

print('''Let's get started!''')

The logic used to create a Spotify playlist mirrors that of PyMuse, a front-end app I built.
PyMuse is available @ https://spotify-playlist-290119.uc.r.appspot.com
Let's get started!


In [2]:
# The Plan!

# Step 1
# a. Import modules
# b. Establish Spotify credentials

# Step 2
# a. Hit the Spotify API to get a list of your Top Artists (raw data)
# b. Create an empty "Top Artist" Pandas DataFrame
# c. Loop over the list of raw "Top Artist" data, migrating key information into the "Top Artists" Pandas DataFrame
# d. Get genre counts from the "Top Artist" DataFrame, and put them into a Top Artist Genre Count DataFrame.

# Step 3
# a. Hit the Spotify API to get a list of Spotify's Featured Playlists (raw data)
# b. Loop over the list of raw song data, migrating key information into "Featured Playlist" Dataframe

# Step 4
# a. Loop over "Featured Playlist" Dataframe's playlists uris, hitting the API to create a list of songs (raw data)
# b. Loop over the list of raw song data, migrating key information into "Featured Playlist Songs" Dataframe
# c. Loop over the "Featured Playlist Song" Dataframe, hitting the API to get the genres associated with each song,
#    and migrating them into a "Genres" DataFrame
# d. Merge the "Featured Playlist Songs" DataFrame with the "Genres "Dataframe

# Step 5
# a. Merge the "Top Artist Genres" Dataframe with the "Featured Playlist Songs" DataFrame
# b. Sum the genre counts
# c. Create a new playlist
# d. Add songs with a genre score threshold into the new playlist

In [3]:
# ***OPTIONAL***
# Comments - any line starting with # is a comment

In [4]:
# ***OPTIONAL***
# Defining Variables 
a = 'hello world'

# Python has some built in functions like "print", which outputs to console
print(a)

hello world


In [5]:
# ***OPTIONAL***
# There are many types/classes of variables
print(a)
print(type(a)) 
# str = string - this means Python interprets this as text

b = 1
print(b)
print(type(b))
# int = integer - this means Python interprets this as an integer, or whole number

c = 1.5
print(c)
print(type(c))
# float - this means Python interprests this as a number with a decimal

# You can convert variables from one class to another
d = str(c)
print(d)
print(type(d))

hello world
<class 'str'>
1
<class 'int'>
1.5
<class 'float'>
1.5
<class 'str'>


In [6]:
# ***OPTIONAL***
# More on Printing
print('More on printing:\n')
print("Print statements on a newline with the newline\nsymbol\n")
print('''To print 'single' or "double" quotes, surround your print statement in triple quotes.''')

print('\nUse the f operator to print variables along with text:\n')
print(f'{a}, using f operator')

# You can also print things together with commas
print(a, ', using commas')

# Or with +
print(a + ', using +')

# You can only combine strings with other strings with the + operator
print(str(b) + ', using +')

More on printing:

Print statements on a newline with the newline
symbol

To print 'single' or "double" quotes, surround your print statement in triple quotes.

Use the f operator to print variables along with text:

hello world, using f operator
hello world , using commas
hello world, using +
1, using +


In [7]:
# ***OPTIONAL***
# Jupyter notebooks automatically print output if it sits on the last line
b
c
# Notice that "b" does not show an output

1.5

In [8]:
# ***OPTIONAL***
# Importing Python modules
# Modules provide extra functionality to Python

import math
print(math.pi)

# round down
print(math.floor(math.pi))

# round up
print(math.ceil(math.pi))

3.141592653589793
3
4


In [9]:
# ***OPTIONAL***
# Some modules have sub-modules
import time
from time import ctime # current time
print(ctime())

Sun Nov  1 18:58:34 2020


In [10]:
# Step 1 of 5

In [11]:
# Step 1a - Let's import the modules needed to create a Spotify playlist:

# Spotipy (the Spotify API) modules:
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyOAuth

# Credentials to connect to Spotify API:
from config import CONFIG

# Other Python packages
import inspect
import io
import json
from IPython.display import display
from json.decoder import JSONDecodeError
import math
import os
import pandas as pd
import numpy as np 
from pandas.io.json import json_normalize
import pickle
import requests
import subprocess
import sys
import time
from time import ctime
import webbrowser
from datetime import datetime, timedelta

print('Modules imported! You can disregard any warnings above.\n')

  CONFIG = yaml.load(yaml_file)







In [12]:
# ***OPTIONAL***
# Use try, except blocks for error handling
a = 1
b = 2

try:
    print(a + b)
except:
    print('try clause failed')

try:
    print(a + z)
except:
    print('z does not exist')
    
try:
    print(a + z)
except:
    pass 

3
z does not exist


In [13]:
# ***OPTIONAL***
# Functions
print('Use functions to define chunks of logic:')

def f(a,b):
    c = a + b
    return c

f(1,2)

Use functions to define chunks of logic:


3

In [14]:
# ***OPTIONAL***
# Variables can be defined in functions. 

def a():
    var = 'hello world'
    return 

# Note: not all functions have to return anything


# Variables are only accessible globally (i.e. outside function) if they are declared with the global helper

def b():
    global var
    var = 'hello world'
    return

# Let's run function a()
a()

try:
    print(f'var = {var}')
except:
    print('var not defined')
    
b()

print(f'var = {var}')

var not defined
var = hello world


In [15]:
# Step 1b - Use this function to establish (global) Spotify credentials which allow you to connect to Spotify's API:

def define_scope():  
    global scope, sp, user, username, headers
    scope = '''
    playlist-modify-private 
    playlist-modify-public 
    playlist-read-collaborative 
    playlist-read-private
    user-follow-modify 
    user-follow-read 
    user-library-modify 
    user-library-read 
    user-modify-playback-state 
    user-read-currently-playing 
    user-read-email 
    user-read-playback-state 
    user-read-private 
    user-read-recently-played 
    user-top-read'''
    
    # This command logs you in and establishes a spotify object, sp
    sp = spotipy.Spotify(auth_manager=SpotifyOAuth(scope=scope))
    
    # This object has certain attributes (e.g. current_user())
    user = sp.current_user()
    username = user['display_name']
    token = util.prompt_for_user_token(username, scope)
    headers = {'Authorization': "Bearer {}".format(token)}
    return

define_scope()
print(f"Logged into Spotify as {user['display_name']}")

Logged into Spotify as 1254636534


In [16]:
# ***OPTIONAL***
# Lists
print('Python lists\n')

l = [1,2,3]
print(f'list = {l}')

print(type(l),'\n')
l = list('hello world')

print(f'another list = {l}\n')
print(f'Length of list = {len(l)}\n')

# The first index, or accessor, is 0 
print(f'First element of list = {l[0]}')
print(f'First two elements of list = {l[0:2]}')
print(f'First three elements of list = {l[0:3]}')
print(f'All elements of list = {l[:]}')
print(f'Elements of list, up to the fifth item = {l[:5]}')
print(f'Last two elements of list = {l[9:11]}')
print(f'Last element of list = {l[10]}')
print(f'All but the last element of list = {l[:-1]}')
print(f'''Elements of the list joined together with an empty sting: {''.join(l)}''')
         
# Note: triple quotes works if you have a print statement with single quotes.

Python lists

list = [1, 2, 3]
<class 'list'> 

another list = ['h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd']

Length of list = 11

First element of list = h
First two elements of list = ['h', 'e']
First three elements of list = ['h', 'e', 'l']
All elements of list = ['h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd']
Elements of list, up to the fifth item = ['h', 'e', 'l', 'l', 'o']
Last two elements of list = ['l', 'd']
Last element of list = d
All but the last element of list = ['h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l']
Elements of the list joined together with an empty sting: hello world


In [17]:
# Step 2 of 5

In [18]:
# ***OPTIONAL***
# An API holds data that you can GET (retrieve) or POST (push) (e.g. The Spotify API has data on songs)
# You generally need a URL + headers to access an API endpoint - these act like a key to access the API.
# The "requests" module allows Python to make API requests. 
# In the below function, we are making a GET request to retrieve data from the Spotipy API.
# The API sends us this data in json format, which Python handles with json.loads()

In [19]:
# Step 2a - Define a function to get data from the Spotify API
# We can then use this function to get your "top artist" raw data:

def req(url, headers):
    r = requests.get(url, headers=headers)
    return json.loads(r.text)

my_top_artists = req(url = 'https://api.spotify.com/v1/me/top/artists?limit=50',headers=headers)['items']

# To find the URL, i went to the link below, and pulled out the part after curl.
# https://developer.spotify.com/console/get-current-user-top-artists-and-tracks/?type=artists

print(f'''*********************************************************************************************************************\n
Below is the raw "top artist" data returned from a Spotify API GET request (yay, you got some data)!\n
Notice it starts and ends with [], a clue that it is a Python list.\n
It look a bit nasty, but we will use Python to clean it up.\n\n
{my_top_artists}''')

*********************************************************************************************************************

Below is the raw "top artist" data returned from a Spotify API GET request (yay, you got some data)!

Notice it starts and ends with [], a clue that it is a Python list.

It look a bit nasty, but we will use Python to clean it up.


[{'external_urls': {'spotify': 'https://open.spotify.com/artist/5Zx2zsHC975JnRXFDjNG0f'}, 'followers': {'href': None, 'total': 75}, 'genres': [], 'href': 'https://api.spotify.com/v1/artists/5Zx2zsHC975JnRXFDjNG0f', 'id': '5Zx2zsHC975JnRXFDjNG0f', 'images': [{'height': 640, 'url': 'https://i.scdn.co/image/ab67616d0000b273d1ccb8b2fa11d4801b4cb2ad', 'width': 640}, {'height': 300, 'url': 'https://i.scdn.co/image/ab67616d00001e02d1ccb8b2fa11d4801b4cb2ad', 'width': 300}, {'height': 64, 'url': 'https://i.scdn.co/image/ab67616d00004851d1ccb8b2fa11d4801b4cb2ad', 'width': 64}], 'name': 'Richard L Hargrave', 'popularity': 42, 'type': 'artist', 'uri': 

In [20]:
# ***OPTIONAL***
# Dictionaries (dicts) - look for curly braces
print('Python dictionaries (dicts)- look for curly braces\n')
d = {'a': 'hello', 'b': 'world'}

print(f'd = {d}\n')
print('A Python dict contains keys and values')
print(f'keys = {list(d.keys())}')
print(f'values = {list(d.values())}')
print('\nAccessing dict values:')

print(f'''dict[key] = value''')
print(f'''d['a'] = {d['a']}''')

Python dictionaries (dicts)- look for curly braces

d = {'a': 'hello', 'b': 'world'}

A Python dict contains keys and values
keys = ['a', 'b']
values = ['hello', 'world']

Accessing dict values:
dict[key] = value
d['a'] = hello


In [21]:
# ***OPTIONAL***
print (f'''Let's look at the first two elements of the "my_top_artists" list:
\n1st Artist:\n{my_top_artists[0]}
\n2nd Artist:\n{my_top_artists[1]}
\nEach element is wrapped in curly braces, a clue that it is a Python dict.''')

Let's look at the first two elements of the "my_top_artists" list:

1st Artist:
{'external_urls': {'spotify': 'https://open.spotify.com/artist/5Zx2zsHC975JnRXFDjNG0f'}, 'followers': {'href': None, 'total': 75}, 'genres': [], 'href': 'https://api.spotify.com/v1/artists/5Zx2zsHC975JnRXFDjNG0f', 'id': '5Zx2zsHC975JnRXFDjNG0f', 'images': [{'height': 640, 'url': 'https://i.scdn.co/image/ab67616d0000b273d1ccb8b2fa11d4801b4cb2ad', 'width': 640}, {'height': 300, 'url': 'https://i.scdn.co/image/ab67616d00001e02d1ccb8b2fa11d4801b4cb2ad', 'width': 300}, {'height': 64, 'url': 'https://i.scdn.co/image/ab67616d00004851d1ccb8b2fa11d4801b4cb2ad', 'width': 64}], 'name': 'Richard L Hargrave', 'popularity': 42, 'type': 'artist', 'uri': 'spotify:artist:5Zx2zsHC975JnRXFDjNG0f'}

2nd Artist:
{'external_urls': {'spotify': 'https://open.spotify.com/artist/4oV5EVJ0XFWsJKoOvdRPvl'}, 'followers': {'href': None, 'total': 107890}, 'genres': ['alternative pop', 'alternative rock', 'anti-folk', 'art rock', 'dance ro

In [22]:
# ***OPTIONAL***
# Accessing nested values (lists of dicts of lists of ...)
L = [[d, {'c': 'chips', 'd': 'ahoy', 'e': 'matey!'}], {'f': {'ooo': 'lala'}}]
print(f'L = {L}')
print(f'L[0] = {L[0]}')
print(f'L[0][0] = {L[0][0]}')
print(f'''L[0][0]['a'] = {L[0][0]['a']}''')
print(f'''L[1]['f'] = {L[1]['f']}''')
print(f'''L[1]['f']['ooo'] = {L[1]['f']['ooo']}''')

L = [[{'a': 'hello', 'b': 'world'}, {'c': 'chips', 'd': 'ahoy', 'e': 'matey!'}], {'f': {'ooo': 'lala'}}]
L[0] = [{'a': 'hello', 'b': 'world'}, {'c': 'chips', 'd': 'ahoy', 'e': 'matey!'}]
L[0][0] = {'a': 'hello', 'b': 'world'}
L[0][0]['a'] = hello
L[1]['f'] = {'ooo': 'lala'}
L[1]['f']['ooo'] = lala


In [23]:
# ***OPTIONAL***
# Let's access the values in the "my_top_artists" list of dictionaries:

# First artist dictionary is my_top_artists[0]
# Second artist dictionary is my_top_artists[1]

print('First Artist:')
print('   Name = ' + my_top_artists[0]['name'])
print('   Genres = ' + str(my_top_artists[0]['genres']))
print('''   URI (Spotify's "artist key" = ''' + my_top_artists[0]['uri'])

print('\nSecond Artist:')
print('   Name = ' + my_top_artists[1]['name'])
print('   Genres = ' + str(my_top_artists[1]['genres']))
print('''   URI (Spotify's "artist key" = ''' + my_top_artists[1]['uri'])

print('''\nThe artist's genres will be important for helping us decide which songs to add to a playlist later on...''')

First Artist:
   Name = Richard L Hargrave
   Genres = []
   URI (Spotify's "artist key" = spotify:artist:5Zx2zsHC975JnRXFDjNG0f

Second Artist:
   Name = Guided By Voices
   Genres = ['alternative pop', 'alternative rock', 'anti-folk', 'art rock', 'dance rock', 'freak folk', 'garage psych', 'gbvfi', 'indie pop', 'indie rock', 'lo-fi', 'modern power pop', 'noise pop', 'ohio indie', 'post-hardcore', 'power pop']
   URI (Spotify's "artist key" = spotify:artist:4oV5EVJ0XFWsJKoOvdRPvl

The artist's genres will be important for helping us decide which songs to add to a playlist later on...


In [24]:
# ***OPTIONAL***
# Pandas DataFrames
# Think of a Pandas DataFrame as the Python version of an Excel spreadsheet or Database table
# DataFrames can be defined in a variety of ways
# Note: since we imported pandas as pd, we can define a pandas DataFrame as pd.DataFrame()

print('Pandas DataFrames')
df1 = pd.DataFrame({'a': 'hello', 'b': 'world'}, index = [1])
display(df1)
df2 = df1.rename(columns={'a': 'a_renamed', 'b': 'b_renamed'})
display(df2)
# alternatively, use "inplace=True" - this prevents you from having to define a new df
df1.rename(columns={'a': 'a_renamed_inplace', 'b': 'b_renamed_inplace'}, inplace=True)
display(df1)
# You can add a column like this
df1['combined'] = df1['a_renamed_inplace'] ++ df1['b_renamed_inplace']
display(df1)

Pandas DataFrames


Unnamed: 0,a,b
1,hello,world


Unnamed: 0,a_renamed,b_renamed
1,hello,world


Unnamed: 0,a_renamed_inplace,b_renamed_inplace
1,hello,world


Unnamed: 0,a_renamed_inplace,b_renamed_inplace,combined
1,hello,world,helloworld


In [25]:
# ***OPTIONAL***
# More DataFrames
df = pd.DataFrame({'a': [1,2,3,3], 'b': [4,5,6,6]})
display(df)
df = df.set_index('a')
display(df)
df = df.reset_index()
display(df)
df.drop(columns=['b'], inplace=True)
display(df)
df.drop_duplicates(inplace=True)
display(df)
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rank.html
df['pct_rank'] = df['a'].rank(pct=True)
display(df)
l = df['pct_rank'].to_list()
print('\n', l)

df = pd.DataFrame(columns=['empty','df'])
display(df)
df = df.append(pd.DataFrame([['not','anymore']], columns = ['empty','df']))
display(df)

Unnamed: 0,a,b
0,1,4
1,2,5
2,3,6
3,3,6


Unnamed: 0_level_0,b
a,Unnamed: 1_level_1
1,4
2,5
3,6
3,6


Unnamed: 0,a,b
0,1,4
1,2,5
2,3,6
3,3,6


Unnamed: 0,a
0,1
1,2
2,3
3,3


Unnamed: 0,a
0,1
1,2
2,3


Unnamed: 0,a,pct_rank
0,1,0.333333
1,2,0.666667
2,3,1.0



 [0.3333333333333333, 0.6666666666666666, 1.0]


Unnamed: 0,empty,df


Unnamed: 0,empty,df
0,not,anymore


In [26]:
# ***OPTIONAL***
# Accessing Dataframe data
print('\nUse .loc to access dataframe by rows, columns:\n')
df = pd.DataFrame({'a': [1,2,3,3], 'b': [4,5,6,6]})
print('\n\ndf:')
display(df)
print('''\ndf.loc[:,'b']''')
display(df.loc[:,'b'])
print('''\ndf.loc[1,'b']''')
display(df.loc[1,'b'])
print('''\ndisplay(df['b'].unique())''')
display(df['b'].unique())
# You can incorporate logic into a .loc accessor
display(df.loc[df['a'] <= 2])
# You can also stack logic
print(df.loc[df['a'] <= 2].reset_index()['index'].to_list())


print('''\nFantastic resource on Python data structures:\n
http://www.grapenthin.org/teaching/geop501/lectures/lecture_06_data_structures.pdf''')


Use .loc to access dataframe by rows, columns:



df:


Unnamed: 0,a,b
0,1,4
1,2,5
2,3,6
3,3,6



df.loc[:,'b']


0    4
1    5
2    6
3    6
Name: b, dtype: int64


df.loc[1,'b']


5


display(df['b'].unique())


array([4, 5, 6])

Unnamed: 0,a,b
0,1,4
1,2,5


[0, 1]

Fantastic resource on Python data structures:

http://www.grapenthin.org/teaching/geop501/lectures/lecture_06_data_structures.pdf


In [27]:
# Step 2b - create an empty "Top Artist" Pandas DataFrame
print('''\nNext let's define a Pandas DataFrame (currently emtpy) to hold the data from the list of top artists:''')
my_top_artists_df = pd.DataFrame(columns=['artist_uri','name','genre list'])
display(my_top_artists_df)

print('''\nUse the display function to "print" dataframes in jupyter notebooks.''')


Next let's define a Pandas DataFrame (currently emtpy) to hold the data from the list of top artists:


Unnamed: 0,artist_uri,name,genre list



Use the display function to "print" dataframes in jupyter notebooks.


In [28]:
# ***OPTIONAL***
# "For" Loops
print('For Loops:\n')

print('''for i in [1,2,3]:
    print(i**2)\n''')
for i in [1,2,3]:
    print(i**2)

print('''\nfor i in range(1,4):
    print(i**2)''')
for i in range(1,4):
    print(i**2)
    
# Be careful with the indentation - anything indented after the "for" statement will be in the loop.

For Loops:

for i in [1,2,3]:
    print(i**2)

1
4
9

for i in range(1,4):
    print(i**2)
1
4
9


In [29]:
# Step 2c - Loop over the list of raw "Top Artist" data, migrating key information into the "Top Artists" Pandas DataFrame

for i in range(len(my_top_artists)): # for each dictionary item in our "my top artists" list
    uri = my_top_artists[i]['uri'] 
    name = my_top_artists[i]['name'] # assign the value for the key "name" to the variable "name"
    genres = my_top_artists[i]['genres'] 
    # append these variables to the dataframe, which is empty at first, but it will grow with each iteration
    my_top_artists_df = my_top_artists_df.append(pd.DataFrame([[uri, name, genres]], columns = ['artist_uri','name', 'genre list']))

# Use dataframe.head() to only see the top 5 rows of a dataframe
display(my_top_artists_df.head())

# Note: not all artists have genres, so you may see some empty lists

Unnamed: 0,artist_uri,name,genre list
0,spotify:artist:5Zx2zsHC975JnRXFDjNG0f,Richard L Hargrave,[]
0,spotify:artist:4oV5EVJ0XFWsJKoOvdRPvl,Guided By Voices,"[alternative pop, alternative rock, anti-folk,..."
0,spotify:artist:0K1q0nXQ8is36PzOKAMbNe,Spoon,"[alternative dance, alternative rock, austindi..."
0,spotify:artist:6k8oBFzievbIn6XJK0pDpa,Tunng,[folktronica]
0,spotify:artist:37eqxl8DyLd5sQN54wYJbE,Hiss Golden Messenger,"[alternative country, deep new americana, funk..."


In [30]:
# Step 2d - Get genre counts from the "Top Artist" DataFrame, and put them into a Top Artist Genre Count DataFrame.

# The cool thing about Dataframes is they have some built in functions, like "explode".
# Explode takes a column with list values and creates a row for each element of the list.

# FYI, Python and it's associated packages (e.g. to include Pandas) has some great documentation, if you are interested:
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
# Stack Overflow is a great place if you have questions.

genre_exploded_df = my_top_artists_df.explode('genre list')
display(genre_exploded_df.head())

# Next let's use the built in value_counts function to count the times a genre shows up in our top artists
genre_count_df = pd.DataFrame(genre_exploded_df['genre list'].value_counts())

print('Your top artist genre counts:')
display(genre_count_df.head())

print('''\nNotice there is no column name for the genres. \n
That's b/c the genres make up the index, or unique row identifier.\n
Let's push the index out with .reset_index()''')
top_artist_genre_count_reset_index_df = genre_count_df.reset_index()
display(top_artist_genre_count_reset_index_df.head())

print('\n... and rename the column')
top_artist_genre_count_reset_index_df.columns = ['genre','genre count']
# Let's update the Dataframe's name, so we can more easily recall what it is later on
print('\n\ntop_artist_genre_count_df:')
top_artist_genre_count_df = top_artist_genre_count_reset_index_df
display(top_artist_genre_count_df)

Unnamed: 0,artist_uri,name,genre list
0,spotify:artist:5Zx2zsHC975JnRXFDjNG0f,Richard L Hargrave,
0,spotify:artist:4oV5EVJ0XFWsJKoOvdRPvl,Guided By Voices,alternative pop
0,spotify:artist:4oV5EVJ0XFWsJKoOvdRPvl,Guided By Voices,alternative rock
0,spotify:artist:4oV5EVJ0XFWsJKoOvdRPvl,Guided By Voices,anti-folk
0,spotify:artist:4oV5EVJ0XFWsJKoOvdRPvl,Guided By Voices,art rock


Your top artist genre counts:


Unnamed: 0,genre list
indie folk,26
stomp and holler,24
new americana,17
indie rock,17
indie pop,16



Notice there is no column name for the genres. 

That's b/c the genres make up the index, or unique row identifier.

Let's push the index out with .reset_index()


Unnamed: 0,index,genre list
0,indie folk,26
1,stomp and holler,24
2,new americana,17
3,indie rock,17
4,indie pop,16



... and rename the column


top_artist_genre_count_df:


Unnamed: 0,genre,genre count
0,indie folk,26
1,stomp and holler,24
2,new americana,17
3,indie rock,17
4,indie pop,16
...,...,...
116,vermont indie,1
117,modern alternative rock,1
118,shimmer pop,1
119,garage rock,1


In [31]:
# Step 3 of 5

In [32]:
# ***OPTIONAL***
# In the following Jupyter notebook cells, let's combine concepts learned into some larger functions
# Functions can be useful for consolidating more complex sequences of logic

In [33]:
# Step 3a - Hit the Spotify API to get a list of Spotify's Featured Playlists (raw data)
# Step 3b - Loop over the list of raw playlist data, migrating key information into a "Featured Playlist" Dataframe

# How did I find the URL below? I went to the link below and pulled out the part after curl (with some trial and error)
# https://developer.spotify.com/console/get-featured-playlists/?country=US&limit=50

# In case your token has expired (tokens act like URL + headers to access an API), run the define_scope function.
define_scope()

def get_featured_playlist_df():
    print('\nGet raw "featured playlists" from Spotify - Spotify updates these playlists a few times per day!\n')
    url = f"https://api.spotify.com/v1/browse/featured-playlists?limit=50&country=US"
    raw_data = req(url,headers=headers)
    featured_playlist_count = raw_data['playlists']['total']
    print(f'featured playlist count = {featured_playlist_count}')
    print('\nRaw data for 1st playlist:\n')
    display(raw_data['playlists']['items'][0])
    df = pd.DataFrame()
    
    for i in range(0, featured_playlist_count):
        raw_featured_playlist = raw_data['playlists']['items']
        playlist = raw_featured_playlist[i]['name']
        playlist_uri = raw_featured_playlist[i]['uri']
        song_count = raw_featured_playlist[i]['tracks']['total']
        df = df.append(pd.DataFrame([[playlist, playlist_uri, song_count]], columns = ['playlist','playlist_uri','song_count']))
    df.set_index(['playlist'], inplace=True)
    return df

playlist_df = get_featured_playlist_df()
print('\nplaylist_df:')
display(playlist_df)

print('''Notice that the playlist column falls slightly lower than the other two. That's b/c it is actually the index.''')

# Spotify API reference (in beta):
# https://developer.spotify.com/documentation/web-api/reference-beta/


Get raw "featured playlists" from Spotify - Spotify updates these playlists a few times per day!

featured playlist count = 10

Raw data for 1st playlist:



{'collaborative': False,
 'description': 'Up and down I-95. Cover: Sheff G & Sleepy Hallow',
 'external_urls': {'spotify': 'https://open.spotify.com/playlist/37i9dQZF1DX1YPTAhwehsC'},
 'href': 'https://api.spotify.com/v1/playlists/37i9dQZF1DX1YPTAhwehsC',
 'id': '37i9dQZF1DX1YPTAhwehsC',
 'images': [{'height': None,
   'url': 'https://i.scdn.co/image/ab67706f0000000317a8debc8333a988acd0de5d',
   'width': None}],
 'name': 'State of Mind',
 'owner': {'display_name': 'Spotify',
  'external_urls': {'spotify': 'https://open.spotify.com/user/spotify'},
  'href': 'https://api.spotify.com/v1/users/spotify',
  'id': 'spotify',
  'type': 'user',
  'uri': 'spotify:user:spotify'},
 'primary_color': None,
 'public': None,
 'snapshot_id': 'MTYwNDAzMDQ2MCwwMDAwMDQ3YzAwMDAwMTc1NzdhYTcxZWQwMDAwMDE3NGU3ZjUzMGY0',
 'tracks': {'href': 'https://api.spotify.com/v1/playlists/37i9dQZF1DX1YPTAhwehsC/tracks',
  'total': 50},
 'type': 'playlist',
 'uri': 'spotify:playlist:37i9dQZF1DX1YPTAhwehsC'}


playlist_df:


Unnamed: 0_level_0,playlist_uri,song_count
playlist,Unnamed: 1_level_1,Unnamed: 2_level_1
State of Mind,spotify:playlist:37i9dQZF1DX1YPTAhwehsC,50
Deep Sleep,spotify:playlist:37i9dQZF1DWYcDQ1hSjOpY,261
Late Night Vibes,spotify:playlist:37i9dQZF1DXdQvOLqzNHSW,75
Devastating,spotify:playlist:37i9dQZF1DX33TfGBYPkRQ,100
Clout Culture,spotify:playlist:37i9dQZF1DWVk7x1ClrO0Y,50
Deep Dark Indie,spotify:playlist:37i9dQZF1DWTtTyjgd08yp,87
Soft Pop Hits,spotify:playlist:37i9dQZF1DWTwnEm1IYyoj,232
The Newness,spotify:playlist:37i9dQZF1DWUzFXarNiofw,50
Moonlight Bumps,spotify:playlist:37i9dQZF1DXdipfKDeMPTE,119
Lava Lamp,spotify:playlist:37i9dQZF1DWWtqHeytOZ8f,200


Notice that the playlist column falls slightly lower than the other two. That's b/c it is actually the index.


In [34]:
# Step 4 of 5

In [35]:
# Step 4a - Loop over "Featured Playlist" Dataframe's playlist uris, hitting the API to create a list of raw song data

# Since the Spotify API can only handle requests of up to 100 songs at a time,
# let's iterate over the requests to grab all songs

def get_raw_featured_playlist_song_list(df):
    raw_featured_songs = []
    for uri in df.loc[:,'playlist_uri'].to_list():
        song_count = df.loc[df['playlist_uri']==uri,'song_count'][0]
        offset = 0
        url = f"https://api.spotify.com/v1/playlists/{uri.split(':')[2]}/tracks?limit=50&offset={offset}"
        for i in range(int(math.ceil(song_count/100.0))):
            offset = 100*i
            raw_featured_songs.extend(req(url,headers=headers)['items'])
    return raw_featured_songs

raw_featured_playlist_songs_list = get_raw_featured_playlist_song_list(playlist_df)
print('''Let's take a look at the first song's raw data:\n''')
print(raw_featured_playlist_songs_list[0])

Let's take a look at the first song's raw data:

{'added_at': '2020-10-30T04:01:00Z', 'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/'}, 'href': 'https://api.spotify.com/v1/users/', 'id': '', 'type': 'user', 'uri': 'spotify:user:'}, 'is_local': False, 'primary_color': None, 'track': {'album': {'album_type': 'single', 'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/6EPlBSH2RSiettczlz7ihV'}, 'href': 'https://api.spotify.com/v1/artists/6EPlBSH2RSiettczlz7ihV', 'id': '6EPlBSH2RSiettczlz7ihV', 'name': 'Sleepy Hallow', 'type': 'artist', 'uri': 'spotify:artist:6EPlBSH2RSiettczlz7ihV'}], 'available_markets': ['AD', 'AE', 'AL', 'AR', 'AT', 'AU', 'BA', 'BE', 'BG', 'BH', 'BO', 'BR', 'BY', 'CA', 'CH', 'CL', 'CO', 'CR', 'CY', 'CZ', 'DE', 'DK', 'DO', 'DZ', 'EC', 'EE', 'EG', 'ES', 'FI', 'FR', 'GB', 'GR', 'GT', 'HK', 'HN', 'HR', 'HU', 'ID', 'IE', 'IL', 'IN', 'IS', 'IT', 'JO', 'JP', 'KW', 'KZ', 'LB', 'LI', 'LT', 'LU', 'LV', 'MA', 'MC', 'MD', 'ME', 'M

In [36]:
# ***OPTIONAL***
# If statement
a = 1
b = 2

if a == b:
    print('a == b')
else:
    print('a != b')

a != b


In [37]:
# ***OPTIONAL***
# String Split
a = 'hello-world'
b = a.split('-')
print(b)

['hello', 'world']


In [38]:
# Step 4b - Loop over the list of raw song data, migrating key information into "Featured Playlist Songs" Dataframe

define_scope()

def song_raw_data_to_df(raw_songs):
    print('Transforming raw song data list into DataFrame(df)')
    # create empty df
    df = pd.DataFrame(columns = [
        'album', 
        'artist_uri', 
        'artist',
        'track',
        'url',
        'popularity',
        'added_at',
        'release_date',
        'release_year',
        'uri'
        ])
    # Pipe songs into df 1 at a time
    for i in range(0, len(raw_songs)):
        # Print progress every 100 songs
        if (i/100).is_integer():
            print(f"{i}/{len(raw_songs)} parsed")
        try:
            album = raw_songs[i]['track']['album']['name']
            artist_uri = raw_songs[i]['track']['artists'][0]['uri'].split(':')[2]
            artist = raw_songs[i]['track']['album']['artists'][0]['name']
            track = raw_songs[i]['track']['name']
            url = raw_songs[i]['track']['external_urls']['spotify']
            popularity = raw_songs[i]['track']['popularity']
            added_at = raw_songs[i]['added_at']
            release_date = raw_songs[i]['track']['album']['release_date']
            release_year = release_date.split('-')[0]
            uri = raw_songs[i]['track']['uri']
            df = df.append(pd.DataFrame([[
                album, 
                artist_uri,
                artist, 
                track, 
                url, 
                popularity, 
                added_at, 
                release_date, 
                release_year,
                uri
                ]], columns = [
                'album', 
                'artist_uri', 
                'artist',
                'track',
                'url',
                'popularity',
                'added_at',
                'release_date',
                'release_year',
                'uri'
                ]))
        except:
            pass
    # Make an index out of the uri column
    df.set_index('uri', inplace=True)
    return df

featured_playlist_song_df = song_raw_data_to_df(raw_featured_playlist_songs_list)
display(featured_playlist_song_df.head())
print('''\nThe uri is the unique index for the song.
Notice that we don't have genres - we will get these from the artist_uris, like we did for our featured playlists.''')

Transforming raw song data list into DataFrame(df)
0/800 parsed
100/800 parsed
200/800 parsed
300/800 parsed
400/800 parsed
500/800 parsed
600/800 parsed
700/800 parsed


Unnamed: 0_level_0,album,artist_uri,artist,track,url,popularity,added_at,release_date,release_year
uri,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
spotify:track:15xfk8vQvVttTlh3tZn6db,The Black House,6EPlBSH2RSiettczlz7ihV,Sleepy Hallow,Different,https://open.spotify.com/track/15xfk8vQvVttTlh...,59,2020-10-30T04:01:00Z,2020-07-31,2020
spotify:track:1RSzyxqtIO4yX3EyiV4zT5,Whoopty,7arQA31aZVS8yS6zUveWzb,CJ,Whoopty,https://open.spotify.com/track/1RSzyxqtIO4yX3E...,81,2020-10-30T04:01:00Z,2020-08-20,2020
spotify:track:2r6OAV3WsYtXuXjvJ1lIDi,Shoot For The Stars Aim For The Moon (Deluxe),0eDvMgVFoNV3TpwtrVCoTj,Pop Smoke,Hello (feat. A Boogie Wit da Hoodie),https://open.spotify.com/track/2r6OAV3WsYtXuXj...,85,2020-10-30T04:01:00Z,2020-07-20,2020
spotify:track:3bAzFi7wGuOXJdgOYyFDSW,Losses,6jGMq4yGs7aQzuGsMgVgZR,Lil Tjay,Losses,https://open.spotify.com/track/3bAzFi7wGuOXJdg...,65,2020-10-30T04:01:00Z,2020-10-30,2020
spotify:track:2Pa3VfH66eTxLJhfXJblCs,Young Jefe 3,1DvtabXAjfrMihPP6JQdHs,Shy Glizzy,Feel The Vibe (feat. Meek Mill),https://open.spotify.com/track/2Pa3VfH66eTxLJh...,59,2020-10-30T04:01:00Z,2020-09-18,2020



The uri is the unique index for the song.
Notice that we don't have genres - we will get these from the artist_uris, like we did for our featured playlists.


In [39]:
# Step 4c - Loop over the "Featured Playlist Song" Dataframe, hitting the API to get the genres 
#           associated with each song, and migrating them into a "Genres" DataFrame

define_scope()
print('''\nLet's get the genres for the featured playlist songs' artists. 
As we loop over the data, we add it to a dataframe of featured playlist songs.
Please note - this step can take a couple minutes.
If printing every artists gets annoying, feel free to comment that part out.
As noted before, Spotify does not have genres for every artist.\n''')

def get_genres(df):
    g = []
    artist_uris = df['artist_uri'].unique()
    print(f"{len(artist_uris)} distinct artists\n")
    for i in range(0, len(artist_uris)):
        if (i/100).is_integer():
            print(f"{i}/{len(artist_uris)} parsed\n")
        request = req(url = f"https://api.spotify.com/v1/artists/{artist_uris[i]}",headers=headers)
        artist = request['name']
        genres = request['genres']
        print(f"{i}:{artist}:{genres}")
        # You can append to lists just like you can append to Dataframes
        g.append(genres)

    genres_df = pd.DataFrame({'artist_uri': artist_uris, 'genre list': g})
    return genres_df

genre_df = get_genres(featured_playlist_song_df)


Let's get the genres for the featured playlist songs' artists. 
As we loop over the data, we add it to a dataframe of featured playlist songs.
Please note - this step can take a couple minutes.
If printing every artists gets annoying, feel free to comment that part out.
As noted before, Spotify does not have genres for every artist.

417 distinct artists

0/417 parsed

0:Sleepy Hallow:['brooklyn drill', 'nyc rap']
1:CJ:[]
2:Pop Smoke:['brooklyn drill']
3:Lil Tjay:['brooklyn drill', 'melodic rap', 'nyc rap']
4:Shy Glizzy:['dmv rap', 'drill', 'rap', 'southern hip hop', 'trap', 'vapor trap']
5:KJ Balla:[]
6:Rich The Kid:['atl hip hop', 'hip hop', 'melodic rap', 'pop rap', 'rap', 'southern hip hop', 'trap', 'vapor trap']
7:French Montana:['hip hop', 'pop rap', 'rap', 'southern hip hop', 'trap']
8:Lil Tecca:['melodic rap', 'rap', 'trap']
9:Joyner Lucas:['boston hip hop', 'hip hop', 'pop rap', 'rap']
10:Nas:['conscious hip hop', 'east coast hip hop', 'gangster rap', 'hardcore hip hop', 'hip

130:Morphine:['alternative rock', 'blues rock', 'boston rock']
131:Grizzly Bear:['alternative dance', 'art pop', 'baroque pop', 'brooklyn indie', 'chillwave', 'freak folk', 'indie pop', 'indie rock', 'modern rock', 'new weird america', 'nu gaze', 'shimmer pop', 'stomp and holler']
132:Kevin Morby:['freak folk', 'indie folk', 'indie rock', 'kc indie', 'modern folk rock', 'stomp and holler']
133:Washed Out:['alternative dance', 'chillwave', 'indie pop', 'indie rock', 'indietronica', 'neo-synthpop', 'nu gaze', 'shimmer pop']
134:The War On Drugs:['indie rock', 'modern rock', 'philly indie']
135:James Blake:['art pop', 'uk alternative pop']
136:The National:['indie rock', 'modern rock']
137:Hovvdy:['austindie', 'bubblegrunge', 'indie punk', 'indie rock', 'small room']
138:Praything:[]
139:Iron & Wine:['acoustic pop', 'indie folk', 'indie pop', 'neo mellow', 'stomp and holler']
140:Lucy Dacus:['art pop', 'bubblegrunge', 'indie folk', 'indie pop', 'indie rock']
141:Colin Hay:['neo mellow']
1

246:Brooke Annibale:['pop folk']
247:Mt. Wolf:['chamber pop', 'vapor soul']
248:Emily Jane White:['freak folk', 'indie folk']
249:Aoife O'Donovan:['boston folk', 'folk', 'indie folk', 'new americana', 'progressive bluegrass']
250:Ailbhe Reddy:['irish indie']
251:Blanco White:['british singer-songwriter', 'indie anthem-folk', 'indie folk']
252:Florence + The Machine:['art pop', 'baroque pop', 'pop', 'uk alternative pop']
253:Rainbow Kitten Surprise:['indie folk', 'indie pop', 'indie rock', 'modern alternative rock', 'modern rock', 'stomp and holler']
254:SYML:['indie cafe pop', 'pop']
255:whenyoung:['irish indie rock', 'limerick indie']
256:Jason Mraz:['acoustic pop', 'neo mellow', 'pop', 'pop rock']
257:Maroon 5:['pop', 'pop rock']
258:Lewis Capaldi:['pop', 'uk pop']
259:Ed Sheeran:['pop', 'uk pop']
260:Lady Gaga:['dance pop', 'pop']
261:Sam Fischer:['australian pop', 'pop', 'uk pop']
262:Sam Smith:['pop', 'post-teen pop', 'uk pop']
263:John Legend:['neo soul', 'pop', 'pop soul', 'r&b'

388:Kelly Moran:['electra', 'fourth world', 'mandible', 'new isolationism', 'prepared piano']
389:Roger Eno:['ambient', 'art rock', 'compositional ambient', 'fourth world']
390:Autechre:['abstract', 'ambient', 'ambient techno', 'art pop', 'bass music', 'drill and bass', 'electro', 'electronica', 'experimental', 'fourth world', 'glitch', 'intelligent dance music', 'microhouse', 'ninja', 'outsider house', 'techno', 'uk experimental electronic']
391:Four Tet:['alternative dance', 'electronica', 'folktronica', 'indie soul', 'intelligent dance music', 'new rave']
392:Colleen:['ambient', 'art pop', 'compositional ambient', 'drone', 'electra', 'experimental pop', 'folktronica', 'fourth world', 'freak folk']
393:Fax:['microhouse']
394:Aphex Twin:['ambient', 'braindance', 'electronica', 'intelligent dance music', 'uk experimental electronic']
395:Daniel Avery:['ambient techno', 'chamber psych', 'electronica', 'float house', 'intelligent dance music', 'microhouse', 'shiver pop']
396:Nate Young:[

In [40]:
# Step 4c - extended...
# Let's take a look at the genre df we just created
print('genre_df:')
display(genre_df.head())
genre_exploded_df = genre_df.explode('genre list')

print('\n\ngenre_exploded_df:')
# ... and the exploded version
display(genre_exploded_df.head())

genre_df:


Unnamed: 0,artist_uri,genre list
0,6EPlBSH2RSiettczlz7ihV,"[brooklyn drill, nyc rap]"
1,7arQA31aZVS8yS6zUveWzb,[]
2,0eDvMgVFoNV3TpwtrVCoTj,[brooklyn drill]
3,6jGMq4yGs7aQzuGsMgVgZR,"[brooklyn drill, melodic rap, nyc rap]"
4,1DvtabXAjfrMihPP6JQdHs,"[dmv rap, drill, rap, southern hip hop, trap, ..."




genre_exploded_df:


Unnamed: 0,artist_uri,genre list
0,6EPlBSH2RSiettczlz7ihV,brooklyn drill
0,6EPlBSH2RSiettczlz7ihV,nyc rap
1,7arQA31aZVS8yS6zUveWzb,
2,0eDvMgVFoNV3TpwtrVCoTj,brooklyn drill
3,6jGMq4yGs7aQzuGsMgVgZR,brooklyn drill


In [41]:
# ***OPTIONAL***
# Merging Dataframes - this combines rows from two or more dfs, based on a shared index and/or column(s) between them.
# If you know SQL, merging is the same as SQL joining.

df1 = pd.DataFrame({'index': [1,2,3], 'colA': [4,5,6], 'colB': [7,8,9]}).set_index('index')
df2 = pd.DataFrame({'index': [1,2], 'colC': ['hello','world']}).set_index('index')
print('df1:')
display(df1)
print('\n\ndf2:')
display(df2)

print('\n\ndf3 - Inner join - return rows where the index and/or "join-column(s)" match in both dataframes:')
print('''Since df2 does not have a 3rd index, that row is excluded from the results.''')
df3 = df1.merge(df2, on = 'index', how = 'inner')
display(df3)

print('\n\ndf4 - Left join - all data from df1 (left df) is preserved, and matching rows from df2 (right df) are added:')
print('''Where records don't match, null (i.e. blank, empty, or Not a Number (NaN) values result.''')
df4 = df1.merge(df2, on = 'index', how = 'left')
display(df4)

df1:


Unnamed: 0_level_0,colA,colB
index,Unnamed: 1_level_1,Unnamed: 2_level_1
1,4,7
2,5,8
3,6,9




df2:


Unnamed: 0_level_0,colC
index,Unnamed: 1_level_1
1,hello
2,world




df3 - Inner join - return rows where the index and/or "join-column(s)" match in both dataframes:
Since df2 does not have a 3rd index, that row is excluded from the results.


Unnamed: 0_level_0,colA,colB,colC
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,4,7,hello
2,5,8,world




df4 - Left join - all data from df1 (left df) is preserved, and matching rows from df2 (right df) are added:
Where records don't match, null (i.e. blank, empty, or Not a Number (NaN) values result.


Unnamed: 0_level_0,colA,colB,colC
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,4,7,hello
2,5,8,world
3,6,9,


In [42]:
# Step 4d - Merge the "Featured Playlist Songs" DataFrame with the "Genres "Dataframe

def add_genres(df, genre_exploded_df):
    df = df.reset_index().merge(genre_exploded_df, on = 'artist_uri', how = 'left').set_index('uri')
    df.rename(columns={'genre list': 'genre'}, inplace=True)
    return df

print('''Let's add the genres to the featured playlist song df, merging on uri''')
featured_playlist_songs_with_genres_df = add_genres(featured_playlist_song_df, genre_exploded_df)
display(featured_playlist_songs_with_genres_df.head())

Let's add the genres to the featured playlist song df, merging on uri


Unnamed: 0_level_0,album,artist_uri,artist,track,url,popularity,added_at,release_date,release_year,genre
uri,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
spotify:track:15xfk8vQvVttTlh3tZn6db,The Black House,6EPlBSH2RSiettczlz7ihV,Sleepy Hallow,Different,https://open.spotify.com/track/15xfk8vQvVttTlh...,59,2020-10-30T04:01:00Z,2020-07-31,2020,brooklyn drill
spotify:track:15xfk8vQvVttTlh3tZn6db,The Black House,6EPlBSH2RSiettczlz7ihV,Sleepy Hallow,Different,https://open.spotify.com/track/15xfk8vQvVttTlh...,59,2020-10-30T04:01:00Z,2020-07-31,2020,nyc rap
spotify:track:1RSzyxqtIO4yX3EyiV4zT5,Whoopty,7arQA31aZVS8yS6zUveWzb,CJ,Whoopty,https://open.spotify.com/track/1RSzyxqtIO4yX3E...,81,2020-10-30T04:01:00Z,2020-08-20,2020,
spotify:track:2r6OAV3WsYtXuXjvJ1lIDi,Shoot For The Stars Aim For The Moon (Deluxe),0eDvMgVFoNV3TpwtrVCoTj,Pop Smoke,Hello (feat. A Boogie Wit da Hoodie),https://open.spotify.com/track/2r6OAV3WsYtXuXj...,85,2020-10-30T04:01:00Z,2020-07-20,2020,brooklyn drill
spotify:track:3bAzFi7wGuOXJdgOYyFDSW,Losses,6jGMq4yGs7aQzuGsMgVgZR,Lil Tjay,Losses,https://open.spotify.com/track/3bAzFi7wGuOXJdg...,65,2020-10-30T04:01:00Z,2020-10-30,2020,brooklyn drill


In [43]:
# Step 5 of 5

In [44]:
# Step 5a - Merge the "Top Artist Genres" Dataframe with the "Featured Playlist Songs" DataFrame

def add_top_artist_genre_counts(genre_df, song_df):
    df = song_df.reset_index()
    df = df.merge(genre_df , on = 'genre',how = 'left')
    df.drop(columns=['genre'], inplace=True)
    return df

# use the forward slash \ to escape the newline
featured_playlist_songs_with_top_artist_genre_counts = \
add_top_artist_genre_counts(top_artist_genre_count_df, featured_playlist_songs_with_genres_df)

display(featured_playlist_songs_with_top_artist_genre_counts.head())

Unnamed: 0,uri,album,artist_uri,artist,track,url,popularity,added_at,release_date,release_year,genre count
0,spotify:track:15xfk8vQvVttTlh3tZn6db,The Black House,6EPlBSH2RSiettczlz7ihV,Sleepy Hallow,Different,https://open.spotify.com/track/15xfk8vQvVttTlh...,59,2020-10-30T04:01:00Z,2020-07-31,2020,
1,spotify:track:15xfk8vQvVttTlh3tZn6db,The Black House,6EPlBSH2RSiettczlz7ihV,Sleepy Hallow,Different,https://open.spotify.com/track/15xfk8vQvVttTlh...,59,2020-10-30T04:01:00Z,2020-07-31,2020,
2,spotify:track:1RSzyxqtIO4yX3EyiV4zT5,Whoopty,7arQA31aZVS8yS6zUveWzb,CJ,Whoopty,https://open.spotify.com/track/1RSzyxqtIO4yX3E...,81,2020-10-30T04:01:00Z,2020-08-20,2020,
3,spotify:track:2r6OAV3WsYtXuXjvJ1lIDi,Shoot For The Stars Aim For The Moon (Deluxe),0eDvMgVFoNV3TpwtrVCoTj,Pop Smoke,Hello (feat. A Boogie Wit da Hoodie),https://open.spotify.com/track/2r6OAV3WsYtXuXj...,85,2020-10-30T04:01:00Z,2020-07-20,2020,
4,spotify:track:3bAzFi7wGuOXJdgOYyFDSW,Losses,6jGMq4yGs7aQzuGsMgVgZR,Lil Tjay,Losses,https://open.spotify.com/track/3bAzFi7wGuOXJdg...,65,2020-10-30T04:01:00Z,2020-10-30,2020,


In [45]:
# ***OPTIONAL***
# Dataframe aggregation (e.g. summing, counting, etc.)
# To do any aggregation, you must group by all columns EXCEPT the ones you are aggregating

df1 = pd.DataFrame({'colA': ['hello', 'hello', 'hello', 'world'], 'colB': ['gottem','coach','coach','coach'], 'colC': [1,2,3,4]})
print('df1:')
display(df1)

df2 = df1.groupby(['colA','colB']).sum()
print('\n\ndf2 - sum values in colC:')
display(df2)

df3 = df2.sort_values('colC', ascending=False)
print('\n\ndf3 - sort rows by values in colC, ascending:')
display(df3)

df1:


Unnamed: 0,colA,colB,colC
0,hello,gottem,1
1,hello,coach,2
2,hello,coach,3
3,world,coach,4




df2 - sum values in colC:


Unnamed: 0_level_0,Unnamed: 1_level_0,colC
colA,colB,Unnamed: 2_level_1
hello,coach,5
hello,gottem,1
world,coach,4




df3 - sort rows by values in colC, ascending:


Unnamed: 0_level_0,Unnamed: 1_level_0,colC
colA,colB,Unnamed: 2_level_1
hello,coach,5
world,coach,4
hello,gottem,1


In [46]:
# Step 4b - Sum the genre counts

def sum_genre_col(df):
    groupby_cols = df.columns.to_list()[:-1]
    df = df.groupby(groupby_cols).sum().sort_values('genre count', ascending=False)
    df = df.rename({'genre count': 'genre score'}, axis=1).reset_index() 
    # reset index after summing - otherwise everything prior to sum will become index
    df.set_index('uri', inplace=True)
    return df

featured_playlist_songs_with_top_artist_genre_score = \
sum_genre_col(featured_playlist_songs_with_top_artist_genre_counts)

display(featured_playlist_songs_with_top_artist_genre_score.head())

Unnamed: 0_level_0,album,artist_uri,artist,track,url,popularity,added_at,release_date,release_year,genre score
uri,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
spotify:track:6XcfKZvJio9Z0fQy11GnNX,finding it hard to smile,4KJ6jujcNPzOyhdNoiNftp,lovelytheband,broken,https://open.spotify.com/track/6XcfKZvJio9Z0fQ...,75,2020-10-23T19:02:00Z,2018-08-03,2018,138.0
spotify:track:2hqhIH3UeFZ4Sa2eDpbEMw,There's a Riot Going On,5hAhrnb0Ch4ODwWu4tsbpi,Yo La Tengo,Shortwave,https://open.spotify.com/track/2hqhIH3UeFZ4Sa2...,33,2020-10-30T06:17:19Z,2018-03-16,2018,128.0
spotify:track:0VXp4zDspR993l2hIKW17g,You Forgot It In People,7lOJ7WXyopaxri0dbOiZkd,Broken Social Scene,Anthems For A Seventeen Year-Old Girl,https://open.spotify.com/track/0VXp4zDspR993l2...,57,2020-10-16T04:01:00Z,2003-03-11,2003,122.0
spotify:track:3Cl35xMXJUEhpPaYBeNUQR,You Forgot It In People,7lOJ7WXyopaxri0dbOiZkd,Broken Social Scene,Lover's Spit,https://open.spotify.com/track/3Cl35xMXJUEhpPa...,47,2020-10-16T04:01:00Z,2003-03-11,2003,122.0
spotify:track:5oUV6yWdDM0R9Q2CizRhIt,"Oh, Inverted World",4LG4Bs1Gadht7TCrMytQUO,The Shins,New Slang,https://open.spotify.com/track/5oUV6yWdDM0R9Q2...,2,2020-10-16T04:01:00Z,2001-06-19,2001,122.0


In [47]:
# Step 4c - Create an empty Spotify playlist to store the songs
# Like API GET requests, POST requests (sending data to the API) also need the URL + headers
# We also need the data we are sending, called a payload, which must be formatted as a json object (basically, a dict)
# I got the URL for this and the format for the request payload/body @ https://developer.spotify.com/console/post-playlists/

define_scope()

def create_playlist():
    payload = {"name": f'Fresh Jams, {ctime()}'}
    result = requests.post(url = f'https://api.spotify.com/v1/users/{username}/playlists'
        , headers = headers
        , data = json.dumps(payload)
        )
    print(f'''playlist = {payload['name']}''')
    # Now let's get our playlists from Spotify.
    playlists_raw = sp.current_user_playlists()['items']
    # If the name matches, grab the uri (which is the unique identifier or key for the playlist)
    for i in playlists_raw:
        if i['name'] == payload['name']:
            uri = i['uri']
    return uri

uri = create_playlist()
print(f'uri = {uri}')

playlist = Fresh Jams, Sun Nov  1 18:59:39 2020
uri = spotify:playlist:0Nldqzz5Mw8IlnQpoNAHt7


In [48]:
# Step 4d - Add songs with a genre score threshold (95th percentile) into the new playlist

# Got the URL for this @ https://developer.spotify.com/console/post-playlist-tracks/

def create_playlist(uri, df):
    playlist_uri = uri.split(':')[2]
    df['pct_rank'] = df['genre score'].rank(pct=True)
    uri_list = df.loc[df['pct_rank'] >= 0.95].reset_index()['uri'].to_list()
    num_songs_to_add = len(uri_list)
    # add songs
    for i in range(int(math.ceil(num_songs_to_add/100.0))):
        url = f"https://api.spotify.com/v1/playlists/{playlist_uri}/tracks?uris={','.join(uri_list[i*100:(i+1)*100])}"
        requests.post(url, headers=headers)
    return print('****Complete! Check Spotify to see if playlist was filled with songs.****')

create_playlist(uri,featured_playlist_songs_with_top_artist_genre_score)
print('Thank you!')

****Complete! Check Spotify to see if playlist was filled with songs.****
Thank you!
