In [1]:
# Use Python to create your own Spotify Playlist!
# Last revised: 11/13/2020
    
# Prerequisite: User must have a Spotify account and have installed everthing in the README of this project @
# https://github.com/wjewell3/spotipy/blob/master/README.md

# Description: This tutorial provides an intro to the Python programming language.
    # ***OPTIONAL*** cells cover the following topics, but are not necessary to create a playlist:

# Python Topics Covered:
    # 1.  Defining Variables
    # 2.  Type/Class
    # 3.  Printing
    # 4.  Modules
    # 5.  Try/Except
    # 6.  Functions
    # 7.  Variables defined in Functions
    # 8.  Lists
    # 9.  APIs and the requests Module
    # 10. Dictionaries
    # 11. Nested Values
    # 12. DataFrames
    # 13. Accessing DataFrame data
    # 14. For Loops
    # 15. If Statements
    # 16. Merging DataFrames

print('The logic used to create a Spotify playlist mirrors that of PyMuse, a front-end app I built.')
print('PyMuse is available @ https://spotify-playlist-290119.uc.r.appspot.com')

# Run the cells below by clicking on the Jupyter Notebook "Run" button above

print('''Let's get started!''')

The logic used to create a Spotify playlist mirrors that of PyMuse, a front-end app I built.
PyMuse is available @ https://spotify-playlist-290119.uc.r.appspot.com
Let's get started!


In [2]:
# The Plan!

# Step 1
# a. Import modules
# b. Establish Spotify credentials

# Step 2
# a. Hit the Spotify API to get a list of your Top Artists (raw data)
# b. Create an empty "Top Artist" Pandas DataFrame
# c. Loop over the list of raw "Top Artist" data, migrating key information into the "Top Artists" Pandas DataFrame
# d. Get genre counts from the "Top Artist" DataFrame, and put them into a Top Artist Genre Count DataFrame.

# Step 3
# a. Hit the Spotify API to get a list of Spotify's Featured Playlists (raw data)
# b. Loop over the list of raw song data, migrating key information into "Featured Playlist" Dataframe

# Step 4
# a. Loop over "Featured Playlist" Dataframe's playlists uris, hitting the API to create a list of songs (raw data)
# b. Loop over the list of raw song data, migrating key information into "Featured Playlist Songs" Dataframe
# c. Loop over the "Featured Playlist Song" Dataframe, hitting the API to get the genres associated with each song,
#    and migrating them into a "Genres" DataFrame
# d. Merge the "Featured Playlist Songs" DataFrame with the "Genres "Dataframe

# Step 5
# a. Merge the "Top Artist Genres" Dataframe with the "Featured Playlist Songs" DataFrame
# b. Sum the genre counts
# c. Create a new playlist
# d. Add songs with a genre score threshold into the new playlist

In [3]:
# ***OPTIONAL***
# Comments - any line starting with # is a comment

In [4]:
# ***OPTIONAL***
# Defining Variables 
a = 'hello world'

# Python has some built in functions like "print", which outputs to console
print(a)

hello world


In [5]:
# ***OPTIONAL***
# There are many types/classes of variables
print(a)
print(type(a)) 
# str = string - this means Python interprets this as text

b = 1
print(b)
print(type(b))
# int = integer - this means Python interprets this as an integer, or whole number

c = 1.5
print(c)
print(type(c))
# float - this means Python interprests this as a number with a decimal

# You can convert variables from one class to another
d = str(c)
print(d)
print(type(d))

hello world
<class 'str'>
1
<class 'int'>
1.5
<class 'float'>
1.5
<class 'str'>


In [6]:
# ***OPTIONAL***
# More on Printing
print('More on printing:\n')
print("Print statements on a newline with the newline\nsymbol\n")
print('''To print 'single' or "double" quotes, surround your print statement in triple quotes.''')

print('\nUse the f operator to print variables along with text:\n')
print(f'{a}, using f operator')

# You can also print things together with commas
print(a, ', using commas')

# Or with +
print(a + ', using +')

# You can only combine strings with other strings with the + operator
print(str(b) + ', using +')

More on printing:

Print statements on a newline with the newline
symbol

To print 'single' or "double" quotes, surround your print statement in triple quotes.

Use the f operator to print variables along with text:

hello world, using f operator
hello world , using commas
hello world, using +
1, using +


In [7]:
# ***OPTIONAL***
# Jupyter notebooks automatically print output if it sits on the last line
b
c
# Notice that "b" does not show an output

1.5

In [10]:
# ***OPTIONAL***
# Importing Python modules
# Modules provide extra functionality to Python

import math
print(math.pi)

# round down
print(math.floor(math.pi))

# round up
print(math.ceil(math.pi))

3.141592653589793
3
4


In [12]:
# ***OPTIONAL***
# Some modules have sub-modules
import time
from time import ctime # current time
print(ctime())

Fri Nov 13 11:24:59 2020


In [13]:
# Step 1 of 5

In [14]:
# Step 1a - Let's import the Python modules/packages needed to create a Spotify playlist:

# If you followed the instructions in the Readme file, you should already have all the modules necessary installed
# If you hadn't you could uncomment the following line (remove the #) to install them from within the jupyter notebook
# !pip install -r requirements.txt
# Now we must "import" them for them to be accessed within our script

# Spotipy (the Spotify API) modules:
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyOAuth

# Credentials to connect to Spotify API:
from config import CONFIG

# Other Python packages
import inspect
import io
import json
from IPython.display import display
from json.decoder import JSONDecodeError
import math
import os
import pandas as pd
import numpy as np 
from pandas.io.json import json_normalize
import pickle
import requests
import subprocess
import sys
import time
from time import ctime
import webbrowser
from datetime import datetime, timedelta

print('Modules imported! You can disregard any warnings above.\n')




In [16]:
# ***OPTIONAL***
# Use try, except blocks for error handling
a = 1
b = 2

try:
    print(a + b)
except:
    print('try clause failed')

try:
    print(a + z)
except:
    print('z does not exist')
    
# If you want the script to keep running without an except clause, use the "pass" clause
try:
    print(a + z)
except:
    pass 

3
z does not exist


In [17]:
# ***OPTIONAL***
# Functions
print('Use functions to define chunks of logic:')

def f(a,b):
    c = a + b
    return c

f(1,2)

Use functions to define chunks of logic:


3

In [19]:
# ***OPTIONAL***
# Variables can be defined in functions. 

def a():
    var = 'hello world'
    return 

# Note: not all functions have to return anything


# Variables are only accessible globally (i.e. outside function) if they are declared with the "global" helper

def b():
    global var
    var = 'hello world'
    return

# Let's run function a(), which attempts to define a variable, var, and outputs nothing
a()

# Now let's see if it defined the variable
try:
    print(f'var = {var}')
except:
    print('var not defined')

# Let's run function b(), which uses the "global" helper
b()

# And try printing again
try:
    print(f'var = {var}')
except:
    print('var not defined')

var = hello world
var = hello world


In [31]:
print('''
******** PLEASE FOLLOW THE BELOW INSTRUCTIONS CAREFULLY ********
In the following section, you will be authenticating your Spotify credentials:
1. You will be redirected to a Spotify login page
    a. Please log in to your Spotify account
2. You will then be redirected to http://google.com/
    a. Copy the URL, which contains a token to authenticate you with the Spotify API
3. Return to the Jupyter notebook and paste the URL in the prompt
''')


******** PLEASE FOLLOW THE BELOW INSTRUCTIONS CAREFULLY ********
In the following section, you will be authenticating your Spotify credentials:
1. You will be redirected to a Spotify login page
    a. Please log in to your Spotify account
2. You will then be redirected to http://google.com/
    a. Copy the URL, which contains a token to authenticate you with the Spotify API
3. Return to the Jupyter notebook and paste the URL in the prompt



In [32]:
# Step 1b - Use this function to establish (global) Spotify credentials which allow you to connect to Spotify's API:

# Set environment variables specific that Spotify needs to connect to it's API (more on APIs in a bit)
# These are stored/accessed by Python's os (Operating System) module

os.environ['SPOTIPY_CLIENT_ID']=CONFIG['env_variables']['SPOTIPY_CLIENT_ID']
os.environ['SPOTIPY_CLIENT_SECRET']=CONFIG['env_variables']['SPOTIPY_CLIENT_SECRET']
os.environ['SPOTIPY_REDIRECT_URI']='http://google.com/'

# Next we define some variables globally, connect to the Spotify API
# Note: we will use the variable "headers" in step 2a
def define_scope():  
    global scope, sp, user, username, headers
    scope = '''
    playlist-modify-private 
    playlist-modify-public 
    playlist-read-collaborative 
    playlist-read-private
    user-follow-modify 
    user-follow-read 
    user-library-modify 
    user-library-read 
    user-modify-playback-state 
    user-read-currently-playing 
    user-read-email 
    user-read-playback-state 
    user-read-private 
    user-read-recently-played 
    user-top-read'''
    
    # This command logs you in and establishes a spotify object, sp
    sp = spotipy.Spotify(auth_manager=SpotifyOAuth(
        scope=scope
        ,cache_path='./.cache'
        ))

    # This object has certain attributes (e.g. current_user())
    user = sp.current_user()
    username = user['display_name']
    token = util.prompt_for_user_token(username, scope)
    headers = {'Authorization': "Bearer {}".format(token)}
    return

define_scope()
print(f"Logged into Spotify as {user['display_name']}")

Logged into Spotify as 1254636534


In [33]:
# ***OPTIONAL***
# Lists
print('Python lists\n')

l = [1,2,3]
print(f'list = {l}')
print(type(l),'\n')

# Another List defined
l = list('hello world')
print(f'another list = {l}\n')

print(f'Length of "another list" = {len(l)}\n')

# The first index, or accessor, is 0 
print(f'First element of "another list" = {l[0]}')
print(f'First two elements of "another list" = {l[0:2]}')
print(f'First three elements of "another list" = {l[0:3]}')
print(f'All elements of "another list" = {l[:]}')
print(f'Elements of "another list", up to the fifth item = {l[:5]}')
print(f'Last two elements of "another list" = {l[9:11]}')
print(f'Last element of "another list" = {l[10]}')
print(f'All but the last element of "another list" = {l[:-1]}')
print(f'''Elements of the "another list" joined together with an empty sting: {''.join(l)}''')
         
# Note: triple quotes works if you have a print statement with single quotes.

Python lists

list = [1, 2, 3]
<class 'list'> 

another list = ['h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd']

Length of "another list" = 11

First element of "another list" = h
First two elements of "another list" = ['h', 'e']
First three elements of "another list" = ['h', 'e', 'l']
All elements of "another list" = ['h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd']
Elements of "another list", up to the fifth item = ['h', 'e', 'l', 'l', 'o']
Last two elements of "another list" = ['l', 'd']
Last element of "another list" = d
All but the last element of "another list" = ['h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l']
Elements of the "another list" joined together with an empty sting: hello world


In [34]:
# Step 2 of 5

In [35]:
# ***OPTIONAL***
# An API holds data that you can GET (retrieve) or POST (push) (e.g. The Spotify API has data on songs)
# You generally need a URL + headers to access an API endpoint - these act like a key to access the API's data.
# The "requests" module allows Python to make API requests. 
# In the below function, we are making a GET request to retrieve data from the Spotipy API.
# The API sends us this data in json format, which Python handles with json.loads()

In [44]:
# Step 2a - Define a function to get data from the Spotify API
# We can then use this function to get your "top artist" raw data:

define_scope() # to get the API headers

def req(url, headers):
    r = requests.get(url, headers=headers)
    return json.loads(r.text)

try:
    my_top_artists = req(url = 'https://api.spotify.com/v1/me/top/artists?limit=50?time_range=long_term',headers=headers)['items']
except:
    my_top_artists = req(url = 'https://api.spotify.com/v1/me/top/artists?time_range=long_term',headers=headers)['items']

# To find the URL, i went to the link below, and pulled out the part after curl.
# https://developer.spotify.com/console/get-current-user-top-artists-and-tracks/?type=artists

print(f'''*********************************************************************************************************************\n
Below is the raw "top artist" data returned from a Spotify API GET request (hopefully you got some data?)!\n
Notice it starts and ends with [], a clue that it is a Python list.\n
It look a bit nasty, but we will use Python to clean it up.\n\n
{my_top_artists}\n\n
***Troubleshooting***
If you get an error, please review step 1b. 
If, you get an empty list [], Spotify may not have any data for your profile. 
Try listening to some songs, and then try this step again.
If all else fails, please send a screenshot of your error and any helpful info to jewell.will@gmail.com.
I am happy to work with you to get this working.''')

*********************************************************************************************************************

Below is the raw "top artist" data returned from a Spotify API GET request (hopefully you got some data?)!

Notice it starts and ends with [], a clue that it is a Python list.

It look a bit nasty, but we will use Python to clean it up.


[{'external_urls': {'spotify': 'https://open.spotify.com/artist/4M5nCE77Qaxayuhp3fVn4V'}, 'followers': {'href': None, 'total': 915345}, 'genres': ['acoustic pop', 'indie folk', 'indie pop', 'neo mellow', 'stomp and holler'], 'href': 'https://api.spotify.com/v1/artists/4M5nCE77Qaxayuhp3fVn4V', 'id': '4M5nCE77Qaxayuhp3fVn4V', 'images': [{'height': 640, 'url': 'https://i.scdn.co/image/82e0135c52978f4d54e8ddf7275cbe97b5831142', 'width': 640}, {'height': 320, 'url': 'https://i.scdn.co/image/413ed48070545cff2b7dba843336923be6d0ab9a', 'width': 320}, {'height': 160, 'url': 'https://i.scdn.co/image/cbd23d08cdfe211d404d6e30e6cd63207617b230', 'w

In [47]:
# ***OPTIONAL***
# Dictionaries (dicts) - look for curly braces
print('Python dictionaries (dicts)- look for curly braces\n')
d = {'a': 'hello', 'b': 'world'}

print(f'd = {d}\n')
print('A Python dict contains keys and values:')
print(f'd.keys() = {list(d.keys())}')
print(f'd.values() = {list(d.values())}')
print('\nAccessing dict values:')

print(f'''dict[key] = value''')
print(f'''d['a'] = {d['a']}''')

Python dictionaries (dicts)- look for curly braces

d = {'a': 'hello', 'b': 'world'}

A Python dict contains keys and values:
d.keys() = ['a', 'b']
d.values() = ['hello', 'world']

Accessing dict values:
dict[key] = value
d['a'] = hello


In [51]:
# ***OPTIONAL***
print (f'''***Let's look at the first two elements of the "my_top_artists" list:***
\n***1st Artist:***\n{my_top_artists[0]}
\n***2nd Artist:***\n{my_top_artists[1]}
\n***Each element is wrapped in curly braces, a clue that it is a Python dict.***''')

***Let's look at the first two elements of the "my_top_artists" list:***

***1st Artist:***
{'external_urls': {'spotify': 'https://open.spotify.com/artist/4M5nCE77Qaxayuhp3fVn4V'}, 'followers': {'href': None, 'total': 915345}, 'genres': ['acoustic pop', 'indie folk', 'indie pop', 'neo mellow', 'stomp and holler'], 'href': 'https://api.spotify.com/v1/artists/4M5nCE77Qaxayuhp3fVn4V', 'id': '4M5nCE77Qaxayuhp3fVn4V', 'images': [{'height': 640, 'url': 'https://i.scdn.co/image/82e0135c52978f4d54e8ddf7275cbe97b5831142', 'width': 640}, {'height': 320, 'url': 'https://i.scdn.co/image/413ed48070545cff2b7dba843336923be6d0ab9a', 'width': 320}, {'height': 160, 'url': 'https://i.scdn.co/image/cbd23d08cdfe211d404d6e30e6cd63207617b230', 'width': 160}], 'name': 'Iron & Wine', 'popularity': 71, 'type': 'artist', 'uri': 'spotify:artist:4M5nCE77Qaxayuhp3fVn4V'}

***2nd Artist:***
{'external_urls': {'spotify': 'https://open.spotify.com/artist/43O3c6wewpzPKwVaGEEtBM'}, 'followers': {'href': None, 'total': 4

In [54]:
# ***OPTIONAL***
print('Accessing nested values (lists of dicts of lists of ...)\n')
L = [[d, {'c': 'chips', 'd': 'ahoy', 'e': 'matey!'}], {'f': {'ooo': 'lala'}}]
print(f'L = {L}\n')
print(f'L[0] = {L[0]}')
print(f'L[0][0] = {L[0][0]}')
print(f'''L[0][0]['a'] = {L[0][0]['a']}''')
print(f'''L[1]['f'] = {L[1]['f']}''')
print(f'''L[1]['f']['ooo'] = {L[1]['f']['ooo']}''')

Accessing nested values (lists of dicts of lists of ...)

L = [[{'a': 'hello', 'b': 'world'}, {'c': 'chips', 'd': 'ahoy', 'e': 'matey!'}], {'f': {'ooo': 'lala'}}]

L[0] = [{'a': 'hello', 'b': 'world'}, {'c': 'chips', 'd': 'ahoy', 'e': 'matey!'}]
L[0][0] = {'a': 'hello', 'b': 'world'}
L[0][0]['a'] = hello
L[1]['f'] = {'ooo': 'lala'}
L[1]['f']['ooo'] = lala


In [58]:
# ***OPTIONAL***
print('''Let's access the 1st couple values of the "my_top_artists" list of dictionaries:\n''')

# First artist dictionary is my_top_artists[0]
# Second artist dictionary is my_top_artists[1]

print('First Artist:')
print('   Name = ' + my_top_artists[0]['name'])
print('   Genres = ' + str(my_top_artists[0]['genres']))
print('''   URI (Spotify's "artist key" = ''' + my_top_artists[0]['uri'])

print('\nSecond Artist:')
print('   Name = ' + my_top_artists[1]['name'])
print('   Genres = ' + str(my_top_artists[1]['genres']))
print('''   URI (Spotify's "artist key" = ''' + my_top_artists[1]['uri'])

print('''\nThe artist's genres will be important for helping us decide which songs to add to a playlist later on...''')

Let's access the 1st couple values of the "my_top_artists" list of dictionaries:

First Artist:
   Name = Iron & Wine
   Genres = ['acoustic pop', 'indie folk', 'indie pop', 'neo mellow', 'stomp and holler']
   URI (Spotify's "artist key" = spotify:artist:4M5nCE77Qaxayuhp3fVn4V

Second Artist:
   Name = My Morning Jacket
   Genres = ['alternative country', 'alternative rock', 'indie folk', 'indie rock', 'jam band', 'louisville indie', 'melancholia', 'modern rock', 'new americana', 'rock', 'roots rock', 'stomp and holler']
   URI (Spotify's "artist key" = spotify:artist:43O3c6wewpzPKwVaGEEtBM

The artist's genres will be important for helping us decide which songs to add to a playlist later on...


In [60]:
# ***OPTIONAL***
# Pandas DataFrames
# Think of a Pandas DataFrame as the Python version of an Excel spreadsheet or Database table
# DataFrames can be defined in a variety of ways
# Note: since we imported the Python module, pandas, as pd, we can define a pandas DataFrame as pd.DataFrame()

print('Pandas DataFrames')
df1 = pd.DataFrame({'a': 'hello', 'b': 'world'}, index = [1])
display(df1)
df2 = df1.rename(columns={'a': 'a_renamed', 'b': 'b_renamed'})
display(df2)
# alternatively, use "inplace=True" - this prevents you from having to define a new df
df1.rename(columns={'a': 'a_renamed_inplace', 'b': 'b_renamed_inplace'}, inplace=True)
display(df1)
# You can add a column like this
df1['combined'] = df1['a_renamed_inplace'] ++ df1['b_renamed_inplace']
display(df1)

Pandas DataFrames


Unnamed: 0,a,b
1,hello,world


Unnamed: 0,a_renamed,b_renamed
1,hello,world


Unnamed: 0,a_renamed_inplace,b_renamed_inplace
1,hello,world


Unnamed: 0,a_renamed_inplace,b_renamed_inplace,combined
1,hello,world,helloworld


In [68]:
# ***OPTIONAL***
print('\nMore on DataFrames - mastering Dataframes is critical for data analysis\n')
df = pd.DataFrame({'a': [1,2,3,3], 'b': [4,5,6,6]})
display(df)

df = df.set_index('a')
display(df)

df = df.reset_index()
display(df)

df.drop(columns=['b'], inplace=True)
display(df)

df.drop_duplicates(inplace=True)
display(df)

# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rank.html
df['pct_rank'] = df['a'].rank(pct=True)
display(df)
l = df['pct_rank'].to_list()
print('\n', l)

df = pd.DataFrame(columns=['empty','df'])
display(df)

df = df.append(pd.DataFrame([['not','anymore']], columns = ['empty','df']))
display(df)


More on DataFrames - mastering Dataframes is critical for data analysis



Unnamed: 0,a,b
0,1,4
1,2,5
2,3,6
3,3,6


Unnamed: 0_level_0,b
a,Unnamed: 1_level_1
1,4
2,5
3,6
3,6


Unnamed: 0,a,b
0,1,4
1,2,5
2,3,6
3,3,6


Unnamed: 0,a
0,1
1,2
2,3
3,3


Unnamed: 0,a
0,1
1,2
2,3


Unnamed: 0,a,pct_rank
0,1,0.333333
1,2,0.666667
2,3,1.0



 [0.3333333333333333, 0.6666666666666666, 1.0]


Unnamed: 0,empty,df


Unnamed: 0,empty,df
0,not,anymore


In [94]:
# ***OPTIONAL***
# Accessing Dataframe data
print('\n***Use .loc to access dataframe by rows, columns:***')
df = pd.DataFrame({'a': [1,2,3,3], 'b': [4,5,6,6]})
print('\n\ndf:')
display(df)
print('''\n***All(":") rows, and column = "b":***\n
df.loc[:,'b'] ''')
display(df.loc[:,'b'])
print('''\n***Shortcut to do all rows, and column = "b", without .loc:***\n
df['b'] ''')
display(df['b'])
print('''\n***Row/index = 1, and column = "b":***\n
df.loc[1,'b']''')
display(df.loc[1,'b'])
print('''\n***Unique records in column "b":***\n
df['b'].unique()''')
display(df['b'].unique())
print('''\n*** You can incorporate logic into a .loc accessor (returns rows that evaluate to True)***\n
df.loc[df.loc[:,'a'] <= 2]''')
display(df.loc[df.loc[:,'a'] <= 2])
print('''\n***You can also stack logic***\n
df.loc[df['a'] <= 2].reset_index()['index'].to_list()''')
# You can also stack logic
print(df.loc[df['a'] <= 2].reset_index()['index'].to_list())


print('''\nFantastic resource on Python data structures:\n
http://www.grapenthin.org/teaching/geop501/lectures/lecture_06_data_structures.pdf''')


***Use .loc to access dataframe by rows, columns:***


df:


Unnamed: 0,a,b
0,1,4
1,2,5
2,3,6
3,3,6



***All(":") rows, and column = "b":***

df.loc[:,'b'] 


0    4
1    5
2    6
3    6
Name: b, dtype: int64


***Shortcut to do all rows, and column = "b", without .loc:***

df['b'] 


0    4
1    5
2    6
3    6
Name: b, dtype: int64


***Row/index = 1, and column = "b":***

df.loc[1,'b']


5


***Unique records in column "b":***

df['b'].unique()


array([4, 5, 6])


*** You can incorporate logic into a .loc accessor (returns rows that evaluate to True)***

df.loc[df.loc[:,'a'] <= 2]


Unnamed: 0,a,b
0,1,4
1,2,5



***You can also stack logic***

df.loc[df['a'] <= 2].reset_index()['index'].to_list()
[0, 1]

Fantastic resource on Python data structures:

http://www.grapenthin.org/teaching/geop501/lectures/lecture_06_data_structures.pdf


In [96]:
# Step 2b - create an empty "Top Artist" Pandas DataFrame
print('''\n***Next let's define a Pandas DataFrame (currently emtpy) to hold the data from the list of top artists:***''')
my_top_artists_df = pd.DataFrame(columns=['artist_uri','name','genre list'])
display(my_top_artists_df)

print('''\n***Use the display function to "print" dataframes in jupyter notebooks.***''')


***Next let's define a Pandas DataFrame (currently emtpy) to hold the data from the list of top artists:***


Unnamed: 0,artist_uri,name,genre list



***Use the display function to "print" dataframes in jupyter notebooks.***


In [98]:
# ***OPTIONAL***
# "For" Loops
print('***For Loops:***\n')

print('''for i in [1,2,3]:
    print(i**2)\n''')
for i in [1,2,3]:
    print(i**2)

print('''\nfor i in range(1,4):
    print(i**2)''')
for i in range(1,4):
    print(i**2)
    
# Be careful with the indentation - anything indented after the "for" statement will be in the loop.

***For Loops:***

for i in [1,2,3]:
    print(i**2)

1
4
9

for i in range(1,4):
    print(i**2)
1
4
9


In [105]:
# Step 2c - Loop over the list of raw "Top Artist" data, migrating key information into the "Top Artists" Pandas DataFrame

for i in range(len(my_top_artists)): # for each dictionary item in our "my top artists" list
    uri = my_top_artists[i]['uri'] 
    name = my_top_artists[i]['name'] # assign the value for the key "name" to the variable "name"
    genres = my_top_artists[i]['genres'] 
    # append these variables to the dataframe, which is empty at first, but it will grow with each iteration
    my_top_artists_df = my_top_artists_df.append(pd.DataFrame([[uri, name, genres]], columns = ['artist_uri','name', 'genre list']))

# Use dataframe.head() to only see the top 5 rows of a dataframe
print('\nmy_top_artists_df.head():')
display(my_top_artists_df.head())

# Note: not all artists have genres, so you may see some empty lists


my_top_artists_df.head():


Unnamed: 0,artist_uri,name,genre list
0,spotify:artist:4M5nCE77Qaxayuhp3fVn4V,Iron & Wine,"[acoustic pop, indie folk, indie pop, neo mell..."
0,spotify:artist:43O3c6wewpzPKwVaGEEtBM,My Morning Jacket,"[alternative country, alternative rock, indie ..."
0,spotify:artist:3TVXtAsR1Inumwj472S9r4,Drake,"[canadian hip hop, canadian pop, hip hop, pop ..."
0,spotify:artist:3XHO7cRUPCLOr6jwp8vsx5,alt-J,"[indie rock, modern rock]"
0,spotify:artist:6YvKo1VtZxCoT8IkbAYswy,Patton Oswalt,"[comedy, comic]"


In [112]:
# Step 2d - Get genre counts from the "Top Artist" DataFrame, and put them into a Top Artist Genre Count DataFrame.

# The cool thing about Dataframes is they have some built in functions, like "explode".
# Explode takes a column with list values and creates a row for each element of the list.

# FYI, Python and it's associated packages (e.g. to include Pandas) has some great documentation, if you are interested:
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
# Stack Overflow is a great place if you have questions.

genre_exploded_df = my_top_artists_df.explode('genre list')
print('genre_exploded_df.head()')
display(genre_exploded_df.head())

# Next let's use the built in value_counts function to count the times a genre shows up in our top artists
genre_count_df = pd.DataFrame(genre_exploded_df['genre list'].value_counts())

print('\n***Your top artist genre counts:***\n')
print('genre_count_df.head()')
display(genre_count_df.head())

print('''\n***Notice there is no column name for the genres.\n
That's b/c the genres make up the index, or unique row identifier.\n
Let's push the index out with .reset_index()***''')
top_artist_genre_count_reset_index_df = genre_count_df.reset_index()
display(top_artist_genre_count_reset_index_df.head())

print('\n***... and rename the column***\n')
top_artist_genre_count_reset_index_df.columns = ['genre','genre count']
# Let's update the Dataframe's name, so we can more easily recall what it is later on
top_artist_genre_count_df = top_artist_genre_count_reset_index_df
print('top_artist_genre_count_df:')
display(top_artist_genre_count_df)

genre_exploded_df.head()


Unnamed: 0,artist_uri,name,genre list
0,spotify:artist:4M5nCE77Qaxayuhp3fVn4V,Iron & Wine,acoustic pop
0,spotify:artist:4M5nCE77Qaxayuhp3fVn4V,Iron & Wine,indie folk
0,spotify:artist:4M5nCE77Qaxayuhp3fVn4V,Iron & Wine,indie pop
0,spotify:artist:4M5nCE77Qaxayuhp3fVn4V,Iron & Wine,neo mellow
0,spotify:artist:4M5nCE77Qaxayuhp3fVn4V,Iron & Wine,stomp and holler



***Your top artist genre counts:***

genre_count_df.head()


Unnamed: 0,genre list
indie folk,28
rock,28
stomp and holler,24
indie rock,20
modern rock,20



***Notice there is no column name for the genres.

That's b/c the genres make up the index, or unique row identifier.

Let's push the index out with .reset_index()***


Unnamed: 0,index,genre list
0,indie folk,28
1,rock,28
2,stomp and holler,24
3,indie rock,20
4,modern rock,20



***... and rename the column***

top_artist_genre_count_df:


Unnamed: 0,genre,genre count
0,indie folk,28
1,rock,28
2,stomp and holler,24
3,indie rock,20
4,modern rock,20
5,alternative rock,16
6,hip hop,12
7,indie pop,12
8,rap,12
9,permanent wave,12


In [113]:
# Step 3 of 5

In [114]:
# ***OPTIONAL***
# In the following Jupyter notebook cells, let's combine concepts learned into some larger functions
# Functions can be useful for consolidating more complex sequences of logic

In [119]:
# Step 3a - Hit the Spotify API to get a list of Spotify's Featured Playlists (raw data)
# Step 3b - Loop over the list of raw playlist data, migrating key information into a "Featured Playlist" Dataframe

# How did I find the URL below? I went to the link below and pulled out the part after curl (with some trial and error)
# https://developer.spotify.com/console/get-featured-playlists/?country=US&limit=50

# In case your headers (which contain an authentication token) have expired, run the define_scope function.
define_scope()

def get_featured_playlist_df():
    print('\n***Get raw "featured playlists" from Spotify - Spotify updates these playlists a few times per day!***\n')
    url = f"https://api.spotify.com/v1/browse/featured-playlists?limit=50&country=US"
    raw_data = req(url,headers=headers)
    featured_playlist_count = raw_data['playlists']['total']
    print(f'featured playlist count = {featured_playlist_count}')
    print('\n***Raw data for 1st playlist:***\n')
    display(raw_data['playlists']['items'][0])
    df = pd.DataFrame()
    
    for i in range(0, featured_playlist_count):
        raw_featured_playlist = raw_data['playlists']['items']
        playlist = raw_featured_playlist[i]['name']
        playlist_uri = raw_featured_playlist[i]['uri']
        song_count = raw_featured_playlist[i]['tracks']['total']
        df = df.append(pd.DataFrame([[playlist, playlist_uri, song_count]], columns = ['playlist','playlist_uri','song_count']))
    df.set_index(['playlist'], inplace=True)
    return df

playlist_df = get_featured_playlist_df()
print('\nplaylist_df:')
display(playlist_df)

print('''***Notice that the playlist column falls slightly lower than the other two. That's b/c it is actually the index.***''')

# Spotify API reference (in beta):
# https://developer.spotify.com/documentation/web-api/reference-beta/


***Get raw "featured playlists" from Spotify - Spotify updates these playlists a few times per day!***

featured playlist count = 13

***Raw data for 1st playlist:***



{'collaborative': False,
 'description': 'New music from Billie Eilish, Lil Nas X, Future & Lil Uzi Vert, and more!',
 'external_urls': {'spotify': 'https://open.spotify.com/playlist/37i9dQZF1DX4JAvHpjipBk'},
 'href': 'https://api.spotify.com/v1/playlists/37i9dQZF1DX4JAvHpjipBk',
 'id': '37i9dQZF1DX4JAvHpjipBk',
 'images': [{'height': None,
   'url': 'https://i.scdn.co/image/ab67706f00000003620aa638094ee3c0e8bc5ecb',
   'width': None}],
 'name': 'New Music Friday',
 'owner': {'display_name': 'Spotify',
  'external_urls': {'spotify': 'https://open.spotify.com/user/spotify'},
  'href': 'https://api.spotify.com/v1/users/spotify',
  'id': 'spotify',
  'type': 'user',
  'uri': 'spotify:user:spotify'},
 'primary_color': None,
 'public': None,
 'snapshot_id': 'MTYwNTI4MjE1OCwwMDAwMDMxODAwMDAwMTc1YzI0NWQ5MGEwMDAwMDE3NWJlZTIwNTBm',
 'tracks': {'href': 'https://api.spotify.com/v1/playlists/37i9dQZF1DX4JAvHpjipBk/tracks',
  'total': 100},
 'type': 'playlist',
 'uri': 'spotify:playlist:37i9dQZF1DX


playlist_df:


Unnamed: 0_level_0,playlist_uri,song_count
playlist,Unnamed: 1_level_1,Unnamed: 2_level_1
New Music Friday,spotify:playlist:37i9dQZF1DX4JAvHpjipBk,100
Teen Party,spotify:playlist:37i9dQZF1DX1N5uK98ms5p,67
Channel X,spotify:playlist:37i9dQZF1DX4y8h9WqDPAE,50
Internet People,spotify:playlist:37i9dQZF1DX6OgmB2fwLGd,114
Baila Reggaeton,spotify:playlist:37i9dQZF1DWY7IeIP1cdjF,50
Country Cookout,spotify:playlist:37i9dQZF1DXbIbVYph0Zr5,94
Most Necessary,spotify:playlist:37i9dQZF1DX2RxBh64BHjQ,100
Ultimate Indie,spotify:playlist:37i9dQZF1DX2Nc3B70tvx0,101
Varsity Bars,spotify:playlist:37i9dQZF1DXdWMJMjqz9bm,100
Dance Pop,spotify:playlist:37i9dQZF1DWZQaaqNMbbXa,116


***Notice that the playlist column falls slightly lower than the other two. That's b/c it is actually the index.***


In [120]:
# Step 4 of 5

In [122]:
# Step 4a - Loop over "Featured Playlist" Dataframe's playlist uris, hitting the API to create a list of raw song data

# Since the Spotify API can only handle requests of up to 100 songs at a time,
# let's iterate over the requests to grab all songs

def get_raw_featured_playlist_song_list(df):
    raw_featured_songs = []
    for uri in df.loc[:,'playlist_uri'].to_list():
        song_count = df.loc[df['playlist_uri']==uri,'song_count'][0]
        offset = 0
        url = f"https://api.spotify.com/v1/playlists/{uri.split(':')[2]}/tracks?limit=50&offset={offset}"
        for i in range(int(math.ceil(song_count/100.0))):
            offset = 100*i
            raw_featured_songs.extend(req(url,headers=headers)['items'])
    return raw_featured_songs

raw_featured_playlist_songs_list = get_raw_featured_playlist_song_list(playlist_df)
print('''***Let's take a look at the first song's raw data:***\n''')
print(raw_featured_playlist_songs_list[0])

***Let's take a look at the first song's raw data:***

{'added_at': '2020-11-13T05:00:28Z', 'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/'}, 'href': 'https://api.spotify.com/v1/users/', 'id': '', 'type': 'user', 'uri': 'spotify:user:'}, 'is_local': False, 'primary_color': None, 'track': {'album': {'album_type': 'single', 'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/6qqNVTkY8uBg9cP3Jd7DAH'}, 'href': 'https://api.spotify.com/v1/artists/6qqNVTkY8uBg9cP3Jd7DAH', 'id': '6qqNVTkY8uBg9cP3Jd7DAH', 'name': 'Billie Eilish', 'type': 'artist', 'uri': 'spotify:artist:6qqNVTkY8uBg9cP3Jd7DAH'}], 'available_markets': ['AD', 'AE', 'AL', 'AR', 'AT', 'AU', 'BA', 'BE', 'BG', 'BH', 'BO', 'BR', 'BY', 'CA', 'CH', 'CL', 'CO', 'CR', 'CY', 'CZ', 'DE', 'DK', 'DO', 'DZ', 'EC', 'EE', 'EG', 'ES', 'FI', 'FR', 'GB', 'GR', 'GT', 'HK', 'HN', 'HR', 'HU', 'ID', 'IE', 'IL', 'IN', 'IS', 'IT', 'JO', 'JP', 'KW', 'KZ', 'LB', 'LI', 'LT', 'LU', 'LV', 'MA', 'MC', 'MD', 'M

In [125]:
# ***OPTIONAL***
# If statement
a = 1
b = 2

if a == b:
    print('a == b')
else:
    print('a != b')

a != b


In [126]:
# ***OPTIONAL***
# String Split
a = 'hello-world'
b = a.split('-')
print(b)

['hello', 'world']


In [131]:
# Step 4b - Loop over the list of raw song data, migrating key information into "Featured Playlist Songs" Dataframe

define_scope()

def song_raw_data_to_df(raw_songs):
    print('***Transforming raw song data list into DataFrame(df)***')
    # create empty df
    df = pd.DataFrame(columns = [
        'album', 
        'artist_uri', 
        'artist',
        'track',
        'url',
        'popularity',
        'added_at',
        'release_date',
        'release_year',
        'uri'
        ])
    # Pipe songs into df 1 at a time
    for i in range(0, len(raw_songs)):
        # Print progress every 100 songs
        if (i/100).is_integer():
            print(f"{i}/{len(raw_songs)} parsed")
        try:
            album = raw_songs[i]['track']['album']['name']
            artist_uri = raw_songs[i]['track']['artists'][0]['uri'].split(':')[2]
            artist = raw_songs[i]['track']['album']['artists'][0]['name']
            track = raw_songs[i]['track']['name']
            url = raw_songs[i]['track']['external_urls']['spotify']
            popularity = raw_songs[i]['track']['popularity']
            added_at = raw_songs[i]['added_at']
            release_date = raw_songs[i]['track']['album']['release_date']
            release_year = release_date.split('-')[0]
            uri = raw_songs[i]['track']['uri']
            df = df.append(pd.DataFrame([[
                album, 
                artist_uri,
                artist, 
                track, 
                url, 
                popularity, 
                added_at, 
                release_date, 
                release_year,
                uri
                ]], columns = [
                'album', 
                'artist_uri', 
                'artist',
                'track',
                'url',
                'popularity',
                'added_at',
                'release_date',
                'release_year',
                'uri'
                ]))
        except:
            pass
    # Make an index out of the uri column
    df.set_index('uri', inplace=True)
    return df

featured_playlist_song_df = song_raw_data_to_df(raw_featured_playlist_songs_list)
print('\nfeatured_playlist_song_df.head():')
display(featured_playlist_song_df.head())
print('''\n***The uri is the unique index for the song.
Notice that we don't have genres - we will get these from the artist_uris, like we did for our featured playlists.***''')

***Transforming raw song data list into DataFrame(df)***
0/850 parsed
100/850 parsed
200/850 parsed
300/850 parsed
400/850 parsed
500/850 parsed
600/850 parsed
700/850 parsed
800/850 parsed

featured_playlist_song_df.head():


Unnamed: 0_level_0,album,artist_uri,artist,track,url,popularity,added_at,release_date,release_year
uri,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
spotify:track:54bFM56PmE4YLRnqpW6Tha,Therefore I Am,6qqNVTkY8uBg9cP3Jd7DAH,Billie Eilish,Therefore I Am,https://open.spotify.com/track/54bFM56PmE4YLRn...,0,2020-11-13T05:00:28Z,2020-11-12,2020
spotify:track:6zFMeegAMYQo0mt8rXtrli,HOLIDAY,7jVv8c5Fj3E9VhNjxT4snq,Lil Nas X,HOLIDAY,https://open.spotify.com/track/6zFMeegAMYQo0mt...,0,2020-11-13T05:00:28Z,2020-11-13,2020
spotify:track:0k7wmahjkn389wAZdz19Cv,Pluto x Baby Pluto,1RyvyyTE3xzB2ZywiAwp0i,Future,Drankin N Smokin,https://open.spotify.com/track/0k7wmahjkn389wA...,0,2020-11-13T05:00:28Z,2020-11-13,2020
spotify:track:7oN7mBg1r39crVwTK5PrWt,So Help Me God!,17lzZA2AlOHwCwFALHttmp,2 Chainz,Save Me (feat. YoungBoy Never Broke Again),https://open.spotify.com/track/7oN7mBg1r39crVw...,0,2020-11-13T05:00:28Z,2020-11-13,2020
spotify:track:1jgu8MFTEGdL1wjw9gZj5y,Fly Away,2NjfBq1NflQcKSeiDooVjY,Tones And I,Fly Away,https://open.spotify.com/track/1jgu8MFTEGdL1wj...,0,2020-11-13T05:00:28Z,2020-11-13,2020



***The uri is the unique index for the song.
Notice that we don't have genres - we will get these from the artist_uris, like we did for our featured playlists.***


In [134]:
# Step 4c - Loop over the "Featured Playlist Song" Dataframe, hitting the API to get the genres 
#           associated with each song, and migrating them into a "Genres" DataFrame

define_scope()
print('''\n***Let's get the genres for the featured playlist songs' artists. 
As we loop over the data, we add it to a dataframe of featured playlist songs.
Please note - this step can take a couple minutes.
If printing every artists gets annoying, feel free to comment that part out.
Please note: Spotify does not have genres for every artist.***\n''')

def get_genres(df):
    g = []
    artist_uris = df['artist_uri'].unique()
    print(f"{len(artist_uris)} distinct artists\n")
    for i in range(0, len(artist_uris)):
        if (i/100).is_integer():
            print(f"\n{i}/{len(artist_uris)} parsed\n")
        request = req(url = f"https://api.spotify.com/v1/artists/{artist_uris[i]}",headers=headers)
        artist = request['name']
        genres = request['genres']
        print(f"{i}:{artist}:{genres}")
        # You can append to lists just like you can append to Dataframes
        g.append(genres)

    genres_df = pd.DataFrame({'artist_uri': artist_uris, 'genre list': g})
    return genres_df

genre_df = get_genres(featured_playlist_song_df)


***Let's get the genres for the featured playlist songs' artists. 
As we loop over the data, we add it to a dataframe of featured playlist songs.
Please note - this step can take a couple minutes.
If printing every artists gets annoying, feel free to comment that part out.
As noted before, Spotify does not have genres for every artist.***

492 distinct artists


0/492 parsed

0:Billie Eilish:['electropop', 'pop']
1:Lil Nas X:['country rap', 'lgbtq+ hip hop', 'pop rap']
2:Future:['atl hip hop', 'pop rap', 'rap', 'southern hip hop', 'trap']
3:2 Chainz:['atl hip hop', 'gangster rap', 'hip hop', 'pop rap', 'rap', 'southern hip hop', 'trap']
4:Tones And I:['australian pop']
5:BENEE:['nz pop', 'pop']
6:Chris Stapleton:['contemporary country', 'outlaw country']
7:YoungBoy Never Broke Again:['baton rouge rap', 'trap']
8:Rod Wave:['florida rap']
9:Lil Mosey:['melodic rap', 'rap conscient', 'vapor trap']
10:Lava La Rue:['indie r&b', 'uk alternative hip hop', 'uk contemporary r&b']
11:Jhay Corte

126:Jaz Karis:['alternative r&b', 'chill r&b', 'indie r&b', 'uk contemporary r&b']
127:Taste of Pluto:[]
128:Karla Felecia Scaife:[]
129:Zion Foster:['uk contemporary r&b']
130:Savannah Ré:['canadian contemporary r&b', 'indie r&b']
131:RIMON:['alternative r&b', 'chill r&b', 'dutch r&b', 'indie r&b', 'indie soul']
132:Savannah Cristina:['alternative r&b', 'chill r&b', 'pop r&b']
133:Ebenezer:['trap soul', 'uk hip hop']
134:BLK:['chill r&b', 'indie r&b']
135:Lila Iké:['dancehall', 'modern reggae', 'uk reggae']
136:Championxiii:['viral rap']
137:Mario Judah:[]
138:Kenndog:[]
139:Popp Hunna:[]
140:347aidan:['sad rap']
141:Only The Family:['chicago rap', 'trap']
142:HVN:[]
143:WhoHeem:['viral rap']
144:DJ Chose:['houston rap']
145:Justin LaBoy:[]
146:$NOT:['emo rap', 'florida rap', 'sad rap', 'underground hip hop', 'vapor trap']
147:XXXTENTACION:['emo rap', 'miami hip hop']
148:AG Club:['indie hip hop']
149:Aries:['emo rap', 'pop rap', 'sad rap']
150:Sada Baby:['detroit trap', 'scam rap', '

260:CHASE B:[]
261:Moe:[]
262:Dee Watkins:['florida rap', 'vapor trap']
263:Yella Beezy:['dfw rap', 'hip hop', 'rap', 'southern hip hop', 'trap']
264:Lakeyah:[]
265:J.I the Prince of N.Y:['nyc rap']
266:MAVI:['alternative hip hop', 'indie hip hop', 'underground hip hop']
267:Katori Walker:['cali rap']
268:Luh Soldier:['alabama rap', 'atl trap', 'trap']
269:42 Dugg:['detroit trap', 'rap', 'southern hip hop', 'trap']
270:Doe Boy:['atl hip hop', 'atl trap', 'ohio hip hop', 'rap', 'southern hip hop', 'trap', 'underground hip hop', 'vapor trap']
271:Rah Swish:['brooklyn drill', 'new york drill', 'nyc rap']
272:Bfb Da Packman:['flint hip hop']
273:KJ Balla:['nyc rap']
274:Shordie Shordie:['baltimore hip hop', 'cali rap']
275:Big Scarr:['atl trap', 'trap']
276:Nana:[]
277:Kalan.FrFr:['cali rap', 'west coast trap']
278:Yung Bleu:['alabama rap', 'pop r&b', 'trap']
279:Rylo Rodriguez:['alabama rap', 'atl trap', 'tennessee hip hop']
280:Pooh Shiesty:['memphis hip hop', 'southern hip hop', 'tennes

396:The Chainsmokers:['dance pop', 'edm', 'electropop', 'pop', 'pop dance', 'tropical house']
397:Jimi Hendrix:['acid rock', 'album rock', 'blues rock', 'classic rock', 'hard rock', 'psychedelic rock', 'rock']
398:Franz Ferdinand:['alternative rock', 'dance-punk', 'indie rock', 'modern rock', 'new rave', 'rock', 'scottish rock']
399:Green Day:['permanent wave', 'pop punk', 'punk']

400/492 parsed

400:The Strokes:['alternative rock', 'garage rock', 'modern rock', 'permanent wave', 'rock']
401:The Smashing Pumpkins:['alternative metal', 'alternative rock', 'grunge', 'modern rock', 'permanent wave', 'pop rock', 'rock']
402:The Clash:['classic rock', 'dance rock', 'permanent wave', 'punk', 'rock']
403:Beastie Boys:['alternative rock', 'east coast hip hop', 'hip hop', 'old school hip hop', 'rap']
404:AWOLNATION:['la indie', 'modern alternative rock', 'modern rock', 'pop rock', 'rock', 'stomp pop']
405:The Cure:['dance rock', 'new romantic', 'new wave', 'permanent wave', 'rock']
406:Lenny K

In [136]:
# Step 4c - extended...
# Let's take a look at the genre df we just created
print('genre_df:')
display(genre_df.head())
genre_exploded_df = genre_df.explode('genre list')

print('\n\ngenre_exploded_df:')
# ... and the exploded version
display(genre_exploded_df.head())

genre_df:


Unnamed: 0,artist_uri,genre list
0,6qqNVTkY8uBg9cP3Jd7DAH,"[electropop, pop]"
1,7jVv8c5Fj3E9VhNjxT4snq,"[country rap, lgbtq+ hip hop, pop rap]"
2,1RyvyyTE3xzB2ZywiAwp0i,"[atl hip hop, pop rap, rap, southern hip hop, ..."
3,17lzZA2AlOHwCwFALHttmp,"[atl hip hop, gangster rap, hip hop, pop rap, ..."
4,2NjfBq1NflQcKSeiDooVjY,[australian pop]




genre_exploded_df:


Unnamed: 0,artist_uri,genre list
0,6qqNVTkY8uBg9cP3Jd7DAH,electropop
0,6qqNVTkY8uBg9cP3Jd7DAH,pop
1,7jVv8c5Fj3E9VhNjxT4snq,country rap
1,7jVv8c5Fj3E9VhNjxT4snq,lgbtq+ hip hop
1,7jVv8c5Fj3E9VhNjxT4snq,pop rap


In [141]:
# ***OPTIONAL***
# Merging Dataframes - this combines rows from two or more dfs, based on a shared index and/or column(s) between them.
# If you know SQL, merging is the same as SQL joining.

df1 = pd.DataFrame({'index': [1,2,3], 'colA': [4,5,6], 'colB': [7,8,9]}).set_index('index')
df2 = pd.DataFrame({'index': [1,2], 'colC': ['hello','world']}).set_index('index')
print('df1:')
display(df1)
print('\n\ndf2:')
display(df2)

print('''\n\n***Inner join - return rows where the index and/or "join-column(s)" match in both dataframes:')
Since df2 does not have a 3rd index, that row is excluded from the results.***

df3''')
df3 = df1.merge(df2, on = 'index', how = 'inner')
display(df3)

print('''\n\n***Left join - all data from df1 (left df) is preserved, and matching rows from df2 (right df) are added:')
Where records don't match, null (i.e. blank, empty, or Not a Number (NaN) values result.***

df4''')
df4 = df1.merge(df2, on = 'index', how = 'left')
display(df4)

df1:


Unnamed: 0_level_0,colA,colB
index,Unnamed: 1_level_1,Unnamed: 2_level_1
1,4,7
2,5,8
3,6,9




df2:


Unnamed: 0_level_0,colC
index,Unnamed: 1_level_1
1,hello
2,world




***Inner join - return rows where the index and/or "join-column(s)" match in both dataframes:')
Since df2 does not have a 3rd index, that row is excluded from the results.***

df3


Unnamed: 0_level_0,colA,colB,colC
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,4,7,hello
2,5,8,world




***Left join - all data from df1 (left df) is preserved, and matching rows from df2 (right df) are added:')
Where records don't match, null (i.e. blank, empty, or Not a Number (NaN) values result.***

df4


Unnamed: 0_level_0,colA,colB,colC
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,4,7,hello
2,5,8,world
3,6,9,


In [146]:
# Step 4d - Merge the "Featured Playlist Songs" DataFrame with the "Genres "Dataframe

def add_genres(df, genre_exploded_df):
    df = df.reset_index().merge(genre_exploded_df, on = 'artist_uri', how = 'left').set_index('uri')
    df.rename(columns={'genre list': 'genre'}, inplace=True)
    return df

print('''***Let's add the genres to the featured playlist song df, merging on uri***''')
featured_playlist_songs_with_genres_df = add_genres(featured_playlist_song_df, genre_exploded_df)
print('\nfeatured_playlist_songs_with_genres_df.head()')
display(featured_playlist_songs_with_genres_df.head())

***Let's add the genres to the featured playlist song df, merging on uri***

featured_playlist_songs_with_genres_df.head()


Unnamed: 0_level_0,album,artist_uri,artist,track,url,popularity,added_at,release_date,release_year,genre
uri,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
spotify:track:54bFM56PmE4YLRnqpW6Tha,Therefore I Am,6qqNVTkY8uBg9cP3Jd7DAH,Billie Eilish,Therefore I Am,https://open.spotify.com/track/54bFM56PmE4YLRn...,0,2020-11-13T05:00:28Z,2020-11-12,2020,electropop
spotify:track:54bFM56PmE4YLRnqpW6Tha,Therefore I Am,6qqNVTkY8uBg9cP3Jd7DAH,Billie Eilish,Therefore I Am,https://open.spotify.com/track/54bFM56PmE4YLRn...,0,2020-11-13T05:00:28Z,2020-11-12,2020,pop
spotify:track:6zFMeegAMYQo0mt8rXtrli,HOLIDAY,7jVv8c5Fj3E9VhNjxT4snq,Lil Nas X,HOLIDAY,https://open.spotify.com/track/6zFMeegAMYQo0mt...,0,2020-11-13T05:00:28Z,2020-11-13,2020,country rap
spotify:track:6zFMeegAMYQo0mt8rXtrli,HOLIDAY,7jVv8c5Fj3E9VhNjxT4snq,Lil Nas X,HOLIDAY,https://open.spotify.com/track/6zFMeegAMYQo0mt...,0,2020-11-13T05:00:28Z,2020-11-13,2020,lgbtq+ hip hop
spotify:track:6zFMeegAMYQo0mt8rXtrli,HOLIDAY,7jVv8c5Fj3E9VhNjxT4snq,Lil Nas X,HOLIDAY,https://open.spotify.com/track/6zFMeegAMYQo0mt...,0,2020-11-13T05:00:28Z,2020-11-13,2020,pop rap


In [147]:
# Step 5 of 5

In [149]:
# Step 5a - Merge the "Top Artist Genres" Dataframe with the "Featured Playlist Songs" DataFrame

def add_top_artist_genre_counts(genre_df, song_df):
    df = song_df.reset_index()
    df = df.merge(genre_df , on = 'genre',how = 'left')
    df.drop(columns=['genre'], inplace=True)
    return df

# use the forward slash \ to escape the newline
featured_playlist_songs_with_top_artist_genre_counts = \
add_top_artist_genre_counts(top_artist_genre_count_df, featured_playlist_songs_with_genres_df)

print('\nfeatured_playlist_songs_with_top_artist_genre_counts.head()')
display(featured_playlist_songs_with_top_artist_genre_counts.head())


featured_playlist_songs_with_top_artist_genre_counts.head()


Unnamed: 0,uri,album,artist_uri,artist,track,url,popularity,added_at,release_date,release_year,genre count
0,spotify:track:54bFM56PmE4YLRnqpW6Tha,Therefore I Am,6qqNVTkY8uBg9cP3Jd7DAH,Billie Eilish,Therefore I Am,https://open.spotify.com/track/54bFM56PmE4YLRn...,0,2020-11-13T05:00:28Z,2020-11-12,2020,
1,spotify:track:54bFM56PmE4YLRnqpW6Tha,Therefore I Am,6qqNVTkY8uBg9cP3Jd7DAH,Billie Eilish,Therefore I Am,https://open.spotify.com/track/54bFM56PmE4YLRn...,0,2020-11-13T05:00:28Z,2020-11-12,2020,8.0
2,spotify:track:6zFMeegAMYQo0mt8rXtrli,HOLIDAY,7jVv8c5Fj3E9VhNjxT4snq,Lil Nas X,HOLIDAY,https://open.spotify.com/track/6zFMeegAMYQo0mt...,0,2020-11-13T05:00:28Z,2020-11-13,2020,
3,spotify:track:6zFMeegAMYQo0mt8rXtrli,HOLIDAY,7jVv8c5Fj3E9VhNjxT4snq,Lil Nas X,HOLIDAY,https://open.spotify.com/track/6zFMeegAMYQo0mt...,0,2020-11-13T05:00:28Z,2020-11-13,2020,
4,spotify:track:6zFMeegAMYQo0mt8rXtrli,HOLIDAY,7jVv8c5Fj3E9VhNjxT4snq,Lil Nas X,HOLIDAY,https://open.spotify.com/track/6zFMeegAMYQo0mt...,0,2020-11-13T05:00:28Z,2020-11-13,2020,8.0


In [153]:
# ***OPTIONAL***
# Dataframe aggregation (e.g. summing, counting, etc.)
# To do any aggregation, you must group by all columns EXCEPT the ones you are aggregating

df1 = pd.DataFrame({'colA': ['hello', 'hello', 'hello', 'world'], 'colB': ['gottem','coach','coach','coach'], 'colC': [1,2,3,4]})
print('df1:')
display(df1)

df2 = df1.groupby(['colA','colB']).sum()
print('\n\n***Sum values in colC:***\n\ndf2')
display(df2)

df3 = df2.sort_values('colC', ascending=False)
print('\n\n***Sort rows by values in colC, ascending:***\n\ndf3')
display(df3)

df1:


Unnamed: 0,colA,colB,colC
0,hello,gottem,1
1,hello,coach,2
2,hello,coach,3
3,world,coach,4




***Sum values in colC:***

df2


Unnamed: 0_level_0,Unnamed: 1_level_0,colC
colA,colB,Unnamed: 2_level_1
hello,coach,5
hello,gottem,1
world,coach,4




***Sort rows by values in colC, ascending:***

df3


Unnamed: 0_level_0,Unnamed: 1_level_0,colC
colA,colB,Unnamed: 2_level_1
hello,coach,5
world,coach,4
hello,gottem,1


In [156]:
# Step 4b - Sum the genre counts

def sum_genre_col(df):
    groupby_cols = df.columns.to_list()[:-1]
    df = df.groupby(groupby_cols).sum().sort_values('genre count', ascending=False)
    df = df.rename({'genre count': 'genre score'}, axis=1).reset_index() 
    # reset index after summing - otherwise everything prior to sum will become index
    df.set_index('uri', inplace=True)
    return df

featured_playlist_songs_with_top_artist_genre_score = \
sum_genre_col(featured_playlist_songs_with_top_artist_genre_counts)

print('\nfeatured_playlist_songs_with_top_artist_genre_score.head()')
display(featured_playlist_songs_with_top_artist_genre_score.head())


featured_playlist_songs_with_top_artist_genre_score.head()


Unnamed: 0_level_0,album,artist_uri,artist,track,url,popularity,added_at,release_date,release_year,genre score
uri,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
spotify:track:1zNrtAQhwbRyFmsf3yoBqT,The Great Divide,4LG4Bs1Gadht7TCrMytQUO,The Shins,The Great Divide,https://open.spotify.com/track/1zNrtAQhwbRyFms...,55,2020-10-13T12:34:03Z,2020-09-24,2020,312.0
spotify:track:25JOOXgL3bVkBzjzGvcLoE,Sour Lemon,75dQReiBOHN37fQgWQrIAJ,Local Natives,Lost,https://open.spotify.com/track/25JOOXgL3bVkBzj...,59,2020-10-28T20:46:07Z,2020-10-23,2020,264.0
spotify:track:3pYB28IRzhtR5cHXLINchp,Shore,4EVpmkEwrLYEg6jIsiPMIb,Fleet Foxes,Can I Believe You,https://open.spotify.com/track/3pYB28IRzhtR5cH...,70,2020-09-25T04:01:00Z,2020-09-22,2020,168.0
spotify:track:308prODCCD0O660tIktbUi,Shore,4EVpmkEwrLYEg6jIsiPMIb,Fleet Foxes,Sunblind,https://open.spotify.com/track/308prODCCD0O660...,66,2020-10-28T20:46:07Z,2020-09-22,2020,168.0
spotify:track:06LHQcT0e2tq8arQJQFJ1Q,Frequency,39vA9YljbnOApXKniLWBZv,Sylvan Esso,Frequency,https://open.spotify.com/track/06LHQcT0e2tq8ar...,52,2020-09-25T04:01:00Z,2020-09-10,2020,120.0


In [157]:
# Step 4c - Create an empty Spotify playlist to store the songs
# Like API GET requests, POST requests (sending data to the API) also need the URL + headers
# We also need the data we are sending, called a payload, which must be formatted as a json object (basically, a dict)
# I got the URL for this and the format for the request payload/body @ https://developer.spotify.com/console/post-playlists/

define_scope()

def create_playlist():
    payload = {"name": f'Fresh Jams, {ctime()}'}
    result = requests.post(url = f'https://api.spotify.com/v1/users/{username}/playlists'
        , headers = headers
        , data = json.dumps(payload)
        )
    print(f'''playlist = {payload['name']}''')
    # Now let's get our playlists from Spotify.
    playlists_raw = sp.current_user_playlists()['items']
    # If the name matches, grab the uri (which is the unique identifier or key for the playlist)
    for i in playlists_raw:
        if i['name'] == payload['name']:
            uri = i['uri']
    return uri

uri = create_playlist()
print(f'uri = {uri}')

playlist = Fresh Jams, Fri Nov 13 12:28:23 2020
uri = spotify:playlist:669gJqWY5xpI8EBcxLrEkn


In [161]:
# Step 4d - Add songs with a genre score threshold (95th percentile) into the new playlist

# Got the URL for this @ https://developer.spotify.com/console/post-playlist-tracks/

def create_playlist(uri, df):
    playlist_uri = uri.split(':')[2]
    df['pct_rank'] = df['genre score'].rank(pct=True)
    uri_list = df.loc[df['pct_rank'] >= 0.95].reset_index()['uri'].to_list()
    num_songs_to_add = len(uri_list)
    # add songs
    for i in range(int(math.ceil(num_songs_to_add/100.0))):
        url = f"https://api.spotify.com/v1/playlists/{playlist_uri}/tracks?uris={','.join(uri_list[i*100:(i+1)*100])}"
        requests.post(url, headers=headers)
    return print('***Complete! Check Spotify to see if playlist was filled with songs.***')

create_playlist(uri,featured_playlist_songs_with_top_artist_genre_score)
print('***Thank you! If you have feedback, please send to jewell.will@gmail.com!***')

***Complete! Check Spotify to see if playlist was filled with songs.***
***Thank you! If you have feedback, please send to jewell.will@gmail.com!***
