# Creating a MVP for a Song Recommendation Tool 

### Step 1: Scrape the data to be used as reference for the model

In [1]:

import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm.notebook import tqdm

In [2]:
url = "https://www.billboard.com/charts/hot-100/"


In [3]:
response = requests.get(url)
top100=BeautifulSoup(response.content,'html.parser')


In [4]:
top100.select('span.c-label.a-font-primary-s')[0].get_text(strip=True)


'Adele'

In [5]:
top100.select('h3.c-title.a-no-trucate')[0].get_text(strip=True)


'Easy On Me'

In [6]:
top_100 = len(top100.select('h3.c-title.a-no-trucate'))


In [7]:

song = []
artist = []

for i in tqdm(range(top_100)):
    song.append(top100.select('h3.c-title.a-no-trucate')[i].get_text(strip=True))
    artist.append(top100.select('span.c-label.a-font-primary-s')[i].get_text(strip=True))
    
    
   

  0%|          | 0/100 [00:00<?, ?it/s]

In [8]:
 top100_table = pd.DataFrame({'song':song, 'artist':artist})
top100_table

Unnamed: 0,song,artist
0,Easy On Me,Adele
1,Stay,The Kid LAROI & Justin Bieber
2,Industry Baby,Lil Nas X & Jack Harlow
3,All Too Well (Taylor's Version),Taylor Swift
4,Oh My God,Adele
...,...,...
95,Too Easy,Gunna & Future
96,Escape Plan,Travis Scott
97,Who's In Your Head,Jonas Brothers
98,Jugaste y Sufri,Eslabon Armado Featuring DannyLux


### Step 2: Build the Recommender

In [9]:
#creating the user input 
song = input("Search here for a song! ").lower()

Search here for a song! stay


In [10]:
#Checking if the input is actually in the data frame

check = top100_table[top100_table['song'].str.lower().str.contains(song)]
check

Unnamed: 0,song,artist
1,Stay,The Kid LAROI & Justin Bieber


In [11]:
#assign the relevant index number to list for later string-operations, model identifies song names through index

index = check.index.tolist()

In [12]:
top100_table.song[index].values[0]

'Stay'

In [13]:
# check if a value has been returned in the input or not - (is the value in our dataframe or not)
if len(check) == 0:
    print("That ain't it chief, try again..")
else: # in case the song was spelt in correctly, confirm the song name
    answer = input("Did you mean "+ top100_table.song[index].values[0] + " by " + top100_table.artist[index].values[0] + "? ")
        
    if answer.lower() == 'yes':
        suggestion = top100_table.sample().index.tolist()
        print("Well you got good taste alright! Here's another banger you might like called " + top100_table['song'][suggestion].item() + " by " +  top100_table['artist'][suggestion].item())
    else: 
        print ("I got nothing else for you, you should be dissapointed.. " )
        
        



  

Did you mean Stay by The Kid LAROI & Justin Bieber? yes
Well you got good taste alright! Here's another banger you might like called Tequila Little Time by Jon Pardi


### Breaking each step down 

In [14]:
#step 1: create user input: 

song = input("Enter your song here  ").lower()




# converting input to lowercase to standardize it 

Enter your song here  baby


In [15]:
#step 2: create a check field to find if the user input is in the dataframe, if not? 

mic_check = top100_table[top100_table['song'].str.lower().str.contains(song)]
mic_check


# need to add str since lower only works if the value is a string, 
#the outer[] is to return the exact value and not a list of true and false
#contains makes sure we are working on the user input

Unnamed: 0,song,artist
2,Industry Baby,Lil Nas X & Jack Harlow


In [16]:
#Step 3: Assign the values an index so the recommender can return the names later based on that. 

index = mic_check.index.to_list()

#to_list converts values into a list so we can work with it later

In [17]:
#Step 4: Create value to return the required songs - that we will use in the actual recommender code

top100_table.song[index].values[0]

'Industry Baby'

In [18]:
mic_check

Unnamed: 0,song,artist
2,Industry Baby,Lil Nas X & Jack Harlow


In [19]:
#Step 5: Create scenarios for each input 

while True: 
        song = input("Enter your song here: ").lower()
        mic_check = top100_table[top100_table['song'].str.lower().str.contains(song)]
        index = mic_check.index.to_list()
        top100_table.song[index].values[0]

        if len(mic_check) == 0:
                 print("That ain't it chief, try again")
        else: 
            answer=input("I think we're onto something, did you mean " + top100_table.song[index].values[0] + " by " + top100_table.artist[index].values[0] + "? ")
  
            if answer.lower() == "yes":
                suggestion=top100_table.sample().index.to_list()
                print("Seems like you got good taste in music! Try this song out by " + top100_table['artist'][suggestion].item() + " it's called " + top100_table['song'][suggestion].item())
                break
            else: 
                input("Ok, let's try that again now ")
              

Enter your song here: baby
I think we're onto something, did you mean Industry Baby by Lil Nas X & Jack Harlow? yes
Seems like you got good taste in music! Try this song out by Taylor Swift it's called Red (Taylor's Version)


In [20]:

while True: 
        song = input("Enter your song here: ").lower()
        mic_check = top100_table[top100_table['song'].str.lower().str.contains(song)]
        index = mic_check.index.to_list()
        top100_table.song[index].values[0]

        if len(mic_check) == 0:
                 print("That ain't it chief, try again")
                 continue
        else: 
            answer=input("I think we're onto something, did you mean " + top100_table.song[index].values[0] + " by " + top100_table.artist[index].values[0] + "? ")
  
            if answer.lower() == "yes":
                suggestion=top100_table.sample().index.to_list()
                print("Seems like you got good taste in music! Try this song out by " + top100_table['artist'][suggestion].item() + " it's called " + top100_table['song'][suggestion].item())
                break
            else: 
                input("Ok, let's try that again now ")

Enter your song here: baby
I think we're onto something, did you mean Industry Baby by Lil Nas X & Jack Harlow? yes
Seems like you got good taste in music! Try this song out by Drake Featuring Lil Baby it's called Girls Want Girls


In [21]:
len(mic_check)

1

## Getting song data from Spotify

In [22]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [23]:
import getpass

In [24]:
# inputting our spotify id & secret in an encrypted way
client_id = str(getpass.getpass('client_id?'))
client_secret=str(getpass.getpass('client_secret?'))

client_id?········
client_secret?········


In [25]:
#embedding getpass credentials in the access key
sp=spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=client_id,client_secret=client_secret))

#### Choosing a playlist

In [27]:
playlist=sp.user_playlist_tracks("SunSubiroRecords","3xMQTDLOIGvj3lWH5e5x6F")



In [28]:
playlist.keys()

dict_keys(['href', 'items', 'limit', 'next', 'offset', 'previous', 'total'])

In [34]:
playlist["items"][2].keys()

dict_keys(['added_at', 'added_by', 'is_local', 'primary_color', 'track', 'video_thumbnail'])

In [31]:
playlist["items"][2]['track'].keys()

dict_keys(['album', 'artists', 'available_markets', 'disc_number', 'duration_ms', 'episode', 'explicit', 'external_ids', 'external_urls', 'href', 'id', 'is_local', 'name', 'popularity', 'preview_url', 'track', 'track_number', 'type', 'uri'])

In [49]:
def get_playlist_tracks(user_id,playlist_id):
    results=sp.user_playlist_tracks(user_id,playlist_id)
    tracks=results["items"]
    while results['next']:
        results = sp.next(results)
        tracks.extend(results['items'])
    return tracks

In [36]:
results2=get_playlist_tracks("SunSubiroRecords","3xMQTDLOIGvj3lWH5e5x6F")

In [37]:
len(results2)

689

In [39]:
results2[0]['track'].keys()

dict_keys(['album', 'artists', 'available_markets', 'disc_number', 'duration_ms', 'episode', 'explicit', 'external_ids', 'external_urls', 'href', 'id', 'is_local', 'name', 'popularity', 'preview_url', 'track', 'track_number', 'type', 'uri'])

In [40]:
results2[0]['track']["uri"].rsplit(':',1)

['spotify:track', '5PjdY0CKGZdEuoNab3yDmX']

In [41]:
results2[0]['track']["artists"][0].keys()

dict_keys(['external_urls', 'href', 'id', 'name', 'type', 'uri'])

In [42]:
song_name=[]
song_URI=[]
song_ID=[]
song_artist=[]
playlist=results2

In [43]:
for r in results2:
    song_URI.append(r['track']['uri'])

In [44]:
for r in results2:
    song_ID.append(r['track']['id'])

In [45]:
for r in results2:
    song_name.append(r['track']['name'])

In [47]:
for r in results2: 
    song_artist.append(r['track']['artists'][0]['name'])

In [52]:
import pandas as pd
playlist_song_info=pd.DataFrame({"song_name":song_name,"song_ID":song_ID,"song_artist":song_artist})

In [53]:
playlist_song_info

Unnamed: 0,song_name,song_ID,song_artist
0,STAY (with Justin Bieber),5PjdY0CKGZdEuoNab3yDmX,The Kid LAROI
1,Easy On Me,0gplL1WMoJ6iYaPgMCL0gX,Adele
2,INDUSTRY BABY (feat. Jack Harlow),5Z9KJZvQzH6PFmb8SNkxuk,Lil Nas X
3,More Than You Know,2yaZGI4MWkdCGmkTWYHW6u,Filip Nordin
4,abcdefu,4fouWK6XVHhzl78KzQ1UjL,GAYLE
...,...,...,...
684,Sweet Child O' Mine,4oWy1Rx027Xj55EdK9qCgM,Covers Culture
685,Canada,0NiZ9lc9y6CZqaFYwsUL9c,Bobbylene
686,Maybe It's Time,5gQu2dp4DIcgEDWnrZblrN,Bobbylene
687,Ivy,1cfLUa6FPK2Oie3W6Sa7rb,Bobbylene


In [57]:
feats = sp.audio_features(tracks=song_ID[0:99])

In [83]:
audio_feat_df=pd.DataFrame(sp.audio_features(tracks=song_ID[0:99]))


TypeError: 'int' object is not subscriptable

In [69]:
audio_features = []
for song_ID in tqdm(range(len(playlist_song_info))):
    audio_features.extend(sp.audio_features(tracks=playlist_song_info['song_ID'][song_ID]))

  0%|          | 0/689 [00:00<?, ?it/s]

In [82]:
audio_features_df = pd.DataFrame(audio_features) 


In [80]:
names_audio_features_df = pd.merge(playlist_song_info, audio_features_df, left_on='song_ID',right_on='id', how='inner')

### Final Merged Data Frame

In [81]:
names_audio_features_df

Unnamed: 0,song_name,song_ID,song_artist,danceability,energy,key,loudness,mode,speechiness,acousticness,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,STAY (with Justin Bieber),5PjdY0CKGZdEuoNab3yDmX,The Kid LAROI,0.591,0.7640,1,-5.484,1,0.0483,0.0383,...,0.1030,0.478,169.928,audio_features,5PjdY0CKGZdEuoNab3yDmX,spotify:track:5PjdY0CKGZdEuoNab3yDmX,https://api.spotify.com/v1/tracks/5PjdY0CKGZdE...,https://api.spotify.com/v1/audio-analysis/5Pjd...,141806,4
1,Easy On Me,0gplL1WMoJ6iYaPgMCL0gX,Adele,0.604,0.3660,5,-7.519,1,0.0282,0.5780,...,0.1330,0.130,141.981,audio_features,0gplL1WMoJ6iYaPgMCL0gX,spotify:track:0gplL1WMoJ6iYaPgMCL0gX,https://api.spotify.com/v1/tracks/0gplL1WMoJ6i...,https://api.spotify.com/v1/audio-analysis/0gpl...,224695,4
2,INDUSTRY BABY (feat. Jack Harlow),5Z9KJZvQzH6PFmb8SNkxuk,Lil Nas X,0.741,0.6910,10,-7.395,0,0.0672,0.0221,...,0.0476,0.892,150.087,audio_features,5Z9KJZvQzH6PFmb8SNkxuk,spotify:track:5Z9KJZvQzH6PFmb8SNkxuk,https://api.spotify.com/v1/tracks/5Z9KJZvQzH6P...,https://api.spotify.com/v1/audio-analysis/5Z9K...,212353,4
3,More Than You Know,2yaZGI4MWkdCGmkTWYHW6u,Filip Nordin,0.775,0.3170,8,-7.527,1,0.0314,0.7890,...,0.2080,0.509,98.017,audio_features,2yaZGI4MWkdCGmkTWYHW6u,spotify:track:2yaZGI4MWkdCGmkTWYHW6u,https://api.spotify.com/v1/tracks/2yaZGI4MWkdC...,https://api.spotify.com/v1/audio-analysis/2yaZ...,155510,4
4,abcdefu,4fouWK6XVHhzl78KzQ1UjL,GAYLE,0.695,0.5400,4,-5.692,1,0.0493,0.2990,...,0.3670,0.415,121.932,audio_features,4fouWK6XVHhzl78KzQ1UjL,spotify:track:4fouWK6XVHhzl78KzQ1UjL,https://api.spotify.com/v1/tracks/4fouWK6XVHhz...,https://api.spotify.com/v1/audio-analysis/4fou...,168602,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
684,Sweet Child O' Mine,4oWy1Rx027Xj55EdK9qCgM,Covers Culture,0.747,0.2400,6,-12.393,1,0.0407,0.9190,...,0.1250,0.399,115.965,audio_features,4oWy1Rx027Xj55EdK9qCgM,spotify:track:4oWy1Rx027Xj55EdK9qCgM,https://api.spotify.com/v1/tracks/4oWy1Rx027Xj...,https://api.spotify.com/v1/audio-analysis/4oWy...,132425,4
685,Canada,0NiZ9lc9y6CZqaFYwsUL9c,Bobbylene,0.608,0.1500,11,-12.729,1,0.0321,0.8630,...,0.1240,0.434,81.988,audio_features,0NiZ9lc9y6CZqaFYwsUL9c,spotify:track:0NiZ9lc9y6CZqaFYwsUL9c,https://api.spotify.com/v1/tracks/0NiZ9lc9y6CZ...,https://api.spotify.com/v1/audio-analysis/0NiZ...,126951,4
686,Maybe It's Time,5gQu2dp4DIcgEDWnrZblrN,Bobbylene,0.505,0.0974,7,-14.228,1,0.0331,0.8860,...,0.1180,0.345,151.449,audio_features,5gQu2dp4DIcgEDWnrZblrN,spotify:track:5gQu2dp4DIcgEDWnrZblrN,https://api.spotify.com/v1/tracks/5gQu2dp4DIcg...,https://api.spotify.com/v1/audio-analysis/5gQu...,142105,4
687,Ivy,1cfLUa6FPK2Oie3W6Sa7rb,Bobbylene,0.579,0.1620,9,-11.063,1,0.0577,0.9080,...,0.1950,0.615,159.858,audio_features,1cfLUa6FPK2Oie3W6Sa7rb,spotify:track:1cfLUa6FPK2Oie3W6Sa7rb,https://api.spotify.com/v1/tracks/1cfLUa6FPK2O...,https://api.spotify.com/v1/audio-analysis/1cfL...,130312,4
