# Lab - song recommender

## Day 1 - web scraping

In [77]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import json
import pprint
import config

#import re


In [78]:
url = 'https://www.billboard.com/charts/hot-100/'
res = requests.get(url)

res.status_code


200

In [79]:
soup = BeautifulSoup(res.text, 'html.parser')


In [80]:
top100 = soup.find_all('div', {'class': 'o-chart-results-list-row-container'})


In [81]:
def get_songs(containers):
    songs = []
    i = 0
    while i < len(containers):
        song = containers[i].find('h3').get_text()
        songs.append(song.replace('\n', '').replace('\t', ''))
        i += 1
    return songs


In [82]:
def get_artists(containers):
    artists = []
    i = 0
    while i < len(containers):
        artist = containers[i].find(
            'span', {'class': 'a-no-trucate'}).get_text()
        artists.append(artist.replace('\n', '').replace('\t', ''))
        i += 1
    return artists


In [83]:
artists = get_artists(top100)
songs = get_songs(top100)


In [84]:
top100_df = pd.DataFrame(zip(artists, songs), columns=['Artist', 'Song'])
top100_df.index = np.arange(1, len(top100_df) + 1)

top100_df


Unnamed: 0,Artist,Song
1,Miley Cyrus,Flowers
2,SZA,Kill Bill
3,Morgan Wallen,Last Night
4,Sam Smith & Kim Petras,Unholy
5,"Metro Boomin, The Weeknd & 21 Savage",Creepin'
...,...,...
96,Yandel & Feid,Yandel 150
97,Mac DeMarco,Heart To Heart
98,Jordan Davis,Next Thing You Know
99,P!nk,Never Gonna Not Dance Again


In [87]:
def recommender(df):
    artist = input('Artist name: ').lower()
    song = input('Song name: ').lower()

    lower_df = df.applymap(lambda x: x.lower())
    songRow = lower_df.loc[(lower_df['Artist'] == artist)
                           & (lower_df['Song'] == song)]

    if not songRow.empty:
        exclude_index = songRow.index
        rec_index = np.random.choice(
            [i for i in range(1, len(df) + 1) if i != exclude_index])

        recommended_artist = df.loc[rec_index, 'Artist']
        recommended_song = df.loc[rec_index, 'Song']

        return f'{recommended_artist} - {recommended_song}'

    return 'Unfortunately, the song is not in the hot list!'


In [88]:
recommender(top100_df)


'Unfortunately, the song is not in the hot list!'

## Spotify

In [89]:
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=config.client_id,
                                                           client_secret=config.client_secret))


In [90]:
test_pl = sp.user_playlist_tracks("spotify", "0iAeUtwINlqfjwAyQ4ykur", market="GB")
test_pl['items'][0]['track']['id']

#json_playlist = json.dumps(playlist['items'])
# pprint.pprint(playlist)


'4IqLCkdTQUl4tYb389pcSy'

In [91]:
# get playlist tracks
def get_playlist_tracks(playlist_id):
    results = sp.user_playlist_tracks('spotify', playlist_id, market="GB")
    tracks = results['items']
    while results['next']:
        results = sp.next(results)
        tracks.extend(results['items'])
    return tracks



In [92]:
# extract song ids
def extract_ids(playlist):
    ids = []
    i = 0
    while i < len(playlist):
        ids.append(playlist[i]['track']['id'])
        i += 1
    return ids



In [106]:
# extract song features for each id
def extract_features(ids):
    features_by_id = []
    i = 0
    while i < len(ids):
        features_by_id.append(sp.audio_features(ids[i])[0])
        i += 1
    return features_by_id



In [94]:
# extracrt artists names
def extract_artists(playlist):
    artists = []
    i = 0
    while i < len(playlist):
        artists.append(playlist[i]['track']['artists'][0]['name'])
        i += 1
    return artists


In [95]:
# extract songs names
def extract_songs(playlist):
    songs = []
    i = 0
    while i < len(playlist):
        songs.append(playlist[i]['track']['name'])
        i += 1
    return songs


In [107]:
playlist = get_playlist_tracks('0iAeUtwINlqfjwAyQ4ykur')
ids = extract_ids(playlist)
features = extract_features(ids)
artists = extract_artists(playlist)
songs = extract_songs(playlist)

In [138]:
spotify_pd = pd.DataFrame({'id' : ids, 'artist' : artists, 'song' : songs, 'features' : features})

spotify_pd = pd.concat([spotify_pd.drop(['features'], axis=1), spotify_pd['features'].apply(pd.Series)], axis=1)
spotify_pd.drop(['type', 'uri', 'track_href', 'analysis_url', 'time_signature'], axis=1, inplace=True)
#spotify_pd = pd.concat([spotify_pd['id'], spotify_pd.drop(['id'], axis=1)])

spotify_pd


Unnamed: 0,id,artist,song,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,id.1,duration_ms
0,4IqLCkdTQUl4tYb389pcSy,Sung,Neon Artery,0.513,0.734,9,-6.890,0,0.1510,0.1460,0.002690,0.0898,0.3750,104.004,4IqLCkdTQUl4tYb389pcSy,212308
1,6JS1zboZEMNJbfeJIfSzNU,M.A.D.E.S,Return - Original Mix,0.490,0.683,9,-6.485,1,0.0589,0.0268,0.030200,0.3850,0.0389,109.993,6JS1zboZEMNJbfeJIfSzNU,303042
2,2KLaeABcYhR1K37R7uj6Ji,Sebastian Weikum,Monsoon - Æ7 Night Remix,0.395,0.729,8,-8.170,1,0.0523,0.0171,0.846000,0.3900,0.4860,194.001,2KLaeABcYhR1K37R7uj6Ji,210309
3,6mYEmhiiLDdNaUQbE73uvx,Tesla Boy,Hate,0.634,0.675,9,-7.056,1,0.0311,0.0169,0.000115,0.1720,0.2890,100.021,6mYEmhiiLDdNaUQbE73uvx,323000
4,7zqS6rZTqQiL2OzNKMO3Fb,Punker,Dream Theater,0.369,0.696,9,-8.687,0,0.0382,0.0109,0.780000,0.1490,0.0381,119.979,7zqS6rZTqQiL2OzNKMO3Fb,216000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102,27ugnGd4ORVLi9ej3qU682,NINA,Counting Stars - Instrumental,0.529,0.514,2,-10.014,1,0.0288,0.0325,0.899000,0.0717,0.0955,91.992,27ugnGd4ORVLi9ej3qU682,278179
103,6UVaNrWSeP15FtQ5Z1JX04,PYLOT,Lost - Instrumental,0.459,0.674,8,-12.173,0,0.0437,0.0250,0.890000,0.0889,0.1660,179.975,6UVaNrWSeP15FtQ5Z1JX04,318000
104,4H05gMIBERpCtVXDEiKsAY,Moonraccoon,Showdown,0.466,0.565,9,-6.923,0,0.0341,0.0758,0.629000,0.4610,0.0391,119.993,4H05gMIBERpCtVXDEiKsAY,288000
105,7warZ7DWnbhwelyuJ95Tgk,Pilotpriest,Bonus Track - Switchblade,0.439,0.820,9,-5.506,0,0.0502,0.0338,0.843000,0.1060,0.3970,169.849,7warZ7DWnbhwelyuJ95Tgk,305419
