# Lab - song recommender

## Day 1 - web scraping

In [403]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import json
import pprint
import config

#import re


In [404]:
# Billboard Hot 100 chart page; the cells below scrape its rows.
url = 'https://www.billboard.com/charts/hot-100/'
# requests has no default timeout, so without one a stalled connection would
# block this cell indefinitely.
res = requests.get(url, timeout=30)
# Fail here on a 4xx/5xx answer instead of parsing an error page further down.
res.raise_for_status()

res.status_code


200

In [405]:
# Parse the downloaded page with the 'html.parser' backend.
soup = BeautifulSoup(markup=res.text, features='html.parser')


In [406]:
# Every <div> carrying this class is collected; get_songs / get_artists below
# read one title and one artist out of each of them.
top100 = soup.find_all('div', class_='o-chart-results-list-row-container')


In [407]:
def get_songs(containers):
    """Return the song title found in each chart-row container.

    For every container the text of its first ``<h3>`` element is taken and
    all newline and tab characters are removed from it.

    Parameters
    ----------
    containers : sequence
        Objects exposing ``find('h3')`` whose result exposes ``get_text()``
        (the notebook passes the result of ``soup.find_all``).

    Returns
    -------
    list of str
        One cleaned title per container, in the same order.
    """
    return [
        row.find('h3').get_text().replace('\n', '').replace('\t', '')
        for row in containers
    ]


In [408]:
def get_artists(containers):
    """Return the artist credit found in each chart-row container.

    For every container the text of its first ``<span>`` with class
    ``a-no-trucate`` is taken and all newline and tab characters are removed.

    Parameters
    ----------
    containers : sequence
        Objects exposing ``find(name, attrs)`` whose result exposes
        ``get_text()`` (the notebook passes the result of ``soup.find_all``).

    Returns
    -------
    list of str
        One cleaned artist string per container, in the same order.
    """
    names = []
    for row in containers:
        # NOTE(review): the class really is spelled 'a-no-trucate' here; the
        # scraped output suggests it matches the page markup, so keep it as is.
        credit = row.find('span', {'class': 'a-no-trucate'})
        raw_text = credit.get_text()
        names.append(raw_text.replace('\n', '').replace('\t', ''))
    return names


In [409]:
# Both lists are read from the same chart rows, so position i in one list
# belongs to position i in the other.
artists, songs = get_artists(top100), get_songs(top100)


In [410]:
# One row per (artist, song) pair; zip stops at the shorter of the two lists.
top100_df = pd.DataFrame(data=list(zip(artists, songs)),
                         columns=['Artist', 'Song'])
# Use 1-based row labels instead of pandas' default 0-based ones.
top100_df.index = np.arange(len(top100_df)) + 1

top100_df


Unnamed: 0,Artist,Song
1,Miley Cyrus,Flowers
2,SZA,Kill Bill
3,Morgan Wallen,Last Night
4,Sam Smith & Kim Petras,Unholy
5,"Metro Boomin, The Weeknd & 21 Savage",Creepin'
...,...,...
96,Yandel & Feid,Yandel 150
97,Mac DeMarco,Heart To Heart
98,Jordan Davis,Next Thing You Know
99,P!nk,Never Gonna Not Dance Again


In [411]:
def recommender(df, artist=None, song=None):
    """Recommend a random *other* song from ``df`` if the given song is in it.

    The lookup ignores case and surrounding whitespace. When the song is
    found, one of the remaining rows is picked uniformly at random with
    ``np.random.choice`` (so ``np.random.seed`` makes the pick reproducible).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'Artist'`` and ``'Song'``. Any index is
        accepted; rows are addressed by position, not by label.
    artist, song : str, optional
        Artist and song title to look up. Each one that is left as ``None``
        is asked for interactively with ``input()``.

    Returns
    -------
    str
        ``'<artist> - <song>'`` of the recommended row, or an explanatory
        message when the song is not in ``df`` or nothing else is available.
    """
    if artist is None:
        artist = input('Artist name: ')
    if song is None:
        song = input('Song name: ')
    artist = artist.strip().lower()
    song = song.strip().lower()

    # Normalise only the two columns that are compared; astype(str) keeps
    # non-string cells (e.g. NaN) from raising.
    artist_col = df['Artist'].astype(str).str.strip().str.lower()
    song_col = df['Song'].astype(str).str.strip().str.lower()
    is_match = ((artist_col == artist) & (song_col == song)).to_numpy()

    if not is_match.any():
        return 'Unfortunately, the song is not in the hot list!'

    # Exclude *every* matching row (duplicates included) and work with row
    # positions so that the index labels do not matter.
    candidates = np.flatnonzero(~is_match)
    if candidates.size == 0:
        return 'Unfortunately, there is no other song to recommend!'

    pick = np.random.choice(candidates)
    recommended_artist = df['Artist'].iloc[pick]
    recommended_song = df['Song'].iloc[pick]

    return f'{recommended_artist} - {recommended_song}'


In [412]:
# Interactive: asks for an artist and a song on stdin, then shows either a
# randomly chosen other chart entry or a "not in the hot list" message.
recommender(top100_df)


'Unfortunately, the song is not in the hot list!'

## Spotify

In [413]:
# The client id/secret are read from the local `config` module rather than
# being written into the notebook.
_spotify_auth = SpotifyClientCredentials(
    client_id=config.client_id,
    client_secret=config.client_secret,
)
sp = spotipy.Spotify(auth_manager=_spotify_auth)


In [414]:
# Smoke test: fetch the first page of one playlist and show the id of its
# first track. `playlist_items` is used because `user_playlist_tracks` is
# deprecated in spotipy; limiting `additional_types` to tracks keeps the
# response restricted to track entries.
test_pl = sp.playlist_items("0iAeUtwINlqfjwAyQ4ykur", market="GB",
                            additional_types=("track",))
test_pl['items'][0]['track']['id']


'4IqLCkdTQUl4tYb389pcSy'

In [415]:
# get playlist tracks
def get_tracks(playlist_id, market="GB"):
    """Fetch every item of a Spotify playlist, following pagination.

    Uses the module-level spotipy client ``sp``.

    Parameters
    ----------
    playlist_id : str
        Spotify playlist id.
    market : str, optional
        Market code forwarded to the API. Defaults to ``"GB"``, the value
        this notebook has always used.

    Returns
    -------
    list
        The ``'items'`` entries of all result pages, in page order.
    """
    # `user_playlist_tracks` is deprecated in spotipy in favour of
    # `playlist_items`; asking for tracks only keeps the same item type.
    page = sp.playlist_items(playlist_id, market=market,
                             additional_types=('track',))
    # Copy, so that extending below does not modify the first response page.
    tracks = list(page['items'])
    while page['next']:
        page = sp.next(page)
        tracks.extend(page['items'])
    return tracks



In [416]:
# extract song ids
def extract_ids(playlist):
    """Return the track id of every playlist item.

    Parameters
    ----------
    playlist : sequence of dict
        Items shaped like ``{'track': {'id': ...}}`` (the notebook passes the
        list returned by ``get_tracks``).

    Returns
    -------
    list
        ``item['track']['id']`` for each item, in the same order.
    """
    return [item['track']['id'] for item in playlist]



In [417]:
# extract song features for each id
def extract_features(ids, batch_size=100):
    """Fetch the Spotify audio features for each track id.

    Uses the module-level spotipy client ``sp``. Ids are requested in batches
    rather than one HTTP call per track.

    Parameters
    ----------
    ids : list
        Track ids; entries may be ``None`` (or empty), e.g. for playlist
        items that have no Spotify track id.
    batch_size : int, optional
        Number of ids per request. The audio-features endpoint accepts at
        most 100 ids per call, which is the default.

    Returns
    -------
    list
        Exactly one entry per element of ``ids``, in the same order: the
        feature dict, or ``None`` when the id is missing or Spotify returned
        no features for it. Keeping the placeholder preserves the alignment
        with ``ids`` that callers building a table from both lists rely on.
    """
    # Missing ids cannot be sent to the API; they get a None placeholder below.
    requestable = [track_id for track_id in ids if track_id]

    features_lookup = {}
    for start in range(0, len(requestable), batch_size):
        batch = requestable[start:start + batch_size]
        # The response lists one entry per requested id, in request order.
        response = sp.audio_features(batch) or []
        for track_id, track_features in zip(batch, response):
            features_lookup[track_id] = track_features

    return [features_lookup.get(track_id) for track_id in ids]


In [418]:
# playlist to dataframe
def to_df(playlist_id):
    """Build a table of audio features for the tracks of one playlist.

    Parameters
    ----------
    playlist_id : str
        Spotify playlist id, passed to ``get_tracks``.

    Returns
    -------
    pandas.DataFrame
        One row per playlist entry that has a track id. The first column is
        ``'id'``; the remaining columns are the audio-feature fields except
        ``type``, ``uri``, ``track_href``, ``analysis_url`` and
        ``time_signature``. Tracks for which no features were returned keep
        their row, with NaN in the feature columns.
    """
    unwanted = {'id', 'type', 'uri', 'track_href', 'analysis_url',
                'time_signature'}

    playlist = get_tracks(playlist_id)
    ids = extract_ids(playlist)

    # Match features to tracks through the id stored inside each feature dict
    # rather than by list position: the feature list may contain None entries
    # or be shorter than `ids` when some tracks have no features.
    features_by_id = {
        entry.get('id'): entry for entry in extract_features(ids) if entry
    }

    rows = []
    for track_id in ids:
        if track_id is None:
            # Entries without a track id cannot be looked up or identified.
            continue
        row = {'id': track_id}
        for field, value in features_by_id.get(track_id, {}).items():
            if field not in unwanted:
                row[field] = value
        rows.append(row)

    if not rows:
        return pd.DataFrame(columns=['id'])
    return pd.DataFrame(rows)


In [None]:
# Larger playlists tried during development (calls left disabled):
# to_df('5S8SJdl1BDc0ugpkEvFsIL')  # longest, 10k; contains tracks without features
# to_df('7beGd4yYY1qpsBv6K3clFZ')  # 4.5k

In [419]:
# Stack the feature tables of both playlists into one frame with a fresh
# 0..N-1 index.
df = pd.concat(
    [to_df(playlist_id)
     for playlist_id in ('0iAeUtwINlqfjwAyQ4ykur', '37i9dQZF1DWXWbLEOaHnU3')],
    ignore_index=True,
)
