# Song Recommendation Project

## Imports

In [1]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
from tqdm.notebook import tqdm
from time import sleep

In [2]:
from bs4 import BeautifulSoup
import requests
from tqdm.notebook import tqdm
import numpy as np
import random
import Levenshtein as lev

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

In [4]:
## Credentials
# The Spotify API id/secret are prompted for at run time (via getpass) so they
# are never written into the notebook itself.
import getpass

client_id, client_secret = (
    str(getpass.getpass(prompt_label))
    for prompt_label in ('client_id', 'client_secret')
)


In [5]:
# Shared Spotify client, authenticated with the client id/secret entered above.
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=client_id,
        client_secret=client_secret,
    )
)

In [6]:
# Reference table of song features that recommendations are drawn from.
df_orig = pd.read_csv(filepath_or_buffer='full_features.csv')

## Defining Functions

In [7]:
# Look up a single song on Spotify and fetch its audio features.

def feature_lookup(song_title):
    """Search Spotify for ``song_title`` and return the top hit's audio features.

    Side effects
    ------------
    * Stores the id of the matched track on ``feature_lookup.id`` (read by
      ``spotify_recommender``).
    * Prints which track was matched.

    Unlike the previous version, this function no longer overwrites
    ``song_reccomender.song`` with the raw search payload: that clobbered the
    user's typed input and made this function fail with ``NameError`` when it
    was called before ``song_reccomender`` had been defined.

    Parameters
    ----------
    song_title : str
        Free-text query passed to ``sp.search``.

    Returns
    -------
    pandas.DataFrame
        Built from the response of ``sp.audio_features`` for the matched track.

    Raises
    ------
    IndexError
        If the search returns no tracks (same exception type as before, but
        with a descriptive message).
    ValueError
        If no usable audio features come back for the matched track.
    """
    search_result = sp.search(q=song_title)

    tracks = search_result['tracks']['items']
    if not tracks:
        raise IndexError(f'No Spotify track found for {song_title!r}')
    top_hit = tracks[0]

    feature_lookup.id = top_hit['id']
    # NOTE(review): the artist is read from the album entry, not from the track
    # itself -- confirm this is intended (e.g. for compilation albums).
    artist = top_hit['album']['artists'][0]['name']
    title = top_hit['name']

    print('Good news! We found', title, 'by', artist, 'on Spotify.')
    print('Please wait a few seconds for us to retrieve your recommendation.')

    features = sp.audio_features(tracks=feature_lookup.id)
    # Guard against an empty / placeholder response instead of silently
    # returning a frame without the expected feature columns.
    if not features or features[0] is None:
        raise ValueError(f'No audio features available for {title!r} by {artist}')

    return pd.DataFrame(features)

In [8]:
# Recommend a song from the reference table based on a looked-up Spotify song.

def spotify_recommender(song_title=None, n_clusters=250):
    """Print one song from ``df_orig`` that clusters with the requested song.

    The requested song's audio features are appended to ``df_orig``, all
    numeric features are standardised, K-Means is fitted, and a random song
    from the requested song's cluster is printed.

    Parameters
    ----------
    song_title : str, optional
        Song to look up. Defaults to ``song_reccomender.song`` (the value the
        interactive prompt stored), which keeps the original no-argument call
        working.
    n_clusters : int, default 250
        Requested number of K-Means clusters; capped at the number of rows so
        a small table cannot make the fit fail.

    Notes
    -----
    * ``df_orig`` must provide the ``id``, ``song`` and ``artist`` columns.
    * Rows are de-duplicated on ``id``. Previously a song that was already in
      ``df_orig`` ended up twice (the rows differ in other columns) and the
      single-value lookup of its cluster raised ``ValueError``.
    * If the requested song is alone in its cluster, the closest other song in
      the scaled feature space is suggested instead of raising.
    """
    if song_title is None:
        song_title = song_reccomender.song

    # user input -> audio features of the best Spotify match
    df_song = feature_lookup(song_title)
    song_id = df_song['id'].iloc[0]

    # combine with the reference table; keep the reference row (it carries the
    # song/artist names) when the looked-up song is already present
    df = (
        pd.concat([df_orig, df_song], ignore_index=True)
        .drop_duplicates(subset='id', keep='first')
        .reset_index(drop=True)
    )

    # numeric features only, without bookkeeping columns left over from earlier
    # exports (ignored when absent)
    df_numeric = (
        df.select_dtypes(include=['number', 'bool'])
        .drop(columns=['Unnamed: 0', 'level_0', 'index'], errors='ignore')
    )

    # scale, then cluster
    scaled = StandardScaler().fit_transform(df_numeric)
    effective_clusters = max(1, min(n_clusters, len(df)))
    df['cluster'] = KMeans(n_clusters=effective_clusters).fit_predict(scaled)

    # cluster of the chosen song, and every *other* song in that cluster
    is_chosen = (df['id'] == song_id).to_numpy()
    chosen_song_cluster = df.loc[is_chosen, 'cluster'].iloc[0]
    others = df[~is_chosen]
    candidates = others[others['cluster'] == chosen_song_cluster]

    if not candidates.empty:
        suggestion = candidates.sample().iloc[0]
    elif not others.empty:
        # the chosen song is alone in its cluster: use its nearest neighbour
        squared_distance = ((scaled[~is_chosen] - scaled[is_chosen][0]) ** 2).sum(axis=1)
        suggestion = others.iloc[int(squared_distance.argmin())]
    else:
        print('Sorry, there are no other songs to recommend.')
        return

    print('If you like that, try this instrumental tune:', f"'{suggestion['song']}'", 'by', suggestion['artist'])

In [9]:
# Scrape the current Billboard Hot 100 chart.

def scrape_billboard(url='https://www.billboard.com/charts/hot-100/', timeout=30):
    """Scrape titles and artists from the Billboard chart page.

    The result is stored on ``scrape_billboard.df`` as a DataFrame with the
    columns ``title``, ``artist`` and ``rank`` (1-based position on the page).
    Nothing is returned, as before.

    Parameters
    ----------
    url : str
        Chart page to scrape; defaults to the Hot 100.
    timeout : float
        Seconds to wait for the HTTP response before giving up.

    Raises
    ------
    requests.HTTPError
        If the page responds with an error status (previously an error page
        was parsed silently).
    IndexError
        If fewer titles than artists are found, i.e. the page layout no longer
        matches the selectors.
    """
    artist_selector = 'li.lrv-u-width-100p > ul > li.o-chart-results-list__item.\\/\\/.lrv-u-flex-grow-1.lrv-u-flex.lrv-u-flex-direction-column.lrv-u-justify-content-center.lrv-u-border-b-1.u-border-b-0\\@mobile-max.lrv-u-border-color-grey-light.lrv-u-padding-l-1\\@mobile-max > span'
    title_selector = 'div.chart-results-list li.o-chart-results-list__item h3'

    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    # Name the parser explicitly so the parse does not depend on which optional
    # parsers happen to be installed.
    soup = BeautifulSoup(response.content, 'html.parser')

    # Run each selector once; the old loop re-ran both over the whole document
    # for every chart entry.
    artist_nodes = soup.select(artist_selector)
    title_nodes = soup.select(title_selector)

    if len(title_nodes) < len(artist_nodes):
        raise IndexError(
            f'Found {len(artist_nodes)} artists but only {len(title_nodes)} titles; '
            'the chart page layout has probably changed.'
        )

    # As before, the number of artist entries decides how many rows are read.
    # Surrounding whitespace (newlines, tabs, spaces) is stripped from the text.
    records = [
        {'title': title_node.text.strip(), 'artist': artist_node.text.strip()}
        for artist_node, title_node in tqdm(
            zip(artist_nodes, title_nodes), total=len(artist_nodes)
        )
    ]

    chart = pd.DataFrame(records, columns=['title', 'artist'])
    chart['rank'] = np.arange(1, len(chart) + 1)

    # Publish only the finished frame so readers never see a half-built chart.
    scrape_billboard.df = chart

In [10]:
# Scrape the chart once so that `scrape_billboard.df` exists before the
# recommender below reads it.
scrape_billboard()

  0%|          | 0/100 [00:00<?, ?it/s]

## Final Product

In [11]:
def _pick_other_chart_song(chart, exclude_ranks):
    """Return a random row of ``chart`` whose rank is not in ``exclude_ranks``, or ``None`` if there is none."""
    others = chart[~chart['rank'].isin(list(exclude_ranks))]
    if others.empty:
        return None
    return others.iloc[random.randrange(len(others))]


def song_reccomender():
    """Interactive recommender: prompt for a song and print a suggestion.

    * Exact (case-insensitive) match with a chart title: suggest another
      random chart song.
    * Near match (Levenshtein distance below 3): ask "Did you mean ...?" for
      each near match and suggest another random chart song.
    * Otherwise: delegate to ``spotify_recommender``.

    Empty input (whitespace only counts as empty) is re-prompted; after five
    empty attempts the function gives up. The typed text is stored on
    ``song_reccomender.song`` because ``spotify_recommender`` reads it there.

    Fixes over the previous version
    -------------------------------
    * The suggestion used ``random.randint(0, 100)``, which can return 0 (no
      such rank, so the lookup raised ``ValueError``) and assumed exactly 100
      chart rows. Whenever the draw hit the input song's own rank, no
      suggestion was printed at all. A suggestion is now drawn directly from
      the other chart rows.
    * Charts containing the same title twice no longer break the single-value
      lookups; the highest-ranked exact match is reported.
    * The exact-match branch no longer runs the raw input through a regex
      ``str.contains``.
    * The chart is scraped on demand if ``scrape_billboard`` has not run yet.
    """
    intro_lines = (
        'Hey there,',
        'I\'m a song reccomendation engine.',
        'hot song >>> other hot song',
        'some other song >>> instrumental study song',
        'Sound good?',
    )
    for line in intro_lines:
        print(line)
        sleep(0.2)
    print('Enter a song above ^^^ \n')

    # Make the function usable on a fresh kernel.
    if not hasattr(scrape_billboard, 'df'):
        scrape_billboard()
    chart = scrape_billboard.df
    titles_lower = chart['title'].map(lambda title: title.lower())

    attempts = 0
    while attempts < 5:

        song_reccomender.song = input('Type in a song: ').strip()
        query_lower = song_reccomender.song.lower()

        # no input: keep asking, up to five times
        if query_lower == '':
            attempts += 1
            if attempts == 5:
                print('That\'s it. I give up! You\'ll have to try again later!')
            else:
                print('At least type something...')
            continue

        # exact match: respond and suggest another chart song
        exact_matches = chart[titles_lower == query_lower]
        if not exact_matches.empty:
            match = exact_matches.iloc[0]
            print('Good choice!', '\''+match['title']+'\'', 'by', match['artist'], 'is a hot song right now!' )

            suggestion = _pick_other_chart_song(chart, exact_matches['rank'])
            if suggestion is not None:
                print('Here\'s another song you might like:', '\''+suggestion['title']+'\'',
                'by',suggestion['artist'])
            break

        # near match: small edit distance between the input and a chart title
        is_near = titles_lower.map(lambda title: lev.distance(query_lower, title) < 3).astype(bool)
        near_matches = chart[is_near]
        if not near_matches.empty:
            for _, match in near_matches.iterrows():
                print('Did you mean', '\''+match['title']+'\'', 'by', match['artist'] + '? Because that\'s a hot song right now!')

                suggestion = _pick_other_chart_song(chart, [match['rank']])
                if suggestion is not None:
                    print('If that\'s the song you meant, here\'s another song you might like: ', '\''+suggestion['title']+'\'',
                    'by',suggestion['artist'])
            break

        # not on the chart at all: fall back to the Spotify-based recommender
        spotify_recommender()
        break

In [18]:
# TODO: ask the user to confirm that the matched song is the one they meant.
song_reccomender()

Hey there,
I'm a song reccomendation engine.
hot song >>> other hot song
some other song >>> instrumental study song
Sound good?
Enter a song above ^^^ 

Good choice! 'Shivers' by Ed Sheeran is a hot song right now!
Here's another song you might like: 'Oh My God' by Adele


In [17]:
# TODO: fix duplicates (NOTE(review): presumably duplicate songs in the data -- confirm which duplicates are meant)