# Song recommender (using the Spotify API)

In [2]:
# import all necessary libraries
import pandas as pd
import numpy as np
import random
from pandas import json_normalize
pd.set_option("display.max_columns", 0) #no limit to cols we want to see
from sklearn import cluster 
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

# spotify
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

import pickle

In [7]:
# Load the two song tables used by the recommender:
#   - hot_songs: read from a CSV file (columns `song` / `artist` are used later on)
#   - top10k_songs: read from a pickled DataFrame (columns `track.name`, `name`
#     and `cluster` are used later on)
hot_songs = pd.read_csv("hot_songs.csv")
top10k_songs = pd.read_pickle("playlist_final.pkl")

In [9]:
# import models and predictors
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# only load .sav files that were produced by a trusted source.
# The `with` blocks make sure each file handle is closed once its object has
# been loaded (the handles were previously left open).
with open('kmeans8.sav', 'rb') as model_file:
    kmeans = pickle.load(model_file)
with open('scaler.sav', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

In [10]:
# Normalise the song titles of both tables to lower case, so that a
# lower-cased user query can be compared against them directly.
hot_songs["song"] = hot_songs["song"].str.lower()
top10k_songs["track.name"] = top10k_songs["track.name"].str.lower()


***

In [11]:
# spotify auth
# Read the credentials text file; the context manager closes the handle as
# soon as its contents have been read.
with open('secrets.txt', 'r') as secrets_file:
    string = secrets_file.read()

# Turn the "key: value" lines into a dictionary.
secrets_dict = {}
for line in string.splitlines():
    # partition() splits on the first ':' only, so a value that itself
    # contains ':' is kept whole; lines without any ':' (blank lines,
    # stray text) are skipped instead of raising IndexError.
    key, sep, value = line.partition(':')
    if sep:
        secrets_dict[key.strip()] = value.strip()

# spotipy init with user credentials
# (expects the keys `clientid` and `clientsecret` in secrets.txt)
sp = spotipy.Spotify(auth_manager = SpotifyClientCredentials(client_id = secrets_dict['clientid'],
                                                            client_secret = secrets_dict['clientsecret']))

#### Recommender function

In [21]:
# receive a song and recommend another random one from the df
def recommender(query, df1, df2):
    """Print a song recommendation based on the user's query.

    The query is lower-cased and looked up in three stages:

    1. If it is a title in ``df1`` (column ``song``), a random song from
       ``df1`` is recommended.
    2. Otherwise, if it is a title in ``df2`` (column ``track.name``), a
       random song from the same ``cluster`` of ``df2`` is recommended.
    3. Otherwise the title is searched on Spotify through the notebook-level
       client ``sp``; the numeric audio features of the first hit are scaled
       with the notebook-level ``scaler``, assigned to a cluster with the
       notebook-level ``kmeans`` model, and a random song of that cluster in
       ``df2`` is recommended.

    In stages 1 and 2 the queried song itself is excluded from the draw
    whenever at least one other candidate exists.

    Parameters
    ----------
    query : str
        Song title typed by the user. It is compared in lower case, so the
        title columns of ``df1`` and ``df2`` are expected to be lower case.
    df1 : pandas.DataFrame
        Table with the columns ``song`` and ``artist``.
    df2 : pandas.DataFrame
        Table with the columns ``track.name``, ``name`` and ``cluster``.

    Returns
    -------
    None
        The recommendation (or an explanation why none could be made) is
        printed to standard output.
    """
    # lower-case once and use the same value for every lookup
    query_lc = query.lower()

    hot_match = df1['song'] == query_lc
    playlist_match = df2['track.name'] == query_lc

    if hot_match.any():
        # draw from the other hot songs so we do not hand back the query itself;
        # fall back to the full table when the query is the only entry
        others = df1[~hot_match]
        pool = others if len(others) > 0 else df1

        # pick random number to get random song and artist
        random_idx = random.randint(0, len(pool) - 1)

        # rec response
        print("-Paris Hilton voice- That's hot! Here's another recommendation for you:",
              pool['song'].iloc[random_idx],
              "by",
              pool['artist'].iloc[random_idx])

    elif playlist_match.any():
        # several rows may share the title; the cluster of the first match
        # is used as the scalar label to select the query's cluster
        query_cluster = df2.loc[playlist_match, 'cluster'].iloc[0]
        group = df2[df2['cluster'] == query_cluster]

        # leave the queried title out of the draw when the cluster has others
        others = group[group['track.name'] != query_lc]
        pool = others if len(others) > 0 else group

        # pick random number to get random song and artist
        random_idx = random.randint(0, len(pool) - 1)

        # rec response -- read from the cluster subset, not from the whole
        # table, so the positional index refers to a song of that cluster
        print("-Paris Hilton voice- That's hot! Here's another recommendation for you:",
              pool['track.name'].iloc[random_idx],
              "by",
              pool['name'].iloc[random_idx])

    else:
        # look up song from user query
        spsearch = sp.search(q = query_lc, type = 'track', limit = 1)
        items = spsearch['tracks']['items']
        if not items:
            # nothing found: there is no track id to fetch features for
            print("Sorry, we couldn't find that song on Spotify.")
            return

        # get the song id to look up audio feats
        song_id = items[0]['id']
        audio_feats = sp.audio_features(song_id)
        # NOTE(review): the result is treated as a list that may hold None
        # for a track without features -- confirm against the spotipy docs
        if not audio_feats or audio_feats[0] is None:
            print("Sorry, Spotify has no audio features for that song.")
            return

        # normalise and get only numericals
        au_norm = json_normalize(audio_feats)
        song_df = au_norm.select_dtypes(np.number)

        # scale
        song_scaled = scaler.transform(song_df)
        X_song_scaled = pd.DataFrame(song_scaled, columns = song_df.columns)

        # then predict
        song_predict = kmeans.predict(X_song_scaled)

        # now point it to the playlist and get its cluster buddies
        group = df2[df2['cluster'] == song_predict[0]]
        if len(group) == 0:
            # random.randint(0, -1) would raise on an empty cluster
            print("Sorry, we have no songs in that cluster to recommend.")
            return

        # get a random song from that cluster
        random_idx = random.randint(0, len(group) - 1)
        print("We don't have that babes, but here's something similar:",
              group['track.name'].iloc[random_idx],
              "by",
              group['name'].iloc[random_idx])

***

In [22]:
# Try the recommender interactively: ask for a title, then look it up in the
# hot songs first and the playlist second.
song = input('Gimme a song baybeh')
song_info = recommender(query = song, df1 = hot_songs, df2 = top10k_songs)
song_info

We don't have that babes, but here's something similar: flamenco sketches (feat. john coltrane, cannonball adderley & bill evans) by Bill Evans
