In [27]:
# Basic Libraries
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Data Extraction

In [28]:
musicdata = pd.read_csv('Spotify Dataset.csv')
musicdata = musicdata.drop(["Unnamed: 0", "track_id"], axis = 1) 
musicdata['songlists'] = musicdata.apply(lambda row: row['artists'] + row['track_name'], axis = 1)
musicdata = musicdata.drop_duplicates('songlists')
musicdata[['artists','track_name', 'album_name']]
print("Are all track_names in the database unique? ",len(pd.unique(musicdata.songlists))==len(musicdata))

Are all track_names in the database unique?  True


In [29]:
data = musicdata.sort_values(by=['popularity'], ascending = False).head(20000) #First 20K popular songs

In [30]:
genre_vectors = CountVectorizer()
genre_vectors.fit(data['track_genre'])

CountVectorizer()

In [31]:
def recommended_songs(song_name):

    #If the song could not be found within the dataset
    if data[data['track_name'] == song_name].shape[0] == 0:
        
        print('This song is not available. Here are some other popular songs that you may like:\n')
        suggested_music_list = data.nlargest(100, 'popularity')
        suggested_music = suggested_music_list.sample(n = 5) #any 5 samples

        return display(suggested_music[['artists', 'track_name', 'album_name']])
    
    #If the song could be found within the dataset
    song_input_array1 = genre_vectors.transform(data[data['track_name'] == song_name]['track_genre']).toarray() 
    #for text comparison
    song_input_array2 = data[data['track_name']==song_name].select_dtypes(include = np.number).to_numpy() 
    #for other numeric factors comparison
   
    similarity_index = []
    for index, row in data.iterrows():
        song = row['track_name']
     
        # Getting vector for existing songs in the playlist.
        existing_song_array1 = genre_vectors.transform(data[data['track_name'] == song]['track_genre']).toarray()
        #for text comparison
        existing_song_array2 = data[data['track_name'] == song].select_dtypes(include = np.number).to_numpy()
        #for other numeric factors comparison
 
        # Calculating similarities for text as well as numeric features
        text_similarity = cosine_similarity(song_input_array1, existing_song_array1)[0][0]
        feature_similarity = cosine_similarity(song_input_array2, existing_song_array2)[0][0]
        
        #adding the results into the array created
        similarity_index.append(text_similarity + feature_similarity)
 
    data['similarity'] = similarity_index
    
    data.sort_values(by=['similarity'], ascending = [False], inplace=True)
   
    display(data[['artists', 'track_name', 'album_name']][1:6]) 
    #Ignore song 1 as it is just the input song itself

In [32]:
recommended_songs('Love Someone')

Unnamed: 0,artists,track_name,album_name
465,Joshua Hyslop,Wells,Wells
582,Matt Nathanson,All We Are,Some Mad Hope
783,Drew Holcomb & The Neighbors,I Like to Be With Me When I'm With You,A Million Miles Away
56,Aron Wright,Build It Better,Build It Better
152,Howie Day,Collide,Stop All The World Now


In [33]:
recommended_songs('Efecto')

Unnamed: 0,artists,track_name,album_name
67353,Daddy Yankee,Gasolina,Barrio Fino (Bonus Track Version)
67359,Bad Bunny,Tití Me Preguntó,Un Verano Sin Ti
67801,Rauw Alejandro;Lyanno;Brray,LOKERA,LOKERA
67806,Ozuna,Mañana,OzuTochi
67552,Rvssian;Rauw Alejandro;Chris Brown,Nostálgico,Nostálgico


In [34]:
recommended_songs('How You Remind Me')

Unnamed: 0,artists,track_name,album_name
2007,KALEO,Way down We Go,A/B
2874,Ashes Remain,End of Me,What I've Become
2418,The Score,The Fear,Pressure
2659,Zayde Wølf,Gladiator,Modern Alchemy
2513,Skillet,Awake and Alive,Awake


In [35]:
recommended_songs('I Miss You')

Unnamed: 0,artists,track_name,album_name
65707,SOYOU,I Miss You,"Guardian (Original Television Soundtrack), Pt. 7"
11202,Adele,I Miss You,25
94657,DJ Covy,H E A R T B R O K E N (Oblivion),H E A R T B R O K E N (Oblivion)
94809,Beowülf,Is It Too Late for Me,Is It Too Late for Me
94757,Papithbk,Midnight in My Mind,Thbk.


In [36]:
recommended_songs('Homage')

Unnamed: 0,artists,track_name,album_name
38006,Arctic Monkeys,R U Mine?,AM
38151,TEMPOREX,Nice Boys,Care
38000,Arctic Monkeys,I Wanna Be Yours,AM
38182,Mild High Club,Tesselation,Skiptracing
38427,Left Lane Cruiser,Juice To Get Loose,Rock Them Back to Hell


In [37]:
recommended_songs('Simple Man')

Unnamed: 0,artists,track_name,album_name
8266,Deep Purple,Smoke On The Water - Remastered 2012,Machine Head (Remastered)
8484,Muddy Waters,Mannish Boy,King Of The Electric Blues
8601,Blues Delight,Slightly Hung Over,Rock Island Line
8508,The Heavy,Short Change Hero,The House That Dirt Built
8602,Queens of the Stone Age,Make It Wit Chu,Era Vulgaris
