In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import linear_kernel


In [2]:
music_df=pd.read_csv('music_genre.csv')
music_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50005 entries, 0 to 50004
Data columns (total 18 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   instance_id       50000 non-null  float64
 1   artist_name       50000 non-null  object 
 2   track_name        50000 non-null  object 
 3   popularity        50000 non-null  float64
 4   acousticness      50000 non-null  float64
 5   danceability      50000 non-null  float64
 6   duration_ms       50000 non-null  float64
 7   energy            50000 non-null  float64
 8   instrumentalness  50000 non-null  float64
 9   key               50000 non-null  object 
 10  liveness          50000 non-null  float64
 11  loudness          50000 non-null  float64
 12  mode              50000 non-null  object 
 13  speechiness       50000 non-null  float64
 14  tempo             50000 non-null  object 
 15  obtained_date     50000 non-null  object 
 16  valence           50000 non-null  float6

In [3]:
music_df=music_df.dropna()

In [4]:
music_df=music_df[music_df['tempo']!='?']

In [5]:
music_df

Unnamed: 0,instance_id,artist_name,track_name,popularity,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,obtained_date,valence,music_genre
0,32894.0,Röyksopp,Röyksopp's Night Out,27.0,0.00468,0.652,-1.0,0.941,0.79200,A#,0.115,-5.201,Minor,0.0748,100.889,4-Apr,0.759,Electronic
1,46652.0,Thievery Corporation,The Shining Path,31.0,0.01270,0.622,218293.0,0.890,0.95000,D,0.124,-7.043,Minor,0.0300,115.00200000000001,4-Apr,0.531,Electronic
2,30097.0,Dillon Francis,Hurricane,28.0,0.00306,0.620,215613.0,0.755,0.01180,G#,0.534,-4.617,Major,0.0345,127.994,4-Apr,0.333,Electronic
3,62177.0,Dubloadz,Nitro,34.0,0.02540,0.774,166875.0,0.700,0.00253,C#,0.157,-4.498,Major,0.2390,128.014,4-Apr,0.270,Electronic
4,24907.0,What So Not,Divide & Conquer,32.0,0.00465,0.638,222369.0,0.587,0.90900,F#,0.157,-6.266,Major,0.0413,145.036,4-Apr,0.323,Electronic
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50000,58878.0,BEXEY,GO GETTA,59.0,0.03340,0.913,-1.0,0.574,0.00000,C#,0.119,-7.022,Major,0.2980,98.02799999999999,4-Apr,0.330,Hip-Hop
50001,43557.0,Roy Woods,Drama (feat. Drake),72.0,0.15700,0.709,251860.0,0.362,0.00000,B,0.109,-9.814,Major,0.0550,122.04299999999999,4-Apr,0.113,Hip-Hop
50002,39767.0,Berner,Lovin' Me (feat. Smiggz),51.0,0.00597,0.693,189483.0,0.763,0.00000,D,0.143,-5.443,Major,0.1460,131.079,4-Apr,0.395,Hip-Hop
50003,57944.0,The-Dream,Shawty Is Da Shit,65.0,0.08310,0.782,262773.0,0.472,0.00000,G,0.106,-5.016,Minor,0.0441,75.88600000000001,4-Apr,0.354,Hip-Hop


In [6]:
data=music_df.copy().drop(['artist_name','track_name','instance_id','obtained_date'],axis=1).reset_index(drop=True)

In [7]:
songs=music_df.copy()[['instance_id','artist_name','track_name','music_genre']].reset_index(drop=True)

In [8]:
le=LabelEncoder()
data['key']=le.fit_transform(data['key'])

In [9]:
data['mode']=le.fit_transform(data['mode'])
data['music_genre']=le.fit_transform(data['music_genre'])

In [10]:
#compute similarities between songs
cosine_sim =linear_kernel(data,data)

In [11]:
cosine_sim.shape

(45020, 45020)

In [12]:
songs

Unnamed: 0,instance_id,artist_name,track_name,music_genre
0,32894.0,Röyksopp,Röyksopp's Night Out,Electronic
1,46652.0,Thievery Corporation,The Shining Path,Electronic
2,30097.0,Dillon Francis,Hurricane,Electronic
3,62177.0,Dubloadz,Nitro,Electronic
4,24907.0,What So Not,Divide & Conquer,Electronic
...,...,...,...,...
45015,58878.0,BEXEY,GO GETTA,Hip-Hop
45016,43557.0,Roy Woods,Drama (feat. Drake),Hip-Hop
45017,39767.0,Berner,Lovin' Me (feat. Smiggz),Hip-Hop
45018,57944.0,The-Dream,Shawty Is Da Shit,Hip-Hop


In [13]:
songs.loc[songs['instance_id']==30097.0].index

Index([2], dtype='int64')

In [39]:
def get_recommendation(id=1,cosine_sim=cosine_sim,num_recommend=10):
    idx=songs.loc[songs['instance_id']==id].index
    sim_scores=list(enumerate(cosine_sim[idx]))
    sim_scores=sorted(sim_scores, key=lambda x: x[1], reverse=True)
    # Get the scores of the 10 most similar movies
    top_similar = sim_scores[0][1:num_recommend+1]
    # Get the movie indices
    movie_indices = [i[0] for i in top_similar]
# Return the top 10 most similar movies
    return songs.iloc[movie_indices]

In [27]:
sim_scores

Index([2], dtype='int64')

In [32]:
sim_scores[0][1]

array([-2.01882360e+05,  4.70668243e+10,  4.64889831e+10, ...,
        4.08550164e+10,  5.66572865e+10,  5.76262544e+10])

In [34]:
movie_indices

[-201882.35952447922]

In [40]:
get_recommendation(id=30097.0,num_recommend=1)

IndexError: positional indexers are out-of-bounds