-
Notifications
You must be signed in to change notification settings - Fork 0
/
recommender.py
87 lines (69 loc) · 2.79 KB
/
recommender.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
pip show spotipy
!pip install python-dotenv spotipy
import pandas as pd
import numpy as np
import json
import re
import sys
import itertools
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from spotipy.oauth2 import SpotifyOAuth
import spotipy.util as util
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))
# Show every column and every row when a DataFrame is displayed (no truncation).
pd.set_option('display.max_columns', None)
# Use the fully qualified option name: the bare "max_rows" pattern can match
# more than one pandas option (e.g. display.max_rows and
# styler.render.max_rows), in which case pandas rejects it as ambiguous.
pd.set_option('display.max_rows', None)
# Load the Spotify track data into a DataFrame.
# NOTE(review): `path` is not defined anywhere in the visible file; it must be
# set to the location of the CSV before this line runs.
spotify_df = pd.read_csv(path)
# Quick sanity checks: column dtypes / non-null counts and missing values per column.
spotify_df.info()
spotify_df.isnull().sum()
# Drop identifier, text and date-like columns, then compute the pairwise
# correlation of the remaining numeric columns.
feature_columns = spotify_df.drop(columns=['id','name','release_date','year','artists'])
corr_matrix = feature_columns.select_dtypes(include='number').corr()
corr_matrix
import seaborn as sns
# Plot the correlation matrix itself (not the raw feature table).
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
# Normalize the data with min-max scaling, using only the integer and float columns.
from sklearn.preprocessing import MinMaxScaler
# MinMaxScaler maps each feature so that its minimum becomes 0 and its maximum becomes 1.
datatypes = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
numeric_columns = spotify_df.select_dtypes(include=datatypes)
# The scaler has to be fitted and applied to the data; merely constructing a
# MinMaxScaler object leaves the values unchanged. fit_transform returns a
# NumPy array, so the column labels and index are restored afterwards.
normarization = pd.DataFrame(
    MinMaxScaler().fit_transform(numeric_columns),
    columns=numeric_columns.columns,
    index=numeric_columns.index,
)
normarization
# Use K-means clustering to assign every song to a cluster.
from sklearn.cluster import KMeans
# Build 10 clusters.
kmeans = KMeans(n_clusters=10)
# Fit the clusters on the normalized data and get one cluster label per row.
features = kmeans.fit_predict(normarization)
spotify_df['features'] = features
# Rescale the cluster-label column to the [0, 1] range. The scaler must be
# fitted and applied (constructing MinMaxScaler alone changes nothing), and it
# expects 2-D input, hence the double brackets and the final ravel().
spotify_df['features'] = MinMaxScaler().fit_transform(spotify_df[['features']]).ravel()
# Use the data to build a recommender system for a given song
class Spotify_Recommendation():
    """Recommend songs that are closest to a given song.

    Closeness is the Manhattan (sum of absolute differences) distance between
    the rows of ``dataset``, computed over every column whose position is not
    listed in ``EXCLUDED_COLUMN_POSITIONS``.
    """

    # Column positions that are left out of the distance computation.
    # NOTE(review): presumably these are the non-feature columns (artists, id,
    # name, release_date, year) of the CSV this was written for -- confirm
    # against the column order of the loaded data.
    EXCLUDED_COLUMN_POSITIONS = (1, 6, 12, 14, 18)

    def __init__(self, dataset):
        """Store the DataFrame to search.

        Args:
            dataset: DataFrame with at least the columns ``name`` and
                ``artists``; all columns outside the excluded positions must
                be convertible to float.
        """
        self.dataset = dataset

    def recommend_songs(self, song, amount=1):
        """Return the ``amount`` songs closest to ``song``.

        Args:
            song: Name of the reference song; matched case-insensitively
                against the ``name`` column. The first match is used.
            amount: Maximum number of recommendations to return.

        Returns:
            DataFrame with the columns ``artists`` and ``name``, ordered from
            the closest song to the farthest. Rows sharing the reference
            song's name are never returned.

        Raises:
            ValueError: If no row is named ``song``, or if a column used for
                the distance holds values that cannot be converted to float.
        """
        wanted = song.lower()
        names = self.dataset['name'].str.lower()
        is_seed = names == wanted
        if not is_seed.any():
            raise ValueError(f"Song {song!r} was not found in the dataset")

        feature_positions = [
            position
            for position in range(self.dataset.shape[1])
            if position not in self.EXCLUDED_COLUMN_POSITIONS
        ]

        seed_vector = (
            self.dataset[is_seed]
            .iloc[0, feature_positions]
            .to_numpy(dtype=float)
        )

        # Work on a copy so that adding the helper column neither touches the
        # caller's DataFrame nor triggers a chained-assignment warning.
        candidates = self.dataset[names != wanted].copy()
        candidate_features = (
            candidates.iloc[:, feature_positions].to_numpy(dtype=float)
        )

        # Vectorized Manhattan distance of every candidate row to the seed row.
        candidates['distance'] = np.abs(candidate_features - seed_vector).sum(axis=1)

        # A stable sort keeps the original row order among equally distant songs.
        candidates = candidates.sort_values('distance', kind='stable')
        columns = ['artists', 'name']
        return candidates[columns][:amount]
# Build the recommender over the loaded Spotify DataFrame and request the ten
# songs closest to "Mixe".
recommendations = Spotify_Recommendation(spotify_df)
recommendations.recommend_songs("Mixe", 10)