In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.pyplot import style
from sklearn import preprocessing
from scipy.sparse import coo_matrix
from scipy.sparse.linalg import *
from scipy.sparse.linalg import svds
from scipy.sparse import csc_matrix
import math as mth
#style.use('ggplot')
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the pre-processed listening data; the path points at the Colab working directory.
csv_path = r'/content/processed_data.csv'
df = pd.read_csv(csv_path)

In [3]:
# Turn raw play counts into each user's share of listens per song
# (used downstream the way a rating is used for movies).

# Total number of plays per user, indexed by user_id.
fraction_count = (
    df.groupby(['user_id'])['freq']
    .sum()
    .to_frame('total_count')
)

# Attach the per-user total to every row, discard the columns that are not
# needed any more, then divide each play count by the user's total.
data = (
    df.merge(fraction_count, how='left', on='user_id')
    .drop(columns=['artist_name', 'release', 'Unnamed: 0'])
    .assign(fraction_count=lambda frame: frame.freq / frame.total_count)
)
data.head()

Unnamed: 0,user_id,song_id,freq,total_count,fraction_count
0,fd50c4007b68a3737fe052d5a4f78ce8aa117f3d,SOBONKR12A58A7A7E0,1.0,6.0,0.166667
1,c34670d9c1718361feb93068a853cead3c95b76a,SOBONKR12A58A7A7E0,1.0,23.0,0.043478
2,c5006d9f41f68ccccbf5ee29212b6af494110c5e,SOBONKR12A58A7A7E0,1.0,1.0,1.0
3,e4332e11f4df6dd26673bb6b085e9a2bbdc9b8a5,SOBONKR12A58A7A7E0,2.0,9.0,0.222222
4,baf2fe5885ab93fbbdb7fecc6691788e70afb6c8,SOBONKR12A58A7A7E0,4.0,4.0,1.0


In [4]:
# Give every distinct user and every distinct song a dense 0-based integer id
# (`*_index_value`). The `user_index` / `song_index` columns keep the row label
# of the first occurrence of that user / song in `data`.
user_index = (
    data['user_id']
    .drop_duplicates()
    .reset_index()
    .rename(columns={'index': 'user_index'})
)
user_index['user_index_value'] = user_index.index.tolist()

song_index = (
    data['song_id']
    .drop_duplicates()
    .reset_index()
    .rename(columns={'index': 'song_index'})
)
song_index['song_index_value'] = song_index.index.tolist()

# Each merge joins on the only column the two frames share
# (song_id for the first, user_id for the second).
indices = data.merge(song_index, how='left').merge(user_index, how='left')
indices

Unnamed: 0,user_id,song_id,freq,total_count,fraction_count,song_index,song_index_value,user_index,user_index_value
0,fd50c4007b68a3737fe052d5a4f78ce8aa117f3d,SOBONKR12A58A7A7E0,1.0,6.0,0.166667,0,0,0,0
1,c34670d9c1718361feb93068a853cead3c95b76a,SOBONKR12A58A7A7E0,1.0,23.0,0.043478,0,0,1,1
2,c5006d9f41f68ccccbf5ee29212b6af494110c5e,SOBONKR12A58A7A7E0,1.0,1.0,1.000000,0,0,2,2
3,e4332e11f4df6dd26673bb6b085e9a2bbdc9b8a5,SOBONKR12A58A7A7E0,2.0,9.0,0.222222,0,0,3,3
4,baf2fe5885ab93fbbdb7fecc6691788e70afb6c8,SOBONKR12A58A7A7E0,4.0,4.0,1.000000,0,0,4,4
...,...,...,...,...,...,...,...,...,...
49790,8a34edcef0ada4a0114545f94c31f38ec91cbd84,SOSXLTC12AF72A7F54,2.0,3.0,0.666667,46151,134,34938,24182
49791,3967bbc5bf61c98375a97105f55851ac41a0234b,SOSXLTC12AF72A7F54,1.0,3.0,0.333333,46151,134,18679,15055
49792,f61e12889f24d08f09d044a5b9306f019b8d88d0,SOSXLTC12AF72A7F54,10.0,20.0,0.500000,46151,134,7384,6731
49793,db4e03c6d955131c74abb5c840e6191f70527789,SOSXLTC12AF72A7F54,8.0,36.0,0.222222,46151,134,7385,6732


In [5]:
# Coordinates and values for the user-by-song matrix:
# row = user id, column = song id, value = listen fraction.
row_array, column_array, values = (
    indices[column]
    for column in ('user_index_value', 'song_index_value', 'fraction_count')
)

In [6]:
# Assemble the (value, (row, column)) triplets into a sparse user-by-song matrix
# in COO format. No explicit shape is passed, so coo_matrix derives it from the indices.
entries = (values, (row_array, column_array))
sparse_matrix = coo_matrix(entries, dtype=float)

In [7]:
# Factorise the user-by-song matrix with a truncated SVD.
# The singular values are returned as a dense k*k diagonal matrix holding their
# square roots; the singular vectors are returned as sparse CSC matrices.
def compute_svd(data, k):
    """Compute a rank-``k`` truncated SVD of ``data``.

    Parameters
    ----------
    data : sparse matrix (or anything ``scipy.sparse.linalg.svds`` accepts)
        Matrix to factorise.
    k : int
        Number of singular values / vectors to compute.

    Returns
    -------
    U : scipy.sparse.csc_matrix
        Left singular vectors returned by ``svds``.
    S : numpy.ndarray
        ``float32`` array of shape ``(k, k)`` whose diagonal holds the square
        roots of the singular values; every other entry is zero.
    Vt : scipy.sparse.csc_matrix
        Right singular vectors (already transposed) returned by ``svds``.
    """
    left_vectors, singular_values, right_vectors = svds(data, k)
    # NOTE(review): the diagonal stores sqrt(sigma), not sigma itself - confirm
    # this damping is what downstream scoring expects.
    S = np.diag(np.sqrt(singular_values)).astype(np.float32)
    return csc_matrix(left_vectors), S, csc_matrix(right_vectors)

In [8]:
# Number of latent factors (singular values / vectors) kept by the truncated SVD
k = 100
U, S, Vt = compute_svd(data=sparse_matrix, k=k)

In [9]:
# Display the merged frame again for reference: one row per (user, song) pair,
# with the listen fraction and the integer ids assigned to the user and the song.
indices

Unnamed: 0,user_id,song_id,freq,total_count,fraction_count,song_index,song_index_value,user_index,user_index_value
0,fd50c4007b68a3737fe052d5a4f78ce8aa117f3d,SOBONKR12A58A7A7E0,1.0,6.0,0.166667,0,0,0,0
1,c34670d9c1718361feb93068a853cead3c95b76a,SOBONKR12A58A7A7E0,1.0,23.0,0.043478,0,0,1,1
2,c5006d9f41f68ccccbf5ee29212b6af494110c5e,SOBONKR12A58A7A7E0,1.0,1.0,1.000000,0,0,2,2
3,e4332e11f4df6dd26673bb6b085e9a2bbdc9b8a5,SOBONKR12A58A7A7E0,2.0,9.0,0.222222,0,0,3,3
4,baf2fe5885ab93fbbdb7fecc6691788e70afb6c8,SOBONKR12A58A7A7E0,4.0,4.0,1.000000,0,0,4,4
...,...,...,...,...,...,...,...,...,...
49790,8a34edcef0ada4a0114545f94c31f38ec91cbd84,SOSXLTC12AF72A7F54,2.0,3.0,0.666667,46151,134,34938,24182
49791,3967bbc5bf61c98375a97105f55851ac41a0234b,SOSXLTC12AF72A7F54,1.0,3.0,0.333333,46151,134,18679,15055
49792,f61e12889f24d08f09d044a5b9306f019b8d88d0,SOSXLTC12AF72A7F54,10.0,20.0,0.500000,46151,134,7384,6731
49793,db4e03c6d955131c74abb5c840e6191f70527789,SOSXLTC12AF72A7F54,8.0,36.0,0.222222,46151,134,7385,6732


In [10]:
#To make recommendations for each user
class recommendations:
    """Recommend unheard songs to a user from the truncated-SVD factors.

    The class attributes below are bound once, when the class body is
    executed, to the notebook-level ``indices`` frame and the ``U``, ``S``,
    ``Vt`` factors. Re-run this cell after recomputing any of them.
    """
    # Frame with user_id, song_id, user_index_value and song_index_value columns.
    data=indices
    # SVD factors; scores for a user are computed as U[user] @ (S @ Vt).
    U=U
    S=S
    Vt=Vt
    # Number of songs recommended when the caller does not pass a limit.
    default_recommendations = 10

    def make_recommendations(self,user_id,number_of_recommend=None):
        """Print and return the best-scoring songs the user has not heard yet.

        Parameters
        ----------
        user_id : str
            Value from the ``user_id`` column of ``data``.
        number_of_recommend : int, optional
            Maximum number of songs to recommend. ``None`` falls back to
            ``default_recommendations``; negative values are treated as 0.

        Returns
        -------
        list
            Recommended ``song_id`` values, best score first. The same list
            is printed as a one-column array.

        Raises
        ------
        IndexError
            If ``user_id`` does not occur in ``data``.
        """
        self.user_id = user_id
        if number_of_recommend is None:
            number_of_recommend = self.default_recommendations
        self.max_recommend = max(int(number_of_recommend), 0)

        user_rows = self.data[self.data['user_id'] == user_id]
        if user_rows.empty:
            # Same exception type as the bare `[0]` lookup used to raise,
            # but with a message that names the problem.
            raise IndexError(f"unknown user_id: {user_id!r}")
        self.user = user_rows['user_index_value'].iloc[0]

        # Predicted score of this user for every song. `@` is always a matrix
        # product, and the factors are read from the instance/class rather
        # than from whatever the notebook globals currently hold.
        self.estimates = self.U[self.user, :] @ (self.S @ self.Vt)
        scores = self.estimates
        if hasattr(scores, 'toarray'):
            scores = scores.toarray()
        scores = np.asarray(scores).reshape(-1)

        # Song ids ordered from highest to lowest score (all songs, so the
        # request can still be filled when many top songs were already heard).
        ranked = np.argsort(-scores)
        self.recommendings = ranked

        # Drop everything the user has already listened to, then keep the best.
        heard = user_rows['song_index_value'].to_numpy()
        top = ranked[~np.isin(ranked, heard)][:self.max_recommend]

        # One lookup table from integer song id to song_id, built once per call.
        song_ids = (
            self.data.drop_duplicates('song_index_value')
            .set_index('song_index_value')['song_id']
        )
        user_recommendations = song_ids.loc[top].tolist()

        print(np.reshape(user_recommendations,(-1,1)))
        return user_recommendations

In [12]:
# Show 15 recommendations for one example user
target_user = 'fd50c4007b68a3737fe052d5a4f78ce8aa117f3d'
recommend = recommendations()
recommend.make_recommendations(target_user, 15)

[['SOJZBHH12AB017F611']
 ['SOVAMBN12AB0187FEF']
 ['SODEAJC12A58A7769F']
 ['SOWLMMH12A81C22F5E']
 ['SOEXWRK12AAF3B26AE']
 ['SOJZOFG12A8C137BAE']
 ['SOJOIWK12AF729FAA2']
 ['SORKDWS12A6701E6D9']
 ['SOJHCOH12A8AE45F4F']
 ['SOHOTAA12A8AE45F43']
 ['SODTAHY12A8C13ACAF']
 ['SORALMO12AF729EBB4']
 ['SOHMFHE12AAF3B3E07']
 ['SORNKQU12A6D4FCDC0']
 ['SOONUTJ12A6701D7B4']]
