In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [5]:
# Load the track table from the CSV file in the working directory.
data = pd.read_csv('data.csv')
# print() is required here: only the last expression of a cell is displayed automatically.
print(data.columns)
# Preview the first rows as the cell's rich output.
data.head()

Index(['acousticness', 'artists', 'danceability', 'duration_ms', 'energy',
       'explicit', 'id', 'instrumentalness', 'key', 'liveness', 'loudness',
       'mode', 'name', 'popularity', 'release_date', 'speechiness', 'tempo',
       'valence', 'year'],
      dtype='object')


Unnamed: 0,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,key,liveness,loudness,mode,name,popularity,release_date,speechiness,tempo,valence,year
0,0.995,['Carl Woitschach'],0.708,158648,0.195,0,6KbQ3uYMLKb5jDxLF7wYDD,0.563,10,0.151,-12.428,1,Singende Bataillone 1. Teil,0,1928,0.0506,118.469,0.779,1928
1,0.994,"['Robert Schumann', 'Vladimir Horowitz']",0.379,282133,0.0135,0,6KuQTIu1KoTTkLXKrwlLPV,0.901,8,0.0763,-28.454,1,"Fantasiestücke, Op. 111: Più tosto lento",0,1928,0.0462,83.972,0.0767,1928
2,0.604,['Seweryn Goszczyński'],0.749,104300,0.22,0,6L63VW0PibdM1HDSBoqnoM,0.0,5,0.119,-19.924,0,Chapter 1.18 - Zamek kaniowski,0,1928,0.929,107.177,0.88,1928
3,0.995,['Francisco Canaro'],0.781,180760,0.13,0,6M94FkXd15sOAOQYRnWPN8,0.887,1,0.111,-14.734,0,Bebamos Juntos - Instrumental (Remasterizado),0,1928-09-25,0.0926,108.003,0.72,1928
4,0.99,"['Frédéric Chopin', 'Vladimir Horowitz']",0.21,687733,0.204,0,6N6tiFZ9vLTSOIxkj8qKrd,0.908,11,0.098,-16.829,1,"Polonaise-Fantaisie in A-Flat Major, Op. 61",1,1928,0.0424,62.149,0.0693,1928


In [7]:
# Remove the columns that the recommender below does not use.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this
# cell re-runnable: on a second execution the columns are already gone and the
# call is a no-op rather than a KeyError.
data = data.drop(columns=['duration_ms', 'key', 'mode', 'id', 'name', 'year'],
                 errors='ignore')

In [8]:
# Rescale three columns with fixed linear maps so they are comparable with the
# other features (presumably already in [0, 1] -- TODO confirm against the data).
# NOTE: each line reads and overwrites the same column, so running this cell
# twice applies the scaling twice.

# popularity: x / 100
data['popularity'] = data['popularity'].div(100)
# tempo: (x - 50) / 100, i.e. 50 -> 0.0 and 150 -> 1.0
data['tempo'] = data['tempo'].sub(50).div(100)
# loudness: (x + 60) / 60, i.e. -60 -> 0.0 and 0 -> 1.0
data['loudness'] = data['loudness'].add(60).div(60)

In [9]:
# Numeric columns that make up a taste profile. The order of this list is the
# order of the entries in the profile returned by createUserPrefMatrix.
features = [
    'acousticness',
    'danceability',
    'energy',
    'instrumentalness',
    'liveness',
    'loudness',
    'speechiness',
    'tempo',
    'valence',
    'popularity',
]
def createUserPrefMatrix(artistRatingDict, df=None, feature_cols=None):
    """Build a rating-weighted taste profile from the rows of the rated artists.

    Every row whose 'artists' value is a key of ``artistRatingDict`` has its
    feature values multiplied by that artist's rating; the weighted rows are
    summed per feature and the result is rescaled so its entries sum to 10.

    Parameters
    ----------
    artistRatingDict : dict
        Maps an 'artists' value (matched by exact equality) to a numeric rating.
    df : pandas.DataFrame, optional
        Table to read from. Defaults to the notebook-level ``data`` frame.
    feature_cols : list of str, optional
        Columns to include in the profile. Defaults to the notebook-level
        ``features`` list.

    Returns
    -------
    pandas.Series
        One value per feature column, indexed by column name, summing to 10.

    Raises
    ------
    ValueError
        If none of the rated artists occur in the table, or if the weighted
        feature total is 0 (the profile could not be normalised and would
        otherwise come back as all-NaN).
    """
    source = data if df is None else df
    cols = list(features if feature_cols is None else feature_cols)

    rated = source.loc[source['artists'].isin(list(artistRatingDict))]
    if rated.empty:
        raise ValueError("none of the rated artists were found in the data")

    # One weight per row, looked up from the row's artist. Nothing is written
    # back into a slice of the source frame, so the input stays untouched and
    # pandas has no reason to emit SettingWithCopyWarning.
    weights = rated['artists'].map(artistRatingDict)
    userProfile = rated[cols].mul(weights.values, axis=0).sum(axis=0)

    total = userProfile.sum()
    if total == 0:
        raise ValueError("weighted feature total is 0; cannot normalise the profile")

    return (userProfile / total) * 10

def createRecomMatrix(userProfile, artists, df=None):
    """Score every row of artists the user has not rated against a profile.

    Rows whose 'artists' value is in ``artists`` are excluded. For each
    remaining row the score is the sum over the profile's features of
    (row value * profile weight).

    Parameters
    ----------
    userProfile : pandas.Series
        Feature weights indexed by column name (as returned by
        createUserPrefMatrix).
    artists : iterable
        'artists' values to exclude from the candidates.
    df : pandas.DataFrame, optional
        Table to read from. Defaults to the notebook-level ``data`` frame.

    Returns
    -------
    pandas.Series
        Scores indexed by 'artists', sorted from highest to lowest. There is
        one entry per row of the table, so an artist label can repeat.
    """
    source = data if df is None else df

    unrated = source.loc[~source['artists'].isin(list(artists))]

    # Keep only the columns the profile has weights for. Any other column in
    # the table (e.g. 'explicit', 'release_date') must not take part in the score.
    candidates = unrated.set_index('artists')[list(userProfile.index)]

    # Multiply by column label, not by position: the table's column order
    # differs from the profile's order, so a positional product would pair
    # weights with the wrong features.
    recomMat = candidates.mul(userProfile, axis=1).sum(axis=1)

    return recomMat.sort_values(ascending=False)

def recommend(artistRatingDict, top_n=10):
    """Return the highest-scoring recommendations for a set of artist ratings.

    Builds a taste profile with createUserPrefMatrix, scores the rows of all
    artists that were not rated with createRecomMatrix, and keeps the first
    ``top_n`` entries of that ranking.

    Parameters
    ----------
    artistRatingDict : dict
        Maps an 'artists' value to a numeric rating.
    top_n : int, optional
        Number of entries to return (default 10, the previously fixed value).

    Returns
    -------
    The first ``top_n`` entries of the ranking produced by createRecomMatrix.
    """
    userProfile = createUserPrefMatrix(artistRatingDict)

    # Hand over a concrete list of the rated artists rather than a live dict view.
    recommendationMat = createRecomMatrix(userProfile, list(artistRatingDict))

    return recommendationMat.head(top_n)

In [10]:
import random

# Draw ten *distinct* artists. Sampling the raw column samples rows, so an
# artist with several rows could be drawn twice; dict(zip(...)) would then
# collapse the duplicates and silently drop ratings.
# A dedicated, seeded generator makes the example reproducible on Restart & Run All.
artists = random.Random(42).sample(list(data['artists'].unique()), k=10)
ratings = [10, 10, 8, 5, 9, 2, 3, 7, 6, 10]
dictionary = dict(zip(artists, ratings))
print(dictionary)

{"['Ted Heath']": 10, "['Talat Mahmood']": 10, "['Smokey Robinson']": 8, "['Aminé', 'Gucci Mane']": 5, "['Maurice Ravel', 'Pierre Monteux']": 9, "['Herbie Hancock']": 2, "['Newsies Ensemble']": 3, "['Garnett Silk']": 7, "['Django Reinhardt', 'Grand Orchestre de Danse']": 6, "['Ella Fitzgerald']": 10}
