In [1]:
%matplotlib inline

import pandas
from sklearn.model_selection import train_test_split
import numpy as np
import time
from sklearn.externals import joblib
import Recommenders as Recommenders
import Evaluation as Evaluation

In [2]:
# Input files, read from the working directory:
#   - user/song/listen-count triplets (no header row)
#   - per-song metadata
triplets_file = '10000.txt'
songs_metadata_file = 'song_data.csv'

# Load the triplets and label the three columns.
song_df_1 = pandas.read_table(triplets_file, header=None)
song_df_1.columns = ['user_id', 'song_id', 'listen_count']

# Load the song metadata.
song_df_2 = pandas.read_csv(songs_metadata_file)

# Keep one metadata row per song_id so the left join below cannot
# multiply triplet rows, then attach the metadata to every triplet.
unique_song_metadata = song_df_2.drop_duplicates(['song_id'])
song_df = pandas.merge(
    left=song_df_1,
    right=unique_song_metadata,
    how="left",
    on="song_id",
)

In [3]:
# Preview the first rows of the merged frame (head() defaults to five rows).
song_df.head()

Unnamed: 0,user_id,song_id,listen_count,title,release,artist_name,year
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995,1,The Cove,Thicker Than Water,Jack Johnson,0
1,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBBMDR12A8C13253B,2,Entre Dos Aguas,Flamenco Para Niños,Paco De Lucia,1976
2,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBXHDL12A81C204C0,1,Stronger,Graduation,Kanye West,2007
3,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBYHAJ12A6701BF1D,1,Constellations,In Between Dreams,Jack Johnson,2005
4,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SODACBL12A8C13C273,1,Learn To Fly,There Is Nothing Left To Lose,Foo Fighters,1999


In [4]:
# Total number of rows in the merged frame.
len(song_df)

2000000

In [5]:
# Restrict the analysis to the first 10,000 rows (presumably to keep the
# later model fitting fast -- TODO confirm the intended sample size).
# head() hands back a slice of the original frame; taking an explicit copy
# means the column assignment below writes to an independent frame and does
# not raise SettingWithCopyWarning.
song_df = song_df.head(10000).copy()

# Build a single "title - artist_name" label per row. The title is passed
# through str() first, so a missing title becomes the text 'nan' rather than
# propagating NaN; a missing artist_name still yields NaN for the label.
song_df['song'] = song_df['title'].map(str) + " - " + song_df['artist_name']

In [6]:
# Count the rows recorded for each song. Note this is a count of entries per
# song, not a sum of the listen_count values, even though the resulting
# column keeps the name 'listen_count'.
song_grouped = song_df.groupby('song')['listen_count'].count().reset_index()

# Express each song's count as a share (in percent) of all counted rows.
grouped_sum = song_grouped['listen_count'].sum()
song_grouped['percentage'] = song_grouped['listen_count'] / grouped_sum * 100

# Most-counted songs first; ties are broken alphabetically by song label.
song_grouped.sort_values(['listen_count', 'song'], ascending=[False, True])

Unnamed: 0,song,listen_count,percentage
3660,Sehr kosmisch - Harmonia,45,0.45
4678,Undo - Björk,32,0.32
5105,You're The One - Dwight Yoakam,32,0.32
1071,Dog Days Are Over (Radio Edit) - Florence + Th...,28,0.28
3655,Secrets - OneRepublic,28,0.28
4378,The Scientist - Coldplay,27,0.27
4712,Use Somebody - Kings Of Leon,27,0.27
3476,Revelry - Kings Of Leon,26,0.26
1387,Fireflies - Charttraxx Karaoke,24,0.24
1862,Horn Concerto No. 4 in E flat K495: II. Romanc...,23,0.23


In [7]:
# Distinct user ids present in song_df.
users = song_df['user_id'].unique()

In [8]:
# Distinct "title - artist_name" labels present in song_df.
songs = song_df['song'].unique()

In [9]:
# Hold out 20% of the rows as a test set; random_state pins the shuffle so
# the same split is produced on every run.
train_data, test_data = train_test_split(song_df, test_size = 0.20, random_state=0)

In [10]:
# Instantiate the item-similarity recommender from the local Recommenders
# module and hand it the training split together with two column names.
# NOTE(review): 'user_id' and 'song' are presumably the user and item
# columns expected by create(); confirm against the Recommenders module.
is_model = Recommenders.item_similarity_recommender_py()
is_model.create(train_data, 'user_id', 'song')

In [11]:
# Query song, written in the same "title - artist_name" form used to build
# the 'song' column the model was created on.
song = 'Yellow - Coldplay'
# Ask the model for items similar to the query, passed as a one-element list.
is_model.get_similar_items([song])

no. of unique songs in the training set: 4483
Non zero values in cooccurence_matrix :969


Unnamed: 0,user_id,song,score,rank
0,,Fix You - Coldplay,0.375,1
1,,Creep (Explicit) - Radiohead,0.291667,2
2,,Clocks - Coldplay,0.28,3
3,,Seven Nation Army - The White Stripes,0.25,4
4,,Paper Planes - M.I.A.,0.208333,5
5,,Halo - Beyoncé,0.2,6
6,,The Funeral (Album Version) - Band Of Horses,0.181818,7
7,,In My Place - Coldplay,0.181818,8
8,,Kryptonite - 3 Doors Down,0.166667,9
9,,When You Were Young - The Killers,0.166667,10


In [15]:
# Second query. The commented-out title below appears to be an alternative
# query left over from experimentation.
#Drop The World - Lil Wayne / Eminem
song = 'Goodbye - Danny Boy'
# Keep the result so the following cells can inspect it.
# NOTE(review): the recorded run reports 0 non-zero co-occurrence values and
# every returned score is 0.0, so the ranking for this song carries no
# similarity signal -- presumably the song has no co-listeners in the
# training split; confirm before relying on these recommendations.
predicted_song = is_model.get_similar_items([song])

no. of unique songs in the training set: 4483
Non zero values in cooccurence_matrix :0


In [16]:
# Display the recommendation frame returned for the query above.
predicted_song

Unnamed: 0,user_id,song,score,rank
0,,Cover My Eyes - La Roux,0.0,1
1,,The Carpal Tunnel Of Love - Fall Out Boy,0.0,2
2,,The Whole World - Outkast Featuring Killer Mike,0.0,3
3,,Go With The Flow - Queens Of The Stone Age,0.0,4
4,,She Just Likes To Fight - Four Tet,0.0,5
5,,Mourning Air - Portishead,0.0,6
6,,Break Through - Colbie Caillat,0.0,7
7,,Creepin Up The Backstairs - The Fratellis,0.0,8
8,,Warning Sign - Coldplay,0.0,9
9,,Your Arms Feel Like home - 3 Doors Down,0.0,10


In [17]:
# Check what get_similar_items returned (a pandas DataFrame in the recorded output).
type(predicted_song)

pandas.core.frame.DataFrame

In [18]:
# Select only the recommended song labels, as a Series.
predicted_song["song"]

0                            Cover My Eyes - La Roux
1           The Carpal Tunnel Of Love - Fall Out Boy
2    The Whole World - Outkast Featuring Killer Mike
3         Go With The Flow - Queens Of The Stone Age
4                 She Just Likes To Fight - Four Tet
5                          Mourning Air - Portishead
6                     Break Through - Colbie Caillat
7          Creepin Up The Backstairs - The Fratellis
8                            Warning Sign - Coldplay
9            Your Arms Feel Like home - 3 Doors Down
Name: song, dtype: object

In [19]:
# Print each recommended song label on its own line.
for recommended_title in predicted_song["song"].tolist():
    print(recommended_title)

Cover My Eyes - La Roux
The Carpal Tunnel Of Love - Fall Out Boy
The Whole World - Outkast Featuring Killer Mike
Go With The Flow - Queens Of The Stone Age
She Just Likes To Fight - Four Tet
Mourning Air - Portishead
Break Through - Colbie Caillat
Creepin Up The Backstairs - The Fratellis
Warning Sign - Coldplay
Your Arms Feel Like home - 3 Doors Down
