In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("spotify_millsongdata.csv")

In [3]:
df.head(5)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [4]:
df.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [5]:
df = df.sample(40000).drop('link', axis=1).reset_index(drop=True)

In [6]:
df.head(10)

Unnamed: 0,artist,song,text
0,Modern Talking,Superstar,"I have the feeling, that you are the one \r\n..."
1,Red Hot Chili Peppers,Right On Time,One shot all I need \r\nI've got rhythm when ...
2,Regine Velasquez,Sa Ugoy Ng Duyan,I. \r\nSana'y di magmaliw ang dati kong araw ...
3,Miley Cyrus,Twinkle Song,[Verse 1] \r\nI had a dream \r\nDavie Bowie ...
4,Weird Al Yankovic,Another One Rides The Bus,Riding in the bus down the boulevard \r\nAnd ...
5,R. Kelly,Lost In Your Love,"I wanna bring love, back to the radio, \r\nCa..."
6,Ellie Goulding,Devotion,[Verse 1] \r\nOpen up and let me in \r\nShow...
7,Vybz Kartel,Love Of Money,"Sorry honey, I'm in love with money \r\nYou s..."
8,Hank Snow,If It's Wrong To Love You,Written by Bonnie Dodd - Charles Mitchell \r\...
9,Whitesnake,Come On,"I don't go looking for trouble, \r\nIt's alwa..."


In [7]:
df['text'][0]

"I have the feeling, that you are the one  \r\nYou're really special, you can't go wrong  \r\nYou have to work, but you reach your aim  \r\nI see your power, try it again  \r\nIt's such a long way, you know it all  \r\nI know the eyes of the winner  \r\nYou will stand up, if you will fall  \r\nYou are the greatest singer  \r\n  \r\nYou are my hero, my superstar from zero  \r\nYou are to me a superstar tonight  \r\nYou are a winner, you'll never be sinner  \r\nOh baby you will always do it right  \r\n  \r\nOh you're to me - a superstar  \r\nNo matter if your dream is far  \r\nOh you're to me a hero now  \r\nYou reach the top, just anyhow  \r\n  \r\nYou connot win, if you don't take this chance  \r\nI swear you'll fell, it's like a romance  \r\nSometimes it's hard, but please don't give up  \r\nIt's not so easy climb to the top  \r\nI tell you only the strong will survive  \r\nOh you are playing with fire  \r\nIt is the gear test chance of your life  \r\nSo you feel the desire?  \r\n  \r

In [8]:
df.shape

(40000, 3)

TEXT CLEANING / TEXT PREPROCESSING

In [9]:
df['text']=df['text'].str.lower().replace(r'^\w\s', ' ').replace(r'\n', ' ', regex=True)

In [10]:
df.tail(5)

Unnamed: 0,artist,song,text
39995,Yonder Mountain String Band,To See You Coming 'round The Bend,to see you coming 'round the bend \r i just c...
39996,Ozzy Osbourne,Back On Earth,"i have fallen from grace, and my ashes are sca..."
39997,Erik Santos,Bakit Ba Iniibig Ka (Ft. Regine Velasquez),e: ang sabi mo sa akin tayong dalawa'y magmama...
39998,Independence Day,God Bless The USA,"if tomorrow all the things were gone, \r i'd ..."
39999,Keith Urban,If You Wanna Stay,"go if you wanna go, stay if you wanna stay \r..."


In [11]:
import nltk 
from nltk.stem.porter import PorterStemmer

In [12]:
stemmer = PorterStemmer()

In [13]:
def token(txt):
    token = nltk.word_tokenize(txt)
    a =[stemmer.stem(w) for w in token]
    return " ".join(a)

In [14]:
token("you are beautiful, beati")

'you are beauti , beati'

In [15]:
df['text'].apply(lambda x: token(x))

0        i have the feel , that you are the one you 're...
1        one shot all i need i 've got rhythm when i bl...
2        i. sana ' y di magmaliw ang dati kong araw nan...
3        [ vers 1 ] i had a dream davi bowi taught us h...
4        ride in the bu down the boulevard and the plac...
                               ...                        
39995    to see you come 'round the bend i just ca n't ...
39996    i have fallen from grace , and my ash are scat...
39997    e : ang sabi mo sa akin tayong dalawa ' y magm...
39998    if tomorrow all the thing were gone , i 'd wor...
39999    go if you wan na go , stay if you wan na stay ...
Name: text, Length: 40000, dtype: object

In [16]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [17]:
from sklearn.metrics.pairwise import cosine_similarity

In [18]:
tfid = TfidfVectorizer(analyzer='word', stop_words='english')

In [19]:
matrix = tfid.fit_transform(df['text'])

In [20]:
similar = cosine_similarity(matrix)

In [21]:
similar[0]

array([1.        , 0.0306999 , 0.        , ..., 0.        , 0.01694336,
       0.02472191])

In [23]:
df[df['song']=='Back On Earth'].index[0]

39996

RECOMMENDER FUNCTION

In [24]:
def recommender(song_name):
    idx = df[df['song']==song_name].index[0]
    distance = sorted(list(enumerate(similar[idx])), reverse=True, key = lambda x:x[1])
    song = []
    for s_id in distance[1:20]:
        song.append(df.iloc[s_id[0]].song)
    return song
    

In [25]:
recommender("Back On Earth")

['It Must Be Love',
 'Good Friend',
 'National Health',
 'Sing Together',
 'Midnight Blue',
 'Not Of This Earth',
 'See Me Now',
 'Dark Fantasy',
 'Love Song',
 'In The End',
 "Thanx 4 Nothin'",
 'Better',
 'Bells Of Christmas',
 'Here Comes The Hammer',
 'All About You',
 'Down To Earth',
 'Tightrope',
 "If I Can't Have You",
 'Million Dollar Bill']

In [27]:
import pickle

In [29]:
pickle.dump(similar, open("similarity", "wb"))

In [1]:
pickle.dump(df, open("df", "wb"))

NameError: name 'pickle' is not defined

In [2]:
recommender("Back On Earth")

NameError: name 'recommender' is not defined