In [41]:
import pandas as pd

In [42]:
df = pd.read_csv("spotify_millsongdata.csv")

In [43]:
df.head(5)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [44]:
df.tail(5)

Unnamed: 0,artist,song,link,text
57645,Ziggy Marley,Good Old Days,/z/ziggy+marley/good+old+days_10198588.html,Irie days come on play \r\nLet the angels fly...
57646,Ziggy Marley,Hand To Mouth,/z/ziggy+marley/hand+to+mouth_20531167.html,Power to the workers \r\nMore power \r\nPowe...
57647,Zwan,Come With Me,/z/zwan/come+with+me_20148981.html,all you need \r\nis something i'll believe \...
57648,Zwan,Desire,/z/zwan/desire_20148986.html,northern star \r\nam i frightened \r\nwhere ...
57649,Zwan,Heartsong,/z/zwan/heartsong_20148991.html,come in \r\nmake yourself at home \r\ni'm a ...


In [45]:
df.shape

(57650, 4)

In [46]:
df.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [47]:
df = df.sample(5000).drop('link', axis=1).reset_index(drop=True)

In [48]:
df.head(10)

Unnamed: 0,artist,song,text
0,Dean Martin,In The Misty Moonlight,In the misty moonlight \r\nBy the flickering ...
1,Cher,A Different Kind Of Love Song,What if the world was crazy and I was sane \r...
2,Hillsong United,Just Let Me Say,Just let me say how much I love You \r\nLet m...
3,Grateful Dead,Ramble On Rose,"Just like jack the ripper, just like mojo hand..."
4,Children,Babes In The Woods,"Oh, don't you remember, a long time ago, \r\n..."
5,Outkast,Bust,Ghosts and goblins run amock \r\nIn the caver...
6,Hanson,Tearing It Down,"I am, taking a chance \r\nWalking with my lac..."
7,Metallica,The Four Horsemen,By the last breath of the fourth winds blow \...
8,Conway Twitty,Baby I'm-a Want You,"Baby, I'm a-want you \r\nBaby, I'm a-need you..."
9,Thin Lizzy,Cold Sweat,I put my money in the suitcase \r\nAnd headed...


In [49]:
df.shape

(5000, 3)

Text Cleaning / Text Preprocessing

In [50]:
df['text'] = df['text'].str.lower().replace(r'^\w\s',' ').replace(r'\n', ' ', regex = True)

In [51]:
df.tail(5)

Unnamed: 0,artist,song,text
4995,Randy Travis,How Do I Wrap My Heart Up For Christmas,silver bells are ringing \r carolers are sing...
4996,Queen Latifah,Just Another Day,(queen talking) \r we gon take this one over ...
4997,Westlife,Colour My World,oh no \r \r everybody needs affection \r l...
4998,Gary Numan,We Are Glass,we are young we can break \r watch us fall \...
4999,Nirvana,Paper Cuts,when i'm feeling tired \r she pushed food thr...


In [52]:
import nltk
nltk.download('punkt')
from nltk.stem.porter import PorterStemmer

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [53]:
stemmer = PorterStemmer()

In [54]:
def token(txt) :
    token = nltk.word_tokenize(txt)
    a =[stemmer.stem(w) for w in token]
    return " ".join(a)

In [55]:
token("you are beautiful, beauty")

'you are beauti , beauti'

In [56]:
df['text'].apply(lambda x: token(x))

0       in the misti moonlight by the flicker fireligh...
1       what if the world wa crazi and i wa sane would...
2       just let me say how much i love you let me spe...
3       just like jack the ripper , just like mojo han...
4       oh , do n't you rememb , a long time ago , tho...
                              ...                        
4995    silver bell are ring carol are sing snowflak d...
4996    ( queen talk ) we gon take thi one over to 275...
4997    oh no everybodi need affect look for a deep co...
4998    we are young we can break watch us fall we can...
4999    when i 'm feel tire she push food through the ...
Name: text, Length: 5000, dtype: object

In [57]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [58]:
tfid = TfidfVectorizer(analyzer='word', stop_words='english')

In [59]:
matrix = tfid.fit_transform(df['text'])

In [60]:
similar = cosine_similarity(matrix)

In [61]:
similar[0]

array([1.        , 0.        , 0.016455  , ..., 0.01438716, 0.00814565,
       0.01056872])

In [63]:
df[df['song']=='Colour My World'].index[0]

4997

Recommender Function

In [68]:
def recommender(song_name):
    idx = df[df['song']== song_name].index[0]
    distance = sorted(list(enumerate(similar[idx])), reverse=True, key=lambda x:x[1])
    song = []
    for s_id in distance[1:5]:
        song.append(df.iloc[s_id[0]].song)
    return song

In [70]:
recommender("Colour My World")

['Color Of The Blues', 'Viva La Vida', 'Club Rocker', 'Please, Please, Please']

In [71]:
import pickle

In [72]:
pickle.dump(similar, open("similarity", "wb"))

In [73]:
pickle.dump(df, open("df", "wb"))