In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("spotify_millsongdata.csv")

In [3]:
df.head(5)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [4]:
df.tail(5)

Unnamed: 0,artist,song,link,text
57645,Ziggy Marley,Good Old Days,/z/ziggy+marley/good+old+days_10198588.html,Irie days come on play \r\nLet the angels fly...
57646,Ziggy Marley,Hand To Mouth,/z/ziggy+marley/hand+to+mouth_20531167.html,Power to the workers \r\nMore power \r\nPowe...
57647,Zwan,Come With Me,/z/zwan/come+with+me_20148981.html,all you need \r\nis something i'll believe \...
57648,Zwan,Desire,/z/zwan/desire_20148986.html,northern star \r\nam i frightened \r\nwhere ...
57649,Zwan,Heartsong,/z/zwan/heartsong_20148991.html,come in \r\nmake yourself at home \r\ni'm a ...


In [5]:
df.shape

(57650, 4)

In [6]:
df.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [7]:
df =df.sample(15000).drop('link', axis=1).reset_index(drop=True)

In [8]:
df.head(10)

Unnamed: 0,artist,song,text
0,Leann Rimes,Life Goes On,You sucked me in and played my mind \r\nJust ...
1,Hollies,King Midas In Reverse,If you could only see me. \r\nAnd know exactl...
2,Glee,L-O-V-E,"L is for the way you look at me, \r\nO is for..."
3,Nine Inch Nails,Suck,There is no god up in the sky tonight \r\nNo ...
4,Billie Holiday,He Ain't Got Rhythm,'Cause he ain't got rhythm \r\nEvery night he...
5,Primus,Lacquer Head,Sometimes bored and sometimes lonely \r\nPimp...
6,Linda Ronstadt,All I Left Behind,All I left behind should come as no surprise ...
7,Stevie Wonder,Saturn,Packing my bags going away \r\nTo a place whe...
8,Glen Campbell,Early Morning Song,There's a woman and she thinks I can do no wro...
9,Van Morrison,He Ain't Give You None,"Little gal, little girl, Lord, you know it's t..."


In [9]:
df['text'][0]

"You sucked me in and played my mind  \r\nJust like a toy  \r\nYou would crank and wind  \r\nBaby, I would give you to what you want  \r\nYou left me lying in a pool of doubt  \r\nIf you're still thinking your the daddy mack  \r\nYa shouldn't known better  \r\nBut ya didn't  \r\nAnd I can't go back  \r\n  \r\nOh life goes on  \r\nAnd it's only gonna make me strong  \r\nIt's a fact  \r\nOnce you get on board  \r\nSay good-bye  \r\n'Cause you can't go back  \r\nOh it's a fight  \r\nAnd I really want to get it right  \r\nWhere I'm at  \r\nIt's my life before me  \r\nGot this feeling  \r\nThat I can't go back  \r\n  \r\nWish I knew then what I know now  \r\nYou held all the cards  \r\nAnd sold me out  \r\nBaby, shame on you if you fool me once  \r\nShame on me if you fool me twice  \r\nYou've been a pretty hard case to crack  \r\nShould've of known better  \r\nBut I didn't  \r\nAnd I can't go back  \r\n  \r\nOh life goes on  \r\nAnd it's only gonna make me strong  \r\nIt's a fact  \r\nOnce

In [10]:
df.shape

(15000, 3)

Text Cleaning/ Text Preprocessing

In [11]:
df['text'] = df['text'].str.lower().replace(r'^\w\s', ' ').replace(r'\n', ' ', regex = True)

In [12]:
import nltk
from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()

def tokenization(txt):
    tokens = nltk.word_tokenize(txt)
    stemming = [stemmer.stem(w) for w in tokens]
    return " ".join(stemming)

In [13]:
df['text'] = df['text'].apply(lambda x: tokenization(x))

In [14]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [15]:
tfidvector = TfidfVectorizer(analyzer='word',stop_words='english')
matrix = tfidvector.fit_transform(df['text'])
similarity = cosine_similarity(matrix)

In [16]:
similarity[0]

array([1.        , 0.02517192, 0.02135445, ..., 0.02758657, 0.10556118,
       0.02216649])

In [17]:
df[df['song'] == 'Life Goes On']

Unnamed: 0,artist,song,text
0,Leann Rimes,Life Goes On,you suck me in and play my mind just like a to...
9216,Utopia,Life Goes On,"life goe on , the world keep turn and life goe..."


In [18]:
def recommendation(song_df):
    idx = df[df['song'] == song_df].index[0]
    distances = sorted(list(enumerate(similarity[idx])),reverse=True,key=lambda x:x[1])
    
    songs = []
    for m_id in distances[1:21]:
        songs.append(df.iloc[m_id[0]].song)
        
    return songs

In [19]:
recommendation('Life Goes On')

['Gotta Get Ya',
 'I Believe In You',
 'The Book I Read',
 "What You've Done For Me",
 'Somebody Loves You',
 'Na Na Na',
 'Say Na Na',
 'F-Games',
 'Birthday Song',
 "Love's A Slap In The Face",
 'White Dove',
 'Why Try',
 'Heartache',
 'You Are The Music In Me',
 "This Ain't Goodbye",
 'What Am I Gonna Do',
 'I Remember You',
 'Move On',
 "I'm Gonna Leave You",
 'Everytime']

In [20]:
import pickle
pickle.dump(similarity,open('similarity.pkl','wb'))
pickle.dump(df,open('df.pkl','wb'))