In [69]:
import pandas as pd

In [70]:
df = pd.read_csv("spotify_millsongdata.csv")

In [71]:
df.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [72]:
df = df.sample(5000).drop('link', axis=1).reset_index(drop=True)

In [73]:
df.head(10)

Unnamed: 0,artist,song,text
0,Bruno Mars,Lights,Dim all the lights sweet darling \r\nCause to...
1,Kelly Clarkson,Think Twice,Ooh \r\nDon't think I can't feel there's some...
2,Leonard Cohen,The Traitor,Now the Swan it floated on the English river ...
3,Crowded House,I Walk Away,You came \r\nOut of this world to me \r\nMy ...
4,Bread,Ann,Ann a-sleepin' on my bed \r\nHair a-tumblin''...
5,Kate Bush,Kite,(Come up and be a kite) \r\n \r\nBeelzebub i...
6,John Mellencamp,In Our Lives,Well I'm a middle-aged teenager \r\nI'm a man...
7,Uriah Heep,Tales,We told our tales as we sat under \r\nMorning...
8,Justin Timberlake,Hootnanny,I could go number one ten times \r\nPretty gi...
9,Kylie Minogue,Feels So Good,"I realize, realize \r\nI get a little bit wil..."


In [74]:
df['text'][0]

"Dim all the lights sweet darling  \r\nCause tonight it's all the way  \r\nTurn up the old Victrola  \r\nGonna dance the night away  \r\n  \r\nLove just don't come easy  \r\nNo it seldom does  \r\nWhen you find the perfect love  \r\nLet it fill you up  \r\n  \r\nDim all the lights sweet darling  \r\nCause tonight it's all the way, hey baby  \r\nTurn up the old Victrola  \r\nGonna dance the night away  \r\n  \r\nLove just don't come easy  \r\nNo it seldom does  \r\nWhen you find the perfect love  \r\nLet it fill you up  \r\n  \r\nDim all the light sweet honey  \r\nCause tonight it's you and me  \r\nNo need to worry darlin'  \r\nCause it's for eternity  \r\n  \r\nLove don't come easy  \r\nThis you know I understand  \r\nI want to be your woman  \r\nIf you'll be my man  \r\nLet yourself go freely and I'll  \r\nShow you things that you've dreamed of  \r\nDon't think that your dreaming  \r\nWe've found the perfect love  \r\nAnd I'm like a cup  \r\nSo fill me up  \r\n  \r\nDim all the lights

In [75]:
df.shape

(5000, 3)

Text Cleaning / Processing

In [76]:
df['text'] = df['text'].str.lower().replace(r'^\w\s', ' ').replace(r'\n', ' ', regex = True)

In [77]:
df.tail(5)

Unnamed: 0,artist,song,text
4995,Amy Grant,Giggle,giggle \r giggle if you want to \r but i kno...
4996,Carly Simon,If I Wasn't So Small (The Piglet Song),it's not as if i want to rule the world \r or...
4997,Nightwish,The Carpenter,who are you? \r man condemned to shine a salv...
4998,Old 97's,Niteclub,eighteen-hundred miles from this old niteclub ...
4999,Status Quo,Nanana,writing songs that i think sound so strange \...


In [78]:
import nltk
from nltk.stem.porter import PorterStemmer

In [79]:
from nltk import word_tokenize

In [80]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\vanam\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [81]:
stemmer = PorterStemmer()

In [82]:
def token(txt):
    token = nltk.word_tokenize(txt)
    a = [stemmer.stem(w) for w in token]
    return " ".join(a)

In [83]:
token("you are beautiful, beauty")

'you are beauti , beauti'

In [84]:
df['text'].apply(lambda x: token(x))

0       dim all the light sweet darl caus tonight it '...
1       ooh do n't think i ca n't feel there 's someth...
2       now the swan it float on the english river ah ...
3       you came out of thi world to me my life part l...
4       ann a-sleepin ' on my bed hair a-tumblin '' ro...
                              ...                        
4995    giggl giggl if you want to but i know it 's st...
4996    it 's not as if i want to rule the world or ev...
4997    who are you ? man condemn to shine a salvat th...
4998    eighteen-hundr mile from thi old niteclub a gi...
4999    write song that i think sound so strang write ...
Name: text, Length: 5000, dtype: object

In [85]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [86]:
tfid = TfidfVectorizer(analyzer='word', stop_words='english')

In [87]:
matrix = tfid.fit_transform(df['text'])

In [88]:
similar = cosine_similarity(matrix)

In [89]:
similar[0]

array([1.        , 0.0844491 , 0.01149226, ..., 0.00610437, 0.05971647,
       0.00241543])

Recommneder Function

In [93]:
def recommender(song_name):
    idx = df[df['song']==song_name].index[0]
    distance = sorted(list(enumerate(similar[idx])), reverse=True, key = lambda x:x[1])
    song = []
    for s_id in distance[1:5]:
        song.append(df.iloc[s_id[0]].song)
    return song

In [95]:
recommender("Niteclub")

['I Stole Your Love', 'Just Like Fire Would', 'Nine Tonight', 'I Got Drunk']

In [123]:
import pickle

In [124]:
pickle.dump(similar, open("similarity.pkl", "wb"))

In [125]:
pickle.dump(df, open("df.pkl", "wb"))

In [126]:
pickle.load(open("similarity.pkl", "rb"))

array([[1.00000000e+00, 8.44491044e-02, 1.14922637e-02, ...,
        6.10437242e-03, 5.97164686e-02, 2.41543457e-03],
       [8.44491044e-02, 1.00000000e+00, 1.06019180e-02, ...,
        0.00000000e+00, 2.21495421e-02, 4.26312128e-03],
       [1.14922637e-02, 1.06019180e-02, 1.00000000e+00, ...,
        4.55513280e-03, 7.89021032e-04, 1.29596236e-04],
       ...,
       [6.10437242e-03, 0.00000000e+00, 4.55513280e-03, ...,
        1.00000000e+00, 5.09156165e-03, 0.00000000e+00],
       [5.97164686e-02, 2.21495421e-02, 7.89021032e-04, ...,
        5.09156165e-03, 1.00000000e+00, 1.02495657e-03],
       [2.41543457e-03, 4.26312128e-03, 1.29596236e-04, ...,
        0.00000000e+00, 1.02495657e-03, 1.00000000e+00]])