In [1]:
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import requests 
import string
import unicodedata

import gensim.downloader as api
from gensim.models import Word2Vec

from tensorflow.keras import Sequential
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping

from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import make_classification
from sklearn.metrics import plot_confusion_matrix
from sklearn.model_selection import train_test_split

from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize

In [2]:
# The wildcard import is what brings `get_test_lyrics` and `clean` (both used in
# later cells) into scope — presumably they are defined in happysadsongs.data.
# NOTE(review): confirm, and consider importing them by name instead of `*`.
from happysadsongs.data import *
# VADER sentiment analyzer instance, reused by the scoring cell further down.
sid = SentimentIntensityAnalyzer()

In [3]:
# Load the labelled lyrics table. The displayed result has title, artist, label
# and lyrics columns plus several "Unnamed: *" columns (these look like
# leftovers of a saved index — TODO confirm in get_test_lyrics).
test_lyrics = get_test_lyrics()

In [4]:
# Display the freshly loaded frame to check its columns and labels.
test_lyrics

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,title,artist,label,lyrics
0,0,0,If I Die Young,Naya Rivera,sad,"Text\nIf I die young, bury me in satin\nLay me..."
1,1,1,Angie,The Rolling Stones,sad,"Angie, Angie\nWhen will those clouds all disap..."
2,2,2,Pretty Sad,XYLØ,sad,"[Intro]\n(Feeling pretty sad, pretty, pretty s..."
3,3,3,Tear In Your Hand,Tori Amos,sad,All the world just stopped now_x000D_\nSo you ...
4,4,4,Canvas,Shane Smith & The Saints,sad,"I had a a brother, who wasn't from my family\n..."
...,...,...,...,...,...,...
255,255,255,Fucking Hostile,Pantera,angry,"ne, two, three, four!\n\nAlmost every day\nI s..."
256,256,256,Refuse/Resist,Sepultura,angry,A.D.\nTanks on the streets\nConfronting police...
257,257,257,Dam That River,Alice in Chains,angry,I broke you in the canyon\nI drowned you in th...
258,258,258,Destroy Everything,Hatebreed,angry,A new life begins\n\nDestroy everything\nDestr...


In [9]:
# Look at one raw lyric (row position 2): it contains newlines and bracketed
# section markers such as [Intro] and [Verse 1] that need stripping.
test_lyrics.iloc[2]['lyrics']

"[Intro]\n(Feeling pretty sad, pretty, pretty sad, sad)\n(Feeling pretty sad, pretty, pretty sad, sad)\n\n[Verse 1]\nI got friends online\nThey love me, but I can't see into their eyes\nIf they only knew what I'm like\nI'm always looking for something to kill my mind\n\n[Pre-Chorus]\n'Cause I'm feeling pretty sad, pretty sad, pretty sad\nFeeling pretty sad, pretty sad, pretty sad\n\n[Chorus]\nAnd unhappy\nIs how I feel about everything\nSo unhappy\nYeah, I'm pretty sad, but the truth is ugly\nYou can dress me up in all your modern love\n'Til I'm happy\nThen I'll fall apart again\n\n[Post-Chorus]\nFeeling pretty sad, pretty sad, pretty sad\nFeeling pretty sad, pretty sad, pretty sad\n(Feeling pretty sad, pretty, pretty sad, sad)\n(Feeling pretty sad, pretty, pretty sad, sad)\n[Verse 2]\nI'll get the light just right\nThen I'll show you a picture into my life\nDon't feel the love, I just see the likes\nNo, you'll forget me right after your phone dies\n\n[Chorus]\nSo unhappy\nIs how I fee

In [53]:
# Matches a bracketed song-section marker such as "[Intro]", "[Verse 4]",
# "[Bridge]" or "[Chorus: Artist]". The match never crosses a newline or a
# closing bracket, so an unbalanced "[" cannot swallow the lyrics after it.
_SECTION_TAG_RE = re.compile(
    r'\[(?:intro|verse|pre-chorus|post-chorus|chorus|bridge|hook|refrain|interlude|outro)'
    r'\b[^\]\n]*\]',
    flags=re.IGNORECASE,
)


def additional_clean(text):
    """Remove song-section markers and newlines from a raw lyric string.

    Every section marker (e.g. ``[Intro]``, ``[Verse 1]``, ``[Pre-Chorus]``,
    ``[Bridge]``, ``[Chorus: Artist]``) and every newline is replaced by a
    single space, so the words on either side never get glued together.

    The markers handled by the previous hard-coded list are still replaced in
    exactly the same way; in addition, numbered verses beyond 3, bridges,
    hooks, refrains, interludes, annotated markers and lowercase variants are
    now stripped as well. Other bracketed text is left untouched.

    Parameters
    ----------
    text : str
        Raw lyrics.

    Returns
    -------
    str
        The lyrics with markers and newlines turned into spaces. Runs of
        spaces are not collapsed.
    """
    # Strip markers before touching newlines so the pattern's "no newline
    # inside a marker" guard still applies.
    without_tags = _SECTION_TAG_RE.sub(' ', text)
    return without_tags.replace('\n', ' ')

In [44]:

# Build the model-ready text column: strip section markers/newlines first,
# then pass the result through the project's `clean` function.
test_lyrics['text'] = test_lyrics['lyrics'].apply(
    lambda raw_lyrics: clean(additional_clean(raw_lyrics))
)

In [45]:
# Display the frame with the new `text` column.
# NOTE(review): the saved output of this cell already lists negative / neutral /
# positive / compound columns, which are only assigned in later cells — the
# output appears to come from an earlier kernel state; re-run top to bottom.
test_lyrics

Unnamed: 0,title,artist,label,lyrics,text,negative,neutral,positive,compound
0,If I Die Young,Naya Rivera,sad,"Text\nIf I die young, bury me in satin\nLay me...",text if i die young bury me in satin lay me do...,0.072,0.743,0.185,0.9944
1,Angie,The Rolling Stones,sad,"Angie, Angie\nWhen will those clouds all disap...",angie angie when will those clouds all disappe...,0.128,0.658,0.214,0.9806
2,Pretty Sad,XYLØ,sad,"[Intro]\n(Feeling pretty sad, pretty, pretty s...",feeling pretty sad pretty pretty sad sad fee...,0.345,0.230,0.425,0.9964
3,Tear In Your Hand,Tori Amos,sad,All the world just stopped now_x000D_\nSo you ...,all the world just stopped nowxd so you say yo...,0.022,0.934,0.044,0.8126
4,Canvas,Shane Smith & The Saints,sad,"I had a a brother, who wasn't from my family\n...",i had a a brother who wasnt from my family we ...,0.021,0.923,0.056,0.8622
...,...,...,...,...,...,...,...,...,...
255,Fucking Hostile,Pantera,angry,"ne, two, three, four!\n\nAlmost every day\nI s...",ne two three four almost every day i see the ...,0.111,0.758,0.131,0.3162
256,Refuse/Resist,Sepultura,angry,A.D.\nTanks on the streets\nConfronting police...,ad tanks on the streets confronting police ble...,0.331,0.669,0.000,-0.8858
257,Dam That River,Alice in Chains,angry,I broke you in the canyon\nI drowned you in th...,i broke you in the canyon i drowned you in the...,0.164,0.794,0.043,-0.7509
258,Destroy Everything,Hatebreed,angry,A new life begins\n\nDestroy everything\nDestr...,a new life begins destroy everything destroy ...,0.436,0.501,0.063,-0.9935


In [46]:
# Score every cleaned lyric with VADER. Iterating over the column's values
# (instead of feeding labels from `.index` into the positional `.iloc[i]`)
# keeps this correct even when the frame's index is not the default 0..n-1
# range, e.g. after filtering or re-indexing.
vader_scores = [sid.polarity_scores(lyric) for lyric in test_lyrics['text']]

# One list per VADER score, in row order; a later cell attaches them as columns.
negative = [scores['neg'] for scores in vader_scores]
neutral = [scores['neu'] for scores in vader_scores]
positive = [scores['pos'] for scores in vader_scores]
compound = [scores['compound'] for scores in vader_scores]

In [47]:
# Attach the four VADER score lists to the frame as columns, in the order
# negative, neutral, positive, compound. The "Unnamed: *" columns are left
# in place here.
for column_name, column_values in zip(
    ('negative', 'neutral', 'positive', 'compound'),
    (negative, neutral, positive, compound),
):
    test_lyrics[column_name] = column_values


In [48]:
# Keep only the song identity, its label and the four VADER scores.
labeled_lyrics = test_lyrics.loc[
    :,
    ['title', 'artist', 'label', 'negative', 'neutral', 'positive', 'compound'],
]

In [49]:
# Display each song's label next to its VADER scores.
labeled_lyrics

Unnamed: 0,title,artist,label,negative,neutral,positive,compound
0,If I Die Young,Naya Rivera,sad,0.122,0.700,0.178,0.9807
1,Angie,The Rolling Stones,sad,0.128,0.661,0.211,0.9786
2,Pretty Sad,XYLØ,sad,0.389,0.187,0.424,0.9705
3,Tear In Your Hand,Tori Amos,sad,0.020,0.830,0.150,0.9921
4,Canvas,Shane Smith & The Saints,sad,0.021,0.923,0.056,0.8493
...,...,...,...,...,...,...,...
255,Fucking Hostile,Pantera,angry,0.135,0.739,0.126,-0.2716
256,Refuse/Resist,Sepultura,angry,0.320,0.605,0.075,-0.8176
257,Dam That River,Alice in Chains,angry,0.167,0.790,0.043,-0.7509
258,Destroy Everything,Hatebreed,angry,0.439,0.497,0.064,-0.9935


In [54]:
# Share of songs labelled 'angry' whose VADER compound score is below zero.
# `.count()` works per column, so the result is a Series that repeats the same
# ratio for every column (as long as no column has missing values).
angry = (
    labeled_lyrics.query("label == 'angry' and compound < 0").count()
    / labeled_lyrics.query("label == 'angry'").count()
)
angry

title       0.672727
artist      0.672727
label       0.672727
negative    0.672727
neutral     0.672727
positive    0.672727
compound    0.672727
dtype: float64

In [51]:
# Share of songs labelled 'sad' whose VADER compound score is below zero.
# `.count()` works per column, so the result is a Series that repeats the same
# ratio for every column (as long as no column has missing values).
sad = (
    labeled_lyrics.query("label == 'sad' and compound < 0").count()
    / labeled_lyrics.query("label == 'sad'").count()
)
sad

title       0.44
artist      0.44
label       0.44
negative    0.44
neutral     0.44
positive    0.44
compound    0.44
dtype: float64

In [52]:
# Share of songs labelled 'happy' whose VADER compound score is above zero.
# `.count()` works per column, so the result is a Series that repeats the same
# ratio for every column (as long as no column has missing values).
happy = (
    labeled_lyrics.query("label == 'happy' and compound > 0").count()
    / labeled_lyrics.query("label == 'happy'").count()
)
happy

title       0.838095
artist      0.838095
label       0.838095
negative    0.838095
neutral     0.838095
positive    0.838095
compound    0.838095
dtype: float64