# Installation

In a terminal, run `pip install NRCLex`.

# Library

In [16]:
from nrclex import NRCLex

import numpy as np
import pandas as pd

# Detect emotions: sentence

In [6]:
# Sample input: a single short sentence
text = 'your website is horrible'

# Build the NRCLex object for the sentence
emotion = NRCLex(text)

# Show the list of words, then the emotions each matched word maps to
for result in (emotion.words, emotion.affect_dict):
    print('\n', result)


 ['your', 'website', 'is', 'horrible']

 {'horrible': ['anger', 'disgust', 'fear', 'negative']}


# "Raw emotion scores" show how many words triggered each emotion

The word "horrible" is the only emotion word in the sentence, so each emotion it triggers gets a count of 1.

In [4]:
# Per-emotion count of the words that triggered it
raw_scores = emotion.raw_emotion_scores
print('\n', raw_scores)


 {'anger': 1, 'disgust': 1, 'fear': 1, 'negative': 1}


# "Top emotions" standardize "raw emotion scores"

If each sentence's scores must add up to 1, which emotions weigh more and which weigh less?

In [5]:
# (emotion, score) pairs; for this sentence the four emotions tie at 0.25
top_scores = emotion.top_emotions
print('\n', top_scores)


 [('fear', 0.25), ('anger', 0.25), ('negative', 0.25), ('disgust', 0.25)]


# Why do we need to standardize? Without standardization, longer sentences can give us more false positives

In [14]:
text = 'It is unclear what brands Amazon will offer in the stores, although the company’s private-label goods are expected to feature prominently, the people said. Amazon sells scores of products including clothes, furniture, batteries and electronic devices through many of its own labels. The plans arent yet final and could change, these people said.'
emotion = NRCLex(text)

# Which words matched, followed by how many matches each emotion received
for result in (emotion.affect_dict, emotion.raw_emotion_scores):
    print('\n', result)



 {'offer': ['positive'], 'goods': ['positive'], 'expected': ['anticipation'], 'feature': ['positive'], 'prominently': ['positive'], 'including': ['positive'], 'change': ['fear']}

 {'positive': 5, 'anticipation': 1, 'fear': 1}


# Let's look at a longer example

In [10]:
text = 'I am humbled and honored to be surrounded by colleagues who challenge, support and encourage me at each stage. Through them, I’ve learned work isn’t just about the tasks we set out to do, but the experiences, growth and friendships we build along the way.'
emotion = NRCLex(text)

# Matched words with their emotions, then the per-emotion word counts
matched_words = emotion.affect_dict
emotion_counts = emotion.raw_emotion_scores
print('\n', matched_words)
print('\n', emotion_counts)


 {'humbled': ['positive', 'sadness'], 'challenge': ['anger', 'fear', 'negative'], 'encourage': ['joy', 'positive', 'trust'], 'growth': ['positive'], 'build': ['positive']}

 {'positive': 4, 'sadness': 1, 'anger': 1, 'fear': 1, 'negative': 1, 'joy': 1, 'trust': 1}


# Detect emotions: dataframe

In [22]:
# Load the sample texts and preview the first rows
csv_path = 'test.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,id,text
0,1,your website is very easy to use!
1,2,your website is not good
2,3,is this refundable?
3,4,someone needs to be fired
4,5,"Way too big for a 3, 4, & 5 year old..... disa..."


## Make sure you preprocess the data first (e.g. lemmatization; not shown here)

In [23]:
def text_to_affect_frequencies(text):
    """Return the NRCLex `affect_frequencies` dict for one piece of text."""
    return NRCLex(text).affect_frequencies


# One emotion -> frequency dict per row, stored in a new 'emotions' column
df['emotions'] = df['text'].apply(text_to_affect_frequencies)
df.head()

Unnamed: 0,id,text,emotions
0,1,your website is very easy to use!,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't..."
1,2,your website is not good,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't..."
2,3,is this refundable?,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't..."
3,4,someone needs to be fired,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't..."
4,5,"Way too big for a 3, 4, & 5 year old..... disa...","{'fear': 0.0, 'anger': 0.25, 'anticip': 0.0, '..."


In [24]:
# Expand the per-row emotion dicts into one numeric column per emotion.
# Building the frame from the list of dicts in a single call avoids creating
# a separate pd.Series for every row, which is what `.apply(pd.Series)` does.
emotion_scores = pd.DataFrame(df['emotions'].tolist(), index=df.index)

# NOTE(review): a key that is missing from a row's dict becomes NaN (see the
# 'anticipation' column in the output); presumably that means a score of 0.0.
# Add `.fillna(0.0)` to `emotion_scores` if downstream code needs plain
# numbers -- TODO confirm against NRCLex.
df = pd.concat([df.drop(columns='emotions'), emotion_scores], axis=1)
df.head()

Unnamed: 0,id,text,fear,anger,anticip,trust,surprise,positive,negative,sadness,disgust,joy,anticipation
0,1,your website is very easy to use!,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
1,2,your website is not good,0.0,0.0,0.0,0.2,0.2,0.2,0.0,0.0,0.0,0.2,0.2
2,3,is this refundable?,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
3,4,someone needs to be fired,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
4,5,"Way too big for a 3, 4, & 5 year old..... disa...",0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.25,0.25,0.0,


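# Limitation: NRCLex can't handle negations

For example, "your website is not good" in the table above still scores 0.2 each on trust, surprise, positive, joy and anticipation, and 0.0 on negative.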

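# Limitation: NRCLex can't recognize words it doesn't know

For example, "someone needs to be fired" in the table above scores 0.0 on every emotion, presumably because none of its words matched the lexicon.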

# References

- https://github.com/metalcorebear/NRCLex
- https://www.geeksforgeeks.org/emotion-classification-using-nrc-lexicon-in-python/

In [2]:
from datetime import datetime

# Date stamp in YYMMDD form, taken at the moment this cell runs
date = f'{datetime.today():%y%m%d}'
print('Last modified: ' + date)

Last modified: 210524
