In [1]:
pip install vaderSentiment # dictionary and rule based sentiment analysis

Note: you may need to restart the kernel to use updated packages.


In [2]:
import nltk # Natural Language toolkit for text processing

In [3]:
# NLTK data package holding the VADER sentiment lexicon.
nltk.download("vader_lexicon")

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/vatsalgarg/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [5]:
# NLTK data package with lists of very common words (stop words).
nltk.download("stopwords")

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/vatsalgarg/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [6]:
# WordNet lexical database; NLTK's WordNetLemmatizer reads from this package.
nltk.download("wordnet")

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/vatsalgarg/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [7]:
from pathlib import Path

import pandas as pd  # library for working with dataframes/tables

# Single place to repoint the notebook when the project folder lives elsewhere.
DATA_DIR = Path('/Users/vatsalgarg/Downloads/InsightsHub')

# NOTE(review): "unicode_escape" also interprets backslash sequences that occur
# inside the review text. Confirm the file's real encoding before changing it.
data = pd.read_csv(DATA_DIR / 'Burger_Company_Reviews.csv', encoding="unicode_escape")

In [8]:
data.head()

Unnamed: 0,Sr No,Reviewer Name,Date,Review
0,1,TheBarracksClub,21-Aug,"Burger Co, in a word, rocks. I read some of th..."
1,2,Honey and Hot Sauce,21-Jul,Spicy burger is amazing. Huge burgers cooked t...
2,3,Barbara Buczynski,21-Jul,I appreciate that they have a vegan alternative.
3,4,Karen N.,21-Jul,Don't be fooled by the lack of atmosphere. The...
4,5,Amandeep Kaur,21-Jul,I happened to stumble upon this place last yea...


In [9]:
import numpy as np #used for mathematical operations
import re # Regular expression library for text cleaning and information extraction based on patterns
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer # Vectorization of words
from sklearn.model_selection import train_test_split # Split for training and testing the model
# from sklearn.naive_bayes import MultinomialNB 
# import joblib

# Data Preprocessing

In [10]:
def _clean_review(text):
    """Return ``text`` without e-mail addresses, links and one-character tokens.

    Parameters
    ----------
    text : object
        One review. Anything that is not a ``str`` (for example a missing
        value) is treated as an empty review so the corpus stays aligned
        row-for-row with ``data``.

    Returns
    -------
    str
        The remaining tokens of at least two characters, joined by single spaces.
    """
    if not isinstance(text, str):
        return ""

    # Raw strings keep \S and \s as regex escapes rather than (invalid) string escapes.
    # The trailing whitespace is optional so an address or link at the very end
    # of a review is removed too.
    text = re.sub(r'\S*@\S*\s?', " ", text)  # remove any emails
    text = re.sub(r'http\S*\s?', " ", text)  # remove any links

    # Drop single-character tokens; split()/join also collapses repeated whitespace.
    return ' '.join(token for token in text.split() if len(token) >= 2)


df_list = list(data['Review'])

# One cleaned string per review, in the same order as the rows of `data`.
corpus = [_clean_review(review) for review in df_list]


In [11]:
corpus[0:6]

["Burger Co, in word, rocks. read some of the less than complimentary reviews and simply cannot understand how such and experience can be possible. I've frequented CBC many times and not once was it anything but spectacular. Dave, you are doing bang-up job running tough niche business during even tougher times. We love you. With or without onions. Be in to see you, soon.",
 'Spicy burger is amazing. Huge burgers cooked to order with POTATO BREAD. wish sauce came with the fried raviolis but now know to ask next time:)',
 'appreciate that they have vegan alternative.',
 "Don't be fooled by the lack of atmosphere. The interior is dirty and looks like college dorm. The signage done by middle school art student with stencil and the customer service doesn't exist. There are interior and exterior tables but service does not exist. This place is strictly take out. However .... The burgers were delicious and the onion rings were great. If you want nice place to eat skip this place. But if you w

# 1. Vader Model

In [12]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer  # computes the sentiment scores

# One analyzer instance, shared by every call below.
analyzer = SentimentIntensityAnalyzer()


def text_sentiment_vader(text):
    """Return the "compound" entry of VADER's polarity scores for ``text``.

    The value is looked up with ``dict.get``, so ``None`` is returned if the
    analyzer's result has no ``"compound"`` key.
    """
    scores = analyzer.polarity_scores(text)
    return scores.get("compound")


# Score every row of the Review column (the raw text, not the cleaned `corpus`).
predictions = data['Review'].map(text_sentiment_vader)

In [13]:
# One-column table of compound scores; the column keeps the Series name, "Review".
pred = predictions.to_frame()
pred

Unnamed: 0,Review
0,0.8364
1,0.5994
2,0.4019
3,0.9520
4,0.9186
...,...
134,0.9128
135,0.0000
136,0.9656
137,0.8775


In [14]:
# Carry the original review text and date alongside each score.
pred['Feedback_text'] = data['Review']
pred['Date'] = data['Date']

# pred['Review'] holds the VADER compound score. Scores of 0.05 or more are
# labelled Positive, -0.05 or less Negative, and everything in between Neutral.
# np.select takes the first condition that holds, like the nested np.where did.
pred['Sentiment_Pred_vs'] = np.select(
    [pred['Review'] >= 0.05, pred['Review'] <= -0.05],
    ["Positive", "Negative"],
    default="Neutral",
)

In [15]:
pred.head()

Unnamed: 0,Review,Feedback_text,Date,Sentiment_Pred_vs
0,0.8364,"Burger Co, in a word, rocks. I read some of th...",21-Aug,Positive
1,0.5994,Spicy burger is amazing. Huge burgers cooked t...,21-Jul,Positive
2,0.4019,I appreciate that they have a vegan alternative.,21-Jul,Positive
3,0.952,Don't be fooled by the lack of atmosphere. The...,21-Jul,Positive
4,0.9186,I happened to stumble upon this place last yea...,21-Jul,Positive


In [20]:
pred['Sentiment_Pred_vs'].describe()

count          139
unique           3
top       Positive
freq           120
Name: Sentiment_Pred_vs, dtype: object

In [19]:
pred['Sentiment_Pred_vs'].value_counts()

Positive    120
Negative     15
Neutral       4
Name: Sentiment_Pred_vs, dtype: int64

In [21]:
# Save the VADER scores and labels as CSV. No `index` argument is passed, so
# the DataFrame index is written as well (pandas default).
pred.to_csv(path_or_buf='/Users/vatsalgarg/Downloads/InsightsHub/result_vader.csv')

# 2. TextBlob Model

In [22]:
pip install textblob #similar to Vader, a powerful dictionary based approach and uses rules on positive and negative words

Note: you may need to restart the kernel to use updated packages.


In [23]:
import textblob
from textblob import TextBlob


def text_sentiment(text):
    """Return the TextBlob sentiment polarity of ``text``."""
    return TextBlob(text).sentiment.polarity


# Polarity for every row of the Review column (the raw text).
predictions_tb = data['Review'].map(text_sentiment)

In [24]:
# One-column table of polarity scores; the column keeps the Series name, "Review".
pred_tb = predictions_tb.to_frame()
pred_tb

Unnamed: 0,Review
0,0.174444
1,0.375000
2,0.000000
3,0.360000
4,0.421429
...,...
134,0.081250
135,0.258333
136,0.211563
137,0.387500


In [25]:
# Carry the original review text and date alongside each score.
pred_tb['Feedback_text'] = data['Review']
pred_tb['Date'] = data['Date']

# pred_tb['Review'] holds the TextBlob polarity. The cut-off is zero (kept after
# experimenting with other thresholds): above 0 is Positive, below 0 is
# Negative, exactly 0 is Neutral.
pred_tb['Sentiment_Pred_tb'] = np.select(
    [pred_tb['Review'] > 0, pred_tb['Review'] < 0],
    ["Positive", "Negative"],
    default="Neutral",
)

In [26]:
pred_tb['Sentiment_Pred_tb'].describe()

count          139
unique           3
top       Positive
freq           119
Name: Sentiment_Pred_tb, dtype: object

In [27]:
pred_tb['Sentiment_Pred_tb'].value_counts()

Positive    119
Negative     17
Neutral       3
Name: Sentiment_Pred_tb, dtype: int64

In [28]:
# Save the TextBlob scores and labels as CSV. No `index` argument is passed, so
# the DataFrame index is written as well (pandas default).
pred_tb.to_csv(path_or_buf='/Users/vatsalgarg/Downloads/InsightsHub/result_textblob.csv')