<a href="https://colab.research.google.com/github/ugr-gowthamraj/Sentimental-Analysis-of-product-reviews-using-Python/blob/main/Text_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Installing libraries

In [None]:
!pip install nltk



In [None]:
!pip install afinn



#Importing libraries

In [None]:
from google.colab import files                                                    #for file operations in colab
import io                                                                         #for i/o operations in colab
import numpy as np                                                                #for arrays                                                
import pandas as pd                                                               #for dataframes
import nltk                                                                       #for text analysis --> Natural Language processing toolkit
nltk.download('punkt')                                                            #needed for conveting text into tokens
from nltk.tokenize import RegexpTokenizer                                         #for removing punctuations
from nltk.tokenize import sent_tokenize, word_tokenize                            #for tokenizing                         
nltk.download('stopwords')                                                        #needed for stops words
from nltk.corpus import stopwords                                                 #for filtering stops words
from nltk.stem import PorterStemmer                                               #for stemming
from nltk.stem.snowball import SnowballStemmer                                    #for stemming
nltk.download('wordnet')                                                          #needed for Lemmatizing
from nltk.stem.wordnet import WordNetLemmatizer                                   #for Lemmatization
from afinn import Afinn                                                           #for Sentimental analysis
import re


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


#Uploading file

In [None]:
# Ask the user to upload the review dataset; the result is indexed by file name
# in the next cell to get at the uploaded bytes.
file_upload = files.upload()

Saving Unlock 1.0.csv to Unlock 1.0 (1).csv


In [None]:
# Load the uploaded review dataset into a DataFrame.
# The expected file name is tried first. If the upload is stored under a different
# name (for example a renamed re-upload such as "Unlock 1.0 (1).csv"), fall back to
# the only uploaded .csv file instead of failing on the hard-coded key.
DATASET_FILENAME = 'Unlock 1.0.csv'
uploaded_csv_names = [name for name in file_upload if name.lower().endswith('.csv')]
if DATASET_FILENAME in file_upload:
    dataset_bytes = file_upload[DATASET_FILENAME]
elif len(uploaded_csv_names) == 1:
    dataset_bytes = file_upload[uploaded_csv_names[0]]
else:
    raise KeyError(
        "Expected an upload named %r or exactly one .csv file, got: %s"
        % (DATASET_FILENAME, sorted(file_upload))
    )
dataset = pd.read_csv(io.BytesIO(dataset_bytes))
dataset.head()

Unnamed: 0,Sentiment,Comments
0,0,I will not pay any money in future please keep...
1,0,"Customer service num not response, My repaymen..."
2,0,"Please don't download this app this App , they..."
3,0,What the hell is this once I get 1000 loan fro...
4,0,"After making pre closure for all my loan, stil..."


In [None]:
# Frequency of each label in the 'Sentiment' column that came with the dataset.
given_counts = dataset['Sentiment'].value_counts()
print("\nCounting Given Sentiments\n", given_counts)


Counting Given Sentiments
 0    423
1    332
Name: Sentiment, dtype: int64


#Data processing

##Creating new dataframe(df) for working on text analysis

In [None]:
# Separate working frame that carries only the 'Comments' column of the dataset.
df = pd.DataFrame(data=dataset, columns=['Comments'])
df.head()

Unnamed: 0,Comments
0,I will not pay any money in future please keep...
1,"Customer service num not response, My repaymen..."
2,"Please don't download this app this App , they..."
3,What the hell is this once I get 1000 loan fro...
4,"After making pre closure for all my loan, stil..."


###Converting the Comments to lower case

In [None]:
# Lower-case every comment through the vectorised string accessor, then store it back.
lowered_comments = df['Comments'].str.lower()
df['Comments'] = lowered_comments
df.head()

Unnamed: 0,Comments
0,i will not pay any money in future please keep...
1,"customer service num not response, my repaymen..."
2,"please don't download this app this app , they..."
3,what the hell is this once i get 1000 loan fro...
4,"after making pre closure for all my loan, stil..."


##Tokenization

###Using RegexpTokenizer

In [None]:
# \w+ keeps runs of word characters and drops punctuation. As a side effect,
# contractions are split at the apostrophe ("don't" -> "don", "t").
tokenizer = RegexpTokenizer(r'\w+')
# Tokenise the 'Comments' column directly instead of applying a lambda row by row
# over the whole frame; the result is the same list of tokens per comment.
df['Tokens'] = df['Comments'].apply(tokenizer.tokenize)
df.head()

Unnamed: 0,Comments,Tokens
0,i will not pay any money in future please keep...,"[i, will, not, pay, any, money, in, future, pl..."
1,"customer service num not response, my repaymen...","[customer, service, num, not, response, my, re..."
2,"please don't download this app this app , they...","[please, don, t, download, this, app, this, ap..."
3,what the hell is this once i get 1000 loan fro...,"[what, the, hell, is, this, once, i, get, 1000..."
4,"after making pre closure for all my loan, stil...","[after, making, pre, closure, for, all, my, lo..."


###Using WordTokenizer

In [None]:
#df['Tokens']=df.apply(lambda row: nltk.word_tokenize(row['Comments']), axis=1)
#df.head()

##Remove Stopwords

###Using library for stopwords

In [None]:
#stop_wordsl=set(stopwords.words("english"))
#print(stop_wordsl)
#df['Filtered Tokens']=df['Tokens'].apply(lambda x: [item for item in x if item not in stop_wordsl])
#df.head()

###Using custom stop words

In [None]:
# Ask the user to upload the custom stop-word list. This rebinds `file_upload`,
# so the previous upload is no longer reachable through that name.
file_upload = files.upload()

Saving stopwords-1.txt to stopwords-1 (2).txt


In [None]:
# Read the custom stop-word list (one word per line) into a single-column frame.
# header=None stops pandas from treating the first word as a column name, which
# would silently drop it from the list.
# NOTE(review): assumes the file has no header line - confirm against the file.
stop_df = pd.read_csv(
    io.StringIO(file_upload['stopwords-1.txt'].decode('utf-8')),
    header=None,
)
#stop_df.head()
# 2-D array with one stop word per row.
stop_words = stop_df.iloc[::].values
print(stop_words)

[['loan']
 ['instant']
 ['amazing']
 ...
 ['should']
 ['such']
 ['does']]


In [None]:
# Flatten the stop-word array into a set once. Each membership test is then a hash
# look-up instead of a comparison against the whole array for every single token.
stop_word_set = set(np.ravel(stop_words).tolist())
# Keep only the tokens that are not stop words.
df['Filtered Tokens'] = df['Tokens'].apply(
    lambda tokens: [token for token in tokens if token not in stop_word_set]
)
df.head()

Unnamed: 0,Comments,Tokens,Filtered Tokens
0,i will not pay any money in future please keep...,"[i, will, not, pay, any, money, in, future, pl...","[future, mind, guys, reference, future, mind, ..."
1,"customer service num not response, my repaymen...","[customer, service, num, not, response, my, re...","[customer, service, num, response, repayment, ..."
2,"please don't download this app this app , they...","[please, don, t, download, this, app, this, ap...","[download, threatened, payment, contact, send,..."
3,what the hell is this once i get 1000 loan fro...,"[what, the, hell, is, this, once, i, get, 1000...","[apply, message, match, criteria, ffffffkkkkkk]"
4,"after making pre closure for all my loan, stil...","[after, making, pre, closure, for, all, my, lo...","[pre, closure, emi, frustrating, friends]"


##Stemming

In [None]:
ps = PorterStemmer()               # instantiated, but not used by the cells shown here
ss = SnowballStemmer("english")


def _stem_tokens(tokens):
    """Return the Snowball stem of every token in ``tokens``."""
    return [ss.stem(token) for token in tokens]


df['Stemmed Filtered Tokens'] = df['Filtered Tokens'].apply(_stem_tokens)
df.head()

Unnamed: 0,Comments,Tokens,Filtered Tokens,Stemmed Filtered Tokens
0,i will not pay any money in future please keep...,"[i, will, not, pay, any, money, in, future, pl...","[future, mind, guys, reference, future, mind, ...","[futur, mind, guy, refer, futur, mind, 24, hou..."
1,"customer service num not response, my repaymen...","[customer, service, num, not, response, my, re...","[customer, service, num, response, repayment, ...","[custom, servic, num, respons, repay, month, a..."
2,"please don't download this app this app , they...","[please, don, t, download, this, app, this, ap...","[download, threatened, payment, contact, send,...","[download, threaten, payment, contact, send, r..."
3,what the hell is this once i get 1000 loan fro...,"[what, the, hell, is, this, once, i, get, 1000...","[apply, message, match, criteria, ffffffkkkkkk]","[appli, messag, match, criteria, ffffffkkkkkk]"
4,"after making pre closure for all my loan, stil...","[after, making, pre, closure, for, all, my, lo...","[pre, closure, emi, frustrating, friends]","[pre, closur, emi, frustrat, friend]"


##Lemmatization

In [None]:
lem = WordNetLemmatizer()


def _lemmatize_tokens(tokens):
    """Lemmatize every token, passing "v" as the part-of-speech argument."""
    return [lem.lemmatize(token, "v") for token in tokens]


df['Lemmatized Filtered Tokens'] = df['Filtered Tokens'].apply(_lemmatize_tokens)
df.head()


Unnamed: 0,Comments,Tokens,Filtered Tokens,Stemmed Filtered Tokens,Lemmatized Filtered Tokens
0,i will not pay any money in future please keep...,"[i, will, not, pay, any, money, in, future, pl...","[future, mind, guys, reference, future, mind, ...","[futur, mind, guy, refer, futur, mind, 24, hou...","[future, mind, guy, reference, future, mind, 2..."
1,"customer service num not response, my repaymen...","[customer, service, num, not, response, my, re...","[customer, service, num, response, repayment, ...","[custom, servic, num, respons, repay, month, a...","[customer, service, num, response, repayment, ..."
2,"please don't download this app this app , they...","[please, don, t, download, this, app, this, ap...","[download, threatened, payment, contact, send,...","[download, threaten, payment, contact, send, r...","[download, threaten, payment, contact, send, r..."
3,what the hell is this once i get 1000 loan fro...,"[what, the, hell, is, this, once, i, get, 1000...","[apply, message, match, criteria, ffffffkkkkkk]","[appli, messag, match, criteria, ffffffkkkkkk]","[apply, message, match, criteria, ffffffkkkkkk]"
4,"after making pre closure for all my loan, stil...","[after, making, pre, closure, for, all, my, lo...","[pre, closure, emi, frustrating, friends]","[pre, closur, emi, frustrat, friend]","[pre, closure, emi, frustrate, friends]"


#Sentiment Analysis

##Using Afinn

###Trying with Lemmas

In [None]:
af = Afinn()


def _score_tokens(tokens):
    """Return the AFINN score of each token, in order."""
    return [af.score(token) for token in tokens]


df['Afinn Score of Lem'] = df['Lemmatized Filtered Tokens'].apply(_score_tokens)
df.head()

Unnamed: 0,Comments,Tokens,Filtered Tokens,Stemmed Filtered Tokens,Lemmatized Filtered Tokens,Afinn Score of Lem
0,i will not pay any money in future please keep...,"[i, will, not, pay, any, money, in, future, pl...","[future, mind, guys, reference, future, mind, ...","[futur, mind, guy, refer, futur, mind, 24, hou...","[future, mind, guy, reference, future, mind, 2...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, ..."
1,"customer service num not response, my repaymen...","[customer, service, num, not, response, my, re...","[customer, service, num, response, repayment, ...","[custom, servic, num, respons, repay, month, a...","[customer, service, num, response, repayment, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,"please don't download this app this app , they...","[please, don, t, download, this, app, this, ap...","[download, threatened, payment, contact, send,...","[download, threaten, payment, contact, send, r...","[download, threaten, payment, contact, send, r...","[0.0, -2.0, 0.0, 0.0, 0.0, 0.0]"
3,what the hell is this once i get 1000 loan fro...,"[what, the, hell, is, this, once, i, get, 1000...","[apply, message, match, criteria, ffffffkkkkkk]","[appli, messag, match, criteria, ffffffkkkkkk]","[apply, message, match, criteria, ffffffkkkkkk]","[0.0, 0.0, 0.0, 0.0, 0.0]"
4,"after making pre closure for all my loan, stil...","[after, making, pre, closure, for, all, my, lo...","[pre, closure, emi, frustrating, friends]","[pre, closur, emi, frustrat, friend]","[pre, closure, emi, frustrate, friends]","[0.0, 0.0, 0.0, -2.0, 0.0]"


In [None]:
# Add up the per-token scores of each comment. The total is stored wrapped in a
# one-element list (e.g. [-2.0]), not as a bare number.
df['Afinn Overall Score of Lem'] = df['Afinn Score of Lem'].apply(lambda scores: [sum(scores)])
df.head()

Unnamed: 0,Comments,Tokens,Filtered Tokens,Stemmed Filtered Tokens,Lemmatized Filtered Tokens,Afinn Score of Lem,Afinn Overall Score of Lem
0,i will not pay any money in future please keep...,"[i, will, not, pay, any, money, in, future, pl...","[future, mind, guys, reference, future, mind, ...","[futur, mind, guy, refer, futur, mind, 24, hou...","[future, mind, guy, reference, future, mind, 2...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, ...",[0.0]
1,"customer service num not response, my repaymen...","[customer, service, num, not, response, my, re...","[customer, service, num, response, repayment, ...","[custom, servic, num, respons, repay, month, a...","[customer, service, num, response, repayment, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[-1.0]
2,"please don't download this app this app , they...","[please, don, t, download, this, app, this, ap...","[download, threatened, payment, contact, send,...","[download, threaten, payment, contact, send, r...","[download, threaten, payment, contact, send, r...","[0.0, -2.0, 0.0, 0.0, 0.0, 0.0]",[-2.0]
3,what the hell is this once i get 1000 loan fro...,"[what, the, hell, is, this, once, i, get, 1000...","[apply, message, match, criteria, ffffffkkkkkk]","[appli, messag, match, criteria, ffffffkkkkkk]","[apply, message, match, criteria, ffffffkkkkkk]","[0.0, 0.0, 0.0, 0.0, 0.0]",[0.0]
4,"after making pre closure for all my loan, stil...","[after, making, pre, closure, for, all, my, lo...","[pre, closure, emi, frustrating, friends]","[pre, closur, emi, frustrat, friend]","[pre, closure, emi, frustrate, friends]","[0.0, 0.0, 0.0, -2.0, 0.0]",[-2.0]


In [None]:
def _afinn_label(overall):
    """Map a one-element score list to 'positive', 'negative' or 'neutral'.

    The comparison is list against list, so ``[x] > [0.0]`` holds when x > 0.0.
    """
    if overall > [0.0]:
        return 'positive'
    if overall < [0.0]:
        return 'negative'
    return 'neutral'


df['Afinn Sentiment'] = df['Afinn Overall Score of Lem'].map(_afinn_label)
df.head()

Unnamed: 0,Comments,Tokens,Filtered Tokens,Stemmed Filtered Tokens,Lemmatized Filtered Tokens,Afinn Score of Lem,Afinn Overall Score of Lem,Afinn Sentiment
0,i will not pay any money in future please keep...,"[i, will, not, pay, any, money, in, future, pl...","[future, mind, guys, reference, future, mind, ...","[futur, mind, guy, refer, futur, mind, 24, hou...","[future, mind, guy, reference, future, mind, 2...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, ...",[0.0],neutral
1,"customer service num not response, my repaymen...","[customer, service, num, not, response, my, re...","[customer, service, num, response, repayment, ...","[custom, servic, num, respons, repay, month, a...","[customer, service, num, response, repayment, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[-1.0],negative
2,"please don't download this app this app , they...","[please, don, t, download, this, app, this, ap...","[download, threatened, payment, contact, send,...","[download, threaten, payment, contact, send, r...","[download, threaten, payment, contact, send, r...","[0.0, -2.0, 0.0, 0.0, 0.0, 0.0]",[-2.0],negative
3,what the hell is this once i get 1000 loan fro...,"[what, the, hell, is, this, once, i, get, 1000...","[apply, message, match, criteria, ffffffkkkkkk]","[appli, messag, match, criteria, ffffffkkkkkk]","[apply, message, match, criteria, ffffffkkkkkk]","[0.0, 0.0, 0.0, 0.0, 0.0]",[0.0],neutral
4,"after making pre closure for all my loan, stil...","[after, making, pre, closure, for, all, my, lo...","[pre, closure, emi, frustrating, friends]","[pre, closur, emi, frustrat, friend]","[pre, closure, emi, frustrate, friends]","[0.0, 0.0, 0.0, -2.0, 0.0]",[-2.0],negative


In [None]:
# How many comments fall into each AFINN-derived sentiment label.
afinn_counts = df['Afinn Sentiment'].value_counts()
print("\nCounting Afinn Sentiments\n", afinn_counts)


Counting Afinn Sentiments
 neutral     281
positive    277
negative    197
Name: Afinn Sentiment, dtype: int64


In [None]:
#dataset['Afinn Sentiment']=df['Afinn Sentiment']
#dataset.head()

###Trying with comments directly

In [None]:
# Score each (lower-cased) comment as a whole string rather than token by token.
sentiment_scores = list(map(af.score, df['Comments']))

# Translate the sign of every score into a label.
sentiment_category = []
for score in sentiment_scores:
    if score > 0:
        label = 'positive'
    elif score < 0:
        label = 'negative'
    else:
        label = 'neutral'
    sentiment_category.append(label)

In [None]:
# Print the full score list and the full label list, separated by a line break.
print(f"{sentiment_scores} \n {sentiment_category}")

[0.0, -3.0, -3.0, -4.0, -2.0, 0.0, -5.0, -3.0, -1.0, 2.0, 1.0, 0.0, -3.0, -5.0, -7.0, -12.0, -2.0, -2.0, -4.0, -1.0, -6.0, -3.0, 2.0, -5.0, 1.0, -11.0, 1.0, 6.0, -11.0, -3.0, 5.0, -4.0, -3.0, 0.0, -3.0, 0.0, -9.0, -3.0, -5.0, 2.0, 1.0, -5.0, -5.0, 0.0, 1.0, -1.0, -3.0, -5.0, 2.0, -10.0, -4.0, -11.0, -4.0, 4.0, -3.0, 3.0, -7.0, -2.0, -2.0, -5.0, -1.0, -3.0, -2.0, 1.0, -3.0, 1.0, 1.0, -3.0, 2.0, 4.0, 4.0, -12.0, 0.0, -7.0, 6.0, -1.0, -3.0, -7.0, 0.0, -7.0, 0.0, 0.0, 0.0, -3.0, 0.0, 0.0, -4.0, -4.0, 1.0, 2.0, -3.0, -10.0, 1.0, -10.0, -16.0, -3.0, -4.0, -6.0, -2.0, 0.0, -3.0, 0.0, 0.0, -3.0, 0.0, 2.0, -11.0, 0.0, -10.0, -4.0, 2.0, -2.0, -3.0, -4.0, -5.0, -1.0, -5.0, 2.0, -2.0, 0.0, -2.0, -10.0, 3.0, 6.0, -4.0, 4.0, 1.0, -7.0, -5.0, 2.0, 3.0, -8.0, -2.0, 2.0, -2.0, 0.0, -2.0, 12.0, -11.0, -7.0, -5.0, 0.0, -4.0, -2.0, -6.0, -3.0, 0.0, -1.0, -10.0, -2.0, -1.0, 0.0, -8.0, 0.0, -2.0, -2.0, -4.0, -7.0, -2.0, -3.0, -2.0, -4.0, -6.0, -2.0, 1.0, -4.0, -4.0, -1.0, 0.0, -2.0, 0.0, -3.0, 1.0, -2.0, -2

##Using Bing

###Uploading positive words to list

In [None]:
# Ask the user to upload the positive-word list; `file_upload` is rebound again.
file_upload = files.upload()

Saving positive.txt to positive (2).txt


In [None]:
# Read the positive-word list (one word per line) into a single-column frame.
# header=None stops pandas from treating the first word as a column name, which
# would silently drop it from the lexicon.
# NOTE(review): assumes the file has no header line - confirm against the file.
positive_df = pd.read_csv(
    io.StringIO(file_upload['positive.txt'].decode('utf-8')),
    header=None,
)
# 2-D array with one positive word per row.
positive_words = positive_df.iloc[::].values
print(positive_words)

[['abounds']
 ['abundance']
 ['abundant']
 ...
 ['zenith']
 ['zest']
 ['zippy']]


###positive word count

In [None]:
# Flatten the positive lexicon into a set once, so each token is checked with a hash
# look-up instead of being compared against the whole array.
positive_word_set = set(np.ravel(positive_words).tolist())
# Keep the lemmatized tokens that appear in the positive lexicon.
df['Bing Positive Tokens'] = df['Lemmatized Filtered Tokens'].apply(
    lambda tokens: [token for token in tokens if token in positive_word_set]
)
df.head()

Unnamed: 0,Comments,Tokens,Filtered Tokens,Stemmed Filtered Tokens,Lemmatized Filtered Tokens,Afinn Score of Lem,Afinn Overall Score of Lem,Afinn Sentiment,Bing Positive Tokens
0,i will not pay any money in future please keep...,"[i, will, not, pay, any, money, in, future, pl...","[future, mind, guys, reference, future, mind, ...","[futur, mind, guy, refer, futur, mind, 24, hou...","[future, mind, guy, reference, future, mind, 2...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, ...",[0.0],neutral,[]
1,"customer service num not response, my repaymen...","[customer, service, num, not, response, my, re...","[customer, service, num, response, repayment, ...","[custom, servic, num, respons, repay, month, a...","[customer, service, num, response, repayment, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[-1.0],negative,[support]
2,"please don't download this app this app , they...","[please, don, t, download, this, app, this, ap...","[download, threatened, payment, contact, send,...","[download, threaten, payment, contact, send, r...","[download, threaten, payment, contact, send, r...","[0.0, -2.0, 0.0, 0.0, 0.0, 0.0]",[-2.0],negative,[]
3,what the hell is this once i get 1000 loan fro...,"[what, the, hell, is, this, once, i, get, 1000...","[apply, message, match, criteria, ffffffkkkkkk]","[appli, messag, match, criteria, ffffffkkkkkk]","[apply, message, match, criteria, ffffffkkkkkk]","[0.0, 0.0, 0.0, 0.0, 0.0]",[0.0],neutral,[]
4,"after making pre closure for all my loan, stil...","[after, making, pre, closure, for, all, my, lo...","[pre, closure, emi, frustrating, friends]","[pre, closur, emi, frustrat, friend]","[pre, closure, emi, frustrate, friends]","[0.0, 0.0, 0.0, -2.0, 0.0]",[-2.0],negative,[]


In [None]:
# Number of positive tokens per comment. The length is taken column-wise instead of
# applying a lambda over every row of the whole frame.
df['Bing Positive count'] = df['Bing Positive Tokens'].map(len)
df.head()

Unnamed: 0,Comments,Tokens,Filtered Tokens,Stemmed Filtered Tokens,Lemmatized Filtered Tokens,Afinn Score of Lem,Afinn Overall Score of Lem,Afinn Sentiment,Bing Positive Tokens,Bing Positive count
0,i will not pay any money in future please keep...,"[i, will, not, pay, any, money, in, future, pl...","[future, mind, guys, reference, future, mind, ...","[futur, mind, guy, refer, futur, mind, 24, hou...","[future, mind, guy, reference, future, mind, 2...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, ...",[0.0],neutral,[],0
1,"customer service num not response, my repaymen...","[customer, service, num, not, response, my, re...","[customer, service, num, response, repayment, ...","[custom, servic, num, respons, repay, month, a...","[customer, service, num, response, repayment, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[-1.0],negative,[support],1
2,"please don't download this app this app , they...","[please, don, t, download, this, app, this, ap...","[download, threatened, payment, contact, send,...","[download, threaten, payment, contact, send, r...","[download, threaten, payment, contact, send, r...","[0.0, -2.0, 0.0, 0.0, 0.0, 0.0]",[-2.0],negative,[],0
3,what the hell is this once i get 1000 loan fro...,"[what, the, hell, is, this, once, i, get, 1000...","[apply, message, match, criteria, ffffffkkkkkk]","[appli, messag, match, criteria, ffffffkkkkkk]","[apply, message, match, criteria, ffffffkkkkkk]","[0.0, 0.0, 0.0, 0.0, 0.0]",[0.0],neutral,[],0
4,"after making pre closure for all my loan, stil...","[after, making, pre, closure, for, all, my, lo...","[pre, closure, emi, frustrating, friends]","[pre, closur, emi, frustrat, friend]","[pre, closure, emi, frustrate, friends]","[0.0, 0.0, 0.0, -2.0, 0.0]",[-2.0],negative,[],0


###Uploading negative words to list

In [None]:
# Ask the user to upload the negative-word list; `file_upload` is rebound again.
file_upload = files.upload()

Saving negative.txt to negative (1).txt


In [None]:
# Read the negative-word list (one word per line) into a single-column frame.
# header=None stops pandas from treating the first word as a column name, which
# would silently drop it from the lexicon.
# NOTE(review): assumes the file has no header line - confirm against the file.
negative_df = pd.read_csv(
    io.StringIO(file_upload['negative.txt'].decode('utf-8')),
    header=None,
)
# 2-D array with one negative word per row.
negative_words = negative_df.iloc[::].values
print(negative_words)

[['2-faces']
 ['abnormal']
 ['abolish']
 ...
 ['zealous']
 ['zealously']
 ['zombie']]


###Negative word count

In [None]:
# Flatten the negative lexicon into a set once, so each token is checked with a hash
# look-up instead of being compared against the whole array.
negative_word_set = set(np.ravel(negative_words).tolist())
# Keep the lemmatized tokens that appear in the negative lexicon.
df['Bing Negative Tokens'] = df['Lemmatized Filtered Tokens'].apply(
    lambda tokens: [token for token in tokens if token in negative_word_set]
)
df.head()

Unnamed: 0,Comments,Tokens,Filtered Tokens,Stemmed Filtered Tokens,Lemmatized Filtered Tokens,Afinn Score of Lem,Afinn Overall Score of Lem,Afinn Sentiment,Bing Positive Tokens,Bing Positive count,Bing Negative Tokens
0,i will not pay any money in future please keep...,"[i, will, not, pay, any, money, in, future, pl...","[future, mind, guys, reference, future, mind, ...","[futur, mind, guy, refer, futur, mind, 24, hou...","[future, mind, guy, reference, future, mind, 2...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, ...",[0.0],neutral,[],0,[mistake]
1,"customer service num not response, my repaymen...","[customer, service, num, not, response, my, re...","[customer, service, num, response, repayment, ...","[custom, servic, num, respons, repay, month, a...","[customer, service, num, response, repayment, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[-1.0],negative,[support],1,"[issue, penalty]"
2,"please don't download this app this app , they...","[please, don, t, download, this, app, this, ap...","[download, threatened, payment, contact, send,...","[download, threaten, payment, contact, send, r...","[download, threaten, payment, contact, send, r...","[0.0, -2.0, 0.0, 0.0, 0.0, 0.0]",[-2.0],negative,[],0,[threaten]
3,what the hell is this once i get 1000 loan fro...,"[what, the, hell, is, this, once, i, get, 1000...","[apply, message, match, criteria, ffffffkkkkkk]","[appli, messag, match, criteria, ffffffkkkkkk]","[apply, message, match, criteria, ffffffkkkkkk]","[0.0, 0.0, 0.0, 0.0, 0.0]",[0.0],neutral,[],0,[]
4,"after making pre closure for all my loan, stil...","[after, making, pre, closure, for, all, my, lo...","[pre, closure, emi, frustrating, friends]","[pre, closur, emi, frustrat, friend]","[pre, closure, emi, frustrate, friends]","[0.0, 0.0, 0.0, -2.0, 0.0]",[-2.0],negative,[],0,[frustrate]


In [None]:
# Number of negative tokens per comment. The length is taken column-wise instead of
# applying a lambda over every row of the whole frame.
df['Bing Negative count'] = df['Bing Negative Tokens'].map(len)
df.head()

Unnamed: 0,Comments,Tokens,Filtered Tokens,Stemmed Filtered Tokens,Lemmatized Filtered Tokens,Afinn Score of Lem,Afinn Overall Score of Lem,Afinn Sentiment,Bing Positive Tokens,Bing Positive count,Bing Negative Tokens,Bing Negative count
0,i will not pay any money in future please keep...,"[i, will, not, pay, any, money, in, future, pl...","[future, mind, guys, reference, future, mind, ...","[futur, mind, guy, refer, futur, mind, 24, hou...","[future, mind, guy, reference, future, mind, 2...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, ...",[0.0],neutral,[],0,[mistake],1
1,"customer service num not response, my repaymen...","[customer, service, num, not, response, my, re...","[customer, service, num, response, repayment, ...","[custom, servic, num, respons, repay, month, a...","[customer, service, num, response, repayment, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[-1.0],negative,[support],1,"[issue, penalty]",2
2,"please don't download this app this app , they...","[please, don, t, download, this, app, this, ap...","[download, threatened, payment, contact, send,...","[download, threaten, payment, contact, send, r...","[download, threaten, payment, contact, send, r...","[0.0, -2.0, 0.0, 0.0, 0.0, 0.0]",[-2.0],negative,[],0,[threaten],1
3,what the hell is this once i get 1000 loan fro...,"[what, the, hell, is, this, once, i, get, 1000...","[apply, message, match, criteria, ffffffkkkkkk]","[appli, messag, match, criteria, ffffffkkkkkk]","[apply, message, match, criteria, ffffffkkkkkk]","[0.0, 0.0, 0.0, 0.0, 0.0]",[0.0],neutral,[],0,[],0
4,"after making pre closure for all my loan, stil...","[after, making, pre, closure, for, all, my, lo...","[pre, closure, emi, frustrating, friends]","[pre, closur, emi, frustrat, friend]","[pre, closure, emi, frustrate, friends]","[0.0, 0.0, 0.0, -2.0, 0.0]",[-2.0],negative,[],0,[frustrate],1


###Sentiment

In [None]:
# Net score per comment: positive-token count minus negative-token count.
positive_counts = df['Bing Positive count']
negative_counts = df['Bing Negative count']
df['Bing Score'] = positive_counts.sub(negative_counts)
df.head()

Unnamed: 0,Comments,Tokens,Filtered Tokens,Stemmed Filtered Tokens,Lemmatized Filtered Tokens,Afinn Score of Lem,Afinn Overall Score of Lem,Afinn Sentiment,Bing Positive Tokens,Bing Positive count,Bing Negative Tokens,Bing Negative count,Bing Score
0,i will not pay any money in future please keep...,"[i, will, not, pay, any, money, in, future, pl...","[future, mind, guys, reference, future, mind, ...","[futur, mind, guy, refer, futur, mind, 24, hou...","[future, mind, guy, reference, future, mind, 2...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, ...",[0.0],neutral,[],0,[mistake],1,-1
1,"customer service num not response, my repaymen...","[customer, service, num, not, response, my, re...","[customer, service, num, response, repayment, ...","[custom, servic, num, respons, repay, month, a...","[customer, service, num, response, repayment, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[-1.0],negative,[support],1,"[issue, penalty]",2,-1
2,"please don't download this app this app , they...","[please, don, t, download, this, app, this, ap...","[download, threatened, payment, contact, send,...","[download, threaten, payment, contact, send, r...","[download, threaten, payment, contact, send, r...","[0.0, -2.0, 0.0, 0.0, 0.0, 0.0]",[-2.0],negative,[],0,[threaten],1,-1
3,what the hell is this once i get 1000 loan fro...,"[what, the, hell, is, this, once, i, get, 1000...","[apply, message, match, criteria, ffffffkkkkkk]","[appli, messag, match, criteria, ffffffkkkkkk]","[apply, message, match, criteria, ffffffkkkkkk]","[0.0, 0.0, 0.0, 0.0, 0.0]",[0.0],neutral,[],0,[],0,0
4,"after making pre closure for all my loan, stil...","[after, making, pre, closure, for, all, my, lo...","[pre, closure, emi, frustrating, friends]","[pre, closur, emi, frustrat, friend]","[pre, closure, emi, frustrate, friends]","[0.0, 0.0, 0.0, -2.0, 0.0]",[-2.0],negative,[],0,[frustrate],1,-1


In [None]:
def _bing_label(score):
    """Map a net Bing score to 'positive' (> 0), 'negative' (< 0) or 'neutral'."""
    if score > 0:
        return 'positive'
    if score < 0:
        return 'negative'
    return 'neutral'


df['Bing Sentiment'] = df['Bing Score'].map(_bing_label)
df.head()

Unnamed: 0,Comments,Tokens,Filtered Tokens,Stemmed Filtered Tokens,Lemmatized Filtered Tokens,Afinn Score of Lem,Afinn Overall Score of Lem,Afinn Sentiment,Bing Positive Tokens,Bing Positive count,Bing Negative Tokens,Bing Negative count,Bing Score,Bing Sentiment
0,i will not pay any money in future please keep...,"[i, will, not, pay, any, money, in, future, pl...","[future, mind, guys, reference, future, mind, ...","[futur, mind, guy, refer, futur, mind, 24, hou...","[future, mind, guy, reference, future, mind, 2...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, ...",[0.0],neutral,[],0,[mistake],1,-1,negative
1,"customer service num not response, my repaymen...","[customer, service, num, not, response, my, re...","[customer, service, num, response, repayment, ...","[custom, servic, num, respons, repay, month, a...","[customer, service, num, response, repayment, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[-1.0],negative,[support],1,"[issue, penalty]",2,-1,negative
2,"please don't download this app this app , they...","[please, don, t, download, this, app, this, ap...","[download, threatened, payment, contact, send,...","[download, threaten, payment, contact, send, r...","[download, threaten, payment, contact, send, r...","[0.0, -2.0, 0.0, 0.0, 0.0, 0.0]",[-2.0],negative,[],0,[threaten],1,-1,negative
3,what the hell is this once i get 1000 loan fro...,"[what, the, hell, is, this, once, i, get, 1000...","[apply, message, match, criteria, ffffffkkkkkk]","[appli, messag, match, criteria, ffffffkkkkkk]","[apply, message, match, criteria, ffffffkkkkkk]","[0.0, 0.0, 0.0, 0.0, 0.0]",[0.0],neutral,[],0,[],0,0,neutral
4,"after making pre closure for all my loan, stil...","[after, making, pre, closure, for, all, my, lo...","[pre, closure, emi, frustrating, friends]","[pre, closur, emi, frustrat, friend]","[pre, closure, emi, frustrate, friends]","[0.0, 0.0, 0.0, -2.0, 0.0]",[-2.0],negative,[],0,[frustrate],1,-1,negative


###Checking unknown tokens

In [None]:
from itertools import chain

# One combined list: every row of the positive lexicon followed by every row of the
# negative lexicon (each entry is still a one-element row from the source arrays).
word_cloud = list(chain.from_iterable((positive_words, negative_words)))
print(word_cloud)



In [None]:
# Flatten the combined lexicon into a set of words once. Testing a token against the
# list of one-element rows directly is a linear scan per token and relies on the
# truthiness of single-element array comparisons.
known_words = {word for entry in word_cloud for word in np.ravel(entry).tolist()}
# Tokens that appear in neither the positive nor the negative lexicon.
df['Unknown Tokens'] = df['Lemmatized Filtered Tokens'].apply(
    lambda tokens: [token for token in tokens if token not in known_words]
)
df.head()

Unnamed: 0,Comments,Tokens,Filtered Tokens,Stemmed Filtered Tokens,Lemmatized Filtered Tokens,Afinn Score of Lem,Afinn Overall Score of Lem,Afinn Sentiment,Bing Positive Tokens,Bing Positive count,Bing Negative Tokens,Bing Negative count,Bing Score,Bing Sentiment,Unknown Tokens
0,i will not pay any money in future please keep...,"[i, will, not, pay, any, money, in, future, pl...","[future, mind, guys, reference, future, mind, ...","[futur, mind, guy, refer, futur, mind, 24, hou...","[future, mind, guy, reference, future, mind, 2...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, ...",[0.0],neutral,[],0,[mistake],1,-1,negative,"[future, mind, guy, reference, future, mind, 2..."
1,"customer service num not response, my repaymen...","[customer, service, num, not, response, my, re...","[customer, service, num, response, repayment, ...","[custom, servic, num, respons, repay, month, a...","[customer, service, num, response, repayment, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[-1.0],negative,[support],1,"[issue, penalty]",2,-1,negative,"[customer, service, num, response, repayment, ..."
2,"please don't download this app this app , they...","[please, don, t, download, this, app, this, ap...","[download, threatened, payment, contact, send,...","[download, threaten, payment, contact, send, r...","[download, threaten, payment, contact, send, r...","[0.0, -2.0, 0.0, 0.0, 0.0, 0.0]",[-2.0],negative,[],0,[threaten],1,-1,negative,"[download, payment, contact, send, reminder]"
3,what the hell is this once i get 1000 loan fro...,"[what, the, hell, is, this, once, i, get, 1000...","[apply, message, match, criteria, ffffffkkkkkk]","[appli, messag, match, criteria, ffffffkkkkkk]","[apply, message, match, criteria, ffffffkkkkkk]","[0.0, 0.0, 0.0, 0.0, 0.0]",[0.0],neutral,[],0,[],0,0,neutral,"[apply, message, match, criteria, ffffffkkkkkk]"
4,"after making pre closure for all my loan, stil...","[after, making, pre, closure, for, all, my, lo...","[pre, closure, emi, frustrating, friends]","[pre, closur, emi, frustrat, friend]","[pre, closure, emi, frustrate, friends]","[0.0, 0.0, 0.0, -2.0, 0.0]",[-2.0],negative,[],0,[frustrate],1,-1,negative,"[pre, closure, emi, friends]"


#Summary

In [None]:
# Copy both derived sentiment labels from the working frame onto the original dataset.
for sentiment_column in ('Afinn Sentiment', 'Bing Sentiment'):
    dataset[sentiment_column] = df[sentiment_column]
dataset.head()

Unnamed: 0,Sentiment,Comments,Afinn Sentiment,Bing Sentiment
0,0,I will not pay any money in future please keep...,neutral,negative
1,0,"Customer service num not response, My repaymen...",negative,negative
2,0,"Please don't download this app this App , they...",negative,negative
3,0,What the hell is this once I get 1000 loan fro...,neutral,neutral
4,0,"After making pre closure for all my loan, stil...",negative,negative


In [None]:
# Label distribution produced by each of the two lexicon approaches.
afinn_label_counts = df['Afinn Sentiment'].value_counts()
bing_label_counts = df['Bing Sentiment'].value_counts()
print("\nCounting Afinn Sentiments\n", afinn_label_counts)
print("\nCounting Bing Sentiments\n", bing_label_counts)


Counting Afinn Sentiments
 neutral     281
positive    277
negative    197
Name: Afinn Sentiment, dtype: int64

Counting Bing Sentiments
 neutral     329
negative    232
positive    194
Name: Bing Sentiment, dtype: int64


#Export the processed dataframe to csv

In [None]:
# Write the full working frame to a CSV file, then pass that file to files.download.
processed_csv_name = 'processed tokens with Afinn & Bing sentiments.csv'
df.to_csv(processed_csv_name)
files.download(processed_csv_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Write the dataset (now carrying both sentiment labels) to a CSV file, then pass
# that file to files.download.
summary_csv_name = 'summary.csv'
dataset.to_csv(summary_csv_name)
files.download(summary_csv_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>