In [1]:
#Installing Afinn Lexicon
!pip install contractions
!pip install afinn
from afinn import Afinn

You should consider upgrading via the '/Users/victornathanael/opt/anaconda3/bin/python -m pip install --upgrade pip' command.[0m
You should consider upgrading via the '/Users/victornathanael/opt/anaconda3/bin/python -m pip install --upgrade pip' command.[0m


In [2]:
#Importing Libraries for Processing the Data
import numpy as np
import pandas as pd
import re,string,unicodedata
%matplotlib inline
import contractions

In [3]:
#Importing Libraries For Text Preprocessing
import nltk
from nltk.tokenize.toktok import ToktokTokenizer
from nltk.tokenize import word_tokenize
from nltk.stem.lancaster import LancasterStemmer
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import stopwords

In [4]:
#Preparing the Dict
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/victornathanael/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/victornathanael/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/victornathanael/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [5]:
#Import the dataset

data = pd.read_csv("data.csv", encoding= 'unicode_escape')
print(data.head())

  sentiment_from_dataset                                               news
0                neutral  According to Gran , the company has no plans t...
1                neutral  Technopolis plans to develop in stages an area...
2               negative  The international electronic industry company ...
3               positive  With the new production plant the company woul...
4               positive  According to the company 's updated strategy f...


In [6]:
#Remove unused column (Name and Date) and missing values
data = data.dropna()
print(data.head())

  sentiment_from_dataset                                               news
0                neutral  According to Gran , the company has no plans t...
1                neutral  Technopolis plans to develop in stages an area...
2               negative  The international electronic industry company ...
3               positive  With the new production plant the company woul...
4               positive  According to the company 's updated strategy f...


In [7]:
#Preprocessing

#Case Folding
data['news'] = data['news'].str.lower() #Lowercase the sentence

#Tokenizing 
data['news'] = data['news'].str.strip() #Remove Leading Space and Trailing Space

from string import punctuation

def remove_punct(text):
  for punctuations in punctuation:
    text = text.replace(punctuations, '')
  return text

data['news'] = data['news'].apply(remove_punct) #Remove Punctuation

def remove_special_char(text, remove_digits=True):
  pattern = r'[^a-zA-z0-9\s]'
  text = re.sub(pattern, '', text)
  return text

data['news'] = data['news'].apply(remove_special_char) #Remove Symbols or other special characters

def expand_contractions(con_text):
  con_text = contractions.fix(con_text)
  return con_text
  
data['news'] = data['news'].apply(expand_contractions) #Expand English Contractions (Ex : I've -> I have)

def remove_accented_chars(text):
    text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('utf-8', 'ignore')
    return text


data['news'] = data['news'].apply(remove_accented_chars) #Remove macrons & accented characters

print(data.head())

  sentiment_from_dataset                                               news
0                neutral  according to gran  the company has no plans to...
1                neutral  technopolis plans to develop in stages an area...
2               negative  the international electronic industry company ...
3               positive  with the new production plant the company woul...
4               positive  according to the company s updated strategy fo...


In [8]:
#Using Afinn Lexicon to classify the sentence
af = Afinn()

def afinn_sent_analysis(text):
  score = af.score(text)
  return score

#applying the function to Normalized Comments
data['afinn_score'] = [afinn_sent_analysis(comm) for comm in data['news']]

#If Afinn Score is more than 0 : The text contains Positive Sentiment
                #  less than 0 : The text contains Negative Sentiment
                #  equals to 0 : The text contains Neutral Sentiment
        
def afinn_sent_category(score):
  categories = ['positive','negative','neutral']
  if score > 0:
    return categories[0]
  elif score < 0:
    return categories[1]
  else:
    return categories[2]  

data['sentiment_results_afinn'] = [afinn_sent_category(scr) for scr in data['afinn_score']]
print(data)

     sentiment_from_dataset  \
0                   neutral   
1                   neutral   
2                  negative   
3                  positive   
4                  positive   
...                     ...   
4841               negative   
4842                neutral   
4843               negative   
4844               negative   
4845               negative   

                                                   news  afinn_score  \
0     according to gran  the company has no plans to...          0.0   
1     technopolis plans to develop in stages an area...         -1.0   
2     the international electronic industry company ...          0.0   
3     with the new production plant the company woul...          4.0   
4     according to the company s updated strategy fo...          4.0   
...                                                 ...          ...   
4841  london marketwatch  share prices ended lower i...         -3.0   
4842  rinkuskiai s beer sales fell by 65 per cent t

In [9]:
data['comparison'] = (data['sentiment_from_dataset'] == data['sentiment_results_afinn'])
print(data)

     sentiment_from_dataset  \
0                   neutral   
1                   neutral   
2                  negative   
3                  positive   
4                  positive   
...                     ...   
4841               negative   
4842                neutral   
4843               negative   
4844               negative   
4845               negative   

                                                   news  afinn_score  \
0     according to gran  the company has no plans to...          0.0   
1     technopolis plans to develop in stages an area...         -1.0   
2     the international electronic industry company ...          0.0   
3     with the new production plant the company woul...          4.0   
4     according to the company s updated strategy fo...          4.0   
...                                                 ...          ...   
4841  london marketwatch  share prices ended lower i...         -3.0   
4842  rinkuskiai s beer sales fell by 65 per cent t

In [10]:
data["comparison"].value_counts()

True     2881
False    1965
Name: comparison, dtype: int64

In [11]:
accuracy = 2885/4845
print("Accuracy = ", accuracy)

Accuracy =  0.5954592363261094
