In [17]:
import nltk
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.stem import PorterStemmer

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier



import warnings
warnings.filterwarnings('ignore')

nltk.download('stopwords')
nltk.download('punkt')

print("Libraries imported")


Libraries imported


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\TusharManna\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\TusharManna\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [18]:
file_path = 'c:\work\learnai\SentimentAnalysis\Emotion_classify_Data.csv'
# Read the CSV file
data = pd.read_csv(file_path)

# Display the first few rows of the dataframe
print(data.head())

                                             Comment Emotion
0  i seriously hate one subject to death but now ...    fear
1                 im so full of life i feel appalled   anger
2  i sit here to write i start to dig out my feel...    fear
3  ive been really angry with r and i feel like a...     joy
4  i feel suspicious if there is no one outside l...    fear


In [14]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5937 entries, 0 to 5936
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Comment  5937 non-null   object
 1   Emotion  5937 non-null   object
dtypes: object(2)
memory usage: 92.9+ KB


In [16]:
data['Emotion'].value_counts()

Emotion
anger    2000
joy      2000
fear     1937
Name: count, dtype: int64

In [23]:
"use this function to get preprocessed text"
def preprocess_text(text):
    # Tokenize the text into words
    tokens = word_tokenize(text.lower())

    
    # Remove stop words
    stop_words = set(stopwords.words('english'))
    filtered_tokens = [token for token in tokens if token not in stop_words]
    
    
    # Perform stemming
    stemmer = PorterStemmer()
    stemmed_tokens = [stemmer.stem(token) for token in filtered_tokens]
    print(stemmed_tokens)
    
    # Perform lemmatization
    lemmatizer = WordNetLemmatizer()
    lemmatized_tokens = [lemmatizer.lemmatize(token) for token in stemmed_tokens]
    
    # Join the tokens back into a single string
    preprocessed_text = ' '.join(lemmatized_tokens)
    
    return preprocessed_text    

In [28]:
data['preprocessed_comment'] = data['Comment'].apply(preprocess_text)

['serious', 'hate', 'one', 'subject', 'death', 'feel', 'reluct', 'drop']
['im', 'full', 'life', 'feel', 'appal']
['sit', 'write', 'start', 'dig', 'feel', 'think', 'afraid', 'accept', 'possibl', 'might', 'make']
['ive', 'realli', 'angri', 'r', 'feel', 'like', 'idiot', 'trust', 'first', 'place']
['feel', 'suspici', 'one', 'outsid', 'like', 'raptur', 'happen', 'someth']
['feel', 'jealou', 'becasu', 'want', 'kind', 'love', 'true', 'connect', 'two', 'soul', 'want']
['friend', 'mine', 'keep', 'tell', 'morbid', 'thing', 'happen', 'dog']
['final', 'fell', 'asleep', 'feel', 'angri', 'useless', 'still', 'full', 'anxieti']
['feel', 'bit', 'annoy', 'antsi', 'good', 'way']
['feel', 'like', 'regain', 'anoth', 'vital', 'part', 'life', 'live']
['feel', 'bit', 'like', 'franz', 'liebkind', 'produc', 'mani', 'peopl', 'know', 'fuhrer', 'terrif', 'dancer']
['feel', 'start', 'didnt', 'want', 'move', 'much', 'realli', 'glad', 'experi', 'glimps', 'sort', 'vibrant', 'energi', 'gain', 'year']
['bitten', 'dog']


In [29]:
data.head()

Unnamed: 0,Comment,Emotion,preprocessed_comment
0,i seriously hate one subject to death but now ...,fear,serious hate one subject death feel reluct drop
1,im so full of life i feel appalled,anger,im full life feel appal
2,i sit here to write i start to dig out my feel...,fear,sit write start dig feel think afraid accept p...
3,ive been really angry with r and i feel like a...,joy,ive realli angri r feel like idiot trust first...
4,i feel suspicious if there is no one outside l...,fear,feel suspici one outsid like raptur happen someth


In [31]:
encoder = LabelEncoder()
data['emotion_num'] = encoder.fit_transform(data['Emotion'])

In [32]:
data.head()

Unnamed: 0,Comment,Emotion,preprocessed_comment,emotion_num
0,i seriously hate one subject to death but now ...,fear,serious hate one subject death feel reluct drop,1
1,im so full of life i feel appalled,anger,im full life feel appal,0
2,i sit here to write i start to dig out my feel...,fear,sit write start dig feel think afraid accept p...,1
3,ive been really angry with r and i feel like a...,joy,ive realli angri r feel like idiot trust first...,2
4,i feel suspicious if there is no one outside l...,fear,feel suspici one outsid like raptur happen someth,1


In [34]:
X = data['preprocessed_comment']
y = data['emotion_num']

In [35]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [36]:
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(x_train)
X_test = vectorizer.transform(x_test)

In [43]:
#use random forest classifier
classifier = RandomForestClassifier()
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print(classification_report(y_test, y_pred))


Accuracy: 0.9393939393939394
              precision    recall  f1-score   support

           0       0.91      0.97      0.94       392
           1       0.96      0.90      0.93       416
           2       0.96      0.95      0.95       380

    accuracy                           0.94      1188
   macro avg       0.94      0.94      0.94      1188
weighted avg       0.94      0.94      0.94      1188



In [45]:
test_text = "i am so angry and will hit you"
test_text_preprocessed = preprocess_text(test_text)
print("Preprocessed text:", test_text_preprocessed)
test_text_vectorized = vectorizer.transform([test_text_preprocessed])
print("Vectorized text:", test_text_vectorized)
prediction = classifier.predict(test_text_vectorized)
print("Predicted emotion:", encoder.inverse_transform(prediction))

['angri', 'hit']
Preprocessed text: angri hit
Vectorized text:   (0, 2273)	0.7891746699945269
  (0, 188)	0.6141688206340579
Predicted emotion: ['anger']
