In [48]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from tensorflow.keras.utils import pad_sequences
### nlp library
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from tensorflow.keras.preprocessing.text import Tokenizer

### deep learning library 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, SimpleRNN

from sklearn.model_selection import train_test_split
import re
import string
from textblob import TextBlob
import joblib


In [49]:
test_data=open('Data/test.txt','r').readlines()


In [50]:
train_data=open('Data/train.txt','r').readlines()


In [51]:
val_data=open('Data/val.txt','r').readlines()


In [52]:
total_data=test_data+train_data+val_data

In [53]:
len(total_data)

20000

In [54]:
total_data

['im feeling rather rotten so im not very ambitious right now;sadness\n',
 'im updating my blog because i feel shitty;sadness\n',
 'i never make her separate from me because i don t ever want her to feel like i m ashamed with her;sadness\n',
 'i left with my bouquet of red and yellow tulips under my arm feeling slightly more optimistic than when i arrived;joy\n',
 'i was feeling a little vain when i did this one;sadness\n',
 'i cant walk into a shop anywhere where i do not feel uncomfortable;fear\n',
 'i felt anger when at the end of a telephone call;anger\n',
 'i explain why i clung to a relationship with a boy who was in many ways immature and uncommitted despite the excitement i should have been feeling for getting accepted into the masters program at the university of virginia;joy\n',
 'i like to have the same breathless feeling as a reader eager to see what will happen next;joy\n',
 'i jest i feel grumpy tired and pre menstrual which i probably am but then again its only been a we

In [55]:
x = []
y = []
for item in total_data:
    text , label=item.split(';')
    label=label.replace('\n','')
    x.append(text)
    y.append(label)



In [56]:
y  # depend on sentiment

['sadness',
 'sadness',
 'sadness',
 'joy',
 'sadness',
 'fear',
 'anger',
 'joy',
 'joy',
 'anger',
 'fear',
 'sadness',
 'fear',
 'joy',
 'love',
 'sadness',
 'joy',
 'sadness',
 'anger',
 'joy',
 'sadness',
 'joy',
 'joy',
 'sadness',
 'sadness',
 'fear',
 'anger',
 'sadness',
 'fear',
 'anger',
 'fear',
 'anger',
 'sadness',
 'anger',
 'sadness',
 'joy',
 'joy',
 'sadness',
 'joy',
 'joy',
 'anger',
 'sadness',
 'joy',
 'sadness',
 'joy',
 'anger',
 'joy',
 'joy',
 'fear',
 'fear',
 'sadness',
 'fear',
 'joy',
 'sadness',
 'joy',
 'sadness',
 'sadness',
 'joy',
 'sadness',
 'anger',
 'sadness',
 'sadness',
 'joy',
 'joy',
 'sadness',
 'surprise',
 'sadness',
 'anger',
 'fear',
 'surprise',
 'joy',
 'love',
 'surprise',
 'joy',
 'love',
 'anger',
 'joy',
 'sadness',
 'joy',
 'love',
 'joy',
 'anger',
 'sadness',
 'joy',
 'sadness',
 'sadness',
 'joy',
 'joy',
 'joy',
 'sadness',
 'joy',
 'fear',
 'anger',
 'fear',
 'anger',
 'anger',
 'love',
 'sadness',
 'anger',
 'sadness',
 'sadn

In [57]:
x  # independent variable

['im feeling rather rotten so im not very ambitious right now',
 'im updating my blog because i feel shitty',
 'i never make her separate from me because i don t ever want her to feel like i m ashamed with her',
 'i left with my bouquet of red and yellow tulips under my arm feeling slightly more optimistic than when i arrived',
 'i was feeling a little vain when i did this one',
 'i cant walk into a shop anywhere where i do not feel uncomfortable',
 'i felt anger when at the end of a telephone call',
 'i explain why i clung to a relationship with a boy who was in many ways immature and uncommitted despite the excitement i should have been feeling for getting accepted into the masters program at the university of virginia',
 'i like to have the same breathless feeling as a reader eager to see what will happen next',
 'i jest i feel grumpy tired and pre menstrual which i probably am but then again its only been a week and im about as fit as a walrus on vacation for the summer',
 'i don t

In [58]:
stemmer=PorterStemmer()

In [59]:
def text_preprocessing(text):
    clean_text = []
    for sent in text:
        lower_sent=sent.lower() # lower case
        word_tokens=word_tokenize(lower_sent) # tokenize
        remove_stopwords=[word for word in word_tokens if word not in stopwords.words('english')]  # remove stopwords
        stemmed_sent =[stemmer.stem(word) for word in remove_stopwords] ### stemming

        cleaned_sent=" ".join(stemmed_sent)
        clean_text.append(cleaned_sent)
    return clean_text









In [60]:
clean_text=text_preprocessing(x)

In [61]:
clean_text[0:10]

['im feel rather rotten im ambiti right',
 'im updat blog feel shitti',
 'never make separ ever want feel like asham',
 'left bouquet red yellow tulip arm feel slightli optimist arriv',
 'feel littl vain one',
 'cant walk shop anywher feel uncomfort',
 'felt anger end telephon call',
 'explain clung relationship boy mani way immatur uncommit despit excit feel get accept master program univers virginia',
 'like breathless feel reader eager see happen next',
 'jest feel grumpi tire pre menstrual probabl week im fit walru vacat summer']

In [62]:
## Tokenizer 
Tokenizer=Tokenizer(oov_token='<nothing>') ### out of vocabulary token and it will be replaced with nothing. tokenizer will ignore this word and its is not part of vocabulary 
Tokenizer.fit_on_texts(clean_text)



In [63]:
Tokenizer.word_index

{'<nothing>': 1,
 'feel': 2,
 'like': 3,
 'im': 4,
 'get': 5,
 'time': 6,
 'know': 7,
 'realli': 8,
 'make': 9,
 'go': 10,
 'want': 11,
 'love': 12,
 'littl': 13,
 'think': 14,
 'peopl': 15,
 'day': 16,
 'thing': 17,
 'one': 18,
 'would': 19,
 'even': 20,
 'still': 21,
 'ive': 22,
 'life': 23,
 'bit': 24,
 'way': 25,
 'need': 26,
 'someth': 27,
 'much': 28,
 'dont': 29,
 'work': 30,
 'start': 31,
 'could': 32,
 'say': 33,
 'look': 34,
 'see': 35,
 'tri': 36,
 'back': 37,
 'good': 38,
 'pretti': 39,
 'come': 40,
 'right': 41,
 'alway': 42,
 'help': 43,
 'also': 44,
 'today': 45,
 'year': 46,
 'take': 47,
 'friend': 48,
 'use': 49,
 'around': 50,
 'cant': 51,
 'person': 52,
 'made': 53,
 'though': 54,
 'hate': 55,
 'well': 56,
 'got': 57,
 'happi': 58,
 'thought': 59,
 'someon': 60,
 'didnt': 61,
 'never': 62,
 'felt': 63,
 'find': 64,
 'write': 65,
 'lot': 66,
 'hope': 67,
 'quit': 68,
 'live': 69,
 'week': 70,
 'everi': 71,
 'sure': 72,
 'less': 73,
 'read': 74,
 'enough': 75,
 'give':

In [64]:
total_data[0]

'im feeling rather rotten so im not very ambitious right now;sadness\n'

In [65]:
y 

['sadness',
 'sadness',
 'sadness',
 'joy',
 'sadness',
 'fear',
 'anger',
 'joy',
 'joy',
 'anger',
 'fear',
 'sadness',
 'fear',
 'joy',
 'love',
 'sadness',
 'joy',
 'sadness',
 'anger',
 'joy',
 'sadness',
 'joy',
 'joy',
 'sadness',
 'sadness',
 'fear',
 'anger',
 'sadness',
 'fear',
 'anger',
 'fear',
 'anger',
 'sadness',
 'anger',
 'sadness',
 'joy',
 'joy',
 'sadness',
 'joy',
 'joy',
 'anger',
 'sadness',
 'joy',
 'sadness',
 'joy',
 'anger',
 'joy',
 'joy',
 'fear',
 'fear',
 'sadness',
 'fear',
 'joy',
 'sadness',
 'joy',
 'sadness',
 'sadness',
 'joy',
 'sadness',
 'anger',
 'sadness',
 'sadness',
 'joy',
 'joy',
 'sadness',
 'surprise',
 'sadness',
 'anger',
 'fear',
 'surprise',
 'joy',
 'love',
 'surprise',
 'joy',
 'love',
 'anger',
 'joy',
 'sadness',
 'joy',
 'love',
 'joy',
 'anger',
 'sadness',
 'joy',
 'sadness',
 'sadness',
 'joy',
 'joy',
 'joy',
 'sadness',
 'joy',
 'fear',
 'anger',
 'fear',
 'anger',
 'anger',
 'love',
 'sadness',
 'anger',
 'sadness',
 'sadn

In [66]:
Tokenizer.document_count

20000

In [67]:
sequences=Tokenizer.texts_to_sequences(clean_text)

In [68]:
sequences[0:5]

[[4, 2, 123, 625, 4, 3696, 41],
 [4, 1451, 108, 2, 387],
 [62, 9, 1269, 92, 11, 2, 3, 333],
 [85, 6002, 822, 3184, 4452, 600, 2, 269, 547, 976],
 [2, 13, 472, 18]]

In [69]:
sequences

[[4, 2, 123, 625, 4, 3696, 41],
 [4, 1451, 108, 2, 387],
 [62, 9, 1269, 92, 11, 2, 3, 333],
 [85, 6002, 822, 3184, 4452, 600, 2, 269, 547, 976],
 [2, 13, 472, 18],
 [51, 141, 786, 1304, 2, 395],
 [63, 626, 103, 3697, 127],
 [627,
  4453,
  315,
  396,
  77,
  25,
  2375,
  6003,
  787,
  139,
  2,
  5,
  97,
  1199,
  1042,
  759,
  2844],
 [3, 6004, 2, 653, 565, 35, 115, 202],
 [6005, 2, 654, 216, 1868, 4454, 283, 70, 4, 628, 6006, 1393, 629],
 [2, 311, 234],
 [2, 316, 109, 7, 380, 364, 192, 428, 4455, 1118],
 [777, 727, 3698, 2, 2376, 212],
 [2, 274, 200, 638, 15, 29, 20, 26, 835],
 [64, 1229, 230, 2, 89],
 [2, 550, 4, 72, 4456],
 [2, 13, 566, 45],
 [2, 3, 1394, 19, 655, 3699, 6007, 639],
 [2, 6008, 705, 5, 350, 1499, 872, 897, 481, 9, 918, 1499, 93, 100, 7, 705],
 [2, 3, 822, 2845, 431, 78, 275],
 [4,
  72,
  2,
  750,
  92,
  10,
  81,
  160,
  419,
  158,
  2,
  2217,
  157,
  23,
  802,
  1762,
  802,
  46],
 [2, 3, 22, 887, 7, 77, 397, 1200, 4, 276, 288, 129, 13, 728],
 [3185,
 

In [70]:
sequences=pad_sequences(sequences, maxlen= 35, padding='post') ## padding

In [71]:
sequences[0:5]

array([[   4,    2,  123,  625,    4, 3696,   41,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0],
       [   4, 1451,  108,    2,  387,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0],
       [  62,    9, 1269,   92,   11,    2,    3,  333,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0],
       [  85, 6002,  822, 3184, 4452,  600,    2,  269,  547,  976,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0],
       [   2,   13,  472,   

In [72]:
unique_labels=list(pd.Series(np.array(y)).unique())

In [73]:
unique_labels

['sadness', 'joy', 'fear', 'anger', 'love', 'surprise']

In [74]:
labal_dict = {
    'sadness':0, 'joy':1, 'fear':2, 'anger':3, 'love':4, 'surprise':5
}

In [75]:
def label_encoding(labels):
    labels = []
    for x in labels:
        labels.append(labal_dict[x])
        labels =np.array(labels)
    return labels

In [76]:
labels=label_encoding(y)

In [77]:
labels

[]

In [78]:
sequences.shape

(20000, 35)

In [81]:
len(labels)

0

In [87]:
import numpy as np
labels=np.array(labels).shape

In [90]:
train_x, test_x, train_y, test_y = train_test_split(sequences, labals, test_size=0.2)

In [91]:
train_x.shape, test_x.shape, train_y.shape, test_y.shape

((16000, 35), (4000, 35), (16000,), (4000,))

In [92]:
model =Sequential()

In [93]:
model.add(SimpleRNN(64, return_sequences=False, input_shape=(35,1)))
# model.add(SimpleRNN(128, return_sequences=False, input_shape=(35,1)))

model.add(Dense(6, activation='softmax'))

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()


  super().__init__(**kwargs)


In [94]:
model.fit(train_x, train_y, epochs=10, validation_data=(test_x, test_y))

Epoch 1/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.3232 - loss: 1.6007 - val_accuracy: 0.3183 - val_loss: 1.5856
Epoch 2/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.3293 - loss: 1.5798 - val_accuracy: 0.3390 - val_loss: 1.5847
Epoch 3/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.3334 - loss: 1.5750 - val_accuracy: 0.2792 - val_loss: 1.5900
Epoch 4/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.3281 - loss: 1.5802 - val_accuracy: 0.3388 - val_loss: 1.5823
Epoch 5/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.3339 - loss: 1.5732 - val_accuracy: 0.3413 - val_loss: 1.5830
Epoch 6/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.3269 - loss: 1.5719 - val_accuracy: 0.3410 - val_loss: 1.5845
Epoch 7/10
[1m500/500[0m 

<keras.src.callbacks.history.History at 0x1ef439cfe50>

In [95]:
pred=model.predict(np.expand_dims(sequences[0], axis=0))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 144ms/step


In [96]:
np.argmax(pred)

np.int64(1)

In [97]:
pred

array([[0.2967234 , 0.33827177, 0.10843373, 0.13578784, 0.08859469,
        0.03218859]], dtype=float32)

In [98]:
pred=model.predict(np.expand_dims(sequences[14], axis=0))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step


In [99]:
unique_labels[np.argmax(pred)]

'joy'

In [100]:
model.save('model.h5')



In [101]:
## load model
model =tf.keras.models.load_model('model.h5')



In [102]:
pred =model.predict(np.expand_dims(sequences[14], axis=0))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step


In [103]:
np.argmax(pred)

np.int64(1)