In [23]:
import pandas as pd

In [24]:
#load dataset
df = pd.read_csv("emotion.csv", sep=',')
df.head(10)

Unnamed: 0,Text,Emotion
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger
5,ive been feeling a little burdened lately wasn...,sadness
6,ive been taking or milligrams or times recomme...,surprise
7,i feel as confused about life as a teenager or...,fear
8,i have been with petronas for years i feel tha...,happy
9,i feel romantic too,love


In [25]:
#get total data
df.shape

(21459, 2)

In [26]:
#get data info
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21459 entries, 0 to 21458
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Text     21459 non-null  object
 1   Emotion  21459 non-null  object
dtypes: object(2)
memory usage: 335.4+ KB


In [27]:
#get data count by categories
df.Emotion.value_counts()

happy       7029
sadness     6265
anger       2993
fear        2652
love        1641
surprise     879
Name: Emotion, dtype: int64

**Cleansing Data**

In [28]:
#import and download package
import nltk, os, re , string

from keras.layers import Input, LSTM, Bidirectional, SpatialDropout1D, Dropout
from keras.models import Model
from keras.callbacks import EarlyStopping
from keras.preprocessing.text import Tokenizer, text_to_word_sequence
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical

from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet as wn

nltk.download('wordnet')
nltk.download('stopwords')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [29]:
#lower case words
df.Text = df.Text.apply(lambda x: x.lower())

#remove functuation
def rem_fun(data) :
  return(data.translate(str.maketrans('','', string.punctuation)))
  df.Text = df.Text.apply(lambda x : cleaner(x))

# remove stopword
st_words = stopwords.words()
def stopword(data):
    return(' '.join([w for w in data.split() if w not in st_words ]))
    df.Text = df.Text.apply(lambda x: stopword(x))

df.head(10)

Unnamed: 0,Text,Emotion
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger
5,ive been feeling a little burdened lately wasn...,sadness
6,ive been taking or milligrams or times recomme...,surprise
7,i feel as confused about life as a teenager or...,fear
8,i have been with petronas for years i feel tha...,happy
9,i feel romantic too,love


In [30]:
#data emotion
emotion = pd.get_dummies(df.Emotion)
df_emotion = pd.concat([df, emotion], axis=1)
df_emotion = df_emotion.drop(columns='Emotion')
df_emotion.head(10)

Unnamed: 0,Text,anger,fear,happy,love,sadness,surprise
0,i didnt feel humiliated,0,0,0,0,1,0
1,i can go from feeling so hopeless to so damned...,0,0,0,0,1,0
2,im grabbing a minute to post i feel greedy wrong,1,0,0,0,0,0
3,i am ever feeling nostalgic about the fireplac...,0,0,0,1,0,0
4,i am feeling grouchy,1,0,0,0,0,0
5,ive been feeling a little burdened lately wasn...,0,0,0,0,1,0
6,ive been taking or milligrams or times recomme...,0,0,0,0,0,1
7,i feel as confused about life as a teenager or...,0,1,0,0,0,0
8,i have been with petronas for years i feel tha...,0,0,1,0,0,0
9,i feel romantic too,0,0,0,1,0,0


In [31]:
text = df_emotion['Text'].values
label = df_emotion[['anger','fear','happy','love','sadness','surprise']].values

In [32]:
text

array(['i didnt feel humiliated',
       'i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake',
       'im grabbing a minute to post i feel greedy wrong', ...,
       'vincent was irritated but not dismay',
       'kendall-hume turned back to face the dismayed coup',
       'i am dismayed , but not surpris'], dtype=object)

In [33]:
label

array([[0, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 1, 0],
       [1, 0, 0, 0, 0, 0],
       ...,
       [0, 1, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0]], dtype=uint8)

In [34]:
#Split data into training and validation
from sklearn.model_selection import train_test_split
text_train, text_test, label_train, label_test = train_test_split(text, label, test_size=0.2, shuffle=True)

In [35]:
#Show splitted dataset size
print(text_train.shape)
print(text_test.shape)

(17167,)
(4292,)


In [36]:
#Tokenizer
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

tokenizer = Tokenizer(num_words=5000, oov_token = 'OOV', filters='!"#$%&()*+,-./:;<=>@[\]^_`{|}~ ')
tokenizer.fit_on_texts(text_train)
tokenizer.fit_on_texts(text_test)

sequence_train = tokenizer.texts_to_sequences(text_train)
sequence_test = tokenizer.texts_to_sequences(text_test)

padded_train = pad_sequences(sequence_train)
padded_test = pad_sequences(sequence_test)

In [37]:
#Create Model
import tensorflow as tf
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=5000, output_dim=64),
    tf.keras.layers.LSTM(128),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(6, activation='softmax')
])
model.compile(optimizer='adam', metrics=['accuracy'], loss='categorical_crossentropy')
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, None, 64)          320000    
                                                                 
 lstm_1 (LSTM)               (None, 128)               98816     
                                                                 
 dense_2 (Dense)             (None, 128)               16512     
                                                                 
 dropout_1 (Dropout)         (None, 128)               0         
                                                                 
 dense_3 (Dense)             (None, 6)                 774       
                                                                 
Total params: 436102 (1.66 MB)
Trainable params: 436102 (1.66 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [38]:
#callback to stop training when reach accuracy > 0.9
class myCallback(tf.keras.callbacks.Callback):
  def on_epoch_end(self, epoch, logs={}) :
    if(logs.get('accuracy') > 0.9 and logs.get('val_accuracy') > 0.9) :
      self.model.stop_training = True
callbacks = myCallback()

In [39]:
num_epochs = 50
history = model.fit(padded_train, label_train, epochs=num_epochs,
                    validation_data=(padded_test, label_test), callbacks = [callbacks], verbose=2)

Epoch 1/50
537/537 - 18s - loss: 1.2406 - accuracy: 0.5036 - val_loss: 0.6637 - val_accuracy: 0.7640 - 18s/epoch - 34ms/step
Epoch 2/50
537/537 - 5s - loss: 0.3980 - accuracy: 0.8650 - val_loss: 0.3237 - val_accuracy: 0.8903 - 5s/epoch - 9ms/step
Epoch 3/50
537/537 - 5s - loss: 0.2025 - accuracy: 0.9345 - val_loss: 0.2718 - val_accuracy: 0.9082 - 5s/epoch - 9ms/step
