# Implementation of LSTM model for training and testing

In [1]:
from tensorflow.keras.models import Sequential,load_model
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.layers import Dropout, Activation, Flatten
import warnings
warnings.filterwarnings("ignore") 

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [3]:
data = pd.read_csv('Corona.csv', encoding='latin-1')

In [4]:
data=data.dropna()

In [5]:
data.columns

Index(['UserName', 'ScreenName', 'Location', 'TweetAt', 'OriginalTweet',
       'Sentiment'],
      dtype='object')

In [6]:
from sklearn.preprocessing import LabelEncoder
var_mod = ['UserName', 'ScreenName', 'Location', 'TweetAt', 'OriginalTweet',
       'Sentiment']
le = LabelEncoder()
for i in var_mod:
    data[i] = le.fit_transform(data[i]).astype(int)

In [7]:
data.head()

Unnamed: 0,UserName,ScreenName,Location,TweetAt,OriginalTweet,Sentiment
0,0,0,1006,0,501,3
1,1,1,1925,0,3857,4
2,2,2,1964,0,1216,4
5,3,3,2229,0,921,4
6,4,4,40,0,1147,4


In [8]:
from sklearn.metrics import confusion_matrix, classification_report, matthews_corrcoef, cohen_kappa_score, accuracy_score, average_precision_score, roc_auc_score

In [9]:
X = data.drop(labels='Sentiment', axis=1)
#Response variable
y = data.loc[:,'Sentiment']    

In [10]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=1, stratify=y)

In [11]:
scaler = MinMaxScaler()
data_training = scaler.fit_transform(X_train)
data_training

array([[0.31257846, 0.31257846, 0.57795699, 0.5       , 0.50878514],
       [0.412001  , 0.412001  , 0.59453405, 0.5       , 0.77058233],
       [0.43635451, 0.43635451, 0.49014337, 0.5       , 0.80898594],
       ...,
       [0.85262365, 0.85262365, 0.76523297, 1.        , 0.55923695],
       [0.72357519, 0.72357519, 0.05421147, 1.        , 0.35190763],
       [0.24504143, 0.24504143, 0.01299283, 0.5       , 0.07128514]])

In [12]:
scaler = MinMaxScaler()
data_testing = scaler.fit_transform(X_test)
data_testing

array([[0.54568273, 0.54568273, 0.03587444, 1.        , 0.39693698],
       [0.24021084, 0.24021084, 0.43004484, 0.5       , 0.81923173],
       [0.8498996 , 0.8498996 , 0.45560538, 1.        , 0.95681647],
       ...,
       [0.46485944, 0.46485944, 0.10358744, 0.5       , 0.06100929],
       [0.64608434, 0.64608434, 0.14618834, 1.        , 0.75370324],
       [0.77635542, 0.77635542, 0.85470852, 1.        , 0.74190309]])

In [13]:
data_training[0:10]

array([[0.31257846, 0.31257846, 0.57795699, 0.5       , 0.50878514],
       [0.412001  , 0.412001  , 0.59453405, 0.5       , 0.77058233],
       [0.43635451, 0.43635451, 0.49014337, 0.5       , 0.80898594],
       [0.50740648, 0.50740648, 0.19668459, 0.5       , 0.72138554],
       [0.36931961, 0.36931961, 0.625     , 0.5       , 0.72213855],
       [0.14612101, 0.14612101, 0.04793907, 0.5       , 0.15763052],
       [0.46974642, 0.46974642, 0.62858423, 0.5       , 0.6814759 ],
       [0.17273412, 0.17273412, 0.73297491, 0.5       , 0.1812249 ],
       [0.14109967, 0.14109967, 0.76836918, 0.5       , 0.93022088],
       [0.26487572, 0.26487572, 0.83422939, 0.5       , 0.45456827]])

In [14]:
data_testing[0:10]

array([[0.54568273, 0.54568273, 0.03587444, 1.        , 0.39693698],
       [0.24021084, 0.24021084, 0.43004484, 0.5       , 0.81923173],
       [0.8498996 , 0.8498996 , 0.45560538, 1.        , 0.95681647],
       [0.0564759 , 0.0564759 , 0.6896861 , 0.        , 0.91965855],
       [0.3438755 , 0.3438755 , 0.08071749, 0.5       , 0.88149636],
       [0.16465863, 0.16465863, 0.48251121, 0.5       , 0.60582476],
       [0.71009036, 0.71009036, 0.42825112, 1.        , 0.43509917],
       [0.35768072, 0.35768072, 0.27443946, 0.5       , 0.42028622],
       [0.96209839, 0.96209839, 0.45784753, 1.        , 0.66834045],
       [0.53413655, 0.53413655, 0.23587444, 1.        , 0.25307557]])

In [15]:
X_train1 = []
y_train1 = []

In [16]:
for i in range(60, data_training.shape[0]):
    X_train1.append(data_training[i-60:i])
    y_train1.append(data_training[i, 0])

In [17]:
X_train, y_train = np.array(X_train1), np.array(y_train1)

In [18]:
X_train.shape

(1933, 60, 5)

In [19]:
X_test1 = []
y_test1 = []

In [20]:
for i in range(60, data_training.shape[0]):
    X_train1.append(data_training[i-60:i])
    y_train1.append(data_training[i, 0])

In [21]:
X_train, y_train = np.array(X_train1), np.array(y_train1)

In [22]:
X_train.shape

(3866, 60, 5)

In [23]:
X_test1 = []
y_test1 = []

In [24]:
for i in range(60, data_testing.shape[0]):
    X_test1.append(data_testing[i-60:i])
    y_test1.append(data_testing[i, 0])

In [25]:
X_test, y_test = np.array(X_test1), np.array(y_test1)

In [26]:
X_test.shape

(1933, 60, 5)

In [27]:
# Initialising the RNN
model = Sequential()
# Adding the first LSTM layer and some Dropout regularisation
model.add(LSTM(24, return_sequences=True,input_shape=(X_train.shape[1], X_train.shape[2])))  # returns a sequence of vectors of dimension 64
model.add(Dropout(0.2))

# Adding a second LSTM layer and some Dropout regularisation
model.add(LSTM(units = 50))
#model.add(Dropout(0.2))

model.add(Dense(20,activation='relu'))


# Adding the output layer
#model.add(Dense(1, activation="linear"))
model.add(Dense(5, activation = 'softmax'))

#model.add(Dense(units = 1))
# Compiling the RNN

model.compile(optimizer = 'adam', loss = 'mean_squared_error',metrics=['accuracy'])

history=model.fit(X_train, y_train,batch_size=32, epochs=10)
# Model summary for number of parameters use in the algorithm 
model.summary()

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 60, 24)            2880      
                                                                 
 dropout (Dropout)           (None, 60, 24)            0         
                                                                 
 lstm_1 (LSTM)               (None, 50)                15000     
                                                                 
 dense (Dense)               (None, 20)                1020      
                                                                 
 dense_1 (Dense)             (None, 5)                 105       
                                                                 
Total params: 19,005
Trainable params: 19,005
Non-trainable params: 0
_______

In [28]:
scores = model.predict(X_test)

In [29]:
import math, time

In [30]:
print("")
trainScore = model.evaluate(X_train, y_train, verbose=0)
print('Train Score: %.2f MSE (%.2f RMSE)' % (trainScore[0], math.sqrt(trainScore[0])))
print("")
testScore = model.evaluate(X_test, y_test, verbose=0)
print('Test Score: %.2f MSE (%.2f RMSE)' % (testScore[0], math.sqrt(testScore[0])))


Train Score: 0.18 MSE (0.42 RMSE)

Test Score: 0.17 MSE (0.41 RMSE)
