In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb

In [None]:
data = pd.read_csv("https://raw.githubusercontent.com/MuhammadYaseenKhan/Urdu-Sentiment-Corpus/master/urdu-sentiment-corpus-v1.tsv", sep="\t")
data

Unnamed: 0,Tweet,Class
0,میں نے ایٹم بم بنایا ھے ۔۔۔۔او بھائی ایٹم بمب ...,P
1,چندے سے انقلاب اور عمران خان وزیر اعظم نہیں بن...,N
2,ٹویٹر کا خیال کیسے آیا ؟,O
3,"سرچ انجن گوگل کے نائب صدر نے فضا میں ، 130,000...",P
4,ابھی تک اسکی لہریں کبھی کبھی آ جاتی ہیں یار :أْ,P
...,...,...
995,اُس آدمی نے اِس سالار کو کافی معقول ٹپ دی ہے ۔,P
996,چچا غالب کی روح سے معذرت کے ساتھہم نے مانا کہ ...,P
997,واہ جناب واہ! اچھی رہی۔ جناب خود کو فرشتہ سمجو...,P
998,اسلام آباد :پی اے ٹی کا دھرنا ختم، صفائی کے کا...,P


In [None]:
data['Class'].value_counts()

N    499
P    480
O     20
Name: Class, dtype: int64

**Dropping values with O tag**

In [None]:
specific_value = 'O'
data = data[data['Class'] != specific_value]
data

Unnamed: 0,Tweet,Class
0,میں نے ایٹم بم بنایا ھے ۔۔۔۔او بھائی ایٹم بمب ...,P
1,چندے سے انقلاب اور عمران خان وزیر اعظم نہیں بن...,N
3,"سرچ انجن گوگل کے نائب صدر نے فضا میں ، 130,000...",P
4,ابھی تک اسکی لہریں کبھی کبھی آ جاتی ہیں یار :أْ,P
5,گندی زبان اور گٹر جیسے دماغ والے جاهل جیالے ه...,N
...,...,...
995,اُس آدمی نے اِس سالار کو کافی معقول ٹپ دی ہے ۔,P
996,چچا غالب کی روح سے معذرت کے ساتھہم نے مانا کہ ...,P
997,واہ جناب واہ! اچھی رہی۔ جناب خود کو فرشتہ سمجو...,P
998,اسلام آباد :پی اے ٹی کا دھرنا ختم، صفائی کے کا...,P


In [None]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from keras.models import Sequential
from keras.layers import Dense, Embedding, SimpleRNN, GRU, LSTM, Bidirectional
from keras.layers import Flatten, Dropout
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


**Tokenizing text**

In [None]:
X=data['Tweet']
tokenize=Tokenizer()
tokenize.fit_on_texts(X)
X = tokenize.texts_to_sequences(X)
X

[[2,
  10,
  724,
  725,
  268,
  21,
  1867,
  180,
  724,
  1868,
  1869,
  1870,
  195,
  726,
  1062,
  2,
  13,
  1871,
  725,
  1872,
  3,
  1873],
 [73, 4, 36, 8, 43, 26, 93, 101, 13, 76, 237],
 [1874,
  1875,
  1063,
  1,
  1876,
  430,
  10,
  1877,
  2,
  17,
  1878,
  1879,
  1064,
  3,
  1880,
  11,
  1065,
  269,
  16,
  727,
  519,
  318,
  1066,
  1065,
  728],
 [196, 94, 520, 1067, 132, 132, 102, 521, 14, 522, 1881],
 [1882,
  364,
  8,
  1883,
  319,
  523,
  65,
  1068,
  1069,
  181,
  1884,
  1885,
  181,
  8,
  1068,
  1070,
  181,
  320,
  524,
  217,
  9],
 [1886, 15, 111, 1887, 15, 111, 1888, 15, 48, 8, 1889, 15, 1890],
 [1891,
  1892,
  82,
  1893,
  145,
  161,
  1071,
  1894,
  1895,
  431,
  82,
  1896,
  270,
  1897,
  161],
 [28,
  1072,
  1898,
  1899,
  21,
  1900,
  16,
  51,
  1073,
  1901,
  111,
  169,
  432,
  21,
  1902,
  28,
  1903,
  181,
  1073,
  1904,
  21,
  238],
 [1074, 433, 1, 1075, 1, 54, 1076, 1077, 1078, 1905],
 [93, 101, 3, 365, 1079,

**Padding**

In [None]:
max_length = max(len(sequence) for sequence in X)
X = pad_sequences(X, maxlen=max_length)
X

array([[   0,    0,    0, ..., 1872,    3, 1873],
       [   0,    0,    0, ...,   13,   76,  237],
       [   0,    0,    0, ..., 1066, 1065,  728],
       ...,
       [   0,    0,    0, ...,   29,  655,   14],
       [   0,    0,    0, ...,  639,  640,  641],
       [   0,    0,    0, ...,   94,  682,  722]], dtype=int32)

**Label encoding Y**

In [None]:
y=np.array(data['Class'])
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(data['Class'])

y

array([1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 2, 1, 0, 1, 0, 0, 0, 0,
       0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1,
       0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0,
       1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1,
       0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1,
       0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0,
       1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1,
       1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0,
       1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0,
       1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0,
       1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1,

**Test train splitting**

In [None]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=.25)

In [None]:
droupout_rate=[0.3,0.7]
num_layers=[2,3]
bidirectional=0
models = [
    ('RNN', SimpleRNN),
    ('GRU', GRU),
    ('LSTM', LSTM),
    ('BiLSTM',bidirectional)
]

**Training**

In [None]:
from sklearn.metrics import recall_score

res = []

for layers in num_layers:
    num = layers
    for rate in droupout_rate:
        for name, model in models:
            mod = Sequential()
            mod.add(Embedding(input_dim=len(tokenize.word_index) + 1, output_dim=100, input_length=max_length))
            for i in range(0, num):
                if name == 'BiLSTM':
                    mod.add(Bidirectional(LSTM(100, return_sequences=True)))
                else:
                    mod.add(model(100, return_sequences=True))
                mod.add(Dropout(rate))
            mod.add(Flatten())
            mod.add(Dense(1, activation='relu'))
            mod.compile(optimizer='Adam', loss='binary_crossentropy', metrics=['accuracy'])
            mod.fit(X_train, y_train, epochs=4, batch_size=100)
            y_pred = mod.predict(X_test)
            y_pred_binary = (y_pred > 0.5).astype(int)

            loss, accuracy = mod.evaluate(X_test, y_test)
            precision = precision_score(y_test, y_pred_binary, average='macro')
            recall = recall_score(y_test, y_pred_binary, average='macro')
            f1 = f1_score(y_test, y_pred_binary, average='macro')

            res.append({'model': name, 'Dropout_rate': rate, 'num of layers': num,
                        'Loss': loss, 'Accuracy': accuracy, 'Precision': precision, 'F1-score': f1, 'Recall': recall})



Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


  _warn_prf(average, modifier, msg_start, len(result))


**Printing results**

In [None]:
res_=pd.DataFrame(res)
res_

Unnamed: 0,model,Dropout_rate,num of layers,Loss,Accuracy,Precision,F1-score,Recall
0,RNN,0.3,2,7.457424,0.477551,0.243776,0.219983,0.327597
1,GRU,0.3,2,0.6938,0.514286,0.353107,0.322844,0.348504
2,LSTM,0.3,2,0.698515,0.530612,0.377407,0.32297,0.360381
3,BiLSTM,0.3,2,0.685464,0.542857,0.362492,0.362602,0.36381
4,RNN,0.7,2,1.265839,0.485714,0.161905,0.217949,0.333333
5,GRU,0.7,2,0.695854,0.518367,0.363156,0.314907,0.352112
6,LSTM,0.7,2,0.704633,0.514286,0.361789,0.306912,0.349714
7,BiLSTM,0.7,2,0.698867,0.534694,0.365928,0.345328,0.3613
8,RNN,0.3,3,6.469222,0.481633,0.294444,0.22567,0.330263
9,GRU,0.3,3,0.688213,0.530612,0.353233,0.353129,0.354331
