# Disaster tweets classification

In this project I use NLP techniques to classify disaster-related tweets.

In [230]:
import pandas as pd
import torch
import numpy as np
import sklearn
import bokeh
from collections import Counter
from torch import nn
import torch.nn.functional as F

from torch.utils.data import DataLoader, Dataset
import torch.optim

In [231]:
# Load the train and test CSVs from the local `nlp-getting-started` folder.
data_train = pd.read_csv('nlp-getting-started/train.csv')
data_test = pd.read_csv('nlp-getting-started/test.csv')

### Part 1. Some data analysis

In [232]:
data_train

Unnamed: 0,id,keyword,location,text,target
0,1,,,Our Deeds are the Reason of this #earthquake M...,1
1,4,,,Forest fire near La Ronge Sask. Canada,1
2,5,,,All residents asked to 'shelter in place' are ...,1
3,6,,,"13,000 people receive #wildfires evacuation or...",1
4,7,,,Just got sent this photo from Ruby #Alaska as ...,1
...,...,...,...,...,...
7608,10869,,,Two giant cranes holding a bridge collapse int...,1
7609,10870,,,@aria_ahrary @TheTawniest The out of control w...,1
7610,10871,,,M1.94 [01:04 UTC]?5km S of Volcano Hawaii. htt...,1
7611,10872,,,Police investigating after an e-bike collided ...,1


In [233]:
data_test

Unnamed: 0,id,keyword,location,text
0,0,,,Just happened a terrible car crash
1,2,,,"Heard about #earthquake is different cities, s..."
2,3,,,"there is a forest fire at spot pond, geese are..."
3,9,,,Apocalypse lighting. #Spokane #wildfires
4,11,,,Typhoon Soudelor kills 28 in China and Taiwan
...,...,...,...,...
3258,10861,,,EARTHQUAKE SAFETY LOS ANGELES ÛÒ SAFETY FASTE...
3259,10865,,,Storm in RI worse than last hurricane. My city...
3260,10868,,,Green Line derailment in Chicago http://t.co/U...
3261,10874,,,MEG issues Hazardous Weather Outlook (HWO) htt...


Let's find the length (in words) of the longest tweet and the average length of a tweet.

In [234]:
# Number of whitespace-separated tokens in every training tweet, in row order.
token_counts = [len(tweet.split()) for tweet in data_train.text]

# Longest tweet length and the positions of every tweet that reaches it.
# Comprehensions keep their loop variables local, so the built-in `id` is no
# longer shadowed at notebook scope.
maxx = max(token_counts, default=0)
ids = [pos for pos, count in enumerate(token_counts) if count == maxx]

print(f"Longest training tweet: {maxx}")
print(f"Longest tweets' ids: {ids}")

Longest training tweet: 31
Longest tweets' ids: [954, 4432, 5005]


In [235]:
# Total token count over all training tweets, then the per-tweet mean.
total = sum(len(tweet.split()) for tweet in data_train.text)
print(f"Average len: {total / data_train.shape[0]}")

Average len: 14.903585971364771


In [236]:
# Every whitespace-separated token of every training tweet, in order.
# (The previous loop enumerated the tweets only to bind an unused index named
# `id`, which shadowed the built-in at notebook scope.)
all_words = [word for tweet in data_train.text for word in tweet.split()]

# Token -> number of occurrences across the training set.
count_w = Counter(all_words)


### I Will use word2vec embeddings

    First, let's clean the data and transform it to lower case.

In [237]:
#https://www.youtube.com/watch?v=My80O5Vx6fs&list=PLEwK9wdS5g0pc4NeOQqGLPcxmBHGUjnWB&index=2
# Seminar on deep learning for text

In [238]:

from string import punctuation
punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [239]:
def clear_punct(text):
    """Split ``text`` into tokens, treating ASCII punctuation as whitespace.

    Every character listed in ``string.punctuation`` is replaced by a space,
    then the string is split on runs of whitespace.

    Args:
        text: The string to tokenize.

    Returns:
        A list of the remaining tokens (empty if nothing is left).
    """
    punct_to_space = str.maketrans({mark: ' ' for mark in punctuation})
    return text.translate(punct_to_space).split()


In [240]:
# Lower-case and tokenize each tweet, overwriting the `text` column.
# The isinstance guard keeps the cell re-runnable: rows already turned into
# token lists by a previous run are passed through unchanged instead of
# raising AttributeError on `.lower()`.
data_train['text'] = data_train['text'].apply(
    lambda x: clear_punct(x.lower()) if isinstance(x, str) else x
)

In [241]:
data_train

Unnamed: 0,id,keyword,location,text,target
0,1,,,"[our, deeds, are, the, reason, of, this, earth...",1
1,4,,,"[forest, fire, near, la, ronge, sask, canada]",1
2,5,,,"[all, residents, asked, to, shelter, in, place...",1
3,6,,,"[13, 000, people, receive, wildfires, evacuati...",1
4,7,,,"[just, got, sent, this, photo, from, ruby, ala...",1
...,...,...,...,...,...
7608,10869,,,"[two, giant, cranes, holding, a, bridge, colla...",1
7609,10870,,,"[aria, ahrary, thetawniest, the, out, of, cont...",1
7610,10871,,,"[m1, 94, 01, 04, utc, 5km, s, of, volcano, haw...",1
7611,10872,,,"[police, investigating, after, an, e, bike, co...",1


In [242]:
from gensim.models import Word2Vec

# Train word2vec on the tokenized training tweets and keep only the word
# vectors (`.wv`): 32-dimensional vectors, every token kept (min_count=1),
# context window of 5 tokens.
# NOTE(review): the name `model` is reassigned to an LSTM classifier in a later
# cell, so cells that call `model.get_vector` cannot be re-run after that point.
model = Word2Vec(data_train.text,
                 vector_size=32,
                 min_count=1,
                 window=5).wv


In [243]:
# Vocabulary tokens, in the order the word-vector store lists them.
vocab = model.index_to_key

# Build the embedding matrix: row k holds the vector of vocab[k].
embedding_matrix = torch.zeros((len(vocab), model.vector_size))
for row, token in zip(embedding_matrix, vocab):
    # `row` is a view into embedding_matrix, so copy_ writes into the matrix.
    row.copy_(torch.from_numpy(model[token]))

### We can see that tokens such as "http", "co" and "t" are among the most common words, which may hurt the accuracy of our future model.

In [244]:
model.most_similar('deeds')

[('saturated', 0.8081209063529968),
 ('honda', 0.8078197836875916),
 ('yazidis', 0.8048319220542908),
 ('syndrome', 0.801944375038147),
 ('mpp', 0.7974042296409607),
 ('earbuds', 0.7916249632835388),
 ('object', 0.7915449738502502),
 ('marlon', 0.7882564067840576),
 ('1976', 0.7877078056335449),
 ('cld', 0.786592423915863)]

Now I'm going to visualize the embedding vectors of the 1000 most common words.

In [245]:
words = model.index_to_key[:1000]


In [246]:
word_vectors = [model.get_vector(word) for word in words]

Our embedding vectors are high-dimensional, so to visualize them we have to use some dimensionality reduction technique. First, let's try PCA.

In [247]:
from sklearn.decomposition import PCA


word_vectors_pca = PCA(n_components=2).fit_transform(word_vectors)

In [248]:
import bokeh.models as bm, bokeh.plotting as pl
from bokeh.io import output_notebook
output_notebook()

def draw_vectors(x, y, radius=10, alpha=0.25, color='blue',
                 width=600, height=400, show=True, **kwargs):
    """Scatter-plot 2-D points with bokeh, showing extra per-point info on hover.

    Args:
        x, y: Point coordinates.
        radius: Marker size passed to ``fig.scatter``.
        alpha: Marker opacity.
        color: Either one color name applied to every point, or a per-point
            collection of colors.
        width, height: Figure size.
        show: If true, render the figure immediately.
        **kwargs: Extra per-point columns; each one becomes a hover tooltip.

    Returns:
        The bokeh figure.
    """
    # A single color name is repeated once per point.
    point_colors = [color] * len(x) if isinstance(color, str) else color
    columns = {'x': x, 'y': y, 'color': point_colors, **kwargs}
    data_source = bm.ColumnDataSource(columns)

    fig = pl.figure(active_scroll='wheel_zoom', width=width, height=height)
    fig.scatter('x', 'y', size=radius, color='color', alpha=alpha, source=data_source)

    # One "name: @column" tooltip per extra column.
    hover_fields = [(key, "@" + key) for key in kwargs]
    fig.add_tools(bm.HoverTool(tooltips=hover_fields))

    if show:
        pl.show(fig)
    return fig

In [249]:
draw_vectors(word_vectors_pca[:, 0], word_vectors_pca[:, 1], token=words)


Now let's try t-SNE.

In [250]:
word_vectors = np.array(word_vectors)

In [251]:
from sklearn.manifold import TSNE

# t-SNE with init='random' is stochastic; fixing random_state makes the 2-D
# layout (and therefore the plot below) reproducible across runs.
vectors_tsne = TSNE(n_components=2, learning_rate='auto', init='random',
                    perplexity=3, random_state=1).fit_transform(word_vectors)

In [252]:
draw_vectors(vectors_tsne[:, 0], vectors_tsne[:, 1], token=words)

In [253]:
data_train['vectorized'] = data_train['text'].apply(lambda x: [model.get_vector(word) for word in x])

In [254]:
# One float32 tensor of word vectors per tweet, plus the labels as one tensor.
# NOTE(review): this X_train is replaced by the train_test_split output in a
# later cell before anything reads it — confirm it is still needed.
X_train = data_train['vectorized'].apply(lambda x: torch.tensor(x, dtype=torch.float32))
y_train = torch.tensor(data_train['target'])

In [255]:
# Drop the `id` column. errors='ignore' keeps the cell re-runnable once the
# column is already gone (a second run used to raise KeyError).
data_train = data_train.drop('id', axis=1, errors='ignore')

In [256]:
# Fixed number of time steps every tweet is padded (or truncated) to.
SEQ_LEN = 34
# Zero vector used for padding; sized from the word-vector store so an empty
# tweet no longer needs a first word to copy the shape from.
pad_vector = np.zeros(model.vector_size, dtype=np.float32)

out = []
for word_vecs in data_train.vectorized:
    # Work on a copy: the previous loop aliased the list stored in the
    # DataFrame and silently appended padding to the `vectorized` column.
    res = list(word_vecs[:SEQ_LEN])
    res.extend([pad_vector] * (SEQ_LEN - len(res)))
    out.append(np.array(res, dtype=np.float32))

# Shape: (number of tweets, SEQ_LEN, embedding size).
embeddings = torch.from_numpy(np.array(out))

Now, to build an embedding for each sentence, I will sum the vectors of its words.

In [257]:
sentences = torch.sum(embeddings, 1)

In [258]:
torch.flatten(embeddings, start_dim=1)

tensor([[-0.1468, -1.7839,  1.0463,  ...,  0.0000,  0.0000,  0.0000],
        [-0.1342, -1.3187,  0.7134,  ...,  0.0000,  0.0000,  0.0000],
        [-0.4786, -2.7714,  1.8150,  ...,  0.0000,  0.0000,  0.0000],
        ...,
        [ 0.0065, -0.1059,  0.0648,  ...,  0.0000,  0.0000,  0.0000],
        [-0.1010, -2.2022,  1.0907,  ...,  0.0000,  0.0000,  0.0000],
        [-0.5097, -3.6388,  2.0940,  ...,  0.0000,  0.0000,  0.0000]])

In [259]:
sentences.shape

torch.Size([7613, 32])

### Now for baseline model let's use Fully Connected Network with one layer.

In [260]:
class Model(nn.Module):
    """Baseline classifier: a single linear layer followed by a sigmoid.

    Args:
        in_features: Size of each input vector. Defaults to 32, the width
            that was previously hard-coded.
    """

    def __init__(self, in_features: int = 32) -> None:
        super().__init__()
        self.func = nn.Sequential(nn.Linear(in_features, 1),
                                  nn.Sigmoid())

    def forward(self, x):
        """Return one sigmoid output per input vector (last dimension of size 1)."""
        return self.func(x)


### Now let's create dataloader

In [261]:
from sklearn.model_selection import train_test_split

# 70/30 train/validation split of the summed sentence embeddings.
# NOTE(review): unlike the pretrained-embedding experiment later in the
# notebook, this split does not pass stratify=y_train, so the validation sets
# of the experiments are not the same rows — confirm this is intended.
X_train, X_val, y_tr, y_val = train_test_split(sentences, y_train, test_size=0.3, random_state=1)

In [262]:
from dataset import Data_Set

# Wrap each split in the project's Data_Set and batch it four samples at a
# time; only the training loader shuffles.
trainset = Data_Set(X_train, y_tr)
trainloader = DataLoader(dataset=trainset, batch_size=4, shuffle=True, num_workers=1)
valset = Data_Set(X_val, y_val)
valloader = DataLoader(dataset=valset, batch_size=4, shuffle=False, num_workers=1)

In [263]:
X_train.shape, y_tr.shape

(torch.Size([5329, 32]), torch.Size([5329]))

In [264]:
# Prefer Apple's Metal (MPS) backend when it is usable, then CUDA, otherwise
# the CPU, so the notebook also runs on machines without an MPS device.
if torch.backends.mps.is_available():
    device = torch.device('mps')
elif torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [265]:
def train_epoch(model, optimizer, train_loader, criterion, device=None):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: Module whose output is compared against 0.5 to get a 0/1
            prediction per sample.
        optimizer: Optimizer stepped once per batch.
        train_loader: Iterable of ``(data, target)`` batches.
        criterion: Loss, called as ``criterion(outputs, target.view(-1, 1))``.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has none), so the
            function does not depend on a notebook-level ``device`` variable.

    Returns:
        ``(loss_log, accuracy)``: the list of per-batch loss values and the
        fraction of samples predicted correctly during the epoch.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.train()
    loss_log = []
    correct = 0
    total = 0

    for data, target in train_loader:
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, target.view(-1, 1))
        loss.backward()
        optimizer.step()

        # Accuracy uses the outputs computed before this batch's update.
        pred = (outputs >= 0.5).float()
        correct += (pred == target.view_as(pred)).sum().item()
        total += target.size(0)

        loss_log.append(loss.item())

    accuracy = correct / total
    return loss_log, accuracy

# Evaluation on the validation set
def evaluate(model, val_loader, criterion, device=None):
    """Compute mean loss and accuracy of ``model`` over ``val_loader``.

    Args:
        model: Module whose output is compared against 0.5 to get a 0/1
            prediction per sample.
        val_loader: Iterable of ``(data, target)`` batches.
        criterion: Loss, called as ``criterion(outputs, target.view(-1, 1))``.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has none), so the
            function does not depend on a notebook-level ``device`` variable.

    Returns:
        ``(mean_loss, accuracy)``: the mean of the per-batch losses and the
        fraction of samples predicted correctly.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    loss_log = []
    correct = 0
    total = 0

    # No gradients are needed for evaluation; skipping the autograd graph
    # saves memory and time.
    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)

            outputs = model(data)
            loss = criterion(outputs, target.view(-1, 1))
            pred = (outputs >= 0.5).float()

            correct += (pred == target.view_as(pred)).sum().item()
            total += target.size(0)

            loss_log.append(loss.item())

    accuracy = correct / total
    return np.mean(loss_log), accuracy


# Number of passes over the training set (was a literal repeated in the loop
# header and in the progress message).
BASELINE_EPOCHS = 10

# Baseline model on the selected device, trained with Adam on binary
# cross-entropy.
fnmodel = Model()
fnmodel = fnmodel.to(device)
optimizer = torch.optim.Adam(fnmodel.parameters(), lr=0.001)
criterion = nn.BCELoss()

for epoch in range(BASELINE_EPOCHS):
    train_loss, train_accuracy = train_epoch(fnmodel, optimizer, trainloader, criterion)
    val_loss, val_accuracy = evaluate(fnmodel, valloader, criterion)

    # train_loss is a list of per-batch losses; report its mean.
    print(f"Epoch {epoch + 1}/{BASELINE_EPOCHS}")
    print(f" Train Loss: {np.mean(train_loss)}, Train Accuracy: {train_accuracy}")
    print(f" Validation Loss: {val_loss}, Validation Accuracy: {val_accuracy}\n")

  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 1/10
 Train Loss: 0.6899243285467279, Train Accuracy: 0.5912929255019703
 Validation Loss: 0.8247858805487327, Validation Accuracy: 0.47591943957968474



  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 2/10
 Train Loss: 0.6830837006575169, Train Accuracy: 0.5984237192719084
 Validation Loss: 0.656303315467467, Validation Accuracy: 0.5814360770577933



  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 3/10
 Train Loss: 0.6730989647801443, Train Accuracy: 0.6049915556389567
 Validation Loss: 0.643688562141826, Validation Accuracy: 0.6037653239929948



  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 4/10
 Train Loss: 0.668850542255031, Train Accuracy: 0.6066804278476262
 Validation Loss: 0.6344757000222515, Validation Accuracy: 0.6212784588441331



  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 5/10
 Train Loss: 0.665422772498213, Train Accuracy: 0.6100581722649653
 Validation Loss: 0.6250377348550773, Validation Accuracy: 0.6536777583187391



  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 6/10
 Train Loss: 0.672388209897776, Train Accuracy: 0.6018014636892475
 Validation Loss: 0.6525843670092615, Validation Accuracy: 0.6427320490367776



  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 7/10
 Train Loss: 0.6611807346232208, Train Accuracy: 0.6211296678551324
 Validation Loss: 0.6231088331670561, Validation Accuracy: 0.6370402802101576



  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 8/10
 Train Loss: 0.6554356497745271, Train Accuracy: 0.6237568024019516
 Validation Loss: 0.6338191603011927, Validation Accuracy: 0.6164623467600701



  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 9/10
 Train Loss: 0.6543064189534034, Train Accuracy: 0.6200037530493526
 Validation Loss: 0.6293090507779564, Validation Accuracy: 0.6563047285464098



  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 10/10
 Train Loss: 0.6526512980729409, Train Accuracy: 0.6233814974666917
 Validation Loss: 0.6300559643062318, Validation Accuracy: 0.6322241681260946



### Now let's try using RNN

In [266]:
from sklearn.model_selection import train_test_split

# 70/30 train/validation split of the padded per-word embedding sequences.
# NOTE(review): unlike the pretrained-embedding experiment later in the
# notebook, this split does not pass stratify=y_train, so the validation sets
# of the experiments are not the same rows — confirm this is intended.
X_train, X_val, y_tr, y_val = train_test_split(embeddings, y_train, test_size=0.3, random_state=1)

In [267]:
from dataset import Data_Set

# Wrap each split in the project's Data_Set and batch it four samples at a
# time; only the training loader shuffles.
trainset = Data_Set(X_train, y_tr)
trainloader = DataLoader(dataset=trainset, batch_size=4, shuffle=True, num_workers=1)
valset = Data_Set(X_val, y_val)
valloader = DataLoader(dataset=valset, batch_size=4, shuffle=False, num_workers=1)

In [268]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset


class LSTMClassifier(nn.Module):
    """Sequence classifier: LSTM, then a linear layer, then a sigmoid.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        output_size: Number of output units of the linear layer.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map a batch-first sequence tensor to sigmoid outputs of shape (batch, output_size)."""
        per_step_states = self.lstm(x)[0]
        # Only the LSTM output at the final time step feeds the classifier head.
        final_step = per_step_states[:, -1, :]
        return self.sigmoid(self.fc(final_step))



# Hyper-parameters for the LSTM experiment on the tweet-trained embeddings.
input_size = X_train.shape[2]  # embedding width taken from the data
hidden_size = 64
output_size = 1
learning_rate = 0.003
epochs = 10

# Bound to `rnn_model` rather than `model`, so the word2vec vectors that the
# earlier cells keep under `model` are not overwritten and those cells stay
# re-runnable.
rnn_model = LSTMClassifier(input_size, hidden_size, output_size)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(rnn_model.parameters(), lr=learning_rate)

for epoch in range(epochs):
    # One optimisation pass over the training batches.
    rnn_model.train()
    for data, target in trainloader:
        optimizer.zero_grad()
        output = rnn_model(data)
        # (batch, 1) -> (batch,) so the output lines up with the targets.
        output = torch.flatten(output)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

    # Validation: mean per-batch loss and accuracy at a 0.5 threshold.
    rnn_model.eval()
    with torch.no_grad():
        val_loss = 0
        correct = 0
        total = 0
        for data, target in valloader:
            output = torch.flatten(rnn_model(data))
            val_loss += criterion(output, target).item()
            predicted = (output >= 0.5).float()
            correct += (predicted == target).sum().item()
            total += target.size(0)

        val_accuracy = correct / total
        avg_val_loss = val_loss / len(valloader)

    print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {avg_val_loss}, Validation Accuracy: {val_accuracy}")


  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 1/10, Validation Loss: 0.6680236078289887, Validation Accuracy: 0.580122591943958


  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 2/10, Validation Loss: 0.6408007557776262, Validation Accuracy: 0.6313485113835376


  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 3/10, Validation Loss: 0.6068139616954557, Validation Accuracy: 0.6781961471103327


  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 4/10, Validation Loss: 0.5869903116113458, Validation Accuracy: 0.7035901926444834


  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 5/10, Validation Loss: 0.5764681229762564, Validation Accuracy: 0.6939579684763573


  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 6/10, Validation Loss: 0.5775245521511171, Validation Accuracy: 0.7171628721541156


  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 7/10, Validation Loss: 0.566845964555565, Validation Accuracy: 0.718476357267951


  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 8/10, Validation Loss: 0.5566955883636575, Validation Accuracy: 0.7171628721541156


  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 9/10, Validation Loss: 0.5656519827095305, Validation Accuracy: 0.7193520140105079


  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 10/10, Validation Loss: 0.5434286437550275, Validation Accuracy: 0.728984238178634


As we can see, the validation accuracy improved compared with the fully connected baseline.

### The main problem with the approach above is that the embeddings were trained only on the small tweet dataset. Now I'm going to use word2vec embeddings trained on the larger text8 corpus.

In [269]:
import gensim.downloader as api

# NOTE(review): `Word2Vec(...)` trains a model on the iterable it is given (as
# with the tweets earlier), so this looks like training a new model on the
# text8 dataset with default settings rather than loading ready-made
# vectors — confirm that this is the intent.
pretrained = Word2Vec(api.load("text8"))

In [270]:
pretrained = pretrained.wv

In [271]:
def f(sentence, max_len=34, vectors=None):
    """Turn a tokenized sentence into a fixed-size matrix of word vectors.

    Args:
        sentence: Iterable of tokens.
        max_len: Number of rows in the result. Shorter sentences are padded
            with zero rows; longer ones are truncated so every result has the
            same shape and can be stacked. Defaults to 34, the value that was
            previously hard-coded.
        vectors: Word-vector store exposing ``key_to_index``, ``vector_size``
            and ``vectors[word]`` lookup. Defaults to the notebook-level
            ``pretrained``.

    Returns:
        A float32 array of shape ``(max_len, vectors.vector_size)``. Rows for
        tokens missing from the vocabulary, and padding rows, are all zeros.
    """
    if vectors is None:
        vectors = pretrained

    # Start from zeros so unknown tokens and padding need no special handling,
    # and so the shape no longer depends on the token 'word' being in the
    # vocabulary.
    out = np.zeros((max_len, vectors.vector_size), dtype=np.float32)
    for row, word in zip(range(max_len), sentence):
        if word in vectors.key_to_index:
            out[row] = vectors[word]
    return out

In [272]:
data_train['pretrained'] = data_train['text'].apply(lambda x: f(x))

In [273]:
# Collect the per-tweet embedding matrices in row order and stack them into a
# single array; the labels go into one tensor.
new = list(data_train['pretrained'])
train = np.array(new)
y_train = torch.tensor(data_train['target'])

In [274]:
from sklearn.model_selection import train_test_split

X_train, X_val, y_tr, y_val = train_test_split(train, y_train, test_size=0.3, random_state=1, stratify=y_train)

In [275]:
from dataset import Data_Set

# Wrap each split in the project's Data_Set and batch it four samples at a
# time; only the training loader shuffles.
trainset = Data_Set(X_train, y_tr)
trainloader = DataLoader(dataset=trainset, batch_size=4, shuffle=True, num_workers=1)
valset = Data_Set(X_val, y_val)
valloader = DataLoader(dataset=valset, batch_size=4, shuffle=False, num_workers=1)

In [276]:
import torch
import torch.nn as nn


# NOTE(review): this repeats the LSTMClassifier defined earlier in the notebook
# and silently replaces it; consider keeping a single definition.
class LSTMClassifier(nn.Module):
    """LSTM encoder whose final-step output is passed through a linear layer and a sigmoid.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        output_size: Number of output units of the linear layer.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid outputs of shape (batch, output_size) for a batch-first sequence tensor."""
        lstm_out, _state = self.lstm(x)
        # Classify from the LSTM output at the last time step only.
        return self.sigmoid(self.fc(lstm_out[:, -1, :]))


# Hyper-parameters for the LSTM experiment on the text8-based embeddings.
# The embedding width is read from the data (as in the previous LSTM
# experiment) instead of being hard-coded.
input_size = X_train.shape[2]
hidden_size = 64
output_size = 1
learning_rate = 0.003
epochs = 10

# Bound to `pretrained_rnn_model` rather than `model`, so the word2vec vectors
# that the earlier cells keep under `model` are not overwritten.
pretrained_rnn_model = LSTMClassifier(input_size, hidden_size, output_size)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(pretrained_rnn_model.parameters(), lr=learning_rate)

for epoch in range(epochs):
    # One optimisation pass over the training batches.
    pretrained_rnn_model.train()
    for data, target in trainloader:
        optimizer.zero_grad()
        output = pretrained_rnn_model(data)
        # (batch, 1) -> (batch,) so the output lines up with the targets.
        output = torch.flatten(output)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

    # Validation: mean per-batch loss and accuracy at a 0.5 threshold.
    pretrained_rnn_model.eval()
    with torch.no_grad():
        val_loss = 0
        correct = 0
        total = 0
        for data, target in valloader:
            output = torch.flatten(pretrained_rnn_model(data))
            val_loss += criterion(output, target).item()
            predicted = (output >= 0.5).float()
            correct += (predicted == target).sum().item()
            total += target.size(0)

        val_accuracy = correct / total
        avg_val_loss = val_loss / len(valloader)

    print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {avg_val_loss}, Validation Accuracy: {val_accuracy}")


  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 1/10, Validation Loss: 0.683962493665164, Validation Accuracy: 0.5704903677758318


  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 2/10, Validation Loss: 0.6375084827848158, Validation Accuracy: 0.6742556917688266


  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 3/10, Validation Loss: 0.5454480922577051, Validation Accuracy: 0.7434325744308231


  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 4/10, Validation Loss: 0.5115182388789182, Validation Accuracy: 0.7618213660245184


  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 5/10, Validation Loss: 0.4908355364474858, Validation Accuracy: 0.7828371278458844


  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 6/10, Validation Loss: 0.49622367938324785, Validation Accuracy: 0.7740805604203153


  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 7/10, Validation Loss: 0.5103806143140521, Validation Accuracy: 0.7727670753064798


  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 8/10, Validation Loss: 0.5380532735087721, Validation Accuracy: 0.7609457092819615


  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 9/10, Validation Loss: 0.5213674186694643, Validation Accuracy: 0.7705779334500875


  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)
  return torch.tensor(self.x[index], dtype=torch.float32), torch.tensor(self.y[index], dtype=torch.float32)


Epoch 10/10, Validation Loss: 0.55684823925801, Validation Accuracy: 0.7635726795096323


### As expected, the RNN with pretrained embeddings showed the best accuracy on the validation set.