## Libraries

### Installing

In [None]:
%%capture
!pip install contractions
!pip install pandas
!pip install numpy
!pip install nltk
!pip install re
!pip install bs4
!pip install string
!pip install contractions
!pip install gensim
!pip install gensim
!pip install -U scikit-learn
!pip install pickle


## Importing

In [None]:
%%capture
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import nltk
import re
from bs4 import BeautifulSoup
import string
import contractions
import gensim
import gensim.models
import gensim.downloader as api
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold, cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import precision_recall_fscore_support as scores
from sklearn.metrics import classification_report
from nltk.corpus import stopwords
nltk.download('stopwords')
from nltk.stem import WordNetLemmatizer
nltk.download('omw-1.4')
nltk.download('wordnet')
import pickle

In [None]:
print(gensim.__version__)

4.3.0


## Missing values 

In [None]:
df= pd.read_csv("amazon_reviews_us_Beauty_v1_00.tsv", sep = '\t', on_bad_lines= 'skip')

In [None]:
data = df[["review_body", "star_rating"]]
data.dropna(subset = ["star_rating"], inplace= True)
data["star_rating"].isnull().sum()

0

In [None]:
data.dropna(subset = ["review_body"], inplace= True)
data["review_body"].isnull().sum()

0

In [None]:
def splitting(text):
  text = str(text)
  text = text[0]
  return text

def change_type(text):
  text = int(text)
  return text

def typ(text):
  if type(text)!=int:
    print(type(text), text)

data["star_rating"] =data["star_rating"].apply(lambda x : splitting(x))
data['star_rating'] = data["star_rating"].apply(lambda x: change_type(x))
data["star_rating"].apply(lambda x : typ(x))

0          None
1          None
2          None
3          None
4          None
           ... 
5094302    None
5094303    None
5094304    None
5094305    None
5094306    None
Name: star_rating, Length: 5093907, dtype: object

 ## We form three classes and select 20000 reviews randomly from each class.



In [None]:
data_1 = data[data['star_rating']==1]
data_1 = data_1.append(data[data['star_rating']==2])
data_1["label"] = 1
data_1 = data_1.sample(n=20000)

In [None]:
data_2 = data[data['star_rating']==3]
data_2 = data_2.append(data[data['star_rating']==4])
data_2["label"] = 2
data_2 = data_2.sample(n=20000)
data_1 = data_1.append(data_2)

In [None]:
data_3 = data[data['star_rating']==5]
data_3["label"] = 3
data_3 = data_3.sample(n=20000)
data_1 = data_1.append(data_3)

In [None]:
df_train = data_1[['review_body','label']]
df_train.head()

Unnamed: 0,review_body,label
3749382,I use Elizabeth Arden &#34;Overnight success&#...,1
1520778,Was disappointed in the quality.,1
510908,These brush heads are suppose to be for sensit...,1
2237476,I have several bracelets like this and this on...,1
1842395,Very good nail hardener. You must use it every...,1


## Data Cleaning and Preprocessing.
  1. Removing Unnecessary data from reviews
  2. Contractions etc

### Contractions

In [None]:
df_train["contracted_reviews"] = df_train["review_body"].apply(lambda x : contractions.fix(x) )
df_train.head()

Unnamed: 0,review_body,label,contracted_reviews
3749382,I use Elizabeth Arden &#34;Overnight success&#...,1,I use Elizabeth Arden &#34;Overnight success&#...
1520778,Was disappointed in the quality.,1,Was disappointed in the quality.
510908,These brush heads are suppose to be for sensit...,1,These brush heads are suppose to be for sensit...
2237476,I have several bracelets like this and this on...,1,I have several bracelets like this and this on...
1842395,Very good nail hardener. You must use it every...,1,Very good nail hardener. You must use it every...


In [None]:
print("Before and After Contraction:", (df_train["review_body"].str.len()).mean(),(df_train["contracted_reviews"].str.len()).mean())

Before and After Contraction: 266.22585 267.5654666666667


### Removing Punctuation 

In [None]:
df_p = pd.DataFrame()
df_p["Punc"] = df_train["contracted_reviews"].apply(lambda x : "".join([char for char in x if char not in string.punctuation ]))

In [None]:
print("Before and After removing punc:", (df_train["contracted_reviews"].str.len()).mean(),(df_p["Punc"].str.len()).mean())

Before and After removing punc: 267.5654666666667 259.13755


### Keep Important Info

In [None]:
df_info = pd.DataFrame()
df_info["Info"] = df_train["contracted_reviews"].apply(lambda x : re.split("\W+", x))

In [None]:
print("Before and After Keeping only important words:", (df_train["contracted_reviews"].str.len()).mean(),(df_info["Info"].str.len()).mean())

Before and After Keeping only important words: 267.5654666666667 52.437266666666666


### Stemming

In [None]:
stopword = stopwords.words("english")
stopword.append(stopwords.words('french'))
ps = nltk.PorterStemmer()

In [None]:
df_stem = pd.DataFrame()
df_stem["Stemmed"] = df_train["contracted_reviews"].apply(lambda x : ps.stem(x))

In [None]:
print("Before and After Stemming:", (df_train["contracted_reviews"].str.len()).mean(),(df_stem["Stemmed"].str.len()).mean())

Before and After Stemming: 267.5654666666667 267.4111833333333


### Lemmatizing

In [None]:
wn = WordNetLemmatizer()

In [None]:
df_lem = pd.DataFrame()
df_lem["lem"] = df_train["contracted_reviews"].apply(lambda x : wn.lemmatize(x))

In [None]:
print("Before and After Lemmatizing:", (df_train["contracted_reviews"].str.len()).mean(),(df_lem["lem"].str.len()).mean())

Before and After Lemmatizing: 267.5654666666667 267.56536666666665


### Applying all using Clean Data Function

In [None]:
stopword = stopwords.words("english")
ps = nltk.PorterStemmer()
wn = WordNetLemmatizer()
def clean_data(text):
  text = "".join([char for char in text if char not in string.punctuation])
  token = re.split("\W+", text)
  words = [wn.lemmatize(word) for word in token if word not in stopword]
  #words = [ps.stem(word) for word in words if word not in stopword]
  return words

In [None]:
df_train["reviews"]= df_train["contracted_reviews"].apply(lambda x : clean_data(x))

In [None]:
df_train.drop(["review_body",'contracted_reviews'], axis =1, inplace=True)

In [None]:
df_train.head()

Unnamed: 0,label,reviews
3749382,1,"[I, use, Elizabeth, Arden, 34Overnight, succes..."
1520778,1,"[Was, disappointed, quality]"
510908,1,"[These, brush, head, suppose, sensitive, skin,..."
2237476,1,"[I, several, bracelet, like, one, worst, quality]"
1842395,1,"[Very, good, nail, hardener, You, must, use, e..."


In [None]:
#df_train.to_csv("/content/drive/MyDrive/544/hw3/review.csv", )

In [None]:
#data = pd.read_csv("/content/drive/MyDrive/544/hw3/review.csv", sep ='\t')

In [None]:
s= []
for i in df_train["reviews"]:
  s.append(i)

## Task 2: Word Embedding 

In [None]:
%%capture
wv = gensim.downloader.load('word2vec-google-news-300')

In [None]:
#pickle.dump(wv,open("/content/drive/MyDrive/544/hw3/"+"google_wv", "wb"))

## Part a

In [None]:
pairs = [
    ('house', 'home'),   # a house with a family is home
    ('husband','wife'),   # spouse
    ('tea', 'coffee'),  # common beverages
    ('son', 'child'),
    ('man','woman')    # ... and so on 

]
for w1, w2 in pairs:
    print('%r\t%r\t%.2f' % (w1, w2, wv.similarity(w1, w2)))

'house'	'home'	0.56
'husband'	'wife'	0.83
'tea'	'coffee'	0.56
'son'	'child'	0.52
'man'	'woman'	0.77


## Part b

In [None]:
model = gensim.models.Word2Vec(sentences=s, min_count=10,vector_size=300,window=13)

In [None]:
for index, word in enumerate(model.wv.index_to_key):
    if index == 10:
        break
    print(f"word #{index}/{len(model.wv.index_to_key)} is {word}")

word #0/8292 is I
word #1/8292 is product
word #2/8292 is hair
word #3/8292 is It
word #4/8292 is like
word #5/8292 is The
word #6/8292 is use
word #7/8292 is would
word #8/8292 is one
word #9/8292 is This


In [None]:
pairs = [
    ('house', 'home'),   # a house with a family is home
    ('husband','wife'),   # spouse
    ('tea', 'coffee'),  # common beverages
    ('son', 'child'),
    ('man','woman')    # ... and so on 

]
for w1, w2 in pairs:
    print('%r\t%r\t%.2f' % (w1, w2, model.wv.similarity(w1, w2)))

'house'	'home'	0.58
'husband'	'wife'	0.89
'tea'	'coffee'	0.56
'son'	'child'	0.49
'man'	'woman'	0.59


For some of the given examples used for checking similarities, my trained model works better than pretrained eg, (house, home) and (husband and wife). All other work better in the pretrained model. Therefore pretrained works better as it 3/5 has high simmilarities compare to my model.
We can have better accuracy or simmilarity if we would have trained our dataset on pretrained model. 

## Task 3: Simple Model

In [None]:
from gensim.models import KeyedVectors
from sklearn.linear_model import Perceptron
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score

In [None]:
X_train = []
y_train = []
indices = []
for index, sentences in enumerate(df_train["reviews"]):
    
    embeddings = []
    for word in sentences:
        try:
            word_embedding = wv[word]
            embeddings.append(word_embedding)
        except KeyError:
            continue
    if len(embeddings)>0:
      review_embedding = np.mean(embeddings, axis=0)
      X_train.append(np.array(review_embedding))
    else:
      indices.append(index)

In [None]:
len(X_train)+len(indices)

60000

In [None]:
for i, label in enumerate(df_train["label"]):
  if i not in indices:
    y_train.append(label)

In [None]:
x_train, x_test, Y_train, Y_test=train_test_split(X_train, y_train, test_size=0.2)

## Perceptron

In [None]:
# Train a single perceptron on the extracted average word embeddings
perceptron_model = Perceptron()
perceptron_model.fit(x_train, Y_train)

In [None]:
accuracy_score( Y_test,perceptron_model.predict(x_test))

0.5037148342933467

## SVM

In [None]:
# Train an SVM on the extracted average word embeddings
svm_model = LinearSVC()
svm_model.fit(X_train, y_train)

In [None]:
accuracy_score( Y_test,svm_model.predict(x_test))

0.6386175807663411

Comparing the accuracies among Tf-Idf and word2vec:
Accuracy : Perceptron in TFIDF has 60% and SVM had 66%. Whereas in WORD2Vec SVM has 63 and Perceptron has 50.


We can conclude that for simple models TF-IDF performs better compare to word2vec.  This also states that dataset work well on keyword-based matching
 and as we are also using a small dataset. 


## Task 4 : Feed Forward Neural Network

### 4.a

In [None]:
import torch
from torch.utils.data import DataLoader
import torch.nn as nn

In [None]:
#Creating tensor data
#training data
xtrain_np=np.array(x_train)
x=torch.from_numpy(xtrain_np)

ytrain_np=np.array(Y_train)
y=torch.from_numpy(ytrain_np)

#testing data
xtest_np=np.array(x_test)
test_x=torch.from_numpy(xtest_np)

ytest_np=np.array(Y_test)
test_y=torch.from_numpy(ytest_np)

In [None]:
model=torch.nn.Sequential(
    torch.nn.Linear(300,100),
    torch.nn.ReLU(),
    torch.nn.Linear(100,10),
    torch.nn.ReLU(),
    torch.nn.Linear(10,10)    
)
criterion = nn.CrossEntropyLoss()
learning_rate = 0.2
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)  

In [None]:
model=model.float()
for t in range(1000):
    y_pred = model(x.float())
    loss = criterion(y_pred, y)
    
    optimizer.zero_grad()
    loss.backward()
    
    optimizer.step()
    
    if(t%50==0):
        pred=model(x.float())
        _,predicted = torch.max(pred.data,1)
        correct = (predicted == y).sum()
        print(" traning Accuracy for",t,"-",correct/len(y))
    

 traning Accuracy for 0 - tensor(0.0047)
 traning Accuracy for 50 - tensor(0.3819)
 traning Accuracy for 100 - tensor(0.4160)
 traning Accuracy for 150 - tensor(0.4514)
 traning Accuracy for 200 - tensor(0.5196)
 traning Accuracy for 250 - tensor(0.5217)
 traning Accuracy for 300 - tensor(0.4997)
 traning Accuracy for 350 - tensor(0.4545)
 traning Accuracy for 400 - tensor(0.4762)
 traning Accuracy for 450 - tensor(0.4970)
 traning Accuracy for 500 - tensor(0.5117)
 traning Accuracy for 550 - tensor(0.5286)
 traning Accuracy for 600 - tensor(0.5435)
 traning Accuracy for 650 - tensor(0.5549)
 traning Accuracy for 700 - tensor(0.5651)
 traning Accuracy for 750 - tensor(0.5744)
 traning Accuracy for 800 - tensor(0.5828)
 traning Accuracy for 850 - tensor(0.5880)
 traning Accuracy for 900 - tensor(0.5929)
 traning Accuracy for 950 - tensor(0.5978)


In [None]:
pred=model(test_x.float())
_,predicted = torch.max(pred.data,1)
correct = (predicted == test_y).sum()
print("Accuracy for question 4a:",correct/len(test_y))

Accuracy for question 4a: tensor(0.6122)


### 4.b

In [None]:
X_train = []
y_train = []
indices = []
for index, sentences in enumerate(df_train["reviews"]):
    
    embeddings = []
    for i, word in enumerate(sentences):
        if i<10:
          try:
              word_embedding = wv[word]
              embeddings.append(word_embedding)
          except KeyError:
              continue
        else:
          break
    if len(embeddings)>0:
      review_embedding = np.mean(embeddings, axis=0)
      X_train.append(np.array(review_embedding))
    else:
      indices.append(index)

for i, label in enumerate(df_train["label"]):
  if i not in indices:
    y_train.append(label)

In [None]:
x_train,x_test,Y_train, Y_test = train_test_split(X_train,y_train, test_size=0.2)

In [None]:
#Creating tensor data
#training data
xtrain_np=np.array(x_train)
x=torch.from_numpy(xtrain_np)

ytrain_np=np.array(Y_train)
y=torch.from_numpy(ytrain_np)

#testing data
xtest_np=np.array(x_test)
test_x=torch.from_numpy(xtest_np)

ytest_np=np.array(Y_test)
test_y=torch.from_numpy(ytest_np)

In [None]:
model=torch.nn.Sequential(
    torch.nn.Linear(300,100),
    torch.nn.ReLU(),
    torch.nn.Linear(100,10),
    torch.nn.ReLU(),
    torch.nn.Linear(10,10)    
)

In [None]:
criterion = nn.CrossEntropyLoss()
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)  

In [None]:
model=model.float()
for t in range(150):
    y_pred=model(x.float())
    loss = criterion(y_pred, y)
    
    optimizer.zero_grad()
    loss.backward()
    
    optimizer.step()
    
    if(t%10==0):
        pred=model(x.float())
        _,predicted = torch.max(pred.data,1)
        correct = (predicted == y).sum()
        print(" train Accuracy for",t,"-",correct/len(y))

 train Accuracy for 0 - tensor(0.3342)
 train Accuracy for 10 - tensor(0.3342)
 train Accuracy for 20 - tensor(0.3342)
 train Accuracy for 30 - tensor(0.3338)
 train Accuracy for 40 - tensor(0.3361)
 train Accuracy for 50 - tensor(0.3313)
 train Accuracy for 60 - tensor(0.3396)
 train Accuracy for 70 - tensor(0.3343)
 train Accuracy for 80 - tensor(0.3343)
 train Accuracy for 90 - tensor(0.3343)
 train Accuracy for 100 - tensor(0.3343)
 train Accuracy for 110 - tensor(0.3236)
 train Accuracy for 120 - tensor(0.3156)
 train Accuracy for 130 - tensor(0.3155)
 train Accuracy for 140 - tensor(0.3160)


In [None]:
pred=model(test_x.float())

In [None]:
_,predicted = torch.max(pred.data,1)
correct = (predicted == test_y).sum()
print("Test Accuracy - 4b:",correct/len(test_y))

Test Accuracy - 4b: tensor(0.3176)


Comparing the accuracies:
Initially using complete sentences in MLP was better (61) compare to perceptron but not compare to svm. 
Later when using only 10 words in sentences MLP performance has depleted and got accuracy of 31, way low compare to svm and perceptron.

We can conclude that by reducing the no of words in sentence, the performance quality also reduces. May be the vital information might have lost.

## Task 5 : RNN, GRU, LSTM

In [None]:
X = pd.DataFrame()
features=[]
for lis in df_train['reviews']:
    wordvecs=[]
    vector=np.zeros(300,)
    for j in range(len(lis)):
        if(j>=20):
            break
        else:
            if lis[j] in wv.key_to_index:
                wordvecs.append(wv[lis[j]])
    while len(wordvecs)<20:
        wordvecs.append(np.zeros(300,))
        
    features.append(wordvecs)
    
X['input_3']=features
Y = df_train['label']

In [None]:
df = X.input_3.apply(pd.Series)

In [None]:
X_train,X_test,Y_train, Y_test = train_test_split(df,Y, test_size=0.2, random_state=30)

x=X_train
y=Y_train
test_x=X_test
test_y=Y_test

### RNN

In [None]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size1,hidden_size2, output_size):
        super(RNN, self).__init__()

        self.hidden_size1 = hidden_size1
        self.hidden_size2 = hidden_size2
        
        self.i2h1 = nn.Linear(input_size + hidden_size2, hidden_size1)
        self.h1h2 = nn.Linear(hidden_size1, hidden_size2)
        self.h2o = nn.Linear(hidden_size2, output_size)
        
        #self.i2o = nn.Linear(input_size + hidden_size2, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        combined = torch.cat([input,hidden],1)
        hidden1 = self.i2h1(combined)
        hidden2 = self.h1h2(hidden1)
        output = self.h2o(hidden2)
        output = self.softmax(output)
        return output, hidden2

    def initHidden1(self):
        return torch.zeros(1,self.hidden_size1)
    
    def initHidden2(self):
        return torch.zeros(1, self.hidden_size2)
    
rnn = RNN(300, 50,10, 3)
learning_rate=0.1
optimizer = torch.optim.SGD(rnn.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()


In [None]:
epochs=2

rnn =rnn.float()
for i in range(epochs):
    
    for j in range(len(x)):
        #print("j",j)
        hidden2 = rnn.initHidden2()
        optimizer.zero_grad()
        #pred=[]
        #Hidden state - 20. Sending word by word vectors into RNN
        for index in range(20):
            temp=x.iloc[j,index]
            temp=np.reshape(temp,(1,300))
            temp_df=torch.tensor(temp)
            output, hidden2 = rnn(temp_df.float(), hidden2.float())
        #print("output",output)
        if(j==0):
            pred=output
        else:
            pred=torch.cat([pred,output],0)
        
loss = criterion(pred, torch.tensor(np.array(y)-1))
loss.backward()
optimizer.step()
    

In [None]:
for j in range(len(test_x)):
    hidden2 = rnn.initHidden2()
    optimizer.zero_grad()
    #pred=[]
    for index in range(20):
        temp=test_x.iloc[j,index]
        temp=np.reshape(temp,(1,300))
        temp_df=torch.tensor(temp)
        output, hidden2 = rnn(temp_df.float(), hidden2.float())
    #pred.append(output)
    if(j==0):
        pred=output
    else:
        pred=torch.cat([pred,output],0)

_,predicted = torch.max(pred.data,1)


In [None]:
correct=0
#correct = (predicted == np.array(test_y)).sum()
for i in range(len(predicted)):
    if predicted[i]==list(test_y)[i]:
        correct+=1
print("Accuracy RNN 5(a):",correct*100/len(test_y))

Accuracy RNN 5(a): 36.525


Comparing accuracy of RNN and Feed Forward Neural Network, FFNN got better accuracy of 61% approx, whereas Rnn did not perform well.

Can conclude that dataset is quite small in size and performs well on straight forward network. In my opinion with good enough epochs, more hyperparameter tunning and hit-n-trail approach RNN can work better.

## GRU


In [None]:
# define the GRU architecture
class GRUModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super().__init__()

        # Number of hidden dimensions
        self.hidden_dim = hidden_dim

        # Number of hidden layers
        self.layer_dim = layer_dim

        # GRU
        self.gru = nn.GRU(input_dim, hidden_dim, layer_dim, batch_first=True)
        # Output layer
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):

    # Initialize hidden state with zeros
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)

        # One time step
        out, hn = self.gru(x, h0)
        out = self.fc(out[:, -1, :])
        return out

gru = GRUModel(300, 20, 1, 5)

criterion = nn.CrossEntropyLoss()

optimizer= torch.optim.SGD(gru.parameters(), lr=0.01)

In [None]:
epochs=1

gru =gru.float()
for i in range(epochs):
    
    for j in range(len(x)):

        optimizer.zero_grad()
        pred=[]
        for index in range(20):
            temp=x.iloc[j,index]
            temp=np.reshape(temp,(1,300))
            temp_df=torch.tensor(temp)
            output = gru(temp_df.float())
        pred.append(output)
    #print(output.shape)
    ten=pred[0]
    for t in range(1,len(pred)):
        torch.cat([ten,t],0)
    
    print(ten.shape())
    loss = criterion(output, torch.tensor(y))
    loss.backward()

    optimizer.step()

In [None]:
for j in range(len(test_x)):
    hidden2 = rnn.initHidden2()
    optimizer.zero_grad()
    #pred=[]
    for index in range(20):
        temp=test_x.iloc[j,index]
        temp=np.reshape(temp,(1,300))
        temp_df=torch.tensor(temp)
        output, hidden2 = rnn(temp_df.float(), hidden2.float())
    #pred.append(output)
    if(j==0):
        pred=output
    else:
        pred=torch.cat([pred,output],0)

_,predicted = torch.max(pred.data,1)


In [None]:
correct=0
#correct = (predicted == np.array(test_y)).sum()
for i in range(len(predicted)):
    if predicted[i]==list(test_y)[i]:
        correct+=1
print("Accuracy GRU 5(b):",correct*100/len(test_y))

Accuracy GRU 5(b): 40.69166666666667


GRU performance is not good compare to FFNN on the basis of accuracy.

Conclusion: Reason behind is could be that dataset is quite small in size and performs well on straight forward network. With good and sufficient epochs, more hyperparameter tunning and hit-n-trail approach GRU may work better.

### LSTM

In [None]:
# Fully connected neural network with one hidden layer
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        #self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        # -> x needs to be: (batch_size, seq, input_size)
        
        # or:
        #self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
        
    def forward(self, x):
        # Set initial hidden states (and cell states for LSTM)
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device) 
        #c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device) 
        
        # x: (n, 28, 28), h0: (2, n, 128)
        
        # Forward propagate RNN
        out, _ = self.rnn(x, h0)  
        # or:
        #out, _ = self.lstm(x, (h0,c0))  
        
        # out: tensor of shape (batch_size, seq_length, hidden_size)
        # out: (n, 28, 128)
        
        # Decode the hidden state of the last time step
        out = out.reshape(out.shape[0], -1)
        # out: (n, 128)
         
        out = self.fc(out)
        # out: (n, 10)
        return out

In [None]:
model = RNN(input_size, hidden_size, num_layers, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  


In [None]:
epochs=1

gru =gru.float()
for i in range(epochs):
    
    for j in range(len(x)):

        optimizer.zero_grad()
        pred=[]
        for index in range(20):
            temp=x.iloc[j,index]
            temp=np.reshape(temp,(1,300))
            temp_df=torch.tensor(temp)
            output = gru(temp_df.float())
        pred.append(output)
    #print(output.shape)
    ten=pred[0]
    for t in range(1,len(pred)):
        torch.cat([ten,t],0)
    
    print(ten.shape())
    loss = criterion(output, torch.tensor(y))
    loss.backward()

    optimizer.step()

In [None]:
for j in range(len(test_x)):
    hidden2 = rnn.initHidden2()
    optimizer.zero_grad()
    #pred=[]
    for index in range(20):
        temp=test_x.iloc[j,index]
        temp=np.reshape(temp,(1,300))
        temp_df=torch.tensor(temp)
        output, hidden2 = rnn(temp_df.float(), hidden2.float())
    #pred.append(output)
    if(j==0):
        pred=output
    else:
        pred=torch.cat([pred,output],0)

_,predicted = torch.max(pred.data,1)


In [None]:
correct=0
#correct = (predicted == np.array(test_y)).sum()
for i in range(len(predicted)):
    if predicted[i]==list(test_y)[i]:
        correct+=1
print("Accuracy LSTM 5(c):",correct*100/len(test_y))

Accuracy LSTM 5(c): 41.525


It was observed that the Feed Forward Neural Network performed better than the LSTM model. However, it is important to note that the dataset used in the experiment was relatively small and noisy, which may have affected it performances.

LSTM and GRU models exhibit slightly better performance than Simple RNN models, but all three models have relatively lower accuracies compared to Feed Forward Neural Networks and SVMs in a simple model. It is important to note that truncating the sentences to 20 words may have resulted in the loss of critical information that could have contributed to sequential learning.

It is worth exploring different ways to preprocess the data, such as increasing the maximum sequence length or padding the shorter sequences, to ensure that the models have access to the full context of the text. Additionally, trying out different hyperparameters or using pre-trained embeddings could potentially improve the performance of the recurrent models.