<a href="https://colab.research.google.com/github/vigneshwaran/MaLe/blob/master/imdb_sentiment_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Trained first 1000 samples, got 52% accuracy \
Applied weight decay and weight initialization \
Trained over all samples with all words, got 60-63% accuracy

Movie review is overloaded with artist name, character name, movie names, some plots and stopwords. \
To select important words from samples, used stanford word embeddings that provided sentiment of each words,
got accuracy of over 70% with just 10 epoches

In [0]:
import torch
import torch.nn as nn
import os
import re
import random
import math

In [2]:
!wget https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz

--2020-04-26 16:46:44--  https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
Resolving ai.stanford.edu (ai.stanford.edu)... 171.64.68.10
Connecting to ai.stanford.edu (ai.stanford.edu)|171.64.68.10|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 84125825 (80M) [application/x-gzip]
Saving to: ‘aclImdb_v1.tar.gz’


2020-04-26 16:46:54 (9.09 MB/s) - ‘aclImdb_v1.tar.gz’ saved [84125825/84125825]



In [0]:
!tar -xf aclImdb_v1.tar.gz

In [4]:
!pwd

/content


In [0]:
# ls aclImdb/

In [0]:
path = os.getcwd()

In [0]:
path += '/aclImdb/'

In [0]:
word_idx = {}

In [0]:
with open(path + '/imdb.vocab', 'r') as f:  # contains all distinct words
    out = f.readlines()
    for i, word in enumerate(out):
        word_idx[word.strip('\n')] = i

In [0]:
idx_sent = {}

In [0]:
with open(path + '/imdbEr.txt', 'r') as f:   # contains sentiment of each corresponding word to vocab
    out = f.readlines()
    for i, word in enumerate(out):
        idx_sent[i] = float(word.strip('\n'))

In [0]:
filepath = ['/train/pos/', '/train/neg/']

In [0]:
samples = []

In [0]:
for directory in filepath:
    for index, file in enumerate(os.listdir(path + directory)):
        # if index > 10 :
        #     break
        with open(path + directory + file, 'r') as f:
            text = f.read().strip('\n').lower()
            words = re.findall('[a-z]+', text)
            weights = []
            for word in words:
              if word_idx.get(word):
                weights.append(idx_sent[word_idx[word]])
              else:
                weights.append(0.0)
            weights = torch.FloatTensor(weights) 
            weights = weights.view(1,1,-1) # (input_size, batch_size, time_steps)
            match = re.search('_[0-9]+', file)
            Y = 0.1 * int(match.group()[1:])
            Y = torch.FloatTensor([Y])
            Y = Y.view(1,1)
            samples.append((weights, Y))

In [0]:
random.shuffle(samples)

In [0]:
def sigmoid_prime(z):
	  return sigmoid(z)*(1- sigmoid(z))

In [0]:
def sigmoid(x):
    return 1 / (1 + torch.exp(-x))

In [0]:
class RNN():
    def __init__(self, input_size, hidden_size, output_size, batch_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.batch_size = batch_size
#         self.ts = ts  # time step
        # self.parameters = {'Waa' : torch.randn((hidden_size, hidden_size))*0.01,
        #                    'Wax' : torch.randn((hidden_size, input_size))*0.01 ,
        #                    'Wya' : torch.randn((output_size, hidden_size))*0.01,
        #                    'ba'  : torch.randn((hidden_size, 1))*0.01,
        #                    'by'  : torch.randn((output_size, 1))*0.01
        #                   }
        self.parameters = {'Waa' : torch.randn((hidden_size, hidden_size))*math.sqrt(1/(hidden_size)),
                           'Wax' : torch.randn((hidden_size, input_size))*math.sqrt(1/(hidden_size)),
                           'Wya' : torch.randn((output_size, hidden_size))*math.sqrt(1/(output_size)),
                           'ba'  : torch.zeros((hidden_size, 1)),
                           'by'  : torch.zeros((output_size, 1))
                          }

    def forward(self, X):
        self.a_list = [torch.zeros(self.hidden_size, self.batch_size)]
        self.a_out = [torch.zeros(self.output_size, self.batch_size)]
        a_prev = self.a_list[0]
        self.X = X
        self.ts = X.shape[-1]
        for t in range(self.ts):
            xt = X[:, :, t]
            a_prev = torch.tanh(torch.matmul(self.parameters['Waa'], a_prev) + 
                                torch.matmul(self.parameters['Wax'], xt) + self.parameters['ba'])
            self.a_list.append(a_prev)
        a_Out = torch.matmul(self.parameters['Wya'], a_prev) + self.parameters['by']
        y_pred = sigmoid(a_Out)
        self.a_out.append(a_Out)
        self.Y = y_pred
        return self.Y
    
    def getY(self):
        return self.Y
    
    def getHidden(self):
        return self.hidden_size, self.batch_size
    
    def normalize(self):
        for i, j in self.parameters.items():
            self.parameters[i] = j.normal_()

    def normalize_tensor(self, l):
        return (l - l.mean())/l.std()

    def clip(self, l):
      for i in range(len(l)):
        l[i] = torch.clamp(l[i], min = -0.5, max = 0.5)
        return l
            
                                
    def backward(self, y, lr=0.01):
#         print(self.Y.shape, y.shape)
        da = self.a_list[0]
        dx = torch.zeros(self.X.shape)
        dWax = torch.zeros((self.parameters['Wax'].shape))
        dWaa = torch.zeros((self.parameters['Waa'].shape))
        dba = torch.zeros((self.parameters['ba'].shape))
        dWya = torch.zeros((self.parameters['Wya'].shape))
        dby = torch.zeros((self.parameters['by'].shape))
        
        cost = (self.Y - y) 
        dWya += torch.mm(cost, self.a_out[-1].t())
        dby += torch.sum(cost, axis = 1, keepdims=True)
        da += torch.mm(self.parameters['Wya'].t(), cost)
        losses = -1.0 * (y * torch.log(self.Y))
        ts = self.ts
        for t in range(-1, -1*(self.ts+1), -1):
            xt = self.X[:, :, t]
#             cost = (self.Y[:, :, t] - y[:, :, t])*sigmoid_prime(self.a_out[t])
# #             print(cost.shape)
#             dWya += torch.mm(cost, self.a_list[t].t())
#             dby += torch.sum(cost, axis = 1, keepdims=True)
#             da += torch.mm(self.parameters['Wya'].t(), cost)
            dz = da * (1 - (self.a_list[t])**2)
            dxt = torch.mm(self.parameters['Wax'].t(), dz)
            dx[:, :, t] = dxt
            dWax += torch.mm(dz, xt.t())
            da += torch.mm(self.parameters['Waa'].t(), dz)
            dWaa += torch.mm(dz, self.a_list[t-1].t())
            dba += torch.sum(dz, axis = 1, keepdims=True)
            
            
        # (dWax, dWya, dWaa, dba, dby) = self.clip([dWax/ts, dWya/ts, dWaa/ts, dba/ts, dby/ts])
        (dWax, dWya, dWaa, dba, dby) = self.clip([dWax/(bs*ts), dWya/(bs*ts), dWaa/(bs*ts), dba/(ts), dby/(bs*ts)])

        # print((dWax, dWya, dWaa, dba, dby))
        self.parameters['Wax'] -= lr*dWax
        self.parameters['Wya'] -= lr*dWya
        self.parameters['Waa'] -= lr*dWaa
        self.parameters['ba']  -= lr*dba
        self.parameters['by']  -= lr*dby
        self.normalize()
        return losses

In [0]:
sentiment = RNN(input_size = 1, hidden_size = 3, output_size = 1, batch_size = 1)

NameError: ignored

In [0]:
# sentiment.parameters

In [0]:
# count = 1
for epoch in range(10):
  total_cost = 0
  acc = 0
  for i,j in samples[:1000]:
    pred = sentiment.forward(i)
    acc += (pred.round() == j.round())
    total_cost += sentiment.backward(j, lr=0.01)
    # if (count % 1000 == 0):
    #   print(acc)
    #   acc = 0
    # count += 1
  print(epoch, total_cost, acc.item()/10)

NameError: ignored

In [0]:
# weights.mean()

In [0]:
# weights.var()

In [0]:
# (weights - weights.mean()) / weights.std()

In [0]:
# weights

In [0]:
!wget http://nlp.stanford.edu/data/glove.6B.zip

--2020-04-22 15:34:25--  http://nlp.stanford.edu/data/glove.6B.zip
Resolving nlp.stanford.edu (nlp.stanford.edu)... 171.64.67.140
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://nlp.stanford.edu/data/glove.6B.zip [following]
--2020-04-22 15:34:25--  https://nlp.stanford.edu/data/glove.6B.zip
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: http://downloads.cs.stanford.edu/nlp/data/glove.6B.zip [following]
--2020-04-22 15:34:26--  http://downloads.cs.stanford.edu/nlp/data/glove.6B.zip
Resolving downloads.cs.stanford.edu (downloads.cs.stanford.edu)... 171.64.64.22
Connecting to downloads.cs.stanford.edu (downloads.cs.stanford.edu)|171.64.64.22|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 862182613 (822M) [application/zip]
Saving to: ‘glove.6B.zip’


2020-0

In [0]:
!unzip glove.6B.zip

Archive:  glove.6B.zip
  inflating: glove.6B.50d.txt        
  inflating: glove.6B.100d.txt       
  inflating: glove.6B.200d.txt       
  inflating: glove.6B.300d.txt       


In [0]:
# !head glove.6B.50d.txt

In [0]:
def read_glove_vecs(glove_file):
    with open(glove_file, 'r',encoding='UTF-8') as f:
        words = set()
        word_to_vec_map = {}
        for line in f:
            line = line.strip().split()
            curr_word = line[0]
            words.add(curr_word)
            word_to_vec_map[curr_word] = torch.FloatTensor(list(map(float,line[1:])))
        
        i = 1
        words_to_index = {}
        index_to_words = {}
        for w in sorted(words):
            words_to_index[w] = i
            index_to_words[i] = w
            i = i + 1
    return words_to_index, index_to_words, word_to_vec_map

In [0]:
word_to_index, index_to_word, word_to_vec_map = read_glove_vecs('glove.6B.50d.txt')

In [0]:
samples_we = []

In [0]:
# word_to_index['unk']

In [0]:
for directory in filepath:
    for index, file in enumerate(os.listdir(path + directory)):
        # if index > 10 :
        #     break
        with open(path + directory + file, 'r') as f:
            text = f.read().strip('\n').lower()
            words = re.findall('[a-z]+', text)
            weights = []
            for word in words:
              if word_to_index.get(word):
                weights.append(word_to_vec_map[word])
              else:
                continue
            weights = torch.stack(weights, dim=1) #[:min(len(weights), 10)])
            weights = weights.view(50,1,-1)
            match = re.search('_[0-9]+', file)
            Y = 0.1 * int(match.group()[1:])
            Y = torch.FloatTensor([Y])
            Y = Y.view(1,1)
            samples_we.append((weights, Y))

In [0]:
samples_we[0][0].shape

torch.Size([50, 1, 249])

In [0]:
random.shuffle(samples_we)

In [0]:
sentiment_glove = RNN(input_size = 50, hidden_size = 256, output_size = 1, batch_size = 1)

In [0]:
# sentiment_glove.parameters

In [0]:
for epoch in range(20):
  total_cost = 0
  acc = 0
  for i,j in samples_we:
    pred = sentiment_glove.forward(i)
    acc += (pred.round() == j.round())
    # print(sentiment_glove.parameters)
    total_cost += sentiment_glove.backward(j, lr=0.1)
  print(epoch, total_cost, acc.item()/250)

0 tensor([[86339.0859]]) 49.924
1 tensor([[86224.6328]]) 50.132
2 tensor([[85331.6484]]) 50.376
3 tensor([[84836.6328]]) 49.832


In [0]:
# Used only SGD of batch size = 1 as each sample has different timesteps(words), no efficient usage of GPU. 
# One-hot encoding yields word vector size of (90k, 90k), so used pre-trained word embeddings

In [0]:
test_filepath = ['/test/pos/', 'test/neg/']
test_we = []

In [0]:
for directory in test_filepath:
    for index, file in enumerate(os.listdir(path + directory)):
        # if index > 10 :
        #     break
        with open(path + directory + file, 'r') as f:
            text = f.read().strip('\n').lower()
            words = re.findall('[a-z]+', text)
            weights = []
            for word in words:
              if word_to_index.get(word):
                weights.append(word_to_vec_map[word])
              else:
                weights.append(word_to_vec_map['unk'])
            weights = torch.stack(weights, dim=1) #[:min(len(weights), 10)])
            weights = weights.view(50,1,-1)
            match = re.search('_[0-9]+', file)
            Y = 0.1 * int(match.group()[1:])
            Y = torch.FloatTensor([Y])
            Y = Y.view(1,1)
            test_we.append((weights, Y))

In [0]:
# Validation
total_cost = 0
acc = 0
for i,j in test_we:
  pred = sentiment_glove.forward(i)
  acc += (pred.round() == j.round())
  # print(sentiment_glove.parameters)
  # total_cost += sentiment_glove.backward(j, lr=0.001)
print(acc.item()/len(test_we)*100)

50.0


In [0]:
class RNN(): # LSTM
    def __init__(self, input_size, hidden_size, output_size, batch_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.batch_size = batch_size
#         self.ts = ts  # time step
        self.parameters = {'Wf' : torch.randn((hidden_size, input_size + hidden_size))*0.01,
                           'Wi' : torch.randn((hidden_size, input_size + hidden_size))*0.01,
                           'Wo' : torch.randn((hidden_size, input_size + hidden_size))*0.01,
                           'Wc' : torch.randn((hidden_size, input_size + hidden_size))*0.01,
                           'Wy' : torch.randn((output_size, hidden_size))*0.01,
                           'bf' : torch.randn((hidden_size, 1))*0.01,
                           'bi' : torch.randn((hidden_size, 1))*0.01,
                           'bo' : torch.randn((hidden_size, 1))*0.01,
                           'bc' : torch.randn((hidden_size, 1))*0.01,
                           'by' : torch.randn((output_size,1))*0.01
                          }
         
    def forward(self, X):
        self.a = torch.zeros(self.hidden_size, X.shape[1], X.shape[-1])
        self.c = torch.zeros(self.hidden_size, X.shape[1], X.shape[-1])
        self.y = torch.zeros(self.output_size, X.shape[1], X.shape[-1])
        a_prev = torch.randn(self.hidden_size, X.shape[1])
        c_prev = torch.zeros(self.hidden_size, X.shape[1])
        self.X = X
        self.ts = X.shape[-1]
        self.ft = []
        self.it = []
        self.ot = []
        self.cct = []
        for t in range(self.ts):
            xt = X[:, :, t]
            concat = torch.cat((a_prev, xt), dim = 0)
            ft = sigmoid (torch.mm(self.parameters['Wf'], concat) + self.parameters['bf'])
            it = sigmoid (torch.mm(self.parameters['Wi'], concat) + self.parameters['bi'])
            cct = torch.tanh(torch.mm(self.parameters['Wc'], concat) + self.parameters['bc'])
            c_prev = torch.mul(ft, c_prev) + torch.mul(it, cct)
            ot = sigmoid (torch.mm(self.parameters['Wo'], concat) + self.parameters['bo'])
            a_prev = torch.mul(ot, torch.tanh(c_prev))
            self.a[:,:,t] = a_prev
            self.c[:,:,t] = c_prev
            self.ft.append(ft)
            self.it.append(it)
            self.ot.append(ot)
            self.cct.append(cct)
        self.a_out = torch.matmul(self.parameters['Wy'], a_prev) + self.parameters['by']
        y_pred = sigmoid(self.a_out)
        self.Y = y_pred
        return self.Y
    
    
    def clip(self, grad):
        for i in grad.keys():
          grad[i] = torch.clamp(grad[i], -0.5, 0.5, out = grad[i])
        return grad

    def update_parameters(self, gradients, ts, lr = 0.001):
        for i, j in self.parameters.items():
            self.parameters[i] -= lr * gradients['d' + i]
                                    
    def backward(self, y, lr=0.001, lmbda = 1.0e-2):
        
        grad = {}
        grad['dWf'] = torch.zeros((self.parameters['Wf'].shape))
        grad['dWi'] = torch.zeros((self.parameters['Wi'].shape))
        grad['dWo'] = torch.zeros((self.parameters['Wo'].shape))
        grad['dWc'] = torch.zeros((self.parameters['Wc'].shape))
        grad['dWy'] = torch.zeros((self.parameters['Wy'].shape))
        grad['dbf'] = torch.zeros((self.parameters['bf'].shape))
        grad['dbi'] = torch.zeros((self.parameters['bi'].shape))
        grad['dbo'] = torch.zeros((self.parameters['bo'].shape))
        grad['dbc'] = torch.zeros((self.parameters['bc'].shape))
        grad['dby'] = torch.zeros((self.parameters['by'].shape))
        
        
        
        bs = y.shape[-1]
#         print(bs, self.X.shape[1])
        da = torch.zeros(self.hidden_size, bs)
        dc = torch.zeros(self.hidden_size, bs) 
        cost = (self.Y - y)
        # print(cost.shape, self.parameters['Wya'].shape, da.shape) 
        grad['dWy'] += torch.mm(cost, self.a_out.t())
        grad['dby'] += torch.sum(cost, axis = 1, keepdims=True)
#         print((self.parameters['Wy'].t().shape, cost.shape))
        da += torch.mm(self.parameters['Wy'].t(), cost)
        losses = torch.sum(-1.0 * (y * (torch.log(self.Y))))
        ts = self.ts
        ot = self.ot
        it = self.it
        cct = self.cct
        ft = self.ft
        a = self.a
        c = self.c
        if ts < 1:
            return 0
        for t in reversed(range(ts)):
            xt = self.X[:, :, t]
            
#             print(da.shape, c[:,:,t].shape, ot[t].shape, ot[t-1].shape)
            dot = da * (torch.tanh(c[:,:,t])) * ot[t] * (1 - ot[t])
            temp = ot[t] * (1 - (torch.tanh(c[:,:,t])**2))
            dcct = dc * it[t] + temp * it[t] * da * (1 - (cct[t]**2))
            dit = dc * cct[t] + temp * cct[t] * da * (it[t] * (1 - it[t]))
            dft = dc * c[:,:,t-1] + temp * c[:,:,t-1] * da * (ft[t] * (1 - ft[t]))
            concat = torch.cat((a[:, :, t-1], xt), dim = 0)

            grad['dWf'] += torch.mm(dft, concat.t())
            grad['dWi'] += torch.mm(dit, concat.t())
            grad['dWc'] += torch.mm(dcct, concat.t())
            grad['dWo'] += torch.mm(dot, concat.t())
            grad['dbf'] += torch.sum(dft, axis = 1, keepdims=True)
            grad['dbi'] += torch.sum(dit, axis = 1, keepdims=True)
            grad['dbc'] += torch.sum(dcct, axis = 1, keepdims=True)
            grad['dbo'] += torch.sum(dot, axis = 1, keepdims=True)

            # Compute derivatives w.r.t previous hidden state, previous memory state and input. Use equations (19)-(21). (≈3 lines)
            da = torch.mm((self.parameters['Wf'][:, : self.hidden_size]).t(), dft) + torch.mm((self.parameters['Wi'][:, : self.hidden_size]).t(), dit) + torch.mm((self.parameters['Wc'][:, : self.hidden_size]).t(), dcct) + torch.mm((self.parameters['Wo'][:, : self.hidden_size]).t(), dot)
            dc = dc * ft[t] + ot[t] * (1 - (torch.tanh(c[:,:,t])**2)) * da * ft[t]

            
        self.grad = self.clip(grad)
        self.update_parameters(self.grad, ts)
        return losses

In [0]:
sentiment_glove = RNN(input_size = 50, hidden_size = 128, output_size = 1, batch_size = 1)

In [0]:
batches = []
samples_we.sort(key= lambda s: s[0].size(-1))
x = [samples_we[0][0]]
y = [samples_we[0][1]]
for i in range(1, len(samples_we)):
  if samples_we[i][0].size(-1) == samples_we[i-1][0].size(-1):
    x.append(samples_we[i][0])
    y.append(samples_we[i][1])
  else:
    x = torch.stack(x, dim = 2)
    x = x.view(50, x.shape[2], -1)
    y = torch.stack(y, dim = 2)
    y = y.view(1, y.shape[2])
    batches.append((x,y))
    x = [samples_we[i][0]]
    y = [samples_we[i][1]]
x = torch.stack(x, dim = 2)
x = x.view(50, x.shape[2], -1)
y = torch.stack(y, dim = 2)
y = y.view(1, y.shape[2])
batches.append((x,y))

In [0]:
batches[-1][0].shape

torch.Size([50, 1, 2504])

In [0]:
len(batches)

1029

In [0]:
torch.cuda.set_device(0)

In [0]:
# for i in batches:
#   pred = sentiment_glove.forward(i[0])
for epoch in range(20):
  total_cost = 0
  acc = 0
  sz = 0
  for i in batches[:100]:
    sz += i[1].shape[-1]
    pred = sentiment_glove.forward(i[0])
    # print(pred)
    acc += torch.sum(pred.round() == i[1].round())
    # print(sentiment_glove.parameters)
    total_cost += sentiment_glove.backward(i[1], lr=0.001)
  print(epoch, total_cost, acc.item()/sz)

0 tensor(1430.6589) 0.5445462114904246
1 tensor(1388.7999) 0.5445462114904246
2 tensor(1324.2302) 0.5445462114904246
3 tensor(1238.7107) 0.5445462114904246
4 tensor(1176.7863) 0.5445462114904246
5 tensor(1148.0459) 0.5445462114904246
6 tensor(1136.3022) 0.5445462114904246
7 tensor(1130.6210) 0.5445462114904246
8 tensor(1126.9006) 0.5445462114904246
9 tensor(1123.8530) 0.5448237579794616
10 tensor(1121.1937) 0.5456563974465723
11 tensor(1118.9434) 0.5481543158479045
12 tensor(1117.2344) 0.5539827921176798
13 tensor(1116.3185) 0.5584235359422703
14 tensor(1115.6949) 0.5673050235914515
15 tensor(1115.3047) 0.5795170691090757
16 tensor(1114.8475) 0.5856230918678879
17 tensor(1114.1071) 0.5975575908964752
18 tensor(1112.8406) 0.6039411601443242
19 tensor(1110.9589) 0.6094920899250624


In [0]:
samples = []
for directory in filepath:
    for index, file in enumerate(os.listdir(path + directory)):
        # if index > 10 :
        #     break
        with open(path + directory + file, 'r') as f:
            text = f.read().strip('\n').lower()
            words = re.findall('[a-z]+', text)
            weights = []
            for word in words:
              if word_idx.get(word) and abs(idx_sent[word_idx[word]]) >= 0.5: # threshold to priotize only important words
                weights.append(idx_sent[word_idx[word]])
              else:
                # weights.append(0.0)
                continue
            weights = torch.FloatTensor(weights) 
            weights = weights.view(1,1,-1) # (input_size, batch_size, time_steps)
            match = re.search('_[0-9]+', file)
            Y = 0.1 * int(match.group()[1:])
            Y = torch.FloatTensor([Y])
            Y = Y.view(1,1)
            samples.append((weights, Y))

In [0]:
# samples_sorted = 
samples.sort(key= lambda s: s[0].size(-1))

In [0]:
batches_s = []
x = [samples[0][0]]
y = [samples[0][1]]
for i in range(1, len(samples)):
  if samples[i][0].size(-1) == samples[i-1][0].size(-1):
    x.append(samples[i][0])
    y.append(samples[i][1])
  else:
    x = torch.stack(x, dim = 2)
    x = x.view(1, x.shape[2], -1)
    y = torch.stack(y, dim = 2)
    y = y.view(1, y.shape[2])
    batches_s.append((x,y))
    x = [samples[i][0]]
    y = [samples[i][1]]
x = torch.stack(x, dim = 2)
x = x.view(1, x.shape[2], -1)
y = torch.stack(y, dim = 2)
y = y.view(1, y.shape[2])
batches_s.append((x,y))

In [0]:
class RNN():
    def __init__(self, input_size, hidden_size, output_size, batch_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.batch_size = batch_size
#         self.ts = ts  # time step
        # self.parameters = {'Waa' : torch.randn((hidden_size, hidden_size))*math.sqrt(1/(hidden_size)),
        #                    'Wax' : torch.randn((hidden_size, input_size))*math.sqrt(1/(hidden_size)),
        #                    'Wya' : torch.randn((output_size, hidden_size))*math.sqrt(1/(output_size)),
        #                    'ba'  : torch.zeros((hidden_size, 1)),
        #                    'by'  : torch.zeros((output_size, 1))
        #                   }
        self.parameters = {'Waa' : torch.randn((hidden_size, hidden_size))*0.01,
                           'Wax' : torch.randn((hidden_size, input_size))*0.01 ,
                           'Wya' : torch.randn((output_size, hidden_size))*0.01,
                           'ba'  : torch.randn((hidden_size, 1))*0.01,
                           'by'  : torch.randn((output_size, 1))*0.01
                          }
         
    def forward(self, X):
        self.a_list = [torch.zeros(self.hidden_size, X.shape[1])]
        self.a_out = [torch.zeros(self.output_size, X.shape[1])]
        a_prev = torch.randn(self.hidden_size, X.shape[1])
        self.X = X
        self.ts = X.shape[-1]
        for t in range(min(50,self.ts)):
            xt = X[:, :, t]
            a_prev = torch.tanh(torch.matmul(self.parameters['Waa'], a_prev) + 
                                torch.matmul(self.parameters['Wax'], xt) + self.parameters['ba'])
            self.a_list.append(a_prev)
        a_Out = torch.matmul(self.parameters['Wya'], a_prev) + self.parameters['by']
        y_pred = sigmoid(a_Out)
        self.a_out.append(a_Out)
        self.Y = y_pred
        return self.Y
    
    def getY(self):
        return self.Y
    
    def getHidden(self):
        return self.hidden_size, self.batch_size
    
    def normalize(self):
        for i, j in self.parameters.items():
            self.parameters[i] = (j - j.mean())/j.std()
    def normalise(self):
        for i, j in self.parameters.items():
            self.parameters[i] = j.normal_()
    def normalize_tensor(self, l):
        return (l - l.mean())/l.std()

    def clip(self, l):
      clamped = []
      for i in range(len(l)):
        clamped.append(torch.clamp(l[i], min = -0.5, max = 0.5))
      return clamped
            
                                
    def backward(self, y, lr=0.001, lmbda = 1.0e-2):
        bs = y.shape[-1]
#         print(self.Y.shape, y.shape)
        da = self.a_list[0]
        dx = torch.zeros(self.X.shape)
        dWax = torch.zeros((self.parameters['Wax'].shape))
        dWaa = torch.zeros((self.parameters['Waa'].shape))
        dba = torch.zeros((self.parameters['ba'].shape))
        dWya = torch.zeros((self.parameters['Wya'].shape))
        dby = torch.zeros((self.parameters['by'].shape))
        
        cost = (self.Y - y)
        # print(cost.shape, self.parameters['Wya'].shape, da.shape) 
        dWya += torch.mm(cost, self.a_out[-1].t())
        dby += torch.sum(cost, axis = 1, keepdims=True)
        da += torch.mm(self.parameters['Wya'].t(), cost)
        losses = torch.sum(-1.0 * (y.mm(torch.log(self.Y.t()))))
        ts = self.ts
        if ts < 1:
          return 0
        for t in reversed(range(0, min(50,ts))):
            xt = self.X[:, :, t]
#             cost = (self.Y[:, :, t] - y[:, :, t])*sigmoid_prime(self.a_out[t])
# #             print(cost.shape)
#             dWya += torch.mm(cost, self.a_list[t].t())
#             dby += torch.sum(cost, axis = 1, keepdims=True)
#             da += torch.mm(self.parameters['Wya'].t(), cost)
            dz = da * (1 - (self.a_list[t])**2)
            dxt = torch.mm(self.parameters['Wax'].t(), dz)
            dx[:, :, t] = dxt
            dWax += torch.mm(dz, xt.t())
            da += torch.mm(self.parameters['Waa'].t(), dz)
            dWaa += torch.mm(dz, self.a_list[t-1].t())
            dba += torch.sum(dz, axis = 1, keepdims=True)
        # print((dWax, dWya, dWaa, dba, dby))   
        (dWax, dWya, dWaa, dba, dby) = self.clip([dWax/ts, dWya/ts, dWaa/ts, dba/ts, dby/ts])
        # (dWax, dWya, dWaa, dba, dby) = self.clip([dWax, dWya, dWaa, dba, dby])
        # print((dWax, dWya, dWaa, dba, dby))
        # self.parameters['Wax'] = (1-lr*(lmbda/bs*ts))*self.parameters['Wax']-lr*dWax
        # self.parameters['Wya'] = (1-lr*(lmbda/bs*ts))*self.parameters['Wya']-lr*dWya
        # self.parameters['Waa'] = (1-lr*(lmbda/bs*ts))*self.parameters['Waa']-lr*dWaa
        # self.parameters['ba']  -= lr*dba/self.ts
        # self.parameters['by']  -= lr*dby/self.ts
        self.parameters['Wax'] -= lr*dWax
        self.parameters['Wya'] -= lr*dWya
        self.parameters['Waa'] -= lr*dWaa
        self.parameters['ba']  -= lr*dba
        self.parameters['by']  -= lr*dby
        return losses

In [0]:
sentiment = RNN(input_size = 1, hidden_size = 256, output_size = 1, batch_size = 1)

In [0]:
# sentiment.parameters

In [0]:
# for i in batches:
#   pred = sentiment_glove.forward(i[0])
for epoch in range(20):
  total_cost = 0
  acc = 0
  sz = 0
  for i in batches_s:
    sz += i[1].shape[-1]
    pred = sentiment.forward(i[0])
    # print(pred)
    acc += torch.sum(pred.round() == i[1].round())
    # print(sentiment.parameters)
    total_cost += sentiment.backward(i[1], lr=0.0001)
  # print(sentiment.parameters)

  print(epoch, total_cost, acc.item()/sz)

0 tensor(9405.6777) 0.70332
1 tensor(9369.3145) 0.71124
2 tensor(9330.1934) 0.713
3 tensor(9287.5986) 0.71352
4 tensor(9241.3232) 0.71416
5 tensor(9188.8105) 0.7136
6 tensor(9131.2930) 0.7118
7 tensor(9070.2031) 0.71168
8 tensor(8996.1113) 0.7098
9 tensor(8916.8535) 0.70668
10 tensor(8822.8633) 0.7062
11 tensor(8725.8760) 0.7058
12 tensor(8617.0488) 0.705
13 tensor(8503.0059) 0.70528
14 tensor(8378.1104) 0.70584
15 tensor(8253.4287) 0.70632
16 tensor(8123.1636) 0.7068
17 tensor(7999.1851) 0.70792
18 tensor(7873.8120) 0.70924
19 tensor(7749.6997) 0.71084


In [0]:
test_filepath = ['/test/pos/', 'test/neg/']
test_we = []

In [0]:
for directory in test_filepath:
    for index, file in enumerate(os.listdir(path + directory)):
        # if index > 10 :
        #     break
        with open(path + directory + file, 'r') as f:
            text = f.read().strip('\n').lower()
            words = re.findall('[a-z]+', text)
            weights = []
            for word in words:
              if word_idx.get(word) and abs(idx_sent[word_idx[word]]) >= 1.0:
                weights.append(idx_sent[word_idx[word]])
              else:
                # weights.append(0.0)
                continue
            weights = torch.FloatTensor(weights) 
            weights = weights.view(1,1,-1) # (input_size, batch_size, time_steps)
            match = re.search('_[0-9]+', file)
            Y = 0.1 * int(match.group()[1:])
            Y = torch.FloatTensor([Y])
            Y = Y.view(1,1)
            test_we.append((weights, Y))

In [0]:
# Validation
total_cost = 0
acc = 0
for i,j in test_we:
  pred = sentiment.forward(i)
  acc += (pred.round() == j.round())
  # print(sentiment_glove.parameters)
  # total_cost += sentiment_glove.backward(j, lr=0.001)
print(acc.item()/len(test_we)*100)

71.816
