In [1]:
import os
#os.environ["CUDA_VISIBLE_DEVICES"] = "3"
import torch
from torch import nn,functional
from torchtext import data
from torchtext import datasets
from torchtext.vocab import Vectors, GloVe, CharNGram, FastText
import numpy as np  # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

################################
# DataLoader
################################

# set up fields
TEXT = data.Field()
LABEL = data.Field(sequential=False,dtype=torch.long)

# make splits for data
# DO NOT MODIFY: fine_grained=True, train_subtrees=False
train, val, test = datasets.SST.splits(
    TEXT, LABEL, fine_grained=True, train_subtrees=False)

# print information about the data
print('train.fields', train.fields)
print('len(train)', len(train))
print('vars(train[0])', vars(train[0]))


train.fields {'text': <torchtext.data.field.Field object at 0x7f532c1253c8>, 'label': <torchtext.data.field.Field object at 0x7f532c125470>}
len(train) 8544
vars(train[0]) {'text': ['The', 'Rock', 'is', 'destined', 'to', 'be', 'the', '21st', 'Century', "'s", 'new', '``', 'Conan', "''", 'and', 'that', 'he', "'s", 'going', 'to', 'make', 'a', 'splash', 'even', 'greater', 'than', 'Arnold', 'Schwarzenegger', ',', 'Jean-Claud', 'Van', 'Damme', 'or', 'Steven', 'Segal', '.'], 'label': 'positive'}


In [2]:
# build the vocabulary
# you can use other pretrained vectors, refer to https://github.com/pytorch/text/blob/master/torchtext/vocab.py
TEXT.build_vocab(train, vectors=Vectors(name='vector.txt', cache='./data'))
LABEL.build_vocab(train)
# We can also see the vocabulary directly using either the stoi (string to int) or itos (int to string) method.
print(TEXT.vocab.itos[:10],"\n")
print(LABEL.vocab.stoi,"\n")
print(TEXT.vocab.freqs.most_common(20),"\n")

# print vocab information
print('len(TEXT.vocab)', len(TEXT.vocab),"\n")
print('TEXT.vocab.vectors.size()', TEXT.vocab.vectors.size(),"\n")

['<unk>', '<pad>', '.', ',', 'the', 'and', 'a', 'of', 'to', "'s"] 

defaultdict(<function _default_unk_index at 0x7f52ddcd6f28>, {'<unk>': 0, 'positive': 1, 'negative': 2, 'neutral': 3, 'very positive': 4, 'very negative': 5}) 

[('.', 8024), (',', 7131), ('the', 6037), ('and', 4431), ('a', 4403), ('of', 4386), ('to', 2995), ("'s", 2544), ('is', 2536), ('that', 1915), ('in', 1789), ('it', 1775), ('The', 1265), ('as', 1200), ('film', 1152), ('but', 1076), ('with', 1071), ('for', 963), ('movie', 959), ('its', 912)] 

len(TEXT.vocab) 18280 

TEXT.vocab.vectors.size() torch.Size([18280, 300]) 



In [3]:
pretrained_embeddings = TEXT.vocab.vectors

print(pretrained_embeddings.shape)

torch.Size([18280, 300])


In [4]:
class SentimentNet(nn.Module):
    def __init__(self,embed_size, num_hiddens, num_layers,
                 bidirectional, labels, **kwargs):
        super(SentimentNet, self).__init__(**kwargs)
        self.num_hiddens = num_hiddens
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.embedding = nn.Embedding.from_pretrained(pretrained_embeddings)
        self.embedding.weight.requires_grad = False
        self.encoder = nn.LSTM(input_size=embed_size, hidden_size=self.num_hiddens,
                               num_layers=num_layers, bidirectional=self.bidirectional,
                               dropout=0.5)
        if self.bidirectional:
            self.decoder = nn.Linear(num_hiddens * 4, labels)
        else:
            self.decoder = nn.Linear(num_hiddens * 2, labels)
        self.softmax = nn.Softmax(dim=1)
        self.dropout = nn.Dropout(0.5)


    def forward(self, inputs):
        embeddings = self.dropout(self.embedding(inputs))
        states, hidden = self.encoder(embeddings)
        encoding = torch.cat([states[0], states[-1]], dim=1)
        
        out = self.dropout(self.decoder(encoding))
        out = self.softmax(out)
        return out

In [None]:
embed_size = 300
num_hiddens = 100
num_layers = 2
bidirectional = True
labels = 5
batch_size=128
device = torch.device('cuda:7')
#device = torch.device('cpu')

model = SentimentNet(embed_size=embed_size,
                   num_hiddens=num_hiddens, num_layers=num_layers,
                   bidirectional=bidirectional,labels=labels)
model.to(device)

SentimentNet(
  (embedding): Embedding(18280, 300)
  (encoder): LSTM(300, 100, num_layers=2, dropout=0.5, bidirectional=True)
  (decoder): Linear(in_features=400, out_features=5, bias=True)
  (softmax): Softmax()
  (dropout): Dropout(p=0.5)
)

In [None]:
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(model.parameters(),lr=0.1, momentum=0.9,weight_decay=1e-7)


In [None]:
# make iterator for splits
train_iter, val_iter, test_iter = data.BucketIterator.splits((train, val, test), batch_size=batch_size,shuffle=True)
epochs = 150
train_losses, validation_losses,validation_accs = [] ,[],[]
for epoch in range(epochs):
    model.zero_grad()
    model.train()
    running_loss=0
    acc=0
    for batch in train_iter:
        text=batch.text.to(device)
        label=batch.label-1
        #label=label>=3
        #label=label.long()
        label=label.to(device)
        
        optimizer.zero_grad()
        output = model(text) 
        loss = criterion(output,label)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        acc+=torch.sum(torch.argmax(output,1)==label).cpu().item()/128.0
        
    with torch.no_grad():
        model.eval()
    val_loss=0
    val_acc=0
    for val_batch in val_iter:
        val_text=val_batch.text.to(device)
        val_label=val_batch.label-1
        #val_label=val_label>=3
        #val_label=val_label.long()
        val_label=val_label.to(device)
        val_output = model.forward(val_text) 
        
        val_loss += criterion(val_output,val_label).item()
        val_acc += torch.sum(torch.argmax(val_output,1)==val_label).cpu().item()/128.0

    
    train_losses.append(running_loss/len(train_iter))
    validation_losses.append(val_loss/len(val_iter))
    validation_accs.append(val_acc/len(val_iter))
        
    print("Epoch: {}/{}.. ".format(epoch+1, epochs),
              "Train Loss: {:.3f}.. ".format(running_loss/len(train_iter)),
              "Train_Acc: {:.3f}.. ".format(acc/len(train_iter)),
              "Val Loss: {:.3f}.. ".format(val_loss/len(val_iter)),
              "Val_Acc: {:.3f}".format(val_acc/len(val_iter)))
    

Epoch: 1/150..  Train Loss: 1.597..  Train_Acc: 0.262..  Val Loss: 1.587..  Val_Acc: 0.242
Epoch: 2/150..  Train Loss: 1.591..  Train_Acc: 0.266..  Val Loss: 1.584..  Val_Acc: 0.261
Epoch: 3/150..  Train Loss: 1.591..  Train_Acc: 0.270..  Val Loss: 1.584..  Val_Acc: 0.242
Epoch: 4/150..  Train Loss: 1.589..  Train_Acc: 0.269..  Val Loss: 1.580..  Val_Acc: 0.303
Epoch: 5/150..  Train Loss: 1.588..  Train_Acc: 0.278..  Val Loss: 1.578..  Val_Acc: 0.270
Epoch: 6/150..  Train Loss: 1.586..  Train_Acc: 0.278..  Val Loss: 1.570..  Val_Acc: 0.315
Epoch: 7/150..  Train Loss: 1.575..  Train_Acc: 0.291..  Val Loss: 1.550..  Val_Acc: 0.332
Epoch: 8/150..  Train Loss: 1.564..  Train_Acc: 0.320..  Val Loss: 1.531..  Val_Acc: 0.356
Epoch: 9/150..  Train Loss: 1.563..  Train_Acc: 0.320..  Val Loss: 1.529..  Val_Acc: 0.343
Epoch: 10/150..  Train Loss: 1.558..  Train_Acc: 0.324..  Val Loss: 1.542..  Val_Acc: 0.318
Epoch: 11/150..  Train Loss: 1.556..  Train_Acc: 0.314..  Val Loss: 1.523..  Val_Acc: 0.3

Epoch: 91/150..  Train Loss: 1.474..  Train_Acc: 0.411..  Val Loss: 1.453..  Val_Acc: 0.425
Epoch: 92/150..  Train Loss: 1.482..  Train_Acc: 0.404..  Val Loss: 1.467..  Val_Acc: 0.411
Epoch: 93/150..  Train Loss: 1.479..  Train_Acc: 0.401..  Val Loss: 1.445..  Val_Acc: 0.423
Epoch: 94/150..  Train Loss: 1.477..  Train_Acc: 0.406..  Val Loss: 1.438..  Val_Acc: 0.439
Epoch: 95/150..  Train Loss: 1.483..  Train_Acc: 0.406..  Val Loss: 1.446..  Val_Acc: 0.437
Epoch: 96/150..  Train Loss: 1.483..  Train_Acc: 0.404..  Val Loss: 1.444..  Val_Acc: 0.432
Epoch: 97/150..  Train Loss: 1.483..  Train_Acc: 0.397..  Val Loss: 1.461..  Val_Acc: 0.413
Epoch: 98/150..  Train Loss: 1.480..  Train_Acc: 0.405..  Val Loss: 1.448..  Val_Acc: 0.422
Epoch: 99/150..  Train Loss: 1.471..  Train_Acc: 0.415..  Val Loss: 1.447..  Val_Acc: 0.431
Epoch: 100/150..  Train Loss: 1.471..  Train_Acc: 0.415..  Val Loss: 1.448..  Val_Acc: 0.426
Epoch: 101/150..  Train Loss: 1.479..  Train_Acc: 0.406..  Val Loss: 1.465..  V

In [None]:
#plot image
plt.plot(train_losses, label='Training loss')
plt.plot(validation_losses, label='Validation loss')
plt.plot(validation_accs,label='Validation Accuracy')
plt.legend(frameon=False)

In [None]:
# make iterator for splits
with torch.no_grad():
    model.eval()
test_loss=0
test_acc=0
for batch in test_iter:
    text=batch.text.to(device)
    label=batch.label-1
    #label=label>=3
    #label=label.long()
    label=label.to(device)
    
    output = model.forward(text) 
    test_loss += criterion(output,label).item()

    #print(torch.sum(torch.argmax(output,1)==label).cpu().item()/len(label))
    test_acc += torch.sum(torch.argmax(output,1)==label).cpu().item()/128.0

print("Test Loss: {:.3f}.. ".format(test_loss/len(test_iter)),
        "Test Accuracy: {:.3f}".format(test_acc/len(test_iter)))