In [4]:
from data import *
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
import numpy as np
import os 
import data
import pickle
import sys
import pandas as pd
from sklearn.metrics import cohen_kappa_score
import Model
import random


# This is the iterator we'll use during training. 
# It's a generator that gives you one batch at a time.
def data_iter(source, batch_size):
    dataset_size = len(source)
    start = -1 * batch_size
    order = list(range(dataset_size))
    random.shuffle(order)

    while True:
        start += batch_size
        if start > dataset_size - batch_size:
            # Start another epoch.
            start = 0
            random.shuffle(order)   
        batch_indices = order[start:start + batch_size]
        batch = [source[index] for index in batch_indices]
        yield [source[index] for index in batch_indices]

# This is the iterator we use when we're evaluating our model. 
# It gives a list of batches that you can then iterate through.
def eval_iter(source, batch_size):
    batches = []
    dataset_size = len(source)
    start = -1 * batch_size
    order = list(range(dataset_size))
    random.shuffle(order)

    while start < dataset_size - batch_size:
        start += batch_size
        batch_indices = order[start:start + batch_size]
        batch = [source[index] for index in batch_indices]
        if len(batch) == batch_size:
            batches.append(batch)
        else:
            continue
        
    return batches

# The following function gives batches of vectors and labels, 
# these are the inputs to your model and loss function
def get_batch(batch):
    vectors = []
    labels = []
    for dict in batch:
        vectors.append(dict["text_index_sequence"])
        labels.append(dict["label"])
    return vectors, labels

def repackage_hidden(h):
    if type(h) == Variable:
        return Variable(h.data)
    else:
        return tuple(repackage_hidden(v) for v in h)

def training_loop(batch_size, num_epochs, model, loss_, optim, training_iter, dev_iter, train_eval_iter):
    step = 0
    epoch = 0
    total_batches = int(len(training_set) / batch_size)
    total_samples = total_batches * batch_size
    hidden = model.init_hidden(batch_size)
    while epoch <= num_epochs:
        epoch_loss = 0
        model.train()

        vectors, labels = get_batch(next(training_iter)) 
        vectors = torch.stack(vectors).squeeze()
        vectors = vectors.transpose(1, 0)
        
        labels = Variable(torch.stack(labels).squeeze().type('torch.FloatTensor')) 
        vectors = Variable(vectors)
        
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        output, hidden = model(vectors, hidden)
        lossy = loss_(output, labels)
        epoch_loss += lossy.data[0] * batch_size

        lossy.backward()
        torch.nn.utils.clip_grad_norm(model.parameters(), 5.0)
        optim.step()

        if step % total_batches == 0:
            loss_train = evaluate(model, train_eval_iter,batch_size)
            loss_dev = evaluate(model, dev_iter,batch_size)
            kappa_dev = evaluate_kappa(model, dev_iter,batch_size)
            with open("test.txt", "a") as myfile:
                myfile.write("Epoch %i; Step %i; Avg Loss %f; Train loss: %f; Dev loss: %f; Dev kappa: %f\n" 
                  %(epoch, step, epoch_loss/total_samples, loss_train, loss_dev, kappa_dev))
            print("Epoch %i; Step %i; Avg Loss %f; Train loss: %f; Dev loss: %f; Dev kappa: %f" 
                  %(epoch, step, epoch_loss/total_samples, loss_train, loss_dev, kappa_dev))
            epoch += 1
            
        if step % 5 == 0:
            with open("test.txt", "a") as myfile:
                myfile.write("Epoch %i; Step %i; loss %f\n" %(epoch, step, lossy.data[0]))
            print("Epoch %i; Step %i; loss %f" %(epoch, step, lossy.data[0]))
        step += 1

# This function outputs the accuracy on the dataset, we will use it during training.
def evaluate(model, data_iter, batch_size):
    model.eval()
    correct = 0
    total = 0
    evalloss = 0.0
    hidden = model.init_hidden(batch_size)
    for i in range(len(data_iter)):
        vectors, labels = get_batch(data_iter[i])
        vectors = torch.stack(vectors).squeeze()
        vectors = vectors.transpose(1, 0)
        
        labels = Variable(torch.stack(labels).squeeze().type('torch.FloatTensor'))
        vectors = Variable(vectors)

        hidden = repackage_hidden(hidden)
        output, hidden = model(vectors, hidden)
        evalloss += F.mse_loss(output, labels).data[0]
    return evalloss/len(data_iter)


def evaluate_kappa(model, data_iter, batch_size):
    model.eval()
    predicted_labels = []
    true_labels = []
    hidden = model.init_hidden(batch_size)
    for i in range(len(data_iter)):
        vectors, labels = get_batch(data_iter[i])
        vectors = torch.stack(vectors).squeeze()
        vectors = vectors.transpose(1, 0)

        vectors = Variable(vectors)
        
        hidden = repackage_hidden(hidden)
        output, hidden = model(vectors, hidden)

        predicted = [int(round(float(num))) for num in output.data.cpu().numpy()]
        predicted_labels.extend([round(float(num)) for num in output.data.cpu().numpy()])
        labels = [int(label[0]) for label in labels]
        true_labels.extend(labels)

    return cohen_kappa_score(true_labels, predicted_labels, weights = "quadratic")

In [5]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F

class LSTM(nn.Module):
    """Container module with an encoder, a recurrent module, and a decoder."""

    def __init__(self, rnn_type, vocab_size, embedding_dim, hidden_size, num_layers, dropout=0.2, bidirectional = False, pre_emb=None):
        super(LSTM, self).__init__()
        self.encoder = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = getattr(nn, rnn_type)(embedding_dim, hidden_size, num_layers, bias=False, dropout=dropout, bidirectional=bidirectional)
        self.decoder = nn.Linear(hidden_size, 1)
        self.decoder_bi = nn.Linear(hidden_size*2, 1)
        self.bidirectional = bidirectional
        self.rnn_type = rnn_type
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.init_weights()

    def init_weights(self, pre_emb=None):
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.fill_(0)
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, inputs, hidden):
        emb = self.encoder(inputs)
        output, hidden = self.rnn(emb, hidden)
        # mot here
        output = torch.mean(output, 0)
        output = torch.squeeze(output)
        print(output.size())
        if self.bidirectional:
            decoded = self.decoder_bi(output)
        else:
            decoded = self.decoder(output)
        return decoded, hidden
    
    def init_hidden(self, batch_size):
        ##next(rnn.parameters()).data=rnn.encoder.weight.data
        weight = next(self.parameters()).data
        if self.bidirectional == True:
            return (Variable(weight.new(self.num_layers * 2, batch_size, self.hidden_size).zero_()),
                    Variable(weight.new(self.num_layers * 2, batch_size, self.hidden_size).zero_()))
        else:
            return (Variable(weight.new(self.num_layers, batch_size, self.hidden_size).zero_()),
                    Variable(weight.new(self.num_layers, batch_size, self.hidden_size).zero_()))            



In [10]:
encoder = nn.Embedding(vocab_size, embedding_dim)

In [25]:
vectors, labels = get_batch(next(training_iter)) 
vectors = torch.stack(vectors).squeeze()
vectors = vectors.transpose(1, 0)
labels = Variable(torch.stack(labels).squeeze().type('torch.FloatTensor')) 
vectors = Variable(vectors)
emb = encoder(vectors)

In [34]:
weight = next(rnn.parameters()).data
hidden = (Variable(weight.new(num_layers, batch_size, hidden_size).zero_()),
                    Variable(weight.new(num_layers, batch_size, hidden_size).zero_()))            
#hidden:(1x20x24,1x20x24)
rnn = nn.LSTM(embedding_dim, hidden_size, num_layer, bias=False, dropout=0.5, bidirectional=True)

In [36]:
rnn(emb, hidden)

IndexError: list index out of range

In [8]:
raw_data = pd.read_csv("../data/training_final.csv", sep=',',header=0, index_col=0)
data_set = data.get_data(raw_data)
print('Finished Loading!')

#get max sequence length
max_seq_length = max(list(map(lambda x:len(x.split()),raw_data.essay)))
print('max seq length: ', max_seq_length)

# split to train/val/test
data_size = len(data_set)
print('data_size',data_size)
training_set = data_set[:int(data_size*0.8)]
dev_set = data_set[int(data_size*0.8):int(data_size*0.9)]
test_set = data_set[int(data_size*0.9):]


# convert and formatting
word_to_ix, vocab_size = data.build_dictionary([training_set])
#print('vocab size', vocab_size)
data.sentences_to_padded_index_sequences(word_to_ix, [training_set, dev_set], max_seq_length)
print('Finished Converting!')

Finished Loading!
max seq length:  1064
data_size 12977
Finished Converting!


In [9]:

#######
# Train

# Hyper Parameters 
model = 'LSTM'
input_size = vocab_size
hidden_dim = 24
embedding_dim = 100
batch_size = 20
learning_rate = 0.1
num_epochs = 500
num_layer = 1
bi_direction = True

# Build, initialize, and train model
rnn = Model.LSTM(model, vocab_size, embedding_dim, hidden_dim, num_layer, dropout=0.2, bidirectional=bi_direction, 
pre_emb=None)

# Loss and Optimizer
loss = nn.MSELoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)

# Train the model
training_iter = data_iter(training_set, batch_size)
train_eval_iter = eval_iter(training_set, batch_size)
dev_iter = eval_iter(dev_set, batch_size)
print('start training:')
training_loop(batch_size, num_epochs, rnn, loss, optimizer, training_iter, dev_iter, train_eval_iter)


start training:
Epoch 0; Step 0; Avg Loss 0.265792; Train loss: 112.579555; Dev loss: 113.170442; Dev kappa: -0.027539
Epoch 1; Step 0; loss 137.946243
Epoch 1; Step 5; loss 41.789780
Epoch 1; Step 10; loss 80.356880
Epoch 1; Step 15; loss 104.155869
Epoch 1; Step 20; loss 47.958649
Epoch 1; Step 25; loss 24.629101
Epoch 1; Step 30; loss 253.317505
Epoch 1; Step 35; loss 117.176674
Epoch 1; Step 40; loss 107.530396
Epoch 1; Step 45; loss 88.053818
Epoch 1; Step 50; loss 77.529739
Epoch 1; Step 55; loss 33.404148
Epoch 1; Step 60; loss 42.197281
Epoch 1; Step 65; loss 46.120537
Epoch 1; Step 70; loss 60.361961
Epoch 1; Step 75; loss 27.875690
Epoch 1; Step 80; loss 13.794611
Epoch 1; Step 85; loss 24.388214
Epoch 1; Step 90; loss 24.260725
Epoch 1; Step 95; loss 14.790932
Epoch 1; Step 100; loss 12.569665
Epoch 1; Step 105; loss 5.861126
Epoch 1; Step 110; loss 10.826163
Epoch 1; Step 115; loss 7.249639
Epoch 1; Step 120; loss 14.581629
Epoch 1; Step 125; loss 65.000916
Epoch 1; Step 13

KeyboardInterrupt: 

In [63]:
#next(rnn.parameters()).data=rnn.encoder.weight.data


 1.8558e-01  4.7335e-02 -1.3447e-01  ...   7.8784e-02 -5.5189e-03 -1.7380e-01
 9.2983e-02 -3.6085e-02 -8.1732e-02  ...  -9.1744e-02  7.3796e-02 -1.5655e-02
 7.5980e-02 -5.6447e-02  5.3446e-02  ...  -6.9504e-02  8.3970e-02 -7.4207e-02
                ...                   ⋱                   ...                
-6.5086e-02 -7.0979e-02 -5.5513e-02  ...  -7.6042e-02  9.2168e-02 -6.8419e-02
-3.2056e-02 -6.2593e-02 -5.3247e-02  ...   5.5068e-02 -5.7677e-03  5.9561e-03
 2.7188e-02 -7.6712e-02  7.0722e-02  ...  -9.8399e-02 -3.1293e-02  6.6518e-02
[torch.FloatTensor of size 68573x100]

In [20]:
vectors, labels = get_batch(next(training_iter)) 

In [23]:
vectors, labels = get_batch(next(training_iter)) 
vectors = torch.stack(vectors).squeeze()
vectors = vectors.transpose(1, 0)

labels = Variable(torch.stack(labels).squeeze().type('torch.FloatTensor')) 
vectors = Variable(vectors)

In [24]:
vectors

Variable containing:
 59350   6867  15880  ...   15880  39854  26002
 61349  65071  30726  ...   68281  59350   6578
 68553   6911  53084  ...   31959  19633  37555
        ...            ⋱           ...         
     0      0      0  ...       0      0      0
     0      0      0  ...       0      0      0
     0      0      0  ...       0      0      0
[torch.LongTensor of size 1064x20]