In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch.nn as nn

import torch

import nltk

In [3]:
doc = "They think he's a good president because he's done things they like: appointing conservatives to the court and cutting taxes, for example. But every other normal Republican would have done the exact same things, made actual deals to get much more, and they'd have left out all the ridiculous drama that keeps Trump's approval so low and his accomplishments so meager." 

In [4]:
sentences = nltk.sent_tokenize(doc)

In [5]:
import pandas as pd

df = pd.DataFrame(columns = ["sentence"])

df["sentence"] = sentences
df["sentence"] = df.sentence.map(lambda s :  s.lower())

In [6]:
df['words'] = df.sentence.map(nltk.word_tokenize)

In [7]:
df.words

0    [they, think, he, 's, a, good, president, beca...
1    [but, every, other, normal, republican, would,...
Name: words, dtype: object

In [8]:
max_sentence_len = df.words.map(len).max()

In [9]:
import itertools

vocab = list(set(itertools.chain.from_iterable(df.words.tolist())))

In [10]:
vocab

['accomplishments',
 'done',
 'his',
 'a',
 'like',
 ',',
 'they',
 'same',
 'cutting',
 'deals',
 'keeps',
 'appointing',
 'and',
 'but',
 'actual',
 'that',
 'conservatives',
 'out',
 'low',
 'the',
 'republican',
 'every',
 'meager',
 'example',
 'exact',
 'get',
 'to',
 "'d",
 'all',
 'because',
 'made',
 'drama',
 ':',
 'he',
 'things',
 'normal',
 'left',
 '.',
 "'s",
 'other',
 'president',
 'ridiculous',
 'trump',
 'good',
 'would',
 'court',
 'approval',
 'for',
 'think',
 'have',
 'more',
 'so',
 'taxes',
 'much']

In [11]:
import re
?re.match

In [12]:
re.match("\w+",vocab[1])

<_sre.SRE_Match object; span=(0, 4), match='done'>

In [13]:
def matcher(word):
    return re.match("\w+", word)

vocab = list(filter(matcher, itertools.chain.from_iterable(df.words)))

In [14]:
vocab += ["<unk>"]

In [15]:
vocab

['they',
 'think',
 'he',
 'a',
 'good',
 'president',
 'because',
 'he',
 'done',
 'things',
 'they',
 'like',
 'appointing',
 'conservatives',
 'to',
 'the',
 'court',
 'and',
 'cutting',
 'taxes',
 'for',
 'example',
 'but',
 'every',
 'other',
 'normal',
 'republican',
 'would',
 'have',
 'done',
 'the',
 'exact',
 'same',
 'things',
 'made',
 'actual',
 'deals',
 'to',
 'get',
 'much',
 'more',
 'and',
 'they',
 'have',
 'left',
 'out',
 'all',
 'the',
 'ridiculous',
 'drama',
 'that',
 'keeps',
 'trump',
 'approval',
 'so',
 'low',
 'and',
 'his',
 'accomplishments',
 'so',
 'meager',
 '<unk>']

In [16]:
index2vocab = {
    index: word
    for index, word in enumerate(vocab)
}

vocab2index = {
    word: index
    for index, word in enumerate(vocab)
}

In [17]:
vocab2index

{'they': 42,
 'think': 1,
 'he': 7,
 'a': 3,
 'good': 4,
 'president': 5,
 'because': 6,
 'done': 29,
 'things': 33,
 'like': 11,
 'appointing': 12,
 'conservatives': 13,
 'to': 37,
 'the': 47,
 'court': 16,
 'and': 56,
 'cutting': 18,
 'taxes': 19,
 'for': 20,
 'example': 21,
 'but': 22,
 'every': 23,
 'other': 24,
 'normal': 25,
 'republican': 26,
 'would': 27,
 'have': 43,
 'exact': 31,
 'same': 32,
 'made': 34,
 'actual': 35,
 'deals': 36,
 'get': 38,
 'much': 39,
 'more': 40,
 'left': 44,
 'out': 45,
 'all': 46,
 'ridiculous': 48,
 'drama': 49,
 'that': 50,
 'keeps': 51,
 'trump': 52,
 'approval': 53,
 'so': 59,
 'low': 55,
 'his': 57,
 'accomplishments': 58,
 'meager': 60,
 '<unk>': 61}

In [18]:
def get_word_index(word):
    index = vocab2index.get(
        word,
        vocab2index["<unk>"]
    )
    return index

df["word_indices"] = df.words.map(
    lambda words: list(map(get_word_index, words))
)

In [19]:
df.word_indices

0    [42, 1, 7, 61, 3, 4, 5, 6, 7, 61, 29, 33, 42, ...
1    [22, 23, 24, 25, 26, 27, 43, 29, 47, 31, 32, 3...
Name: word_indices, dtype: object

In [20]:
from torch.autograd import Variable

def list2var(l):
    print(l)
    tensor = torch.LongTensor(l)
    return Variable(tensor)

variables = df.word_indices.map(list2var).tolist()

[42, 1, 7, 61, 3, 4, 5, 6, 7, 61, 29, 33, 42, 11, 61, 12, 13, 37, 47, 16, 56, 18, 19, 61, 20, 21, 61]
[22, 23, 24, 25, 26, 27, 43, 29, 47, 31, 32, 33, 61, 34, 35, 36, 37, 38, 39, 40, 61, 56, 42, 61, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 61, 53, 59, 55, 56, 57, 58, 59, 60, 61]


In [21]:
from torch.nn.utils.rnn import pad_sequence

seq = pad_sequence(variables, batch_first=True)

In [22]:
embedding = nn.Embedding(num_embeddings=len(vocab), embedding_dim=100)

In [23]:
a = embedding(seq[0])
b = embedding(seq[1])

In [24]:
c = torch.stack([a,b], dim=0)
c.shape

torch.Size([2, 44, 100])

In [25]:
seq

tensor([[42,  1,  7, 61,  3,  4,  5,  6,  7, 61, 29, 33, 42, 11, 61, 12, 13, 37,
         47, 16, 56, 18, 19, 61, 20, 21, 61,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0],
        [22, 23, 24, 25, 26, 27, 43, 29, 47, 31, 32, 33, 61, 34, 35, 36, 37, 38,
         39, 40, 61, 56, 42, 61, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 61, 53,
         59, 55, 56, 57, 58, 59, 60, 61]])

In [26]:
seq.shape

torch.Size([2, 44])

In [27]:
from models import *

gru = WordGRU(100, len(vocab), bidirectional=True)

In [28]:
h_0 = torch.zeros(2, 2, 100)
o, h = gru(seq)
o.shape

TypeError: forward() missing 1 required positional argument: 'lengths'

- 2 sentences
- 44 words in each
- 100 dim of each word
- PyTorch LSTM is only concerned with the last dimension (100)
- For word attention all sentences( each sentence is a batch ) is padded

## Word Attention

In [None]:
attn = WordAttention(200)
s_vec = attn(o)

In [None]:
s_vec.shape

- 2 Sentences
- Each sentence of size 200
- A batch of documents would have shape: `[batch_size, max_sent_len, max_word_len]`

In [None]:
s_vec

## Sentence LSTM

In [None]:
sentence_gru = SentenceGRU(200, 100)

In [None]:
s_vec.unsqueeze(dim=0).shape

In [None]:
h_0_s = torch.zeros(2,1,100)
sentence_output, h_s = sentence_gru(s_vec.unsqueeze(dim=0), h_0_s)

In [None]:
sentence_output.shape

## Sentence Attention

In [None]:
s_attn = SentenceAttention(200)

In [None]:
d_vec = s_attn(sentence_output)

In [None]:
d_vec.shape

In [None]:
output_layer = OutputLayer(200,2)
output = output_layer(d_vec)

In [None]:
output.squeeze(dim=1)

In [None]:
loss = nn.NLLLoss()

In [None]:
l = loss(output, torch.LongTensor([1]))

In [None]:
l.backward()

Things that the HAN module should do:
   Take in a batch of documents -> 

## Try out a complete HAN

In [None]:
documents.shape

In [None]:
test_word_gru = WordGRU(50, 6, 2, 50)
test_word_attn = WordAttention(100)

words = []
sentence_vec = []
for document in documents:
    encoded_words, encoded_hidden = test_word_gru(document)
    words.append(encoded_words)
    encoded_sentence = test_word_attn(encoded_words) 
    sentence_vec.append(encoded_sentence)

In [None]:
doc_tensor = torch.stack(sentence_vec, dim=0)

In [None]:
test_sent_gru = SentenceGRU(100, 50 )
encoded_sentence, sentence_hidden = test_sent_gru(doc_tensor)

In [None]:
encoded_sentence.shape

In [None]:
test_sent_attn = SentenceAttention(100)
encoded_doc = test_sent_attn(encoded_sentence)

In [None]:
encoded_doc.shape

In [None]:
sentence_vec[0].shape

In [None]:
encoded_sentence.shape

In [None]:
len(sentence_vec)

In [37]:
from models import HAN

han = HAN(
    vocab_size=6,
    embedding_dim=50,
    word_hidden_size=50,
    sent_hidden_size=50,
    num_labels=2,
    bidirectional=True,
    cuda=False,
)


In [38]:
han.word_gru

WordGRU(
  (embedding): Embedding(6, 50)
  (gru): GRU(50, 50, batch_first=True, bidirectional=True)
)

In [39]:
han.word_attn

WordAttention(
  (linear): Linear(in_features=100, out_features=100, bias=True)
)

In [40]:
documents = torch.LongTensor(
[
    [
        [
            1,2,3,4,5
        ], # sent 1
        [
            1,2,3,0,0
            
        ], # sent 2
    ], #Doc 1
    [
        [1,2,3,4,5],
        [1,2,3,4,5]
    ], #Doc 2
    [
        [1,2,3,4,5],
        [0,0,0,0,0]
    ] #Doc 3
]
)

- 3 Documents
- A Max of 2 sentences 
- Max of 5 words in each sentence

In [41]:
o = han(documents)

Dimension of input to WordGRU  torch.Size([2, 5])
Dimension of output from WordGRU  torch.Size([2, 5, 100])
Dimension of input to WordAttn torch.Size([2, 5, 100])
Dimension of input to WordGRU  torch.Size([2, 5])
Dimension of output from WordGRU  torch.Size([2, 5, 100])
Dimension of input to WordAttn torch.Size([2, 5, 100])
Dimension of input to WordGRU  torch.Size([2, 5])
Dimension of output from WordGRU  torch.Size([2, 5, 100])
Dimension of input to WordAttn torch.Size([2, 5, 100])
Size of Doc Vector  torch.Size([3, 2, 100])


  o = self.softmax(o)


In [42]:
o

tensor([[-0.4218, -1.0667],
        [-0.6086, -0.7856],
        [-0.3362, -1.2536]], grad_fn=<LogSoftmaxBackward>)

In [64]:
documents = torch.randint(0,99, (3,20,100), dtype=torch.LongTensor)

TypeError: randint() received an invalid combination of arguments - got (int, int, tuple, dtype=torch.tensortype), but expected one of:
 * (int high, tuple of ints size, torch.Generator generator, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool requires_grad)
 * (int high, tuple of ints size, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool requires_grad)
 * (int low, int high, tuple of ints size, torch.Generator generator, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool requires_grad)
 * (int low, int high, tuple of ints size, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool requires_grad)


In [67]:
d = torch.rand_like(documents)

RuntimeError: _th_uniform_ is not implemented for type torch.LongTensor

In [80]:
d = torch.randint(0,5, (10,100))

In [81]:
d = d.view(10,20,5)

In [82]:
han(d)

Dimension of input to WordGRU  torch.Size([20, 5])
Dimension of output from WordGRU  torch.Size([20, 5, 100])
Dimension of input to WordAttn torch.Size([20, 5, 100])
Dimension of input to WordGRU  torch.Size([20, 5])
Dimension of output from WordGRU  torch.Size([20, 5, 100])
Dimension of input to WordAttn torch.Size([20, 5, 100])
Dimension of input to WordGRU  torch.Size([20, 5])
Dimension of output from WordGRU  torch.Size([20, 5, 100])
Dimension of input to WordAttn torch.Size([20, 5, 100])
Dimension of input to WordGRU  torch.Size([20, 5])
Dimension of output from WordGRU  torch.Size([20, 5, 100])
Dimension of input to WordAttn torch.Size([20, 5, 100])
Dimension of input to WordGRU  torch.Size([20, 5])
Dimension of output from WordGRU  torch.Size([20, 5, 100])
Dimension of input to WordAttn torch.Size([20, 5, 100])
Dimension of input to WordGRU  torch.Size([20, 5])
Dimension of output from WordGRU  torch.Size([20, 5, 100])
Dimension of input to WordAttn torch.Size([20, 5, 100])
Dime

  o = self.softmax(o)


tensor([[-3.1366e-03, -5.7662e+00],
        [-2.1672e+00, -1.2161e-01],
        [-3.5557e+00, -2.8976e-02],
        [-1.0627e+01, -2.3842e-05],
        [-1.4499e+00, -2.6735e-01],
        [-1.3893e-02, -4.2833e+00],
        [-3.0451e-03, -5.7956e+00],
        [-3.2339e+00, -4.0200e-02],
        [-4.0753e+00, -1.7133e-02],
        [-1.1701e-02, -4.4539e+00]], grad_fn=<LogSoftmaxBackward>)