In [1]:
from IPython.core.interactiveshell import InteractiveShell 
InteractiveShell.ast_node_interactivity = "all"

%load_ext autoreload
%autoreload 2

from vocab import VocabEntry
from utils import pad_sents_char

import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# Toy dialogue used as input in the cells below; every utterance is
# whitespace-tokenised into a list of word strings.
sentences = [
    utterance.split()
    for utterance in (
        'Human: What do we want?',
        'Computer: Natural language processing!',
        'Human: When do we want it?',
        'Computer: When do we want what?',
    )
]

len(sentences)

4

### VocabEntry

In [74]:
# VocabEntry is already imported in the first cell; this re-import is redundant but harmless.
from vocab import VocabEntry
voc_entry = VocabEntry()
# Convert the word-level sentences into a character-index tensor on the CPU.
# The recorded output shape is (6, 4, 21); presumably
# (max sentence length, batch size, max word length) — TODO confirm in vocab.py.
voc_entry_output = voc_entry.to_input_tensor_char(sentences, 'cpu')
voc_entry_output.shape
#voc_entry_output

torch.Size([6, 4, 21])

### Vocab

In [75]:
# Build a small source/target vocabulary from the tiny Spanish (src) / English (tgt) files.
from utils import read_corpus
from vocab import Vocab
# Paths are relative to the notebook's working directory.
src_sents = read_corpus('./en_es_data/train_tiny.es', source='src')
tgt_sents = read_corpus('./en_es_data/train_tiny.en', source='tgt')

# presumably vocab_size caps the number of word types kept and freq_cutoff drops
# rarer words — verify against Vocab.build in vocab.py.
vocab = Vocab.build(src_sents, tgt_sents, vocab_size=200, freq_cutoff=1)

initialize source vocabulary ..
number of word types: 128, number of word types w/ frequency >= 1: 128
initialize target vocabulary ..
number of word types: 130, number of word types w/ frequency >= 1: 130


In [5]:
# Look up the index of the '<pad>' token in the source vocabulary (recorded output: 0).
vocab.src['<pad>']

0

### CNN

In [26]:
# Smoke-test the project's CNN module on a random batch.
from cnn import CNN

input_data = torch.randn(2,3,6) # num_of_sentence, num_of_dim per word, num_of_words_per_sent

# presumably CNN(in_channels, out_channels) — verify against cnn.py.
cnn_obj = CNN(3, 1)

output = cnn_obj(input_data)

# Both expressions are displayed because ast_node_interactivity is set to "all".
# The recorded output has shape (2, 1).
output.shape
output

torch.Size([2, 1])

tensor([[0.7477],
        [0.8650]], grad_fn=<MaxBackward0>)

### Highway

In [7]:
# Smoke-test the project's Highway module: the recorded output shape (2, 3, 5)
# matches the input shape.
from highway import Highway
# presumably the argument is the feature size of the last axis — verify against highway.py.
highway_obj = Highway(5)

x_input = torch.randn(2,3,5)

highway_obj(x_input).shape

torch.Size([2, 3, 5])

### Model_Embedding

In [72]:
# initialization of model embedding
from model_embeddings import ModelEmbeddings

# Char-index tensor for the toy sentences (same call as in the VocabEntry section).
voc_entry = VocabEntry()
voc_entry_output = voc_entry.to_input_tensor_char(sentences, 'cpu')

# presumably ModelEmbeddings(embed_size, vocab_entry) — verify against model_embeddings.py.
# Requires `vocab` from the Vocab section to exist in the kernel.
model_embed_obj = ModelEmbeddings(5, vocab.src)

#### Goal: turn x_padded into x_word_emb

- feed x_padded to x_emb
- Embedding to CNN
- CNN to Highway

In [73]:
## use of model embedding

# Rebuild the char-index tensor (this repeats the previous cell's setup).
voc_entry = VocabEntry()
voc_entry_output = voc_entry.to_input_tensor_char(sentences, 'cpu')
voc_entry_output.shape
#voc_entry_output

# NOTE(review): the recorded output is (24, 5) for a (6, 4, 21) input, i.e. the
# first two axes come back flattened (6 * 4 = 24). Confirm whether
# ModelEmbeddings is meant to reshape back to (6, 4, 5).
output = model_embed_obj(voc_entry_output)
output.shape

torch.Size([6, 4, 21])

torch.Size([24, 5])

## Embedding Exploration

In [32]:
# Lookup table with 10 rows (vocabulary size) and 3 columns (embedding size).
embedding = nn.Embedding(num_embeddings=10, embedding_dim=3)

# One sequence of five word indices; every index must lie in [0, 10).
input_data = torch.tensor([[0, 2, 0, 5, 9]], dtype=torch.long)
input_data.shape  # torch.Size([1, 5])

# Each index is replaced by its 3-dim row, which adds a trailing axis.
output = embedding(input_data)
output.shape  # torch.Size([1, 5, 3])

torch.Size([1, 5])

torch.Size([1, 5, 3])

In [None]:
# padding_idx=0 makes row 0 of the table all zeros and excludes it from
# gradient updates, so index 0 can be used as a padding token.
embedding = nn.Embedding(10, 3, padding_idx=0)
# Named `token_ids` rather than `input` so the built-in input() is not shadowed.
token_ids = torch.LongTensor([[0,2,0,5]])
embedding(token_ids)


In [None]:
# Plain embedding table (no padding index): 10 rows, 3 dims per row.
embedding = nn.Embedding(10, 3)
embedding.weight
# Named `token_ids` rather than `input` so the built-in input() is not shadowed.
token_ids = torch.LongTensor([[0,2,0,5, 9]])
# Each index selects the matching row of embedding.weight.
embedding(token_ids)

In [None]:
# With padding_idx=2, row 2 of the table is all zeros.
embedding = nn.Embedding(10, 3, padding_idx=2)
embedding.weight
# Named `token_ids` rather than `input` so the built-in input() is not shadowed.
token_ids = torch.LongTensor([[0,2,0,5, 9]])
token_ids
token_ids.shape
# Position 1 holds index 2, so its embedding is the zero vector.
output = embedding(token_ids)
output
output.shape

In [28]:
# Embedding lookup on a 3-D index tensor: the result gains a trailing axis of size 3.
embedding = nn.Embedding(10, 3, padding_idx=0)

# NOTE(review): `input` shadows the built-in input(); the name is kept here
# because a later cell indexes `input[1,0,:2]`.
input = torch.randint(0,10, (15, 1, 5))  # 15 sent length, 1 batch, each sentence has 5 words
input.shape
output = embedding(input)
output.shape

torch.Size([15, 1, 5])

torch.Size([15, 1, 5, 3])

In [None]:
# Inspect the first two entries along the third axis at position [1, 0] of `output`;
# meant to be compared with the same slice of `input` — assumes `output` is the
# (15, 1, 5, 3) embedding result from the previous cell.
output[1,0,:2]

In [None]:
# The two indices whose embeddings were shown above — assumes `input` is still
# the (15, 1, 5) index tensor and has not been deleted or rebound.
input[1,0,:2]

## Conv1D exploration

In [None]:
# Four words, each a 5-dim vector: (batch=1, words=4, dims=5).
my_input = torch.ones(1, 4, 5)
my_input.shape

# Conv1d expects (batch, channels, length): put the 5 dims before the 4 words.
rand_arr_permute = my_input.clone().transpose(1, 2)
rand_arr_permute.shape

# 5 input channels (see my_input), 1 output channel, window of 2 words.
conv1 = nn.Conv1d(in_channels=5, out_channels=1, kernel_size=(2,))
conv1.weight
conv1.bias

with torch.no_grad():
    # With weight=1 and bias=0 every output is the plain sum over one window.
    conv1.weight.fill_(1)
    conv1.bias.fill_(0)
    # Manual result for the first window (words 0 and 1) ...
    print((rand_arr_permute[..., :2] * conv1.weight).sum() + conv1.bias)

    # ... which should match every position of the layer's own output.
    output = conv1(rand_arr_permute)
    print(output)
    print(output.shape)


#### Conv1D with 4 dimensions

In [55]:
# 4-D input: batch 2, sentences 3, words 4 per sentence, embedding 10 per word.
# (An earlier torch.randint assignment to `a` was dead code: it was overwritten
# by this line before being used.)
a = torch.randn(2,3,4,10)
a.shape

# Move the embedding axis (10) to position -2 so it acts as Conv1d's channel axis.
b = a.permute(0,1,3,2)
b.shape

# Conv1d does not accept 4-D input, so merge the first two axes into one.
# reshape (rather than view) also works when the permuted layout cannot be
# expressed as a view.
b_reduced = b.reshape(-1, b.shape[-2], b.shape[-1])
b_reduced.shape

torch.Size([2, 3, 4, 10])

torch.Size([2, 3, 10, 4])

torch.Size([6, 10, 4])

In [56]:
# Goal: shrink the embedding axis from 10 to 5.
# Input  b_reduced: torch.Size([6, 10, 4])
# Output expected : (6, 5, L) where L = 4 - 2 + 1 = 3 for a window of 2.
conv1 = nn.Conv1d(in_channels=10, out_channels=5, kernel_size=2)

In [58]:
# Apply the 10 -> 5 channel convolution to the flattened batch; the recorded
# output shape is (6, 5, 3). Assumes `conv1` and `b_reduced` come from the two
# preceding cells.
c = conv1(b_reduced)
c.shape

torch.Size([6, 5, 3])

In [None]:
# Two sentences of character-index words with ragged lengths
# (3 words vs 4 words, 3 to 5 characters per word).
sents = [
    [
        [1, 30, 2],
        [1, 31, 2],
        [1, 32, 70, 2],
    ],
    [
        [1, 85, 33, 85, 2],
        [1, 32, 2],
        [1, 31, 2],
        [1, 30, 2],
    ],
]


In [None]:
[[[1, 11, 50, 42, 30, 43, 71, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 26, 37, 30, 49, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 33, 44, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 52, 34, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 52, 30, 43, 49, 70, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], [[1, 6, 44, 42, 45, 50, 49, 34, 47, 71, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 17, 30, 49, 50, 47, 30, 41, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 41, 30, 43, 36, 50, 30, 36, 34, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 45, 47, 44, 32, 34, 48, 48, 38, 43, 36, 69, 2, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], [[1, 11, 50, 42, 30, 43, 71, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 26, 37, 34, 43, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 33, 44, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 52, 34, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 52, 30, 43, 49, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 38, 49, 70, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], [[1, 6, 44, 42, 45, 50, 49, 34, 47, 71, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 26, 37, 34, 43, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 33, 44, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 52, 34, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 52, 30, 43, 49, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 52, 37, 30, 49, 70, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]] 

[[[1, 11, 50, 42, 30, 43, 71, 2, 0, 0, 0, 0, 0], [1, 26, 37, 30, 49, 2, 0, 0, 0, 0, 0, 0, 0], [1, 33, 44, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 52, 34, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 52, 30, 43, 49, 70, 2, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], [[1, 6, 44, 42, 45, 50, 49, 34, 47, 71, 2, 0, 0], [1, 17, 30, 49, 50, 47, 30, 41, 2, 0, 0, 0, 0], [1, 41, 30, 43, 36, 50, 30, 36, 34, 2, 0, 0, 0], [1, 45, 47, 44, 32, 34, 48, 48, 38, 43, 36, 69, 2], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], [[1, 11, 50, 42, 30, 43, 71, 2, 0, 0, 0, 0, 0], [1, 26, 37, 34, 43, 2, 0, 0, 0, 0, 0, 0, 0], [1, 33, 44, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 52, 34, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 52, 30, 43, 49, 2, 0, 0, 0, 0, 0, 0, 0], [1, 38, 49, 70, 2, 0, 0, 0, 0, 0, 0, 0, 0]], [[1, 6, 44, 42, 45, 50, 49, 34, 47, 71, 2, 0, 0], [1, 26, 37, 34, 43, 2, 0, 0, 0, 0, 0, 0, 0], [1, 33, 44, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 52, 34, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 52, 30, 43, 49, 2, 0, 0, 0, 0, 0, 0, 0], [1, 52, 37, 30, 49, 70, 2, 0, 0, 0, 0, 0, 0]]]

In [None]:
import torch


In [None]:
# Turn a nested Python list into a 32-bit integer tensor.
a = [[1, 2], [3, 4]]
torch.tensor(a, dtype=torch.int32)

In [None]:
# Pad a batch of sentences (lists of words, each word a list of char indices)
# so that every sentence has the same number of words and every word has the
# same number of characters. Expects `sents` from an earlier cell.
char_pad_token = 0

# default=0 keeps both maxima defined for an empty batch or an empty sentence.
max_sent_len = max((len(s) for s in sents), default=0)
max_word_len = max((len(w) for s in sents for w in s), default=0)

sents_padded = []
for s in sents:
    # Right-pad every word to max_word_len.
    words_padded = [list(w) + [char_pad_token] * (max_word_len - len(w)) for w in s]
    # Append all-padding words up to max_sent_len. Each padding word is a fresh
    # list; `[[pad] * w] * n` would repeat one shared list object, so mutating
    # one padding row would silently change all of them.
    words_padded.extend(
        [char_pad_token] * max_word_len for _ in range(max_sent_len - len(s))
    )
    sents_padded.append(words_padded)

sents_padded

In [None]:
# Show what an all-padding sentence looks like: max_sent_len rows of
# max_word_len pad tokens (both names must exist from an earlier cell).
char_pad_token = 0
# NOTE: list multiplication repeats references to the same inner list. That is
# fine for display, but unsafe if the rows are mutated later.
[[char_pad_token]*max_word_len]*max_sent_len

In [None]:
# Four words, each a 5-dim vector: (batch=1, words=4, dims=5).
my_input = torch.ones(1, 4, 5)
my_input.shape

# Conv1d expects (batch, channels, length): put the 5 dims before the 4 words.
rand_arr_permute = my_input.clone().transpose(1, 2)

print('input shape ', rand_arr_permute.shape)

# 5 input channels -> 50 output channels (the embedding size), window of 2 words.
conv1 = nn.Conv1d(in_channels=5, out_channels=50, kernel_size=(2,))
output = conv1(rand_arr_permute)

print('conv1 output shape', output.shape)

# The length axis is 3 because 4 words with a window of 2 give 4 - 2 + 1 = 3
# positions: torch.Size([1, 50, 3])

# ReLU is applied element-wise, so the shape does not change.
relu = nn.ReLU()
output_relu = relu(output)
print('relu shape ', output_relu.shape)

# Max-pooling is the next step; it is not done in this cell.

In [None]:
# MaxPool1d with kernel_size=1 and stride=1: every window is a single element,
# so the output shape equals the input shape.
# NOTE(review): an earlier comment here said "size=3, stride=2", which does not
# match the arguments below — confirm which was intended.

input_data = torch.randn(1, 1, 50)
print('input shape ', input_data.shape)
m = nn.MaxPool1d(1, stride=1)
output = m(input_data)
output.shape


In [None]:
# Random single-channel signal: (batch=1, channels=1, length=10).
a = torch.randn(1, 1, 10)
a.shape
a

# Sliding max over windows of 3 with step 1 -> length 10 - 3 + 1 = 8.
m = nn.MaxPool1d(kernel_size=3, stride=1)
m_a = m(a)
m_a.shape
m_a

In [None]:
# Layers for the highway experiment in the next cell: two square linear maps
# (D_in -> D_in) plus a ReLU and a Tanh activation.
D_in = 10

linear1 = nn.Linear(in_features=D_in, out_features=D_in)
linear2 = nn.Linear(in_features=D_in, out_features=D_in)
relu = nn.ReLU()
tanh_gate = nn.Tanh()
    


In [None]:
# Highway layer by hand:
#   x_proj    = ReLU(W_proj x + b_proj)
#   x_gate    = sigmoid(W_gate x + b_gate)
#   x_highway = x_gate * x_proj + (1 - x_gate) * x
# Expects D_in, linear1, linear2 and relu from the previous cell.
x_conv_out = torch.randn(40,15,D_in)
print('Input shape ', x_conv_out.shape)

x_proj = relu(linear1(x_conv_out))
# The gate must lie in (0, 1) so that x_gate and (1 - x_gate) form a convex
# mix of the projection and the input. tanh gives values in (-1, 1), which
# allows negative and greater-than-one weights, so sigmoid is used here.
x_gate = torch.sigmoid(linear2(x_conv_out))


x_proj.shape, x_gate.shape

x_highway = (x_proj * x_gate) + (1 - x_gate) * x_conv_out # skip connection

x_highway.shape

In [None]:
# `*` on two same-shaped tensors is an element-wise product, so the shape is kept.
a = torch.randn(2, 3)
b = torch.randn(2, 3)
c = torch.mul(a, b)
print(c.shape)

### Linear layer

In [None]:
# 3 input features -> 2 output features, no bias term (bias defaults to True).
linear_model = nn.Linear(in_features=3, out_features=2, bias=False)
print('weights are:',linear_model.weight, '\n\nbias are: ',linear_model.bias)

# Element count of every parameter tensor; only the 2x3 weight exists here.
numel_list = list(map(lambda param: param.numel(), linear_model.parameters()))
print('Total parameters ', sum(numel_list), numel_list)

In [None]:
# nn.Linear acts on the last axis only: (2, 128, 20) -> (2, 128, 30).
m = nn.Linear(in_features=20, out_features=30)

input_data = torch.randn(2, 128, 20)
output = m(input_data)

print(output.shape)


In [None]:
# Remove any notebook-level `input` variable so the name refers to the built-in
# input() again. pop() with a default makes the cell safe to re-run and safe on
# a fresh kernel, where a bare `del input` raises NameError.
globals().pop('input', None)

In [None]:
# Unbatched input: nn.Linear only requires the LAST axis to equal in_features,
# so a 1-D tensor with no batch axis is accepted. The earlier torch.ones(1)
# raised because its feature size (1) did not match in_features, not because
# the batch axis was missing. Expects `linear_model` from an earlier cell.
x = torch.ones(linear_model.in_features)
x.shape
linear_model(x)

In [None]:
# Same max-pooling check as in an earlier cell: a random (1, 1, 10) signal ...
a = torch.randn(1, 1, 10)
a.shape
a

# ... pooled with a window of 3 and step 1, giving length 10 - 3 + 1 = 8.
m = nn.MaxPool1d(kernel_size=3, stride=1)
m_a = m(a)
m_a.shape
m_a



In [None]:
# Random integers in [1, 100) with shape (2, 3, 4).
a = torch.randint(low=1, high=100, size=(2, 3, 4))
a

In [None]:
# Reducing along one axis returns a (values, indices) named tuple, not a tensor.
b = a.max(dim=2)
type(b)

In [None]:
# `values` holds the maxima along the reduced axis — assumes `b` is the result
# of torch.max(a, dim=2) from the previous cell.
b.values.shape
b.values

In [None]:
# Positional access to the first field of the result (the same object as b.values).
b[0].shape

In [None]:
# `indices` gives, for each maximum, its position along the reduced axis.
b.indices