In [164]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Highway(nn.Module):
    """
    The Highway Network as in https://arxiv.org/abs/1505.00387

    Mixes a ReLU projection of the input with the input itself through a
    learned, element-wise sigmoid gate:

        x_proj    = relu(W_proj x + b_proj)
        x_gate    = sigmoid(W_gate x + b_gate)
        x_highway = x_gate * x_proj + (1 - x_gate) * x
    """

    def __init__(self, e_word):
        """
        Init the layers for the network
        @param e_word (int): input size; the output has the same size
        """
        super(Highway, self).__init__()
        self.h_projection = nn.Linear(e_word, e_word, bias=True)
        self.h_gate = nn.Linear(e_word, e_word, bias=True)

    def forward(self, x_conv_out):
        """
        Apply the highway transformation to its input.
        @param x_conv_out: Tensor of shape (batch_size, e_word)

        @returns x_highway: Tensor of shape (batch_size, e_word)
        """
        x_proj = torch.relu(self.h_projection(x_conv_out))
        # torch.sigmoid is the recommended spelling; F.sigmoid emits a
        # deprecation warning on some PyTorch versions.
        x_gate = torch.sigmoid(self.h_gate(x_conv_out))

        # Gate near 1 -> output follows the projection; gate near 0 -> the
        # input is carried through unchanged.
        x_highway = x_gate * x_proj + (1 - x_gate) * x_conv_out

        return x_highway


In [165]:
# Smoke test: run a freshly initialised Highway layer on a batch of two
# 3-dimensional inputs. The layer size comes from e_word instead of a
# repeated literal so the two cannot drift apart.
e_word = 3
a = Highway(e_word)
input = torch.Tensor([[1,1,1],[2,2,2]])
a(input)

tensor([[0.2859, 0.8559, 0.4367],
        [0.3571, 1.4384, 0.9562]], grad_fn=<AddBackward0>)

In [166]:
# Pin every Highway parameter to a constant so the forward pass can be
# checked by hand. nn.init.constant_ is the in-place initializer; the
# un-suffixed nn.init.constant is deprecated and only warns before forwarding.
nn.init.constant_(a.h_projection.bias, 10)
nn.init.constant_(a.h_gate.bias, 10)
nn.init.constant_(a.h_projection.weight, 1)
nn.init.constant_(a.h_gate.weight, 1)

  """Entry point for launching an IPython kernel.
  
  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.


Parameter containing:
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], requires_grad=True)

In [167]:
a(input)

tensor([[13.0000, 13.0000, 13.0000],
        [16.0000, 16.0000, 16.0000]], grad_fn=<AddBackward0>)

In [168]:
# sigmoid(10) is ~0.99995, which prints as 1.0000: with a bias of 10 the
# Highway gate is effectively fully open. torch.sigmoid is used instead of
# F.sigmoid, which warns as deprecated on some PyTorch versions.
torch.sigmoid(torch.Tensor([10.0]))

tensor([1.0000])

In [169]:
# NOTE(review): `a` is bound to the Highway module here, not to a tensor, so
# this subtraction raises the TypeError shown in the output below.
1-a

TypeError: unsupported operand type(s) for -: 'int' and 'Highway'

In [170]:
# 1-D convolution: 16 input channels, 33 output channels, kernel size 3, stride 2.
m = nn.Conv1d(16, 33, 3, stride=2)

In [171]:
# Push a random (20, 16, 50) batch through the Conv1d layer `m` and inspect
# the output shape. With kernel size 3, stride 2 and no padding the length
# axis shrinks to floor((50 - 3) / 2) + 1 = 24.
input = torch.randn(20, 16, 50)
output = m(input)
output.size()

torch.Size([20, 33, 24])

In [208]:
import torch
import torch.nn as nn

class CNN(nn.Module):
    """
    The CNN module in the embedding: a 1-D convolution over the character
    axis followed by ReLU and a max over the length axis.
    """

    def __init__(self, e_char, e_word, k = 5, padding = 1):
        """
        Init the layers for the network
        @param e_char (int): input size (number of input channels)
        @param e_word (int): filter number, set to e_word
        @param k (int): kernel size
        @param padding (int): padding passed to nn.Conv1d
        """
        super(CNN, self).__init__()
        self.projection = nn.Conv1d(e_char, e_word, k, padding = padding)

    def forward(self, x_reshape):
        """
        Calculate CNN layer outputs.
        @param x_reshape: Tensor of shape (batch_size, e_char, m_word)

        @returns x_conv_out: Tensor of shape (batch_size, e_word)
        """
        # (batch_size, e_word, conv_length)
        x_conv = self.projection(x_reshape)

        # max(dim=-1) already removes the length axis. A further squeeze(-1)
        # would also drop the e_word axis whenever e_word == 1, so it is not
        # applied. torch.relu is used so this cell does not rely on `F`
        # having been imported by an earlier cell.
        x_conv_out = torch.relu(x_conv).max(dim=-1).values

        return x_conv_out


In [221]:
# Tiny CNN used for the hand-checkable example below. The constructor is fed
# from the named sizes rather than from repeated literals.
e_char = 3
m_word = 2
e_word = 4
k = 2
a = CNN(e_char, e_word, k = k)

In [222]:
# Constant parameters make the convolution output easy to verify by hand.
# nn.init.constant_ replaces the deprecated nn.init.constant.
nn.init.constant_(a.projection.bias, 10)
nn.init.constant_(a.projection.weight, 2)


  """Entry point for launching an IPython kernel.
  


Parameter containing:
tensor([[[2., 2.],
         [2., 2.],
         [2., 2.]],

        [[2., 2.],
         [2., 2.],
         [2., 2.]],

        [[2., 2.],
         [2., 2.],
         [2., 2.]],

        [[2., 2.],
         [2., 2.],
         [2., 2.]]], requires_grad=True)

In [223]:
input = torch.Tensor([[[1,1],[2,2],[0,0]],[[1,1],[2,2],[1,1]]])
input.size()

torch.Size([2, 3, 2])

In [224]:
input[0]

tensor([[1., 1.],
        [2., 2.],
        [0., 0.]])

In [225]:
a(input).size()

torch.Size([2, 4])

In [226]:
a.projection(input)

tensor([[[16., 22., 16.],
         [16., 22., 16.],
         [16., 22., 16.],
         [16., 22., 16.]],

        [[18., 26., 18.],
         [18., 26., 18.],
         [18., 26., 18.],
         [18., 26., 18.]]], grad_fn=<SqueezeBackward1>)

In [227]:
a(input)

tensor([[22., 22., 22., 22.],
        [26., 26., 26., 26.]], grad_fn=<SqueezeBackward1>)

In [180]:
import numpy as np
# Embedding lookup demo: 400 rows of size 7, row 0 reserved for padding.
s = nn.Embedding(400, 7, padding_idx=0)
# Indices are written as plain integers. The previous detour through a float
# numpy array (caused by the 2.0 literal) depended on an implicit
# float -> long conversion in the legacy LongTensor constructor.
a = torch.LongTensor([[[2,3],[2,3],[0,1]],[[2,3],[2,3],[0,1]]])
a.size()

torch.Size([2, 3, 2])

In [181]:
s(a).permute(0,1,3,2).size()

torch.Size([2, 3, 7, 2])

In [182]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
CS224N 2019-20: Homework 5
model_embeddings.py: Embeddings for the NMT model
Pencheng Yin <pcyin@cs.cmu.edu>
Sahil Chopra <schopra8@stanford.edu>
Anand Dhoot <anandd@stanford.edu>
Michael Hahn <mhahn2@stanford.edu>
"""

import torch.nn as nn

# Do not change these imports; your module names should be
#   `CNN` in the file `cnn.py`
#   `Highway` in the file `highway.py`
# Uncomment the following two imports once you're ready to run part 1(j)




# End "do not change"

class ModelEmbeddings(nn.Module):
    """
    Class that converts input words to their CNN-based embeddings.

    Pipeline: character embedding lookup -> CNN -> Highway -> dropout.
    """

    def __init__(self, word_embed_size, vocab, char_embed_size = 50, dropout_rate = 0.3):
        """
        Init the Embedding layer for one language
        @param word_embed_size (int): Embedding size (dimensionality) for the output word
        @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
            When it exposes a non-empty `char2id` mapping, the character
            embedding table gets len(vocab.char2id) rows; otherwise the table
            falls back to 30 rows.
        @param char_embed_size (int): dimensionality of the character embeddings
        @param dropout_rate (float): dropout probability applied to the word embeddings
        """
        super(ModelEmbeddings, self).__init__()

        self.word_embed_size = word_embed_size
        self.vocab = vocab

        # Size the character table from the vocabulary so every character
        # index can be looked up. 30 is kept only as the fallback for callers
        # that pass a placeholder vocab.
        char2id = getattr(vocab, "char2id", None)
        num_chars = len(char2id) if char2id else 30

        self.embedding = nn.Embedding(num_chars, char_embed_size, padding_idx=0)
        self.cnn = CNN(char_embed_size, word_embed_size)
        self.highway = Highway(word_embed_size)
        self.dropout = nn.Dropout(p = dropout_rate)

    def forward(self, input):
        """
        Looks up character-based CNN embeddings for the words in a batch of sentences.
        @param input: Tensor of integers of shape (sentence_length, batch_size, max_word_length) where
            each integer is an index into the character vocabulary

        @returns output: Tensor of shape (sentence_length, batch_size, word_embed_size), containing the
            CNN-based embeddings for each word of the sentences in the batch
        """
        sentence_length, batch_size = input.shape[0], input.shape[1]

        # (sentence_length, batch_size, max_word_length, char_embed_size)
        x_emb = self.embedding(input)
        # (sentence_length, batch_size, char_embed_size, max_word_length)
        x_reshaped = x_emb.permute(0, 1, 3, 2)

        # Conv1d only accepts 3-D input, so the sentence and batch axes are
        # merged before the CNN and split again at the end.
        x_flat = x_reshaped.flatten(start_dim=0, end_dim=1)

        x_conv_out = self.cnn(x_flat)             # (sentence_length * batch_size, word_embed_size)
        x_highway = self.highway(x_conv_out)
        x_word_emb = self.dropout(x_highway)

        return x_word_emb.reshape(sentence_length, batch_size, self.word_embed_size)




In [183]:
m = ModelEmbeddings(6,{})

In [184]:
e_char = 50
m_word = 5
e_word = 6
sentence_length = 2
batch_size = 3
input = torch.LongTensor([[[1,1,2,2,2],[2,2,2,2,2],[0,0,2,2,2]],[[1,1,2,2,2],[2,2,2,2,2],[1,1,2,2,2]]])
input.size()

torch.Size([2, 3, 5])

In [185]:
x_emb = m.embedding(input)

In [186]:
x_reshaped = x_emb.permute(0,1,3,2)

In [191]:
# Character indices -> character embeddings, then swap the last two axes so
# the embedding axis comes before the word-length axis.
x_emb = m.embedding(input)
x_reshaped = x_emb.permute(0, 1, 3, 2)

# Remember the two leading axes so they can be restored afterwards.
dim_0, dim_1 = x_reshaped.shape[:2]

# Merge the two leading axes so the CNN receives a 3-D tensor.
x_reshaped_flattened = x_reshaped.flatten(start_dim=0, end_dim=1)

x_conv_out = m.cnn(x_reshaped_flattened)
x_highway = m.highway(x_conv_out)
x_word_emb = m.dropout(x_highway)

# Split the merged axis back into (dim_0, dim_1).
x_word_emb_unflatten = x_word_emb.reshape(dim_0, dim_1, x_highway.shape[-1])

In [192]:
x_word_emb_unflatten.size()

torch.Size([2, 3, 6])

In [189]:
x_highway.size()

torch.Size([6, 6])

In [158]:
s = torch.flatten(x_reshaped, start_dim=0, end_dim=1)

In [160]:
# Restore the two leading axes that were merged by the flatten. The second
# axis must be dim_1 (using dim_0 twice only works when both sizes are equal).
s2 = x_word_emb.reshape(dim_0,dim_1,x_highway.size()[-1])

In [162]:
s2 == x_reshaped

tensor([[[[True, True, True, True, True],
          [True, True, True, True, True],
          [True, True, True, True, True],
          ...,
          [True, True, True, True, True],
          [True, True, True, True, True],
          [True, True, True, True, True]],

         [[True, True, True, True, True],
          [True, True, True, True, True],
          [True, True, True, True, True],
          ...,
          [True, True, True, True, True],
          [True, True, True, True, True],
          [True, True, True, True, True]],

         [[True, True, True, True, True],
          [True, True, True, True, True],
          [True, True, True, True, True],
          ...,
          [True, True, True, True, True],
          [True, True, True, True, True],
          [True, True, True, True, True]]],


        [[[True, True, True, True, True],
          [True, True, True, True, True],
          [True, True, True, True, True],
          ...,
          [True, True, True, True, True],
        

In [193]:
# kernel_size evaluates to 14 - 5 + 1 = 10.
# NOTE(review): presumably (word length - conv kernel size + 1), i.e. the
# length of an un-padded convolution output — confirm and name the constants.
mp = nn.MaxPool1d(kernel_size=14 - 5 + 1)

In [230]:
# Cross-check CNN.forward with the functional API: ReLU, then max-pool over
# the whole length axis. A fixed kernel_size=2 pooled only the first two of
# the three positions and matched a(input) just because the maximum sits in
# that window.
conv_out = a.projection(input)
F.max_pool1d(F.relu(conv_out), kernel_size = conv_out.size(-1)).squeeze(-1)

tensor([[22., 22., 22., 22.],
        [26., 26., 26., 26.]], grad_fn=<SqueezeBackward1>)

In [229]:
a(input)

tensor([[22., 22., 22., 22.],
        [26., 26., 26., 26.]], grad_fn=<SqueezeBackward1>)

In [260]:
torch.max(F.relu(input), dim=2)[0]

tensor([[1, 2, 2],
        [2, 2, 2]])

In [264]:
F.relu(input).max(dim = -1)[0]

tensor([[1, 2, 2],
        [2, 2, 2]])

In [244]:
input = torch.LongTensor([[[1,1,1],[2,2,2],[0,0,2]],[[1,1,2],[2,2,2],[1,1,2]]])



In [253]:
# Stand-alone copies of the ModelEmbeddings layers, used to compare two ways
# of handling the 3-D (sentence_length, batch_size, max_word_length) input.
char_embed_size = 10
word_embed_size = 5
dropout_rate = 0.3
embedding = nn.Embedding(30, char_embed_size, padding_idx=0)
cnn = CNN(char_embed_size, word_embed_size)
highway = Highway(word_embed_size)
dropout = nn.Dropout(p = dropout_rate)

# --- Variant 1: one sentence position at a time ---------------------------
# conv1d can only take a 3-dim tensor as input, so each slice along the first
# axis goes through the network separately and the per-slice results are
# stacked afterwards. Dropout is not applied in this variant.
X_word_emb_list = []
for X_padded in input:
    X_emb = embedding(X_padded)
    X_reshaped = torch.transpose(X_emb, dim0=-1, dim1=-2)
    X_conv_out = cnn(X_reshaped)

    X_highway = highway(X_conv_out)
    X_word_emb = X_highway
    X_word_emb_list.append(X_word_emb)

x_word_emb_unflatten = torch.stack(X_word_emb_list)

# --- Variant 2: merge the two leading axes, run once, split again ---------
seq_len, batch_size, max_word_length = input.shape

x_char_embed = embedding(input)  # shape: (sentence_length, batch_size, max_word_length, e_char)
x_reshaped = x_char_embed.permute(0, 1, 3, 2)  # shape: (sentence_length, batch_size, e_char, max_word_length)
# reshape (rather than view) because the permuted tensor is not contiguous.
x_conv = cnn(x_reshaped.reshape(-1, char_embed_size, max_word_length))  # shape: (seq_len*batch_size, e_word)
x_highway = highway(x_conv)  # shape: (seq_len*batch_size, e_word)

# Same values with and without dropout, kept side by side for comparison.
x_word_embed2 = x_highway.view(seq_len, batch_size, word_embed_size)
x_word_embed = dropout(x_word_embed2)

In [256]:
# Same flatten -> CNN -> Highway -> unflatten path, this time using the
# stand-alone `embedding`, `cnn` and `highway` layers and no dropout.
x_emb = embedding(input)
x_reshaped = x_emb.permute(0, 1, 3, 2)

# Sizes of the two leading axes, needed to undo the flatten below.
dim_0, dim_1 = x_reshaped.shape[:2]

x_reshaped_flattened = x_reshaped.flatten(start_dim=0, end_dim=1)

x_conv_out = cnn(x_reshaped_flattened)
x_highway = highway(x_conv_out)
x_word_emb = x_highway

x_word_emb_unflatten = x_word_emb.reshape(dim_0, dim_1, x_highway.shape[-1])

In [251]:
x_word_embed = dropout(x_highway.view(seq_len, batch_size, word_embed_size))

In [258]:
x_word_embed

tensor([[[0.1527, 0.3462, 0.0000, 0.0000, 0.3792],
         [0.3134, 0.0000, 0.4361, 0.0000, 0.0000],
         [0.1890, 0.1417, 0.0000, 0.0000, 0.0741]],

        [[0.0000, 0.1770, 0.2188, 0.0000, 0.0202],
         [0.0000, 0.1536, 0.0000, 0.0000, 0.0000],
         [0.2940, 0.1770, 0.0000, 0.0000, 0.0202]]], grad_fn=<MulBackward0>)

In [254]:
x_word_embed2

tensor([[[0.1069, 0.2424, 0.1167, 0.0000, 0.2654],
         [0.2194, 0.1075, 0.3053, 0.0000, 0.0000],
         [0.1323, 0.0992, 0.0000, 0.0000, 0.0519]],

        [[0.2058, 0.1239, 0.1531, 0.0000, 0.0141],
         [0.2194, 0.1075, 0.3053, 0.0000, 0.0000],
         [0.2058, 0.1239, 0.1531, 0.0000, 0.0141]]], grad_fn=<ViewBackward>)

In [257]:
x_word_emb_unflatten

tensor([[[0.1069, 0.2424, 0.1167, 0.0000, 0.2654],
         [0.2194, 0.1075, 0.3053, 0.0000, 0.0000],
         [0.1323, 0.0992, 0.0000, 0.0000, 0.0519]],

        [[0.2058, 0.1239, 0.1531, 0.0000, 0.0141],
         [0.2194, 0.1075, 0.3053, 0.0000, 0.0000],
         [0.2058, 0.1239, 0.1531, 0.0000, 0.0141]]], grad_fn=<ViewBackward>)

In [267]:
# Two ways of walking over the first axis of `input`, printed one after the
# other so the outputs can be compared.

# (1) torch.split into chunks of size 1, then squeeze away the leading axis.
for y in torch.split(input, split_size_or_sections = 1):
    Y_t = torch.squeeze(y, dim=0)
    print(Y_t)



# (2) Iterating over the tensor directly yields the same slices.
for Y_t in input:
    print(Y_t)

tensor([[1, 1, 1],
        [2, 2, 2],
        [0, 0, 2]])
tensor([[1, 1, 2],
        [2, 2, 2],
        [1, 1, 2]])
tensor([[1, 1, 1],
        [2, 2, 2],
        [0, 0, 2]])
tensor([[1, 1, 2],
        [2, 2, 2],
        [1, 1, 2]])


In [272]:
class CharDecoder(nn.Module):
    def __init__(self, hidden_size, char_embedding_size=50, target_vocab=None):
        """ Init Character Decoder.

        @param hidden_size (int): Hidden size of the decoder LSTM
        @param char_embedding_size (int): dimensionality of character embeddings
        @param target_vocab (VocabEntry): vocabulary for the target language. See vocab.py for documentation.
            When it exposes a non-empty `char2id` mapping, the output projection and the
            embedding table are sized to len(target_vocab.char2id); otherwise they fall back to 20.
        """
        super(CharDecoder, self).__init__()
        self.target_vocab = target_vocab

        # The vocabulary size comes from target_vocab when one is given; 20 is
        # kept as the fallback so CharDecoder(hidden_size) still works.
        char2id = getattr(target_vocab, "char2id", None)
        self.vocab_size = len(char2id) if char2id else 20

        self.charDecoder = nn.LSTM(char_embedding_size, hidden_size)
        self.char_output_projection = nn.Linear(hidden_size, self.vocab_size)
        self.decoderCharEmb = nn.Embedding(self.vocab_size, char_embedding_size,
                                           padding_idx=0)

    def forward(self, input, dec_hidden=None):
        """ Forward pass of character decoder.

        @param input (Tensor): tensor of integers, shape (length, batch_size)
        @param dec_hidden (tuple(Tensor, Tensor)): internal state of the LSTM before reading the input characters. A tuple of two tensors of shape (1, batch, hidden_size)

        @returns scores (Tensor): called s_t in the PDF, shape (length, batch_size, self.vocab_size)
        @returns dec_hidden (tuple(Tensor, Tensor)): internal state of the LSTM after reading the input characters. A tuple of two tensors of shape (1, batch, hidden_size)
        """
        x = self.decoderCharEmb(input)  # (length, batch_size, char_embedding_size)

        # The per-step LSTM outputs get their own name so they are not
        # confused with the incoming dec_hidden state tuple.
        hidden_states, (last_hidden, last_cell) = self.charDecoder(x, dec_hidden)

        scores = self.char_output_projection(hidden_states)
        return scores, (last_hidden, last_cell)

In [283]:
s = CharDecoder(11)

In [1]:

scores = torch.LongTensor([[[1,2,3,4],[1,2,0,0],[2,2,2,0]],[[1,2,3,0],[1,2,0,1],[2,2,2,2]]])
scores.size()

NameError: name 'torch' is not defined

In [3]:
char_sequence = torch.LongTensor([[1,0,0],[1,2,0]])
char_sequence.size()

torch.Size([2, 3])

In [2]:
import torch

In [12]:
# Float mask with the same shape as char_sequence: inf wherever the character
# index is 0, and 0 elsewhere. The shape is taken from char_sequence instead
# of being hard-coded to (2, 3).
dec_masks = torch.zeros_like(char_sequence, dtype=torch.float)
dec_masks[char_sequence == 0] = float('inf')
dec_masks


tensor([[0., inf, inf],
        [0., 0., inf]])

AttributeError: 'float' object has no attribute 'long'

In [20]:
# Integer-valued stand-in for decoder scores; its size is (2, 3, 4), see below.
scores = torch.LongTensor([[[1,2,3,4],[1,2,0,0],[2,2,2,0]],[[1,2,3,0],[1,2,0,1],[2,2,2,2]]])
scores.size()
# NOTE(review): the disabled line below adds dec_masks, cast to long, into
# column 0. The `scores` dumps elsewhere in this notebook show values near
# -9223372036854775807 exactly where dec_masks is inf, which looks like the
# result of casting inf to int64 — confirm, and mask in floating point instead.
# scores[:,:, 0] += dec_masks.long()

torch.Size([2, 3, 4])

In [21]:
torch.argmax(scores, dim = -1)

tensor([[3, 1, 2],
        [2, 1, 3]])

In [9]:
scores

tensor([[[                   1,                    2,                    3,
                             4],
         [-9223372036854775807,                    2,                    0,
                             0],
         [-9223372036854775806,                    2,                    2,
                             0]],

        [[                   1,                    2,                    3,
                             0],
         [                   1,                    2,                    0,
                             1],
         [-9223372036854775806,                    2,                    2,
                             2]]])

In [288]:
s(inputs)[1][1].size()

torch.Size([1, 3, 11])

In [344]:
# Cross-entropy on two 3-class logit rows whose target class (index 0) has a
# much smaller logit than the dominant class. Each row's loss is then
# essentially (largest logit - target logit), and the default reduction
# averages the rows: (8887 + 4443) / 2 = 6665.
loss = nn.CrossEntropyLoss()
input = torch.tensor([[1.0, 8888.0, 1.0],
                      [1.0, 4444.0, 1.0]])
target = torch.zeros(2, dtype=torch.long)
output = loss(input, target)

In [345]:
output

tensor(6665.)

In [342]:
target

tensor([0, 0])

In [343]:
target

tensor([0, 0])

In [355]:
1 - (target == 0).float()

tensor([0., 0.])

In [347]:
type(target==0)

torch.Tensor

In [17]:
char_sequence[1:].contiguous().view(-1)

tensor([1, 2, 0])

In [18]:
char_sequence[1:].view(-1)

tensor([1, 2, 0])

In [19]:
scores

tensor([[[                   1,                    2,                    3,
                             4],
         [-9223372036854775807,                    2,                    0,
                             0],
         [-9223372036854775806,                    2,                    2,
                             0]],

        [[                   1,                    2,                    3,
                             0],
         [                   1,                    2,                    0,
                             1],
         [-9223372036854775806,                    2,                    2,
                             2]]])

In [22]:
# NOTE(review): dim=1 reduces the middle axis of the (2, 3, 4) `scores`
# tensor, so the result has shape (2, 4) (see the size check below). The
# earlier argmax cell reduces dim=-1 instead — confirm which axis is intended.
current_char_tensor = torch.argmax(scores, dim=1)

In [23]:
current_char_tensor.size()

torch.Size([2, 4])

In [24]:
scores.size()

torch.Size([2, 3, 4])