In [1]:
import os
import re
import sys
import pathlib
import numpy as np
from csv import reader
from stanfordcorenlp import StanfordCoreNLP
#from pytorch_pretrained_bert.tokenization import BertTokenizer

In [18]:
def pre_process_text(text):
    '''
    Lowercase, TOkenize (Stanford CoreNLP)
    '''
    text = text.lower()
    #result = nlp.word_tokenize(text)
    result = text.split(" ")
    return result


def bert_embedding(text):
    '''
    Get bert tokenized sentences
    '''
    return tokenizer.encode(tokenizer.tokenize(text))


def glove_cove_embedding(text):
    '''
    Get Glove_Cove Embedding
    '''
    #tokens = [nlp.word_tokenize(sentence) for sentence in text]
    tokens = [sentence.split(" ") for sentence in text]
    glove_embed = glove_model.encode(tokens)
    cove_embed = cove_model.encode(tokens)
    result = np.concatenate(glove_embed,cove_embed, axis=2)
    return result




def load_data(folder= "Subtask-A", filename="SubtaskA_EvaluationData_labeled.csv"):
    '''
    Args : folder name, file name
    Ret : return data loaded into list of lists [id, string, labels] 
    '''
    #prefix = str(pathlib.Path(__file__).parent.parent)
    #path = os.path.join(prefix,"data", folder, filename)
    path = "C:\\Users\\bhara\\Downloads\\NNNlpHW3\\suggestionMining\\data\\Subtask-A\\SubtaskA_EvaluationData.csv"
    f = open(path,'r', encoding="utf-8")
    data_reader = reader(f, delimiter=",")
    data = [row for row in data_reader]
    f.close()
    return data


def pre_process_data_from_dataset(data):
    '''
    Args : data is list of lists [id, string, label]
    Ret: list of features, labels, id_map (i.e index to id mapping)
    '''
    ids = [ datum[0]+",\""+datum[1]+"\"" for datum in data]
    id_map = {k:v for k,v in enumerate(ids)}

    labels = [datum[2] for datum in data]

    bert_feats = [bert_embedding(datum[1]) for datum in data]
    glove_cove_feats = glove_cove_embedding([datum[1] for datum in data])
    return glove_cove_feats, bert_feats, labels, id_map


def create_folds(data, folds=10):
    '''
    Split data into 'folds' number of batches

    Args : data, list of lists of form [id, string, label]
            folds, number of batches of data to be created
    Rets : data batched into 'fold' lists, each wich is in turn list of lists [id, string, label]
    '''
    data_size = len(data)
    batch_size = int(data_size/folds)
    data_batch = []
    last_index = 0
    for i in range(folds-1):
        batch = data[i * batch_size: (i+1)*batch_size]
        data_batch.append(batch)
    data_batch.append(data[(folds-1) * batch_size:])
    return data_batch


def create_cross_val_train_test(data_batches,id, folds=10):
    '''
    Create test set from batched data, where test set while batch[id]
    and train set will everything else

    Args : data_batches, data that is split into 'fold' number of groups
            id, index of batch to be made as test_set
            folds, number of batches the data is split into
    Rets :
    '''
    train = []
    test = data_batches[id]
    for i in range(folds):
        if i != id:
            train += data_batches[i]
    return train, test

In [20]:
data = load_data()
data_folds = create_folds(data)
for datum in data_folds:
    print(len(datum))
print(data_folds[0][0])
train, test = create_cross_val_train_test(data_folds,0)
print("Length of train and test: ", len(train),len(test))

#feats, bert, labels, id_map = pre_process_data_from_dataset(train)
#print(feats[0])
#print(bert[0])

83
83
83
83
83
83
83
83
83
86
['9566', 'This would enable live traffic aware apps.', 'X']
Length of train and test:  750 83


In [1]:
from torchtext import data
from torchtext import datasets
from torchtext.vocab import GloVe

from cove import MTLSTM


In [2]:
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained('bert-large-uncased', do_lower_case=True)

def tokenizerfnc(str):
    return tokenizer.encode(str,max_length=512, pad_to_max_length=True)

In [4]:
TEXT = data.Field(lower=True, include_lengths=True, batch_first=True)
#LABEL = data.Field(lower=True, include_lengths=True, batch_first=True)

In [5]:
LABEL = data.Field(sequential=False, use_vocab=False)

TEXT_BERT = data.Field(
    use_vocab=False,
    batch_first=True,
    pad_token=tokenizer.pad_token_id,
    tokenize=tokenizerfnc
)

In [6]:
train_path = "C:\\Users\\bhara\\Downloads\\NNNlpHW3\\suggestionMining\\data\\Subtask-A\\V1.4_Training.csv"
"""
train = data.TabularDataset(
        path=train_path, format='csv',
        fields=[('id', None),
                ('sentence', TEXT),
                 ('label', LABEL)])
"""

"\ntrain = data.TabularDataset(\n        path=train_path, format='csv',\n        fields=[('id', None),\n                ('sentence', TEXT),\n                 ('label', LABEL)])\n"

In [8]:
train = data.TabularDataset(
        path=train_path, format='csv',
        skip_header = False,
        fields={'sentence':[('sentence',TEXT),('bert_enc',TEXT_BERT)],
                'label':('label',LABEL)
                })

In [34]:
#TEXT.vocab.freqs

Counter({'"please': 226,
         'enable': 47,
         'removing': 14,
         'language': 61,
         'code': 156,
         'from': 628,
         'the': 7249,
         'dev': 97,
         'center': 63,
         '"language': 3,
         'history"': 2,
         'for': 1875,
         'example': 119,
         'if': 673,
         'you': 845,
         'ever': 20,
         'selected': 41,
         '"ru"': 18,
         'and': 2981,
         '"ru-ru"': 6,
         'laguages': 4,
         'published': 31,
         'this': 1033,
         'xap': 18,
         'to': 5973,
         'store': 255,
         'then': 247,
         'it': 1625,
         'causes': 27,
         'tile': 61,
         'localization': 14,
         'show': 118,
         'en-us(default)': 4,
         'which': 383,
         'is': 2309,
         'bad."': 4,
         '"note:': 4,
         'in': 2239,
         'your': 219,
         '.csproj': 1,
         'file,': 11,
         'there': 314,
         'a': 3296,
         'supportedcu

In [11]:
TEXT.build_vocab(train, vectors=GloVe(name='840B', dim=300, cache='.embeddings'))
#LABEL.build_vocab(train)
outputs_cove_with_glove = MTLSTM(n_vocab=len(TEXT.vocab), vectors=TEXT.vocab.vectors, residual_embeddings=True, model_cache='.embeddings')
#glove_then_first_then_last_layer_cove = outputs_both_layer_cove_with_glove(<pass a sentence Glove embedding>)

In [64]:
len(TEXT.vocab)

18349

In [10]:
TEXT.vocab.vectors.shape

torch.Size([18349, 300])

In [12]:
train_iter = data.Iterator(
    (train),
    batch_size=5)

In [116]:
z.sentence[1]

tensor([14, 15,  5, 15, 11])

In [17]:
import os
from collections import defaultdict
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils import data


class CNN_GC(nn.Module):

    def __init__(self,out_dim=200,gc_dim=900,max_len=1000, dropout=0.5):
        super(CNN_GC, self).__init__()
        
        """cove_model = CoVeEmbeddings(
            word_embeddings_dir='../model/text/stanford/glove/', 
            tokenizer=tokenizer,
            max_sequence_length=max_len, verbose=20)
        glove_model = GloVeEmbeddings()
        glove_model.load_model(dest_dir='../model/text/stanford/glove/', process=False)"""
        
        self.gc_dim = gc_dim
        self.out_dim = out_dim
        self.max_len = max_len
        self.dropout = dropout

        self.conv_3 = nn.Conv1d(gc_dim, out_dim, 3, stride=1, padding=1)
        self.conv_5 = nn.Conv1d(gc_dim, out_dim, 5, stride=1, padding=2)
        self.conv_7 = nn.Conv1d(gc_dim, out_dim, 7, stride=1, padding=3)
        self.attn = nn.Linear(3*out_dim*max_len, max_len)

    def forward(self,x):
        #cove_embed = cove_model.encode(x)
        #tokens = [sentence.split(" ") for sentence in x]
        #glove_embed = glove_model.encode(tokens)
        #x = torch.cat([cove_embed,glove_embed], 2)
        
        conv_3 = F.relu(self.conv_3(x))
        conv_5 = F.relu(self.conv_5(x))
        conv_7 = F.relu(self.conv_7(x))
        x = torch.cat([conv_3,conv_5,conv_7], 1)
        print(x.shape)
        
        non_linear_x = F.relu(x.view(-1, 600*self.max_len))
        print(non_linear_x.shape)
        attn_weights = F.softmax(self.attn(non_linear_x), dim=1)
        print(attn_weights.shape)
        #attn_applied = torch.bmm(attn_weights.unsqueeze(1), x)
        #attn_applied = attn_weights*x
        attn_applied = torch.zeros(x.shape[0], x.shape[1], x.shape[2])
        for i in range(x.shape[0]):
            attn_applied[i,:,:] = x[i,:,:]*attn_weights[i]
        print("hello")
        print(attn_applied.shape)
        
        x = attn_applied.sum(dim=2)
        return x

In [15]:
model = CNN_GC()

In [239]:
a = torch.randn(5, 600, 1000)
print(a.shape)
list = [[0,2]*500]*5
b = torch.tensor(list2)
print(b.shape)

c = torch.zeros(5, 600, 1000)
for i in range(a.shape[0]):
    c[i,:,:] = a[i,:,:]*b[i]
print(c.shape)

torch.Size([5, 600, 1000])
torch.Size([5, 1000])
torch.Size([5, 600, 1000])


In [19]:
z = None
for batch_idx, batch in enumerate(train_iter):
    z = batch
    
    glove_then_last_layer_cove = outputs_cove_with_glove(*batch.sentence)
    
    target = torch.zeros(5, 1000, 900)
    max_sentence_len_in_batch = max(batch.sentence[1].tolist())
    target[:, :max_sentence_len_in_batch, :] = glove_then_last_layer_cove
    glove_then_last_layer_cove = target.permute(0,2,1)
    print(glove_then_last_layer_cove.shape)
    output = model(glove_then_last_layer_cove)
    print(output.shape)
    break

torch.Size([5, 900, 1000])
torch.Size([5, 600, 1000])
torch.Size([5, 600000])
torch.Size([5, 1000])
hello
torch.Size([5, 600, 1000])
torch.Size([5, 600])


In [21]:
z.sentence

(tensor([[   48,    20,    81,  1256,    13,    12,   965,    17,     3,   262,
              5,   130,     9,     2,   179,  2634,  1320,  2182,     3,    14,
            672,   284,   477,     1,     1,     1],
         [ 8328,   182,    20,    10,   118,    24, 17975,   118, 13713,     1,
              1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
              1,     1,     1,     1,     1,     1],
         [  108,    64, 14073,   312, 16793,     1,     1,     1,     1,     1,
              1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
              1,     1,     1,     1,     1,     1],
         [  508,     6,    30,    79,    17,    22,    86,     9,   310,     1,
              1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
              1,     1,     1,     1,     1,     1],
         [  112,    11,     6,    19,  2746,   206,    75,     6,    11,    19,
            456,    23,  5720,     9,    95,  7519,    66,   134,   

In [26]:
TEXT.vocab.itos[1]

'<pad>'

In [156]:
target = torch.zeros(5, 1000, 900)
target[:, :24, :] = y
target[4][500]

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 

24

In [147]:
z.sentence[1]

tensor([24, 20, 13, 21, 16])

In [19]:
z.sentence

(tensor([[2838,   60,  178,  ...,    1,    1,    1],
         [ 115,    4, 1228,  ...,    1,    1,    1],
         [ 199,   18,  569,  ...,    1,    1,    1],
         [  43, 3978,  798,  ...,    1,    1,    1],
         [ 380,   72,  539,  ...,    1,    1,    1]]),
 tensor([11, 53, 12, 40,  9]))

In [95]:
train.examples[0]

<torchtext.data.example.Example at 0x1f81911e198>

In [15]:
z


[torchtext.data.batch.Batch of size 5]
	[.sentence]:('[torch.LongTensor of size 5x1000]', '[torch.LongTensor of size 5]')
	[.bert_enc]:[torch.LongTensor of size 5x512]
	[.label]:[torch.LongTensor of size 5]

In [243]:
z


[torchtext.data.batch.Batch of size 5]
	[.sentence]:('[torch.LongTensor of size 5x24]', '[torch.LongTensor of size 5]')
	[.bert_enc]:[torch.LongTensor of size 5x512]
	[.label]:[torch.LongTensor of size 5]

In [142]:
TEXT.vocab.vectors[2]

tensor([ 2.7204e-01, -6.2030e-02, -1.8840e-01,  2.3225e-02, -1.8158e-02,
         6.7192e-03, -1.3877e-01,  1.7708e-01,  1.7709e-01,  2.5882e+00,
        -3.5179e-01, -1.7312e-01,  4.3285e-01, -1.0708e-01,  1.5006e-01,
        -1.9982e-01, -1.9093e-01,  1.1871e+00, -1.6207e-01, -2.3538e-01,
         3.6640e-03, -1.9156e-01, -8.5662e-02,  3.9199e-02, -6.6449e-02,
        -4.2090e-02, -1.9122e-01,  1.1679e-02, -3.7138e-01,  2.1886e-01,
         1.1423e-03,  4.3190e-01, -1.4205e-01,  3.8059e-01,  3.0654e-01,
         2.0167e-02, -1.8316e-01, -6.5186e-03, -8.0549e-03, -1.2063e-01,
         2.7507e-02,  2.9839e-01, -2.2896e-01, -2.2882e-01,  1.4671e-01,
        -7.6301e-02, -1.2680e-01, -6.6651e-03, -5.2795e-02,  1.4258e-01,
         1.5610e-01,  5.5510e-02, -1.6149e-01,  9.6290e-02, -7.6533e-02,
        -4.9971e-02, -1.0195e-02, -4.7641e-02, -1.6679e-01, -2.3940e-01,
         5.0141e-03, -4.9175e-02,  1.3338e-02,  4.1923e-01, -1.0104e-01,
         1.5111e-02, -7.7706e-02, -1.3471e-01,  1.1

In [143]:
TEXT.vocab.itos[2]

'the'

In [25]:
z.bert_enc[1]

tensor([  101,  1000,  2043,  1037,  5971,  8013,  5363,  2000, 12040, 10439,
         1010,  2019,  7561,  4471,  1000,  2115,  4070,  2987,  1005,  1056,
         2031,  6656,  2000, 12040, 18726,  2000,  1996,  3573,  1000,  8834,
         1998,  2027,  3685, 12040,  2151, 10439,  1012,  1045,  2113,  2008,
         2023,  4471,  2965,  2043, 25353,  2386, 26557, 29525,  3436,  2003,
        13330,  1010,  2021,  2023,  4471,  2003, 13727,  1998,  2987,  1005,
         1056,  5254,  2054,  5310,  3791,  2000,  2079,  1012,  1000,  1000,
          102,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0, 

In [123]:
z.sentence[1].shape

torch.Size([5])

In [119]:
z.sentence

(tensor([[  508,   337,     2,   173,    68,   562,  1286,   177,     3,     2,
            576,  3330,     1,     1,     1,     1,     1,     1,     1,     1,
              1,     1,     1,     1,     1,     1,     1],
         [  652, 13747,   806,  3969,  6304,     7,     4,  6371,     9,  1820,
            392,     3, 14979,  1063,    15,   418,   982,     5,     2,   107,
              3,   982,   333, 13740,     9,   190,  1141],
         [ 7778,    46,   250,  7435,    96,    19,   991,     1,     1,     1,
              1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
              1,     1,     1,     1,     1,     1,     1],
         [ 1093,    21,    90, 14471,     1,     1,     1,     1,     1,     1,
              1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
              1,     1,     1,     1,     1,     1,     1],
         [15277,   321, 18052,    24, 13303,  5045,   171,  4041,     7, 13657,
              5,   208,   322, 11165,   

In [120]:
z.sentence[0][0]

tensor([ 508,  337,    2,  173,   68,  562, 1286,  177,    3,    2,  576, 3330,
           1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
           1,    1,    1])

In [121]:
#z.dataset.examples[0].sentence
TEXT.vocab.itos[1]

'<pad>'

In [118]:
z.sentence

(tensor([[  508,   337,     2,   173,    68,   562,  1286,   177,     3,     2,
            576,  3330,     1,     1,     1,     1,     1,     1,     1,     1,
              1,     1,     1,     1,     1,     1,     1],
         [  652, 13747,   806,  3969,  6304,     7,     4,  6371,     9,  1820,
            392,     3, 14979,  1063,    15,   418,   982,     5,     2,   107,
              3,   982,   333, 13740,     9,   190,  1141],
         [ 7778,    46,   250,  7435,    96,    19,   991,     1,     1,     1,
              1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
              1,     1,     1,     1,     1,     1,     1],
         [ 1093,    21,    90, 14471,     1,     1,     1,     1,     1,     1,
              1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
              1,     1,     1,     1,     1,     1,     1],
         [15277,   321, 18052,    24, 13303,  5045,   171,  4041,     7, 13657,
              5,   208,   322, 11165,   

In [103]:
len([  267,     9,   255,   334,    39,    13,    34,    18,    58,  8072,
            349,     6,  3dd202,  1221,     2,   849,   544,    41,   228,    15,
             57,   494,  4272])

23

In [46]:
from torch import nn
x = nn.Embedding(len(TEXT.vocab), 300)
x.weight.data = TEXT.vocab.vectors
print(x(z.sentence[0]).shape)
x(z.sentence[0])

torch.Size([5, 1000, 300])


tensor([[[-0.1306,  0.1034, -0.1024,  ..., -0.7768,  0.1127, -0.1153],
         [-0.6581,  0.3264, -0.2968,  ...,  0.1498,  0.2905,  0.1792],
         [ 0.3588, -0.0043, -0.0425,  ..., -0.2057,  0.1942, -0.0247],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [-0.0241, -0.1368, -0.1341,  ..., -0.3616, -0.1302,  0.0225],
         [ 0.0602,  0.2180, -0.0425,  ...,  0.1171, -0.1669, -0.0941],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.1222,  0.3166, -0.5548,  ..., -0

In [133]:
TEXT.vocab.vectors

tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.2720, -0.0620, -0.1884,  ...,  0.1302, -0.1832,  0.1323],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]])

In [44]:
z.sentence

(tensor([[ 494,  861,  107,  ...,    1,    1,    1],
         [3665,  193,    8,  ...,    1,    1,    1],
         [ 630,   30,    7,  ...,    1,    1,    1],
         [ 112,  192,  171,  ...,    1,    1,    1],
         [ 193,   37,   49,  ...,    1,    1,    1]]),
 tensor([24, 20, 13, 21, 16]))

In [49]:
inputs = x(z.sentence[0])
inputs.shape

torch.Size([5, 1000, 300])

In [70]:
lens, indices = torch.sort(z.sentence[1], 0, True)
print(lens, indices)

tensor([24, 21, 20, 16, 13]) tensor([0, 3, 1, 4, 2])


In [57]:
outputs = [inputs]
outputs[0].shape

torch.Size([5, 1000, 300])

In [66]:
len_list = lens.tolist()
len_list

[24, 21, 20, 16, 13]

In [69]:
inputs[indices].shape

torch.Size([5, 1000, 300])

In [72]:
from torch.nn.utils.rnn import pad_packed_sequence as unpack
from torch.nn.utils.rnn import pack_padded_sequence as pack

packed_inputs = pack(inputs[indices], len_list, batch_first=True)
print(packed_inputs[0].shape)
print(packed_inputs)

torch.Size([94, 300])
PackedSequence(data=tensor([[-0.1306,  0.1034, -0.1024,  ..., -0.7768,  0.1127, -0.1153],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        ...,
        [-0.1722,  0.1823, -0.2785,  ..., -0.1267, -0.4386,  0.3877],
        [ 0.1542,  0.0862,  0.3333,  ..., -0.1949, -0.8467,  0.2624],
        [ 0.6127, -0.1795,  0.1991,  ...,  0.2813, -0.9914,  0.2804]],
       grad_fn=<PackPaddedSequenceBackward>), batch_sizes=tensor([5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 3, 3, 3, 3, 2, 1, 1, 1]), sorted_indices=None, unsorted_indices=None)


In [75]:
rnn1 = nn.LSTM(300, 300, num_layers=2, bidirectional=True, batch_first=True)

In [81]:
outputs1, hidden_t1 = rnn1(packed_inputs, None)
print(outputs1[0].shape)

torch.Size([94, 600])


In [84]:
unpacked_outputs1 = unpack(outputs1, batch_first=True)[0]
print(unpacked_outputs1.shape)

torch.Size([5, 24, 600])


In [107]:
unpacked_outputs1[4][20]

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 