### Initialization

In [0]:
# For Colab only!

try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass

In [0]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [0]:
import torch
from torch.nn import functional as F
from torchtext import data, datasets
from torch import nn

In [4]:
print(tf.__version__)
print(tf.test.is_gpu_available())

2.2.0-rc1
Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
True


In [5]:
print(torch.__version__)
print(torch.cuda.is_available())

1.4.0
True


In [0]:
import numpy as np

num_epochs = 10
batch_size = 128
learning_rate = 0.001


In [0]:
total_words = 10000
max_review_words = 200
embedding_len = 100

### Tensorflow
IMDB data set

#### Load data

In [0]:
(x_train, y_train),(x_test, y_test) = keras.datasets.imdb.load_data(num_words = total_words)
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

(25000,) (25000,) (25000,) (25000,)


In [0]:
# x_train = keras.preprocessing.sequence.pad_sequences(x_train)
# x_test = keras.preprocessing.sequence.pad_sequences(x_test)

# print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

(25000, 2494) (25000,) (25000, 2315) (25000,)


In [0]:
## pad sequence to the same length
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen = max_review_words)
x_test = keras.preprocessing.sequence.pad_sequences(x_test, maxlen = max_review_words)

print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

(25000, 200) (25000,) (25000, 200) (25000,)


In [0]:
# See an actual review in words
# Reverse from integers to words using the DICTIONARY (given by keras...need to do nothing to create it)

word_index = keras.datasets.imdb.get_word_index()

reverse_word_index = dict(
[(value, key) for (key, value) in word_index.items()])

decoded_review = ' '.join(
[reverse_word_index.get(i - 3, '?') for i in x_train[1]])

print(decoded_review)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
? ? ? ? ? ? ? ? ? ? ? ? big hair big boobs bad music and a giant safety pin these are the words to best describe this terrible movie i love cheesy horror movies and i've seen hundreds but this had got to be on of the worst ever made the plot is paper thin and ridiculous the acting is an abomination the script is completely laughable the best is the end showdown with the cop and how he worked out who the killer is it's just so damn terribly written the clothes are sickening and funny in equal ? the hair is big lots of boobs ? men wear those cut ? shirts that show off their ? sickening that men actually wore them and the music is just ? trash that plays over and over again in almost every scene there is trashy music boobs and ? taking away bodies and the gym still doesn't close for ? all joking aside this is a truly bad film whose only charm is to look back on the disaster that was the 

In [0]:
ds_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
ds_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))

In [0]:
ds_train=ds_train.shuffle(1000).batch(batch_size, drop_remainder = True)
ds_test=ds_test.shuffle(1000).batch(batch_size, drop_remainder = True)

In [0]:
print(type(ds_train))
data, label = next(iter(ds_train))
print(data.shape, label.shape)

decoded_review = ' '.join(
[reverse_word_index.get(i - 3, '?') for i in data[0].numpy()])

print(decoded_review)

#### SimpleRNN




Tensorflow

[b,max_review_words] --> **embedding** --> [b, max_review_words, embedding_len]

[b, max_review_words, embedding_len]--> **RNN/LSTM/GRU layers** with *num_units* -->[b,num_units ]

[b, num_units] --> [b, 1]



In [0]:
class RNN_model(keras.Model):
    def __init__(self, num_units):
        super().__init__()
    
        # embedding   [b, 80] ->[b, 80 embedding_len=100]  
        # self.embedding = layers.Embedding(input_dim=total_words,output_dim=embedding_len,input_length=max_review_words)
        self.embedding = layers.Embedding(input_dim=total_words,output_dim=embedding_len)
        # [b, 80, 100] ->[b, num_units]
        self.RNN1 = layers.SimpleRNN(units=num_units,dropout=0.5, return_sequences=True)
        self.RNN2 = layers.SimpleRNN(units=num_units,dropout=0.5, return_sequences=False)
        
        # [b, num_units] ->[b,1]
        self.fc = layers.Dense(1)
    
    def call(self,x, training = None):
        # print("input shape: ", x.shape)       
        outputs = self.embedding(x)

        # print("embedding output shape: ", outputs.shape)       
        outputs = self.RNN1(outputs, training = training)
        outputs = self.RNN2(outputs, training = training)
        
        # print("RNN output shape: ", outputs.shape)
        outputs = self.fc(outputs)
        # print("fc output shape", outputs.shape)
        return outputs
    



#### GRU

In [0]:
class GRU_model(keras.Model):
    def __init__(self, num_units):
        super().__init__()
    
        # embedding   [b, 80] ->[b, 80 embedding_len=100]  
        self.embedding = layers.Embedding(input_dim=total_words,output_dim=embedding_len,input_length=max_review_words)
        
        # [b, 80, 100] ->[b, num_units]
        self.GRU1 = layers.GRU(units=num_units,dropout=0.5, return_sequences=True)
        self.GRU2 = layers.GRU(units=num_units,dropout=0.5, return_sequences=False)
        
        # [b, num_units] ->[b,1] 
        self.fc = layers.Dense(1)


    
    def call(self,x, training = None):
        # print("input shape: ", x.shape)
        
        outputs = self.embedding(x)

        # print("embedding output shape: ", outputs.shape)
        
        outputs = self.GRU1(outputs, training = training)
        outputs = self.GRU2(outputs, training = training)
        
        # print("RNN output shape: ", outputs.shape)

        outputs = self.fc(outputs)

        # print("fc output shape", outputs.shape)
        return outputs
    

#### LSTM


In [0]:
class LSTM_model(keras.Model):
    def __init__(self, num_units):
        super().__init__()
    
        # embedding   [b, 80] ->[b, 80 embedding_len=100]  
        self.embedding = layers.Embedding(input_dim=total_words,output_dim=embedding_len,input_length=max_review_words)
        
        # [b, 80, 100] ->[b, num_units]
        self.LSTM1 = layers.LSTM(units=num_units,dropout=0.5, return_sequences=True)
        self.LSTM2 = layers.LSTM(units=num_units,dropout=0.5, return_sequences=False)
        
        # [b, num_units] ->[b,1] 
        self.fc = layers.Dense(1)


    
    def call(self,x, training = None):
        # print("input shape: ", x.shape)
        
        outputs = self.embedding(x)

        # print("embedding output shape: ", outputs.shape)
        
        outputs = self.LSTM1(outputs, training = training)
        outputs = self.LSTM2(outputs, training = training)
        
        # print("RNN output shape: ", outputs.shape)

        outputs = self.fc(outputs)

        # print("fc output shape", outputs.shape)
        return outputs
    

#### Quick fit

In [0]:
model = RNN_model(64)
model.build(input_shape=(None, max_review_words))
model.summary()

model.compile(optimizer=keras.optimizers.Adam(learning_rate),
              loss = tf.losses.BinaryCrossentropy(from_logits=True),
              metrics = ['accuracy'])
history = model.fit(ds_train, epochs=num_epochs, validation_data=ds_test)

Model: "rnn_model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      multiple                  1000000   
_________________________________________________________________
simple_rnn_2 (SimpleRNN)     multiple                  10560     
_________________________________________________________________
simple_rnn_3 (SimpleRNN)     multiple                  8256      
_________________________________________________________________
dense_1 (Dense)              multiple                  65        
Total params: 1,018,881
Trainable params: 1,018,881
Non-trainable params: 0
_________________________________________________________________
Train for 195 steps, validate for 195 steps
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10

In [0]:
model = GRU_model(64)
model.build(input_shape=(None, max_review_words))
model.summary()

Model: "gru_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      multiple                  1000000   
_________________________________________________________________
gru (GRU)                    multiple                  31872     
_________________________________________________________________
gru_1 (GRU)                  multiple                  24960     
_________________________________________________________________
dense_1 (Dense)              multiple                  65        
Total params: 1,056,897
Trainable params: 1,056,897
Non-trainable params: 0
_________________________________________________________________


#### TF2.0 Train

In [0]:
optimizer = keras.optimizers.Adam(learning_rate= learning_rate)

categorical_accuracy_train = keras.metrics.BinaryAccuracy()
categorical_accuracy_test = keras.metrics.BinaryAccuracy()


In [0]:
for epoch in range(num_epochs):
  for step, (x, y) in enumerate(ds_train):
    y = tf.reshape(y,[-1,1])
    with tf.GradientTape() as tape:
      logits = model(x, training = True)
      loss = tf.keras.losses.binary_crossentropy(y_true=y, y_pred=logits, from_logits=True)
      loss = tf.reduce_mean(loss)  
    
    grad = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(grads_and_vars= zip(grad, model.trainable_variables))

    [x_train, y_train] = next(iter(ds_train))
    train_logits = model(x_train, training = False)
    categorical_accuracy_train.update_state(y_true = y_train, y_pred=train_logits)
    train_accuracy = categorical_accuracy_train.result().numpy()
    
    [x_test, y_test] = next(iter(ds_test))
    logits = model(x_test, training = False)
    categorical_accuracy_test.update_state(y_true = y_test, y_pred=logits)
    accuracy = categorical_accuracy_test.result().numpy()

    if step%20 == 0:
      print("epoch: {}, step: {}, loss: {}, train_accuracy: {} test_accuracy: {}".format(epoch, step, loss.numpy(),train_accuracy,accuracy))

      # with summary_writer.as_default():
      #   tf.summary.scalar("loss epoch: "+str(epoch), loss.numpy(), step = step)
      #   tf.summary.scalar("test_acc epoch: "+str(epoch), accuracy, step = step)


epoch: 0, step: 0, loss: 0.6929845213890076, train_accuracy: 0.4921875 test_accuracy: 0.5
epoch: 0, step: 20, loss: 0.7019393444061279, train_accuracy: 0.4832589328289032 test_accuracy: 0.5372023582458496
epoch: 0, step: 40, loss: 0.6733893156051636, train_accuracy: 0.49333080649375916 test_accuracy: 0.535442054271698
epoch: 0, step: 60, loss: 0.6110574007034302, train_accuracy: 0.5466188788414001 test_accuracy: 0.5751792788505554
epoch: 0, step: 80, loss: 0.4642757177352905, train_accuracy: 0.6062885522842407 test_accuracy: 0.6302083134651184
epoch: 0, step: 100, loss: 0.4546666145324707, train_accuracy: 0.6482828259468079 test_accuracy: 0.664371907711029
epoch: 0, step: 120, loss: 0.4467814862728119, train_accuracy: 0.6799457669258118 test_accuracy: 0.6900180578231812
epoch: 0, step: 140, loss: 0.3914114236831665, train_accuracy: 0.7045655846595764 test_accuracy: 0.7134308218955994
epoch: 0, step: 160, loss: 0.37333858013153076, train_accuracy: 0.7238936424255371 test_accuracy: 0.729

### PyTorch

Tensorflow

[seq_len, b] --> **embedding** --> [seq_len, b, embedding_len]

[seq_len, b, embedding_len]--> **RNN/LSTM/GRU layers** with *hidden_dim* -->[seq_len, b, hidden_dim]
---> only use the last seq [b, hidden_dim]

[b, hidden_dim] --> [b, 1]

#### Load data

In [6]:
TEXT = data.Field(tokenize='spacy')
LABEL = data.LabelField(dtype=torch.float)
train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)

downloading aclImdb_v1.tar.gz


aclImdb_v1.tar.gz: 100%|██████████| 84.1M/84.1M [00:07<00:00, 11.0MB/s]


In [7]:
print('len of train data:', len(train_data))
print('len of test data:', len(test_data))

len of train data: 25000
len of test data: 25000


In [8]:
print(train_data.examples[232].text)
print(train_data.examples[232].label)
print(len(train_data.examples[232].text))

['Having', 'borrowed', 'this', 'movie', 'from', 'the', 'local', 'library', 'a', 'couple', 'of', 'weeks', 'ago', 'intending', 'to', 'originally', 'see', 'this', 'on', 'or', 'a', 'few', 'days', 'after', 'Memorial', 'Day', ',', 'I', 'finally', 'got', 'to', 'seeing', 'Sayonara', 'just', 'this', 'morning', '.', 'In', 'this', 'one', 'Marlon', 'Brando', 'plays', 'Major', 'Lloyd', '"', 'Ace', '"', 'Gruver', ',', 'a', 'General', "'s", 'son', 'who', "'s", 'been', 'raised', 'a', 'certain', 'way', ',', 'being', 'transfered', 'from', 'Korea', 'to', 'Japan', 'where', 'his', 'girlfriend', 'Eileen', 'Webster', '(', 'Patricia', 'Owens', ')', 'conveniently', 'happens', 'to', 'be', '.', 'Before', 'leaving', ',', 'he', 'tries', 'to', 'persuade', 'one', 'of', 'his', 'men', ',', 'a', 'Joe', 'Kelly', '(', 'Red', 'Buttons', ')', ',', 'out', 'of', 'marrying', 'Japanese', 'woman', 'Katsumi', '(', 'Miyoshi', 'Umeki', ')', 'since', 'that', "'s", 'a', 'violation', 'of', 'military', 'fraternization', 'laws', '.', '

In [9]:
# word2vec, glove
TEXT.build_vocab(train_data, max_size=10000, vectors='glove.6B.100d')
LABEL.build_vocab(train_data)


batchsz = 64
device = torch.device('cuda')
train_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, test_data),
    batch_size = batchsz,
    device=device
)

.vector_cache/glove.6B.zip: 862MB [06:31, 2.20MB/s]                           
100%|█████████▉| 398208/400000 [00:15<00:00, 23829.91it/s]

In [10]:
print(type(train_iterator))
ds = next(iter(train_iterator))
print(ds.text.shape)
print(ds.label.shape)
# print(ds.text[:,1])
# print(ds.label[1])

<class 'torchtext.data.iterator.BucketIterator'>
torch.Size([940, 64])
torch.Size([64])


#### RNN module

In [0]:
class RNN_nn(nn.Module):

  def __init__(self, vocab_size, hidden_dim):
    super().__init__()

    self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_len)
    self.rnn = nn.RNN(input_size=embedding_len, hidden_size=hidden_dim, num_layers=2, dropout=0.5)
    self.fc = nn.Linear(in_features=hidden_dim, out_features=1)

  def forward(self, x):  

    
    #[seq_len, b] -> [seq_len, b, embedding_len = 100]
    output = self.embedding(x)
    # print('embedding size: ', output.shape)
    
    #[seq_len, b, embedding_len] ->
    # out: [seq_len, b, hidden_dim]
    # h:   [n_layer=2, b, hidden_dim]
    # last seq of out == last layer of h
    output, h = self.rnn(output)


    # Last seq of the output
    out = output[-1:,:,:].squeeze()
    
    out = self.fc(out)

    return out

In [0]:
class GRU_nn(nn.Module):

  def __init__(self, vocab_size, hidden_dim):
    super().__init__()

    self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_len)
    self.rnn = nn.GRU(input_size=embedding_len, hidden_size=hidden_dim, num_layers=2, dropout=0.5)
    self.fc = nn.Linear(in_features=hidden_dim, out_features=1)

  def forward(self, x):  

    print('input size: ', x.shape)
    #[seq_len, b] -> [seq_len, b, embedding_len = 100]
    output = self.embedding(x)
    print('embedding size: ', output.shape)
    
    #[seq_len, b, embedding_len] ->
    # out: [seq_len, b, hidden_dim]
    # h:   [n_layer=2, b, hidden_dim]
    # last seq of out == last layer of h
    output, h = self.rnn(output)


    # Last seq of the output
    out = output[-1:,:,:].squeeze()
    
    out = self.fc(out)

    return out

In [0]:
class LSTM_nn(nn.Module):
    
    def __init__(self, vocab_size, hidden_dim):
        """
        """
        super().__init__()
        
        # [0-10001] => [100]
        self.embedding = nn.Embedding(vocab_size, embedding_len)
        # [100] => [256]
        self.rnn = nn.LSTM(embedding_len, hidden_dim, num_layers=2, 
                           bidirectional=True, dropout=0.5)
        # [256*2] => [1]
        self.fc = nn.Linear(hidden_dim*2, 1)
        self.dropout = nn.Dropout(0.5)
        
        
    def forward(self, x):
        """
        x: [seq_len, b] vs [b, 3, 28, 28]
        """
        # [seq, b, 1] => [seq, b, 100]
        embedding = self.dropout(self.embedding(x))
        
        # output: [seq, b, hid_dim*2]
        # hidden/h: [num_layers*2, b, hid_dim]
        # cell/c: [num_layers*2, b, hid_di]
        output, (hidden, cell) = self.rnn(embedding)
        
        # [num_layers*2, b, hid_dim] => 2 of [b, hid_dim] => [b, hid_dim*2]
        hidden = torch.cat([hidden[-2], hidden[-1]], dim=1)
        
        # [b, hid_dim*2] => [b, 1]
        hidden = self.dropout(hidden)
        out = self.fc(hidden)
        
        return out

In [0]:
# Test

x = torch.randint(1, 10000, (1002,128))

model = GRU_nn(total_words,64)

out = model(x)

In [0]:
def binary_acc(preds, y):
    """
    get accuracy
    """
    preds = torch.round(torch.sigmoid(preds))
    correct = torch.eq(preds, y).float()
    acc = correct.sum() / len(correct)
    return acc

In [0]:
network = LSTM_nn(len(TEXT.vocab), 128).to(device)

# pretrained_embedding = TEXT.vocab.vectors
# print('pretrained_embedding:', pretrained_embedding.shape)
# network.embedding.weight.data.copy_(pretrained_embedding)
# print('embedding layer inited.')


optimizer = torch.optim.Adam(network.parameters(), lr=learning_rate)
criteon = nn.BCEWithLogitsLoss().to(device)

In [27]:
for epoch in range(10):
  for step, batch in enumerate(train_iterator):

    pred = network(batch.text).squeeze()

    loss = criteon(pred, batch.label)
    acc = binary_acc(pred, batch.label).item()

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if step%100 == 0:
      print("epoch: {}, step: {}, loss: {}, acc:{}".format(epoch,step,loss.item(), acc))

epoch: 0, step: 0, loss: 0.6977689862251282, acc:0.5
epoch: 0, step: 100, loss: 0.6727264523506165, acc:0.578125
epoch: 0, step: 200, loss: 0.6923737525939941, acc:0.5
epoch: 0, step: 300, loss: 0.6616694331169128, acc:0.578125
epoch: 1, step: 0, loss: 0.7228837609291077, acc:0.59375
epoch: 1, step: 100, loss: 0.5663291811943054, acc:0.75
epoch: 1, step: 200, loss: 0.6952507495880127, acc:0.5
epoch: 1, step: 300, loss: 0.6980805993080139, acc:0.484375
epoch: 2, step: 0, loss: 0.6476609110832214, acc:0.640625
epoch: 2, step: 100, loss: 0.6129595041275024, acc:0.671875
epoch: 2, step: 200, loss: 0.6854571104049683, acc:0.5625
epoch: 2, step: 300, loss: 0.6750308275222778, acc:0.609375
epoch: 3, step: 0, loss: 0.6261641383171082, acc:0.671875
epoch: 3, step: 100, loss: 0.6630284786224365, acc:0.578125
epoch: 3, step: 200, loss: 0.696617841720581, acc:0.53125
epoch: 3, step: 300, loss: 0.7630640268325806, acc:0.5625
epoch: 4, step: 0, loss: 0.49356481432914734, acc:0.796875
epoch: 4, step:

In [28]:
avg_acc = []
with torch.no_grad():
  for batch in test_iterator:

    pred = network(batch.text).squeeze()

    loss = criteon(pred, batch.label)

    acc = binary_acc(pred, batch.label).item()
    avg_acc.append(acc)
    
    # print("test loss: {}, test acc: {}".format(loss.item(), acc))

acc_avg = np.array(avg_acc).mean()

print("Average acc: ", acc_avg)

Average acc:  0.8639226342406114
