In [2]:
%%capture
!pip install transformers

import torch 
import torch.nn as nn
from transformers import BertTokenizer, BertForSequenceClassification
import torch.optim as optim

from torchtext.data import Field, TabularDataset, BucketIterator, Iterator, LabelField
from torchtext import datasets

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

import copy
import os

sns.set(palette='summer')

In [3]:
LOCAL_LAUNCH = False

In [4]:
if not LOCAL_LAUNCH:
    from google.colab import drive
    drive.mount('/content/drive')
    destination_folder = '/content/drive/My Drive/bert'
else:
    if not os.path.exists('models/'):
            os.makedirs('models/')
    destination_folder = '/models'  # Folder to save model weights

Mounted at /content/drive


In [5]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
len(tokenizer)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




30522

In [7]:
# const values
BATCH_SIZE = 32
MAX_SEQ_LEN = 128  # Make short sequence to avoid cuda memory limits on google colab
PAD_INDEX = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
UNK_INDEX = tokenizer.convert_tokens_to_ids(tokenizer.unk_token)
SOFTMAX_TEMP = 2

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('Current device: ', device)

Current device:  cuda:0


In [8]:
# Fields
label_field = LabelField(batch_first=True)
text_field = Field(use_vocab=False, tokenize=tokenizer.encode, lower=False, include_lengths=False, batch_first=True,
                   fix_length=MAX_SEQ_LEN, pad_token=PAD_INDEX, unk_token=UNK_INDEX)

In [9]:
%%time
train, test = datasets.IMDB.splits(text_field, label_field)  # Takes about 3 minutes

downloading aclImdb_v1.tar.gz


aclImdb_v1.tar.gz: 100%|██████████| 84.1M/84.1M [00:08<00:00, 10.3MB/s]


CPU times: user 2min 53s, sys: 8.37 s, total: 3min 2s
Wall time: 3min 10s


In [10]:
label_field.build_vocab(train)

In [11]:
valid, test = test.split(0.5)

In [12]:
train_iter = BucketIterator(train, batch_size=BATCH_SIZE, sort_key=lambda x: len(x.text),
                            device=device, train=True, sort=True, sort_within_batch=True)
valid_iter = BucketIterator(valid, batch_size=BATCH_SIZE, sort_key=lambda x: len(x.text),
                            device=device, train=True, sort=True, sort_within_batch=True)
test_iter = Iterator(test, batch_size=BATCH_SIZE, device=device, train=False, shuffle=False, sort=False)

# Teacher model

All torch models in this notebook support the following interface:

model.forward function takes torch.LongTensor of indexes with shape [batch_size, seq_len] and returns logits of prababilities with shape [batch_size, num_classes].

In [13]:
class BERT(nn.Module):
    def __init__(self):
        super(BERT, self).__init__()

        options_name = "bert-base-uncased"
        self.encoder = BertForSequenceClassification.from_pretrained(options_name)  # Use HuggingFace

    def forward(self, text):
        """
        :param text: torch.LongTensor with shape [batch_size, seq_len]
        :return torch.FloatTensor with shape [batch_size, 2]. Logits classes 
        """
        res = self.encoder(text, return_dict=True)

        return res['logits']

### Utils

Function for convenience

In [14]:
# =========================== BEGINING OF BORROWED CODE =======================

def save_checkpoint(save_path, model):    
    torch.save(model.state_dict(), save_path)

    print(f'Model saved to ==> {save_path}')

def load_checkpoint(load_path, model):
    
    state_dict = torch.load(load_path, map_location=device)
    print(f'Model loaded from <== {load_path}')
    
    model.load_state_dict(torch.load(load_path, map_location=device))


def save_metrics(save_path, train_loss_list, valid_loss_list, global_steps_list):
    state_dict = {'train_loss_list': train_loss_list,
                  'valid_loss_list': valid_loss_list,
                  'global_steps_list': global_steps_list}
    
    torch.save(state_dict, save_path)
    print(f'Model saved to ==> {save_path}')


def load_metrics(load_path):
    state_dict = torch.load(load_path, map_location=device)
    print(f'Model loaded from <== {load_path}')
    
    return state_dict['train_loss_list'], state_dict['valid_loss_list'], state_dict['global_steps_list']
# ============================ ENDING OF BORROWED CODE =========================


def calculate_classification_metrics(pred, real):
    """
    Calculates binary classification metrics: accuracy, precision, recall, f1.

    :param pred, real: np.array with same shape. 
    :return (accuracy, precision, recall, f1): tuple of floats 
    """
    tp = (pred[real == 1] == 1).sum()
    fp = (pred[real == 0] == 1).sum()
    fn = (pred[real == 1] == 0).sum()

    accuracy = (pred == real).sum() / real.shape[0]
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    f1 = 2 * (precision * recall) / (precision + recall)
    
    return accuracy, precision, recall, f1

def soft_cross_entropy(predicted, target):
    """Calculate cross entropy"""
    return -(target * torch.log(predicted)).sum(dim=1).mean()

def evaluate(eval_iter, model, temperature=SOFTMAX_TEMP, criterion=nn.NLLLoss(), show=False):
    """
    Evaluate model on eval_iter
    :param eval_iter: torch.Iterator
    :param model: torch.nn.Module. Model to evaluate
    :param temperature: float. Temperature used if softmax targets. 
    :param criterion: torch loss function.
    :param show: bool. Whether to print accuracy, precision etc scores.
      
    :return (valid_running_loss, (accuracy, precision, recall, f1_score)): all floats
    """
    model.eval()

    predicted = torch.LongTensor([0])
    real = torch.LongTensor([0])
    valid_running_loss = []

    with torch.no_grad():
        for batch in eval_iter:
            text = batch.text.to(device)
            labels = batch.label.to(device)
            logits = model(text)
            
            softmax_targets = nn.LogSoftmax(dim=1)(logits / temperature)
            
            valid_running_loss.append(criterion(
                softmax_targets,
                labels
                ).item())

            predicted = torch.cat(
                [predicted, softmax_targets.argmax(dim=1).to('cpu')]
                )
            real = torch.cat([real, labels.to('cpu')])
    metrics = calculate_classification_metrics(predicted.numpy(), real.numpy())

    if show:
        print("Accuracy {} \nPrecision {}\nRecall {}\nF1 {}".format(*metrics))

    return np.array(valid_running_loss).mean(), metrics

def print_metrics(path):
    """
    Plot metrics from torch state_dict from path. 
    """
    train_loss, valid_loss, steps = load_metrics(path)
    plt.figure(figsize=(12, 8))
    
    plt.plot(steps, train_loss, label='Train', color='Blue')
    plt.plot(steps, valid_loss, label='Validation')

    plt.xlabel('Steps')
    plt.ylabel('Loss')
    plt.legend()

    plt.show() 

### Train session 

In [16]:
from tqdm import notebook

In [20]:
def train(model,
          optimizer,
          criterion=nn.NLLLoss(),
          train_loader=train_iter,
          valid_loader=valid_iter,
          num_epochs=2,
          eval_every=len(train_iter) // 2,
          file_path=destination_folder,
          temperature=1, 
          best_valid_loss=float("Inf"),
          name='bert.pt',
          metrics_name='metrics.pt',
          distillation=False,
          distill_model=None,
          show=False
          ):
    """
    Function for training the model

    :param model: torch model for training
    :param criterion: torch loss fucntion to optimize
    :param train_loader: torch.Iterator; contains training dataset
    :param valid_loader: torch.Iterator; contains validation dataset
    :param num_epoch: int; Quantitu of epochs
    :param evar_every: int; number of steps after which perform validation
    :param file_path: str; path to folder to save model parameters
    :param temperature: float; Temperature for sof targets
    :param best_valid_loss: float
    :param name: str; name for saving model params  
    :param metrics_name: str; name for saving model metrics
    :param distillitaion: bool; whether train with softtargetss
    :param distill_model: torch model; It generates soft targets. Only used when distillation=True
    :param show: bool; Show metrics during train.

    :return None
    """
    assert not (distillation and distill_model is None), 'Try to distillate without distillation model :('

    # initialize running values
    running_loss = 0.0
    valid_running_loss = 0.0
    global_step = 0
    train_loss_list = []
    valid_loss_list = []
    global_steps_list = []
    metrics = []  # tuple of classification metrics

    # training loop
    model.train()
    for epoch in range(num_epochs):
        print('Current epoch: {}'.format(epoch))
        for batch in notebook.tqdm(train_loader):
            text = batch.text.to(device)
            labels = batch.label.to(device)
            logits = model(text)

            if distillation:  # Creating targets 
                softmax_targets = nn.Softmax(dim=1)(logits / temperature)
                with torch.no_grad():
                    soft_labels = nn.Softmax(dim=1)(distill_model(text) / temperature)
                loss = soft_cross_entropy(softmax_targets, soft_labels.detach())
            else:
                softmax_targets = nn.LogSoftmax(dim=1)(logits / temperature)
                loss = criterion(softmax_targets, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            global_step += 1

            torch.cuda.empty_cache()

            # =========================== BEGINING OF BORROWED CODE =======================

            if global_step % eval_every == 0:
                valid_running_loss, current_metrics = evaluate(valid_loader, model, show=show)

                # evaluation
                average_train_loss = running_loss / eval_every
                average_valid_loss = valid_running_loss
                train_loss_list.append(average_train_loss)
                valid_loss_list.append(average_valid_loss)
                global_steps_list.append(global_step)
                metrics.append(current_metrics)

                # resetting running values
                running_loss = 0.0                
                valid_running_loss = 0.0
                model.train()

                # print progress
                print('Epoch [{}/{}], Step [{}/{}], Train Loss: {:.4f}, Valid Loss: {:.4f}'
                      .format(epoch+1, num_epochs, global_step, num_epochs*len(train_loader),
                              average_train_loss, average_valid_loss))
                # checkpoint
                if best_valid_loss > average_valid_loss:
                    best_valid_loss = average_valid_loss
                    save_checkpoint(file_path + '/' + name, model)
                    save_metrics(file_path + '/' + metrics_name, train_loss_list, valid_loss_list, global_steps_list)
    
    save_metrics(file_path + '/' + metrics_name, train_loss_list, valid_loss_list, global_steps_list)
    # ============================ ENDING OF BORROWED CODE =========================
    
    print('=' * 30 + 'Train finished!' + '=' * 30)

In [96]:
model = BERT().to(device)
optimizer = optim.Adam(model.parameters(), lr=2e-5)

In [109]:
train(model=model, optimizer=optimizer, temperature=SOFTMAX_TEMP, show=true)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [1/2], Step [260/1564], Train Loss: 0.3049, Valid Loss: 0.3275
Accuracy 0.8563429851223804 
Precision 0.8972457627118644
Recall 0.8071791613722998
F1 0.8498327759197324
Model saved to ==> /content/drive/My Drive/bert/bert.pt
Model saved to ==> /content/drive/My Drive/bert/metrics.pt
Epoch [1/2], Step [520/1564], Train Loss: 0.3057, Valid Loss: 0.2926
Accuracy 0.877459606462966 
Precision 0.8851875808538163
Recall 0.8694409148665819
F1 0.8772435897435897
Model saved to ==> /content/drive/My Drive/bert/bert.pt
Model saved to ==> /content/drive/My Drive/bert/metrics.pt
Epoch [1/2], Step [780/1564], Train Loss: 0.3990, Valid Loss: 0.3214
Accuracy 0.8614621660534314 
Precision 0.9095477386934674
Recall 0.8049555273189326
F1 0.8540613414223122



HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [2/2], Step [1040/1564], Train Loss: 0.1675, Valid Loss: 0.2957
Accuracy 0.8787394016957287 
Precision 0.8649969456322542
Recall 0.8996188055908514
F1 0.8819682341949548
Epoch [2/2], Step [1300/1564], Train Loss: 0.1866, Valid Loss: 0.3044
Accuracy 0.8745800671892497 
Precision 0.8394600804135555
Recall 0.9285260482846251
F1 0.8817496229260935
Epoch [2/2], Step [1560/1564], Train Loss: 0.2701, Valid Loss: 0.3234
Accuracy 0.8710606302991521 
Precision 0.8846911957950065
Recall 0.8554637865311309
F1 0.8698320413436693

Model saved to ==> /content/drive/My Drive/bert/metrics.pt
Finished Training!


In [24]:
bert_model = BERT().to(device)

load_checkpoint(destination_folder + '/bert.pt', bert_model)
ret = evaluate(test_iter, bert_model, show=True)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=433.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=440473133.0, style=ProgressStyle(descri…


Model loaded from <== /content/drive/My Drive/bert/bert.pt
Accuracy 0.874970002399808 
Precision 0.8759915816739517
Recall 0.8716172680412371
F1 0.8737989503431569


## Actual experiments

### Neural nets

In [25]:
class CNN(nn.Module):
    def __init__(self, hidden_dim, emb_dim, vocab_size, kernel_sizes, dropout=0.5):
        super().__init__()

        self.embed = nn.Embedding(vocab_size, emb_dim)
        self.convolutions = nn.ModuleList()
        self.lin1 = nn.Linear(hidden_dim * len(kernel_sizes), hidden_dim * len(kernel_sizes))
        self.lin2 = nn.Linear(hidden_dim * len(kernel_sizes), 2)
        self.dropout = nn.Dropout(dropout)
        self.nonlin = nn.LeakyReLU(0.2)

        for k_size in kernel_sizes:
          self.convolutions.append(
              nn.Sequential(
                  nn.Conv1d(emb_dim, hidden_dim, kernel_size=k_size, padding=k_size // 2),
                  nn.LeakyReLU(0.2)
                  )
              )
    
    def forward(self, x): 
        """
        :param x: torch.LongTensor of indexes with shape [batch_size, seq_len] 
        :return logit: torch.FloatTensor with shape [batch_size, seq_len]
        """

        x = self.embed(x)  # [batch_size, seq_len, D]
        x = x.permute((0, 2, 1))  # [batch_size, D, seq_len]

        res = []

        for layer in self.convolutions:
            current = torch.max(layer(x), dim=2).values
            res.append(current)
        
        x = self.nonlin(self.lin1(torch.cat(res, dim=1)))
        logit = self.lin2(self.dropout(x))
        return logit

In [26]:
class RNN(nn.Module):
    def __init__(self, hidden_dim, emb_dim, vocab_size, dropout=0.5):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_dim) 
        self.GRU = nn.GRU(emb_dim, hidden_dim, num_layers=1, batch_first=True) 
        self.lin1 = nn.Linear(hidden_dim, hidden_dim)
        self.lin2 = nn.Linear(hidden_dim, 2)
        self.dropout = nn.Dropout(dropout)
        self.nonlin = nn.LeakyReLU(0.2)

        self.logprob = nn.LogSoftmax(dim=1)
            
    def forward(self, x):
        """
        :param x: torch.LongTensor of indexes with shape [batch_size, seq_len] 
        :return logit: torch.FloatTensor with shape [batch_size, seq_len]
        """
        inputs = self.embedding(x)
        output, hidden = self.GRU(inputs)
        hidden = self.nonlin(self.lin1(self.nonlin(hidden.squeeze(0))))
        logit = self.lin2(self.dropout(hidden))

        return logit

### Solid train

Train models from scratch

In [27]:
HIDDEN_DIMS = [64, 128, 256]

In [28]:
def chunk_train(
    models=[], 
    hidden_dims=HIDDEN_DIMS, 
    save_name='unk', 
    epoch_num=3, 
    model_type='unk', 
    distill_model=None
    ):
    """
    Function to train various models.
    
    :param models: list of models
    :param hidden_dims: list of ints. Sizes of hidden dimensions
    :param save_name: saving name
    :param epoch_num: int. Quantity of training epochs
    :param model_type: str. In this notebook: RNN and CNN
    :param distill_model: model to create soft targets. If None then train on hard targets

    :return list of floats (accuracy, precision, recall, f1)
    """
    assert len(models) == len(hidden_dims)

    model_metrics = {}
    for current_model, current_hidden_dim in zip(models, HIDDEN_DIMS):
        print('Running experiment for {} with {} hidden units'.format(model_type, current_hidden_dim))

        if distill_model is None:
            train(
                model=current_model, 
                optimizer=optim.Adam(current_model.parameters(), lr=1e-3), 
                temperature=SOFTMAX_TEMP, 
                name=save_name + '_{}.pt'.format(current_hidden_dim), 
                metrics_name=save_name + '{}_metrics.pt'.format(current_hidden_dim),
                num_epochs=epoch_num,
                show=False
                )
        else:
            train(
                model=current_model, 
                optimizer=optim.Adam(current_model.parameters(), lr=1e-3), 
                temperature=SOFTMAX_TEMP, 
                name=save_name + '_{}.pt'.format(current_hidden_dim), 
                metrics_name=save_name + '{}_metrics.pt'.format(current_hidden_dim),
                num_epochs=epoch_num,
                distillation=True,
                distill_model=distill_model,
                show=False
                )
        _, model_metrics[current_hidden_dim] = evaluate(test_iter, current_model, show=True)
        print('=' * 100)

    return model_metrics

### Creating model

In [42]:
rnn_models = []

for i in range(len(HIDDEN_DIMS)):  # Creating models for experiments 
    rnn_models.append(
        RNN(
            hidden_dim=HIDDEN_DIMS[i],
            emb_dim=128, 
            vocab_size=len(tokenizer)).to(device)
            )

# Copiing models to make them equally initialized
rnn_models_distill = copy.deepcopy(rnn_models)

In [46]:
cnn_models = []
kernel_sizes = [3,4,5]

for i in range(len(HIDDEN_DIMS)):  # Creating models for experiments 
    cnn_models.append(
        CNN(
            hidden_dim=HIDDEN_DIMS[i],
            emb_dim=128, 
            vocab_size=len(tokenizer), 
            kernel_sizes=kernel_sizes).to(device)
            )
    
cnn_models_distill = copy.deepcopy(cnn_models)

In [44]:
_ = chunk_train(
    models=rnn_models, 
    hidden_dims=HIDDEN_DIMS, 
    save_name='rnn', 
    epoch_num=4, 
    model_type='rnn'
    )

Running experiment for rnn with 64 hidden units
Current epoch: 0


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [1/4], Step [391/3128], Train Loss: 0.6911, Valid Loss: 0.6952
Model saved to ==> /content/drive/My Drive/bert/rnn_64.pt
Model saved to ==> /content/drive/My Drive/bert/rnn64_metrics.pt
Epoch [1/4], Step [782/3128], Train Loss: 0.6866, Valid Loss: 0.6889
Model saved to ==> /content/drive/My Drive/bert/rnn_64.pt
Model saved to ==> /content/drive/My Drive/bert/rnn64_metrics.pt

Current epoch: 1


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [2/4], Step [1173/3128], Train Loss: 0.6536, Valid Loss: 0.6537
Model saved to ==> /content/drive/My Drive/bert/rnn_64.pt
Model saved to ==> /content/drive/My Drive/bert/rnn64_metrics.pt
Epoch [2/4], Step [1564/3128], Train Loss: 0.6167, Valid Loss: 0.6168
Model saved to ==> /content/drive/My Drive/bert/rnn_64.pt
Model saved to ==> /content/drive/My Drive/bert/rnn64_metrics.pt

Current epoch: 2


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [3/4], Step [1955/3128], Train Loss: 0.5359, Valid Loss: 0.6135
Model saved to ==> /content/drive/My Drive/bert/rnn_64.pt
Model saved to ==> /content/drive/My Drive/bert/rnn64_metrics.pt
Epoch [3/4], Step [2346/3128], Train Loss: 0.5076, Valid Loss: 0.5705
Model saved to ==> /content/drive/My Drive/bert/rnn_64.pt
Model saved to ==> /content/drive/My Drive/bert/rnn64_metrics.pt

Current epoch: 3


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [4/4], Step [2737/3128], Train Loss: 0.3736, Valid Loss: 0.4997
Model saved to ==> /content/drive/My Drive/bert/rnn_64.pt
Model saved to ==> /content/drive/My Drive/bert/rnn64_metrics.pt
Epoch [4/4], Step [3128/3128], Train Loss: 0.3846, Valid Loss: 0.4820
Model saved to ==> /content/drive/My Drive/bert/rnn_64.pt
Model saved to ==> /content/drive/My Drive/bert/rnn64_metrics.pt

Model saved to ==> /content/drive/My Drive/bert/rnn64_metrics.pt
Accuracy 0.7842572594192465 
Precision 0.7885911556797632
Recall 0.7727126288659794
F1 0.7805711496216744
Running experiment for rnn with 128 hidden units
Current epoch: 0


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [1/4], Step [391/3128], Train Loss: 0.6889, Valid Loss: 0.6848
Model saved to ==> /content/drive/My Drive/bert/rnn_128.pt
Model saved to ==> /content/drive/My Drive/bert/rnn128_metrics.pt
Epoch [1/4], Step [782/3128], Train Loss: 0.6664, Valid Loss: 0.6534
Model saved to ==> /content/drive/My Drive/bert/rnn_128.pt
Model saved to ==> /content/drive/My Drive/bert/rnn128_metrics.pt

Current epoch: 1


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [2/4], Step [1173/3128], Train Loss: 0.5393, Valid Loss: 0.5362
Model saved to ==> /content/drive/My Drive/bert/rnn_128.pt
Model saved to ==> /content/drive/My Drive/bert/rnn128_metrics.pt
Epoch [2/4], Step [1564/3128], Train Loss: 0.4830, Valid Loss: 0.4944
Model saved to ==> /content/drive/My Drive/bert/rnn_128.pt
Model saved to ==> /content/drive/My Drive/bert/rnn128_metrics.pt

Current epoch: 2


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [3/4], Step [1955/3128], Train Loss: 0.3143, Valid Loss: 0.4381
Model saved to ==> /content/drive/My Drive/bert/rnn_128.pt
Model saved to ==> /content/drive/My Drive/bert/rnn128_metrics.pt
Epoch [3/4], Step [2346/3128], Train Loss: 0.3471, Valid Loss: 0.4402

Current epoch: 3


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [4/4], Step [2737/3128], Train Loss: 0.2044, Valid Loss: 0.5074
Epoch [4/4], Step [3128/3128], Train Loss: 0.2354, Valid Loss: 0.4729

Model saved to ==> /content/drive/My Drive/bert/rnn128_metrics.pt
Accuracy 0.8016958643308535 
Precision 0.7781590332686856
Recall 0.8402061855670103
F1 0.8079931841065757
Running experiment for rnn with 256 hidden units
Current epoch: 0


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [1/4], Step [391/3128], Train Loss: 0.6848, Valid Loss: 0.7559
Model saved to ==> /content/drive/My Drive/bert/rnn_256.pt
Model saved to ==> /content/drive/My Drive/bert/rnn256_metrics.pt
Epoch [1/4], Step [782/3128], Train Loss: 0.6476, Valid Loss: 0.6154
Model saved to ==> /content/drive/My Drive/bert/rnn_256.pt
Model saved to ==> /content/drive/My Drive/bert/rnn256_metrics.pt

Current epoch: 1


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [2/4], Step [1173/3128], Train Loss: 0.4369, Valid Loss: 0.4436
Model saved to ==> /content/drive/My Drive/bert/rnn_256.pt
Model saved to ==> /content/drive/My Drive/bert/rnn256_metrics.pt
Epoch [2/4], Step [1564/3128], Train Loss: 0.4459, Valid Loss: 0.4103
Model saved to ==> /content/drive/My Drive/bert/rnn_256.pt
Model saved to ==> /content/drive/My Drive/bert/rnn256_metrics.pt

Current epoch: 2


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [3/4], Step [1955/3128], Train Loss: 0.2388, Valid Loss: 0.4591
Epoch [3/4], Step [2346/3128], Train Loss: 0.3132, Valid Loss: 0.4166

Current epoch: 3


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [4/4], Step [2737/3128], Train Loss: 0.1479, Valid Loss: 0.6371
Epoch [4/4], Step [3128/3128], Train Loss: 0.1730, Valid Loss: 0.5251

Model saved to ==> /content/drive/My Drive/bert/rnn256_metrics.pt
Accuracy 0.8020958323334133 
Precision 0.7945724203218681
Recall 0.8112113402061856
F1 0.8028056751155748


In [47]:
_  = chunk_train(
    models=cnn_models, 
    hidden_dims=HIDDEN_DIMS, 
    save_name='cnn', 
    epoch_num=3, 
    model_type='cnn'
    )

Running experiment for cnn with 64 hidden units
Current epoch: 0


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [1/3], Step [391/2346], Train Loss: 0.5858, Valid Loss: 0.5145
Model saved to ==> /content/drive/My Drive/bert/cnn_64.pt
Model saved to ==> /content/drive/My Drive/bert/cnn64_metrics.pt
Epoch [1/3], Step [782/2346], Train Loss: 0.5589, Valid Loss: 0.5005
Model saved to ==> /content/drive/My Drive/bert/cnn_64.pt
Model saved to ==> /content/drive/My Drive/bert/cnn64_metrics.pt

Current epoch: 1


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [2/3], Step [1173/2346], Train Loss: 0.3541, Valid Loss: 0.4289
Model saved to ==> /content/drive/My Drive/bert/cnn_64.pt
Model saved to ==> /content/drive/My Drive/bert/cnn64_metrics.pt
Epoch [2/3], Step [1564/2346], Train Loss: 0.4301, Valid Loss: 0.4888

Current epoch: 2


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [3/3], Step [1955/2346], Train Loss: 0.2127, Valid Loss: 0.4659
Epoch [3/3], Step [2346/2346], Train Loss: 0.2724, Valid Loss: 0.5439

Model saved to ==> /content/drive/My Drive/bert/cnn64_metrics.pt
Accuracy 0.7728181745460363 
Precision 0.8672045355429568
Recall 0.640625
F1 0.7368908652955346
Running experiment for cnn with 128 hidden units
Current epoch: 0


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [1/3], Step [391/2346], Train Loss: 0.5972, Valid Loss: 0.5062
Model saved to ==> /content/drive/My Drive/bert/cnn_128.pt
Model saved to ==> /content/drive/My Drive/bert/cnn128_metrics.pt
Epoch [1/3], Step [782/2346], Train Loss: 0.5601, Valid Loss: 0.4616
Model saved to ==> /content/drive/My Drive/bert/cnn_128.pt
Model saved to ==> /content/drive/My Drive/bert/cnn128_metrics.pt

Current epoch: 1


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [2/3], Step [1173/2346], Train Loss: 0.3461, Valid Loss: 0.4246
Model saved to ==> /content/drive/My Drive/bert/cnn_128.pt
Model saved to ==> /content/drive/My Drive/bert/cnn128_metrics.pt
Epoch [2/3], Step [1564/2346], Train Loss: 0.4179, Valid Loss: 0.4465

Current epoch: 2


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [3/3], Step [1955/2346], Train Loss: 0.1925, Valid Loss: 0.4725
Epoch [3/3], Step [2346/2346], Train Loss: 0.2509, Valid Loss: 0.4805

Model saved to ==> /content/drive/My Drive/bert/cnn128_metrics.pt
Accuracy 0.8075353971682265 
Precision 0.8239604635310157
Recall 0.7788337628865979
F1 0.8007618416694269
Running experiment for cnn with 256 hidden units
Current epoch: 0


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [1/3], Step [391/2346], Train Loss: 0.6123, Valid Loss: 0.5436
Model saved to ==> /content/drive/My Drive/bert/cnn_256.pt
Model saved to ==> /content/drive/My Drive/bert/cnn256_metrics.pt
Epoch [1/3], Step [782/2346], Train Loss: 0.5574, Valid Loss: 0.4723
Model saved to ==> /content/drive/My Drive/bert/cnn_256.pt
Model saved to ==> /content/drive/My Drive/bert/cnn256_metrics.pt

Current epoch: 1


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [2/3], Step [1173/2346], Train Loss: 0.3444, Valid Loss: 0.5447
Epoch [2/3], Step [1564/2346], Train Loss: 0.3991, Valid Loss: 0.4023
Model saved to ==> /content/drive/My Drive/bert/cnn_256.pt
Model saved to ==> /content/drive/My Drive/bert/cnn256_metrics.pt

Current epoch: 2


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [3/3], Step [1955/2346], Train Loss: 0.2044, Valid Loss: 0.5481
Epoch [3/3], Step [2346/2346], Train Loss: 0.2424, Valid Loss: 0.4896

Model saved to ==> /content/drive/My Drive/bert/cnn256_metrics.pt
Accuracy 0.804655627549796 
Precision 0.8134154460719041
Recall 0.7872100515463918
F1 0.800098231827112


### Knowledge distillation

In [48]:
chunk_train(
    models=rnn_models_distill, 
    hidden_dims=HIDDEN_DIMS, 
    save_name='rnn_distill', 
    epoch_num=4, 
    model_type='rnn',
    distill_model=bert_model
    )

Running experiment for rnn with 64 hidden units
Current epoch: 0


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

  self.dropout, self.training, self.bidirectional, self.batch_first)


Epoch [1/4], Step [391/3128], Train Loss: 0.6918, Valid Loss: 0.6975
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_64.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill64_metrics.pt
Epoch [1/4], Step [782/3128], Train Loss: 0.6742, Valid Loss: 0.6666
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_64.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill64_metrics.pt

Current epoch: 1


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [2/4], Step [1173/3128], Train Loss: 0.6262, Valid Loss: 0.6217
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_64.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill64_metrics.pt
Epoch [2/4], Step [1564/3128], Train Loss: 0.5770, Valid Loss: 0.5727
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_64.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill64_metrics.pt

Current epoch: 2


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [3/4], Step [1955/3128], Train Loss: 0.4525, Valid Loss: 0.4641
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_64.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill64_metrics.pt
Epoch [3/4], Step [2346/3128], Train Loss: 0.4801, Valid Loss: 0.4334
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_64.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill64_metrics.pt

Current epoch: 3


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [4/4], Step [2737/3128], Train Loss: 0.3306, Valid Loss: 0.4036
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_64.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill64_metrics.pt
Epoch [4/4], Step [3128/3128], Train Loss: 0.4327, Valid Loss: 0.3923
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_64.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill64_metrics.pt

Model saved to ==> /content/drive/My Drive/bert/rnn_distill64_metrics.pt
Accuracy 0.8200143988480921 
Precision 0.7957262402869097
Recall 0.857764175257732
F1 0.8255813953488371
Running experiment for rnn with 128 hidden units
Current epoch: 0


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [1/4], Step [391/3128], Train Loss: 0.6891, Valid Loss: 0.6879
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_128.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill128_metrics.pt
Epoch [1/4], Step [782/3128], Train Loss: 0.6782, Valid Loss: 0.6941

Current epoch: 1


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [2/4], Step [1173/3128], Train Loss: 0.6435, Valid Loss: 0.6279
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_128.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill128_metrics.pt
Epoch [2/4], Step [1564/3128], Train Loss: 0.5546, Valid Loss: 0.4816
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_128.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill128_metrics.pt

Current epoch: 2


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [3/4], Step [1955/3128], Train Loss: 0.3689, Valid Loss: 0.3970
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_128.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill128_metrics.pt
Epoch [3/4], Step [2346/3128], Train Loss: 0.4519, Valid Loss: 0.3847
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_128.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill128_metrics.pt

Current epoch: 3


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [4/4], Step [2737/3128], Train Loss: 0.2966, Valid Loss: 0.3733
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_128.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill128_metrics.pt
Epoch [4/4], Step [3128/3128], Train Loss: 0.4137, Valid Loss: 0.3717
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_128.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill128_metrics.pt

Model saved to ==> /content/drive/My Drive/bert/rnn_distill128_metrics.pt
Accuracy 0.8284937205023598 
Precision 0.8018419489007724
Recall 0.8695231958762887
F1 0.8343122102009274
Running experiment for rnn with 256 hidden units
Current epoch: 0


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [1/4], Step [391/3128], Train Loss: 0.6857, Valid Loss: 0.6775
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_256.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill256_metrics.pt
Epoch [1/4], Step [782/3128], Train Loss: 0.6282, Valid Loss: 0.5598
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_256.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill256_metrics.pt

Current epoch: 1


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [2/4], Step [1173/3128], Train Loss: 0.4160, Valid Loss: 0.4207
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_256.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill256_metrics.pt
Epoch [2/4], Step [1564/3128], Train Loss: 0.4723, Valid Loss: 0.3908
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_256.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill256_metrics.pt

Current epoch: 2


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [3/4], Step [1955/3128], Train Loss: 0.3061, Valid Loss: 0.3820
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_256.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill256_metrics.pt
Epoch [3/4], Step [2346/3128], Train Loss: 0.4204, Valid Loss: 0.3733
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_256.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill256_metrics.pt

Current epoch: 3


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [4/4], Step [2737/3128], Train Loss: 0.2695, Valid Loss: 0.3768
Epoch [4/4], Step [3128/3128], Train Loss: 0.3903, Valid Loss: 0.3760

Model saved to ==> /content/drive/My Drive/bert/rnn_distill256_metrics.pt
Accuracy 0.8271338292936565 
Precision 0.8053417836124943
Recall 0.8596971649484536
F1 0.8316322555512271


{64: (0.8200143988480921,
  0.7957262402869097,
  0.857764175257732,
  0.8255813953488371),
 128: (0.8284937205023598,
  0.8018419489007724,
  0.8695231958762887,
  0.8343122102009274),
 256: (0.8271338292936565,
  0.8053417836124943,
  0.8596971649484536,
  0.8316322555512271)}

In [49]:
_ = chunk_train(
    models=cnn_models_distill, 
    hidden_dims=HIDDEN_DIMS, 
    save_name='cnn_distill', 
    epoch_num=2, 
    model_type='cnn',
    distill_model=bert_model
    )

Running experiment for cnn with 64 hidden units
Current epoch: 0


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [1/2], Step [391/1564], Train Loss: 0.5822, Valid Loss: 0.4785
Model saved to ==> /content/drive/My Drive/bert/cnn_distill_64.pt
Model saved to ==> /content/drive/My Drive/bert/cnn_distill64_metrics.pt
Epoch [1/2], Step [782/1564], Train Loss: 0.5480, Valid Loss: 0.4608
Model saved to ==> /content/drive/My Drive/bert/cnn_distill_64.pt
Model saved to ==> /content/drive/My Drive/bert/cnn_distill64_metrics.pt

Current epoch: 1


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [2/2], Step [1173/1564], Train Loss: 0.3795, Valid Loss: 0.4113
Model saved to ==> /content/drive/My Drive/bert/cnn_distill_64.pt
Model saved to ==> /content/drive/My Drive/bert/cnn_distill64_metrics.pt
Epoch [2/2], Step [1564/1564], Train Loss: 0.4706, Valid Loss: 0.4140

Model saved to ==> /content/drive/My Drive/bert/cnn_distill64_metrics.pt
Accuracy 0.8111351091912647 
Precision 0.800875958079149
Recall 0.8247422680412371
F1 0.8126339179430203
Running experiment for cnn with 128 hidden units
Current epoch: 0


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [1/2], Step [391/1564], Train Loss: 0.5886, Valid Loss: 0.4762
Model saved to ==> /content/drive/My Drive/bert/cnn_distill_128.pt
Model saved to ==> /content/drive/My Drive/bert/cnn_distill128_metrics.pt
Epoch [1/2], Step [782/1564], Train Loss: 0.5429, Valid Loss: 0.4669
Model saved to ==> /content/drive/My Drive/bert/cnn_distill_128.pt
Model saved to ==> /content/drive/My Drive/bert/cnn_distill128_metrics.pt

Current epoch: 1


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [2/2], Step [1173/1564], Train Loss: 0.3696, Valid Loss: 0.4128
Model saved to ==> /content/drive/My Drive/bert/cnn_distill_128.pt
Model saved to ==> /content/drive/My Drive/bert/cnn_distill128_metrics.pt
Epoch [2/2], Step [1564/1564], Train Loss: 0.4584, Valid Loss: 0.4053
Model saved to ==> /content/drive/My Drive/bert/cnn_distill_128.pt
Model saved to ==> /content/drive/My Drive/bert/cnn_distill128_metrics.pt

Model saved to ==> /content/drive/My Drive/bert/cnn_distill128_metrics.pt
Accuracy 0.8194544436445085 
Precision 0.8312929733355694
Recall 0.798485824742268
F1 0.8145591980938296
Running experiment for cnn with 256 hidden units
Current epoch: 0


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [1/2], Step [391/1564], Train Loss: 0.6048, Valid Loss: 0.5562
Model saved to ==> /content/drive/My Drive/bert/cnn_distill_256.pt
Model saved to ==> /content/drive/My Drive/bert/cnn_distill256_metrics.pt
Epoch [1/2], Step [782/1564], Train Loss: 0.5486, Valid Loss: 0.4658
Model saved to ==> /content/drive/My Drive/bert/cnn_distill_256.pt
Model saved to ==> /content/drive/My Drive/bert/cnn_distill256_metrics.pt

Current epoch: 1


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [2/2], Step [1173/1564], Train Loss: 0.3659, Valid Loss: 0.4815
Epoch [2/2], Step [1564/1564], Train Loss: 0.4574, Valid Loss: 0.3932
Model saved to ==> /content/drive/My Drive/bert/cnn_distill_256.pt
Model saved to ==> /content/drive/My Drive/bert/cnn_distill256_metrics.pt

Model saved to ==> /content/drive/My Drive/bert/cnn_distill256_metrics.pt
Accuracy 0.8234541236701064 
Precision 0.8257612766650383
Recall 0.8168492268041238
F1 0.8212810753907199


## Experiments for additional training with hard targtes 

In [51]:
trained_rnn = copy.deepcopy(rnn_models_distill)

In [54]:
trained_cnn = copy.deepcopy(cnn_models_distill)

In [52]:
_ = chunk_train(
    models=trained_rnn, 
    hidden_dims=HIDDEN_DIMS, 
    save_name='rnn_distill', 
    epoch_num=2, 
    model_type='rnn',
    )

Running experiment for rnn with 64 hidden units
Current epoch: 0


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

  self.dropout, self.training, self.bidirectional, self.batch_first)


Epoch [1/2], Step [391/1564], Train Loss: 0.2350, Valid Loss: 0.4049
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_64.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill64_metrics.pt
Epoch [1/2], Step [782/1564], Train Loss: 0.3769, Valid Loss: 0.3886
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_64.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill64_metrics.pt

Current epoch: 1


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [2/2], Step [1173/1564], Train Loss: 0.1638, Valid Loss: 0.4613
Epoch [2/2], Step [1564/1564], Train Loss: 0.2786, Valid Loss: 0.4212

Model saved to ==> /content/drive/My Drive/bert/rnn_distill64_metrics.pt
Accuracy 0.8049756019518438 
Precision 0.8384201077199281
Recall 0.7522551546391752
F1 0.793003905586687
Running experiment for rnn with 128 hidden units
Current epoch: 0


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [1/2], Step [391/1564], Train Loss: 0.2144, Valid Loss: 0.3896
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_128.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill128_metrics.pt
Epoch [1/2], Step [782/1564], Train Loss: 0.3638, Valid Loss: 0.3812
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_128.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill128_metrics.pt

Current epoch: 1


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [2/2], Step [1173/1564], Train Loss: 0.1427, Valid Loss: 0.4801
Epoch [2/2], Step [1564/1564], Train Loss: 0.2567, Valid Loss: 0.4028

Model saved to ==> /content/drive/My Drive/bert/rnn_distill128_metrics.pt
Accuracy 0.8176945844332454 
Precision 0.827362106315614
Recall 0.7997744845360825
F1 0.8133344254238676
Running experiment for rnn with 256 hidden units
Current epoch: 0


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [1/2], Step [391/1564], Train Loss: 0.1980, Valid Loss: 0.4248
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_256.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill256_metrics.pt
Epoch [1/2], Step [782/1564], Train Loss: 0.3512, Valid Loss: 0.3805
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_256.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill256_metrics.pt

Current epoch: 1


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [2/2], Step [1173/1564], Train Loss: 0.1112, Valid Loss: 0.5542
Epoch [2/2], Step [1564/1564], Train Loss: 0.2216, Valid Loss: 0.4124

Model saved to ==> /content/drive/My Drive/bert/rnn_distill256_metrics.pt
Accuracy 0.8084153267738581 
Precision 0.8392952482648158
Recall 0.7596649484536082
F1 0.797497252050393


In [55]:
_ = chunk_train(
    models=trained_cnn, 
    hidden_dims=HIDDEN_DIMS, 
    save_name='cnn_distill', 
    epoch_num=2, 
    model_type='cnn'
    )

Running experiment for cnn with 64 hidden units
Current epoch: 0


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [1/2], Step [391/1564], Train Loss: 0.2692, Valid Loss: 0.5460
Model saved to ==> /content/drive/My Drive/bert/cnn_distill_64.pt
Model saved to ==> /content/drive/My Drive/bert/cnn_distill64_metrics.pt
Epoch [1/2], Step [782/1564], Train Loss: 0.4157, Valid Loss: 0.4012
Model saved to ==> /content/drive/My Drive/bert/cnn_distill_64.pt
Model saved to ==> /content/drive/My Drive/bert/cnn_distill64_metrics.pt

Current epoch: 1


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [2/2], Step [1173/1564], Train Loss: 0.1567, Valid Loss: 0.5324
Epoch [2/2], Step [1564/1564], Train Loss: 0.2735, Valid Loss: 0.5086

Model saved to ==> /content/drive/My Drive/bert/cnn_distill64_metrics.pt
Accuracy 0.7707383409327254 
Precision 0.8871640407784986
Recall 0.616784793814433
F1 0.7276700874192322
Running experiment for cnn with 128 hidden units
Current epoch: 0


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [1/2], Step [391/1564], Train Loss: 0.2627, Valid Loss: 0.4128
Model saved to ==> /content/drive/My Drive/bert/cnn_distill_128.pt
Model saved to ==> /content/drive/My Drive/bert/cnn_distill128_metrics.pt
Epoch [1/2], Step [782/1564], Train Loss: 0.4170, Valid Loss: 0.4161

Current epoch: 1


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [2/2], Step [1173/1564], Train Loss: 0.1468, Valid Loss: 0.5071
Epoch [2/2], Step [1564/1564], Train Loss: 0.2683, Valid Loss: 0.4485

Model saved to ==> /content/drive/My Drive/bert/cnn_distill128_metrics.pt
Accuracy 0.7990560755139589 
Precision 0.8682742128337984
Recall 0.7018363402061856
F1 0.7762337430963834
Running experiment for cnn with 256 hidden units
Current epoch: 0


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [1/2], Step [391/1564], Train Loss: 0.2798, Valid Loss: 0.4020
Model saved to ==> /content/drive/My Drive/bert/cnn_distill_256.pt
Model saved to ==> /content/drive/My Drive/bert/cnn_distill256_metrics.pt
Epoch [1/2], Step [782/1564], Train Loss: 0.4163, Valid Loss: 0.3837
Model saved to ==> /content/drive/My Drive/bert/cnn_distill_256.pt
Model saved to ==> /content/drive/My Drive/bert/cnn_distill256_metrics.pt

Current epoch: 1


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [2/2], Step [1173/1564], Train Loss: 0.2049, Valid Loss: 0.5471
Epoch [2/2], Step [1564/1564], Train Loss: 0.2759, Valid Loss: 0.8632

Model saved to ==> /content/drive/My Drive/bert/cnn_distill256_metrics.pt
Accuracy 0.6917046636269099 
Precision 0.9485518292682927
Recall 0.4009342783505155
F1 0.5636322463768116


## Additional experiments with small hidden sizes

In [56]:
rnn_models = []
HIDDEN_DIMS = [16, 32]

for i in range(len(HIDDEN_DIMS)):  # Creating models for experiments 
    rnn_models.append(
        RNN(
            hidden_dim=HIDDEN_DIMS[i],
            emb_dim=128, 
            vocab_size=len(tokenizer)).to(device)
            )

# Copiing models to make them equally initialized
rnn_models_distill = copy.deepcopy(rnn_models)

_ = chunk_train(
    models=rnn_models, 
    hidden_dims=HIDDEN_DIMS, 
    save_name='rnn', 
    epoch_num=4, 
    model_type='rnn'
    )

Running experiment for rnn with 16 hidden units
Current epoch: 0


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [1/4], Step [391/3128], Train Loss: 0.6931, Valid Loss: 0.6922
Model saved to ==> /content/drive/My Drive/bert/rnn_16.pt
Model saved to ==> /content/drive/My Drive/bert/rnn16_metrics.pt
Epoch [1/4], Step [782/3128], Train Loss: 0.6922, Valid Loss: 0.6911
Model saved to ==> /content/drive/My Drive/bert/rnn_16.pt
Model saved to ==> /content/drive/My Drive/bert/rnn16_metrics.pt

Current epoch: 1


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [2/4], Step [1173/3128], Train Loss: 0.6844, Valid Loss: 0.6895
Model saved to ==> /content/drive/My Drive/bert/rnn_16.pt
Model saved to ==> /content/drive/My Drive/bert/rnn16_metrics.pt
Epoch [2/4], Step [1564/3128], Train Loss: 0.6776, Valid Loss: 0.6832
Model saved to ==> /content/drive/My Drive/bert/rnn_16.pt
Model saved to ==> /content/drive/My Drive/bert/rnn16_metrics.pt

Current epoch: 2


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [3/4], Step [1955/3128], Train Loss: 0.6253, Valid Loss: 0.6251
Model saved to ==> /content/drive/My Drive/bert/rnn_16.pt
Model saved to ==> /content/drive/My Drive/bert/rnn16_metrics.pt
Epoch [3/4], Step [2346/3128], Train Loss: 0.5993, Valid Loss: 0.6021
Model saved to ==> /content/drive/My Drive/bert/rnn_16.pt
Model saved to ==> /content/drive/My Drive/bert/rnn16_metrics.pt

Current epoch: 3


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [4/4], Step [2737/3128], Train Loss: 0.4542, Valid Loss: 0.5788
Model saved to ==> /content/drive/My Drive/bert/rnn_16.pt
Model saved to ==> /content/drive/My Drive/bert/rnn16_metrics.pt
Epoch [4/4], Step [3128/3128], Train Loss: 0.4724, Valid Loss: 0.5700
Model saved to ==> /content/drive/My Drive/bert/rnn_16.pt
Model saved to ==> /content/drive/My Drive/bert/rnn16_metrics.pt

Model saved to ==> /content/drive/My Drive/bert/rnn16_metrics.pt
Accuracy 0.7315414766818654 
Precision 0.7554639914009316
Recall 0.679284793814433
F1 0.7153519932145885
Running experiment for rnn with 32 hidden units
Current epoch: 0


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [1/4], Step [391/3128], Train Loss: 0.6930, Valid Loss: 0.6923
Model saved to ==> /content/drive/My Drive/bert/rnn_32.pt
Model saved to ==> /content/drive/My Drive/bert/rnn32_metrics.pt
Epoch [1/4], Step [782/3128], Train Loss: 0.6906, Valid Loss: 0.6912
Model saved to ==> /content/drive/My Drive/bert/rnn_32.pt
Model saved to ==> /content/drive/My Drive/bert/rnn32_metrics.pt

Current epoch: 1


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [2/4], Step [1173/3128], Train Loss: 0.6792, Valid Loss: 0.6564
Model saved to ==> /content/drive/My Drive/bert/rnn_32.pt
Model saved to ==> /content/drive/My Drive/bert/rnn32_metrics.pt
Epoch [2/4], Step [1564/3128], Train Loss: 0.6470, Valid Loss: 0.6315
Model saved to ==> /content/drive/My Drive/bert/rnn_32.pt
Model saved to ==> /content/drive/My Drive/bert/rnn32_metrics.pt

Current epoch: 2


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [3/4], Step [1955/3128], Train Loss: 0.5729, Valid Loss: 0.6186
Model saved to ==> /content/drive/My Drive/bert/rnn_32.pt
Model saved to ==> /content/drive/My Drive/bert/rnn32_metrics.pt
Epoch [3/4], Step [2346/3128], Train Loss: 0.5739, Valid Loss: 0.5780
Model saved to ==> /content/drive/My Drive/bert/rnn_32.pt
Model saved to ==> /content/drive/My Drive/bert/rnn32_metrics.pt

Current epoch: 3


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [4/4], Step [2737/3128], Train Loss: 0.4658, Valid Loss: 0.5749
Model saved to ==> /content/drive/My Drive/bert/rnn_32.pt
Model saved to ==> /content/drive/My Drive/bert/rnn32_metrics.pt
Epoch [4/4], Step [3128/3128], Train Loss: 0.4769, Valid Loss: 0.5637
Model saved to ==> /content/drive/My Drive/bert/rnn_32.pt
Model saved to ==> /content/drive/My Drive/bert/rnn32_metrics.pt

Model saved to ==> /content/drive/My Drive/bert/rnn32_metrics.pt
Accuracy 0.732501399888009 
Precision 0.7679640718562875
Recall 0.6610824742268041
F1 0.7105263157894737


In [57]:
_ = chunk_train(
    models=rnn_models_distill, 
    hidden_dims=HIDDEN_DIMS, 
    save_name='rnn_distill', 
    epoch_num=4, 
    model_type='rnn',
    distill_model=bert_model
    )

Running experiment for rnn with 16 hidden units
Current epoch: 0


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

  self.dropout, self.training, self.bidirectional, self.batch_first)


Epoch [1/4], Step [391/3128], Train Loss: 0.6936, Valid Loss: 0.6920
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_16.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill16_metrics.pt
Epoch [1/4], Step [782/3128], Train Loss: 0.6917, Valid Loss: 0.6914
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_16.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill16_metrics.pt

Current epoch: 1


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [2/4], Step [1173/3128], Train Loss: 0.6811, Valid Loss: 0.6791
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_16.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill16_metrics.pt
Epoch [2/4], Step [1564/3128], Train Loss: 0.6567, Valid Loss: 0.6342
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_16.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill16_metrics.pt

Current epoch: 2


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [3/4], Step [1955/3128], Train Loss: 0.5659, Valid Loss: 0.5900
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_16.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill16_metrics.pt
Epoch [3/4], Step [2346/3128], Train Loss: 0.5831, Valid Loss: 0.5686
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_16.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill16_metrics.pt

Current epoch: 3


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [4/4], Step [2737/3128], Train Loss: 0.4585, Valid Loss: 0.5376
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_16.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill16_metrics.pt
Epoch [4/4], Step [3128/3128], Train Loss: 0.5264, Valid Loss: 0.5236
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_16.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill16_metrics.pt

Model saved to ==> /content/drive/My Drive/bert/rnn_distill16_metrics.pt
Accuracy 0.7471402287816975 
Precision 0.7250701728468016
Recall 0.7905927835051546
F1 0.7564151961162058
Running experiment for rnn with 32 hidden units
Current epoch: 0


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [1/4], Step [391/3128], Train Loss: 0.6934, Valid Loss: 0.6931
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_32.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill32_metrics.pt
Epoch [1/4], Step [782/3128], Train Loss: 0.6917, Valid Loss: 0.6934

Current epoch: 1


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [2/4], Step [1173/3128], Train Loss: 0.6765, Valid Loss: 0.6807
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_32.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill32_metrics.pt
Epoch [2/4], Step [1564/3128], Train Loss: 0.6474, Valid Loss: 0.6177
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_32.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill32_metrics.pt

Current epoch: 2


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [3/4], Step [1955/3128], Train Loss: 0.5174, Valid Loss: 0.5178
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_32.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill32_metrics.pt
Epoch [3/4], Step [2346/3128], Train Loss: 0.5357, Valid Loss: 0.4897
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_32.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill32_metrics.pt

Current epoch: 3


HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))

Epoch [4/4], Step [2737/3128], Train Loss: 0.3723, Valid Loss: 0.4452
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_32.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill32_metrics.pt
Epoch [4/4], Step [3128/3128], Train Loss: 0.4708, Valid Loss: 0.4304
Model saved to ==> /content/drive/My Drive/bert/rnn_distill_32.pt
Model saved to ==> /content/drive/My Drive/bert/rnn_distill32_metrics.pt

Model saved to ==> /content/drive/My Drive/bert/rnn_distill32_metrics.pt
Accuracy 0.8092152627789777 
Precision 0.8068710868518221
Recall 0.8096005154639175
F1 0.8082334968239929
