In [1]:
import sys
# Reload imported modules automatically before each cell runs, so edits to
# the project-local `lib` package are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2
# Put the parent directory on the import path; presumably this is where the
# `lib` package imported below lives (TODO confirm the repository layout).
sys.path.append('..')

import numpy as np
import random
import torch

from pytorch_pretrained_bert.tokenization import BertTokenizer

from lib import data_processors, tasks
from lib.bert import BertForSequenceClassification
from lib.train_eval import train, evaluate, predict

Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


In [2]:
# Expose only the GPU with index 1 to this kernel. The variable has to be set
# before CUDA is first initialised in the process to have any effect.
# (Kept on its own line: text after the value would become part of it.)
%env CUDA_VISIBLE_DEVICES=1

# Experiment configuration. The same mapping is handed to the train/evaluate
# helpers and is extended in later cells (e.g. with label information).
params = dict(
    # Locations (relative to the notebook directory).
    data_dir='../datasets/SST-2',
    output_dir='../output',
    cache_dir='../model_cache',
    # Task and pretrained checkpoint to fine-tune.
    task_name='sst2',
    bert_model='bert-base-uncased',
    # Input size and batching.
    max_seq_length=128,
    train_batch_size=32,
    eval_batch_size=8,
    # Optimisation settings.
    learning_rate=2e-5,
    warmup_proportion=0.1,
    num_train_epochs=1,
    # Single seed shared by every random number generator seeded below.
    seed=1331,
    # Use the GPU when one is visible to torch, otherwise fall back to CPU.
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
)

# Seed Python, NumPy and PyTorch from the same configured value.
random.seed(params['seed'])
np.random.seed(params['seed'])
torch.manual_seed(params['seed'])

env: CUDA_VISIBLE_DEVICES=1


<torch._C.Generator at 0x7f220411ccd0>

In [3]:
# Look up the task-specific label metadata and store it alongside the rest of
# the configuration, so downstream helpers receive it through `params`.
params['num_labels'] = tasks.num_labels[params['task_name']]
params['label_list'] = tasks.label_lists[params['task_name']]

# Data processor registered for the selected task.
processor = tasks.processors[params['task_name']]()

# Load the tokenizer from the same cache directory as the model weights.
# Previously `cache_dir` was only passed to the model, so the vocabulary was
# cached in a per-user default location instead of the configured one.
tokenizer = BertTokenizer.from_pretrained(
    params['bert_model'],
    cache_dir=params['cache_dir'],
    do_lower_case=True)

# Read the train / dev splits from the configured dataset directory.
train_examples = processor.get_train_examples(params['data_dir'])
dev_examples = processor.get_dev_examples(params['data_dir'])

# Instantiate the classifier from the pretrained checkpoint and move it to
# the configured device.
model = BertForSequenceClassification.from_pretrained(
    params['bert_model'],
    cache_dir=params['cache_dir'],
    num_labels=params['num_labels']).to(params['device'])

03/21/2019 23:18:52 - INFO - pytorch_pretrained_bert.tokenization -   loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /home/ramild/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
03/21/2019 23:18:53 - INFO - lib.bert -   loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at ../model_cache/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba
03/21/2019 23:18:53 - INFO - lib.bert -   extracting archive file ../model_cache/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba to temp dir /tmp/tmpwen5uar8
03/21/2019 23:18:57 - INFO - lib.bert -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden

In [5]:
# Label used only to build the checkpoint file name for this training run.
EPOCH_NUM = 1

# Each execution of this cell requests a single epoch from train().
params['num_train_epochs'] = 1

# File names handed to train() for checkpointing.
# NOTE(review): the key 'model_weigths' is misspelled, but it is kept as-is
# because train() presumably looks it up under this exact name -- confirm in
# lib.train_eval before renaming.
checkpoint_files = dict(
    config='bert_config.json',
    model_weigths='model_{}_epoch_{}.pth'.format(
        params['task_name'], EPOCH_NUM),
)

# train() returns the (updated) model together with a summary of the run;
# the dev examples are passed for validation.
model, result = train(
    model, tokenizer, params, train_examples,
    valid_examples=dev_examples,
    checkpoint_files=checkpoint_files,
)
result

***** Running training *****
Num examples: 67349
Batch size:   32
Num steps:    2104

Epoch: 1

Iteration: 100%|██████████| 2105/2105 [15:51<00:00,  2.39it/s]



{'train_loss': 0.2006387873201005}
***** Running evaluation *****
Num examples:  872
Batch size:    8


  test_preds += list(softmax(logits).numpy())
Evaluating: 100%|██████████| 109/109 [00:04<00:00, 25.49it/s]


{'eval_loss': 0.20041756064859603, 'eval_accuracy': 0.9288990825688074}


{'loss': 0.2006387873201005, 'global_step': 2105}

In [6]:
# Evaluate the current model on the dev split. `result` is the metrics dict
# shown below; `prob_preds` presumably holds the predicted class
# probabilities (TODO confirm against lib.train_eval.evaluate).
result, prob_preds = evaluate(
    model, tokenizer, params, dev_examples)
result

***** Running evaluation *****
Num examples:  872
Batch size:    8


  model.eval()
Evaluating: 100%|██████████| 109/109 [00:04<00:00, 26.28it/s]


{'eval_loss': 0.20041756064859603, 'eval_accuracy': 0.9288990825688074}

In [16]:
# Label used only to build the checkpoint file name for this training run.
EPOCH_NUM = 2

# Still one epoch per call: `model` is whatever the kernel currently holds,
# so running this cell after the previous training cell continues
# fine-tuning from that model rather than starting over.
params['num_train_epochs'] = 1

# File names handed to train() for checkpointing.
# NOTE(review): the key 'model_weigths' is misspelled, but it is kept as-is
# because train() presumably looks it up under this exact name -- confirm in
# lib.train_eval before renaming.
checkpoint_files = dict(
    config='bert_config.json',
    model_weigths='model_{}_epoch_{}.pth'.format(
        params['task_name'], EPOCH_NUM),
)

# train() returns the (updated) model together with a summary of the run;
# the dev examples are passed for validation.
model, result = train(
    model, tokenizer, params, train_examples,
    valid_examples=dev_examples,
    checkpoint_files=checkpoint_files,
)
result

***** Running training *****
Num examples: 67349
Batch size:   32
Num steps:    2104


Iteration:   0%|          | 0/2105 [00:00<?, ?it/s]


Epoch: 1


Iteration: 100%|██████████| 2105/2105 [15:51<00:00,  2.36it/s]


{'train_loss': 0.08285011941226389}
***** Running evaluation *****
Num examples:  872
Batch size:    8


Evaluating: 100%|██████████| 109/109 [00:04<00:00, 26.02it/s]


{'eval_loss': 0.2209548539067661, 'eval_accuracy': 0.9254587155963303}


{'train_loss': 0.08285011941226389, 'train_global_step': 2105}

In [32]:
# Re-evaluate the current model on the dev split after the latest training
# run. `result` is the metrics dict shown below; `prob_preds` presumably
# holds the predicted class probabilities (TODO confirm against
# lib.train_eval.evaluate).
result, prob_preds = evaluate(
    model, tokenizer, params, dev_examples)
result

***** Running evaluation *****
Num examples:  872
Batch size:    8


Evaluating: 100%|██████████| 109/109 [00:04<00:00, 26.29it/s]


{'eval_loss': 0.2209548539067661,
 'eval_accuracy': 0.9254587155963303,
 'eval_f1_score': 0.9267192784667418,
 'eval_matthews_corrcoef': 0.8508757396537342}