In [1]:
import os
import sys
%load_ext autoreload
%autoreload 2
sys.path.append('..')

import numpy as np
import random
import torch

from pytorch_pretrained_bert.tokenization import BertTokenizer

from lib import data_processors, tasks
from lib.bert import BertForSequenceClassification
from lib.train_eval import train, evaluate, predict

Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


In [2]:
%env CUDA_VISIBLE_DEVICES=0

# Single source of truth for the run: filesystem locations, task / model
# selection, optimisation hyper-parameters and the RNG seed.
params = dict(
    # Filesystem locations (relative to the notebook directory).
    data_dir='../datasets/RTE',
    output_dir='../output',
    cache_dir='../model_cache',
    # Task and pretrained weights.
    task_name='rte',
    bert_model='bert-base-uncased',
    # Tokenisation / batching.
    max_seq_length=128,
    train_batch_size=32,
    eval_batch_size=8,
    # Optimisation.
    learning_rate=2e-5,
    warmup_proportion=0.1,
    num_train_epochs=1,
    # Reproducibility.
    seed=1331,
)
# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
params['device'] = torch.device(
    'cuda' if torch.cuda.is_available() else 'cpu')

# Seed each RNG the notebook imports (stdlib `random`, NumPy, torch) with the
# same value. torch.manual_seed stays last so the cell's displayed value is
# unchanged.
random.seed(params['seed'])
np.random.seed(params['seed'])
torch.manual_seed(params['seed'])

env: CUDA_VISIBLE_DEVICES=0


<torch._C.Generator at 0x7f86622ae310>

In [3]:
# Attach the task-specific label metadata (looked up in lib.tasks by task
# name) to the shared config dict.
params.update(
    num_labels=tasks.num_labels[params['task_name']],
    label_list=tasks.label_lists[params['task_name']],
)

# Data: instantiate the processor registered for this task and read both
# splits from data_dir.
processor = tasks.processors[params['task_name']]()
train_examples = processor.get_train_examples(params['data_dir'])
dev_examples = processor.get_dev_examples(params['data_dir'])

# Tokenizer matching the pretrained checkpoint; lower-casing is enabled,
# which fits the 'uncased' model selected in params.
tokenizer = BertTokenizer.from_pretrained(
    params['bert_model'],
    do_lower_case=True,
)

# Pretrained encoder with a classification head sized to the task's label
# count, placed on the configured device.
model = BertForSequenceClassification.from_pretrained(
    params['bert_model'],
    cache_dir=params['cache_dir'],
    num_labels=params['num_labels'],
)
model = model.to(params['device'])

## Training and evaluation

In [4]:
# Number of single-epoch train() calls to make; one checkpoint name is
# generated per call.
TOTAL_EPOCH_NUMS = 7

# Whatever train() returns as `result`, kept per 1-based epoch number.
# Previously each iteration overwrote `result`, so only the last epoch's value
# survived and the best epoch had to be picked by reading the printed log.
epoch_results = {}

for epoch_num in range(1, TOTAL_EPOCH_NUMS + 1):
    # Ask train() for exactly one epoch per call so that every epoch gets its
    # own checkpoint file name below.
    # NOTE(review): the recorded output shows 'train_global_step': 78 after
    # every call, which suggests train() restarts its step counter (and
    # presumably any warmup schedule) on each call -- confirm this is intended.
    params['num_train_epochs'] = 1
    checkpoint_files = {
        'config': 'bert_config.json',
        'file_to_save': 'model_{}_epoch_{}.pth'.format(
            params['task_name'], epoch_num)
    }

    # `model` is rebound to the returned model, so training continues from the
    # previous epoch's weights.
    model, result = train(model, tokenizer, params,
                          train_examples,
                          valid_examples=dev_examples,
                          checkpoint_files=checkpoint_files)
    epoch_results[epoch_num] = result

Iteration:   0%|          | 0/78 [00:00<?, ?it/s]

***** Running training *****
Num examples: 2490
Batch size:   32
Num steps:    77

Epoch: 1


Iteration: 100%|██████████| 78/78 [00:36<00:00,  2.31it/s]


{'train_loss': 0.7008157930313013, 'train_global_step': 78}
***** Running evaluation *****
Num examples:  277
Batch size:    8


Evaluating: 100%|██████████| 35/35 [00:01<00:00, 26.39it/s]


{'eval_loss': 0.6681361278041605, 'eval_accuracy': 0.5992779783393501, 'eval_f1_score': 0.5873605947955391, 'eval_matthews_corrcoef': 0.20621234891822715}


Iteration:   0%|          | 0/78 [00:00<?, ?it/s]

***** Running training *****
Num examples: 2490
Batch size:   32
Num steps:    77

Epoch: 1


Iteration: 100%|██████████| 78/78 [00:36<00:00,  2.19it/s]


{'train_loss': 0.6767277950659777, 'train_global_step': 78}
***** Running evaluation *****
Num examples:  277
Batch size:    8


Evaluating: 100%|██████████| 35/35 [00:01<00:00, 26.30it/s]


{'eval_loss': 0.6643979622568895, 'eval_accuracy': 0.592057761732852, 'eval_f1_score': 0.6626865671641792, 'eval_matthews_corrcoef': 0.17678265696515783}


Iteration:   0%|          | 0/78 [00:00<?, ?it/s]

***** Running training *****
Num examples: 2490
Batch size:   32
Num steps:    77

Epoch: 1


Iteration: 100%|██████████| 78/78 [00:35<00:00,  2.34it/s]


{'train_loss': 0.5593152191394415, 'train_global_step': 78}
***** Running evaluation *****
Num examples:  277
Batch size:    8


Evaluating: 100%|██████████| 35/35 [00:01<00:00, 26.26it/s]


{'eval_loss': 0.647120522973985, 'eval_accuracy': 0.6498194945848376, 'eval_f1_score': 0.6881028938906752, 'eval_matthews_corrcoef': 0.29515603368584087}


Iteration:   0%|          | 0/78 [00:00<?, ?it/s]

***** Running training *****
Num examples: 2490
Batch size:   32
Num steps:    77

Epoch: 1


Iteration: 100%|██████████| 78/78 [00:35<00:00,  2.30it/s]


{'train_loss': 0.3729557619454005, 'train_global_step': 78}
***** Running evaluation *****
Num examples:  277
Batch size:    8


Evaluating: 100%|██████████| 35/35 [00:01<00:00, 26.21it/s]


{'eval_loss': 0.7789937539315288, 'eval_accuracy': 0.6173285198555957, 'eval_f1_score': 0.6845238095238094, 'eval_matthews_corrcoef': 0.23143089615411272}


Iteration:   0%|          | 0/78 [00:00<?, ?it/s]

***** Running training *****
Num examples: 2490
Batch size:   32
Num steps:    77

Epoch: 1


Iteration: 100%|██████████| 78/78 [00:37<00:00,  2.20it/s]


{'train_loss': 0.2733649337091125, 'train_global_step': 78}
***** Running evaluation *****
Num examples:  277
Batch size:    8


Evaluating: 100%|██████████| 35/35 [00:01<00:00, 26.18it/s]


{'eval_loss': 0.9758022572737631, 'eval_accuracy': 0.6642599277978339, 'eval_f1_score': 0.7084639498432601, 'eval_matthews_corrcoef': 0.32576228102748156}


Iteration:   0%|          | 0/78 [00:00<?, ?it/s]

***** Running training *****
Num examples: 2490
Batch size:   32
Num steps:    77

Epoch: 1


Iteration: 100%|██████████| 78/78 [00:37<00:00,  2.16it/s]


{'train_loss': 0.2784867717239719, 'train_global_step': 78}
***** Running evaluation *****
Num examples:  277
Batch size:    8


Evaluating: 100%|██████████| 35/35 [00:01<00:00, 26.14it/s]


{'eval_loss': 1.1060749355258945, 'eval_accuracy': 0.6462093862815884, 'eval_f1_score': 0.7012195121951219, 'eval_matthews_corrcoef': 0.29051665718428304}


Iteration:   0%|          | 0/78 [00:00<?, ?it/s]

***** Running training *****
Num examples: 2490
Batch size:   32
Num steps:    77

Epoch: 1


Iteration: 100%|██████████| 78/78 [00:36<00:00,  2.27it/s]


{'train_loss': 0.23839517706670824, 'train_global_step': 78}
***** Running evaluation *****
Num examples:  277
Batch size:    8


Evaluating: 100%|██████████| 35/35 [00:01<00:00, 26.21it/s]


{'eval_loss': 1.4871535574429373, 'eval_accuracy': 0.6389891696750902, 'eval_f1_score': 0.7175141242937854, 'eval_matthews_corrcoef': 0.2903802456679771}


In [5]:
# Epoch whose checkpoint is reloaded. The value 5 was picked by hand from the
# recorded training output in this notebook: that epoch has the highest
# eval_accuracy (0.664) and eval_matthews_corrcoef (0.326) of the seven.
# Re-check this value whenever the training cell is re-run.
best_epoch = 5

# Fresh model with the same architecture / label count as the trained one;
# its weights are overwritten by the checkpoint below.
best_model = BertForSequenceClassification.from_pretrained(
    params['bert_model'],
    cache_dir=params['cache_dir'],
    num_labels=params['num_labels']).to(params['device'])

# map_location remaps the stored tensors onto the currently selected device,
# so a checkpoint written on a GPU still loads when params['device'] is the
# CPU (or a different GPU) instead of failing during deserialisation.
best_model.load_state_dict(torch.load(
    os.path.join(params['output_dir'], 'model_{}_epoch_{}.pth'.format(
        params['task_name'], best_epoch)),
    map_location=params['device']))

In [6]:
# Score the reloaded checkpoint on the dev split. `result` is the metrics
# dict displayed as the cell output; `prob_preds` is the second value
# returned by evaluate().
result, prob_preds = evaluate(
    best_model,
    tokenizer,
    params,
    dev_examples,
)
result

***** Running evaluation *****
Num examples:  277
Batch size:    8


Evaluating: 100%|██████████| 35/35 [00:01<00:00, 26.64it/s]


{'eval_loss': 0.9758022572737631,
 'eval_accuracy': 0.6642599277978339,
 'eval_f1_score': 0.7084639498432601,
 'eval_matthews_corrcoef': 0.32576228102748156}