#### Скачаем `SWAG` датасет

In [None]:
!git clone https://github.com/rowanz/swagaf.git
!mv swagaf/data/ ../datasets/SWAG
!rm -fr swagaf

In [1]:
import sys
%load_ext autoreload
%autoreload 2
sys.path.append('..')

import numpy as np
import random
import torch
import os
from pytorch_pretrained_bert.tokenization import BertTokenizer

from lib import data_processors, tasks
from pytorch_pretrained_bert import BertForMultipleChoice
from lib.train_eval import train, evaluate, predict

Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


In [2]:

# Run configuration read by the cells below: filesystem locations, the task
# and pretrained checkpoint names, batch sizes and optimisation settings.
params = dict(
    data_dir='../datasets/SWAG',
    output_dir='../output',
    cache_dir='../model_cache',
    task_name='swag',
    bert_model='bert-base-uncased',
    max_seq_length=128,
    train_batch_size=12,
    eval_batch_size=8,
    learning_rate=2e-5,
    warmup_proportion=0.1,
    num_train_epochs=1,
    seed=1331,
    # Use the GPU when torch reports one as available, otherwise the CPU.
    device=(torch.device('cuda') if torch.cuda.is_available()
            else torch.device('cpu')),
)

# Seed NumPy, Python's `random` and torch from the single configured value.
np.random.seed(params['seed'])
random.seed(params['seed'])
torch.manual_seed(params['seed'])

<torch._C.Generator at 0x7f17a41645b0>

In [3]:
# Instantiate the processor class registered in `tasks.processors` under the
# configured task name.
processor = tasks.processors[params['task_name']]()

# Keep the vocabulary download in the same cache directory as the model
# weights, so every pretrained artefact lives under params['cache_dir'].
# do_lower_case=True pairs with the 'uncased' checkpoint named in params;
# NOTE(review): revisit this flag if `bert_model` is switched to a cased one.
tokenizer = BertTokenizer.from_pretrained(
    params['bert_model'],
    cache_dir=params['cache_dir'],
    do_lower_case=True)

train_examples = processor.get_train_examples(params['data_dir'])
dev_examples = processor.get_dev_examples(params['data_dir'])

# num_choices=4: presumably the number of candidate endings per SWAG
# example — TODO confirm against the swag processor.
model = BertForMultipleChoice.from_pretrained(
    params['bert_model'],
    cache_dir=params['cache_dir'], num_choices=4).to(params['device'])

In [4]:
# Number embedded in the checkpoint file name below.
EPOCH_NUM = 1

# Each run of this cell trains for one epoch.
params.update(num_train_epochs=1)

checkpoint_files = dict(
    config='bert_config.json',
    # NOTE(review): the key is spelled 'model_weigths'; presumably
    # lib.train_eval looks it up under this exact spelling — confirm before
    # renaming it.
    model_weigths='model_{}_epoch_{}.pth'.format(
        params['task_name'], EPOCH_NUM),
)

# `train` hands back the model together with a result object; the dev
# examples are passed as the validation set.
model, result = train(
    model, tokenizer, params, train_examples,
    valid_examples=dev_examples,
    checkpoint_files=checkpoint_files,
)


converting examples:   6%|▌         | 4280/73546 [00:05<01:23, 830.33it/s]


KeyboardInterrupt: 

In [10]:
from sklearn.metrics import f1_score
from lib import metrics

def f1_score_multiclass(true_labels, prob_preds):
    """Macro-averaged F1 score computed from per-class scores.

    Args:
        true_labels: ground-truth class index of every example.
        prob_preds: 2-D array-like with one row per example; the predicted
            class of a row is the position of its largest value.

    Returns:
        Whatever ``f1_score`` returns for ``average='macro'``.
    """
    return f1_score(
        true_labels,
        np.argmax(prob_preds, axis=1),  # collapse scores to hard class ids
        average='macro',
    )

# Score the current `model` on the dev split and collect the metrics.
print("***** Running evaluation *****")
print("Num examples: ", len(dev_examples))
print("Batch size:   ", params['eval_batch_size'])

# Per-class scores for the dev examples — presumably one row per example,
# in input order; TODO confirm against lib.train_eval.predict.
prob_preds = predict(model, tokenizer, params, dev_examples)
# Gold labels as integers, in the order of `dev_examples`.
true_labels = np.array([int(example.label) for example in dev_examples])
result = {
    'eval_loss': metrics.log_loss(true_labels, prob_preds),
    'eval_accuracy': metrics.accuracy(true_labels, prob_preds),
    'eval_f1_score': f1_score_multiclass(true_labels, prob_preds),
}
# Bare last expression so the notebook displays the metrics dict.
result

converting examples:   0%|          | 72/20006 [00:00<00:27, 719.02it/s]

***** Running evaluation *****
Num examples:  20006
Batch size:    8


converting examples: 100%|██████████| 20006/20006 [00:23<00:00, 869.80it/s]
Evaluating: 100%|██████████| 2501/2501 [01:02<00:00, 40.08it/s]


{'eval_loss': 1.3916393557536797,
 'eval_accuracy': 0.22358292512246325,
 'eval_f1_score': 0.21350037939898828}