In [1]:
import logging
from pathlib import Path

import torch
import apex

from fast_bert.data_cls import BertDataBunch
from fast_bert.learner_cls import BertLearner
from fast_bert.metrics import accuracy

In [2]:
# Input locations: the data files and the label file are read from the same folder.
DATA_PATH = Path('data/')
LABEL_PATH = Path('data/')

# Output locations for model artefacts and logs.
MODEL_PATH = Path('bert_models/')
LOG_PATH = Path('bert_logs/')
OUTPUT_PATH = MODEL_PATH / 'output'

# MODEL_PATH comes before OUTPUT_PATH because mkdir() is called without
# parents=True, so the parent directory has to exist first.
for _directory in (MODEL_PATH, LOG_PATH, OUTPUT_PATH):
    _directory.mkdir(exist_ok=True)

# Name of the pretrained checkpoint (alternative: 'distilbert-base-uncased').
BERT_PRETRAINED_PATH = 'bert-base-uncased'

# Hyper-parameters gathered in one place.
# NOTE(review): in the cells visible here only 'train_batch_size',
# 'max_seq_length', 'learning_rate' and 'fp16' are actually read; the
# remaining keys look unused -- confirm before relying on them.
args = dict(
    max_seq_length=32,
    # NOTE(review): the checkpoint name says "uncased"; confirm whether this
    # should be True.
    do_lower_case=False,
    train_batch_size=8,
    learning_rate=6e-5,
    num_train_epochs=12.0,
    warmup_proportion=0.002,
    local_rank=-1,
    gradient_accumulation_steps=1,
    fp16=True,
    loss_scale=128,
)

### GPU & device

In [3]:
# Training is pinned to CUDA; no CPU fallback is attempted in this cell.
device = torch.device('cuda')

# True when more than one CUDA device is visible to PyTorch.
multi_gpu = torch.cuda.device_count() > 1

### 1. Create a DataBunch object

In [4]:
# BertDataBunch is already imported in the notebook's import cell, so it is
# not re-imported here.
#
# Build the data bunch from the CSV files under DATA_PATH / LABEL_PATH.
# The tokenizer is given by checkpoint name; batch size and sequence length
# come from the shared `args` dict, and `multi_gpu` from the device cell.
databunch = BertDataBunch(DATA_PATH, LABEL_PATH,
                          tokenizer=BERT_PRETRAINED_PATH,
                          train_file='train_297000.csv',
                          val_file='val_99000.csv',
                          label_file='labels.csv',
                          text_col='text',
                          label_col='label',
                          batch_size_per_gpu=args['train_batch_size'],
                          max_seq_length=args['max_seq_length'],
                          multi_gpu=multi_gpu,
                          multi_label=False,
                          model_type='bert')  # use 'distilbert' with a DistilBERT checkpoint

### 2. Create a Learner object

In [5]:
# Root logger, handed to the learner as-is (no handlers or level are set here).
logger = logging.getLogger()

# Metrics are described as name/function pairs.
metrics = [dict(name='accuracy', function=accuracy)]

# Build the learner from the pretrained checkpoint named in BERT_PRETRAINED_PATH.
# NOTE(review): warmup_steps is hard-coded to 500 while args['warmup_proportion']
# is never read in the visible cells -- confirm which one is intended.
learner = BertLearner.from_pretrained_model(
    databunch,
    # model / weights
    pretrained_path=BERT_PRETRAINED_PATH,
    finetuned_wgts_path=None,
    multi_label=False,
    # hardware
    device=device,
    multi_gpu=multi_gpu,
    is_fp16=args['fp16'],
    # schedule, evaluation and reporting
    warmup_steps=500,
    metrics=metrics,
    logger=logger,
    logging_steps=100,
    output_dir=OUTPUT_PATH,
)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=361.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=440473133.0, style=ProgressStyle(descri…




### 3. Train the model

In [6]:
# Run training with the learning rate from the shared `args` dict.
# NOTE(review): args['num_train_epochs'] is 12.0 but 6 epochs are requested
# here -- confirm which value is intended.
learner.fit(
    epochs=6,
    lr=args['learning_rate'],
    optimizer_type='lamb',
    schedule_type='warmup_cosine_hard_restarts',
    validate=True,  # evaluate the model after each epoch
)

Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic


RuntimeError: CUDA out of memory. Tried to allocate 90.00 MiB (GPU 0; 2.00 GiB total capacity; 933.22 MiB already allocated; 9.30 MiB free; 1.16 GiB reserved in total by PyTorch)

In [None]:
# Save the model held by the learner.
# NOTE(review): this cell has no execution count and the recorded training run
# above ended in a CUDA out-of-memory error -- run this only after a
# successful fit.
learner.save_model()