In [1]:
from fast_bert.data_abs import BertAbsDataBunch
from fast_bert.learner_abs import BertAbsLearner
from box import Box
import logging
import torch
from pathlib import Path
from transformers import BertTokenizer

In [2]:
from tokenizers import (ByteLevelBPETokenizer,
                            BPETokenizer,
                            SentencePieceBPETokenizer,
                            BertWordPieceTokenizer)

In [3]:
# Root folder of the summarisation project; a relative path, so it is
# resolved against the notebook's current working directory.
PATH = Path("../../summarisation/")
# Sub-folders for the input data and for the model files.
DATA_PATH = PATH.joinpath('data')
MODEL_PATH = PATH.joinpath('model')

In [4]:
# Root logger (no name given); handed to the learners created further down.
logger = logging.getLogger(name=None)

In [5]:
# Run settings collected in a Box so they can be read as attributes
# (e.g. args.model_name).
# NOTE(review): in the cells visible here only `model_name` is read (as the
# tokenizer argument of two databunches) and `multi_gpu` is added later; the
# remaining values are not passed to any databunch or learner - confirm
# whether that is intended.
args = Box(
    max_seq_length=512,
    batch_size=8,
    learning_rate=5e-3,
    num_train_epochs=6,
    fp16=True,
    model_name='bertabs-finetuned-cnndm',
    model_type='bert',
)

In [6]:
# Pick CUDA when torch reports at least one CUDA device, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.device_count() else 'cpu')
# Record on the settings object whether more than one GPU is present.
args.multi_gpu = torch.cuda.device_count() > 1

In [7]:
# Lower-casing WordPiece tokenizer built from the vocab.txt file stored in
# the model folder.
tokenizer = BertWordPieceTokenizer(
    str(MODEL_PATH.joinpath('vocab.txt')),
    lowercase=True,
)

In [8]:
# Two data bunches over the same data directory and device that differ only
# in the tokenizer argument.
# `databunch` receives the BertWordPieceTokenizer object created above.
databunch = BertAbsDataBunch(
    data_dir=DATA_PATH,
    tokenizer=tokenizer,
    device=device,
)
# `databunch_old` receives the model name string from `args` instead
# (presumably resolved to a tokenizer inside BertAbsDataBunch - TODO confirm).
databunch_old = BertAbsDataBunch(
    data_dir=DATA_PATH,
    tokenizer=args.model_name,
    device=device,
)

In [9]:
# Data bunch whose test dataloader supplies the sample texts used for the
# timing runs below; the tokenizer is given as the model name string.
# NOTE(review): the arguments are identical to those of `databunch_old` -
# confirm whether a second instance is actually needed.
databunch_with_data = BertAbsDataBunch(
    data_dir=DATA_PATH,
    tokenizer=args.model_name,
    device=device,
)

In [10]:
# Data bunch built with the BertWordPieceTokenizer object.
# NOTE(review): the arguments are identical to those of `databunch`, and
# this name is not referenced by any other cell visible here - confirm
# whether it is still required.
databunch_with_data_new_tokeniser = BertAbsDataBunch(
    data_dir=DATA_PATH,
    tokenizer=tokenizer,
    device=device,
)

In [11]:
# Learner created from the files under MODEL_PATH, using the data bunch that
# was built with the BertWordPieceTokenizer object.
learner = BertAbsLearner.from_pretrained_model(
    databunch,
    MODEL_PATH,
    device,
    logger=logger,
)

In [12]:
# Take element 1 of the first item in the test dataset; the cell output shows
# it to be a list of raw text strings. It is the input for the timing cells.
# NOTE(review): displaying `texts` stores the raw customer text in the saved
# notebook output - consider clearing the output before sharing.
texts = (
    databunch_with_data
    .test_dl
    .dataset[0][1]
)
texts

['Customer called through to BG on 27.8.16 about a leak from the shower, an engineer attended from dyno on 31.8.16 (SO-6757449-GDZS). The engineer spent about 5 mins at thr property shone his torch down the side of the shower and blamed the leak on sealent, he offered the customer a temp repair that woulf have been chargeable but the customer declined. The first engineer then left the property promptly without locating a leak and assured the customer that it was to do with the sealent. Since the appt the customer noticed that the leak was still occuring and causing damage and she called for another dyno engineer booked for 10.11.16 (SO-6985967-8DHD). The engineer arrived at the property and pulled the shower out to look behind and found that there were 2 leaks behind the shower that had been ongoing for a long time and he said that if the first engineer would have looked properly then she wouldnt have had so much damage to the property. pics were taken of damage and given to LO and cus

In [16]:
%%timeit
# Time one predict_batch call over the sample texts with `learner`
# (the learner built on the BertWordPieceTokenizer data bunch).
learner.predict_batch(texts)

9.89 s ± 377 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [17]:
# Second learner created from the same MODEL_PATH files, this time on
# `databunch_old` (tokenizer given by model name), for comparison with
# `learner`.
learner_old = BertAbsLearner.from_pretrained_model(
    databunch_old,
    MODEL_PATH,
    device,
    logger=logger,
)

In [None]:
%%timeit
# Time the same predict_batch call with `learner_old`, so the result can be
# compared with the timing recorded for `learner`.
learner_old.predict_batch(texts)