In [1]:
from fastai import *
from fastai.text import * 

In [2]:
import fastai; fastai.__version__

'1.0.22'

In [3]:
path = untar_data(URLs.IMDB_SAMPLE)
path

PosixPath('/home/ubuntu/.fastai/data/imdb_sample')

In [4]:
df = pd.read_csv(path/'texts.csv')
df.head()

Unnamed: 0,label,text,is_valid
0,negative,Un-bleeping-believable! Meg Ryan doesn't even ...,False
1,positive,This is a extremely well-made film. The acting...,False
2,negative,Every once in a long while a movie will come a...,False
3,positive,Name just says it all. I watched this movie wi...,False
4,negative,This movie succeeds at being one of the most u...,False


In [15]:
df.shape

(1000, 3)

In [14]:
df.label.value_counts()

negative    524
positive    476
Name: label, dtype: int64

In [5]:
# Language model data
data_lm = TextLMDataBunch.from_csv(path, 'texts.csv')
# Classifier model data
data_clas = TextClasDataBunch.from_csv(path, 'texts.csv', vocab=data_lm.train_ds.vocab, bs=32)

In [6]:
data_lm.save()
data_clas.save()

In [7]:
data_lm = TextLMDataBunch.load(path)
data_clas = TextClasDataBunch.load(path, bs=32)
# Note that you can load the data with different DataBunch parameters (batch size, bptt,...)

In [8]:
# Fine-tuning a language model
# We can use the data_lm object we created earlier to fine-tune a pretrained language model. fast.ai has an English model available that we can download. We can create a learner object that will directly create a model, download the pretrained weights and be ready for fine-tuning.

learn = language_model_learner(data_lm, pretrained_model=URLs.WT103, drop_mult=0.5)
learn.fit_one_cycle(1, 1e-2)

Total time: 00:04
epoch  train_loss  valid_loss  accuracy
1      4.747180    4.236037    0.246342  (00:04)



In [9]:
# Like a computer vision model, we can then unfreeze the model and fine-tune it.

learn.unfreeze()
learn.fit_one_cycle(1, 1e-3)

Total time: 00:05
epoch  train_loss  valid_loss  accuracy
1      4.466356    4.133717    0.253531  (00:05)



In [10]:
# And finally we save the encoder to be able to use it for classification in the next section.
learn.save_encoder('ft_enc')
# Building a classifier
# We now use the data_clas object we created earlier to build a classifier with our fine-tuned encoder. The learner object can be done in a single line.

In [11]:
learn = text_classifier_learner(data_clas, drop_mult=0.5)
learn.load_encoder('ft_enc')
learn.fit_one_cycle(1, 1e-2)

Total time: 00:05
epoch  train_loss  valid_loss  accuracy
1      0.655628    0.882717    0.452736  (00:05)



In [12]:
learn.freeze_to(-2)
learn.fit_one_cycle(1, slice(5e-3/2., 5e-3))

Total time: 00:06
epoch  train_loss  valid_loss  accuracy
1      0.603185    1.094246    0.278607  (00:06)



In [13]:
learn.unfreeze()
learn.fit_one_cycle(1, slice(2e-3/100, 2e-3))

Total time: 00:14
epoch  train_loss  valid_loss  accuracy
1      0.524795    1.347872    0.288557  (00:14)



In [16]:
preds,y=learn.get_preds()

In [18]:
from sklearn.metrics import f1_score

In [23]:
f1_score(y, preds[:, 0]>0.5)

0.7410714285714286