In [0]:
from fastai.text import * 
from fastai import * 


**[1][Regularizing RNNs by Stabilizing Activations](https://arxiv.org/abs/1511.08400)**
 - by David Krueger, Roland Memisevic



In [2]:

bs=48
path = untar_data(URLs.IMDB)
path.ls()
data_lm = (TextList.from_folder(path)
           #Inputs: all the text files in path
            .filter_by_folder(include=['train', 'test', 'unsup']) 
           #We may have other temp folders that contain text files so we only keep what's in train and test
            .split_by_rand_pct(0.1)
           #We randomly split and keep 10% (10,000 reviews) for validation
            .label_for_lm()           
           #We want to do a language model so we label accordingly
            .databunch(bs=bs))
data_lm.save('data_lm.pkl')

data_lm = load_data(path, 'data_lm.pkl', bs=bs)
data_lm.show_batch()

idx,text
0,"the humor might , as has been pointed out , be lost on foreigners , but that has n't hindered the film from being translated to more than 70 languages , and being shown somewhere in the world at any given time ! \n \n i must say that it 's a shame that it is n't available in the us , but i 've heard rumors of a"
1,", frail , and sickly , and she was young , sexy , and looking for money . xxmaj she used him for sympathy , and he did have it for her . xxmaj oh , and the scene with the maid and xxmaj chopin ! xxmaj completely made up for sex appeal ! xxmaj what were they thinking ? xxmaj perhaps they just wanted to show that xxmaj chopin"
2,""" . \n \n "" xxmaj american xxmaj graffiti "" is a story about one night in the lives of a group of small town teens . xxmaj they are all on the verge of something great , but none of them know it yet . xxmaj that something is called growing up . xxmaj this night is their farewell to innocent youth . xxmaj it is an obvious"
3,"disappointment is that , they change some of the character 's personality ! xxup another big disappointment is that they did n't put some of the good story lines in the mangas into the movie . i know that does n't seem like a big deal and i would like to give an example . xxmaj there 's this one character who loves another character from the other angel ("
4,"sounds as written as the xxmaj declaration of xxmaj independence , not the spontaneous remarks of a boy . ( xxmaj the same applies to every character , actually . ) xxmaj the direction is flat and incredibly unimaginative for a major studio release . xxmaj scenes which could have had real dynamism are played in flat tableau . xxmaj sequences of physical action are staged as badly as a"


In [0]:
learn = language_model_learner(data_lm, AWD_LSTM, drop_mult=0.3)
# learn.lr_find()
# learn.recorder.plot(skip_end=15)

In [0]:
learn.save_encoder('fine_tuned_enc')

In [5]:
path = untar_data(URLs.IMDB)
data_clas = (TextList.from_folder(path, vocab=data_lm.vocab)
             #grab all the text files in path
             .split_by_folder(valid='test')
             #split by train and valid folder (that only keeps 'train' and 'test' so no need to filter)
             .label_from_folder(classes=['neg', 'pos'])
             #label them all with their folders
             .databunch(bs=bs))

data_clas.save('data_clas.pkl')
data_clas = load_data(path, 'data_clas.pkl', bs=bs)
data_clas.show_batch()

text,target
xxbos xxmaj match 1 : xxmaj tag xxmaj team xxmaj table xxmaj match xxmaj bubba xxmaj ray and xxmaj spike xxmaj dudley vs xxmaj eddie xxmaj guerrero and xxmaj chris xxmaj benoit xxmaj bubba xxmaj ray and xxmaj spike xxmaj dudley started things off with a xxmaj tag xxmaj team xxmaj table xxmaj match against xxmaj eddie xxmaj guerrero and xxmaj chris xxmaj benoit . xxmaj according to the rules,pos
"xxbos * * xxmaj attention xxmaj spoilers * * \n \n xxmaj first of all , let me say that xxmaj rob xxmaj roy is one of the best films of the 90 's . xxmaj it was an amazing achievement for all those involved , especially the acting of xxmaj liam xxmaj neeson , xxmaj jessica xxmaj lange , xxmaj john xxmaj hurt , xxmaj brian xxmaj cox",pos
"xxbos xxmaj some have praised _ xxunk _ as a xxmaj disney adventure for adults . i do n't think so -- at least not for thinking adults . \n \n xxmaj this script suggests a beginning as a live - action movie , that struck someone as the type of crap you can not sell to adults anymore . xxmaj the "" crack staff "" of many older",neg
"xxbos xxmaj some have praised xxunk xxmaj lost xxmaj xxunk as a xxmaj disney adventure for adults . i do n't think so -- at least not for thinking adults . \n \n xxmaj this script suggests a beginning as a live - action movie , that struck someone as the type of crap you can not sell to adults anymore . xxmaj the "" crack staff "" of",neg
"xxbos xxmaj the premise of this movie has been tickling my imagination for quite some time now . xxmaj we 've all heard or read about it in some kind of con - text . xxmaj what would you do if you were all alone in the world ? xxmaj what would you do if the entire world suddenly disappeared in front of your eyes ? xxmaj in fact ,",pos


First using fastai's basic methodology we've created a language model fine tuned on the Wikipedia text. After that we've established a baseline using the fastai's fit_one_cycle policy. 

In [0]:
def apply_ar(alpha,out): return alpha * out[-1].float().pow(2).mean()

def apply_tar(beta,h): return beta * (h[:,1:] - h[:,:-1]).float().pow(2).mean()

def apply_normstable(beta,h): return beta/h.shape[1] *  (h[:,1:].pow(2).sum().sqrt() - h[:,:-1].pow(2).sum().sqrt()).float().pow(2)

class RNNTrainerNorm(LearnerCallback):
    def __init__(self, learn:Learner, alpha:float=2., beta:float=1., beta_norm:float=50.):
        super().__init__(learn)
        self.not_min += ['raw_out', 'out']
        self.alpha,self.beta,self.beta_norm = alpha,beta,beta_norm
        
    def on_epoch_begin(self, **kwargs):
        "Reset the hidden state of the model."
        self.learn.model.reset()

    def on_loss_begin(self, last_output:Tuple[Tensor,Tensor,Tensor], **kwargs):
        "Save the extra outputs for later and only returns the true output."
        self.raw_out,self.out = last_output[1],last_output[2]
        return {'last_output': last_output[0]}

    def on_backward_begin(self, last_loss:Rank0Tensor, last_input:Tensor, **kwargs):
        import pdb;pdb.set_trace()
        "Apply AR and TAR to `last_loss`."
        #AR and TAR
        if self.alpha != 0.:  last_loss += apply_ar(self.alpha,self.out)
        if self.beta != 0.:
            h = self.raw_out[-1]
            if len(h)>1: last_loss += apply_tar(self.beta,h)

        if self.beta_norm != 0.:
            h = self.raw_out[-1]
            if len(h)>1: last_loss += apply_normstable(self.beta_norm,h)        
        
        return {'last_loss': last_loss}



In [0]:
def has_params(m:nn.Module)->bool:
    "Check if `m` has at least one parameter"
    return len(list(m.parameters())) > 0


In [0]:
modules = [m for m in flatten_model(learn.model) if has_params(m)]

In [0]:
modules

[Embedding(60000, 400, padding_idx=1),
 Embedding(60000, 400, padding_idx=1),
 LSTM(400, 1152, batch_first=True),
 ParameterModule(),
 LSTM(1152, 1152, batch_first=True),
 ParameterModule(),
 LSTM(1152, 400, batch_first=True),
 ParameterModule(),
 BatchNorm1d(1200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
 Linear(in_features=1200, out_features=50, bias=True),
 BatchNorm1d(50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
 Linear(in_features=50, out_features=2, bias=True)]

In [0]:
def requires_grad_bool(m:nn.Module)->Optional[bool]:
    ps = list(m.parameters())
    return ps[0].requires_grad


In [0]:
for it in modules:
  print(requires_grad_bool(it),it)

False Embedding(60000, 400, padding_idx=1)
False Embedding(60000, 400, padding_idx=1)
False LSTM(400, 1152, batch_first=True)
False ParameterModule()
False LSTM(1152, 1152, batch_first=True)
False ParameterModule()
False LSTM(1152, 400, batch_first=True)
False ParameterModule()
True BatchNorm1d(1200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
True Linear(in_features=1200, out_features=50, bias=True)
True BatchNorm1d(50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
True Linear(in_features=50, out_features=2, bias=True)


In [0]:
learn.freeze_to(-2)

In [0]:
modules = [m for m in flatten_model(learn.model) if has_params(m)]

In [0]:
for it in modules:
  print(requires_grad_bool(it),it)

False Embedding(60000, 400, padding_idx=1)
False Embedding(60000, 400, padding_idx=1)
False LSTM(400, 1152, batch_first=True)
False ParameterModule()
False LSTM(1152, 1152, batch_first=True)
False ParameterModule()
True LSTM(1152, 400, batch_first=True)
True ParameterModule()
True BatchNorm1d(1200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
True Linear(in_features=1200, out_features=50, bias=True)
True BatchNorm1d(50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
True Linear(in_features=50, out_features=2, bias=True)


In [0]:
learn = text_classifier_learner(data_clas, AWD_LSTM, drop_mult=0.5,)
learn = learn.load_encoder('fine_tuned_enc')
learn.callbacks =[RNNTrainerNorm(learn,2.,0.,100.)]
learn.freeze_to(-2)
learn.lr_find()
learn.recorder.plot()

epoch,train_loss,valid_loss,accuracy,time


In [0]:
learn.fit_one_cycle(1, 1e-3, moms=(0.8,0.7))

epoch,train_loss,valid_loss,accuracy,time
0,0.402022,0.315708,0.86636,12:47
