###  `Fbeta_binary` for text classification

In [8]:
# Re-import edited modules automatically before each cell runs, so changes to
# library code are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [9]:
from fastai import *
from fastai.text import *

#### Load sample text

In [10]:
# Resolve the local folder of the IMDB sample dataset (presumably downloading and
# extracting it on first use — confirm against fastai's `untar_data`).
path = untar_data(URLs.IMDB_SAMPLE)
# Last expression of the cell: displays the folder contents.
path.ls()

[PosixPath('/home/wyquek/.fastai/data/imdb_sample/tmp_clas'),
 PosixPath('/home/wyquek/.fastai/data/imdb_sample/models'),
 PosixPath('/home/wyquek/.fastai/data/imdb_sample/texts.csv'),
 PosixPath('/home/wyquek/.fastai/data/imdb_sample/tmp'),
 PosixPath('/home/wyquek/.fastai/data/imdb_sample/fake_or_real_news.csv')]

In [11]:
# Build a text data bunch from `texts.csv` in the dataset folder.
# NOTE(review): this uses the generic `TextDataBunch`, while the following cells save it
# and reload it as `TextLMDataBunch`; presumably `TextLMDataBunch.from_csv` was
# intended here — confirm against the installed fastai version.
data_lm = TextDataBunch.from_csv(path, 'texts.csv')

In [12]:
# Persist the processed data bunch so it can be reloaded without re-processing the CSV
# (no name is given, so the library's default cache location is used).
data_lm.save()

In [13]:
# Reload the saved data as a language-model data bunch; this rebinds `data_lm`.
data_lm = TextLMDataBunch.load(path)
# Preview three rows of a batch to sanity-check the tokenised text.
data_lm.show_batch(rows=3)

idx,text
0,"xxfld 1 thin story concerns two small town brothers and their struggles over family honor . david morse is the responsible , straight - laced cop and ' good ' brother ; xxunk mortensen , the ' bad ' boy , is a former soldier and ex - xxunk . as an actor ( particularly in his xxunk years ) , sean penn seems to have xxunk his performances under the method . turning first - time writer and director for this xxunk , xxunk drama , he works his script and characters out through"
1,"night shortly after the movie had opened in japan . 30 minutes in i was amazed they stayed . i stayed so i would have the right to criticize it . the whole movie was xxunk my xxunk and xxunk laughs of disbelief from my japanese girlfriend . everyone i saw walking out of that cinema had looks of confusion and disappointment on their faces . \n\n to the makers of this movie , you owe me two hours . xxfld 1 i picked this movie on the cover alone thinking that i was in"
2,". "" you 're a better man than i am , gunga din "" ! one of hollywood 's classics and a perfect 10 xxrep 4 ! xxfld 1 let me start off by saying that after watching this episode for the first time on xxup dvd at 10 xxunk xxunk one night , i could not fall asleep until about xxunk xxunk \n\n this brief review may contain spoilers . \n\n i 'm a long - time fan of the xxunk and i can safely say this is the best episode i 've seen"


#### Train LM

In [14]:
# Language-model learner initialised from the pretrained WT103 weights.
learn = language_model_learner(data_lm, pretrained_model=URLs.WT103, drop_mult=0.3)

# The classifier cells further down call `learn.load_encoder('fine_tuned_enc')`, so that
# encoder file must exist for the notebook to run top to bottom on a fresh machine.
# Fine-tune briefly and save the encoder, unless a saved copy is already present
# (the guard keeps re-runs cheap and leaves an existing encoder untouched).
encoder_file = learn.path/learn.model_dir/'fine_tuned_enc.pth'
if not encoder_file.exists():
    learn.fit_one_cycle(1, 1e-2)
    learn.save_encoder('fine_tuned_enc')

#### TextClasDataBunch

In [15]:
# Build the classifier data with the data-block API:
#   - read the 'text' column of texts.csv, reusing the language model's vocab so that
#     token ids agree with the encoder loaded later,
#   - hold out 10% of the rows at random for validation,
#   - take the labels from column 0 of the CSV,
#   - wrap everything in a data bunch.
# NOTE(review): no seed is passed to the random split, so the validation set (and the
# metrics reported below) may differ between runs — consider fixing a seed.
data_clas = (TextList.from_csv(path, 'texts.csv', col='text',vocab=data_lm.vocab)
                .random_split_by_pct(0.1) 
                .label_from_df(cols=0)
                .databunch())
# Cache the processed classifier data under the name 'tmp_clas'.
data_clas.save('tmp_clas')

In [16]:
# Reload the cached classifier data with a batch size of 50; this rebinds `data_clas`.
data_clas = TextClasDataBunch.load(path, 'tmp_clas', bs=50)
# Preview three (text, label) rows.
data_clas.show_batch(rows = 3)

text,label
"xxfld 1 raising victor vargas : a review \n\n you know , raising victor vargas is like sticking your hands into a big , xxunk bowl of xxunk . it 's warm and gooey , but you 're not sure if it feels right . try as i might ,",negative
"xxfld 1 xxup the xxup shop xxup around xxup the xxup xxunk is one of the xxunk and most feel - good romantic comedies ever made . there 's just no getting around that , and it 's hard to actually put one 's feeling for this film into words",positive
"xxfld 1 now that che(2008 ) has finished its relatively short australian cinema run ( extremely limited xxunk screen in xxunk , after xxunk ) , i can xxunk join both xxunk of "" at the movies "" in taking steven soderbergh to task . \n\n it 's usually satisfying",negative


In [17]:
@dataclass
class Fbeta_binary(Callback):
    """F-beta score of one target class for single-label classification.

    Counts are accumulated batch by batch and reset at the start of every epoch;
    the score for the epoch is stored in `self.metric` by `on_epoch_end`.

    Fields:
        beta2: the beta of the F-beta score. Despite its name this is beta itself,
               NOT beta squared; it is squared in `on_epoch_end`.
               `beta2=1` therefore yields the F1 score.
        eps:   small constant added to denominators to avoid division by zero.
        clas:  index of the class that is treated as the "positive" class.
    """
    beta2: float = 2
    eps: float = 1e-9
    clas: int = 1

    def on_epoch_begin(self, **kwargs):
        "Reset the running counts for a new epoch."
        self.TP = 0            # predictions of `clas` that are correct
        self.total_y_pred = 0  # all predictions of `clas`
        self.total_y_true = 0  # all targets equal to `clas`

    def on_batch_end(self, last_output, last_target, **kwargs):
        "Add this batch's counts; the predicted class is the argmax over dim 1 of `last_output`."
        pred_is_clas = last_output.argmax(dim=1) == self.clas
        true_is_clas = last_target == self.clas

        # Combine the masks with `&` rather than `*`: arithmetic on boolean tensors is not
        # supported by every PyTorch version/device, while `&` works for both bool and
        # uint8 masks and gives the same counts.
        self.TP += (pred_is_clas & true_is_clas).float().sum()
        self.total_y_pred += pred_is_clas.float().sum()
        self.total_y_true += true_is_clas.float().sum()

    def on_epoch_end(self, **kwargs):
        "Compute precision, recall and the F-beta score from the accumulated counts."
        beta_sq = self.beta2**2
        prec = self.TP/(self.total_y_pred+self.eps)
        rec = self.TP/(self.total_y_true+self.eps)
        # F_beta = (1 + beta^2) * P * R / (beta^2 * P + R); eps guards the P = R = 0 case.
        self.metric = (prec*rec)/(prec*beta_sq+rec+self.eps)*(1+beta_sq)

### F1 for class = 1

In [18]:
# Fresh classifier learner that reuses the encoder saved as 'fine_tuned_enc'.
learn = text_classifier_learner(data_clas, drop_mult=0.5)
learn.load_encoder('fine_tuned_enc')
# Report accuracy plus the F-beta score of class 1; beta2=1 makes it the F1 score.
learn.metrics=[accuracy, Fbeta_binary(beta2=1,clas = 1)]

In [19]:
# Train for 2 epochs with the one-cycle schedule (1e-2 is passed as the learning-rate
# argument, momentums given by `moms`); metrics are printed per epoch below.
learn.fit_one_cycle(2, 1e-2, moms=(0.8,0.7))

Total time: 02:37
epoch  train_loss  valid_loss  accuracy  fbeta_binary
1      0.592195    0.639369    0.630000  0.683761      (01:18)
2      0.548734    0.556811    0.770000  0.741573      (01:18)



### F1 for class = 0

In [20]:
# Fresh classifier learner that reuses the encoder saved as 'fine_tuned_enc'.
learn = text_classifier_learner(data_clas, drop_mult=0.5)
learn.load_encoder('fine_tuned_enc')
# Report accuracy plus the F-beta score of class 0; beta2=1 makes it the F1 score.
learn.metrics=[accuracy, Fbeta_binary(beta2=1,clas = 0)]

In [21]:
# Train for 2 epochs with the one-cycle schedule (1e-2 is passed as the learning-rate
# argument, momentums given by `moms`); metrics are printed per epoch below.
learn.fit_one_cycle(2, 1e-2, moms=(0.8,0.7))

Total time: 02:49
epoch  train_loss  valid_loss  accuracy  fbeta_binary
1      0.606534    0.684188    0.570000  0.638655      (01:25)
2      0.571822    0.604035    0.640000  0.714286      (01:24)



### F1 for both classes together (first `fbeta_binary` column: class 0, second: class 1)

In [18]:
# Fresh classifier learner that reuses the encoder saved as 'fine_tuned_enc'.
learn = text_classifier_learner(data_clas, drop_mult=0.5)
learn.load_encoder('fine_tuned_enc')
# One F1 metric per class (beta2=1 makes F-beta the F1 score); each variable is named
# after the class it tracks.
f1_label0 = Fbeta_binary(beta2=1, clas=0)
f1_label1 = Fbeta_binary(beta2=1, clas=1)
# Column order in the training table: accuracy, F1 of class 0, F1 of class 1.
learn.metrics=[accuracy, f1_label0, f1_label1]

In [19]:
# Train for 2 epochs with the one-cycle schedule (1e-2 is passed as the learning-rate
# argument, momentums given by `moms`); both F1 columns are printed per epoch below.
learn.fit_one_cycle(2, 1e-2, moms=(0.8,0.7))

Total time: 02:43
epoch  train_loss  valid_loss  accuracy  fbeta_binary  fbeta_binary
1      0.611638    0.632096    0.600000  0.722222      0.285714      (01:17)
2      0.557955    0.729778    0.620000  0.712121      0.441176      (01:25)

