In [0]:
from pathlib import Path
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader
import torch
import torch.nn.functional as F

In [0]:
# Root data directory plus the two sub-directories used below
# (classifier arrays and language-model arrays).
PATH = Path('/content/drive/My Drive/cse_class_train/word_emb')
CLAS_PATH = PATH / 'classifier'
LM_PATH = PATH / 'language_model'

In [0]:
from ulangel.data.data_packer import LanguageModelDataset, DataBunch, TextClassificationDataset
from ulangel.data.data_packer import ValidationSampler, TrainingSampler, pad_collate
from ulangel.rnn.nn_block import AWD_LSTM, LinearDecoder, SequentialRNN, SentenceEncoder
from ulangel.rnn.nn_block import PoolingLinearClassifier
from ulangel.utils.callbacks import TrainEvalCallback, CudaCallback, Recorder, LR_Find
from ulangel.utils.callbacks import RNNTrainer, combine_scheds, ParamScheduler, sched_cos
from ulangel.utils.learner import Learner, freeze_all, unfreeze_all, freeze_upto
from ulangel.utils.optimizer import Optimizer, StatefulOptimizer, AverageGrad, AverageSqrGrad
from ulangel.utils.optimizer import adam_opt
from ulangel.utils.stats import AvgStatsCallback, accuracy, accuracy_flat, cross_entropy_flat

In [0]:
# Load the training and validation arrays for the language model.
# NOTE(review): allow_pickle=True unpickles the files; only load arrays you produced yourself.
trn_lm, val_lm = (
    np.load(LM_PATH/file_name, allow_pickle=True)
    for file_name in ('trn_lm_ids.npy', 'val_lm_ids.npy')
)

# Wrap each array in a LanguageModelDataset (bptt=16) and batch it;
# the validation loader uses a batch twice as large as the training one.
trn_dl = DataLoader(
    LanguageModelDataset(trn_lm, bptt=16),
    batch_size=64,
)
val_dl = DataLoader(
    LanguageModelDataset(val_lm, bptt=16),
    batch_size=64*2,
)

In [0]:
# Bundle the training and validation loaders into one DataBunch object.
language_model_data = DataBunch(trn_dl, val_dl)

In [0]:
class LmArg:
    """Fixed hyper-parameters for the language model built in this notebook.

    The constructor takes no arguments; every setting below is stored as an
    instance attribute of the same name.
    """

    def __init__(self):
        settings = {
            'ntokens': 15484,
            'batchsize': 64,
            'emsize': 400,
            'pad_token': 1,
            'emb_drop': 0.05,
            # number of activations in each hidden layer
            'nhid': 1150,
            'nlayers': 3,
            # dropout applied in the decoder
            'dropout': 0.4,
            # dropout applied to the rnn layers
            'dropouth': 0.3,
            # dropout applied to the input embedding layers
            'dropouti': 0.65,
            # dropout that removes whole words from the embedding layer
            'dropoute': 0.1,
            # weight dropout on the RNN hidden-to-hidden matrix
            'wdrop': 0.5,
            'tied': False,
            'bidir': False,
            # recurrent dropout of the lstm (from step t to t+1)
            'lstm_weights_drop': 0.5,
            # True when a CUDA device is available at construction time
            'cuda': torch.cuda.is_available(),
            'bptt': 16,
        }
        for attr_name, attr_value in settings.items():
            setattr(self, attr_name, attr_value)

In [0]:
# Instantiate the hyper-parameters and build the AWD-LSTM encoder from them.
# All arguments are passed positionally, in the order shown.
encode_args = LmArg()
lstm_enc = AWD_LSTM(
    encode_args.ntokens,
    encode_args.emsize,
    encode_args.nhid,
    encode_args.nlayers,
    encode_args.pad_token,
    encode_args.dropouth,
    encode_args.dropouti,
    encode_args.dropoute,
    encode_args.wdrop,
)

In [0]:
# Linear decoder on top of the encoder. It receives the encoder's embedding
# module via tie_encoder (presumably to share weights with it; TODO confirm).
decoder = LinearDecoder(
    encode_args.ntokens,
    encode_args.emsize,
    encode_args.dropout,
    tie_encoder=lstm_enc.emb,
    bias=True,
)

In [0]:
# Chain the AWD-LSTM encoder and the linear decoder into a single model.
language_model = SequentialRNN(lstm_enc, decoder)

In [10]:
# Display the model architecture. Evaluating the model itself shows its repr;
# the bare `.modules` attribute is an uncalled bound method and only displays
# "<bound method Module.modules of ...>".
language_model

<bound method Module.modules of SequentialRNN(
  (0): AWD_LSTM(
    (emb): Embedding(15484, 400, padding_idx=1)
    (emb_dp): EmbeddingDropout(
      (emb): Embedding(15484, 400, padding_idx=1)
    )
    (rnns): ModuleList(
      (0): ConnectionWeightDropout(
        (module): LSTM(400, 1150, batch_first=True)
      )
      (1): ConnectionWeightDropout(
        (module): LSTM(1150, 1150, batch_first=True)
      )
      (2): ConnectionWeightDropout(
        (module): LSTM(1150, 400, batch_first=True)
      )
    )
    (input_dp): ActivationDropout()
    (hidden_dps): ModuleList(
      (0): ActivationDropout()
      (1): ActivationDropout()
      (2): ActivationDropout()
    )
  )
  (1): LinearDecoder(
    (output_dp): ActivationDropout()
    (decoder): Linear(in_features=400, out_features=15484, bias=True)
  )
)>

In [0]:
# Callbacks attached to the language-model learner, in the order they are listed.
cbs_languagemodel = [
    CudaCallback(),
    TrainEvalCallback(),
    AvgStatsCallback([accuracy_flat]),
    Recorder(),
    RNNTrainer(alpha=2., beta=1.),
]

In [0]:
# Learner for the language model: flattened cross-entropy loss, Adam-style
# optimizer factory, a fixed learning rate of 1e-5 and the callbacks above.
language_model_learner = Learner(
    model=language_model,
    data=language_model_data,
    loss_func=cross_entropy_flat,
    opt_func=adam_opt(),
    lr=1e-5,
    cbs=cbs_languagemodel,
)

In [13]:
# Load the pretrained weights into the language model.
# map_location='cpu' lets a checkpoint saved from a GPU load on a CPU-only
# runtime; load_state_dict then copies the values into the model's existing
# parameters, whichever device they live on.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
language_model_learner.model.load_state_dict(
    torch.load(PATH/'models'/'tw_pretrained_20191030.pkl', map_location='cpu')
)

<All keys matched successfully>

In [0]:
# Split the language model into layer groups, from input to output:
# (embedding, input dropout), one (rnn, hidden dropout) pair per LSTM layer,
# and finally (decoder linear layer, output dropout).
_lm_encoder = language_model_learner.model[0]
_lm_decoder = language_model_learner.model[1]
language_model_layer_groups = [
    (_lm_encoder.emb, _lm_encoder.input_dp),
    *zip(_lm_encoder.rnns, _lm_encoder.hidden_dps),
    (_lm_decoder.decoder, _lm_decoder.output_dp),
]

In [16]:
# Report requires_grad for every parameter of every module in each
# language-model layer group (groups are numbered from 1).
group_rule, module_rule, param_rule = '===' * 8, '---' * 4, '***' * 2
for group_no, group in enumerate(language_model_layer_groups, start=1):
    print('layer', group_no, ': ')
    for module in group:
        print('function: ', module)
        for param_name, param in module.named_parameters():
            print('parameter', param_name, param.requires_grad)
            print(param_rule)
        print(module_rule)
    print(group_rule)

layer 1 : 
function:  Embedding(15484, 400, padding_idx=1)
parameter weight True
******
------------
function:  ActivationDropout()
------------
layer 2 : 
function:  ConnectionWeightDropout(
  (module): LSTM(400, 1150, batch_first=True)
)
parameter module.weight_ih_l0 True
******
parameter module.weight_hh_l0 True
******
parameter module.bias_ih_l0 True
******
parameter module.bias_hh_l0 True
******
parameter module.weight_hh_l0_raw True
******
------------
function:  ActivationDropout()
------------
layer 3 : 
function:  ConnectionWeightDropout(
  (module): LSTM(1150, 1150, batch_first=True)
)
parameter module.weight_ih_l0 True
******
parameter module.weight_hh_l0 True
******
parameter module.bias_ih_l0 True
******
parameter module.bias_hh_l0 True
******
parameter module.weight_hh_l0_raw True
******
------------
function:  ActivationDropout()
------------
layer 4 : 
function:  ConnectionWeightDropout(
  (module): LSTM(1150, 400, batch_first=True)
)
parameter module.weight_ih_l0 True


In [0]:
# Freeze all language-model layer groups; the check in the next cell reports
# requires_grad False for the parameters it lists.
freeze_all(language_model_layer_groups)

In [18]:
# After freeze_all: report requires_grad for every parameter of every module
# in each language-model layer group (groups are numbered from 1).
group_rule, module_rule, param_rule = '===' * 8, '---' * 4, '***' * 2
for group_no, group in enumerate(language_model_layer_groups, start=1):
    print('layer', group_no, ': ')
    for module in group:
        print('function: ', module)
        for param_name, param in module.named_parameters():
            print('parameter', param_name, param.requires_grad)
            print(param_rule)
        print(module_rule)
    print(group_rule)

layer 1 : 
function:  Embedding(15484, 400, padding_idx=1)
parameter weight False
******
------------
function:  ActivationDropout()
------------
layer 2 : 
function:  ConnectionWeightDropout(
  (module): LSTM(400, 1150, batch_first=True)
)
parameter module.weight_ih_l0 False
******
parameter module.weight_hh_l0 False
******
parameter module.bias_ih_l0 False
******
parameter module.bias_hh_l0 False
******
parameter module.weight_hh_l0_raw False
******
------------
function:  ActivationDropout()
------------
layer 3 : 
function:  ConnectionWeightDropout(
  (module): LSTM(1150, 1150, batch_first=True)
)
parameter module.weight_ih_l0 False
******
parameter module.weight_hh_l0 False
******
parameter module.bias_ih_l0 False
******
parameter module.bias_hh_l0 False
******
parameter module.weight_hh_l0_raw False
******
------------
function:  ActivationDropout()
------------
layer 4 : 
function:  ConnectionWeightDropout(
  (module): LSTM(1150, 400, batch_first=True)
)
parameter module.weight_

In [0]:
# Make all language-model layer groups trainable again; the check in the next
# cell reports requires_grad True for the parameters it lists.
unfreeze_all(language_model_layer_groups)

In [23]:
# After unfreeze_all: report requires_grad for every parameter of every module
# in each language-model layer group (groups are numbered from 1).
group_rule, module_rule, param_rule = '===' * 8, '---' * 4, '***' * 2
for group_no, group in enumerate(language_model_layer_groups, start=1):
    print('layer', group_no, ': ')
    for module in group:
        print('function: ', module)
        for param_name, param in module.named_parameters():
            print('parameter', param_name, param.requires_grad)
            print(param_rule)
        print(module_rule)
    print(group_rule)

layer 1 : 
function:  Embedding(15484, 400, padding_idx=1)
parameter weight True
******
------------
function:  ActivationDropout()
------------
layer 2 : 
function:  ConnectionWeightDropout(
  (module): LSTM(400, 1150, batch_first=True)
)
parameter module.weight_ih_l0 True
******
parameter module.weight_hh_l0 True
******
parameter module.bias_ih_l0 True
******
parameter module.bias_hh_l0 True
******
parameter module.weight_hh_l0_raw True
******
------------
function:  ActivationDropout()
------------
layer 3 : 
function:  ConnectionWeightDropout(
  (module): LSTM(1150, 1150, batch_first=True)
)
parameter module.weight_ih_l0 True
******
parameter module.weight_hh_l0 True
******
parameter module.bias_ih_l0 True
******
parameter module.bias_hh_l0 True
******
parameter module.weight_hh_l0_raw True
******
------------
function:  ActivationDropout()
------------
layer 4 : 
function:  ConnectionWeightDropout(
  (module): LSTM(1150, 400, batch_first=True)
)
parameter module.weight_ih_l0 True


In [0]:
# Partially freeze the language model. In the recorded check below, the LSTM
# groups 2 and 3 report requires_grad False while the embedding weight in
# group 1 reports True.
# NOTE(review): the embedding presumably stays trainable because the decoder
# was built with tie_encoder=lstm_enc.emb and shares its weight - confirm.
freeze_upto(language_model_layer_groups, 3)

In [25]:
# After freeze_upto: report requires_grad for every parameter of every module
# in each language-model layer group (groups are numbered from 1).
group_rule, module_rule, param_rule = '===' * 8, '---' * 4, '***' * 2
for group_no, group in enumerate(language_model_layer_groups, start=1):
    print('layer', group_no, ': ')
    for module in group:
        print('function: ', module)
        for param_name, param in module.named_parameters():
            print('parameter', param_name, param.requires_grad)
            print(param_rule)
        print(module_rule)
    print(group_rule)

layer 1 : 
function:  Embedding(15484, 400, padding_idx=1)
parameter weight True
******
------------
function:  ActivationDropout()
------------
layer 2 : 
function:  ConnectionWeightDropout(
  (module): LSTM(400, 1150, batch_first=True)
)
parameter module.weight_ih_l0 False
******
parameter module.weight_hh_l0 False
******
parameter module.bias_ih_l0 False
******
parameter module.bias_hh_l0 False
******
parameter module.weight_hh_l0_raw False
******
------------
function:  ActivationDropout()
------------
layer 3 : 
function:  ConnectionWeightDropout(
  (module): LSTM(1150, 1150, batch_first=True)
)
parameter module.weight_ih_l0 False
******
parameter module.weight_hh_l0 False
******
parameter module.bias_ih_l0 False
******
parameter module.bias_hh_l0 False
******
parameter module.weight_hh_l0_raw False
******
------------
function:  ActivationDropout()
------------
layer 4 : 
function:  ConnectionWeightDropout(
  (module): LSTM(1150, 400, batch_first=True)
)
parameter module.weight_i

In [26]:
# Fine-tune the language model. The recorded output shows three rounds (0-2),
# each with a train and a valid line (presumably [loss, accuracy_flat]).
language_model_learner.fit(3)

0
train: [5.1576648517309875, tensor(0.2359, device='cuda:0')]
valid: [4.239763603608715, tensor(0.3656, device='cuda:0')]
1
train: [4.090590859108967, tensor(0.3605, device='cuda:0')]
valid: [4.014514121368588, tensor(0.3802, device='cuda:0')]
2
train: [3.9299823531498297, tensor(0.3768, device='cuda:0')]
valid: [3.9086641440649115, tensor(0.3863, device='cuda:0')]


Example for classification

In [0]:
# --- Training split: token ids + labels -> dataset -> sampler -> loader ---
# NOTE(review): allow_pickle=True unpickles the files; only load arrays you produced yourself.
trn_orig_ids = np.load(CLAS_PATH/'train_orig_ids.npy', allow_pickle=True)
trn_orig_labels = np.load(CLAS_PATH/'train_orig_labels.npy', allow_pickle=True)
trn_orig_ds = TextClassificationDataset(trn_orig_ids, trn_orig_labels)
trn_orig_sampler = TrainingSampler(
    trn_orig_ds.x,
    key=lambda idx: len(trn_orig_ds.x[idx]),
    bs=encode_args.batchsize,
)
trn_orig_dl = DataLoader(
    trn_orig_ds,
    batch_size=encode_args.batchsize,
    sampler=trn_orig_sampler,
    collate_fn=pad_collate,
)

# --- Validation split: read from the 'test_*' files, batch twice as large ---
val_orig_ids = np.load(CLAS_PATH/'test_orig_ids.npy', allow_pickle=True)
val_orig_labels = np.load(CLAS_PATH/'test_orig_labels.npy', allow_pickle=True)
val_orig_ds = TextClassificationDataset(val_orig_ids, val_orig_labels)
val_orig_sampler = ValidationSampler(
    val_orig_ds.x,
    key=lambda idx: len(val_orig_ds.x[idx]),
)
val_orig_dl = DataLoader(
    val_orig_ds,
    batch_size=encode_args.batchsize*2,
    sampler=val_orig_sampler,
    collate_fn=pad_collate,
)

db_trn_orig_val_orig = DataBunch(trn_orig_dl, val_orig_dl)

# --- Classifier: the encoder object from the language model, wrapped in a
# SentenceEncoder, followed by a pooling classifier head with 4 outputs ---
sent_enc = SentenceEncoder(lstm_enc, encode_args.bptt)
pool_clas = PoolingLinearClassifier(
    layers=[3*encode_args.emsize, 100, 4],
    drops=[0.2, 0.1],
)
clas_model = SequentialRNN(sent_enc, pool_clas)

In [0]:
# Callbacks and learner for the classifier: plain cross-entropy loss,
# Adam-style optimizer factory and a fixed learning rate of 1e-5.
cbs_clas = [
    CudaCallback(),
    TrainEvalCallback(),
    AvgStatsCallback([accuracy]),
    Recorder(),
]
trn_orig_learn = Learner(
    clas_model,
    db_trn_orig_val_orig,
    F.cross_entropy,
    opt_func=adam_opt(),
    lr=1e-5,
    cbs=cbs_clas,
)

In [31]:
# Display the classifier architecture: SentenceEncoder followed by PoolingLinearClassifier.
trn_orig_learn.model

SequentialRNN(
  (0): SentenceEncoder(
    (module): AWD_LSTM(
      (emb): Embedding(15484, 400, padding_idx=1)
      (emb_dp): EmbeddingDropout(
        (emb): Embedding(15484, 400, padding_idx=1)
      )
      (rnns): ModuleList(
        (0): ConnectionWeightDropout(
          (module): LSTM(400, 1150, batch_first=True)
        )
        (1): ConnectionWeightDropout(
          (module): LSTM(1150, 1150, batch_first=True)
        )
        (2): ConnectionWeightDropout(
          (module): LSTM(1150, 400, batch_first=True)
        )
      )
      (input_dp): ActivationDropout()
      (hidden_dps): ModuleList(
        (0): ActivationDropout()
        (1): ActivationDropout()
        (2): ActivationDropout()
      )
    )
  )
  (1): PoolingLinearClassifier(
    (layers): Sequential(
      (0): BatchNorm1d(1200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (1): Dropout(p=0.2, inplace=False)
      (2): Linear(in_features=1200, out_features=100, bias=True)
      (3

In [0]:
# Split the classifier into layer groups, from input to output:
# (embedding, input dropout), one (rnn, hidden dropout) pair per LSTM layer,
# then the classifier head's layers 0-3 and layers 4-7 as two further groups.
_clas_encoder = trn_orig_learn.model[0].module
_clas_head = trn_orig_learn.model[1].layers
clas_model_layer_groups = [
    (_clas_encoder.emb, _clas_encoder.input_dp),
    *zip(_clas_encoder.rnns, _clas_encoder.hidden_dps),
    tuple(_clas_head[k] for k in range(0, 4)),
    tuple(_clas_head[k] for k in range(4, 8)),
]

In [33]:
# Report requires_grad for every parameter of every module in each
# classifier layer group (groups are numbered from 1).
group_rule, module_rule, param_rule = '===' * 8, '---' * 4, '***' * 2
for group_no, group in enumerate(clas_model_layer_groups, start=1):
    print('layer', group_no, ': ')
    for module in group:
        print('function: ', module)
        for param_name, param in module.named_parameters():
            print('parameter', param_name, param.requires_grad)
            print(param_rule)
        print(module_rule)
    print(group_rule)

layer 1 : 
function:  Embedding(15484, 400, padding_idx=1)
parameter weight True
******
------------
function:  ActivationDropout()
------------
layer 2 : 
function:  ConnectionWeightDropout(
  (module): LSTM(400, 1150, batch_first=True)
)
parameter module.weight_ih_l0 False
******
parameter module.weight_hh_l0 False
******
parameter module.bias_ih_l0 False
******
parameter module.bias_hh_l0 False
******
------------
function:  ActivationDropout()
------------
layer 3 : 
function:  ConnectionWeightDropout(
  (module): LSTM(1150, 1150, batch_first=True)
)
parameter module.weight_ih_l0 False
******
parameter module.weight_hh_l0 False
******
parameter module.bias_ih_l0 False
******
parameter module.bias_hh_l0 False
******
------------
function:  ActivationDropout()
------------
layer 4 : 
function:  ConnectionWeightDropout(
  (module): LSTM(1150, 400, batch_first=True)
)
parameter module.weight_ih_l0 True
******
parameter module.weight_hh_l0 True
******
parameter module.bias_ih_l0 True
**

In [0]:
# Freeze all classifier layer groups; the check in the next cell reports
# requires_grad False for the parameters it lists.
freeze_all(clas_model_layer_groups)

In [35]:
# After freeze_all: report requires_grad for every parameter of every module
# in each classifier layer group (groups are numbered from 1).
group_rule, module_rule, param_rule = '===' * 8, '---' * 4, '***' * 2
for group_no, group in enumerate(clas_model_layer_groups, start=1):
    print('layer', group_no, ': ')
    for module in group:
        print('function: ', module)
        for param_name, param in module.named_parameters():
            print('parameter', param_name, param.requires_grad)
            print(param_rule)
        print(module_rule)
    print(group_rule)

layer 1 : 
function:  Embedding(15484, 400, padding_idx=1)
parameter weight False
******
------------
function:  ActivationDropout()
------------
layer 2 : 
function:  ConnectionWeightDropout(
  (module): LSTM(400, 1150, batch_first=True)
)
parameter module.weight_ih_l0 False
******
parameter module.weight_hh_l0 False
******
parameter module.bias_ih_l0 False
******
parameter module.bias_hh_l0 False
******
------------
function:  ActivationDropout()
------------
layer 3 : 
function:  ConnectionWeightDropout(
  (module): LSTM(1150, 1150, batch_first=True)
)
parameter module.weight_ih_l0 False
******
parameter module.weight_hh_l0 False
******
parameter module.bias_ih_l0 False
******
parameter module.bias_hh_l0 False
******
------------
function:  ActivationDropout()
------------
layer 4 : 
function:  ConnectionWeightDropout(
  (module): LSTM(1150, 400, batch_first=True)
)
parameter module.weight_ih_l0 False
******
parameter module.weight_hh_l0 False
******
parameter module.bias_ih_l0 Fals

In [0]:
# Make all classifier layer groups trainable again; the check in the next
# cell reports requires_grad True for the parameters it lists.
unfreeze_all(clas_model_layer_groups)

In [37]:
# After unfreeze_all: report requires_grad for every parameter of every module
# in each classifier layer group (groups are numbered from 1).
group_rule, module_rule, param_rule = '===' * 8, '---' * 4, '***' * 2
for group_no, group in enumerate(clas_model_layer_groups, start=1):
    print('layer', group_no, ': ')
    for module in group:
        print('function: ', module)
        for param_name, param in module.named_parameters():
            print('parameter', param_name, param.requires_grad)
            print(param_rule)
        print(module_rule)
    print(group_rule)

layer 1 : 
function:  Embedding(15484, 400, padding_idx=1)
parameter weight True
******
------------
function:  ActivationDropout()
------------
layer 2 : 
function:  ConnectionWeightDropout(
  (module): LSTM(400, 1150, batch_first=True)
)
parameter module.weight_ih_l0 True
******
parameter module.weight_hh_l0 True
******
parameter module.bias_ih_l0 True
******
parameter module.bias_hh_l0 True
******
------------
function:  ActivationDropout()
------------
layer 3 : 
function:  ConnectionWeightDropout(
  (module): LSTM(1150, 1150, batch_first=True)
)
parameter module.weight_ih_l0 True
******
parameter module.weight_hh_l0 True
******
parameter module.bias_ih_l0 True
******
parameter module.bias_hh_l0 True
******
------------
function:  ActivationDropout()
------------
layer 4 : 
function:  ConnectionWeightDropout(
  (module): LSTM(1150, 400, batch_first=True)
)
parameter module.weight_ih_l0 True
******
parameter module.weight_hh_l0 True
******
parameter module.bias_ih_l0 True
******
par

In [0]:
# Partially freeze the classifier. In the recorded check below, groups 1-4
# (embedding and the three LSTM layers) report requires_grad False.
# NOTE(review): presumably only the last classifier-head group stays
# trainable; the recorded output is truncated before that group - confirm.
freeze_upto(clas_model_layer_groups, 5)

In [39]:
# After freeze_upto: report requires_grad for every parameter of every module
# in each classifier layer group (groups are numbered from 1).
group_rule, module_rule, param_rule = '===' * 8, '---' * 4, '***' * 2
for group_no, group in enumerate(clas_model_layer_groups, start=1):
    print('layer', group_no, ': ')
    for module in group:
        print('function: ', module)
        for param_name, param in module.named_parameters():
            print('parameter', param_name, param.requires_grad)
            print(param_rule)
        print(module_rule)
    print(group_rule)

layer 1 : 
function:  Embedding(15484, 400, padding_idx=1)
parameter weight False
******
------------
function:  ActivationDropout()
------------
layer 2 : 
function:  ConnectionWeightDropout(
  (module): LSTM(400, 1150, batch_first=True)
)
parameter module.weight_ih_l0 False
******
parameter module.weight_hh_l0 False
******
parameter module.bias_ih_l0 False
******
parameter module.bias_hh_l0 False
******
------------
function:  ActivationDropout()
------------
layer 3 : 
function:  ConnectionWeightDropout(
  (module): LSTM(1150, 1150, batch_first=True)
)
parameter module.weight_ih_l0 False
******
parameter module.weight_hh_l0 False
******
parameter module.bias_ih_l0 False
******
parameter module.bias_hh_l0 False
******
------------
function:  ActivationDropout()
------------
layer 4 : 
function:  ConnectionWeightDropout(
  (module): LSTM(1150, 400, batch_first=True)
)
parameter module.weight_ih_l0 False
******
parameter module.weight_hh_l0 False
******
parameter module.bias_ih_l0 Fals

In [41]:
# Train the classifier. The recorded output shows three rounds (0-2), each
# with a train and a valid line.
# NOTE(review): in the recorded run the second value (presumably accuracy)
# stays around 0.23-0.36 for a 4-output head and the validation loss rises
# each round; lr=1e-5 with most groups frozen may be too low - revisit.
trn_orig_learn.fit(3)

0
train: [1.4411440886092823, tensor(0.2559, device='cuda:0')]
valid: [1.3730971323551464, tensor(0.3570, device='cuda:0')]
1
train: [1.4609557924355527, tensor(0.2270, device='cuda:0')]
valid: [1.3866414309920825, tensor(0.2937, device='cuda:0')]
2
train: [1.4449442968170438, tensor(0.2470, device='cuda:0')]
valid: [1.4008582878478886, tensor(0.2898, device='cuda:0')]
