### Roberta-base with mixout and CLS token

In [1]:
from __future__ import absolute_import

import sys
import os

try:
    from dotenv import find_dotenv, load_dotenv
except:
    pass

import argparse

try:
    sys.path.append(os.path.join(os.path.dirname(__file__), '../src'))
except:
    sys.path.append(os.path.join(os.getcwd(), '../src'))
    
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torchcontrib.optim import SWA
from torch.optim import Adam, SGD 
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau, CyclicLR, \
                                     CosineAnnealingWarmRestarts

from consNLP.data import load_data, data_utils, fetch_dataset
from consNLP.models import transformer_models, activations, layers, losses, scorers
from consNLP.visualization import visualize
from consNLP.trainer.trainer import BasicTrainer, PLTrainer, test_pl_trainer
from consNLP.trainer.trainer_utils import set_seed, _has_apex, _torch_lightning_available, _has_wandb, _torch_gpu_available, _num_gpus, _torch_tpu_available
from consNLP.preprocessing.custom_tokenizer import BERTweetTokenizer

if _has_apex:
    #from torch.cuda import amp
    from apex import amp

if _torch_tpu_available:
    import torch_xla
    import torch_xla.core.xla_model as xm
    import torch_xla.distributed.xla_multiprocessing as xmp

if _has_wandb:
    import wandb
    try:
        load_dotenv(find_dotenv())
        wandb.login(key=os.environ['WANDB_API_KEY'])
    except:
        _has_wandb = False

if _torch_lightning_available:
    import pytorch_lightning as pl
    from pytorch_lightning import Trainer, seed_everything
    from pytorch_lightning.loggers import WandbLogger
    from pytorch_lightning.metrics.metric import NumpyMetric
    from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, Callback

import tokenizers
from transformers import AutoModel, AutoTokenizer, AdamW, get_linear_schedule_with_warmup, AutoConfig

I0811 23:51:42.003524 4689735104 file_utils.py:41] PyTorch version 1.5.0 available.
I0811 23:51:55.148578 4689735104 file_utils.py:57] TensorFlow version 2.2.0-rc3 available.
I0811 23:51:58.876697 4689735104 modeling.py:230] Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .
wandb: Appending key for api.wandb.ai to your netrc file: /Users/victor/.netrc
scipy.sparse.sparsetools is a private module for scipy.sparse, and should not be used.
  _deprecated()
I0811 23:52:02.329663 4689735104 textcleaner.py:37] 'pattern' package not found; tag filters are not available for English
W0811 23:52:02.783984 4689735104 deprecation.py:323] From /Users/victor/anaconda3/lib/python3.7/site-packages/tensorflow/python/compat/v2_compat.py:96: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.
Instructions for updating:
non-resource variables are not supported in the long term
wandb: Appending

In [2]:
load_dotenv(find_dotenv())

True

In [5]:
parser = argparse.ArgumentParser(prog='Torch trainer function',conflict_handler='resolve')

parser.add_argument('--train_data', type=str, default='../data/raw/COVID19Tweet-master/train.tsv', required=False,
                    help='train data')
parser.add_argument('--val_data', type=str, default='../data/raw/COVID19Tweet-master/valid.tsv', required=False,
                    help='validation data')
parser.add_argument('--test_data', type=str, default=None, required=False,
                    help='test data')

parser.add_argument('--task_type', type=str, default='binary_sequence_classification', required=False,
                    help='type of task')

parser.add_argument('--transformer_model_pretrained_path', type=str, default='roberta-base', required=False,
                    help='transformer model pretrained path or huggingface model name')
parser.add_argument('--transformer_config_path', type=str, default='roberta-base', required=False,
                    help='transformer config file path or huggingface model name')
parser.add_argument('--transformer_tokenizer_path', type=str, default='roberta-base', required=False,
                    help='transformer tokenizer file path or huggingface model name')
parser.add_argument('--bpe_vocab_path', type=str, default='', required=False,
                    help='bytepairencoding vocab file path')
parser.add_argument('--bpe_merges_path', type=str, default='', required=False,
                    help='bytepairencoding merges file path')
parser.add_argument('--berttweettokenizer_path', type=str, default='', required=False,
                    help='BERTweet tokenizer path')

parser.add_argument('--max_text_len', type=int, default=80, required=False,
                    help='maximum length of text')
parser.add_argument('--epochs', type=int, default=5, required=False,
                    help='number of epochs')
parser.add_argument('--lr', type=float, default=.00003, required=False,
                    help='learning rate')
parser.add_argument('--loss_function', type=str, default='bcelogit', required=False,
                    help='loss function')
parser.add_argument('--metric', type=str, default='f1', required=False,
                    help='scorer metric')

parser.add_argument('--use_lightning_trainer', type=bool, default=True, required=False,
                    help='if lightning trainer needs to be used')
parser.add_argument('--use_torch_trainer', type=bool, default=False, required=False,
                    help='if custom torch trainer needs to be used')
parser.add_argument('--use_apex', type=bool, default=False, required=False,
                    help='if apex needs to be used')
parser.add_argument('--use_gpu', type=bool, default=False, required=False,
                    help='GPU mode')
parser.add_argument('--use_TPU', type=bool, default=False, required=False,
                    help='TPU mode')
parser.add_argument('--num_gpus', type=int, default=0, required=False,
                    help='Number of GPUs')
parser.add_argument('--num_tpus', type=int, default=0, required=False,
                    help='Number of TPUs')

parser.add_argument('--train_batch_size', type=int, default=32, required=False,
                    help='train batch size')
parser.add_argument('--eval_batch_size', type=int, default=16, required=False,
                    help='eval batch size')

parser.add_argument('--model_save_path', type=str, default='../models/model1_mixout/', required=False,
                    help='seed')

parser.add_argument('--wandb_logging', type=bool, default=False, required=False,
                    help='wandb logging needed')

parser.add_argument('--seed', type=int, default=42, required=False,
                    help='seed')

args, _ = parser.parse_known_args()

print ("Wandb Logging: {}, GPU: {}, Pytorch Lightning: {}, TPU: {}, Apex: {}".format(\
            _has_wandb and args.wandb_logging, _torch_gpu_available,\
            _torch_lightning_available and args.use_lightning_trainer, _torch_tpu_available, _has_apex))

Wandb Logging: False, GPU: False, Pytorch Lightning: True, TPU: False, Apex: False


In [6]:
reshape = False
final_activation = None
convert_output = None

if args.task_type == 'binary_sequence_classification':
    if args.metric != 'roc_auc_score': 
        convert_output = 'round'
    if args.loss_function == 'bcelogit':
        final_activation = 'sigmoid'
        
elif args.task_type == 'multiclass_sequence_classification':
    convert_output = 'max'
    
elif args.task_type == 'binary_token_classification':
    reshape = True
    if args.metric != 'roc_auc_score': 
        convert_output = 'round'
    if args.loss_function == 'bcelogit':
        final_activation = 'sigmoid'
        
elif args.task_type == 'multiclass_token_classification':
    reshape = True
    convert_output = 'max'

In [7]:
train_df = load_data.load_custom_text_as_pd(args.train_data,sep='\t',header=True, \
                              text_column=['Text'],target_column=['Label'])
val_df = load_data.load_custom_text_as_pd(args.val_data,sep='\t', header=True, \
                          text_column=['Text'],target_column=['Label'])

train_df = pd.DataFrame(train_df,copy=False)
val_df = pd.DataFrame(val_df,copy=False)

In [8]:
train_df.head(5)

Unnamed: 0,Id,words,labels
0,1241490299215634434,Official death toll from #covid19 in the Unite...,INFORMATIVE
1,1245916400981381130,"Dearest Mr. President @USER 1,169 coronavirus ...",INFORMATIVE
2,1241132432402849793,Latest Updates March 20 ⚠️5274 new cases and 3...,INFORMATIVE
3,1236107253666607104,真把公主不当干部 BREAKING: 21 people on Grand Princess...,INFORMATIVE
4,1239673817552879619,OKLAHOMA CITY — The State Department of Educat...,UNINFORMATIVE


In [9]:
model_save_dir = args.model_save_path
try:
    os.makedirs(model_save_dir)
except OSError:
    pass

In [10]:
train_df.labels, label2idx = data_utils.convert_categorical_label_to_int(train_df.labels, \
                                                         save_path=os.path.join(model_save_dir,'label2idx.pkl'))

val_df.labels, _ = data_utils.convert_categorical_label_to_int(val_df.labels, \
                                                         save_path=os.path.join(model_save_dir,'label2idx.pkl'))

In [11]:
train_df.head(5)

Unnamed: 0,Id,words,labels
0,1241490299215634434,Official death toll from #covid19 in the Unite...,1
1,1245916400981381130,"Dearest Mr. President @USER 1,169 coronavirus ...",1
2,1241132432402849793,Latest Updates March 20 ⚠️5274 new cases and 3...,1
3,1236107253666607104,真把公主不当干部 BREAKING: 21 people on Grand Princess...,1
4,1239673817552879619,OKLAHOMA CITY — The State Department of Educat...,0


In [12]:
if args.berttweettokenizer_path:
    tokenizer = BERTweetTokenizer(args.berttweettokenizer_path)
else:
    tokenizer = AutoTokenizer.from_pretrained(args.transformer_model_pretrained_path)

if not args.berttweettokenizer_path:
    try:
        bpetokenizer = tokenizers.ByteLevelBPETokenizer(args.bpe_vocab_path, \
                                        args.bpe_merges_path)
    except:
        bpetokenizer = None 
else:
    bpetokenizer = None

I0811 23:55:14.491278 4689735104 configuration_utils.py:283] loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-config.json from cache at /Users/victor/.cache/torch/transformers/e1a2a406b5a05063c31f4dfdee7608986ba7c6393f7f79db5e69dcd197208534.117c81977c5979de8c088352e74ec6e70f5c66096c28b61d3c50101609b39690
I0811 23:55:14.492293 4689735104 configuration_utils.py:319] Model config RobertaConfig {
  "_num_labels": 2,
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bad_words_ids": null,
  "bos_token_id": 0,
  "decoder_start_token_id": null,
  "do_sample": false,
  "early_stopping": false,
  "eos_token_id": 2,
  "finetuning_task": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "is_decoder": false,
  "is_encoder_decoder": false,
  "label2id": {
    "LABEL

In [13]:
train_dataset = data_utils.TransformerDataset(train_df.words, bpetokenizer=bpetokenizer, tokenizer=tokenizer, MAX_LEN=args.max_text_len, \
              target_label=train_df.labels, sequence_target=False, target_text=None, conditional_label=None, conditional_all_labels=None)

val_dataset = data_utils.TransformerDataset(val_df.words, bpetokenizer=bpetokenizer, tokenizer=tokenizer, MAX_LEN=args.max_text_len, \
              target_label=val_df.labels, sequence_target=False, target_text=None, conditional_label=None, conditional_all_labels=None)

In [14]:
class TransformerWithMixout(nn.Module):
    def __init__(self, model, main_dropout_prob=0, mixout_prob=.7, dropout=.3, n_out=1):
        super(TransformerWithMixout, self).__init__()
        for i in range(model.config.num_hidden_layers):
            num = '{}'.format(i)
            for name, module in model._modules['encoder']._modules['layer']._modules[num]._modules['output']._modules.items():
                if name == 'dropout' and isinstance(module, nn.Dropout):
                    model._modules['encoder']._modules['layer']._modules[num]._modules['output']._modules[name] = nn.Dropout(main_dropout_prob)
                    #setattr(model, name, nn.Dropout(0))
                if name.split('.')[-1] == 'dense' and isinstance(module, nn.Linear):
                    target_state_dict = module.state_dict()
                    bias = True if module.bias is not None else False
                    new_module = layers.MixLinear(module.in_features, module.out_features, 
                                           bias, target_state_dict['weight'], mixout_prob)
                    new_module.load_state_dict(target_state_dict)
                    #setattr(model, name, new_module)
                    model._modules['encoder']._modules['layer']._modules[num]._modules['output']._modules[name] = new_module
            
            #model._modules['drop'] = nn.Dropout(main_dropout_prob)
            
            #module = model._modules['out']
            #target_state_dict = module.state_dict()
            #bias = True if module.bias is not None else False
            #new_module = MixLinear(module.in_features, module.out_features, 
            #                       bias, target_state_dict['weight'], mixout_prob)
            #new_module.load_state_dict(target_state_dict)
                    
            #model._modules['out'] = new_module

        self.base_model = model
        self.drop = nn.Dropout(dropout)
        self.out = nn.Linear(model.config.hidden_size, n_out)

    def forward(self, ids, mask, token_type_ids):
        o2 = self.base_model(ids, attention_mask=mask, token_type_ids=token_type_ids)
        o2 = o2[0][:,1:,:]
        bo = self.drop(o2)
        bo = torch.mean(o2, dim=1)
        #bo = self.drop(o2)
        output = self.out(bo)

        return output

In [15]:
config = AutoConfig.from_pretrained(args.transformer_config_path, output_hidden_states=True, output_attentions=True)
basemodel = AutoModel.from_pretrained(args.transformer_model_pretrained_path,config=config)
model = TransformerWithMixout(basemodel)

I0811 23:56:08.665372 4689735104 configuration_utils.py:283] loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-config.json from cache at /Users/victor/.cache/torch/transformers/e1a2a406b5a05063c31f4dfdee7608986ba7c6393f7f79db5e69dcd197208534.117c81977c5979de8c088352e74ec6e70f5c66096c28b61d3c50101609b39690
I0811 23:56:08.666308 4689735104 configuration_utils.py:319] Model config RobertaConfig {
  "_num_labels": 2,
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bad_words_ids": null,
  "bos_token_id": 0,
  "decoder_start_token_id": null,
  "do_sample": false,
  "early_stopping": false,
  "eos_token_id": 2,
  "finetuning_task": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "is_decoder": false,
  "is_encoder_decoder": false,
  "label2id": {
    "LABEL

In [16]:
if _torch_tpu_available and args.use_TPU:
    train_sampler = torch.utils.data.distributed.DistributedSampler(
      train_dataset,
      num_replicas=xm.xrt_world_size(),
      rank=xm.get_ordinal(),
      shuffle=True
    )

    val_sampler = torch.utils.data.distributed.DistributedSampler(
      val_dataset,
      num_replicas=xm.xrt_world_size(),
      rank=xm.get_ordinal(),
      shuffle=False
    )

if _torch_tpu_available and args.use_TPU:
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.train_batch_size, sampler=train_sampler,
        drop_last=True,num_workers=2)

    val_data_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=args.eval_batch_size, sampler=val_sampler,
        drop_last=False,num_workers=1)
else:
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.train_batch_size)

    val_data_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=args.eval_batch_size)

In [18]:
if args.use_torch_trainer:
    device = torch.device("cuda" if _torch_gpu_available and args.use_gpu else "cpu")

    if _torch_tpu_available and args.use_TPU:
        device=xm.xla_device()

    print ("Device: {}".format(device))
    
    if args.use_TPU and _torch_tpu_available and args.num_tpus > 1:
        train_data_loader = torch_xla.distributed.parallel_loader.ParallelLoader(train_data_loader, [device])
        train_data_loader = train_data_loader.per_device_loader(device)


    trainer = BasicTrainer(model, train_data_loader, val_data_loader, device, args.transformer_model_pretrained_path, \
                               final_activation=final_activation, \
                               test_data_loader=val_data_loader)

    param_optimizer = list(trainer.model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            "params": [
                p for n, p in param_optimizer if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.001,
        },
        {
            "params": [
                p for n, p in param_optimizer if any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0,
        },
    ]

    num_train_steps = int(len(train_data_loader) * args.epochs)

    if _torch_tpu_available and args.use_TPU:
        optimizer = AdamW(optimizer_parameters, lr=args.lr*xm.xrt_world_size())
    else:
        optimizer = AdamW(optimizer_parameters, lr=args.lr)

    if args.use_apex and _has_apex:
        model, optimizer = amp.initialize(model, optimizer, opt_level="O1")


    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)
    
    loss = losses.get_loss(args.loss_function)
    scorer = scorers.SKMetric(args.metric, convert=convert_output, reshape=reshape) 
    
    def _mp_fn(rank, flags, trainer, epochs, lr, metric, loss_function, optimizer, scheduler, model_save_path, num_gpus, num_tpus,  \
                max_grad_norm, early_stopping_rounds, snapshot_ensemble, is_amp, use_wandb, seed):
        torch.set_default_tensor_type('torch.FloatTensor')
        a = trainer.train(epochs, lr, metric, loss_function, optimizer, scheduler, model_save_path, num_gpus, num_tpus,  \
                max_grad_norm, early_stopping_rounds, snapshot_ensemble, is_amp, use_wandb, seed)

    FLAGS = {}
    if _torch_tpu_available and args.use_TPU:
        xmp.spawn(_mp_fn, args=(FLAGS, trainer, args.epochs, args.lr, scorer, loss, optimizer, scheduler, args.model_save_path, args.num_gpus, args.num_tpus, \
                 1, 3, False, args.use_apex, False, args.seed), nprocs=8, start_method='fork')
    else:
        use_wandb = _has_wandb and args.wandb_logging
        trainer.train(args.epochs, args.lr, scorer, loss, optimizer, scheduler, args.model_save_path, args.num_gpus, args.num_tpus,  \
                max_grad_norm=1, early_stopping_rounds=3, snapshot_ensemble=False, is_amp=args.use_apex, use_wandb=use_wandb, seed=args.seed)

elif args.use_lightning_trainer and _torch_lightning_available:
    from pytorch_lightning import Trainer, seed_everything
    seed_everything(args.seed)
    
    loss = losses.get_loss(args.loss_function)
    scorer = scorers.PLMetric(args.metric, convert=convert_output, reshape=reshape)
    
    log_args = {'description': args.transformer_model_pretrained_path, 'loss': loss.__class__.__name__, 'epochs': args.epochs, 'learning_rate': args.lr}

    if _has_wandb and not _torch_tpu_available and args.wandb_logging:
        wandb.init(project="WNUT-Task-2",config=log_args)
        wandb_logger = WandbLogger()

    checkpoint_callback = ModelCheckpoint(
                filepath=args.model_save_path,
                save_top_k=1,
                verbose=True,
                monitor='val_metric',
                mode='max'
                )
    earlystop = EarlyStopping(
                monitor='val_metric',
                patience=3,
               verbose=False,
               mode='max'
               )

    if args.use_gpu and _torch_gpu_available:
        print ("using GPU")
        if args.wandb_logging:
            if _has_apex:
                trainer = Trainer(gpus=args.num_gpus, max_epochs=args.epochs, logger=wandb_logger, precision=16, \
                            checkpoint_callback=checkpoint_callback, callbacks=[earlystop])
            else:
                trainer = Trainer(gpus=args.num_gpus, max_epochs=args.epochs, logger=wandb_logger, \
                            checkpoint_callback=checkpoint_callback, callbacks=[earlystop])
        else:
            if _has_apex:
                trainer = Trainer(gpus=args.num_gpus, max_epochs=args.epochs, precision=16, \
                            checkpoint_callback=checkpoint_callback, callbacks=[earlystop])
            else:
                trainer = Trainer(gpus=args.num_gpus, max_epochs=args.epochs, \
                            checkpoint_callback=checkpoint_callback, callbacks=[earlystop])

    elif args.use_TPU and _torch_tpu_available:
        print ("using TPU")
        if _has_apex:
            trainer = Trainer(num_tpu_cores=args.num_tpus, max_epochs=args.epochs, precision=16, \
                        checkpoint_callback=checkpoint_callback, callbacks=[earlystop])
        else:
            trainer = Trainer(num_tpu_cores=args.num_tpus, max_epochs=args.epochs, \
                        checkpoint_callback=checkpoint_callback, callbacks=[earlystop])

    else:
        print ("using CPU")
        if args.wandb_logging:
            if _has_apex:
                trainer = Trainer(max_epochs=args.epochs, logger=wandb_logger, precision=16, \
                        checkpoint_callback=checkpoint_callback, callbacks=[earlystop])
            else:
                trainer = Trainer(max_epochs=args.epochs, logger=wandb_logger, \
                        checkpoint_callback=checkpoint_callback, callbacks=[earlystop])
        else:
            if _has_apex:
                trainer = Trainer(max_epochs=args.epochs, precision=16, \
                        checkpoint_callback=checkpoint_callback, callbacks=[earlystop])
            else:
                trainer = Trainer(max_epochs=args.epochs, checkpoint_callback=checkpoint_callback, callbacks=[earlystop])

    num_train_steps = int(len(train_data_loader) * args.epochs)

    pltrainer = PLTrainer(num_train_steps, model, scorer, loss, args.lr, \
                          final_activation=final_activation, seed=42)

    #try:
    #    print ("Loaded model from previous checkpoint")
    #    pltrainer = PLTrainer.load_from_checkpoint(args.model_save_path)
    #except:
    #    pass

    trainer.fit(pltrainer, train_data_loader, val_data_loader) 

GPU available: False, used: False
I0812 08:03:41.591238 4689735104 distributed.py:29] GPU available: False, used: False
TPU available: False, using: 0 TPU cores
I0812 08:03:41.595145 4689735104 distributed.py:29] TPU available: False, using: 0 TPU cores

  | Name   | Type                  | Params
-------------------------------------------------
0 | model  | TransformerWithMixout | 124 M 
1 | metric | PLMetric              | 0     
I0812 08:03:41.677448 4689735104 lightning.py:1495] 
  | Name   | Type                  | Params
-------------------------------------------------
0 | model  | TransformerWithMixout | 124 M 
1 | metric | PLMetric              | 0     


using CPU
[LOG] Total number of parameters to learn 124646401


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…

val loss = 0.351 val metric = 0.903 


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…


Epoch 00000: val_metric reached 0.90166 (best 0.90166), saving model to ../models/model1_mixout/epoch=0_v0.ckpt as top 1
I0812 08:32:03.452784 4689735104 model_checkpoint.py:346] 
Epoch 00000: val_metric reached 0.90166 (best 0.90166), saving model to ../models/model1_mixout/epoch=0_v0.ckpt as top 1


val loss = 0.501 val metric = 0.902 
Train loss = 0.017 Train metric = 0.994


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…


Epoch 00001: val_metric  was not in top 1
I0812 09:00:05.145328 4689735104 model_checkpoint.py:314] 
Epoch 00001: val_metric  was not in top 1


val loss = 0.672 val metric = 0.884 
Train loss = 0.019 Train metric = 0.993


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…


Epoch 00002: val_metric  was not in top 1
I0812 09:33:04.674398 4689735104 model_checkpoint.py:314] 
Epoch 00002: val_metric  was not in top 1


val loss = 0.461 val metric = 0.895 
Train loss = 0.016 Train metric = 0.995


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…


Epoch 00003: val_metric  was not in top 1
I0812 10:07:17.873924 4689735104 model_checkpoint.py:314] 
Epoch 00003: val_metric  was not in top 1


val loss = 0.533 val metric = 0.883 
Train loss = 0.013 Train metric = 0.996



In [23]:
from tqdm import tqdm

test_output2 = []

for val_batch in tqdm(val_data_loader):
    out = torch.sigmoid(pltrainer(val_batch)).detach().cpu().numpy()
    test_output2.extend(out[:,0].tolist())
    
#test_output2 = np.concatenate(test_output2)


  0%|          | 0/13 [00:00<?, ?it/s][A
  8%|▊         | 1/13 [00:02<00:27,  2.33s/it][A
 15%|█▌        | 2/13 [00:04<00:23,  2.17s/it][A
 23%|██▎       | 3/13 [00:05<00:20,  2.08s/it][A
 31%|███       | 4/13 [00:08<00:18,  2.07s/it][A
 38%|███▊      | 5/13 [00:10<00:16,  2.06s/it][A
 46%|████▌     | 6/13 [00:12<00:14,  2.03s/it][A
 54%|█████▍    | 7/13 [00:13<00:11,  1.97s/it][A
 62%|██████▏   | 8/13 [00:15<00:09,  1.92s/it][A
 69%|██████▉   | 9/13 [00:17<00:07,  1.91s/it][A
 77%|███████▋  | 10/13 [00:19<00:05,  1.88s/it][A
 85%|████████▍ | 11/13 [00:21<00:03,  1.86s/it][A
 92%|█████████▏| 12/13 [00:23<00:01,  1.85s/it][A
100%|██████████| 13/13 [00:23<00:00,  1.84s/it][A


In [24]:
test_output1 = np.array(test_output1)[:,0]
test_output2 = np.array(test_output2)
np.corrcoef(test_output1,test_output2)

array([[1.        , 0.90560145],
       [0.90560145, 1.        ]])