In [1]:
def load_dataset(file_name):
    import pandas as pd
    import json
    
    df = []
    with open('/mnt/ceph/storage/data-in-progress/data-research/web-search/ECIR-22/ecir22-clickbait-spoiling/' + file_name) as f:
        for i in f:
            i = json.loads(i)
            tweet = i['postText']
            label = i['tags']
            
            assert len(tweet) == 1
            tweet = tweet[0]
            
            assert len(label) == 1
            label = label[0]
            
            if label not in ['phrase', 'phrases', 'passage', 'multi']:
                print(label)
                
            assert label in ['phrase', 'phrases', 'passage', 'multi']
            
            if label == 'multi':
                continue
            
            df += [{'text': tweet, 'labels': (label == 'phrase' or label == 'phrases')}]

    return pd.DataFrame(df)   
            
    
test_dataset = load_dataset('test.jsonl')
train_dataset = load_dataset('train.jsonl')
validation_dataset = load_dataset('validation.jsonl')

In [6]:
test_dataset

Unnamed: 0,text,labels
0,This simple household item saves lives,True
1,You'll Never Believe Who Robert Downey Jr.'s '...,True
2,THE NEWEST THEORY ON HIDDLESWIFT'S ROMANCE IS ...,False
3,"The little girl from ""Titanic"" is all grown up...",True
4,Why we never really get over that first love,False
...,...,...
821,Inside our three-month effort to attend Apple'...,False
822,Dad And Son Are Seconds From Assassination By ...,False
823,Kanye West Reveals His Favorite Song... Of All...,False
824,"""Pippi Longstocking"" star arrested",True


In [7]:
train_dataset

Unnamed: 0,text,labels
0,"Wes Welker Wanted Dinner With Tom Brady, But P...",False
1,NASA sets date for full recovery of ozone hole,True
2,This is what makes employees happy -- and it's...,True
3,The perfect way to cook rice so that it's perf...,True
4,What happens if your new AirPods get lost or s...,False
...,...,...
2636,If You See A Purple Butterfly Sticker At The H...,False
2637,Has Facebook's video explosion completely shak...,False
2638,Cop Is Eating At A Chili's When Teen Hands Him...,False
2639,You need to see this Twitter account that pred...,True


In [8]:
validation_dataset

Unnamed: 0,text,labels
0,Five Nights at Freddy’s Sequel Delayed for Wei...,False
1,Here’s how much you should be tipping your hai...,True
2,A man swallowed a microSD card and you won't b...,False
3,This popular soda could cure your hangovers sc...,True
4,The anytime snack you won't feel guilty about ...,True
...,...,...
652,"Dog Dies One Hour After Hiking With His Owner,...",False
653,This is what happens when you leave a hotel cl...,False
654,This Texas GOP elector announces that he won't...,True
655,WikiLeaks' Julian Assange Reported Dead,False


# Training

In [2]:
configurations = []

for learn_rate in [1e-5, 4e-5, 1e-4]:
    for warumup_ratio in [0.02, 0.06, 0.1]:
        for batch_size in [8, 16, 32]:
            configurations += [{
                "overwrite_output_dir": True,
                "num_train_epochs": 10,
                "fp16": False,
                "train_batch_size": batch_size,
                "gradient_accumulation_steps": 4,
                "evaluate_during_training": True,
                "max_seq_length": 64,
                "learning_rate": learn_rate,
                "early_stopping_consider_epochs": True,
                "early_stopping_delta": 0.01,
                "early_stopping_metric": "acc",
                "early_stopping_metric_minimize": False,
                "early_stopping_patience": 3,
                "evaluate_during_training_steps": 331,
                "output_dir": "outputs/",
                'warmup_ratio': warumup_ratio,
                # 'warmup_steps': 0, # usually empfohlen, warmup_ratio ist prozentuales äquivalent 
                #                    # <> überschreiben sich gegenseitig
                'save_steps': 2000,
                "manual_seed": 12345
            }]

In [7]:
def train_model(config, num):
    from simpletransformers.classification import ClassificationModel
    import sklearn
    
    config["output_dir"] = "outputs/bert_" + str(num)
    
    model = ClassificationModel("bert", "bert-large-cased", args = config)
    # model.train_model(train_dataset)
    model.train_model(train_dataset, eval_df=validation_dataset, acc=sklearn.metrics.accuracy_score)

In [9]:
for config, num in zip(configurations, range(len(configurations))):
    print('Running configuration number', num)
    
    train_model(config, num)

Running configuration number 0


Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

  0%|          | 0/2641 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running configuration number 1


Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

  0%|          | 0/2641 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running configuration number 2


Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

  0%|          | 0/2641 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running configuration number 3


Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

  0%|          | 0/2641 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running configuration number 4


Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

  0%|          | 0/2641 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running configuration number 5


Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

  0%|          | 0/2641 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running configuration number 6


Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

  0%|          | 0/2641 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running configuration number 7


Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

  0%|          | 0/2641 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running configuration number 8


Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

  0%|          | 0/2641 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running configuration number 9


Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

  0%|          | 0/2641 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running configuration number 10


Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

  0%|          | 0/2641 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running configuration number 11


Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

  0%|          | 0/2641 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running configuration number 12


Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

  0%|          | 0/2641 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running configuration number 13


Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

  0%|          | 0/2641 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running configuration number 14


Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

  0%|          | 0/2641 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running configuration number 15


Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

  0%|          | 0/2641 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running configuration number 16


Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

  0%|          | 0/2641 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running configuration number 17


Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

  0%|          | 0/2641 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running configuration number 18


Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

  0%|          | 0/2641 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 4 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 5 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 6 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 7 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running configuration number 19


Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

  0%|          | 0/2641 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 1 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 2 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 3 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 4 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 5 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 6 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 7 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 8 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 9 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running configuration number 20


Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

  0%|          | 0/2641 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running configuration number 21


Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

  0%|          | 0/2641 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 1 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 2 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 3 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 4 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 5 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 6 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 7 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 8 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 9 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running configuration number 22


Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

  0%|          | 0/2641 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 1 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 3 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 4 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 5 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 6 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 7 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 8 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 9 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running configuration number 23


Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

  0%|          | 0/2641 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running configuration number 24


Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

  0%|          | 0/2641 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running configuration number 25


Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

  0%|          | 0/2641 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 4 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 5 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 6 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 7 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 8 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running Epoch 9 of 10:   0%|          | 0/166 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Running configuration number 26


Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

  0%|          | 0/2641 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

In [42]:
from simpletransformers.classification import ClassificationModel
model = ClassificationModel("roberta", "roberta-large", args = args)
# model.train_model(train_dataset)
model.train_model(train_dataset, eval_df=validation_dataset, acc=sklearn.metrics.accuracy_score)

Some weights of the model checkpoint at roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.weight', 'classif

  0%|          | 0/2641 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

(820,
 defaultdict(list,
             {'global_step': [82,
               164,
               246,
               300,
               328,
               410,
               492,
               574,
               600,
               656,
               738,
               820],
              'train_loss': [0.4724636971950531,
               0.23830775916576385,
               0.4726167321205139,
               0.324043333530426,
               0.04376447573304176,
               0.0019246878800913692,
               0.011581802740693092,
               0.0007018963224254549,
               0.0034602258820086718,
               0.00038723601028323174,
               0.0016906267264857888,
               9.881961887003854e-05],
              'mcc': [0.4379899140133909,
               0.5700232642378193,
               0.5278943599035283,
               0.5303111080019447,
               0.5986388216056269,
               0.5522782622892372,
               0.5474424397630353,
           

In [38]:
model.eval_model(validation_dataset, acc=sklearn.metrics.accuracy_score)

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

({'mcc': 0.5189907816932701,
  'tp': 264,
  'tn': 235,
  'fp': 87,
  'fn': 71,
  'auroc': 0.8332344488736442,
  'auprc': 0.8547997162455769,
  'acc': 0.7595129375951294,
  'eval_loss': 1.5449147986592473},
 array([[-4.50758696,  4.8231678 ],
        [-4.62452698,  4.63890123],
        [ 4.56198597, -4.33566332],
        ...,
        [ 0.98726308, -0.87757879],
        [ 3.27670145, -3.22399449],
        [-4.55029869,  4.87138462]]),
 [['Five Nights at Freddy’s Sequel Delayed for Weird Reason',
   'Here’s how much you should be tipping your hairdresser',
   "A man swallowed a microSD card and you won't believe what happened next!",
   'This popular soda could cure your hangovers scientists say:',
   "The anytime snack you won't feel guilty about eating",
   'You won\'t believe this stunning "Harry Potter" revelation about Professor McGonagall',
   "J.J. Abrams has an answer on if there will be a post-credits scene in the new 'Star Wars'",
   'Kristin Cavallari (@KristinCav) opens up abo

In [37]:
model.eval_model(test_dataset, acc=sklearn.metrics.accuracy_score)

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

({'mcc': 0.5662373938122638,
  'tp': 338,
  'tn': 309,
  'fp': 94,
  'fn': 85,
  'auroc': 0.83468548533751,
  'auprc': 0.8406305749647641,
  'acc': 0.7832929782082324,
  'eval_loss': 1.4019140609235854},
 array([[-4.73007059,  4.75768614],
        [-4.66822481,  4.89313078],
        [ 4.12303448, -3.80747771],
        ...,
        [-4.3079958 ,  4.59369469],
        [-4.60104084,  4.89709044],
        [-4.45188284,  4.57753611]]),
 [])

# Model Selection with Validation data

In [None]:
from glob import glob
from simpletransformers.classification import ClassificationModel
import sklearn
import pandas as pd
from tqdm import tqdm

df = []

for config, num in zip(configurations, range(len(configurations))):
    directory = "outputs/bert_" + str(num) + "/"
    
    for checkpoint in tqdm(glob(directory + "checkpoint-*")):
        # print(checkpoint)
        model = ClassificationModel("bert", checkpoint)
        
        valid_acc = model.eval_model(validation_dataset, acc=sklearn.metrics.accuracy_score)[0]['acc']
        test_acc = model.eval_model(test_dataset, acc=sklearn.metrics.accuracy_score)[0]['acc']
        
        df += [{"checkpoint": checkpoint, "valid_acc": valid_acc, "test_acc": test_acc,
               "config": config}]
        
df = pd.DataFrame(df)
df

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

  8%|▊         | 1/12 [00:34<06:20, 34.56s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 17%|█▋        | 2/12 [01:09<05:47, 34.70s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 25%|██▌       | 3/12 [01:43<05:10, 34.49s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 33%|███▎      | 4/12 [02:17<04:33, 34.23s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 42%|████▏     | 5/12 [02:51<03:59, 34.21s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 50%|█████     | 6/12 [03:25<03:25, 34.20s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 58%|█████▊    | 7/12 [03:59<02:50, 34.13s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 67%|██████▋   | 8/12 [04:33<02:16, 34.04s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 75%|███████▌  | 9/12 [05:07<01:42, 34.10s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 83%|████████▎ | 10/12 [05:42<01:08, 34.19s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 92%|█████████▏| 11/12 [06:17<00:34, 34.37s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

100%|██████████| 12/12 [06:51<00:00, 34.28s/it]
  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

  9%|▉         | 1/11 [00:35<05:56, 35.62s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 18%|█▊        | 2/11 [01:10<05:17, 35.32s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 27%|██▋       | 3/11 [01:46<04:42, 35.32s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 36%|███▋      | 4/11 [02:21<04:07, 35.32s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 45%|████▌     | 5/11 [02:56<03:31, 35.17s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 55%|█████▍    | 6/11 [03:32<02:56, 35.38s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 64%|██████▎   | 7/11 [04:06<02:20, 35.14s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 73%|███████▎  | 8/11 [04:41<01:45, 35.05s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 82%|████████▏ | 9/11 [05:16<01:09, 34.96s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 91%|█████████ | 10/11 [05:52<00:35, 35.29s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

100%|██████████| 11/11 [06:27<00:00, 35.26s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 10%|█         | 1/10 [00:35<05:19, 35.50s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 20%|██        | 2/10 [01:10<04:42, 35.27s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 30%|███       | 3/10 [01:45<04:06, 35.21s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 40%|████      | 4/10 [02:20<03:30, 35.15s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 50%|█████     | 5/10 [02:56<02:56, 35.23s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 60%|██████    | 6/10 [03:30<02:20, 35.04s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 70%|███████   | 7/10 [04:05<01:45, 35.00s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 80%|████████  | 8/10 [04:40<01:10, 35.04s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 90%|█████████ | 9/10 [05:15<00:35, 35.05s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

100%|██████████| 10/10 [05:51<00:00, 35.16s/it]
  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

  8%|▊         | 1/12 [00:36<06:36, 36.08s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 17%|█▋        | 2/12 [01:11<05:59, 35.92s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 25%|██▌       | 3/12 [01:48<05:24, 36.03s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 33%|███▎      | 4/12 [02:23<04:46, 35.85s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 42%|████▏     | 5/12 [02:59<04:11, 35.97s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 50%|█████     | 6/12 [03:41<03:46, 37.82s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 58%|█████▊    | 7/12 [04:20<03:11, 38.35s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 67%|██████▋   | 8/12 [04:57<02:31, 37.95s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 75%|███████▌  | 9/12 [05:36<01:54, 38.17s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 83%|████████▎ | 10/12 [06:13<01:15, 37.84s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 92%|█████████▏| 11/12 [06:54<00:38, 38.89s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

100%|██████████| 12/12 [07:36<00:00, 38.01s/it]
  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

  9%|▉         | 1/11 [00:45<07:30, 45.05s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 18%|█▊        | 2/11 [01:27<06:32, 43.60s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 27%|██▋       | 3/11 [02:12<05:52, 44.09s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 36%|███▋      | 4/11 [02:53<05:01, 43.14s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 45%|████▌     | 5/11 [03:36<04:16, 42.77s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 55%|█████▍    | 6/11 [04:25<03:44, 44.98s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 64%|██████▎   | 7/11 [05:07<02:55, 43.90s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 73%|███████▎  | 8/11 [05:43<02:04, 41.41s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 82%|████████▏ | 9/11 [06:19<01:19, 39.73s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 91%|█████████ | 10/11 [06:54<00:38, 38.49s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

100%|██████████| 11/11 [07:36<00:00, 41.51s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 10%|█         | 1/10 [00:40<06:08, 40.97s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 20%|██        | 2/10 [01:22<05:28, 41.02s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 30%|███       | 3/10 [02:05<04:53, 41.93s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 40%|████      | 4/10 [02:45<04:07, 41.17s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 50%|█████     | 5/10 [03:31<03:36, 43.22s/it]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 80%|████████  | 8/10 [05:40<01:26, 43.02s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 90%|█████████ | 9/10 [06:20<00:42, 42.05s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

100%|██████████| 10/10 [07:03<00:00, 42.36s/it]
  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

  8%|▊         | 1/12 [00:39<07:10, 39.17s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 17%|█▋        | 2/12 [01:22<06:57, 41.77s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 25%|██▌       | 3/12 [02:08<06:30, 43.40s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 33%|███▎      | 4/12 [02:47<05:35, 41.93s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 42%|████▏     | 5/12 [03:29<04:52, 41.85s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 50%|█████     | 6/12 [04:08<04:05, 40.89s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 58%|█████▊    | 7/12 [04:47<03:21, 40.34s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 67%|██████▋   | 8/12 [05:27<02:40, 40.19s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 75%|███████▌  | 9/12 [06:09<02:02, 40.88s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 83%|████████▎ | 10/12 [06:57<01:26, 43.08s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 92%|█████████▏| 11/12 [07:44<00:43, 43.99s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

100%|██████████| 12/12 [08:29<00:00, 42.49s/it]
  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

  9%|▉         | 1/11 [00:48<08:04, 48.45s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 18%|█▊        | 2/11 [01:40<07:35, 50.56s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 27%|██▋       | 3/11 [02:31<06:45, 50.68s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 36%|███▋      | 4/11 [03:21<05:53, 50.47s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 55%|█████▍    | 6/11 [05:46<04:51, 58.36s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 64%|██████▎   | 7/11 [06:45<03:55, 58.83s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 73%|███████▎  | 8/11 [07:45<02:57, 59.08s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 82%|████████▏ | 9/11 [08:45<01:58, 59.46s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 91%|█████████ | 10/11 [09:44<00:59, 59.30s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

100%|██████████| 11/11 [10:43<00:00, 58.54s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 10%|█         | 1/10 [00:58<08:43, 58.14s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 20%|██        | 2/10 [01:50<07:19, 54.90s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 30%|███       | 3/10 [02:47<06:28, 55.55s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 40%|████      | 4/10 [03:44<05:37, 56.23s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 50%|█████     | 5/10 [04:36<04:33, 54.61s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 60%|██████    | 6/10 [05:30<03:38, 54.63s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 70%|███████   | 7/10 [06:27<02:45, 55.25s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 80%|████████  | 8/10 [07:19<01:48, 54.34s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 90%|█████████ | 9/10 [08:18<00:55, 55.64s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

100%|██████████| 10/10 [09:16<00:00, 55.69s/it]
  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

  8%|▊         | 1/12 [00:54<10:01, 54.64s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 17%|█▋        | 2/12 [01:50<09:11, 55.12s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 25%|██▌       | 3/12 [02:45<08:18, 55.35s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 33%|███▎      | 4/12 [03:40<07:19, 54.99s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 50%|█████     | 6/12 [05:32<05:34, 55.72s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 58%|█████▊    | 7/12 [06:30<04:42, 56.56s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 67%|██████▋   | 8/12 [07:26<03:45, 56.28s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 75%|███████▌  | 9/12 [08:25<02:51, 57.22s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 83%|████████▎ | 10/12 [09:25<01:55, 57.92s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 92%|█████████▏| 11/12 [10:19<00:56, 56.73s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

100%|██████████| 12/12 [11:15<00:00, 56.29s/it]
  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

  9%|▉         | 1/11 [00:59<09:51, 59.19s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 18%|█▊        | 2/11 [01:54<08:33, 57.01s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 27%|██▋       | 3/11 [02:55<07:51, 58.94s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 36%|███▋      | 4/11 [04:00<07:07, 61.07s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 45%|████▌     | 5/11 [05:00<06:04, 60.78s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 55%|█████▍    | 6/11 [06:00<05:02, 60.47s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 64%|██████▎   | 7/11 [06:58<03:58, 59.56s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 73%|███████▎  | 8/11 [07:55<02:56, 58.87s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 82%|████████▏ | 9/11 [08:54<01:57, 58.78s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 91%|█████████ | 10/11 [09:54<00:59, 59.34s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

100%|██████████| 11/11 [10:48<00:00, 58.91s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 10%|█         | 1/10 [00:55<08:15, 55.09s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 20%|██        | 2/10 [01:51<07:28, 56.07s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 30%|███       | 3/10 [02:45<06:25, 55.12s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 40%|████      | 4/10 [03:38<05:24, 54.03s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 50%|█████     | 5/10 [04:34<04:34, 54.99s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 60%|██████    | 6/10 [05:28<03:38, 54.60s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 70%|███████   | 7/10 [06:20<02:41, 53.73s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 80%|████████  | 8/10 [07:15<01:48, 54.09s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 90%|█████████ | 9/10 [08:13<00:55, 55.39s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

100%|██████████| 10/10 [09:10<00:00, 55.00s/it]
  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

  8%|▊         | 1/12 [00:52<09:39, 52.69s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 17%|█▋        | 2/12 [01:52<09:31, 57.13s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 25%|██▌       | 3/12 [02:50<08:36, 57.40s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 33%|███▎      | 4/12 [03:46<07:33, 56.69s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 42%|████▏     | 5/12 [04:44<06:40, 57.26s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 50%|█████     | 6/12 [05:38<05:37, 56.27s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 58%|█████▊    | 7/12 [06:35<04:41, 56.38s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/826 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/104 [00:00<?, ?it/s]

 67%|██████▋   | 8/12 [07:31<03:45, 56.39s/it]

  0%|          | 0/657 [00:00<?, ?it/s]

In [None]:
df.to_json('outputs/bert_eval_results.csv')

In [None]:
print('finished!')

In [16]:
df[df['checkpoint'].str.contains('epoch')].sort_values('valid_acc', ascending=False)

Unnamed: 0,checkpoint,valid_acc,test_acc,config
143,outputs/bert_12/checkpoint-164-epoch-2,0.783866,0.757869,"{'overwrite_output_dir': True, 'num_train_epoc..."
274,outputs/bert_24/checkpoint-574-epoch-7,0.780822,0.774818,"{'overwrite_output_dir': True, 'num_train_epoc..."
265,outputs/bert_24/checkpoint-656-epoch-8,0.773212,0.778450,"{'overwrite_output_dir': True, 'num_train_epoc..."
259,outputs/bert_23/checkpoint-80-epoch-4,0.773212,0.746973,"{'overwrite_output_dir': True, 'num_train_epoc..."
108,outputs/bert_9/checkpoint-492-epoch-6,0.767123,0.766344,"{'overwrite_output_dir': True, 'num_train_epoc..."
...,...,...,...,...
234,outputs/bert_21/checkpoint-410-epoch-5,0.490107,0.487893,"{'overwrite_output_dir': True, 'num_train_epoc..."
207,outputs/bert_18/checkpoint-492-epoch-6,0.490107,0.487893,"{'overwrite_output_dir': True, 'num_train_epoc..."
201,outputs/bert_18/checkpoint-410-epoch-5,0.490107,0.487893,"{'overwrite_output_dir': True, 'num_train_epoc..."
240,outputs/bert_21/checkpoint-492-epoch-6,0.490107,0.487893,"{'overwrite_output_dir': True, 'num_train_epoc..."


In [17]:
df.sort_values('test_acc', ascending=False)

Unnamed: 0,checkpoint,valid_acc,test_acc,config
265,outputs/bert_24/checkpoint-656-epoch-8,0.773212,0.778450,"{'overwrite_output_dir': True, 'num_train_epoc..."
274,outputs/bert_24/checkpoint-574-epoch-7,0.780822,0.774818,"{'overwrite_output_dir': True, 'num_train_epoc..."
132,outputs/bert_12/checkpoint-246-epoch-3,0.765601,0.772397,"{'overwrite_output_dir': True, 'num_train_epoc..."
109,outputs/bert_9/checkpoint-574-epoch-7,0.750381,0.771186,"{'overwrite_output_dir': True, 'num_train_epoc..."
294,outputs/bert_26/checkpoint-180-epoch-9,0.750381,0.771186,"{'overwrite_output_dir': True, 'num_train_epoc..."
...,...,...,...,...
283,outputs/bert_25/checkpoint-164-epoch-4,0.490107,0.487893,"{'overwrite_output_dir': True, 'num_train_epoc..."
234,outputs/bert_21/checkpoint-410-epoch-5,0.490107,0.487893,"{'overwrite_output_dir': True, 'num_train_epoc..."
243,outputs/bert_22/checkpoint-123-epoch-3,0.490107,0.487893,"{'overwrite_output_dir': True, 'num_train_epoc..."
242,outputs/bert_21/checkpoint-164-epoch-2,0.490107,0.487893,"{'overwrite_output_dir': True, 'num_train_epoc..."


In [18]:
for _, i in [i for i in df[df['checkpoint'].str.contains('epoch')].sort_values('valid_acc', ascending=False).iterrows()][:5]:
    print(i['valid_acc'], i['config'])

0.7838660578386606 {'overwrite_output_dir': True, 'num_train_epochs': 10, 'fp16': False, 'train_batch_size': 8, 'gradient_accumulation_steps': 4, 'evaluate_during_training': True, 'max_seq_length': 64, 'learning_rate': 4e-05, 'early_stopping_consider_epochs': True, 'early_stopping_delta': 0.01, 'early_stopping_metric': 'acc', 'early_stopping_metric_minimize': False, 'early_stopping_patience': 3, 'evaluate_during_training_steps': 331, 'output_dir': 'outputs/bert_12', 'warmup_ratio': 0.06, 'save_steps': 2000, 'manual_seed': 12345}
0.7808219178082192 {'overwrite_output_dir': True, 'num_train_epochs': 10, 'fp16': False, 'train_batch_size': 8, 'gradient_accumulation_steps': 4, 'evaluate_during_training': True, 'max_seq_length': 64, 'learning_rate': 0.0001, 'early_stopping_consider_epochs': True, 'early_stopping_delta': 0.01, 'early_stopping_metric': 'acc', 'early_stopping_metric_minimize': False, 'early_stopping_patience': 3, 'evaluate_during_training_steps': 331, 'output_dir': 'outputs/ber