In [1]:
import json
from pprint import pprint, pformat
from pathlib import Path
from dataclasses import dataclass

import bigbench.api.results as bb

from lass.log_handling import LogLoader

# Creating test set

Things to keep in mind:
- Instances are used for multiple systems. These multiple results should not be split across train and test.
- Instances are used for multiple n-shots. These multiple results should not be split across train and test.
- Not all models have results available for all tasks, specifically, BIG-G sparse results are not available for multiple tasks.

In [2]:
loader = (LogLoader(logdir = Path('../artifacts/logs'))
        .with_tasks('paper-full')
        .with_model_families(['BIG-G T=0'])
        .with_model_sizes(['128b'])
        .with_shots([0,1,2,3])
        .with_query_types([bb.MultipleChoiceQuery])
)

samples = list(loader.load_per_sample())
print(f"{len(samples)} samples\nSample #0:\n{pformat(vars(samples[0]))} ")

221724 samples
Sample #0:
{'absolute_scores': [-17.807415008544922,
                     -18.298959732055664,
                     -12.581655502319336,
                     -17.21178436279297,
                     -31.068716049194336],
 'correct': 0.0,
 'input': 'In what follows, we provide short narratives, each of which '
          'illustrates a common proverb. \n'
          'Narrative: Carla was having trouble juggling college, working at '
          'the diner and being a mother. She never had a day off and was burnt '
          'out. Her friend told her that her hard work would pay off one day '
          "and to keep going! Carla's friend was right; after a few more years "
          'of hard work, Carla graduated school and was able to get a better '
          'job. She was able to take a vacation and become overall '
          'successful.\n'
          'This narrative is a good illustration of the following proverb: ',
 'metrics': {'calibration_multiple_choice_brier_score': 0.

In [3]:
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
%env CUDA_VISIBLE_DEVICES=1

import torch

print(torch.cuda.device_count())

env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=1


1


In [4]:
import pandas as pd

print("Original data:")
df = pd.DataFrame(samples)
display(df.head(1))
display(df['correct'].value_counts().head(4))

print("\n\nHuggingface ready data:")
# df_hf stands for HuggingFace dataset compatible DataFrame
df_hf = df[['input', 'correct']].rename(columns={'input':'text','correct':'label'})
df_hf = df_hf[df_hf['label'].isin([0.0, 1.0])]
df_hf[['label']] = df_hf[['label']].astype(int)
display(df_hf.head(1))
display(df_hf['label'].value_counts().head(5))



Original data:


Unnamed: 0,input,targets,scores,target_values,correct,absolute_scores,normalized_scores,metrics
0,"In what follows, we provide short narratives, ...","[Seek and you shall find, It's better to light...","[-17.807415008544922, -18.298959732055664, -12...","{'April showers bring forth May flowers': 1, '...",0.0,"[-17.807415008544922, -18.298959732055664, -12...","[-5.244009971618652, -5.7355546951293945, -0.0...",{'calibration_multiple_choice_brier_score': 0....


0.0    141612
1.0     78670
0.8       321
0.6       273
Name: correct, dtype: int64



Huggingface ready data:


Unnamed: 0,text,label
0,"In what follows, we provide short narratives, ...",0


0    141612
1     78670
Name: label, dtype: int64

In [5]:
from datasets import Dataset, DatasetDict

test_fraction = 0.2

# Per instance
df_hf_train=df_hf.sample(frac=(1-test_fraction),random_state=1234)
df_hf_test=df_hf.drop(df_hf_train.index)

# Per task (approx)
# df_hf_train = df.iloc[:900,:]
# df_hf_test = df.iloc[900:,:]

display(f"Train accuracy: {df_hf_train['label'].mean():.2f} ({len(df_hf_train)} instances)")
display(f"Test accuracy: {df_hf_test['label'].mean():.2f} ({len(df_hf_test)} instances)")

ds = DatasetDict()
ds['train'] = Dataset.from_pandas(df_hf_train, split='train')
ds['test'] = Dataset.from_pandas(df_hf_test, split='test')

dataset = ds
ds['train'][0]

'Train accuracy: 0.36 (176226 instances)'

'Test accuracy: 0.35 (44056 instances)'

{'text': 'This is a natural language inference task. There are two sentences in English. The answer is "entailment" if the first sentence entails the second, "contradiction" if the second sentence contradicts the first, and "neutral" if neither is of those two cases holds.\n\n\nSentence 1: Speaker B: what\'s going to come out of this, not because of Democrats, but because people, Trump supporters and other things, you showed up at ours, we\'re going to show up at yours. this only gets worse.\r\nSpeaker A: Well, I\'m not sure about that, but it isn\'t Donald Trump who has been inciting the violence. \nSentence 2: Someone has been inciting Donald Trump\n\nThe answer is: \nneutral\n\nSentence 1: And I think George is absolutely right, they\'re going to start moving for delays. But they\'ve got to figure out what kind of delays they go for and how much they can afford to possibly alienate some Democratic senators who want to go ahead and get this thing over with. So they\'ve got a tight, i

In [6]:
# Analyse LM results
import math
import sklearn as sk
import numpy as np

lm_preds = df[df['correct'].isin([0.0, 1.0])]
lm_preds[['correct']] = lm_preds[['correct']].astype(int)
lm_preds = lm_preds.drop(df_hf_train.index)

# def target_idx(row):
#     tvs = row['target_values']
#     targets = row['targets']
#     try:
#         target_idx = [idx for idx, t in enumerate(targets) if tvs[t] == 1][0]
#     # Goddamn questions where all answers are wrong...
#     except IndexError as e:
#         # print(row.name)
#         return 0
    
#     return int(target_idx)
def confidence(row):
    return np.max(row['normalized_scores'])

# lm_preds['target_idx'] = lm_preds.apply(target_idx, axis=1)
# lm_preds['target_score_normalized'] = lm_preds.apply(lambda row: row['normalized_scores'][row['target_idx']], axis=1)
# lm_preds['target_score_normalized'].head()

# Assumes the actual output is arg_max
lm_preds['confidence_normalized'] = lm_preds.apply(lambda row: math.exp(np.max(row['normalized_scores'])), axis=1)
lm_preds['confidence_absolute'] = lm_preds.apply(lambda row: math.exp(np.max(row['absolute_scores'])), axis=1)

display(sk.metrics.roc_auc_score(lm_preds['correct'], lm_preds['confidence_normalized']))
sk.metrics.roc_auc_score(lm_preds['correct'], lm_preds['confidence_absolute'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lm_preds[['correct']] = lm_preds[['correct']].astype(int)


0.6029696957278889

0.589004826718492

In [7]:
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")

def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True, return_tensors="np")

tokenized_datasets = dataset.map(tokenize_function, batched=True)



  0%|          | 0/177 [00:00<?, ?ba/s]

  0%|          | 0/45 [00:00<?, ?ba/s]

In [8]:
train_dataset = tokenized_datasets["train"].shuffle(seed=42) #.select(range(50))
eval_dataset = tokenized_datasets["test"].shuffle(seed=42) #.select(range(50))
len(train_dataset), len(eval_dataset)

(176226, 44056)

In [9]:
%env WANDB_PROJECT=lass
%env WANDB_LOG_MODEL=true
%env TOKENIZERS_PARALLELISM=true
import wandb
wandb.login()

env: WANDB_PROJECT=lass
env: WANDB_LOG_MODEL=true
env: TOKENIZERS_PARALLELISM=true


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


[34m[1mwandb[0m: Currently logged in as: [33mwschella[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [10]:
from transformers import AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer
import numpy as np
import scipy
import torch.nn as nn
import lass
import lass.metrics.hf

# model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", num_labels=2)
model = AutoModelForSequenceClassification.from_pretrained("albert-base-v2", num_labels=2)
# model = AutoModelForSequenceClassification.from_pretrained("./test_trainer/checkpoint-13500", num_labels=2)
# model = AutoModelForSequenceClassification.from_pretrained("../artifacts/assessors/bert-bs32/checkpoint-3000", num_labels=2)

training_args = TrainingArguments(
    output_dir="albert-bs32-allsh-wd",
    run_name="albert-bs32-allsh-wd",
    evaluation_strategy="epoch",
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    weight_decay=0.01,
    report_to="wandb",
    num_train_epochs=6
)

compute_metrics = lass.metrics.hf.get_metric_computer([
  "accuracy",
  "precision",
  "recall",
  "f1",
  "roc_auc",
  "brier_score",
])

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
)

Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertForSequenceClassification: ['predictions.bias', 'predictions.decoder.weight', 'predictions.dense.weight', 'predictions.LayerNorm.weight', 'predictions.decoder.bias', 'predictions.dense.bias', 'predictions.LayerNorm.bias']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
trainer.train()

wandb.finish()

The following columns in the training set don't have a corresponding argument in `AlbertForSequenceClassification.forward` and have been ignored: __index_level_0__, text. If __index_level_0__, text are not expected by `AlbertForSequenceClassification.forward`,  you can safely ignore this message.


***** Running training *****


  Num examples = 176226


  Num Epochs = 6


  Instantaneous batch size per device = 32


  Total train batch size (w. parallel, distributed & accumulation) = 32


  Gradient Accumulation steps = 1


  Total optimization steps = 33048


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Roc Auc,Brier Score
1,0.6068,0.595102,0.649877,0.503353,0.684589,0.580146,0.709231,0.204504
2,0.5652,0.567389,0.700722,0.613775,0.412732,0.493566,0.746172,0.193317
3,0.5393,0.56168,0.696318,0.559703,0.65883,0.605234,0.758735,0.191566
4,0.5034,0.553262,0.714976,0.6127,0.525599,0.565817,0.766167,0.186669
5,0.4363,0.586626,0.712888,0.600413,0.560416,0.579726,0.761794,0.192301
6,0.3976,0.620371,0.716429,0.614055,0.531573,0.569845,0.755209,0.201727


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-500


Configuration saved in albert-bs32-allsh-wd/checkpoint-500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-1000


Configuration saved in albert-bs32-allsh-wd/checkpoint-1000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-1000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-1500


Configuration saved in albert-bs32-allsh-wd/checkpoint-1500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-1500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-2000


Configuration saved in albert-bs32-allsh-wd/checkpoint-2000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-2000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-2500


Configuration saved in albert-bs32-allsh-wd/checkpoint-2500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-2500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-3000


Configuration saved in albert-bs32-allsh-wd/checkpoint-3000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-3000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-3500


Configuration saved in albert-bs32-allsh-wd/checkpoint-3500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-3500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-4000


Configuration saved in albert-bs32-allsh-wd/checkpoint-4000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-4000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-4500


Configuration saved in albert-bs32-allsh-wd/checkpoint-4500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-4500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-5000


Configuration saved in albert-bs32-allsh-wd/checkpoint-5000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-5000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-5500


Configuration saved in albert-bs32-allsh-wd/checkpoint-5500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-5500/pytorch_model.bin


The following columns in the evaluation set don't have a corresponding argument in `AlbertForSequenceClassification.forward` and have been ignored: __index_level_0__, text. If __index_level_0__, text are not expected by `AlbertForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 44056


  Batch size = 32


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-6000


Configuration saved in albert-bs32-allsh-wd/checkpoint-6000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-6000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-6500


Configuration saved in albert-bs32-allsh-wd/checkpoint-6500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-6500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-7000


Configuration saved in albert-bs32-allsh-wd/checkpoint-7000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-7000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-7500


Configuration saved in albert-bs32-allsh-wd/checkpoint-7500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-7500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-8000


Configuration saved in albert-bs32-allsh-wd/checkpoint-8000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-8000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-8500


Configuration saved in albert-bs32-allsh-wd/checkpoint-8500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-8500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-9000


Configuration saved in albert-bs32-allsh-wd/checkpoint-9000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-9000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-9500


Configuration saved in albert-bs32-allsh-wd/checkpoint-9500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-9500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-10000


Configuration saved in albert-bs32-allsh-wd/checkpoint-10000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-10000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-10500


Configuration saved in albert-bs32-allsh-wd/checkpoint-10500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-10500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-11000


Configuration saved in albert-bs32-allsh-wd/checkpoint-11000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-11000/pytorch_model.bin


The following columns in the evaluation set don't have a corresponding argument in `AlbertForSequenceClassification.forward` and have been ignored: __index_level_0__, text. If __index_level_0__, text are not expected by `AlbertForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 44056


  Batch size = 32


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-11500


Configuration saved in albert-bs32-allsh-wd/checkpoint-11500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-11500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-12000


Configuration saved in albert-bs32-allsh-wd/checkpoint-12000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-12000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-12500


Configuration saved in albert-bs32-allsh-wd/checkpoint-12500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-12500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-13000


Configuration saved in albert-bs32-allsh-wd/checkpoint-13000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-13000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-13500


Configuration saved in albert-bs32-allsh-wd/checkpoint-13500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-13500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-14000


Configuration saved in albert-bs32-allsh-wd/checkpoint-14000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-14000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-14500


Configuration saved in albert-bs32-allsh-wd/checkpoint-14500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-14500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-15000


Configuration saved in albert-bs32-allsh-wd/checkpoint-15000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-15000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-15500


Configuration saved in albert-bs32-allsh-wd/checkpoint-15500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-15500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-16000


Configuration saved in albert-bs32-allsh-wd/checkpoint-16000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-16000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-16500


Configuration saved in albert-bs32-allsh-wd/checkpoint-16500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-16500/pytorch_model.bin


The following columns in the evaluation set don't have a corresponding argument in `AlbertForSequenceClassification.forward` and have been ignored: __index_level_0__, text. If __index_level_0__, text are not expected by `AlbertForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 44056


  Batch size = 32


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-17000


Configuration saved in albert-bs32-allsh-wd/checkpoint-17000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-17000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-17500


Configuration saved in albert-bs32-allsh-wd/checkpoint-17500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-17500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-18000


Configuration saved in albert-bs32-allsh-wd/checkpoint-18000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-18000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-18500


Configuration saved in albert-bs32-allsh-wd/checkpoint-18500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-18500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-19000


Configuration saved in albert-bs32-allsh-wd/checkpoint-19000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-19000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-19500


Configuration saved in albert-bs32-allsh-wd/checkpoint-19500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-19500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-20000


Configuration saved in albert-bs32-allsh-wd/checkpoint-20000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-20000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-20500


Configuration saved in albert-bs32-allsh-wd/checkpoint-20500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-20500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-21000


Configuration saved in albert-bs32-allsh-wd/checkpoint-21000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-21000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-21500


Configuration saved in albert-bs32-allsh-wd/checkpoint-21500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-21500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-22000


Configuration saved in albert-bs32-allsh-wd/checkpoint-22000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-22000/pytorch_model.bin


The following columns in the evaluation set don't have a corresponding argument in `AlbertForSequenceClassification.forward` and have been ignored: __index_level_0__, text. If __index_level_0__, text are not expected by `AlbertForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 44056


  Batch size = 32


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-22500


Configuration saved in albert-bs32-allsh-wd/checkpoint-22500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-22500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-23000


Configuration saved in albert-bs32-allsh-wd/checkpoint-23000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-23000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-23500


Configuration saved in albert-bs32-allsh-wd/checkpoint-23500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-23500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-24000


Configuration saved in albert-bs32-allsh-wd/checkpoint-24000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-24000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-24500


Configuration saved in albert-bs32-allsh-wd/checkpoint-24500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-24500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-25000


Configuration saved in albert-bs32-allsh-wd/checkpoint-25000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-25000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-25500


Configuration saved in albert-bs32-allsh-wd/checkpoint-25500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-25500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-26000


Configuration saved in albert-bs32-allsh-wd/checkpoint-26000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-26000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-26500


Configuration saved in albert-bs32-allsh-wd/checkpoint-26500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-26500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-27000


Configuration saved in albert-bs32-allsh-wd/checkpoint-27000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-27000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-27500


Configuration saved in albert-bs32-allsh-wd/checkpoint-27500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-27500/pytorch_model.bin


The following columns in the evaluation set don't have a corresponding argument in `AlbertForSequenceClassification.forward` and have been ignored: __index_level_0__, text. If __index_level_0__, text are not expected by `AlbertForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 44056


  Batch size = 32


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-28000


Configuration saved in albert-bs32-allsh-wd/checkpoint-28000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-28000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-28500


Configuration saved in albert-bs32-allsh-wd/checkpoint-28500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-28500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-29000


Configuration saved in albert-bs32-allsh-wd/checkpoint-29000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-29000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-29500


Configuration saved in albert-bs32-allsh-wd/checkpoint-29500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-29500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-30000


Configuration saved in albert-bs32-allsh-wd/checkpoint-30000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-30000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-30500


Configuration saved in albert-bs32-allsh-wd/checkpoint-30500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-30500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-31000


Configuration saved in albert-bs32-allsh-wd/checkpoint-31000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-31000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-31500


Configuration saved in albert-bs32-allsh-wd/checkpoint-31500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-31500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-32000


Configuration saved in albert-bs32-allsh-wd/checkpoint-32000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-32000/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-32500


Configuration saved in albert-bs32-allsh-wd/checkpoint-32500/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-32500/pytorch_model.bin


Saving model checkpoint to albert-bs32-allsh-wd/checkpoint-33000


Configuration saved in albert-bs32-allsh-wd/checkpoint-33000/config.json


Model weights saved in albert-bs32-allsh-wd/checkpoint-33000/pytorch_model.bin


The following columns in the evaluation set don't have a corresponding argument in `AlbertForSequenceClassification.forward` and have been ignored: __index_level_0__, text. If __index_level_0__, text are not expected by `AlbertForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 44056


  Batch size = 32




Training completed. Do not forget to share your model on huggingface.co/models =)




Saving model checkpoint to /tmp/tmp3ic3dhdz


Configuration saved in /tmp/tmp3ic3dhdz/config.json


Model weights saved in /tmp/tmp3ic3dhdz/pytorch_model.bin


VBox(children=(Label(value='44.592 MB of 44.592 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, m…

0,1
eval/accuracy,▁▆▆███
eval/brier_score,█▄▃▁▃▇
eval/f1,▆▁█▆▆▆
eval/loss,▅▂▂▁▄█
eval/precision,▁█▅█▇█
eval/recall,█▁▇▄▅▄
eval/roc_auc,▁▆▇█▇▇
eval/runtime,▃▂▁█▆█
eval/samples_per_second,▆▇█▁▃▁
eval/steps_per_second,▆▇█▁▃▁

0,1
eval/accuracy,0.71643
eval/brier_score,0.20173
eval/f1,0.56984
eval/loss,0.62037
eval/precision,0.61405
eval/recall,0.53157
eval/roc_auc,0.75521
eval/runtime,459.2081
eval/samples_per_second,95.939
eval/steps_per_second,2.999


In [12]:
import wandb
# wandb.finish()