## Novel model approach

This notebook contains the source code for all model training and testing (including the hyperparameter grid search) carried out before the development and submission of the final best model. The approach explores a variation on the transformer architecture that uses different heads, as detailed in the report.

In [None]:
import gc
import itertools

import numpy as np
import pandas as pd
import torch
from sklearn.metrics import f1_score, average_precision_score
from torch.utils.data import DataLoader
from tqdm import tqdm

In [2]:
from pcl_tf.dataset_manager import DatasetManager as DM
from pcl_tf.collation import collate_fn
from pcl_tf.tf import warmup_model, get_tokenizer

In [3]:
# --- Notebook-wide configuration -------------------------------------------
NUM_LABELS = 7                      # passed to PCLModel as n_labels
LOAD_BATCH_SIZE = 64                # batch size shared by the train and dev DataLoaders
LOCAL_CACHE_DIR = './models_cache'  # cache_dir handed to warmup_model
MODEL_NAME = "albert-base-v2"       # default encoder / tokenizer checkpoint
NUM_WORKERS = 8                     # DataLoader worker processes
PIN_MEMORY = False

# Seed the RNGs once, up front, so that a Restart & Run All reproduces the
# same shuffling / dropout / initialisation sequence.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {DEVICE}")

Using device: cuda


In [4]:
print('Warming up tokenizer...')
tokenizer = get_tokenizer(MODEL_NAME)

print('Warming up encoder (downloads model if needed)...')

# Called for its side effect only. The return value is deliberately not bound:
# assigning it to `_` would overwrite IPython's last-output variable and keep
# whatever warmup_model returns alive for the rest of the kernel session.
warmup_model(MODEL_NAME, device=None, cache_dir=LOCAL_CACHE_DIR)
print('Model cache warmup completed.')

Warming up tokenizer...
Warming up encoder (downloads model if needed)...


Loading weights:   0%|          | 0/25 [00:00<?, ?it/s]

[1mAlbertModel LOAD REPORT[0m from: albert-base-v2
Key                          | Status     |  | 
-----------------------------+------------+--+-
predictions.LayerNorm.bias   | UNEXPECTED |  | 
predictions.bias             | UNEXPECTED |  | 
predictions.dense.bias       | UNEXPECTED |  | 
predictions.LayerNorm.weight | UNEXPECTED |  | 
predictions.dense.weight     | UNEXPECTED |  | 
predictions.decoder.bias     | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


Model cache warmup completed.


In [5]:
# Locations of the label files, the cleaned paragraph texts, the test set and
# the category annotations, all relative to the notebook's working directory.
(
    train_labels_path,
    dev_labels_path,
    texts_path,
    test_path,
    cats_path,
) = (
    "data/train_semeval_parids-labels.csv",
    "data/dev_semeval_parids-labels.csv",
    "data/dontpatronizeme_pcl_cleaned.csv",
    "data/task4_test.tsv",
    "data/dontpatronizeme_categories.tsv",
)

In [6]:
# Read the paragraph texts once, force par_id to int and use it as the index.
texts_df = (
    pd.read_csv(texts_path, low_memory=False)
    .assign(par_id=lambda frame: frame["par_id"].astype(int))
    .set_index("par_id")
)

In [7]:
# Build the training split from its label file, handing it the shared
# texts_df, then print the split's summary statistics.
training_ds = DM(train_labels_path, texts_df=texts_df)
training_ds.print_stats()

Total samples: 8375
Binary distribution: [7581  794]
Multilabel distribution: [574. 160. 162. 192. 145. 363.  29.]


In [8]:
# Build the dev split from its label file, handing it the shared texts_df,
# then print the split's summary statistics.
dev_ds = DM(dev_labels_path, texts_df=texts_df)
dev_ds.print_stats()

Total samples: 2093
Binary distribution: [1894  199]
Multilabel distribution: [142.  36.  62.  38.  52. 106.  11.]


In [9]:
# Tokenizer for the default encoder; each loader's collate lambda closes over it.
tokenizer = get_tokenizer(MODEL_NAME)

# Both loaders share every setting except the dataset and whether it is
# shuffled (training: yes, dev: no).
train_loader, dev_loader = (
    DataLoader(
        split,
        batch_size=LOAD_BATCH_SIZE,
        shuffle=do_shuffle,
        collate_fn=lambda b: collate_fn(tokenizer, b),
        pin_memory=PIN_MEMORY,
        num_workers=NUM_WORKERS,
    )
    for split, do_shuffle in ((training_ds, True), (dev_ds, False))
)

In [10]:
def evaluate_dev(model, dataloader, device):
    """Score ``model`` on every batch of ``dataloader`` and return dev metrics.

    Args:
        model: Module put into eval mode and called as
            ``model(input_ids=..., attention_mask=...)``; its output must be
            indexable with ``"logit_bin"`` and ``"logit_multi"``.
        dataloader: Iterable of batches, each indexable with ``"input_ids"``,
            ``"attention_mask"`` and ``"labels"`` (tensors). Column 0 of
            ``labels`` is used as the binary target and the remaining columns
            as the multilabel targets.
        device: Device the model inputs are moved to.

    Returns:
        dict with
        ``"micro_f1"``: F1 over the flattened multilabel targets, with
        predictions obtained by thresholding sigmoid probabilities at 0.5;
        ``"bin_ap"``: average precision of the binary sigmoid probabilities.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    model.eval()
    bin_probs = []
    bin_labels = []
    multi_probs = []
    multi_labels = []

    with torch.no_grad():
        for b in dataloader:
            input_ids = b["input_ids"].to(device)
            attention_mask = b["attention_mask"].to(device)
            # Labels only feed the NumPy-side metrics below, so they are never
            # copied to `device` (the model is not given them here).
            labels = b["labels"].cpu().numpy()
            out = model(input_ids=input_ids, attention_mask=attention_mask)
            bin_probs.append(torch.sigmoid(out["logit_bin"]).cpu().numpy())
            multi_probs.append(torch.sigmoid(out["logit_multi"]).cpu().numpy())
            bin_labels.append(labels[:, 0])
            multi_labels.append(labels[:, 1:])

    if not bin_probs:
        # np.concatenate would fail with an unhelpful message on an empty list.
        raise ValueError("evaluate_dev received an empty dataloader; no batches to score.")

    bin_probs = np.concatenate(bin_probs)
    bin_labels = np.concatenate(bin_labels)
    multi_probs = np.concatenate(multi_probs)
    multi_labels = np.concatenate(multi_labels)

    multi_preds = (multi_probs >= 0.5).astype(int)
    micro_f1 = f1_score(multi_labels.flatten(), multi_preds.flatten(), zero_division=0)
    return {"micro_f1": micro_f1, "bin_ap": average_precision_score(bin_labels, bin_probs)}


In [11]:
from pcl_tf.tf import PCLModel

def train_and_eval(config):
    """Train a fresh PCLModel for one configuration and score it on the dev set.

    Args:
        config: Mapping with the keys ``"model_name"``, ``"dropout"``,
            ``"lr"``, ``"wd"`` and ``"epochs"``. An optional ``"seed"`` key,
            when present and not None, seeds PyTorch before anything
            stochastic happens so that configurations can be compared on
            equal footing.

    Returns:
        The metrics dict produced by ``evaluate_dev`` on the dev split.

    NOTE(review): ``config["max_len"]`` is not consumed here. Wiring it in
    needs a maximum-length argument on ``collate_fn`` / the tokenizer call,
    whose signature is not visible from this notebook. TODO confirm and wire.
    """
    seed = config.get("seed")
    if seed is not None:
        torch.manual_seed(seed)

    # Tokenize with the tokenizer that belongs to the encoder being trained.
    # Reusing the notebook-level loaders would feed every encoder token ids
    # produced by the MODEL_NAME tokenizer, whatever config["model_name"] is.
    cfg_tokenizer = get_tokenizer(config["model_name"])
    loader_kwargs = dict(
        batch_size=LOAD_BATCH_SIZE,
        collate_fn=lambda b: collate_fn(cfg_tokenizer, b),
        pin_memory=PIN_MEMORY,
        num_workers=NUM_WORKERS,
    )
    cfg_train_loader = DataLoader(training_ds, shuffle=True, **loader_kwargs)
    cfg_dev_loader = DataLoader(dev_ds, shuffle=False, **loader_kwargs)

    model = PCLModel(config["model_name"], n_labels=NUM_LABELS, dropout=config["dropout"], device=DEVICE).to(DEVICE)
    optim = torch.optim.AdamW(model.parameters(), lr=config["lr"], weight_decay=config["wd"])
    print("Model and optimizer created")

    print("Starting training...")
    for epoch in range(config["epochs"]):
        model.train()
        running = 0.0
        for batch in cfg_train_loader:
            input_ids = batch["input_ids"].to(DEVICE, non_blocking=True)
            attention_mask = batch["attention_mask"].to(DEVICE, non_blocking=True)
            labels = batch["labels"].to(DEVICE, non_blocking=True)
            optim.zero_grad()
            # The model computes the training loss itself when given labels.
            out = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = out["loss"]
            loss.backward()
            optim.step()
            running += loss.item()
        # Mean training loss over the batches of this epoch.
        print(f"Epoch {epoch+1}, Loss: {running/len(cfg_train_loader)}")

    return evaluate_dev(model, cfg_dev_loader, DEVICE)

In [None]:
# Hyperparameter search space: the grid search trains one model per element of
# the Cartesian product of these lists, taken in this key order.
# NOTE(review): train_and_eval, as written in this notebook, never reads
# "max_len", so its two values currently repeat otherwise identical runs.
grid = dict(
    model_name=["albert-base-v2", "roberta-base"],
    lr=[2e-5, 5e-5, 1e-4, 5e-4],
    wd=[1e-3, 1e-2, 5e-3],
    max_len=[128, 256],
    dropout=[0, 0.1, 0.01],
    epochs=[6],
)

In [13]:
# Hand PyTorch's cached-but-unused GPU memory back to the driver before the
# grid search starts. This does not free tensors that are still referenced.
torch.cuda.empty_cache()

In [None]:
# Exhaustive grid search: train and evaluate one model per configuration and
# collect either its metrics or the error message of the failed run.
keys, values = zip(*grid.items())
results = []
for combo in tqdm(list(itertools.product(*values)), desc="Grid"):
    cfg = dict(zip(keys, combo))
    try:
        metrics = train_and_eval(cfg)
        results.append({**cfg, **metrics})
        print("CFG:", cfg, "=>", metrics)
    except Exception as e:
        print("Error for cfg", cfg, ":", e)
        results.append({**cfg, "error": str(e)})
    finally:
        # Runs after the except block has finished, i.e. once the exception
        # (and the frames its traceback kept alive) has been dropped. Collect
        # garbage and release cached GPU blocks so that one failed or finished
        # run does not starve the next one of memory.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        # Checkpoint after every configuration so an interrupt or a crash
        # does not lose the runs that have already completed.
        pd.DataFrame(results).to_csv("grid_results.csv", index=False)

res_df = pd.DataFrame(results)
res_df.to_csv("grid_results.csv", index=False)
print("Saved grid_results.csv")

# Only rows that actually produced a score can be the best configuration.
if "micro_f1" in res_df.columns:
    scored = res_df.dropna(subset=["micro_f1"])
else:
    scored = res_df.iloc[0:0]
best_idx = scored["micro_f1"].idxmax() if not scored.empty else None
if best_idx is not None:
    print("Best config:\n", res_df.loc[best_idx].to_dict())
else:
    print("No successful runs found.")

Grid:   0%|          | 0/72 [00:00<?, ?it/s]

Model and optimizer created
Starting training...


Grid:   1%|▏         | 1/72 [00:00<00:57,  1.23it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 2e-05, 'wd': 0.001, 'max_len': 128, 'dropout': 0, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 57.50 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.87 GiB memory in use. Of the allocated memory 6.69 GiB is allocated by PyTorch, and 15.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:   3%|▎         | 2/72 [00:01<00:43,  1.61it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 2e-05, 'wd': 0.001, 'max_len': 128, 'dropout': 0.1, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 57.50 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.87 GiB memory in use. Of the allocated memory 6.69 GiB is allocated by PyTorch, and 15.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:   4%|▍         | 3/72 [00:01<00:39,  1.76it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 2e-05, 'wd': 0.001, 'max_len': 128, 'dropout': 0.01, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 64.31 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.87 GiB memory in use. Of the allocated memory 6.69 GiB is allocated by PyTorch, and 15.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:   6%|▌         | 4/72 [00:02<00:36,  1.88it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 2e-05, 'wd': 0.001, 'max_len': 256, 'dropout': 0, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 66.25 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.87 GiB memory in use. Of the allocated memory 6.69 GiB is allocated by PyTorch, and 15.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:   7%|▋         | 5/72 [00:02<00:34,  1.96it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 2e-05, 'wd': 0.001, 'max_len': 256, 'dropout': 0.1, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 65.25 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.91 GiB memory in use. Of the allocated memory 6.74 GiB is allocated by PyTorch, and 15.46 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:   8%|▊         | 6/72 [00:03<00:33,  1.98it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 2e-05, 'wd': 0.001, 'max_len': 256, 'dropout': 0.01, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 64.44 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.91 GiB memory in use. Of the allocated memory 6.74 GiB is allocated by PyTorch, and 15.46 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:  10%|▉         | 7/72 [00:03<00:32,  1.98it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 2e-05, 'wd': 0.01, 'max_len': 128, 'dropout': 0, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 63.88 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.91 GiB memory in use. Of the allocated memory 6.74 GiB is allocated by PyTorch, and 15.46 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:  11%|█         | 8/72 [00:04<00:31,  2.01it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 2e-05, 'wd': 0.01, 'max_len': 128, 'dropout': 0.1, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 64.44 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.91 GiB memory in use. Of the allocated memory 6.74 GiB is allocated by PyTorch, and 15.46 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Exception ignored in: <function _afterFork at 0x745322dbb920>
Traceback (most recent call last):
  File "/usr/lib/python3.13/logging/__init__.py", line 245, in _afterFork
    def _afterFork():
KeyboardInterrupt: 
Grid:  12%|█▎        | 9/72 [00:04<00:30,  2.04it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 2e-05, 'wd': 0.01, 'max_len': 128, 'dropout': 0.01, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 64.94 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.91 GiB memory in use. Of the allocated memory 6.74 GiB is allocated by PyTorch, and 15.46 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:  14%|█▍        | 10/72 [00:05<00:29,  2.07it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 2e-05, 'wd': 0.01, 'max_len': 256, 'dropout': 0, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 65.25 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.91 GiB memory in use. Of the allocated memory 6.74 GiB is allocated by PyTorch, and 15.46 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:  15%|█▌        | 11/72 [00:05<00:30,  2.03it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 2e-05, 'wd': 0.01, 'max_len': 256, 'dropout': 0.1, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 75.00 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.96 GiB memory in use. Of the allocated memory 6.79 GiB is allocated by PyTorch, and 13.08 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:  17%|█▋        | 12/72 [00:06<00:29,  2.00it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 2e-05, 'wd': 0.01, 'max_len': 256, 'dropout': 0.01, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 74.25 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.96 GiB memory in use. Of the allocated memory 6.79 GiB is allocated by PyTorch, and 13.08 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:  18%|█▊        | 13/72 [00:06<00:29,  2.00it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 2e-05, 'wd': 0.005, 'max_len': 128, 'dropout': 0, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 74.62 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.96 GiB memory in use. Of the allocated memory 6.79 GiB is allocated by PyTorch, and 13.08 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:  19%|█▉        | 14/72 [00:07<00:28,  2.02it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 2e-05, 'wd': 0.005, 'max_len': 128, 'dropout': 0.1, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 79.31 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.96 GiB memory in use. Of the allocated memory 6.79 GiB is allocated by PyTorch, and 13.08 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:  21%|██        | 15/72 [00:07<00:28,  2.02it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 2e-05, 'wd': 0.005, 'max_len': 128, 'dropout': 0.01, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 59.50 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.99 GiB memory in use. Of the allocated memory 6.81 GiB is allocated by PyTorch, and 13.08 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:  22%|██▏       | 16/72 [00:08<00:27,  2.02it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 2e-05, 'wd': 0.005, 'max_len': 256, 'dropout': 0, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 47.50 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.99 GiB memory in use. Of the allocated memory 6.81 GiB is allocated by PyTorch, and 13.08 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:  24%|██▎       | 17/72 [00:08<00:28,  1.90it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 2e-05, 'wd': 0.005, 'max_len': 256, 'dropout': 0.1, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 47.06 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.99 GiB memory in use. Of the allocated memory 6.81 GiB is allocated by PyTorch, and 13.08 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:  25%|██▌       | 18/72 [00:09<00:27,  1.97it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 2e-05, 'wd': 0.005, 'max_len': 256, 'dropout': 0.01, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 49.12 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.99 GiB memory in use. Of the allocated memory 6.81 GiB is allocated by PyTorch, and 13.08 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:  26%|██▋       | 19/72 [00:09<00:26,  1.99it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 5e-05, 'wd': 0.001, 'max_len': 128, 'dropout': 0, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 49.38 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.99 GiB memory in use. Of the allocated memory 6.81 GiB is allocated by PyTorch, and 13.08 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:  28%|██▊       | 20/72 [00:10<00:26,  1.98it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 5e-05, 'wd': 0.001, 'max_len': 128, 'dropout': 0.1, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 50.00 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.99 GiB memory in use. Of the allocated memory 6.81 GiB is allocated by PyTorch, and 13.08 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:  29%|██▉       | 21/72 [00:10<00:25,  1.97it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 5e-05, 'wd': 0.001, 'max_len': 128, 'dropout': 0.01, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 50.69 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.99 GiB memory in use. Of the allocated memory 6.81 GiB is allocated by PyTorch, and 13.08 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:  31%|███       | 22/72 [00:11<00:25,  1.97it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 5e-05, 'wd': 0.001, 'max_len': 256, 'dropout': 0, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 51.12 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.99 GiB memory in use. Of the allocated memory 6.81 GiB is allocated by PyTorch, and 13.08 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:  32%|███▏      | 23/72 [00:11<00:24,  1.96it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 5e-05, 'wd': 0.001, 'max_len': 256, 'dropout': 0.1, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 47.06 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.99 GiB memory in use. Of the allocated memory 6.81 GiB is allocated by PyTorch, and 13.08 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:  33%|███▎      | 24/72 [00:12<00:24,  1.99it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 5e-05, 'wd': 0.001, 'max_len': 256, 'dropout': 0.01, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 46.19 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.99 GiB memory in use. Of the allocated memory 6.81 GiB is allocated by PyTorch, and 13.08 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:  35%|███▍      | 25/72 [00:12<00:23,  2.00it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 5e-05, 'wd': 0.01, 'max_len': 128, 'dropout': 0, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 46.50 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.99 GiB memory in use. Of the allocated memory 6.81 GiB is allocated by PyTorch, and 13.08 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:  36%|███▌      | 26/72 [00:13<00:22,  2.02it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 5e-05, 'wd': 0.01, 'max_len': 128, 'dropout': 0.1, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 65.62 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.99 GiB memory in use. Of the allocated memory 6.81 GiB is allocated by PyTorch, and 13.08 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:  38%|███▊      | 27/72 [00:13<00:22,  1.99it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 5e-05, 'wd': 0.01, 'max_len': 128, 'dropout': 0.01, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 72.62 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.99 GiB memory in use. Of the allocated memory 6.81 GiB is allocated by PyTorch, and 13.08 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:  39%|███▉      | 28/72 [00:14<00:21,  2.03it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 5e-05, 'wd': 0.01, 'max_len': 256, 'dropout': 0, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 73.06 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.99 GiB memory in use. Of the allocated memory 6.81 GiB is allocated by PyTorch, and 13.08 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:  40%|████      | 29/72 [00:14<00:21,  2.03it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 5e-05, 'wd': 0.01, 'max_len': 256, 'dropout': 0.1, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 70.88 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.99 GiB memory in use. Of the allocated memory 6.81 GiB is allocated by PyTorch, and 13.08 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:  42%|████▏     | 30/72 [00:15<00:20,  2.04it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 5e-05, 'wd': 0.01, 'max_len': 256, 'dropout': 0.01, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 70.88 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.99 GiB memory in use. Of the allocated memory 6.81 GiB is allocated by PyTorch, and 13.08 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:  43%|████▎     | 31/72 [00:15<00:20,  1.99it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 5e-05, 'wd': 0.005, 'max_len': 128, 'dropout': 0, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 70.94 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.99 GiB memory in use. Of the allocated memory 6.81 GiB is allocated by PyTorch, and 13.08 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...


Grid:  44%|████▍     | 32/72 [00:16<00:19,  2.00it/s]

Error for cfg {'model_name': 'albert-base-v2', 'lr': 5e-05, 'wd': 0.005, 'max_len': 128, 'dropout': 0.1, 'epochs': 6} : CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 7.65 GiB of which 71.88 MiB is free. Process 6267 has 37.41 MiB memory in use. Process 6351 has 39.41 MiB memory in use. Process 44088 has 21.16 MiB memory in use. Including non-PyTorch memory, this process has 6.99 GiB memory in use. Of the allocated memory 6.81 GiB is allocated by PyTorch, and 13.08 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Model and optimizer created
Starting training...
